2026-05-20 21:39:12 +08:00

378 lines
15 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
'use strict';
const fs = require('node:fs');
const os = require('node:os');
const path = require('node:path');
const { spawn } = require('node:child_process');
const GROUNDING_PROMPT = '请在图中框出汽车上的旧伤/损伤区域。如果能看到损伤用矩形框标出其位置。输出bounding box坐标格式 <bbox>x1 y1 x2 y2</bbox>。';
/**
 * Report a progress event as one JSON line on stderr.
 *
 * @param {string} stage - Pipeline stage the event belongs to.
 * @param {string} message - Human-readable description of the event.
 * @param {string} [status='running'] - Status label for the event.
 * @param {object} [extra={}] - Additional fields; merged last, so they can
 *   override any of the base fields.
 */
function emit(stage, message, status = 'running', extra = {}) {
  const event = Object.assign(
    {
      type: 'process_event',
      stage,
      message,
      status,
      timestamp: new Date().toISOString(),
    },
    extra,
  );
  process.stderr.write(`${JSON.stringify(event)}\n`);
}
/**
 * Load the skill input as a parsed JSON value.
 *
 * Sources are tried in order: the first CLI argument, the SKILL_INPUT
 * environment variable, the AIFLOW_SKILL_INPUT environment variable, and
 * finally everything readable from stdin (file descriptor 0).
 *
 * @returns {*} The parsed JSON, or an empty object when the chosen source is
 *   blank after trimming.
 * @throws {SyntaxError} When the chosen source is not valid JSON.
 */
function readInput() {
  const inline = process.argv[2] || process.env.SKILL_INPUT || process.env.AIFLOW_SKILL_INPUT;
  // stdin is only consulted when no inline source was supplied.
  const payload = String(inline || fs.readFileSync(0, 'utf8')).trim();
  if (payload === '') return {};
  return JSON.parse(payload);
}
/**
 * Coerce a value to a number and clamp it into [min, max].
 *
 * @param {*} value - Candidate value (number or numeric string).
 * @param {number} min - Lower bound of the accepted range.
 * @param {number} max - Upper bound of the accepted range.
 * @param {number} fallback - Returned when the value is missing or not a
 *   finite number.
 * @returns {number} The clamped number, or `fallback`.
 */
function clampNumber(value, min, max, fallback) {
  // Number(null) and Number('') are both 0, which would be clamped to `min`
  // instead of being treated as "not provided"; route them to the fallback.
  if (value === null || value === undefined) return fallback;
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const number = Number(value);
  if (!Number.isFinite(number)) return fallback;
  return Math.max(min, Math.min(max, number));
}
/**
 * Resolve the directory that holds all task workspaces.
 *
 * Precedence: VEHICLE_SCRATCH_WORKSPACE_ROOT (used as-is), then
 * RZYX_AI_WORKSPACE_ROOT (the skill folder name is appended unless the path
 * already ends with it), then RZYX_AI_DATA_DIR/workspace/<skill folder>, and
 * finally <os tmpdir>/<skill folder>.
 *
 * @param {object} env - Environment-variable map.
 * @returns {string} The workspace root path.
 */
function workspaceRoot(env) {
  const skillFolder = 'vehicle-scratch-inspection';
  const {
    VEHICLE_SCRATCH_WORKSPACE_ROOT: explicitRoot,
    RZYX_AI_WORKSPACE_ROOT: sharedRoot,
    RZYX_AI_DATA_DIR: dataDir,
  } = env;

  if (explicitRoot) {
    return path.resolve(explicitRoot);
  }
  if (sharedRoot) {
    const resolved = path.resolve(sharedRoot);
    const alreadyScoped = path.basename(resolved) === skillFolder;
    return alreadyScoped ? resolved : path.join(resolved, skillFolder);
  }
  if (dataDir) {
    return path.join(path.resolve(dataDir), 'workspace', skillFolder);
  }
  return path.join(os.tmpdir(), skillFolder);
}
/**
 * Validate a task id and locate its existing workspace directory.
 *
 * @param {*} taskId - Task identifier; after trimming it must be 1-80
 *   characters from [a-zA-Z0-9_-].
 * @param {object} env - Environment-variable map passed to workspaceRoot.
 * @returns {{taskId: string, workspacePath: string}} The trimmed id and the
 *   workspace path.
 * @throws {Error} When the id is invalid or the workspace path does not exist.
 */
function workspaceFor(taskId, env) {
  const taskIdPattern = /^[a-zA-Z0-9_-]{1,80}$/;
  const safe = String(taskId || '').trim();
  if (!taskIdPattern.test(safe)) {
    throw new Error('taskId非法或为空');
  }

  const workspacePath = path.join(workspaceRoot(env), safe);
  if (fs.existsSync(workspacePath)) {
    return { taskId: safe, workspacePath };
  }
  throw new Error(`workspace不存在: ${workspacePath}`);
}
/**
 * Read a UTF-8 file and parse its contents as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value.
 * @throws When the file cannot be read or does not contain valid JSON.
 */
function readJson(filePath) {
  const contents = fs.readFileSync(filePath, { encoding: 'utf8' });
  return JSON.parse(contents);
}
/**
 * Write a value to disk as 2-space-indented JSON followed by a newline.
 * Missing parent directories are created first.
 *
 * @param {string} filePath - Destination file.
 * @param {*} value - Value to serialize.
 */
function writeJson(filePath, value) {
  const parentDir = path.dirname(filePath);
  fs.mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  fs.writeFileSync(filePath, serialized + '\n', { encoding: 'utf8' });
}
/**
 * Best-effort JSON extraction from a value that may carry surrounding text
 * (for example a model reply wrapped in a Markdown code fence).
 *
 * Order of attempts: the whole trimmed text; the span from the first `{` to
 * the last `}`; the span from the first `[` to the last `]`. The object span
 * is tried before the array span so inputs that previously parsed keep
 * producing the same result; the array span only rescues inputs that would
 * otherwise have yielded null.
 *
 * @param {*} value - Object (returned as-is), string, or other value.
 * @returns {*} The parsed value, the original object, or null when the value
 *   is falsy or nothing parses.
 */
function parseJsonLoose(value) {
  if (!value) return null;
  if (typeof value === 'object') return value;
  const text = String(value).trim();

  // A wrapper distinguishes "parsed to a falsy value" from "failed to parse".
  const attempt = (candidate) => {
    try {
      return { ok: true, parsed: JSON.parse(candidate) };
    } catch {
      return { ok: false, parsed: null };
    }
  };

  const whole = attempt(text);
  if (whole.ok) return whole.parsed;

  const delimiters = [['{', '}'], ['[', ']']];
  for (const [open, close] of delimiters) {
    const start = text.indexOf(open);
    const end = text.lastIndexOf(close);
    if (start >= 0 && end > start) {
      const sliced = attempt(text.slice(start, end + 1));
      if (sliced.ok) return sliced.parsed;
    }
  }
  return null;
}
/**
 * Extract every `<bbox>x1 y1 x2 y2</bbox>` tag from a text, in order of
 * appearance.
 *
 * @param {*} text - Text to scan; falsy values are treated as empty.
 * @returns {Array<{x1: number, y1: number, x2: number, y2: number}>} Boxes
 *   whose four coordinates all parse to finite numbers.
 */
function parseBboxes(text) {
  const pattern = /<bbox>\s*([0-9.]+)\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+)\s*<\/bbox>/gi;
  const source = String(text || '');
  return Array.from(source.matchAll(pattern), (hit) => hit.slice(1).map(Number))
    // The character class admits strings like "1..2"; those become NaN here.
    .filter((coords) => coords.every(Number.isFinite))
    .map(([x1, y1, x2, y2]) => ({ x1, y1, x2, y2 }));
}
/**
 * Map frame timestamps to the bounding box the model reported for each.
 *
 * For every timestamp the text is searched for `[<ts> second] <bbox>…</bbox>`,
 * where `<ts>` may be written with one decimal, two decimals, or in its
 * shortest numeric form, and `second` may be plural. Tagged boxes whose
 * coordinates are not finite numbers are skipped.
 *
 * If no timestamp-tagged box is found at all, untagged boxes are assigned to
 * the timestamps positionally, and a single box is applied to every timestamp.
 *
 * @param {*} text - Raw model output.
 * @param {Array<number|string>} timestamps - Frame timestamps.
 * @returns {Map<number, {x1: number, y1: number, x2: number, y2: number}>}
 *   Boxes keyed by `Number(timestamp)`.
 */
function parseBboxesByTimestamp(text, timestamps) {
  const output = new Map();
  const source = String(text || '');
  const coordinate = '([0-9.]+)';
  const bboxPattern = `<bbox>\\s*${coordinate}\\s+${coordinate}\\s+${coordinate}\\s+${coordinate}\\s*<\\/bbox>`;
  // Escape every regex metacharacter, not just ".", so forms such as "1e+21"
  // cannot alter the pattern.
  const escapeForRegex = (item) => item.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  for (const ts of timestamps) {
    const key = Number(ts);
    const variants = [...new Set([key.toFixed(1), key.toFixed(2), String(key)])].map(escapeForRegex);
    const re = new RegExp(`\\[\\s*(?:${variants.join('|')})\\s*seconds?\\s*\\]\\s*${bboxPattern}`, 'i');
    const match = source.match(re);
    if (!match) continue;
    const [x1, y1, x2, y2] = match.slice(1, 5).map(Number);
    // Mirror parseBboxes: a token like "1..2" matches the class but is NaN.
    if ([x1, y1, x2, y2].every(Number.isFinite)) output.set(key, { x1, y1, x2, y2 });
  }

  if (output.size === 0) {
    const boxes = parseBboxes(source);
    boxes.slice(0, timestamps.length).forEach((box, index) => output.set(Number(timestamps[index]), box));
    if (boxes.length === 1 && timestamps.length > 1) {
      for (const ts of timestamps) output.set(Number(ts), boxes[0]);
    }
  }
  return output;
}
/**
 * Pick the frames closest in time to a timestamp.
 *
 * Frames within `windowSec` of the timestamp are preferred; if none qualify,
 * all frames are considered. The `limit` closest candidates are returned in
 * ascending timestamp order. The input array is not modified.
 *
 * @param {Array<{timestamp: number}>} frames - Candidate frames.
 * @param {number} timestamp - Target time.
 * @param {number} windowSec - Maximum distance for the preferred set.
 * @param {number} [limit=5] - Maximum number of frames to return.
 * @returns {Array<object>} Selected frames sorted by timestamp.
 */
function nearestFrames(frames, timestamp, windowSec, limit = 5) {
  const byCloseness = (a, b) => Math.abs(a.timestamp - timestamp) - Math.abs(b.timestamp - timestamp);
  const byTime = (a, b) => a.timestamp - b.timestamp;

  const inWindow = frames.filter(
    (frame) => Math.abs(Number(frame.timestamp) - Number(timestamp)) <= windowSec,
  );
  const pool = inWindow.length ? inWindow : frames;
  // One stable sort by distance is enough for either pool.
  const closest = [...pool].sort(byCloseness).slice(0, limit);
  return closest.sort(byTime);
}
/**
 * Convert a raw damage record into the shape used by the rest of the script.
 *
 * @param {object} raw - Damage entry as read from damages.json.
 * @param {number} index - Zero-based position, used to synthesize an id
 *   (`damage_001`, `damage_002`, …) when the record has none.
 * @returns {object} Normalized record; the untouched input is kept under `raw`.
 */
function normalizeDamage(raw, index) {
  const sequence = String(index + 1).padStart(3, '0');
  // Several spellings of the time field are accepted; 0 when none is present.
  const when = raw.timeSecond ?? raw.time_second ?? raw.timestamp ?? 0;
  const normalized = {
    id: raw.id || `damage_${sequence}`,
    timeSecond: Number(when),
    location: raw.location || raw.part || '未知部位',
    type: raw.type || '旧伤',
    severity: raw.severity || '轻微',
    description: raw.description || '',
    raw,
  };
  return normalized;
}
/**
 * Convert a box expressed on a 0-1000 grid into pixel coordinates.
 *
 * Each coordinate is scaled by width/1000 or height/1000 and rounded. The
 * corners are then ordered so that (x1, y1) is the top-left and (x2, y2) the
 * bottom-right, which keeps a box whose corners arrive in reverse order from
 * collapsing to the callers' minimum size. Finally x1/y1 are clamped to
 * [0, size - 1] and x2/y2 to [1, size].
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} box - Box on the
 *   0-1000 grid.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @returns {{x1: number, y1: number, x2: number, y2: number}} Pixel box.
 */
function normalizedToPixel(box, width, height) {
  const scale = (value, extent) => Math.round((value * extent) / 1000);
  const clamp = (value, low, high) => Math.max(low, Math.min(high, value));

  const xa = scale(box.x1, width);
  const xb = scale(box.x2, width);
  const ya = scale(box.y1, height);
  const yb = scale(box.y2, height);

  return {
    x1: clamp(Math.min(xa, xb), 0, width - 1),
    y1: clamp(Math.min(ya, yb), 0, height - 1),
    x2: clamp(Math.max(xa, xb), 1, width),
    y2: clamp(Math.max(ya, yb), 1, height),
  };
}
/**
 * Escape the five XML special characters (`<`, `>`, `&`, `'`, `"`).
 *
 * @param {*} value - Value to escape; falsy values become an empty string.
 * @returns {string} Text that can be embedded in XML content or attributes.
 */
function escapeXml(value) {
  const entities = new Map([
    ['<', '&lt;'],
    ['>', '&gt;'],
    ['&', '&amp;'],
    ["'", '&apos;'],
    ['"', '&quot;'],
  ]);
  let escaped = '';
  for (const char of String(value || '')) {
    escaped += entities.get(char) ?? char;
  }
  return escaped;
}
/**
 * Last-resort marker: write a 1000x1000 placeholder SVG that shows the source
 * file name, the box outline and the label.
 *
 * The SVG is written next to `targetPath`, with the final extension replaced
 * by `.svg`. When no box is given, a default box (80,80)-(920,920) is drawn
 * and `marked` is reported as false.
 *
 * @param {object} options
 * @param {string} options.sourcePath - Original frame; only its base name is used.
 * @param {string} options.targetPath - Intended output path of the marked image.
 * @param {?{x1: number, y1: number, x2: number, y2: number}} options.bbox - Box
 *   drawn directly in the SVG's 0-1000 coordinate space.
 * @param {string} options.label - Caption drawn above the box.
 * @returns {{marked: boolean, pixel: object, targetPath: string, fallback: string}}
 *   Result; `pixel` is computed against the 1000x1000 canvas, not the source image.
 */
function writeSvgMarker({ sourcePath, targetPath, bbox, label }) {
  const width = 1000;
  const height = 1000;
  const box = bbox || { x1: 80, y1: 80, x2: 920, y2: 920 };
  const rectW = Math.max(2, box.x2 - box.x1);
  const rectH = Math.max(2, box.y2 - box.y1);
  // Keep the caption below the two header lines even for boxes near the top.
  const labelY = Math.max(130, box.y1 - 16);
  const sourceName = escapeXml(path.basename(sourcePath));
  const caption = escapeXml(label);

  const svg = [
    `<svg width="${width}" height="${height}" viewBox="0 0 1000 1000" xmlns="http://www.w3.org/2000/svg">`,
    '<rect width="1000" height="1000" fill="#f8fafc"/>',
    '<text x="40" y="55" font-size="28" fill="#334155" font-family="Arial, sans-serif">FFmpeg/sharp不可用已生成SVG标注占位</text>',
    `<text x="40" y="94" font-size="18" fill="#64748b" font-family="Arial, sans-serif">${sourceName}</text>`,
    `<rect x="${box.x1}" y="${box.y1}" width="${rectW}" height="${rectH}" fill="none" stroke="#ff1f1f" stroke-width="8"/>`,
    `<text x="${box.x1}" y="${labelY}" font-size="26" fill="#ff1f1f" font-family="Arial, sans-serif">${caption}</text>`,
    '</svg>',
  ].join('\n');

  const svgPath = targetPath.replace(/\.[^.]+$/, '.svg');
  fs.writeFileSync(svgPath, svg, 'utf8');

  return {
    marked: Boolean(bbox),
    pixel: normalizedToPixel(box, width, height),
    targetPath: svgPath,
    fallback: 'svg',
  };
}
/**
 * Decide which ffmpeg executable to run.
 *
 * @param {object} env - Environment-variable map.
 * @returns {string} FFMPEG_PATH when set; otherwise the path exported by the
 *   optional `@ffmpeg-installer/ffmpeg` package; otherwise plain `ffmpeg`.
 */
function ffmpegCommand(env) {
  const override = env.FFMPEG_PATH;
  if (override) return override;
  try {
    const installer = require('@ffmpeg-installer/ffmpeg');
    return installer.path;
  } catch {
    // Optional package not available: rely on ffmpeg being on PATH.
    return 'ffmpeg';
  }
}
/**
 * Decide which ffprobe executable to run.
 *
 * @param {object} env - Environment-variable map.
 * @returns {string} FFPROBE_PATH when set; otherwise the path exported by the
 *   optional `@ffprobe-installer/ffprobe` package; otherwise plain `ffprobe`.
 */
function ffprobeCommand(env) {
  const override = env.FFPROBE_PATH;
  if (override) return override;
  try {
    const installer = require('@ffprobe-installer/ffprobe');
    return installer.path;
  } catch {
    // Optional package not available: rely on ffprobe being on PATH.
    return 'ffprobe';
  }
}
/**
 * Run a child process to completion and collect its output.
 *
 * stdin is ignored; stdout and stderr are captured as UTF-8 text.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves when the
 *   process exits with code 0.
 * @throws {Error} Rejects with the spawn error, or with
 *   `<command> exited <code|signal>: <last 800 chars of stderr>` on a non-zero
 *   exit or termination by signal.
 */
function runProcess(command, args) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    // Decode at the stream level so a multi-byte character split across two
    // chunks is not corrupted, as per-chunk Buffer#toString() would do.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    let stdout = '';
    let stderr = '';
    child.stdout.on('data', (chunk) => { stdout += chunk; });
    child.stderr.on('data', (chunk) => { stderr += chunk; });
    child.on('error', reject);
    child.on('close', (code, signal) => {
      if (code === 0) {
        resolve({ stdout, stderr });
        return;
      }
      // `code` is null when the child was terminated by a signal.
      reject(new Error(`${command} exited ${code ?? signal}: ${stderr.slice(-800)}`));
    });
  });
}
/**
 * Read an image's pixel dimensions by asking ffprobe for the first video
 * stream's width and height as JSON.
 *
 * @param {string} filePath - Image (or video) file to inspect.
 * @param {object} env - Environment-variable map used to locate ffprobe.
 * @returns {Promise<{width: number, height: number}>} Dimensions; a missing
 *   or zero value is reported as 1.
 * @throws When ffprobe fails or its output is not valid JSON.
 */
async function imageSize(filePath, env) {
  const probeArgs = [
    '-v', 'error',
    '-select_streams', 'v:0',
    '-show_entries', 'stream=width,height',
    '-of', 'json',
    filePath,
  ];
  const probe = await runProcess(ffprobeCommand(env), probeArgs);
  const report = JSON.parse(probe.stdout || '{}');
  const firstStream = report.streams?.[0] || {};
  const width = Number(firstStream.width || 1);
  const height = Number(firstStream.height || 1);
  return { width, height };
}
/**
 * Produce the marked copy of a frame at `targetPath`.
 *
 * Without a box the source is copied unchanged. With a box, three strategies
 * are tried in order, each used only if the previous one throws:
 *   1. `sharp`: composite a red rectangle and save as JPEG (quality 92);
 *   2. ffmpeg `drawbox`, using ffprobe to obtain the image size;
 *   3. a placeholder SVG written by writeSvgMarker.
 *
 * @param {object} options
 * @param {string} options.sourcePath - Original frame.
 * @param {string} options.targetPath - Output path; its directory is created.
 * @param {?{x1: number, y1: number, x2: number, y2: number}} options.bbox - Box
 *   on the 0-1000 grid, or a falsy value when nothing was grounded.
 * @param {string} options.label - Caption, used only by the SVG fallback.
 * @param {object} options.env - Environment-variable map for locating ffmpeg/ffprobe.
 * @returns {Promise<object>} `{ marked, pixel }`, plus `targetPath` and
 *   `fallback` when the SVG fallback was used.
 */
async function markImage({ sourcePath, targetPath, bbox, label, env }) {
  fs.mkdirSync(path.dirname(targetPath), { recursive: true });
  if (!bbox) {
    fs.copyFileSync(sourcePath, targetPath);
    return { marked: false, pixel: null };
  }

  // Prefer sharp when installed by the skill runtime.
  const drawWithSharp = async () => {
    const sharp = require('sharp');
    const metadata = await sharp(sourcePath).metadata();
    const width = metadata.width || 1;
    const height = metadata.height || 1;
    const pixel = normalizedToPixel(bbox, width, height);
    const rectW = Math.max(2, pixel.x2 - pixel.x1);
    const rectH = Math.max(2, pixel.y2 - pixel.y1);
    const svg = `<svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg"><rect x="${pixel.x1}" y="${pixel.y1}" width="${rectW}" height="${rectH}" fill="none" stroke="#ff1f1f" stroke-width="4"/></svg>`;
    const overlay = { input: Buffer.from(svg), top: 0, left: 0 };
    await sharp(sourcePath).composite([overlay]).jpeg({ quality: 92 }).toFile(targetPath);
    return { marked: true, pixel };
  };

  const drawWithFfmpeg = async () => {
    const { width, height } = await imageSize(sourcePath, env);
    const pixel = normalizedToPixel(bbox, width, height);
    const boxW = Math.max(2, pixel.x2 - pixel.x1);
    const boxH = Math.max(2, pixel.y2 - pixel.y1);
    await runProcess(ffmpegCommand(env), [
      '-hide_banner',
      '-y',
      '-i', sourcePath,
      '-vf', `drawbox=x=${pixel.x1}:y=${pixel.y1}:w=${boxW}:h=${boxH}:color=red@1.0:t=4`,
      '-q:v', '2',
      targetPath,
    ]);
    return { marked: true, pixel };
  };

  try {
    return await drawWithSharp();
  } catch {
    // sharp missing or failed: fall through to ffmpeg.
  }
  try {
    return await drawWithFfmpeg();
  } catch {
    // ffmpeg/ffprobe missing or failed: fall through to the SVG placeholder.
  }
  return writeSvgMarker({ sourcePath, targetPath, bbox, label });
}
/**
 * Build an `image_url` chat-content part that embeds a file as a base64 data URL.
 *
 * The MIME type is derived from the file extension (case-insensitive) so that
 * non-JPEG frames are labelled correctly; unknown or missing extensions keep
 * the previous `image/jpeg` label.
 *
 * @param {string} filePath - Image file to embed.
 * @returns {{type: string, image_url: {url: string}}} Content part.
 * @throws When the file cannot be read.
 */
function imageContent(filePath) {
  const mimeByExtension = {
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.webp': 'image/webp',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
  };
  const extension = path.extname(filePath).toLowerCase();
  const mime = Object.hasOwn(mimeByExtension, extension) ? mimeByExtension[extension] : 'image/jpeg';
  const encoded = fs.readFileSync(filePath).toString('base64');
  return {
    type: 'image_url',
    image_url: { url: `data:${mime};base64,${encoded}` },
  };
}
/**
 * Send one grounding request to the chat-completions endpoint and return the
 * text of the first choice.
 *
 * Configuration is read from `env`: ARK_API_KEY (required), ARK_API_URL and
 * DAMAGE_GROUNDING_MODEL (both optional, with built-in defaults). The request
 * is aborted after 600000 ms.
 *
 * @param {object} options
 * @param {object} options.env - Environment-variable map.
 * @param {Array<object>} options.content - Content parts of the single user message.
 * @returns {Promise<string>} The first choice's message content, or '' when absent.
 * @throws {Error} When ARK_API_KEY is missing, or when the response status is
 *   not OK (the message is the response body, truncated to 1000 characters).
 */
async function callGroundingApi({ env, content }) {
  const { ARK_API_KEY: apiKey } = env;
  if (!apiKey) {
    throw new Error('缺少ARK_API_KEY');
  }
  const apiUrl = env.ARK_API_URL || 'https://ark.cn-beijing.volces.com/api/v3/chat/completions';
  const model = env.DAMAGE_GROUNDING_MODEL || 'doubao-seed-2-0-pro-260215';
  emit('ground_damages', '调用豆包grounding模型', 'running', { model });

  const payload = {
    model,
    messages: [{ role: 'user', content }],
    max_tokens: 2048,
    temperature: 0.1,
  };
  const response = await fetch(apiUrl, {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(600000),
  });

  const text = await response.text();
  let body;
  try {
    body = JSON.parse(text);
  } catch {
    // Keep a non-JSON body available for the error message below.
    body = { raw: text };
  }
  if (!response.ok) {
    throw new Error(JSON.stringify(body).slice(0, 1000));
  }
  const reply = body.choices?.[0]?.message?.content;
  return reply || '';
}
/**
 * Extract caller-supplied grounding results from the skill input.
 *
 * The first non-nullish field among `groundingResults`, `modelResult`,
 * `result` and `mockResult` is used. It is parsed leniently as JSON when
 * possible, then unwrapped: an array is returned as-is, an object's `results`
 * or `groundingResults` array is returned, and anything else is wrapped in a
 * one-element array.
 *
 * @param {object} input - Skill input.
 * @returns {?Array} The result list, or null when nothing usable was supplied.
 */
function collectGroundingResults(input) {
  const candidate = input.groundingResults ?? input.modelResult ?? input.result ?? input.mockResult;
  if (!candidate) return null;

  const parsed = parseJsonLoose(candidate) || candidate;
  const possibleLists = [parsed, parsed.results, parsed.groundingResults];
  for (const list of possibleLists) {
    if (Array.isArray(list)) return list;
  }
  return [parsed];
}
/**
 * Find the caller-supplied grounding text for one damage.
 *
 * An entry matches when its `damageId` or `id` equals the damage id, or when
 * its `index` / `damageIndex` equals the damage's position; if nothing
 * matches, the entry at the same position is used. The entry is turned into
 * text as follows: a string is returned as-is; a `bbox` (optionally under
 * `bbox.normalized`) given as `{x1, y1, x2, y2}` or as a four-element array
 * is rendered as a `<bbox>` tag; otherwise `text`, `content`, `raw`, or the
 * JSON form of the entry is returned.
 *
 * @param {?Array} results - Supplied results, or null when there are none.
 * @param {{id: *}} damage - Normalized damage record.
 * @param {number} index - Zero-based position of the damage.
 * @returns {?string} Grounding text, or null when no entry is available.
 */
function externalTextFor(results, damage, index) {
  if (!results) return null;

  // Number(null) and Number('') are 0, so an absent index must be rejected
  // explicitly or it would match the first damage.
  const matchesIndex = (value) => value !== null && value !== undefined && value !== '' && Number(value) === index;
  // Without this guard an undefined damage id would equal an entry's missing id.
  const hasDamageId = damage.id !== null && damage.id !== undefined;

  const found = results.find((item) => {
    if (!item || typeof item !== 'object') return false;
    if (hasDamageId && (item.damageId === damage.id || item.id === damage.id)) return true;
    return matchesIndex(item.index) || matchesIndex(item.damageIndex);
  }) || results[index];

  if (!found) return null;
  if (typeof found === 'string') return found;

  if (found.bbox) {
    const b = found.bbox.normalized || found.bbox;
    const rawCoords = Array.isArray(b) ? b.slice(0, 4) : [b.x1, b.y1, b.x2, b.y2];
    // Accept numeric strings such as "120" alongside real numbers.
    const coords = rawCoords.map((value) => (typeof value === 'string' && value.trim() !== '' ? Number(value) : value));
    if (coords.length === 4 && coords.every(Number.isFinite)) {
      return `<bbox>${coords.join(' ')}</bbox>`;
    }
  }
  return found.text || found.content || found.raw || JSON.stringify(found);
}
/**
 * Ground every recorded damage on its nearest frames and write the result.
 *
 * Steps:
 *   1. Locate the task workspace and read `video_info.json` and `damages.json`.
 *   2. If neither caller-supplied grounding results nor ARK_API_KEY are
 *      available, return a `needsModelVision` response carrying the prompt and
 *      the normalized damages, so the caller can run the model itself.
 *   3. For each damage, pick up to five frames near its time, obtain grounding
 *      text (supplied results first, otherwise the grounding API), parse
 *      per-frame boxes, and write a marked copy of each frame under
 *      `marked_frames/`.
 *   4. Write `grounding.json` and emit a completion event.
 *
 * @param {object} input - Skill input; requires `taskId`. Optional:
 *   `groundingWindow` (seconds, clamped to [0.2, 10], default 2) and the
 *   result fields read by collectGroundingResults.
 * @param {object} [env=process.env] - Environment-variable map.
 * @returns {Promise<object>} `{ success: true, totalDamages, totalMarked }`,
 *   or the `needsModelVision` response described above.
 * @throws {Error} When `taskId` is missing or invalid, the workspace or its
 *   JSON files cannot be read, a frame has neither `path` nor `relativePath`,
 *   or the grounding API call fails.
 */
async function run(input, env = process.env) {
  if (!input?.taskId) throw new Error('taskId不能为空');
  const workspace = workspaceFor(input.taskId, env);
  const videoInfo = readJson(path.join(workspace.workspacePath, 'video_info.json'));
  const damagesDoc = readJson(path.join(workspace.workspacePath, 'damages.json'));
  const frames = (videoInfo.frames || []).map((frame, frameIndex) => {
    // path.isAbsolute() throws on non-strings, so a frame that only carries
    // `relativePath` must not reach it.
    if (typeof frame.path === 'string' && path.isAbsolute(frame.path)) return { ...frame };
    const relative = frame.relativePath || frame.path;
    if (typeof relative !== 'string' || relative === '') {
      throw new Error(`video_info.json第${frameIndex + 1}帧缺少path或relativePath`);
    }
    return { ...frame, path: path.join(workspace.workspacePath, relative) };
  });
  const damages = (damagesDoc.damages || []).map(normalizeDamage);
  const groundingWindow = clampNumber(input.groundingWindow, 0.2, 10, 2);
  const externalResults = collectGroundingResults(input);
  if (!externalResults && !env.ARK_API_KEY) {
    return {
      success: false,
      needsModelVision: true,
      taskId: workspace.taskId,
      workspacePath: workspace.workspacePath,
      prompt: GROUNDING_PROMPT,
      damages,
      error: '未配置ARK_API_KEY且未提供groundingResults。请使用豆包grounding按prompt返回bbox后重试。',
    };
  }
  const annotations = [];
  let totalMarked = 0;
  for (let i = 0; i < damages.length; i += 1) {
    const damage = damages[i];
    const selectedFrames = nearestFrames(frames, damage.timeSecond, groundingWindow, 5);
    // Each image is preceded by a timestamp tag so the reply can be matched
    // back to individual frames.
    const content = [];
    for (const frame of selectedFrames) {
      content.push({ type: 'text', text: `[${Number(frame.timestamp).toFixed(2)} second]` });
      content.push(imageContent(frame.path));
    }
    content.push({ type: 'text', text: `${GROUNDING_PROMPT}\n损伤:${damage.location} ${damage.type}。描述:${damage.description}` });
    const groundingText = externalTextFor(externalResults, damage, i) || await callGroundingApi({ env, content });
    const bboxMap = parseBboxesByTimestamp(groundingText, selectedFrames.map(frame => frame.timestamp));
    const markedFrames = [];
    const damageId = damage.id || `damage_${String(i + 1).padStart(3, '0')}`;
    // The id comes from damages.json and ends up in a file name: neutralize
    // path separators and other characters that are unsafe in file names so
    // output cannot land outside marked_frames/.
    const fileStem = String(damageId).replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_');
    for (const frame of selectedFrames) {
      const bbox = bboxMap.get(Number(frame.timestamp)) || null;
      const markedName = `${fileStem}_${Number(frame.timestamp).toFixed(2).replace('.', '_')}s.jpg`;
      const markedPath = path.join(workspace.workspacePath, 'marked_frames', markedName);
      const markResult = await markImage({ sourcePath: frame.path, targetPath: markedPath, bbox, label: `${damage.location}${damage.type}`, env });
      if (bbox) totalMarked += 1;
      // The SVG fallback writes to a different file name and reports it.
      const finalMarkedPath = markResult.targetPath || markedPath;
      const finalRelativePath = path.relative(workspace.workspacePath, finalMarkedPath).replace(/\\/g, '/');
      markedFrames.push({
        timestamp: frame.timestamp,
        sourcePath: frame.path,
        sourceRelativePath: frame.relativePath,
        markedPath: finalMarkedPath,
        markedRelativePath: finalRelativePath,
        bbox: bbox ? { normalized: bbox, pixel: markResult.pixel } : null,
        groundingRaw: groundingText,
        groundingStatus: bbox ? 'grounded' : 'no_bbox',
      });
    }
    annotations.push({ damageId, damage, markedFrames });
  }
  writeJson(path.join(workspace.workspacePath, 'grounding.json'), {
    taskId: workspace.taskId,
    workspacePath: workspace.workspacePath,
    totalDamages: damages.length,
    totalMarked,
    annotations,
    generatedAt: new Date().toISOString(),
  });
  emit('ground_damages', 'grounding完成并写入grounding.json', 'completed', { totalDamages: damages.length, totalMarked });
  return { success: true, totalDamages: damages.length, totalMarked };
}
// Entry point: run the skill with the parsed input and print exactly one JSON
// document on stdout, either the result or `{ success: false, error }`.
(async function main() {
  try {
    const result = await run(readInput());
    process.stdout.write(JSON.stringify(result));
  } catch (failure) {
    const reason = failure && failure.message ? failure.message : String(failure);
    process.stdout.write(JSON.stringify({ success: false, error: reason }));
  }
})();