378 lines
15 KiB
JavaScript
Raw Permalink Normal View History

2026-05-20 21:39:12 +08:00
#!/usr/bin/env node
'use strict';
const fs = require('node:fs');
const os = require('node:os');
const path = require('node:path');
const { spawn } = require('node:child_process');
// Prompt text (Chinese) asking the model to box damaged areas on the car and to reply
// with coordinates in the form `<bbox>x1 y1 x2 y2</bbox>`. The string is runtime data.
const GROUNDING_PROMPT = '请在图中框出汽车上的旧伤/损伤区域。如果能看到损伤用矩形框标出其位置。输出bounding box坐标格式 <bbox>x1 y1 x2 y2</bbox>。';
/**
 * Writes one progress event to stderr as a single line of JSON.
 *
 * @param {string} stage - Pipeline stage the event belongs to.
 * @param {string} message - Human-readable description.
 * @param {string} [status='running'] - Status label for the event.
 * @param {object} [extra={}] - Additional fields merged last, so they can override the defaults.
 */
function emit(stage, message, status = 'running', extra = {}) {
  const event = Object.assign(
    {
      type: 'process_event',
      stage,
      message,
      status,
      timestamp: new Date().toISOString(),
    },
    extra,
  );
  process.stderr.write(`${JSON.stringify(event)}\n`);
}
/**
 * Loads the skill input as JSON.
 *
 * Sources are tried in order: first CLI argument, `SKILL_INPUT`,
 * `AIFLOW_SKILL_INPUT`, then everything readable from stdin (fd 0).
 *
 * @returns {*} The parsed JSON value, or `{}` when the chosen source is blank.
 * @throws {SyntaxError} When the chosen source is not valid JSON.
 */
function readInput() {
  let source = process.argv[2];
  if (!source) source = process.env.SKILL_INPUT;
  if (!source) source = process.env.AIFLOW_SKILL_INPUT;
  if (!source) source = fs.readFileSync(0, 'utf8');

  const text = String(source).trim();
  if (!text) return {};
  return JSON.parse(text);
}
/**
 * Converts `value` to a finite number and clamps it into `[min, max]`.
 *
 * Only numbers and non-blank strings are treated as numeric input. `Number(null)`,
 * `Number('')`, `Number(false)` and `Number([])` all evaluate to 0, which would
 * otherwise be clamped to `min` instead of selecting `fallback`.
 *
 * @param {*} value - Candidate value, typically taken from parsed JSON input.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @param {*} fallback - Returned as-is when `value` is missing or not a finite number.
 * @returns {*} The clamped number, or `fallback`.
 */
function clampNumber(value, min, max, fallback) {
  const isNumericInput = typeof value === 'number'
    || (typeof value === 'string' && value.trim() !== '');
  if (!isNumericInput) return fallback;
  const number = Number(value);
  if (!Number.isFinite(number)) return fallback;
  return Math.max(min, Math.min(max, number));
}
/**
 * Resolves the directory that holds all task workspaces for this skill.
 *
 * Precedence: `VEHICLE_SCRATCH_WORKSPACE_ROOT` (used as-is), then
 * `RZYX_AI_WORKSPACE_ROOT` (the skill sub-directory is appended unless the path
 * already ends with it), then `RZYX_AI_DATA_DIR/workspace/<skill>`, and finally
 * `<os tmpdir>/<skill>`.
 *
 * @param {object} env - Environment-variable map.
 * @returns {string} Workspace root path.
 */
function workspaceRoot(env) {
  const SKILL_DIR = 'vehicle-scratch-inspection';
  const {
    VEHICLE_SCRATCH_WORKSPACE_ROOT: explicitRoot,
    RZYX_AI_WORKSPACE_ROOT: sharedRoot,
    RZYX_AI_DATA_DIR: dataDir,
  } = env;

  if (explicitRoot) return path.resolve(explicitRoot);

  if (sharedRoot) {
    const resolved = path.resolve(sharedRoot);
    if (path.basename(resolved) === SKILL_DIR) return resolved;
    return path.join(resolved, SKILL_DIR);
  }

  if (dataDir) return path.join(path.resolve(dataDir), 'workspace', SKILL_DIR);

  return path.join(os.tmpdir(), SKILL_DIR);
}
/**
 * Validates a task id and locates its existing workspace directory.
 *
 * The id is restricted to 1-80 characters from `[a-zA-Z0-9_-]`, so it cannot
 * contain path separators.
 *
 * @param {*} taskId - Task identifier supplied by the caller.
 * @param {object} env - Environment-variable map passed on to `workspaceRoot`.
 * @returns {{taskId: string, workspacePath: string}} The trimmed id and its workspace path.
 * @throws {Error} When the id is blank or malformed, or the workspace path does not exist.
 */
function workspaceFor(taskId, env) {
  const TASK_ID_PATTERN = /^[a-zA-Z0-9_-]{1,80}$/;
  const normalizedId = String(taskId || '').trim();
  if (!TASK_ID_PATTERN.test(normalizedId)) {
    throw new Error('taskId非法或为空');
  }

  const workspacePath = path.join(workspaceRoot(env), normalizedId);
  const exists = fs.existsSync(workspacePath);
  if (!exists) {
    throw new Error(`workspace不存在: ${workspacePath}`);
  }

  return { taskId: normalizedId, workspacePath };
}
/**
 * Reads a UTF-8 file and parses it as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the file cannot be read or its content is not valid JSON.
 */
function readJson(filePath) {
  const contents = fs.readFileSync(filePath, 'utf8');
  return JSON.parse(contents);
}
/**
 * Serializes `value` as 2-space-indented JSON with a trailing newline and writes
 * it to `filePath`, creating missing parent directories first.
 *
 * @param {string} filePath - Destination file.
 * @param {*} value - Value to serialize.
 */
function writeJson(filePath, value) {
  const parentDir = path.dirname(filePath);
  fs.mkdirSync(parentDir, { recursive: true });

  const serialized = JSON.stringify(value, null, 2);
  fs.writeFileSync(filePath, serialized + '\n', 'utf8');
}
/**
 * Best-effort JSON extraction.
 *
 * Falsy input yields `null`; objects (including arrays) are returned untouched.
 * Otherwise the trimmed string form is parsed as a whole, and if that fails the
 * span from the first `{` to the last `}` is tried.
 *
 * @param {*} value - Raw value, typically model output or caller-supplied JSON text.
 * @returns {*} The parsed value, or `null` when nothing could be parsed.
 */
function parseJsonLoose(value) {
  if (!value) return null;
  if (typeof value === 'object') return value;

  // Parse failures are expected here and simply mean "try the next strategy".
  const attempt = (candidate) => {
    try {
      return { ok: true, parsed: JSON.parse(candidate) };
    } catch {
      return { ok: false, parsed: null };
    }
  };

  const text = String(value).trim();
  const whole = attempt(text);
  if (whole.ok) return whole.parsed;

  const open = text.indexOf('{');
  const close = text.lastIndexOf('}');
  if (open < 0 || close <= open) return null;

  const embedded = attempt(text.slice(open, close + 1));
  return embedded.ok ? embedded.parsed : null;
}
/**
 * Collects every `<bbox>x1 y1 x2 y2</bbox>` tag found in `text`, in order of appearance.
 *
 * Tags are matched case-insensitively; a tag whose numbers do not all convert
 * to finite values is skipped.
 *
 * @param {*} text - Text to scan; falsy values are treated as an empty string.
 * @returns {Array<{x1: number, y1: number, x2: number, y2: number}>} Parsed boxes.
 */
function parseBboxes(text) {
  const pattern = /<bbox>\s*([0-9.]+)\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+)\s*<\/bbox>/gi;
  const found = [];
  for (const hit of String(text || '').matchAll(pattern)) {
    const coords = hit.slice(1).map(Number);
    if (!coords.every(Number.isFinite)) continue;
    const [x1, y1, x2, y2] = coords;
    found.push({ x1, y1, x2, y2 });
  }
  return found;
}
/**
 * Maps frame timestamps to the bounding box the model reported for them.
 *
 * For every timestamp the text is searched for `[<ts> second] <bbox>…</bbox>`,
 * accepting the timestamp with one decimal, two decimals, or in its shortest
 * form. Boxes whose coordinates are not all finite numbers (e.g. `1.2.3`) are
 * discarded, matching the validation done by `parseBboxes`.
 *
 * When no timestamped box is found, untagged boxes are assigned to timestamps by
 * position; a single box with several timestamps is applied to all of them.
 *
 * @param {*} text - Model output; falsy values are treated as an empty string.
 * @param {Array<number|string>} timestamps - Frame timestamps in seconds.
 * @returns {Map<number, {x1: number, y1: number, x2: number, y2: number}>}
 *   Boxes keyed by `Number(timestamp)`.
 */
function parseBboxesByTimestamp(text, timestamps) {
  const output = new Map();
  const source = String(text || '');
  // Escape every regex metacharacter, not just the first dot, so forms such as
  // "1e+21" cannot alter the pattern.
  const escapeRegExp = literal => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  for (const ts of timestamps) {
    const seconds = Number(ts);
    const variants = [...new Set([seconds.toFixed(1), seconds.toFixed(2), String(seconds)])]
      .map(escapeRegExp);
    const re = new RegExp(`\\[\\s*(?:${variants.join('|')})\\s*second\\s*\\]\\s*<bbox>\\s*([0-9.]+)\\s+([0-9.]+)\\s+([0-9.]+)\\s+([0-9.]+)\\s*<\\/bbox>`, 'i');
    const match = source.match(re);
    if (!match) continue;
    const [x1, y1, x2, y2] = match.slice(1, 5).map(Number);
    // `[0-9.]+` also accepts malformed numbers; never store a NaN coordinate.
    if ([x1, y1, x2, y2].every(Number.isFinite)) output.set(seconds, { x1, y1, x2, y2 });
  }
  if (output.size === 0) {
    const boxes = parseBboxes(source);
    boxes.slice(0, timestamps.length).forEach((box, index) => output.set(Number(timestamps[index]), box));
    if (boxes.length === 1 && timestamps.length > 1) {
      for (const ts of timestamps) output.set(Number(ts), boxes[0]);
    }
  }
  return output;
}
/**
 * Picks up to `limit` frames closest in time to `timestamp`.
 *
 * Frames within `windowSec` of the timestamp are preferred; when none qualify,
 * all frames are considered. The closest `limit` frames are kept and returned
 * in ascending timestamp order. The input array is not mutated.
 *
 * @param {Array<{timestamp: number}>} frames - Candidate frames.
 * @param {number} timestamp - Target time in seconds.
 * @param {number} windowSec - Maximum distance from `timestamp` for the preferred set.
 * @param {number} [limit=5] - Maximum number of frames to return.
 * @returns {Array<object>} Selected frames, sorted by timestamp.
 */
function nearestFrames(frames, timestamp, windowSec, limit = 5) {
  const distance = frame => Math.abs(frame.timestamp - timestamp);
  const byDistance = (a, b) => distance(a) - distance(b);
  const byTime = (a, b) => a.timestamp - b.timestamp;

  const inWindow = frames.filter(
    frame => Math.abs(Number(frame.timestamp) - Number(timestamp)) <= windowSec,
  );
  const pool = inWindow.length ? inWindow : frames.slice().sort(byDistance);

  const closest = pool.slice().sort(byDistance).slice(0, limit);
  return closest.sort(byTime);
}
/**
 * Converts one raw damage record into the shape used by the rest of the script.
 *
 * Missing fields get defaults: a sequential `damage_NNN` id derived from `index`,
 * time 0, and placeholder location/type/severity strings. The original record is
 * kept under `raw`.
 *
 * @param {object} raw - Damage entry as read from the damages document.
 * @param {number} index - Zero-based position of the entry.
 * @returns {{id: string, timeSecond: number, location: string, type: string,
 *   severity: string, description: string, raw: object}} Normalized record.
 */
function normalizeDamage(raw, index) {
  const sequence = String(index + 1).padStart(3, '0');
  // The time may arrive under any of three keys; the first non-nullish one wins.
  const when = raw.timeSecond ?? raw.time_second ?? raw.timestamp ?? 0;

  const id = raw.id || `damage_${sequence}`;
  const timeSecond = Number(when);
  const location = raw.location || raw.part || '未知部位';
  const type = raw.type || '旧伤';
  const severity = raw.severity || '轻微';
  const description = raw.description || '';

  return { id, timeSecond, location, type, severity, description, raw };
}
/**
 * Converts a box expressed on a 0-1000 scale into pixel coordinates for an image
 * of `width` x `height`.
 *
 * Corners are ordered before clamping, so a box reported with reversed corners
 * (x1 > x2 or y1 > y2) still yields `x1 <= x2` and `y1 <= y2` rather than an
 * inverted rectangle. The top-left corner is clamped to `[0, size - 1]` and the
 * bottom-right corner to `[1, size]`.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} box - Box on the 0-1000 scale.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @returns {{x1: number, y1: number, x2: number, y2: number}} Pixel box.
 */
function normalizedToPixel(box, width, height) {
  const scale = (value, extent) => Math.round((value * extent) / 1000);
  const clamp = (value, low, high) => Math.max(low, Math.min(high, value));

  const xa = scale(box.x1, width);
  const xb = scale(box.x2, width);
  const ya = scale(box.y1, height);
  const yb = scale(box.y2, height);

  return {
    x1: clamp(Math.min(xa, xb), 0, width - 1),
    y1: clamp(Math.min(ya, yb), 0, height - 1),
    x2: clamp(Math.max(xa, xb), 1, width),
    y2: clamp(Math.max(ya, yb), 1, height),
  };
}
/**
 * Escapes the five XML special characters (`<`, `>`, `&`, `'`, `"`) in `value`.
 *
 * @param {*} value - Value to escape; falsy values become an empty string.
 * @returns {string} Text safe to embed in XML/SVG content.
 */
function escapeXml(value) {
  const text = String(value || '');
  return text.replace(/[<>&'"]/g, (char) => {
    switch (char) {
      case '<': return '&lt;';
      case '>': return '&gt;';
      case '&': return '&amp;';
      case "'": return '&apos;';
      case '"': return '&quot;';
      default: return undefined;
    }
  });
}
/**
 * Last-resort marker: writes a 1000x1000 placeholder SVG that shows the source
 * file name, a red rectangle and the label. The source image itself is not embedded.
 *
 * The SVG is written next to `targetPath` with the extension replaced by `.svg`.
 * Without a `bbox`, a default rectangle from (80, 80) to (920, 920) is drawn.
 *
 * @param {object} options
 * @param {string} options.sourcePath - Original frame; only its base name is used.
 * @param {string} options.targetPath - Intended output path of the marked image.
 * @param {{x1: number, y1: number, x2: number, y2: number}|null} options.bbox - Box on the 0-1000 scale.
 * @param {string} options.label - Caption drawn above the rectangle.
 * @returns {{marked: boolean, pixel: object, targetPath: string, fallback: string}}
 *   `marked` tells whether a real bbox was supplied; `targetPath` is the SVG actually written.
 */
function writeSvgMarker({ sourcePath, targetPath, bbox, label }) {
  const canvasWidth = 1000;
  const canvasHeight = 1000;
  const svgPath = targetPath.replace(/\.[^.]+$/, '.svg');
  const box = bbox || { x1: 80, y1: 80, x2: 920, y2: 920 };
  // Keep the rectangle visible even for degenerate boxes.
  const rectW = Math.max(2, box.x2 - box.x1);
  const rectH = Math.max(2, box.y2 - box.y1);
  // The caption never rises above y=130, so it stays clear of the two header lines.
  const labelY = Math.max(130, box.y1 - 16);
  const fileName = escapeXml(path.basename(sourcePath));
  const caption = escapeXml(label);

  const svg = `<svg width="${canvasWidth}" height="${canvasHeight}" viewBox="0 0 1000 1000" xmlns="http://www.w3.org/2000/svg">
<rect width="1000" height="1000" fill="#f8fafc"/>
<text x="40" y="55" font-size="28" fill="#334155" font-family="Arial, sans-serif">FFmpeg/sharp不可用已生成SVG标注占位</text>
<text x="40" y="94" font-size="18" fill="#64748b" font-family="Arial, sans-serif">${fileName}</text>
<rect x="${box.x1}" y="${box.y1}" width="${rectW}" height="${rectH}" fill="none" stroke="#ff1f1f" stroke-width="8"/>
<text x="${box.x1}" y="${labelY}" font-size="26" fill="#ff1f1f" font-family="Arial, sans-serif">${caption}</text>
</svg>`;
  fs.writeFileSync(svgPath, svg, 'utf8');

  const pixel = normalizedToPixel(box, canvasWidth, canvasHeight);
  return { marked: Boolean(bbox), pixel, targetPath: svgPath, fallback: 'svg' };
}
/**
 * Chooses the ffmpeg executable: `FFMPEG_PATH` if set, otherwise the binary from
 * `@ffmpeg-installer/ffmpeg` when that package loads, otherwise plain `ffmpeg`.
 *
 * @param {object} env - Environment-variable map.
 * @returns {string} Command or path to execute.
 */
function ffmpegCommand(env) {
  const override = env.FFMPEG_PATH;
  if (override) return override;

  let bundled;
  try {
    bundled = require('@ffmpeg-installer/ffmpeg').path;
  } catch {
    // Optional dependency is unavailable; rely on ffmpeg being on PATH.
    return 'ffmpeg';
  }
  return bundled;
}
/**
 * Chooses the ffprobe executable: `FFPROBE_PATH` if set, otherwise the binary from
 * `@ffprobe-installer/ffprobe` when that package loads, otherwise plain `ffprobe`.
 *
 * @param {object} env - Environment-variable map.
 * @returns {string} Command or path to execute.
 */
function ffprobeCommand(env) {
  const override = env.FFPROBE_PATH;
  if (override) return override;

  let bundled;
  try {
    bundled = require('@ffprobe-installer/ffprobe').path;
  } catch {
    // Optional dependency is unavailable; rely on ffprobe being on PATH.
    return 'ffprobe';
  }
  return bundled;
}
/**
 * Runs a command without a shell and collects its stdout and stderr as text.
 *
 * Both streams are decoded through `setEncoding('utf8')`, so a multi-byte
 * character split across two chunks is reassembled instead of being decoded
 * chunk by chunk into replacement characters.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments passed verbatim to the executable.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves when the process exits with code 0.
 *   Rejects with the spawn error, or with an Error carrying the exit code and
 *   the last 800 characters of stderr for any other exit.
 */
function runProcess(command, args) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', chunk => { stdout += chunk; });
    child.stderr.on('data', chunk => { stderr += chunk; });
    child.on('error', reject);
    child.on('close', code => {
      if (code === 0) {
        resolve({ stdout, stderr });
        return;
      }
      reject(new Error(`${command} exited ${code}: ${stderr.slice(-800)}`));
    });
  });
}
/**
 * Asks ffprobe for the pixel dimensions of the first video stream of a file.
 *
 * @param {string} filePath - Image or video file to inspect.
 * @param {object} env - Environment-variable map used to locate ffprobe.
 * @returns {Promise<{width: number, height: number}>} Dimensions; a missing
 *   dimension is reported as 1.
 */
async function imageSize(filePath, env) {
  const probeArgs = [
    '-v', 'error',
    '-select_streams', 'v:0',
    '-show_entries', 'stream=width,height',
    '-of', 'json',
    filePath,
  ];
  const probe = await runProcess(ffprobeCommand(env), probeArgs);
  const report = JSON.parse(probe.stdout || '{}');
  const firstStream = report.streams?.[0] || {};
  const width = Number(firstStream.width || 1);
  const height = Number(firstStream.height || 1);
  return { width, height };
}
/**
 * Produces the marked copy of a frame at `targetPath`.
 *
 * Without a bbox the source file is copied unchanged. With a bbox, three
 * strategies are tried in order, each one only if the previous one threw:
 *   1. `sharp`: composite an SVG rectangle and save as JPEG (quality 92);
 *   2. ffmpeg `drawbox`, sized with the dimensions reported by ffprobe;
 *   3. `writeSvgMarker`, which writes a placeholder SVG instead.
 *
 * @param {object} options
 * @param {string} options.sourcePath - Frame to annotate.
 * @param {string} options.targetPath - Output path; its directory is created if missing.
 * @param {{x1: number, y1: number, x2: number, y2: number}|null} options.bbox - Box on the 0-1000 scale.
 * @param {string} options.label - Caption, used only by the SVG fallback.
 * @param {object} options.env - Environment-variable map used to locate ffmpeg/ffprobe.
 * @returns {Promise<object>} `{ marked, pixel }`, plus `targetPath` and `fallback`
 *   when the SVG fallback was used.
 */
async function markImage({ sourcePath, targetPath, bbox, label, env }) {
  fs.mkdirSync(path.dirname(targetPath), { recursive: true });

  if (!bbox) {
    fs.copyFileSync(sourcePath, targetPath);
    return { marked: false, pixel: null };
  }

  const drawWithSharp = async () => {
    // Prefer sharp when installed by the skill runtime.
    const sharp = require('sharp');
    const metadata = await sharp(sourcePath).metadata();
    const width = metadata.width || 1;
    const height = metadata.height || 1;
    const pixel = normalizedToPixel(bbox, width, height);
    const rectW = Math.max(2, pixel.x2 - pixel.x1);
    const rectH = Math.max(2, pixel.y2 - pixel.y1);
    const overlay = `<svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg"><rect x="${pixel.x1}" y="${pixel.y1}" width="${rectW}" height="${rectH}" fill="none" stroke="#ff1f1f" stroke-width="4"/></svg>`;
    await sharp(sourcePath)
      .composite([{ input: Buffer.from(overlay), top: 0, left: 0 }])
      .jpeg({ quality: 92 })
      .toFile(targetPath);
    return { marked: true, pixel };
  };

  const drawWithFfmpeg = async () => {
    const { width, height } = await imageSize(sourcePath, env);
    const pixel = normalizedToPixel(bbox, width, height);
    const boxW = Math.max(2, pixel.x2 - pixel.x1);
    const boxH = Math.max(2, pixel.y2 - pixel.y1);
    await runProcess(ffmpegCommand(env), [
      '-hide_banner',
      '-y',
      '-i', sourcePath,
      '-vf', `drawbox=x=${pixel.x1}:y=${pixel.y1}:w=${boxW}:h=${boxH}:color=red@1.0:t=4`,
      '-q:v', '2',
      targetPath,
    ]);
    return { marked: true, pixel };
  };

  try {
    return await drawWithSharp();
  } catch {
    try {
      return await drawWithFfmpeg();
    } catch {
      return writeSvgMarker({ sourcePath, targetPath, bbox, label });
    }
  }
}
/**
 * Builds an `image_url` chat-content part holding the file as a base64 data URL.
 *
 * The MIME type in the data URL is derived from the file extension so that PNG,
 * WebP, GIF or BMP frames are not mislabeled as JPEG. Unknown or missing
 * extensions keep the previous `image/jpeg` default.
 *
 * @param {string} filePath - Image file to embed.
 * @returns {{type: string, image_url: {url: string}}} Content part for the chat request.
 */
function imageContent(filePath) {
  const mimeByExtension = new Map([
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.png', 'image/png'],
    ['.webp', 'image/webp'],
    ['.gif', 'image/gif'],
    ['.bmp', 'image/bmp'],
  ]);
  // readFileSync also accepts Buffer/URL paths, but path.extname only takes strings.
  const extension = typeof filePath === 'string' ? path.extname(filePath).toLowerCase() : '';
  const mime = mimeByExtension.get(extension) || 'image/jpeg';
  const encoded = fs.readFileSync(filePath).toString('base64');
  return {
    type: 'image_url',
    image_url: { url: `data:${mime};base64,${encoded}` },
  };
}
/**
 * Sends one user message to the chat-completions endpoint and returns the reply content.
 *
 * Endpoint and model come from `ARK_API_URL` / `DAMAGE_GROUNDING_MODEL`, with
 * built-in defaults. The request is aborted after 600000 ms.
 *
 * @param {object} options
 * @param {object} options.env - Environment-variable map; must contain `ARK_API_KEY`.
 * @param {Array<object>} options.content - Content parts of the user message.
 * @returns {Promise<string>} Content of the first choice, or `''` when absent.
 * @throws {Error} When the API key is missing, or the response status is not OK
 *   (the message is the first 1000 characters of the serialized response body).
 */
async function callGroundingApi({ env, content }) {
  const { ARK_API_KEY: apiKey } = env;
  if (!apiKey) throw new Error('缺少ARK_API_KEY');

  const endpoint = env.ARK_API_URL || 'https://ark.cn-beijing.volces.com/api/v3/chat/completions';
  const model = env.DAMAGE_GROUNDING_MODEL || 'doubao-seed-2-0-pro-260215';
  emit('ground_damages', '调用豆包grounding模型', 'running', { model });

  const payload = {
    model,
    messages: [{ role: 'user', content }],
    max_tokens: 2048,
    temperature: 0.1,
  };
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(600000),
  });

  const rawText = await response.text();
  let body;
  try {
    body = JSON.parse(rawText);
  } catch {
    // Non-JSON responses are kept verbatim so they can still appear in the error.
    body = { raw: rawText };
  }

  if (!response.ok) {
    throw new Error(JSON.stringify(body).slice(0, 1000));
  }

  const reply = body.choices?.[0]?.message?.content;
  return reply || '';
}
/**
 * Extracts caller-supplied grounding results from the skill input.
 *
 * The first non-nullish of `groundingResults`, `modelResult`, `result` and
 * `mockResult` is used. It is loosely parsed as JSON when possible and then
 * unwrapped: an array is returned as-is, an object with an array under `results`
 * or `groundingResults` yields that array, and anything else is wrapped in a
 * single-element array.
 *
 * @param {object} input - Parsed skill input.
 * @returns {Array|null} List of results, or `null` when none were supplied.
 */
function collectGroundingResults(input) {
  const supplied = input.groundingResults ?? input.modelResult ?? input.result ?? input.mockResult;
  if (!supplied) return null;

  // Fall back to the raw value when it is not parseable JSON (e.g. plain bbox text).
  const decoded = parseJsonLoose(supplied) || supplied;
  if (Array.isArray(decoded)) return decoded;

  for (const key of ['results', 'groundingResults']) {
    const nested = decoded[key];
    if (Array.isArray(nested)) return nested;
  }
  return [decoded];
}
/**
 * Finds the caller-supplied grounding result for one damage and renders it as
 * text that the bbox parsers understand.
 *
 * Lookup order: an object entry whose `damageId`/`id` equals the damage id or
 * whose `index`/`damageIndex` equals `index`; otherwise the entry at position
 * `index`.
 *
 * Compared with naive `Number()` coercion, only real numbers and non-blank
 * numeric strings count as an index, so `index: null` or `index: ''` no longer
 * matches damage 0. An undefined damage id never matches entries that merely
 * lack an id. Bounding boxes may be given as `{x1, y1, x2, y2}` (optionally
 * under `normalized`) with number or numeric-string values, or as a
 * `[x1, y1, x2, y2]` array.
 *
 * @param {Array|null} results - Results from `collectGroundingResults`, or null.
 * @param {{id: string}} damage - Normalized damage record.
 * @param {number} index - Zero-based position of the damage.
 * @returns {string|null} `<bbox>…</bbox>` text, the entry's own text, its JSON
 *   serialization, or `null` when no entry applies.
 */
function externalTextFor(results, damage, index) {
  if (!results) return null;

  // Strict coercion: anything but a number or a non-blank string is "not a number".
  const toNumber = (value) => {
    if (typeof value === 'number') return value;
    if (typeof value === 'string' && value.trim() !== '') return Number(value);
    return Number.NaN;
  };

  const damageId = damage.id;
  const hasDamageId = damageId !== undefined && damageId !== null;
  const found = results.find((item) => {
    if (!item || typeof item !== 'object') return false;
    if (hasDamageId && (item.damageId === damageId || item.id === damageId)) return true;
    return toNumber(item.index) === index || toNumber(item.damageIndex) === index;
  }) || results[index];

  if (!found) return null;
  if (typeof found === 'string') return found;

  if (found.bbox) {
    const b = found.bbox.normalized || found.bbox;
    const rawCoords = Array.isArray(b) ? b.slice(0, 4) : [b.x1, b.y1, b.x2, b.y2];
    const coords = rawCoords.map(toNumber);
    if (coords.length === 4 && coords.every(Number.isFinite)) {
      return `<bbox>${coords.join(' ')}</bbox>`;
    }
  }
  return found.text || found.content || found.raw || JSON.stringify(found);
}
/**
 * Grounds every damage of a task onto nearby video frames and writes `grounding.json`.
 *
 * Steps:
 *   1. Locate the task workspace and read `video_info.json` and `damages.json`.
 *   2. For each damage, pick up to 5 frames around its timestamp.
 *   3. Obtain grounding text from caller-supplied results, or else from the
 *      grounding API. The image payload is built only when the API is called, so
 *      frames are not read and base64-encoded when an external result exists.
 *   4. Parse the bounding boxes and write a marked copy of each frame under
 *      `marked_frames/`.
 *
 * @param {object} input - Skill input; requires `taskId`, optional `groundingWindow`
 *   (seconds, clamped to 0.2-10, default 2) and externally produced grounding results.
 * @param {object} [env=process.env] - Environment-variable map.
 * @returns {Promise<object>} `{ success: true, totalDamages, totalMarked }`, or a
 *   `{ success: false, needsModelVision: true, … }` object when neither external
 *   results nor `ARK_API_KEY` are available.
 * @throws {Error} On a missing/invalid task id, missing workspace files, or API failure.
 */
async function run(input, env = process.env) {
  if (!input?.taskId) throw new Error('taskId不能为空');
  const workspace = workspaceFor(input.taskId, env);
  const videoInfo = readJson(path.join(workspace.workspacePath, 'video_info.json'));
  const damagesDoc = readJson(path.join(workspace.workspacePath, 'damages.json'));
  const frames = (videoInfo.frames || []).map(frame => {
    // path.isAbsolute() throws a TypeError for non-string input, so a frame that
    // only carries `relativePath` must skip the absolute-path check.
    const hasAbsolutePath = typeof frame.path === 'string' && path.isAbsolute(frame.path);
    return {
      ...frame,
      path: hasAbsolutePath ? frame.path : path.join(workspace.workspacePath, frame.relativePath || frame.path),
    };
  });
  const damages = (damagesDoc.damages || []).map(normalizeDamage);
  const groundingWindow = clampNumber(input.groundingWindow, 0.2, 10, 2);
  const externalResults = collectGroundingResults(input);
  if (!externalResults && !env.ARK_API_KEY) {
    // Nothing can produce bounding boxes: hand the prompt back to the caller.
    return {
      success: false,
      needsModelVision: true,
      taskId: workspace.taskId,
      workspacePath: workspace.workspacePath,
      prompt: GROUNDING_PROMPT,
      damages,
      error: '未配置ARK_API_KEY且未提供groundingResults。请使用豆包grounding按prompt返回bbox后重试。',
    };
  }
  const annotations = [];
  let totalMarked = 0;
  for (let i = 0; i < damages.length; i += 1) {
    const damage = damages[i];
    const selectedFrames = nearestFrames(frames, damage.timeSecond, groundingWindow, 5);
    let groundingText = externalTextFor(externalResults, damage, i);
    if (!groundingText) {
      // Build the multimodal payload lazily: it reads and base64-encodes every
      // selected frame, which is wasted work when an external result is available.
      const content = [];
      for (const frame of selectedFrames) {
        content.push({ type: 'text', text: `[${Number(frame.timestamp).toFixed(2)} second]` });
        content.push(imageContent(frame.path));
      }
      content.push({ type: 'text', text: `${GROUNDING_PROMPT}\n损伤:${damage.location} ${damage.type}。描述:${damage.description}` });
      groundingText = await callGroundingApi({ env, content });
    }
    const bboxMap = parseBboxesByTimestamp(groundingText, selectedFrames.map(frame => frame.timestamp));
    const markedFrames = [];
    const damageId = damage.id || `damage_${String(i + 1).padStart(3, '0')}`;
    for (const frame of selectedFrames) {
      const bbox = bboxMap.get(Number(frame.timestamp)) || null;
      const markedName = `${damageId}_${Number(frame.timestamp).toFixed(2).replace('.', '_')}s.jpg`;
      const markedPath = path.join(workspace.workspacePath, 'marked_frames', markedName);
      const markResult = await markImage({ sourcePath: frame.path, targetPath: markedPath, bbox, label: `${damage.location}${damage.type}`, env });
      if (bbox) totalMarked += 1;
      // The SVG fallback writes to a different file; report the path actually produced.
      const finalMarkedPath = markResult.targetPath || markedPath;
      const finalRelativePath = path.relative(workspace.workspacePath, finalMarkedPath).replace(/\\/g, '/');
      markedFrames.push({
        timestamp: frame.timestamp,
        sourcePath: frame.path,
        sourceRelativePath: frame.relativePath,
        markedPath: finalMarkedPath,
        markedRelativePath: finalRelativePath,
        bbox: bbox ? { normalized: bbox, pixel: markResult.pixel } : null,
        groundingRaw: groundingText,
        groundingStatus: bbox ? 'grounded' : 'no_bbox',
      });
    }
    annotations.push({ damageId, damage, markedFrames });
  }
  writeJson(path.join(workspace.workspacePath, 'grounding.json'), {
    taskId: workspace.taskId,
    workspacePath: workspace.workspacePath,
    totalDamages: damages.length,
    totalMarked,
    annotations,
    generatedAt: new Date().toISOString(),
  });
  emit('ground_damages', 'grounding完成并写入grounding.json', 'completed', { totalDamages: damages.length, totalMarked });
  return { success: true, totalDamages: damages.length, totalMarked };
}
// Entry point: run the skill with the parsed input and print exactly one JSON
// document to stdout, either the result or `{ success: false, error }`.
(async function main() {
  const report = payload => process.stdout.write(JSON.stringify(payload));
  try {
    const result = await run(readInput());
    report(result);
  } catch (failure) {
    const error = failure && failure.message ? failure.message : String(failure);
    report({ success: false, error });
  }
})();