#!/usr/bin/env node
'use strict';

const fs = require('node:fs');
const os = require('node:os');
const path = require('node:path');
const { spawn } = require('node:child_process');

// Prompt appended to every grounding request. The requested output format must
// stay in sync with the <bbox>…</bbox> pattern that parseBboxes() recognises.
const GROUNDING_PROMPT = '请在图中框出汽车上的旧伤/损伤区域。如果能看到损伤,用矩形框标出其位置。输出bounding box坐标,格式 <bbox>x1 y1 x2 y2</bbox>。';

// Replacement table used by escapeXml().
const XML_ENTITIES = Object.freeze({
  '<': '&lt;',
  '>': '&gt;',
  '&': '&amp;',
  "'": '&apos;',
  '"': '&quot;',
});

// Data-URL MIME types keyed by lower-cased file extension; anything else is sent as JPEG.
const IMAGE_MIME_BY_EXT = Object.freeze({
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.png': 'image/png',
  '.webp': 'image/webp',
  '.gif': 'image/gif',
  '.bmp': 'image/bmp',
});

/**
 * Write one JSON progress event line to stderr (stdout is reserved for the final result).
 * @param {string} stage - Pipeline stage name.
 * @param {string} message - Human-readable progress message.
 * @param {string} [status='running'] - Event status.
 * @param {object} [extra={}] - Additional fields merged into the event.
 */
function emit(stage, message, status = 'running', extra = {}) {
  process.stderr.write(JSON.stringify({
    type: 'process_event',
    stage,
    message,
    status,
    timestamp: new Date().toISOString(),
    ...extra,
  }) + '\n');
}

/**
 * Read the JSON input from, in priority order: argv[2], SKILL_INPUT,
 * AIFLOW_SKILL_INPUT, then stdin (only read when the earlier sources are empty).
 * @returns {object} Parsed input, or {} when the input is blank.
 * @throws {SyntaxError} When the input is non-empty but not valid JSON.
 */
function readInput() {
  const raw = String(
    process.argv[2]
      || process.env.SKILL_INPUT
      || process.env.AIFLOW_SKILL_INPUT
      || fs.readFileSync(0, 'utf8'),
  ).trim();
  return raw ? JSON.parse(raw) : {};
}

/**
 * Coerce a value to a number and clamp it to [min, max].
 * @param {*} value - Candidate value.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @param {number} fallback - Returned when value is missing or not a finite number.
 * @returns {number}
 */
function clampNumber(value, min, max, fallback) {
  // Number(null) and Number('') are 0, which would clamp to `min` instead of
  // signalling "not provided"; treat them as missing.
  if (value == null || value === '') return fallback;
  const number = Number(value);
  if (!Number.isFinite(number)) return fallback;
  return Math.max(min, Math.min(max, number));
}

/**
 * Resolve the directory that holds per-task workspaces from environment variables,
 * falling back to a folder under the OS temp directory.
 * @param {object} env - Environment variable map.
 * @returns {string} Workspace root path.
 */
function workspaceRoot(env) {
  if (env.VEHICLE_SCRATCH_WORKSPACE_ROOT) {
    return path.resolve(env.VEHICLE_SCRATCH_WORKSPACE_ROOT);
  }
  if (env.RZYX_AI_WORKSPACE_ROOT) {
    const root = path.resolve(env.RZYX_AI_WORKSPACE_ROOT);
    // Avoid appending the skill folder twice when the root already points at it.
    return path.basename(root) === 'vehicle-scratch-inspection'
      ? root
      : path.join(root, 'vehicle-scratch-inspection');
  }
  if (env.RZYX_AI_DATA_DIR) {
    return path.join(path.resolve(env.RZYX_AI_DATA_DIR), 'workspace', 'vehicle-scratch-inspection');
  }
  return path.join(os.tmpdir(), 'vehicle-scratch-inspection');
}

/**
 * Validate a task id and locate its existing workspace directory.
 * @param {string} taskId - Task identifier; restricted to [a-zA-Z0-9_-]{1,80},
 *   which also keeps it from escaping the workspace root.
 * @param {object} env - Environment variable map.
 * @returns {{taskId: string, workspacePath: string}}
 * @throws {Error} When the id is invalid or the workspace directory does not exist.
 */
function workspaceFor(taskId, env) {
  const safe = String(taskId || '').trim();
  if (!/^[a-zA-Z0-9_-]{1,80}$/.test(safe)) throw new Error('taskId非法或为空');
  const workspacePath = path.join(workspaceRoot(env), safe);
  if (!fs.existsSync(workspacePath)) throw new Error(`workspace不存在: ${workspacePath}`);
  return { taskId: safe, workspacePath };
}

/**
 * Read and parse a UTF-8 JSON file.
 * @param {string} filePath
 * @returns {*} Parsed JSON value.
 */
function readJson(filePath) {
  return JSON.parse(fs.readFileSync(filePath, 'utf8'));
}

/**
 * Write a value as pretty-printed JSON, creating parent directories as needed.
 * @param {string} filePath
 * @param {*} value
 */
function writeJson(filePath, value) {
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, 'utf8');
}

/**
 * Best-effort JSON parse: objects pass through, strings are parsed whole, and
 * otherwise the span from the first "{" to the last "}" is tried.
 * @param {*} value
 * @returns {*} Parsed value, or null when nothing could be parsed.
 */
function parseJsonLoose(value) {
  if (!value) return null;
  if (typeof value === 'object') return value;
  const text = String(value).trim();
  try {
    return JSON.parse(text);
  } catch {
    // Not pure JSON; fall through and try the embedded-object span.
  }
  const start = text.indexOf('{');
  const end = text.lastIndexOf('}');
  if (start >= 0 && end > start) {
    try {
      return JSON.parse(text.slice(start, end + 1));
    } catch {
      // Embedded span is not valid JSON either.
    }
  }
  return null;
}

/**
 * Extract every "<bbox>x1 y1 x2 y2</bbox>" occurrence from a text.
 * @param {string} text
 * @returns {Array<{x1: number, y1: number, x2: number, y2: number}>}
 */
function parseBboxes(text) {
  const boxes = [];
  // A fresh /g regex per call, so no lastIndex state leaks between calls.
  const re = /<bbox>\s*([0-9.]+)\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+)\s*<\/bbox>/gi;
  for (const match of String(text || '').matchAll(re)) {
    const [x1, y1, x2, y2] = match.slice(1).map(Number);
    // "[0-9.]+" also accepts strings such as "1.2.3", which Number() turns into NaN.
    if ([x1, y1, x2, y2].every(Number.isFinite)) boxes.push({ x1, y1, x2, y2 });
  }
  return boxes;
}

/**
 * Map frame timestamps to bounding boxes found in the model output.
 *
 * First looks for "[<timestamp> second] <bbox>…</bbox>" pairs. When none are
 * found, untagged boxes are assigned to timestamps in order, and a single box
 * is applied to every timestamp.
 * @param {string} text - Model output.
 * @param {Array<number|string>} timestamps - Timestamps of the frames sent to the model.
 * @returns {Map<number, {x1: number, y1: number, x2: number, y2: number}>}
 */
function parseBboxesByTimestamp(text, timestamps) {
  const output = new Map();
  const source = String(text || '');
  const num = '([0-9.]+)';
  for (const ts of timestamps) {
    const value = Number(ts);
    // Accept the spellings the model may echo back: 1.0, 1.00 and 1.
    const variants = [...new Set([value.toFixed(1), value.toFixed(2), String(value)])]
      .map(item => item.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    const re = new RegExp(
      `\\[\\s*(?:${variants.join('|')})\\s*second\\s*\\]\\s*<bbox>\\s*${num}\\s+${num}\\s+${num}\\s+${num}\\s*<\\/bbox>`,
      'i',
    );
    const match = source.match(re);
    if (!match) continue;
    const [x1, y1, x2, y2] = match.slice(1, 5).map(Number);
    if ([x1, y1, x2, y2].every(Number.isFinite)) output.set(value, { x1, y1, x2, y2 });
  }
  if (output.size === 0) {
    const boxes = parseBboxes(source);
    boxes.slice(0, timestamps.length).forEach((box, index) => {
      output.set(Number(timestamps[index]), box);
    });
    if (boxes.length === 1 && timestamps.length > 1) {
      for (const ts of timestamps) output.set(Number(ts), boxes[0]);
    }
  }
  return output;
}

/**
 * Pick up to `limit` frames closest to a timestamp, returned in chronological order.
 * Frames within `windowSec` are preferred; when none qualify, all frames are candidates.
 * @param {Array<{timestamp: number}>} frames
 * @param {number} timestamp - Target time in seconds.
 * @param {number} windowSec - Half-width of the preferred window in seconds.
 * @param {number} [limit=5] - Maximum number of frames to return.
 * @returns {Array<object>}
 */
function nearestFrames(frames, timestamp, windowSec, limit = 5) {
  const target = Number(timestamp);
  const distance = frame => Math.abs(Number(frame.timestamp) - target);
  const inWindow = frames.filter(frame => distance(frame) <= windowSec);
  const pool = inWindow.length ? inWindow : frames;
  return [...pool]
    .sort((a, b) => distance(a) - distance(b))
    .slice(0, limit)
    .sort((a, b) => Number(a.timestamp) - Number(b.timestamp));
}

/**
 * Normalise one raw damage record into the shape used by this script.
 * @param {object} raw - Raw damage entry from damages.json.
 * @param {number} index - Position in the list; used to synthesise a missing id.
 * @returns {{id: string, timeSecond: number, location: string, type: string,
 *   severity: string, description: string, raw: object}}
 */
function normalizeDamage(raw, index) {
  const time = Number(raw.timeSecond ?? raw.time_second ?? raw.timestamp ?? 0);
  return {
    id: raw.id || `damage_${String(index + 1).padStart(3, '0')}`,
    // A NaN time would make the distance comparator in nearestFrames() inconsistent.
    timeSecond: Number.isFinite(time) ? time : 0,
    location: raw.location || raw.part || '未知部位',
    type: raw.type || '旧伤',
    severity: raw.severity || '轻微',
    description: raw.description || '',
    raw,
  };
}

/**
 * Convert a box expressed on a 0–1000 scale into pixel coordinates clamped to the image.
 * @param {{x1: number, y1: number, x2: number, y2: number}} box
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @returns {{x1: number, y1: number, x2: number, y2: number}}
 */
function normalizedToPixel(box, width, height) {
  // Tolerate boxes whose corners arrive in reverse order.
  const left = Math.min(box.x1, box.x2);
  const right = Math.max(box.x1, box.x2);
  const top = Math.min(box.y1, box.y2);
  const bottom = Math.max(box.y1, box.y2);
  const x1 = Math.round((left * width) / 1000);
  const y1 = Math.round((top * height) / 1000);
  const x2 = Math.round((right * width) / 1000);
  const y2 = Math.round((bottom * height) / 1000);
  return {
    x1: Math.max(0, Math.min(width - 1, x1)),
    y1: Math.max(0, Math.min(height - 1, y1)),
    x2: Math.max(1, Math.min(width, x2)),
    y2: Math.max(1, Math.min(height, y2)),
  };
}

/**
 * Escape the five XML special characters so text can be embedded in SVG markup.
 * @param {*} value
 * @returns {string}
 */
function escapeXml(value) {
  return String(value ?? '').replace(/[<>&'"]/g, char => XML_ENTITIES[char]);
}

/**
 * Last-resort marker: write a standalone 1000x1000 SVG placeholder next to the
 * intended target (same name, .svg extension) showing the box and label.
 * @param {{sourcePath: string, targetPath: string, bbox: ?object, label: string}} options
 * @returns {{marked: boolean, pixel: object, targetPath: string, fallback: string}}
 */
function writeSvgMarker({ sourcePath, targetPath, bbox, label }) {
  const svgPath = targetPath.replace(/\.[^.]+$/, '.svg');
  const box = bbox || { x1: 80, y1: 80, x2: 920, y2: 920 };
  const width = 1000;
  const height = 1000;
  const rectW = Math.max(2, box.x2 - box.x1);
  const rectH = Math.max(2, box.y2 - box.y1);
  // Keep the label inside the canvas when the box touches the top edge.
  const labelY = Math.max(24, box.y1 - 10);
  const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}" viewBox="0 0 ${width} ${height}">
  <rect x="0" y="0" width="${width}" height="${height}" fill="#f3f4f6"/>
  <text x="24" y="36" font-size="22" fill="#374151">FFmpeg/sharp不可用,已生成SVG标注占位</text>
  <text x="24" y="66" font-size="18" fill="#6b7280">${escapeXml(path.basename(sourcePath))}</text>
  <rect x="${box.x1}" y="${box.y1}" width="${rectW}" height="${rectH}" fill="none" stroke="red" stroke-width="4"/>
  <text x="${box.x1}" y="${labelY}" font-size="22" fill="red">${escapeXml(label)}</text>
</svg>
`;
  fs.writeFileSync(svgPath, svg, 'utf8');
  return {
    marked: Boolean(bbox),
    pixel: normalizedToPixel(box, width, height),
    targetPath: svgPath,
    fallback: 'svg',
  };
}

/**
 * Resolve the ffmpeg executable: FFMPEG_PATH, then the optional
 * @ffmpeg-installer/ffmpeg package, then "ffmpeg" from PATH.
 * @param {object} env
 * @returns {string}
 */
function ffmpegCommand(env) {
  if (env.FFMPEG_PATH) return env.FFMPEG_PATH;
  try {
    return require('@ffmpeg-installer/ffmpeg').path;
  } catch {
    // Optional dependency not installed; rely on PATH.
  }
  return 'ffmpeg';
}

/**
 * Resolve the ffprobe executable: FFPROBE_PATH, then the optional
 * @ffprobe-installer/ffprobe package, then "ffprobe" from PATH.
 * @param {object} env
 * @returns {string}
 */
function ffprobeCommand(env) {
  if (env.FFPROBE_PATH) return env.FFPROBE_PATH;
  try {
    return require('@ffprobe-installer/ffprobe').path;
  } catch {
    // Optional dependency not installed; rely on PATH.
  }
  return 'ffprobe';
}

/**
 * Run a child process and collect its output.
 * @param {string} command
 * @param {string[]} args
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves on exit code 0;
 *   rejects on spawn failure or a non-zero exit (message carries the stderr tail).
 */
function runProcess(command, args) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    child.stdout.on('data', chunk => { stdout += chunk.toString(); });
    child.stderr.on('data', chunk => { stderr += chunk.toString(); });
    child.on('error', reject);
    child.on('close', code => {
      if (code === 0) {
        resolve({ stdout, stderr });
      } else {
        reject(new Error(`${command} exited ${code}: ${stderr.slice(-800)}`));
      }
    });
  });
}

/**
 * Read image dimensions with ffprobe.
 * @param {string} filePath
 * @param {object} env
 * @returns {Promise<{width: number, height: number}>} Each dimension defaults to 1
 *   when ffprobe reports no value.
 */
async function imageSize(filePath, env) {
  const { stdout } = await runProcess(ffprobeCommand(env), [
    '-v', 'error',
    '-select_streams', 'v:0',
    '-show_entries', 'stream=width,height',
    '-of', 'json',
    filePath,
  ]);
  const parsed = JSON.parse(stdout || '{}');
  const stream = parsed.streams?.[0] || {};
  return { width: Number(stream.width || 1), height: Number(stream.height || 1) };
}

/**
 * Produce the marked copy of a frame. Without a bbox the source is copied
 * unchanged. With a bbox the rectangle is drawn with sharp, then ffmpeg if sharp
 * fails or is missing, then an SVG placeholder if both fail.
 * @param {{sourcePath: string, targetPath: string, bbox: ?object, label: string, env: object}} options
 * @returns {Promise<{marked: boolean, pixel: ?object, targetPath?: string, fallback?: string}>}
 */
async function markImage({ sourcePath, targetPath, bbox, label, env }) {
  fs.mkdirSync(path.dirname(targetPath), { recursive: true });
  if (!bbox) {
    fs.copyFileSync(sourcePath, targetPath);
    return { marked: false, pixel: null };
  }
  try {
    // Prefer sharp when installed by the skill runtime.
    const sharp = require('sharp');
    const metadata = await sharp(sourcePath).metadata();
    const width = metadata.width || 1;
    const height = metadata.height || 1;
    const pixel = normalizedToPixel(bbox, width, height);
    const rectW = Math.max(2, pixel.x2 - pixel.x1);
    const rectH = Math.max(2, pixel.y2 - pixel.y1);
    // Transparent full-size overlay carrying only the red rectangle outline.
    const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}"><rect x="${pixel.x1}" y="${pixel.y1}" width="${rectW}" height="${rectH}" fill="none" stroke="red" stroke-width="4"/></svg>`;
    await sharp(sourcePath)
      .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
      .jpeg({ quality: 92 })
      .toFile(targetPath);
    return { marked: true, pixel };
  } catch {
    // sharp is missing or failed; deliberately fall through to ffmpeg.
    try {
      const { width, height } = await imageSize(sourcePath, env);
      const pixel = normalizedToPixel(bbox, width, height);
      const boxW = Math.max(2, pixel.x2 - pixel.x1);
      const boxH = Math.max(2, pixel.y2 - pixel.y1);
      await runProcess(ffmpegCommand(env), [
        '-hide_banner', '-y',
        '-i', sourcePath,
        '-vf', `drawbox=x=${pixel.x1}:y=${pixel.y1}:w=${boxW}:h=${boxH}:color=red@1.0:t=4`,
        '-q:v', '2',
        targetPath,
      ]);
      return { marked: true, pixel };
    } catch {
      // Neither renderer is usable; leave an SVG placeholder instead of failing the run.
      return writeSvgMarker({ sourcePath, targetPath, bbox, label });
    }
  }
}

/**
 * Build an image_url chat content part holding the file as a base64 data URL.
 * @param {string} filePath
 * @returns {{type: string, image_url: {url: string}}}
 */
function imageContent(filePath) {
  const mime = IMAGE_MIME_BY_EXT[path.extname(filePath).toLowerCase()] || 'image/jpeg';
  return {
    type: 'image_url',
    image_url: { url: `data:${mime};base64,${fs.readFileSync(filePath).toString('base64')}` },
  };
}

/**
 * Send one chat-completions request to the grounding model.
 * @param {{env: object, content: Array<object>}} options - env supplies ARK_API_KEY and
 *   optionally ARK_API_URL / DAMAGE_GROUNDING_MODEL; content is the user message parts.
 * @returns {Promise<string>} The first choice's message content, or '' when absent.
 * @throws {Error} When the API key is missing or the response status is not OK.
 */
async function callGroundingApi({ env, content }) {
  const apiKey = env.ARK_API_KEY;
  if (!apiKey) throw new Error('缺少ARK_API_KEY');
  const apiUrl = env.ARK_API_URL || 'https://ark.cn-beijing.volces.com/api/v3/chat/completions';
  const model = env.DAMAGE_GROUNDING_MODEL || 'doubao-seed-2-0-pro-260215';
  emit('ground_damages', '调用豆包grounding模型', 'running', { model });
  const response = await fetch(apiUrl, {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages: [{ role: 'user', content }],
      max_tokens: 2048,
      temperature: 0.1,
    }),
    signal: AbortSignal.timeout(600000),
  });
  const text = await response.text();
  let body;
  try {
    body = JSON.parse(text);
  } catch {
    // Non-JSON response (for example a gateway error page); keep the raw text.
    body = { raw: text };
  }
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${JSON.stringify(body).slice(0, 1000)}`);
  }
  return body.choices?.[0]?.message?.content || '';
}

/**
 * Collect caller-supplied grounding results from the input, if any. Reads the first
 * defined field among groundingResults, modelResult, result and mockResult.
 * @param {object} input
 * @returns {?Array<*>} List of results, or null when none were supplied.
 */
function collectGroundingResults(input) {
  const candidate = input.groundingResults ?? input.modelResult ?? input.result ?? input.mockResult;
  if (!candidate) return null;
  const parsed = parseJsonLoose(candidate) || candidate;
  if (Array.isArray(parsed)) return parsed;
  if (Array.isArray(parsed.results)) return parsed.results;
  if (Array.isArray(parsed.groundingResults)) return parsed.groundingResults;
  return [parsed];
}

/**
 * Find the caller-supplied result for one damage and render it as grounding text.
 * Matches by damage id or explicit index first, then falls back to list position.
 * @param {?Array<*>} results - Output of collectGroundingResults().
 * @param {{id: string}} damage
 * @param {number} index - Position of the damage in the damages list.
 * @returns {?string} Grounding text, or null when nothing applies.
 */
function externalTextFor(results, damage, index) {
  if (!results) return null;
  // Number(null) and Number('') are 0 and would wrongly match the first damage.
  const matchesIndex = value => value != null && value !== '' && Number(value) === index;
  const found = results.find(item => {
    if (!item || typeof item !== 'object') return false;
    return item.damageId === damage.id
      || item.id === damage.id
      || matchesIndex(item.index)
      || matchesIndex(item.damageIndex);
  }) || results[index];
  if (!found) return null;
  if (typeof found === 'string') return found;
  if (found.bbox) {
    const b = found.bbox.normalized || found.bbox;
    if ([b.x1, b.y1, b.x2, b.y2].every(Number.isFinite)) {
      // Wrap in <bbox> tags so parseBboxes() recognises the structured box.
      return `<bbox>${b.x1} ${b.y1} ${b.x2} ${b.y2}</bbox>`;
    }
  }
  return found.text || found.content || found.raw || JSON.stringify(found);
}

/**
 * Ground every damage listed in the task workspace: select frames near each damage,
 * obtain boxes (caller-supplied or from the grounding API), draw them onto copies
 * under marked_frames/, and write grounding.json.
 * @param {object} input - Requires taskId; optional groundingWindow and result fields.
 * @param {object} [env=process.env]
 * @returns {Promise<object>} Summary on success, or a needsModelVision response when
 *   neither an API key nor external results are available.
 * @throws {Error} On missing or invalid taskId, missing workspace files, or API failure.
 */
async function run(input, env = process.env) {
  if (!input?.taskId) throw new Error('taskId不能为空');
  const workspace = workspaceFor(input.taskId, env);
  const videoInfo = readJson(path.join(workspace.workspacePath, 'video_info.json'));
  const damagesDoc = readJson(path.join(workspace.workspacePath, 'damages.json'));
  const frames = (videoInfo.frames || []).map(frame => {
    // A frame may carry only relativePath; path.isAbsolute(undefined) would throw.
    const declared = frame.path || frame.relativePath || '';
    const resolved = path.isAbsolute(declared)
      ? declared
      : path.join(workspace.workspacePath, frame.relativePath || declared);
    return { ...frame, path: resolved };
  });
  const damages = (damagesDoc.damages || []).map(normalizeDamage);
  const groundingWindow = clampNumber(input.groundingWindow, 0.2, 10, 2);
  const externalResults = collectGroundingResults(input);

  if (!externalResults && !env.ARK_API_KEY) {
    return {
      success: false,
      needsModelVision: true,
      taskId: workspace.taskId,
      workspacePath: workspace.workspacePath,
      prompt: GROUNDING_PROMPT,
      damages,
      error: '未配置ARK_API_KEY且未提供groundingResults。请使用豆包grounding按prompt返回bbox后重试。',
    };
  }

  const annotations = [];
  let totalMarked = 0;
  for (let i = 0; i < damages.length; i += 1) {
    const damage = damages[i];
    const selectedFrames = nearestFrames(frames, damage.timeSecond, groundingWindow, 5);

    // Each image is preceded by its timestamp label so the model can reference it.
    const content = [];
    for (const frame of selectedFrames) {
      content.push({ type: 'text', text: `[${Number(frame.timestamp).toFixed(2)} second]` });
      content.push(imageContent(frame.path));
    }
    content.push({
      type: 'text',
      text: `${GROUNDING_PROMPT}\n损伤:${damage.location} ${damage.type}。描述:${damage.description}`,
    });

    const groundingText = externalTextFor(externalResults, damage, i)
      || await callGroundingApi({ env, content });
    const bboxMap = parseBboxesByTimestamp(groundingText, selectedFrames.map(frame => frame.timestamp));
    const markedFrames = [];
    const damageId = damage.id || `damage_${String(i + 1).padStart(3, '0')}`;

    for (const frame of selectedFrames) {
      const bbox = bboxMap.get(Number(frame.timestamp)) || null;
      const markedName = `${damageId}_${Number(frame.timestamp).toFixed(2).replace('.', '_')}s.jpg`;
      const markedPath = path.join(workspace.workspacePath, 'marked_frames', markedName);
      const markResult = await markImage({
        sourcePath: frame.path,
        targetPath: markedPath,
        bbox,
        label: `${damage.location}${damage.type}`,
        env,
      });
      if (bbox) totalMarked += 1;
      // The SVG fallback writes to a different file than requested.
      const finalMarkedPath = markResult.targetPath || markedPath;
      const finalRelativePath = path.relative(workspace.workspacePath, finalMarkedPath).replace(/\\/g, '/');
      markedFrames.push({
        timestamp: frame.timestamp,
        sourcePath: frame.path,
        sourceRelativePath: frame.relativePath,
        markedPath: finalMarkedPath,
        markedRelativePath: finalRelativePath,
        bbox: bbox ? { normalized: bbox, pixel: markResult.pixel } : null,
        groundingRaw: groundingText,
        groundingStatus: bbox ? 'grounded' : 'no_bbox',
      });
    }
    annotations.push({ damageId, damage, markedFrames });
  }

  writeJson(path.join(workspace.workspacePath, 'grounding.json'), {
    taskId: workspace.taskId,
    workspacePath: workspace.workspacePath,
    totalDamages: damages.length,
    totalMarked,
    annotations,
    generatedAt: new Date().toISOString(),
  });
  emit('ground_damages', 'grounding完成并写入grounding.json', 'completed', {
    totalDamages: damages.length,
    totalMarked,
  });
  return { success: true, totalDamages: damages.length, totalMarked };
}

// Entry point: the result (or failure) is always reported as one JSON document on stdout.
(async () => {
  try {
    process.stdout.write(JSON.stringify(await run(readInput())));
  } catch (err) {
    process.stdout.write(JSON.stringify({
      success: false,
      error: err && err.message ? err.message : String(err),
    }));
  }
})();