#!/usr/bin/env node
'use strict';

/*
 * Best-frame selection step of the vehicle scratch inspection workflow.
 *
 * Reads `grounding.json` from the task workspace, merges annotations that
 * describe the same physical damage, picks the best marked frame(s) for each
 * merged damage, copies them into `best_frames/` and writes `best_frames.json`.
 * Progress events are written to stderr as JSON lines; the final result is
 * written to stdout as a single JSON object.
 */

const fs = require('node:fs');
const os = require('node:os');
const path = require('node:path');

// Prompt sent to the model for de-duplicating damage records.
// The segments are joined with single spaces; the resulting text is sent verbatim.
const DEDUPE_PROMPT = [
  '你是汽车损伤去重专家。以下是AI在视频不同帧上检测到的多处损伤记录,由于视频连续帧中同一物理损伤可能被多次报告,请判断哪些记录实际上是同一处物理损伤。',
  '合并规则:',
  '1. 位置相同或相近的同类型损伤视为同一处。',
  '2. 位置描述措辞略有不同但指同一部位的也应合并。',
  '3. 时间相近且位置类型一致的大概率是同一处。',
  '4. 不同位置或不同类型的损伤不应合并。',
  '5. 同一批次中连续时间检测到的同位置同类型损伤,应该合并。',
  '只输出JSON:{"groups":[{"merged_location":"左前翼子板","merged_type":"划痕","merged_severity":"轻微","merged_description":"合并后的综合描述","member_indices":[0,3,5]}]}',
].join(' ');

// Prompt sent to the model for picking the best frame(s) of one damage.
const BEST_FRAME_PROMPT = [
  '以下是同一处车辆旧伤在不同时刻的多张标注画面,损伤区域已用红色方框标出。',
  '请综合以下标准,选出最能清晰展示该损伤的帧:',
  '1. 红框标注精准,不偏移、不过大、不遗漏。',
  '2. 损伤区域清晰可见,不模糊、不被遮挡。',
  '3. 拍摄角度合适,能看到损伤全貌。',
  '4. 光照条件好,不过暗、不过曝、无强反光。',
  '只输出JSON:{"best_timestamps":[12.4],"reasons":["损伤清晰且红框准确"]}',
].join(' ');

// Total number of attempts callModel makes before giving up.
const MAX_MODEL_ATTEMPTS = 3;

// Substrings that mark an error as transient. No `g` flag, so `.test` is stateless.
const RETRYABLE_ERROR_PATTERN = /429|TooManyRequests|timeout|ECONNRESET|ETIMEDOUT/i;

// Largest gap (seconds) between a requested timestamp and a frame that still counts as a match.
const TIMESTAMP_TOLERANCE_SECONDS = 0.2;

/**
 * Writes one `process_event` JSON line to stderr.
 *
 * @param {string} stage - Stage identifier.
 * @param {string} message - Human-readable message.
 * @param {string} [status='running'] - Status label.
 * @param {object} [extra={}] - Extra fields merged into the event.
 */
function emit(stage, message, status = 'running', extra = {}) {
  process.stderr.write(JSON.stringify({
    type: 'process_event',
    stage,
    message,
    status,
    timestamp: new Date().toISOString(),
    ...extra,
  }) + '\n');
}

/**
 * Reads the JSON input from the first non-empty source among: argv[2],
 * SKILL_INPUT, AIFLOW_SKILL_INPUT, stdin (fd 0).
 *
 * @returns {object} Parsed input, or `{}` when the chosen source is blank.
 * @throws {SyntaxError} When the input is not valid JSON.
 */
function readInput() {
  const raw = String(
    process.argv[2]
      || process.env.SKILL_INPUT
      || process.env.AIFLOW_SKILL_INPUT
      || fs.readFileSync(0, 'utf8'),
  ).trim();
  return raw ? JSON.parse(raw) : {};
}

/**
 * Resolves the root directory that holds per-task workspaces.
 *
 * Precedence: VEHICLE_SCRATCH_WORKSPACE_ROOT, RZYX_AI_WORKSPACE_ROOT,
 * RZYX_AI_DATA_DIR, then the OS temp directory.
 *
 * @param {object} env - Environment variables.
 * @returns {string} Path of the workspace root.
 */
function workspaceRoot(env) {
  if (env.VEHICLE_SCRATCH_WORKSPACE_ROOT) return path.resolve(env.VEHICLE_SCRATCH_WORKSPACE_ROOT);
  if (env.RZYX_AI_WORKSPACE_ROOT) {
    const root = path.resolve(env.RZYX_AI_WORKSPACE_ROOT);
    // Accept both the parent directory and the skill-specific directory itself.
    return path.basename(root) === 'vehicle-scratch-inspection'
      ? root
      : path.join(root, 'vehicle-scratch-inspection');
  }
  if (env.RZYX_AI_DATA_DIR) {
    return path.join(path.resolve(env.RZYX_AI_DATA_DIR), 'workspace', 'vehicle-scratch-inspection');
  }
  return path.join(os.tmpdir(), 'vehicle-scratch-inspection');
}

/**
 * Validates the task id and locates its existing workspace directory.
 *
 * @param {string} taskId - Task id; only `[a-zA-Z0-9_-]`, 1 to 80 characters.
 * @param {object} env - Environment variables.
 * @returns {{taskId: string, workspacePath: string}}
 * @throws {Error} When the id is invalid or the directory does not exist.
 */
function workspaceFor(taskId, env) {
  const safe = String(taskId || '').trim();
  // The strict character set keeps the id from escaping the workspace root.
  if (!/^[a-zA-Z0-9_-]{1,80}$/.test(safe)) throw new Error('taskId非法或为空');
  const workspacePath = path.join(workspaceRoot(env), safe);
  if (!fs.existsSync(workspacePath)) throw new Error(`workspace不存在: ${workspacePath}`);
  return { taskId: safe, workspacePath };
}

/**
 * Reads and parses a UTF-8 JSON file.
 *
 * @param {string} filePath
 * @returns {*} Parsed content.
 */
function readJson(filePath) {
  return JSON.parse(fs.readFileSync(filePath, 'utf8'));
}

/**
 * Writes a value as pretty-printed JSON, creating parent directories as needed.
 *
 * @param {string} filePath
 * @param {*} value
 */
function writeJson(filePath, value) {
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, 'utf8');
}

/**
 * Leniently extracts JSON from a value.
 *
 * Objects are returned as-is. Strings are parsed after unwrapping an optional
 * Markdown code fence; if that fails, the span from the first `{` to the last
 * `}` is tried.
 *
 * @param {*} value
 * @returns {*} Parsed value, or `null` when nothing parseable is found.
 */
function parseJsonLoose(value) {
  if (!value) return null;
  if (typeof value === 'object') return value;
  const text = String(value).trim();
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/i);
  const body = fenced ? fenced[1].trim() : text;
  try {
    return JSON.parse(body);
  } catch {
    // Not pure JSON; fall through to the brace-delimited attempt.
  }
  const start = body.indexOf('{');
  const end = body.lastIndexOf('}');
  if (start >= 0 && end > start) {
    try {
      return JSON.parse(body.slice(start, end + 1));
    } catch {
      // Still not parseable; report "no result" below.
    }
  }
  return null;
}

/**
 * Resolves after the given number of milliseconds.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Converts a value to a number clamped to `[min, max]`.
 *
 * @param {*} value
 * @param {number} min
 * @param {number} max
 * @param {number} fallback - Returned when the value is not a finite number.
 * @returns {number}
 */
function clampNumber(value, min, max, fallback) {
  const number = Number(value);
  if (!Number.isFinite(number)) return fallback;
  return Math.max(min, Math.min(max, number));
}

/**
 * Canonicalises a location description so that wording variants compare equal.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeLocation(value) {
  return String(value || '')
    .replace(/\s+/g, '')
    .replace(/[左右]侧/g, match => match[0])
    .replace(/车门/g, '门')
    .replace(/前叶子板/g, '前翼子板')
    .replace(/后叶子板/g, '后翼子板')
    .replace(/保险杆/g, '保险杠')
    .replace(/漆面/g, '')
    .trim();
}

/**
 * Ranks a severity label: 3 if it contains '严重', 2 if it contains '中', else 1.
 *
 * @param {*} value
 * @returns {number}
 */
function severityRank(value) {
  const text = String(value || '');
  if (text.includes('严重')) return 3;
  if (text.includes('中')) return 2;
  return 1;
}

/**
 * Flattens one grounding annotation into the record shape used by this script.
 * Fields are read from `annotation.damage` when present, else from the
 * annotation itself.
 *
 * @param {object} annotation
 * @param {number} index - Position of the annotation in the source array.
 * @returns {object}
 */
function normalizeAnnotation(annotation, index) {
  const damage = annotation.damage || annotation;
  return {
    index,
    damageId: annotation.damageId || damage.id || `damage_${String(index + 1).padStart(3, '0')}`,
    location: damage.location || damage.part || annotation.location || annotation.part || '未知部位',
    type: damage.type || annotation.type || '旧伤',
    severity: damage.severity || annotation.severity || '轻微',
    description: damage.description || annotation.description || '',
    timeSecond: Number(damage.timeSecond ?? damage.time_second ?? damage.timestamp ?? annotation.timestamp ?? 0),
    markedFrames: annotation.markedFrames || [],
    raw: annotation,
  };
}

/**
 * Groups annotations that share the same normalised location and type.
 * Groups appear in order of first occurrence.
 *
 * @param {object[]} annotations - Normalised annotations.
 * @returns {object[]} Merged groups (see buildGroup).
 */
function heuristicDedupe(annotations) {
  const buckets = new Map();
  for (const item of annotations) {
    const key = `${normalizeLocation(item.location)}|${String(item.type).trim()}`;
    const bucket = buckets.get(key);
    if (bucket) bucket.push(item);
    else buckets.set(key, [item]);
  }
  return [...buckets.values()].map(members => buildGroup(members));
}

/**
 * Builds one merged damage group from its member annotations.
 *
 * Defaults come from the most severe member (first one on ties); values in
 * `override` (snake_case or camelCase keys) take precedence.
 *
 * @param {object[]} members - Non-empty list of normalised annotations.
 * @param {object} [override={}] - Merged fields supplied by the model.
 * @returns {object}
 */
function buildGroup(members, override = {}) {
  const sorted = members.slice().sort((a, b) => severityRank(b.severity) - severityRank(a.severity));
  const main = sorted[0] || members[0];
  const markedFrames = members.flatMap(item => item.markedFrames || []);
  const descriptions = [...new Set(members.map(item => item.description).filter(Boolean))];
  return {
    mergedLocation: override.merged_location || override.mergedLocation || main.location,
    mergedType: override.merged_type || override.mergedType || main.type,
    mergedSeverity: override.merged_severity || override.mergedSeverity || main.severity,
    mergedDescription: override.merged_description || override.mergedDescription || descriptions.join(';') || main.description,
    memberIndices: members.map(item => item.index),
    sourceDamageIds: members.map(item => item.damageId),
    markedFrames,
  };
}

/**
 * Converts a model-supplied member index to a non-negative integer.
 *
 * @param {*} value
 * @returns {number|null} The index, or `null` when the value is not usable.
 */
function toMemberIndex(value) {
  if (typeof value === 'string') {
    if (value.trim() === '') return null;
  } else if (typeof value !== 'number') {
    // Rejects null/booleans/objects, which Number() would coerce to 0 or 1.
    return null;
  }
  const number = Number(value);
  return Number.isInteger(number) && number >= 0 ? number : null;
}

/**
 * Turns a model de-duplication result into merged groups.
 *
 * Each annotation ends up in exactly one group: an index claimed by an earlier
 * group is ignored in later ones, and annotations the model did not mention
 * are appended as single-member groups so no damage is lost.
 *
 * @param {*} raw - Model output (string or object) with a `groups` array.
 * @param {object[]} annotations - Normalised annotations.
 * @returns {object[]|null} Groups, or `null` when the result has no `groups`
 *   array or none of its groups references a valid annotation.
 */
function groupsFromModelResult(raw, annotations) {
  const parsed = parseJsonLoose(raw);
  const rawGroups = Array.isArray(parsed?.groups) ? parsed.groups : null;
  if (!rawGroups) return null;

  const claimed = new Set();
  const groups = [];
  for (const group of rawGroups) {
    if (!group || typeof group !== 'object') continue;
    const indices = group.member_indices || group.memberIndices || [];
    if (!Array.isArray(indices)) continue;
    const members = [];
    for (const value of indices) {
      const position = toMemberIndex(value);
      if (position === null || claimed.has(position) || !annotations[position]) continue;
      claimed.add(position);
      members.push(annotations[position]);
    }
    if (members.length) groups.push(buildGroup(members, group));
  }
  // An unusable answer must read as "no result" so callers fall back.
  if (!groups.length) return null;

  annotations.forEach((annotation, position) => {
    if (!claimed.has(position)) groups.push(buildGroup([annotation]));
  });
  return groups;
}

/**
 * Maps a file extension to an image MIME type; unknown extensions yield JPEG.
 *
 * @param {string} filePath
 * @returns {string}
 */
function mimeTypeFor(filePath) {
  const ext = path.extname(filePath || '').toLowerCase();
  if (ext === '.png') return 'image/png';
  if (ext === '.webp') return 'image/webp';
  if (ext === '.svg') return 'image/svg+xml';
  return 'image/jpeg';
}

/**
 * Reads an image file and wraps it as an `image_url` content part with a
 * base64 data URL.
 *
 * @param {string} filePath
 * @returns {{type: string, image_url: {url: string}}}
 */
function imageContent(filePath) {
  const encoded = fs.readFileSync(filePath).toString('base64');
  return {
    type: 'image_url',
    image_url: { url: `data:${mimeTypeFor(filePath)};base64,${encoded}` },
  };
}

/**
 * Decides whether a failed model call is worth retrying.
 *
 * Besides the message, the error name/code and its `cause` are inspected,
 * because fetch reports network failures as a generic error whose real code
 * (e.g. ECONNRESET) sits on `cause`.
 *
 * @param {*} err
 * @returns {boolean}
 */
function isRetryableError(err) {
  if (err?.status === 429) return true;
  const cause = err?.cause;
  const haystack = [
    err && err.message ? err.message : String(err),
    err?.name,
    err?.code,
    cause?.message,
    cause?.name,
    cause?.code,
  ].filter(Boolean).join(' ');
  return RETRYABLE_ERROR_PATTERN.test(haystack);
}

/**
 * Sends one chat-completion request and returns the text of the first choice.
 * Transient failures are retried with a growing delay, up to
 * MAX_MODEL_ATTEMPTS attempts in total.
 *
 * @param {object} options
 * @param {object} options.env - Reads ARK_API_KEY, ARK_API_URL, BEST_FRAME_MODEL.
 * @param {Array} options.content - Content parts of the single user message.
 * @param {number} [options.maxTokens=2048]
 * @returns {Promise<string>} Message content, or `''` when the response has none.
 * @throws {Error} When the API key is missing or the request ultimately fails.
 *   Errors for non-OK responses carry the HTTP status in `status`.
 */
async function callModel({ env, content, maxTokens = 2048 }) {
  const apiKey = env.ARK_API_KEY;
  if (!apiKey) throw new Error('缺少ARK_API_KEY');
  const apiUrl = env.ARK_API_URL || 'https://ark.cn-beijing.volces.com/api/v3/chat/completions';
  const model = env.BEST_FRAME_MODEL || 'doubao-seed-2-0-pro-260215';

  for (let attempt = 1; attempt <= MAX_MODEL_ATTEMPTS; attempt += 1) {
    try {
      emit('best_frame_model', '调用最佳帧模型', 'running', { model, attempt, totalAttempts: MAX_MODEL_ATTEMPTS });
      const response = await fetch(apiUrl, {
        method: 'POST',
        headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model,
          messages: [{ role: 'user', content }],
          max_tokens: maxTokens,
          temperature: 0.1,
        }),
        signal: AbortSignal.timeout(600000),
      });
      const text = await response.text();
      let body;
      try {
        body = JSON.parse(text);
      } catch {
        // Keep the raw text so it still shows up in the error message.
        body = { raw: text };
      }
      if (!response.ok) {
        const failure = new Error(JSON.stringify(body).slice(0, 1000));
        // Lets the retry check recognise rate limiting without parsing the body.
        failure.status = response.status;
        throw failure;
      }
      return body?.choices?.[0]?.message?.content || '';
    } catch (err) {
      if (attempt < MAX_MODEL_ATTEMPTS && isRetryableError(err)) {
        await sleep(2500 * attempt);
        continue;
      }
      throw err;
    }
  }
  throw new Error('最佳帧模型调用失败');
}

/**
 * Asks the model which annotations describe the same physical damage.
 *
 * @param {object} options
 * @param {object} options.env
 * @param {object[]} options.annotations - Normalised annotations.
 * @returns {Promise<object[]>} Merged groups.
 * @throws {Error} When the call fails or the answer cannot be turned into groups.
 */
async function dedupeWithModel({ env, annotations }) {
  const records = annotations.map(item => ({
    index: item.index,
    damageId: item.damageId,
    timeSecond: item.timeSecond,
    location: item.location,
    type: item.type,
    severity: item.severity,
    description: item.description,
  }));
  const content = [
    { type: 'text', text: `${DEDUPE_PROMPT}\n\n损伤记录:\n${JSON.stringify(records, null, 2)}` },
  ];
  const text = await callModel({ env, content, maxTokens: 4096 });
  const groups = groupsFromModelResult(text, annotations);
  if (!groups?.length) throw new Error('模型去重结果无法解析');
  return groups;
}

/**
 * Scores a frame heuristically from its bounding box and image file size.
 *
 * NOTE(review): the 5/995/500 thresholds suggest bbox coordinates on a
 * 0-1000 grid — confirm against the producer of `grounding.json`.
 *
 * @param {object} frame
 * @returns {number} Higher is better.
 */
function frameScore(frame) {
  let score = 0;
  if (frame.bbox?.normalized) {
    score += 100;
    const b = frame.bbox.normalized;
    const area = Math.max(0, b.x2 - b.x1) * Math.max(0, b.y2 - b.y1);
    if (area >= 1000 && area <= 180000) score += 40;
    // Reward boxes that do not touch the image border.
    if (b.x1 > 5 && b.y1 > 5 && b.x2 < 995 && b.y2 < 995) score += 20;
    const cx = (b.x1 + b.x2) / 2;
    const cy = (b.y1 + b.y2) / 2;
    // Reward boxes near the centre point (500, 500).
    score += Math.max(0, 30 - Math.abs(cx - 500) / 20 - Math.abs(cy - 500) / 20);
  }
  try {
    score += Math.min(30, fs.statSync(frame.markedPath || frame.path).size / 50000);
  } catch {
    // Best effort: an unreadable file simply earns no size bonus.
  }
  return score;
}

/**
 * Returns a copy of the frames ordered by descending heuristic score.
 * Each frame is scored once, so the file system is hit once per frame.
 *
 * @param {object[]} frames
 * @returns {object[]}
 */
function rankFrames(frames) {
  return frames
    .map(frame => ({ frame, score: frameScore(frame) }))
    .sort((a, b) => b.score - a.score)
    .map(entry => entry.frame);
}

/**
 * Picks frames without a model, using the heuristic score.
 *
 * NOTE(review): with three or fewer frames all of them are returned, even
 * when that exceeds `topN` — confirm this is intended.
 *
 * @param {object[]} frames
 * @param {number} topN
 * @returns {object[]}
 */
function chooseHeuristic(frames, topN) {
  if (frames.length <= 3) return frames;
  return rankFrames(frames).slice(0, topN);
}

/**
 * Converts a timestamp to a number rounded to two decimals.
 *
 * @param {*} value - Number or numeric string.
 * @returns {number|null} `null` for blank, non-numeric or non-finite input.
 */
function normalizeTimestamp(value) {
  if (typeof value === 'string') {
    if (value.trim() === '') return null;
  } else if (typeof value !== 'number') {
    // null/booleans/objects would otherwise be coerced to 0 or 1.
    return null;
  }
  const number = Number(value);
  return Number.isFinite(number) ? Number(number.toFixed(2)) : null;
}

/**
 * Finds the frame whose timestamp is nearest to the requested one.
 *
 * @param {object[]} frames
 * @param {*} timestamp
 * @returns {object|null} The nearest frame within TIMESTAMP_TOLERANCE_SECONDS
 *   (earliest in the list on ties), or `null`.
 */
function frameByTimestamp(frames, timestamp) {
  const target = normalizeTimestamp(timestamp);
  if (target === null) return null;
  let nearest = null;
  let nearestDelta = Infinity;
  for (const frame of frames) {
    const delta = Math.abs(Number(frame.timestamp || 0) - target);
    if (delta < nearestDelta) {
      nearest = frame;
      nearestDelta = delta;
    }
  }
  return nearestDelta <= TIMESTAMP_TOLERANCE_SECONDS ? nearest : null;
}

/**
 * Extracts the best-frame result for one damage from externally supplied results.
 *
 * Supported shapes: an array, `{results: []}` or `{bestFrameResults: []}`
 * (looked up by position), an object keyed by damage id or by index, or a
 * single result object, which is returned as-is.
 *
 * @param {*} raw
 * @param {number} index - Position of the damage group.
 * @param {string} damageId
 * @returns {*} The matching result, or `null`.
 */
function bestFrameResultFor(raw, index, damageId) {
  const parsed = parseJsonLoose(raw);
  if (!parsed) return null;
  if (Array.isArray(parsed)) return parsed[index] || null;
  if (Array.isArray(parsed.results)) return parsed.results[index] || null;
  if (Array.isArray(parsed.bestFrameResults)) return parsed.bestFrameResults[index] || null;
  if (parsed[damageId]) return parsed[damageId];
  if (parsed[String(index)]) return parsed[String(index)];
  return parsed;
}

/**
 * Resolves the timestamps named in a model result to frames.
 *
 * @param {object[]} frames - Candidate frames.
 * @param {*} raw - Model output (string or object).
 * @param {number} topN - Maximum number of frames to return.
 * @returns {object[]|null} Distinct matched frames in the model's order, or
 *   `null` when there is no timestamp list or nothing matched.
 */
function chooseFromModelResult(frames, raw, topN) {
  const parsed = parseJsonLoose(raw) || raw;
  const timestamps = parsed?.best_timestamps
    || parsed?.bestTimestamps
    || parsed?.selected_timestamps
    || parsed?.selectedTimestamps;
  if (!Array.isArray(timestamps)) return null;
  const selected = [];
  for (const timestamp of timestamps) {
    const frame = frameByTimestamp(frames, timestamp);
    if (frame && !selected.includes(frame)) selected.push(frame);
  }
  return selected.length ? selected.slice(0, topN) : null;
}

/**
 * Asks the model to pick the best frame(s) for one damage group.
 * Only the 12 highest-scoring frames are sent as candidates.
 *
 * @param {object} options
 * @param {object} options.env
 * @param {object[]} options.frames
 * @param {object} options.group - Merged damage group.
 * @param {number} options.topN
 * @param {string} options.damageId
 * @returns {Promise<object[]>} Selected frames.
 * @throws {Error} When the call fails or the answer matches no candidate.
 */
async function chooseWithModel({ env, frames, group, topN, damageId }) {
  const candidates = rankFrames(frames).slice(0, 12);
  const content = [
    { type: 'text', text: `${BEST_FRAME_PROMPT}\n\n本次需要选择最佳${topN}张。损伤:${group.mergedLocation} ${group.mergedType},严重程度:${group.mergedSeverity},描述:${group.mergedDescription || ''}。候选帧如下。` },
  ];
  for (const frame of candidates) {
    // Each image is preceded by its timestamp so the model can refer to it.
    content.push({ type: 'text', text: `[${Number(frame.timestamp || 0).toFixed(2)} second] ${damageId}` });
    content.push(imageContent(frame.markedPath || frame.path));
  }
  const text = await callModel({ env, content, maxTokens: 2048 });
  const selected = chooseFromModelResult(candidates, text, topN);
  if (!selected?.length) throw new Error('模型最佳帧结果无法解析');
  return selected;
}

/**
 * Builds the URL path for a file inside the task workspace.
 *
 * @param {{taskId: string}} workspace
 * @param {string} relativePath - Path relative to the workspace; backslashes are converted.
 * @returns {string}
 */
function publicUrl(workspace, relativePath) {
  return `/workspace/vehicle-scratch-inspection/${workspace.taskId}/${relativePath.replace(/\\/g, '/')}`;
}

/**
 * Copies a selected frame image into `best_frames/` inside the workspace.
 *
 * @param {{taskId: string, workspacePath: string}} workspace
 * @param {object} frame
 * @param {string} damageId
 * @param {number} order - Running number used as file-name prefix.
 * @returns {object} Descriptor of the copied image.
 * @throws {Error} When the source file cannot be copied.
 */
function copyBestFrame(workspace, frame, damageId, order) {
  const source = frame.markedPath || frame.path;
  const ext = path.extname(source) || '.jpg';
  const name = `best_${String(order).padStart(2, '0')}_${damageId}_${Number(frame.timestamp || 0).toFixed(2).replace('.', '_')}s${ext}`;
  const relativePath = `best_frames/${name}`;
  const target = path.join(workspace.workspacePath, relativePath);
  fs.mkdirSync(path.dirname(target), { recursive: true });
  fs.copyFileSync(source, target);
  return {
    timestamp: Number(frame.timestamp || 0),
    path: target,
    relativePath,
    url: publicUrl(workspace, relativePath),
    bbox: frame.bbox?.normalized || null,
    sourceRelativePath: frame.markedRelativePath || frame.sourceRelativePath || null,
  };
}

/**
 * Runs the whole step: de-duplicate annotations, select and copy best frames,
 * write `best_frames.json`.
 *
 * De-duplication uses, in order: a result supplied in the input
 * (`dedupeResult` / `modelResult`), the model (when ARK_API_KEY is set and
 * there is more than one annotation), the location/type heuristic.
 * Frame selection uses, in order: `input.bestFrameResults`, the model (when
 * ARK_API_KEY is set and more than three frames are available), the heuristic.
 *
 * @param {object} input - Requires `taskId`; optional `topN` (clamped to 1-5,
 *   default 1), `dedupeResult`, `modelResult`, `bestFrameResults`.
 * @param {object} [env=process.env]
 * @returns {Promise<{success: boolean, totalDamages: number, bestFrameCount: number}>}
 * @throws {Error} When the task id or workspace is invalid, `grounding.json`
 *   cannot be read, or a selected frame cannot be copied.
 */
async function run(input, env = process.env) {
  if (!input?.taskId) throw new Error('taskId不能为空');
  const workspace = workspaceFor(input.taskId, env);
  const grounding = readJson(path.join(workspace.workspacePath, 'grounding.json'));
  const annotations = (grounding.annotations || []).map(normalizeAnnotation);
  const topN = Math.round(clampNumber(input.topN, 1, 5, 1));

  emit('select_best_frames', '开始去重合并旧伤', 'running', { annotationCount: annotations.length });

  let groups = groupsFromModelResult(input.dedupeResult || input.modelResult, annotations);
  if (!groups && env.ARK_API_KEY && annotations.length > 1) {
    try {
      groups = await dedupeWithModel({ env, annotations });
    } catch (err) {
      // A model failure is not fatal; the heuristic below takes over.
      emit('select_best_frames', '模型去重失败,使用启发式去重兜底', 'running', { error: err && err.message ? err.message : String(err) });
    }
  }
  if (!groups) groups = heuristicDedupe(annotations);

  const damages = [];
  const bestFrameImages = [];
  for (let index = 0; index < groups.length; index += 1) {
    const group = groups[index];
    const damageId = `damage_${String(index + 1).padStart(3, '0')}`;
    const availableFrames = (group.markedFrames || []).filter(frame => frame && (frame.markedPath || frame.path));

    let selected = null;
    const externalBestFrameResult = bestFrameResultFor(input.bestFrameResults, index, damageId);
    if (externalBestFrameResult) selected = chooseFromModelResult(availableFrames, externalBestFrameResult, topN);
    if (!selected && availableFrames.length > 3 && env.ARK_API_KEY) {
      try {
        selected = await chooseWithModel({ env, frames: availableFrames, group, topN, damageId });
      } catch (err) {
        // A model failure is not fatal; the heuristic below takes over.
        emit('select_best_frames', '模型选帧失败,使用启发式评分兜底', 'running', { damageId, error: err && err.message ? err.message : String(err) });
      }
    }
    if (!selected) selected = chooseHeuristic(availableFrames, topN);

    const bestFrames = [];
    for (const frame of selected) {
      // The running number is global across damages, keeping file names unique.
      const image = copyBestFrame(workspace, frame, damageId, bestFrameImages.length + 1);
      bestFrames.push(image);
      bestFrameImages.push({
        ...image,
        mimeType: mimeTypeFor(image.path),
        label: `旧伤最佳展示帧 ${damageId}`,
        purpose: 'best_damage_frame',
        damageId,
      });
    }

    damages.push({
      id: damageId,
      location: group.mergedLocation,
      type: group.mergedType,
      severity: group.mergedSeverity,
      description: group.mergedDescription,
      memberIndices: group.memberIndices,
      sourceDamageIds: group.sourceDamageIds,
      timestamps: availableFrames.map(frame => Number(frame.timestamp || 0)),
      bestFrames,
    });
  }

  const output = {
    taskId: workspace.taskId,
    workspacePath: workspace.workspacePath,
    totalDamages: damages.length,
    bestFrameCount: bestFrameImages.length,
    damages,
    bestFrameImages,
    generatedAt: new Date().toISOString(),
  };
  writeJson(path.join(workspace.workspacePath, 'best_frames.json'), output);
  emit('select_best_frames', '最佳帧筛选完成并写入best_frames.json', 'completed', { totalDamages: damages.length, bestFrameCount: bestFrameImages.length });
  return { success: true, totalDamages: damages.length, bestFrameCount: bestFrameImages.length };
}

// Entry point: the outcome is always reported as one JSON object on stdout.
// NOTE(review): failures are reported in the payload only; the exit code stays 0.
(async () => {
  try {
    process.stdout.write(JSON.stringify(await run(readInput())));
  } catch (err) {
    process.stdout.write(JSON.stringify({
      success: false,
      error: err && err.message ? err.message : String(err),
    }));
  }
})();