2026-05-20 21:39:12 +08:00

428 lines
17 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
'use strict';
const fs = require('node:fs');
const os = require('node:os');
const path = require('node:path');
// Prompt (Chinese) that dedupeWithModel sends to the model ahead of the list of
// damage records. It asks which records describe the same physical damage and
// requests a JSON-only answer shaped like
// {"groups":[{merged_location, merged_type, merged_severity, merged_description, member_indices}]},
// which groupsFromModelResult then parses. The text is runtime data, not a comment.
const DEDUPE_PROMPT = `你是汽车损伤去重专家。以下是AI在视频不同帧上检测到的多处损伤记录由于视频连续帧中同一物理损伤可能被多次报告请判断哪些记录实际上是同一处物理损伤。
合并规则:
1. 位置相同或相近的同类型损伤视为同一处。
2. 位置描述措辞略有不同但指同一部位的也应合并。
3. 时间相近且位置类型一致的大概率是同一处。
4. 不同位置或不同类型的损伤不应合并。
5. 同一批次中连续时间检测到的同位置同类型损伤,应该合并。
只输出JSON{"groups":[{"merged_location":"左前翼子板","merged_type":"划痕","merged_severity":"轻微","merged_description":"合并后的综合描述","member_indices":[0,3,5]}]}`;
// Prompt (Chinese) that chooseWithModel sends to the model ahead of the candidate
// frame images. It lists the selection criteria and requests a JSON-only answer
// shaped like {"best_timestamps":[...],"reasons":[...]}; chooseFromModelResult
// reads the timestamps from that answer. The text is runtime data, not a comment.
const BEST_FRAME_PROMPT = `以下是同一处车辆旧伤在不同时刻的多张标注画面,损伤区域已用红色方框标出。
请综合以下标准,选出最能清晰展示该损伤的帧:
1. 红框标注精准,不偏移、不过大、不遗漏。
2. 损伤区域清晰可见,不模糊、不被遮挡。
3. 拍摄角度合适,能看到损伤全貌。
4. 光照条件好,不过暗、不过曝、无强反光。
只输出JSON{"best_timestamps":[12.4],"reasons":["损伤清晰且红框准确"]}`;
/**
 * Writes one progress event to stderr as a single line of JSON.
 *
 * @param {string} stage - Pipeline stage the event belongs to.
 * @param {string} message - Human-readable progress text.
 * @param {string} [status='running'] - Event status label.
 * @param {object} [extra={}] - Additional fields; spread last, so they override the base fields.
 */
function emit(stage, message, status = 'running', extra = {}) {
  const event = {
    type: 'process_event',
    stage,
    message,
    status,
    timestamp: new Date().toISOString(),
    ...extra,
  };
  const line = `${JSON.stringify(event)}\n`;
  process.stderr.write(line);
}
/**
 * Loads the skill input as parsed JSON.
 *
 * Sources are tried in order: the first CLI argument, the SKILL_INPUT
 * environment variable, the AIFLOW_SKILL_INPUT environment variable, and
 * finally everything readable from stdin (file descriptor 0).
 *
 * @returns {*} The parsed JSON value, or an empty object when the input is blank.
 * @throws {SyntaxError} When the input is not valid JSON.
 */
function readInput() {
  const { SKILL_INPUT, AIFLOW_SKILL_INPUT } = process.env;
  // stdin is only read when none of the earlier sources is truthy.
  const source = process.argv[2] || SKILL_INPUT || AIFLOW_SKILL_INPUT || fs.readFileSync(0, 'utf8');
  const payload = String(source).trim();
  if (!payload) return {};
  return JSON.parse(payload);
}
/**
 * Resolves the directory that holds all task workspaces.
 *
 * Precedence: VEHICLE_SCRATCH_WORKSPACE_ROOT (used as-is), then
 * RZYX_AI_WORKSPACE_ROOT (the skill folder is appended unless the path already
 * ends with it), then RZYX_AI_DATA_DIR/workspace/<skill folder>, and finally
 * <os tmpdir>/<skill folder>.
 *
 * @param {object} env - Environment variables to read.
 * @returns {string} The workspace root path.
 */
function workspaceRoot(env) {
  const skillFolder = 'vehicle-scratch-inspection';
  const {
    VEHICLE_SCRATCH_WORKSPACE_ROOT: explicitRoot,
    RZYX_AI_WORKSPACE_ROOT: sharedRoot,
    RZYX_AI_DATA_DIR: dataDir,
  } = env;

  if (explicitRoot) {
    return path.resolve(explicitRoot);
  }
  if (sharedRoot) {
    const resolved = path.resolve(sharedRoot);
    const alreadyScoped = path.basename(resolved) === skillFolder;
    return alreadyScoped ? resolved : path.join(resolved, skillFolder);
  }
  if (dataDir) {
    return path.join(path.resolve(dataDir), 'workspace', skillFolder);
  }
  return path.join(os.tmpdir(), skillFolder);
}
/**
 * Validates a task id and locates its existing workspace directory.
 *
 * The id is trimmed and must be 1-80 characters of letters, digits, `_` or `-`,
 * which also keeps it from escaping the workspace root when joined into a path.
 *
 * @param {*} taskId - Task identifier supplied by the caller.
 * @param {object} env - Environment variables forwarded to workspaceRoot.
 * @returns {{taskId: string, workspacePath: string}} The trimmed id and its workspace path.
 * @throws {Error} When the id is invalid or the workspace path does not exist.
 */
function workspaceFor(taskId, env) {
  const normalizedId = String(taskId || '').trim();
  const isValidId = /^[a-zA-Z0-9_-]{1,80}$/.test(normalizedId);
  if (!isValidId) {
    throw new Error('taskId非法或为空');
  }

  const root = workspaceRoot(env);
  const workspacePath = path.join(root, normalizedId);
  if (!fs.existsSync(workspacePath)) {
    throw new Error(`workspace不存在: ${workspacePath}`);
  }
  return { taskId: normalizedId, workspacePath };
}
/**
 * Reads a UTF-8 file and parses its content as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the file cannot be read or is not valid JSON.
 */
function readJson(filePath) {
  const text = fs.readFileSync(filePath, { encoding: 'utf8' });
  return JSON.parse(text);
}
/**
 * Serializes a value as two-space-indented JSON with a trailing newline and
 * writes it to `filePath`, creating missing parent directories first.
 *
 * @param {string} filePath - Destination file.
 * @param {*} value - Value to serialize.
 */
function writeJson(filePath, value) {
  const directory = path.dirname(filePath);
  fs.mkdirSync(directory, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  fs.writeFileSync(filePath, `${serialized}\n`, { encoding: 'utf8' });
}
/**
 * Best-effort JSON extraction from a model reply or caller-supplied value.
 *
 * Falsy input yields null and objects are returned unchanged. Otherwise the
 * text is trimmed, the content of the first ``` fence (optionally tagged
 * `json`) is preferred when present, and parsing is attempted on the whole
 * body and then on the span from the first `{` to the last `}`.
 *
 * @param {*} value - Raw text or an already-parsed object.
 * @returns {*} The parsed value, or null when nothing parses.
 */
function parseJsonLoose(value) {
  if (!value) return null;
  if (typeof value === 'object') return value;

  // Wraps JSON.parse so that a legitimately parsed null/false/0 is told apart from a failure.
  const attempt = (candidate) => {
    try {
      return { ok: true, result: JSON.parse(candidate) };
    } catch {
      return { ok: false, result: null };
    }
  };

  const trimmed = String(value).trim();
  const fenceMatch = /```(?:json)?\s*([\s\S]*?)```/i.exec(trimmed);
  const payload = fenceMatch === null ? trimmed : fenceMatch[1].trim();

  const direct = attempt(payload);
  if (direct.ok) return direct.result;

  const open = payload.indexOf('{');
  const close = payload.lastIndexOf('}');
  if (open < 0 || close <= open) return null;
  return attempt(payload.slice(open, close + 1)).result;
}
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Coerces `value` to a number and clamps it into [min, max].
 *
 * @param {*} value - Value to coerce with Number().
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @param {*} fallback - Returned when the coerced value is not finite.
 * @returns {*} The clamped number, or `fallback`.
 */
function clampNumber(value, min, max, fallback) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) {
    return fallback;
  }
  const upperBounded = Math.min(max, parsed);
  return Math.max(min, upperBounded);
}
/**
 * Canonicalizes a damage-location label so that differently worded labels for
 * the same part compare equal.
 *
 * The rewrites are applied in order: strip whitespace, shorten 左侧/右侧 to
 * 左/右, shorten 车门 to 门, map 叶子板 to 翼子板 (front and rear), map 保险杆 to
 * 保险杠, and drop 漆面.
 *
 * @param {*} value - Raw location label; falsy values become ''.
 * @returns {string} The normalized label.
 */
function normalizeLocation(value) {
  const rewrites = [
    [/\s+/g, ''],
    [/([左右])侧/g, '$1'],
    [/车门/g, '门'],
    [/前叶子板/g, '前翼子板'],
    [/后叶子板/g, '后翼子板'],
    [/保险杆/g, '保险杠'],
    [/漆面/g, ''],
  ];
  let label = String(value || '');
  for (const [pattern, replacement] of rewrites) {
    label = label.replace(pattern, replacement);
  }
  return label.trim();
}
/**
 * Maps a severity label to a sortable rank: 3 when it contains 严重,
 * otherwise 2 when it contains 中, otherwise 1.
 *
 * @param {*} value - Severity label; falsy values rank 1.
 * @returns {number} Rank from 1 to 3.
 */
function severityRank(value) {
  const label = String(value || '');
  // Checked in order, so a label containing both markers takes the higher rank.
  const tiers = [
    ['严重', 3],
    ['中', 2],
  ];
  const hit = tiers.find(([marker]) => label.includes(marker));
  return hit ? hit[1] : 1;
}
/**
 * Flattens one grounding annotation into the record shape used by the rest of
 * the pipeline.
 *
 * Damage fields are read from `annotation.damage` when present, otherwise from
 * the annotation itself, with several alternative key spellings and defaults
 * for missing values.
 *
 * @param {object} annotation - Raw annotation entry.
 * @param {number} index - Position of the annotation in the source list.
 * @returns {object} Record with index, damageId, location, type, severity,
 *   description, timeSecond, markedFrames and the original annotation as `raw`.
 */
function normalizeAnnotation(annotation, index) {
  const damage = annotation.damage || annotation;
  // First truthy candidate wins; `fallback` is used when none is truthy.
  const pick = (candidates, fallback) => candidates.find(Boolean) || fallback;

  const sequence = String(index + 1).padStart(3, '0');
  const rawTime = damage.timeSecond ?? damage.time_second ?? damage.timestamp ?? annotation.timestamp ?? 0;

  return {
    index,
    damageId: pick([annotation.damageId, damage.id], `damage_${sequence}`),
    location: pick([damage.location, damage.part, annotation.location, annotation.part], '未知部位'),
    type: pick([damage.type, annotation.type], '旧伤'),
    severity: pick([damage.severity, annotation.severity], '轻微'),
    description: pick([damage.description, annotation.description], ''),
    timeSecond: Number(rawTime),
    markedFrames: annotation.markedFrames || [],
    raw: annotation,
  };
}
/**
 * Rule-based dedupe: annotations whose normalized location and trimmed type
 * match are treated as the same damage.
 *
 * Groups come out in the order their first member appears in `annotations`.
 *
 * @param {object[]} annotations - Normalized annotations.
 * @returns {object[]} One buildGroup result per distinct location/type pair.
 */
function heuristicDedupe(annotations) {
  // A Map iterates in insertion order, which keeps groups in first-seen order.
  const buckets = new Map();
  for (const annotation of annotations) {
    const bucketKey = `${normalizeLocation(annotation.location)}|${String(annotation.type).trim()}`;
    const bucket = buckets.get(bucketKey);
    if (bucket) {
      bucket.push(annotation);
    } else {
      buckets.set(bucketKey, [annotation]);
    }
  }
  return Array.from(buckets.values(), (bucket) => buildGroup(bucket));
}
/**
 * Merges annotations judged to be the same physical damage into one group.
 *
 * The member with the highest severityRank (first one on ties) supplies the
 * default location, type and severity. Truthy fields on `override`, in either
 * snake_case or camelCase, take precedence over those defaults. When no merged
 * description is supplied, the members' distinct non-empty descriptions are
 * joined with a full-width semicolon so they stay readable as separate notes.
 *
 * @param {object[]} members - Normalized annotations; at least one is required.
 * @param {object} [override={}] - Optional merged_* / merged* fields, e.g. a group returned by the model.
 * @returns {{mergedLocation: *, mergedType: *, mergedSeverity: *, mergedDescription: *,
 *   memberIndices: Array, sourceDamageIds: Array, markedFrames: Array}}
 * @throws {TypeError} When `members` is not a non-empty array.
 */
function buildGroup(members, override = {}) {
  if (!Array.isArray(members) || members.length === 0) {
    throw new TypeError('buildGroup requires at least one member');
  }
  // Tolerate an explicit null override (the default only covers undefined).
  const overrides = override ?? {};

  // Array.prototype.sort is stable, so the earliest member wins among equal severities.
  const [main] = [...members].sort((a, b) => severityRank(b.severity) - severityRank(a.severity));
  const distinctDescriptions = [...new Set(members.map((item) => item.description).filter(Boolean))];

  return {
    mergedLocation: overrides.merged_location || overrides.mergedLocation || main.location,
    mergedType: overrides.merged_type || overrides.mergedType || main.type,
    mergedSeverity: overrides.merged_severity || overrides.mergedSeverity || main.severity,
    mergedDescription: overrides.merged_description
      || overrides.mergedDescription
      || distinctDescriptions.join(';')
      || main.description,
    memberIndices: members.map((item) => item.index),
    sourceDamageIds: members.map((item) => item.damageId),
    markedFrames: members.flatMap((item) => item.markedFrames || []),
  };
}
/**
 * Converts a dedupe answer (model text or caller-supplied object) into groups.
 *
 * Guarantees on the result:
 * - each annotation belongs to at most one group; an index that was already
 *   claimed by an earlier group is ignored;
 * - indices that are not integers, or that do not address an annotation, are ignored;
 * - annotations the answer never mentions are appended as single-member
 *   groups, so no detected damage is lost;
 * - when the answer contains no usable group at all, null is returned so the
 *   caller can fall back to another dedupe strategy.
 *
 * @param {*} raw - Model reply text or parsed object expected to hold a `groups` array.
 * @param {object[]} annotations - Normalized annotations, addressed by array position.
 * @returns {object[]|null} buildGroup results, or null when nothing usable was found.
 */
function groupsFromModelResult(raw, annotations) {
  const parsed = parseJsonLoose(raw);
  const modelGroups = Array.isArray(parsed?.groups) ? parsed.groups : null;
  if (!modelGroups) return null;

  const claimed = new Set();
  const result = [];
  for (const modelGroup of modelGroups) {
    const rawIndices = modelGroup?.member_indices || modelGroup?.memberIndices;
    if (!Array.isArray(rawIndices)) continue;

    const members = [];
    for (const rawIndex of rawIndices) {
      // Accept numbers and numeric strings only; Number(null) would otherwise alias index 0.
      const position = typeof rawIndex === 'string' ? Number(rawIndex.trim() || NaN) : rawIndex;
      if (!Number.isInteger(position) || claimed.has(position)) continue;
      const annotation = annotations[position];
      if (!annotation) continue;
      claimed.add(position);
      members.push(annotation);
    }
    if (members.length > 0) result.push(buildGroup(members, modelGroup));
  }

  // Nothing usable: signal "no result" instead of an empty (truthy) list.
  if (result.length === 0) return null;

  annotations.forEach((annotation, position) => {
    if (annotation && !claimed.has(position)) result.push(buildGroup([annotation]));
  });
  return result;
}
/**
 * Builds an `image_url` message part that embeds the file as a base64 data URL.
 *
 * The MIME type is chosen from the file extension (.png, .webp, .svg);
 * any other extension is sent as image/jpeg.
 *
 * @param {string} filePath - Image file to embed.
 * @returns {{type: string, image_url: {url: string}}}
 */
function imageContent(filePath) {
  const mimeByExtension = new Map([
    ['.png', 'image/png'],
    ['.webp', 'image/webp'],
    ['.svg', 'image/svg+xml'],
  ]);
  const extension = path.extname(filePath).toLowerCase();
  const mimeType = mimeByExtension.get(extension) ?? 'image/jpeg';
  const encoded = fs.readFileSync(filePath).toString('base64');
  return {
    type: 'image_url',
    image_url: { url: `data:${mimeType};base64,${encoded}` },
  };
}
/**
 * Sends one chat-completions request to the configured endpoint and returns
 * the text of the first choice.
 *
 * Up to three attempts are made. A failed attempt is retried (after
 * 2500 ms x attempt number) when it looks transient: an HTTP 429 response, or
 * an error whose name/message/code matches the rate-limit, timeout or
 * connection-reset pattern. The error's `cause` is inspected too, because
 * fetch reports network failures as `TypeError: fetch failed` and keeps the
 * real code (for example ECONNRESET) on the cause.
 *
 * @param {object} options
 * @param {object} options.env - Reads ARK_API_KEY (required), ARK_API_URL and BEST_FRAME_MODEL.
 * @param {*} options.content - Content of the single user message.
 * @param {number} [options.maxTokens=2048] - Sent as `max_tokens`.
 * @returns {Promise<string>} The reply text, or '' when the response has none.
 * @throws {Error} When the API key is missing, or the last attempt fails. HTTP
 *   errors carry the (truncated) response body as message and the status code as `status`.
 */
async function callModel({ env, content, maxTokens = 2048 }) {
  const apiKey = env.ARK_API_KEY;
  if (!apiKey) throw new Error('缺少ARK_API_KEY');
  const apiUrl = env.ARK_API_URL || 'https://ark.cn-beijing.volces.com/api/v3/chat/completions';
  const model = env.BEST_FRAME_MODEL || 'doubao-seed-2-0-pro-260215';
  const totalAttempts = 3;
  const transientPattern = /429|TooManyRequests|timeout|ECONNRESET|ETIMEDOUT/i;

  // Decides whether a failed attempt is worth repeating.
  const isTransient = (err) => {
    if (err?.status === 429) return true;
    const cause = err?.cause;
    const details = [err?.name, err?.message, err?.code, cause?.name, cause?.message, cause?.code]
      .filter(Boolean)
      .join(' ');
    return transientPattern.test(details || String(err));
  };

  for (let attempt = 1; attempt <= totalAttempts; attempt += 1) {
    try {
      emit('best_frame_model', '调用最佳帧模型', 'running', { model, attempt, totalAttempts });
      const response = await fetch(apiUrl, {
        method: 'POST',
        headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model,
          messages: [{ role: 'user', content }],
          max_tokens: maxTokens,
          temperature: 0.1,
        }),
        signal: AbortSignal.timeout(600000),
      });
      const text = await response.text();
      let body;
      try {
        body = JSON.parse(text);
      } catch {
        // Non-JSON replies are kept verbatim so they still show up in the error message.
        body = { raw: text };
      }
      if (!response.ok) {
        const httpError = new Error(JSON.stringify(body).slice(0, 1000));
        httpError.status = response.status;
        throw httpError;
      }
      return body?.choices?.[0]?.message?.content || '';
    } catch (err) {
      if (attempt < totalAttempts && isTransient(err)) {
        await sleep(2500 * attempt);
        continue;
      }
      throw err;
    }
  }
  throw new Error('最佳帧模型调用失败');
}
/**
 * Asks the model which annotations describe the same physical damage.
 *
 * Only the descriptive fields of each annotation are sent, as pretty-printed
 * JSON appended to DEDUPE_PROMPT.
 *
 * @param {object} options
 * @param {object} options.env - Environment forwarded to callModel.
 * @param {object[]} options.annotations - Normalized annotations.
 * @returns {Promise<object[]>} Groups parsed from the model reply.
 * @throws {Error} When the request fails or the reply yields no groups.
 */
async function dedupeWithModel({ env, annotations }) {
  const records = annotations.map(({ index, damageId, timeSecond, location, type, severity, description }) => ({
    index,
    damageId,
    timeSecond,
    location,
    type,
    severity,
    description,
  }));
  const promptText = `${DEDUPE_PROMPT}\n\n损伤记录:\n${JSON.stringify(records, null, 2)}`;
  const reply = await callModel({
    env,
    content: [{ type: 'text', text: promptText }],
    maxTokens: 4096,
  });

  const groups = groupsFromModelResult(reply, annotations);
  if (!groups?.length) {
    throw new Error('模型去重结果无法解析');
  }
  return groups;
}
/**
 * Heuristic quality score for a candidate frame; higher is better.
 *
 * Scoring:
 * - +100 when the frame has a `bbox.normalized` box;
 * - +40 when the box area is between 1000 and 180000;
 * - +20 when the box keeps a margin of more than 5 from the 0 and 1000 edges;
 * - up to +30 the closer the box centre is to (500, 500);
 * - up to +30 for the size of the image file (1 point per 50000 bytes).
 *
 * The thresholds suggest box coordinates on a 0-1000 scale — NOTE(review):
 * confirm against the grounding step that produces them.
 *
 * Coordinates are coerced to numbers first, so numeric strings are added
 * rather than concatenated. If any coordinate is missing or not finite, the
 * geometry bonuses are skipped instead of turning the whole score into NaN.
 *
 * @param {object} frame - Frame with optional `bbox.normalized` {x1, y1, x2, y2}
 *   and a `markedPath` or `path` pointing at the image file.
 * @returns {number} A finite score.
 */
function frameScore(frame) {
  let score = 0;
  const box = frame.bbox?.normalized;
  if (box) {
    score += 100;
    const x1 = Number(box.x1);
    const y1 = Number(box.y1);
    const x2 = Number(box.x2);
    const y2 = Number(box.y2);
    if ([x1, y1, x2, y2].every((coordinate) => Number.isFinite(coordinate))) {
      const area = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
      if (area >= 1000 && area <= 180000) score += 40;
      if (x1 > 5 && y1 > 5 && x2 < 995 && y2 < 995) score += 20;
      const centerX = (x1 + x2) / 2;
      const centerY = (y1 + y2) / 2;
      score += Math.max(0, 30 - Math.abs(centerX - 500) / 20 - Math.abs(centerY - 500) / 20);
    }
  }
  try {
    score += Math.min(30, fs.statSync(frame.markedPath || frame.path).size / 50000);
  } catch {
    // Best effort: a missing or unreadable file simply earns no size bonus.
  }
  return score;
}
/**
 * Picks an image MIME type from a file extension (case-insensitive).
 *
 * @param {string} [filePath] - File path or name; a falsy value is treated as ''.
 * @returns {string} image/png, image/webp or image/svg+xml for those extensions, otherwise image/jpeg.
 */
function mimeTypeFor(filePath) {
  switch (path.extname(filePath || '').toLowerCase()) {
    case '.png':
      return 'image/png';
    case '.webp':
      return 'image/webp';
    case '.svg':
      return 'image/svg+xml';
    default:
      return 'image/jpeg';
  }
}
/**
 * Picks the `topN` best frames by frameScore, best first.
 *
 * The limit is honoured for every input size; short lists are ranked and
 * truncated like long ones instead of being returned whole. Each frame is
 * scored once up front, because scoring touches the file system and a sort
 * comparator would otherwise repeat it many times.
 *
 * @param {object[]} frames - Candidate frames; the array is not modified.
 * @param {number} topN - Maximum number of frames to return.
 * @returns {object[]} A new array with at most `topN` frames, highest score first.
 */
function chooseHeuristic(frames, topN) {
  return frames
    .map((frame) => {
      const score = frameScore(frame);
      // A non-finite score would make the comparator inconsistent; rank it lowest.
      return { frame, score: Number.isFinite(score) ? score : -Infinity };
    })
    .sort((left, right) => {
      if (left.score === right.score) return 0;
      return left.score < right.score ? 1 : -1;
    })
    .slice(0, topN)
    .map((entry) => entry.frame);
}
/**
 * Parses a timestamp and rounds it to two decimals.
 *
 * Only numbers and non-blank strings are accepted. Values such as null, '',
 * booleans or objects return null instead of being coerced to 0 or 1, which
 * would otherwise make a missing timestamp look like a real moment in the video.
 *
 * @param {*} value - Timestamp as a number or numeric string.
 * @returns {number|null} The rounded timestamp, or null when it is not a finite number.
 */
function normalizeTimestamp(value) {
  const isNumber = typeof value === 'number';
  const isNonBlankString = typeof value === 'string' && value.trim() !== '';
  if (!isNumber && !isNonBlankString) return null;

  const number = Number(value);
  return Number.isFinite(number) ? Number(number.toFixed(2)) : null;
}
/**
 * Finds the frame whose timestamp is closest to the requested one.
 *
 * A frame qualifies when it lies within 0.2 of the target. Among qualifying
 * frames the nearest wins, and the earliest in the list wins on a tie, so a
 * closer frame later in the list is no longer shadowed by an earlier, merely
 * acceptable one. Frames without a timestamp count as 0.
 *
 * @param {object[]} frames - Candidate frames with a `timestamp` property.
 * @param {*} timestamp - Requested timestamp; parsed by normalizeTimestamp.
 * @returns {object|null} The matching frame, or null when none qualifies.
 */
function frameByTimestamp(frames, timestamp) {
  const target = normalizeTimestamp(timestamp);
  if (target === null) return null;

  const tolerance = 0.2;
  let nearest = null;
  let nearestDistance = Infinity;
  for (const frame of frames) {
    const distance = Math.abs(Number(frame.timestamp || 0) - target);
    // A NaN distance fails both comparisons and is skipped.
    if (distance <= tolerance && distance < nearestDistance) {
      nearest = frame;
      nearestDistance = distance;
    }
  }
  return nearest;
}
/**
 * Extracts the best-frame result that belongs to one damage from a
 * caller-supplied result set.
 *
 * Supported shapes, checked in order: a plain array, `{results: [...]}`,
 * `{bestFrameResults: [...]}` (all addressed by `index`), an object keyed by
 * `damageId`, and an object keyed by the index as a string. Any other parsed
 * value is returned as-is, i.e. treated as a single result.
 *
 * @param {*} raw - Result set as text or object.
 * @param {number} index - Zero-based position of the damage.
 * @param {string} damageId - Identifier of the damage.
 * @returns {*} The matching entry, or null when the input is empty or the
 *   addressed list slot is empty.
 */
function bestFrameResultFor(raw, index, damageId) {
  const parsed = parseJsonLoose(raw);
  if (!parsed) return null;

  // The first list-shaped container decides the outcome, even when the slot is empty.
  const listShapes = [parsed, parsed.results, parsed.bestFrameResults];
  for (const list of listShapes) {
    if (Array.isArray(list)) {
      return list[index] || null;
    }
  }

  for (const key of [damageId, String(index)]) {
    if (parsed[key]) {
      return parsed[key];
    }
  }
  return parsed;
}
/**
 * Resolves the timestamps named in a best-frame answer to actual frames.
 *
 * The timestamp list may appear under best_timestamps, bestTimestamps,
 * selected_timestamps or selectedTimestamps. Frames are returned in the order
 * the answer lists them, without duplicates, limited to `topN`.
 *
 * @param {object[]} frames - Candidate frames.
 * @param {*} raw - Answer as text or parsed object.
 * @param {number} topN - Maximum number of frames to return.
 * @returns {object[]|null} Selected frames, or null when the answer has no
 *   timestamp list or none of the timestamps matches a frame.
 */
function chooseFromModelResult(frames, raw, topN) {
  const answer = parseJsonLoose(raw) || raw;
  const timestamps = answer?.best_timestamps
    || answer?.bestTimestamps
    || answer?.selected_timestamps
    || answer?.selectedTimestamps;
  if (!Array.isArray(timestamps)) return null;

  // A Set keeps insertion order and drops frames that are named more than once.
  const picked = new Set();
  for (const timestamp of timestamps) {
    const match = frameByTimestamp(frames, timestamp);
    if (match) picked.add(match);
  }
  if (picked.size === 0) return null;
  return [...picked].slice(0, topN);
}
/**
 * Lets the model pick the frames that show one damage best.
 *
 * The twelve highest-scoring frames (by frameScore) are sent as candidates.
 * Each image is preceded by a text part carrying its timestamp and the damage
 * id, so the reply can refer to frames by timestamp.
 *
 * @param {object} options
 * @param {object} options.env - Environment forwarded to callModel.
 * @param {object[]} options.frames - All available frames for the damage.
 * @param {object} options.group - Merged damage group (location, type, severity, description).
 * @param {number} options.topN - Number of frames to request and return at most.
 * @param {string} options.damageId - Identifier printed next to each candidate.
 * @returns {Promise<object[]>} The selected frames.
 * @throws {Error} When the request fails or the reply selects no known frame.
 */
async function chooseWithModel({ env, frames, group, topN, damageId }) {
  const ranked = [...frames].sort((left, right) => frameScore(right) - frameScore(left));
  const candidates = ranked.slice(0, 12);

  const intro = `${BEST_FRAME_PROMPT}\n\n本次需要选择最佳${topN}张。损伤:${group.mergedLocation} ${group.mergedType},严重程度:${group.mergedSeverity},描述:${group.mergedDescription || ''}。候选帧如下。`;
  const candidateParts = candidates.flatMap((frame) => [
    { type: 'text', text: `[${Number(frame.timestamp || 0).toFixed(2)} second] ${damageId}` },
    imageContent(frame.markedPath || frame.path),
  ]);
  const content = [{ type: 'text', text: intro }, ...candidateParts];

  const reply = await callModel({ env, content, maxTokens: 2048 });
  const selected = chooseFromModelResult(candidates, reply, topN);
  if (!selected?.length) {
    throw new Error('模型最佳帧结果无法解析');
  }
  return selected;
}
/**
 * Builds the URL path under which a workspace file is exposed.
 *
 * @param {{taskId: string}} workspace - Workspace descriptor.
 * @param {string} relativePath - Path inside the workspace; backslashes become forward slashes.
 * @returns {string} `/workspace/vehicle-scratch-inspection/<taskId>/<relativePath>`.
 */
function publicUrl(workspace, relativePath) {
  const urlPath = relativePath.replaceAll('\\', '/');
  return `/workspace/vehicle-scratch-inspection/${workspace.taskId}/${urlPath}`;
}
/**
 * Copies a selected frame into the workspace's `best_frames` folder and
 * describes the copy.
 *
 * The file name encodes the running order (two digits), the damage id and the
 * timestamp with two decimals ('.' replaced by '_'); the source extension is
 * kept, defaulting to .jpg.
 *
 * @param {{taskId: string, workspacePath: string}} workspace - Target workspace.
 * @param {object} frame - Frame to copy; `markedPath` is preferred over `path`.
 * @param {string} damageId - Identifier of the damage the frame shows.
 * @param {number} order - Running number used in the file name.
 * @returns {object} timestamp, absolute path, relativePath, url, bbox and sourceRelativePath of the copy.
 * @throws {Error} When the directory cannot be created or the file cannot be copied.
 */
function copyBestFrame(workspace, frame, damageId, order) {
  const sourcePath = frame.markedPath || frame.path;
  const timestamp = Number(frame.timestamp || 0);

  const extension = path.extname(sourcePath) || '.jpg';
  const orderTag = String(order).padStart(2, '0');
  const timeTag = timestamp.toFixed(2).replace('.', '_');
  const relativePath = `best_frames/best_${orderTag}_${damageId}_${timeTag}s${extension}`;

  const destination = path.join(workspace.workspacePath, relativePath);
  fs.mkdirSync(path.dirname(destination), { recursive: true });
  fs.copyFileSync(sourcePath, destination);

  return {
    timestamp,
    path: destination,
    relativePath,
    url: publicUrl(workspace, relativePath),
    bbox: frame.bbox?.normalized || null,
    sourceRelativePath: frame.markedRelativePath || frame.sourceRelativePath || null,
  };
}
/**
 * Runs the skill: merges duplicate damage annotations and picks the best
 * frames for every resulting damage.
 *
 * Steps:
 * 1. Read `grounding.json` from the task workspace and normalize its annotations.
 * 2. Dedupe, using the first strategy that yields at least one group: a
 *    caller-supplied result (`input.dedupeResult` or `input.modelResult`), the
 *    model (only with ARK_API_KEY set and more than one annotation), then the
 *    location/type heuristic. An empty group list counts as "no result", so it
 *    can never silently produce a report with zero damages.
 * 3. For each group, select frames from a caller-supplied result
 *    (`input.bestFrameResults`), then the model (only with ARK_API_KEY set and
 *    more than three frames), then the scoring heuristic.
 * 4. Copy the selected frames into the workspace and write `best_frames.json`.
 *
 * Model failures are reported as progress events and never abort the run.
 *
 * @param {object} input - Skill input; `taskId` is required, `topN` is clamped to 1-5 (default 1).
 * @param {object} [env=process.env] - Environment variables.
 * @returns {Promise<{success: boolean, totalDamages: number, bestFrameCount: number}>}
 * @throws {Error} When `taskId` is missing, or when workspace files cannot be read, copied or written.
 */
async function run(input, env = process.env) {
  if (!input?.taskId) throw new Error('taskId不能为空');
  const workspace = workspaceFor(input.taskId, env);
  const grounding = readJson(path.join(workspace.workspacePath, 'grounding.json'));
  // Tolerate a grounding file without a usable annotations array.
  const rawAnnotations = Array.isArray(grounding?.annotations) ? grounding.annotations : [];
  const annotations = rawAnnotations.map((annotation, index) => normalizeAnnotation(annotation, index));
  const topN = Math.round(clampNumber(input.topN, 1, 5, 1));
  emit('select_best_frames', '开始去重合并旧伤', 'running', { annotationCount: annotations.length });

  let groups = groupsFromModelResult(input.dedupeResult || input.modelResult, annotations);
  if (!groups?.length && env.ARK_API_KEY && annotations.length > 1) {
    try {
      groups = await dedupeWithModel({ env, annotations });
    } catch (err) {
      emit('select_best_frames', '模型去重失败,使用启发式去重兜底', 'running', { error: err && err.message ? err.message : String(err) });
    }
  }
  if (!groups?.length) groups = heuristicDedupe(annotations);

  const damages = [];
  const bestFrameImages = [];
  for (let index = 0; index < groups.length; index += 1) {
    const group = groups[index];
    const damageId = `damage_${String(index + 1).padStart(3, '0')}`;
    // Only frames that point at an image file can be scored, sent or copied.
    const availableFrames = (group.markedFrames || []).filter((frame) => frame && (frame.markedPath || frame.path));

    let selected = null;
    const externalBestFrameResult = bestFrameResultFor(input.bestFrameResults, index, damageId);
    if (externalBestFrameResult) selected = chooseFromModelResult(availableFrames, externalBestFrameResult, topN);
    if (!selected && availableFrames.length > 3 && env.ARK_API_KEY) {
      try {
        selected = await chooseWithModel({ env, frames: availableFrames, group, topN, damageId });
      } catch (err) {
        emit('select_best_frames', '模型选帧失败,使用启发式评分兜底', 'running', { damageId, error: err && err.message ? err.message : String(err) });
      }
    }
    if (!selected) selected = chooseHeuristic(availableFrames, topN);

    const bestFrames = [];
    for (const frame of selected) {
      // The running number spans all damages, which keeps copied file names unique.
      const image = copyBestFrame(workspace, frame, damageId, bestFrameImages.length + 1);
      bestFrames.push(image);
      bestFrameImages.push({
        ...image,
        mimeType: mimeTypeFor(image.path),
        label: `旧伤最佳展示帧 ${damageId}`,
        purpose: 'best_damage_frame',
        damageId,
      });
    }
    damages.push({
      id: damageId,
      location: group.mergedLocation,
      type: group.mergedType,
      severity: group.mergedSeverity,
      description: group.mergedDescription,
      memberIndices: group.memberIndices,
      sourceDamageIds: group.sourceDamageIds,
      timestamps: availableFrames.map((frame) => Number(frame.timestamp || 0)),
      bestFrames,
    });
  }

  const output = {
    taskId: workspace.taskId,
    workspacePath: workspace.workspacePath,
    totalDamages: damages.length,
    bestFrameCount: bestFrameImages.length,
    damages,
    bestFrameImages,
    generatedAt: new Date().toISOString(),
  };
  writeJson(path.join(workspace.workspacePath, 'best_frames.json'), output);
  emit('select_best_frames', '最佳帧筛选完成并写入best_frames.json', 'completed', { totalDamages: damages.length, bestFrameCount: bestFrameImages.length });
  return { success: true, totalDamages: damages.length, bestFrameCount: bestFrameImages.length };
}
// Entry point: run the skill on the provided input and print exactly one JSON
// document to stdout. Failures are reported as {success: false, error} on
// stdout rather than thrown, so the caller always receives a JSON result.
(async function main() {
  const report = (payload) => process.stdout.write(JSON.stringify(payload));
  try {
    const outcome = await run(readInput());
    report(outcome);
  } catch (failure) {
    const error = failure && failure.message ? failure.message : String(failure);
    report({ success: false, error });
  }
}());