#!/usr/bin/env node
|
||
'use strict';
|
||
|
||
const fs = require('node:fs');
|
||
const os = require('node:os');
|
||
const path = require('node:path');
|
||
|
||
/**
 * Prompt sent to the model for de-duplication. It tells the model that the
 * same physical damage may have been reported on several video frames, lists
 * five merge rules, and demands a JSON-only answer of the shape
 * `{"groups":[{merged_location, merged_type, merged_severity,
 * merged_description, member_indices}]}`.
 *
 * The stray `|` / `||` separator lines that had leaked into the template
 * literal are removed: they were part of the string and would have been sent
 * to the model verbatim.
 */
const DEDUPE_PROMPT = `你是汽车损伤去重专家。以下是AI在视频不同帧上检测到的多处损伤记录,由于视频连续帧中同一物理损伤可能被多次报告,请判断哪些记录实际上是同一处物理损伤。

合并规则:
1. 位置相同或相近的同类型损伤视为同一处。
2. 位置描述措辞略有不同但指同一部位的也应合并。
3. 时间相近且位置类型一致的大概率是同一处。
4. 不同位置或不同类型的损伤不应合并。
5. 同一批次中连续时间检测到的同位置同类型损伤,应该合并。

只输出JSON:{"groups":[{"merged_location":"左前翼子板","merged_type":"划痕","merged_severity":"轻微","merged_description":"合并后的综合描述","member_indices":[0,3,5]}]}`;
|
||
|
||
/**
 * Prompt sent to the model for best-frame selection. It explains that the
 * images show one damage at different moments with the damaged area boxed in
 * red, lists four selection criteria (box accuracy, clarity, angle, lighting)
 * and demands a JSON-only answer of the shape
 * `{"best_timestamps":[...],"reasons":[...]}`.
 *
 * The stray `|` / `||` separator lines that had leaked into the template
 * literal are removed: they were part of the string and would have been sent
 * to the model verbatim.
 */
const BEST_FRAME_PROMPT = `以下是同一处车辆旧伤在不同时刻的多张标注画面,损伤区域已用红色方框标出。

请综合以下标准,选出最能清晰展示该损伤的帧:
1. 红框标注精准,不偏移、不过大、不遗漏。
2. 损伤区域清晰可见,不模糊、不被遮挡。
3. 拍摄角度合适,能看到损伤全貌。
4. 光照条件好,不过暗、不过曝、无强反光。

只输出JSON:{"best_timestamps":[12.4],"reasons":["损伤清晰且红框准确"]}`;
|
||
|
||
/**
 * Write one progress event to stderr as a single line of JSON.
 *
 * @param {string} stage - Pipeline stage the event belongs to.
 * @param {string} message - Human-readable message.
 * @param {string} [status='running'] - Event status.
 * @param {object} [extra={}] - Extra fields; spread last, so they override
 *   the standard fields when keys collide.
 */
function emit(stage, message, status = 'running', extra = {}) {
  const event = {
    type: 'process_event',
    stage,
    message,
    status,
    timestamp: new Date().toISOString(),
    ...extra,
  };
  process.stderr.write(`${JSON.stringify(event)}\n`);
}
|
||
|
||
/**
 * Load the skill input as JSON.
 *
 * Sources are tried in order and the first truthy one wins: the first CLI
 * argument, the SKILL_INPUT env var, the AIFLOW_SKILL_INPUT env var, and
 * finally all of stdin (file descriptor 0).
 *
 * @returns {*} The parsed JSON value, or `{}` when the source is blank.
 * @throws {SyntaxError} When the source is non-blank but not valid JSON.
 */
function readInput() {
  const source = process.argv[2]
    || process.env.SKILL_INPUT
    || process.env.AIFLOW_SKILL_INPUT
    || fs.readFileSync(0, 'utf8');
  const text = String(source).trim();
  if (!text) return {};
  return JSON.parse(text);
}
|
||
|
||
/**
 * Resolve the directory that holds all per-task workspaces.
 *
 * Precedence:
 *   1. VEHICLE_SCRATCH_WORKSPACE_ROOT, used as-is (resolved to absolute).
 *   2. RZYX_AI_WORKSPACE_ROOT, with `vehicle-scratch-inspection` appended
 *      unless the path already ends in that directory name.
 *   3. RZYX_AI_DATA_DIR + `workspace/vehicle-scratch-inspection`.
 *   4. The OS temp directory + `vehicle-scratch-inspection`.
 *
 * @param {object} env - Environment-variable map.
 * @returns {string} Workspace root path.
 */
function workspaceRoot(env) {
  const leaf = 'vehicle-scratch-inspection';

  if (env.VEHICLE_SCRATCH_WORKSPACE_ROOT) {
    return path.resolve(env.VEHICLE_SCRATCH_WORKSPACE_ROOT);
  }

  if (env.RZYX_AI_WORKSPACE_ROOT) {
    const resolved = path.resolve(env.RZYX_AI_WORKSPACE_ROOT);
    if (path.basename(resolved) === leaf) return resolved;
    return path.join(resolved, leaf);
  }

  if (env.RZYX_AI_DATA_DIR) {
    return path.join(path.resolve(env.RZYX_AI_DATA_DIR), 'workspace', leaf);
  }

  return path.join(os.tmpdir(), leaf);
}
|
||
|
||
/**
 * Validate a task id and locate its existing workspace directory.
 *
 * The id is trimmed and must consist of 1-80 characters from
 * `[a-zA-Z0-9_-]`, which also keeps path separators and `..` out of the
 * joined path.
 *
 * @param {*} taskId - Task identifier supplied by the caller.
 * @param {object} env - Environment-variable map passed to workspaceRoot.
 * @returns {{taskId: string, workspacePath: string}} Trimmed id and its directory.
 * @throws {Error} When the id is invalid/empty or the directory does not exist.
 */
function workspaceFor(taskId, env) {
  const taskIdPattern = /^[a-zA-Z0-9_-]{1,80}$/;
  const normalizedId = String(taskId || '').trim();
  if (!taskIdPattern.test(normalizedId)) {
    throw new Error('taskId非法或为空');
  }

  const workspacePath = path.join(workspaceRoot(env), normalizedId);
  if (fs.existsSync(workspacePath)) {
    return { taskId: normalizedId, workspacePath };
  }
  throw new Error(`workspace不存在: ${workspacePath}`);
}
|
||
|
||
/**
 * Read a UTF-8 file synchronously and parse it as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value.
 * @throws When the file cannot be read or does not contain valid JSON.
 */
function readJson(filePath) {
  const contents = fs.readFileSync(filePath, 'utf8');
  return JSON.parse(contents);
}
|
||
|
||
/**
 * Serialize a value as 2-space-indented JSON followed by a newline and write
 * it to `filePath`, creating missing parent directories first.
 *
 * @param {string} filePath - Destination file.
 * @param {*} value - Value to serialize.
 */
function writeJson(filePath, value) {
  const parentDir = path.dirname(filePath);
  fs.mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  fs.writeFileSync(filePath, `${serialized}\n`, 'utf8');
}
|
||
|
||
/**
 * Best-effort JSON extraction from a model reply.
 *
 * - Falsy input yields `null`; objects (including arrays) are returned as-is.
 * - Otherwise the input is stringified and trimmed; if it contains a
 *   ``` / ```json fenced block, only the first fence's content is considered.
 * - The text is parsed as JSON directly; failing that, the span from the
 *   first `{` to the last `}` is parsed.
 *
 * @param {*} value - Raw reply or already-parsed object.
 * @returns {*} The parsed value, or `null` when nothing parses.
 */
function parseJsonLoose(value) {
  if (!value) return null;
  if (typeof value === 'object') return value;

  // Parse failures are an expected outcome here, so they are reported as a
  // flag instead of propagating.
  const attempt = (candidate) => {
    try {
      return { ok: true, result: JSON.parse(candidate) };
    } catch {
      return { ok: false, result: null };
    }
  };

  const trimmed = String(value).trim();
  const fence = /```(?:json)?\s*([\s\S]*?)```/i.exec(trimmed);
  const payload = fence === null ? trimmed : fence[1].trim();

  const direct = attempt(payload);
  if (direct.ok) return direct.result;

  const open = payload.indexOf('{');
  const close = payload.lastIndexOf('}');
  if (open < 0 || close <= open) return null;

  const embedded = attempt(payload.slice(open, close + 1));
  return embedded.ok ? embedded.result : null;
}
|
||
|
||
/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||
|
||
/**
 * Coerce `value` with Number() and clamp it into `[min, max]`.
 *
 * Note that Number() coercion applies: `null` and `''` become 0 and are
 * clamped rather than replaced by the fallback.
 *
 * @param {*} value - Candidate value.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @param {*} fallback - Returned when the coerced value is not finite.
 * @returns {*} The clamped number, or `fallback`.
 */
function clampNumber(value, min, max, fallback) {
  const numeric = Number(value);
  if (Number.isFinite(numeric)) {
    const upperBounded = Math.min(max, numeric);
    return Math.max(min, upperBounded);
  }
  return fallback;
}
|
||
|
||
/**
 * Canonicalize a damage-location label so that wording variants compare
 * equal. The rewrites are applied in order:
 * whitespace removed, `左侧`/`右侧` shortened to `左`/`右`, `车门` -> `门`,
 * `前叶子板`/`后叶子板` -> `前翼子板`/`后翼子板`, `保险杆` -> `保险杠`,
 * and `漆面` dropped.
 *
 * @param {*} value - Raw location text; falsy values become ''.
 * @returns {string} Normalized label.
 */
function normalizeLocation(value) {
  const rewrites = [
    [/\s+/g, ''],
    [/[左右]侧/g, (matched) => matched[0]],
    [/车门/g, '门'],
    [/前叶子板/g, '前翼子板'],
    [/后叶子板/g, '后翼子板'],
    [/保险杆/g, '保险杠'],
    [/漆面/g, ''],
  ];

  let label = String(value || '');
  for (const [pattern, replacement] of rewrites) {
    label = label.replace(pattern, replacement);
  }
  return label.trim();
}
|
||
|
||
/**
 * Map a severity label to a sortable rank.
 *
 * @param {*} value - Severity text; falsy values are treated as ''.
 * @returns {number} 3 when the text contains `严重`, otherwise 2 when it
 *   contains `中`, otherwise 1.
 */
function severityRank(value) {
  const label = String(value || '');
  // Checked in order, so `严重` wins when both keywords are present.
  const tiers = [
    ['严重', 3],
    ['中', 2],
  ];
  const hit = tiers.find(([keyword]) => label.includes(keyword));
  return hit ? hit[1] : 1;
}
|
||
|
||
/**
 * Flatten one grounding annotation into the record shape used by the rest of
 * the script.
 *
 * Damage fields are read from `annotation.damage` when present, otherwise
 * from the annotation itself, with the annotation as a secondary source and
 * fixed defaults (`未知部位`, `旧伤`, `轻微`, '') last.
 *
 * @param {object} annotation - Raw annotation.
 * @param {number} index - Position of the annotation in its list; also used
 *   to synthesize `damage_NNN` when no id is present.
 * @returns {object} `{index, damageId, location, type, severity, description,
 *   timeSecond, markedFrames, raw}`.
 */
function normalizeAnnotation(annotation, index) {
  const damage = annotation.damage || annotation;
  const fallbackId = `damage_${String(index + 1).padStart(3, '0')}`;

  // First non-nullish time field wins; 0 when none is set.
  const rawTime = [damage.timeSecond, damage.time_second, damage.timestamp, annotation.timestamp]
    .find((candidate) => candidate !== null && candidate !== undefined);

  const damageId = annotation.damageId || damage.id || fallbackId;
  const location = damage.location || damage.part || annotation.location || annotation.part || '未知部位';
  const type = damage.type || annotation.type || '旧伤';
  const severity = damage.severity || annotation.severity || '轻微';
  const description = damage.description || annotation.description || '';
  const markedFrames = annotation.markedFrames || [];

  return {
    index,
    damageId,
    location,
    type,
    severity,
    description,
    timeSecond: Number(rawTime ?? 0),
    markedFrames,
    raw: annotation,
  };
}
|
||
|
||
/**
 * Group annotations without a model: records sharing the same normalized
 * location and trimmed type form one group.
 *
 * Groups are returned in order of first appearance, members in input order.
 *
 * @param {object[]} annotations - Normalized annotation records.
 * @returns {object[]} One buildGroup() result per distinct location/type key.
 */
function heuristicDedupe(annotations) {
  // A Map iterates in insertion order, which keeps first-appearance ordering.
  const buckets = new Map();
  for (const item of annotations) {
    const key = `${normalizeLocation(item.location)}|${String(item.type).trim()}`;
    const bucket = buckets.get(key);
    if (bucket) {
      bucket.push(item);
    } else {
      buckets.set(key, [item]);
    }
  }
  return Array.from(buckets.values(), (members) => buildGroup(members));
}
|
||
|
||
/**
 * Merge annotation records that describe one physical damage.
 *
 * The most severe member (by severityRank) supplies the default location,
 * type and severity. `override` fields win when truthy, in either
 * snake_case (`merged_location`) or camelCase (`mergedLocation`) form. The
 * default description is the distinct non-empty member descriptions joined
 * with `;`.
 *
 * @param {object[]} members - Non-empty list of normalized annotations.
 * @param {object} [override={}] - Model-provided merged fields.
 * @returns {object} `{mergedLocation, mergedType, mergedSeverity,
 *   mergedDescription, memberIndices, sourceDamageIds, markedFrames}`.
 */
function buildGroup(members, override = {}) {
  const bySeverity = [...members];
  bySeverity.sort((left, right) => severityRank(right.severity) - severityRank(left.severity));
  const main = bySeverity[0] || members[0];

  const distinctDescriptions = new Set();
  for (const { description } of members) {
    if (description) distinctDescriptions.add(description);
  }
  const joinedDescriptions = [...distinctDescriptions].join(';');

  // Override value in either spelling, else the supplied default.
  const pick = (snakeKey, camelKey, fallback) => override[snakeKey] || override[camelKey] || fallback;

  return {
    mergedLocation: pick('merged_location', 'mergedLocation', main.location),
    mergedType: pick('merged_type', 'mergedType', main.type),
    mergedSeverity: pick('merged_severity', 'mergedSeverity', main.severity),
    mergedDescription: pick('merged_description', 'mergedDescription', joinedDescriptions || main.description),
    memberIndices: members.map(({ index }) => index),
    sourceDamageIds: members.map(({ damageId }) => damageId),
    markedFrames: members.flatMap((member) => member.markedFrames || []),
  };
}
|
||
|
||
/**
 * Turn a de-duplication reply (`{"groups":[...]}`) into merged damage groups.
 *
 * Fixes over the previous version:
 * - Returns `null` (not `[]`) when the reply yields no usable group, so
 *   callers that test `!groups` fall back to another de-duplication strategy
 *   instead of silently reporting zero damages.
 * - Only integer indices are accepted; `null`, `''` and booleans used to be
 *   coerced by Number() and resolve to annotation 0 or 1.
 * - Each annotation joins at most one group (the first that claims it).
 * - Annotations the reply does not mention are kept as single-member groups
 *   rather than dropped, matching the heuristic path, which never loses a
 *   record.
 * - Malformed group entries (null, non-array member list) are skipped
 *   instead of throwing.
 *
 * @param {*} raw - Model reply: text, fenced JSON, or an already-parsed object.
 * @param {object[]} annotations - Normalized annotations; member indices are
 *   positions in this array.
 * @returns {object[]|null} Merged groups, or `null` when the reply is missing,
 *   unparsable, or contains no valid group.
 */
function groupsFromModelResult(raw, annotations) {
  const parsed = parseJsonLoose(raw);
  const modelGroups = Array.isArray(parsed?.groups) ? parsed.groups : null;
  if (!modelGroups) return null;

  const claimed = new Set();
  const merged = [];

  for (const group of modelGroups) {
    const rawIndices = group?.member_indices || group?.memberIndices;
    if (!Array.isArray(rawIndices)) continue;

    const members = [];
    for (const rawIndex of rawIndices) {
      const position = toAnnotationPosition(rawIndex);
      if (position === null || claimed.has(position)) continue;
      const annotation = annotations[position];
      if (!annotation) continue;
      claimed.add(position);
      members.push(annotation);
    }
    if (members.length) merged.push(buildGroup(members, group));
  }

  // Nothing usable in the reply: let the caller pick a fallback strategy.
  if (!merged.length) return null;

  annotations.forEach((annotation, position) => {
    if (annotation && !claimed.has(position)) merged.push(buildGroup([annotation]));
  });
  return merged;

  // Accept integers and integer-valued numeric strings only.
  function toAnnotationPosition(value) {
    let numeric = NaN;
    if (typeof value === 'number') {
      numeric = value;
    } else if (typeof value === 'string' && value.trim() !== '') {
      numeric = Number(value);
    }
    return Number.isInteger(numeric) && numeric >= 0 ? numeric : null;
  }
}
|
||
|
||
/**
 * Read an image file and wrap it as an `image_url` chat-message part whose
 * URL is a base64 `data:` URI.
 *
 * The MIME type comes from the lower-cased file extension: `.png`, `.webp`
 * and `.svg` are recognized; everything else is sent as `image/jpeg`.
 *
 * @param {string} filePath - Image to embed.
 * @returns {{type: string, image_url: {url: string}}}
 */
function imageContent(filePath) {
  const mimeByExtension = new Map([
    ['.png', 'image/png'],
    ['.webp', 'image/webp'],
    ['.svg', 'image/svg+xml'],
  ]);
  const extension = path.extname(filePath).toLowerCase();
  const mimeType = mimeByExtension.get(extension) ?? 'image/jpeg';
  const encoded = fs.readFileSync(filePath).toString('base64');
  return {
    type: 'image_url',
    image_url: { url: `data:${mimeType};base64,${encoded}` },
  };
}
|
||
|
||
/**
 * POST one user message to the chat-completions endpoint and return the
 * text of the first choice.
 *
 * Configuration is read from `env`: ARK_API_KEY (required), ARK_API_URL and
 * BEST_FRAME_MODEL (both optional, with built-in defaults). Each attempt is
 * announced through emit() under the `best_frame_model` stage and is bounded
 * by a 600 s AbortSignal timeout.
 *
 * Up to three attempts are made, sleeping 2.5 s × attempt number between
 * them. Retry detection fixes over the previous version:
 * - HTTP 429 is recognized from the response status, which is now attached
 *   to the thrown error as `err.status`. Previously only the error text was
 *   inspected, and that text is the response body, which need not mention
 *   the status code.
 * - Network failures surface from fetch as `TypeError: fetch failed` with
 *   the real code (e.g. ECONNRESET) on `err.cause`; the cause is now
 *   inspected too.
 * - A response body that parses to `null` no longer crashes the `choices`
 *   lookup.
 *
 * @param {object} options
 * @param {object} options.env - Environment-variable map.
 * @param {*} options.content - Message content for the single user message.
 * @param {number} [options.maxTokens=2048] - `max_tokens` for the request.
 * @returns {Promise<string>} First choice's message content, or '' if absent.
 * @throws {Error} When the API key is missing, or when the request fails
 *   with a non-retryable error or on the final attempt. HTTP failures carry
 *   the (truncated) response body as message and the status as `status`.
 */
async function callModel({ env, content, maxTokens = 2048 }) {
  const apiKey = env.ARK_API_KEY;
  if (!apiKey) throw new Error('缺少ARK_API_KEY');
  const apiUrl = env.ARK_API_URL || 'https://ark.cn-beijing.volces.com/api/v3/chat/completions';
  const model = env.BEST_FRAME_MODEL || 'doubao-seed-2-0-pro-260215';
  const totalAttempts = 3;

  // Transient = rate limiting, timeouts, or connection resets, whether they
  // are reported on the error itself or on its `cause`.
  const isTransient = (err) => {
    if (err?.status === 429) return true;
    const details = [
      err?.name,
      err?.code,
      err?.message,
      err?.cause?.name,
      err?.cause?.code,
      err?.cause?.message,
    ].filter(Boolean).join(' ');
    return /429|TooManyRequests|timeout|ECONNRESET|ETIMEDOUT/i.test(details);
  };

  for (let attempt = 1; attempt <= totalAttempts; attempt += 1) {
    try {
      emit('best_frame_model', '调用最佳帧模型', 'running', { model, attempt, totalAttempts });
      const response = await fetch(apiUrl, {
        method: 'POST',
        headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model,
          messages: [{ role: 'user', content }],
          max_tokens: maxTokens,
          temperature: 0.1,
        }),
        signal: AbortSignal.timeout(600000),
      });
      const text = await response.text();
      let body;
      try {
        body = JSON.parse(text);
      } catch {
        // Non-JSON reply (e.g. a gateway HTML page): keep the raw text for the error.
        body = { raw: text };
      }
      if (!response.ok) {
        const failure = new Error(JSON.stringify(body).slice(0, 1000));
        failure.status = response.status;
        throw failure;
      }
      return body?.choices?.[0]?.message?.content || '';
    } catch (err) {
      if (attempt < totalAttempts && isTransient(err)) {
        await sleep(2500 * attempt);
        continue;
      }
      throw err;
    }
  }
  // Unreachable in practice (the last attempt either returns or throws);
  // kept as a safety net.
  throw new Error('最佳帧模型调用失败');
}
|
||
|
||
/**
 * Ask the model which annotations describe the same physical damage.
 *
 * Only the descriptive fields of each annotation are sent (index, damageId,
 * timeSecond, location, type, severity, description), pretty-printed as JSON
 * after DEDUPE_PROMPT. The reply is limited to 4096 tokens.
 *
 * @param {object} options
 * @param {object} options.env - Environment-variable map passed to callModel.
 * @param {object[]} options.annotations - Normalized annotations.
 * @returns {Promise<object[]>} Non-empty list of merged groups.
 * @throws {Error} When the call fails or the reply yields no groups.
 */
async function dedupeWithModel({ env, annotations }) {
  const records = annotations.map(
    ({ index, damageId, timeSecond, location, type, severity, description }) => ({
      index,
      damageId,
      timeSecond,
      location,
      type,
      severity,
      description,
    }),
  );
  const prompt = `${DEDUPE_PROMPT}\n\n损伤记录:\n${JSON.stringify(records, null, 2)}`;
  const reply = await callModel({
    env,
    content: [{ type: 'text', text: prompt }],
    maxTokens: 4096,
  });

  const groups = groupsFromModelResult(reply, annotations);
  if (groups?.length) return groups;
  throw new Error('模型去重结果无法解析');
}
|
||
|
||
/**
 * Heuristic quality score for a candidate frame (higher is better).
 *
 * Scoring:
 * - +100 when the frame has a `bbox.normalized` box.
 * - +40 when the box area is within [1000, 180000].
 * - +20 when the box keeps a margin of more than 5 from every edge
 *   (x/y > 5 and < 995).
 * - up to +30 the closer the box centre is to (500, 500).
 * - up to +30 for file size (1 point per 50 000 bytes) of the marked image,
 *   or of the source image when no marked path is set.
 * The thresholds suggest box coordinates on a 0-1000 scale — NOTE(review):
 * confirm against the producer of `bbox.normalized`.
 *
 * Fixes over the previous version: coordinates are converted with Number()
 * and the geometric bonuses are skipped unless all four are finite.
 * Previously a missing coordinate made the whole score `NaN` (which breaks
 * sort comparators built on this function), and string coordinates were
 * concatenated by `+` when computing the centre.
 *
 * @param {object} frame - Frame record with optional `bbox.normalized`,
 *   `markedPath` and `path`.
 * @returns {number} A finite score.
 */
function frameScore(frame) {
  let score = 0;

  const box = frame.bbox?.normalized;
  if (box) {
    score += 100;
    const [x1, y1, x2, y2] = [box.x1, box.y1, box.x2, box.y2]
      .map((coordinate) => (coordinate === null || coordinate === undefined ? NaN : Number(coordinate)));

    if ([x1, y1, x2, y2].every(Number.isFinite)) {
      const area = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
      if (area >= 1000 && area <= 180000) score += 40;
      if (x1 > 5 && y1 > 5 && x2 < 995 && y2 < 995) score += 20;
      const centerX = (x1 + x2) / 2;
      const centerY = (y1 + y2) / 2;
      score += Math.max(0, 30 - Math.abs(centerX - 500) / 20 - Math.abs(centerY - 500) / 20);
    }
  }

  try {
    score += Math.min(30, fs.statSync(frame.markedPath || frame.path).size / 50000);
  } catch {
    // Missing or unreadable file: the frame simply gets no size bonus.
  }
  return score;
}
|
||
|
||
/**
 * Guess an image MIME type from a file path's extension (case-insensitive).
 *
 * @param {string} [filePath] - Path or file name; falsy values are treated as ''.
 * @returns {string} `image/png`, `image/webp` or `image/svg+xml` for the
 *   matching extensions, `image/jpeg` for anything else.
 */
function mimeTypeFor(filePath) {
  switch (path.extname(filePath || '').toLowerCase()) {
    case '.png':
      return 'image/png';
    case '.webp':
      return 'image/webp';
    case '.svg':
      return 'image/svg+xml';
    default:
      return 'image/jpeg';
  }
}
|
||
|
||
/**
 * Pick frames without a model, using frameScore().
 *
 * With three or fewer frames the input array itself is returned unchanged,
 * regardless of `topN`. Otherwise a copy is sorted by descending score and
 * the first `topN` entries are returned.
 *
 * @param {object[]} frames - Candidate frames.
 * @param {number} topN - Number of frames to keep when ranking applies.
 * @returns {object[]} Selected frames.
 */
function chooseHeuristic(frames, topN) {
  const fewEnoughToKeepAll = frames.length <= 3;
  if (fewEnoughToKeepAll) return frames;

  const ranked = [...frames];
  ranked.sort((left, right) => frameScore(right) - frameScore(left));
  return ranked.slice(0, topN);
}
|
||
|
||
/**
 * Coerce a timestamp with Number() and round it to two decimals.
 *
 * @param {*} value - Timestamp in any form Number() accepts.
 * @returns {number|null} The rounded number, or `null` when the coerced
 *   value is not finite. Number() coercion applies, so `null` and ''
 *   become 0.
 */
function normalizeTimestamp(value) {
  const seconds = Number(value);
  if (!Number.isFinite(seconds)) return null;
  return Number(seconds.toFixed(2));
}
|
||
|
||
/**
 * Find the frame whose timestamp best matches `timestamp`.
 *
 * Fix over the previous version: the frame *closest* to the target is
 * returned (first one on ties). Previously the first frame within 0.05 s, or
 * else the first within 0.2 s, was returned, so with densely sampled frames
 * an earlier neighbour could win over a frame with the exact timestamp.
 * The overall tolerance is still 0.2 s, so a match is found in exactly the
 * same cases as before.
 *
 * @param {object[]} frames - Frames with a `timestamp` field (falsy → 0).
 * @param {*} timestamp - Target time; normalized via normalizeTimestamp().
 * @returns {object|null} Nearest frame within 0.2 s, or `null` when the
 *   target is not a finite number or no frame is close enough.
 */
function frameByTimestamp(frames, timestamp) {
  const target = normalizeTimestamp(timestamp);
  if (target === null) return null;

  const tolerance = 0.2;
  let nearest = null;
  let nearestGap = Infinity;
  for (const frame of frames) {
    const gap = Math.abs(Number(frame.timestamp || 0) - target);
    // A NaN gap (unparsable frame timestamp) fails both comparisons and is skipped.
    if (gap <= tolerance && gap < nearestGap) {
      nearest = frame;
      nearestGap = gap;
    }
  }
  return nearest;
}
|
||
|
||
/**
 * Extract the externally supplied best-frame result for one damage.
 *
 * After loose JSON parsing, the result is looked up in this order:
 *   1. the parsed value itself, `parsed.results`, or `parsed.bestFrameResults`
 *      — the first of these that is an array is indexed by `index`
 *      (`null` when that slot is empty or falsy);
 *   2. `parsed[damageId]`;
 *   3. `parsed[String(index)]`;
 *   4. the whole parsed value (treated as a result shared by every damage).
 *
 * @param {*} raw - Raw or parsed best-frame results.
 * @param {number} index - Zero-based position of the damage.
 * @param {string} damageId - Generated id of the damage.
 * @returns {*} The matching entry, or `null` when nothing parses.
 */
function bestFrameResultFor(raw, index, damageId) {
  const parsed = parseJsonLoose(raw);
  if (!parsed) return null;

  const list = [parsed, parsed.results, parsed.bestFrameResults].find(Array.isArray);
  if (list) return list[index] || null;

  return parsed[damageId] || parsed[String(index)] || parsed;
}
|
||
|
||
/**
 * Map the timestamps named in a best-frame reply back to frame objects.
 *
 * The timestamp list is read from the first truthy key among
 * `best_timestamps`, `bestTimestamps`, `selected_timestamps` and
 * `selectedTimestamps`. Each timestamp is resolved with frameByTimestamp();
 * unmatched timestamps are ignored and each frame is kept once, in reply
 * order.
 *
 * @param {object[]} frames - Frames the reply may refer to.
 * @param {*} raw - Model reply (text or parsed object).
 * @param {number} topN - Maximum number of frames to return.
 * @returns {object[]|null} Up to `topN` frames, or `null` when the reply has
 *   no timestamp array or none of its timestamps matches a frame.
 */
function chooseFromModelResult(frames, raw, topN) {
  const parsed = parseJsonLoose(raw) || raw;
  const timestamps = parsed?.best_timestamps
    || parsed?.bestTimestamps
    || parsed?.selected_timestamps
    || parsed?.selectedTimestamps;
  if (!Array.isArray(timestamps)) return null;

  // A Set keeps first-seen order and drops repeated frames.
  const picked = new Set();
  for (const timestamp of timestamps) {
    const match = frameByTimestamp(frames, timestamp);
    if (match) picked.add(match);
  }

  if (picked.size === 0) return null;
  return [...picked].slice(0, topN);
}
|
||
|
||
/**
 * Ask the model to pick the best frames for one damage.
 *
 * The 12 highest-scoring frames (by frameScore) are sent. The message starts
 * with BEST_FRAME_PROMPT plus a summary of the damage, followed — for every
 * candidate — by a `[<seconds> second] <damageId>` caption and the marked
 * image (or the source image when no marked path exists) as a data URI.
 *
 * @param {object} options
 * @param {object} options.env - Environment-variable map passed to callModel.
 * @param {object[]} options.frames - All available frames of the damage.
 * @param {object} options.group - Merged damage group (location, type,
 *   severity, description).
 * @param {number} options.topN - Number of frames requested.
 * @param {string} options.damageId - Id shown in each caption.
 * @returns {Promise<object[]>} Non-empty list of selected candidate frames.
 * @throws {Error} When the call fails or the reply selects no candidate.
 */
async function chooseWithModel({ env, frames, group, topN, damageId }) {
  const candidates = [...frames];
  candidates.sort((left, right) => frameScore(right) - frameScore(left));
  candidates.length = Math.min(candidates.length, 12);

  const intro = `${BEST_FRAME_PROMPT}\n\n本次需要选择最佳${topN}张。损伤:${group.mergedLocation} ${group.mergedType},严重程度:${group.mergedSeverity},描述:${group.mergedDescription || ''}。候选帧如下。`;
  const content = [
    { type: 'text', text: intro },
    ...candidates.flatMap((frame) => [
      { type: 'text', text: `[${Number(frame.timestamp || 0).toFixed(2)} second] ${damageId}` },
      imageContent(frame.markedPath || frame.path),
    ]),
  ];

  const reply = await callModel({ env, content, maxTokens: 2048 });
  const selected = chooseFromModelResult(candidates, reply, topN);
  if (selected?.length) return selected;
  throw new Error('模型最佳帧结果无法解析');
}
|
||
|
||
/**
 * Build the public URL path of a file inside a task workspace.
 *
 * @param {{taskId: string}} workspace - Workspace descriptor.
 * @param {string} relativePath - Path relative to the workspace; backslashes
 *   are converted to forward slashes.
 * @returns {string} `/workspace/vehicle-scratch-inspection/<taskId>/<path>`.
 */
function publicUrl(workspace, relativePath) {
  const urlPath = relativePath.split('\\').join('/');
  return `/workspace/vehicle-scratch-inspection/${workspace.taskId}/${urlPath}`;
}
|
||
|
||
/**
 * Copy a selected frame into the workspace's `best_frames/` directory and
 * describe the copy.
 *
 * The marked image is preferred over the source image. The copy is named
 * `best_<order, 2 digits>_<damageId>_<timestamp with 2 decimals, '.' → '_'>s<ext>`,
 * with `.jpg` assumed when the source has no extension.
 *
 * @param {{taskId: string, workspacePath: string}} workspace - Target workspace.
 * @param {object} frame - Frame to copy (`markedPath`/`path`, `timestamp`,
 *   optional `bbox.normalized`, `markedRelativePath`, `sourceRelativePath`).
 * @param {string} damageId - Damage the frame belongs to.
 * @param {number} order - Running number used in the file name.
 * @returns {object} `{timestamp, path, relativePath, url, bbox, sourceRelativePath}`.
 * @throws When the source file cannot be copied.
 */
function copyBestFrame(workspace, frame, damageId, order) {
  const source = frame.markedPath || frame.path;
  const seconds = Number(frame.timestamp || 0);

  const orderTag = String(order).padStart(2, '0');
  const timeTag = seconds.toFixed(2).replace('.', '_');
  const extension = path.extname(source) || '.jpg';
  const relativePath = `best_frames/best_${orderTag}_${damageId}_${timeTag}s${extension}`;

  const target = path.join(workspace.workspacePath, relativePath);
  fs.mkdirSync(path.dirname(target), { recursive: true });
  fs.copyFileSync(source, target);

  return {
    timestamp: seconds,
    path: target,
    relativePath,
    url: publicUrl(workspace, relativePath),
    bbox: frame.bbox?.normalized || null,
    sourceRelativePath: frame.markedRelativePath || frame.sourceRelativePath || null,
  };
}
|
||
|
||
/**
 * Select the best display frames for every distinct damage of a task and
 * write the result to `best_frames.json` in the task workspace.
 *
 * Steps:
 *   1. Load `grounding.json` from the workspace and normalize its annotations.
 *   2. De-duplicate annotations into groups: a caller-supplied result
 *      (`input.dedupeResult` / `input.modelResult`) is used when it parses;
 *      otherwise the model is asked (only with an API key and more than one
 *      annotation); otherwise, or if the model fails, the heuristic is used.
 *   3. Per group, choose frames: a caller-supplied `input.bestFrameResults`
 *      entry first, then the model (only with an API key and more than three
 *      frames), then the heuristic score.
 *   4. Copy the chosen frames into the workspace and collect their metadata.
 *
 * Model failures in steps 2 and 3 are reported through emit() and do not
 * abort the run.
 *
 * @param {object} input - Skill input; `taskId` is required, `topN`
 *   (clamped to 1-5 and rounded, default 1) is optional.
 * @param {object} [env=process.env] - Environment-variable map.
 * @returns {Promise<{success: boolean, totalDamages: number, bestFrameCount: number}>}
 * @throws {Error} When `taskId` is missing or invalid, the workspace or
 *   grounding file is unavailable, or a frame cannot be copied.
 */
async function run(input, env = process.env) {
  if (!input?.taskId) throw new Error('taskId不能为空');

  const describeError = (err) => (err && err.message ? err.message : String(err));

  const workspace = workspaceFor(input.taskId, env);
  const groundingPath = path.join(workspace.workspacePath, 'grounding.json');
  const grounding = readJson(groundingPath);
  const annotations = (grounding.annotations || []).map(normalizeAnnotation);
  const topN = Math.round(clampNumber(input.topN, 1, 5, 1));

  emit('select_best_frames', '开始去重合并旧伤', 'running', { annotationCount: annotations.length });

  // De-duplication: supplied result → model → heuristic.
  let groups = groupsFromModelResult(input.dedupeResult || input.modelResult, annotations);
  const shouldAskModelToDedupe = !groups && env.ARK_API_KEY && annotations.length > 1;
  if (shouldAskModelToDedupe) {
    try {
      groups = await dedupeWithModel({ env, annotations });
    } catch (err) {
      emit('select_best_frames', '模型去重失败,使用启发式去重兜底', 'running', { error: describeError(err) });
    }
  }
  if (!groups) groups = heuristicDedupe(annotations);

  const damages = [];
  const bestFrameImages = [];

  for (const [position, group] of groups.entries()) {
    const damageId = `damage_${String(position + 1).padStart(3, '0')}`;
    const availableFrames = (group.markedFrames || [])
      .filter((frame) => frame && (frame.markedPath || frame.path));

    // Frame selection: supplied result → model → heuristic.
    const suppliedChoice = bestFrameResultFor(input.bestFrameResults, position, damageId);
    let selected = suppliedChoice
      ? chooseFromModelResult(availableFrames, suppliedChoice, topN)
      : null;

    if (!selected && availableFrames.length > 3 && env.ARK_API_KEY) {
      try {
        selected = await chooseWithModel({ env, frames: availableFrames, group, topN, damageId });
      } catch (err) {
        emit('select_best_frames', '模型选帧失败,使用启发式评分兜底', 'running', { damageId, error: describeError(err) });
      }
    }
    if (!selected) selected = chooseHeuristic(availableFrames, topN);

    const bestFrames = [];
    for (const frame of selected) {
      // The running count across all damages numbers the copied files.
      const image = copyBestFrame(workspace, frame, damageId, bestFrameImages.length + 1);
      bestFrames.push(image);
      bestFrameImages.push({
        ...image,
        mimeType: mimeTypeFor(image.path),
        label: `旧伤最佳展示帧 ${damageId}`,
        purpose: 'best_damage_frame',
        damageId,
      });
    }

    damages.push({
      id: damageId,
      location: group.mergedLocation,
      type: group.mergedType,
      severity: group.mergedSeverity,
      description: group.mergedDescription,
      memberIndices: group.memberIndices,
      sourceDamageIds: group.sourceDamageIds,
      timestamps: availableFrames.map((frame) => Number(frame.timestamp || 0)),
      bestFrames,
    });
  }

  const totalDamages = damages.length;
  const bestFrameCount = bestFrameImages.length;

  writeJson(path.join(workspace.workspacePath, 'best_frames.json'), {
    taskId: workspace.taskId,
    workspacePath: workspace.workspacePath,
    totalDamages,
    bestFrameCount,
    damages,
    bestFrameImages,
    generatedAt: new Date().toISOString(),
  });
  emit('select_best_frames', '最佳帧筛选完成并写入best_frames.json', 'completed', { totalDamages, bestFrameCount });
  return { success: true, totalDamages, bestFrameCount };
}
|
||
|
||
// Entry point: read the input, run the pipeline, and print exactly one JSON
// document to stdout — the run() result on success, or
// `{success: false, error}` when anything throws.
(async () => {
  const report = (payload) => process.stdout.write(JSON.stringify(payload));
  try {
    const result = await run(readInput());
    report(result);
  } catch (err) {
    const error = err && err.message ? err.message : String(err);
    report({ success: false, error });
  }
})();
|