181 lines
7.5 KiB
JavaScript
Raw Permalink Normal View History

2026-05-20 21:39:12 +08:00
#!/usr/bin/env node
'use strict';
const fs = require('fs');
/**
 * Write one progress event to stderr as a single line of JSON.
 *
 * @param {*} stage - Value stored under the event's `stage` key.
 * @param {*} message - Value stored under the event's `message` key.
 * @param {*} [status='running'] - Value stored under the event's `status` key.
 * @param {object} [extra={}] - Extra properties merged into the event. They are
 *   spread last, so a key here replaces a built-in key of the same name.
 * @returns {void}
 */
function emit(stage, message, status = 'running', extra = {}) {
  const event = {
    type: 'process_event',
    stage,
    message,
    status,
    timestamp: new Date().toISOString(),
    ...extra,
  };
  // One event per line so a consumer can split the stream on newlines.
  process.stderr.write(`${JSON.stringify(event)}\n`);
}
/**
 * Load the skill input as parsed JSON.
 *
 * The first truthy source wins, in this order: the first CLI argument
 * (`process.argv[2]`), the `SKILL_INPUT` environment variable, the
 * `AIFLOW_SKILL_INPUT` environment variable, and finally everything readable
 * from file descriptor 0 (stdin). Stdin is only read when the other three
 * sources are all empty.
 *
 * @returns {*} The parsed JSON value, or `{}` when the chosen source is blank.
 * @throws {SyntaxError} When the chosen source is not valid JSON.
 */
function readInput() {
  const { SKILL_INPUT, AIFLOW_SKILL_INPUT } = process.env;
  const source = process.argv[2]
    || SKILL_INPUT
    || AIFLOW_SKILL_INPUT
    || fs.readFileSync(0, 'utf8');
  const payload = String(source).trim();
  return payload ? JSON.parse(payload) : {};
}
/**
 * Normalize a date written in a common invoice notation to `YYYY-MM-DD`.
 *
 * `.`, `/`, `年` and `月` are treated as separators; `日` and all whitespace are
 * dropped. The result is accepted only when it can be split unambiguously:
 *   - fully delimited, with 1–2 digit month and day (`2024-1-5`), or
 *   - two-digit month and day, with either separator optional (`20240105`,
 *     `2024-0105`).
 *
 * Undelimited runs such as `202411` or `2024115` have more than one reading, so
 * they are returned unchanged rather than guessed at.
 *
 * @param {*} value - Raw date; falsy values are treated as empty.
 * @returns {string} `YYYY-MM-DD` when recognized, `''` for empty input, otherwise
 *   the trimmed original text.
 */
function normalizeDate(value) {
  const text = String(value || '').trim();
  if (!text) return '';
  const compact = text
    .replace(/[.年/]/g, '-')
    .replace(/月/g, '-')
    .replace(/日/g, '')
    .replace(/\s+/g, '');
  // Separators may only be omitted when month and day are both two digits;
  // otherwise the split point between month and day would be a guess.
  const match = compact.match(/^(\d{4})-(\d{1,2})-(\d{1,2})$/)
    || compact.match(/^(\d{4})-?(\d{2})-?(\d{2})$/);
  if (!match) return text;
  const [, y, m, d] = match;
  return `${y}-${m.padStart(2, '0')}-${d.padStart(2, '0')}`;
}
/**
 * Strip currency decoration from an amount so that only the numeric text remains.
 *
 * Removes the yen/yuan sign in both its halfwidth (U+00A5) and fullwidth
 * (U+FFE5) forms, thousands separators in both ASCII and fullwidth (U+FF0C)
 * forms, and all whitespace. Code points are written as escapes so the pattern
 * survives re-encoding of this file.
 *
 * @param {*} value - Raw amount. A finite number is kept as-is (so `0` yields
 *   `'0'`); any other falsy value is treated as empty.
 * @returns {string} The cleaned amount text, or `''` when there is no value.
 */
function cleanAmount(value) {
  // `value || ''` alone would discard a legitimate numeric zero.
  const text = typeof value === 'number' && Number.isFinite(value)
    ? String(value)
    : String(value || '').trim();
  if (!text) return '';
  return text.replace(/[\u00A5\uFFE5,\uFF0C\s]/g, '');
}
/**
 * Canonicalize a VIN: remove every whitespace character and upper-case the rest.
 *
 * @param {*} value - Raw VIN; falsy values are treated as empty.
 * @returns {string} The VIN without whitespace, in upper case (`''` when empty).
 */
function cleanVin(value) {
  const raw = String(value || '');
  // Splitting on whitespace runs and re-joining drops leading, trailing and
  // interior whitespace in a single pass.
  return raw.split(/\s+/).join('').toUpperCase();
}
/**
 * Coerce a value that should hold a JSON object into that object.
 *
 * - Falsy input yields `null`.
 * - An object (or array) is returned as-is.
 * - Anything else is stringified and parsed as JSON. If that fails, the span
 *   from the first `{` to the last `}` is parsed instead, which recovers an
 *   object wrapped in surrounding text such as a Markdown code fence.
 * - A JSON string that itself contains JSON (double-encoded) is unwrapped.
 * - Valid JSON that is a scalar (`123`, `true`, `"text"`, `null`) yields `null`.
 *   Returning the scalar would make callers treat it as a usable result.
 *
 * @param {*} value - Object, JSON text, or text containing a JSON object.
 * @returns {object|null} The parsed object/array, or `null` when none is found.
 */
function parseJsonText(value) {
  if (!value) return null;
  if (typeof value === 'object') return value;
  const text = String(value).trim();
  if (!text) return null;

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Not pure JSON: look for an embedded object literal.
    const match = text.match(/\{[\s\S]*\}/);
    if (!match) return null;
    try {
      parsed = JSON.parse(match[0]);
    } catch {
      return null;
    }
  }

  if (parsed !== null && typeof parsed === 'object') return parsed;
  // Double-encoded payload: each round strips one layer of quoting, so the
  // recursion always terminates.
  if (typeof parsed === 'string') return parseJsonText(parsed);
  return null;
}
/**
 * Pick the first argument that is not `undefined`, `null` or the empty string.
 *
 * Other falsy values (`0`, `false`, `NaN`) count as present and are returned.
 *
 * @param {...*} values - Candidates, in priority order.
 * @returns {*} The first present candidate, or `''` when there is none.
 */
function firstNonEmpty(...values) {
  const hit = values.find(
    (candidate) => candidate !== undefined && candidate !== null && candidate !== '',
  );
  // `find` yields undefined only when nothing matched, because undefined
  // itself can never satisfy the predicate.
  return hit === undefined ? '' : hit;
}
/**
 * Read a field from `source` under the first alias that yields usable text.
 *
 * Aliases are tried in order. A primitive value is used directly. For an object
 * value, its `words`, `word`, `value` and `text` properties are consulted in
 * that order. The chosen value is stringified and trimmed. If the result is
 * empty, or the value is a nested object with no text, the next alias is tried,
 * so a blank entry under a preferred alias cannot mask a populated fallback.
 *
 * @param {object} source - Object holding the raw fields; anything that is not
 *   a non-null object yields `''`.
 * @param {string[]} names - Aliases to try, in priority order.
 * @returns {string} The trimmed text of the first usable alias, or `''`.
 */
function readField(source, names) {
  if (!source || typeof source !== 'object') return '';
  for (const name of names) {
    const value = source[name];
    const picked = value !== null && typeof value === 'object'
      ? firstNonEmpty(value.words, value.word, value.value, value.text)
      : value;
    // Skip missing values and nested structures that carry no text themselves.
    if (picked === undefined || picked === null || typeof picked === 'object') continue;
    const text = String(picked).trim();
    if (text) return text;
  }
  return '';
}
/**
 * Find the recognition result to normalize.
 *
 * `modelResult`, `ocrResult`, `mockResult` and `result` are tried in that
 * order; the first one that `parseJsonText` turns into a truthy value is
 * returned. When none of them does, `input.fields` (if truthy) is wrapped as
 * `{ fields }`.
 *
 * @param {object} input - Skill input.
 * @returns {*} The chosen candidate, or `null` when the input carries none.
 */
function getCandidate(input) {
  const resultKeys = ['modelResult', 'ocrResult', 'mockResult', 'result'];
  for (const key of resultKeys) {
    const parsed = parseJsonText(input[key]);
    if (parsed) return parsed;
  }
  return input.fields ? { fields: input.fields } : null;
}
/**
 * Map a raw recognition result onto the canonical vehicle-invoice field set.
 *
 * Field values are looked up in `candidate.words_result` when that is an
 * object, otherwise in `candidate.fields` when that is an object, otherwise in
 * `candidate` itself. Each canonical field lists its accepted aliases (the
 * canonical key first, then the Chinese labels) and is read with `readField`.
 * The date, VIN and amount fields are additionally passed through
 * `normalizeDate`, `cleanVin` and `cleanAmount`.
 *
 * @param {*} candidate - Parsed recognition result; a non-object is treated as `{}`.
 * @returns {{success: boolean, fields: object, raw: object}} `success` is always
 *   `true`, `fields` holds every canonical key, and `raw` is the object the
 *   values were read from at the top level.
 */
function normalizeVehicleInvoice(candidate) {
  const root = candidate && typeof candidate === 'object' ? candidate : {};
  const fields = root.fields && typeof root.fields === 'object' ? root.fields : root;
  const words = root.words_result && typeof root.words_result === 'object' ? root.words_result : fields;
  const normalizedFields = {
    invoiceCode: readField(words, ['invoiceCode', '发票代码']),
    invoiceNumber: readField(words, ['invoiceNumber', 'invoiceNo', '发票号码', '发票号']),
    issueDate: normalizeDate(readField(words, ['issueDate', '开票日期', '填发日期'])),
    buyerName: readField(words, ['buyerName', '购买方名称', '购货单位名称', '购买方', '买方名称']),
    buyerId: readField(words, ['buyerId', '购买方纳税人识别号', '购买方身份证号码/组织机构代码', '买方纳税人识别号']),
    buyerAddressPhone: readField(words, ['buyerAddressPhone', '购买方地址、电话', '购买方地址电话']),
    vehicleType: readField(words, ['vehicleType', '车辆类型']),
    brandModel: readField(words, ['brandModel', '厂牌型号', '品牌型号', '厂牌型号及配置']),
    origin: readField(words, ['origin', '产地']),
    certificateNo: readField(words, ['certificateNo', '合格证号']),
    importCertificateNo: readField(words, ['importCertificateNo', '进口证明书号']),
    inspectionNo: readField(words, ['inspectionNo', '商检单号']),
    engineNo: readField(words, ['engineNo', '发动机号码', '发动机号']),
    vin: cleanVin(readField(words, ['vin', 'VIN', '车辆识别代号/车架号码', '车辆识别代号', '车架号码', '车架号'])),
    // The tax-authority code is deliberately not an alias here: it is a
    // different datum and is reported under `taxAuthorityCode` below.
    taxpayerNo: readField(words, ['taxpayerNo', '纳税人识别号']),
    sellerName: readField(words, ['sellerName', '销货单位名称', '销售方名称', '销方名称']),
    sellerTaxpayerNo: readField(words, ['sellerTaxpayerNo', '销货单位纳税人识别号', '销售方纳税人识别号']),
    sellerAccount: readField(words, ['sellerAccount', '销货单位开户银行及账号', '销售方开户行及账号']),
    sellerAddressPhone: readField(words, ['sellerAddressPhone', '销货单位地址、电话', '销售方地址电话']),
    taxAuthorityCode: readField(words, ['taxAuthorityCode', '主管税务机关代码']),
    taxAuthorityName: readField(words, ['taxAuthorityName', '主管税务机关名称']),
    totalAmount: cleanAmount(readField(words, ['totalAmount', '价税合计', '价税合计小写', '金额合计'])),
    taxRate: readField(words, ['taxRate', '增值税税率或征收率', '税率']),
    taxAmount: cleanAmount(readField(words, ['taxAmount', '增值税税额', '税额'])),
    totalAmountUpper: readField(words, ['totalAmountUpper', '价税合计大写', '大写']),
    tonnage: readField(words, ['tonnage', '吨位']),
    limitedPassengerCount: readField(words, ['limitedPassengerCount', '限乘人数']),
  };
  return {
    success: true,
    fields: normalizedFields,
    raw: root,
  };
}
/**
 * Build the instruction text for a multimodal model that will read the invoice.
 *
 * The prompt always contains three instruction sentences and the expected JSON
 * schema. One extra line is appended for each of `imageUrl`, `pdfUrl`,
 * `imageBase64` and `pdfBase64` that is truthy on `input`. The URLs are
 * embedded; the base64 payloads are only mentioned, not embedded.
 *
 * Clauses and labels are separated by explicit punctuation so that a label is
 * never fused to the value that follows it (for example a URL label and the URL).
 *
 * @param {object} input - Skill input.
 * @returns {string} The prompt, one statement per line.
 */
function buildPrompt(input) {
  return [
    '请使用多模态大模型识别机动车销售统一发票图片或PDF，只输出合法JSON，不输出解释。',
    '只根据图片/PDF可见内容提取字段；无法确认的字段输出空字符串，不要猜测。',
    '日期统一 YYYY-MM-DD；金额只保留数字和小数点；VIN 保留大写字符。',
    '输出schema：{"fields":{"invoiceCode":"","invoiceNumber":"","issueDate":"","buyerName":"","buyerId":"","buyerAddressPhone":"","vehicleType":"","brandModel":"","origin":"","certificateNo":"","importCertificateNo":"","inspectionNo":"","engineNo":"","vin":"","taxpayerNo":"","sellerName":"","sellerTaxpayerNo":"","sellerAccount":"","sellerAddressPhone":"","taxAuthorityCode":"","taxAuthorityName":"","totalAmount":"","taxRate":"","taxAmount":"","totalAmountUpper":"","tonnage":"","limitedPassengerCount":""}}',
    input.imageUrl ? `图片URL：${input.imageUrl}` : '',
    input.pdfUrl ? `PDF URL：${input.pdfUrl}` : '',
    input.imageBase64 ? '已提供 imageBase64，请直接看图识别。' : '',
    input.pdfBase64 ? '已提供 pdfBase64，请直接识别PDF。' : '',
  ].filter(Boolean).join('\n');
}
/**
 * Execute the skill for one input.
 *
 * When the input carries a recognition result, a `normalize` progress event is
 * emitted and the normalized invoice is returned. Otherwise nothing is emitted
 * and a failure payload is returned that asks the caller to run a vision model
 * first; it carries the prompt to use and a set of blank summary fields.
 *
 * @param {object} input - Skill input.
 * @returns {Promise<object>} The normalized result, or the `needsModelVision`
 *   failure payload.
 */
async function run(input) {
  const candidate = getCandidate(input);
  if (!candidate) {
    // Keys of the blank placeholder record returned alongside the prompt.
    const summaryKeys = [
      'invoiceCode',
      'invoiceNumber',
      'issueDate',
      'buyerName',
      'buyerId',
      'vehicleType',
      'brandModel',
      'engineNo',
      'vin',
      'totalAmount',
      'sellerName',
    ];
    return {
      success: false,
      needsModelVision: true,
      error: '未提供多模态模型识别结果。请先让多模态模型查看 imageUrl/imageBase64/pdfUrl/pdfBase64并把模型返回JSON作为 modelResult 传入本skill。',
      prompt: buildPrompt(input),
      fields: Object.fromEntries(summaryKeys.map((key) => [key, ''])),
    };
  }
  emit('normalize', '归一化多模态机动车发票识别结果', 'completed');
  return normalizeVehicleInvoice(candidate);
}
// Entry point: read the input, run the skill, and print exactly one JSON
// document on stdout. Any failure (unreadable input, invalid JSON, an error
// while running or serializing) is reported the same way, as
// `{ success: false, error }`, instead of being left to crash the process.
(async function main() {
  try {
    const result = await run(readInput());
    process.stdout.write(JSON.stringify(result));
  } catch (err) {
    // Prefer the error's message; fall back to its string form for
    // non-Error throwables.
    const error = err && err.message ? err.message : String(err);
    process.stdout.write(JSON.stringify({ success: false, error }));
  }
})();