#!/usr/bin/env node
'use strict';

// Normalizes a recognition result for a motor vehicle sales invoice.
// Input is JSON taken from argv[2], an environment variable, or stdin; the
// result is written to stdout as JSON and progress events go to stderr as
// JSON lines.

const fs = require('fs');

/**
 * Writes one progress event to stderr as a single line of JSON.
 *
 * @param {string} stage - Name of the processing stage.
 * @param {string} message - Human-readable description of the event.
 * @param {string} [status='running'] - Status label for the stage.
 * @param {object} [extra={}] - Additional properties merged into the event.
 * @returns {void}
 */
function emit(stage, message, status = 'running', extra = {}) {
  const event = {
    type: 'process_event',
    stage,
    message,
    status,
    timestamp: new Date().toISOString(),
    // Spread last, so a key in `extra` overrides the defaults above.
    ...extra,
  };
  process.stderr.write(`${JSON.stringify(event)}\n`);
}

/**
 * Reads the skill input as JSON.
 *
 * The first non-empty source wins: argv[2], then the SKILL_INPUT and
 * AIFLOW_SKILL_INPUT environment variables, then everything on stdin (fd 0).
 *
 * @returns {object} The parsed input, or `{}` when the input is blank or does
 *   not parse to an object.
 * @throws {SyntaxError} When the input is non-blank but not valid JSON.
 */
function readInput() {
  const source = process.argv[2]
    || process.env.SKILL_INPUT
    || process.env.AIFLOW_SKILL_INPUT
    || fs.readFileSync(0, 'utf8');
  const raw = String(source).trim();
  if (!raw) return {};

  const parsed = JSON.parse(raw);
  // `null` or a bare primitive is valid JSON but has no properties to read;
  // returning it would make later `input.xxx` accesses throw on null.
  return parsed !== null && typeof parsed === 'object' ? parsed : {};
}

/**
 * Normalizes a date written with `.`, `/`, `-` or 年/月/日 separators (or as
 * eight plain digits) to `YYYY-MM-DD`.
 *
 * @param {*} value - Raw date value; falsy values are treated as empty.
 * @returns {string} The zero-padded date, `''` for empty input, or the
 *   trimmed original text when it does not look like year-month-day.
 */
function normalizeDate(value) {
  const text = String(value || '').trim();
  if (!text) return '';

  // Reduce every supported separator to '-', drop the trailing 日 and any
  // whitespace, so a single pattern can cover all notations.
  const compact = text
    .replace(/[.年\/月]/g, '-')
    .replace(/日/g, '')
    .replace(/\s+/g, '');

  const parts = compact.match(/^(\d{4})-?(\d{1,2})-?(\d{1,2})$/);
  if (!parts) return text;

  const [, year, month, day] = parts;
  return `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
}

/**
 * Strips currency signs, thousands separators and whitespace from an amount.
 *
 * Removes the yen/yuan sign in both half-width (U+00A5) and full-width
 * (U+FFE5) form, ASCII and full-width (U+FF0C) commas, and all whitespace.
 * Everything else is left untouched.
 *
 * @param {*} value - Raw amount; falsy values are treated as empty.
 * @returns {string} The cleaned amount text, or `''` for empty input.
 */
function cleanAmount(value) {
  // Code points are written as escapes so the two visually identical yen
  // signs cannot be confused or collapsed by an editor.
  return String(value || '').replace(/[\u00A5\uFFE5,\uFF0C\s]/g, '');
}

/**
 * Normalizes a VIN by removing all whitespace and upper-casing the result.
 *
 * @param {*} value - Raw VIN; falsy values are treated as empty.
 * @returns {string} The compact upper-case VIN, or `''` for empty input.
 */
function cleanVin(value) {
  // Removing every whitespace run also covers leading/trailing whitespace.
  return String(value || '').replace(/\s+/g, '').toUpperCase();
}

/**
 * Leniently turns a value into a parsed JSON object.
 *
 * Objects are returned as-is. Text is parsed as JSON; if that fails, the
 * span from the first `{` to the last `}` is tried, which tolerates prose or
 * code fences around the JSON. A JSON string that itself contains JSON
 * (double-encoded output) is unwrapped.
 *
 * @param {*} value - An object, JSON text, or text containing a JSON object.
 * @returns {object|null} The parsed object/array, or `null` when nothing
 *   usable is found. Bare primitives such as `123` or `true` yield `null`.
 */
function parseJsonText(value) {
  if (!value) return null;
  if (typeof value === 'object') return value;

  const text = String(value).trim();
  if (!text) return null;

  const toObject = (json) => {
    let parsed;
    try {
      parsed = JSON.parse(json);
    } catch {
      // Unparseable text is an expected outcome here, not an error.
      return null;
    }
    // Each unwrap removes one layer of quoting, so this recursion terminates.
    if (typeof parsed === 'string') return parseJsonText(parsed);
    return parsed !== null && typeof parsed === 'object' ? parsed : null;
  };

  const direct = toObject(text);
  if (direct) return direct;

  const embedded = text.match(/\{[\s\S]*\}/);
  return embedded ? toObject(embedded[0]) : null;
}

/**
 * Returns the first argument that is not `undefined`, `null` or `''`.
 *
 * Other falsy values such as `0` and `false` count as present.
 *
 * @param {...*} values - Candidates in priority order.
 * @returns {*} The first present value, or `''` when there is none.
 */
function firstNonEmpty(...values) {
  const hit = values.find((value) => value !== undefined && value !== null && value !== '');
  // `find` yields undefined only when nothing matched, because undefined
  // itself never satisfies the predicate.
  return hit === undefined ? '' : hit;
}

/**
 * Reads a field from `source` under the first alias that holds a value.
 *
 * A field may be a primitive or a wrapper object carrying the text under
 * `words`, `word`, `value` or `text` (checked in that order). An alias whose
 * value is missing, blank, or a wrapper without usable text is skipped so
 * that later aliases still get a chance.
 *
 * @param {object} source - Object to read from; anything else yields `''`.
 * @param {string[]} names - Property names to try, in priority order.
 * @returns {string} The trimmed text of the first usable value, or `''`.
 */
function readField(source, names) {
  if (!source || typeof source !== 'object') return '';

  const wrapperKeys = ['words', 'word', 'value', 'text'];
  // Nested objects are rejected so they never surface as "[object Object]".
  const toText = (raw) => {
    if (raw === undefined || raw === null || typeof raw === 'object') return '';
    return String(raw).trim();
  };

  for (const name of names) {
    const value = source[name];
    if (value !== null && typeof value === 'object') {
      for (const key of wrapperKeys) {
        const wrapped = toText(value[key]);
        if (wrapped) return wrapped;
      }
      continue;
    }
    const plain = toText(value);
    if (plain) return plain;
  }
  return '';
}

/**
 * Picks the recognition result to normalize from the skill input.
 *
 * Tries `modelResult`, `ocrResult`, `mockResult` and `result` in that order
 * and returns the first one that parses; failing that, wraps `input.fields`.
 *
 * @param {object} input - The skill input.
 * @returns {object|null} The candidate result, or `null` when the input is
 *   not an object or carries no usable result.
 */
function getCandidate(input) {
  if (!input || typeof input !== 'object') return null;

  const sources = [input.modelResult, input.ocrResult, input.mockResult, input.result];
  for (const source of sources) {
    const parsed = parseJsonText(source);
    if (parsed) return parsed;
  }
  return input.fields ? { fields: input.fields } : null;
}

/**
 * Maps a raw recognition result onto the fixed invoice field set.
 *
 * Fields are read from `candidate.words_result` when that is an object,
 * otherwise from `candidate.fields`, otherwise from the candidate itself.
 * Each output field is looked up under its English key and its Chinese
 * label aliases; dates, amounts and the VIN are additionally cleaned.
 *
 * @param {*} candidate - Parsed recognition result; non-objects are treated
 *   as an empty result.
 * @returns {{success: boolean, fields: object, raw: object}} `success` is
 *   always true; `fields` holds every output key (empty string when not
 *   found); `raw` is the object the fields were read from at the top level.
 */
function normalizeVehicleInvoice(candidate) {
  const root = candidate && typeof candidate === 'object' ? candidate : {};
  const fields = root.fields && typeof root.fields === 'object' ? root.fields : root;
  const words = root.words_result && typeof root.words_result === 'object' ? root.words_result : fields;

  // [output key, aliases in priority order, optional cleaner].
  // The order here is the key order of the emitted `fields` object.
  const specs = [
    ['invoiceCode', ['invoiceCode', '发票代码']],
    ['invoiceNumber', ['invoiceNumber', 'invoiceNo', '发票号码', '发票号']],
    ['issueDate', ['issueDate', '开票日期', '填发日期'], normalizeDate],
    ['buyerName', ['buyerName', '购买方名称', '购货单位名称', '购买方', '买方名称']],
    ['buyerId', ['buyerId', '购买方纳税人识别号', '购买方身份证号码/组织机构代码', '买方纳税人识别号']],
    ['buyerAddressPhone', ['buyerAddressPhone', '购买方地址、电话', '购买方地址电话']],
    ['vehicleType', ['vehicleType', '车辆类型']],
    ['brandModel', ['brandModel', '厂牌型号', '品牌型号', '厂牌型号及配置']],
    ['origin', ['origin', '产地']],
    ['certificateNo', ['certificateNo', '合格证号']],
    ['importCertificateNo', ['importCertificateNo', '进口证明书号']],
    ['inspectionNo', ['inspectionNo', '商检单号']],
    ['engineNo', ['engineNo', '发动机号码', '发动机号']],
    ['vin', ['vin', 'VIN', '车辆识别代号/车架号码', '车辆识别代号', '车架号码', '车架号'], cleanVin],
    // The tax authority code label is deliberately not an alias here: it
    // belongs to taxAuthorityCode below and would otherwise be reported as a
    // taxpayer number whenever the real one is missing.
    ['taxpayerNo', ['taxpayerNo', '纳税人识别号']],
    ['sellerName', ['sellerName', '销货单位名称', '销售方名称', '销方名称']],
    ['sellerTaxpayerNo', ['sellerTaxpayerNo', '销货单位纳税人识别号', '销售方纳税人识别号']],
    ['sellerAccount', ['sellerAccount', '销货单位开户银行及账号', '销售方开户行及账号']],
    ['sellerAddressPhone', ['sellerAddressPhone', '销货单位地址、电话', '销售方地址电话']],
    ['taxAuthorityCode', ['taxAuthorityCode', '主管税务机关代码']],
    ['taxAuthorityName', ['taxAuthorityName', '主管税务机关名称']],
    ['totalAmount', ['totalAmount', '价税合计', '价税合计小写', '金额合计'], cleanAmount],
    ['taxRate', ['taxRate', '增值税税率或征收率', '税率']],
    ['taxAmount', ['taxAmount', '增值税税额', '税额'], cleanAmount],
    ['totalAmountUpper', ['totalAmountUpper', '价税合计大写', '大写']],
    ['tonnage', ['tonnage', '吨位']],
    ['limitedPassengerCount', ['limitedPassengerCount', '限乘人数']],
  ];

  const normalizedFields = {};
  for (const [key, aliases, clean] of specs) {
    const text = readField(words, aliases);
    normalizedFields[key] = clean ? clean(text) : text;
  }

  return {
    success: true,
    fields: normalizedFields,
    raw: root,
  };
}

/**
 * Builds the instruction text for the multimodal model that should read the
 * invoice.
 *
 * The prompt always contains four fixed lines (task, no-guessing rule,
 * formatting rule, output schema). One extra line is appended for each of
 * `imageUrl`, `pdfUrl`, `imageBase64` and `pdfBase64` that is set; URLs are
 * included verbatim, base64 payloads are only mentioned, never embedded.
 *
 * @param {object} input - The skill input; non-objects are treated as empty.
 * @returns {string} The prompt, lines joined with `\n`.
 */
function buildPrompt(input) {
  const source = input && typeof input === 'object' ? input : {};

  const lines = [
    '请使用多模态大模型识别机动车销售统一发票图片或PDF,只输出合法JSON,不输出解释。',
    '只根据图片/PDF可见内容提取字段,无法确认的字段输出空字符串,不要猜测。',
    '日期统一 YYYY-MM-DD;金额只保留数字和小数点;VIN 保留大写字符。',
    '输出schema:{"fields":{"invoiceCode":"","invoiceNumber":"","issueDate":"","buyerName":"","buyerId":"","buyerAddressPhone":"","vehicleType":"","brandModel":"","origin":"","certificateNo":"","importCertificateNo":"","inspectionNo":"","engineNo":"","vin":"","taxpayerNo":"","sellerName":"","sellerTaxpayerNo":"","sellerAccount":"","sellerAddressPhone":"","taxAuthorityCode":"","taxAuthorityName":"","totalAmount":"","taxRate":"","taxAmount":"","totalAmountUpper":"","tonnage":"","limitedPassengerCount":""}}',
  ];

  if (source.imageUrl) lines.push(`图片URL:${source.imageUrl}`);
  if (source.pdfUrl) lines.push(`PDF URL:${source.pdfUrl}`);
  if (source.imageBase64) lines.push('已提供 imageBase64,请直接看图识别。');
  if (source.pdfBase64) lines.push('已提供 pdfBase64,请直接识别PDF。');

  return lines.join('\n');
}

/**
 * Runs the skill: normalizes a supplied recognition result, or reports that
 * one is needed.
 *
 * @param {object} input - The skill input.
 * @returns {Promise<object>} When a candidate result is found, the normalized
 *   invoice (`success: true`). Otherwise a failure object with
 *   `needsModelVision: true`, an error message, the prompt to give the
 *   multimodal model, and a set of blank fields.
 */
async function run(input) {
  const candidate = getCandidate(input);

  if (!candidate) {
    // Nothing to normalize: hand the caller the prompt it should run first.
    return {
      success: false,
      needsModelVision: true,
      error: '未提供多模态模型识别结果。请先让多模态模型查看 imageUrl/imageBase64/pdfUrl/pdfBase64,并把模型返回JSON作为 modelResult 传入本skill。',
      prompt: buildPrompt(input),
      fields: {
        invoiceCode: '',
        invoiceNumber: '',
        issueDate: '',
        buyerName: '',
        buyerId: '',
        vehicleType: '',
        brandModel: '',
        engineNo: '',
        vin: '',
        totalAmount: '',
        sellerName: '',
      },
    };
  }

  emit('normalize', '归一化多模态机动车发票识别结果', 'completed');
  return normalizeVehicleInvoice(candidate);
}

// Entry point: read the input, run the skill, and print exactly one JSON
// document on stdout. Any failure is reported as
// `{ success: false, error }` on stdout rather than as a crash.
(async () => {
  try {
    const result = await run(readInput());
    process.stdout.write(JSON.stringify(result));
  } catch (err) {
    const message = err && err.message ? err.message : String(err);
    process.stdout.write(JSON.stringify({
      success: false,
      error: message,
    }));
  }
})();