2026-05-20 21:39:12 +08:00

466 lines
15 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const fs = require('node:fs');
/** Field labels that may introduce the policy number in the OCR text. */
const POLICY_LABELS = [
  '商业险保单号',
  '保险单号',
  '保险单号码',
  '电子保单号',
  '保单号',
  '保险合同号',
  '合同号',
];

/** Field labels that may introduce the vehicle identification number. */
const VIN_LABELS = [
  '车辆识别代号',
  '车辆识别代码',
  '车架号',
  'VIN码',
  'VIN',
  '识别代码',
];

/** Field labels that may introduce the vehicle usage nature. */
const USAGE_NATURE_LABELS = [
  '使用性质',
  '车辆使用性质',
  '使用方式',
  '使用用途',
  '车辆用途',
];
/**
 * Known usage-nature values, in matching priority order.
 *
 * The lookup that consumes this list returns the FIRST entry contained in the
 * text, so an entry must never be preceded by one of its own substrings:
 * otherwise the longer, more specific value can never be returned. For that
 * reason '企业非营业' and '党政机关事业团体非营业' are listed before '非营业'.
 */
const USAGE_NATURE_VALUES = [
  '家庭自用汽车',
  '家庭自用',
  '非营业个人',
  '企业非营业',
  '党政机关事业团体非营业',
  '非营业',
  '非营运',
  '预约出租客运',
  '网约车',
  '出租',
  '租赁',
  '客运',
  '货运',
  '营业',
  '营运',
];
/** Field labels that may introduce the insurance period (start/end dates). */
const INSURANCE_PERIOD_LABELS = [
  '保险期间',
  '保险期限',
  '保险有效期',
  '保险起期',
  '保险止期',
  '起保日期',
  '终止日期',
  '起止日期',
  '承保期间',
];

/**
 * Person-name roles. `label` is the role reported with a candidate and
 * `terms` are the field captions searched for, longest variants first.
 */
const NAME_LABELS = [
  {
    label: '被保险人',
    terms: ['被保险人名称', '被保险人姓名', '被保险人'],
  },
  {
    label: '车主',
    terms: ['车主姓名', '车主名称', '车主'],
  },
  {
    label: '投保人',
    terms: ['投保人名称', '投保人姓名', '投保人'],
  },
];

/** Terms whose presence is treated as evidence of vehicle-damage coverage. */
const COVERAGE_TERMS = [
  '机动车损失保险',
  '车辆损失保险',
  '车损险',
];
/**
 * Folds text to its NFKC form and turns list separators into spaces.
 *
 * NFKC normalization already converts full-width letters, digits, colons,
 * parentheses and the ideographic space (U+3000) to their ASCII equivalents,
 * so no separate replacement passes are needed for those characters. The only
 * remaining step is replacing commas, ideographic commas and semicolons with
 * a space so that adjacent fields do not fuse into one token.
 *
 * @param {*} text - Raw text; any falsy value is treated as an empty string.
 * @returns {string} The normalized text.
 */
function toHalfWidth(text) {
  return String(text || '')
    .normalize('NFKC')
    .replace(/[,、;]/g, ' ');
}
/**
 * Reduces a value to an upper-case identifier made of ASCII letters, digits
 * and hyphens only.
 *
 * @param {*} value - Any value; falsy values become an empty string.
 * @returns {string} The cleaned, upper-cased token.
 */
function normalizeToken(value) {
  const folded = String(value || '').normalize('NFKC');
  const identifierChars = folded.replace(/[^A-Za-z0-9-]/g, '');
  return identifierChars.toUpperCase();
}
/**
 * Produces a whitespace-free, upper-cased NFKC form of a value, suitable for
 * substring comparisons.
 *
 * @param {*} value - Any value; falsy values become an empty string.
 * @returns {string} The compacted text.
 */
function compactText(value) {
  const folded = String(value || '').normalize('NFKC');
  const withoutWhitespace = folded.replace(/\s+/g, '');
  return withoutWhitespace.toUpperCase();
}
/**
 * Normalizes a person or company name: NFKC-folds it and removes all
 * whitespace. Letter case is left untouched.
 *
 * @param {*} value - Any value; falsy values become an empty string.
 * @returns {string} The normalized name.
 */
function normalizeName(value) {
  const folded = String(value || '').normalize('NFKC');
  const withoutWhitespace = folded.replace(/\s+/g, '');
  return withoutWhitespace.trim();
}
/**
 * Formats date parts as a zero-padded `YYYY-MM-DD` string. The parts are only
 * padded, not range-checked.
 *
 * @param {string|number} year
 * @param {string|number} month
 * @param {string|number} day
 * @returns {string} The hyphen-joined date string.
 */
function normalizeDate(year, month, day) {
  const yyyy = String(year).padStart(4, '0');
  const mm = String(month).padStart(2, '0');
  const dd = String(day).padStart(2, '0');
  return `${yyyy}-${mm}-${dd}`;
}
/**
 * Picks the text to parse from the request object.
 *
 * A non-blank string `rawText` wins and is returned unmodified; otherwise the
 * truthy entries of an array `lines` are joined with newlines; otherwise the
 * result is an empty string.
 *
 * @param {{rawText?: *, lines?: *}} input - Request object.
 * @returns {string} The text to parse.
 */
function readInputText(input) {
  const hasRawText = typeof input.rawText === 'string' && input.rawText.trim() !== '';
  if (hasRawText) return input.rawText;
  if (!Array.isArray(input.lines)) return '';
  return input.lines.filter(entry => Boolean(entry)).join('\n');
}
/**
 * Splits text into trimmed, non-empty lines after half-width normalization.
 *
 * @param {*} text - Raw text.
 * @returns {string[]} The non-empty lines, in order.
 */
function linesFromText(text) {
  const kept = [];
  for (const rawLine of toHalfWidth(text).split(/\r?\n+/)) {
    const line = rawLine.trim();
    if (line) kept.push(line);
  }
  return kept;
}
/**
 * Tells whether a line contains at least one of the given labels.
 *
 * @param {string} line - Line to inspect.
 * @param {string[]} labels - Candidate labels.
 * @returns {boolean} True when any label occurs in the line.
 */
function hasAnyLabel(line, labels) {
  for (const label of labels) {
    if (line.includes(label)) return true;
  }
  return false;
}
/**
 * Collects the distinct identifier-like tokens found in a text.
 *
 * A token starts with a letter or digit and continues with 7 to 49 letters,
 * digits or hyphens. Matching runs on the half-width, upper-cased text, and
 * the result keeps first-occurrence order.
 *
 * @param {*} text - Text to scan.
 * @returns {string[]} Unique normalized tokens.
 */
function extractCandidates(text) {
  // Normalize once up front instead of on every regex step.
  const haystack = toHalfWidth(text).toUpperCase();
  const unique = new Set();
  for (const match of haystack.matchAll(/[A-Z0-9][A-Z0-9-]{7,49}/gi)) {
    unique.add(normalizeToken(match[0]));
  }
  return [...unique];
}
/**
 * Checks whether a value has the shape of a VIN: exactly 17 characters drawn
 * from digits and upper-case letters excluding I, O and Q, once hyphens are
 * removed from the normalized token.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when the value is VIN-shaped.
 */
function isVin(value) {
  const vinShape = /^[A-HJ-NPR-Z0-9]{17}$/;
  const withoutHyphens = normalizeToken(value).replace(/-/g, '');
  return vinShape.test(withoutHyphens);
}
/**
 * Checks whether a value looks like an 18-character ID-card number:
 * 17 digits followed by a digit or the letter X (either case), ignoring
 * surrounding whitespace.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when the value has the ID-card shape.
 */
function isIdCard(value) {
  const candidate = String(value || '').trim();
  return /^\d{17}[\dX]$/i.test(candidate);
}
/**
 * Checks whether a value is written like a date: four digits, then one or two
 * digits, then one or two digits, each separated by `-`, `/` or `.`.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when the whole value has that shape.
 */
function isDateLike(value) {
  const candidate = String(value || '');
  return /^\d{4}[-/.]\d{1,2}[-/.]\d{1,2}$/.test(candidate);
}
/**
 * Heuristically decides whether a token can be a policy number.
 *
 * Rejected: tokens outside 8..50 characters, VIN-shaped tokens (unless
 * `allowVinShape` is set and the token differs from `expectedVin`),
 * ID-card-shaped or date-shaped tokens, tokens with fewer than four digits,
 * and digit-only tokens shorter than 12 characters.
 *
 * @param {*} value - Candidate token.
 * @param {{allowVinShape?: boolean, expectedVin?: *}} [options]
 * @returns {boolean} True when the token passes every check.
 */
function isValidPolicyNo(value, options = {}) {
  const token = normalizeToken(value);
  const lengthInRange = token.length >= 8 && token.length <= 50;
  if (!lengthInRange) return false;
  if (!/^[A-Z0-9-]+$/.test(token)) return false;

  const expectedVin = normalizeToken(options.expectedVin).replace(/-/g, '');
  const bareToken = token.replace(/-/g, '');
  if (isVin(bareToken)) {
    // A VIN-shaped token is only tolerated next to a policy label, and never
    // when it is the vehicle's own VIN.
    const vinShapeAllowed = Boolean(options.allowVinShape) && bareToken !== expectedVin;
    if (!vinShapeAllowed) return false;
  }
  if (isIdCard(token) || isDateLike(token)) return false;

  const digitCount = token.match(/\d/g)?.length ?? 0;
  const letterCount = token.match(/[A-Z]/g)?.length ?? 0;
  if (digitCount < 4) return false;
  // Digit-only tokens need at least 12 characters (this also excludes the
  // 11-digit case).
  if (letterCount === 0) return token.length >= 12;
  return true;
}
/**
 * Ranks a policy-number candidate; higher is better.
 *
 * The base score is the token length, adjusted by +8 when it has a letter,
 * +4 when it has a digit, -2 when it has a hyphen, and +3 when it starts
 * with `P` followed by a letter or digit.
 *
 * @param {*} value - Candidate token.
 * @returns {number} The score.
 */
function scorePolicyNo(value) {
  const token = normalizeToken(value);
  const adjustments = [
    [/[A-Z]/.test(token), 8],
    [/\d/.test(token), 4],
    [token.includes('-'), -2],
    [/^P[A-Z0-9]/.test(token), 3],
  ];
  return adjustments.reduce(
    (score, [applies, delta]) => (applies ? score + delta : score),
    token.length,
  );
}
/**
 * Chooses the most plausible policy number among candidates.
 *
 * A candidate equal to the normalized expected policy number wins outright.
 * Otherwise the valid candidate with the highest score is returned; on a tie
 * the earliest candidate is kept.
 *
 * @param {Array<*>} candidates - Candidate tokens.
 * @param {*} expectedPolicyNo - Policy number already known to the caller, if any.
 * @param {{allowVinShape?: boolean, expectedVin?: *}} [options] - Passed to the validity check.
 * @returns {string} The chosen token, or '' when none qualifies.
 */
function bestPolicyCandidate(candidates, expectedPolicyNo, options = {}) {
  const tokens = candidates.map(item => normalizeToken(item));
  const expected = normalizeToken(expectedPolicyNo);
  if (expected && tokens.includes(expected)) return expected;

  let winner = '';
  let winnerScore = -Infinity;
  for (const token of tokens) {
    if (!isValidPolicyNo(token, options)) continue;
    const score = scorePolicyNo(token);
    // Strict comparison keeps the first of several equally scored tokens.
    if (score > winnerScore) {
      winner = token;
      winnerScore = score;
    }
  }
  return winner;
}
/**
 * Finds the policy number.
 *
 * Lines containing a policy label are tried first, each together with the two
 * following lines; VIN-shaped tokens are tolerated there. If no labelled
 * window yields a candidate, the whole text is searched with VIN-shaped
 * tokens rejected.
 *
 * @param {string[]} lines - Normalized lines of the document.
 * @param {string} fullText - Whole document text.
 * @param {*} expectedPolicyNo - Policy number already known to the caller, if any.
 * @param {*} expectedVin - VIN already known to the caller, if any.
 * @returns {string} The policy number, or '' when none is found.
 */
function extractPolicyNo(lines, fullText, expectedPolicyNo, expectedVin) {
  const labelledOptions = { allowVinShape: true, expectedVin };
  for (let row = 0; row < lines.length; row += 1) {
    const line = lines[row];
    if (hasAnyLabel(line, POLICY_LABELS)) {
      const nearby = [line, lines[row + 1] || '', lines[row + 2] || ''].join(' ');
      const found = bestPolicyCandidate(extractCandidates(nearby), expectedPolicyNo, labelledOptions);
      if (found) return found;
    }
  }
  const everywhere = extractCandidates(fullText);
  return bestPolicyCandidate(everywhere, expectedPolicyNo, { expectedVin });
}
/**
 * Finds the VIN.
 *
 * Order of preference: the expected VIN when it is VIN-shaped and occurs in
 * the compacted text; then the first VIN-shaped token on a VIN-labelled line
 * or the line after it; then the first VIN-shaped token anywhere.
 *
 * @param {string[]} lines - Normalized lines of the document.
 * @param {string} fullText - Whole document text.
 * @param {*} expectedVin - VIN already known to the caller, if any.
 * @returns {string} The VIN without hyphens, or '' when none is found.
 */
function extractVin(lines, fullText, expectedVin) {
  const stripHyphens = item => item.replace(/-/g, '');
  const firstVinIn = text => extractCandidates(text)
    .map(stripHyphens)
    .find(item => isVin(item));

  const expected = stripHyphens(normalizeToken(expectedVin));
  if (expected && isVin(expected) && compactText(fullText).includes(expected)) {
    return expected;
  }

  for (let row = 0; row < lines.length; row += 1) {
    if (hasAnyLabel(lines[row], VIN_LABELS)) {
      const vin = firstVinIn([lines[row], lines[row + 1] || ''].join(' '));
      if (vin) return vin;
    }
  }
  return firstVinIn(fullText) || '';
}
/**
 * Isolates the part of a text fragment that may hold a name.
 *
 * Removes a leading name caption, cuts the text at the first keyword that
 * starts another field, and blanks out ASCII letters, digits and a few
 * punctuation/mask characters.
 *
 * @param {*} text - Text following a name label.
 * @returns {string} The trimmed remainder.
 */
function cleanNameText(text) {
  const withoutCaption = String(text || '').replace(/^(名称|姓名|客户名称|客户姓名)[:]?/, '');
  const [beforeNextField] = withoutCaption.split(/(?:证件|身份证|统一社会信用|地址|电话|手机|车架|车辆|号牌|保单|保险|发动机|VIN|使用性质|车辆使用性质|使用方式|使用用途|保险期间|保险期限|保险有效期|起保|终止|承保)/);
  const withoutAscii = beforeNextField.replace(/[0-9A-Za-z_*xX:/\\-]/g, ' ');
  return withoutAscii.trim();
}
/**
 * Extracts name-like runs (2 to 12 CJK characters or middle dots) from a text
 * fragment, dropping known captions and anything mentioning 保险 or 地址.
 *
 * @param {*} text - Text following a name label.
 * @returns {string[]} Candidate names in order of appearance.
 */
function findNamesInText(text) {
  const rejected = new Set([
    '被保险人',
    '投保人',
    '车主姓名',
    '车主名称',
    '车主',
    '姓名',
    '名称',
    '机动车',
    '商业险',
    '保险单',
    '保险人',
    '使用性质',
    '家庭自用汽车',
    '有限公司',
  ]);
  const isPlausible = name => !(rejected.has(name) || name.includes('保险') || name.includes('地址'));
  const runs = cleanNameText(text).matchAll(/[\u4e00-\u9fa5·]{2,12}/g);
  return Array.from(runs, run => normalizeName(run[0])).filter(isPlausible);
}
/**
 * Records a name candidate in `candidates` (mutated in place).
 *
 * Empty names and exact label+name duplicates are ignored. When the name is
 * already present under another label, the new label is appended to that
 * entry as `existing/new` instead of adding a second entry.
 *
 * @param {Array<{label: string, name: string}>} candidates - List to update.
 * @param {string} label - Role the name was found under.
 * @param {*} name - Raw name.
 * @returns {void}
 */
function addNameCandidate(candidates, label, name) {
  const normalized = normalizeName(name);
  if (!normalized) return;

  const alreadyRecorded = candidates.some(
    item => item.label === label && item.name === normalized,
  );
  if (alreadyRecorded) return;

  const sameName = candidates.find(item => item.name === normalized);
  if (!sameName) {
    candidates.push({ label, name: normalized });
    return;
  }
  if (!sameName.label.includes(label)) {
    sameName.label = `${sameName.label}/${label}`;
  }
}
/**
 * Gathers up to eight name candidates with the role(s) they were found under.
 *
 * The expected owner name is recorded first (label 全文匹配) when it occurs
 * anywhere in the compacted text. Then, for every line containing a name
 * caption, names are taken from the text after the caption and from the
 * following line.
 *
 * @param {string[]} lines - Normalized lines of the document.
 * @param {string} fullText - Whole document text.
 * @param {*} expectedOwnerName - Owner name already known to the caller, if any.
 * @returns {Array<{label: string, name: string}>} At most eight candidates.
 */
function extractNameCandidates(lines, fullText, expectedOwnerName) {
  const found = [];
  const expected = normalizeName(expectedOwnerName);
  if (expected && compactText(fullText).includes(compactText(expected))) {
    addNameCandidate(found, '全文匹配', expected);
  }

  for (let row = 0; row < lines.length; row += 1) {
    const line = lines[row];
    for (const { label, terms } of NAME_LABELS) {
      const caption = terms.find(item => line.includes(item));
      if (caption) {
        const afterCaption = line
          .slice(line.indexOf(caption) + caption.length)
          .replace(/^[:\s]+/, '');
        const sources = [afterCaption, lines[row + 1] || ''];
        sources.forEach(text => {
          findNamesInText(text).forEach(name => addNameCandidate(found, label, name));
        });
      }
    }
  }
  return found.slice(0, 8);
}
/**
 * Looks for a certificate (ID) number in the whitespace-free text.
 *
 * A full 18-character number (17 digits plus digit or X) is preferred; failing
 * that, a partially masked number made of digits and `*`/`X` runs is accepted
 * and flagged as masked.
 *
 * @param {*} fullText - Whole document text.
 * @returns {{certificateNo: string, certificateNoMasked: boolean}}
 */
function extractCertificate(fullText) {
  const squeezed = toHalfWidth(fullText).replace(/\s+/g, '');

  const [fullNo] = squeezed.match(/\d{17}[\dXx]/) || [];
  if (fullNo) {
    return { certificateNo: fullNo.toUpperCase(), certificateNoMasked: false };
  }

  const [maskedNo] = squeezed.match(/(?:\d{2,8})[*Xx]{4,14}(?:\d{2,6})|[*Xx]{6,16}(?:\d{2,6})/) || [];
  if (maskedNo) {
    return { certificateNo: maskedNo.toUpperCase(), certificateNoMasked: true };
  }

  return { certificateNo: '', certificateNoMasked: false };
}
/**
 * Finds a licence-plate number: one CJK character, one letter, then 5 to 7
 * letters or digits. Lines mentioning a plate caption are searched first,
 * then the whole text.
 *
 * @param {string[]} lines - Normalized lines of the document.
 * @param {*} fullText - Whole document text.
 * @returns {string} The first plate found, or '' when there is none.
 */
function extractPlateNo(lines, fullText) {
  const plateRe = /[\u4e00-\u9fa5][A-Z][A-Z0-9]{5,7}/g;
  const firstPlateIn = text => {
    const hits = toHalfWidth(text).toUpperCase().match(plateRe);
    return hits ? hits[0] : '';
  };

  for (const line of lines) {
    if (/(车牌|号牌|牌照|牌号)/.test(line)) {
      const plate = firstPlateIn(line);
      if (plate) return plate;
    }
  }
  return firstPlateIn(fullText);
}
/**
 * Finds the engine number on a line carrying an engine-number caption or on
 * the line after it. A token qualifies when it is neither VIN-shaped nor a
 * valid policy number and is 5 to 24 characters long.
 *
 * @param {string[]} lines - Normalized lines of the document.
 * @returns {string} The engine number, or '' when none is found.
 */
function extractEngineNo(lines) {
  const looksLikeEngineNo = item => !isVin(item)
    && !isValidPolicyNo(item)
    && item.length >= 5
    && item.length <= 24;

  for (let row = 0; row < lines.length; row += 1) {
    if (/(发动机号|发动机号码|发动机编号)/.test(lines[row])) {
      const nearby = [lines[row], lines[row + 1] || ''].join(' ');
      const hit = extractCandidates(nearby).find(looksLikeEngineNo);
      if (hit) return hit;
    }
  }
  return '';
}
/**
 * Collects the distinct dates written in a text, in order of appearance.
 *
 * Recognized forms are a four-digit year, a one/two-digit month and a
 * one/two-digit day separated by 年/月 or by `-`, `/`, `.`, with an optional
 * trailing 日. Each date is returned as zero-padded `YYYY-MM-DD`; the parts
 * are not range-checked.
 *
 * @param {*} text - Text to scan.
 * @returns {string[]} Unique normalized dates.
 */
function extractDateCandidatesFromText(text) {
  // Normalize once up front instead of on every regex step.
  const haystack = toHalfWidth(text);
  const unique = new Set();
  for (const [, year, month, day] of haystack.matchAll(/(\d{4})[年\-/.](\d{1,2})[月\-/.](\d{1,2})日?/g)) {
    unique.add(normalizeDate(year, month, day));
  }
  return [...unique];
}
/**
 * Returns the text that follows a label on a line.
 *
 * The first label (in list order) that occurs in the line is used; leading
 * colons/whitespace and trailing whitespace are stripped from the remainder.
 *
 * @param {string} line - Line to inspect.
 * @param {string[]} labels - Candidate labels, in priority order.
 * @returns {string} The text after the label, or '' when no label occurs.
 */
function extractLabelTail(line, labels) {
  for (const label of labels) {
    const position = line.indexOf(label);
    if (position !== -1) {
      const remainder = line.slice(position + label.length);
      return remainder.replace(/^[:\s]+/, '').trim();
    }
  }
  return '';
}
/**
 * Returns the first known usage-nature value (in list order) that occurs in
 * the whitespace-free, half-width text.
 *
 * @param {*} text - Text to scan.
 * @returns {string} The matched value, or '' when none occurs.
 */
function extractUsageNatureFromText(text) {
  const squeezed = toHalfWidth(text).replace(/\s+/g, '');
  const hit = USAGE_NATURE_VALUES.find(value => squeezed.includes(value));
  return hit || '';
}
/**
 * Finds the usage nature of the vehicle.
 *
 * Each line with a usage-nature label is checked first (text after the label
 * plus the next line); if none yields a value, the whole text is searched.
 *
 * @param {string[]} lines - Normalized lines of the document.
 * @param {*} fullText - Whole document text.
 * @returns {string} The usage nature, or '' when none is found.
 */
function extractUsageNature(lines, fullText) {
  for (let row = 0; row < lines.length; row += 1) {
    if (hasAnyLabel(lines[row], USAGE_NATURE_LABELS)) {
      const afterLabel = extractLabelTail(lines[row], USAGE_NATURE_LABELS);
      const nearby = [afterLabel, lines[row + 1] || ''].join(' ');
      const found = extractUsageNatureFromText(nearby);
      if (found) return found;
    }
  }
  return extractUsageNatureFromText(fullText);
}
/**
 * Finds the start and end date of the insurance period.
 *
 * For each line with an insurance-period label, dates are read from the text
 * after the label plus the next line. Two dates give the period directly.
 * With a single date, the line after that window is checked for a different
 * date to use as the end date. If no labelled line yields a pair, the first
 * two distinct dates of the whole text are used.
 *
 * @param {string[]} lines - Normalized lines of the document.
 * @param {*} fullText - Whole document text.
 * @returns {{startDate: string, endDate: string}} Dates as `YYYY-MM-DD`, or ''
 *   for a part that could not be found.
 */
function extractDates(lines, fullText) {
  for (let i = 0; i < lines.length; i += 1) {
    if (!hasAnyLabel(lines[i], INSURANCE_PERIOD_LABELS)) continue;
    const windowText = [extractLabelTail(lines[i], INSURANCE_PERIOD_LABELS), lines[i + 1] || ''].join(' ');
    const dates = extractDateCandidatesFromText(windowText);
    if (dates.length >= 2) return { startDate: dates[0], endDate: dates[1] };
    if (dates.length === 1) {
      // lines[i + 1] is already part of windowText, so re-reading it could only
      // repeat the same date and yield endDate === startDate. Look one line
      // further and accept only a date that differs from the start date.
      const endDate = extractDateCandidatesFromText(lines[i + 2] || '')
        .find(date => date !== dates[0]);
      if (endDate) return { startDate: dates[0], endDate };
    }
  }
  const dates = extractDateCandidatesFromText(fullText);
  return { startDate: dates[0] || '', endDate: dates[1] || '' };
}
/**
 * Reports whether the text mentions vehicle-damage coverage.
 *
 * @param {*} fullText - Whole document text; whitespace is ignored.
 * @returns {{hasVehicleDamageCoverage: boolean, coverageEvidence: string[]}}
 *   The coverage terms found, and whether there was at least one.
 */
function extractCoverage(fullText) {
  const squeezed = String(fullText || '').replace(/\s+/g, '');
  const coverageEvidence = [];
  for (const term of COVERAGE_TERMS) {
    if (squeezed.includes(term)) coverageEvidence.push(term);
  }
  return {
    hasVehicleDamageCoverage: coverageEvidence.length > 0,
    coverageEvidence,
  };
}
/**
 * Builds the list of warning messages for a parse result.
 *
 * Missing-field warnings come first, in a fixed order, followed by mismatch
 * warnings that compare the extracted policy number, VIN and names with the
 * expected values supplied in the request.
 *
 * @param {{input: object, output: object}} args - The request and the parse result.
 * @returns {string[]} Warning messages, possibly empty.
 */
function buildWarnings({ input, output }) {
  const missingFieldChecks = [
    [!output.policyNo, '未识别到商业险保单号'],
    [!output.vin, '未识别到车架号VIN'],
    [!output.nameCandidates.length, '未识别到被保险人/车主/投保人姓名'],
    [!output.usageNature, '未识别到使用性质'],
    [!output.startDate || !output.endDate, '未识别到保险期间'],
    [!output.hasVehicleDamageCoverage, '未识别到车损险险种证据'],
  ];
  const warnings = missingFieldChecks
    .filter(([isMissing]) => isMissing)
    .map(([, message]) => message);

  const expectedPolicyNo = normalizeToken(input.expectedPolicyNo);
  const policyMismatch = expectedPolicyNo && output.policyNo && expectedPolicyNo !== output.policyNo;
  if (policyMismatch) {
    warnings.push('OCR保单号与订单已填商业险保单号不一致');
  }

  const expectedVin = normalizeToken(input.expectedVin).replace(/-/g, '');
  const vinMismatch = expectedVin && output.vin && expectedVin !== output.vin;
  if (vinMismatch) {
    warnings.push('OCR车架号VIN与订单VIN不一致');
  }

  const expectedOwnerName = normalizeName(input.expectedOwnerName);
  if (expectedOwnerName && output.nameCandidates.length) {
    const noneMatches = output.nameCandidates.every(
      item => normalizeName(item.name) !== expectedOwnerName,
    );
    if (noneMatches) warnings.push('OCR姓名候选与订单车主姓名不一致');
  }
  return warnings;
}
/**
 * Scores a parse result between 0 and 0.99.
 *
 * Starts at 0.2 and adds a fixed weight for each field group that was
 * extracted; the sum is rounded to two decimals and capped at 0.99.
 *
 * @param {object} output - Parse result; `nameCandidates` must have a `length`.
 * @returns {number} The confidence score.
 */
function computeConfidence(output) {
  // Order matters only for floating-point reproducibility of the running sum.
  const contributions = [
    [Boolean(output.policyNo), 0.25],
    [Boolean(output.vin), 0.25],
    [Boolean(output.nameCandidates.length), 0.15],
    [Boolean(output.usageNature), 0.05],
    [Boolean(output.startDate && output.endDate), 0.05],
    [Boolean(output.hasVehicleDamageCoverage), 0.1],
    [Boolean(output.certificateNo), 0.03],
    [Boolean(output.plateNo || output.engineNo), 0.02],
  ];
  let total = 0.2;
  for (const [isPresent, weight] of contributions) {
    if (isPresent) total += weight;
  }
  const rounded = Number(total.toFixed(2));
  return Math.max(0, Math.min(0.99, rounded));
}
/**
 * Parses OCR text of a motor insurance policy into structured fields.
 *
 * The text comes from `input.rawText` or `input.lines`. When there is no
 * text, a failure result with empty fields and one explanatory warning is
 * returned. Otherwise every field is extracted, warnings are derived by
 * comparing with the expected values in `input`, and a confidence score is
 * attached.
 *
 * @param {object} input - Request with `rawText` or `lines`, and optionally
 *   `expectedPolicyNo`, `expectedVin`, `expectedOwnerName`, `imageUrl`.
 * @returns {object} The parse result; `success` is false only when no text
 *   was supplied.
 */
function parsePolicy(input) {
  const rawText = readInputText(input);

  if (rawText.trim() === '') {
    const reason = input.imageUrl
      ? '当前脚本不直接OCR图片请先传入rawText'
      : '缺少rawText或lines';
    return {
      success: false,
      policyNo: '',
      vin: '',
      nameCandidates: [],
      certificateNo: '',
      certificateNoMasked: false,
      plateNo: '',
      engineNo: '',
      usageNature: '',
      startDate: '',
      endDate: '',
      hasVehicleDamageCoverage: false,
      coverageEvidence: [],
      confidence: 0,
      warnings: [reason],
      rawText: '',
    };
  }

  const fullText = toHalfWidth(rawText);
  const lines = linesFromText(fullText);
  const { certificateNo, certificateNoMasked } = extractCertificate(fullText);
  const { startDate, endDate } = extractDates(lines, fullText);
  const { hasVehicleDamageCoverage, coverageEvidence } = extractCoverage(fullText);

  // Key order is kept stable because the result is serialized as JSON.
  const output = {
    success: true,
    policyNo: extractPolicyNo(lines, fullText, input.expectedPolicyNo, input.expectedVin),
    vin: extractVin(lines, fullText, input.expectedVin),
    nameCandidates: extractNameCandidates(lines, fullText, input.expectedOwnerName),
    certificateNo,
    certificateNoMasked,
    plateNo: extractPlateNo(lines, fullText),
    engineNo: extractEngineNo(lines),
    usageNature: extractUsageNature(lines, fullText),
    startDate,
    endDate,
    hasVehicleDamageCoverage,
    coverageEvidence,
    confidence: 0,
    warnings: [],
    rawText: fullText,
  };
  output.warnings = buildWarnings({ input, output });
  output.confidence = computeConfidence(output);
  return output;
}
/**
 * CLI entry point: reads a JSON request from standard input, parses it and
 * writes the JSON result to standard output.
 *
 * Any error is reported on standard output as `{ success: false, error }`
 * and the exit code is set to 0, so the failure is signalled in the payload
 * rather than through the process status.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const emit = payload => process.stdout.write(JSON.stringify(payload));
  try {
    const requestJson = fs.readFileSync(0, 'utf8');
    const request = JSON.parse(requestJson || '{}');
    emit(parsePolicy(request));
  } catch (error) {
    emit({ success: false, error: error.message });
    process.exitCode = 0;
  }
}
// Run the CLI entry point only when this file is executed directly, not when
// it is loaded by another module through require().
if (require.main === module) {
main();
}
// Functions made available to other modules.
module.exports = {
parsePolicy,
extractPolicyNo,
extractVin,
extractNameCandidates,
};