466 lines
15 KiB
JavaScript
Raw Normal View History

2026-05-20 21:39:12 +08:00
const fs = require('node:fs');
// Field labels that mark a line as carrying the policy number.
// extractPolicyNo tests each line for any of these as a substring.
const POLICY_LABELS = [
'商业险保单号',
'保险单号',
'保险单号码',
'电子保单号',
'保单号',
'保险合同号',
'合同号',
];
// Field labels that mark a line as carrying the VIN (substring match in extractVin).
const VIN_LABELS = ['车辆识别代号', '车辆识别代码', '车架号', 'VIN码', 'VIN', '识别代码'];
// Field labels that introduce the usage-nature value; extractLabelTail picks the
// first label (in this order) that occurs in the line.
const USAGE_NATURE_LABELS = ['使用性质', '车辆使用性质', '使用方式', '使用用途', '车辆用途'];
// Known usage-nature values. extractUsageNatureFromText returns the FIRST entry
// that occurs as a substring of the text, so order is significant: any value
// that contains another value must be listed before it (e.g. '企业非营业'
// before '非营业'), otherwise the longer value can never be returned.
const USAGE_NATURE_VALUES = [
  '家庭自用汽车',
  '家庭自用',
  '非营业个人',
  '企业非营业',
  '党政机关事业团体非营业',
  '非营业',
  '非营运',
  '预约出租客运',
  '网约车',
  '出租',
  '租赁',
  '客运',
  '货运',
  '营业',
  '营运',
];
// Field labels that mark a line as carrying the insurance period (start and/or
// end date). Matched as substrings by extractDates.
const INSURANCE_PERIOD_LABELS = [
'保险期间',
'保险期限',
'保险有效期',
'保险起期',
'保险止期',
'起保日期',
'终止日期',
'起止日期',
'承保期间',
];
// Person-name roles. `label` is the role written into each name candidate;
// `terms` are the field captions searched for in a line, and the first term
// (in array order) that occurs in the line is the one used.
const NAME_LABELS = [
{ label: '被保险人', terms: ['被保险人名称', '被保险人姓名', '被保险人'] },
{ label: '车主', terms: ['车主姓名', '车主名称', '车主'] },
{ label: '投保人', terms: ['投保人名称', '投保人姓名', '投保人'] },
];
// Phrases whose presence in the whitespace-stripped text counts as evidence of
// vehicle-damage coverage (see extractCoverage).
const COVERAGE_TERMS = ['机动车损失保险', '车辆损失保险', '车损险'];
/**
 * Normalise raw OCR text: apply Unicode NFKC folding, then turn list
 * separators (comma, enumeration comma, semicolon) into spaces.
 *
 * @param {*} text - Raw text; falsy values are treated as an empty string.
 * @returns {string} The normalised text.
 */
function toHalfWidth(text) {
  const source = String(text || '');
  const folded = source.normalize('NFKC');
  const spaced = folded.replace(/\u3000/g, ' ');
  const colons = spaced.replace(/[:﹕]/g, ':');
  const separated = colons.replace(/[,、;;]/g, ' ');
  // Bracketed spans are replaced by themselves, i.e. passed through unchanged.
  return separated.replace(/[(].*?[)]/g, (span) => span);
}
/**
 * Reduce a value to an upper-case identifier token containing only ASCII
 * letters, digits and hyphens.
 *
 * @param {*} value - Candidate token; falsy values become ''.
 * @returns {string} NFKC-folded, filtered, upper-cased token.
 */
function normalizeToken(value) {
  const folded = String(value || '').normalize('NFKC');
  const kept = folded.replace(/[^A-Za-z0-9-]/g, '');
  return kept.toUpperCase();
}
/**
 * NFKC-fold a value, delete every whitespace run and upper-case the result,
 * producing a form suitable for whitespace-insensitive substring search.
 *
 * @param {*} value - Text to compact; falsy values become ''.
 * @returns {string} Compacted upper-case text.
 */
function compactText(value) {
  const folded = String(value || '').normalize('NFKC');
  const squeezed = folded.replace(/\s+/g, '');
  return squeezed.toUpperCase();
}
/**
 * Normalise a person or company name: NFKC-fold it and strip all whitespace.
 * Letter case is left untouched.
 *
 * @param {*} value - Name text; falsy values become ''.
 * @returns {string} Name without any whitespace.
 */
function normalizeName(value) {
  const folded = String(value || '').normalize('NFKC');
  const squeezed = folded.replace(/\s+/g, '');
  return squeezed.trim();
}
/**
 * Format date parts as a zero-padded `YYYY-MM-DD` string.
 * The parts are only padded, not range-checked.
 *
 * @param {string|number} year
 * @param {string|number} month
 * @param {string|number} day
 * @returns {string} Date in `YYYY-MM-DD` form.
 */
function normalizeDate(year, month, day) {
  const yyyy = String(year).padStart(4, '0');
  const mm = String(month).padStart(2, '0');
  const dd = String(day).padStart(2, '0');
  return `${yyyy}-${mm}-${dd}`;
}
/**
 * Pick the text to parse from the request object.
 *
 * A non-blank string `rawText` wins and is returned untouched; otherwise the
 * truthy entries of an array `lines` are joined with newlines.
 *
 * @param {{rawText?: *, lines?: *}} input - Request payload.
 * @returns {string} Text to parse, or '' when neither source is usable.
 */
function readInputText(input) {
  const raw = input.rawText;
  const hasRawText = typeof raw === 'string' && raw.trim() !== '';
  if (hasRawText) return raw;
  return Array.isArray(input.lines) ? input.lines.filter(Boolean).join('\n') : '';
}
/**
 * Split text into trimmed, non-empty lines after running it through
 * toHalfWidth.
 *
 * @param {*} text - Raw text.
 * @returns {string[]} Non-empty trimmed lines in document order.
 */
function linesFromText(text) {
  const kept = [];
  for (const piece of toHalfWidth(text).split(/\r?\n+/)) {
    const trimmed = piece.trim();
    if (trimmed) kept.push(trimmed);
  }
  return kept;
}
/**
 * Report whether the line contains at least one of the labels as a substring.
 *
 * @param {string} line - Line to inspect.
 * @param {string[]} labels - Candidate labels.
 * @returns {boolean} True when any label occurs in the line.
 */
function hasAnyLabel(line, labels) {
  const noneFound = labels.every((label) => !line.includes(label));
  return !noneFound;
}
/**
 * Collect every identifier-like token in the text: a run of 8 to 50 ASCII
 * letters, digits and hyphens that starts with a letter or digit.
 *
 * The text is normalised and upper-cased once up front. Re-normalising inside
 * the match loop is loop-invariant work and made long inputs quadratic.
 *
 * @param {*} text - Text to scan.
 * @returns {string[]} Unique normalised tokens in order of first appearance.
 */
function extractCandidates(text) {
  const haystack = toHalfWidth(text).toUpperCase();
  const tokens = new Set();
  for (const [raw] of haystack.matchAll(/[A-Z0-9][A-Z0-9-]{7,49}/gi)) {
    tokens.add(normalizeToken(raw));
  }
  return [...tokens];
}
/**
 * Check whether a value has the shape of a VIN: after token normalisation and
 * hyphen removal, exactly 17 characters drawn from digits and upper-case
 * letters excluding I, O and Q.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when the value is VIN-shaped.
 */
function isVin(value) {
  const bare = normalizeToken(value).split('-').join('');
  return bare.length === 17 && /^[A-HJ-NPR-Z0-9]+$/.test(bare);
}
/**
 * Check whether a value looks like an 18-character ID-card number:
 * 17 digits followed by a digit or X (case-insensitive), ignoring surrounding
 * whitespace.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when the trimmed value has that shape.
 */
function isIdCard(value) {
  const candidate = String(value || '').trim();
  const idShape = /^\d{17}[\dX]$/i;
  return idShape.test(candidate);
}
/**
 * Check whether a value is written like a date: four digits, then one or two
 * digits, then one or two digits, separated by '-', '/' or '.'.
 * No trimming and no calendar validation is performed.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when the whole value has that shape.
 */
function isDateLike(value) {
  const candidate = String(value || '');
  const dateShape = /^\d{4}[-/.]\d{1,2}[-/.]\d{1,2}$/;
  return dateShape.test(candidate);
}
/**
 * Heuristically decide whether a token could be a policy number.
 *
 * Rejected: tokens outside 8..50 characters, VIN-shaped tokens (unless
 * `allowVinShape` is set and the token differs from `expectedVin`),
 * ID-card-shaped and date-shaped tokens, tokens with fewer than four digits,
 * all-digit tokens of exactly 11 characters, and all-digit tokens shorter
 * than 12 characters.
 *
 * @param {*} value - Candidate token.
 * @param {{allowVinShape?: boolean, expectedVin?: *}} [options]
 * @returns {boolean} True when the token passes every check.
 */
function isValidPolicyNo(value, options = {}) {
  const token = normalizeToken(value);
  const withinLength = token.length >= 8 && token.length <= 50;
  if (!withinLength || !/^[A-Z0-9-]+$/.test(token)) return false;

  const expectedVin = normalizeToken(options.expectedVin).replace(/-/g, '');
  const bare = token.replace(/-/g, '');
  if (isVin(bare)) {
    // A VIN-shaped token is only tolerated when the caller opted in and it is
    // not the vehicle's own VIN.
    const tolerated = options.allowVinShape && bare !== expectedVin;
    if (!tolerated) return false;
  }
  if (isIdCard(token) || isDateLike(token)) return false;

  const digitCount = token.replace(/\D/g, '').length;
  const letterCount = token.replace(/[^A-Z]/g, '').length;
  if (digitCount < 4) return false;
  if (letterCount === 0 && token.length === 11) return false;
  return letterCount > 0 || token.length >= 12;
}
/**
 * Rank a policy-number candidate; higher is better.
 *
 * The base score is the token length, adjusted by: +8 if it has a letter,
 * +4 if it has a digit, -2 if it has a hyphen, +3 if it starts with 'P'
 * followed by a letter or digit.
 *
 * @param {*} value - Candidate token.
 * @returns {number} Integer score.
 */
function scorePolicyNo(value) {
  const token = normalizeToken(value);
  const adjustments = [
    [/[A-Z]/.test(token), 8],
    [/\d/.test(token), 4],
    [token.includes('-'), -2],
    [/^P[A-Z0-9]/.test(token), 3],
  ];
  return adjustments.reduce(
    (total, [applies, delta]) => (applies ? total + delta : total),
    token.length,
  );
}
/**
 * Choose the most plausible policy number from a list of candidates.
 *
 * If the normalised expected policy number is among the candidates it is
 * returned immediately, without validity checks. Otherwise the valid candidate
 * with the highest score wins; on a tie the earliest one is kept.
 *
 * @param {Array<*>} candidates - Raw candidate tokens.
 * @param {*} expectedPolicyNo - Policy number already known to the caller, if any.
 * @param {object} [options] - Passed through to isValidPolicyNo.
 * @returns {string} The chosen token, or '' when nothing qualifies.
 */
function bestPolicyCandidate(candidates, expectedPolicyNo, options = {}) {
  const tokens = candidates.map((item) => normalizeToken(item));
  const wanted = normalizeToken(expectedPolicyNo);
  if (wanted && tokens.includes(wanted)) return wanted;

  let winner = '';
  let winnerScore = -Infinity;
  for (const token of tokens) {
    if (!isValidPolicyNo(token, options)) continue;
    const score = scorePolicyNo(token);
    // Strict comparison keeps the first of equally scored tokens.
    if (score > winnerScore) {
      winner = token;
      winnerScore = score;
    }
  }
  return winner;
}
/**
 * Find the policy number.
 *
 * Lines containing a policy label are tried first: candidates are taken from
 * the label line plus the two following lines, and VIN-shaped tokens are
 * allowed there as long as they are not the expected VIN. If no labelled
 * window yields a result, the whole text is searched with VIN-shaped tokens
 * excluded.
 *
 * @param {string[]} lines - Normalised lines of the document.
 * @param {string} fullText - Whole normalised text.
 * @param {*} expectedPolicyNo - Policy number known to the caller, if any.
 * @param {*} expectedVin - VIN known to the caller, if any.
 * @returns {string} Policy number, or '' when none is found.
 */
function extractPolicyNo(lines, fullText, expectedPolicyNo, expectedVin) {
  const labelledOptions = { allowVinShape: true, expectedVin };
  for (const [row, line] of lines.entries()) {
    if (!hasAnyLabel(line, POLICY_LABELS)) continue;
    const nearby = [line, lines[row + 1] || '', lines[row + 2] || ''].join(' ');
    const hit = bestPolicyCandidate(extractCandidates(nearby), expectedPolicyNo, labelledOptions);
    if (hit) return hit;
  }
  const everywhere = extractCandidates(fullText);
  return bestPolicyCandidate(everywhere, expectedPolicyNo, { expectedVin });
}
/**
 * Find the VIN.
 *
 * Order of preference: the expected VIN if it is VIN-shaped and present in the
 * compacted text; then the first VIN-shaped token on a VIN-labelled line or
 * the line after it; finally the first VIN-shaped token anywhere.
 *
 * @param {string[]} lines - Normalised lines of the document.
 * @param {string} fullText - Whole normalised text.
 * @param {*} expectedVin - VIN known to the caller, if any.
 * @returns {string} VIN without hyphens, or '' when none is found.
 */
function extractVin(lines, fullText, expectedVin) {
  const stripHyphens = (token) => token.replace(/-/g, '');
  const firstVinIn = (text) =>
    extractCandidates(text)
      .map(stripHyphens)
      .find((token) => isVin(token));

  const wanted = stripHyphens(normalizeToken(expectedVin));
  if (wanted && isVin(wanted) && compactText(fullText).includes(wanted)) return wanted;

  for (let row = 0; row < lines.length; row += 1) {
    if (!hasAnyLabel(lines[row], VIN_LABELS)) continue;
    const hit = firstVinIn([lines[row], lines[row + 1] || ''].join(' '));
    if (hit) return hit;
  }
  return firstVinIn(fullText) || '';
}
/**
 * Isolate the part of a text fragment that may hold a name.
 *
 * Steps: drop a leading name caption (with optional colon), cut the text at
 * the first keyword that starts another field, blank out ASCII letters,
 * digits and a few punctuation characters, then trim.
 *
 * @param {*} text - Text following a name label.
 * @returns {string} Cleaned fragment, possibly empty.
 */
function cleanNameText(text) {
  const withoutCaption = String(text || '').replace(/^(名称|姓名|客户名称|客户姓名)[:]?/, '');
  const [head] = withoutCaption.split(/(?:证件|身份证|统一社会信用|地址|电话|手机|车架|车辆|号牌|保单|保险|发动机|VIN|使用性质|车辆使用性质|使用方式|使用用途|保险期间|保险期限|保险有效期|起保|终止|承保)/);
  const blanked = head.replace(/[0-9A-Za-z_*xX:/\\-]/g, ' ');
  return blanked.trim();
}
/**
 * Extract plausible names from a text fragment.
 *
 * The fragment is cleaned with cleanNameText; every run of 2 to 12 CJK
 * characters (or middle dots) is a candidate, except captions and generic
 * words on the reject list and anything containing '保险' or '地址'.
 *
 * @param {*} text - Text following a name label.
 * @returns {string[]} Candidate names in order of appearance (may repeat).
 */
function findNamesInText(text) {
  const rejected = new Set([
    '被保险人',
    '投保人',
    '车主姓名',
    '车主名称',
    '车主',
    '姓名',
    '名称',
    '机动车',
    '商业险',
    '保险单',
    '保险人',
    '使用性质',
    '家庭自用汽车',
    '有限公司',
  ]);
  const runs = cleanNameText(text).match(/[\u4e00-\u9fa5·]{2,12}/g) || [];
  return runs
    .map((run) => normalizeName(run))
    .filter((name) => !rejected.has(name) && !name.includes('保险') && !name.includes('地址'));
}
/**
 * Record a name under a role label, mutating `candidates` in place.
 *
 * Empty names and exact (label, name) duplicates are ignored. If the name is
 * already listed under another label, the new label is appended to the first
 * such entry as `existing/new` unless that entry's label already contains it.
 *
 * @param {Array<{label: string, name: string}>} candidates - List to update.
 * @param {string} label - Role label of the name.
 * @param {*} name - Raw name.
 * @returns {void}
 */
function addNameCandidate(candidates, label, name) {
  const normalized = normalizeName(name);
  if (!normalized) return;

  const sameName = candidates.filter((entry) => entry.name === normalized);
  if (sameName.some((entry) => entry.label === label)) return;

  const [first] = sameName;
  if (first) {
    if (!first.label.includes(label)) first.label = `${first.label}/${label}`;
    return;
  }
  candidates.push({ label, name: normalized });
}
/**
 * Gather up to eight name candidates from the document.
 *
 * The expected owner name is added first (label '全文匹配') when it occurs in
 * the compacted text. Then, for each line and each role in NAME_LABELS whose
 * caption occurs in the line, names are taken from the text after the caption
 * and from the following line.
 *
 * @param {string[]} lines - Normalised lines of the document.
 * @param {string} fullText - Whole normalised text.
 * @param {*} expectedOwnerName - Owner name known to the caller, if any.
 * @returns {Array<{label: string, name: string}>} At most eight candidates.
 */
function extractNameCandidates(lines, fullText, expectedOwnerName) {
  const found = [];
  const haystack = compactText(fullText);
  const wanted = normalizeName(expectedOwnerName);
  if (wanted && haystack.includes(compactText(wanted))) {
    addNameCandidate(found, '全文匹配', wanted);
  }

  for (const [row, line] of lines.entries()) {
    for (const { label, terms } of NAME_LABELS) {
      const caption = terms.find((term) => line.includes(term));
      if (!caption) continue;
      const afterCaption = line.slice(line.indexOf(caption) + caption.length).replace(/^[:\s]+/, '');
      const sources = [afterCaption, lines[row + 1] || ''];
      sources
        .flatMap((source) => findNamesInText(source))
        .forEach((name) => addNameCandidate(found, label, name));
    }
  }
  return found.slice(0, 8);
}
/**
 * Find an ID-certificate number in the whitespace-stripped text.
 *
 * A full number (17 digits plus a digit or X) is preferred; failing that, a
 * masked number whose middle or start is a run of '*' / 'X' characters.
 *
 * @param {*} fullText - Whole text of the document.
 * @returns {{certificateNo: string, certificateNoMasked: boolean}}
 *   Upper-cased number and whether it was masked; '' and false when absent.
 */
function extractCertificate(fullText) {
  const squeezed = toHalfWidth(fullText).replace(/\s+/g, '');
  const attempts = [
    { certificateNoMasked: false, pattern: /\d{17}[\dXx]/ },
    { certificateNoMasked: true, pattern: /(?:\d{2,8})[*Xx]{4,14}(?:\d{2,6})|[*Xx]{6,16}(?:\d{2,6})/ },
  ];
  for (const { certificateNoMasked, pattern } of attempts) {
    const hit = squeezed.match(pattern);
    if (hit) return { certificateNo: hit[0].toUpperCase(), certificateNoMasked };
  }
  return { certificateNo: '', certificateNoMasked: false };
}
/**
 * Find a licence-plate number: one CJK character, one letter, then 5 to 7
 * letters or digits. Lines mentioning a plate caption are searched first,
 * then the whole text.
 *
 * @param {string[]} lines - Normalised lines of the document.
 * @param {string} fullText - Whole normalised text.
 * @returns {string} First plate-shaped match, or ''.
 */
function extractPlateNo(lines, fullText) {
  const platePattern = /[\u4e00-\u9fa5][A-Z][A-Z0-9]{5,7}/g;
  const firstPlateIn = (text) => {
    const hits = toHalfWidth(text).toUpperCase().match(platePattern);
    return hits ? hits[0] : '';
  };

  const captioned = lines.filter((line) => /(车牌|号牌|牌照|牌号)/.test(line));
  for (const line of captioned) {
    const plate = firstPlateIn(line);
    if (plate) return plate;
  }
  return firstPlateIn(fullText);
}
/**
 * Find the engine number near an engine-number caption.
 *
 * For each captioned line, tokens from that line and the next are considered;
 * the first token of 5 to 24 characters that is neither VIN-shaped nor a
 * valid policy number is returned.
 *
 * @param {string[]} lines - Normalised lines of the document.
 * @returns {string} Engine number, or '' when none is found.
 */
function extractEngineNo(lines) {
  const captionPattern = /(发动机号|发动机号码|发动机编号)/;
  for (const [row, line] of lines.entries()) {
    if (!captionPattern.test(line)) continue;
    const nearby = [line, lines[row + 1] || ''].join(' ');
    const hit = extractCandidates(nearby).find(
      (token) =>
        !isVin(token) && !isValidPolicyNo(token) && token.length >= 5 && token.length <= 24,
    );
    if (hit) return hit;
  }
  return '';
}
/**
 * Collect every date written as year/month/day, separated by 年/月 or by
 * '-', '/' or '.', with an optional trailing 日.
 *
 * The text is normalised once before matching (re-normalising it on every
 * loop iteration was loop-invariant work) and a Set handles de-duplication.
 * Month and day values are not range-checked.
 *
 * @param {*} text - Text to scan.
 * @returns {string[]} Unique `YYYY-MM-DD` dates in order of first appearance.
 */
function extractDateCandidatesFromText(text) {
  const haystack = toHalfWidth(text);
  const dates = new Set();
  for (const [, year, month, day] of haystack.matchAll(/(\d{4})[年\-/.](\d{1,2})[月\-/.](\d{1,2})日?/g)) {
    dates.add(normalizeDate(year, month, day));
  }
  return [...dates];
}
/**
 * Return the text that follows a label within a line.
 *
 * The first label (in array order) that occurs in the line is used; leading
 * colons and whitespace after it are removed and the result is trimmed.
 *
 * @param {string} line - Line to inspect.
 * @param {string[]} labels - Candidate labels.
 * @returns {string} Text after the label, or '' when no label occurs.
 */
function extractLabelTail(line, labels) {
  const hit = labels.find((label) => line.includes(label));
  if (!hit) return '';
  const start = line.indexOf(hit) + hit.length;
  const tail = line.slice(start);
  return tail.replace(/^[:\s]+/, '').trim();
}
/**
 * Return the first entry of USAGE_NATURE_VALUES (in list order) that occurs
 * in the normalised, whitespace-stripped text.
 *
 * @param {*} text - Text to scan.
 * @returns {string} Matching usage-nature value, or '' when none occurs.
 */
function extractUsageNatureFromText(text) {
  const squeezed = toHalfWidth(text).replace(/\s+/g, '');
  const hit = USAGE_NATURE_VALUES.find((value) => squeezed.includes(value));
  return hit || '';
}
/**
 * Find the vehicle's usage nature.
 *
 * Lines with a usage-nature caption are checked first, using the text after
 * the caption plus the following line; if none yields a value, the whole text
 * is scanned.
 *
 * @param {string[]} lines - Normalised lines of the document.
 * @param {string} fullText - Whole normalised text.
 * @returns {string} Usage-nature value, or '' when none is found.
 */
function extractUsageNature(lines, fullText) {
  for (const [row, line] of lines.entries()) {
    if (!hasAnyLabel(line, USAGE_NATURE_LABELS)) continue;
    const afterCaption = extractLabelTail(line, USAGE_NATURE_LABELS);
    const nearby = [afterCaption, lines[row + 1] || ''].join(' ');
    const hit = extractUsageNatureFromText(nearby);
    if (hit) return hit;
  }
  return extractUsageNatureFromText(fullText);
}
/**
 * Find the insurance period's start and end dates.
 *
 * For each line carrying a period label, dates are read from the text after
 * the label plus the next line. Two or more dates give the result directly.
 * When that window holds exactly one date, the line after it is searched for
 * a different date to use as the end date. This handles a label, the start
 * date and the end date each on their own line. Re-reading the next line,
 * which is already part of the window, could only find the same date again
 * and produced startDate === endDate.
 *
 * If no labelled window yields a pair, the first two distinct dates of the
 * whole text are used.
 *
 * @param {string[]} lines - Normalised lines of the document.
 * @param {string} fullText - Whole normalised text.
 * @returns {{startDate: string, endDate: string}} `YYYY-MM-DD` dates; either
 *   may be '' when not found.
 */
function extractDates(lines, fullText) {
  for (let i = 0; i < lines.length; i += 1) {
    if (!hasAnyLabel(lines[i], INSURANCE_PERIOD_LABELS)) continue;
    const tail = extractLabelTail(lines[i], INSURANCE_PERIOD_LABELS);
    const windowDates = extractDateCandidatesFromText([tail, lines[i + 1] || ''].join(' '));
    if (windowDates.length >= 2) {
      return { startDate: windowDates[0], endDate: windowDates[1] };
    }
    if (windowDates.length === 1) {
      const [startDate] = windowDates;
      // lines[i + 1] is already covered by the window, so look one line further.
      const endDate = extractDateCandidatesFromText(lines[i + 2] || '').find(
        (date) => date !== startDate,
      );
      if (endDate) return { startDate, endDate };
    }
  }
  const dates = extractDateCandidatesFromText(fullText);
  return { startDate: dates[0] || '', endDate: dates[1] || '' };
}
/**
 * Look for vehicle-damage coverage wording in the whitespace-stripped text.
 *
 * @param {*} fullText - Whole text of the document.
 * @returns {{hasVehicleDamageCoverage: boolean, coverageEvidence: string[]}}
 *   The COVERAGE_TERMS entries that occur in the text, and whether any did.
 */
function extractCoverage(fullText) {
  const squeezed = String(fullText || '').replace(/\s+/g, '');
  const coverageEvidence = [];
  for (const term of COVERAGE_TERMS) {
    if (squeezed.includes(term)) coverageEvidence.push(term);
  }
  return {
    hasVehicleDamageCoverage: coverageEvidence.length > 0,
    coverageEvidence,
  };
}
/**
 * Build the list of warnings for a parse result.
 *
 * One warning is emitted per missing field (policy number, VIN, names, usage
 * nature, insurance period, vehicle-damage coverage), followed by mismatch
 * warnings when an extracted value differs from the value the caller expected.
 *
 * @param {{input: object, output: object}} context - The request payload and
 *   the parse result assembled so far.
 * @returns {string[]} Warning messages in a fixed order.
 */
function buildWarnings({ input, output }) {
  const missingChecks = [
    [!output.policyNo, '未识别到商业险保单号'],
    [!output.vin, '未识别到车架号VIN'],
    [!output.nameCandidates.length, '未识别到被保险人/车主/投保人姓名'],
    [!output.usageNature, '未识别到使用性质'],
    [!output.startDate || !output.endDate, '未识别到保险期间'],
    [!output.hasVehicleDamageCoverage, '未识别到车损险险种证据'],
  ];
  const warnings = missingChecks
    .filter(([isMissing]) => isMissing)
    .map(([, message]) => message);

  const wantedPolicyNo = normalizeToken(input.expectedPolicyNo);
  if (wantedPolicyNo && output.policyNo && output.policyNo !== wantedPolicyNo) {
    warnings.push('OCR保单号与订单已填商业险保单号不一致');
  }

  const wantedVin = normalizeToken(input.expectedVin).replace(/-/g, '');
  if (wantedVin && output.vin && output.vin !== wantedVin) {
    warnings.push('OCR车架号VIN与订单VIN不一致');
  }

  const wantedOwner = normalizeName(input.expectedOwnerName);
  if (wantedOwner && output.nameCandidates.length) {
    const ownerSeen = output.nameCandidates.some(
      (candidate) => normalizeName(candidate.name) === wantedOwner,
    );
    if (!ownerSeen) warnings.push('OCR姓名候选与订单车主姓名不一致');
  }
  return warnings;
}
/**
 * Compute a confidence score for a parse result.
 *
 * Starts at 0.2 and adds a fixed weight for each field that was extracted;
 * the sum is rounded to two decimals and clamped to the range 0..0.99.
 *
 * @param {object} output - Parse result; `nameCandidates` must be an array.
 * @returns {number} Confidence between 0 and 0.99.
 */
function computeConfidence(output) {
  const contributions = [
    [output.policyNo, 0.25],
    [output.vin, 0.25],
    [output.nameCandidates.length, 0.15],
    [output.usageNature, 0.05],
    [output.startDate && output.endDate, 0.05],
    [output.hasVehicleDamageCoverage, 0.1],
    [output.certificateNo, 0.03],
    [output.plateNo || output.engineNo, 0.02],
  ];
  let total = 0.2;
  for (const [present, weight] of contributions) {
    if (present) total += weight;
  }
  const rounded = Number(total.toFixed(2));
  return Math.max(0, Math.min(0.99, rounded));
}
/**
 * Parse OCR text of an insurance policy into structured fields.
 *
 * When the input carries no usable text, a `success: false` result with empty
 * fields and a single explanatory warning is returned. Otherwise every
 * extractor runs on the normalised text, after which warnings and the
 * confidence score are derived from the assembled result.
 *
 * @param {object} input - Request payload: `rawText` or `lines`, plus the
 *   optional `expectedPolicyNo`, `expectedVin`, `expectedOwnerName` and
 *   `imageUrl`.
 * @returns {object} Parse result; key order is the same in both branches.
 */
function parsePolicy(input) {
  const rawText = readInputText(input);
  const isBlank = rawText.trim() === '';
  if (isBlank) {
    const reason = input.imageUrl
      ? '当前脚本不直接OCR图片请先传入rawText'
      : '缺少rawText或lines';
    return {
      success: false,
      policyNo: '',
      vin: '',
      nameCandidates: [],
      certificateNo: '',
      certificateNoMasked: false,
      plateNo: '',
      engineNo: '',
      usageNature: '',
      startDate: '',
      endDate: '',
      hasVehicleDamageCoverage: false,
      coverageEvidence: [],
      confidence: 0,
      warnings: [reason],
      rawText: '',
    };
  }

  const fullText = toHalfWidth(rawText);
  const lines = linesFromText(fullText);
  const { expectedPolicyNo, expectedVin, expectedOwnerName } = input;

  const output = {
    success: true,
    policyNo: extractPolicyNo(lines, fullText, expectedPolicyNo, expectedVin),
    vin: extractVin(lines, fullText, expectedVin),
    nameCandidates: extractNameCandidates(lines, fullText, expectedOwnerName),
    ...extractCertificate(fullText),
    plateNo: extractPlateNo(lines, fullText),
    engineNo: extractEngineNo(lines),
    usageNature: extractUsageNature(lines, fullText),
    ...extractDates(lines, fullText),
    ...extractCoverage(fullText),
    // Placeholders; both are filled in below once every field is known.
    confidence: 0,
    warnings: [],
    rawText: fullText,
  };
  output.warnings = buildWarnings({ input, output });
  output.confidence = computeConfidence(output);
  return output;
}
/**
 * CLI entry point: read a JSON request from stdin, parse it and write the
 * JSON result to stdout.
 *
 * Failures (unreadable stdin, invalid JSON, parser errors) are reported
 * in-band as `{ success: false, error }` and the exit code is kept at 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const stdin = fs.readFileSync(0, 'utf8');
    // Trim first so that whitespace-only input (e.g. a lone newline) falls
    // back to `{}` instead of failing in JSON.parse; this also drops a
    // leading byte-order mark, which JSON.parse rejects.
    const payload = stdin.trim() || '{}';
    const input = JSON.parse(payload);
    process.stdout.write(JSON.stringify(parsePolicy(input)));
  } catch (error) {
    process.stdout.write(JSON.stringify({ success: false, error: error.message }));
    process.exitCode = 0;
  }
}
// Run the CLI only when this file is executed directly, not when it is
// loaded with require().
if (require.main === module) {
main();
}
// Programmatic API: the top-level parser plus three of the field extractors.
module.exports = {
parsePolicy,
extractPolicyNo,
extractVin,
extractNameCandidates,
};