const fs = require('node:fs'); const POLICY_LABELS = [ '商业险保单号', '保险单号', '保险单号码', '电子保单号', '保单号', '保险合同号', '合同号', ]; const VIN_LABELS = ['车辆识别代号', '车辆识别代码', '车架号', 'VIN码', 'VIN', '识别代码']; const USAGE_NATURE_LABELS = ['使用性质', '车辆使用性质', '使用方式', '使用用途', '车辆用途']; const USAGE_NATURE_VALUES = [ '家庭自用汽车', '家庭自用', '非营业个人', '非营业', '非营运', '企业非营业', '党政机关事业团体非营业', '预约出租客运', '网约车', '出租', '租赁', '客运', '货运', '营业', '营运', ]; const INSURANCE_PERIOD_LABELS = [ '保险期间', '保险期限', '保险有效期', '保险起期', '保险止期', '起保日期', '终止日期', '起止日期', '承保期间', ]; const NAME_LABELS = [ { label: '被保险人', terms: ['被保险人名称', '被保险人姓名', '被保险人'] }, { label: '车主', terms: ['车主姓名', '车主名称', '车主'] }, { label: '投保人', terms: ['投保人名称', '投保人姓名', '投保人'] }, ]; const COVERAGE_TERMS = ['机动车损失保险', '车辆损失保险', '车损险']; function toHalfWidth(text) { return String(text || '') .normalize('NFKC') .replace(/\u3000/g, ' ') .replace(/[:﹕]/g, ':') .replace(/[,、;;]/g, ' ') .replace(/[((].*?[))]/g, match => match); } function normalizeToken(value) { return String(value || '') .normalize('NFKC') .replace(/[^A-Za-z0-9-]/g, '') .toUpperCase(); } function compactText(value) { return String(value || '').normalize('NFKC').replace(/\s+/g, '').toUpperCase(); } function normalizeName(value) { return String(value || '').normalize('NFKC').replace(/\s+/g, '').trim(); } function normalizeDate(year, month, day) { return [ String(year).padStart(4, '0'), String(month).padStart(2, '0'), String(day).padStart(2, '0'), ].join('-'); } function readInputText(input) { if (typeof input.rawText === 'string' && input.rawText.trim()) return input.rawText; if (Array.isArray(input.lines)) return input.lines.filter(Boolean).join('\n'); return ''; } function linesFromText(text) { return toHalfWidth(text) .split(/\r?\n+/) .map(line => line.trim()) .filter(Boolean); } function hasAnyLabel(line, labels) { return labels.some(label => line.includes(label)); } function extractCandidates(text) { const result = []; const re = /[A-Z0-9][A-Z0-9-]{7,49}/gi; let match; while ((match = re.exec(toHalfWidth(text).toUpperCase())) !== null) { result.push(normalizeToken(match[0])); } return [...new Set(result)]; } function isVin(value) { const token = normalizeToken(value).replace(/-/g, ''); return /^[A-HJ-NPR-Z0-9]{17}$/.test(token); } function isIdCard(value) { return /^\d{17}[\dX]$/i.test(String(value || '').trim()); } function isDateLike(value) { return /^\d{4}[-/.]\d{1,2}[-/.]\d{1,2}$/.test(String(value || '')); } function isValidPolicyNo(value, options = {}) { const token = normalizeToken(value); if (token.length < 8 || token.length > 50) return false; if (!/^[A-Z0-9-]+$/.test(token)) return false; const expectedVin = normalizeToken(options.expectedVin).replace(/-/g, ''); if (isVin(token.replace(/-/g, ''))) { if (!options.allowVinShape || token.replace(/-/g, '') === expectedVin) return false; } if (isIdCard(token) || isDateLike(token)) return false; const digits = (token.match(/\d/g) || []).length; const letters = (token.match(/[A-Z]/g) || []).length; if (digits < 4) return false; if (letters === 0 && token.length === 11) return false; return letters > 0 || token.length >= 12; } function scorePolicyNo(value) { const token = normalizeToken(value); let score = token.length; if (/[A-Z]/.test(token)) score += 8; if (/\d/.test(token)) score += 4; if (token.includes('-')) score -= 2; if (/^P[A-Z0-9]/.test(token)) score += 3; return score; } function bestPolicyCandidate(candidates, expectedPolicyNo, options = {}) { const expected = normalizeToken(expectedPolicyNo); if (expected) { const exact = candidates.map(normalizeToken).find(item => item === expected); if (exact) return exact; } const valid = candidates .map(normalizeToken) .filter(item => isValidPolicyNo(item, options)); if (!valid.length) return ''; return valid.sort((a, b) => scorePolicyNo(b) - scorePolicyNo(a))[0] || ''; } function extractPolicyNo(lines, fullText, expectedPolicyNo, expectedVin) { for (let i = 0; i < lines.length; i += 1) { if (!hasAnyLabel(lines[i], POLICY_LABELS)) continue; const windowText = [lines[i], lines[i + 1] || '', lines[i + 2] || ''].join(' '); const candidate = bestPolicyCandidate(extractCandidates(windowText), expectedPolicyNo, { allowVinShape: true, expectedVin, }); if (candidate) return candidate; } return bestPolicyCandidate(extractCandidates(fullText), expectedPolicyNo, { expectedVin }); } function extractVin(lines, fullText, expectedVin) { const expected = normalizeToken(expectedVin).replace(/-/g, ''); if (expected && isVin(expected) && compactText(fullText).includes(expected)) return expected; for (let i = 0; i < lines.length; i += 1) { if (!hasAnyLabel(lines[i], VIN_LABELS)) continue; const windowText = [lines[i], lines[i + 1] || ''].join(' '); const candidate = extractCandidates(windowText) .map(item => item.replace(/-/g, '')) .find(isVin); if (candidate) return candidate; } return extractCandidates(fullText) .map(item => item.replace(/-/g, '')) .find(isVin) || ''; } function cleanNameText(text) { return String(text || '') .replace(/^(名称|姓名|客户名称|客户姓名)[::]?/, '') .split(/(?:证件|身份证|统一社会信用|地址|电话|手机|车架|车辆|号牌|保单|保险|发动机|VIN|使用性质|车辆使用性质|使用方式|使用用途|保险期间|保险期限|保险有效期|起保|终止|承保)/)[0] .replace(/[0-9A-Za-z_*xX::/\\-]/g, ' ') .trim(); } function findNamesInText(text) { const cleaned = cleanNameText(text); const blacklist = new Set([ '被保险人', '投保人', '车主姓名', '车主名称', '车主', '姓名', '名称', '机动车', '商业险', '保险单', '保险人', '使用性质', '家庭自用汽车', '有限公司', ]); const names = []; const re = /[\u4e00-\u9fa5·]{2,12}/g; let match; while ((match = re.exec(cleaned)) !== null) { const name = normalizeName(match[0]); if (!blacklist.has(name) && !name.includes('保险') && !name.includes('地址')) { names.push(name); } } return names; } function addNameCandidate(candidates, label, name) { const normalized = normalizeName(name); if (!normalized) return; if (candidates.some(item => item.label === label && item.name === normalized)) return; if (candidates.some(item => item.name === normalized)) { const existing = candidates.find(item => item.name === normalized); if (existing && !existing.label.includes(label)) existing.label = `${existing.label}/${label}`; return; } candidates.push({ label, name: normalized }); } function extractNameCandidates(lines, fullText, expectedOwnerName) { const candidates = []; const compact = compactText(fullText); const expected = normalizeName(expectedOwnerName); if (expected && compact.includes(compactText(expected))) { addNameCandidate(candidates, '全文匹配', expected); } for (let i = 0; i < lines.length; i += 1) { for (const group of NAME_LABELS) { const term = group.terms.find(item => lines[i].includes(item)); if (!term) continue; const sameLineTail = lines[i].slice(lines[i].indexOf(term) + term.length).replace(/^[::\s]+/, ''); const windowTexts = [sameLineTail, lines[i + 1] || '']; for (const text of windowTexts) { for (const name of findNamesInText(text)) { addNameCandidate(candidates, group.label, name); } } } } return candidates.slice(0, 8); } function extractCertificate(fullText) { const normalized = toHalfWidth(fullText).replace(/\s+/g, ''); const full = normalized.match(/\d{17}[\dXx]/); if (full) return { certificateNo: full[0].toUpperCase(), certificateNoMasked: false }; const masked = normalized.match(/(?:\d{2,8})[*Xx]{4,14}(?:\d{2,6})|[*Xx]{6,16}(?:\d{2,6})/); if (masked) return { certificateNo: masked[0].toUpperCase(), certificateNoMasked: true }; return { certificateNo: '', certificateNoMasked: false }; } function extractPlateNo(lines, fullText) { const plateRe = /[\u4e00-\u9fa5][A-Z][A-Z0-9]{5,7}/g; for (const line of lines) { if (!/(车牌|号牌|牌照|牌号)/.test(line)) continue; const match = toHalfWidth(line).toUpperCase().match(plateRe); if (match) return match[0]; } const match = toHalfWidth(fullText).toUpperCase().match(plateRe); return match ? match[0] : ''; } function extractEngineNo(lines) { for (let i = 0; i < lines.length; i += 1) { if (!/(发动机号|发动机号码|发动机编号)/.test(lines[i])) continue; const windowText = [lines[i], lines[i + 1] || ''].join(' '); const candidates = extractCandidates(windowText).filter(item => !isVin(item) && !isValidPolicyNo(item)); const candidate = candidates.find(item => item.length >= 5 && item.length <= 24); if (candidate) return candidate; } return ''; } function extractDateCandidatesFromText(text) { const dates = []; const re = /(\d{4})[年\-/.](\d{1,2})[月\-/.](\d{1,2})日?/g; let match; while ((match = re.exec(toHalfWidth(text))) !== null) { const date = normalizeDate(match[1], match[2], match[3]); if (!dates.includes(date)) dates.push(date); } return dates; } function extractLabelTail(line, labels) { const matchedLabel = labels.find(label => line.includes(label)); if (!matchedLabel) return ''; return line.slice(line.indexOf(matchedLabel) + matchedLabel.length).replace(/^[::\s]+/, '').trim(); } function extractUsageNatureFromText(text) { const normalized = toHalfWidth(text).replace(/\s+/g, ''); for (const value of USAGE_NATURE_VALUES) { if (normalized.includes(value)) return value; } return ''; } function extractUsageNature(lines, fullText) { for (let i = 0; i < lines.length; i += 1) { if (!hasAnyLabel(lines[i], USAGE_NATURE_LABELS)) continue; const windowText = [extractLabelTail(lines[i], USAGE_NATURE_LABELS), lines[i + 1] || ''].join(' '); const value = extractUsageNatureFromText(windowText); if (value) return value; } return extractUsageNatureFromText(fullText); } function extractDates(lines, fullText) { for (let i = 0; i < lines.length; i += 1) { if (!hasAnyLabel(lines[i], INSURANCE_PERIOD_LABELS)) continue; const windowText = [extractLabelTail(lines[i], INSURANCE_PERIOD_LABELS), lines[i + 1] || ''].join(' '); const dates = extractDateCandidatesFromText(windowText); if (dates.length >= 2) return { startDate: dates[0], endDate: dates[1] }; if (dates.length === 1) { const nextDates = extractDateCandidatesFromText(lines[i + 1] || ''); if (nextDates.length) return { startDate: dates[0], endDate: nextDates[0] }; } } const dates = extractDateCandidatesFromText(fullText); return { startDate: dates[0] || '', endDate: dates[1] || '' }; } function extractCoverage(fullText) { const compact = String(fullText || '').replace(/\s+/g, ''); const evidence = COVERAGE_TERMS.filter(term => compact.includes(term)); return { hasVehicleDamageCoverage: evidence.length > 0, coverageEvidence: evidence, }; } function buildWarnings({ input, output }) { const warnings = []; if (!output.policyNo) warnings.push('未识别到商业险保单号'); if (!output.vin) warnings.push('未识别到车架号VIN'); if (!output.nameCandidates.length) warnings.push('未识别到被保险人/车主/投保人姓名'); if (!output.usageNature) warnings.push('未识别到使用性质'); if (!output.startDate || !output.endDate) warnings.push('未识别到保险期间'); if (!output.hasVehicleDamageCoverage) warnings.push('未识别到车损险险种证据'); const expectedPolicyNo = normalizeToken(input.expectedPolicyNo); if (expectedPolicyNo && output.policyNo && expectedPolicyNo !== output.policyNo) { warnings.push('OCR保单号与订单已填商业险保单号不一致'); } const expectedVin = normalizeToken(input.expectedVin).replace(/-/g, ''); if (expectedVin && output.vin && expectedVin !== output.vin) { warnings.push('OCR车架号VIN与订单VIN不一致'); } const expectedOwnerName = normalizeName(input.expectedOwnerName); if (expectedOwnerName && output.nameCandidates.length) { const matched = output.nameCandidates.some(item => normalizeName(item.name) === expectedOwnerName); if (!matched) warnings.push('OCR姓名候选与订单车主姓名不一致'); } return warnings; } function computeConfidence(output) { let score = 0.2; if (output.policyNo) score += 0.25; if (output.vin) score += 0.25; if (output.nameCandidates.length) score += 0.15; if (output.usageNature) score += 0.05; if (output.startDate && output.endDate) score += 0.05; if (output.hasVehicleDamageCoverage) score += 0.1; if (output.certificateNo) score += 0.03; if (output.plateNo || output.engineNo) score += 0.02; return Math.max(0, Math.min(0.99, Number(score.toFixed(2)))); } function parsePolicy(input) { const rawText = readInputText(input); if (!rawText.trim()) { return { success: false, policyNo: '', vin: '', nameCandidates: [], certificateNo: '', certificateNoMasked: false, plateNo: '', engineNo: '', usageNature: '', startDate: '', endDate: '', hasVehicleDamageCoverage: false, coverageEvidence: [], confidence: 0, warnings: [input.imageUrl ? '当前脚本不直接OCR图片,请先传入rawText' : '缺少rawText或lines'], rawText: '', }; } const fullText = toHalfWidth(rawText); const lines = linesFromText(fullText); const certificate = extractCertificate(fullText); const usageNature = extractUsageNature(lines, fullText); const dates = extractDates(lines, fullText); const coverage = extractCoverage(fullText); const output = { success: true, policyNo: extractPolicyNo(lines, fullText, input.expectedPolicyNo, input.expectedVin), vin: extractVin(lines, fullText, input.expectedVin), nameCandidates: extractNameCandidates(lines, fullText, input.expectedOwnerName), ...certificate, plateNo: extractPlateNo(lines, fullText), engineNo: extractEngineNo(lines), usageNature, ...dates, ...coverage, confidence: 0, warnings: [], rawText: fullText, }; output.warnings = buildWarnings({ input, output }); output.confidence = computeConfidence(output); return output; } async function main() { try { const stdin = fs.readFileSync(0, 'utf8'); const input = JSON.parse(stdin || '{}'); process.stdout.write(JSON.stringify(parsePolicy(input))); } catch (error) { process.stdout.write(JSON.stringify({ success: false, error: error.message })); process.exitCode = 0; } } if (require.main === module) { main(); } module.exports = { parsePolicy, extractPolicyNo, extractVin, extractNameCandidates, };