import type { FinancialStatementKind } from '@/lib/types';
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
import type { TaxonomyHydrationInput } from '@/lib/server/taxonomy/types';

/**
 * CLI script: hydrates one representative filing per taxonomy pack through
 * `hydrateFilingTaxonomySnapshot` and checks the result against per-case
 * expectations (pack id, required surface keys, required KPI keys, absence of
 * fallback warnings). Sets a non-zero exit code when any case fails.
 */

/** Resolved value of `hydrateFilingTaxonomySnapshot` for a single filing. */
type HydratedSnapshot = Awaited<ReturnType<typeof hydrateFilingTaxonomySnapshot>>;

/** One filing to hydrate plus the expectations its snapshot must satisfy. */
type ValidationCase = {
  name: string;
  expectedPack: string;
  input: TaxonomyHydrationInput;
  requiredSurfaceKeys: Partial<Record<FinancialStatementKind, string[]>>;
  requiredKpiKeys?: string[];
};

/** A failed case together with every issue detected for it. */
type ValidationFailure = {
  name: string;
  issues: string[];
};

/** Prefix used on every line this script prints. */
const LOG_PREFIX = '[validate-taxonomy-packs]';

/** Pause inserted between consecutive cases, in milliseconds. */
const INTER_CASE_DELAY_MS = 150;

/** Income-statement keys required of every case in the corpus. */
const UNIVERSAL_INCOME_KEYS = [
  'revenue',
  'gross_profit',
  'operating_expenses',
  'operating_income',
  'income_tax_expense',
  'net_income'
] as const;

/** Operating-expense breakdown keys required of every case in the corpus. */
const EXPENSE_BREAKDOWN_KEYS = [
  'selling_general_and_administrative',
  'research_and_development',
  'other_operating_expense'
] as const;

/** Representative filings, one per expected taxonomy pack. */
const CORPUS: ValidationCase[] = [
  {
    name: 'core-msft-2026-01-28',
    expectedPack: 'core',
    input: {
      filingId: 1,
      ticker: 'MSFT',
      cik: '0000789019',
      accessionNumber: '0001193125-26-027207',
      filingDate: '2026-01-28',
      filingType: '10-Q',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/789019/000119312526027207/',
      primaryDocument: 'msft-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS],
      balance: ['total_assets']
    }
  },
  {
    name: 'bank-jpm-2026-02-13',
    expectedPack: 'bank_lender',
    input: {
      filingId: 2,
      ticker: 'JPM',
      cik: '0000019617',
      accessionNumber: '0001628280-26-008131',
      filingDate: '2026-02-13',
      filingType: '10-K',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/19617/000162828026008131/',
      primaryDocument: 'jpm-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [
        ...UNIVERSAL_INCOME_KEYS,
        ...EXPENSE_BREAKDOWN_KEYS,
        'net_interest_income',
        'noninterest_income'
      ],
      balance: ['loans', 'deposits']
    },
    requiredKpiKeys: ['net_interest_margin']
  },
  {
    name: 'insurance-aig-2026-02-12',
    expectedPack: 'insurance',
    input: {
      filingId: 3,
      ticker: 'AIG',
      cik: '0000005272',
      accessionNumber: '0000005272-26-000023',
      filingDate: '2026-02-12',
      filingType: '10-K',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/5272/000000527226000023/',
      primaryDocument: 'aig-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [
        ...UNIVERSAL_INCOME_KEYS,
        ...EXPENSE_BREAKDOWN_KEYS,
        'premiums',
        'claims_and_benefits'
      ],
      balance: ['policy_liabilities']
    },
    requiredKpiKeys: ['combined_ratio']
  },
  {
    name: 'reit-o-2026-02-25',
    expectedPack: 'reit_real_estate',
    input: {
      filingId: 4,
      ticker: 'O',
      cik: '0000726728',
      accessionNumber: '0000726728-26-000011',
      filingDate: '2026-02-25',
      filingType: '10-K',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/726728/000072672826000011/',
      primaryDocument: 'o-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'rental_revenue'],
      balance: ['investment_property', 'total_assets']
    },
    requiredKpiKeys: ['property_count']
  },
  {
    name: 'broker-blk-2026-02-25',
    expectedPack: 'broker_asset_manager',
    input: {
      filingId: 5,
      ticker: 'BLK',
      cik: '0002012383',
      accessionNumber: '0001193125-26-071966',
      filingDate: '2026-02-25',
      filingType: '10-K',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/2012383/000119312526071966/',
      primaryDocument: 'blk-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'fee_revenue'],
      balance: ['total_assets', 'total_liabilities']
    },
    requiredKpiKeys: ['aum', 'fee_paying_aum']
  }
];

/** Normalization warnings that are reported as a validation issue when present. */
const FALLBACK_WARNINGS = new Set<string>([
  'surface_rows_deferred_to_typescript',
  'ts_compact_surface_fallback_used'
]);

/**
 * Scans CLI arguments for `--help`/`-h` and `--case=<name>`.
 *
 * `--help`/`-h` prints usage and exits the process with status 0. Arguments
 * that match neither form are ignored.
 *
 * @param argv Command-line arguments (without the runtime and script path).
 * @returns The trimmed value of the first `--case=` argument, or `null` when
 *   no such argument exists or its value is empty.
 */
function parseCaseFilter(argv: string[]): string | null {
  const casePrefix = '--case=';

  for (const arg of argv) {
    if (arg === '--help' || arg === '-h') {
      console.log('Validate live SEC representative filings for each active taxonomy pack.');
      console.log('');
      console.log('Usage:');
      console.log(' bun run scripts/validate-taxonomy-packs.ts');
      console.log(' bun run scripts/validate-taxonomy-packs.ts --case=bank-jpm-2026-02-13');
      process.exit(0);
    }

    if (arg.startsWith(casePrefix)) {
      // Only the first --case= argument is honored; an empty value means "no filter".
      const value = arg.slice(casePrefix.length).trim();
      return value.length > 0 ? value : null;
    }
  }

  return null;
}

/**
 * Lists the surface-row keys of one statement in a hydrated snapshot.
 *
 * @param result Snapshot returned by `hydrateFilingTaxonomySnapshot`.
 * @param statement Statement kind whose rows are read.
 * @returns Row keys in snapshot order; empty when the statement has no rows.
 */
function keysForStatement(result: HydratedSnapshot, statement: FinancialStatementKind) {
  return (result.surface_rows[statement] ?? []).map((row) => row.key);
}

/**
 * Hydrates a single case, checks it against its expectations and prints a
 * one-line summary to stdout.
 *
 * @param testCase Case to hydrate and check.
 * @returns `null` when every check passes, otherwise the case name with the
 *   list of issues found.
 */
async function validateCase(testCase: ValidationCase): Promise<ValidationFailure | null> {
  const startedAt = Date.now();
  const result = await hydrateFilingTaxonomySnapshot(testCase.input);

  const issues: string[] = [];
  const warnings = result.normalization_summary.warnings ?? [];
  const kpiKeys = result.kpi_rows.map((row) => row.key);
  const kpiKeySet = new Set(kpiKeys);

  if (result.parse_status !== 'ready') {
    issues.push(
      `parse_status=${result.parse_status}${result.parse_error ? ` parse_error=${result.parse_error}` : ''}`
    );
  }

  if (result.fiscal_pack !== testCase.expectedPack) {
    issues.push(`fiscal_pack=${result.fiscal_pack ?? 'null'} expected=${testCase.expectedPack}`);
  }

  const allStatementsEmpty = (Object.values(result.surface_rows) as unknown[][]).every(
    (rows) => rows.length === 0
  );
  if (allStatementsEmpty) {
    issues.push('surface_rows are empty');
  }

  // Only the first matching fallback warning is reported.
  const fallbackWarning = warnings.find((warning) => FALLBACK_WARNINGS.has(warning));
  if (fallbackWarning) {
    issues.push(`unexpected fallback warning=${fallbackWarning}`);
  }

  const requiredByStatement = Object.entries(testCase.requiredSurfaceKeys) as Array<
    [FinancialStatementKind, string[]]
  >;
  for (const [statement, requiredKeys] of requiredByStatement) {
    const actualKeys = new Set(keysForStatement(result, statement));
    for (const requiredKey of requiredKeys) {
      if (!actualKeys.has(requiredKey)) {
        issues.push(`${statement} missing surface key=${requiredKey}`);
      }
    }
  }

  for (const requiredKpiKey of testCase.requiredKpiKeys ?? []) {
    if (!kpiKeySet.has(requiredKpiKey)) {
      issues.push(`missing kpi key=${requiredKpiKey}`);
    }
  }

  const durationMs = Date.now() - startedAt;
  const incomeKeys = keysForStatement(result, 'income');
  const balanceKeys = keysForStatement(result, 'balance');

  console.log(
    [
      `${LOG_PREFIX} ${testCase.name}`,
      `status=${issues.length === 0 ? 'pass' : 'fail'}`,
      `parse=${result.parse_status}`,
      `pack=${result.fiscal_pack ?? 'null'}`,
      `income=${incomeKeys.join(',') || '-'}`,
      `balance=${balanceKeys.join(',') || '-'}`,
      `kpis=${kpiKeys.join(',') || '-'}`,
      `warnings=${warnings.join(',') || '-'}`,
      `durationMs=${durationMs}`
    ].join(' ')
  );

  if (issues.length === 0) {
    return null;
  }

  return { name: testCase.name, issues };
}

/**
 * Entry point: selects the cases to run, validates them one at a time, prints
 * a completion summary and reports failures on stderr. Sets
 * `process.exitCode = 1` when the requested case is unknown or any case fails.
 */
async function main() {
  // Apply the default only when the caller has not already set the variable.
  process.env.XBRL_ENGINE_TIMEOUT_MS ??= '180000';

  const requestedCase = parseCaseFilter(process.argv.slice(2));
  const selectedCases = requestedCase
    ? CORPUS.filter((testCase) => testCase.name === requestedCase)
    : CORPUS;

  if (selectedCases.length === 0) {
    console.error(`${LOG_PREFIX} unknown case: ${requestedCase}`);
    console.error(`${LOG_PREFIX} known cases: ${CORPUS.map((testCase) => testCase.name).join(', ')}`);
    process.exitCode = 1;
    return;
  }

  const failures: ValidationFailure[] = [];
  const startedAt = Date.now();

  for (const [index, testCase] of selectedCases.entries()) {
    try {
      const failure = await validateCase(testCase);
      if (failure) {
        failures.push(failure);
      }
    } catch (error) {
      // A thrown error fails only this case; the remaining cases still run.
      failures.push({
        name: testCase.name,
        issues: [error instanceof Error ? error.message : String(error)]
      });
    }

    // Pause between cases only; there is nothing to wait for after the last one.
    // NOTE(review): the pause presumably spaces out remote requests — confirm.
    if (index < selectedCases.length - 1) {
      await Bun.sleep(INTER_CASE_DELAY_MS);
    }
  }

  const durationSec = ((Date.now() - startedAt) / 1000).toFixed(1);
  console.log(
    `${LOG_PREFIX} completed cases=${selectedCases.length} failures=${failures.length} durationSec=${durationSec}`
  );

  if (failures.length === 0) {
    return;
  }

  for (const failure of failures) {
    console.error(`${LOG_PREFIX} ${failure.name}`);
    for (const issue of failure.issues) {
      console.error(` - ${issue}`);
    }
  }

  process.exitCode = 1;
}

// Handle an unexpected rejection explicitly instead of leaving the promise floating.
main().catch((error) => {
  console.error(`${LOG_PREFIX} fatal: ${error instanceof Error ? error.message : String(error)}`);
  process.exitCode = 1;
});