/**
 * Compare live Fiscal.ai standardized statement rows against the local
 * taxonomy sidecar output for a fixed set of companies.
 *
 * Usage (see `--help`):
 *   bun run scripts/compare-fiscal-ai-statements.ts
 *   bun run scripts/compare-fiscal-ai-statements.ts --ticker=MSFT
 *
 * The process exit code is set to 1 when any comparison row ends up as
 * `fail` or `missing_ours`, or when a fatal error is thrown.
 */
import { chromium } from '@playwright/test';
import type { FinancialStatementKind } from '@/lib/types';
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';

/** Statement kinds this script knows how to scrape and compare. */
type ComparableStatement = Extract<FinancialStatementKind, 'income' | 'balance'>;

/** One surface row to compare, plus the Fiscal.ai labels to look for (tried in order). */
type ComparisonTarget = {
  statement: ComparableStatement;
  surfaceKey: string;
  fiscalAiLabels: string[];
  /** When true, a null value on our side may be reported as `not_meaningful` instead of a failure. */
  allowNotMeaningful?: boolean;
  /** Warning codes; at least one must be present on our row for `not_meaningful` to apply. */
  notMeaningfulWarnings?: string[];
};

type CompanyCase = {
  ticker: string;
  /** Ticker segment used in the Fiscal.ai company URL. */
  exchangeTicker: string;
  cik: string;
  form: '10-K';
  comparisons: ComparisonTarget[];
};

/** Parallel arrays as read from `filings.recent` of the SEC submissions payload. */
type SecRecentFilings = {
  accessionNumber: string[];
  primaryDocument: string[];
  filingDate: string[];
  form: string[];
};

type SecSubmission = {
  filings?: {
    recent?: SecRecentFilings;
  };
};

type FiscalAiTableRow = {
  label: string;
  normalizedLabel: string;
  valueText: string;
  value: number | null;
};

type FiscalAiTable = {
  columnLabel: string;
  rows: FiscalAiTableRow[];
};

type ComparisonRow = {
  statement: ComparableStatement;
  surfaceKey: string;
  fiscalAiLabel: string | null;
  fiscalAiValueM: number | null;
  ourValueM: number | null;
  absDiffM: number | null;
  relDiff: number | null;
  status: 'pass' | 'fail' | 'missing_reference' | 'missing_ours' | 'not_meaningful';
};

/** Period shape that tolerates both camelCase and snake_case date fields. */
type ResultPeriod = TaxonomyHydrationResult['periods'][number] & {
  period_start?: string | null;
  period_end?: string | null;
};

/** User-Agent header sent with the SEC submissions request. */
const SEC_USER_AGENT = 'fiscal-clone/compare-fiscal-ai (contact: local-dev)';
/** User-Agent applied to the Playwright page used for scraping. */
const BROWSER_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36';
/** Absolute difference (same unit as the `*_m` values) at or below which a row passes. */
const VALUE_TOLERANCE_M = 1;
/** Relative difference at or below which a row passes. */
const RELATIVE_TOLERANCE = 0.005;

/** Companies to check and, per company, the surface rows to compare. */
const CASES: CompanyCase[] = [
  {
    ticker: 'MSFT',
    exchangeTicker: 'NasdaqGS-MSFT',
    cik: '0000789019',
    form: '10-K',
    comparisons: [
      { statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Total Revenues'] },
      {
        statement: 'income',
        surfaceKey: 'gross_profit',
        fiscalAiLabels: ['Gross Profit'],
        allowNotMeaningful: true,
        notMeaningfulWarnings: ['gross_profit_not_meaningful_broker_pack']
      },
      { statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'Operating Expense'] },
      { statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Operating Profit', 'Operating Income'] },
      { statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Provision for Income Taxes', 'Income Tax Expense', 'Income Taxes'] },
      { statement: 'income', surfaceKey: 'net_income', fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income'] },
    ]
  },
  {
    ticker: 'JPM',
    exchangeTicker: 'NYSE-JPM',
    cik: '0000019617',
    form: '10-K',
    comparisons: [
      { statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Total Net Revenues', 'Total Revenues'] },
      {
        statement: 'income',
        surfaceKey: 'gross_profit',
        fiscalAiLabels: ['Gross Profit'],
        allowNotMeaningful: true,
        notMeaningfulWarnings: ['gross_profit_not_meaningful_bank_pack']
      },
      { statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'Total Operating Expenses', 'Non-Interest Expense'] },
      { statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Pre-Tax Income', 'Operating Income', 'Operating Profit'] },
      { statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Income Taxes', 'Income Tax Expense'] },
      { statement: 'income', surfaceKey: 'net_income', fiscalAiLabels: ['Net Income to Common', 'Net Income Attributable to Common Shareholders', 'Net Income'] },
    ]
  },
  {
    ticker: 'AIG',
    exchangeTicker: 'NYSE-AIG',
    cik: '0000005272',
    form: '10-K',
    comparisons: [
      { statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Total Revenues', 'Revenue'] },
      {
        statement: 'income',
        surfaceKey: 'gross_profit',
        fiscalAiLabels: ['Gross Profit'],
        allowNotMeaningful: true,
        notMeaningfulWarnings: ['gross_profit_not_meaningful_insurance_pack']
      },
      { statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'Insurance Benefits & Claims'] },
      { statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Operating Income', 'Operating Profit'] },
      { statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Income Taxes', 'Income Tax Expense'] },
      { statement: 'income', surfaceKey: 'net_income', fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income'] },
    ]
  },
  {
    ticker: 'O',
    exchangeTicker: 'NYSE-O',
    cik: '0000726728',
    form: '10-K',
    comparisons: [
      { statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Property Revenue', 'Rental Revenue', 'Total Revenues'] },
      { statement: 'income', surfaceKey: 'gross_profit', fiscalAiLabels: ['Gross Profit', 'Property Operating Profit'] },
      { statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'General and Administrative'] },
      { statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Operating Profit', 'Operating Income'] },
      { statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Income Taxes', 'Income Tax Expense'] },
      { statement: 'income', surfaceKey: 'net_income', fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income'] }
    ]
  },
  {
    ticker: 'BLK',
    exchangeTicker: 'NYSE-BLK',
    cik: '0002012383',
    form: '10-K',
    comparisons: [
      { statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Total Revenues', 'Investment advisory and administration fees', 'Advisory and other revenue'] },
      {
        statement: 'income',
        surfaceKey: 'gross_profit',
        fiscalAiLabels: ['Gross Profit'],
        allowNotMeaningful: true,
        notMeaningfulWarnings: ['gross_profit_not_meaningful_broker_pack']
      },
      { statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'Operating Expense'] },
      { statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Operating Income', 'Operating Profit'] },
      { statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Income Taxes', 'Income Tax Expense'] },
      { statement: 'income', surfaceKey: 'net_income', fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Net Income Applicable to BlackRock, Inc.', 'Net Income'] }
    ]
  }
];

/**
 * Scan CLI arguments for `--help`/`-h` and `--ticker=<symbol>`.
 *
 * `--help` prints usage and exits the process with code 0.
 *
 * @param argv Arguments after the script name.
 * @returns The upper-cased ticker from the first `--ticker=` argument, or
 *   `null` when none is given or its value is empty.
 */
function parseTickerFilter(argv: string[]) {
  for (const arg of argv) {
    if (arg === '--help' || arg === '-h') {
      console.log('Compare live Fiscal.ai standardized statement rows against local sidecar output.');
      console.log('');
      console.log('Usage:');
      console.log(' bun run scripts/compare-fiscal-ai-statements.ts');
      console.log(' bun run scripts/compare-fiscal-ai-statements.ts --ticker=MSFT');
      process.exit(0);
    }

    if (arg.startsWith('--ticker=')) {
      const value = arg.slice('--ticker='.length).trim().toUpperCase();
      return value.length > 0 ? value : null;
    }
  }

  return null;
}

/**
 * Normalize a row label for matching: lower-case, `&` becomes `and`, every
 * run of non-alphanumeric characters becomes a single space, then trim.
 */
function normalizeLabel(value: string) {
  return value
    .toLowerCase()
    .replace(/&/g, ' and ')
    .replace(/[^a-z0-9]+/g, ' ')
    .trim()
    .replace(/\s+/g, ' ');
}

/**
 * Parse a number as displayed in a table cell.
 *
 * Commas, `%`, `$` and parentheses are stripped; a value fully wrapped in
 * parentheses is treated as negative.
 *
 * @returns The parsed number, or `null` for empty text, dash-only
 *   placeholders, text containing "pricing", or anything `parseFloat` cannot
 *   turn into a finite number.
 */
function parseDisplayedNumber(value: string) {
  const trimmed = value.trim();
  if (!trimmed || /^[-–—]+$/.test(trimmed) || /pricing/i.test(trimmed)) {
    return null;
  }

  const negative = trimmed.startsWith('(') && trimmed.endsWith(')');
  const normalized = trimmed
    .replace(/,/g, '')
    .replace(/[%$]/g, '')
    .replace(/[()]/g, '')
    .trim();

  if (!normalized) {
    return null;
  }

  const parsed = Number.parseFloat(normalized);
  if (!Number.isFinite(parsed)) {
    return null;
  }

  return negative ? -Math.abs(parsed) : parsed;
}

/** Divide by one million and round to the nearest integer; `null`/non-finite input yields `null`. */
function roundMillions(value: number | null) {
  if (value === null || !Number.isFinite(value)) {
    return null;
  }

  return Math.round(value / 1_000_000);
}

/** `|left - right|`, or `null` when either side is `null`. */
function absoluteDiff(left: number | null, right: number | null) {
  if (left === null || right === null) {
    return null;
  }

  return Math.abs(left - right);
}

/**
 * `|left - right|` relative to `right`, or `null` when either side is `null`.
 * The baseline is clamped to at least 1 so a zero `right` cannot divide by zero.
 */
function relativeDiff(left: number | null, right: number | null) {
  if (left === null || right === null) {
    return null;
  }

  const baseline = Math.max(Math.abs(right), 1);
  return Math.abs(left - right) / baseline;
}

/** Period start date, read from `periodStart` or `period_start`; `null` when absent. */
function periodStart(period: ResultPeriod) {
  return period.periodStart ?? period.period_start ?? null;
}

/** Period end date, read from `periodEnd` or `period_end`; `null` when absent. */
function periodEnd(period: ResultPeriod) {
  return period.periodEnd ?? period.period_end ?? null;
}

/**
 * Pick the id of the period with the latest end date among periods that have
 * both a start and an end and span at least 300 days.
 *
 * @returns The period id, or `null` when no period qualifies.
 */
function chooseDurationPeriodId(result: TaxonomyHydrationResult) {
  const annualPeriods = result.periods
    .filter((period): period is ResultPeriod => Boolean(periodStart(period as ResultPeriod) && periodEnd(period as ResultPeriod)))
    .map((period) => {
      const durationDays = Math.round(
        (Date.parse(periodEnd(period) as string) - Date.parse(periodStart(period) as string)) / (1000 * 60 * 60 * 24)
      );
      return { period, durationDays };
    })
    .filter((entry) => entry.durationDays >= 300)
    .sort((left, right) => {
      return Date.parse(periodEnd(right.period) as string) - Date.parse(periodEnd(left.period) as string);
    });

  return annualPeriods[0]?.period.id ?? null;
}

/**
 * Pick the id of the period with the latest end date among periods that have
 * an end date but no start date.
 *
 * @returns The period id, or `null` when no period qualifies.
 */
function chooseInstantPeriodId(result: TaxonomyHydrationResult) {
  const instantPeriods = result.periods
    .filter((period): period is ResultPeriod => !periodStart(period as ResultPeriod) && Boolean(periodEnd(period as ResultPeriod)))
    .sort((left, right) => Date.parse(periodEnd(right) as string) - Date.parse(periodEnd(left) as string));

  return instantPeriods[0]?.id ?? null;
}

/**
 * Look up our surface row for `surfaceKey` and pick the value to compare.
 *
 * The preferred period is the latest instant period for `balance` and the
 * latest long-duration period otherwise. When the row has no value for that
 * period, the value with the latest parseable period end date is used, with
 * ties and unparseable dates broken by descending period id.
 *
 * @returns The matched row (or `null`) and the chosen value (or `null`).
 */
function findSurfaceValue(
  result: TaxonomyHydrationResult,
  statement: ComparableStatement,
  surfaceKey: string
) {
  const rows = result.surface_rows[statement] ?? [];
  const row = rows.find((entry) => entry.key === surfaceKey) ?? null;
  if (!row) {
    return { row: null, value: null };
  }

  const periodId = statement === 'balance'
    ? chooseInstantPeriodId(result)
    : chooseDurationPeriodId(result);

  if (periodId) {
    const directValue = row.values[periodId];
    if (directValue !== null && directValue !== undefined) {
      return { row, value: directValue };
    }
  }

  const periodById = new Map<string, ResultPeriod>(
    result.periods.map((period) => [period.id, period as ResultPeriod])
  );

  const fallback = Object.entries(row.values)
    // Exclude undefined as well as null so the predicate matches what it claims.
    .filter((entry): entry is [string, number] => entry[1] !== null && entry[1] !== undefined)
    .sort((left, right) => {
      const leftPeriod = periodById.get(left[0]);
      const rightPeriod = periodById.get(right[0]);
      const leftDate = leftPeriod ? Date.parse(periodEnd(leftPeriod) ?? '') : Number.NaN;
      const rightDate = rightPeriod ? Date.parse(periodEnd(rightPeriod) ?? '') : Number.NaN;

      if (Number.isFinite(leftDate) && Number.isFinite(rightDate) && leftDate !== rightDate) {
        return rightDate - leftDate;
      }

      return right[0].localeCompare(left[0]);
    })[0];

  return { row, value: fallback?.[1] ?? null };
}

/** Read `resolutionMethod` / `resolution_method` from a row; `null` when absent or the row is `null`. */
function rowResolutionMethod(row: Record<string, unknown> | null) {
  if (!row) {
    return null;
  }

  return (row.resolutionMethod ?? row.resolution_method ?? null) as string | null;
}

/** Read `warningCodes` / `warning_codes` from a row, keeping only string entries. */
function rowWarningCodes(row: Record<string, unknown> | null) {
  if (!row) {
    return [] as string[];
  }

  const value = row.warningCodes ?? row.warning_codes ?? [];
  return Array.isArray(value)
    ? value.filter((entry): entry is string => typeof entry === 'string')
    : [];
}

/** Build the EDGAR archive directory URL: CIK without leading zeros, accession number without dashes. */
function buildSecFilingUrl(cik: string, accessionNumber: string) {
  return `https://www.sec.gov/Archives/edgar/data/${Number.parseInt(cik, 10)}/${accessionNumber.replace(/-/g, '')}/`;
}

/**
 * Fetch the company's SEC submissions payload and return hydration input for
 * the first entry in `filings.recent` whose form matches `company.form` and
 * that has an accession number, filing date and primary document.
 *
 * @throws Error when the request fails, the payload has no recent filings,
 *   or no matching filing is found.
 */
async function fetchLatestAnnualFiling(company: CompanyCase): Promise<TaxonomyHydrationInput> {
  const cik = company.cik.padStart(10, '0');
  const response = await fetch(`https://data.sec.gov/submissions/CIK${cik}.json`, {
    headers: {
      'user-agent': SEC_USER_AGENT,
      accept: 'application/json'
    }
  });

  if (!response.ok) {
    throw new Error(`SEC submissions fetch failed for ${company.ticker}: ${response.status}`);
  }

  const payload = await response.json() as SecSubmission;
  const recent = payload.filings?.recent;
  if (!recent) {
    throw new Error(`SEC submissions payload missing recent filings for ${company.ticker}`);
  }

  for (let index = 0; index < recent.form.length; index += 1) {
    if (recent.form[index] !== company.form) {
      continue;
    }

    const accessionNumber = recent.accessionNumber[index];
    const filingDate = recent.filingDate[index];
    const primaryDocument = recent.primaryDocument[index];
    if (!accessionNumber || !filingDate || !primaryDocument) {
      continue;
    }

    return {
      filingId: index + 1,
      ticker: company.ticker,
      cik: company.cik,
      accessionNumber,
      filingDate,
      filingType: company.form,
      filingUrl: buildSecFilingUrl(company.cik, accessionNumber),
      primaryDocument
    };
  }

  throw new Error(`No ${company.form} found in SEC recent filings for ${company.ticker}`);
}

/**
 * Open the Fiscal.ai standardized annual statement page and read the first
 * `<table>`: one label and one value per row, taken from the first header
 * column after the label column that is non-empty and not `LTM`.
 *
 * @throws Error (from inside the page) when no table or no such column exists.
 */
async function scrapeFiscalAiTable(
  page: import('@playwright/test').Page,
  exchangeTicker: string,
  statement: 'income' | 'balance'
): Promise<FiscalAiTable> {
  const pagePath = statement === 'income' ? 'income-statement' : 'balance-sheet';
  const url = `https://fiscal.ai/company/${exchangeTicker}/financials/${pagePath}/annual/?templateType=standardized`;

  await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 120_000 });
  await page.waitForSelector('table', { timeout: 120_000 });
  // Fixed extra delay after the table appears — presumably to let the cells finish rendering.
  await page.waitForTimeout(2_500);

  // The callback runs inside the browser page, so it cannot reference
  // module-level helpers; the two helpers are therefore duplicated here.
  return await page.evaluate(() => {
    function normalizeLabel(value: string) {
      return value
        .toLowerCase()
        .replace(/&/g, ' and ')
        .replace(/[^a-z0-9]+/g, ' ')
        .trim()
        .replace(/\s+/g, ' ');
    }

    function parseDisplayedNumber(value: string) {
      const trimmed = value.trim();
      if (!trimmed || /^[-–—]+$/.test(trimmed) || /pricing/i.test(trimmed)) {
        return null;
      }

      const negative = trimmed.startsWith('(') && trimmed.endsWith(')');
      const normalized = trimmed
        .replace(/,/g, '')
        .replace(/[%$]/g, '')
        .replace(/[()]/g, '')
        .trim();

      if (!normalized) {
        return null;
      }

      const parsed = Number.parseFloat(normalized);
      return Number.isFinite(parsed) ? (negative ? -Math.abs(parsed) : parsed) : null;
    }

    const table = document.querySelector('table');
    if (!table) {
      throw new Error('Fiscal.ai table not found');
    }

    // Read the header from the first <tr> only, and keep empty cells so that
    // header positions line up with the cell positions of the data rows.
    const headerRow = table.querySelector('tr');
    const headerCells = headerRow
      ? Array.from(headerRow.querySelectorAll('th, td')).map((cell) => cell.textContent?.trim() ?? '')
      : [];

    const annualColumnIndex = headerCells.findIndex(
      (value, index) => index > 0 && value.length > 0 && value !== 'LTM'
    );
    if (annualColumnIndex < 0) {
      throw new Error(`Could not locate latest annual column in headers: ${headerCells.join(' | ')}`);
    }

    const rows = Array.from(table.querySelectorAll('tr'))
      .slice(1)
      .map((row) => {
        const cells = Array.from(row.querySelectorAll('td'));
        if (cells.length <= annualColumnIndex) {
          return null;
        }

        const label = cells[0]?.textContent?.trim() ?? '';
        const valueText = cells[annualColumnIndex]?.textContent?.trim() ?? '';
        if (!label) {
          return null;
        }

        return {
          label,
          normalizedLabel: normalizeLabel(label),
          valueText,
          value: parseDisplayedNumber(valueText)
        };
      })
      .filter((entry): entry is FiscalAiTableRow => entry !== null);

    return {
      columnLabel: headerCells[annualColumnIndex] ?? 'unknown',
      rows
    };
  });
}

/**
 * Find the scraped row matching one of the candidate labels.
 *
 * Exact normalized-label matches are tried first, for every candidate in
 * order. Only then is a relaxed match attempted: one label must be a string
 * prefix of the other and the extra tokens must all be `total` or `net`.
 *
 * @returns The matching row, or `null` when nothing matches.
 */
function findFiscalAiRow(rows: FiscalAiTableRow[], candidates: string[]) {
  const normalizedCandidates = candidates.map(normalizeLabel);
  const benignExtraTokens = new Set(['total', 'net']);

  for (const candidate of normalizedCandidates) {
    const exactMatch = rows.find((row) => row.normalizedLabel === candidate);
    if (exactMatch) {
      return exactMatch;
    }
  }

  for (const candidate of normalizedCandidates) {
    const candidateTokens = candidate.split(' ').filter((token) => token.length > 0);

    const relaxedMatch = rows.find((row) => {
      const rowTokens = row.normalizedLabel.split(' ').filter((token) => token.length > 0);
      const sharedPrefix = row.normalizedLabel.startsWith(candidate) || candidate.startsWith(row.normalizedLabel);
      if (!sharedPrefix) {
        return false;
      }

      const longer = rowTokens.length >= candidateTokens.length ? rowTokens : candidateTokens;
      const shorter = rowTokens.length >= candidateTokens.length ? candidateTokens : rowTokens;
      const extraTokens = longer.filter((token) => !shorter.includes(token));
      return extraTokens.length > 0 && extraTokens.every((token) => benignExtraTokens.has(token));
    });

    if (relaxedMatch) {
      return relaxedMatch;
    }
  }

  return null;
}

/**
 * Compare one target between our hydration result and the scraped table.
 *
 * Status precedence:
 * 1. `not_meaningful` — our row exists with no value, the target allows it,
 *    the row's resolution method is `not_meaningful`, and the row carries at
 *    least one of the target's expected warning codes.
 * 2. `missing_reference` — no Fiscal.ai row matched.
 * 3. `missing_ours` — we have no value.
 * 4. `pass` — within the absolute OR the relative tolerance.
 * 5. `fail` — everything else.
 */
function compareRow(
  target: ComparisonTarget,
  result: TaxonomyHydrationResult,
  fiscalAiTable: FiscalAiTable
): ComparisonRow {
  const fiscalAiRow = findFiscalAiRow(fiscalAiTable.rows, target.fiscalAiLabels);
  const fiscalAiValueM = fiscalAiRow?.value ?? null;
  const ourSurface = findSurfaceValue(result, target.statement, target.surfaceKey);
  const ourValueM = roundMillions(ourSurface.value);
  const absDiffM = absoluteDiff(ourValueM, fiscalAiValueM);
  const relDiffValue = relativeDiff(ourValueM, fiscalAiValueM);
  const resolutionMethod = rowResolutionMethod(ourSurface.row as Record<string, unknown> | null);
  const warningCodes = rowWarningCodes(ourSurface.row as Record<string, unknown> | null);
  // An absent or empty list can never match, which preserves the "must list
  // at least one warning" requirement without a non-null assertion.
  const expectedWarnings = target.notMeaningfulWarnings ?? [];

  let status: ComparisonRow['status'];
  if (
    ourSurface.row
    && ourValueM === null
    && target.allowNotMeaningful
    && resolutionMethod === 'not_meaningful'
    && expectedWarnings.some((warning) => warningCodes.includes(warning))
  ) {
    status = 'not_meaningful';
  } else if (!fiscalAiRow) {
    status = 'missing_reference';
  } else if (ourValueM === null) {
    status = 'missing_ours';
  } else if (
    absDiffM !== null
    && relDiffValue !== null
    && (absDiffM <= VALUE_TOLERANCE_M || relDiffValue <= RELATIVE_TOLERANCE)
  ) {
    status = 'pass';
  } else {
    status = 'fail';
  }

  return {
    statement: target.statement,
    surfaceKey: target.surfaceKey,
    fiscalAiLabel: fiscalAiRow?.label ?? null,
    fiscalAiValueM,
    ourValueM,
    absDiffM,
    relDiff: relDiffValue,
    status
  };
}

/**
 * Run every comparison for one company: fetch its latest filing, hydrate it
 * locally, scrape both Fiscal.ai tables, compare, and log a summary line plus
 * one line per row.
 *
 * The logged `pass=` count is "rows that are not failures", so it also
 * includes `missing_reference` and `not_meaningful` rows.
 *
 * @returns The rows and the subset with status `fail` or `missing_ours`.
 * @throws Error when the hydration result's `parse_status` is not `ready`.
 */
async function compareCase(page: import('@playwright/test').Page, company: CompanyCase) {
  const filing = await fetchLatestAnnualFiling(company);
  const result = await hydrateFilingTaxonomySnapshot(filing);

  if (result.parse_status !== 'ready') {
    throw new Error(`${company.ticker} parse_status=${result.parse_status}${result.parse_error ? ` parse_error=${result.parse_error}` : ''}`);
  }

  const incomeTable = await scrapeFiscalAiTable(page, company.exchangeTicker, 'income');
  const balanceTable = await scrapeFiscalAiTable(page, company.exchangeTicker, 'balance');

  const rows = company.comparisons.map((target) => {
    const table = target.statement === 'income' ? incomeTable : balanceTable;
    return compareRow(target, result, table);
  });

  const failures = rows.filter((row) => row.status === 'fail' || row.status === 'missing_ours');

  console.log(
    `[compare-fiscal-ai] ${company.ticker} filing=${filing.accessionNumber} fiscal_pack=${result.fiscal_pack ?? 'null'} income_column="${incomeTable.columnLabel}" balance_column="${balanceTable.columnLabel}" pass=${rows.length - failures.length}/${rows.length}`
  );

  for (const row of rows) {
    console.log(
      [
        ' ',
        row.status.toUpperCase(),
        `${row.statement}.${row.surfaceKey}`,
        `fiscal_label=${row.fiscalAiLabel ?? 'null'}`,
        `ours_m=${row.ourValueM ?? 'null'}`,
        `fiscal_m=${row.fiscalAiValueM ?? 'null'}`,
        `abs_diff_m=${row.absDiffM ?? 'null'}`,
        `rel_diff=${row.relDiff === null ? 'null' : row.relDiff.toFixed(4)}`
      ].join(' ')
    );
  }

  return {
    ticker: company.ticker,
    filing,
    fiscalPack: result.fiscal_pack,
    rows,
    failures
  };
}

/**
 * Entry point: select the cases (optionally filtered by `--ticker=`), run
 * them one after another in a single browser page, and report failures.
 * Sets `process.exitCode` to 1 on an unknown ticker or when any row failed.
 */
async function main() {
  // Default the engine timeout only when the caller has not set one.
  process.env.XBRL_ENGINE_TIMEOUT_MS = process.env.XBRL_ENGINE_TIMEOUT_MS ?? '180000';

  const tickerFilter = parseTickerFilter(process.argv.slice(2));
  const selectedCases = tickerFilter
    ? CASES.filter((entry) => entry.ticker === tickerFilter)
    : CASES;

  if (selectedCases.length === 0) {
    console.error(`[compare-fiscal-ai] unknown ticker: ${tickerFilter}`);
    process.exitCode = 1;
    return;
  }

  const browser = await chromium.launch({ headless: false });
  const page = await browser.newPage({ userAgent: BROWSER_USER_AGENT });
  const failures: Array<{ ticker: string; row: ComparisonRow }> = [];

  try {
    for (const company of selectedCases) {
      const result = await compareCase(page, company);
      for (const failure of result.failures) {
        failures.push({ ticker: company.ticker, row: failure });
      }

      // Short pause between companies.
      await Bun.sleep(150);
    }
  } finally {
    // Close the browser even when a case throws.
    await browser.close();
  }

  console.log(`[compare-fiscal-ai] completed cases=${selectedCases.length} failures=${failures.length}`);

  if (failures.length === 0) {
    return;
  }

  for (const failure of failures) {
    console.error(
      `[compare-fiscal-ai] ${failure.ticker} ${failure.row.statement}.${failure.row.surfaceKey} status=${failure.row.status} ours_m=${failure.row.ourValueM ?? 'null'} fiscal_m=${failure.row.fiscalAiValueM ?? 'null'} fiscal_label=${failure.row.fiscalAiLabel ?? 'null'}`
    );
  }

  process.exitCode = 1;
}

void main().catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error(`[compare-fiscal-ai] fatal: ${message}`);
  process.exitCode = 1;
});