868 lines
30 KiB
TypeScript
868 lines
30 KiB
TypeScript
import { chromium } from '@playwright/test';
|
|
|
|
import type { FinancialStatementKind } from '@/lib/types';
|
|
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
|
|
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
|
|
|
|
type ComparisonTarget = {
|
|
statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>;
|
|
surfaceKey: string;
|
|
fiscalAiLabels: string[];
|
|
allowNotMeaningful?: boolean;
|
|
notMeaningfulWarnings?: string[];
|
|
};
|
|
|
|
type CompanyCase = {
|
|
ticker: string;
|
|
exchangeTicker: string;
|
|
cik: string;
|
|
form: '10-K';
|
|
comparisons: ComparisonTarget[];
|
|
};
|
|
|
|
type SecRecentFilings = {
|
|
accessionNumber: string[];
|
|
primaryDocument: string[];
|
|
filingDate: string[];
|
|
form: string[];
|
|
};
|
|
|
|
type SecSubmission = {
|
|
filings?: {
|
|
recent?: SecRecentFilings;
|
|
};
|
|
};
|
|
|
|
type FiscalAiTableRow = {
|
|
label: string;
|
|
normalizedLabel: string;
|
|
valueText: string;
|
|
value: number | null;
|
|
};
|
|
|
|
type FiscalAiTable = {
|
|
columnLabel: string;
|
|
rows: FiscalAiTableRow[];
|
|
};
|
|
|
|
type ComparisonRow = {
|
|
statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>;
|
|
surfaceKey: string;
|
|
fiscalAiLabel: string | null;
|
|
fiscalAiValueM: number | null;
|
|
ourValueM: number | null;
|
|
absDiffM: number | null;
|
|
relDiff: number | null;
|
|
status: 'pass' | 'fail' | 'missing_reference' | 'missing_ours' | 'not_meaningful';
|
|
};
|
|
|
|
type ResultPeriod = TaxonomyHydrationResult['periods'][number] & {
|
|
period_start?: string | null;
|
|
period_end?: string | null;
|
|
};
|
|
|
|
const SEC_USER_AGENT = 'fiscal-clone/compare-fiscal-ai (contact: local-dev)';
|
|
const BROWSER_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36';
|
|
const VALUE_TOLERANCE_M = 1;
|
|
const RELATIVE_TOLERANCE = 0.005;
|
|
|
|
const CASES: CompanyCase[] = [
|
|
{
|
|
ticker: 'MSFT',
|
|
exchangeTicker: 'NasdaqGS-MSFT',
|
|
cik: '0000789019',
|
|
form: '10-K',
|
|
comparisons: [
|
|
{ statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Total Revenues'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'gross_profit',
|
|
fiscalAiLabels: ['Gross Profit'],
|
|
allowNotMeaningful: true,
|
|
notMeaningfulWarnings: ['gross_profit_not_meaningful_broker_pack']
|
|
},
|
|
{ statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'Operating Expense'] },
|
|
{ statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Operating Profit', 'Operating Income'] },
|
|
{ statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Provision for Income Taxes', 'Income Tax Expense', 'Income Taxes'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'net_income',
|
|
fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income']
|
|
},
|
|
{ statement: 'balance', surfaceKey: 'current_assets', fiscalAiLabels: ['Current Assets', 'Total Current Assets'] },
|
|
{ statement: 'balance', surfaceKey: 'total_assets', fiscalAiLabels: ['Total Assets'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'operating_cash_flow', fiscalAiLabels: ['Cash from Operating Activities', 'Operating Cash Flow', 'Net Cash from Operations', 'Net Cash Provided by Operating'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'capital_expenditures', fiscalAiLabels: ['Capital Expenditures', 'Capital Expenditure'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'free_cash_flow', fiscalAiLabels: ['Free Cash Flow', 'Levered Free Cash Flow'] },
|
|
]
|
|
},
|
|
{
|
|
ticker: 'JPM',
|
|
exchangeTicker: 'NYSE-JPM',
|
|
cik: '0000019617',
|
|
form: '10-K',
|
|
comparisons: [
|
|
{ statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Total Net Revenues', 'Total Revenues'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'gross_profit',
|
|
fiscalAiLabels: ['Gross Profit'],
|
|
allowNotMeaningful: true,
|
|
notMeaningfulWarnings: ['gross_profit_not_meaningful_bank_pack']
|
|
},
|
|
{ statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'Total Operating Expenses', 'Non-Interest Expense'] },
|
|
{ statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Pre-Tax Income', 'Operating Income', 'Operating Profit'] },
|
|
{ statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Income Taxes', 'Income Tax Expense'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'net_income',
|
|
fiscalAiLabels: ['Net Income to Common', 'Net Income Attributable to Common Shareholders', 'Net Income']
|
|
},
|
|
{ statement: 'balance', surfaceKey: 'loans', fiscalAiLabels: ['Net Loans', 'Loans', 'Loans Receivable'] },
|
|
{ statement: 'balance', surfaceKey: 'total_assets', fiscalAiLabels: ['Total Assets'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'operating_cash_flow', fiscalAiLabels: ['Cash from Operating Activities', 'Net Cash from Operating Activities', 'Net Cash Provided by Operating'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'investing_cash_flow', fiscalAiLabels: ['Cash from Investing Activities', 'Net Cash from Investing Activities', 'Net Cash Provided by Investing'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'financing_cash_flow', fiscalAiLabels: ['Cash from Financing Activities', 'Net Cash from Financing Activities', 'Net Cash Provided by Financing'] },
|
|
]
|
|
},
|
|
{
|
|
ticker: 'AIG',
|
|
exchangeTicker: 'NYSE-AIG',
|
|
cik: '0000005272',
|
|
form: '10-K',
|
|
comparisons: [
|
|
{ statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Total Revenues', 'Revenue'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'gross_profit',
|
|
fiscalAiLabels: ['Gross Profit'],
|
|
allowNotMeaningful: true,
|
|
notMeaningfulWarnings: ['gross_profit_not_meaningful_insurance_pack']
|
|
},
|
|
{ statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'Insurance Benefits & Claims'] },
|
|
{ statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Operating Income', 'Operating Profit'] },
|
|
{ statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Income Taxes', 'Income Tax Expense'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'net_income',
|
|
fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income']
|
|
},
|
|
{
|
|
statement: 'balance',
|
|
surfaceKey: 'deferred_acquisition_costs',
|
|
fiscalAiLabels: [
|
|
'Deferred Acquisition Costs',
|
|
'Deferred Policy Acquisition Costs',
|
|
'Deferred Policy Acquisition Costs and Value of Business Acquired'
|
|
]
|
|
},
|
|
{ statement: 'balance', surfaceKey: 'total_assets', fiscalAiLabels: ['Total Assets'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'operating_cash_flow', fiscalAiLabels: ['Cash from Operating Activities', 'Operating Cash Flow', 'Net Cash from Operations', 'Net Cash Provided by Operating'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'free_cash_flow', fiscalAiLabels: ['Free Cash Flow', 'Levered Free Cash Flow'] },
|
|
]
|
|
},
|
|
{
|
|
ticker: 'O',
|
|
exchangeTicker: 'NYSE-O',
|
|
cik: '0000726728',
|
|
form: '10-K',
|
|
comparisons: [
|
|
{ statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Property Revenue', 'Rental Revenue', 'Total Revenues'] },
|
|
{ statement: 'income', surfaceKey: 'gross_profit', fiscalAiLabels: ['Gross Profit', 'Property Operating Profit'] },
|
|
{ statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'General and Administrative'] },
|
|
{ statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Operating Profit', 'Operating Income'] },
|
|
{ statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Income Taxes', 'Income Tax Expense'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'net_income',
|
|
fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income']
|
|
},
|
|
{
|
|
statement: 'balance',
|
|
surfaceKey: 'investment_property',
|
|
fiscalAiLabels: [
|
|
'Investment Property',
|
|
'Investment Properties',
|
|
'Real Estate Investment Property, Net',
|
|
'Real Estate Investment Property, at Cost',
|
|
'Total real estate held for investment, at cost'
|
|
]
|
|
},
|
|
{ statement: 'balance', surfaceKey: 'total_assets', fiscalAiLabels: ['Total Assets'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'operating_cash_flow', fiscalAiLabels: ['Cash from Operating Activities', 'Operating Cash Flow', 'Net Cash from Operations', 'Net Cash Provided by Operating'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'capital_expenditures', fiscalAiLabels: ['Capital Expenditures', 'Capital Expenditure'] },
|
|
{ statement: 'cash_flow', surfaceKey: 'free_cash_flow', fiscalAiLabels: ['Free Cash Flow', 'Levered Free Cash Flow'] }
|
|
]
|
|
},
|
|
{
|
|
ticker: 'BLK',
|
|
exchangeTicker: 'NYSE-BLK',
|
|
cik: '0002012383',
|
|
form: '10-K',
|
|
comparisons: [
|
|
{ statement: 'income', surfaceKey: 'revenue', fiscalAiLabels: ['Total Revenues', 'Investment advisory and administration fees', 'Advisory and other revenue'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'gross_profit',
|
|
fiscalAiLabels: ['Gross Profit'],
|
|
allowNotMeaningful: true,
|
|
notMeaningfulWarnings: ['gross_profit_not_meaningful_broker_pack']
|
|
},
|
|
{ statement: 'income', surfaceKey: 'operating_expenses', fiscalAiLabels: ['Operating Expenses', 'Operating Expense'] },
|
|
{ statement: 'income', surfaceKey: 'operating_income', fiscalAiLabels: ['Operating Income', 'Operating Profit'] },
|
|
{ statement: 'income', surfaceKey: 'income_tax_expense', fiscalAiLabels: ['Income Taxes', 'Income Tax Expense'] },
|
|
{
|
|
statement: 'income',
|
|
surfaceKey: 'net_income',
|
|
fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Net Income Applicable to BlackRock, Inc.', 'Net Income']
|
|
}
|
|
]
|
|
}
|
|
];
|
|
|
|
function parseTickerFilter(argv: string[]) {
|
|
let ticker: string | null = null;
|
|
let statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'> | null = null;
|
|
|
|
for (const arg of argv) {
|
|
if (arg === '--help' || arg === '-h') {
|
|
console.log('Compare live Fiscal.ai standardized statement rows against local sidecar output.');
|
|
console.log('');
|
|
console.log('Usage:');
|
|
console.log(' bun run scripts/compare-fiscal-ai-statements.ts');
|
|
console.log(' bun run scripts/compare-fiscal-ai-statements.ts --ticker=MSFT');
|
|
console.log(' bun run scripts/compare-fiscal-ai-statements.ts --statement=balance');
|
|
console.log(' bun run scripts/compare-fiscal-ai-statements.ts --statement=cash_flow');
|
|
process.exit(0);
|
|
}
|
|
|
|
if (arg.startsWith('--ticker=')) {
|
|
const value = arg.slice('--ticker='.length).trim().toUpperCase();
|
|
ticker = value.length > 0 ? value : null;
|
|
continue;
|
|
}
|
|
|
|
if (arg.startsWith('--statement=')) {
|
|
const value = arg.slice('--statement='.length).trim().toLowerCase().replace(/-/g, '_');
|
|
if (value === 'income' || value === 'balance' || value === 'cash_flow') {
|
|
statement = value;
|
|
}
|
|
}
|
|
}
|
|
|
|
return { ticker, statement };
|
|
}
|
|
|
|
function normalizeLabel(value: string) {
|
|
return value
|
|
.toLowerCase()
|
|
.replace(/&/g, ' and ')
|
|
.replace(/[^a-z0-9]+/g, ' ')
|
|
.trim()
|
|
.replace(/\s+/g, ' ');
|
|
}
|
|
|
|
function parseDisplayedNumber(value: string) {
|
|
const trimmed = value.trim();
|
|
if (!trimmed || /^[-–—]+$/.test(trimmed) || /pricing/i.test(trimmed)) {
|
|
return null;
|
|
}
|
|
|
|
const negative = trimmed.startsWith('(') && trimmed.endsWith(')');
|
|
const normalized = trimmed
|
|
.replace(/,/g, '')
|
|
.replace(/[%$]/g, '')
|
|
.replace(/[()]/g, '')
|
|
.trim();
|
|
|
|
if (!normalized) {
|
|
return null;
|
|
}
|
|
|
|
const parsed = Number.parseFloat(normalized);
|
|
if (!Number.isFinite(parsed)) {
|
|
return null;
|
|
}
|
|
|
|
return negative ? -Math.abs(parsed) : parsed;
|
|
}
|
|
|
|
function roundMillions(value: number | null) {
|
|
if (value === null || !Number.isFinite(value)) {
|
|
return null;
|
|
}
|
|
|
|
return Math.round(value / 1_000_000);
|
|
}
|
|
|
|
function absoluteDiff(left: number | null, right: number | null) {
|
|
if (left === null || right === null) {
|
|
return null;
|
|
}
|
|
|
|
return Math.abs(left - right);
|
|
}
|
|
|
|
function relativeDiff(left: number | null, right: number | null) {
|
|
if (left === null || right === null) {
|
|
return null;
|
|
}
|
|
|
|
const baseline = Math.max(Math.abs(right), 1);
|
|
return Math.abs(left - right) / baseline;
|
|
}
|
|
|
|
function periodStart(period: ResultPeriod): string | null {
|
|
const start = ('periodStart' in period ? period.periodStart : undefined) ?? period.period_start ?? null;
|
|
return typeof start === 'string' ? start : null;
|
|
}
|
|
|
|
function periodEnd(period: ResultPeriod): string | null {
|
|
const end = ('periodEnd' in period ? period.periodEnd : undefined) ?? period.period_end ?? null;
|
|
return typeof end === 'string' ? end : null;
|
|
}
|
|
|
|
function chooseDurationPeriodId(result: TaxonomyHydrationResult) {
|
|
const annualPeriods = result.periods
|
|
.filter((period): period is ResultPeriod => Boolean(periodStart(period as ResultPeriod) && periodEnd(period as ResultPeriod)))
|
|
.map((period) => {
|
|
const durationDays = Math.round(
|
|
(Date.parse(periodEnd(period) as string) - Date.parse(periodStart(period) as string)) / (1000 * 60 * 60 * 24)
|
|
);
|
|
return { period, durationDays };
|
|
})
|
|
.filter((entry) => entry.durationDays >= 300)
|
|
.sort((left, right) => {
|
|
return Date.parse(periodEnd(right.period) as string) - Date.parse(periodEnd(left.period) as string);
|
|
});
|
|
|
|
return annualPeriods[0]?.period.id ?? null;
|
|
}
|
|
|
|
function chooseInstantPeriodId(result: TaxonomyHydrationResult) {
|
|
const instantPeriods = result.periods
|
|
.filter((period): period is ResultPeriod => !periodStart(period as ResultPeriod) && Boolean(periodEnd(period as ResultPeriod)))
|
|
.sort((left, right) => Date.parse(periodEnd(right) as string) - Date.parse(periodEnd(left) as string));
|
|
|
|
return instantPeriods[0]?.id ?? null;
|
|
}
|
|
|
|
function parseColumnLabelPeriodEnd(columnLabel: string) {
|
|
const match = columnLabel.match(/^([A-Za-z]{3})\s+'?(\d{2,4})$/);
|
|
if (!match) {
|
|
return null;
|
|
}
|
|
|
|
const [, monthToken, yearToken] = match;
|
|
const monthMap: Record<string, number> = {
|
|
jan: 0,
|
|
feb: 1,
|
|
mar: 2,
|
|
apr: 3,
|
|
may: 4,
|
|
jun: 5,
|
|
jul: 6,
|
|
aug: 7,
|
|
sep: 8,
|
|
oct: 9,
|
|
nov: 10,
|
|
dec: 11
|
|
};
|
|
const month = monthMap[monthToken.toLowerCase()];
|
|
if (month === undefined) {
|
|
return null;
|
|
}
|
|
|
|
const parsedYear = Number.parseInt(yearToken, 10);
|
|
if (!Number.isFinite(parsedYear)) {
|
|
return null;
|
|
}
|
|
|
|
const year = yearToken.length === 2 ? 2000 + parsedYear : parsedYear;
|
|
return { month, year };
|
|
}
|
|
|
|
function choosePeriodIdForColumnLabel(
|
|
result: TaxonomyHydrationResult,
|
|
statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>,
|
|
columnLabel: string
|
|
) {
|
|
const parsed = parseColumnLabelPeriodEnd(columnLabel);
|
|
if (!parsed) {
|
|
return null;
|
|
}
|
|
|
|
const matchingPeriods = result.periods
|
|
.filter((period): period is ResultPeriod => {
|
|
const end = periodEnd(period as ResultPeriod);
|
|
if (!end) {
|
|
return false;
|
|
}
|
|
const endDate = new Date(end);
|
|
if (Number.isNaN(endDate.getTime())) {
|
|
return false;
|
|
}
|
|
|
|
const periodMatchesStatement = statement === 'balance'
|
|
? !periodStart(period as ResultPeriod)
|
|
: Boolean(periodStart(period as ResultPeriod));
|
|
if (!periodMatchesStatement) {
|
|
return false;
|
|
}
|
|
|
|
return endDate.getUTCFullYear() === parsed.year && endDate.getUTCMonth() === parsed.month;
|
|
})
|
|
.sort((left, right) => {
|
|
if (statement !== 'balance') {
|
|
const leftStart = periodStart(left);
|
|
const rightStart = periodStart(right);
|
|
const leftDuration = leftStart
|
|
? Math.round((Date.parse(periodEnd(left) as string) - Date.parse(leftStart)) / (1000 * 60 * 60 * 24))
|
|
: -1;
|
|
const rightDuration = rightStart
|
|
? Math.round((Date.parse(periodEnd(right) as string) - Date.parse(rightStart)) / (1000 * 60 * 60 * 24))
|
|
: -1;
|
|
|
|
if (leftDuration !== rightDuration) {
|
|
return rightDuration - leftDuration;
|
|
}
|
|
}
|
|
|
|
return Date.parse(periodEnd(right) as string) - Date.parse(periodEnd(left) as string);
|
|
});
|
|
|
|
return matchingPeriods[0]?.id ?? null;
|
|
}
|
|
|
|
function findSurfaceValue(
|
|
result: TaxonomyHydrationResult,
|
|
statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>,
|
|
surfaceKey: string,
|
|
referenceColumnLabel?: string
|
|
) {
|
|
const rows = result.surface_rows[statement] ?? [];
|
|
const row = rows.find((entry) => entry.key === surfaceKey) ?? null;
|
|
if (!row) {
|
|
return { row: null, value: null };
|
|
}
|
|
|
|
const periodId = (referenceColumnLabel
|
|
? choosePeriodIdForColumnLabel(result, statement, referenceColumnLabel)
|
|
: null) ?? (statement === 'balance'
|
|
? chooseInstantPeriodId(result)
|
|
: chooseDurationPeriodId(result));
|
|
|
|
if (periodId) {
|
|
const directValue = row.values[periodId];
|
|
if (directValue !== null && directValue !== undefined) {
|
|
return { row, value: directValue };
|
|
}
|
|
}
|
|
|
|
const periodById = new Map(
|
|
result.periods.map((period) => [period.id, period as ResultPeriod])
|
|
);
|
|
|
|
const fallback = Object.entries(row.values)
|
|
.filter((entry): entry is [string, number] => entry[1] !== null)
|
|
.sort((left, right) => {
|
|
const leftPeriod = periodById.get(left[0]);
|
|
const rightPeriod = periodById.get(right[0]);
|
|
const leftDate = leftPeriod ? Date.parse(periodEnd(leftPeriod) ?? '') : Number.NaN;
|
|
const rightDate = rightPeriod ? Date.parse(periodEnd(rightPeriod) ?? '') : Number.NaN;
|
|
|
|
if (Number.isFinite(leftDate) && Number.isFinite(rightDate) && leftDate !== rightDate) {
|
|
return rightDate - leftDate;
|
|
}
|
|
|
|
return right[0].localeCompare(left[0]);
|
|
})[0];
|
|
|
|
return {
|
|
row,
|
|
value: fallback?.[1] ?? null
|
|
};
|
|
}
|
|
|
|
function rowResolutionMethod(row: Record<string, unknown> | null) {
|
|
if (!row) {
|
|
return null;
|
|
}
|
|
|
|
return (row.resolutionMethod ?? row.resolution_method ?? null) as string | null;
|
|
}
|
|
|
|
function rowWarningCodes(row: Record<string, unknown> | null) {
|
|
if (!row) {
|
|
return [] as string[];
|
|
}
|
|
|
|
const value = row.warningCodes ?? row.warning_codes ?? [];
|
|
return Array.isArray(value) ? value.filter((entry): entry is string => typeof entry === 'string') : [];
|
|
}
|
|
|
|
function buildSecFilingUrl(cik: string, accessionNumber: string) {
|
|
return `https://www.sec.gov/Archives/edgar/data/${Number.parseInt(cik, 10)}/${accessionNumber.replace(/-/g, '')}/`;
|
|
}
|
|
|
|
async function fetchLatestAnnualFiling(company: CompanyCase): Promise<TaxonomyHydrationInput> {
|
|
const cik = company.cik.padStart(10, '0');
|
|
const response = await fetch(`https://data.sec.gov/submissions/CIK${cik}.json`, {
|
|
headers: {
|
|
'user-agent': SEC_USER_AGENT,
|
|
accept: 'application/json'
|
|
}
|
|
});
|
|
|
|
if (!response.ok) {
|
|
throw new Error(`SEC submissions fetch failed for ${company.ticker}: ${response.status}`);
|
|
}
|
|
|
|
const payload = await response.json() as SecSubmission;
|
|
const recent = payload.filings?.recent;
|
|
if (!recent) {
|
|
throw new Error(`SEC submissions payload missing recent filings for ${company.ticker}`);
|
|
}
|
|
|
|
for (let index = 0; index < recent.form.length; index += 1) {
|
|
if (recent.form[index] !== company.form) {
|
|
continue;
|
|
}
|
|
|
|
const accessionNumber = recent.accessionNumber[index];
|
|
const filingDate = recent.filingDate[index];
|
|
const primaryDocument = recent.primaryDocument[index];
|
|
if (!accessionNumber || !filingDate || !primaryDocument) {
|
|
continue;
|
|
}
|
|
|
|
return {
|
|
filingId: index + 1,
|
|
ticker: company.ticker,
|
|
cik: company.cik,
|
|
accessionNumber,
|
|
filingDate,
|
|
filingType: company.form,
|
|
filingUrl: buildSecFilingUrl(company.cik, accessionNumber),
|
|
primaryDocument
|
|
};
|
|
}
|
|
|
|
throw new Error(`No ${company.form} found in SEC recent filings for ${company.ticker}`);
|
|
}
|
|
|
|
async function scrapeFiscalAiTable(
|
|
page: import('@playwright/test').Page,
|
|
exchangeTicker: string,
|
|
statement: 'income' | 'balance' | 'cash_flow'
|
|
): Promise<FiscalAiTable> {
|
|
const pagePath = statement === 'income'
|
|
? 'income-statement'
|
|
: statement === 'balance'
|
|
? 'balance-sheet'
|
|
: 'cash-flow-statement';
|
|
const url = `https://fiscal.ai/company/${exchangeTicker}/financials/${pagePath}/annual/?templateType=standardized`;
|
|
|
|
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 120_000 });
|
|
await page.waitForSelector('table', { timeout: 120_000 });
|
|
await page.waitForTimeout(2_500);
|
|
await page.evaluate(async () => {
|
|
window.scrollTo(0, document.body.scrollHeight);
|
|
await new Promise((resolve) => setTimeout(resolve, 750));
|
|
window.scrollTo(0, 0);
|
|
await new Promise((resolve) => setTimeout(resolve, 250));
|
|
});
|
|
|
|
return await page.evaluate(() => {
|
|
function normalizeLabel(value: string) {
|
|
return value
|
|
.toLowerCase()
|
|
.replace(/&/g, ' and ')
|
|
.replace(/[^a-z0-9]+/g, ' ')
|
|
.trim()
|
|
.replace(/\s+/g, ' ');
|
|
}
|
|
|
|
function parseDisplayedNumber(value: string) {
|
|
const trimmed = value.trim();
|
|
if (!trimmed || /^[-–—]+$/.test(trimmed) || /pricing/i.test(trimmed)) {
|
|
return null;
|
|
}
|
|
|
|
const negative = trimmed.startsWith('(') && trimmed.endsWith(')');
|
|
const normalized = trimmed
|
|
.replace(/,/g, '')
|
|
.replace(/[%$]/g, '')
|
|
.replace(/[()]/g, '')
|
|
.trim();
|
|
|
|
if (!normalized) {
|
|
return null;
|
|
}
|
|
|
|
const parsed = Number.parseFloat(normalized);
|
|
return Number.isFinite(parsed) ? (negative ? -Math.abs(parsed) : parsed) : null;
|
|
}
|
|
|
|
const tables = Array.from(document.querySelectorAll('table'));
|
|
if (tables.length === 0) {
|
|
throw new Error('Fiscal.ai table not found');
|
|
}
|
|
|
|
const rowsByLabel = new Map<string, FiscalAiTableRow>();
|
|
let columnLabel = 'unknown';
|
|
|
|
for (const table of tables) {
|
|
const headerCells = Array.from(table.querySelectorAll('tr:first-child th, tr:first-child td'))
|
|
.map((cell) => cell.textContent?.trim() ?? '')
|
|
.filter((value) => value.length > 0);
|
|
const annualColumnIndex = headerCells.findIndex((value, index) => index > 0 && value !== 'LTM');
|
|
if (annualColumnIndex < 0) {
|
|
continue;
|
|
}
|
|
|
|
if (columnLabel === 'unknown') {
|
|
columnLabel = headerCells[annualColumnIndex] ?? 'unknown';
|
|
}
|
|
|
|
for (const row of Array.from(table.querySelectorAll('tr')).slice(1)) {
|
|
const cells = Array.from(row.querySelectorAll('td'));
|
|
if (cells.length <= annualColumnIndex) {
|
|
continue;
|
|
}
|
|
|
|
const label = cells[0]?.textContent?.trim() ?? '';
|
|
const valueText = cells[annualColumnIndex]?.textContent?.trim() ?? '';
|
|
if (!label) {
|
|
continue;
|
|
}
|
|
|
|
rowsByLabel.set(label, {
|
|
label,
|
|
normalizedLabel: normalizeLabel(label),
|
|
valueText,
|
|
value: parseDisplayedNumber(valueText)
|
|
});
|
|
}
|
|
}
|
|
|
|
const rows = Array.from(rowsByLabel.values());
|
|
|
|
return {
|
|
columnLabel,
|
|
rows
|
|
};
|
|
});
|
|
}
|
|
|
|
function findFiscalAiRow(rows: FiscalAiTableRow[], candidates: string[]) {
|
|
const normalizedCandidates = candidates.map(normalizeLabel);
|
|
const benignExtraTokens = new Set(['total', 'net']);
|
|
|
|
for (const candidate of normalizedCandidates) {
|
|
const exactMatch = rows.find((row) => row.normalizedLabel === candidate);
|
|
if (exactMatch) {
|
|
return exactMatch;
|
|
}
|
|
}
|
|
|
|
for (const candidate of normalizedCandidates) {
|
|
const candidateTokens = candidate.split(' ').filter((token) => token.length > 0);
|
|
const relaxedMatch = rows.find((row) => {
|
|
const rowTokens = row.normalizedLabel.split(' ').filter((token) => token.length > 0);
|
|
const sharedPrefix = row.normalizedLabel.startsWith(candidate) || candidate.startsWith(row.normalizedLabel);
|
|
if (!sharedPrefix) {
|
|
return false;
|
|
}
|
|
|
|
const longer = rowTokens.length >= candidateTokens.length ? rowTokens : candidateTokens;
|
|
const shorter = rowTokens.length >= candidateTokens.length ? candidateTokens : rowTokens;
|
|
const extraTokens = longer.filter((token) => !shorter.includes(token));
|
|
return extraTokens.length > 0 && extraTokens.every((token) => benignExtraTokens.has(token));
|
|
});
|
|
if (relaxedMatch) {
|
|
return relaxedMatch;
|
|
}
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
function compareRow(
|
|
target: ComparisonTarget,
|
|
result: TaxonomyHydrationResult,
|
|
fiscalAiTable: FiscalAiTable
|
|
): ComparisonRow {
|
|
const fiscalAiRow = findFiscalAiRow(fiscalAiTable.rows, target.fiscalAiLabels);
|
|
const fiscalAiValueM = fiscalAiRow?.value ?? null;
|
|
const ourSurface = findSurfaceValue(result, target.statement, target.surfaceKey, fiscalAiTable.columnLabel);
|
|
const ourValueM = roundMillions(ourSurface.value);
|
|
const absDiffM = absoluteDiff(ourValueM, fiscalAiValueM);
|
|
const relDiffValue = relativeDiff(ourValueM, fiscalAiValueM);
|
|
const resolutionMethod = rowResolutionMethod(ourSurface.row as Record<string, unknown> | null);
|
|
const warningCodes = rowWarningCodes(ourSurface.row as Record<string, unknown> | null);
|
|
|
|
let status: ComparisonRow['status'];
|
|
if (
|
|
ourSurface.row &&
|
|
ourValueM === null &&
|
|
target.allowNotMeaningful &&
|
|
resolutionMethod === 'not_meaningful' &&
|
|
(target.notMeaningfulWarnings?.length ?? 0) > 0 &&
|
|
target.notMeaningfulWarnings!.some((warning) => warningCodes.includes(warning))
|
|
) {
|
|
status = 'not_meaningful';
|
|
} else if (!fiscalAiRow) {
|
|
status = 'missing_reference';
|
|
} else if (ourValueM === null) {
|
|
status = 'missing_ours';
|
|
} else if (
|
|
absDiffM !== null &&
|
|
relDiffValue !== null &&
|
|
(absDiffM <= VALUE_TOLERANCE_M || relDiffValue <= RELATIVE_TOLERANCE)
|
|
) {
|
|
status = 'pass';
|
|
} else {
|
|
status = 'fail';
|
|
}
|
|
|
|
return {
|
|
statement: target.statement,
|
|
surfaceKey: target.surfaceKey,
|
|
fiscalAiLabel: fiscalAiRow?.label ?? null,
|
|
fiscalAiValueM,
|
|
ourValueM,
|
|
absDiffM,
|
|
relDiff: relDiffValue,
|
|
status
|
|
};
|
|
}
|
|
|
|
async function compareCase(page: import('@playwright/test').Page, company: CompanyCase) {
|
|
const filing = await fetchLatestAnnualFiling(company);
|
|
const result = await hydrateFilingTaxonomySnapshot(filing);
|
|
|
|
if (result.parse_status !== 'ready') {
|
|
throw new Error(`${company.ticker} parse_status=${result.parse_status}${result.parse_error ? ` parse_error=${result.parse_error}` : ''}`);
|
|
}
|
|
|
|
const statementKinds = new Set(company.comparisons.map((target) => target.statement));
|
|
const incomeTable = statementKinds.has('income')
|
|
? await scrapeFiscalAiTable(page, company.exchangeTicker, 'income')
|
|
: null;
|
|
const balanceTable = statementKinds.has('balance')
|
|
? await scrapeFiscalAiTable(page, company.exchangeTicker, 'balance')
|
|
: null;
|
|
const cashFlowTable = statementKinds.has('cash_flow')
|
|
? await scrapeFiscalAiTable(page, company.exchangeTicker, 'cash_flow')
|
|
: null;
|
|
const rows = company.comparisons.map((target) => {
|
|
const table = target.statement === 'income'
|
|
? incomeTable
|
|
: target.statement === 'balance'
|
|
? balanceTable
|
|
: cashFlowTable;
|
|
if (!table) {
|
|
throw new Error(`Missing scraped table for ${target.statement}`);
|
|
}
|
|
return compareRow(target, result, table);
|
|
});
|
|
|
|
const failures = rows.filter(
|
|
(row) => row.status === 'fail' || row.status === 'missing_ours' || row.status === 'missing_reference'
|
|
);
|
|
|
|
console.log(
|
|
`[compare-fiscal-ai] ${company.ticker} filing=${filing.accessionNumber} fiscal_pack=${result.fiscal_pack ?? 'null'} income_column="${incomeTable?.columnLabel ?? 'n/a'}" balance_column="${balanceTable?.columnLabel ?? 'n/a'}" cash_flow_column="${cashFlowTable?.columnLabel ?? 'n/a'}" pass=${rows.length - failures.length}/${rows.length}`
|
|
);
|
|
for (const row of rows) {
|
|
console.log(
|
|
[
|
|
' ',
|
|
row.status.toUpperCase(),
|
|
`${row.statement}.${row.surfaceKey}`,
|
|
`fiscal_label=${row.fiscalAiLabel ?? 'null'}`,
|
|
`ours_m=${row.ourValueM ?? 'null'}`,
|
|
`fiscal_m=${row.fiscalAiValueM ?? 'null'}`,
|
|
`abs_diff_m=${row.absDiffM ?? 'null'}`,
|
|
`rel_diff=${row.relDiff === null ? 'null' : row.relDiff.toFixed(4)}`
|
|
].join(' ')
|
|
);
|
|
}
|
|
|
|
return {
|
|
ticker: company.ticker,
|
|
filing,
|
|
fiscalPack: result.fiscal_pack,
|
|
rows,
|
|
failures
|
|
};
|
|
}
|
|
|
|
async function main() {
|
|
process.env.XBRL_ENGINE_TIMEOUT_MS = process.env.XBRL_ENGINE_TIMEOUT_MS ?? '180000';
|
|
const filters = parseTickerFilter(process.argv.slice(2));
|
|
const selectedCases = (filters.ticker
|
|
? CASES.filter((entry) => entry.ticker === filters.ticker)
|
|
: CASES
|
|
)
|
|
.map((entry) => ({
|
|
...entry,
|
|
comparisons: filters.statement
|
|
? entry.comparisons.filter((target) => target.statement === filters.statement)
|
|
: entry.comparisons
|
|
}))
|
|
.filter((entry) => entry.comparisons.length > 0);
|
|
|
|
if (selectedCases.length === 0) {
|
|
console.error(
|
|
`[compare-fiscal-ai] no matching cases for ticker=${filters.ticker ?? 'all'} statement=${filters.statement ?? 'all'}`
|
|
);
|
|
process.exitCode = 1;
|
|
return;
|
|
}
|
|
|
|
const browser = await chromium.launch({ headless: true });
|
|
const page = await browser.newPage({
|
|
userAgent: BROWSER_USER_AGENT
|
|
});
|
|
|
|
const failures: Array<{ ticker: string; row: ComparisonRow }> = [];
|
|
|
|
try {
|
|
for (const company of selectedCases) {
|
|
const result = await compareCase(page, company);
|
|
for (const failure of result.failures) {
|
|
failures.push({
|
|
ticker: company.ticker,
|
|
row: failure
|
|
});
|
|
}
|
|
|
|
await Bun.sleep(150);
|
|
}
|
|
} finally {
|
|
await browser.close();
|
|
}
|
|
|
|
console.log(`[compare-fiscal-ai] completed cases=${selectedCases.length} failures=${failures.length}`);
|
|
|
|
if (failures.length === 0) {
|
|
return;
|
|
}
|
|
|
|
for (const failure of failures) {
|
|
console.error(
|
|
`[compare-fiscal-ai] ${failure.ticker} ${failure.row.statement}.${failure.row.surfaceKey} status=${failure.row.status} ours_m=${failure.row.ourValueM ?? 'null'} fiscal_m=${failure.row.fiscalAiValueM ?? 'null'} fiscal_label=${failure.row.fiscalAiLabel ?? 'null'}`
|
|
);
|
|
}
|
|
|
|
process.exitCode = 1;
|
|
}
|
|
|
|
void main().catch((error) => {
|
|
const message = error instanceof Error ? error.message : String(error);
|
|
console.error(`[compare-fiscal-ai] fatal: ${message}`);
|
|
process.exitCode = 1;
|
|
});
|