// Listing metadata: 285 lines, 8.2 KiB, TypeScript
import type { FinancialStatementKind } from '@/lib/types';
|
|
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
|
|
import type { TaxonomyHydrationInput } from '@/lib/server/taxonomy/types';
|
|
|
|
/**
 * One representative filing to hydrate, together with the expectations
 * checked against the hydrated snapshot.
 */
type ValidationCase = {
  /** Case identifier; matched exactly by the `--case=` filter and echoed in log output. */
  name: string;
  /** Value the snapshot's `fiscal_pack` is compared against. */
  expectedPack: string;
  /** Filing descriptor passed unchanged to `hydrateFilingTaxonomySnapshot`. */
  input: TaxonomyHydrationInput;
  /** Surface row keys that must be present, grouped by statement kind. */
  requiredSurfaceKeys: Partial<Record<FinancialStatementKind, string[]>>;
  /** KPI row keys that must be present; omitting the field means no KPI requirement. */
  requiredKpiKeys?: string[];
};
|
|
|
|
/** Outcome recorded for a case that did not pass validation. */
type ValidationFailure = {
  /** Name of the failing case. */
  name: string;
  /** Human-readable descriptions of each problem found (or the thrown error message). */
  issues: string[];
};
|
|
|
|
/** Income-statement surface keys that every corpus case requires, whatever its pack. */
const UNIVERSAL_INCOME_KEYS = [
  'revenue',
  'gross_profit',
  'operating_expenses',
  'operating_income',
  'income_tax_expense',
  'net_income'
] as const;
|
|
|
|
/** Operating-expense breakdown surface keys, required on the income statement of every corpus case. */
const EXPENSE_BREAKDOWN_KEYS = [
  'selling_general_and_administrative',
  'research_and_development',
  'other_operating_expense'
] as const;
|
|
|
|
/**
 * Representative filings validated by this script, one entry per taxonomy pack.
 *
 * Every case requires the universal income keys and the expense breakdown keys;
 * pack-specific surface and KPI keys are appended per entry.
 */
const CORPUS: ValidationCase[] = [
  {
    name: 'core-msft-2026-01-28',
    expectedPack: 'core',
    input: {
      filingId: 1,
      ticker: 'MSFT',
      cik: '0000789019',
      accessionNumber: '0001193125-26-027207',
      filingDate: '2026-01-28',
      filingType: '10-Q',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/789019/000119312526027207/',
      primaryDocument: 'msft-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS],
      balance: ['total_assets']
    }
  },
  {
    name: 'bank-jpm-2026-02-13',
    expectedPack: 'bank_lender',
    input: {
      filingId: 2,
      ticker: 'JPM',
      cik: '0000019617',
      accessionNumber: '0001628280-26-008131',
      filingDate: '2026-02-13',
      filingType: '10-K',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/19617/000162828026008131/',
      primaryDocument: 'jpm-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'net_interest_income', 'noninterest_income'],
      balance: ['loans', 'deposits']
    },
    requiredKpiKeys: ['net_interest_margin']
  },
  {
    name: 'insurance-aig-2026-02-12',
    expectedPack: 'insurance',
    input: {
      filingId: 3,
      ticker: 'AIG',
      cik: '0000005272',
      accessionNumber: '0000005272-26-000023',
      filingDate: '2026-02-12',
      filingType: '10-K',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/5272/000000527226000023/',
      primaryDocument: 'aig-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'premiums', 'claims_and_benefits'],
      balance: ['policy_liabilities']
    },
    requiredKpiKeys: ['combined_ratio']
  },
  {
    name: 'reit-o-2026-02-25',
    expectedPack: 'reit_real_estate',
    input: {
      filingId: 4,
      ticker: 'O',
      cik: '0000726728',
      accessionNumber: '0000726728-26-000011',
      filingDate: '2026-02-25',
      filingType: '10-K',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/726728/000072672826000011/',
      primaryDocument: 'o-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'rental_revenue'],
      balance: ['investment_property', 'total_assets']
    },
    requiredKpiKeys: ['property_count']
  },
  {
    name: 'broker-blk-2026-02-25',
    expectedPack: 'broker_asset_manager',
    input: {
      filingId: 5,
      ticker: 'BLK',
      cik: '0002012383',
      accessionNumber: '0001193125-26-071966',
      filingDate: '2026-02-25',
      filingType: '10-K',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/2012383/000119312526071966/',
      primaryDocument: 'blk-20251231.htm'
    },
    requiredSurfaceKeys: {
      income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'fee_revenue'],
      balance: ['total_assets', 'total_liabilities']
    },
    requiredKpiKeys: ['aum', 'fee_paying_aum']
  }
];
|
|
|
|
/** Normalization warnings whose presence in a snapshot is reported as a validation issue. */
const FALLBACK_WARNINGS = new Set([
  'surface_rows_deferred_to_typescript',
  'ts_compact_surface_fallback_used'
]);
|
|
|
|
/**
 * Extracts the optional case filter from the command-line arguments.
 *
 * `--help` / `-h` anywhere in `argv` prints usage and exits the process with
 * code 0. Help is checked before `--case=` so that it is honoured even when it
 * follows a case filter.
 *
 * @param argv - Command-line arguments without the runtime and script path.
 * @returns The trimmed value of the first `--case=` argument, or `null` when no
 *   such argument exists or its value is empty.
 */
function parseCaseFilter(argv: string[]): string | null {
  const wantsHelp = argv.some((arg) => arg === '--help' || arg === '-h');
  if (wantsHelp) {
    console.log('Validate live SEC representative filings for each active taxonomy pack.');
    console.log('');
    console.log('Usage:');
    console.log(' bun run scripts/validate-taxonomy-packs.ts');
    console.log(' bun run scripts/validate-taxonomy-packs.ts --case=bank-jpm-2026-02-13');
    process.exit(0);
  }

  const prefix = '--case=';
  // Only the first --case= argument is considered, even when its value is empty.
  const caseArg = argv.find((arg) => arg.startsWith(prefix));
  if (caseArg === undefined) {
    return null;
  }

  const value = caseArg.slice(prefix.length).trim();
  return value.length > 0 ? value : null;
}
|
|
|
|
/**
 * Lists the surface row keys a hydrated snapshot holds for one statement.
 *
 * @param result - Snapshot returned by `hydrateFilingTaxonomySnapshot`.
 * @param statement - Statement kind whose rows are inspected.
 * @returns The `key` of each row in order; an empty array when the statement has no rows entry.
 */
function keysForStatement(
  result: Awaited<ReturnType<typeof hydrateFilingTaxonomySnapshot>>,
  statement: FinancialStatementKind
) {
  const rows = result.surface_rows[statement] ?? [];
  return rows.map((row) => row.key);
}
|
|
|
|
/**
 * Hydrates one corpus filing and checks the snapshot against the case's expectations.
 *
 * Checks, in order: parse status is `ready`; `fiscal_pack` equals the expected
 * pack; at least one statement has surface rows; no warning listed in
 * `FALLBACK_WARNINGS` was reported; every required surface key and KPI key is
 * present. A one-line summary is logged whether the case passes or fails.
 *
 * @param testCase - Corpus entry to validate.
 * @returns `null` when every check passes, otherwise the case name with the collected issues.
 *   Errors thrown by `hydrateFilingTaxonomySnapshot` are not caught here and propagate to the caller.
 */
async function validateCase(testCase: ValidationCase): Promise<ValidationFailure | null> {
  const startedAt = Date.now();
  const result = await hydrateFilingTaxonomySnapshot(testCase.input);
  const issues: string[] = [];
  const warnings = result.normalization_summary.warnings ?? [];
  const kpiKeys = result.kpi_rows.map((row) => row.key);
  const kpiKeySet = new Set<string>(kpiKeys);

  if (result.parse_status !== 'ready') {
    issues.push(`parse_status=${result.parse_status}${result.parse_error ? ` parse_error=${result.parse_error}` : ''}`);
  }

  if (result.fiscal_pack !== testCase.expectedPack) {
    issues.push(`fiscal_pack=${result.fiscal_pack ?? 'null'} expected=${testCase.expectedPack}`);
  }

  // A missing statement entry counts as empty, mirroring the `?? []` guard in keysForStatement.
  const statementRows = Object.values(result.surface_rows) as Array<Array<{ key: string }> | undefined>;
  if (statementRows.every((rows) => rows == null || rows.length === 0)) {
    issues.push('surface_rows are empty');
  }

  // Only the first matching fallback warning is reported as an issue.
  const fallbackWarning = warnings.find((warning) => FALLBACK_WARNINGS.has(warning));
  if (fallbackWarning) {
    issues.push(`unexpected fallback warning=${fallbackWarning}`);
  }

  const requiredByStatement = Object.entries(testCase.requiredSurfaceKeys) as Array<
    [FinancialStatementKind, string[]]
  >;
  for (const [statement, requiredKeys] of requiredByStatement) {
    const actualKeys = new Set(keysForStatement(result, statement));
    for (const requiredKey of requiredKeys) {
      if (!actualKeys.has(requiredKey)) {
        issues.push(`${statement} missing surface key=${requiredKey}`);
      }
    }
  }

  for (const requiredKpiKey of testCase.requiredKpiKeys ?? []) {
    if (!kpiKeySet.has(requiredKpiKey)) {
      issues.push(`missing kpi key=${requiredKpiKey}`);
    }
  }

  const durationMs = Date.now() - startedAt;
  const incomeKeys = keysForStatement(result, 'income');
  const balanceKeys = keysForStatement(result, 'balance');
  const summary = [
    `[validate-taxonomy-packs] ${testCase.name}`,
    `status=${issues.length === 0 ? 'pass' : 'fail'}`,
    `parse=${result.parse_status}`,
    `pack=${result.fiscal_pack ?? 'null'}`,
    `income=${incomeKeys.join(',') || '-'}`,
    `balance=${balanceKeys.join(',') || '-'}`,
    `kpis=${kpiKeys.join(',') || '-'}`,
    `warnings=${warnings.join(',') || '-'}`,
    `durationMs=${durationMs}`
  ].join(' ');
  console.log(summary);

  if (issues.length === 0) {
    return null;
  }

  return {
    name: testCase.name,
    issues
  };
}
|
|
|
|
/**
 * Runs the selected corpus cases one after another and reports the outcome.
 *
 * Sets `process.exitCode` to 1 when the requested case name is unknown or when
 * at least one case fails; otherwise the exit code is left untouched.
 */
async function main(): Promise<void> {
  // Apply a default engine timeout (milliseconds, going by the variable name) unless one is already set.
  process.env.XBRL_ENGINE_TIMEOUT_MS = process.env.XBRL_ENGINE_TIMEOUT_MS ?? '180000';

  const requestedCase = parseCaseFilter(process.argv.slice(2));
  const selectedCases = requestedCase
    ? CORPUS.filter((testCase) => testCase.name === requestedCase)
    : CORPUS;

  if (selectedCases.length === 0) {
    console.error(`[validate-taxonomy-packs] unknown case: ${requestedCase}`);
    process.exitCode = 1;
    return;
  }

  const failures: ValidationFailure[] = [];
  const startedAt = Date.now();
  const lastIndex = selectedCases.length - 1;

  for (const [index, testCase] of selectedCases.entries()) {
    try {
      const failure = await validateCase(testCase);
      if (failure) {
        failures.push(failure);
      }
    } catch (error) {
      // A thrown error fails this case only; the remaining cases still run.
      failures.push({
        name: testCase.name,
        issues: [error instanceof Error ? error.message : String(error)]
      });
    }

    // Pause between cases only (presumably to space out remote requests — confirm);
    // sleeping after the final case would just delay the summary and inflate durationSec.
    if (index < lastIndex) {
      await Bun.sleep(150);
    }
  }

  console.log(
    `[validate-taxonomy-packs] completed cases=${selectedCases.length} failures=${failures.length} durationSec=${(
      (Date.now() - startedAt) /
      1000
    ).toFixed(1)}`
  );

  if (failures.length === 0) {
    return;
  }

  for (const failure of failures) {
    console.error(`[validate-taxonomy-packs] ${failure.name}`);
    for (const issue of failure.issues) {
      console.error(` - ${issue}`);
    }
  }

  process.exitCode = 1;
}
|
|
|
|
// Script entry point. Any rejection escaping main() is logged with its stack and
// turned into a failing exit code instead of being left as an unhandled rejection.
main().catch((error: unknown) => {
  console.error('[validate-taxonomy-packs] fatal error', error);
  process.exitCode = 1;
});
|