- add 3Y/5Y/10Y financial history filtering and reorganize normalization details UI - add new fiscal taxonomy surface/income bridge/KPI packs and update Rust taxonomy loading - auto-detect Homebrew SQLite for native `sqlite-vec` in local dev/e2e with docs and env guidance
425 lines
11 KiB
TypeScript
425 lines
11 KiB
TypeScript
import type { FinancialStatementKind } from "@/lib/types";
|
|
import { hydrateFilingTaxonomySnapshot } from "@/lib/server/taxonomy/engine";
|
|
import type { TaxonomyHydrationInput } from "@/lib/server/taxonomy/types";
|
|
|
|
/**
 * One scenario checked by this script: the filing to hydrate together with
 * the pack and keys the hydrated snapshot is expected to contain.
 */
type ValidationCase = {
  /** Case identifier; compared with the `--case` filter and printed in log lines. */
  name: string;
  /** Value the hydrated snapshot's `fiscal_pack` must equal. */
  expectedPack: string;
  /** Passed unchanged to `hydrateFilingTaxonomySnapshot`. */
  input: TaxonomyHydrationInput;
  /** Per statement, the surface-row keys that must be present in the snapshot. */
  requiredSurfaceKeys: Partial<Record<FinancialStatementKind, string[]>>;
  /** KPI row keys that must be present; omit when the case has no KPI expectations. */
  requiredKpiKeys?: string[];
};
|
|
|
|
/** A case that did not pass, with the reasons collected for it. */
type ValidationFailure = {
  /** `name` of the failing `ValidationCase`. */
  name: string;
  /** One message per failed check, or the message of an error thrown while validating. */
  issues: string[];
};
|
|
|
|
/**
 * Income-statement surface keys that every corpus case lists as required,
 * whatever its expected pack.
 */
const UNIVERSAL_INCOME_KEYS = [
  "revenue",
  "gross_profit",
  "operating_expenses",
  "operating_income",
  "income_tax_expense",
  "net_income",
] as const;
|
|
|
|
/**
 * Operating-expense breakdown surface keys; like the universal income keys,
 * every corpus case lists these as required on the income statement.
 */
const EXPENSE_BREAKDOWN_KEYS = [
  "selling_general_and_administrative",
  "research_and_development",
  "other_operating_expense",
] as const;
|
|
|
|
/**
 * Builds the required income-statement key list for one corpus case: the
 * shared universal keys, then the expense-breakdown keys, then the keys
 * specific to that case. A fresh array is returned on every call.
 */
function requiredIncomeKeys(...caseSpecificKeys: string[]): string[] {
  return [
    ...UNIVERSAL_INCOME_KEYS,
    ...EXPENSE_BREAKDOWN_KEYS,
    ...caseSpecificKeys,
  ];
}

/**
 * Representative SEC filings, one per expected taxonomy pack, with the
 * surface and KPI keys each hydrated snapshot must expose.
 *
 * Case names follow `<pack hint>-<ticker>-<filing date>` and are the values
 * accepted by the `--case` filter.
 */
const CORPUS: ValidationCase[] = [
  // Microsoft 10-Q: baseline "core" pack.
  {
    name: "core-msft-2026-01-28",
    expectedPack: "core",
    input: {
      filingId: 1,
      ticker: "MSFT",
      cik: "0000789019",
      accessionNumber: "0001193125-26-027207",
      filingDate: "2026-01-28",
      filingType: "10-Q",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/789019/000119312526027207/",
      primaryDocument: "msft-20251231.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys(),
      balance: ["total_assets"],
    },
  },
  // JPMorgan Chase 10-K: bank / lender pack.
  {
    name: "bank-jpm-2026-02-13",
    expectedPack: "bank_lender",
    input: {
      filingId: 2,
      ticker: "JPM",
      cik: "0000019617",
      accessionNumber: "0001628280-26-008131",
      filingDate: "2026-02-13",
      filingType: "10-K",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/19617/000162828026008131/",
      primaryDocument: "jpm-20251231.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys("net_interest_income", "noninterest_income"),
      balance: ["loans", "deposits"],
    },
    requiredKpiKeys: ["net_interest_margin"],
  },
  // AIG 10-K: insurance pack.
  {
    name: "insurance-aig-2026-02-12",
    expectedPack: "insurance",
    input: {
      filingId: 3,
      ticker: "AIG",
      cik: "0000005272",
      accessionNumber: "0000005272-26-000023",
      filingDate: "2026-02-12",
      filingType: "10-K",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/5272/000000527226000023/",
      primaryDocument: "aig-20251231.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys("premiums", "claims_and_benefits"),
      balance: ["policy_liabilities"],
    },
    requiredKpiKeys: ["combined_ratio"],
  },
  // Realty Income 10-K: REIT / real-estate pack.
  {
    name: "reit-o-2026-02-25",
    expectedPack: "reit_real_estate",
    input: {
      filingId: 4,
      ticker: "O",
      cik: "0000726728",
      accessionNumber: "0000726728-26-000011",
      filingDate: "2026-02-25",
      filingType: "10-K",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/726728/000072672826000011/",
      primaryDocument: "o-20251231.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys("rental_revenue"),
      balance: ["investment_property", "total_assets"],
    },
    requiredKpiKeys: ["property_count"],
  },
  // BlackRock 10-K: broker / asset-manager pack.
  {
    name: "broker-blk-2026-02-25",
    expectedPack: "broker_asset_manager",
    input: {
      filingId: 5,
      ticker: "BLK",
      cik: "0002012383",
      accessionNumber: "0001193125-26-071966",
      filingDate: "2026-02-25",
      filingType: "10-K",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/2012383/000119312526071966/",
      primaryDocument: "blk-20251231.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys("fee_revenue"),
      balance: ["total_assets", "total_liabilities"],
    },
    requiredKpiKeys: ["aum", "fee_paying_aum"],
  },
  // Oracle 10-K: software pack.
  {
    name: "software-orcl-2025-06-18",
    expectedPack: "software",
    input: {
      filingId: 6,
      ticker: "ORCL",
      cik: "0001341439",
      accessionNumber: "0000950170-25-087926",
      filingDate: "2025-06-18",
      filingType: "10-K",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/1341439/000095017025087926/",
      primaryDocument: "orcl-20250531.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys(
        "software_license_revenue",
        "maintenance_and_support_revenue",
        "cost_of_software_revenue",
      ),
      balance: ["capitalized_software_costs", "deferred_revenue"],
    },
  },
  // Freeport-McMoRan 10-K: extractive / mining pack.
  {
    name: "mining-fcx-2025-02-14",
    expectedPack: "extractive_mining",
    input: {
      filingId: 7,
      ticker: "FCX",
      cik: "0000831259",
      accessionNumber: "0000831259-25-000006",
      filingDate: "2025-02-14",
      filingType: "10-K",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/831259/000083125925000006/",
      primaryDocument: "fcx-20241231.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys(
        "mining_revenue",
        "production_costs",
        "exploration_expense",
      ),
      balance: ["mining_properties", "rehabilitation_liabilities"],
    },
  },
  // Rocket Companies 10-K: mortgage-banking pack.
  {
    name: "mortgage-rkt-2026-02-27",
    expectedPack: "mortgage_banking",
    input: {
      filingId: 8,
      ticker: "RKT",
      cik: "0001805284",
      accessionNumber: "0001628280-26-013283",
      filingDate: "2026-02-27",
      filingType: "10-K",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/1805284/000162828026013283/",
      primaryDocument: "rkt-20251231.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys(
        "mortgage_banking_revenue",
        "servicing_fees",
        "interest_income",
      ),
      balance: ["loans_held_for_sale", "mortgage_servicing_rights"],
    },
  },
  // MGM Resorts 10-K: entertainment / casinos pack.
  {
    name: "casino-mgm-2026-02-11",
    expectedPack: "entertainment_casinos",
    input: {
      filingId: 9,
      ticker: "MGM",
      cik: "0000789570",
      accessionNumber: "0000789570-26-000018",
      filingDate: "2026-02-11",
      filingType: "10-K",
      filingUrl:
        "https://www.sec.gov/Archives/edgar/data/789570/000078957026000018/",
      primaryDocument: "mgm-20251231.htm",
    },
    requiredSurfaceKeys: {
      income: requiredIncomeKeys(
        "gaming_revenue",
        "hotel_and_resort_revenue",
        "food_and_beverage_revenue",
      ),
      balance: ["casino_properties", "gaming_tax_liability"],
    },
  },
];
|
|
|
|
/**
 * Normalization warning codes that fail a case when present in the hydrated
 * snapshot's `normalization_summary.warnings`.
 *
 * NOTE(review): judging by the names, these presumably mark surface rows
 * produced by a TypeScript fallback path — confirm against the taxonomy engine.
 */
const FALLBACK_WARNINGS = new Set<string>([
  "surface_rows_deferred_to_typescript",
  "ts_compact_surface_fallback_used",
]);
|
|
|
|
/**
 * Extracts the optional single-case filter from the command-line arguments.
 *
 * Arguments are scanned left to right and the first recognised one wins:
 * - `--help` / `-h` prints usage and terminates the process with exit code 0.
 * - `--case=<name>` returns the trimmed name, or `null` when it is blank.
 * - `--case <name>` (space-separated) returns the trimmed name. This form used
 *   to be ignored silently, which made the script run the whole corpus instead
 *   of the one requested case. A `--case` that is last, or is followed by
 *   another flag, carries no name and is skipped.
 *
 * Unrecognised arguments are ignored.
 *
 * @param argv Command-line arguments without the runtime and script path.
 * @returns The requested case name, or `null` when no usable filter was given.
 */
function parseCaseFilter(argv: string[]): string | null {
  const inlinePrefix = "--case=";

  for (const [index, arg] of argv.entries()) {
    if (arg === "--help" || arg === "-h") {
      console.log(
        "Validate live SEC representative filings for each active taxonomy pack.",
      );
      console.log("");
      console.log("Usage:");
      console.log(" bun run scripts/validate-taxonomy-packs.ts");
      console.log(
        " bun run scripts/validate-taxonomy-packs.ts --case=bank-jpm-2026-02-13",
      );
      process.exit(0);
    }

    if (arg.startsWith(inlinePrefix)) {
      const value = arg.slice(inlinePrefix.length).trim();
      return value.length > 0 ? value : null;
    }

    if (arg === "--case") {
      // The name is the next argument, unless that argument is itself a flag.
      const value = argv[index + 1]?.trim() ?? "";
      if (value.length > 0 && !value.startsWith("-")) {
        return value;
      }
    }
  }

  return null;
}
|
|
|
|
/**
 * Lists the surface-row keys a hydrated snapshot holds for one statement.
 *
 * @param result Resolved value of `hydrateFilingTaxonomySnapshot`.
 * @param statement Statement whose rows are read from `result.surface_rows`.
 * @returns The row keys in row order; an empty array when the snapshot has no
 *   entry for that statement.
 */
function keysForStatement(
  result: Awaited<ReturnType<typeof hydrateFilingTaxonomySnapshot>>,
  statement: FinancialStatementKind,
) {
  const rows = result.surface_rows[statement] ?? [];
  return rows.map(({ key }) => key);
}
|
|
|
|
/**
 * Hydrates one corpus filing and checks the snapshot against the case's
 * expectations, logging a one-line summary either way.
 *
 * Checks, in the order their messages are reported:
 * 1. `parse_status` is `"ready"`;
 * 2. `fiscal_pack` equals the case's `expectedPack`;
 * 3. at least one statement has surface rows;
 * 4. no warning listed in `FALLBACK_WARNINGS` is present (first hit only);
 * 5. every required surface key is present for its statement;
 * 6. every required KPI key is present.
 *
 * @param testCase Corpus entry to validate.
 * @returns `null` when every check passes, otherwise the case name with one
 *   message per failed check. Errors thrown by hydration propagate to the caller.
 */
async function validateCase(
  testCase: ValidationCase,
): Promise<ValidationFailure | null> {
  const beganAt = Date.now();
  const snapshot = await hydrateFilingTaxonomySnapshot(testCase.input);
  const warnings = snapshot.normalization_summary.warnings ?? [];
  const kpiKeys = snapshot.kpi_rows.map(({ key }) => key);
  const issues: string[] = [];

  if (snapshot.parse_status !== "ready") {
    const errorSuffix = snapshot.parse_error
      ? ` parse_error=${snapshot.parse_error}`
      : "";
    issues.push(`parse_status=${snapshot.parse_status}${errorSuffix}`);
  }

  if (snapshot.fiscal_pack !== testCase.expectedPack) {
    issues.push(
      `fiscal_pack=${snapshot.fiscal_pack ?? "null"} expected=${testCase.expectedPack}`,
    );
  }

  const rowsPerStatement = Object.values(snapshot.surface_rows) as Array<
    Array<{ key: string }>
  >;
  const hasAnySurfaceRow = rowsPerStatement.some((rows) => rows.length !== 0);
  if (!hasAnySurfaceRow) {
    issues.push("surface_rows are empty");
  }

  // Only the first fallback warning encountered is reported.
  for (const warning of warnings) {
    if (FALLBACK_WARNINGS.has(warning)) {
      issues.push(`unexpected fallback warning=${warning}`);
      break;
    }
  }

  const requiredByStatement = Object.entries(
    testCase.requiredSurfaceKeys,
  ) as Array<[FinancialStatementKind, string[]]>;
  for (const [statement, requiredKeys] of requiredByStatement) {
    const presentKeys = new Set(keysForStatement(snapshot, statement));
    const missingKeys = requiredKeys.filter((key) => !presentKeys.has(key));
    issues.push(
      ...missingKeys.map((key) => `${statement} missing surface key=${key}`),
    );
  }

  const missingKpiKeys = (testCase.requiredKpiKeys ?? []).filter(
    (key) => !kpiKeys.includes(key),
  );
  issues.push(...missingKpiKeys.map((key) => `missing kpi key=${key}`));

  const durationMs = Date.now() - beganAt;
  const passed = issues.length === 0;
  const joinOrDash = (values: ReadonlyArray<unknown>) =>
    values.join(",") || "-";

  // Insertion order of this object is the field order of the log line.
  const summary = {
    status: passed ? "pass" : "fail",
    parse: snapshot.parse_status,
    pack: snapshot.fiscal_pack ?? "null",
    income: joinOrDash(keysForStatement(snapshot, "income")),
    balance: joinOrDash(keysForStatement(snapshot, "balance")),
    kpis: joinOrDash(kpiKeys),
    warnings: joinOrDash(warnings),
    durationMs,
  };
  const fields = Object.entries(summary).map(
    ([label, value]) => `${label}=${value}`,
  );
  console.log(
    [`[validate-taxonomy-packs] ${testCase.name}`, ...fields].join(" "),
  );

  return passed ? null : { name: testCase.name, issues };
}
|
|
|
|
/**
 * Script entry point: validates the selected corpus cases one after another
 * and reports the outcome.
 *
 * - Defaults `XBRL_ENGINE_TIMEOUT_MS` to `"180000"` when it is not already set.
 * - Runs every case in `CORPUS`, or only the one named by the case filter; an
 *   unknown name is reported and sets exit code 1 without running anything.
 * - A thrown error in a case is recorded as that case's single issue, so the
 *   remaining cases still run.
 * - Sets exit code 1 when any case failed, after listing each failure's issues.
 */
async function main() {
  if (process.env.XBRL_ENGINE_TIMEOUT_MS == null) {
    process.env.XBRL_ENGINE_TIMEOUT_MS = "180000";
  }

  const requestedCase = parseCaseFilter(process.argv.slice(2));
  let selectedCases = CORPUS;
  if (requestedCase) {
    selectedCases = CORPUS.filter(({ name }) => name === requestedCase);
  }

  if (selectedCases.length === 0) {
    console.error(`[validate-taxonomy-packs] unknown case: ${requestedCase}`);
    process.exitCode = 1;
    return;
  }

  const failures: ValidationFailure[] = [];
  const startedAt = Date.now();

  for (const testCase of selectedCases) {
    let failure: ValidationFailure | null;
    try {
      failure = await validateCase(testCase);
    } catch (error) {
      failure = {
        name: testCase.name,
        issues: [error instanceof Error ? error.message : String(error)],
      };
    }

    if (failure) {
      failures.push(failure);
    }

    // Short pause between cases (each one hydrates a live filing).
    await Bun.sleep(150);
  }

  const elapsedSec = ((Date.now() - startedAt) / 1000).toFixed(1);
  console.log(
    `[validate-taxonomy-packs] completed cases=${selectedCases.length} failures=${failures.length} durationSec=${elapsedSec}`,
  );

  if (failures.length > 0) {
    for (const { name, issues } of failures) {
      console.error(`[validate-taxonomy-packs] ${name}`);
      for (const issue of issues) {
        console.error(` - ${issue}`);
      }
    }

    process.exitCode = 1;
  }
}

void main();
|