Add history window controls and expand taxonomy pack support

- add 3Y/5Y/10Y financial history filtering and reorganize normalization details UI
- add new fiscal taxonomy surface/income bridge/KPI packs and update Rust taxonomy loading
- auto-detect Homebrew SQLite for native `sqlite-vec` in local dev/e2e with docs and env guidance
This commit is contained in:
2026-03-18 23:40:28 -04:00
parent f8426c4dde
commit 17de3dd72d
102 changed files with 14978 additions and 1316 deletions

View File

@@ -1,6 +1,6 @@
import type { FinancialStatementKind } from '@/lib/types';
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
import type { TaxonomyHydrationInput } from '@/lib/server/taxonomy/types';
import type { FinancialStatementKind } from "@/lib/types";
import { hydrateFilingTaxonomySnapshot } from "@/lib/server/taxonomy/engine";
import type { TaxonomyHydrationInput } from "@/lib/server/taxonomy/types";
type ValidationCase = {
name: string;
@@ -16,135 +16,262 @@ type ValidationFailure = {
};
// Income-statement surface keys that every case in CORPUS spreads into its
// `requiredSurfaceKeys.income` list.
// NOTE(review): this listing shows the single-quoted entries followed by the
// double-quoted entries with trailing commas — apparently the before/after
// forms of the same lines; the key names themselves are identical in both.
const UNIVERSAL_INCOME_KEYS = [
'revenue',
'gross_profit',
'operating_expenses',
'operating_income',
'income_tax_expense',
'net_income'
"revenue",
"gross_profit",
"operating_expenses",
"operating_income",
"income_tax_expense",
"net_income",
] as const;
// Operating-expense breakdown keys; every case in CORPUS spreads these into
// `requiredSurfaceKeys.income` right after UNIVERSAL_INCOME_KEYS.
// NOTE(review): single-quoted and double-quoted entries carry the same key
// names — apparently the before/after forms of the same lines.
const EXPENSE_BREAKDOWN_KEYS = [
'selling_general_and_administrative',
'research_and_development',
'other_operating_expense'
"selling_general_and_administrative",
"research_and_development",
"other_operating_expense",
] as const;
// Validation corpus: one SEC filing per expected fiscal pack (each
// `expectedPack` value below is distinct).
// Each case carries:
//   - name: label used in log output and matched by the `--case=` filter
//   - expectedPack: value compared against the hydrated `fiscal_pack`
//   - input: filing identity passed to hydrateFilingTaxonomySnapshot
//   - requiredSurfaceKeys: per-statement surface row keys that must be present
//   - requiredKpiKeys: optional; only some cases list it
// NOTE(review): within each case the single-quoted lines and the double-quoted
// lines carry the same values — apparently the before/after forms of the same
// properties (quote style, trailing commas, and line wrapping differ).
const CORPUS: ValidationCase[] = [
{
name: 'core-msft-2026-01-28',
expectedPack: 'core',
name: "core-msft-2026-01-28",
expectedPack: "core",
input: {
filingId: 1,
ticker: 'MSFT',
cik: '0000789019',
accessionNumber: '0001193125-26-027207',
filingDate: '2026-01-28',
filingType: '10-Q',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/789019/000119312526027207/',
primaryDocument: 'msft-20251231.htm'
ticker: "MSFT",
cik: "0000789019",
accessionNumber: "0001193125-26-027207",
filingDate: "2026-01-28",
filingType: "10-Q",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/789019/000119312526027207/",
primaryDocument: "msft-20251231.htm",
},
requiredSurfaceKeys: {
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS],
balance: ['total_assets']
}
balance: ["total_assets"],
},
},
{
name: 'bank-jpm-2026-02-13',
expectedPack: 'bank_lender',
name: "bank-jpm-2026-02-13",
expectedPack: "bank_lender",
input: {
filingId: 2,
ticker: 'JPM',
cik: '0000019617',
accessionNumber: '0001628280-26-008131',
filingDate: '2026-02-13',
filingType: '10-K',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/19617/000162828026008131/',
primaryDocument: 'jpm-20251231.htm'
ticker: "JPM",
cik: "0000019617",
accessionNumber: "0001628280-26-008131",
filingDate: "2026-02-13",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/19617/000162828026008131/",
primaryDocument: "jpm-20251231.htm",
},
requiredSurfaceKeys: {
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'net_interest_income', 'noninterest_income'],
balance: ['loans', 'deposits']
income: [
...UNIVERSAL_INCOME_KEYS,
...EXPENSE_BREAKDOWN_KEYS,
"net_interest_income",
"noninterest_income",
],
balance: ["loans", "deposits"],
},
requiredKpiKeys: ['net_interest_margin']
requiredKpiKeys: ["net_interest_margin"],
},
{
name: 'insurance-aig-2026-02-12',
expectedPack: 'insurance',
name: "insurance-aig-2026-02-12",
expectedPack: "insurance",
input: {
filingId: 3,
ticker: 'AIG',
cik: '0000005272',
accessionNumber: '0000005272-26-000023',
filingDate: '2026-02-12',
filingType: '10-K',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/5272/000000527226000023/',
primaryDocument: 'aig-20251231.htm'
ticker: "AIG",
cik: "0000005272",
accessionNumber: "0000005272-26-000023",
filingDate: "2026-02-12",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/5272/000000527226000023/",
primaryDocument: "aig-20251231.htm",
},
requiredSurfaceKeys: {
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'premiums', 'claims_and_benefits'],
balance: ['policy_liabilities']
income: [
...UNIVERSAL_INCOME_KEYS,
...EXPENSE_BREAKDOWN_KEYS,
"premiums",
"claims_and_benefits",
],
balance: ["policy_liabilities"],
},
requiredKpiKeys: ['combined_ratio']
requiredKpiKeys: ["combined_ratio"],
},
{
name: 'reit-o-2026-02-25',
expectedPack: 'reit_real_estate',
name: "reit-o-2026-02-25",
expectedPack: "reit_real_estate",
input: {
filingId: 4,
ticker: 'O',
cik: '0000726728',
accessionNumber: '0000726728-26-000011',
filingDate: '2026-02-25',
filingType: '10-K',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/726728/000072672826000011/',
primaryDocument: 'o-20251231.htm'
ticker: "O",
cik: "0000726728",
accessionNumber: "0000726728-26-000011",
filingDate: "2026-02-25",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/726728/000072672826000011/",
primaryDocument: "o-20251231.htm",
},
requiredSurfaceKeys: {
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'rental_revenue'],
balance: ['investment_property', 'total_assets']
income: [
...UNIVERSAL_INCOME_KEYS,
...EXPENSE_BREAKDOWN_KEYS,
"rental_revenue",
],
balance: ["investment_property", "total_assets"],
},
requiredKpiKeys: ['property_count']
requiredKpiKeys: ["property_count"],
},
{
name: 'broker-blk-2026-02-25',
expectedPack: 'broker_asset_manager',
name: "broker-blk-2026-02-25",
expectedPack: "broker_asset_manager",
input: {
filingId: 5,
ticker: 'BLK',
cik: '0002012383',
accessionNumber: '0001193125-26-071966',
filingDate: '2026-02-25',
filingType: '10-K',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/2012383/000119312526071966/',
primaryDocument: 'blk-20251231.htm'
ticker: "BLK",
cik: "0002012383",
accessionNumber: "0001193125-26-071966",
filingDate: "2026-02-25",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/2012383/000119312526071966/",
primaryDocument: "blk-20251231.htm",
},
requiredSurfaceKeys: {
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'fee_revenue'],
balance: ['total_assets', 'total_liabilities']
income: [
...UNIVERSAL_INCOME_KEYS,
...EXPENSE_BREAKDOWN_KEYS,
"fee_revenue",
],
balance: ["total_assets", "total_liabilities"],
},
requiredKpiKeys: ['aum', 'fee_paying_aum']
}
requiredKpiKeys: ["aum", "fee_paying_aum"],
},
// NOTE(review): the cases from here on appear only in double-quoted form with
// no single-quoted counterpart — presumably cases introduced by this change;
// confirm against the parent revision. None of them lists requiredKpiKeys.
{
name: "software-orcl-2025-06-18",
expectedPack: "software",
input: {
filingId: 6,
ticker: "ORCL",
cik: "0001341439",
accessionNumber: "0000950170-25-087926",
filingDate: "2025-06-18",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/1341439/000095017025087926/",
primaryDocument: "orcl-20250531.htm",
},
requiredSurfaceKeys: {
income: [
...UNIVERSAL_INCOME_KEYS,
...EXPENSE_BREAKDOWN_KEYS,
"software_license_revenue",
"maintenance_and_support_revenue",
"cost_of_software_revenue",
],
balance: ["capitalized_software_costs", "deferred_revenue"],
},
},
{
name: "mining-fcx-2025-02-14",
expectedPack: "extractive_mining",
input: {
filingId: 7,
ticker: "FCX",
cik: "0000831259",
accessionNumber: "0000831259-25-000006",
filingDate: "2025-02-14",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/831259/000083125925000006/",
primaryDocument: "fcx-20241231.htm",
},
requiredSurfaceKeys: {
income: [
...UNIVERSAL_INCOME_KEYS,
...EXPENSE_BREAKDOWN_KEYS,
"mining_revenue",
"production_costs",
"exploration_expense",
],
balance: ["mining_properties", "rehabilitation_liabilities"],
},
},
{
name: "mortgage-rkt-2026-02-27",
expectedPack: "mortgage_banking",
input: {
filingId: 8,
ticker: "RKT",
cik: "0001805284",
accessionNumber: "0001628280-26-013283",
filingDate: "2026-02-27",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/1805284/000162828026013283/",
primaryDocument: "rkt-20251231.htm",
},
requiredSurfaceKeys: {
income: [
...UNIVERSAL_INCOME_KEYS,
...EXPENSE_BREAKDOWN_KEYS,
"mortgage_banking_revenue",
"servicing_fees",
"interest_income",
],
balance: ["loans_held_for_sale", "mortgage_servicing_rights"],
},
},
{
name: "casino-mgm-2026-02-11",
expectedPack: "entertainment_casinos",
input: {
filingId: 9,
ticker: "MGM",
cik: "0000789570",
accessionNumber: "0000789570-26-000018",
filingDate: "2026-02-11",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/789570/000078957026000018/",
primaryDocument: "mgm-20251231.htm",
},
requiredSurfaceKeys: {
income: [
...UNIVERSAL_INCOME_KEYS,
...EXPENSE_BREAKDOWN_KEYS,
"gaming_revenue",
"hotel_and_resort_revenue",
"food_and_beverage_revenue",
],
balance: ["casino_properties", "gaming_tax_liability"],
},
},
];
// Normalization warning codes that count as a validation failure: validateCase
// looks for the first warning contained in this set and, if one is found,
// records an "unexpected fallback warning=…" issue for the case.
// NOTE(review): single-quoted and double-quoted entries are the same two codes
// — apparently the before/after forms of the same lines.
const FALLBACK_WARNINGS = new Set([
'surface_rows_deferred_to_typescript',
'ts_compact_surface_fallback_used'
"surface_rows_deferred_to_typescript",
"ts_compact_surface_fallback_used",
]);
function parseCaseFilter(argv: string[]) {
for (const arg of argv) {
if (arg === '--help' || arg === '-h') {
console.log('Validate live SEC representative filings for each active taxonomy pack.');
console.log('');
console.log('Usage:');
console.log(' bun run scripts/validate-taxonomy-packs.ts');
console.log(' bun run scripts/validate-taxonomy-packs.ts --case=bank-jpm-2026-02-13');
if (arg === "--help" || arg === "-h") {
console.log(
"Validate live SEC representative filings for each active taxonomy pack.",
);
console.log("");
console.log("Usage:");
console.log(" bun run scripts/validate-taxonomy-packs.ts");
console.log(
" bun run scripts/validate-taxonomy-packs.ts --case=bank-jpm-2026-02-13",
);
process.exit(0);
}
if (arg.startsWith('--case=')) {
const value = arg.slice('--case='.length).trim();
if (arg.startsWith("--case=")) {
const value = arg.slice("--case=".length).trim();
return value.length > 0 ? value : null;
}
}
@@ -154,38 +281,50 @@ function parseCaseFilter(argv: string[]) {
/**
 * Lists the `key` of every surface row recorded for one statement kind.
 *
 * @param result - Resolved value of `hydrateFilingTaxonomySnapshot`.
 * @param statement - Statement kind used to index `result.surface_rows`.
 * @returns The row keys in row order, or an empty array when `surface_rows`
 *   has no entry for `statement`.
 */
function keysForStatement(
result: Awaited<ReturnType<typeof hydrateFilingTaxonomySnapshot>>,
// NOTE(review): the next two lines are the same parameter shown without and
// with a trailing comma — apparently its before/after forms.
statement: FinancialStatementKind
statement: FinancialStatementKind,
) {
// A missing statement entry is treated as "no rows" rather than an error.
return (result.surface_rows[statement] ?? []).map((row) => row.key);
}
async function validateCase(testCase: ValidationCase): Promise<ValidationFailure | null> {
async function validateCase(
testCase: ValidationCase,
): Promise<ValidationFailure | null> {
const startedAt = Date.now();
const result = await hydrateFilingTaxonomySnapshot(testCase.input);
const issues: string[] = [];
const warnings = result.normalization_summary.warnings ?? [];
const kpiKeys = result.kpi_rows.map((row) => row.key);
if (result.parse_status !== 'ready') {
issues.push(`parse_status=${result.parse_status}${result.parse_error ? ` parse_error=${result.parse_error}` : ''}`);
if (result.parse_status !== "ready") {
issues.push(
`parse_status=${result.parse_status}${result.parse_error ? ` parse_error=${result.parse_error}` : ""}`,
);
}
if (result.fiscal_pack !== testCase.expectedPack) {
issues.push(`fiscal_pack=${result.fiscal_pack ?? 'null'} expected=${testCase.expectedPack}`);
issues.push(
`fiscal_pack=${result.fiscal_pack ?? "null"} expected=${testCase.expectedPack}`,
);
}
if ((Object.values(result.surface_rows) as Array<Array<{ key: string }>>).every((rows) => rows.length === 0)) {
issues.push('surface_rows are empty');
if (
(Object.values(result.surface_rows) as Array<Array<{ key: string }>>).every(
(rows) => rows.length === 0,
)
) {
issues.push("surface_rows are empty");
}
const fallbackWarning = warnings.find((warning) => FALLBACK_WARNINGS.has(warning));
const fallbackWarning = warnings.find((warning) =>
FALLBACK_WARNINGS.has(warning),
);
if (fallbackWarning) {
issues.push(`unexpected fallback warning=${fallbackWarning}`);
}
for (const [statement, requiredKeys] of Object.entries(testCase.requiredSurfaceKeys) as Array<
[FinancialStatementKind, string[]]
>) {
for (const [statement, requiredKeys] of Object.entries(
testCase.requiredSurfaceKeys,
) as Array<[FinancialStatementKind, string[]]>) {
const actualKeys = new Set(keysForStatement(result, statement));
for (const requiredKey of requiredKeys) {
if (!actualKeys.has(requiredKey)) {
@@ -201,20 +340,20 @@ async function validateCase(testCase: ValidationCase): Promise<ValidationFailure
}
const durationMs = Date.now() - startedAt;
const incomeKeys = keysForStatement(result, 'income');
const balanceKeys = keysForStatement(result, 'balance');
const incomeKeys = keysForStatement(result, "income");
const balanceKeys = keysForStatement(result, "balance");
console.log(
[
`[validate-taxonomy-packs] ${testCase.name}`,
`status=${issues.length === 0 ? 'pass' : 'fail'}`,
`status=${issues.length === 0 ? "pass" : "fail"}`,
`parse=${result.parse_status}`,
`pack=${result.fiscal_pack ?? 'null'}`,
`income=${incomeKeys.join(',') || '-'}`,
`balance=${balanceKeys.join(',') || '-'}`,
`kpis=${kpiKeys.join(',') || '-'}`,
`warnings=${warnings.join(',') || '-'}`,
`durationMs=${durationMs}`
].join(' ')
`pack=${result.fiscal_pack ?? "null"}`,
`income=${incomeKeys.join(",") || "-"}`,
`balance=${balanceKeys.join(",") || "-"}`,
`kpis=${kpiKeys.join(",") || "-"}`,
`warnings=${warnings.join(",") || "-"}`,
`durationMs=${durationMs}`,
].join(" "),
);
if (issues.length === 0) {
@@ -223,12 +362,13 @@ async function validateCase(testCase: ValidationCase): Promise<ValidationFailure
return {
name: testCase.name,
issues
issues,
};
}
async function main() {
process.env.XBRL_ENGINE_TIMEOUT_MS = process.env.XBRL_ENGINE_TIMEOUT_MS ?? '180000';
process.env.XBRL_ENGINE_TIMEOUT_MS =
process.env.XBRL_ENGINE_TIMEOUT_MS ?? "180000";
const requestedCase = parseCaseFilter(process.argv.slice(2));
const selectedCases = requestedCase
@@ -253,7 +393,7 @@ async function main() {
} catch (error) {
failures.push({
name: testCase.name,
issues: [error instanceof Error ? error.message : String(error)]
issues: [error instanceof Error ? error.message : String(error)],
});
}
@@ -264,7 +404,7 @@ async function main() {
`[validate-taxonomy-packs] completed cases=${selectedCases.length} failures=${failures.length} durationSec=${(
(Date.now() - startedAt) /
1000
).toFixed(1)}`
).toFixed(1)}`,
);
if (failures.length === 0) {