import {
  existsSync,
  mkdirSync,
  readdirSync,
  readFileSync,
  writeFileSync,
} from "node:fs";
import { join } from "node:path";

type FinancialUnit = "currency" | "percent" | "ratio" | "shares" | "count";

type FinancialCadence = "annual" | "quarterly" | "ltm";

type FinancialStatementKind =
  | "income"
  | "balance"
  | "cash_flow"
  | "disclosure"
  | "equity"
  | "comprehensive_income";

type SignTransform = "invert" | "absolute";

/** One surface entry as read from a `<pack>.surface.json` file. */
type SurfaceDefinition = {
  surface_key: string;
  statement: FinancialStatementKind;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?:
    | {
        op: "sum" | "subtract" | "divide";
        sources: string[];
        treat_null_as_zero?: boolean;
      }
    | string
    | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  // Kept in sync with the SignTransform type emitted by generateTypesFile().
  sign_transform?: SignTransform;
};

type SurfacePackFile = {
  version: string;
  pack: string;
  surfaces: SurfaceDefinition[];
};

type ComputationSpec =
  | { type: "ratio"; numerator: string; denominator: string }
  | { type: "yoy_growth"; source: string }
  | { type: "cagr"; source: string; years: number }
  | { type: "per_share"; source: string; shares_key: string }
  | { type: "simple"; formula: string };

/** One computed-metric entry as read from a `<pack>.computed.json` file. */
type ComputedDefinition = {
  key: string;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  computation: ComputationSpec;
  supported_cadences?: FinancialCadence[];
  requires_external_data?: string[];
};

type ComputedPackFile = {
  version: string;
  pack: string;
  computed: ComputedDefinition[];
};

type KpiDefinition = {
  key: string;
  label: string;
  unit: string;
};

type KpiPackFile = {
  version: string;
  pack: string;
  kpis: KpiDefinition[];
};

// Both directories are resolved against the current working directory.
const TAXONOMY_DIR = join(process.cwd(), "rust", "taxonomy", "fiscal", "v1");
const OUTPUT_DIR = join(process.cwd(), "lib", "generated");

/** Packs are loaded (and therefore emitted) in this fixed order. */
const PACK_ORDER = [
  "core",
  "bank_lender",
  "insurance",
  "reit_real_estate",
  "broker_asset_manager",
] as const;

type PackName = (typeof PACK_ORDER)[number];

/** Statement kinds accepted by validateSurfacePack(). */
const VALID_STATEMENTS: readonly string[] = [
  "income",
  "balance",
  "cash_flow",
  "disclosure",
  "equity",
  "comprehensive_income",
];

/** Statements whose `*_SURFACES` constant the main index re-exports by name. */
const MAIN_INDEX_STATEMENTS: readonly string[] = [
  "income",
  "balance",
  "cash_flow",
];

/** Banner placed at the top of every generated file. */
const GENERATED_HEADER = [
  "// Auto-generated by scripts/generate-taxonomy.ts",
  "// DO NOT EDIT MANUALLY - changes will be overwritten",
].join("\n");

/** Writes a progress message to stdout, tagged with the script name. */
function log(message: string) {
  console.log(`[generate-taxonomy] ${message}`);
}

/** Turns a pack or file name into the identifier stem used for generated constants. */
function toConstName(name: string): string {
  return name.toUpperCase().replace(/-/g, "_");
}

/**
 * Reads one JSON file per pack in PACK_ORDER, skipping packs whose file does
 * not exist.
 *
 * The parsed JSON is cast to `T` without structural validation.
 *
 * @param pathFor - Maps a pack name to the file that should be read for it.
 * @returns The parsed files keyed by pack name, in PACK_ORDER order.
 * @throws Error naming the offending file when its content is not valid JSON.
 */
function loadPacks<T>(pathFor: (pack: PackName) => string): Map<PackName, T> {
  const packs = new Map<PackName, T>();
  for (const pack of PACK_ORDER) {
    const path = pathFor(pack);
    if (!existsSync(path)) {
      continue;
    }
    const raw = readFileSync(path, "utf8");
    try {
      packs.set(pack, JSON.parse(raw) as T);
    } catch (error) {
      // A bare SyntaxError does not say which file was malformed.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to parse ${path}: ${reason}`);
    }
  }
  return packs;
}

/** Loads every existing `<pack>.surface.json` from TAXONOMY_DIR. */
function loadSurfacePacks(): Map<PackName, SurfacePackFile> {
  return loadPacks<SurfacePackFile>((pack) =>
    join(TAXONOMY_DIR, `${pack}.surface.json`),
  );
}

/** Loads every existing `<pack>.computed.json` from TAXONOMY_DIR. */
function loadComputedPacks(): Map<PackName, ComputedPackFile> {
  return loadPacks<ComputedPackFile>((pack) =>
    join(TAXONOMY_DIR, `${pack}.computed.json`),
  );
}

/** Loads every existing `kpis/<pack>.kpis.json` from TAXONOMY_DIR. */
function loadKpiPacks(): Map<PackName, KpiPackFile> {
  return loadPacks<KpiPackFile>((pack) =>
    join(TAXONOMY_DIR, "kpis", `${pack}.kpis.json`),
  );
}

/**
 * Checks one surface pack and appends a message to `errors` for each problem:
 * a surface_key repeated within the same statement, an empty label, or a
 * statement outside VALID_STATEMENTS.
 *
 * @param pack - The surface pack to check.
 * @param errors - Collector that receives the messages; mutated in place.
 */
function validateSurfacePack(pack: SurfacePackFile, errors: string[]) {
  const keysByStatement = new Map<string, Set<string>>();

  for (const surface of pack.surfaces) {
    let seenKeys = keysByStatement.get(surface.statement);
    if (seenKeys === undefined) {
      seenKeys = new Set<string>();
      keysByStatement.set(surface.statement, seenKeys);
    }

    if (seenKeys.has(surface.surface_key)) {
      errors.push(
        `${pack.pack}: duplicate surface_key "${surface.surface_key}" in statement "${surface.statement}"`,
      );
    }
    seenKeys.add(surface.surface_key);

    if (!surface.label) {
      errors.push(
        `${pack.pack}: surface "${surface.surface_key}" missing label`,
      );
    }

    // The value comes from unvalidated JSON, so it may fall outside the union.
    if (!VALID_STATEMENTS.includes(surface.statement)) {
      errors.push(
        `${pack.pack}: surface "${surface.surface_key}" has invalid statement "${surface.statement}"`,
      );
    }
  }
}

/**
 * Checks one computed pack and appends a message to `errors` for each problem:
 * a duplicate key, an empty label, or a computation that references a key not
 * present in `surfaceKeys`.
 *
 * `simple` computations are not inspected.
 *
 * @param pack - The computed pack to check.
 * @param surfaceKeys - Every surface_key known across the loaded surface packs.
 * @param errors - Collector that receives the messages; mutated in place.
 */
function validateComputedPack(
  pack: ComputedPackFile,
  surfaceKeys: Set<string>,
  errors: string[],
) {
  const seenKeys = new Set<string>();

  // NOTE(review): ratio operands containing "_" are accepted even when they
  // are not surface keys. Presumably this lets ratios reference keys defined
  // outside the surface packs, but it also lets most typos through - confirm.
  const isAcceptedRatioOperand = (reference: string): boolean =>
    surfaceKeys.has(reference) || reference.includes("_");

  for (const computed of pack.computed) {
    const reportUnknown = (field: string, reference: string): void => {
      errors.push(
        `${pack.pack}: computed "${computed.key}" references unknown ${field} "${reference}"`,
      );
    };

    if (seenKeys.has(computed.key)) {
      errors.push(`${pack.pack}: duplicate computed key "${computed.key}"`);
    }
    seenKeys.add(computed.key);

    if (!computed.label) {
      errors.push(`${pack.pack}: computed "${computed.key}" missing label`);
    }

    const spec = computed.computation;
    switch (spec.type) {
      case "ratio":
        if (!isAcceptedRatioOperand(spec.numerator)) {
          reportUnknown("numerator", spec.numerator);
        }
        if (!isAcceptedRatioOperand(spec.denominator)) {
          reportUnknown("denominator", spec.denominator);
        }
        break;
      case "yoy_growth":
      case "cagr":
        if (!surfaceKeys.has(spec.source)) {
          reportUnknown("source", spec.source);
        }
        break;
      case "per_share":
        if (!surfaceKeys.has(spec.source)) {
          reportUnknown("source", spec.source);
        }
        if (!surfaceKeys.has(spec.shares_key)) {
          reportUnknown("shares_key", spec.shares_key);
        }
        break;
      case "simple":
        // Free-form formula string: nothing to cross-check here.
        break;
    }
  }
}

/** Returns the source text of the generated `types.ts`. */
function generateTypesFile(): string {
  return `${GENERATED_HEADER}

export type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';

export type FinancialCadence = 'annual' | 'quarterly' | 'ltm';

export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'disclosure' | 'equity' | 'comprehensive_income';

export type SignTransform = 'invert' | 'absolute';

export type ComputationSpec =
  | { type: 'ratio'; numerator: string; denominator: string }
  | { type: 'yoy_growth'; source: string }
  | { type: 'cagr'; source: string; years: number }
  | { type: 'per_share'; source: string; shares_key: string }
  | { type: 'simple'; formula: string };

export type SurfaceDefinition = {
  surface_key: string;
  statement: FinancialStatementKind;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?: {
    op: 'sum' | 'subtract' | 'divide';
    sources: string[];
    treat_null_as_zero?: boolean;
  } | string | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  sign_transform?: SignTransform;
};

export type ComputedDefinition = {
  key: string;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  computation: ComputationSpec;
  supported_cadences?: FinancialCadence[];
  requires_external_data?: string[];
};

export type KpiDefinition = {
  key: string;
  label: string;
  unit: string;
};

export const RATIO_CATEGORIES = ['margins', 'returns', 'financial_health', 'per_share', 'growth', 'valuation'] as const;

export type RatioCategory = (typeof RATIO_CATEGORIES)[number];
`;
}

/**
 * Returns the source text of `surfaces/<statement>.ts`, exporting the given
 * surfaces sorted by ascending `order` as `<STATEMENT>_SURFACES`.
 *
 * @param statement - Statement name; upper-cased to build the constant name.
 * @param surfaces - Surfaces belonging to that statement; not mutated.
 */
function generateSurfaceFile(
  statement: string,
  surfaces: SurfaceDefinition[],
): string {
  const sorted = [...surfaces].sort((a, b) => a.order - b.order);
  const constName = `${statement.toUpperCase()}_SURFACES`;

  return `${GENERATED_HEADER}

import type { SurfaceDefinition } from '../types';

export const ${constName}: SurfaceDefinition[] = ${JSON.stringify(sorted, null, 2)};
`;
}

/**
 * Returns the source text of `surfaces/index.ts`, which imports every
 * per-statement constant, groups them in ALL_SURFACES_BY_STATEMENT and
 * re-exports them. Statements are emitted in sorted order.
 *
 * @param surfacesByStatement - Surfaces keyed by statement; only keys are used.
 */
function generateSurfacesIndex(
  surfacesByStatement: Map<string, SurfaceDefinition[]>,
): string {
  const statements = [...surfacesByStatement.keys()].sort();
  const constNames = statements.map((s) => `${s.toUpperCase()}_SURFACES`);

  const imports = statements
    .map((s) => `import { ${s.toUpperCase()}_SURFACES } from './${s}';`)
    .join("\n");
  const exports = statements
    .map((s) => `  ${s}: ${s.toUpperCase()}_SURFACES,`)
    .join("\n");

  return `${GENERATED_HEADER}

${imports}

export const ALL_SURFACES_BY_STATEMENT = {
${exports}
} as const;

export { ${constNames.join(", ")} };
`;
}

/**
 * Returns the source text of `computed/<name>.ts`, exporting the definitions
 * sorted by ascending `order` under the upper-cased file name.
 *
 * @param name - File/pack name; hyphens become underscores in the constant.
 * @param definitions - Computed definitions for that pack; not mutated.
 */
function generateComputedFile(
  name: string,
  definitions: ComputedDefinition[],
): string {
  const sorted = [...definitions].sort((a, b) => a.order - b.order);
  const constName = toConstName(name);

  return `${GENERATED_HEADER}

import type { ComputedDefinition } from '../types';

export const ${constName}: ComputedDefinition[] = ${JSON.stringify(sorted, null, 2)};
`;
}

/**
 * Returns the source text of `computed/index.ts`.
 *
 * ALL_COMPUTED spreads the per-pack constants in the order given. Definitions
 * are additionally inlined into FILING_DERIVED_COMPUTED (no
 * `requires_external_data` entries) or MARKET_DERIVED_COMPUTED (at least one),
 * each sorted by ascending `order`.
 *
 * @param files - One entry per generated computed file.
 */
function generateComputedIndex(
  files: { name: string; definitions: ComputedDefinition[] }[],
): string {
  const constNames = files.map((f) => toConstName(f.name));

  const imports = files
    .map((f) => `import { ${toConstName(f.name)} } from './${f.name}';`)
    .join("\n");
  const allExports = constNames.map((name) => `  ...${name},`).join("\n");

  const needsExternalData = (d: ComputedDefinition): boolean =>
    d.requires_external_data !== undefined &&
    d.requires_external_data !== null &&
    d.requires_external_data.length > 0;

  const allDefinitions = files.flatMap((f) => f.definitions);
  const filingDerived = allDefinitions
    .filter((d) => !needsExternalData(d))
    .sort((a, b) => a.order - b.order);
  const marketDerived = allDefinitions
    .filter((d) => needsExternalData(d))
    .sort((a, b) => a.order - b.order);

  return `${GENERATED_HEADER}

import type { ComputedDefinition } from '../types';
${imports}

export const ALL_COMPUTED: ComputedDefinition[] = [
${allExports}
];

export const FILING_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(filingDerived, null, 2)};

export const MARKET_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(marketDerived, null, 2)};

export { ${constNames.join(", ")} };
`;
}

/**
 * Returns the source text of `kpis/<pack>.ts`, exporting the KPIs in their
 * given order as `<PACK>_KPIS`.
 *
 * @param pack - Pack name; hyphens become underscores in the constant.
 * @param kpis - KPI definitions for that pack.
 */
function generateKpiFile(pack: string, kpis: KpiDefinition[]): string {
  const constName = `${toConstName(pack)}_KPIS`;

  return `${GENERATED_HEADER}

import type { KpiDefinition } from '../types';

export const ${constName}: KpiDefinition[] = ${JSON.stringify(kpis, null, 2)};
`;
}

/**
 * Returns the source text of `kpis/index.ts`, which imports every per-pack
 * constant, spreads them into ALL_KPIS and re-exports them.
 *
 * @param packs - One entry per generated KPI file; only `pack` is used.
 */
function generateKpiIndex(
  packs: { pack: string; kpis: KpiDefinition[] }[],
): string {
  const constNames = packs.map((p) => `${toConstName(p.pack)}_KPIS`);

  const imports = packs
    .map((p) => `import { ${toConstName(p.pack)}_KPIS } from './${p.pack}';`)
    .join("\n");
  const exports = constNames.map((name) => `  ...${name},`).join("\n");

  return `${GENERATED_HEADER}

import type { KpiDefinition } from '../types';
${imports}

export const ALL_KPIS: KpiDefinition[] = [
${exports}
];

export { ${constNames.join(", ")} };
`;
}

/**
 * Returns the source text of the top-level `index.ts`.
 *
 * Called without arguments it re-exports the income, balance and cash-flow
 * surface constants, `CORE` and `CORE_KPIS`. The parameters let the caller
 * drop re-exports of symbols that were not generated, so the index never
 * refers to a missing module or export.
 *
 * @param surfaceStatements - Statements that have a generated surfaces file,
 *   or `null` when `surfaces/index.ts` was not generated at all. Only
 *   statements listed in MAIN_INDEX_STATEMENTS are re-exported by name.
 * @param hasCoreComputed - Whether `computed/index.ts` exports `CORE`.
 * @param hasCoreKpis - Whether `kpis/index.ts` exports `CORE_KPIS`.
 */
function generateMainIndex(
  surfaceStatements: readonly string[] | null = MAIN_INDEX_STATEMENTS,
  hasCoreComputed = true,
  hasCoreKpis = true,
): string {
  const multiLineReExport = (names: string[], from: string): string =>
    `export {\n${names.map((n) => `  ${n},`).join("\n")}\n} from '${from}';`;

  const sections: string[] = [
    GENERATED_HEADER,
    `export type {
  FinancialUnit,
  FinancialCadence,
  FinancialStatementKind,
  ComputationSpec,
  SurfaceDefinition,
  ComputedDefinition,
  KpiDefinition,
} from './types';`,
    `export { RATIO_CATEGORIES, type RatioCategory } from './types';`,
  ];

  if (surfaceStatements !== null) {
    const surfaceNames = MAIN_INDEX_STATEMENTS.filter((s) =>
      surfaceStatements.includes(s),
    ).map((s) => `${s.toUpperCase()}_SURFACES`);
    surfaceNames.push("ALL_SURFACES_BY_STATEMENT");
    sections.push(multiLineReExport(surfaceNames, "./surfaces"));
  }

  const computedNames = [
    "ALL_COMPUTED",
    "FILING_DERIVED_COMPUTED",
    "MARKET_DERIVED_COMPUTED",
  ];
  if (hasCoreComputed) {
    computedNames.push("CORE");
  }
  sections.push(multiLineReExport(computedNames, "./computed"));

  const kpiNames = hasCoreKpis ? ["ALL_KPIS", "CORE_KPIS"] : ["ALL_KPIS"];
  sections.push(`export { ${kpiNames.join(", ")} } from './kpis';`);

  return `${sections.join("\n\n")}\n`;
}

/**
 * Script entry point: loads all packs, validates them (exiting with status 1
 * and a printed list when any validation error is found), then writes the
 * generated TypeScript files under OUTPUT_DIR.
 *
 * Surface files are generated from the "core" surface pack only.
 */
async function main() {
  log("Loading taxonomy files...");
  const surfacePacks = loadSurfacePacks();
  const computedPacks = loadComputedPacks();
  const kpiPacks = loadKpiPacks();
  log(
    `Loaded ${surfacePacks.size} surface packs, ${computedPacks.size} computed packs, ${kpiPacks.size} KPI packs`,
  );

  const errors: string[] = [];

  log("Validating taxonomy files...");
  const allSurfaceKeys = new Set<string>();
  for (const pack of surfacePacks.values()) {
    validateSurfacePack(pack, errors);
    for (const surface of pack.surfaces) {
      allSurfaceKeys.add(surface.surface_key);
    }
  }
  // Computed packs may reference surfaces from any pack, so this runs only
  // after every surface key has been collected.
  for (const pack of computedPacks.values()) {
    validateComputedPack(pack, allSurfaceKeys, errors);
  }

  if (errors.length > 0) {
    console.error("Validation errors:");
    for (const error of errors) {
      console.error(`  - ${error}`);
    }
    process.exit(1);
  }

  log("Creating output directories...");
  mkdirSync(join(OUTPUT_DIR, "surfaces"), { recursive: true });
  mkdirSync(join(OUTPUT_DIR, "computed"), { recursive: true });
  mkdirSync(join(OUTPUT_DIR, "kpis"), { recursive: true });

  log("Generating types...");
  writeFileSync(join(OUTPUT_DIR, "types.ts"), generateTypesFile());

  log("Generating surfaces...");
  // Stays null when there is no core pack, i.e. no surfaces/index.ts is written.
  let generatedStatements: string[] | null = null;
  const coreSurfaces = surfacePacks.get("core");
  if (coreSurfaces) {
    const surfacesByStatement = new Map<string, SurfaceDefinition[]>();
    for (const surface of coreSurfaces.surfaces) {
      const bucket = surfacesByStatement.get(surface.statement);
      if (bucket) {
        bucket.push(surface);
      } else {
        surfacesByStatement.set(surface.statement, [surface]);
      }
    }

    for (const [statement, surfaces] of surfacesByStatement) {
      writeFileSync(
        join(OUTPUT_DIR, "surfaces", `${statement}.ts`),
        generateSurfaceFile(statement, surfaces),
      );
    }

    writeFileSync(
      join(OUTPUT_DIR, "surfaces", "index.ts"),
      generateSurfacesIndex(surfacesByStatement),
    );
    generatedStatements = [...surfacesByStatement.keys()];
  }

  log("Generating computed definitions...");
  const computedFiles: { name: string; definitions: ComputedDefinition[] }[] =
    [];
  for (const [pack, file] of computedPacks) {
    computedFiles.push({ name: pack, definitions: file.computed });
    writeFileSync(
      join(OUTPUT_DIR, "computed", `${pack}.ts`),
      generateComputedFile(pack, file.computed),
    );
  }
  writeFileSync(
    join(OUTPUT_DIR, "computed", "index.ts"),
    generateComputedIndex(computedFiles),
  );

  log("Generating KPI definitions...");
  const kpiFiles: { pack: string; kpis: KpiDefinition[] }[] = [];
  for (const [pack, file] of kpiPacks) {
    kpiFiles.push({ pack, kpis: file.kpis });
    writeFileSync(
      join(OUTPUT_DIR, "kpis", `${pack}.ts`),
      generateKpiFile(pack, file.kpis),
    );
  }
  writeFileSync(
    join(OUTPUT_DIR, "kpis", "index.ts"),
    generateKpiIndex(kpiFiles),
  );

  log("Generating main index...");
  // Re-export only what was actually written above.
  writeFileSync(
    join(OUTPUT_DIR, "index.ts"),
    generateMainIndex(
      generatedStatements,
      computedPacks.has("core"),
      kpiPacks.has("core"),
    ),
  );

  const surfaceCount = coreSurfaces?.surfaces.length ?? 0;
  const computedCount = computedFiles.reduce(
    (sum, f) => sum + f.definitions.length,
    0,
  );
  const kpiCount = kpiFiles.reduce((sum, f) => sum + f.kpis.length, 0);

  log(
    `Generated ${surfaceCount} surfaces, ${computedCount} computed definitions, ${kpiCount} KPIs`,
  );
  log(`Output written to ${OUTPUT_DIR}`);
}

main().catch((error) => {
  console.error("Generation failed:", error);
  process.exit(1);
});