import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';

// ---------------------------------------------------------------------------
// Taxonomy model (mirrors the JSON pack files read from TAXONOMY_DIR)
// ---------------------------------------------------------------------------

type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';

type FinancialCadence = 'annual' | 'quarterly' | 'ltm';

type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'equity' | 'comprehensive_income';

type SignTransform = 'invert' | 'absolute';

type SurfaceDefinition = {
  surface_key: string;
  statement: FinancialStatementKind;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?:
    | {
        op: 'sum' | 'subtract' | 'divide';
        sources: string[];
        treat_null_as_zero?: boolean;
      }
    | string
    | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  // Kept in sync with the SignTransform type emitted by generateTypesFile().
  sign_transform?: SignTransform;
};

type SurfacePackFile = {
  version: string;
  pack: string;
  surfaces: SurfaceDefinition[];
};

type ComputationSpec =
  | { type: 'ratio'; numerator: string; denominator: string }
  | { type: 'yoy_growth'; source: string }
  | { type: 'cagr'; source: string; years: number }
  | { type: 'per_share'; source: string; shares_key: string }
  | { type: 'simple'; formula: string };

type ComputedDefinition = {
  key: string;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  computation: ComputationSpec;
  supported_cadences?: FinancialCadence[];
  requires_external_data?: string[];
};

type ComputedPackFile = {
  version: string;
  pack: string;
  computed: ComputedDefinition[];
};

type KpiDefinition = {
  key: string;
  label: string;
  unit: string;
};

type KpiPackFile = {
  version: string;
  pack: string;
  kpis: KpiDefinition[];
};

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

const TAXONOMY_DIR = join(process.cwd(), 'rust', 'taxonomy', 'fiscal', 'v1');
const OUTPUT_DIR = join(process.cwd(), 'lib', 'generated');

/** Packs are loaded (and therefore emitted) in this order. */
const PACK_ORDER = ['core', 'bank_lender', 'insurance', 'reit_real_estate', 'broker_asset_manager'] as const;

type PackName = (typeof PACK_ORDER)[number];

/** Statement kinds accepted by validateSurfacePack(). */
const VALID_STATEMENTS: readonly FinancialStatementKind[] = [
  'income',
  'balance',
  'cash_flow',
  'equity',
  'comprehensive_income',
];

/**
 * Two-line banner placed at the top of every generated file. Each comment must
 * end with a line break, otherwise the `//` swallows the code that follows it.
 */
const GENERATED_HEADER = [
  '// Auto-generated by scripts/generate-taxonomy.ts',
  '// DO NOT EDIT MANUALLY - changes will be overwritten',
].join('\n');

// ---------------------------------------------------------------------------
// Small helpers
// ---------------------------------------------------------------------------

/** Writes a progress message to stdout with the script prefix. */
function log(message: string) {
  console.log(`[generate-taxonomy] ${message}`);
}

/** Turns a pack/file name into the identifier used for its exported constant. */
function toConstName(name: string): string {
  return name.toUpperCase().replace(/-/g, '_');
}

/** Returns true when `value` is a non-null object whose `field` is an array. */
function hasArrayField(value: unknown, field: string): boolean {
  return typeof value === 'object' && value !== null && Array.isArray((value as Record<string, unknown>)[field]);
}

/**
 * Reads one JSON file per entry of PACK_ORDER, skipping packs whose file does
 * not exist.
 *
 * @param listField - name of the array property every pack file must contain
 * @param pathFor - maps a pack name to the JSON file to read
 * @returns the parsed files keyed by pack name, in PACK_ORDER order
 * @throws Error naming the offending path when a file is not valid JSON or
 *   lacks the `listField` array
 */
function loadPacks<T>(listField: string, pathFor: (pack: PackName) => string): Map<PackName, T> {
  const packs = new Map<PackName, T>();

  for (const pack of PACK_ORDER) {
    const path = pathFor(pack);
    if (!existsSync(path)) {
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(path, 'utf8'));
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to read ${path}: ${reason}`);
    }

    if (!hasArrayField(parsed, listField)) {
      throw new Error(`${path}: expected a JSON object with a "${listField}" array`);
    }

    // Only the top-level shape is checked here; entry-level checks live in the
    // validate* functions.
    packs.set(pack, parsed as T);
  }

  return packs;
}

// ---------------------------------------------------------------------------
// Loading
// ---------------------------------------------------------------------------

/** Loads `<pack>.surface.json` for every pack that has one. */
function loadSurfacePacks(): Map<PackName, SurfacePackFile> {
  return loadPacks<SurfacePackFile>('surfaces', (pack) => join(TAXONOMY_DIR, `${pack}.surface.json`));
}

/** Loads `<pack>.computed.json` for every pack that has one. */
function loadComputedPacks(): Map<PackName, ComputedPackFile> {
  return loadPacks<ComputedPackFile>('computed', (pack) => join(TAXONOMY_DIR, `${pack}.computed.json`));
}

/** Loads `kpis/<pack>.kpis.json` for every pack that has one. */
function loadKpiPacks(): Map<PackName, KpiPackFile> {
  return loadPacks<KpiPackFile>('kpis', (pack) => join(TAXONOMY_DIR, 'kpis', `${pack}.kpis.json`));
}

// ---------------------------------------------------------------------------
// Validation
// ---------------------------------------------------------------------------

/**
 * Checks one surface pack and appends a message to `errors` for every problem:
 * a surface_key repeated within the same statement, an empty label, or a
 * statement outside VALID_STATEMENTS.
 *
 * @param pack - the parsed surface pack
 * @param errors - accumulator the messages are pushed onto (mutated)
 */
function validateSurfacePack(pack: SurfacePackFile, errors: string[]) {
  const keysByStatement = new Map<FinancialStatementKind, Set<string>>();

  for (const surface of pack.surfaces) {
    const seen = keysByStatement.get(surface.statement) ?? new Set<string>();
    if (seen.has(surface.surface_key)) {
      errors.push(
        `${pack.pack}: duplicate surface_key "${surface.surface_key}" in statement "${surface.statement}"`
      );
    }
    seen.add(surface.surface_key);
    keysByStatement.set(surface.statement, seen);

    if (!surface.label) {
      errors.push(`${pack.pack}: surface "${surface.surface_key}" missing label`);
    }

    if (!VALID_STATEMENTS.includes(surface.statement)) {
      errors.push(
        `${pack.pack}: surface "${surface.surface_key}" has invalid statement "${surface.statement}"`
      );
    }
  }
}

/**
 * Checks one computed pack and appends a message to `errors` for every
 * problem: duplicate keys, empty labels, and computation operands that are not
 * known surface keys. `simple` computations are not inspected.
 *
 * @param pack - the parsed computed pack
 * @param surfaceKeys - every surface_key known across all surface packs
 * @param errors - accumulator the messages are pushed onto (mutated)
 */
function validateComputedPack(pack: ComputedPackFile, surfaceKeys: Set<string>, errors: string[]) {
  const seenKeys = new Set<string>();
  const report = (key: string, role: string, ref: string) => {
    errors.push(`${pack.pack}: computed "${key}" references unknown ${role} "${ref}"`);
  };

  for (const computed of pack.computed) {
    if (seenKeys.has(computed.key)) {
      errors.push(`${pack.pack}: duplicate computed key "${computed.key}"`);
    }
    seenKeys.add(computed.key);

    if (!computed.label) {
      errors.push(`${pack.pack}: computed "${computed.key}" missing label`);
    }

    const spec = computed.computation;
    switch (spec.type) {
      case 'ratio':
        // NOTE(review): ratio operands containing "_" are accepted even when
        // they are not surface keys. Presumably this lets ratios point at
        // derived/computed keys; confirm, since it also hides typos in any
        // snake_case name.
        if (!surfaceKeys.has(spec.numerator) && !spec.numerator.includes('_')) {
          report(computed.key, 'numerator', spec.numerator);
        }
        if (!surfaceKeys.has(spec.denominator) && !spec.denominator.includes('_')) {
          report(computed.key, 'denominator', spec.denominator);
        }
        break;
      case 'yoy_growth':
      case 'cagr':
        if (!surfaceKeys.has(spec.source)) {
          report(computed.key, 'source', spec.source);
        }
        break;
      case 'per_share':
        if (!surfaceKeys.has(spec.source)) {
          report(computed.key, 'source', spec.source);
        }
        if (!surfaceKeys.has(spec.shares_key)) {
          report(computed.key, 'shares_key', spec.shares_key);
        }
        break;
      case 'simple':
        // Free-form formula: nothing to cross-check.
        break;
    }
  }
}

// ---------------------------------------------------------------------------
// Code generation (pure string builders)
// ---------------------------------------------------------------------------

/** Returns the source of `types.ts`: the shared type declarations and RATIO_CATEGORIES. */
function generateTypesFile(): string {
  return `${GENERATED_HEADER}

export type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';

export type FinancialCadence = 'annual' | 'quarterly' | 'ltm';

export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'equity' | 'comprehensive_income';

export type SignTransform = 'invert' | 'absolute';

export type ComputationSpec =
  | { type: 'ratio'; numerator: string; denominator: string }
  | { type: 'yoy_growth'; source: string }
  | { type: 'cagr'; source: string; years: number }
  | { type: 'per_share'; source: string; shares_key: string }
  | { type: 'simple'; formula: string };

export type SurfaceDefinition = {
  surface_key: string;
  statement: FinancialStatementKind;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?: {
    op: 'sum' | 'subtract' | 'divide';
    sources: string[];
    treat_null_as_zero?: boolean;
  } | string | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  sign_transform?: SignTransform;
};

export type ComputedDefinition = {
  key: string;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  computation: ComputationSpec;
  supported_cadences?: FinancialCadence[];
  requires_external_data?: string[];
};

export type KpiDefinition = {
  key: string;
  label: string;
  unit: string;
};

export const RATIO_CATEGORIES = ['margins', 'returns', 'financial_health', 'per_share', 'growth', 'valuation'] as const;

export type RatioCategory = (typeof RATIO_CATEGORIES)[number];
`;
}

/**
 * Returns the source of `surfaces/<statement>.ts`, exporting
 * `<STATEMENT>_SURFACES` with the surfaces sorted by ascending `order`.
 * The input array is not mutated.
 */
function generateSurfaceFile(statement: string, surfaces: SurfaceDefinition[]): string {
  const sorted = [...surfaces].sort((a, b) => a.order - b.order);
  const constName = `${statement.toUpperCase()}_SURFACES`;

  return [
    GENERATED_HEADER,
    '',
    "import type { SurfaceDefinition } from '../types';",
    '',
    `export const ${constName}: SurfaceDefinition[] = ${JSON.stringify(sorted, null, 2)};`,
    '',
  ].join('\n');
}

/**
 * Returns the source of `surfaces/index.ts`, which imports every per-statement
 * constant, groups them in ALL_SURFACES_BY_STATEMENT and re-exports them.
 * Statements are emitted in alphabetical order.
 */
function generateSurfacesIndex(surfacesByStatement: Map<string, SurfaceDefinition[]>): string {
  const statements = [...surfacesByStatement.keys()].sort();
  const constNames = statements.map((s) => `${s.toUpperCase()}_SURFACES`);

  const imports = statements.map((s, i) => `import { ${constNames[i]} } from './${s}';`).join('\n');
  const entries = statements.map((s, i) => `  ${s}: ${constNames[i]},`).join('\n');

  return [
    GENERATED_HEADER,
    '',
    imports,
    '',
    'export const ALL_SURFACES_BY_STATEMENT = {',
    entries,
    '} as const;',
    '',
    `export { ${constNames.join(', ')} };`,
    '',
  ].join('\n');
}

/**
 * Returns the source of `computed/<name>.ts`, exporting the definitions sorted
 * by ascending `order` under the upper-cased pack name.
 */
function generateComputedFile(name: string, definitions: ComputedDefinition[]): string {
  const sorted = [...definitions].sort((a, b) => a.order - b.order);
  const constName = toConstName(name);

  return [
    GENERATED_HEADER,
    '',
    "import type { ComputedDefinition } from '../types';",
    '',
    `export const ${constName}: ComputedDefinition[] = ${JSON.stringify(sorted, null, 2)};`,
    '',
  ].join('\n');
}

/**
 * Returns the source of `computed/index.ts`. Besides re-exporting each pack it
 * emits ALL_COMPUTED plus two inlined, order-sorted partitions: definitions
 * with no `requires_external_data` entries (FILING_DERIVED_COMPUTED) and those
 * with at least one (MARKET_DERIVED_COMPUTED).
 */
function generateComputedIndex(files: { name: string; definitions: ComputedDefinition[] }[]): string {
  const constNames = files.map((f) => toConstName(f.name));
  const imports = files.map((f, i) => `import { ${constNames[i]} } from './${f.name}';`).join('\n');
  const spreads = constNames.map((c) => `  ...${c},`).join('\n');

  const needsExternalData = (d: ComputedDefinition) => (d.requires_external_data?.length ?? 0) > 0;
  const byOrder = (a: ComputedDefinition, b: ComputedDefinition) => a.order - b.order;
  const all = files.flatMap((f) => f.definitions);
  const filingDerived = all.filter((d) => !needsExternalData(d)).sort(byOrder);
  const marketDerived = all.filter(needsExternalData).sort(byOrder);

  return [
    GENERATED_HEADER,
    '',
    "import type { ComputedDefinition } from '../types';",
    imports,
    '',
    'export const ALL_COMPUTED: ComputedDefinition[] = [',
    spreads,
    '];',
    '',
    `export const FILING_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(filingDerived, null, 2)};`,
    '',
    `export const MARKET_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(marketDerived, null, 2)};`,
    '',
    `export { ${constNames.join(', ')} };`,
    '',
  ].join('\n');
}

/** Returns the source of `kpis/<pack>.ts`, exporting `<PACK>_KPIS` in file order. */
function generateKpiFile(pack: string, kpis: KpiDefinition[]): string {
  const constName = `${toConstName(pack)}_KPIS`;

  return [
    GENERATED_HEADER,
    '',
    "import type { KpiDefinition } from '../types';",
    '',
    `export const ${constName}: KpiDefinition[] = ${JSON.stringify(kpis, null, 2)};`,
    '',
  ].join('\n');
}

/** Returns the source of `kpis/index.ts`: ALL_KPIS plus a re-export of every pack constant. */
function generateKpiIndex(packs: { pack: string; kpis: KpiDefinition[] }[]): string {
  const constNames = packs.map((p) => `${toConstName(p.pack)}_KPIS`);
  const imports = packs.map((p, i) => `import { ${constNames[i]} } from './${p.pack}';`).join('\n');
  const spreads = constNames.map((c) => `  ...${c},`).join('\n');

  return [
    GENERATED_HEADER,
    '',
    "import type { KpiDefinition } from '../types';",
    imports,
    '',
    'export const ALL_KPIS: KpiDefinition[] = [',
    spreads,
    '];',
    '',
    `export { ${constNames.join(', ')} };`,
    '',
  ].join('\n');
}

/**
 * Returns the source of the top-level `index.ts` barrel.
 *
 * NOTE(review): the re-exported names are hard-coded. INCOME/BALANCE/CASH_FLOW
 * surfaces, CORE and CORE_KPIS only exist in the sibling index files when the
 * core packs were found and contain those statements; main() does not verify
 * that before writing this file.
 */
function generateMainIndex(): string {
  return `${GENERATED_HEADER}

export type {
  FinancialUnit,
  FinancialCadence,
  FinancialStatementKind,
  ComputationSpec,
  SurfaceDefinition,
  ComputedDefinition,
  KpiDefinition,
} from './types';

export { RATIO_CATEGORIES, type RatioCategory } from './types';

export {
  INCOME_SURFACES,
  BALANCE_SURFACES,
  CASH_FLOW_SURFACES,
  ALL_SURFACES_BY_STATEMENT,
} from './surfaces';

export {
  ALL_COMPUTED,
  FILING_DERIVED_COMPUTED,
  MARKET_DERIVED_COMPUTED,
  CORE,
} from './computed';

export { ALL_KPIS, CORE_KPIS } from './kpis';
`;
}

// ---------------------------------------------------------------------------
// Entry point
// ---------------------------------------------------------------------------

/**
 * Loads every pack, validates them, and writes the generated TypeScript tree
 * under OUTPUT_DIR. Exits the process with status 1 when validation reports
 * any error; nothing is written in that case.
 */
async function main() {
  log('Loading taxonomy files...');
  const surfacePacks = loadSurfacePacks();
  const computedPacks = loadComputedPacks();
  const kpiPacks = loadKpiPacks();
  log(
    `Loaded ${surfacePacks.size} surface packs, ${computedPacks.size} computed packs, ${kpiPacks.size} KPI packs`
  );

  const errors: string[] = [];

  log('Validating taxonomy files...');
  const allSurfaceKeys = new Set<string>();
  for (const pack of surfacePacks.values()) {
    validateSurfacePack(pack, errors);
    for (const surface of pack.surfaces) {
      allSurfaceKeys.add(surface.surface_key);
    }
  }
  // Computed packs may reference surfaces from any pack, so they are checked
  // only after every surface key has been collected.
  for (const pack of computedPacks.values()) {
    validateComputedPack(pack, allSurfaceKeys, errors);
  }

  if (errors.length > 0) {
    console.error('Validation errors:');
    for (const error of errors) {
      console.error(` - ${error}`);
    }
    process.exit(1);
  }

  log('Creating output directories...');
  for (const dir of ['surfaces', 'computed', 'kpis']) {
    mkdirSync(join(OUTPUT_DIR, dir), { recursive: true });
  }

  log('Generating types...');
  writeFileSync(join(OUTPUT_DIR, 'types.ts'), generateTypesFile());

  log('Generating surfaces...');
  // Only the core pack's surfaces are emitted; other packs take part in
  // validation but produce no surface files.
  const coreSurfaces = surfacePacks.get('core');
  if (coreSurfaces) {
    const surfacesByStatement = new Map<string, SurfaceDefinition[]>();
    for (const surface of coreSurfaces.surfaces) {
      const bucket = surfacesByStatement.get(surface.statement) ?? [];
      bucket.push(surface);
      surfacesByStatement.set(surface.statement, bucket);
    }

    for (const [statement, surfaces] of surfacesByStatement) {
      writeFileSync(join(OUTPUT_DIR, 'surfaces', `${statement}.ts`), generateSurfaceFile(statement, surfaces));
    }
    writeFileSync(join(OUTPUT_DIR, 'surfaces', 'index.ts'), generateSurfacesIndex(surfacesByStatement));
  }

  log('Generating computed definitions...');
  const computedFiles: { name: string; definitions: ComputedDefinition[] }[] = [];
  for (const [pack, file] of computedPacks) {
    computedFiles.push({ name: pack, definitions: file.computed });
    writeFileSync(join(OUTPUT_DIR, 'computed', `${pack}.ts`), generateComputedFile(pack, file.computed));
  }
  writeFileSync(join(OUTPUT_DIR, 'computed', 'index.ts'), generateComputedIndex(computedFiles));

  log('Generating KPI definitions...');
  const kpiFiles: { pack: string; kpis: KpiDefinition[] }[] = [];
  for (const [pack, file] of kpiPacks) {
    kpiFiles.push({ pack, kpis: file.kpis });
    writeFileSync(join(OUTPUT_DIR, 'kpis', `${pack}.ts`), generateKpiFile(pack, file.kpis));
  }
  writeFileSync(join(OUTPUT_DIR, 'kpis', 'index.ts'), generateKpiIndex(kpiFiles));

  log('Generating main index...');
  writeFileSync(join(OUTPUT_DIR, 'index.ts'), generateMainIndex());

  const surfaceCount = coreSurfaces?.surfaces.length ?? 0;
  const computedCount = computedFiles.reduce((sum, f) => sum + f.definitions.length, 0);
  const kpiCount = kpiFiles.reduce((sum, f) => sum + f.kpis.length, 0);

  log(`Generated ${surfaceCount} surfaces, ${computedCount} computed definitions, ${kpiCount} KPIs`);
  log(`Output written to ${OUTPUT_DIR}`);
}

main().catch((error: unknown) => {
  console.error('Generation failed:', error);
  process.exit(1);
});