Consolidate metric definitions with Rust JSON as single source of truth

- Add core.computed.json with 32 ratio definitions (filing + market derived)
- Add Rust types for ComputedDefinition and ComputationSpec
- Create generate-taxonomy.ts to generate TypeScript from Rust JSON
- Generate lib/generated/ (gitignored) with surfaces, computed, kpis
- Update financial-metrics.ts to use generated definitions
- Add build-time generation via 'bun run generate'
- Add taxonomy architecture documentation

Two-phase ratio computation:
- Filing-derived: margins, returns, per-share, growth (Rust computes)
- Market-derived: valuation ratios (TypeScript computes with price data)

All 32 ratios defined in core.computed.json:
- Margins: gross, operating, ebitda, net, fcf
- Returns: roa, roe, roic, roce
- Financial health: debt_to_equity, net_debt_to_ebitda, cash_to_debt, current_ratio
- Per-share: revenue, fcf, book_value
- Growth: yoy metrics + 3y/5y cagr
- Valuation: market_cap, ev, p/e, p/fcf, p/b, ev/sales, ev/ebitda, ev/fcf
This commit is contained in:
2026-03-15 15:22:51 -04:00
parent ed4420b8db
commit 24aa8e33d4
11 changed files with 1453 additions and 123 deletions

View File

@@ -0,0 +1,529 @@
import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
/** Unit attached to surface and computed definitions via their `unit` field. */
type FinancialUnit =
  | 'currency'
  | 'percent'
  | 'ratio'
  | 'shares'
  | 'count';
/** Cadence values a computed definition may list in `supported_cadences`. */
type FinancialCadence =
  | 'annual'
  | 'quarterly'
  | 'ltm';
/** Statement a surface belongs to; also the set accepted by surface validation. */
type FinancialStatementKind =
  | 'income'
  | 'balance'
  | 'cash_flow'
  | 'equity'
  | 'comprehensive_income';
/** Named sign transforms; mirrors the `SignTransform` alias written into the generated types file. */
type SignTransform =
  | 'invert'
  | 'absolute';
/**
 * One entry of a `*.surface.json` pack, as read by this script.
 *
 * Kept in step with the `SurfaceDefinition` type that `generateTypesFile()`
 * emits, including `sign_transform`, which accepts any `SignTransform` value.
 */
type SurfaceDefinition = {
  /** Identifier; validation requires it to be unique per statement within a pack. */
  surface_key: string;
  /** Statement the surface is grouped under when files are generated. */
  statement: FinancialStatementKind;
  /** Display label; validation reports an error when it is empty. */
  label: string;
  category: string;
  /** Sort key used when the generated surface arrays are written. */
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?:
    | {
        op: 'sum' | 'subtract' | 'divide';
        sources: string[];
        treat_null_as_zero?: boolean;
      }
    | string
    | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  sign_transform?: SignTransform;
};
/** Top-level shape of a `<pack>.surface.json` file. */
interface SurfacePackFile {
  version: string;
  /** Pack name; used as the prefix of validation error messages. */
  pack: string;
  surfaces: SurfaceDefinition[];
}
/**
 * How a computed definition is derived. Variants are discriminated by `type`;
 * the key-bearing fields are what `validateComputedPack` checks against the
 * known surface keys.
 */
type ComputationSpec =
  | {
      type: 'ratio';
      numerator: string;
      denominator: string;
    }
  | {
      type: 'yoy_growth';
      source: string;
    }
  | {
      type: 'cagr';
      source: string;
      years: number;
    }
  | {
      type: 'per_share';
      source: string;
      shares_key: string;
    }
  | {
      type: 'simple';
      formula: string;
    };
/** One entry of a `<pack>.computed.json` file. */
interface ComputedDefinition {
  /** Identifier; validation requires it to be unique within a pack. */
  key: string;
  /** Display label; validation reports an error when it is empty. */
  label: string;
  category: string;
  /** Sort key used when the generated arrays are written. */
  order: number;
  unit: FinancialUnit;
  computation: ComputationSpec;
  supported_cadences?: FinancialCadence[];
  /** When non-empty, the definition is emitted in the market-derived list instead of the filing-derived one. */
  requires_external_data?: string[];
}
/** Top-level shape of a `<pack>.computed.json` file. */
interface ComputedPackFile {
  version: string;
  /** Pack name; used as the prefix of validation error messages. */
  pack: string;
  computed: ComputedDefinition[];
}
/** One entry of a `kpis/<pack>.kpis.json` file. */
interface KpiDefinition {
  key: string;
  label: string;
  /** Free-form unit string (not restricted to `FinancialUnit`). */
  unit: string;
}
/** Top-level shape of a `kpis/<pack>.kpis.json` file. */
interface KpiPackFile {
  version: string;
  pack: string;
  kpis: KpiDefinition[];
}
/** Directory holding the taxonomy JSON packs, resolved against the current working directory. */
const TAXONOMY_DIR = join(process.cwd(), 'rust', 'taxonomy', 'fiscal', 'v1');

/** Directory the generated TypeScript modules are written to. */
const OUTPUT_DIR = join(process.cwd(), 'lib', 'generated');

/** Pack names the loaders probe for, in iteration order. */
const PACK_ORDER = [
  'core',
  'bank_lender',
  'insurance',
  'reit_real_estate',
  'broker_asset_manager',
] as const;

/** Union of the literal pack names listed in `PACK_ORDER`. */
type PackName = (typeof PACK_ORDER)[number];
/**
 * Prints a progress message to stdout, prefixed with this script's tag.
 *
 * @param message Text to print after the `[generate-taxonomy]` prefix.
 */
function log(message: string): void {
  const tagged = `[generate-taxonomy] ${message}`;
  console.log(tagged);
}
/**
 * Shared loader behind the three `load*Packs` functions.
 *
 * Walks `PACK_ORDER`, skips packs whose file does not exist, and parses the
 * rest as JSON. The parsed value is cast to `T` without structural validation.
 *
 * @param resolvePath Maps a pack name to the JSON file that should hold it.
 * @returns Parsed files keyed by pack name, in `PACK_ORDER` order.
 * @throws Error naming the offending file when it cannot be read or parsed.
 */
function loadPacks<T>(resolvePath: (pack: PackName) => string): Map<PackName, T> {
  const packs = new Map<PackName, T>();
  for (const pack of PACK_ORDER) {
    const path = resolvePath(pack);
    if (!existsSync(path)) {
      // A missing file means the pack simply does not define this kind of data.
      continue;
    }
    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(path, 'utf8'));
    } catch (error) {
      // A bare SyntaxError from JSON.parse does not say which file is broken.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to load taxonomy file ${path}: ${reason}`);
    }
    packs.set(pack, parsed as T);
  }
  return packs;
}

/** Loads every existing `<pack>.surface.json` under `TAXONOMY_DIR`. */
function loadSurfacePacks(): Map<PackName, SurfacePackFile> {
  return loadPacks<SurfacePackFile>((pack) => join(TAXONOMY_DIR, `${pack}.surface.json`));
}

/** Loads every existing `<pack>.computed.json` under `TAXONOMY_DIR`. */
function loadComputedPacks(): Map<PackName, ComputedPackFile> {
  return loadPacks<ComputedPackFile>((pack) => join(TAXONOMY_DIR, `${pack}.computed.json`));
}

/** Loads every existing `kpis/<pack>.kpis.json` under `TAXONOMY_DIR`. */
function loadKpiPacks(): Map<PackName, KpiPackFile> {
  return loadPacks<KpiPackFile>((pack) => join(TAXONOMY_DIR, 'kpis', `${pack}.kpis.json`));
}
/**
 * Checks one surface pack and appends a message to `errors` for each problem.
 *
 * Reported problems, in this order per surface:
 * - a `surface_key` already seen for the same statement within this pack,
 * - an empty or missing `label`,
 * - a `statement` outside the known statement kinds.
 *
 * @param pack Parsed surface pack to check.
 * @param errors Accumulator that messages are pushed onto (mutated in place).
 */
function validateSurfacePack(pack: SurfacePackFile, errors: string[]) {
  // Built once per call rather than once per surface.
  const validStatements = new Set<string>(['income', 'balance', 'cash_flow', 'equity', 'comprehensive_income']);
  const keysByStatement = new Map<string, Set<string>>();

  for (const surface of pack.surfaces) {
    let seenKeys = keysByStatement.get(surface.statement);
    if (!seenKeys) {
      seenKeys = new Set<string>();
      keysByStatement.set(surface.statement, seenKeys);
    }
    if (seenKeys.has(surface.surface_key)) {
      errors.push(`${pack.pack}: duplicate surface_key "${surface.surface_key}" in statement "${surface.statement}"`);
    }
    seenKeys.add(surface.surface_key);

    if (!surface.label) {
      errors.push(`${pack.pack}: surface "${surface.surface_key}" missing label`);
    }
    if (!validStatements.has(surface.statement)) {
      errors.push(`${pack.pack}: surface "${surface.surface_key}" has invalid statement "${surface.statement}"`);
    }
  }
}
/**
 * Checks one computed pack and appends a message to `errors` for each problem.
 *
 * Per definition it reports a repeated `key`, an empty `label`, and references
 * to keys that are not in `surfaceKeys`. `simple` computations are not checked.
 *
 * NOTE(review): for `ratio`, an unknown numerator/denominator is reported only
 * when the name contains no underscore. This presumably lets ratios point at
 * other computed or externally supplied values, but it also lets most
 * misspelled keys through. Confirm the intent.
 *
 * @param pack Parsed computed pack to check.
 * @param surfaceKeys Every surface key known across the loaded surface packs.
 * @param errors Accumulator that messages are pushed onto (mutated in place).
 */
function validateComputedPack(pack: ComputedPackFile, surfaceKeys: Set<string>, errors: string[]) {
  const seenKeys = new Set<string>();
  const reportUnknown = (computedKey: string, role: string, reference: string): void => {
    errors.push(`${pack.pack}: computed "${computedKey}" references unknown ${role} "${reference}"`);
  };

  for (const definition of pack.computed) {
    const { key, label, computation } = definition;

    if (seenKeys.has(key)) {
      errors.push(`${pack.pack}: duplicate computed key "${key}"`);
    }
    seenKeys.add(key);

    if (!label) {
      errors.push(`${pack.pack}: computed "${key}" missing label`);
    }

    if (computation.type === 'ratio') {
      for (const role of ['numerator', 'denominator'] as const) {
        const reference = computation[role];
        if (!surfaceKeys.has(reference) && !reference.includes('_')) {
          reportUnknown(key, role, reference);
        }
      }
    } else if (computation.type === 'yoy_growth' || computation.type === 'cagr') {
      if (!surfaceKeys.has(computation.source)) {
        reportUnknown(key, 'source', computation.source);
      }
    } else if (computation.type === 'per_share') {
      if (!surfaceKeys.has(computation.source)) {
        reportUnknown(key, 'source', computation.source);
      }
      if (!surfaceKeys.has(computation.shares_key)) {
        reportUnknown(key, 'shares_key', computation.shares_key);
      }
    }
  }
}
/**
 * Returns the full source text of the generated `types.ts` module.
 *
 * The text is a fixed template with no interpolation: it declares the shared
 * unit/cadence/statement/sign aliases, the `ComputationSpec` union, the
 * surface/computed/KPI definition types, and the `RATIO_CATEGORIES` constant
 * with its derived `RatioCategory` type. Everything between the backticks is
 * emitted verbatim, so nothing may be added inside the literal that should
 * not appear in the generated file.
 */
function generateTypesFile(): string {
return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten
export type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';
export type FinancialCadence = 'annual' | 'quarterly' | 'ltm';
export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'equity' | 'comprehensive_income';
export type SignTransform = 'invert' | 'absolute';
export type ComputationSpec =
| { type: 'ratio'; numerator: string; denominator: string }
| { type: 'yoy_growth'; source: string }
| { type: 'cagr'; source: string; years: number }
| { type: 'per_share'; source: string; shares_key: string }
| { type: 'simple'; formula: string };
export type SurfaceDefinition = {
surface_key: string;
statement: FinancialStatementKind;
label: string;
category: string;
order: number;
unit: FinancialUnit;
rollup_policy?: string;
allowed_source_concepts: string[];
allowed_authoritative_concepts?: string[];
formula_fallback?: {
op: 'sum' | 'subtract' | 'divide';
sources: string[];
treat_null_as_zero?: boolean;
} | string | null;
detail_grouping_policy?: string;
materiality_policy?: string;
include_in_output?: boolean;
sign_transform?: SignTransform;
};
export type ComputedDefinition = {
key: string;
label: string;
category: string;
order: number;
unit: FinancialUnit;
computation: ComputationSpec;
supported_cadences?: FinancialCadence[];
requires_external_data?: string[];
};
export type KpiDefinition = {
key: string;
label: string;
unit: string;
};
export const RATIO_CATEGORIES = ['margins', 'returns', 'financial_health', 'per_share', 'growth', 'valuation'] as const;
export type RatioCategory = (typeof RATIO_CATEGORIES)[number];
`;
}
/**
 * Builds the source text of one generated `surfaces/<statement>.ts` module.
 *
 * @param statement Statement name; upper-cased to form the `<STATEMENT>_SURFACES` export name.
 * @param surfaces Surfaces for that statement; emitted sorted by `order`, input left untouched.
 * @returns Module source ending in a newline.
 */
function generateSurfaceFile(statement: string, surfaces: SurfaceDefinition[]): string {
  const exportName = `${statement.toUpperCase()}_SURFACES`;
  const ordered = surfaces.slice().sort((left, right) => left.order - right.order);
  const literal = JSON.stringify(ordered, null, 2);

  const lines = [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    "import type { SurfaceDefinition } from '../types';",
    `export const ${exportName}: SurfaceDefinition[] = ${literal};`,
    '',
  ];
  return lines.join('\n');
}
/**
 * Builds the source text of the generated `surfaces/index.ts` barrel.
 *
 * Statements are processed in sorted key order. For each one the barrel imports
 * `<STATEMENT>_SURFACES` from `./<statement>`, adds it to
 * `ALL_SURFACES_BY_STATEMENT`, and re-exports it.
 *
 * @param surfacesByStatement Map whose keys are the statement names to include; the values are not read.
 * @returns Module source ending in a newline.
 */
function generateSurfacesIndex(surfacesByStatement: Map<string, SurfaceDefinition[]>): string {
  const importLines: string[] = [];
  const entryLines: string[] = [];
  const exportedNames: string[] = [];

  for (const statement of Array.from(surfacesByStatement.keys()).sort()) {
    const constName = `${statement.toUpperCase()}_SURFACES`;
    importLines.push(`import { ${constName} } from './${statement}';`);
    entryLines.push(` ${statement}: ${constName},`);
    exportedNames.push(constName);
  }

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    importLines.join('\n'),
    'export const ALL_SURFACES_BY_STATEMENT = {',
    entryLines.join('\n'),
    '} as const;',
    `export { ${exportedNames.join(', ')} };`,
    '',
  ].join('\n');
}
/**
 * Builds the source text of one generated `computed/<name>.ts` module.
 *
 * @param name Pack name; upper-cased with hyphens turned into underscores to form the export name.
 * @param definitions Computed definitions for the pack; emitted sorted by `order`, input left untouched.
 * @returns Module source ending in a newline.
 */
function generateComputedFile(
  name: string,
  definitions: ComputedDefinition[]
): string {
  const exportName = name.toUpperCase().replace(/-/g, '_');
  const ordered = definitions.slice().sort((left, right) => left.order - right.order);
  const literal = JSON.stringify(ordered, null, 2);

  const lines = [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    "import type { ComputedDefinition } from '../types';",
    `export const ${exportName}: ComputedDefinition[] = ${literal};`,
    '',
  ];
  return lines.join('\n');
}
/**
 * Builds the source text of the generated `computed/index.ts` barrel.
 *
 * The barrel imports each pack constant, spreads them into `ALL_COMPUTED` in
 * the order given, and inlines two `order`-sorted lists:
 * `MARKET_DERIVED_COMPUTED` (definitions with a non-empty
 * `requires_external_data`) and `FILING_DERIVED_COMPUTED` (all the others).
 *
 * @param files Pack name plus its definitions, one entry per generated pack module.
 * @returns Module source ending in a newline.
 */
function generateComputedIndex(files: { name: string; definitions: ComputedDefinition[] }[]): string {
  const toConstName = (packName: string): string => packName.toUpperCase().replace(/-/g, '_');
  const byOrder = (left: ComputedDefinition, right: ComputedDefinition): number => left.order - right.order;

  const importLines: string[] = [];
  const spreadLines: string[] = [];
  const exportedNames: string[] = [];
  const filingDerived: ComputedDefinition[] = [];
  const marketDerived: ComputedDefinition[] = [];

  for (const file of files) {
    const constName = toConstName(file.name);
    importLines.push(`import { ${constName} } from './${file.name}';`);
    spreadLines.push(` ...${constName},`);
    exportedNames.push(constName);

    for (const definition of file.definitions) {
      const external = definition.requires_external_data;
      if (external && external.length > 0) {
        marketDerived.push(definition);
      } else {
        filingDerived.push(definition);
      }
    }
  }
  filingDerived.sort(byOrder);
  marketDerived.sort(byOrder);

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    "import type { ComputedDefinition } from '../types';",
    importLines.join('\n'),
    'export const ALL_COMPUTED: ComputedDefinition[] = [',
    spreadLines.join('\n'),
    '];',
    `export const FILING_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(filingDerived, null, 2)};`,
    `export const MARKET_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(marketDerived, null, 2)};`,
    `export { ${exportedNames.join(', ')} };`,
    '',
  ].join('\n');
}
/**
 * Builds the source text of one generated `kpis/<pack>.ts` module.
 *
 * @param pack Pack name; upper-cased with hyphens turned into underscores, then suffixed with `_KPIS`.
 * @param kpis KPI definitions, emitted in the order given.
 * @returns Module source ending in a newline.
 */
function generateKpiFile(pack: string, kpis: KpiDefinition[]): string {
  const exportName = `${pack.toUpperCase().replace(/-/g, '_')}_KPIS`;
  const literal = JSON.stringify(kpis, null, 2);

  const lines = [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    "import type { KpiDefinition } from '../types';",
    `export const ${exportName}: KpiDefinition[] = ${literal};`,
    '',
  ];
  return lines.join('\n');
}
/**
 * Builds the source text of the generated `kpis/index.ts` barrel.
 *
 * The barrel imports each `<PACK>_KPIS` constant from `./<pack>`, spreads them
 * into `ALL_KPIS` in the order given, and re-exports the individual constants.
 *
 * @param packs Pack names to include; only `pack` is read, `kpis` is not used here.
 * @returns Module source ending in a newline.
 */
function generateKpiIndex(packs: { pack: string; kpis: KpiDefinition[] }[]): string {
  const importLines: string[] = [];
  const spreadLines: string[] = [];
  const exportedNames: string[] = [];

  for (const { pack } of packs) {
    const constName = `${pack.toUpperCase().replace(/-/g, '_')}_KPIS`;
    importLines.push(`import { ${constName} } from './${pack}';`);
    spreadLines.push(` ...${constName},`);
    exportedNames.push(constName);
  }

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    "import type { KpiDefinition } from '../types';",
    importLines.join('\n'),
    'export const ALL_KPIS: KpiDefinition[] = [',
    spreadLines.join('\n'),
    '];',
    `export { ${exportedNames.join(', ')} };`,
    '',
  ].join('\n');
}
/**
 * Returns the full source text of the generated top-level `index.ts` barrel.
 *
 * The text is a fixed template. It re-exports the shared types (including
 * `SignTransform`, which the generated `SurfaceDefinition` refers to), the
 * ratio categories, and a hard-coded set of constants: the income, balance
 * and cash-flow surface arrays, the `CORE` computed pack, and `CORE_KPIS`.
 * Because those names are hard-coded, the core packs must have produced them
 * for the generated barrel to resolve.
 *
 * @returns Module source ending in a newline.
 */
function generateMainIndex(): string {
  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten
export type {
FinancialUnit,
FinancialCadence,
FinancialStatementKind,
SignTransform,
ComputationSpec,
SurfaceDefinition,
ComputedDefinition,
KpiDefinition,
} from './types';
export { RATIO_CATEGORIES, type RatioCategory } from './types';
export {
INCOME_SURFACES,
BALANCE_SURFACES,
CASH_FLOW_SURFACES,
ALL_SURFACES_BY_STATEMENT,
} from './surfaces';
export {
ALL_COMPUTED,
FILING_DERIVED_COMPUTED,
MARKET_DERIVED_COMPUTED,
CORE,
} from './computed';
export { ALL_KPIS, CORE_KPIS } from './kpis';
`;
}
/**
 * Script entry point: loads the taxonomy packs, validates them, and writes the
 * generated TypeScript modules under `OUTPUT_DIR`.
 *
 * Steps:
 * 1. Load surface, computed and KPI packs (missing optional packs are skipped).
 * 2. Validate every loaded surface and computed pack, and confirm that the
 *    core inputs the generated top-level index depends on are present.
 * 3. If any error was collected, print them all and exit with status 1
 *    before writing anything.
 * 4. Write `types.ts`, per-statement surface modules (core pack only),
 *    per-pack computed and KPI modules, their barrels, and the top-level index.
 */
async function main() {
  log('Loading taxonomy files...');
  const surfacePacks = loadSurfacePacks();
  const computedPacks = loadComputedPacks();
  const kpiPacks = loadKpiPacks();
  log(`Loaded ${surfacePacks.size} surface packs, ${computedPacks.size} computed packs, ${kpiPacks.size} KPI packs`);

  const errors: string[] = [];
  log('Validating taxonomy files...');
  for (const pack of surfacePacks.values()) {
    validateSurfacePack(pack, errors);
  }

  // Computed definitions may reference a surface from any loaded pack.
  const allSurfaceKeys = new Set<string>();
  for (const pack of surfacePacks.values()) {
    for (const surface of pack.surfaces) {
      allSurfaceKeys.add(surface.surface_key);
    }
  }
  for (const pack of computedPacks.values()) {
    validateComputedPack(pack, allSurfaceKeys, errors);
  }

  // The generated top-level index unconditionally re-exports CORE, CORE_KPIS
  // and the income/balance/cash_flow surface constants. If any of their inputs
  // is absent the generated index cannot resolve, so report it here instead of
  // emitting a broken module.
  const coreSurfaces = surfacePacks.get('core');
  if (coreSurfaces) {
    const coreStatements = new Set<string>(coreSurfaces.surfaces.map((surface) => surface.statement));
    for (const statement of ['income', 'balance', 'cash_flow']) {
      if (!coreStatements.has(statement)) {
        errors.push(`core: no surfaces for statement "${statement}", which the generated index re-exports`);
      }
    }
  } else {
    errors.push('core: missing required surface pack (core.surface.json)');
  }
  if (!computedPacks.has('core')) {
    errors.push('core: missing required computed pack (core.computed.json)');
  }
  if (!kpiPacks.has('core')) {
    errors.push('core: missing required KPI pack (kpis/core.kpis.json)');
  }

  if (errors.length > 0) {
    console.error('Validation errors:');
    for (const error of errors) {
      console.error(` - ${error}`);
    }
    process.exit(1);
  }

  log('Creating output directories...');
  mkdirSync(join(OUTPUT_DIR, 'surfaces'), { recursive: true });
  mkdirSync(join(OUTPUT_DIR, 'computed'), { recursive: true });
  mkdirSync(join(OUTPUT_DIR, 'kpis'), { recursive: true });

  log('Generating types...');
  writeFileSync(join(OUTPUT_DIR, 'types.ts'), generateTypesFile());

  log('Generating surfaces...');
  // Only the core pack's surfaces are emitted; other packs are validated but not generated.
  if (coreSurfaces) {
    const surfacesByStatement = new Map<string, SurfaceDefinition[]>();
    for (const surface of coreSurfaces.surfaces) {
      const existing = surfacesByStatement.get(surface.statement) ?? [];
      existing.push(surface);
      surfacesByStatement.set(surface.statement, existing);
    }
    for (const [statement, surfaces] of surfacesByStatement) {
      writeFileSync(
        join(OUTPUT_DIR, 'surfaces', `${statement}.ts`),
        generateSurfaceFile(statement, surfaces)
      );
    }
    writeFileSync(
      join(OUTPUT_DIR, 'surfaces', 'index.ts'),
      generateSurfacesIndex(surfacesByStatement)
    );
  }

  log('Generating computed definitions...');
  const computedFiles: { name: string; definitions: ComputedDefinition[] }[] = [];
  for (const [pack, file] of computedPacks) {
    computedFiles.push({ name: pack, definitions: file.computed });
    writeFileSync(
      join(OUTPUT_DIR, 'computed', `${pack}.ts`),
      generateComputedFile(pack, file.computed)
    );
  }
  writeFileSync(join(OUTPUT_DIR, 'computed', 'index.ts'), generateComputedIndex(computedFiles));

  log('Generating KPI definitions...');
  const kpiFiles: { pack: string; kpis: KpiDefinition[] }[] = [];
  for (const [pack, file] of kpiPacks) {
    kpiFiles.push({ pack, kpis: file.kpis });
    writeFileSync(
      join(OUTPUT_DIR, 'kpis', `${pack}.ts`),
      generateKpiFile(pack, file.kpis)
    );
  }
  writeFileSync(join(OUTPUT_DIR, 'kpis', 'index.ts'), generateKpiIndex(kpiFiles));

  log('Generating main index...');
  writeFileSync(join(OUTPUT_DIR, 'index.ts'), generateMainIndex());

  const surfaceCount = coreSurfaces?.surfaces.length ?? 0;
  const computedCount = computedFiles.reduce((sum, f) => sum + f.definitions.length, 0);
  const kpiCount = kpiFiles.reduce((sum, f) => sum + f.kpis.length, 0);
  log(`Generated ${surfaceCount} surfaces, ${computedCount} computed definitions, ${kpiCount} KPIs`);
  log(`Output written to ${OUTPUT_DIR}`);
}
// Run the generator; any rejection from main() is reported and turned into exit status 1.
main().catch((failure: unknown) => {
  console.error('Generation failed:', failure);
  process.exit(1);
});