Consolidate metric definitions with Rust JSON as single source of truth
- Add core.computed.json with 32 ratio definitions (filing + market derived)
- Add Rust types for ComputedDefinition and ComputationSpec
- Create generate-taxonomy.ts to generate TypeScript from Rust JSON
- Generate lib/generated/ (gitignored) with surfaces, computed, kpis
- Update financial-metrics.ts to use generated definitions
- Add build-time generation via 'bun run generate'
- Add taxonomy architecture documentation

Two-phase ratio computation:
- Filing-derived: margins, returns, per-share, growth (Rust computes)
- Market-derived: valuation ratios (TypeScript computes with price data)

All 32 ratios defined in core.computed.json:
- Margins: gross, operating, ebitda, net, fcf
- Returns: roa, roe, roic, roce
- Financial health: debt_to_equity, net_debt_to_ebitda, cash_to_debt, current_ratio
- Per-share: revenue, fcf, book_value
- Growth: yoy metrics + 3y/5y cagr
- Valuation: market_cap, ev, p/e, p/fcf, p/b, ev/sales, ev/ebitda, ev/fcf
This commit is contained in:
529
scripts/generate-taxonomy.ts
Normal file
529
scripts/generate-taxonomy.ts
Normal file
@@ -0,0 +1,529 @@
|
||||
import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
/** Unit tag carried by surface and computed definitions. */
type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';

/** Cadence tag a computed definition may list in `supported_cadences`. */
type FinancialCadence = 'annual' | 'quarterly' | 'ltm';

/** Statement a surface is filed under; `validateSurfacePack` rejects any other value. */
type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'equity' | 'comprehensive_income';

/** Sign adjustment a surface may declare; same union as the `SignTransform` type emitted by `generateTypesFile`. */
type SignTransform = 'invert' | 'absolute';

/**
 * One entry of a `<pack>.surface.json` file as this script reads it.
 *
 * Entries are serialized verbatim into the generated surface files, so every
 * field present in the JSON is carried through to the output unchanged.
 */
type SurfaceDefinition = {
  /** Identifier of the surface; must be unique per statement within a pack. */
  surface_key: string;
  /** Statement the surface belongs to; also selects the generated file it lands in. */
  statement: FinancialStatementKind;
  /** Display label; validation reports an error when it is empty. */
  label: string;
  category: string;
  /** Sort position; generated surface files list entries in ascending `order`. */
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?: {
    op: 'sum' | 'subtract' | 'divide';
    sources: string[];
    treat_null_as_zero?: boolean;
  } | string | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  /**
   * Uses the shared `SignTransform` union so the generator's own view of a
   * surface matches the `SurfaceDefinition` type it emits.
   */
  sign_transform?: SignTransform;
};
|
||||
|
||||
/** Top-level shape this script assumes for a parsed `<pack>.surface.json` file. */
type SurfacePackFile = {
  version: string;
  /** Pack name; used as the prefix of validation error messages. */
  pack: string;
  /** Surface entries of the pack, in file order. */
  surfaces: SurfaceDefinition[];
};
|
||||
|
||||
/**
 * How a computed definition is derived, discriminated by `type`.
 *
 * `validateComputedPack` checks the referenced keys of every variant except
 * `simple`, whose `formula` string is not inspected by this script.
 */
type ComputationSpec =
  // numerator / denominator
  | { type: 'ratio'; numerator: string; denominator: string }
  // growth of `source`; `source` must be a known surface key
  | { type: 'yoy_growth'; source: string }
  // growth of `source` over `years`; `source` must be a known surface key
  | { type: 'cagr'; source: string; years: number }
  // `source` and `shares_key` must both be known surface keys
  | { type: 'per_share'; source: string; shares_key: string }
  // free-form formula, passed through unvalidated
  | { type: 'simple'; formula: string };
|
||||
|
||||
/**
 * One entry of a `<pack>.computed.json` file as this script reads it.
 * Entries are serialized verbatim into the generated computed files.
 */
type ComputedDefinition = {
  /** Identifier of the computed value; must be unique within its pack. */
  key: string;
  /** Display label; validation reports an error when it is empty. */
  label: string;
  category: string;
  /** Sort position; generated files list entries in ascending `order`. */
  order: number;
  unit: FinancialUnit;
  computation: ComputationSpec;
  supported_cadences?: FinancialCadence[];
  /**
   * When present and non-empty, the entry is emitted under
   * `MARKET_DERIVED_COMPUTED`; otherwise under `FILING_DERIVED_COMPUTED`.
   */
  requires_external_data?: string[];
};
|
||||
|
||||
/** Top-level shape this script assumes for a parsed `<pack>.computed.json` file. */
type ComputedPackFile = {
  version: string;
  /** Pack name; used as the prefix of validation error messages. */
  pack: string;
  /** Computed definitions of the pack, in file order. */
  computed: ComputedDefinition[];
};
|
||||
|
||||
/** One entry of a `kpis/<pack>.kpis.json` file; serialized verbatim into the generated KPI files. */
type KpiDefinition = {
  key: string;
  label: string;
  /** Free-form unit string (not restricted to `FinancialUnit`). */
  unit: string;
};
|
||||
|
||||
/** Top-level shape this script assumes for a parsed `kpis/<pack>.kpis.json` file. */
type KpiPackFile = {
  version: string;
  pack: string;
  /** KPI definitions of the pack, in file order. */
  kpis: KpiDefinition[];
};
|
||||
|
||||
/** Directory the taxonomy JSON files are read from, resolved against the current working directory. */
const TAXONOMY_DIR = join(process.cwd(), 'rust', 'taxonomy', 'fiscal', 'v1');

/** Directory the generated TypeScript is written to, resolved against the current working directory. */
const OUTPUT_DIR = join(process.cwd(), 'lib', 'generated');

/** Pack names probed by the loaders, in the order they are loaded. */
const PACK_ORDER = [
  'core',
  'bank_lender',
  'insurance',
  'reit_real_estate',
  'broker_asset_manager',
] as const;

/** Union of the pack names listed in `PACK_ORDER`. */
type PackName = (typeof PACK_ORDER)[number];
|
||||
|
||||
/**
 * Prints a progress line to stdout, prefixed with the script tag.
 *
 * @param message Text to print after the `[generate-taxonomy]` prefix.
 */
function log(message: string) {
  const line = `[generate-taxonomy] ${message}`;
  console.log(line);
}
|
||||
|
||||
/**
 * Shared loader behind the three pack loaders below.
 *
 * Walks `PACK_ORDER`, resolves one file path per pack, skips packs whose file
 * does not exist, and parses the rest as JSON. The parsed value is only cast
 * to `T`; its shape is not checked here.
 *
 * @param resolvePath Maps a pack name to the path of its JSON file.
 * @returns Parsed files keyed by pack name, in `PACK_ORDER` order.
 * @throws SyntaxError When a file exists but is not valid JSON; the message
 *   names the offending file.
 */
function loadPackFiles<T>(resolvePath: (pack: PackName) => string): Map<PackName, T> {
  const packs = new Map<PackName, T>();

  for (const pack of PACK_ORDER) {
    const path = resolvePath(pack);
    if (!existsSync(path)) {
      // Packs are optional: a missing file simply leaves the pack out of the map.
      continue;
    }

    const raw = readFileSync(path, 'utf8');
    try {
      packs.set(pack, JSON.parse(raw) as T);
    } catch (error) {
      // A bare JSON.parse error does not say which file was malformed.
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Failed to parse ${path}: ${reason}`);
    }
  }

  return packs;
}

/**
 * Loads every `<pack>.surface.json` found in `TAXONOMY_DIR`.
 *
 * @returns Surface pack files keyed by pack name; packs without a file are absent.
 */
function loadSurfacePacks(): Map<PackName, SurfacePackFile> {
  return loadPackFiles<SurfacePackFile>((pack) => join(TAXONOMY_DIR, `${pack}.surface.json`));
}

/**
 * Loads every `<pack>.computed.json` found in `TAXONOMY_DIR`.
 *
 * @returns Computed pack files keyed by pack name; packs without a file are absent.
 */
function loadComputedPacks(): Map<PackName, ComputedPackFile> {
  return loadPackFiles<ComputedPackFile>((pack) => join(TAXONOMY_DIR, `${pack}.computed.json`));
}

/**
 * Loads every `<pack>.kpis.json` found in the `kpis` subdirectory of `TAXONOMY_DIR`.
 *
 * @returns KPI pack files keyed by pack name; packs without a file are absent.
 */
function loadKpiPacks(): Map<PackName, KpiPackFile> {
  return loadPackFiles<KpiPackFile>((pack) => join(TAXONOMY_DIR, 'kpis', `${pack}.kpis.json`));
}
|
||||
|
||||
/**
 * Checks one surface pack and appends a message to `errors` for each problem.
 *
 * Per surface, in this order: duplicate `surface_key` within the same
 * statement, empty/missing label, statement outside the known set.
 *
 * @param pack Parsed surface pack to check.
 * @param errors Accumulator the messages are pushed onto (mutated).
 */
function validateSurfacePack(pack: SurfacePackFile, errors: string[]) {
  const knownStatements: FinancialStatementKind[] = [
    'income',
    'balance',
    'cash_flow',
    'equity',
    'comprehensive_income',
  ];
  const seenByStatement = new Map<string, Set<string>>();

  for (const { surface_key: surfaceKey, statement, label } of pack.surfaces) {
    let seen = seenByStatement.get(statement);
    if (seen === undefined) {
      // First surface for this statement: nothing to collide with yet.
      seen = new Set<string>();
      seenByStatement.set(statement, seen);
    } else if (seen.has(surfaceKey)) {
      errors.push(`${pack.pack}: duplicate surface_key "${surfaceKey}" in statement "${statement}"`);
    }
    seen.add(surfaceKey);

    if (!label) {
      errors.push(`${pack.pack}: surface "${surfaceKey}" missing label`);
    }

    const statementIsKnown = knownStatements.includes(statement);
    if (!statementIsKnown) {
      errors.push(`${pack.pack}: surface "${surfaceKey}" has invalid statement "${statement}"`);
    }
  }
}
|
||||
|
||||
/**
 * Checks one computed pack and appends a message to `errors` for each problem.
 *
 * Per definition, in this order: duplicate key, empty/missing label, then
 * references of the computation spec that do not resolve to a surface key.
 * `simple` computations carry no references and are not inspected.
 *
 * @param pack Parsed computed pack to check.
 * @param surfaceKeys Surface keys that references may point at.
 * @param errors Accumulator the messages are pushed onto (mutated).
 */
function validateComputedPack(pack: ComputedPackFile, surfaceKeys: Set<string>, errors: string[]) {
  const seenKeys = new Set<string>();

  const reportUnknown = (owner: string, role: string, reference: string): void => {
    errors.push(`${pack.pack}: computed "${owner}" references unknown ${role} "${reference}"`);
  };

  // Ratio operands are accepted when they are a surface key OR merely contain
  // an underscore.
  // NOTE(review): the underscore rule presumably lets ratios point at other
  // computed/derived keys, but it also hides typos in snake_case names — confirm.
  const ratioOperandAccepted = (reference: string): boolean =>
    surfaceKeys.has(reference) || reference.includes('_');

  for (const { key, label, computation } of pack.computed) {
    if (seenKeys.has(key)) {
      errors.push(`${pack.pack}: duplicate computed key "${key}"`);
    }
    seenKeys.add(key);

    if (!label) {
      errors.push(`${pack.pack}: computed "${key}" missing label`);
    }

    if (computation.type === 'ratio') {
      if (!ratioOperandAccepted(computation.numerator)) {
        reportUnknown(key, 'numerator', computation.numerator);
      }
      if (!ratioOperandAccepted(computation.denominator)) {
        reportUnknown(key, 'denominator', computation.denominator);
      }
    } else if (computation.type === 'yoy_growth' || computation.type === 'cagr') {
      if (!surfaceKeys.has(computation.source)) {
        reportUnknown(key, 'source', computation.source);
      }
    } else if (computation.type === 'per_share') {
      if (!surfaceKeys.has(computation.source)) {
        reportUnknown(key, 'source', computation.source);
      }
      if (!surfaceKeys.has(computation.shares_key)) {
        reportUnknown(key, 'shares_key', computation.shares_key);
      }
    }
  }
}
|
||||
|
||||
/**
 * Builds the source text of the generated `types.ts`.
 *
 * The text is static: it does not depend on any loaded taxonomy file.
 *
 * @returns Complete file contents, ending with a newline.
 */
function generateTypesFile(): string {
  // Each inner array is one paragraph of the emitted file. Paragraphs are
  // separated by a single blank line.
  const paragraphs: string[][] = [
    [
      '// Auto-generated by scripts/generate-taxonomy.ts',
      '// DO NOT EDIT MANUALLY - changes will be overwritten',
    ],
    ["export type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';"],
    ["export type FinancialCadence = 'annual' | 'quarterly' | 'ltm';"],
    ["export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'equity' | 'comprehensive_income';"],
    ["export type SignTransform = 'invert' | 'absolute';"],
    [
      'export type ComputationSpec =',
      "  | { type: 'ratio'; numerator: string; denominator: string }",
      "  | { type: 'yoy_growth'; source: string }",
      "  | { type: 'cagr'; source: string; years: number }",
      "  | { type: 'per_share'; source: string; shares_key: string }",
      "  | { type: 'simple'; formula: string };",
    ],
    [
      'export type SurfaceDefinition = {',
      '  surface_key: string;',
      '  statement: FinancialStatementKind;',
      '  label: string;',
      '  category: string;',
      '  order: number;',
      '  unit: FinancialUnit;',
      '  rollup_policy?: string;',
      '  allowed_source_concepts: string[];',
      '  allowed_authoritative_concepts?: string[];',
      '  formula_fallback?: {',
      "    op: 'sum' | 'subtract' | 'divide';",
      '    sources: string[];',
      '    treat_null_as_zero?: boolean;',
      '  } | string | null;',
      '  detail_grouping_policy?: string;',
      '  materiality_policy?: string;',
      '  include_in_output?: boolean;',
      '  sign_transform?: SignTransform;',
      '};',
    ],
    [
      'export type ComputedDefinition = {',
      '  key: string;',
      '  label: string;',
      '  category: string;',
      '  order: number;',
      '  unit: FinancialUnit;',
      '  computation: ComputationSpec;',
      '  supported_cadences?: FinancialCadence[];',
      '  requires_external_data?: string[];',
      '};',
    ],
    [
      'export type KpiDefinition = {',
      '  key: string;',
      '  label: string;',
      '  unit: string;',
      '};',
    ],
    [
      "export const RATIO_CATEGORIES = ['margins', 'returns', 'financial_health', 'per_share', 'growth', 'valuation'] as const;",
      'export type RatioCategory = (typeof RATIO_CATEGORIES)[number];',
    ],
  ];

  const body = paragraphs.map((lines) => lines.join('\n')).join('\n\n');
  return `${body}\n`;
}
|
||||
|
||||
/**
 * Builds the source text of one generated surface file.
 *
 * @param statement Statement name; upper-cased to form the `<STATEMENT>_SURFACES` export name.
 * @param surfaces Surfaces of that statement; emitted in ascending `order` (the input array is not mutated).
 * @returns Complete file contents, ending with a newline.
 */
function generateSurfaceFile(statement: string, surfaces: SurfaceDefinition[]): string {
  const exportName = `${statement.toUpperCase()}_SURFACES`;
  const ordered = surfaces.slice().sort((left, right) => left.order - right.order);
  const literal = JSON.stringify(ordered, null, 2);

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    '',
    "import type { SurfaceDefinition } from '../types';",
    '',
    `export const ${exportName}: SurfaceDefinition[] = ${literal};`,
    '',
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the source text of `surfaces/index.ts`.
 *
 * Only the map's keys are used: each statement name yields one import of
 * `<STATEMENT>_SURFACES` from `./<statement>`, one entry in
 * `ALL_SURFACES_BY_STATEMENT`, and one re-export. Statements are emitted in
 * sorted order.
 *
 * @param surfacesByStatement Surfaces grouped by statement name.
 * @returns Complete file contents, ending with a newline.
 */
function generateSurfacesIndex(surfacesByStatement: Map<string, SurfaceDefinition[]>): string {
  const modules = Array.from(surfacesByStatement.keys())
    .sort()
    .map((statement) => ({ statement, constName: `${statement.toUpperCase()}_SURFACES` }));

  const importLines = modules.map(({ statement, constName }) => `import { ${constName} } from './${statement}';`);
  const entryLines = modules.map(({ statement, constName }) => ` ${statement}: ${constName},`);
  const reExported = modules.map(({ constName }) => constName).join(', ');

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    '',
    importLines.join('\n'),
    '',
    'export const ALL_SURFACES_BY_STATEMENT = {',
    entryLines.join('\n'),
    '} as const;',
    '',
    `export { ${reExported} };`,
    '',
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the source text of one generated computed-definitions file.
 *
 * @param name File/pack name; upper-cased with `-` replaced by `_` to form the export name.
 * @param definitions Definitions to emit, in ascending `order` (the input array is not mutated).
 * @returns Complete file contents, ending with a newline.
 */
function generateComputedFile(
  name: string,
  definitions: ComputedDefinition[]
): string {
  const exportName = name.toUpperCase().replace(/-/g, '_');
  const ordered = definitions.slice().sort((left, right) => left.order - right.order);
  const literal = JSON.stringify(ordered, null, 2);

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    '',
    "import type { ComputedDefinition } from '../types';",
    '',
    `export const ${exportName}: ComputedDefinition[] = ${literal};`,
    '',
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the source text of `computed/index.ts`.
 *
 * The file imports each per-pack constant, spreads them into `ALL_COMPUTED`,
 * and inlines two order-sorted lists: definitions with a non-empty
 * `requires_external_data` go to `MARKET_DERIVED_COMPUTED`, all others to
 * `FILING_DERIVED_COMPUTED`.
 *
 * @param files One entry per generated computed file (name + its definitions).
 * @returns Complete file contents, ending with a newline.
 */
function generateComputedIndex(files: { name: string; definitions: ComputedDefinition[] }[]): string {
  const toConstName = (fileName: string): string => fileName.toUpperCase().replace(/-/g, '_');
  const byOrder = (left: ComputedDefinition, right: ComputedDefinition): number => left.order - right.order;
  const needsExternalData = (definition: ComputedDefinition): boolean =>
    Boolean(definition.requires_external_data && definition.requires_external_data.length > 0);

  const constNames = files.map((file) => toConstName(file.name));
  const importLines = files.map((file) => `import { ${toConstName(file.name)} } from './${file.name}';`);
  const spreadLines = constNames.map((constName) => ` ...${constName},`);

  const everyDefinition = files.flatMap((file) => file.definitions);
  const filingDerived = everyDefinition.filter((definition) => !needsExternalData(definition)).sort(byOrder);
  const marketDerived = everyDefinition.filter((definition) => needsExternalData(definition)).sort(byOrder);

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    '',
    "import type { ComputedDefinition } from '../types';",
    '',
    importLines.join('\n'),
    '',
    'export const ALL_COMPUTED: ComputedDefinition[] = [',
    spreadLines.join('\n'),
    '];',
    '',
    `export const FILING_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(filingDerived, null, 2)};`,
    '',
    `export const MARKET_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(marketDerived, null, 2)};`,
    '',
    `export { ${constNames.join(', ')} };`,
    '',
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the source text of one generated KPI file.
 *
 * @param pack Pack name; upper-cased with `-` replaced by `_` and suffixed with `_KPIS` to form the export name.
 * @param kpis KPI definitions, emitted in the order given.
 * @returns Complete file contents, ending with a newline.
 */
function generateKpiFile(pack: string, kpis: KpiDefinition[]): string {
  const exportName = `${pack.toUpperCase().replace(/-/g, '_')}_KPIS`;
  const literal = JSON.stringify(kpis, null, 2);

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    '',
    "import type { KpiDefinition } from '../types';",
    '',
    `export const ${exportName}: KpiDefinition[] = ${literal};`,
    '',
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the source text of `kpis/index.ts`.
 *
 * Only the pack names are used: each yields an import of `<PACK>_KPIS` from
 * `./<pack>`, a spread into `ALL_KPIS`, and a re-export, in the order given.
 *
 * @param packs One entry per generated KPI file (pack name + its KPIs).
 * @returns Complete file contents, ending with a newline.
 */
function generateKpiIndex(packs: { pack: string; kpis: KpiDefinition[] }[]): string {
  const modules = packs.map(({ pack }) => ({
    pack,
    constName: `${pack.toUpperCase().replace(/-/g, '_')}_KPIS`,
  }));

  const importLines = modules.map(({ pack, constName }) => `import { ${constName} } from './${pack}';`);
  const spreadLines = modules.map(({ constName }) => ` ...${constName},`);
  const reExported = modules.map(({ constName }) => constName).join(', ');

  return [
    '// Auto-generated by scripts/generate-taxonomy.ts',
    '// DO NOT EDIT MANUALLY - changes will be overwritten',
    '',
    "import type { KpiDefinition } from '../types';",
    '',
    importLines.join('\n'),
    '',
    'export const ALL_KPIS: KpiDefinition[] = [',
    spreadLines.join('\n'),
    '];',
    '',
    `export { ${reExported} };`,
    '',
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the source text of the generated top-level `index.ts` barrel.
 *
 * The text is static. It re-exports the types from `./types` (including
 * `SignTransform`, which `SurfaceDefinition.sign_transform` refers to), the
 * ratio categories, and a fixed set of constants from `./surfaces`,
 * `./computed` and `./kpis`.
 *
 * NOTE(review): the surface/computed/KPI names are hard-coded, so the barrel
 * only resolves when the core pack produced `income`, `balance` and
 * `cash_flow` surface files plus `core` computed and KPI files.
 *
 * @returns Complete file contents, ending with a newline.
 */
function generateMainIndex(): string {
  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

export type {
  FinancialUnit,
  FinancialCadence,
  FinancialStatementKind,
  SignTransform,
  ComputationSpec,
  SurfaceDefinition,
  ComputedDefinition,
  KpiDefinition,
} from './types';

export { RATIO_CATEGORIES, type RatioCategory } from './types';

export {
  INCOME_SURFACES,
  BALANCE_SURFACES,
  CASH_FLOW_SURFACES,
  ALL_SURFACES_BY_STATEMENT,
} from './surfaces';

export {
  ALL_COMPUTED,
  FILING_DERIVED_COMPUTED,
  MARKET_DERIVED_COMPUTED,
  CORE,
} from './computed';

export { ALL_KPIS, CORE_KPIS } from './kpis';
`;
}
|
||||
|
||||
/**
 * Runs the whole generation: load the taxonomy JSON, validate it, then write
 * the generated TypeScript under `OUTPUT_DIR`.
 *
 * Validation problems are printed to stderr and terminate the process with
 * exit code 1 before anything is written.
 */
async function main() {
  log('Loading taxonomy files...');

  const surfacePacks = loadSurfacePacks();
  const computedPacks = loadComputedPacks();
  const kpiPacks = loadKpiPacks();

  log(`Loaded ${surfacePacks.size} surface packs, ${computedPacks.size} computed packs, ${kpiPacks.size} KPI packs`);

  const problems: string[] = [];

  log('Validating taxonomy files...');

  for (const surfacePack of surfacePacks.values()) {
    validateSurfacePack(surfacePack, problems);
  }

  // Computed definitions may reference surfaces from any loaded pack.
  const knownSurfaceKeys = new Set<string>();
  for (const surfacePack of surfacePacks.values()) {
    for (const { surface_key: surfaceKey } of surfacePack.surfaces) {
      knownSurfaceKeys.add(surfaceKey);
    }
  }

  for (const computedPack of computedPacks.values()) {
    validateComputedPack(computedPack, knownSurfaceKeys, problems);
  }

  if (problems.length > 0) {
    console.error('Validation errors:');
    problems.forEach((problem) => {
      console.error(` - ${problem}`);
    });
    process.exit(1);
  }

  log('Creating output directories...');
  const surfacesDir = join(OUTPUT_DIR, 'surfaces');
  const computedDir = join(OUTPUT_DIR, 'computed');
  const kpisDir = join(OUTPUT_DIR, 'kpis');
  for (const directory of [surfacesDir, computedDir, kpisDir]) {
    mkdirSync(directory, { recursive: true });
  }

  log('Generating types...');
  writeFileSync(join(OUTPUT_DIR, 'types.ts'), generateTypesFile());

  log('Generating surfaces...');
  // Only the core pack's surfaces are emitted.
  // NOTE(review): without a core surface pack nothing is written under
  // surfaces/, yet the main index still re-exports from './surfaces'.
  const coreSurfaces = surfacePacks.get('core');
  if (coreSurfaces) {
    const grouped = new Map<string, SurfaceDefinition[]>();

    for (const surface of coreSurfaces.surfaces) {
      const bucket = grouped.get(surface.statement);
      if (bucket) {
        bucket.push(surface);
      } else {
        grouped.set(surface.statement, [surface]);
      }
    }

    grouped.forEach((surfaces, statement) => {
      writeFileSync(join(surfacesDir, `${statement}.ts`), generateSurfaceFile(statement, surfaces));
    });

    writeFileSync(join(surfacesDir, 'index.ts'), generateSurfacesIndex(grouped));
  }

  log('Generating computed definitions...');
  const computedFiles: { name: string; definitions: ComputedDefinition[] }[] = [];

  computedPacks.forEach((file, pack) => {
    computedFiles.push({ name: pack, definitions: file.computed });
    writeFileSync(join(computedDir, `${pack}.ts`), generateComputedFile(pack, file.computed));
  });

  writeFileSync(join(computedDir, 'index.ts'), generateComputedIndex(computedFiles));

  log('Generating KPI definitions...');
  const kpiFiles: { pack: string; kpis: KpiDefinition[] }[] = [];

  kpiPacks.forEach((file, pack) => {
    kpiFiles.push({ pack, kpis: file.kpis });
    writeFileSync(join(kpisDir, `${pack}.ts`), generateKpiFile(pack, file.kpis));
  });

  writeFileSync(join(kpisDir, 'index.ts'), generateKpiIndex(kpiFiles));

  log('Generating main index...');
  writeFileSync(join(OUTPUT_DIR, 'index.ts'), generateMainIndex());

  const surfaceCount = coreSurfaces ? coreSurfaces.surfaces.length : 0;
  let computedCount = 0;
  for (const { definitions } of computedFiles) {
    computedCount += definitions.length;
  }
  let kpiCount = 0;
  for (const { kpis } of kpiFiles) {
    kpiCount += kpis.length;
  }

  log(`Generated ${surfaceCount} surfaces, ${computedCount} computed definitions, ${kpiCount} KPIs`);
  log(`Output written to ${OUTPUT_DIR}`);
}
|
||||
|
||||
// Script entry point: run the generator and turn any rejection into a
// logged failure with a non-zero exit code.
main().catch((reason) => {
  console.error('Generation failed:', reason);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user