Files
Neon-Desk/scripts/generate-taxonomy.ts
francy51 14a7773504 Add consolidated disclosure statement type
Create unified disclosure statement to organize footnote disclosures
separate from primary financial statements. Disclosures are now grouped
by type (tax, debt, securities, derivatives, leases, intangibles, ma,
revenue, cash_flow) in a dedicated statement type for cleaner UI
presentation.
2026-03-16 18:54:23 -04:00

612 lines
16 KiB
TypeScript

import {
existsSync,
mkdirSync,
readdirSync,
readFileSync,
writeFileSync,
} from "node:fs";
import { join } from "node:path";
/** Units a surface or computed definition may declare in its `unit` field. */
type FinancialUnit = "currency" | "percent" | "ratio" | "shares" | "count";

/** Cadences a computed definition may list in `supported_cadences`. */
type FinancialCadence = "annual" | "quarterly" | "ltm";

/** Statement kinds a surface may belong to; validateSurfacePack rejects anything else. */
type FinancialStatementKind =
  | "income"
  | "balance"
  | "cash_flow"
  | "disclosure"
  | "equity"
  | "comprehensive_income";

/** Sign transforms a surface may request via `sign_transform`. */
type SignTransform = "invert" | "absolute";

/**
 * One entry of a surface pack's `surfaces` array.
 *
 * This mirrors the `SurfaceDefinition` type emitted by generateTypesFile, so
 * the two declarations must be kept in sync.
 */
type SurfaceDefinition = {
  surface_key: string;
  statement: FinancialStatementKind;
  label: string;
  category: string;
  /** Ascending sort key used when the surface list is emitted. */
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?:
    | {
        op: "sum" | "subtract" | "divide";
        sources: string[];
        treat_null_as_zero?: boolean;
      }
    | string
    | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  // Uses the shared alias so this matches the generated type, which accepts
  // both "invert" and "absolute".
  sign_transform?: SignTransform;
};
/**
 * Shape that loadSurfacePacks assumes for a parsed `<pack>.surface.json` file.
 * The JSON is cast to this type, not validated against it.
 */
type SurfacePackFile = {
version: string;
// Pack name; used as the prefix of validation error messages.
pack: string;
surfaces: SurfaceDefinition[];
};
/**
 * Union, discriminated by `type`, describing how a computed definition is derived.
 *
 * validateComputedPack checks `numerator`, `denominator`, `source` and
 * `shares_key` against the loaded surface keys; the `simple` variant is not
 * checked there.
 */
type ComputationSpec =
| { type: "ratio"; numerator: string; denominator: string }
| { type: "yoy_growth"; source: string }
| { type: "cagr"; source: string; years: number }
| { type: "per_share"; source: string; shares_key: string }
| { type: "simple"; formula: string };
/**
 * One entry of a computed pack's `computed` array.
 * Mirrors the `ComputedDefinition` type emitted by generateTypesFile.
 */
type ComputedDefinition = {
// Must be unique within a pack (enforced by validateComputedPack).
key: string;
label: string;
category: string;
// Ascending sort key used when computed definitions are emitted.
order: number;
unit: FinancialUnit;
computation: ComputationSpec;
supported_cadences?: FinancialCadence[];
// A non-empty list places the definition in MARKET_DERIVED_COMPUTED;
// otherwise it lands in FILING_DERIVED_COMPUTED (see generateComputedIndex).
requires_external_data?: string[];
};
/**
 * Shape that loadComputedPacks assumes for a parsed `<pack>.computed.json` file.
 * The JSON is cast to this type, not validated against it.
 */
type ComputedPackFile = {
version: string;
// Pack name; used as the prefix of validation error messages.
pack: string;
computed: ComputedDefinition[];
};
/**
 * One entry of a KPI pack's `kpis` array.
 * Note that `unit` is a free-form string here, not a FinancialUnit.
 */
type KpiDefinition = {
key: string;
label: string;
unit: string;
};
/**
 * Shape that loadKpiPacks assumes for a parsed `kpis/<pack>.kpis.json` file.
 * The JSON is cast to this type, not validated against it.
 */
type KpiPackFile = {
version: string;
pack: string;
kpis: KpiDefinition[];
};
// Directory holding the taxonomy JSON inputs. Resolved against the current
// working directory, so the script presumably has to be launched from the
// repository root — TODO confirm.
const TAXONOMY_DIR = join(process.cwd(), "rust", "taxonomy", "fiscal", "v1");
// Directory that receives the generated TypeScript files (also relative to cwd).
const OUTPUT_DIR = join(process.cwd(), "lib", "generated");
// Pack names probed by the load*Packs functions, in iteration order.
// A pack whose file does not exist is skipped rather than treated as an error.
const PACK_ORDER = [
"core",
"bank_lender",
"insurance",
"reit_real_estate",
"broker_asset_manager",
] as const;
// Union of the literal pack names listed in PACK_ORDER.
type PackName = (typeof PACK_ORDER)[number];
/**
 * Writes a progress line to stdout, tagged with the script name.
 *
 * @param message Text to print after the `[generate-taxonomy]` tag.
 */
function log(message: string) {
  const line = "[generate-taxonomy] " + message;
  console.log(line);
}
/**
 * Reads and parses one JSON file per pack in PACK_ORDER.
 *
 * Packs whose file does not exist are skipped. A file that exists but cannot
 * be read or parsed raises an Error naming the offending path, so a malformed
 * pack is easy to locate. The parsed JSON is cast to `T`; its shape is not
 * validated here.
 *
 * @param resolvePath Maps a pack name to the absolute path of its JSON file.
 * @returns Parsed files keyed by pack name, in PACK_ORDER order.
 * @throws Error when an existing file cannot be read or is not valid JSON.
 */
function loadPackFiles<T>(
  resolvePath: (pack: PackName) => string,
): Map<PackName, T> {
  const packs = new Map<PackName, T>();
  for (const pack of PACK_ORDER) {
    const path = resolvePath(pack);
    if (!existsSync(path)) {
      continue;
    }
    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(path, "utf8"));
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to load taxonomy file ${path}: ${reason}`);
    }
    packs.set(pack, parsed as T);
  }
  return packs;
}

/**
 * Loads every `<pack>.surface.json` present in TAXONOMY_DIR.
 *
 * @returns Surface pack files keyed by pack name; missing packs are omitted.
 */
function loadSurfacePacks(): Map<PackName, SurfacePackFile> {
  return loadPackFiles<SurfacePackFile>((pack) =>
    join(TAXONOMY_DIR, `${pack}.surface.json`),
  );
}

/**
 * Loads every `<pack>.computed.json` present in TAXONOMY_DIR.
 *
 * @returns Computed pack files keyed by pack name; missing packs are omitted.
 */
function loadComputedPacks(): Map<PackName, ComputedPackFile> {
  return loadPackFiles<ComputedPackFile>((pack) =>
    join(TAXONOMY_DIR, `${pack}.computed.json`),
  );
}

/**
 * Loads every `<pack>.kpis.json` present in the `kpis` subdirectory of TAXONOMY_DIR.
 *
 * @returns KPI pack files keyed by pack name; missing packs are omitted.
 */
function loadKpiPacks(): Map<PackName, KpiPackFile> {
  return loadPackFiles<KpiPackFile>((pack) =>
    join(TAXONOMY_DIR, "kpis", `${pack}.kpis.json`),
  );
}
/**
 * Validates one surface pack, appending a message to `errors` for each problem.
 *
 * Reported problems:
 * - `surfaces` is missing or not an array (the remaining checks are skipped);
 * - a `surface_key` appears more than once within the same statement;
 * - a surface has an empty or missing `label`;
 * - a surface's `statement` is not a known FinancialStatementKind.
 *
 * @param pack Parsed surface pack file; its `pack` name prefixes every message.
 * @param errors Accumulator that receives the messages; nothing is thrown.
 */
function validateSurfacePack(pack: SurfacePackFile, errors: string[]): void {
  // Built once per call instead of once per surface.
  const validStatements: ReadonlySet<string> = new Set<FinancialStatementKind>([
    "income",
    "balance",
    "cash_flow",
    "disclosure",
    "equity",
    "comprehensive_income",
  ]);

  // The pack comes from an unvalidated JSON cast, so guard the iteration and
  // report the problem through `errors` instead of crashing with "not iterable".
  if (!Array.isArray(pack.surfaces)) {
    errors.push(`${pack.pack}: "surfaces" must be an array`);
    return;
  }

  const keysByStatement = new Map<string, Set<string>>();
  for (const surface of pack.surfaces) {
    let keySet = keysByStatement.get(surface.statement);
    if (!keySet) {
      keySet = new Set<string>();
      keysByStatement.set(surface.statement, keySet);
    }
    if (keySet.has(surface.surface_key)) {
      errors.push(
        `${pack.pack}: duplicate surface_key "${surface.surface_key}" in statement "${surface.statement}"`,
      );
    }
    keySet.add(surface.surface_key);

    if (!surface.label) {
      errors.push(
        `${pack.pack}: surface "${surface.surface_key}" missing label`,
      );
    }

    if (!validStatements.has(surface.statement)) {
      errors.push(
        `${pack.pack}: surface "${surface.surface_key}" has invalid statement "${surface.statement}"`,
      );
    }
  }
}
/**
 * Validates one computed pack, appending a message to `errors` for each problem.
 *
 * Reported problems: duplicate `key`, missing `label`, and computation fields
 * that name a surface key absent from `surfaceKeys`.
 *
 * @param pack Parsed computed pack file; its `pack` name prefixes every message.
 * @param surfaceKeys Surface keys that computations are allowed to reference.
 * @param errors Accumulator that receives the messages; nothing is thrown.
 */
function validateComputedPack(
  pack: ComputedPackFile,
  surfaceKeys: Set<string>,
  errors: string[],
) {
  const seenKeys = new Set<string>();
  const isKnownSurface = (candidate: string): boolean =>
    surfaceKeys.has(candidate);

  for (const definition of pack.computed) {
    const { key, label, computation } = definition;

    if (seenKeys.has(key)) {
      errors.push(`${pack.pack}: duplicate computed key "${key}"`);
    }
    seenKeys.add(key);

    if (!label) {
      errors.push(`${pack.pack}: computed "${key}" missing label`);
    }

    if (computation.type === "ratio") {
      const { numerator, denominator } = computation;
      // NOTE(review): any operand containing "_" is accepted even when it is
      // not a known surface key — presumably to allow non-surface references;
      // confirm this is intended.
      if (!(isKnownSurface(numerator) || numerator.includes("_"))) {
        errors.push(
          `${pack.pack}: computed "${key}" references unknown numerator "${numerator}"`,
        );
      }
      if (!(isKnownSurface(denominator) || denominator.includes("_"))) {
        errors.push(
          `${pack.pack}: computed "${key}" references unknown denominator "${denominator}"`,
        );
      }
    } else if (
      computation.type === "yoy_growth" ||
      computation.type === "cagr"
    ) {
      if (!isKnownSurface(computation.source)) {
        errors.push(
          `${pack.pack}: computed "${key}" references unknown source "${computation.source}"`,
        );
      }
    } else if (computation.type === "per_share") {
      if (!isKnownSurface(computation.source)) {
        errors.push(
          `${pack.pack}: computed "${key}" references unknown source "${computation.source}"`,
        );
      }
      if (!isKnownSurface(computation.shares_key)) {
        errors.push(
          `${pack.pack}: computed "${key}" references unknown shares_key "${computation.shares_key}"`,
        );
      }
    }
    // Any other computation type (e.g. "simple") is not checked here.
  }
}
/**
 * Returns the full source text of the generated `types.ts` module.
 *
 * The text is a fixed template: it takes no input from the taxonomy files.
 * The emitted declarations mirror the type aliases declared at the top of
 * this script, so the two must be edited together. It additionally exports
 * the RATIO_CATEGORIES constant and its RatioCategory union.
 *
 * @returns TypeScript source to be written to `types.ts` in the output directory.
 */
function generateTypesFile(): string {
return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten
export type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';
export type FinancialCadence = 'annual' | 'quarterly' | 'ltm';
export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'disclosure' | 'equity' | 'comprehensive_income';
export type SignTransform = 'invert' | 'absolute';
export type ComputationSpec =
| { type: 'ratio'; numerator: string; denominator: string }
| { type: 'yoy_growth'; source: string }
| { type: 'cagr'; source: string; years: number }
| { type: 'per_share'; source: string; shares_key: string }
| { type: 'simple'; formula: string };
export type SurfaceDefinition = {
surface_key: string;
statement: FinancialStatementKind;
label: string;
category: string;
order: number;
unit: FinancialUnit;
rollup_policy?: string;
allowed_source_concepts: string[];
allowed_authoritative_concepts?: string[];
formula_fallback?: {
op: 'sum' | 'subtract' | 'divide';
sources: string[];
treat_null_as_zero?: boolean;
} | string | null;
detail_grouping_policy?: string;
materiality_policy?: string;
include_in_output?: boolean;
sign_transform?: SignTransform;
};
export type ComputedDefinition = {
key: string;
label: string;
category: string;
order: number;
unit: FinancialUnit;
computation: ComputationSpec;
supported_cadences?: FinancialCadence[];
requires_external_data?: string[];
};
export type KpiDefinition = {
key: string;
label: string;
unit: string;
};
export const RATIO_CATEGORIES = ['margins', 'returns', 'financial_health', 'per_share', 'growth', 'valuation'] as const;
export type RatioCategory = (typeof RATIO_CATEGORIES)[number];
`;
}
/**
 * Renders the module source for one statement's surfaces.
 *
 * The surfaces are emitted as a JSON literal sorted by ascending `order`,
 * exported under `<STATEMENT>_SURFACES`. The input array is not mutated.
 *
 * @param statement Statement name; upper-cased to form the export name.
 * @param surfaces Surface definitions belonging to that statement.
 * @returns TypeScript source text for the statement's surface module.
 */
function generateSurfaceFile(
  statement: string,
  surfaces: SurfaceDefinition[],
): string {
  const exportName = statement.toUpperCase() + "_SURFACES";
  // Sort a copy so the caller's array keeps its order.
  const ordered = surfaces
    .slice()
    .sort((left, right) => left.order - right.order);
  const lines = [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { SurfaceDefinition } from '../types';",
    `export const ${exportName}: SurfaceDefinition[] = ${JSON.stringify(ordered, null, 2)};`,
    "",
  ];
  return lines.join("\n");
}
/**
 * Renders the surfaces barrel module.
 *
 * For every statement (sorted by name) it imports `<STATEMENT>_SURFACES` from
 * `./<statement>`, collects them in `ALL_SURFACES_BY_STATEMENT`, and re-exports
 * each constant by name.
 *
 * @param surfacesByStatement Surfaces grouped by statement; only the keys are used.
 * @returns TypeScript source text for the surfaces index module.
 */
function generateSurfacesIndex(
  surfacesByStatement: Map<string, SurfaceDefinition[]>,
): string {
  const entries = Array.from(surfacesByStatement.keys())
    .sort()
    .map((statement) => ({
      statement,
      constName: `${statement.toUpperCase()}_SURFACES`,
    }));

  const importLines = entries.map(
    ({ statement, constName }) =>
      `import { ${constName} } from './${statement}';`,
  );
  const memberLines = entries.map(
    ({ statement, constName }) => ` ${statement}: ${constName},`,
  );
  const exportedNames = entries.map((entry) => entry.constName).join(", ");

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    importLines.join("\n"),
    "export const ALL_SURFACES_BY_STATEMENT = {",
    memberLines.join("\n"),
    "} as const;",
    `export { ${exportedNames} };`,
    "",
  ].join("\n");
}
/**
 * Renders the module source for one pack's computed definitions.
 *
 * Definitions are emitted as a JSON literal sorted by ascending `order`. The
 * export name is the pack name upper-cased with hyphens turned into
 * underscores. The input array is not mutated.
 *
 * @param name Pack name used to derive the export name.
 * @param definitions Computed definitions of that pack.
 * @returns TypeScript source text for the pack's computed module.
 */
function generateComputedFile(
  name: string,
  definitions: ComputedDefinition[],
): string {
  const exportName = name.toUpperCase().replace(/-/g, "_");
  // Sort a copy so the caller's array keeps its order.
  const ordered = definitions
    .slice()
    .sort((left, right) => left.order - right.order);
  const lines = [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { ComputedDefinition } from '../types';",
    `export const ${exportName}: ComputedDefinition[] = ${JSON.stringify(ordered, null, 2)};`,
    "",
  ];
  return lines.join("\n");
}
/**
 * Renders the computed barrel module.
 *
 * The module imports each pack's constant, spreads them all into
 * `ALL_COMPUTED`, and inlines two JSON literals sorted by ascending `order`:
 * `FILING_DERIVED_COMPUTED` (definitions with no `requires_external_data`
 * entries) and `MARKET_DERIVED_COMPUTED` (definitions with at least one).
 *
 * @param files One entry per computed pack: its name and its definitions.
 * @returns TypeScript source text for the computed index module.
 */
function generateComputedIndex(
  files: { name: string; definitions: ComputedDefinition[] }[],
): string {
  const toConstName = (packName: string): string =>
    packName.toUpperCase().replace(/-/g, "_");
  const byOrder = (left: ComputedDefinition, right: ComputedDefinition) =>
    left.order - right.order;

  const constNames = files.map((file) => toConstName(file.name));
  const importLines = files.map(
    (file, index) => `import { ${constNames[index]} } from './${file.name}';`,
  );
  const spreadLines = constNames.map((constName) => ` ...${constName},`);

  const everyDefinition = files.flatMap((file) => file.definitions);
  const filingDerived = everyDefinition
    .filter(
      (definition) =>
        !(
          definition.requires_external_data &&
          definition.requires_external_data.length !== 0
        ),
    )
    .sort(byOrder);
  const marketDerived = everyDefinition
    .filter(
      (definition) => (definition.requires_external_data?.length ?? 0) > 0,
    )
    .sort(byOrder);

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { ComputedDefinition } from '../types';",
    importLines.join("\n"),
    "export const ALL_COMPUTED: ComputedDefinition[] = [",
    spreadLines.join("\n"),
    "];",
    `export const FILING_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(filingDerived, null, 2)};`,
    `export const MARKET_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(marketDerived, null, 2)};`,
    `export { ${constNames.join(", ")} };`,
    "",
  ].join("\n");
}
/**
 * Renders the module source for one pack's KPI definitions.
 *
 * KPIs are emitted as a JSON literal in the order given, with no sorting.
 * The export name is `<PACK>_KPIS`, where the pack name is upper-cased and
 * hyphens become underscores.
 *
 * @param pack Pack name used to derive the export name.
 * @param kpis KPI definitions of that pack.
 * @returns TypeScript source text for the pack's KPI module.
 */
function generateKpiFile(pack: string, kpis: KpiDefinition[]): string {
  const exportName = pack.toUpperCase().replace(/-/g, "_") + "_KPIS";
  const serialized = JSON.stringify(kpis, null, 2);
  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { KpiDefinition } from '../types';",
    `export const ${exportName}: KpiDefinition[] = ${serialized};`,
    "",
  ].join("\n");
}
/**
 * Renders the KPI barrel module.
 *
 * The module imports each pack's `<PACK>_KPIS` constant, spreads them all
 * into `ALL_KPIS` in the order given, and re-exports each constant by name.
 *
 * @param packs One entry per KPI pack; only the `pack` name is used here.
 * @returns TypeScript source text for the KPI index module.
 */
function generateKpiIndex(
  packs: { pack: string; kpis: KpiDefinition[] }[],
): string {
  const entries = packs.map(({ pack }) => ({
    pack,
    constName: `${pack.toUpperCase().replace(/-/g, "_")}_KPIS`,
  }));

  const importLines = entries.map(
    ({ pack, constName }) => `import { ${constName} } from './${pack}';`,
  );
  const spreadLines = entries.map(({ constName }) => ` ...${constName},`);
  const exportedNames = entries.map(({ constName }) => constName).join(", ");

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { KpiDefinition } from '../types';",
    importLines.join("\n"),
    "export const ALL_KPIS: KpiDefinition[] = [",
    spreadLines.join("\n"),
    "];",
    `export { ${exportedNames} };`,
    "",
  ].join("\n");
}
/**
 * Returns the source text of the top-level generated `index.ts` barrel.
 *
 * The text is a fixed template that re-exports the generated types, the
 * income / balance / cash-flow surface constants, the computed collections
 * and the KPI collections. `SignTransform` is included in the type
 * re-exports because the generated `types.ts` exports it and
 * `SurfaceDefinition.sign_transform` refers to it.
 *
 * NOTE(review): the surface, `CORE` and `CORE_KPIS` names are hard-coded, so
 * the emitted barrel only compiles when the core pack produces those modules.
 *
 * @returns TypeScript source to be written to `index.ts` in the output directory.
 */
function generateMainIndex(): string {
  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten
export type {
FinancialUnit,
FinancialCadence,
FinancialStatementKind,
SignTransform,
ComputationSpec,
SurfaceDefinition,
ComputedDefinition,
KpiDefinition,
} from './types';
export { RATIO_CATEGORIES, type RatioCategory } from './types';
export {
INCOME_SURFACES,
BALANCE_SURFACES,
CASH_FLOW_SURFACES,
ALL_SURFACES_BY_STATEMENT,
} from './surfaces';
export {
ALL_COMPUTED,
FILING_DERIVED_COMPUTED,
MARKET_DERIVED_COMPUTED,
CORE,
} from './computed';
export { ALL_KPIS, CORE_KPIS } from './kpis';
`;
}
/**
 * Script entry point: loads the taxonomy packs, validates them, and writes the
 * generated TypeScript modules under OUTPUT_DIR.
 *
 * Steps, in order: load surface / computed / KPI packs; validate them and exit
 * the process with code 1 if any error was collected; create the output
 * directories; write types.ts, the surfaces modules, the computed modules,
 * the KPI modules and the top-level index.ts; log a summary.
 *
 * All file-system work is synchronous; the function is async only so callers
 * get a promise to attach a rejection handler to.
 */
async function main() {
log("Loading taxonomy files...");
const surfacePacks = loadSurfacePacks();
const computedPacks = loadComputedPacks();
const kpiPacks = loadKpiPacks();
log(
`Loaded ${surfacePacks.size} surface packs, ${computedPacks.size} computed packs, ${kpiPacks.size} KPI packs`,
);
// Validators append to this list instead of throwing, so every problem is
// reported in a single run.
const errors: string[] = [];
log("Validating taxonomy files...");
for (const [, pack] of surfacePacks) {
validateSurfacePack(pack, errors);
}
// Computed definitions may reference a surface from ANY loaded pack, so the
// key set is the union across packs (not just the pack being validated).
const allSurfaceKeys = new Set<string>();
for (const [, pack] of surfacePacks) {
for (const surface of pack.surfaces) {
allSurfaceKeys.add(surface.surface_key);
}
}
for (const [, pack] of computedPacks) {
validateComputedPack(pack, allSurfaceKeys, errors);
}
// Abort before writing anything if validation found problems.
if (errors.length > 0) {
console.error("Validation errors:");
for (const error of errors) {
console.error(` - ${error}`);
}
process.exit(1);
}
log("Creating output directories...");
mkdirSync(join(OUTPUT_DIR, "surfaces"), { recursive: true });
mkdirSync(join(OUTPUT_DIR, "computed"), { recursive: true });
mkdirSync(join(OUTPUT_DIR, "kpis"), { recursive: true });
log("Generating types...");
writeFileSync(join(OUTPUT_DIR, "types.ts"), generateTypesFile());
log("Generating surfaces...");
// Only the "core" pack's surfaces are emitted; surfaces from the other packs
// take part in validation above but produce no output files here.
// NOTE(review): when the core surface pack is absent, surfaces/index.ts is not
// written even though the generated index.ts still re-exports from './surfaces'.
const coreSurfaces = surfacePacks.get("core");
if (coreSurfaces) {
// Group the core surfaces by statement; one file is written per statement.
const surfacesByStatement = new Map<string, SurfaceDefinition[]>();
for (const surface of coreSurfaces.surfaces) {
const existing = surfacesByStatement.get(surface.statement) || [];
existing.push(surface);
surfacesByStatement.set(surface.statement, existing);
}
for (const [statement, surfaces] of surfacesByStatement) {
writeFileSync(
join(OUTPUT_DIR, "surfaces", `${statement}.ts`),
generateSurfaceFile(statement, surfaces),
);
}
writeFileSync(
join(OUTPUT_DIR, "surfaces", "index.ts"),
generateSurfacesIndex(surfacesByStatement),
);
}
log("Generating computed definitions...");
// Unlike surfaces, every loaded computed pack gets its own module.
const computedFiles: { name: string; definitions: ComputedDefinition[] }[] =
[];
for (const [pack, file] of computedPacks) {
computedFiles.push({ name: pack, definitions: file.computed });
writeFileSync(
join(OUTPUT_DIR, "computed", `${pack}.ts`),
generateComputedFile(pack, file.computed),
);
}
writeFileSync(
join(OUTPUT_DIR, "computed", "index.ts"),
generateComputedIndex(computedFiles),
);
log("Generating KPI definitions...");
// Every loaded KPI pack gets its own module as well.
const kpiFiles: { pack: string; kpis: KpiDefinition[] }[] = [];
for (const [pack, file] of kpiPacks) {
kpiFiles.push({ pack, kpis: file.kpis });
writeFileSync(
join(OUTPUT_DIR, "kpis", `${pack}.ts`),
generateKpiFile(pack, file.kpis),
);
}
writeFileSync(
join(OUTPUT_DIR, "kpis", "index.ts"),
generateKpiIndex(kpiFiles),
);
log("Generating main index...");
writeFileSync(join(OUTPUT_DIR, "index.ts"), generateMainIndex());
// Summary counts: surfaces counts the core pack only, matching what was written.
const surfaceCount = coreSurfaces?.surfaces.length || 0;
const computedCount = computedFiles.reduce(
(sum, f) => sum + f.definitions.length,
0,
);
const kpiCount = kpiFiles.reduce((sum, f) => sum + f.kpis.length, 0);
log(
`Generated ${surfaceCount} surfaces, ${computedCount} computed definitions, ${kpiCount} KPIs`,
);
log(`Output written to ${OUTPUT_DIR}`);
}
// Run the generator; any rejection (e.g. an unreadable or malformed pack file)
// is reported on stderr and turned into a non-zero exit code.
main().catch((reason: unknown) => {
  console.error("Generation failed:", reason);
  process.exit(1);
});