Files
Neon-Desk/scripts/generate-taxonomy.ts
francy51 17de3dd72d Add history window controls and expand taxonomy pack support
- add 3Y/5Y/10Y financial history filtering and reorganize normalization details UI
- add new fiscal taxonomy surface/income bridge/KPI packs and update Rust taxonomy loading
- auto-detect Homebrew SQLite for native `sqlite-vec` in local dev/e2e with docs and env guidance
2026-03-18 23:40:28 -04:00

658 lines
17 KiB
TypeScript

import {
existsSync,
mkdirSync,
readdirSync,
readFileSync,
writeFileSync,
} from "node:fs";
import { join } from "node:path";
/** Unit tag attached to surface and computed definitions. */
type FinancialUnit = "currency" | "percent" | "ratio" | "shares" | "count";

/** Reporting cadence a computed definition may declare support for. */
type FinancialCadence = "annual" | "quarterly" | "ltm";

/**
 * Statement a surface belongs to. `validateSurfacePack` rejects any surface
 * whose `statement` is not one of these literals, and the generator emits one
 * surfaces file per statement.
 */
type FinancialStatementKind =
  | "income"
  | "balance"
  | "cash_flow"
  | "disclosure"
  | "equity"
  | "comprehensive_income";

/** Sign adjustments a surface may declare; mirrors the type emitted into `types.ts`. */
type SignTransform = "invert" | "absolute";

/**
 * One entry of the `surfaces` array in a `<pack>.surface.json` file.
 *
 * Within this script only a few fields are interpreted:
 * - `statement` + `surface_key` identify a surface (duplicate detection and
 *   cross-pack de-duplication),
 * - `label` must be non-empty,
 * - `order` is the sort key inside each generated surfaces file.
 *
 * Every other field is carried through unchanged into the generated output.
 */
type SurfaceDefinition = {
  surface_key: string;
  statement: FinancialStatementKind;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?:
    | {
        op: "sum" | "subtract" | "divide";
        sources: string[];
        treat_null_as_zero?: boolean;
      }
    | string
    | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  // Uses the shared alias so this type matches the `SurfaceDefinition`
  // emitted by generateTypesFile(), which also allows "absolute".
  sign_transform?: SignTransform;
};
/**
 * Parsed shape of a `<pack>.surface.json` taxonomy file.
 * `pack` is used as the prefix of validation error messages; `version` is
 * not read anywhere in this script.
 */
type SurfacePackFile = {
version: string;
pack: string;
surfaces: SurfaceDefinition[];
};
/**
 * How a computed definition is derived, discriminated by `type`.
 * `validateComputedPack` checks the keys referenced by `ratio`, `yoy_growth`,
 * `cagr` and `per_share`; the `simple` variant's `formula` is not inspected.
 */
type ComputationSpec =
| { type: "ratio"; numerator: string; denominator: string }
| { type: "yoy_growth"; source: string }
| { type: "cagr"; source: string; years: number }
| { type: "per_share"; source: string; shares_key: string }
| { type: "simple"; formula: string };
/**
 * One entry of the `computed` array in a `<pack>.computed.json` file.
 * `order` is the sort key in generated files. A non-empty
 * `requires_external_data` places the definition in MARKET_DERIVED_COMPUTED;
 * an absent or empty one places it in FILING_DERIVED_COMPUTED.
 */
type ComputedDefinition = {
key: string;
label: string;
category: string;
order: number;
unit: FinancialUnit;
computation: ComputationSpec;
supported_cadences?: FinancialCadence[];
requires_external_data?: string[];
};
/**
 * Parsed shape of a `<pack>.computed.json` taxonomy file.
 * `pack` prefixes validation error messages for its definitions.
 */
type ComputedPackFile = {
version: string;
pack: string;
computed: ComputedDefinition[];
};
/**
 * One entry of the `kpis` array in a KPI pack file.
 * Note that `unit` is a free-form string here, not a `FinancialUnit`.
 */
type KpiDefinition = {
key: string;
label: string;
unit: string;
};
/**
 * Parsed shape of a `kpis/<pack>.kpis.json` taxonomy file.
 * KPI packs are loaded and emitted as-is; this script runs no validation on them.
 */
type KpiPackFile = {
version: string;
pack: string;
kpis: KpiDefinition[];
};
// Input and output roots. Both are resolved against process.cwd(), so the
// paths depend on the directory the script is launched from.
// TAXONOMY_DIR holds the source JSON packs (KPI packs live in its "kpis" subfolder).
const TAXONOMY_DIR = join(process.cwd(), "rust", "taxonomy", "fiscal", "v1");
// OUTPUT_DIR receives the generated TypeScript modules.
const OUTPUT_DIR = join(process.cwd(), "lib", "generated");
// Pack names the generator looks for, in processing order.
// A pack is skipped when its file does not exist. The order matters for
// surfaces: when several packs define the same statement/surface_key pair,
// buildUnionSurfaceDefinitions keeps the definition from the earliest pack
// listed here.
const PACK_ORDER = [
"core",
"bank_lender",
"insurance",
"reit_real_estate",
"broker_asset_manager",
"agriculture",
"contractors_construction",
"contractors_federal_government",
"development_stage",
"entertainment_broadcasters",
"entertainment_cable_television",
"entertainment_casinos",
"entertainment_films",
"entertainment_music",
"extractive_mining",
"mortgage_banking",
"title_plant",
"franchisors",
"not_for_profit",
"plan_defined_benefit",
"plan_defined_contribution",
"plan_health_welfare",
"real_estate_general",
"real_estate_common_interest",
"real_estate_retail_land",
"real_estate_time_sharing",
"software",
"steamship",
] as const;
// Union of the literal pack names above.
type PackName = (typeof PACK_ORDER)[number];
/**
 * Prints a progress message to stdout, tagged with the script name so the
 * output is easy to spot among other build logs.
 *
 * @param message Text to print after the `[generate-taxonomy]` tag.
 */
function log(message: string): void {
  const tag = "[generate-taxonomy]";
  console.log(tag + " " + message);
}
/**
 * Reads one taxonomy JSON file and checks that it has the expected top-level
 * list before handing it to the rest of the generator.
 *
 * @param path Absolute path of the JSON file to read.
 * @param listField Name of the top-level property that must be an array.
 * @returns The parsed document, typed as `T` (the content of list entries is
 *   not validated here).
 * @throws Error naming `path` when the file is not valid JSON or `listField`
 *   is missing / not an array.
 */
function readTaxonomyJson<T>(path: string, listField: string): T {
  const raw = readFileSync(path, "utf8");

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    // JSON.parse's own message does not say which file was malformed.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to parse taxonomy file ${path}: ${reason}`);
  }

  const entries =
    typeof parsed === "object" && parsed !== null
      ? (parsed as Record<string, unknown>)[listField]
      : undefined;
  if (!Array.isArray(entries)) {
    // Fail here with context instead of a bare TypeError when the list is
    // iterated later during validation or generation.
    throw new Error(
      `Invalid taxonomy file ${path}: expected "${listField}" to be an array`,
    );
  }

  return parsed as T;
}

/**
 * Loads every pack in PACK_ORDER whose file exists, keyed by pack name.
 * Packs without a file are silently skipped; insertion order follows PACK_ORDER.
 *
 * @param resolvePath Maps a pack name to the file that would hold it.
 * @param listField Top-level array property each file must contain.
 */
function loadPackFiles<T>(
  resolvePath: (pack: PackName) => string,
  listField: string,
): Map<PackName, T> {
  const packs = new Map<PackName, T>();
  for (const pack of PACK_ORDER) {
    const path = resolvePath(pack);
    if (!existsSync(path)) {
      continue;
    }
    packs.set(pack, readTaxonomyJson<T>(path, listField));
  }
  return packs;
}

/** Loads `<pack>.surface.json` files from TAXONOMY_DIR. */
function loadSurfacePacks(): Map<PackName, SurfacePackFile> {
  return loadPackFiles<SurfacePackFile>(
    (pack) => join(TAXONOMY_DIR, `${pack}.surface.json`),
    "surfaces",
  );
}

/** Loads `<pack>.computed.json` files from TAXONOMY_DIR. */
function loadComputedPacks(): Map<PackName, ComputedPackFile> {
  return loadPackFiles<ComputedPackFile>(
    (pack) => join(TAXONOMY_DIR, `${pack}.computed.json`),
    "computed",
  );
}

/** Loads `<pack>.kpis.json` files from the `kpis` subfolder of TAXONOMY_DIR. */
function loadKpiPacks(): Map<PackName, KpiPackFile> {
  return loadPackFiles<KpiPackFile>(
    (pack) => join(TAXONOMY_DIR, "kpis", `${pack}.kpis.json`),
    "kpis",
  );
}
/**
 * Checks one surface pack and appends a message to `errors` for every problem
 * found. Nothing is thrown and nothing is returned.
 *
 * Per surface, in this order:
 * 1. `surface_key` must be unique within its statement,
 * 2. `label` must be non-empty,
 * 3. `statement` must be a known statement kind.
 *
 * @param pack Parsed surface pack; `pack.pack` prefixes each message.
 * @param errors Accumulator the messages are pushed onto.
 */
function validateSurfacePack(pack: SurfacePackFile, errors: string[]) {
  const knownStatements = new Set<string>([
    "income",
    "balance",
    "cash_flow",
    "disclosure",
    "equity",
    "comprehensive_income",
  ]);
  const seenKeys = new Map<string, Set<string>>();

  for (const { surface_key: key, statement, label } of pack.surfaces) {
    let keysForStatement = seenKeys.get(statement);
    if (keysForStatement === undefined) {
      keysForStatement = new Set<string>();
      seenKeys.set(statement, keysForStatement);
    }

    if (keysForStatement.has(key)) {
      errors.push(
        `${pack.pack}: duplicate surface_key "${key}" in statement "${statement}"`,
      );
    }
    keysForStatement.add(key);

    if (!label) {
      errors.push(`${pack.pack}: surface "${key}" missing label`);
    }

    if (!knownStatements.has(statement)) {
      errors.push(
        `${pack.pack}: surface "${key}" has invalid statement "${statement}"`,
      );
    }
  }
}
/**
 * Checks one computed pack and appends a message to `errors` for every
 * problem found. Nothing is thrown and nothing is returned.
 *
 * Per definition: the key must be unique within the pack, the label must be
 * non-empty, and the keys its computation refers to must be known surfaces.
 * `simple` computations are not inspected.
 *
 * @param pack Parsed computed pack; `pack.pack` prefixes each message.
 * @param surfaceKeys Every surface key known across all loaded surface packs.
 * @param errors Accumulator the messages are pushed onto.
 */
function validateComputedPack(
  pack: ComputedPackFile,
  surfaceKeys: Set<string>,
  errors: string[],
) {
  const seenKeys = new Set<string>();

  const reportUnknown = (owner: string, role: string, reference: string) => {
    errors.push(
      `${pack.pack}: computed "${owner}" references unknown ${role} "${reference}"`,
    );
  };

  // Ratio operands are accepted when they are a known surface OR merely
  // contain an underscore.
  // NOTE(review): this lets any snake_case name through unchecked — presumably
  // to allow operands that are not surfaces; confirm this is intended.
  const isAcceptedRatioOperand = (reference: string) =>
    surfaceKeys.has(reference) || reference.includes("_");

  for (const definition of pack.computed) {
    const { key, label, computation: spec } = definition;

    if (seenKeys.has(key)) {
      errors.push(`${pack.pack}: duplicate computed key "${key}"`);
    }
    seenKeys.add(key);

    if (!label) {
      errors.push(`${pack.pack}: computed "${key}" missing label`);
    }

    if (spec.type === "ratio") {
      if (!isAcceptedRatioOperand(spec.numerator)) {
        reportUnknown(key, "numerator", spec.numerator);
      }
      if (!isAcceptedRatioOperand(spec.denominator)) {
        reportUnknown(key, "denominator", spec.denominator);
      }
    } else if (spec.type === "yoy_growth" || spec.type === "cagr") {
      if (!surfaceKeys.has(spec.source)) {
        reportUnknown(key, "source", spec.source);
      }
    } else if (spec.type === "per_share") {
      if (!surfaceKeys.has(spec.source)) {
        reportUnknown(key, "source", spec.source);
      }
      if (!surfaceKeys.has(spec.shares_key)) {
        reportUnknown(key, "shares_key", spec.shares_key);
      }
    }
  }
}
/**
 * Produces the source text of the generated `types.ts` module.
 *
 * The content is fixed (it does not depend on the loaded taxonomy) and is
 * assembled from groups of lines, one group per emitted declaration, joined
 * with newlines and terminated by a trailing newline.
 *
 * @returns The complete file contents.
 */
function generateTypesFile(): string {
  const header = [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
  ];

  const scalarTypes = [
    "export type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';",
    "export type FinancialCadence = 'annual' | 'quarterly' | 'ltm';",
    "export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'disclosure' | 'equity' | 'comprehensive_income';",
    "export type SignTransform = 'invert' | 'absolute';",
  ];

  const computationSpec = [
    "export type ComputationSpec =",
    "| { type: 'ratio'; numerator: string; denominator: string }",
    "| { type: 'yoy_growth'; source: string }",
    "| { type: 'cagr'; source: string; years: number }",
    "| { type: 'per_share'; source: string; shares_key: string }",
    "| { type: 'simple'; formula: string };",
  ];

  const surfaceDefinition = [
    "export type SurfaceDefinition = {",
    "surface_key: string;",
    "statement: FinancialStatementKind;",
    "label: string;",
    "category: string;",
    "order: number;",
    "unit: FinancialUnit;",
    "rollup_policy?: string;",
    "allowed_source_concepts: string[];",
    "allowed_authoritative_concepts?: string[];",
    "formula_fallback?: {",
    "op: 'sum' | 'subtract' | 'divide';",
    "sources: string[];",
    "treat_null_as_zero?: boolean;",
    "} | string | null;",
    "detail_grouping_policy?: string;",
    "materiality_policy?: string;",
    "include_in_output?: boolean;",
    "sign_transform?: SignTransform;",
    "};",
  ];

  const computedDefinition = [
    "export type ComputedDefinition = {",
    "key: string;",
    "label: string;",
    "category: string;",
    "order: number;",
    "unit: FinancialUnit;",
    "computation: ComputationSpec;",
    "supported_cadences?: FinancialCadence[];",
    "requires_external_data?: string[];",
    "};",
  ];

  const kpiDefinition = [
    "export type KpiDefinition = {",
    "key: string;",
    "label: string;",
    "unit: string;",
    "};",
  ];

  const ratioCategories = [
    "export const RATIO_CATEGORIES = ['margins', 'returns', 'financial_health', 'per_share', 'growth', 'valuation'] as const;",
    "export type RatioCategory = (typeof RATIO_CATEGORIES)[number];",
  ];

  const lines = header.concat(
    scalarTypes,
    computationSpec,
    surfaceDefinition,
    computedDefinition,
    kpiDefinition,
    ratioCategories,
  );
  return lines.join("\n") + "\n";
}
/**
 * Produces the source text of one generated surfaces module.
 *
 * The module exports a single `<STATEMENT>_SURFACES` array holding the given
 * surfaces sorted by ascending `order`. The input array is not modified.
 *
 * @param statement Statement name; upper-cased to form the export name.
 * @param surfaces Surface definitions belonging to that statement.
 * @returns The complete file contents, ending with a newline.
 */
function generateSurfaceFile(
  statement: string,
  surfaces: SurfaceDefinition[],
): string {
  const exportName = statement.toUpperCase() + "_SURFACES";
  const ordered = Array.from(surfaces).sort(
    (left, right) => left.order - right.order,
  );
  const serialized = JSON.stringify(ordered, null, 2);

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { SurfaceDefinition } from '../types';",
    `export const ${exportName}: SurfaceDefinition[] = ${serialized};`,
    "",
  ].join("\n");
}
/**
 * Produces the source text of `surfaces/index.ts`.
 *
 * For every statement present in the map (in default-sorted key order) the
 * index imports `<STATEMENT>_SURFACES` from `./<statement>`, lists it in
 * `ALL_SURFACES_BY_STATEMENT`, and re-exports it by name. Only the map's keys
 * are used; the surface arrays themselves are not read.
 *
 * @param surfacesByStatement Surfaces grouped by statement name.
 * @returns The complete file contents, ending with a newline.
 */
function generateSurfacesIndex(
  surfacesByStatement: Map<string, SurfaceDefinition[]>,
): string {
  const modules = Array.from(surfacesByStatement.keys())
    .sort()
    .map((statement) => ({
      statement,
      exportName: statement.toUpperCase() + "_SURFACES",
    }));

  const importLines: string[] = [];
  const entryLines: string[] = [];
  const exportNames: string[] = [];
  for (const { statement, exportName } of modules) {
    importLines.push(`import { ${exportName} } from './${statement}';`);
    entryLines.push(` ${statement}: ${exportName},`);
    exportNames.push(exportName);
  }

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    importLines.join("\n"),
    "export const ALL_SURFACES_BY_STATEMENT = {",
    entryLines.join("\n"),
    "} as const;",
    `export { ${exportNames.join(", ")} };`,
    "",
  ].join("\n");
}
/**
 * Produces the source text of one generated computed-definitions module.
 *
 * The export name is `name` upper-cased with hyphens turned into underscores;
 * the exported array holds the definitions sorted by ascending `order`. The
 * input array is not modified.
 *
 * @param name Pack name the module is generated for.
 * @param definitions Computed definitions of that pack.
 * @returns The complete file contents, ending with a newline.
 */
function generateComputedFile(
  name: string,
  definitions: ComputedDefinition[],
): string {
  const exportName = name.toUpperCase().replace(/-/g, "_");
  const ordered = Array.from(definitions).sort(
    (left, right) => left.order - right.order,
  );
  const serialized = JSON.stringify(ordered, null, 2);

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { ComputedDefinition } from '../types';",
    `export const ${exportName}: ComputedDefinition[] = ${serialized};`,
    "",
  ].join("\n");
}
/**
 * Produces the source text of `computed/index.ts`.
 *
 * The index imports each pack's array, spreads them all into `ALL_COMPUTED`
 * (in the order given), inlines two order-sorted lists, and re-exports the
 * per-pack arrays:
 * - `FILING_DERIVED_COMPUTED`: definitions whose `requires_external_data` is
 *   absent or empty,
 * - `MARKET_DERIVED_COMPUTED`: definitions whose `requires_external_data` is
 *   non-empty.
 *
 * @param files One entry per generated computed module.
 * @returns The complete file contents, ending with a newline.
 */
function generateComputedIndex(
  files: { name: string; definitions: ComputedDefinition[] }[],
): string {
  const byOrder = (left: ComputedDefinition, right: ComputedDefinition) =>
    left.order - right.order;

  const importLines: string[] = [];
  const spreadLines: string[] = [];
  const exportNames: string[] = [];
  for (const { name } of files) {
    const exportName = name.toUpperCase().replace(/-/g, "_");
    importLines.push(`import { ${exportName} } from './${name}';`);
    spreadLines.push(` ...${exportName},`);
    exportNames.push(exportName);
  }

  // Both membership tests are evaluated independently for every definition,
  // in pack order, before each list is sorted.
  const filingDerived: ComputedDefinition[] = [];
  const marketDerived: ComputedDefinition[] = [];
  for (const { definitions } of files) {
    for (const definition of definitions) {
      const external = definition.requires_external_data;
      if (!external || external.length === 0) {
        filingDerived.push(definition);
      }
      if (external && external.length > 0) {
        marketDerived.push(definition);
      }
    }
  }
  filingDerived.sort(byOrder);
  marketDerived.sort(byOrder);

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { ComputedDefinition } from '../types';",
    importLines.join("\n"),
    "export const ALL_COMPUTED: ComputedDefinition[] = [",
    spreadLines.join("\n"),
    "];",
    `export const FILING_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(filingDerived, null, 2)};`,
    `export const MARKET_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(marketDerived, null, 2)};`,
    `export { ${exportNames.join(", ")} };`,
    "",
  ].join("\n");
}
/**
 * Produces the source text of one generated KPI module.
 *
 * The export name is the pack name upper-cased, hyphens turned into
 * underscores, with a `_KPIS` suffix. KPIs are emitted in the order given.
 *
 * @param pack Pack name the module is generated for.
 * @param kpis KPI definitions of that pack.
 * @returns The complete file contents, ending with a newline.
 */
function generateKpiFile(pack: string, kpis: KpiDefinition[]): string {
  const exportName = pack.toUpperCase().replace(/-/g, "_") + "_KPIS";
  const serialized = JSON.stringify(kpis, null, 2);

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { KpiDefinition } from '../types';",
    `export const ${exportName}: KpiDefinition[] = ${serialized};`,
    "",
  ].join("\n");
}
/**
 * Produces the source text of `kpis/index.ts`.
 *
 * The index imports each pack's `<PACK>_KPIS` array from `./<pack>`, spreads
 * them all into `ALL_KPIS` in the order given, and re-exports them by name.
 * Only the pack names are used; the KPI arrays themselves are not read.
 *
 * @param packs One entry per generated KPI module.
 * @returns The complete file contents, ending with a newline.
 */
function generateKpiIndex(
  packs: { pack: string; kpis: KpiDefinition[] }[],
): string {
  const importLines: string[] = [];
  const spreadLines: string[] = [];
  const exportNames: string[] = [];
  for (const { pack } of packs) {
    const exportName = pack.toUpperCase().replace(/-/g, "_") + "_KPIS";
    importLines.push(`import { ${exportName} } from './${pack}';`);
    spreadLines.push(` ...${exportName},`);
    exportNames.push(exportName);
  }

  return [
    "// Auto-generated by scripts/generate-taxonomy.ts",
    "// DO NOT EDIT MANUALLY - changes will be overwritten",
    "import type { KpiDefinition } from '../types';",
    importLines.join("\n"),
    "export const ALL_KPIS: KpiDefinition[] = [",
    spreadLines.join("\n"),
    "];",
    `export { ${exportNames.join(", ")} };`,
    "",
  ].join("\n");
}
/**
 * Merges the surfaces of all loaded packs into one list per statement.
 *
 * Packs are visited in PACK_ORDER. A surface is identified by its statement
 * and `surface_key`; when several packs define the same one, only the first
 * definition encountered is kept. Surfaces within a statement stay in
 * encounter order (sorting happens later, when files are generated).
 *
 * @param surfacePacks Loaded surface packs keyed by pack name.
 * @returns Surfaces grouped by statement name.
 */
function buildUnionSurfaceDefinitions(
  surfacePacks: Map<PackName, SurfacePackFile>,
): Map<string, SurfaceDefinition[]> {
  const grouped = new Map<string, SurfaceDefinition[]>();
  const claimed = new Set<string>();

  for (const packName of PACK_ORDER) {
    const packFile = surfacePacks.get(packName);
    if (packFile) {
      for (const candidate of packFile.surfaces) {
        const identity = `${candidate.statement}:${candidate.surface_key}`;
        if (!claimed.has(identity)) {
          claimed.add(identity);

          let bucket = grouped.get(candidate.statement);
          if (!bucket) {
            bucket = [];
            grouped.set(candidate.statement, bucket);
          }
          bucket.push(candidate);
        }
      }
    }
  }

  return grouped;
}
/**
 * Produces the source text of the top-level generated `index.ts`, which
 * re-exports the public types and values of the generated modules.
 *
 * The content is fixed: the re-exported value names are hard-coded here even
 * though the `surfaces`, `computed` and `kpis` indexes are built from
 * whichever packs were loaded.
 * NOTE(review): this presumes the income/balance/cash_flow surface files and
 * the `core` computed and KPI packs are always generated — confirm.
 *
 * @returns The complete file contents, ending with a newline.
 */
function generateMainIndex(): string {
  // SignTransform is listed with the other types so that everything exported
  // by the generated types module is reachable from this index.
  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten
export type {
FinancialUnit,
FinancialCadence,
FinancialStatementKind,
SignTransform,
ComputationSpec,
SurfaceDefinition,
ComputedDefinition,
KpiDefinition,
} from './types';
export { RATIO_CATEGORIES, type RatioCategory } from './types';
export {
INCOME_SURFACES,
BALANCE_SURFACES,
CASH_FLOW_SURFACES,
ALL_SURFACES_BY_STATEMENT,
} from './surfaces';
export {
ALL_COMPUTED,
FILING_DERIVED_COMPUTED,
MARKET_DERIVED_COMPUTED,
CORE,
} from './computed';
export { ALL_KPIS, CORE_KPIS } from './kpis';
`;
}
/**
 * Runs the whole generation pipeline:
 * 1. load surface, computed and KPI packs,
 * 2. validate surface and computed packs — on any error, print them all to
 *    stderr and exit with status 1 before anything is written,
 * 3. create the output folders and write types, surfaces, computed, KPI and
 *    main index modules under OUTPUT_DIR,
 * 4. log a summary of what was generated.
 *
 * Surface files and the surfaces index are only written when at least one
 * surface was loaded; all other files are always written.
 */
async function main() {
  // Writes one generated file below OUTPUT_DIR.
  const emit = (contents: string, ...segments: string[]) => {
    writeFileSync(join(OUTPUT_DIR, ...segments), contents);
  };

  log("Loading taxonomy files...");
  const surfacePacks = loadSurfacePacks();
  const computedPacks = loadComputedPacks();
  const kpiPacks = loadKpiPacks();
  log(
    `Loaded ${surfacePacks.size} surface packs, ${computedPacks.size} computed packs, ${kpiPacks.size} KPI packs`,
  );

  log("Validating taxonomy files...");
  const errors: string[] = [];
  // Computed definitions may reference a surface from any pack, so the key
  // set spans every loaded surface pack.
  const allSurfaceKeys = new Set<string>();
  for (const surfacePack of surfacePacks.values()) {
    validateSurfacePack(surfacePack, errors);
    for (const { surface_key } of surfacePack.surfaces) {
      allSurfaceKeys.add(surface_key);
    }
  }
  for (const computedPack of computedPacks.values()) {
    validateComputedPack(computedPack, allSurfaceKeys, errors);
  }
  if (errors.length > 0) {
    console.error("Validation errors:");
    errors.forEach((message) => {
      console.error(` - ${message}`);
    });
    process.exit(1);
  }

  log("Creating output directories...");
  for (const folder of ["surfaces", "computed", "kpis"]) {
    mkdirSync(join(OUTPUT_DIR, folder), { recursive: true });
  }

  log("Generating types...");
  emit(generateTypesFile(), "types.ts");

  log("Generating surfaces...");
  const unionSurfaceDefinitions = buildUnionSurfaceDefinitions(surfacePacks);
  if (unionSurfaceDefinitions.size > 0) {
    for (const [statement, surfaces] of unionSurfaceDefinitions) {
      emit(generateSurfaceFile(statement, surfaces), "surfaces", `${statement}.ts`);
    }
    emit(generateSurfacesIndex(unionSurfaceDefinitions), "surfaces", "index.ts");
  }

  log("Generating computed definitions...");
  const computedFiles: { name: string; definitions: ComputedDefinition[] }[] =
    Array.from(computedPacks, ([name, file]) => ({
      name,
      definitions: file.computed,
    }));
  for (const { name, definitions } of computedFiles) {
    emit(generateComputedFile(name, definitions), "computed", `${name}.ts`);
  }
  emit(generateComputedIndex(computedFiles), "computed", "index.ts");

  log("Generating KPI definitions...");
  const kpiFiles: { pack: string; kpis: KpiDefinition[] }[] = Array.from(
    kpiPacks,
    ([pack, file]) => ({ pack, kpis: file.kpis }),
  );
  for (const { pack, kpis } of kpiFiles) {
    emit(generateKpiFile(pack, kpis), "kpis", `${pack}.ts`);
  }
  emit(generateKpiIndex(kpiFiles), "kpis", "index.ts");

  log("Generating main index...");
  emit(generateMainIndex(), "index.ts");

  let surfaceCount = 0;
  for (const surfaces of unionSurfaceDefinitions.values()) {
    surfaceCount += surfaces.length;
  }
  let computedCount = 0;
  for (const { definitions } of computedFiles) {
    computedCount += definitions.length;
  }
  let kpiCount = 0;
  for (const { kpis } of kpiFiles) {
    kpiCount += kpis.length;
  }
  log(
    `Generated ${surfaceCount} surfaces, ${computedCount} computed definitions, ${kpiCount} KPIs`,
  );
  log(`Output written to ${OUTPUT_DIR}`);
}
// Script entry point: run the generator and turn any failure (thrown error or
// rejected promise) into a logged message plus a non-zero exit status.
main().catch((failure) => {
  console.error("Generation failed:", failure);
  process.exit(1);
});