- add 3Y/5Y/10Y financial history filtering and reorganize normalization details UI - add new fiscal taxonomy surface/income bridge/KPI packs and update Rust taxonomy loading - auto-detect Homebrew SQLite for native `sqlite-vec` in local dev/e2e with docs and env guidance
658 lines
17 KiB
TypeScript
658 lines
17 KiB
TypeScript
import {
|
|
existsSync,
|
|
mkdirSync,
|
|
readdirSync,
|
|
readFileSync,
|
|
writeFileSync,
|
|
} from "node:fs";
|
|
import { join } from "node:path";
|
|
|
|
/** Unit in which a surface or computed value is expressed. */
type FinancialUnit = "currency" | "percent" | "ratio" | "shares" | "count";

/** Reporting cadences a computed definition may list in `supported_cadences`. */
type FinancialCadence = "annual" | "quarterly" | "ltm";

/** Statement kinds a surface definition may be assigned to. */
type FinancialStatementKind =
  | "income"
  | "balance"
  | "cash_flow"
  | "disclosure"
  | "equity"
  | "comprehensive_income";

/** Sign adjustments a surface may declare via `sign_transform`. */
type SignTransform = "invert" | "absolute";

/**
 * One surface row as read from a `*.surface.json` pack file.
 *
 * Mirrors the `SurfaceDefinition` type emitted by `generateTypesFile`, so the
 * parsed JSON can be written back out unchanged.
 */
type SurfaceDefinition = {
  surface_key: string;
  statement: FinancialStatementKind;
  label: string;
  category: string;
  /** Sort position; generated surface files are ordered by this value. */
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  /** Either a structured operation over `sources`, a plain string, or null. */
  formula_fallback?:
    | {
        op: "sum" | "subtract" | "divide";
        sources: string[];
        treat_null_as_zero?: boolean;
      }
    | string
    | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  // Uses the shared alias so this input type accepts everything the generated
  // SurfaceDefinition type accepts ("invert" and "absolute").
  sign_transform?: SignTransform;
};
|
|
|
|
/** Shape that a `<pack>.surface.json` file is parsed into. */
type SurfacePackFile = {
  version: string;
  /** Pack name; used as the prefix of validation error messages. */
  pack: string;
  surfaces: SurfaceDefinition[];
};
|
|
|
|
/**
 * How a computed definition is derived, discriminated by `type`.
 *
 * The string fields of every variant except `simple` are checked against the
 * known surface keys by `validateComputedPack`.
 */
type ComputationSpec =
  | { type: "ratio"; numerator: string; denominator: string }
  | { type: "yoy_growth"; source: string }
  | { type: "cagr"; source: string; years: number }
  | { type: "per_share"; source: string; shares_key: string }
  | { type: "simple"; formula: string };
|
|
|
|
/** One computed entry as read from a `*.computed.json` pack file. */
type ComputedDefinition = {
  key: string;
  label: string;
  category: string;
  /** Sort position; generated computed files are ordered by this value. */
  order: number;
  unit: FinancialUnit;
  computation: ComputationSpec;
  supported_cadences?: FinancialCadence[];
  /**
   * When non-empty, the definition is emitted under MARKET_DERIVED_COMPUTED;
   * otherwise it is emitted under FILING_DERIVED_COMPUTED.
   */
  requires_external_data?: string[];
};
|
|
|
|
/** Shape that a `<pack>.computed.json` file is parsed into. */
type ComputedPackFile = {
  version: string;
  /** Pack name; used as the prefix of validation error messages. */
  pack: string;
  computed: ComputedDefinition[];
};
|
|
|
|
/** One KPI entry as read from a `*.kpis.json` pack file. */
type KpiDefinition = {
  key: string;
  label: string;
  /** Free-form unit string (not restricted to FinancialUnit). */
  unit: string;
};
|
|
|
|
/** Shape that a `kpis/<pack>.kpis.json` file is parsed into. */
type KpiPackFile = {
  version: string;
  pack: string;
  kpis: KpiDefinition[];
};
|
|
|
|
/** Directory the taxonomy JSON packs are read from, relative to the current working directory. */
const TAXONOMY_DIR = join(process.cwd(), "rust", "taxonomy", "fiscal", "v1");

/** Directory the generated TypeScript modules are written to, relative to the current working directory. */
const OUTPUT_DIR = join(process.cwd(), "lib", "generated");
|
|
|
|
/**
 * Every pack name the generator looks for, in processing order.
 *
 * The order matters: when surfaces are merged across packs, the first
 * definition seen for a given statement/surface_key pair is kept, so packs
 * listed earlier take precedence.
 */
const PACK_ORDER = [
  "core",
  "bank_lender",
  "insurance",
  "reit_real_estate",
  "broker_asset_manager",
  "agriculture",
  "contractors_construction",
  "contractors_federal_government",
  "development_stage",
  "entertainment_broadcasters",
  "entertainment_cable_television",
  "entertainment_casinos",
  "entertainment_films",
  "entertainment_music",
  "extractive_mining",
  "mortgage_banking",
  "title_plant",
  "franchisors",
  "not_for_profit",
  "plan_defined_benefit",
  "plan_defined_contribution",
  "plan_health_welfare",
  "real_estate_general",
  "real_estate_common_interest",
  "real_estate_retail_land",
  "real_estate_time_sharing",
  "software",
  "steamship",
] as const;

/** Union of the literal pack names listed in PACK_ORDER. */
type PackName = (typeof PACK_ORDER)[number];
|
|
|
|
/**
 * Writes a progress message to stdout, prefixed with the script tag.
 *
 * @param message Text to print after the `[generate-taxonomy]` prefix.
 */
function log(message: string): void {
  console.log(`[generate-taxonomy] ${message}`);
}
|
|
|
|
/**
 * Shared loader behind the three pack loaders below.
 *
 * Walks PACK_ORDER, skips packs whose file does not exist, and parses the rest
 * as JSON. The parsed value is cast to `T` without structural validation.
 *
 * @param resolvePath Maps a pack name to the JSON file to read for it.
 * @returns Parsed files keyed by pack name, in PACK_ORDER order.
 * @throws Error naming the offending file when it cannot be read or parsed.
 */
function loadPackFiles<T>(
  resolvePath: (pack: PackName) => string,
): Map<PackName, T> {
  const packs = new Map<PackName, T>();

  for (const pack of PACK_ORDER) {
    const path = resolvePath(pack);
    if (!existsSync(path)) {
      // Packs are optional: a missing file simply leaves the pack out.
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(path, "utf8"));
    } catch (error) {
      // A bare SyntaxError from JSON.parse does not say which file was bad.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to load ${path}: ${reason}`);
    }

    packs.set(pack, parsed as T);
  }

  return packs;
}

/** Loads every existing `<pack>.surface.json` from TAXONOMY_DIR. */
function loadSurfacePacks(): Map<PackName, SurfacePackFile> {
  return loadPackFiles<SurfacePackFile>((pack) =>
    join(TAXONOMY_DIR, `${pack}.surface.json`),
  );
}

/** Loads every existing `<pack>.computed.json` from TAXONOMY_DIR. */
function loadComputedPacks(): Map<PackName, ComputedPackFile> {
  return loadPackFiles<ComputedPackFile>((pack) =>
    join(TAXONOMY_DIR, `${pack}.computed.json`),
  );
}

/** Loads every existing `<pack>.kpis.json` from the `kpis` subdirectory of TAXONOMY_DIR. */
function loadKpiPacks(): Map<PackName, KpiPackFile> {
  return loadPackFiles<KpiPackFile>((pack) =>
    join(TAXONOMY_DIR, "kpis", `${pack}.kpis.json`),
  );
}
|
|
|
|
/**
 * Checks one surface pack and appends a message to `errors` for each problem.
 *
 * For every surface, in this order, it reports: a `surface_key` already used
 * within the same statement of this pack, an empty/missing label, and a
 * statement that is not one of the known statement kinds.
 *
 * @param pack Parsed surface pack to check.
 * @param errors Accumulator that receives human-readable error messages.
 */
function validateSurfacePack(pack: SurfacePackFile, errors: string[]): void {
  // Built once per call rather than once per surface.
  const validStatements: readonly FinancialStatementKind[] = [
    "income",
    "balance",
    "cash_flow",
    "disclosure",
    "equity",
    "comprehensive_income",
  ];
  const keysByStatement = new Map<string, Set<string>>();

  for (const surface of pack.surfaces) {
    let keySet = keysByStatement.get(surface.statement);
    if (keySet === undefined) {
      keySet = new Set<string>();
      keysByStatement.set(surface.statement, keySet);
    }

    if (keySet.has(surface.surface_key)) {
      errors.push(
        `${pack.pack}: duplicate surface_key "${surface.surface_key}" in statement "${surface.statement}"`,
      );
    }
    keySet.add(surface.surface_key);

    if (!surface.label) {
      errors.push(
        `${pack.pack}: surface "${surface.surface_key}" missing label`,
      );
    }

    // The declared type already restricts `statement`, but the data comes from
    // unvalidated JSON, so it is checked again at runtime.
    if (!validStatements.includes(surface.statement)) {
      errors.push(
        `${pack.pack}: surface "${surface.surface_key}" has invalid statement "${surface.statement}"`,
      );
    }
  }
}
|
|
|
|
/**
 * Checks one computed pack and appends a message to `errors` for each problem.
 *
 * Reports duplicate keys within the pack, empty/missing labels, and
 * computation references that do not name a known surface key.
 *
 * Ratio operands are treated more leniently than the other references: an
 * operand is only reported when it is not a known surface key AND contains no
 * underscore. `simple` computations are not checked.
 *
 * @param pack Parsed computed pack to check.
 * @param surfaceKeys Surface keys collected from all loaded surface packs.
 * @param errors Accumulator that receives human-readable error messages.
 */
function validateComputedPack(
  pack: ComputedPackFile,
  surfaceKeys: Set<string>,
  errors: string[],
): void {
  const seenKeys = new Set<string>();

  // Single place that formats an "unknown reference" message.
  const reportUnknown = (key: string, field: string, value: string): void => {
    errors.push(
      `${pack.pack}: computed "${key}" references unknown ${field} "${value}"`,
    );
  };

  for (const computed of pack.computed) {
    if (seenKeys.has(computed.key)) {
      errors.push(`${pack.pack}: duplicate computed key "${computed.key}"`);
    }
    seenKeys.add(computed.key);

    if (!computed.label) {
      errors.push(`${pack.pack}: computed "${computed.key}" missing label`);
    }

    const spec = computed.computation;
    switch (spec.type) {
      case "ratio":
        if (!surfaceKeys.has(spec.numerator) && !spec.numerator.includes("_")) {
          reportUnknown(computed.key, "numerator", spec.numerator);
        }
        if (
          !surfaceKeys.has(spec.denominator) &&
          !spec.denominator.includes("_")
        ) {
          reportUnknown(computed.key, "denominator", spec.denominator);
        }
        break;
      case "yoy_growth":
      case "cagr":
        if (!surfaceKeys.has(spec.source)) {
          reportUnknown(computed.key, "source", spec.source);
        }
        break;
      case "per_share":
        if (!surfaceKeys.has(spec.source)) {
          reportUnknown(computed.key, "source", spec.source);
        }
        if (!surfaceKeys.has(spec.shares_key)) {
          reportUnknown(computed.key, "shares_key", spec.shares_key);
        }
        break;
      case "simple":
        // Free-form formula: nothing to cross-check.
        break;
    }
  }
}
|
|
|
|
/**
 * Returns the full source text of the generated `types.ts` module.
 *
 * The content is static: it declares the shared taxonomy types plus the
 * RATIO_CATEGORIES constant and its derived RatioCategory type.
 */
function generateTypesFile(): string {
  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

export type FinancialUnit = 'currency' | 'percent' | 'ratio' | 'shares' | 'count';

export type FinancialCadence = 'annual' | 'quarterly' | 'ltm';

export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'disclosure' | 'equity' | 'comprehensive_income';

export type SignTransform = 'invert' | 'absolute';

export type ComputationSpec =
  | { type: 'ratio'; numerator: string; denominator: string }
  | { type: 'yoy_growth'; source: string }
  | { type: 'cagr'; source: string; years: number }
  | { type: 'per_share'; source: string; shares_key: string }
  | { type: 'simple'; formula: string };

export type SurfaceDefinition = {
  surface_key: string;
  statement: FinancialStatementKind;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  rollup_policy?: string;
  allowed_source_concepts: string[];
  allowed_authoritative_concepts?: string[];
  formula_fallback?: {
    op: 'sum' | 'subtract' | 'divide';
    sources: string[];
    treat_null_as_zero?: boolean;
  } | string | null;
  detail_grouping_policy?: string;
  materiality_policy?: string;
  include_in_output?: boolean;
  sign_transform?: SignTransform;
};

export type ComputedDefinition = {
  key: string;
  label: string;
  category: string;
  order: number;
  unit: FinancialUnit;
  computation: ComputationSpec;
  supported_cadences?: FinancialCadence[];
  requires_external_data?: string[];
};

export type KpiDefinition = {
  key: string;
  label: string;
  unit: string;
};

export const RATIO_CATEGORIES = ['margins', 'returns', 'financial_health', 'per_share', 'growth', 'valuation'] as const;
export type RatioCategory = (typeof RATIO_CATEGORIES)[number];
`;
}
|
|
|
|
/**
 * Renders the generated module for one statement's surfaces.
 *
 * @param statement Statement name; upper-cased to form `<STATEMENT>_SURFACES`.
 * @param surfaces Surfaces to emit. The input array is not mutated; a copy is
 *   sorted by ascending `order` and serialized as JSON.
 * @returns Source text of the module.
 */
function generateSurfaceFile(
  statement: string,
  surfaces: SurfaceDefinition[],
): string {
  const ordered = [...surfaces].sort((a, b) => a.order - b.order);
  const constName = `${statement.toUpperCase()}_SURFACES`;

  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

import type { SurfaceDefinition } from '../types';

export const ${constName}: SurfaceDefinition[] = ${JSON.stringify(ordered, null, 2)};
`;
}
|
|
|
|
/**
 * Renders `surfaces/index.ts`, which imports every per-statement module,
 * groups them in ALL_SURFACES_BY_STATEMENT, and re-exports each constant.
 *
 * @param surfacesByStatement Only the keys (statement names) are used; they
 *   are emitted in sorted order.
 * @returns Source text of the index module.
 */
function generateSurfacesIndex(
  surfacesByStatement: Map<string, SurfaceDefinition[]>,
): string {
  // Derive each constant name once and reuse it for all three sections.
  const entries = [...surfacesByStatement.keys()]
    .sort()
    .map((statement) => ({
      statement,
      constName: `${statement.toUpperCase()}_SURFACES`,
    }));

  const imports = entries
    .map((e) => `import { ${e.constName} } from './${e.statement}';`)
    .join("\n");

  const exports = entries
    .map((e) => ` ${e.statement}: ${e.constName},`)
    .join("\n");

  const reExports = entries.map((e) => e.constName).join(", ");

  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

${imports}

export const ALL_SURFACES_BY_STATEMENT = {
${exports}
} as const;

export { ${reExports} };
`;
}
|
|
|
|
/**
 * Renders the generated module for one pack's computed definitions.
 *
 * @param name Pack name; upper-cased with `-` replaced by `_` to form the
 *   exported constant name.
 * @param definitions Definitions to emit. The input array is not mutated; a
 *   copy is sorted by ascending `order` and serialized as JSON.
 * @returns Source text of the module.
 */
function generateComputedFile(
  name: string,
  definitions: ComputedDefinition[],
): string {
  const ordered = [...definitions].sort((a, b) => a.order - b.order);
  const constName = name.toUpperCase().replace(/-/g, "_");

  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

import type { ComputedDefinition } from '../types';

export const ${constName}: ComputedDefinition[] = ${JSON.stringify(ordered, null, 2)};
`;
}
|
|
|
|
/**
 * Renders `computed/index.ts`.
 *
 * The module imports every per-pack constant, spreads them into ALL_COMPUTED,
 * and additionally inlines two JSON lists sorted by ascending `order`:
 * FILING_DERIVED_COMPUTED (definitions with no `requires_external_data`
 * entries) and MARKET_DERIVED_COMPUTED (definitions with at least one).
 *
 * @param files Pack name plus its definitions, in the order to import them.
 * @returns Source text of the index module.
 */
function generateComputedIndex(
  files: { name: string; definitions: ComputedDefinition[] }[],
): string {
  // Derive each constant name once and reuse it for all three sections.
  const entries = files.map((f) => ({
    name: f.name,
    constName: f.name.toUpperCase().replace(/-/g, "_"),
  }));

  const imports = entries
    .map((e) => `import { ${e.constName} } from './${e.name}';`)
    .join("\n");

  const allExports = entries.map((e) => ` ...${e.constName},`).join("\n");

  const needsExternalData = (d: ComputedDefinition): boolean =>
    (d.requires_external_data?.length ?? 0) > 0;
  const byOrder = (a: ComputedDefinition, b: ComputedDefinition): number =>
    a.order - b.order;

  const allDefinitions = files.flatMap((f) => f.definitions);
  const filingDerived = allDefinitions
    .filter((d) => !needsExternalData(d))
    .sort(byOrder);
  const marketDerived = allDefinitions.filter(needsExternalData).sort(byOrder);

  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

import type { ComputedDefinition } from '../types';

${imports}

export const ALL_COMPUTED: ComputedDefinition[] = [
${allExports}
];

export const FILING_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(filingDerived, null, 2)};

export const MARKET_DERIVED_COMPUTED: ComputedDefinition[] = ${JSON.stringify(marketDerived, null, 2)};

export { ${entries.map((e) => e.constName).join(", ")} };
`;
}
|
|
|
|
/**
 * Renders the generated module for one pack's KPI definitions.
 *
 * @param pack Pack name; upper-cased with `-` replaced by `_` and suffixed
 *   with `_KPIS` to form the exported constant name.
 * @param kpis KPI definitions, serialized as JSON in the order given.
 * @returns Source text of the module.
 */
function generateKpiFile(pack: string, kpis: KpiDefinition[]): string {
  const constName = `${pack.toUpperCase().replace(/-/g, "_")}_KPIS`;

  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

import type { KpiDefinition } from '../types';

export const ${constName}: KpiDefinition[] = ${JSON.stringify(kpis, null, 2)};
`;
}
|
|
|
|
/**
 * Renders `kpis/index.ts`, which imports every per-pack `<PACK>_KPIS`
 * constant, spreads them into ALL_KPIS, and re-exports each constant.
 *
 * @param packs Pack name plus its KPIs; only the pack names are used here.
 * @returns Source text of the index module.
 */
function generateKpiIndex(
  packs: { pack: string; kpis: KpiDefinition[] }[],
): string {
  // Derive each constant name once and reuse it for all three sections.
  const entries = packs.map((p) => ({
    pack: p.pack,
    constName: `${p.pack.toUpperCase().replace(/-/g, "_")}_KPIS`,
  }));

  const imports = entries
    .map((e) => `import { ${e.constName} } from './${e.pack}';`)
    .join("\n");

  const exports = entries.map((e) => ` ...${e.constName},`).join("\n");

  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

import type { KpiDefinition } from '../types';

${imports}

export const ALL_KPIS: KpiDefinition[] = [
${exports}
];

export { ${entries.map((e) => e.constName).join(", ")} };
`;
}
|
|
|
|
/**
 * Merges the surfaces of all loaded packs into one list per statement.
 *
 * Packs are visited in PACK_ORDER. A surface is identified by its
 * `statement:surface_key` pair; the first definition seen for a pair is kept
 * and later ones (from the same or a later pack) are dropped.
 *
 * @param surfacePacks Parsed surface packs keyed by pack name.
 * @returns Surfaces grouped by statement, in first-seen order.
 */
function buildUnionSurfaceDefinitions(
  surfacePacks: Map<PackName, SurfacePackFile>,
): Map<string, SurfaceDefinition[]> {
  const surfacesByStatement = new Map<string, SurfaceDefinition[]>();
  const seen = new Set<string>();

  for (const pack of PACK_ORDER) {
    const file = surfacePacks.get(pack);
    if (!file) {
      continue;
    }

    for (const surface of file.surfaces) {
      const dedupeKey = `${surface.statement}:${surface.surface_key}`;
      if (seen.has(dedupeKey)) {
        continue;
      }
      seen.add(dedupeKey);

      // Register the bucket once; afterwards pushing to it is enough.
      let bucket = surfacesByStatement.get(surface.statement);
      if (bucket === undefined) {
        bucket = [];
        surfacesByStatement.set(surface.statement, bucket);
      }
      bucket.push(surface);
    }
  }

  return surfacesByStatement;
}
|
|
|
|
/**
 * Returns the source text of the top-level generated `index.ts`.
 *
 * The content is static: it re-exports the shared types, RATIO_CATEGORIES,
 * the income/balance/cash-flow surface constants, the computed aggregates
 * plus CORE, and the KPI aggregate plus CORE_KPIS. It therefore assumes those
 * modules were generated (i.e. the corresponding packs exist).
 */
function generateMainIndex(): string {
  // SignTransform is exported by the generated types module and referenced by
  // SurfaceDefinition, so it is re-exported alongside the other types.
  return `// Auto-generated by scripts/generate-taxonomy.ts
// DO NOT EDIT MANUALLY - changes will be overwritten

export type {
  FinancialUnit,
  FinancialCadence,
  FinancialStatementKind,
  SignTransform,
  ComputationSpec,
  SurfaceDefinition,
  ComputedDefinition,
  KpiDefinition,
} from './types';

export { RATIO_CATEGORIES, type RatioCategory } from './types';

export {
  INCOME_SURFACES,
  BALANCE_SURFACES,
  CASH_FLOW_SURFACES,
  ALL_SURFACES_BY_STATEMENT,
} from './surfaces';

export {
  ALL_COMPUTED,
  FILING_DERIVED_COMPUTED,
  MARKET_DERIVED_COMPUTED,
  CORE,
} from './computed';

export { ALL_KPIS, CORE_KPIS } from './kpis';
`;
}
|
|
|
|
/**
 * Runs the generator end to end.
 *
 * Steps: load all surface/computed/KPI packs, validate them, then write
 * `types.ts`, the per-statement surface modules, the per-pack computed and
 * KPI modules, their index files, and the top-level `index.ts` under
 * OUTPUT_DIR.
 *
 * If validation reports any error, the errors are printed to stderr and the
 * process exits with code 1 before anything is written.
 */
async function main(): Promise<void> {
  log("Loading taxonomy files...");

  const surfacePacks = loadSurfacePacks();
  const computedPacks = loadComputedPacks();
  const kpiPacks = loadKpiPacks();

  log(
    `Loaded ${surfacePacks.size} surface packs, ${computedPacks.size} computed packs, ${kpiPacks.size} KPI packs`,
  );

  const errors: string[] = [];

  log("Validating taxonomy files...");

  // One pass validates each surface pack and collects its keys; computed
  // packs are checked afterwards against the keys of ALL surface packs.
  const allSurfaceKeys = new Set<string>();
  for (const pack of surfacePacks.values()) {
    validateSurfacePack(pack, errors);
    for (const surface of pack.surfaces) {
      allSurfaceKeys.add(surface.surface_key);
    }
  }

  for (const pack of computedPacks.values()) {
    validateComputedPack(pack, allSurfaceKeys, errors);
  }

  if (errors.length > 0) {
    console.error("Validation errors:");
    for (const error of errors) {
      console.error(` - ${error}`);
    }
    process.exit(1);
  }

  log("Creating output directories...");
  for (const subdirectory of ["surfaces", "computed", "kpis"]) {
    mkdirSync(join(OUTPUT_DIR, subdirectory), { recursive: true });
  }

  log("Generating types...");
  writeFileSync(join(OUTPUT_DIR, "types.ts"), generateTypesFile());

  log("Generating surfaces...");
  const unionSurfaceDefinitions = buildUnionSurfaceDefinitions(surfacePacks);
  // With no surfaces at all, neither the per-statement files nor the surfaces
  // index are written.
  if (unionSurfaceDefinitions.size > 0) {
    for (const [statement, surfaces] of unionSurfaceDefinitions) {
      writeFileSync(
        join(OUTPUT_DIR, "surfaces", `${statement}.ts`),
        generateSurfaceFile(statement, surfaces),
      );
    }

    writeFileSync(
      join(OUTPUT_DIR, "surfaces", "index.ts"),
      generateSurfacesIndex(unionSurfaceDefinitions),
    );
  }

  log("Generating computed definitions...");
  const computedFiles: { name: string; definitions: ComputedDefinition[] }[] =
    [];

  for (const [pack, file] of computedPacks) {
    computedFiles.push({ name: pack, definitions: file.computed });
    writeFileSync(
      join(OUTPUT_DIR, "computed", `${pack}.ts`),
      generateComputedFile(pack, file.computed),
    );
  }

  writeFileSync(
    join(OUTPUT_DIR, "computed", "index.ts"),
    generateComputedIndex(computedFiles),
  );

  log("Generating KPI definitions...");
  const kpiFiles: { pack: string; kpis: KpiDefinition[] }[] = [];

  for (const [pack, file] of kpiPacks) {
    kpiFiles.push({ pack, kpis: file.kpis });
    writeFileSync(
      join(OUTPUT_DIR, "kpis", `${pack}.ts`),
      generateKpiFile(pack, file.kpis),
    );
  }

  writeFileSync(
    join(OUTPUT_DIR, "kpis", "index.ts"),
    generateKpiIndex(kpiFiles),
  );

  log("Generating main index...");
  writeFileSync(join(OUTPUT_DIR, "index.ts"), generateMainIndex());

  const surfaceCount = [...unionSurfaceDefinitions.values()].reduce(
    (sum, surfaces) => sum + surfaces.length,
    0,
  );
  const computedCount = computedFiles.reduce(
    (sum, f) => sum + f.definitions.length,
    0,
  );
  const kpiCount = kpiFiles.reduce((sum, f) => sum + f.kpis.length, 0);

  log(
    `Generated ${surfaceCount} surfaces, ${computedCount} computed definitions, ${kpiCount} KPIs`,
  );
  log(`Output written to ${OUTPUT_DIR}`);
}
|
|
|
|
// Script entry point: run the generator and turn any rejection into a logged
// failure with a non-zero exit code.
main().catch((error: unknown) => {
  console.error("Generation failed:", error);
  process.exit(1);
});
|