Files
Neon-Desk/rust/fiscal-xbrl-core/src/taxonomy_loader.rs
francy51 17de3dd72d Add history window controls and expand taxonomy pack support
- add 3Y/5Y/10Y financial history filtering and reorganize normalization details UI
- add new fiscal taxonomy surface/income bridge/KPI packs and update Rust taxonomy loading
- auto-detect Homebrew SQLite for native `sqlite-vec` in local dev/e2e with docs and env guidance
2026-03-18 23:40:28 -04:00

498 lines
14 KiB
Rust

use anyhow::{anyhow, Context, Result};
use serde::Deserialize;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::PathBuf;
use crate::pack_selector::FiscalPack;
/// Serde default for `SurfaceDefinition::include_in_output`.
///
/// Referenced through `#[serde(default = "default_include_in_output")]`, so a
/// surface whose JSON omits the flag deserializes with it set to `true`.
fn default_include_in_output() -> bool {
    true
}
/// Optional sign adjustment a surface definition may carry in its
/// `sign_transform` field.
///
/// Deserialized from the snake_case strings `"invert"` and `"absolute"`.
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SurfaceSignTransform {
/// NOTE(review): presumably flips the sign of the value; the transform is
/// applied outside this file — confirm against the consumer.
Invert,
/// NOTE(review): presumably takes the absolute value — confirm against the
/// consumer.
Absolute,
}
/// Deserialized contents of a `<pack>.surface.json` taxonomy file.
///
/// For non-core packs, `load_surface_pack` appends core `balance` and
/// `cash_flow` surfaces to `surfaces` after parsing.
#[derive(Debug, Deserialize, Clone)]
pub struct SurfacePackFile {
/// Version string carried by the file; not interpreted in this module.
pub version: String,
/// Pack name recorded in the file (the tests expect it to equal
/// `FiscalPack::as_str()` of the requested pack).
pub pack: String,
/// Surface definitions declared by the pack.
pub surfaces: Vec<SurfaceDefinition>,
}
/// One surface entry as declared in a surface pack file.
///
/// Fields without a `#[serde(default ...)]` attribute are required in the
/// JSON, except `formula_fallback`, which is an `Option` and may be absent.
#[derive(Debug, Deserialize, Clone)]
pub struct SurfaceDefinition {
/// Surface identifier; together with `statement` it forms the key used by
/// `load_surface_pack` to decide whether a core surface is already defined.
pub surface_key: String,
/// Statement the surface belongs to; `load_surface_pack` checks for the
/// values `"balance"` and `"cash_flow"`.
pub statement: String,
/// Label text for the surface.
pub label: String,
/// Category string; not interpreted in this module.
pub category: String,
/// Ordering value; NOTE(review): presumably the display order — confirm.
pub order: i64,
/// Unit string; not interpreted in this module.
pub unit: String,
/// Rollup policy identifier; interpreted outside this file.
pub rollup_policy: String,
/// Concept names accepted as sources for this surface (semantics are
/// defined by the consumer).
pub allowed_source_concepts: Vec<String>,
/// Authoritative concept names accepted for this surface (semantics are
/// defined by the consumer).
pub allowed_authoritative_concepts: Vec<String>,
/// Optional fallback formula, either a legacy string or a structured
/// formula.
pub formula_fallback: Option<SurfaceFormulaFallback>,
/// Detail grouping policy identifier; interpreted outside this file.
pub detail_grouping_policy: String,
/// Materiality policy identifier; interpreted outside this file.
pub materiality_policy: String,
/// Whether the surface is included in output; defaults to `true` when the
/// JSON omits it.
#[serde(default = "default_include_in_output")]
pub include_in_output: bool,
/// Optional sign transform; `None` when the JSON omits it.
#[serde(default)]
pub sign_transform: Option<SurfaceSignTransform>,
}
/// Fallback formula attached to a surface definition.
///
/// The enum is `untagged`, so serde tries the variants in declaration order:
/// a JSON string becomes `LegacyString`, a JSON object matching
/// `SurfaceFormula` becomes `Structured`.
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum SurfaceFormulaFallback {
/// Free-form legacy formula text; the payload is never read in this file
/// (hence the `dead_code` allowance).
LegacyString(#[allow(dead_code)] String),
/// Structured formula with an operator and a list of sources.
Structured(SurfaceFormula),
}
impl SurfaceFormulaFallback {
    /// Returns the structured formula when this fallback carries one.
    ///
    /// The legacy string form has no structured representation and yields
    /// `None`.
    pub fn structured(&self) -> Option<&SurfaceFormula> {
        match self {
            Self::LegacyString(_) => None,
            Self::Structured(spec) => Some(spec),
        }
    }
}
/// Structured fallback formula: an operator applied to a list of sources.
#[derive(Debug, Deserialize, Clone)]
pub struct SurfaceFormula {
/// Operator to apply.
pub op: SurfaceFormulaOp,
/// Operand identifiers; NOTE(review): presumably surface keys — confirm
/// against the formula evaluator.
pub sources: Vec<String>,
/// Defaults to `false` when omitted; NOTE(review): presumably lets missing
/// operands count as zero — the evaluation lives outside this file.
#[serde(default)]
pub treat_null_as_zero: bool,
}
/// Operator of a structured surface formula.
///
/// Deserialized from the snake_case strings `"sum"`, `"subtract"` and
/// `"divide"`; how operands are combined is defined by the evaluator outside
/// this file.
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SurfaceFormulaOp {
Sum,
Subtract,
Divide,
}
#[derive(Debug, Deserialize, Clone)]
pub struct CrosswalkFile {
pub version: String,
pub regime: String,
pub mappings: std::collections::HashMap<String, CrosswalkMapping>,
}
/// Target of a single crosswalk entry.
#[derive(Debug, Deserialize, Clone)]
pub struct CrosswalkMapping {
/// Surface key the entry maps to.
pub surface_key: String,
/// Authoritative concept key associated with the entry.
pub authoritative_concept_key: String,
}
/// Deserialized contents of a `kpis/<pack>.kpis.json` taxonomy file.
#[derive(Debug, Deserialize, Clone)]
pub struct KpiPackFile {
/// Version string carried by the file; not interpreted in this module.
pub version: String,
/// Pack name recorded in the file.
pub pack: String,
/// KPI definitions declared by the pack.
pub kpis: Vec<KpiDefinition>,
}
/// One KPI entry of a KPI pack file; all three fields are required.
#[derive(Debug, Deserialize, Clone)]
pub struct KpiDefinition {
/// KPI identifier.
pub key: String,
/// Label text for the KPI.
pub label: String,
/// Unit string; not interpreted in this module.
pub unit: String,
}
/// Deserialized contents of a `<pack>.computed.json` taxonomy file.
#[derive(Debug, Deserialize, Clone)]
pub struct ComputedPackFile {
/// Version string carried by the file; not interpreted in this module.
pub version: String,
/// Pack name recorded in the file.
pub pack: String,
/// Computed-metric definitions declared by the pack.
pub computed: Vec<ComputedDefinition>,
}
/// One computed metric as declared in a computed pack file.
#[derive(Debug, Deserialize, Clone)]
pub struct ComputedDefinition {
/// Metric identifier.
pub key: String,
/// Label text for the metric.
pub label: String,
/// Category string; not interpreted in this module.
pub category: String,
/// Ordering value; NOTE(review): presumably the display order — confirm.
pub order: i64,
/// Unit string; not interpreted in this module.
pub unit: String,
/// How the metric is computed (tagged by the JSON `type` field).
pub computation: ComputationSpec,
/// Cadence names the metric supports; empty when the JSON omits the field.
#[serde(default)]
pub supported_cadences: Vec<String>,
/// External data identifiers the metric needs; empty when the JSON omits
/// the field.
#[serde(default)]
pub requires_external_data: Vec<String>,
}
/// Computation recipe of a computed metric.
///
/// Internally tagged: the JSON object carries a `type` field whose value is
/// the snake_case variant name (`"ratio"`, `"yoy_growth"`, `"cagr"`,
/// `"per_share"`, `"simple"`), alongside the variant's own fields. The
/// arithmetic itself is implemented outside this file.
#[derive(Debug, Deserialize, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ComputationSpec {
/// Names a numerator and a denominator.
Ratio {
numerator: String,
denominator: String,
},
/// Names a single source; NOTE(review): presumably year-over-year growth
/// of that source — confirm against the evaluator.
YoyGrowth {
source: String,
},
/// Names a source and a number of years; NOTE(review): presumably a
/// compound annual growth rate over `years` — confirm.
Cagr {
source: String,
years: i64,
},
/// Names a source and the key of a share count.
PerShare {
source: String,
shares_key: String,
},
/// Carries a free-form formula string.
Simple {
formula: String,
},
}
/// Deserialized contents of `universal_income.surface.json`.
#[derive(Debug, Deserialize, Clone)]
pub struct UniversalIncomeFile {
/// Version string carried by the file; not interpreted in this module.
pub version: String,
/// Row definitions declared by the file.
pub rows: Vec<UniversalIncomeDefinition>,
}
/// One row of the universal income file; every field is required.
#[derive(Debug, Deserialize, Clone)]
pub struct UniversalIncomeDefinition {
/// Row identifier.
pub key: String,
/// Statement name the row belongs to.
pub statement: String,
/// Label text for the row.
pub label: String,
/// Category string; not interpreted in this module.
pub category: String,
/// Ordering value; NOTE(review): presumably the display order — confirm.
pub order: i64,
/// Unit string; not interpreted in this module.
pub unit: String,
}
/// Deserialized contents of a `<pack>.income-bridge.json` taxonomy file.
#[derive(Debug, Deserialize, Clone)]
pub struct IncomeBridgeFile {
/// Version string carried by the file; not interpreted in this module.
pub version: String,
/// Pack name recorded in the file.
pub pack: String,
/// Bridge rows keyed by row name (the tests look up `"revenue"` and
/// `"net_income"`).
pub rows: HashMap<String, IncomeBridgeRow>,
}
/// Two lists of component names, split into `positive` and `negative`.
///
/// Both lists default to empty when omitted. NOTE(review): presumably the
/// names are surface keys added to / subtracted from the row — confirm
/// against the bridge evaluator.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct IncomeBridgeComponents {
#[serde(default)]
pub positive: Vec<String>,
#[serde(default)]
pub negative: Vec<String>,
}
/// Two lists of named concept groups, split into `positive` and `negative`.
///
/// Both lists default to empty when omitted; how the groups contribute to a
/// row is decided outside this file.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct IncomeBridgeConceptGroups {
#[serde(default)]
pub positive: Vec<IncomeBridgeConceptGroup>,
#[serde(default)]
pub negative: Vec<IncomeBridgeConceptGroup>,
}
/// A named list of concept names used by an income bridge row.
#[derive(Debug, Deserialize, Clone)]
pub struct IncomeBridgeConceptGroup {
/// Name of the group.
pub name: String,
/// Concept names belonging to the group.
pub concepts: Vec<String>,
}
/// One row of an income bridge file.
///
/// Only `formula` is required in the JSON; every other field falls back to
/// its `Default` value (empty lists, empty component sets, `false`).
#[derive(Debug, Deserialize, Clone)]
pub struct IncomeBridgeRow {
/// Authoritative concept names that map directly to the row.
#[serde(default)]
pub direct_authoritative_concepts: Vec<String>,
/// Source concept names that map directly to the row.
#[serde(default)]
pub direct_source_concepts: Vec<String>,
/// Positive/negative component names for the row.
#[serde(default)]
pub component_surfaces: IncomeBridgeComponents,
/// Positive/negative concept groups for the row.
#[serde(default)]
pub component_concept_groups: IncomeBridgeConceptGroups,
/// Formula string for the row; not parsed in this module.
pub formula: String,
/// NOTE(review): presumably marks the row as not applicable to the pack —
/// confirm against the consumer.
#[serde(default)]
pub not_meaningful_for_pack: bool,
/// Warning codes associated with using this row; how they are surfaced is
/// decided outside this file.
#[serde(default)]
pub warning_codes_when_used: Vec<String>,
}
/// Locates the runtime taxonomy directory.
///
/// Candidates are probed in this order and the first existing directory wins:
/// 1. the trimmed, non-empty value of `FISCAL_TAXONOMY_DIR`;
/// 2. `<cwd>/rust/taxonomy`, then `<cwd>/taxonomy`;
/// 3. `<CARGO_MANIFEST_DIR>/../taxonomy` (path baked in at compile time);
/// 4. `<exe dir>/../rust/taxonomy`, then `<exe dir>/../taxonomy`.
///
/// # Errors
/// Returns an error when none of the candidates is a directory.
pub fn resolve_taxonomy_dir() -> Result<PathBuf> {
    let mut search_paths: Vec<PathBuf> = Vec::new();

    // Explicit override; an unset, non-unicode or blank value is skipped.
    if let Ok(raw) = env::var("FISCAL_TAXONOMY_DIR") {
        let trimmed = raw.trim();
        if !trimmed.is_empty() {
            search_paths.push(PathBuf::from(trimmed));
        }
    }

    // Locations relative to the current working directory.
    if let Ok(cwd) = env::current_dir() {
        search_paths.push(cwd.join("rust").join("taxonomy"));
        search_paths.push(cwd.join("taxonomy"));
    }

    // Location relative to the crate manifest.
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    search_paths.push(manifest_dir.join("../taxonomy"));

    // Locations relative to the directory holding the running executable.
    let exe_dir = env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|dir| dir.to_path_buf()));
    if let Some(dir) = exe_dir {
        search_paths.push(dir.join("../rust/taxonomy"));
        search_paths.push(dir.join("../taxonomy"));
    }

    for candidate in search_paths {
        if candidate.is_dir() {
            return Ok(candidate);
        }
    }
    Err(anyhow!(
        "taxonomy resolution failed: unable to locate runtime taxonomy directory"
    ))
}
pub fn load_surface_pack(pack: FiscalPack) -> Result<SurfacePackFile> {
let taxonomy_dir = resolve_taxonomy_dir()?;
let path = taxonomy_dir
.join("fiscal")
.join("v1")
.join(format!("{}.surface.json", pack.as_str()));
let mut file = load_surface_pack_file(&path)?;
if !matches!(pack, FiscalPack::Core) {
let core_path = taxonomy_dir
.join("fiscal")
.join("v1")
.join("core.surface.json");
let core_file = load_surface_pack_file(&core_path)?;
let pack_inherited_keys = file
.surfaces
.iter()
.filter(|surface| surface.statement == "balance" || surface.statement == "cash_flow")
.map(|surface| (surface.statement.clone(), surface.surface_key.clone()))
.collect::<std::collections::HashSet<_>>();
file.surfaces.extend(
core_file
.surfaces
.into_iter()
.filter(|surface| {
surface.statement == "balance" || surface.statement == "cash_flow"
})
.filter(|surface| {
!pack_inherited_keys
.contains(&(surface.statement.clone(), surface.surface_key.clone()))
}),
);
}
let _ = (&file.version, &file.pack);
Ok(file)
}
/// Reads and parses a single surface pack file.
///
/// Takes a borrowed `Path` rather than `&PathBuf`, so any path-like borrow
/// works; existing `&PathBuf` call sites still compile through deref coercion.
///
/// # Errors
/// Fails with a message naming `path` when the file cannot be read or when
/// its contents are not a valid `SurfacePackFile`.
fn load_surface_pack_file(path: &std::path::Path) -> Result<SurfacePackFile> {
    let raw = fs::read_to_string(path).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to read {}",
            path.display()
        )
    })?;
    serde_json::from_str::<SurfacePackFile>(&raw).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to parse {}",
            path.display()
        )
    })
}
/// Loads the crosswalk for a reporting regime.
///
/// `"us-gaap"` reads `crosswalk/us-gaap.json` and `"ifrs-full"` reads
/// `crosswalk/ifrs.json` under the taxonomy directory. Any other regime
/// returns `Ok(None)` without touching the filesystem.
///
/// # Errors
/// Fails when the taxonomy directory cannot be resolved, or when the
/// crosswalk file cannot be read or parsed.
pub fn load_crosswalk(regime: &str) -> Result<Option<CrosswalkFile>> {
    let file_name = if regime == "us-gaap" {
        "us-gaap.json"
    } else if regime == "ifrs-full" {
        "ifrs.json"
    } else {
        return Ok(None);
    };

    let mut path = resolve_taxonomy_dir()?;
    path.push("crosswalk");
    path.push(file_name);

    let contents = fs::read_to_string(&path).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to read {}",
            path.display()
        )
    })?;
    let crosswalk = serde_json::from_str::<CrosswalkFile>(&contents).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to parse {}",
            path.display()
        )
    })?;

    // No runtime effect; presumably keeps `version`/`regime` counted as read.
    let _ = (&crosswalk.version, &crosswalk.regime);
    Ok(Some(crosswalk))
}
/// Loads the KPI definitions for `pack` from
/// `<taxonomy>/fiscal/v1/kpis/<pack>.kpis.json`.
///
/// # Errors
/// Fails when the taxonomy directory cannot be resolved, or when the file
/// cannot be read or parsed as a `KpiPackFile`.
pub fn load_kpi_pack(pack: FiscalPack) -> Result<KpiPackFile> {
    let mut path = resolve_taxonomy_dir()?;
    path.push("fiscal");
    path.push("v1");
    path.push("kpis");
    path.push(format!("{}.kpis.json", pack.as_str()));

    let contents = fs::read_to_string(&path).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to read {}",
            path.display()
        )
    })?;
    let kpi_pack = serde_json::from_str::<KpiPackFile>(&contents).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to parse {}",
            path.display()
        )
    })?;

    // No runtime effect; presumably keeps `version`/`pack` counted as read.
    let _ = (&kpi_pack.version, &kpi_pack.pack);
    Ok(kpi_pack)
}
/// Loads the computed-metric definitions for `pack` from
/// `<taxonomy>/fiscal/v1/<pack>.computed.json`.
///
/// # Errors
/// Fails when the taxonomy directory cannot be resolved, or when the file
/// cannot be read or parsed as a `ComputedPackFile`.
pub fn load_computed_pack(pack: FiscalPack) -> Result<ComputedPackFile> {
    let mut path = resolve_taxonomy_dir()?;
    path.push("fiscal");
    path.push("v1");
    path.push(format!("{}.computed.json", pack.as_str()));

    let contents = fs::read_to_string(&path).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to read {}",
            path.display()
        )
    })?;
    let computed_pack =
        serde_json::from_str::<ComputedPackFile>(&contents).with_context(|| {
            format!(
                "taxonomy resolution failed: unable to parse {}",
                path.display()
            )
        })?;

    // No runtime effect; presumably keeps `version`/`pack` counted as read.
    let _ = (&computed_pack.version, &computed_pack.pack);
    Ok(computed_pack)
}
/// Loads the universal income rows from
/// `<taxonomy>/fiscal/v1/universal_income.surface.json`.
///
/// # Errors
/// Fails when the taxonomy directory cannot be resolved, or when the file
/// cannot be read or parsed as a `UniversalIncomeFile`.
pub fn load_universal_income_definitions() -> Result<UniversalIncomeFile> {
    let mut path = resolve_taxonomy_dir()?;
    path.push("fiscal");
    path.push("v1");
    path.push("universal_income.surface.json");

    let contents = fs::read_to_string(&path).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to read {}",
            path.display()
        )
    })?;
    let definitions =
        serde_json::from_str::<UniversalIncomeFile>(&contents).with_context(|| {
            format!(
                "taxonomy resolution failed: unable to parse {}",
                path.display()
            )
        })?;

    // No runtime effect; presumably keeps `version` counted as read.
    let _ = &definitions.version;
    Ok(definitions)
}
/// Loads the income bridge for `pack` from
/// `<taxonomy>/fiscal/v1/<pack>.income-bridge.json`.
///
/// # Errors
/// Fails when the taxonomy directory cannot be resolved, or when the file
/// cannot be read or parsed as an `IncomeBridgeFile`.
pub fn load_income_bridge(pack: FiscalPack) -> Result<IncomeBridgeFile> {
    let mut path = resolve_taxonomy_dir()?;
    path.push("fiscal");
    path.push("v1");
    path.push(format!("{}.income-bridge.json", pack.as_str()));

    let contents = fs::read_to_string(&path).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to read {}",
            path.display()
        )
    })?;
    let bridge = serde_json::from_str::<IncomeBridgeFile>(&contents).with_context(|| {
        format!(
            "taxonomy resolution failed: unable to parse {}",
            path.display()
        )
    })?;

    // No runtime effect; presumably keeps `version`/`pack` counted as read.
    let _ = (&bridge.version, &bridge.pack);
    Ok(bridge)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The taxonomy directory resolves and every core asset loads.
    #[test]
    fn resolves_taxonomy_dir_and_loads_core_pack() {
        let dir = resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests");
        assert!(dir.exists());

        let surfaces = load_surface_pack(FiscalPack::Core).expect("core surface pack should load");
        assert_eq!(surfaces.pack, "core");
        assert!(!surfaces.surfaces.is_empty());

        let kpis = load_kpi_pack(FiscalPack::Core).expect("core kpi pack should load");
        assert_eq!(kpis.pack, "core");

        let computed = load_computed_pack(FiscalPack::Core).expect("core computed pack should load");
        assert_eq!(computed.pack, "core");

        let income_rows =
            load_universal_income_definitions().expect("universal income config should load");
        assert!(!income_rows.rows.is_empty());

        let bridge = load_income_bridge(FiscalPack::Core).expect("core bridge should load");
        assert_eq!(bridge.pack, "core");
    }

    /// Each listed non-core pack ships a surface file, an income bridge with
    /// `revenue` and `net_income` rows, and a KPI file, all naming the pack.
    #[test]
    fn loads_all_non_core_pack_assets() {
        let non_core_packs = [
            FiscalPack::BankLender,
            FiscalPack::Insurance,
            FiscalPack::ReitRealEstate,
            FiscalPack::BrokerAssetManager,
            FiscalPack::Agriculture,
            FiscalPack::ContractorsConstruction,
            FiscalPack::ContractorsFederalGovernment,
            FiscalPack::DevelopmentStage,
            FiscalPack::EntertainmentBroadcasters,
            FiscalPack::EntertainmentCableTelevision,
            FiscalPack::EntertainmentCasinos,
            FiscalPack::EntertainmentFilms,
            FiscalPack::EntertainmentMusic,
            FiscalPack::ExtractiveMining,
            FiscalPack::MortgageBanking,
            FiscalPack::TitlePlant,
            FiscalPack::Franchisors,
            FiscalPack::NotForProfit,
            FiscalPack::PlanDefinedBenefit,
            FiscalPack::PlanDefinedContribution,
            FiscalPack::PlanHealthWelfare,
            FiscalPack::RealEstateGeneral,
            FiscalPack::RealEstateCommonInterest,
            FiscalPack::RealEstateRetailLand,
            FiscalPack::RealEstateTimeSharing,
            FiscalPack::Software,
            FiscalPack::Steamship,
        ];

        for pack in non_core_packs.iter().copied() {
            let pack_name = pack.as_str();

            let surfaces = match load_surface_pack(pack) {
                Ok(loaded) => loaded,
                Err(error) => panic!("surface pack {} failed: {error}", pack.as_str()),
            };
            assert_eq!(surfaces.pack, pack_name);
            assert!(
                !surfaces.surfaces.is_empty(),
                "{} should define surfaces",
                pack.as_str()
            );

            let bridge = match load_income_bridge(pack) {
                Ok(loaded) => loaded,
                Err(error) => panic!("income bridge {} failed: {error}", pack.as_str()),
            };
            assert_eq!(bridge.pack, pack_name);
            assert!(bridge.rows.contains_key("revenue"));
            assert!(bridge.rows.contains_key("net_income"));

            let kpis = match load_kpi_pack(pack) {
                Ok(loaded) => loaded,
                Err(error) => panic!("kpi pack {} failed: {error}", pack.as_str()),
            };
            assert_eq!(kpis.pack, pack_name);
        }
    }
}