Expand backend financial statement parsers

This commit is contained in:
2026-03-12 21:15:54 -04:00
parent 33ce48f53c
commit 7a7a78340f
13 changed files with 4398 additions and 456 deletions

View File

@@ -1,12 +1,22 @@
use anyhow::{anyhow, Context, Result};
use serde::Deserialize;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::collections::HashMap;
use std::path::PathBuf;
use crate::pack_selector::FiscalPack;
/// Serde default for the `include_in_output` field of `SurfaceDefinition`.
///
/// Referenced by name from `#[serde(default = "default_include_in_output")]`, so a
/// surface definition that omits the field is deserialized with it set to `true`.
fn default_include_in_output() -> bool {
    true
}
/// Optional sign adjustment that a surface definition can request through its
/// `sign_transform` field.
///
/// Variants deserialize from their snake_case names (`"invert"`).
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SurfaceSignTransform {
/// NOTE(review): presumably negates the resolved value; the code that applies
/// the transform is not visible in this file section — confirm at the use site.
Invert,
}
#[derive(Debug, Deserialize, Clone)]
pub struct SurfacePackFile {
pub version: String,
@@ -25,9 +35,44 @@ pub struct SurfaceDefinition {
pub rollup_policy: String,
pub allowed_source_concepts: Vec<String>,
pub allowed_authoritative_concepts: Vec<String>,
pub formula_fallback: Option<serde_json::Value>,
pub formula_fallback: Option<SurfaceFormulaFallback>,
pub detail_grouping_policy: String,
pub materiality_policy: String,
#[serde(default = "default_include_in_output")]
pub include_in_output: bool,
#[serde(default)]
pub sign_transform: Option<SurfaceSignTransform>,
}
/// Value of a surface definition's `formula_fallback` field.
///
/// The enum is `#[serde(untagged)]`, so serde tries the variants in declaration
/// order and keeps the first one that deserializes: a plain string becomes
/// `LegacyString`, a value matching `SurfaceFormula` becomes `Structured`.
/// Do not reorder the variants without re-checking that behaviour.
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum SurfaceFormulaFallback {
/// Plain-string form. The payload is never read (hence `allow(dead_code)`);
/// `structured()` reports `None` for it.
/// NOTE(review): presumably kept so older pack files still parse — confirm.
LegacyString(#[allow(dead_code)] String),
/// Structured form carrying an operation and its source list.
Structured(SurfaceFormula),
}
impl SurfaceFormulaFallback {
    /// Borrows the structured formula when this fallback carries one.
    ///
    /// Returns `None` for the `LegacyString` variant, whose payload is ignored.
    pub fn structured(&self) -> Option<&SurfaceFormula> {
        // Both variants are listed explicitly so that adding a new one forces
        // this accessor to be revisited.
        match self {
            Self::LegacyString(_) => None,
            Self::Structured(inner) => Some(inner),
        }
    }
}
/// Structured formula fallback: an operation applied to a list of sources.
#[derive(Debug, Deserialize, Clone)]
pub struct SurfaceFormula {
/// Operation to apply to `sources`.
pub op: SurfaceFormulaOp,
/// Identifiers of the operands.
/// NOTE(review): presumably surface keys or concept names — the consumer is
/// not visible here; verify against the code that evaluates formulas.
pub sources: Vec<String>,
/// Defaults to `false` when the key is absent (`#[serde(default)]` on `bool`).
/// NOTE(review): name suggests missing operands are treated as zero when
/// set — confirm in the evaluator.
#[serde(default)]
pub treat_null_as_zero: bool,
}
/// Operation selector for a `SurfaceFormula`.
///
/// Variants deserialize from their snake_case names (`"sum"`, `"subtract"`).
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SurfaceFormulaOp {
/// NOTE(review): presumably adds all sources together — confirm in the evaluator.
Sum,
/// NOTE(review): operand order/arity for subtraction is not defined here —
/// confirm in the evaluator.
Subtract,
}
#[derive(Debug, Deserialize, Clone)]
@@ -147,7 +192,9 @@ pub fn resolve_taxonomy_dir() -> Result<PathBuf> {
candidates
.into_iter()
.find(|path| path.is_dir())
.ok_or_else(|| anyhow!("taxonomy resolution failed: unable to locate runtime taxonomy directory"))
.ok_or_else(|| {
anyhow!("taxonomy resolution failed: unable to locate runtime taxonomy directory")
})
}
pub fn load_surface_pack(pack: FiscalPack) -> Result<SurfacePackFile> {
@@ -156,14 +203,52 @@ pub fn load_surface_pack(pack: FiscalPack) -> Result<SurfacePackFile> {
.join("fiscal")
.join("v1")
.join(format!("{}.surface.json", pack.as_str()));
let raw = fs::read_to_string(&path)
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
let file = serde_json::from_str::<SurfacePackFile>(&raw)
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
let mut file = load_surface_pack_file(&path)?;
if !matches!(pack, FiscalPack::Core) {
let core_path = taxonomy_dir
.join("fiscal")
.join("v1")
.join("core.surface.json");
let core_file = load_surface_pack_file(&core_path)?;
let pack_inherited_keys = file
.surfaces
.iter()
.filter(|surface| surface.statement == "balance" || surface.statement == "cash_flow")
.map(|surface| (surface.statement.clone(), surface.surface_key.clone()))
.collect::<std::collections::HashSet<_>>();
file.surfaces.extend(
core_file
.surfaces
.into_iter()
.filter(|surface| surface.statement == "balance" || surface.statement == "cash_flow")
.filter(|surface| {
!pack_inherited_keys
.contains(&(surface.statement.clone(), surface.surface_key.clone()))
}),
);
}
let _ = (&file.version, &file.pack);
Ok(file)
}
fn load_surface_pack_file(path: &PathBuf) -> Result<SurfacePackFile> {
let raw = fs::read_to_string(path).with_context(|| {
format!(
"taxonomy resolution failed: unable to read {}",
path.display()
)
})?;
serde_json::from_str::<SurfacePackFile>(&raw).with_context(|| {
format!(
"taxonomy resolution failed: unable to parse {}",
path.display()
)
})
}
pub fn load_crosswalk(regime: &str) -> Result<Option<CrosswalkFile>> {
let file_name = match regime {
"us-gaap" => "us-gaap.json",
@@ -173,10 +258,18 @@ pub fn load_crosswalk(regime: &str) -> Result<Option<CrosswalkFile>> {
let taxonomy_dir = resolve_taxonomy_dir()?;
let path = taxonomy_dir.join("crosswalk").join(file_name);
let raw = fs::read_to_string(&path)
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
let file = serde_json::from_str::<CrosswalkFile>(&raw)
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
let raw = fs::read_to_string(&path).with_context(|| {
format!(
"taxonomy resolution failed: unable to read {}",
path.display()
)
})?;
let file = serde_json::from_str::<CrosswalkFile>(&raw).with_context(|| {
format!(
"taxonomy resolution failed: unable to parse {}",
path.display()
)
})?;
let _ = (&file.version, &file.regime);
Ok(Some(file))
}
@@ -188,10 +281,18 @@ pub fn load_kpi_pack(pack: FiscalPack) -> Result<KpiPackFile> {
.join("v1")
.join("kpis")
.join(format!("{}.kpis.json", pack.as_str()));
let raw = fs::read_to_string(&path)
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
let file = serde_json::from_str::<KpiPackFile>(&raw)
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
let raw = fs::read_to_string(&path).with_context(|| {
format!(
"taxonomy resolution failed: unable to read {}",
path.display()
)
})?;
let file = serde_json::from_str::<KpiPackFile>(&raw).with_context(|| {
format!(
"taxonomy resolution failed: unable to parse {}",
path.display()
)
})?;
let _ = (&file.version, &file.pack);
Ok(file)
}
@@ -202,10 +303,18 @@ pub fn load_universal_income_definitions() -> Result<UniversalIncomeFile> {
.join("fiscal")
.join("v1")
.join("universal_income.surface.json");
let raw = fs::read_to_string(&path)
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
let file = serde_json::from_str::<UniversalIncomeFile>(&raw)
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
let raw = fs::read_to_string(&path).with_context(|| {
format!(
"taxonomy resolution failed: unable to read {}",
path.display()
)
})?;
let file = serde_json::from_str::<UniversalIncomeFile>(&raw).with_context(|| {
format!(
"taxonomy resolution failed: unable to parse {}",
path.display()
)
})?;
let _ = &file.version;
Ok(file)
}
@@ -216,10 +325,18 @@ pub fn load_income_bridge(pack: FiscalPack) -> Result<IncomeBridgeFile> {
.join("fiscal")
.join("v1")
.join(format!("{}.income-bridge.json", pack.as_str()));
let raw = fs::read_to_string(&path)
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
let file = serde_json::from_str::<IncomeBridgeFile>(&raw)
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
let raw = fs::read_to_string(&path).with_context(|| {
format!(
"taxonomy resolution failed: unable to read {}",
path.display()
)
})?;
let file = serde_json::from_str::<IncomeBridgeFile>(&raw).with_context(|| {
format!(
"taxonomy resolution failed: unable to parse {}",
path.display()
)
})?;
let _ = (&file.version, &file.pack);
Ok(file)
}
@@ -230,17 +347,20 @@ mod tests {
#[test]
fn resolves_taxonomy_dir_and_loads_core_pack() {
let taxonomy_dir = resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests");
let taxonomy_dir =
resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests");
assert!(taxonomy_dir.exists());
let surface_pack = load_surface_pack(FiscalPack::Core).expect("core surface pack should load");
let surface_pack =
load_surface_pack(FiscalPack::Core).expect("core surface pack should load");
assert_eq!(surface_pack.pack, "core");
assert!(!surface_pack.surfaces.is_empty());
let kpi_pack = load_kpi_pack(FiscalPack::Core).expect("core kpi pack should load");
assert_eq!(kpi_pack.pack, "core");
let universal_income = load_universal_income_definitions().expect("universal income config should load");
let universal_income =
load_universal_income_definitions().expect("universal income config should load");
assert!(!universal_income.rows.is_empty());
let core_bridge = load_income_bridge(FiscalPack::Core).expect("core bridge should load");