use anyhow::Result;
use std::collections::{BTreeMap, HashMap, HashSet};

use crate::pack_selector::FiscalPack;
use crate::taxonomy_loader::{
    load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition, SurfaceFormula,
    SurfaceFormulaOp, SurfaceSignTransform,
};
use crate::{
    ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput,
    PeriodOutput, StatementRowMap, StatementRowOutput, SurfaceRowMap, SurfaceRowOutput,
};

// NOTE(review): this file arrived with every `<...>` type-argument list stripped
// (`Option,`, `HashMap::::new()`, `.collect::>()`). The type arguments below were
// restored from how each value is used; the ones that could not be fully confirmed
// from this file are flagged individually.

/// How a source concept ended up attached to the compact surface model.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MappingMethod {
    AuthoritativeDirect,
    DirectSourceConcept,
    AggregateChildren,
    TaxonomyKpi,
    UnmappedResidual,
}

impl MappingMethod {
    /// Returns the snake_case string identifier of the variant.
    pub fn as_str(&self) -> &'static str {
        match self {
            MappingMethod::AuthoritativeDirect => "authoritative_direct",
            MappingMethod::DirectSourceConcept => "direct_source_concept",
            MappingMethod::AggregateChildren => "aggregate_children",
            MappingMethod::TaxonomyKpi => "taxonomy_kpi",
            MappingMethod::UnmappedResidual => "unmapped_residual",
        }
    }
}

/// Mapping metadata recorded for a single source concept key.
#[derive(Debug, Clone, Default)]
pub struct MappingAssignment {
    pub authoritative_concept_key: Option<String>,
    pub mapping_method: Option<MappingMethod>,
    pub surface_key: Option<String>,
    pub detail_parent_surface_key: Option<String>,
    // NOTE(review): only ever set to `None` in this file; `String` is assumed from
    // the sibling key fields — confirm against `ConceptOutput::kpi_key`.
    pub kpi_key: Option<String>,
    pub residual_flag: bool,
}

/// Result of [`build_compact_surface_model`]: surface rows, detail rows, summary
/// counters and the per-concept mapping assignments, keyed by concept key.
#[derive(Debug, Default)]
pub struct CompactSurfaceModel {
    pub surface_rows: SurfaceRowMap,
    pub detail_rows: DetailRowStatementMap,
    pub normalization_summary: NormalizationSummaryOutput,
    pub concept_mappings: HashMap<String, MappingAssignment>,
}

/// Whether a matched statement row feeds the surface value itself or is listed as
/// a detail component beneath it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchRole {
    Surface,
    Detail,
}

/// A statement row that matched a surface definition, plus how it matched.
#[derive(Debug, Clone)]
struct MatchedStatementRow<'a> {
    row: &'a StatementRowOutput,
    authoritative_concept_key: Option<String>,
    mapping_method: MappingMethod,
    match_role: MatchRole,
    /// Lower is preferred: 0 for authoritative matches, 1 for source-concept matches.
    rank: i64,
}

/// Row keys and concept keys already absorbed into a surface, used to keep those
/// rows out of the "unmapped" residual bucket.
#[derive(Debug, Default, Clone)]
struct ConsumedSources {
    row_keys: HashSet<String>,
    concept_keys: HashSet<String>,
}

impl ConsumedSources {
    /// Records a statement row's key and concept key as consumed.
    fn insert_row(&mut self, row: &StatementRowOutput) {
        self.row_keys.insert(row.key.clone());
        self.concept_keys.insert(row.concept_key.clone());
    }

    /// Records everything a resolved surface row was built from.
    ///
    /// NOTE(review): `source_concepts` is filled from row `qname`s elsewhere in this
    /// file, while `matches_statement_row` compares against `concept_key`; confirm
    /// the two share a format, otherwise only the row-key half of this ever matches.
    fn insert_surface_row(&mut self, row: &SurfaceRowOutput) {
        self.row_keys.extend(row.source_row_keys.iter().cloned());
        self.concept_keys
            .extend(row.source_concepts.iter().cloned());
    }

    /// Merges another set of consumed sources into this one.
    fn extend(&mut self, other: Self) {
        self.row_keys.extend(other.row_keys);
        self.concept_keys.extend(other.concept_keys);
    }

    /// True when the row's key or its concept key has been consumed.
    fn matches_statement_row(&self, row: &StatementRowOutput) -> bool {
        self.row_keys.contains(&row.key) || self.concept_keys.contains(&row.concept_key)
    }
}

/// Intermediate result of resolving one surface definition, before it is turned
/// into a `SurfaceRowOutput`.
#[derive(Debug)]
struct SurfaceResolution {
    /// Value per period id.
    values: BTreeMap<String, Option<f64>>,
    source_concepts: Vec<String>,
    source_row_keys: Vec<String>,
    source_fact_ids: Vec<i64>,
    has_dimensions: bool,
    /// Per period id, the key of the single row the value was taken from (if any).
    resolved_source_row_keys: BTreeMap<String, Option<String>>,
    consumed_sources: ConsumedSources,
    formula_used: bool,
}

/// Builds the compact surface model for every statement in `statement_keys()`.
///
/// For each statement the surface definitions of the loaded pack are processed in
/// `(order, label)` order. Each definition is matched against the statement rows
/// not yet used as a direct or detail match, resolved to per-period values, and
/// recorded (hidden surfaces with `include_in_output == false` are still kept as
/// formula inputs). Rows that carry a value but were neither used nor consumed are
/// emitted under the `"unmapped"` detail group and flagged as residual.
///
/// # Errors
/// Propagates failures from `load_surface_pack` and `load_crosswalk`.
pub fn build_compact_surface_model(
    periods: &[PeriodOutput],
    statement_rows: &StatementRowMap,
    taxonomy_regime: &str,
    fiscal_pack: FiscalPack,
    // NOTE(review): element type assumed to be `String` — confirm against
    // `NormalizationSummaryOutput::warnings`.
    warnings: Vec<String>,
) -> Result<CompactSurfaceModel> {
    let pack = load_surface_pack(fiscal_pack)?;
    let crosswalk = load_crosswalk(taxonomy_regime)?;
    let mut surface_rows = empty_surface_row_map();
    let mut detail_rows = empty_detail_row_map();
    let mut concept_mappings = HashMap::<String, MappingAssignment>::new();
    let mut surface_row_count = 0usize;
    let mut detail_row_count = 0usize;
    let mut unmapped_row_count = 0usize;
    let mut material_unmapped_row_count = 0usize;

    for statement in statement_keys() {
        // Borrow the statement's rows instead of cloning the whole vector; a
        // statement missing from the map is treated as having no rows.
        let rows = statement_rows
            .get(statement)
            .map(Vec::as_slice)
            .unwrap_or_default();
        let mut statement_definitions = pack
            .surfaces
            .iter()
            .filter(|definition| definition.statement == statement)
            .collect::<Vec<_>>();
        statement_definitions.sort_by(|left, right| {
            left.order
                .cmp(&right.order)
                .then_with(|| left.label.cmp(&right.label))
        });

        let mut used_row_keys = HashSet::<String>::new();
        let mut consumed_sources = ConsumedSources::default();
        let mut statement_surface_rows = Vec::<SurfaceRowOutput>::new();
        let mut statement_detail_rows = BTreeMap::<String, Vec<DetailRowOutput>>::new();
        let mut resolved_statement_surfaces = HashMap::<String, SurfaceRowOutput>::new();

        for definition in statement_definitions {
            // Only rows not already claimed by an earlier definition are candidates.
            let matches = rows
                .iter()
                .filter(|row| !used_row_keys.contains(&row.key))
                .filter_map(|row| match_statement_row(row, definition, crosswalk.as_ref()))
                .collect::<Vec<_>>();
            let direct_surface_matches = matches
                .iter()
                .filter(|matched| matched.match_role == MatchRole::Surface)
                .cloned()
                .collect::<Vec<_>>();
            let detail_component_matches = matches
                .iter()
                .filter(|matched| matched.match_role == MatchRole::Detail)
                .cloned()
                .collect::<Vec<_>>();
            let detail_matches = if definition.detail_grouping_policy == "group_all_children" {
                if detail_component_matches.is_empty()
                    && definition.rollup_policy == "aggregate_children"
                {
                    Vec::new()
                } else {
                    detail_component_matches.clone()
                }
            } else {
                Vec::new()
            };

            let surface_resolution = resolve_surface(
                definition,
                periods,
                &direct_surface_matches,
                &detail_component_matches,
                &resolved_statement_surfaces,
            );
            // Nothing resolved: leave the candidate rows available for later definitions.
            let Some(surface_resolution) = surface_resolution else {
                continue;
            };

            for matched in &direct_surface_matches {
                used_row_keys.insert(matched.row.key.clone());
                consumed_sources.insert_row(matched.row);
                concept_mappings.insert(
                    matched.row.concept_key.clone(),
                    MappingAssignment {
                        authoritative_concept_key: matched.authoritative_concept_key.clone(),
                        mapping_method: Some(matched.mapping_method),
                        surface_key: Some(definition.surface_key.clone()),
                        detail_parent_surface_key: None,
                        kpi_key: None,
                        residual_flag: false,
                    },
                );
            }

            // Detail rows are marked used and mapped even when the surface is hidden
            // (`include_in_output == false`); they are only emitted when it is shown.
            let details = detail_matches
                .iter()
                .map(|matched| {
                    used_row_keys.insert(matched.row.key.clone());
                    consumed_sources.insert_row(matched.row);
                    concept_mappings.insert(
                        matched.row.concept_key.clone(),
                        MappingAssignment {
                            authoritative_concept_key: matched.authoritative_concept_key.clone(),
                            mapping_method: Some(matched.mapping_method),
                            surface_key: Some(definition.surface_key.clone()),
                            detail_parent_surface_key: Some(definition.surface_key.clone()),
                            kpi_key: None,
                            residual_flag: false,
                        },
                    );
                    build_detail_row(
                        matched.row,
                        &definition.surface_key,
                        false,
                        definition.sign_transform,
                    )
                })
                .collect::<Vec<_>>();
            if !details.is_empty() && definition.include_in_output {
                detail_row_count += details.len();
                statement_detail_rows.insert(definition.surface_key.clone(), details);
            }

            let detail_count = if definition.include_in_output {
                statement_detail_rows
                    .get(&definition.surface_key)
                    .map(|rows| rows.len() as i64)
            } else {
                None
            };

            let row = SurfaceRowOutput {
                key: definition.surface_key.clone(),
                label: definition.label.clone(),
                category: definition.category.clone(),
                template_section: definition.category.clone(),
                order: definition.order,
                unit: definition.unit.clone(),
                values: surface_resolution.values,
                source_concepts: surface_resolution.source_concepts,
                source_row_keys: surface_resolution.source_row_keys,
                source_fact_ids: surface_resolution.source_fact_ids,
                // NOTE(review): as written this is `Some(surface_key)` whenever a
                // formula fallback is defined, whether or not the formula produced the
                // value (`formula_used` cannot change the outcome). Confirm whether it
                // was meant to depend on `formula_used` alone.
                formula_key: if surface_resolution.formula_used
                    || definition.formula_fallback.is_some()
                {
                    definition
                        .formula_fallback
                        .as_ref()
                        .map(|_| definition.surface_key.clone())
                } else {
                    None
                },
                has_dimensions: surface_resolution.has_dimensions,
                resolved_source_row_keys: surface_resolution.resolved_source_row_keys,
                statement: Some(definition.statement.clone()),
                detail_count,
                resolution_method: None,
                confidence: None,
                warning_codes: vec![],
            };
            // The resolution is owned and not used afterwards, so move the set
            // instead of cloning it.
            consumed_sources.extend(surface_resolution.consumed_sources);
            // Hidden surfaces are still registered so later formulas can use them.
            resolved_statement_surfaces.insert(definition.surface_key.clone(), row.clone());
            if definition.include_in_output {
                statement_surface_rows.push(row);
                surface_row_count += 1;
            }
            // No-op read of the field; kept as in the original.
            let _ = &definition.materiality_policy;
        }

        statement_surface_rows.sort_by(|left, right| {
            left.order
                .cmp(&right.order)
                .then_with(|| left.label.cmp(&right.label))
        });

        let baseline = baseline_for_statement(statement, &statement_surface_rows);
        let threshold = materiality_threshold(statement, baseline);
        // Residual rows: have at least one value, and were neither used directly nor
        // consumed (by row key or concept key) through any resolved surface.
        let residual_rows = rows
            .iter()
            .filter(|row| !used_row_keys.contains(&row.key))
            .filter(|row| !consumed_sources.matches_statement_row(row))
            .filter(|row| has_any_value(&row.values))
            .map(|row| {
                concept_mappings.insert(
                    row.concept_key.clone(),
                    MappingAssignment {
                        authoritative_concept_key: None,
                        mapping_method: Some(MappingMethod::UnmappedResidual),
                        surface_key: None,
                        detail_parent_surface_key: Some("unmapped".to_string()),
                        kpi_key: None,
                        residual_flag: true,
                    },
                );
                build_detail_row(row, "unmapped", true, None)
            })
            .collect::<Vec<_>>();
        if !residual_rows.is_empty() {
            unmapped_row_count += residual_rows.len();
            material_unmapped_row_count += residual_rows
                .iter()
                .filter(|row| max_abs_value(&row.values) >= threshold)
                .count();
            detail_row_count += residual_rows.len();
            statement_detail_rows.insert("unmapped".to_string(), residual_rows);
        }

        surface_rows.insert(statement.to_string(), statement_surface_rows);
        detail_rows.insert(statement.to_string(), statement_detail_rows);
    }

    Ok(CompactSurfaceModel {
        surface_rows,
        detail_rows,
        normalization_summary: NormalizationSummaryOutput {
            surface_row_count,
            detail_row_count,
            kpi_row_count: 0,
            unmapped_row_count,
            material_unmapped_row_count,
            warnings,
        },
        concept_mappings,
    })
}

/// Resolves one surface definition to per-period values, trying in order: the
/// formula (for `formula_only`), direct surface matches, aggregation of detail
/// components, then the formula fallback. The configured sign transform is applied
/// to whichever resolution is produced.
fn resolve_surface(
    definition: &SurfaceDefinition,
    periods: &[PeriodOutput],
    direct_surface_matches: &[MatchedStatementRow<'_>],
    detail_component_matches: &[MatchedStatementRow<'_>],
    resolved_statement_surfaces: &HashMap<String, SurfaceRowOutput>,
) -> Option<SurfaceResolution> {
    if definition.rollup_policy == "formula_only" {
        return resolve_formula_surface(definition, periods, resolved_statement_surfaces)
            .map(|resolution| apply_sign_transform(resolution, definition.sign_transform));
    }
    if !direct_surface_matches.is_empty() {
        return Some(apply_sign_transform(
            resolve_direct_surface(periods, direct_surface_matches),
            definition.sign_transform,
        ));
    }
    if definition.rollup_policy == "aggregate_children" && !detail_component_matches.is_empty() {
        return Some(apply_sign_transform(
            resolve_aggregate_surface(periods, detail_component_matches),
            definition.sign_transform,
        ));
    }
    if definition.rollup_policy == "aggregate_children" && definition.formula_fallback.is_some() {
        return resolve_formula_surface(definition, periods, resolved_statement_surfaces)
            .map(|resolution| apply_sign_transform(resolution, definition.sign_transform));
    }
if definition.rollup_policy == "direct_or_formula" {
        return resolve_formula_surface(definition, periods, resolved_statement_surfaces)
            .map(|resolution| apply_sign_transform(resolution, definition.sign_transform));
    }
    None
}

// NOTE(review): this file arrived with every `<...>` type-argument list stripped
// (e.g. `.collect::>()`, `-> Option {`); the type arguments in this section were
// restored from how each value is used.

/// Resolves a surface from rows that matched it directly.
///
/// For every period, the best-ranked match that actually has a value in that
/// period supplies the value and is recorded in `resolved_source_row_keys`;
/// periods with no valued match get `None` for both. All matches (not only the
/// chosen ones) are listed as sources and marked consumed.
fn resolve_direct_surface(
    periods: &[PeriodOutput],
    matches: &[MatchedStatementRow<'_>],
) -> SurfaceResolution {
    let mut values = BTreeMap::new();
    let mut resolved_source_row_keys = BTreeMap::new();
    for period in periods {
        // Candidates are restricted to matches with a value in this period, so
        // sparse aliases can fill different periods of the same surface.
        let period_matches = matches
            .iter()
            .filter(|matched| {
                matched
                    .row
                    .values
                    .get(&period.id)
                    .copied()
                    .flatten()
                    .is_some()
            })
            .cloned()
            .collect::<Vec<_>>();
        let chosen = if period_matches.is_empty() {
            None
        } else {
            Some(pick_best_match(&period_matches))
        };
        values.insert(
            period.id.clone(),
            chosen.and_then(|matched| matched.row.values.get(&period.id).copied().flatten()),
        );
        resolved_source_row_keys.insert(
            period.id.clone(),
            chosen.map(|matched| matched.row.key.clone()),
        );
    }

    let mut consumed_sources = ConsumedSources::default();
    for matched in matches {
        consumed_sources.insert_row(matched.row);
    }

    SurfaceResolution {
        values,
        source_concepts: unique_sorted_strings(
            matches
                .iter()
                .map(|matched| matched.row.qname.clone())
                .collect::<Vec<_>>(),
        ),
        source_row_keys: unique_sorted_strings(
            matches
                .iter()
                .map(|matched| matched.row.key.clone())
                .collect::<Vec<_>>(),
        ),
        source_fact_ids: unique_sorted_i64(
            matches
                .iter()
                .flat_map(|matched| matched.row.source_fact_ids.clone())
                .collect::<Vec<_>>(),
        ),
        has_dimensions: matches.iter().any(|matched| matched.row.has_dimensions),
        resolved_source_row_keys,
        consumed_sources,
        formula_used: false,
    }
}

/// Resolves a surface by combining its detail component rows per period via
/// `build_surface_values`.
///
/// No single row backs an aggregated value, so `resolved_source_row_keys` is `None`
/// for every period.
fn resolve_aggregate_surface(
    periods: &[PeriodOutput],
    matches: &[MatchedStatementRow<'_>],
) -> SurfaceResolution {
    // NOTE(review): the copies relabelled as `AggregateChildren` never leave this
    // function, so callers still record the original mapping method — confirm intent.
    let aggregate_matches = matches
        .iter()
        .map(|matched| {
            let mut aggregate = matched.clone();
            aggregate.mapping_method = MappingMethod::AggregateChildren;
            aggregate
        })
        .collect::<Vec<_>>();

    let mut consumed_sources = ConsumedSources::default();
    for matched in &aggregate_matches {
        consumed_sources.insert_row(matched.row);
    }

    SurfaceResolution {
        values: build_surface_values(periods, &aggregate_matches),
        source_concepts: unique_sorted_strings(
            aggregate_matches
                .iter()
                .map(|matched| matched.row.qname.clone())
                .collect::<Vec<_>>(),
        ),
        source_row_keys: unique_sorted_strings(
            aggregate_matches
                .iter()
                .map(|matched| matched.row.key.clone())
                .collect::<Vec<_>>(),
        ),
        source_fact_ids: unique_sorted_i64(
            aggregate_matches
                .iter()
                .flat_map(|matched| matched.row.source_fact_ids.clone())
                .collect::<Vec<_>>(),
        ),
        has_dimensions: aggregate_matches
            .iter()
            .any(|matched| matched.row.has_dimensions),
        resolved_source_row_keys: periods
            .iter()
            .map(|period| (period.id.clone(), None))
            .collect(),
        consumed_sources,
        formula_used: false,
    }
}

/// Resolves a surface from its structured formula fallback, evaluated over the
/// surfaces already resolved for the same statement.
///
/// Returns `None` when the definition has no structured formula, when none of the
/// formula's sources has been resolved, or when the formula yields no value in any
/// period. Sources and consumed rows are inherited from the input surfaces.
fn resolve_formula_surface(
    definition: &SurfaceDefinition,
    periods: &[PeriodOutput],
    resolved_statement_surfaces: &HashMap<String, SurfaceRowOutput>,
) -> Option<SurfaceResolution> {
    let formula = definition
        .formula_fallback
        .as_ref()
        .and_then(|formula| formula.structured())?;
    // Sources that have not been resolved are dropped here; the per-period
    // evaluation then sees them as missing values.
    let source_rows = formula
        .sources
        .iter()
        .filter_map(|source_key| resolved_statement_surfaces.get(source_key))
        .collect::<Vec<_>>();
    if source_rows.is_empty() {
        return None;
    }

    let values = periods
        .iter()
        .map(|period| {
            (
                period.id.clone(),
                evaluate_formula_for_period(formula, &period.id, &source_rows),
            )
        })
        .collect::<BTreeMap<_, _>>();
    if !has_any_value(&values) {
        return None;
    }

    let mut consumed_sources = ConsumedSources::default();
    for row in &source_rows {
        consumed_sources.insert_surface_row(row);
    }

    Some(SurfaceResolution {
        values,
        source_concepts: unique_sorted_strings(
            source_rows
                .iter()
                .flat_map(|row| row.source_concepts.clone())
                .collect::<Vec<_>>(),
        ),
        source_row_keys: unique_sorted_strings(
            source_rows
                .iter()
                .flat_map(|row| row.source_row_keys.clone())
                .collect::<Vec<_>>(),
        ),
        source_fact_ids: unique_sorted_i64(
            source_rows
                .iter()
                .flat_map(|row| row.source_fact_ids.clone())
                .collect::<Vec<_>>(),
        ),
        has_dimensions: source_rows.iter().any(|row| row.has_dimensions),
        resolved_source_row_keys: periods
            .iter()
            .map(|period| (period.id.clone(),
None))
            .collect(),
        consumed_sources,
        formula_used: true,
    })
}

// NOTE(review): this file arrived with every `<...>` type-argument list stripped
// (e.g. `&[Option]`, `&mut HashMap,`); the type arguments in this section were
// restored from how each value is used.

/// Evaluates `formula` for a single period.
///
/// Each formula source is looked up by key among `source_rows`; a source that is
/// absent, or has no value for `period_id`, contributes `None`. The operands are
/// then combined according to `formula.op`.
fn evaluate_formula_for_period(
    formula: &SurfaceFormula,
    period_id: &str,
    source_rows: &[&SurfaceRowOutput],
) -> Option<f64> {
    let values = formula
        .sources
        .iter()
        .map(|source_key| {
            source_rows
                .iter()
                .find(|row| row.key == *source_key)
                .and_then(|row| row.values.get(period_id).copied().flatten())
        })
        .collect::<Vec<_>>();
    match formula.op {
        SurfaceFormulaOp::Sum => sum_formula_values(&values, formula.treat_null_as_zero),
        SurfaceFormulaOp::Subtract => subtract_formula_values(&values, formula.treat_null_as_zero),
    }
}

/// Sums formula operands.
///
/// With `treat_null_as_zero`, missing operands count as `0.0`, but the result is
/// `None` when every operand is missing. Without it, any missing operand makes the
/// result `None`.
fn sum_formula_values(values: &[Option<f64>], treat_null_as_zero: bool) -> Option<f64> {
    if treat_null_as_zero {
        if values.iter().all(|value| value.is_none()) {
            return None;
        }
        return Some(values.iter().map(|value| value.unwrap_or(0.0)).sum());
    }
    if values.iter().any(|value| value.is_none()) {
        return None;
    }
    Some(values.iter().map(|value| value.unwrap_or(0.0)).sum())
}

/// Computes `values[0] - values[1]`.
///
/// Returns `None` unless exactly two operands are supplied. With
/// `treat_null_as_zero`, a single missing operand counts as `0.0`, but the result
/// is `None` when both are missing — mirroring `sum_formula_values`, so a period
/// with no data does not turn into a fabricated `0.0`. Without it, any missing
/// operand makes the result `None`.
fn subtract_formula_values(values: &[Option<f64>], treat_null_as_zero: bool) -> Option<f64> {
    let &[left, right] = values else {
        return None;
    };
    if treat_null_as_zero {
        // The original guarded this case with `!treat_null_as_zero`, where it could
        // never fire (the `?` operators had already returned), so two missing
        // operands produced `Some(0.0)`.
        if left.is_none() && right.is_none() {
            return None;
        }
        Some(left.unwrap_or(0.0) - right.unwrap_or(0.0))
    } else {
        Some(left? - right?)
    }
}

/// Merges `secondary` mapping assignments into `primary`, key by key.
///
/// A key that exists only in `secondary` is inserted unchanged. For a key present
/// in both, `primary` wins field by field: empty fields are filled from
/// `secondary`, the mapping method is replaced only when `primary` has none or has
/// `UnmappedResidual`, and the entry stays residual only if both sides are residual.
pub fn merge_mapping_assignments(
    primary: &mut HashMap<String, MappingAssignment>,
    secondary: HashMap<String, MappingAssignment>,
) {
    for (concept_key, assignment) in secondary {
        let existing = match primary.entry(concept_key) {
            std::collections::hash_map::Entry::Vacant(slot) => {
                // Previously a default entry was created and then AND-ed with the
                // incoming flag, which always cleared `residual_flag` for new keys.
                slot.insert(assignment);
                continue;
            }
            std::collections::hash_map::Entry::Occupied(slot) => slot.into_mut(),
        };

        if existing.authoritative_concept_key.is_none() {
            existing.authoritative_concept_key = assignment.authoritative_concept_key;
        }
        if existing.mapping_method.is_none()
            || matches!(
                existing.mapping_method,
                Some(MappingMethod::UnmappedResidual)
            )
        {
            existing.mapping_method = assignment.mapping_method;
        }
        if existing.surface_key.is_none() {
            existing.surface_key = assignment.surface_key;
        }
        if existing.detail_parent_surface_key.is_none() {
            existing.detail_parent_surface_key = assignment.detail_parent_surface_key;
        }
        if existing.kpi_key.is_none() {
            existing.kpi_key = assignment.kpi_key;
        }
        existing.residual_flag = existing.residual_flag && assignment.residual_flag;
    }
}

/// Copies the mapping assignment of each concept and fact (looked up by its
/// `concept_key`) onto the record itself. Records without an assignment are left
/// untouched.
pub fn apply_mapping_assignments(
    concepts: &mut [ConceptOutput],
    facts: &mut [FactOutput],
    mappings: &HashMap<String, MappingAssignment>,
) {
    for concept in concepts {
        if let Some(mapping) = mappings.get(&concept.concept_key) {
            concept.authoritative_concept_key = mapping.authoritative_concept_key.clone();
            concept.mapping_method = mapping
                .mapping_method
                .map(|method| method.as_str().to_string());
            concept.surface_key = mapping.surface_key.clone();
            concept.detail_parent_surface_key = mapping.detail_parent_surface_key.clone();
            concept.kpi_key = mapping.kpi_key.clone();
            concept.residual_flag = mapping.residual_flag;
        }
    }
    for fact in facts {
        if let Some(mapping) = mappings.get(&fact.concept_key) {
            fact.authoritative_concept_key = mapping.authoritative_concept_key.clone();
            fact.mapping_method = mapping
                .mapping_method
                .map(|method| method.as_str().to_string());
            fact.surface_key = mapping.surface_key.clone();
            fact.detail_parent_surface_key = mapping.detail_parent_surface_key.clone();
            fact.kpi_key = mapping.kpi_key.clone();
            fact.residual_flag =
mapping.residual_flag;
        }
    }
}

/// Tests a statement row against a surface definition.
///
/// The row's authoritative concept comes from the crosswalk entry for its `qname`
/// when there is one; otherwise a non-extension row uses its own `qname`, and an
/// extension row has none.
///
/// * An authoritative match (the authoritative concept is allowed by the
///   definition, or the crosswalk entry names this surface) yields a `Surface`
///   match with rank 0.
/// * Otherwise a source match (`qname` or `local_name` is an allowed source
///   concept) yields rank 1, as a `Detail` match when the definition's rollup
///   policy is `"aggregate_children"` and a `Surface` match otherwise.
/// * Anything else yields `None`.
// NOTE(review): the return type arrived as `Option>` (type arguments stripped);
// restored from the `Some(MatchedStatementRow { .. })` values returned below.
fn match_statement_row<'a>(
    row: &'a StatementRowOutput,
    definition: &SurfaceDefinition,
    crosswalk: Option<&CrosswalkFile>,
) -> Option<MatchedStatementRow<'a>> {
    let authoritative_mapping =
        crosswalk.and_then(|crosswalk| crosswalk.mappings.get(&row.qname));
    let authoritative_concept_key = authoritative_mapping
        .map(|mapping| mapping.authoritative_concept_key.clone())
        .or_else(|| (!row.is_extension).then(|| row.qname.clone()));

    let matches_authoritative = authoritative_concept_key.as_ref().map_or(false, |concept| {
        definition
            .allowed_authoritative_concepts
            .iter()
            .any(|candidate| candidate_matches(candidate, concept))
    }) || authoritative_mapping
        .map(|mapping| mapping.surface_key == definition.surface_key)
        .unwrap_or(false);
    if matches_authoritative {
        return Some(MatchedStatementRow {
            row,
            authoritative_concept_key,
            mapping_method: MappingMethod::AuthoritativeDirect,
            match_role: MatchRole::Surface,
            rank: 0,
        });
    }

    let matches_source = definition.allowed_source_concepts.iter().any(|candidate| {
        candidate_matches(candidate, &row.qname) || candidate_matches(candidate, &row.local_name)
    });
    if matches_source {
        return Some(MatchedStatementRow {
            row,
            authoritative_concept_key,
            mapping_method: MappingMethod::DirectSourceConcept,
            match_role: if definition.rollup_policy == "aggregate_children" {
                MatchRole::Detail
            } else {
                MatchRole::Surface
            },
            rank: 1,
        });
    }

    None
}

/// Picks the preferred match: lowest rank, then rows without dimensions, then
/// lowest row order, then largest absolute value, then label.
///
/// # Panics
/// Panics when `matches` is empty.
fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
    matches
        .iter()
        .min_by(|left, right| {
            left.rank
                .cmp(&right.rank)
                .then_with(|| {
                    let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
                    let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
                    left_dimension_rank.cmp(&right_dimension_rank)
                })
                .then_with(|| left.row.order.cmp(&right.row.order))
                .then_with(|| {
                    // Operands are swapped so the larger magnitude sorts first.
                    max_abs_value(&right.row.values)
                        .partial_cmp(&max_abs_value(&left.row.values))
                        .unwrap_or(std::cmp::Ordering::Equal)
                })
                .then_with(||
left.row.label.cmp(&right.row.label))
        })
        .expect("pick_best_match requires at least one match")
}

// NOTE(review): this file arrived with every `<...>` type-argument list stripped
// (e.g. `-> BTreeMap> {`, `values: Vec) -> Vec`); the type arguments in this
// section were restored from how each value is used.

/// Builds per-period values from the matched rows: a single match contributes its
/// own value, several matches are summed with `sum_nullable_values`.
fn build_surface_values(
    periods: &[PeriodOutput],
    matches: &[MatchedStatementRow<'_>],
) -> BTreeMap<String, Option<f64>> {
    periods
        .iter()
        .map(|period| {
            let value = if matches.len() == 1 {
                matches
                    .first()
                    .and_then(|matched| matched.row.values.get(&period.id).copied())
                    .flatten()
            } else {
                sum_nullable_values(
                    matches
                        .iter()
                        .map(|matched| matched.row.values.get(&period.id).copied().flatten())
                        .collect::<Vec<_>>(),
                )
            };
            (period.id.clone(), value)
        })
        .collect()
}

/// Sums the values, counting missing ones as `0.0`; returns `None` when every
/// value is missing (including for an empty input).
fn sum_nullable_values(values: Vec<Option<f64>>) -> Option<f64> {
    if values.iter().all(|value| value.is_none()) {
        return None;
    }
    Some(values.into_iter().map(|value| value.unwrap_or(0.0)).sum())
}

/// Converts a statement row into a detail row under `parent_surface_key`,
/// applying `sign_transform` to its values.
fn build_detail_row(
    row: &StatementRowOutput,
    parent_surface_key: &str,
    residual_flag: bool,
    sign_transform: Option<SurfaceSignTransform>,
) -> DetailRowOutput {
    DetailRowOutput {
        key: row.key.clone(),
        parent_surface_key: parent_surface_key.to_string(),
        label: row.label.clone(),
        concept_key: row.concept_key.clone(),
        qname: row.qname.clone(),
        namespace_uri: row.namespace_uri.clone(),
        local_name: row.local_name.clone(),
        // First unit that is present, in the iteration order of `row.units`.
        unit: row.units.values().find_map(|value| value.clone()),
        values: transform_values(&row.values, sign_transform),
        source_fact_ids: row.source_fact_ids.clone(),
        is_extension: row.is_extension,
        dimensions_summary: if row.has_dimensions {
            vec!["has_dimensions".to_string()]
        } else {
            vec![]
        },
        residual_flag,
    }
}

/// Applies `sign_transform` to the values of a resolution, leaving its other
/// fields unchanged.
fn apply_sign_transform(
    mut resolution: SurfaceResolution,
    sign_transform: Option<SurfaceSignTransform>,
) -> SurfaceResolution {
    resolution.values = transform_values(&resolution.values, sign_transform);
    resolution
}

/// Returns a copy of `values` with every present amount negated when the transform
/// is `Invert`, or unchanged when there is no transform.
fn transform_values(
    values: &BTreeMap<String, Option<f64>>,
    sign_transform: Option<SurfaceSignTransform>,
) -> BTreeMap<String, Option<f64>> {
    values
        .iter()
        .map(|(period_id, value)| {
            (
                period_id.clone(),
                match sign_transform {
                    Some(SurfaceSignTransform::Invert) => value.map(|amount| -amount),
                    None => *value,
                },
            )
        })
        .collect()
}

/// True when at least one period has a value.
fn has_any_value(values: &BTreeMap<String, Option<f64>>) -> bool {
    values.values().any(|value| value.is_some())
}

/// Largest absolute value across all periods; missing values count as `0.0`.
fn max_abs_value(values: &BTreeMap<String, Option<f64>>) -> f64 {
    values
        .values()
        .fold(0.0_f64, |max, value| max.max(value.unwrap_or(0.0).abs()))
}

/// Materiality baseline for a statement: the largest absolute value of its anchor
/// surface (`total_assets` for `"balance"`, `operating_cash_flow` for
/// `"cash_flow"`, `revenue` otherwise), or `0.0` when that surface is absent.
fn baseline_for_statement(statement: &str, surface_rows: &[SurfaceRowOutput]) -> f64 {
    let anchor_key = match statement {
        "balance" => "total_assets",
        "cash_flow" => "operating_cash_flow",
        _ => "revenue",
    };
    surface_rows
        .iter()
        .find(|row| row.key == anchor_key)
        .map(|row| max_abs_value(&row.values))
        .unwrap_or(0.0)
}

/// Threshold at or above which an unmapped row counts as material: 0.5% of the
/// baseline with a floor of 5,000,000 for `"balance"`, otherwise 1% of the
/// baseline with a floor of 1,000,000.
fn materiality_threshold(statement: &str, baseline: f64) -> f64 {
    if statement == "balance" {
        return (baseline * 0.005).max(5_000_000.0);
    }
    (baseline * 0.01).max(1_000_000.0)
}

/// Returns the distinct strings in ascending order.
fn unique_sorted_strings(mut values: Vec<String>) -> Vec<String> {
    // Sorting then dropping adjacent duplicates gives the same result as the
    // previous hash-set round trip, without the intermediate collections.
    values.sort_unstable();
    values.dedup();
    values
}

/// Returns the distinct integers in ascending order.
fn unique_sorted_i64(mut values: Vec<i64>) -> Vec<i64> {
    values.sort_unstable();
    values.dedup();
    values
}

/// ASCII-case-insensitive concept comparison that also accepts a match between
/// one side's local name (the part after its last `:`) and the whole other side.
fn candidate_matches(candidate: &str, actual: &str) -> bool {
    candidate.eq_ignore_ascii_case(actual)
        || candidate
            .rsplit_once(':')
            .map(|(_, local_name)| local_name.eq_ignore_ascii_case(actual))
            .unwrap_or(false)
        || actual
            .rsplit_once(':')
            .map(|(_, local_name)| local_name.eq_ignore_ascii_case(candidate))
            .unwrap_or(false)
}

/// The statement keys handled by this module, in processing order.
fn statement_keys() -> [&'static str; 5] {
    [
        "income",
        "balance",
        "cash_flow",
        "equity",
        "comprehensive_income",
    ]
}

/// A surface-row map with an empty entry for every statement key.
fn empty_surface_row_map() -> SurfaceRowMap {
    statement_keys()
        .into_iter()
        .map(|key| (key.to_string(), Vec::new()))
        .collect()
}

/// A detail-row map with an empty entry for every statement key.
fn empty_detail_row_map() -> DetailRowStatementMap {
    statement_keys()
        .into_iter()
        .map(|key| (key.to_string(), BTreeMap::new()))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pack_selector::FiscalPack;
    use crate::{PeriodOutput, StatementRowOutput};

    fn period(id: &str) -> PeriodOutput {
        PeriodOutput {
            id: id.to_string(),
            filing_id: 1,
            accession_number: "0000000000-00-000001".to_string(),
            filing_date: "2025-12-31".to_string(),
            period_start: Some("2025-01-01".to_string()),
period_end: Some("2025-12-31".to_string()), filing_type: "10-K".to_string(), period_label: id.to_string(), } } fn row(key: &str, qname: &str, statement: &str, value: f64) -> StatementRowOutput { row_with_values( key, qname, statement, BTreeMap::from([("p1".to_string(), Some(value))]), ) } fn row_with_values( key: &str, qname: &str, statement: &str, values: BTreeMap>, ) -> StatementRowOutput { StatementRowOutput { key: key.to_string(), label: key.to_string(), concept_key: format!( "http://fasb.org/us-gaap/2024#{}", qname.split(':').nth(1).unwrap_or(key) ), qname: qname.to_string(), namespace_uri: "http://fasb.org/us-gaap/2024".to_string(), local_name: qname.split(':').nth(1).unwrap_or(key).to_string(), is_extension: false, statement: statement.to_string(), role_uri: Some(statement.to_string()), order: 1, depth: 0, parent_key: None, units: values .keys() .map(|period_id| (period_id.clone(), Some("iso4217:USD".to_string()))) .collect(), values, has_dimensions: false, source_fact_ids: vec![1], } } fn empty_map() -> StatementRowMap { BTreeMap::from([ ("income".to_string(), Vec::new()), ("balance".to_string(), Vec::new()), ("cash_flow".to_string(), Vec::new()), ("equity".to_string(), Vec::new()), ("comprehensive_income".to_string(), Vec::new()), ]) } #[test] fn prefers_direct_authoritative_row_over_child_aggregation() { let mut rows = empty_map(); rows.get_mut("income").unwrap().extend([ row("op-expenses", "us-gaap:OperatingExpenses", "income", 40.0), row( "sga", "us-gaap:SellingGeneralAndAdministrativeExpense", "income", 25.0, ), row( "rd", "us-gaap:ResearchAndDevelopmentExpense", "income", 15.0, ), ]); let model = build_compact_surface_model( &[period("p1")], &rows, "us-gaap", FiscalPack::Core, vec![], ) .expect("compact model should build"); let op_expenses = model .surface_rows .get("income") .unwrap() .iter() .find(|row| row.key == "operating_expenses") .unwrap(); assert_eq!(op_expenses.values.get("p1").copied().flatten(), Some(40.0)); 
assert_eq!(op_expenses.detail_count, Some(2)); } #[test] fn emits_unmapped_residual_rows() { let mut rows = empty_map(); rows.get_mut("income") .unwrap() .push(row("custom", "company:CustomMetric", "income", 12.0)); let model = build_compact_surface_model( &[period("p1")], &rows, "us-gaap", FiscalPack::Core, vec![], ) .expect("compact model should build"); let residual_rows = model .detail_rows .get("income") .unwrap() .get("unmapped") .unwrap(); assert_eq!(residual_rows.len(), 1); assert!(residual_rows[0].residual_flag); } #[test] fn flattens_balance_aliases_and_prunes_balance_unmapped_rows() { let mut rows = empty_map(); rows.get_mut("balance").unwrap().extend([ row( "receivable-primary", "us-gaap:AccountsReceivableNetCurrent", "balance", 25.0, ), row( "receivable-alias", "us-gaap:ReceivablesNetCurrent", "balance", 25.0, ), row( "custom-balance", "company:CustomBalanceMetric", "balance", 9.0, ), ]); let model = build_compact_surface_model( &[period("p1")], &rows, "us-gaap", FiscalPack::Core, vec![], ) .expect("compact model should build"); let receivables = model .surface_rows .get("balance") .unwrap() .iter() .find(|row| row.key == "accounts_receivable") .unwrap(); assert_eq!(receivables.values.get("p1").copied().flatten(), Some(25.0)); assert_eq!( receivables.source_row_keys, vec![ "receivable-alias".to_string(), "receivable-primary".to_string() ] ); assert_eq!( receivables .resolved_source_row_keys .get("p1") .cloned() .flatten(), Some("receivable-alias".to_string()) ); let unmapped = model .detail_rows .get("balance") .and_then(|groups| groups.get("unmapped")) .cloned() .unwrap_or_default(); assert_eq!(unmapped.len(), 1); assert_eq!(unmapped[0].key, "custom-balance"); } #[test] fn merges_period_sparse_balance_aliases_into_one_row() { let mut rows = empty_map(); rows.get_mut("balance").unwrap().extend([ row_with_values( "receivable-p1", "us-gaap:AccountsReceivableNetCurrent", "balance", BTreeMap::from([("p1".to_string(), Some(10.0)), ("p2".to_string(), 
None)]), ), row_with_values( "receivable-p2", "us-gaap:ReceivablesNetCurrent", "balance", BTreeMap::from([("p1".to_string(), None), ("p2".to_string(), Some(18.0))]), ), ]); let periods = vec![ period("p1"), PeriodOutput { id: "p2".to_string(), filing_id: 2, accession_number: "0000000000-00-000002".to_string(), filing_date: "2026-12-31".to_string(), period_start: Some("2026-01-01".to_string()), period_end: Some("2026-12-31".to_string()), filing_type: "10-K".to_string(), period_label: "p2".to_string(), }, ]; let model = build_compact_surface_model(&periods, &rows, "us-gaap", FiscalPack::Core, vec![]) .expect("compact model should build"); let receivables = model .surface_rows .get("balance") .unwrap() .iter() .find(|row| row.key == "accounts_receivable") .unwrap(); assert_eq!(receivables.values.get("p1").copied().flatten(), Some(10.0)); assert_eq!(receivables.values.get("p2").copied().flatten(), Some(18.0)); assert_eq!( receivables .resolved_source_row_keys .get("p1") .cloned() .flatten(), Some("receivable-p1".to_string()) ); assert_eq!( receivables .resolved_source_row_keys .get("p2") .cloned() .flatten(), Some("receivable-p2".to_string()) ); } #[test] fn derives_balance_formula_rows_and_hides_helper_surfaces() { let mut rows = empty_map(); rows.get_mut("balance").unwrap().extend([ row( "cash", "us-gaap:CashAndCashEquivalentsAtCarryingValue", "balance", 100.0, ), row( "marketable", "us-gaap:ShortTermInvestments", "balance", 40.0, ), row("ap", "us-gaap:AccountsPayableCurrent", "balance", 30.0), row( "deferred-current", "us-gaap:DeferredRevenueCurrent", "balance", 15.0, ), row( "deferred-noncurrent", "us-gaap:DeferredRevenueNoncurrent", "balance", 5.0, ), row("short-debt", "us-gaap:ShortTermBorrowings", "balance", 10.0), row( "long-debt", "us-gaap:LongTermDebtNoncurrent", "balance", 50.0, ), ]); let model = build_compact_surface_model( &[period("p1")], &rows, "us-gaap", FiscalPack::Core, vec![], ) .expect("compact model should build"); let balance_rows = 
model.surface_rows.get("balance").unwrap();
        let total_cash = balance_rows
            .iter()
            .find(|row| row.key == "total_cash_and_equivalents")
            .unwrap();
        let unearned_revenue = balance_rows
            .iter()
            .find(|row| row.key == "unearned_revenue")
            .unwrap();
        let total_debt = balance_rows
            .iter()
            .find(|row| row.key == "total_debt")
            .unwrap();
        let net_cash = balance_rows
            .iter()
            .find(|row| row.key == "net_cash_position")
            .unwrap();

        assert_eq!(total_cash.values.get("p1").copied().flatten(), Some(140.0));
        assert_eq!(
            unearned_revenue.values.get("p1").copied().flatten(),
            Some(20.0)
        );
        assert_eq!(total_debt.values.get("p1").copied().flatten(), Some(60.0));
        assert_eq!(net_cash.values.get("p1").copied().flatten(), Some(80.0));
        assert!(balance_rows
            .iter()
            .all(|row| row.key != "deferred_revenue_current"
                && row.key != "deferred_revenue_noncurrent"));
        assert!(model
            .detail_rows
            .get("balance")
            .unwrap()
            .get("deferred_revenue_current")
            .is_none());
    }

    /// Builds the model with the `BankLender` pack and checks that the
    /// `cash_and_equivalents`, `loans` and `deposits` balance surfaces exist
    /// and carry the expected categories.
    #[test]
    fn merges_core_balance_rows_into_sector_packs() {
        let mut statements = empty_map();
        let balance_inputs = statements.get_mut("balance").unwrap();
        balance_inputs.extend([
            row(
                "cash",
                "us-gaap:CashAndCashEquivalentsAtCarryingValue",
                "balance",
                20.0,
            ),
            row(
                "loans",
                "us-gaap:LoansReceivableNetReportedAmount",
                "balance",
                80.0,
            ),
            row("deposits", "us-gaap:DepositsLiabilities", "balance", 70.0),
        ]);

        let periods = [period("p1")];
        let model = build_compact_surface_model(
            &periods,
            &statements,
            "us-gaap",
            FiscalPack::BankLender,
            Vec::new(),
        )
        .expect("compact model should build");

        let balance_rows = model.surface_rows.get("balance").unwrap();
        // Single lookup helper; panics (failing the test) when a surface is missing.
        let surface = |key: &str| balance_rows.iter().find(|row| row.key == key).unwrap();
        let cash = surface("cash_and_equivalents");
        let loans = surface("loans");
        let deposits = surface("deposits");

        assert_eq!(cash.category, "current_assets");
        assert_eq!(loans.category, "noncurrent_assets");
        assert_eq!(deposits.category, "current_liabilities");
    }

    /// Builds the model with the `Insurance` pack and checks that the DAC/VOBA
    /// concept lands on the `deferred_acquisition_costs` surface, and that only
    /// the custom company row remains in the `unmapped` detail group.
    #[test]
    fn maps_insurance_deferred_acquisition_costs_and_prunes_unmapped_rows() {
        let dac_concept = "us-gaap:DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired";

        let mut statements = empty_map();
        let balance_inputs = statements.get_mut("balance").unwrap();
        balance_inputs.extend([
            row("dac-voba", dac_concept, "balance", 2106.0),
            row(
                "custom-balance",
                "company:CustomInsuranceBalanceMetric",
                "balance",
                12.0,
            ),
        ]);

        let periods = [period("p1")];
        let model = build_compact_surface_model(
            &periods,
            &statements,
            "us-gaap",
            FiscalPack::Insurance,
            Vec::new(),
        )
        .expect("compact model should build");

        let balance_rows = model.surface_rows.get("balance").unwrap();
        let dac = balance_rows
            .iter()
            .find(|candidate| candidate.key == "deferred_acquisition_costs")
            .unwrap();

        assert_eq!(dac.category, "noncurrent_assets");
        assert_eq!(dac.values.get("p1").and_then(|value| *value), Some(2106.0));
        assert_eq!(dac.source_concepts, vec![dac_concept.to_string()]);
        assert_eq!(dac.source_row_keys, vec![String::from("dac-voba")]);

        // A missing statement or a missing "unmapped" group both count as empty.
        let unmapped = match model.detail_rows.get("balance") {
            Some(groups) => groups.get("unmapped").cloned().unwrap_or_default(),
            None => Default::default(),
        };
        assert_eq!(unmapped.len(), 1);
        assert_eq!(unmapped[0].key, "custom-balance");
    }

    /// Builds the model with the `Core` pack from raw cash-flow rows and checks
    /// the resulting surface values (including their signs), then verifies that
    /// the intermediate helper keys are absent from the surface rows.
    #[test]
    fn derives_cash_flow_rows_applies_signs_and_hides_helper_surfaces() {
        let mut statements = empty_map();
        let cash_flow_inputs = statements.get_mut("cash_flow").unwrap();
        cash_flow_inputs.extend([
            row(
                "cfo",
                "us-gaap:NetCashProvidedByUsedInOperatingActivities",
                "cash_flow",
                120.0,
            ),
            row(
                "capex",
                "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment",
                "cash_flow",
                35.0,
            ),
            row("debt-repaid", "us-gaap:RepaymentsOfDebt", "cash_flow", 14.0),
            row(
                "share-repurchase",
                "us-gaap:PaymentsForRepurchaseOfCommonStock",
                "cash_flow",
                11.0,
            ),
            row(
                "contract-incurred",
                "us-gaap:ContractWithCustomerLiabilityIncurred",
                "cash_flow",
                40.0,
            ),
            row(
                "contract-recognized",
                "us-gaap:ContractWithCustomerLiabilityRevenueRecognized",
                "cash_flow",
                15.0,
            ),
            row(
                "other-current-assets",
                "us-gaap:IncreaseDecreaseInOtherCurrentAssets",
                "cash_flow",
                6.0,
            ),
            row(
                "other-current-liabilities",
                "us-gaap:IncreaseDecreaseInOtherCurrentLiabilities",
                "cash_flow",
                4.0,
            ),
        ]);

        let periods = [period("p1")];
        let model = build_compact_surface_model(
            &periods,
            &statements,
            "us-gaap",
            FiscalPack::Core,
            Vec::new(),
        )
        .expect("compact model should build");

        let cash_flow_rows = model.surface_rows.get("cash_flow").unwrap();
        // Single lookup helper; panics (failing the test) when a surface is missing.
        let surface = |key: &str| cash_flow_rows.iter().find(|row| row.key == key).unwrap();
        let capex = surface("capital_expenditures");
        let debt_repaid = surface("debt_repaid");
        let repurchases = surface("share_repurchases");
        let changes_unearned = surface("changes_unearned_revenue");
        let changes_other = surface("changes_other_operating_activities");
        let free_cash_flow = surface("free_cash_flow");

        assert_eq!(capex.values.get("p1").and_then(|value| *value), Some(-35.0));
        assert_eq!(
            debt_repaid.values.get("p1").and_then(|value| *value),
            Some(-14.0)
        );
        assert_eq!(
            repurchases.values.get("p1").and_then(|value| *value),
            Some(-11.0)
        );
        assert_eq!(
            changes_unearned.values.get("p1").and_then(|value| *value),
            Some(25.0)
        );
        assert_eq!(
            changes_other.values.get("p1").and_then(|value| *value),
            Some(-10.0)
        );
        assert_eq!(
            free_cash_flow.values.get("p1").and_then(|value| *value),
            Some(85.0)
        );

        // None of the helper keys may show up as a surface row.
        let hidden_helpers = [
            "contract_liability_incurred",
            "contract_liability_recognized",
            "changes_other_current_assets",
            "changes_other_current_liabilities",
        ];
        assert!(!cash_flow_rows
            .iter()
            .any(|row| hidden_helpers.iter().any(|helper| row.key == *helper)));
    }

    /// Builds the model with the `Core` pack from two capex-related rows plus a
    /// custom row, and checks the `capital_expenditures` value and that only
    /// the custom row is left in the `unmapped` detail group.
    #[test]
    fn prunes_consumed_cash_flow_rows_from_unmapped() {
        let mut statements = empty_map();
        let cash_flow_inputs = statements.get_mut("cash_flow").unwrap();
        cash_flow_inputs.extend([
            row(
                "capex-primary",
                "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment",
                "cash_flow",
                20.0,
            ),
            row(
                "capex-alias",
                "us-gaap:CapitalExpendituresIncurredButNotYetPaid",
                "cash_flow",
                20.0,
            ),
            row(
                "custom-cash-flow",
                "company:CustomCashFlowMetric",
                "cash_flow",
                8.0,
            ),
        ]);

        let periods = [period("p1")];
        let model = build_compact_surface_model(
            &periods,
            &statements,
            "us-gaap",
            FiscalPack::Core,
            Vec::new(),
        )
        .expect("compact model should build");

        let cash_flow_rows = model.surface_rows.get("cash_flow").unwrap();
        let capex = cash_flow_rows
            .iter()
            .find(|candidate| candidate.key == "capital_expenditures")
            .unwrap();
        assert_eq!(capex.values.get("p1").and_then(|value| *value), Some(-20.0));

        // A missing statement or a missing "unmapped" group both count as empty.
        let unmapped = match model.detail_rows.get("cash_flow") {
            Some(groups) => groups.get("unmapped").cloned().unwrap_or_default(),
            None => Default::default(),
        };
        assert_eq!(unmapped.len(), 1);
        assert_eq!(unmapped[0].key, "custom-cash-flow");
    }

    /// Builds the model with the `Insurance` pack and checks that the
    /// `operating_cash_flow`, `capital_expenditures` and `free_cash_flow`
    /// cash-flow surfaces are present with the expected categories and values.
    #[test]
    fn merges_core_cash_flow_rows_into_sector_packs() {
        let mut statements = empty_map();
        let cash_flow_inputs = statements.get_mut("cash_flow").unwrap();
        cash_flow_inputs.extend([
            row(
                "cfo",
                "us-gaap:NetCashProvidedByUsedInOperatingActivities",
                "cash_flow",
                90.0,
            ),
            row(
                "capex",
                "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment",
                "cash_flow",
                25.0,
            ),
        ]);

        let periods = [period("p1")];
        let model = build_compact_surface_model(
            &periods,
            &statements,
            "us-gaap",
            FiscalPack::Insurance,
            Vec::new(),
        )
        .expect("compact model should build");

        let cash_flow_rows = model.surface_rows.get("cash_flow").unwrap();
        // Single lookup helper; panics (failing the test) when a surface is missing.
        let surface = |key: &str| cash_flow_rows.iter().find(|row| row.key == key).unwrap();
        let cfo = surface("operating_cash_flow");
        let capex = surface("capital_expenditures");
        let free_cash_flow = surface("free_cash_flow");

        assert_eq!(cfo.category, "operating");
        assert_eq!(capex.category, "investing");
        assert_eq!(capex.values.get("p1").and_then(|value| *value), Some(-25.0));
        assert_eq!(
            free_cash_flow.values.get("p1").and_then(|value| *value),
            Some(65.0)
        );
    }

    /// Builds the model with the `ReitRealEstate` pack and checks that the two
    /// property-spending rows combine into `capital_expenditures` while the
    /// subsidiary-interest row is reported separately under `acquisitions`.
    #[test]
    fn reit_cash_flow_override_keeps_capex_separate_from_acquisitions() {
        let mut statements = empty_map();
        let cash_flow_inputs = statements.get_mut("cash_flow").unwrap();
        cash_flow_inputs.extend([
            row(
                "reit-capex",
                "us-gaap:PaymentsToAcquireCommercialRealEstate",
                "cash_flow",
                300.0,
            ),
            row(
                "capital-improvements",
                "us-gaap:PaymentsForCapitalImprovements",
                "cash_flow",
                20.0,
            ),
            row(
                "reit-acquisition",
                "us-gaap:PaymentsToAcquireInterestInSubsidiariesAndAffiliates",
                "cash_flow",
                15.0,
            ),
        ]);

        let periods = [period("p1")];
        let model = build_compact_surface_model(
            &periods,
            &statements,
            "us-gaap",
            FiscalPack::ReitRealEstate,
            Vec::new(),
        )
        .expect("compact model should build");

        let cash_flow_rows = model.surface_rows.get("cash_flow").unwrap();
        // Single lookup helper; panics (failing the test) when a surface is missing.
        let surface = |key: &str| cash_flow_rows.iter().find(|row| row.key == key).unwrap();
        let capex = surface("capital_expenditures");
        let acquisitions = surface("acquisitions");

        assert_eq!(
            capex.values.get("p1").and_then(|value| *value),
            Some(-320.0)
        );
        assert_eq!(
            acquisitions.values.get("p1").and_then(|value| *value),
            Some(-15.0)
        );
    }
}