From 249131ec00c810269c9356fa8e301e1b6459d938 Mon Sep 17 00:00:00 2001 From: francy51 Date: Sat, 21 Mar 2026 01:35:49 -0400 Subject: [PATCH] Fix MSFT expense normalization fallback --- rust/fiscal-xbrl-core/src/universal_income.rs | 271 +++++++++++++++++- .../fiscal/v1/core.income-bridge.json | 1 + 2 files changed, 260 insertions(+), 12 deletions(-) diff --git a/rust/fiscal-xbrl-core/src/universal_income.rs b/rust/fiscal-xbrl-core/src/universal_income.rs index f0effe0..7fcc167 100644 --- a/rust/fiscal-xbrl-core/src/universal_income.rs +++ b/rust/fiscal-xbrl-core/src/universal_income.rs @@ -352,6 +352,7 @@ fn resolve_universal_row( bridge_row, periods, income_statement_rows, + facts, income_surface_rows, crosswalk, ); @@ -391,6 +392,7 @@ fn resolve_universal_row( bridge_row, periods, income_statement_rows, + facts, income_surface_rows, crosswalk, ) @@ -401,6 +403,7 @@ fn build_formula_row( bridge_row: &IncomeBridgeRow, periods: &[PeriodOutput], income_statement_rows: &[StatementRowOutput], + facts: &[FactOutput], income_surface_rows: &[SurfaceRowOutput], crosswalk: Option<&CrosswalkFile>, ) -> ResolvedUniversalRow { @@ -432,12 +435,16 @@ fn build_formula_row( .collect::>(); let (positive_group_sources, positive_group_rows) = collect_group_sources( + periods, income_statement_rows, + facts, &bridge_row.component_concept_groups.positive, crosswalk, ); let (negative_group_sources, negative_group_rows) = collect_group_sources( + periods, income_statement_rows, + facts, &bridge_row.component_concept_groups.negative, crosswalk, ); @@ -728,10 +735,8 @@ fn build_residual_other_operating_expense_row( .sum::(); let residual = operating_value - sga_value - component_total; - let period_value = if residual.abs() <= RESIDUAL_TOLERANCE || residual <= 0.0 { - if residual.abs() <= RESIDUAL_TOLERANCE { - suppressed_zero_residual = true; - } + let period_value = if residual.abs() <= RESIDUAL_TOLERANCE { + suppressed_zero_residual = true; None } else { used_sources.push(surface_source(operating_expenses)); @@ -1097,7 +1102,9 @@ fn null_surface_row( } fn collect_group_sources<'a>( + periods: &[PeriodOutput], income_statement_rows: &'a [StatementRowOutput], + facts: &'a [FactOutput], groups: &[IncomeBridgeConceptGroup], crosswalk: Option<&CrosswalkFile>, ) -> (Vec, Vec<&'a StatementRowOutput>) { @@ -1119,15 +1126,109 @@ fn collect_group_sources<'a>( }) .collect::>(); + let has_statement_values = matches.iter().any(|row| has_any_value(&row.values)); + for row in matches { sources.push(statement_row_source(row)); rows.push(row); } + + if has_statement_values { + continue; + } + + if let Some(source) = collect_group_fact_source(periods, facts, group, crosswalk) { + sources.push(source); + } } (sources, rows) } +fn collect_group_fact_source( + periods: &[PeriodOutput], + facts: &[FactOutput], + group: &IncomeBridgeConceptGroup, + crosswalk: Option<&CrosswalkFile>, +) -> Option { + let matches = facts + .iter() + .filter(|fact| { + fact.statement_kind.as_deref() == Some("income") || fact.statement_kind.is_none() + }) + .filter(|fact| { + group.concepts.iter().any(|candidate| { + candidate_matches(candidate, &fact.qname) + || candidate_matches(candidate, &fact.local_name) + || fact_authoritative_concept_key(fact, crosswalk) + .map(|concept| candidate_matches(candidate, &concept)) + .unwrap_or(false) + }) + }) + .collect::>(); + + if matches.is_empty() { + return None; + } + + let mut values = BTreeMap::>::new(); + let mut matched_facts = Vec::<&FactOutput>::new(); + + for period in periods { + let period_matches = matches + .iter() + .copied() + .filter(|fact| fact_matches_period(fact, period)) + .collect::>(); + let mut canonical_matches = BTreeMap::::new(); + + for fact in period_matches { + let canonical_key = fact_authoritative_concept_key(fact, crosswalk) + .unwrap_or_else(|| fact.qname.clone()); + canonical_matches + .entry(canonical_key) + .and_modify(|current| { + if compare_fact_matches_raw(fact, current).is_lt() { + *current = fact; + } + }) + .or_insert(fact); + } + + let period_facts = canonical_matches.values().copied().collect::>(); + let period_value = if period_facts.is_empty() { + None + } else { + Some(period_facts.iter().map(|fact| fact.value_num).sum()) + }; + + values.insert(period.id.clone(), period_value); + matched_facts.extend(period_facts); + } + + if !has_any_value(&values) { + return None; + } + + Some(ValueSource { + values, + source_concepts: unique_sorted_strings( + matched_facts + .iter() + .map(|fact| fact.qname.clone()) + .collect::>(), + ), + source_row_keys: unique_sorted_strings( + matched_facts + .iter() + .map(|fact| fact.concept_key.clone()) + .collect::>(), + ), + source_fact_ids: vec![], + has_dimensions: matched_facts.iter().any(|fact| !fact.is_dimensionless), + }) +} + fn find_surface_row<'a>( income_surface_rows: &'a [SurfaceRowOutput], key: &str, @@ -1324,7 +1425,9 @@ fn match_direct_facts<'a>( ) -> Option>> { let income_facts = facts .iter() - .filter(|fact| fact.statement_kind.as_deref() == Some("income")) + .filter(|fact| { + fact.statement_kind.as_deref() == Some("income") || fact.statement_kind.is_none() + }) .collect::>(); let mut matches = BTreeMap::>::new(); @@ -1448,22 +1551,37 @@ fn compare_statement_matches( } fn compare_fact_matches(left: &MatchedFact<'_>, right: &MatchedFact<'_>) -> std::cmp::Ordering { - left.rank - .cmp(&right.rank) + compare_fact_matches_raw(left.fact, right.fact) + .then_with(|| left.rank.cmp(&right.rank)) + .then_with(|| left.fact.local_name.cmp(&right.fact.local_name)) +} + +fn compare_fact_matches_raw(left: &FactOutput, right: &FactOutput) -> std::cmp::Ordering { + let left_statement_rank = if left.statement_kind.as_deref() == Some("income") { + 0 + } else { + 1 + }; + let right_statement_rank = if right.statement_kind.as_deref() == Some("income") { + 0 + } else { + 1 + }; + + left_statement_rank + .cmp(&right_statement_rank) .then_with(|| { - let left_dimension_rank = if left.fact.is_dimensionless { 0 } else { 1 }; - let right_dimension_rank = if right.fact.is_dimensionless { 0 } else { 1 }; + let left_dimension_rank = if left.is_dimensionless { 0 } else { 1 }; + let right_dimension_rank = if right.is_dimensionless { 0 } else { 1 }; left_dimension_rank.cmp(&right_dimension_rank) }) .then_with(|| { right - .fact .value_num .abs() - .partial_cmp(&left.fact.value_num.abs()) + .partial_cmp(&left.value_num.abs()) .unwrap_or(std::cmp::Ordering::Equal) }) - .then_with(|| left.fact.local_name.cmp(&right.fact.local_name)) } fn statement_row_source(row: &StatementRowOutput) -> ValueSource { @@ -1893,6 +2011,16 @@ mod tests { } } + fn fact_with_statement_kind( + local_name: &str, + value: f64, + statement_kind: Option<&str>, + ) -> FactOutput { + let mut fact = fact(local_name, value); + fact.statement_kind = statement_kind.map(|value| value.to_string()); + fact + } + #[test] fn derives_gross_profit_from_revenue_minus_cost_of_revenue() { let mut rows = empty_rows(); @@ -2047,6 +2175,93 @@ mod tests { assert_eq!(detail_rows.len(), 2); } + #[test] + fn derives_sga_from_matching_facts_when_statement_rows_are_empty() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().extend([ + row_with_values( + "sales-and-marketing-empty", + "unknown:SellingAndMarketingExpense", + BTreeMap::new(), + ), + row_with_values( + "g-and-a-empty", + "unknown:GeneralAndAdministrativeExpense", + BTreeMap::new(), + ), + ]); + let facts = vec![ + fact_with_statement_kind("SellingAndMarketingExpense", 30.0, None), + fact_with_statement_kind("GeneralAndAdministrativeExpense", 10.0, None), + ]; + let mut model = empty_model(); + + apply_universal_income_rows( + &[period("p1")], + &rows, + &facts, + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); + + let sga = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "selling_general_and_administrative") + .unwrap(); + + assert_eq!(sga.values.get("p1").copied().flatten(), Some(40.0)); + assert_eq!(sga.resolution_method.as_deref(), Some("formula_derived")); + assert!(sga + .source_concepts + .contains(&"us-gaap:GeneralAndAdministrativeExpense".to_string())); + assert!(sga + .source_concepts + .contains(&"us-gaap:SellingAndMarketingExpense".to_string())); + } + + #[test] + fn derives_operating_expenses_from_selling_and_marketing_alias_facts() { + let rows = empty_rows(); + let facts = vec![ + fact_with_statement_kind("SellingAndMarketingExpense", 30.0, None), + fact_with_statement_kind("GeneralAndAdministrativeExpense", 10.0, None), + fact_with_statement_kind("ResearchAndDevelopmentExpense", 40.0, Some("income")), + ]; + let mut model = empty_model(); + + apply_universal_income_rows( + &[period("p1")], + &rows, + &facts, + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); + + let operating_expenses = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "operating_expenses") + .unwrap(); + + assert_eq!( + operating_expenses.values.get("p1").copied().flatten(), + Some(80.0) + ); + assert_eq!( + operating_expenses.resolution_method.as_deref(), + Some("formula_derived") + ); + } + #[test] fn prefers_direct_sga_over_residual_inference() { let mut rows = empty_rows(); @@ -2131,6 +2346,38 @@ mod tests { .contains(&"other_operating_expense_suppressed_zero_residual".to_string())); } + #[test] + fn keeps_negative_other_operating_expense_residuals() { + let rows = empty_rows(); + let mut model = empty_model(); + model.surface_rows.get_mut("income").unwrap().extend([ + surface_row("operating_expenses", 100.0), + surface_row("selling_general_and_administrative", 70.0), + surface_row("research_and_development", 40.0), + ]); + + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); + + let other = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "other_operating_expense") + .unwrap(); + + assert_eq!(other.values.get("p1").copied().flatten(), Some(-10.0)); + assert_eq!(other.resolution_method.as_deref(), Some("formula_derived")); + } + #[test] fn derives_sga_from_operating_expenses_before_other_operating_expense_residual() { let rows = empty_rows(); diff --git a/rust/taxonomy/fiscal/v1/core.income-bridge.json b/rust/taxonomy/fiscal/v1/core.income-bridge.json index f2a1500..fb1004b 100644 --- a/rust/taxonomy/fiscal/v1/core.income-bridge.json +++ b/rust/taxonomy/fiscal/v1/core.income-bridge.json @@ -95,6 +95,7 @@ "concepts": [ "us-gaap:SellingGeneralAndAdministrativeExpense", "us-gaap:ResearchAndDevelopmentExpense", + "us-gaap:SellingAndMarketingExpense", "us-gaap:MarketingExpense", "us-gaap:GeneralAndAdministrativeExpense", "us-gaap:LaborAndRelatedExpense",