Fix MSFT expense normalization fallback

2026-03-21 01:35:49 -04:00
parent 3e09e38dfa
commit 249131ec00
2 changed files with 260 additions and 12 deletions
--- a/rust/fiscal-xbrl-core/src/universal_income.rs
+++ b/rust/fiscal-xbrl-core/src/universal_income.rs
@@ -352,6 +352,7 @@ fn resolve_universal_row(
            bridge_row,
            periods,
            income_statement_rows,
+            facts,
            income_surface_rows,
            crosswalk,
        );
@@ -391,6 +392,7 @@ fn resolve_universal_row(
        bridge_row,
        periods,
        income_statement_rows,
+        facts,
        income_surface_rows,
        crosswalk,
    )
@@ -401,6 +403,7 @@ fn build_formula_row(
    bridge_row: &IncomeBridgeRow,
    periods: &[PeriodOutput],
    income_statement_rows: &[StatementRowOutput],
+    facts: &[FactOutput],
    income_surface_rows: &[SurfaceRowOutput],
    crosswalk: Option<&CrosswalkFile>,
 ) -> ResolvedUniversalRow {
@@ -432,12 +435,16 @@ fn build_formula_row(
        .collect::<Vec<_>>();

    let (positive_group_sources, positive_group_rows) = collect_group_sources(
+        periods,
        income_statement_rows,
+        facts,
        &bridge_row.component_concept_groups.positive,
        crosswalk,
    );
    let (negative_group_sources, negative_group_rows) = collect_group_sources(
+        periods,
        income_statement_rows,
+        facts,
        &bridge_row.component_concept_groups.negative,
        crosswalk,
    );
@@ -728,10 +735,8 @@ fn build_residual_other_operating_expense_row(
            .sum::<f64>();
        let residual = operating_value - sga_value - component_total;

-        let period_value = if residual.abs() <= RESIDUAL_TOLERANCE || residual <= 0.0 {
-            if residual.abs() <= RESIDUAL_TOLERANCE {
-                suppressed_zero_residual = true;
-            }
+        let period_value = if residual.abs() <= RESIDUAL_TOLERANCE {
+            suppressed_zero_residual = true;
            None
        } else {
            used_sources.push(surface_source(operating_expenses));
@@ -1097,7 +1102,9 @@ fn null_surface_row(
 }

 fn collect_group_sources<'a>(
+    periods: &[PeriodOutput],
    income_statement_rows: &'a [StatementRowOutput],
+    facts: &'a [FactOutput],
    groups: &[IncomeBridgeConceptGroup],
    crosswalk: Option<&CrosswalkFile>,
 ) -> (Vec<ValueSource>, Vec<&'a StatementRowOutput>) {
@@ -1119,15 +1126,109 @@ fn collect_group_sources<'a>(
            })
            .collect::<Vec<_>>();

+        let has_statement_values = matches.iter().any(|row| has_any_value(&row.values));
+
        for row in matches {
            sources.push(statement_row_source(row));
            rows.push(row);
        }
+
+        if has_statement_values {
+            continue;
+        }
+
+        if let Some(source) = collect_group_fact_source(periods, facts, group, crosswalk) {
+            sources.push(source);
+        }
    }

    (sources, rows)
 }

+/// Builds a fallback `ValueSource` for `group` straight from raw facts.
+///
+/// Only facts whose statement kind is `"income"` or absent are considered, and only those
+/// whose qname, local name, or authoritative concept key matches one of the group's
+/// concepts. For each period, a single fact is kept per canonical concept key (the one that
+/// sorts first under `compare_fact_matches_raw`) and the kept facts' values are summed.
+///
+/// Returns `None` when no fact matches the group, or when `has_any_value` reports that the
+/// assembled per-period map holds no value.
+fn collect_group_fact_source(
+    periods: &[PeriodOutput],
+    facts: &[FactOutput],
+    group: &IncomeBridgeConceptGroup,
+    crosswalk: Option<&CrosswalkFile>,
+) -> Option<ValueSource> {
+    let in_scope = |fact: &FactOutput| {
+        fact.statement_kind.as_deref() == Some("income") || fact.statement_kind.is_none()
+    };
+    let in_group = |fact: &FactOutput| {
+        group.concepts.iter().any(|candidate| {
+            if candidate_matches(candidate, &fact.qname)
+                || candidate_matches(candidate, &fact.local_name)
+            {
+                return true;
+            }
+            // Fall back to the authoritative concept key only when the raw names miss.
+            match fact_authoritative_concept_key(fact, crosswalk) {
+                Some(concept) => candidate_matches(candidate, &concept),
+                None => false,
+            }
+        })
+    };
+
+    let group_facts: Vec<&FactOutput> = facts
+        .iter()
+        .filter(|&fact| in_scope(fact) && in_group(fact))
+        .collect();
+    if group_facts.is_empty() {
+        return None;
+    }
+
+    let mut values: BTreeMap<String, Option<f64>> = BTreeMap::new();
+    let mut contributing: Vec<&FactOutput> = Vec::new();
+
+    for period in periods {
+        // One winning fact per canonical concept key for this period.
+        let mut best_by_concept: BTreeMap<String, &FactOutput> = BTreeMap::new();
+
+        for &fact in &group_facts {
+            if !fact_matches_period(fact, period) {
+                continue;
+            }
+
+            let canonical_key = fact_authoritative_concept_key(fact, crosswalk)
+                .unwrap_or_else(|| fact.qname.clone());
+            match best_by_concept.get_mut(&canonical_key) {
+                Some(current) => {
+                    if compare_fact_matches_raw(fact, current).is_lt() {
+                        *current = fact;
+                    }
+                }
+                None => {
+                    best_by_concept.insert(canonical_key, fact);
+                }
+            }
+        }
+
+        let period_total = if best_by_concept.is_empty() {
+            None
+        } else {
+            Some(
+                best_by_concept
+                    .values()
+                    .map(|fact| fact.value_num)
+                    .sum::<f64>(),
+            )
+        };
+
+        values.insert(period.id.clone(), period_total);
+        contributing.extend(best_by_concept.values().copied());
+    }
+
+    if !has_any_value(&values) {
+        return None;
+    }
+
+    let source_concepts = unique_sorted_strings(
+        contributing
+            .iter()
+            .map(|fact| fact.qname.clone())
+            .collect::<Vec<_>>(),
+    );
+    let source_row_keys = unique_sorted_strings(
+        contributing
+            .iter()
+            .map(|fact| fact.concept_key.clone())
+            .collect::<Vec<_>>(),
+    );
+    let has_dimensions = !contributing.iter().all(|fact| fact.is_dimensionless);
+
+    Some(ValueSource {
+        values,
+        source_concepts,
+        source_row_keys,
+        source_fact_ids: vec![],
+        has_dimensions,
+    })
+}
+
 fn find_surface_row<'a>(
    income_surface_rows: &'a [SurfaceRowOutput],
    key: &str,
@@ -1324,7 +1425,9 @@ fn match_direct_facts<'a>(
 ) -> Option<BTreeMap<String, MatchedFact<'a>>> {
    let income_facts = facts
        .iter()
-        .filter(|fact| fact.statement_kind.as_deref() == Some("income"))
+        .filter(|fact| {
+            fact.statement_kind.as_deref() == Some("income") || fact.statement_kind.is_none()
+        })
        .collect::<Vec<_>>();
    let mut matches = BTreeMap::<String, MatchedFact<'a>>::new();

@@ -1448,22 +1551,37 @@ fn compare_statement_matches(
 }

+/// Orders two matched facts: first by `compare_fact_matches_raw` on the underlying facts,
+/// then by `rank`, then by the fact's `local_name`.
+///
+/// NOTE(review): `rank` used to be the primary sort key; it now only breaks ties left by
+/// statement kind, dimensionality and value magnitude. Confirm this demotion is intended.
 fn compare_fact_matches(left: &MatchedFact<'_>, right: &MatchedFact<'_>) -> std::cmp::Ordering {
-    left.rank
-        .cmp(&right.rank)
+    compare_fact_matches_raw(left.fact, right.fact)
+        .then_with(|| left.rank.cmp(&right.rank))
+        .then_with(|| left.fact.local_name.cmp(&right.fact.local_name))
+}
+
+/// Orders two raw facts without any match-rank information.
+///
+/// Sort keys, in order: facts whose statement kind is `"income"` come before all others;
+/// dimensionless facts come before dimensional ones; the fact with the larger absolute
+/// `value_num` comes first. Values that `partial_cmp` cannot order are treated as equal.
+fn compare_fact_matches_raw(left: &FactOutput, right: &FactOutput) -> std::cmp::Ordering {
+    let left_statement_rank = if left.statement_kind.as_deref() == Some("income") {
+        0
+    } else {
+        1
+    };
+    let right_statement_rank = if right.statement_kind.as_deref() == Some("income") {
+        0
+    } else {
+        1
+    };
+
+    left_statement_rank
+        .cmp(&right_statement_rank)
        .then_with(|| {
-            let left_dimension_rank = if left.fact.is_dimensionless { 0 } else { 1 };
-            let right_dimension_rank = if right.fact.is_dimensionless { 0 } else { 1 };
+            let left_dimension_rank = if left.is_dimensionless { 0 } else { 1 };
+            let right_dimension_rank = if right.is_dimensionless { 0 } else { 1 };
            left_dimension_rank.cmp(&right_dimension_rank)
        })
        .then_with(|| {
+            // Operands are reversed on purpose: the larger magnitude sorts first.
            right
-                .fact
                .value_num
                .abs()
-                .partial_cmp(&left.fact.value_num.abs())
+                .partial_cmp(&left.value_num.abs())
                .unwrap_or(std::cmp::Ordering::Equal)
        })
-        .then_with(|| left.fact.local_name.cmp(&right.fact.local_name))
 }

 fn statement_row_source(row: &StatementRowOutput) -> ValueSource {
@@ -1893,6 +2011,16 @@ mod tests {
        }
    }

+    /// Test helper: builds a fact via `fact(local_name, value)` and then overrides its
+    /// `statement_kind` with the supplied (possibly absent) kind.
+    fn fact_with_statement_kind(
+        local_name: &str,
+        value: f64,
+        statement_kind: Option<&str>,
+    ) -> FactOutput {
+        let mut built = fact(local_name, value);
+        built.statement_kind = statement_kind.map(String::from);
+        built
+    }
+
    #[test]
    fn derives_gross_profit_from_revenue_minus_cost_of_revenue() {
        let mut rows = empty_rows();
@@ -2047,6 +2175,93 @@ mod tests {
        assert_eq!(detail_rows.len(), 2);
    }

+    // Both expense concepts have statement rows, but those rows carry no values, so SG&A
+    // has to be assembled from the matching facts (which have no statement kind).
+    #[test]
+    fn derives_sga_from_matching_facts_when_statement_rows_are_empty() {
+        let facts = vec![
+            fact_with_statement_kind("SellingAndMarketingExpense", 30.0, None),
+            fact_with_statement_kind("GeneralAndAdministrativeExpense", 10.0, None),
+        ];
+
+        let mut rows = empty_rows();
+        let income_rows = rows.get_mut("income").unwrap();
+        income_rows.extend([
+            row_with_values(
+                "sales-and-marketing-empty",
+                "unknown:SellingAndMarketingExpense",
+                BTreeMap::new(),
+            ),
+            row_with_values(
+                "g-and-a-empty",
+                "unknown:GeneralAndAdministrativeExpense",
+                BTreeMap::new(),
+            ),
+        ]);
+
+        let periods = [period("p1")];
+        let mut model = empty_model();
+        apply_universal_income_rows(
+            &periods,
+            &rows,
+            &facts,
+            "us-gaap",
+            FiscalPack::Core,
+            &mut model,
+        )
+        .expect("universal income rows should build");
+
+        let income_surface = model.surface_rows.get("income").unwrap();
+        let sga = income_surface
+            .iter()
+            .find(|row| row.key == "selling_general_and_administrative")
+            .unwrap();
+
+        assert_eq!(sga.values.get("p1").copied().flatten(), Some(40.0));
+        assert_eq!(sga.resolution_method.as_deref(), Some("formula_derived"));
+        for expected_concept in [
+            "us-gaap:GeneralAndAdministrativeExpense",
+            "us-gaap:SellingAndMarketingExpense",
+        ] {
+            assert!(sga
+                .source_concepts
+                .contains(&expected_concept.to_string()));
+        }
+    }
+
+    // With no statement rows at all, operating expenses should be derived from the three
+    // expense facts (30 + 10 + 40), two of which carry no statement kind.
+    #[test]
+    fn derives_operating_expenses_from_selling_and_marketing_alias_facts() {
+        let facts = vec![
+            fact_with_statement_kind("SellingAndMarketingExpense", 30.0, None),
+            fact_with_statement_kind("GeneralAndAdministrativeExpense", 10.0, None),
+            fact_with_statement_kind("ResearchAndDevelopmentExpense", 40.0, Some("income")),
+        ];
+        let rows = empty_rows();
+        let periods = [period("p1")];
+        let mut model = empty_model();
+
+        apply_universal_income_rows(
+            &periods,
+            &rows,
+            &facts,
+            "us-gaap",
+            FiscalPack::Core,
+            &mut model,
+        )
+        .expect("universal income rows should build");
+
+        let income_surface = model.surface_rows.get("income").unwrap();
+        let operating_expenses = income_surface
+            .iter()
+            .find(|row| row.key == "operating_expenses")
+            .unwrap();
+
+        let p1_value = operating_expenses.values.get("p1").copied().flatten();
+        assert_eq!(p1_value, Some(80.0));
+
+        let method = operating_expenses.resolution_method.as_deref();
+        assert_eq!(method, Some("formula_derived"));
+    }
+
    #[test]
    fn prefers_direct_sga_over_residual_inference() {
        let mut rows = empty_rows();
@@ -2131,6 +2346,38 @@ mod tests {
            .contains(&"other_operating_expense_suppressed_zero_residual".to_string()));
    }

+    // Operating expenses (100) minus SG&A (70) minus R&D (40) leaves -10; the negative
+    // residual must be reported rather than dropped.
+    #[test]
+    fn keeps_negative_other_operating_expense_residuals() {
+        let mut model = empty_model();
+        let seeded_rows = model.surface_rows.get_mut("income").unwrap();
+        seeded_rows.extend([
+            surface_row("operating_expenses", 100.0),
+            surface_row("selling_general_and_administrative", 70.0),
+            surface_row("research_and_development", 40.0),
+        ]);
+
+        let rows = empty_rows();
+        let periods = [period("p1")];
+        apply_universal_income_rows(
+            &periods,
+            &rows,
+            &[],
+            "us-gaap",
+            FiscalPack::Core,
+            &mut model,
+        )
+        .expect("universal income rows should build");
+
+        let income_surface = model.surface_rows.get("income").unwrap();
+        let other = income_surface
+            .iter()
+            .find(|row| row.key == "other_operating_expense")
+            .unwrap();
+
+        assert_eq!(other.values.get("p1").copied().flatten(), Some(-10.0));
+        assert_eq!(other.resolution_method.as_deref(), Some("formula_derived"));
+    }
+
    #[test]
    fn derives_sga_from_operating_expenses_before_other_operating_expense_residual() {
        let rows = empty_rows();
--- a/rust/taxonomy/fiscal/v1/core.income-bridge.json
+++ b/rust/taxonomy/fiscal/v1/core.income-bridge.json
@@ -95,6 +95,7 @@
            "concepts": [
              "us-gaap:SellingGeneralAndAdministrativeExpense",
              "us-gaap:ResearchAndDevelopmentExpense",
+              "us-gaap:SellingAndMarketingExpense",
              "us-gaap:MarketingExpense",
              "us-gaap:GeneralAndAdministrativeExpense",
              "us-gaap:LaborAndRelatedExpense",