Fix MSFT expense normalization fallback

This commit is contained in:
2026-03-21 01:35:49 -04:00
parent 3e09e38dfa
commit 249131ec00
2 changed files with 260 additions and 12 deletions

View File

@@ -352,6 +352,7 @@ fn resolve_universal_row(
bridge_row,
periods,
income_statement_rows,
facts,
income_surface_rows,
crosswalk,
);
@@ -391,6 +392,7 @@ fn resolve_universal_row(
bridge_row,
periods,
income_statement_rows,
facts,
income_surface_rows,
crosswalk,
)
@@ -401,6 +403,7 @@ fn build_formula_row(
bridge_row: &IncomeBridgeRow,
periods: &[PeriodOutput],
income_statement_rows: &[StatementRowOutput],
facts: &[FactOutput],
income_surface_rows: &[SurfaceRowOutput],
crosswalk: Option<&CrosswalkFile>,
) -> ResolvedUniversalRow {
@@ -432,12 +435,16 @@ fn build_formula_row(
.collect::<Vec<_>>();
let (positive_group_sources, positive_group_rows) = collect_group_sources(
periods,
income_statement_rows,
facts,
&bridge_row.component_concept_groups.positive,
crosswalk,
);
let (negative_group_sources, negative_group_rows) = collect_group_sources(
periods,
income_statement_rows,
facts,
&bridge_row.component_concept_groups.negative,
crosswalk,
);
@@ -728,10 +735,8 @@ fn build_residual_other_operating_expense_row(
.sum::<f64>();
let residual = operating_value - sga_value - component_total;
let period_value = if residual.abs() <= RESIDUAL_TOLERANCE || residual <= 0.0 {
if residual.abs() <= RESIDUAL_TOLERANCE {
suppressed_zero_residual = true;
}
let period_value = if residual.abs() <= RESIDUAL_TOLERANCE {
suppressed_zero_residual = true;
None
} else {
used_sources.push(surface_source(operating_expenses));
@@ -1097,7 +1102,9 @@ fn null_surface_row(
}
fn collect_group_sources<'a>(
periods: &[PeriodOutput],
income_statement_rows: &'a [StatementRowOutput],
facts: &'a [FactOutput],
groups: &[IncomeBridgeConceptGroup],
crosswalk: Option<&CrosswalkFile>,
) -> (Vec<ValueSource>, Vec<&'a StatementRowOutput>) {
@@ -1119,15 +1126,109 @@ fn collect_group_sources<'a>(
})
.collect::<Vec<_>>();
let has_statement_values = matches.iter().any(|row| has_any_value(&row.values));
for row in matches {
sources.push(statement_row_source(row));
rows.push(row);
}
if has_statement_values {
continue;
}
if let Some(source) = collect_group_fact_source(periods, facts, group, crosswalk) {
sources.push(source);
}
}
(sources, rows)
}
/// Builds a [`ValueSource`] for one concept group directly from raw facts.
///
/// Only facts whose `statement_kind` is `"income"` or absent are considered,
/// and a fact belongs to the group when any of the group's concepts matches
/// its `qname`, its `local_name`, or its authoritative concept key.
///
/// For every period the matching facts are reduced to one fact per canonical
/// key (the authoritative concept key, falling back to the fact's `qname`);
/// when several facts share a key, the one that `compare_fact_matches_raw`
/// orders first is kept. The period value is the sum of the kept facts, or
/// `None` when no fact matches that period.
///
/// Returns `None` when no fact matches the group at all, or when
/// `has_any_value` reports that none of the per-period values is usable.
/// `source_fact_ids` is always left empty by this function.
fn collect_group_fact_source(
    periods: &[PeriodOutput],
    facts: &[FactOutput],
    group: &IncomeBridgeConceptGroup,
    crosswalk: Option<&CrosswalkFile>,
) -> Option<ValueSource> {
    // Candidate facts: income (or untagged) facts that match the group.
    let candidates: Vec<&FactOutput> = facts
        .iter()
        .filter(|fact| {
            let income_or_untagged =
                matches!(fact.statement_kind.as_deref(), None | Some("income"));
            income_or_untagged
                && group.concepts.iter().any(|candidate| {
                    candidate_matches(candidate, &fact.qname)
                        || candidate_matches(candidate, &fact.local_name)
                        || fact_authoritative_concept_key(fact, crosswalk)
                            .map_or(false, |concept| candidate_matches(candidate, &concept))
                })
        })
        .collect();
    if candidates.is_empty() {
        return None;
    }

    let mut values: BTreeMap<String, Option<f64>> = BTreeMap::new();
    let mut contributing: Vec<&FactOutput> = Vec::new();
    for period in periods {
        // Best fact per canonical concept key for this period.
        let mut best_by_key: BTreeMap<String, &FactOutput> = BTreeMap::new();
        for fact in candidates
            .iter()
            .copied()
            .filter(|fact| fact_matches_period(fact, period))
        {
            let key = fact_authoritative_concept_key(fact, crosswalk)
                .unwrap_or_else(|| fact.qname.clone());
            best_by_key
                .entry(key)
                .and_modify(|kept| {
                    // Replace only on a strictly better candidate, so the
                    // first-seen fact wins ties.
                    if compare_fact_matches_raw(fact, kept).is_lt() {
                        *kept = fact;
                    }
                })
                .or_insert(fact);
        }

        // Iteration (and therefore summation) follows the map's key order.
        let winners: Vec<&FactOutput> = best_by_key.values().copied().collect();
        let total = (!winners.is_empty())
            .then(|| winners.iter().map(|fact| fact.value_num).sum::<f64>());
        values.insert(period.id.clone(), total);
        contributing.extend(winners);
    }

    if !has_any_value(&values) {
        return None;
    }

    let source_concepts = unique_sorted_strings(
        contributing
            .iter()
            .map(|fact| fact.qname.clone())
            .collect::<Vec<_>>(),
    );
    let source_row_keys = unique_sorted_strings(
        contributing
            .iter()
            .map(|fact| fact.concept_key.clone())
            .collect::<Vec<_>>(),
    );
    let has_dimensions = contributing.iter().any(|fact| !fact.is_dimensionless);

    Some(ValueSource {
        values,
        source_concepts,
        source_row_keys,
        source_fact_ids: vec![],
        has_dimensions,
    })
}
fn find_surface_row<'a>(
income_surface_rows: &'a [SurfaceRowOutput],
key: &str,
@@ -1324,7 +1425,9 @@ fn match_direct_facts<'a>(
) -> Option<BTreeMap<String, MatchedFact<'a>>> {
let income_facts = facts
.iter()
.filter(|fact| fact.statement_kind.as_deref() == Some("income"))
.filter(|fact| {
fact.statement_kind.as_deref() == Some("income") || fact.statement_kind.is_none()
})
.collect::<Vec<_>>();
let mut matches = BTreeMap::<String, MatchedFact<'a>>::new();
@@ -1448,22 +1551,37 @@ fn compare_statement_matches(
}
fn compare_fact_matches(left: &MatchedFact<'_>, right: &MatchedFact<'_>) -> std::cmp::Ordering {
left.rank
.cmp(&right.rank)
compare_fact_matches_raw(left.fact, right.fact)
.then_with(|| left.rank.cmp(&right.rank))
.then_with(|| left.fact.local_name.cmp(&right.fact.local_name))
}
fn compare_fact_matches_raw(left: &FactOutput, right: &FactOutput) -> std::cmp::Ordering {
let left_statement_rank = if left.statement_kind.as_deref() == Some("income") {
0
} else {
1
};
let right_statement_rank = if right.statement_kind.as_deref() == Some("income") {
0
} else {
1
};
left_statement_rank
.cmp(&right_statement_rank)
.then_with(|| {
let left_dimension_rank = if left.fact.is_dimensionless { 0 } else { 1 };
let right_dimension_rank = if right.fact.is_dimensionless { 0 } else { 1 };
let left_dimension_rank = if left.is_dimensionless { 0 } else { 1 };
let right_dimension_rank = if right.is_dimensionless { 0 } else { 1 };
left_dimension_rank.cmp(&right_dimension_rank)
})
.then_with(|| {
right
.fact
.value_num
.abs()
.partial_cmp(&left.fact.value_num.abs())
.partial_cmp(&left.value_num.abs())
.unwrap_or(std::cmp::Ordering::Equal)
})
.then_with(|| left.fact.local_name.cmp(&right.fact.local_name))
}
fn statement_row_source(row: &StatementRowOutput) -> ValueSource {
@@ -1893,6 +2011,16 @@ mod tests {
}
}
/// Test helper: builds a fact via `fact(local_name, value)` and then
/// overrides its `statement_kind` with the given (optional) kind.
fn fact_with_statement_kind(
    local_name: &str,
    value: f64,
    statement_kind: Option<&str>,
) -> FactOutput {
    let owned_kind = statement_kind.map(str::to_owned);
    let mut built = fact(local_name, value);
    built.statement_kind = owned_kind;
    built
}
#[test]
fn derives_gross_profit_from_revenue_minus_cost_of_revenue() {
let mut rows = empty_rows();
@@ -2047,6 +2175,93 @@ mod tests {
assert_eq!(detail_rows.len(), 2);
}
#[test]
fn derives_sga_from_matching_facts_when_statement_rows_are_empty() {
    // Two facts without a statement kind carry the actual amounts (30 + 10).
    let facts = vec![
        fact_with_statement_kind("SellingAndMarketingExpense", 30.0, None),
        fact_with_statement_kind("GeneralAndAdministrativeExpense", 10.0, None),
    ];

    // The matching statement rows exist but hold no period values at all.
    let mut rows = empty_rows();
    rows.get_mut("income").unwrap().extend([
        row_with_values(
            "sales-and-marketing-empty",
            "unknown:SellingAndMarketingExpense",
            BTreeMap::new(),
        ),
        row_with_values(
            "g-and-a-empty",
            "unknown:GeneralAndAdministrativeExpense",
            BTreeMap::new(),
        ),
    ]);

    let mut model = empty_model();
    let periods = [period("p1")];
    apply_universal_income_rows(
        &periods,
        &rows,
        &facts,
        "us-gaap",
        FiscalPack::Core,
        &mut model,
    )
    .expect("universal income rows should build");

    let income_surface = model.surface_rows.get("income").unwrap();
    let sga = income_surface
        .iter()
        .find(|row| row.key == "selling_general_and_administrative")
        .unwrap();

    // SG&A is the sum of the two facts, and both are reported as sources.
    assert_eq!(sga.values.get("p1").and_then(|value| *value), Some(40.0));
    assert_eq!(sga.resolution_method.as_deref(), Some("formula_derived"));
    for expected_concept in [
        "us-gaap:GeneralAndAdministrativeExpense",
        "us-gaap:SellingAndMarketingExpense",
    ] {
        assert!(sga.source_concepts.contains(&expected_concept.to_string()));
    }
}
#[test]
fn derives_operating_expenses_from_selling_and_marketing_alias_facts() {
    // No statement rows at all: everything must come from the facts below,
    // two without a statement kind and one tagged as an income fact.
    let rows = empty_rows();
    let facts = vec![
        fact_with_statement_kind("SellingAndMarketingExpense", 30.0, None),
        fact_with_statement_kind("GeneralAndAdministrativeExpense", 10.0, None),
        fact_with_statement_kind("ResearchAndDevelopmentExpense", 40.0, Some("income")),
    ];

    let mut model = empty_model();
    let periods = [period("p1")];
    apply_universal_income_rows(
        &periods,
        &rows,
        &facts,
        "us-gaap",
        FiscalPack::Core,
        &mut model,
    )
    .expect("universal income rows should build");

    let income_surface = model.surface_rows.get("income").unwrap();
    let operating_expenses = income_surface
        .iter()
        .find(|row| row.key == "operating_expenses")
        .unwrap();

    // 30 + 10 + 40 = 80.
    let p1_value = operating_expenses.values.get("p1").and_then(|value| *value);
    assert_eq!(p1_value, Some(80.0));
    assert_eq!(
        operating_expenses.resolution_method.as_deref(),
        Some("formula_derived")
    );
}
#[test]
fn prefers_direct_sga_over_residual_inference() {
let mut rows = empty_rows();
@@ -2131,6 +2346,38 @@ mod tests {
.contains(&"other_operating_expense_suppressed_zero_residual".to_string()));
}
#[test]
fn keeps_negative_other_operating_expense_residuals() {
    // Pre-seeded surface rows: 100 total operating expenses against
    // 70 SG&A + 40 R&D, which leaves a residual of -10.
    let mut model = empty_model();
    model.surface_rows.get_mut("income").unwrap().extend([
        surface_row("operating_expenses", 100.0),
        surface_row("selling_general_and_administrative", 70.0),
        surface_row("research_and_development", 40.0),
    ]);

    let rows = empty_rows();
    let periods = [period("p1")];
    apply_universal_income_rows(
        &periods,
        &rows,
        &[],
        "us-gaap",
        FiscalPack::Core,
        &mut model,
    )
    .expect("universal income rows should build");

    let income_surface = model.surface_rows.get("income").unwrap();
    let other = income_surface
        .iter()
        .find(|row| row.key == "other_operating_expense")
        .unwrap();

    // The negative residual is reported as-is rather than dropped.
    assert_eq!(other.values.get("p1").and_then(|value| *value), Some(-10.0));
    assert_eq!(other.resolution_method.as_deref(), Some("formula_derived"));
}
#[test]
fn derives_sga_from_operating_expenses_before_other_operating_expense_residual() {
let rows = empty_rows();

View File

@@ -95,6 +95,7 @@
"concepts": [
"us-gaap:SellingGeneralAndAdministrativeExpense",
"us-gaap:ResearchAndDevelopmentExpense",
"us-gaap:SellingAndMarketingExpense",
"us-gaap:MarketingExpense",
"us-gaap:GeneralAndAdministrativeExpense",
"us-gaap:LaborAndRelatedExpense",