Files
Neon-Desk/rust/fiscal-xbrl-core/src/surface_mapper.rs
2026-03-15 15:24:24 -04:00

1635 lines
52 KiB
Rust

use anyhow::Result;
use std::collections::{BTreeMap, HashMap, HashSet};
use crate::pack_selector::FiscalPack;
use crate::taxonomy_loader::{
load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition, SurfaceFormula,
SurfaceFormulaOp, SurfaceSignTransform,
};
use crate::{
ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput,
PeriodOutput, StatementRowMap, StatementRowOutput, SurfaceRowMap, SurfaceRowOutput,
};
/// Describes how a source concept was attached to the compact surface model.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MappingMethod {
    /// Matched through an authoritative concept or crosswalk surface key.
    AuthoritativeDirect,
    /// Matched through a surface definition's allowed source concepts.
    DirectSourceConcept,
    /// Serialized as `"aggregate_children"`.
    AggregateChildren,
    /// Serialized as `"taxonomy_kpi"`.
    TaxonomyKpi,
    /// Row that no surface definition claimed.
    UnmappedResidual,
}

impl MappingMethod {
    /// Returns the stable snake_case identifier written into concept and fact
    /// outputs for this mapping method.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::AuthoritativeDirect => "authoritative_direct",
            Self::DirectSourceConcept => "direct_source_concept",
            Self::AggregateChildren => "aggregate_children",
            Self::TaxonomyKpi => "taxonomy_kpi",
            Self::UnmappedResidual => "unmapped_residual",
        }
    }
}
/// Mapping metadata recorded per concept key and later copied onto concept
/// and fact outputs by `apply_mapping_assignments`.
#[derive(Debug, Clone, Default)]
pub struct MappingAssignment {
/// Authoritative concept the source concept resolves to, when one is known.
pub authoritative_concept_key: Option<String>,
/// How the concept was mapped; `None` until a mapping has been recorded.
pub mapping_method: Option<MappingMethod>,
/// Key of the surface the concept was matched to, if any.
pub surface_key: Option<String>,
/// Key of the surface a detail row is grouped under; residual rows use
/// the literal `"unmapped"`.
pub detail_parent_surface_key: Option<String>,
/// KPI key; the surface mapper in this file always leaves it `None`.
pub kpi_key: Option<String>,
/// `true` when the concept was emitted as an unmapped residual row.
pub residual_flag: bool,
}
/// Result of `build_compact_surface_model`: surface rows, their detail rows,
/// summary counters and the per-concept mapping assignments.
#[derive(Debug, Default)]
pub struct CompactSurfaceModel {
/// Surface rows keyed by statement.
pub surface_rows: SurfaceRowMap,
/// Detail rows keyed by statement, then by parent surface key
/// (`"unmapped"` for residual rows).
pub detail_rows: DetailRowStatementMap,
/// Row counters plus the warnings passed in by the caller.
pub normalization_summary: NormalizationSummaryOutput,
/// Mapping assignment for every concept key the builder touched.
pub concept_mappings: HashMap<String, MappingAssignment>,
}
/// Role a matched statement row plays for a surface definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchRole {
/// The row can supply the surface value directly.
Surface,
/// The row is a child component of the surface; assigned to source-concept
/// matches when the definition's rollup policy is `"aggregate_children"`.
Detail,
}
/// A statement row that matched a surface definition, together with the
/// metadata describing how it matched.
#[derive(Debug, Clone)]
struct MatchedStatementRow<'a> {
/// The matched source row (borrowed, never copied).
row: &'a StatementRowOutput,
/// Authoritative concept resolved for the row, if any.
authoritative_concept_key: Option<String>,
/// How the match was established.
mapping_method: MappingMethod,
/// Whether the row feeds the surface directly or acts as a detail child.
match_role: MatchRole,
/// Match priority: `match_statement_row` assigns 0 to authoritative matches
/// and 1 to source-concept matches; `pick_best_match` prefers lower values.
rank: i64,
}
#[derive(Debug, Default, Clone)]
struct ConsumedSources {
row_keys: HashSet<String>,
concept_keys: HashSet<String>,
}
impl ConsumedSources {
fn insert_row(&mut self, row: &StatementRowOutput) {
self.row_keys.insert(row.key.clone());
self.concept_keys.insert(row.concept_key.clone());
}
fn insert_surface_row(&mut self, row: &SurfaceRowOutput) {
self.row_keys.extend(row.source_row_keys.iter().cloned());
self.concept_keys
.extend(row.source_concepts.iter().cloned());
}
fn extend(&mut self, other: Self) {
self.row_keys.extend(other.row_keys);
self.concept_keys.extend(other.concept_keys);
}
fn matches_statement_row(&self, row: &StatementRowOutput) -> bool {
self.row_keys.contains(&row.key) || self.concept_keys.contains(&row.concept_key)
}
}
/// Intermediate result of resolving one surface definition, before it is
/// turned into a `SurfaceRowOutput`.
#[derive(Debug)]
struct SurfaceResolution {
/// Resolved value per period id (`None` when the period has no value).
values: BTreeMap<String, Option<f64>>,
/// Sorted, de-duplicated source concepts that contributed.
source_concepts: Vec<String>,
/// Sorted, de-duplicated keys of the contributing source rows.
source_row_keys: Vec<String>,
/// Sorted, de-duplicated ids of the contributing facts.
source_fact_ids: Vec<i64>,
/// `true` when any contributing row carries dimensions.
has_dimensions: bool,
/// Per period id, the key of the single row the value was taken from;
/// `None` for aggregated or formula-derived values.
resolved_source_row_keys: BTreeMap<String, Option<String>>,
/// Rows and concepts this resolution used up.
consumed_sources: ConsumedSources,
/// `true` when the values were computed by a formula.
formula_used: bool,
}
pub fn build_compact_surface_model(
periods: &[PeriodOutput],
statement_rows: &StatementRowMap,
taxonomy_regime: &str,
fiscal_pack: FiscalPack,
warnings: Vec<String>,
) -> Result<CompactSurfaceModel> {
let pack = load_surface_pack(fiscal_pack)?;
let crosswalk = load_crosswalk(taxonomy_regime)?;
let mut surface_rows = empty_surface_row_map();
let mut detail_rows = empty_detail_row_map();
let mut concept_mappings = HashMap::<String, MappingAssignment>::new();
let mut surface_row_count = 0usize;
let mut detail_row_count = 0usize;
let mut unmapped_row_count = 0usize;
let mut material_unmapped_row_count = 0usize;
for statement in statement_keys() {
let rows = statement_rows.get(statement).cloned().unwrap_or_default();
let mut statement_definitions = pack
.surfaces
.iter()
.filter(|definition| definition.statement == statement)
.collect::<Vec<_>>();
statement_definitions.sort_by(|left, right| {
left.order
.cmp(&right.order)
.then_with(|| left.label.cmp(&right.label))
});
let mut used_row_keys = HashSet::<String>::new();
let mut consumed_sources = ConsumedSources::default();
let mut statement_surface_rows = Vec::<SurfaceRowOutput>::new();
let mut statement_detail_rows = BTreeMap::<String, Vec<DetailRowOutput>>::new();
let mut resolved_statement_surfaces = HashMap::<String, SurfaceRowOutput>::new();
for definition in statement_definitions {
let matches = rows
.iter()
.filter(|row| !used_row_keys.contains(&row.key))
.filter_map(|row| match_statement_row(row, definition, crosswalk.as_ref()))
.collect::<Vec<_>>();
let direct_surface_matches = matches
.iter()
.filter(|matched| matched.match_role == MatchRole::Surface)
.cloned()
.collect::<Vec<_>>();
let detail_component_matches = matches
.iter()
.filter(|matched| matched.match_role == MatchRole::Detail)
.cloned()
.collect::<Vec<_>>();
let detail_matches = if definition.detail_grouping_policy == "group_all_children" {
if detail_component_matches.is_empty()
&& definition.rollup_policy == "aggregate_children"
{
Vec::new()
} else {
detail_component_matches.clone()
}
} else {
Vec::new()
};
let surface_resolution = resolve_surface(
definition,
periods,
&direct_surface_matches,
&detail_component_matches,
&resolved_statement_surfaces,
);
let Some(surface_resolution) = surface_resolution else {
continue;
};
for matched in &direct_surface_matches {
used_row_keys.insert(matched.row.key.clone());
consumed_sources.insert_row(matched.row);
concept_mappings.insert(
matched.row.concept_key.clone(),
MappingAssignment {
authoritative_concept_key: matched.authoritative_concept_key.clone(),
mapping_method: Some(matched.mapping_method),
surface_key: Some(definition.surface_key.clone()),
detail_parent_surface_key: None,
kpi_key: None,
residual_flag: false,
},
);
}
let details = detail_matches
.iter()
.map(|matched| {
used_row_keys.insert(matched.row.key.clone());
consumed_sources.insert_row(matched.row);
concept_mappings.insert(
matched.row.concept_key.clone(),
MappingAssignment {
authoritative_concept_key: matched.authoritative_concept_key.clone(),
mapping_method: Some(matched.mapping_method),
surface_key: Some(definition.surface_key.clone()),
detail_parent_surface_key: Some(definition.surface_key.clone()),
kpi_key: None,
residual_flag: false,
},
);
build_detail_row(
matched.row,
&definition.surface_key,
false,
definition.sign_transform,
)
})
.collect::<Vec<_>>();
if !details.is_empty() && definition.include_in_output {
detail_row_count += details.len();
statement_detail_rows.insert(definition.surface_key.clone(), details);
}
let detail_count = if definition.include_in_output {
statement_detail_rows
.get(&definition.surface_key)
.map(|rows| rows.len() as i64)
} else {
None
};
let row = SurfaceRowOutput {
key: definition.surface_key.clone(),
label: definition.label.clone(),
category: definition.category.clone(),
template_section: definition.category.clone(),
order: definition.order,
unit: definition.unit.clone(),
values: surface_resolution.values,
source_concepts: surface_resolution.source_concepts,
source_row_keys: surface_resolution.source_row_keys,
source_fact_ids: surface_resolution.source_fact_ids,
formula_key: if surface_resolution.formula_used
|| definition.formula_fallback.is_some()
{
definition
.formula_fallback
.as_ref()
.map(|_| definition.surface_key.clone())
} else {
None
},
has_dimensions: surface_resolution.has_dimensions,
resolved_source_row_keys: surface_resolution.resolved_source_row_keys,
statement: Some(definition.statement.clone()),
detail_count,
resolution_method: None,
confidence: None,
warning_codes: vec![],
};
consumed_sources.extend(surface_resolution.consumed_sources.clone());
resolved_statement_surfaces.insert(definition.surface_key.clone(), row.clone());
if definition.include_in_output {
statement_surface_rows.push(row);
surface_row_count += 1;
}
let _ = &definition.materiality_policy;
}
statement_surface_rows.sort_by(|left, right| {
left.order
.cmp(&right.order)
.then_with(|| left.label.cmp(&right.label))
});
let baseline = baseline_for_statement(statement, &statement_surface_rows);
let threshold = materiality_threshold(statement, baseline);
let residual_rows = rows
.iter()
.filter(|row| !used_row_keys.contains(&row.key))
.filter(|row| !consumed_sources.matches_statement_row(row))
.filter(|row| has_any_value(&row.values))
.map(|row| {
concept_mappings.insert(
row.concept_key.clone(),
MappingAssignment {
authoritative_concept_key: None,
mapping_method: Some(MappingMethod::UnmappedResidual),
surface_key: None,
detail_parent_surface_key: Some("unmapped".to_string()),
kpi_key: None,
residual_flag: true,
},
);
build_detail_row(row, "unmapped", true, None)
})
.collect::<Vec<_>>();
if !residual_rows.is_empty() {
unmapped_row_count += residual_rows.len();
material_unmapped_row_count += residual_rows
.iter()
.filter(|row| max_abs_value(&row.values) >= threshold)
.count();
detail_row_count += residual_rows.len();
statement_detail_rows.insert("unmapped".to_string(), residual_rows);
}
surface_rows.insert(statement.to_string(), statement_surface_rows);
detail_rows.insert(statement.to_string(), statement_detail_rows);
}
Ok(CompactSurfaceModel {
surface_rows,
detail_rows,
normalization_summary: NormalizationSummaryOutput {
surface_row_count,
detail_row_count,
kpi_row_count: 0,
unmapped_row_count,
material_unmapped_row_count,
warnings,
},
concept_mappings,
})
}
/// Chooses the resolution strategy for one surface definition and applies the
/// definition's sign transform to the result.
///
/// Strategy order:
/// 1. `"formula_only"` rollup: always the formula.
/// 2. Direct surface matches, when there are any.
/// 3. `"aggregate_children"` rollup with detail matches: aggregate them.
/// 4. `"aggregate_children"` rollup with a formula fallback, or
///    `"direct_or_formula"` rollup: the formula.
///
/// Returns `None` when no strategy applies or the formula cannot be resolved.
fn resolve_surface(
    definition: &SurfaceDefinition,
    periods: &[PeriodOutput],
    direct_surface_matches: &[MatchedStatementRow<'_>],
    detail_component_matches: &[MatchedStatementRow<'_>],
    resolved_statement_surfaces: &HashMap<String, SurfaceRowOutput>,
) -> Option<SurfaceResolution> {
    let aggregates_children = definition.rollup_policy == "aggregate_children";

    let unsigned = if definition.rollup_policy == "formula_only" {
        resolve_formula_surface(definition, periods, resolved_statement_surfaces)?
    } else if !direct_surface_matches.is_empty() {
        resolve_direct_surface(periods, direct_surface_matches)
    } else if aggregates_children && !detail_component_matches.is_empty() {
        resolve_aggregate_surface(periods, detail_component_matches)
    } else if (aggregates_children && definition.formula_fallback.is_some())
        || definition.rollup_policy == "direct_or_formula"
    {
        resolve_formula_surface(definition, periods, resolved_statement_surfaces)?
    } else {
        return None;
    };

    Some(apply_sign_transform(unsigned, definition.sign_transform))
}
/// Resolves a surface from rows that match it directly.
///
/// For each period, the candidates are the matches that have a value for that
/// period; `pick_best_match` selects the winner, whose value and row key are
/// recorded. Periods without any candidate get `None` for both. All matches,
/// chosen or not, are reported as sources and marked as consumed.
fn resolve_direct_surface(
    periods: &[PeriodOutput],
    matches: &[MatchedStatementRow<'_>],
) -> SurfaceResolution {
    let mut values = BTreeMap::new();
    let mut resolved_source_row_keys = BTreeMap::new();

    for period in periods {
        let period_id = &period.id;
        let candidates = matches
            .iter()
            .filter(|candidate| matches!(candidate.row.values.get(period_id), Some(Some(_))))
            .cloned()
            .collect::<Vec<_>>();

        let (value, source_row_key) = if candidates.is_empty() {
            (None, None)
        } else {
            let winner = pick_best_match(&candidates);
            (
                winner.row.values.get(period_id).copied().flatten(),
                Some(winner.row.key.clone()),
            )
        };
        values.insert(period_id.clone(), value);
        resolved_source_row_keys.insert(period_id.clone(), source_row_key);
    }

    // Single pass over the matches to gather provenance.
    let mut consumed_sources = ConsumedSources::default();
    let mut concepts = Vec::with_capacity(matches.len());
    let mut row_keys = Vec::with_capacity(matches.len());
    let mut fact_ids = Vec::new();
    let mut has_dimensions = false;
    for matched in matches {
        consumed_sources.insert_row(matched.row);
        concepts.push(matched.row.qname.clone());
        row_keys.push(matched.row.key.clone());
        fact_ids.extend(matched.row.source_fact_ids.iter().copied());
        has_dimensions |= matched.row.has_dimensions;
    }

    SurfaceResolution {
        values,
        source_concepts: unique_sorted_strings(concepts),
        source_row_keys: unique_sorted_strings(row_keys),
        source_fact_ids: unique_sorted_i64(fact_ids),
        has_dimensions,
        resolved_source_row_keys,
        consumed_sources,
        formula_used: false,
    }
}
/// Resolves a surface by aggregating its matched child rows.
///
/// Per-period values come from `build_surface_values`. Every child is
/// reported as a source and marked as consumed. No single row backs an
/// aggregated value, so `resolved_source_row_keys` is `None` for every period.
///
/// The matches are used as borrowed: nothing read here depends on a match's
/// `mapping_method`, so no relabelled copy of the matches is built.
fn resolve_aggregate_surface(
    periods: &[PeriodOutput],
    matches: &[MatchedStatementRow<'_>],
) -> SurfaceResolution {
    let mut consumed_sources = ConsumedSources::default();
    for matched in matches {
        consumed_sources.insert_row(matched.row);
    }

    SurfaceResolution {
        values: build_surface_values(periods, matches),
        source_concepts: unique_sorted_strings(
            matches
                .iter()
                .map(|matched| matched.row.qname.clone())
                .collect(),
        ),
        source_row_keys: unique_sorted_strings(
            matches
                .iter()
                .map(|matched| matched.row.key.clone())
                .collect(),
        ),
        source_fact_ids: unique_sorted_i64(
            matches
                .iter()
                .flat_map(|matched| matched.row.source_fact_ids.iter().copied())
                .collect(),
        ),
        has_dimensions: matches.iter().any(|matched| matched.row.has_dimensions),
        resolved_source_row_keys: periods
            .iter()
            .map(|period| (period.id.clone(), None))
            .collect(),
        consumed_sources,
        formula_used: false,
    }
}
fn resolve_formula_surface(
definition: &SurfaceDefinition,
periods: &[PeriodOutput],
resolved_statement_surfaces: &HashMap<String, SurfaceRowOutput>,
) -> Option<SurfaceResolution> {
let formula = definition
.formula_fallback
.as_ref()
.and_then(|formula| formula.structured())?;
let source_rows = formula
.sources
.iter()
.filter_map(|source_key| resolved_statement_surfaces.get(source_key))
.collect::<Vec<_>>();
if source_rows.is_empty() {
return None;
}
let values = periods
.iter()
.map(|period| {
(
period.id.clone(),
evaluate_formula_for_period(formula, &period.id, &source_rows),
)
})
.collect::<BTreeMap<_, _>>();
if !has_any_value(&values) {
return None;
}
let mut consumed_sources = ConsumedSources::default();
for row in &source_rows {
consumed_sources.insert_surface_row(row);
}
Some(SurfaceResolution {
values,
source_concepts: unique_sorted_strings(
source_rows
.iter()
.flat_map(|row| row.source_concepts.clone())
.collect::<Vec<_>>(),
),
source_row_keys: unique_sorted_strings(
source_rows
.iter()
.flat_map(|row| row.source_row_keys.clone())
.collect::<Vec<_>>(),
),
source_fact_ids: unique_sorted_i64(
source_rows
.iter()
.flat_map(|row| row.source_fact_ids.clone())
.collect::<Vec<_>>(),
),
has_dimensions: source_rows.iter().any(|row| row.has_dimensions),
resolved_source_row_keys: periods
.iter()
.map(|period| (period.id.clone(), None))
.collect(),
consumed_sources,
formula_used: true,
})
}
/// Evaluates `formula` for a single period.
///
/// One operand is produced per formula source, in formula order: the value of
/// the source surface for `period_id`, or `None` when the surface is absent
/// from `source_rows` or has no value for that period. The operands are then
/// combined according to the formula's operation.
fn evaluate_formula_for_period(
    formula: &SurfaceFormula,
    period_id: &str,
    source_rows: &[&SurfaceRowOutput],
) -> Option<f64> {
    let mut operands = Vec::new();
    for source_key in formula.sources.iter() {
        let operand = source_rows
            .iter()
            .find(|row| row.key == *source_key)
            .and_then(|row| row.values.get(period_id).copied().flatten());
        operands.push(operand);
    }

    let nulls_are_zero = formula.treat_null_as_zero;
    match formula.op {
        SurfaceFormulaOp::Sum => sum_formula_values(&operands, nulls_are_zero),
        SurfaceFormulaOp::Subtract => subtract_formula_values(&operands, nulls_are_zero),
    }
}
/// Sums formula operands.
///
/// With `treat_null_as_zero`, missing operands count as `0.0` and the result
/// is `None` only when every operand is missing (an empty slice included).
/// Without it, a single missing operand makes the whole result `None`.
fn sum_formula_values(values: &[Option<f64>], treat_null_as_zero: bool) -> Option<f64> {
    let missing = values.iter().filter(|value| value.is_none()).count();
    let unresolved = if treat_null_as_zero {
        missing == values.len()
    } else {
        missing > 0
    };
    if unresolved {
        return None;
    }
    Some(values.iter().map(|value| value.unwrap_or(0.0)).sum())
}
/// Computes `left - right` for a two-operand formula.
///
/// Returns `None` when there are not exactly two operands or when both are
/// missing, so a period with no data at all stays empty, as it does in
/// `sum_formula_values`. When exactly one operand is missing, it counts as
/// `0.0` if `treat_null_as_zero` is set; otherwise the result is `None`.
fn subtract_formula_values(values: &[Option<f64>], treat_null_as_zero: bool) -> Option<f64> {
    let [left, right] = values else {
        return None;
    };
    match (*left, *right) {
        (Some(minuend), Some(subtrahend)) => Some(minuend - subtrahend),
        // Nothing to subtract from and nothing to subtract: never invent a zero.
        (None, None) => None,
        (minuend, subtrahend) if treat_null_as_zero => {
            Some(minuend.unwrap_or(0.0) - subtrahend.unwrap_or(0.0))
        }
        _ => None,
    }
}
pub fn merge_mapping_assignments(
primary: &mut HashMap<String, MappingAssignment>,
secondary: HashMap<String, MappingAssignment>,
) {
for (concept_key, assignment) in secondary {
let existing = primary.entry(concept_key).or_default();
existing.authoritative_concept_key = existing
.authoritative_concept_key
.clone()
.or(assignment.authoritative_concept_key);
if existing.mapping_method.is_none()
|| matches!(
existing.mapping_method,
Some(MappingMethod::UnmappedResidual)
)
{
existing.mapping_method = assignment.mapping_method;
}
if existing.surface_key.is_none() {
existing.surface_key = assignment.surface_key;
}
if existing.detail_parent_surface_key.is_none() {
existing.detail_parent_surface_key = assignment.detail_parent_surface_key;
}
if existing.kpi_key.is_none() {
existing.kpi_key = assignment.kpi_key;
}
existing.residual_flag = existing.residual_flag && assignment.residual_flag;
}
}
/// Copies mapping metadata onto concepts and facts.
///
/// Every concept and fact whose `concept_key` has an entry in `mappings` gets
/// all six mapping fields overwritten from that entry; the mapping method is
/// stored as its `as_str` identifier. Items without an entry are left as-is.
pub fn apply_mapping_assignments(
    concepts: &mut [ConceptOutput],
    facts: &mut [FactOutput],
    mappings: &HashMap<String, MappingAssignment>,
) {
    for concept in concepts.iter_mut() {
        let Some(assignment) = mappings.get(&concept.concept_key) else {
            continue;
        };
        let method_label = assignment
            .mapping_method
            .map(|method| method.as_str().to_string());
        concept.authoritative_concept_key = assignment.authoritative_concept_key.clone();
        concept.mapping_method = method_label;
        concept.surface_key = assignment.surface_key.clone();
        concept.detail_parent_surface_key = assignment.detail_parent_surface_key.clone();
        concept.kpi_key = assignment.kpi_key.clone();
        concept.residual_flag = assignment.residual_flag;
    }

    for fact in facts.iter_mut() {
        let Some(assignment) = mappings.get(&fact.concept_key) else {
            continue;
        };
        let method_label = assignment
            .mapping_method
            .map(|method| method.as_str().to_string());
        fact.authoritative_concept_key = assignment.authoritative_concept_key.clone();
        fact.mapping_method = method_label;
        fact.surface_key = assignment.surface_key.clone();
        fact.detail_parent_surface_key = assignment.detail_parent_surface_key.clone();
        fact.kpi_key = assignment.kpi_key.clone();
        fact.residual_flag = assignment.residual_flag;
    }
}
/// Tests whether a statement row belongs to a surface definition.
///
/// The authoritative concept of a row is the crosswalk's mapping for its qname
/// when there is one; otherwise the qname itself for non-extension rows, and
/// nothing for extension rows.
///
/// * Authoritative match (rank 0, role `Surface`): the authoritative concept
///   is among the definition's allowed authoritative concepts, or the
///   crosswalk maps the row to this definition's surface key.
/// * Source match (rank 1): the row's qname or local name is among the
///   definition's allowed source concepts. The role is `Detail` for
///   `"aggregate_children"` definitions and `Surface` otherwise.
///
/// Returns `None` when neither applies.
fn match_statement_row<'a>(
    row: &'a StatementRowOutput,
    definition: &SurfaceDefinition,
    crosswalk: Option<&CrosswalkFile>,
) -> Option<MatchedStatementRow<'a>> {
    let authoritative_mapping = match crosswalk {
        Some(file) => file.mappings.get(&row.qname),
        None => None,
    };
    let authoritative_concept_key = match authoritative_mapping {
        Some(mapping) => Some(mapping.authoritative_concept_key.clone()),
        None if !row.is_extension => Some(row.qname.clone()),
        None => None,
    };

    let concept_is_allowed = match authoritative_concept_key.as_ref() {
        Some(concept) => definition
            .allowed_authoritative_concepts
            .iter()
            .any(|candidate| candidate_matches(candidate, concept)),
        None => false,
    };
    let crosswalk_targets_surface = match authoritative_mapping {
        Some(mapping) => mapping.surface_key == definition.surface_key,
        None => false,
    };
    if concept_is_allowed || crosswalk_targets_surface {
        return Some(MatchedStatementRow {
            row,
            authoritative_concept_key,
            mapping_method: MappingMethod::AuthoritativeDirect,
            match_role: MatchRole::Surface,
            rank: 0,
        });
    }

    let source_is_allowed = definition.allowed_source_concepts.iter().any(|candidate| {
        candidate_matches(candidate, &row.qname) || candidate_matches(candidate, &row.local_name)
    });
    if !source_is_allowed {
        return None;
    }

    let match_role = if definition.rollup_policy == "aggregate_children" {
        MatchRole::Detail
    } else {
        MatchRole::Surface
    };
    Some(MatchedStatementRow {
        row,
        authoritative_concept_key,
        mapping_method: MappingMethod::DirectSourceConcept,
        match_role,
        rank: 1,
    })
}
/// Picks the preferred match among candidates for one period.
///
/// Preference, in order: lower `rank`, rows without dimensions, lower row
/// `order`, larger absolute value, then label. On a complete tie the earliest
/// candidate wins.
///
/// # Panics
///
/// Panics when `matches` is empty.
fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
    matches
        .iter()
        .min_by(|lhs, rhs| {
            let by_rank = lhs.rank.cmp(&rhs.rank);
            // `false < true`, so undimensioned rows sort first.
            let by_dimensions = lhs.row.has_dimensions.cmp(&rhs.row.has_dimensions);
            let by_order = lhs.row.order.cmp(&rhs.row.order);
            // Operands are swapped on purpose: the larger magnitude wins.
            let by_magnitude = max_abs_value(&rhs.row.values)
                .partial_cmp(&max_abs_value(&lhs.row.values))
                .unwrap_or(std::cmp::Ordering::Equal);
            let by_label = lhs.row.label.cmp(&rhs.row.label);

            by_rank
                .then(by_dimensions)
                .then(by_order)
                .then(by_magnitude)
                .then(by_label)
        })
        .expect("pick_best_match requires at least one match")
}
/// Builds the per-period values of an aggregated surface.
///
/// With exactly one match its value is taken as-is; otherwise the values of
/// all matches are combined with `sum_nullable_values`.
fn build_surface_values(
    periods: &[PeriodOutput],
    matches: &[MatchedStatementRow<'_>],
) -> BTreeMap<String, Option<f64>> {
    let mut values = BTreeMap::new();
    for period in periods {
        let value = match matches {
            [only] => only.row.values.get(&period.id).copied().flatten(),
            _ => {
                let operands = matches
                    .iter()
                    .map(|matched| matched.row.values.get(&period.id).copied().flatten())
                    .collect::<Vec<_>>();
                sum_nullable_values(operands)
            }
        };
        values.insert(period.id.clone(), value);
    }
    values
}
/// Sums the present values, counting missing ones as `0.0`.
///
/// Returns `None` when no value is present at all (an empty input included).
fn sum_nullable_values(values: Vec<Option<f64>>) -> Option<f64> {
    let any_present = values.iter().any(Option::is_some);
    if !any_present {
        return None;
    }
    let total = values
        .into_iter()
        .map(|value| value.unwrap_or(0.0))
        .sum::<f64>();
    Some(total)
}
/// Converts a statement row into a detail row under `parent_surface_key`.
///
/// The unit is the first non-empty unit found among the row's per-period
/// units, values are passed through `sign_transform`, and rows with
/// dimensions get a single `"has_dimensions"` summary entry.
fn build_detail_row(
    row: &StatementRowOutput,
    parent_surface_key: &str,
    residual_flag: bool,
    sign_transform: Option<SurfaceSignTransform>,
) -> DetailRowOutput {
    let unit = row
        .units
        .values()
        .filter_map(|unit| unit.as_ref())
        .next()
        .cloned();
    let dimensions_summary = if row.has_dimensions {
        vec!["has_dimensions".to_string()]
    } else {
        Vec::new()
    };
    let values = transform_values(&row.values, sign_transform);

    DetailRowOutput {
        key: row.key.clone(),
        parent_surface_key: parent_surface_key.to_string(),
        label: row.label.clone(),
        concept_key: row.concept_key.clone(),
        qname: row.qname.clone(),
        namespace_uri: row.namespace_uri.clone(),
        local_name: row.local_name.clone(),
        unit,
        values,
        source_fact_ids: row.source_fact_ids.clone(),
        is_extension: row.is_extension,
        dimensions_summary,
        residual_flag,
    }
}
fn apply_sign_transform(
mut resolution: SurfaceResolution,
sign_transform: Option<SurfaceSignTransform>,
) -> SurfaceResolution {
resolution.values = transform_values(&resolution.values, sign_transform);
resolution
}
/// Produces a copy of `values` with the sign transform applied.
///
/// `Invert` negates every present value; without a transform the map is
/// copied unchanged. Missing values stay missing in both cases.
fn transform_values(
    values: &BTreeMap<String, Option<f64>>,
    sign_transform: Option<SurfaceSignTransform>,
) -> BTreeMap<String, Option<f64>> {
    match sign_transform {
        Some(SurfaceSignTransform::Invert) => {
            let mut inverted = BTreeMap::new();
            for (period_id, value) in values {
                inverted.insert(period_id.clone(), value.map(|amount| -amount));
            }
            inverted
        }
        None => values.clone(),
    }
}
/// Returns `true` when at least one period carries a value.
fn has_any_value(values: &BTreeMap<String, Option<f64>>) -> bool {
    !values.values().all(Option::is_none)
}
/// Returns the largest absolute value across all periods, or `0.0` when no
/// period carries a value.
fn max_abs_value(values: &BTreeMap<String, Option<f64>>) -> f64 {
    let mut largest = 0.0_f64;
    for value in values.values() {
        let magnitude = value.unwrap_or(0.0).abs();
        largest = largest.max(magnitude);
    }
    largest
}
/// Returns the materiality baseline of a statement: the largest absolute
/// value of its anchor surface row, or `0.0` when that row is absent.
///
/// The anchor is `total_assets` for `"balance"`, `operating_cash_flow` for
/// `"cash_flow"` and `revenue` for every other statement.
fn baseline_for_statement(statement: &str, surface_rows: &[SurfaceRowOutput]) -> f64 {
    let anchor_key = if statement == "balance" {
        "total_assets"
    } else if statement == "cash_flow" {
        "operating_cash_flow"
    } else {
        "revenue"
    };

    for row in surface_rows {
        if row.key == anchor_key {
            return max_abs_value(&row.values);
        }
    }
    0.0
}
/// Returns the absolute value at or above which an unmapped row counts as
/// material.
///
/// The balance statement uses 0.5% of the baseline with a floor of 5,000,000;
/// every other statement uses 1% of the baseline with a floor of 1,000,000.
fn materiality_threshold(statement: &str, baseline: f64) -> f64 {
    let (ratio, floor): (f64, f64) = if statement == "balance" {
        (0.005, 5_000_000.0)
    } else {
        (0.01, 1_000_000.0)
    };
    (baseline * ratio).max(floor)
}
/// Returns the distinct strings of `values` in ascending order.
fn unique_sorted_strings(values: Vec<String>) -> Vec<String> {
    let mut ordered = values;
    // Sorting first makes duplicates adjacent so `dedup` removes all of them.
    ordered.sort_unstable();
    ordered.dedup();
    ordered
}
/// Returns the distinct integers of `values` in ascending order.
fn unique_sorted_i64(values: Vec<i64>) -> Vec<i64> {
    let mut ordered = values;
    // Sorting first makes duplicates adjacent so `dedup` removes all of them.
    ordered.sort_unstable();
    ordered.dedup();
    ordered
}
/// Compares a configured concept name with an actual one, ignoring ASCII case.
///
/// The names match when they are equal as given, or when the part after the
/// last `:` of one side equals the whole of the other side. Two prefixed
/// names with different prefixes therefore do not match.
fn candidate_matches(candidate: &str, actual: &str) -> bool {
    /// Part of a qualified name after its last `:`, if it has one.
    fn local_part(qualified: &str) -> Option<&str> {
        match qualified.rsplit_once(':') {
            Some((_, local_name)) => Some(local_name),
            None => None,
        }
    }

    if candidate.eq_ignore_ascii_case(actual) {
        return true;
    }
    if let Some(candidate_local) = local_part(candidate) {
        if candidate_local.eq_ignore_ascii_case(actual) {
            return true;
        }
    }
    match local_part(actual) {
        Some(actual_local) => actual_local.eq_ignore_ascii_case(candidate),
        None => false,
    }
}
/// Returns the statement keys in the order in which statements are processed.
fn statement_keys() -> [&'static str; 5] {
    const STATEMENT_KEYS: [&str; 5] = [
        "income",
        "balance",
        "cash_flow",
        "equity",
        "comprehensive_income",
    ];
    STATEMENT_KEYS
}
fn empty_surface_row_map() -> SurfaceRowMap {
statement_keys()
.into_iter()
.map(|key| (key.to_string(), Vec::new()))
.collect()
}
fn empty_detail_row_map() -> DetailRowStatementMap {
statement_keys()
.into_iter()
.map(|key| (key.to_string(), BTreeMap::new()))
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::pack_selector::FiscalPack;
use crate::{PeriodOutput, StatementRowOutput};
fn period(id: &str) -> PeriodOutput {
PeriodOutput {
id: id.to_string(),
filing_id: 1,
accession_number: "0000000000-00-000001".to_string(),
filing_date: "2025-12-31".to_string(),
period_start: Some("2025-01-01".to_string()),
period_end: Some("2025-12-31".to_string()),
filing_type: "10-K".to_string(),
period_label: id.to_string(),
}
}
fn row(key: &str, qname: &str, statement: &str, value: f64) -> StatementRowOutput {
row_with_values(
key,
qname,
statement,
BTreeMap::from([("p1".to_string(), Some(value))]),
)
}
fn row_with_values(
key: &str,
qname: &str,
statement: &str,
values: BTreeMap<String, Option<f64>>,
) -> StatementRowOutput {
StatementRowOutput {
key: key.to_string(),
label: key.to_string(),
concept_key: format!(
"http://fasb.org/us-gaap/2024#{}",
qname.split(':').nth(1).unwrap_or(key)
),
qname: qname.to_string(),
namespace_uri: "http://fasb.org/us-gaap/2024".to_string(),
local_name: qname.split(':').nth(1).unwrap_or(key).to_string(),
is_extension: false,
statement: statement.to_string(),
role_uri: Some(statement.to_string()),
order: 1,
depth: 0,
parent_key: None,
units: values
.keys()
.map(|period_id| (period_id.clone(), Some("iso4217:USD".to_string())))
.collect(),
values,
has_dimensions: false,
source_fact_ids: vec![1],
}
}
fn empty_map() -> StatementRowMap {
BTreeMap::from([
("income".to_string(), Vec::new()),
("balance".to_string(), Vec::new()),
("cash_flow".to_string(), Vec::new()),
("equity".to_string(), Vec::new()),
("comprehensive_income".to_string(), Vec::new()),
])
}
#[test]
fn prefers_direct_authoritative_row_over_child_aggregation() {
let mut rows = empty_map();
rows.get_mut("income").unwrap().extend([
row("op-expenses", "us-gaap:OperatingExpenses", "income", 40.0),
row(
"sga",
"us-gaap:SellingGeneralAndAdministrativeExpense",
"income",
25.0,
),
row(
"rd",
"us-gaap:ResearchAndDevelopmentExpense",
"income",
15.0,
),
]);
let model = build_compact_surface_model(
&[period("p1")],
&rows,
"us-gaap",
FiscalPack::Core,
vec![],
)
.expect("compact model should build");
let op_expenses = model
.surface_rows
.get("income")
.unwrap()
.iter()
.find(|row| row.key == "operating_expenses")
.unwrap();
assert_eq!(op_expenses.values.get("p1").copied().flatten(), Some(40.0));
assert_eq!(op_expenses.detail_count, Some(2));
}
#[test]
fn emits_unmapped_residual_rows() {
let mut rows = empty_map();
rows.get_mut("income")
.unwrap()
.push(row("custom", "company:CustomMetric", "income", 12.0));
let model = build_compact_surface_model(
&[period("p1")],
&rows,
"us-gaap",
FiscalPack::Core,
vec![],
)
.expect("compact model should build");
let residual_rows = model
.detail_rows
.get("income")
.unwrap()
.get("unmapped")
.unwrap();
assert_eq!(residual_rows.len(), 1);
assert!(residual_rows[0].residual_flag);
}
#[test]
fn flattens_balance_aliases_and_prunes_balance_unmapped_rows() {
let mut rows = empty_map();
rows.get_mut("balance").unwrap().extend([
row(
"receivable-primary",
"us-gaap:AccountsReceivableNetCurrent",
"balance",
25.0,
),
row(
"receivable-alias",
"us-gaap:ReceivablesNetCurrent",
"balance",
25.0,
),
row(
"custom-balance",
"company:CustomBalanceMetric",
"balance",
9.0,
),
]);
let model = build_compact_surface_model(
&[period("p1")],
&rows,
"us-gaap",
FiscalPack::Core,
vec![],
)
.expect("compact model should build");
let receivables = model
.surface_rows
.get("balance")
.unwrap()
.iter()
.find(|row| row.key == "accounts_receivable")
.unwrap();
assert_eq!(receivables.values.get("p1").copied().flatten(), Some(25.0));
assert_eq!(
receivables.source_row_keys,
vec![
"receivable-alias".to_string(),
"receivable-primary".to_string()
]
);
assert_eq!(
receivables
.resolved_source_row_keys
.get("p1")
.cloned()
.flatten(),
Some("receivable-alias".to_string())
);
let unmapped = model
.detail_rows
.get("balance")
.and_then(|groups| groups.get("unmapped"))
.cloned()
.unwrap_or_default();
assert_eq!(unmapped.len(), 1);
assert_eq!(unmapped[0].key, "custom-balance");
}
#[test]
fn merges_period_sparse_balance_aliases_into_one_row() {
let mut rows = empty_map();
rows.get_mut("balance").unwrap().extend([
row_with_values(
"receivable-p1",
"us-gaap:AccountsReceivableNetCurrent",
"balance",
BTreeMap::from([("p1".to_string(), Some(10.0)), ("p2".to_string(), None)]),
),
row_with_values(
"receivable-p2",
"us-gaap:ReceivablesNetCurrent",
"balance",
BTreeMap::from([("p1".to_string(), None), ("p2".to_string(), Some(18.0))]),
),
]);
let periods = vec![
period("p1"),
PeriodOutput {
id: "p2".to_string(),
filing_id: 2,
accession_number: "0000000000-00-000002".to_string(),
filing_date: "2026-12-31".to_string(),
period_start: Some("2026-01-01".to_string()),
period_end: Some("2026-12-31".to_string()),
filing_type: "10-K".to_string(),
period_label: "p2".to_string(),
},
];
let model =
build_compact_surface_model(&periods, &rows, "us-gaap", FiscalPack::Core, vec![])
.expect("compact model should build");
let receivables = model
.surface_rows
.get("balance")
.unwrap()
.iter()
.find(|row| row.key == "accounts_receivable")
.unwrap();
assert_eq!(receivables.values.get("p1").copied().flatten(), Some(10.0));
assert_eq!(receivables.values.get("p2").copied().flatten(), Some(18.0));
assert_eq!(
receivables
.resolved_source_row_keys
.get("p1")
.cloned()
.flatten(),
Some("receivable-p1".to_string())
);
assert_eq!(
receivables
.resolved_source_row_keys
.get("p2")
.cloned()
.flatten(),
Some("receivable-p2".to_string())
);
}
#[test]
fn derives_balance_formula_rows_and_hides_helper_surfaces() {
let mut rows = empty_map();
rows.get_mut("balance").unwrap().extend([
row(
"cash",
"us-gaap:CashAndCashEquivalentsAtCarryingValue",
"balance",
100.0,
),
row(
"marketable",
"us-gaap:ShortTermInvestments",
"balance",
40.0,
),
row("ap", "us-gaap:AccountsPayableCurrent", "balance", 30.0),
row(
"deferred-current",
"us-gaap:DeferredRevenueCurrent",
"balance",
15.0,
),
row(
"deferred-noncurrent",
"us-gaap:DeferredRevenueNoncurrent",
"balance",
5.0,
),
row("short-debt", "us-gaap:ShortTermBorrowings", "balance", 10.0),
row(
"long-debt",
"us-gaap:LongTermDebtNoncurrent",
"balance",
50.0,
),
]);
let model = build_compact_surface_model(
&[period("p1")],
&rows,
"us-gaap",
FiscalPack::Core,
vec![],
)
.expect("compact model should build");
let balance_rows = model.surface_rows.get("balance").unwrap();
let total_cash = balance_rows
.iter()
.find(|row| row.key == "total_cash_and_equivalents")
.unwrap();
let unearned_revenue = balance_rows
.iter()
.find(|row| row.key == "unearned_revenue")
.unwrap();
let total_debt = balance_rows
.iter()
.find(|row| row.key == "total_debt")
.unwrap();
let net_cash = balance_rows
.iter()
.find(|row| row.key == "net_cash_position")
.unwrap();
assert_eq!(total_cash.values.get("p1").copied().flatten(), Some(140.0));
assert_eq!(
unearned_revenue.values.get("p1").copied().flatten(),
Some(20.0)
);
assert_eq!(total_debt.values.get("p1").copied().flatten(), Some(60.0));
assert_eq!(net_cash.values.get("p1").copied().flatten(), Some(80.0));
assert!(balance_rows
.iter()
.all(|row| row.key != "deferred_revenue_current"
&& row.key != "deferred_revenue_noncurrent"));
assert!(model
.detail_rows
.get("balance")
.unwrap()
.get("deferred_revenue_current")
.is_none());
}
#[test]
fn merges_core_balance_rows_into_sector_packs() {
let mut rows = empty_map();
rows.get_mut("balance").unwrap().extend([
row(
"cash",
"us-gaap:CashAndCashEquivalentsAtCarryingValue",
"balance",
20.0,
),
row(
"loans",
"us-gaap:LoansReceivableNetReportedAmount",
"balance",
80.0,
),
row("deposits", "us-gaap:DepositsLiabilities", "balance", 70.0),
]);
let model = build_compact_surface_model(
&[period("p1")],
&rows,
"us-gaap",
FiscalPack::BankLender,
vec![],
)
.expect("compact model should build");
let balance_rows = model.surface_rows.get("balance").unwrap();
let cash = balance_rows
.iter()
.find(|row| row.key == "cash_and_equivalents")
.unwrap();
let loans = balance_rows.iter().find(|row| row.key == "loans").unwrap();
let deposits = balance_rows
.iter()
.find(|row| row.key == "deposits")
.unwrap();
assert_eq!(cash.category, "current_assets");
assert_eq!(loans.category, "noncurrent_assets");
assert_eq!(deposits.category, "current_liabilities");
}
// Insurance pack: the DAC/VOBA concept must surface as `deferred_acquisition_costs`
// with its source concept and row key recorded, while the company-specific concept
// is expected to be the only entry left in the "unmapped" detail group.
#[test]
fn maps_insurance_deferred_acquisition_costs_and_prunes_unmapped_rows() {
    let mut statement_rows = empty_map();
    let balance_inputs = [
        row(
            "dac-voba",
            "us-gaap:DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired",
            "balance",
            2106.0,
        ),
        row(
            "custom-balance",
            "company:CustomInsuranceBalanceMetric",
            "balance",
            12.0,
        ),
    ];
    statement_rows
        .get_mut("balance")
        .unwrap()
        .extend(balance_inputs);

    let model = build_compact_surface_model(
        &[period("p1")],
        &statement_rows,
        "us-gaap",
        FiscalPack::Insurance,
        vec![],
    )
    .expect("compact model should build");

    let balance_surfaces = model.surface_rows.get("balance").unwrap();
    let dac_row = balance_surfaces
        .iter()
        .find(|candidate| candidate.key == "deferred_acquisition_costs")
        .unwrap();

    assert_eq!(dac_row.category, "noncurrent_assets");
    assert_eq!(dac_row.values.get("p1").copied().flatten(), Some(2106.0));

    let expected_concepts = vec![String::from(
        "us-gaap:DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired",
    )];
    assert_eq!(dac_row.source_concepts, expected_concepts);
    let expected_row_keys = vec![String::from("dac-voba")];
    assert_eq!(dac_row.source_row_keys, expected_row_keys);

    // A missing statement or missing "unmapped" group is treated as an empty list.
    let unmapped_group = model
        .detail_rows
        .get("balance")
        .and_then(|groups| groups.get("unmapped"));
    let unmapped = unmapped_group.cloned().unwrap_or_default();
    assert_eq!(unmapped.len(), 1);
    assert_eq!(unmapped[0].key, "custom-balance");
}
// Core pack cash-flow check. The outflow inputs (capex, debt repayment, share
// repurchases) are supplied as positive amounts and expected back negated; the
// derived rows are expected at the asserted amounts (e.g. free cash flow 85.0,
// consistent with 120.0 operating cash flow less 35.0 capex); and the helper
// surfaces listed at the end must not appear among the cash-flow surface rows.
#[test]
fn derives_cash_flow_rows_applies_signs_and_hides_helper_surfaces() {
    let mut statement_rows = empty_map();
    let cash_flow_inputs = [
        row(
            "cfo",
            "us-gaap:NetCashProvidedByUsedInOperatingActivities",
            "cash_flow",
            120.0,
        ),
        row(
            "capex",
            "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment",
            "cash_flow",
            35.0,
        ),
        row("debt-repaid", "us-gaap:RepaymentsOfDebt", "cash_flow", 14.0),
        row(
            "share-repurchase",
            "us-gaap:PaymentsForRepurchaseOfCommonStock",
            "cash_flow",
            11.0,
        ),
        row(
            "contract-incurred",
            "us-gaap:ContractWithCustomerLiabilityIncurred",
            "cash_flow",
            40.0,
        ),
        row(
            "contract-recognized",
            "us-gaap:ContractWithCustomerLiabilityRevenueRecognized",
            "cash_flow",
            15.0,
        ),
        row(
            "other-current-assets",
            "us-gaap:IncreaseDecreaseInOtherCurrentAssets",
            "cash_flow",
            6.0,
        ),
        row(
            "other-current-liabilities",
            "us-gaap:IncreaseDecreaseInOtherCurrentLiabilities",
            "cash_flow",
            4.0,
        ),
    ];
    statement_rows
        .get_mut("cash_flow")
        .unwrap()
        .extend(cash_flow_inputs);

    let model = build_compact_surface_model(
        &[period("p1")],
        &statement_rows,
        "us-gaap",
        FiscalPack::Core,
        vec![],
    )
    .expect("compact model should build");

    let cash_flow_surfaces = model.surface_rows.get("cash_flow").unwrap();

    // (surface key, expected "p1" value), in the order the rows are checked.
    let expected_p1 = [
        ("capital_expenditures", -35.0),
        ("debt_repaid", -14.0),
        ("share_repurchases", -11.0),
        ("changes_unearned_revenue", 25.0),
        ("changes_other_operating_activities", -10.0),
        ("free_cash_flow", 85.0),
    ];

    // Resolve every surface row up front so a missing row fails before any value check.
    let resolved: Vec<_> = expected_p1
        .iter()
        .map(|(surface_key, _)| {
            cash_flow_surfaces
                .iter()
                .find(|candidate| candidate.key == *surface_key)
                .unwrap()
        })
        .collect();

    for (surface_row, (_, expected)) in resolved.iter().zip(expected_p1.iter()) {
        assert_eq!(
            surface_row.values.get("p1").copied().flatten(),
            Some(*expected)
        );
    }

    // Helper surfaces must not be emitted as surface rows.
    let hidden_helper_keys = [
        "contract_liability_incurred",
        "contract_liability_recognized",
        "changes_other_current_assets",
        "changes_other_current_liabilities",
    ];
    assert!(!cash_flow_surfaces.iter().any(|candidate| {
        hidden_helper_keys
            .iter()
            .any(|hidden| candidate.key == *hidden)
    }));
}
// Core pack: two capex-related inputs of 20.0 each are supplied and the capex
// surface is expected at -20.0; only the company-specific concept should remain
// in the "unmapped" cash-flow detail group.
#[test]
fn prunes_consumed_cash_flow_rows_from_unmapped() {
    let mut statement_rows = empty_map();
    let cash_flow_inputs = [
        row(
            "capex-primary",
            "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment",
            "cash_flow",
            20.0,
        ),
        row(
            "capex-alias",
            "us-gaap:CapitalExpendituresIncurredButNotYetPaid",
            "cash_flow",
            20.0,
        ),
        row(
            "custom-cash-flow",
            "company:CustomCashFlowMetric",
            "cash_flow",
            8.0,
        ),
    ];
    statement_rows
        .get_mut("cash_flow")
        .unwrap()
        .extend(cash_flow_inputs);

    let model = build_compact_surface_model(
        &[period("p1")],
        &statement_rows,
        "us-gaap",
        FiscalPack::Core,
        vec![],
    )
    .expect("compact model should build");

    let cash_flow_surfaces = model.surface_rows.get("cash_flow").unwrap();
    let capex_row = cash_flow_surfaces
        .iter()
        .find(|candidate| candidate.key == "capital_expenditures")
        .unwrap();
    let capex_p1 = capex_row.values.get("p1").copied().flatten();
    assert_eq!(capex_p1, Some(-20.0));

    // A missing statement or missing "unmapped" group is treated as an empty list.
    let unmapped_group = model
        .detail_rows
        .get("cash_flow")
        .and_then(|groups| groups.get("unmapped"));
    let unmapped = unmapped_group.cloned().unwrap_or_default();
    assert_eq!(unmapped.len(), 1);
    assert_eq!(unmapped[0].key, "custom-cash-flow");
}
// Insurance pack: operating cash flow and capex inputs must still resolve to their
// cash-flow surface rows with the expected categories, capex is expected negated,
// and a free-cash-flow row of 65.0 is expected (consistent with 90.0 less 25.0).
#[test]
fn merges_core_cash_flow_rows_into_sector_packs() {
    let mut statement_rows = empty_map();
    let cash_flow_inputs = [
        row(
            "cfo",
            "us-gaap:NetCashProvidedByUsedInOperatingActivities",
            "cash_flow",
            90.0,
        ),
        row(
            "capex",
            "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment",
            "cash_flow",
            25.0,
        ),
    ];
    statement_rows
        .get_mut("cash_flow")
        .unwrap()
        .extend(cash_flow_inputs);

    let model = build_compact_surface_model(
        &[period("p1")],
        &statement_rows,
        "us-gaap",
        FiscalPack::Insurance,
        vec![],
    )
    .expect("compact model should build");

    let cash_flow_surfaces = model.surface_rows.get("cash_flow").unwrap();
    // Looks up a cash-flow surface row by key; panics when the row is absent.
    let surface = |surface_key: &str| {
        cash_flow_surfaces
            .iter()
            .find(|candidate| candidate.key == surface_key)
            .unwrap()
    };

    // Resolve every row before asserting so a missing row is reported first.
    let operating_row = surface("operating_cash_flow");
    let capex_row = surface("capital_expenditures");
    let fcf_row = surface("free_cash_flow");

    assert_eq!(operating_row.category, "operating");
    assert_eq!(capex_row.category, "investing");
    assert_eq!(capex_row.values.get("p1").copied().flatten(), Some(-25.0));
    assert_eq!(fcf_row.values.get("p1").copied().flatten(), Some(65.0));
}
// REIT pack: the real-estate purchase (300.0) and capital-improvement (20.0) inputs
// are expected on the capex surface as -320.0, while the subsidiary/affiliate
// purchase (15.0) is expected on its own acquisitions surface as -15.0.
#[test]
fn reit_cash_flow_override_keeps_capex_separate_from_acquisitions() {
    let mut statement_rows = empty_map();
    let cash_flow_inputs = [
        row(
            "reit-capex",
            "us-gaap:PaymentsToAcquireCommercialRealEstate",
            "cash_flow",
            300.0,
        ),
        row(
            "capital-improvements",
            "us-gaap:PaymentsForCapitalImprovements",
            "cash_flow",
            20.0,
        ),
        row(
            "reit-acquisition",
            "us-gaap:PaymentsToAcquireInterestInSubsidiariesAndAffiliates",
            "cash_flow",
            15.0,
        ),
    ];
    statement_rows
        .get_mut("cash_flow")
        .unwrap()
        .extend(cash_flow_inputs);

    let model = build_compact_surface_model(
        &[period("p1")],
        &statement_rows,
        "us-gaap",
        FiscalPack::ReitRealEstate,
        vec![],
    )
    .expect("compact model should build");

    let cash_flow_surfaces = model.surface_rows.get("cash_flow").unwrap();
    // Looks up a cash-flow surface row by key; panics when the row is absent.
    let surface = |surface_key: &str| {
        cash_flow_surfaces
            .iter()
            .find(|candidate| candidate.key == surface_key)
            .unwrap()
    };

    // Resolve both rows before asserting so a missing row is reported first.
    let capex_row = surface("capital_expenditures");
    let acquisitions_row = surface("acquisitions");

    let capex_p1 = capex_row.values.get("p1").copied().flatten();
    assert_eq!(capex_p1, Some(-320.0));
    let acquisitions_p1 = acquisitions_row.values.get("p1").copied().flatten();
    assert_eq!(acquisitions_p1, Some(-15.0));
}
}