Integrate crabrl parser into taxonomy hydration

2026-03-16 15:18:01 -04:00
parent cf084793ed
commit a58b07456e
23 changed files with 4696 additions and 2466 deletions
--- a/rust/fiscal-xbrl-core/src/surface_mapper.rs
+++ b/rust/fiscal-xbrl-core/src/surface_mapper.rs
@@ -3,8 +3,8 @@ use std::collections::{BTreeMap, HashMap, HashSet};

 use crate::pack_selector::FiscalPack;
 use crate::taxonomy_loader::{
-    load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition, SurfaceFormula,
-    SurfaceFormulaOp, SurfaceSignTransform,
+    load_crosswalk, load_income_bridge, load_surface_pack, CrosswalkFile, IncomeBridgeFile,
+    IncomeBridgeRow, SurfaceDefinition, SurfaceFormula, SurfaceFormulaOp, SurfaceSignTransform,
 };
 use crate::{
    ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput,
@@ -114,6 +114,7 @@ pub fn build_compact_surface_model(
 ) -> Result<CompactSurfaceModel> {
    let pack = load_surface_pack(fiscal_pack)?;
    let crosswalk = load_crosswalk(taxonomy_regime)?;
+    let income_bridge = load_income_bridge(fiscal_pack).ok();
    let mut surface_rows = empty_surface_row_map();
    let mut detail_rows = empty_detail_row_map();
    let mut concept_mappings = HashMap::<String, MappingAssignment>::new();
@@ -157,14 +158,20 @@ pub fn build_compact_surface_model(
                .filter(|matched| matched.match_role == MatchRole::Detail)
                .cloned()
                .collect::<Vec<_>>();
+            let bridge_detail_matches = collect_income_bridge_detail_matches(
+                definition,
+                &rows,
+                crosswalk.as_ref(),
+                income_bridge.as_ref(),
+            );

            let detail_matches = if definition.detail_grouping_policy == "group_all_children" {
-                if detail_component_matches.is_empty()
-                    && definition.rollup_policy == "aggregate_children"
-                {
+                let detail_matches =
+                    merge_detail_matches(&detail_component_matches, &bridge_detail_matches);
+                if detail_matches.is_empty() && definition.rollup_policy == "aggregate_children" {
                    Vec::new()
                } else {
-                    detail_component_matches.clone()
+                    detail_matches
                }
            } else {
                Vec::new()
@@ -758,28 +765,123 @@ fn match_statement_row<'a>(
    None
 }

+/// Gathers the statement rows that match the income-bridge components of `definition`.
+///
+/// Yields an empty list unless the definition is an `income` surface using the
+/// `aggregate_children` rollup policy and the `group_all_children` detail grouping
+/// policy, and `income_bridge` holds a row keyed by the definition's `surface_key`.
+/// Rows rejected by `has_any_value` are never considered.
+fn collect_income_bridge_detail_matches<'a>(
+    definition: &SurfaceDefinition,
+    rows: &'a [StatementRowOutput],
+    crosswalk: Option<&CrosswalkFile>,
+    income_bridge: Option<&IncomeBridgeFile>,
+) -> Vec<MatchedStatementRow<'a>> {
+    let bridge_applies = definition.statement == "income"
+        && definition.rollup_policy == "aggregate_children"
+        && definition.detail_grouping_policy == "group_all_children";
+    if !bridge_applies {
+        return Vec::new();
+    }
+
+    // A missing bridge file and a missing bridge row are handled identically.
+    let lookup = income_bridge.and_then(|file| file.rows.get(&definition.surface_key));
+    let bridge_row = match lookup {
+        Some(found) => found,
+        None => return Vec::new(),
+    };
+
+    let mut bridge_matches = Vec::new();
+    for row in rows {
+        if !has_any_value(&row.values) {
+            continue;
+        }
+        if let Some(matched) = match_income_bridge_detail_row(row, bridge_row, crosswalk) {
+            bridge_matches.push(matched);
+        }
+    }
+    bridge_matches
+}
+
+/// Tests one statement row against the component concept groups of an income-bridge row.
+///
+/// The row's authoritative concept key is the crosswalk mapping for its `qname` when one
+/// exists; otherwise it is the `qname` itself for non-extension rows and `None` for
+/// extension rows. The row matches when any candidate concept in the bridge row's positive
+/// or negative groups satisfies `candidate_matches` against the row's `qname`, its
+/// `local_name`, or that authoritative key.
+///
+/// Returns a detail-role match (`MappingMethod::AggregateChildren`, rank 2) on success and
+/// `None` otherwise.
+fn match_income_bridge_detail_row<'a>(
+    row: &'a StatementRowOutput,
+    bridge_row: &IncomeBridgeRow,
+    crosswalk: Option<&CrosswalkFile>,
+) -> Option<MatchedStatementRow<'a>> {
+    let mapped_key = crosswalk
+        .and_then(|file| file.mappings.get(&row.qname))
+        .map(|mapping| mapping.authoritative_concept_key.clone());
+    let authoritative_concept_key = match mapped_key {
+        Some(key) => Some(key),
+        // Extension rows without a crosswalk mapping get no fallback key.
+        None if row.is_extension => None,
+        None => Some(row.qname.clone()),
+    };
+
+    let groups = &bridge_row.component_concept_groups;
+    let matches_group = groups
+        .positive
+        .iter()
+        .chain(groups.negative.iter())
+        .flat_map(|group| group.concepts.iter())
+        .any(|candidate| {
+            candidate_matches(candidate, &row.qname)
+                || candidate_matches(candidate, &row.local_name)
+                || authoritative_concept_key
+                    .as_ref()
+                    .map_or(false, |concept| candidate_matches(candidate, concept))
+        });
+
+    if matches_group {
+        Some(MatchedStatementRow {
+            row,
+            authoritative_concept_key,
+            mapping_method: MappingMethod::AggregateChildren,
+            match_role: MatchRole::Detail,
+            rank: 2,
+        })
+    } else {
+        None
+    }
+}
+
+/// Merges direct and bridge detail matches, keeping one match per statement row key.
+///
+/// When the same row key appears more than once, the entry that orders first under
+/// `compare_statement_matches` is kept; on a tie the earlier entry stays.
+///
+/// The result preserves first-seen order (all of `direct_matches` in input order, then
+/// any bridge-only rows in input order). Collecting through `HashMap::into_values` would
+/// instead emit rows in an arbitrary order that differs from run to run.
+fn merge_detail_matches<'a>(
+    direct_matches: &[MatchedStatementRow<'a>],
+    bridge_matches: &[MatchedStatementRow<'a>],
+) -> Vec<MatchedStatementRow<'a>> {
+    let mut merged =
+        Vec::<MatchedStatementRow<'a>>::with_capacity(direct_matches.len() + bridge_matches.len());
+    // Maps a row key to the position of its current winner inside `merged`.
+    let mut position_by_key = HashMap::<&str, usize>::new();
+
+    for matched in direct_matches.iter().chain(bridge_matches.iter()) {
+        let key = matched.row.key.as_str();
+        if let Some(position) = position_by_key.get(key).copied() {
+            if compare_statement_matches(matched, &merged[position]).is_lt() {
+                merged[position] = matched.clone();
+            }
+        } else {
+            position_by_key.insert(key, merged.len());
+            merged.push(matched.clone());
+        }
+    }
+
+    merged
+}
+
 /// Returns the match that orders first under `compare_statement_matches`.
 ///
 /// # Panics
 ///
 /// Panics if `matches` is empty.
 fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
    matches
        .iter()
-        .min_by(|left, right| {
-            left.rank
-                .cmp(&right.rank)
-                .then_with(|| {
-                    let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
-                    let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
-                    left_dimension_rank.cmp(&right_dimension_rank)
-                })
-                .then_with(|| left.row.order.cmp(&right.row.order))
-                .then_with(|| {
-                    max_abs_value(&right.row.values)
-                        .partial_cmp(&max_abs_value(&left.row.values))
-                        .unwrap_or(std::cmp::Ordering::Equal)
-                })
-                .then_with(|| left.row.label.cmp(&right.row.label))
-        })
+        .min_by(|left, right| compare_statement_matches(left, right))
        .expect("pick_best_match requires at least one match")
 }

+/// Orders two statement matches so that the preferred one compares as `Less`.
+///
+/// Keys are applied in sequence, each one only breaking ties left by the previous:
+/// lower `rank`, rows without dimensions before rows with them, lower row `order`,
+/// larger `max_abs_value` of the row values, and finally the row `label`.
+/// Values that `partial_cmp` cannot order are treated as equal.
+fn compare_statement_matches(
+    left: &MatchedStatementRow<'_>,
+    right: &MatchedStatementRow<'_>,
+) -> std::cmp::Ordering {
+    let (lhs, rhs) = (left.row, right.row);
+
+    // Operands are swapped on purpose so the larger value sorts first.
+    let by_value_descending = || {
+        max_abs_value(&rhs.values)
+            .partial_cmp(&max_abs_value(&lhs.values))
+            .unwrap_or(std::cmp::Ordering::Equal)
+    };
+
+    left.rank
+        .cmp(&right.rank)
+        // `false < true`, so rows without dimensions come first.
+        .then_with(|| lhs.has_dimensions.cmp(&rhs.has_dimensions))
+        .then_with(|| lhs.order.cmp(&rhs.order))
+        .then_with(by_value_descending)
+        .then_with(|| lhs.label.cmp(&rhs.label))
+}
+
 fn build_surface_values(
    periods: &[PeriodOutput],
    matches: &[MatchedStatementRow<'_>],