Integrate crabrl parser into taxonomy hydration

This commit is contained in:
2026-03-16 15:18:01 -04:00
parent cf084793ed
commit a58b07456e
23 changed files with 4696 additions and 2466 deletions

View File

@@ -0,0 +1,231 @@
use anyhow::{Context, Result};
use crabrl::{Document, FactValue, Measure, Parser, Period, UnitType};
use serde_json::json;
use crate::{
is_xbrl_infrastructure_prefix, ContextOutput, DimensionOutput, ParsedFact, ParsedInstance,
};
/// Parse a raw XBRL instance document with the crabrl parser and convert the
/// result into this crate's `ParsedInstance` representation.
///
/// `source_file` is threaded through to every emitted fact for provenance.
///
/// # Errors
/// Returns an error when crabrl cannot parse `raw`.
pub(crate) fn parse_xbrl_instance(
    raw: &str,
    source_file: Option<String>,
) -> Result<ParsedInstance> {
    let parser = Parser::new();
    let document = parser
        .parse_bytes(raw.as_bytes())
        .context("crabrl failed to parse XBRL instance")?;
    let contexts = build_contexts(&document);
    let facts = build_facts(&document, source_file);
    Ok(ParsedInstance { contexts, facts })
}
/// Convert every crabrl context in the document into a `ContextOutput` row.
fn build_contexts(document: &Document) -> Vec<ContextOutput> {
    let mut outputs = Vec::with_capacity(document.contexts.len());
    for context in &document.contexts {
        let (period_start, period_end, period_instant) = convert_period(&context.period);
        outputs.push(ContextOutput {
            context_id: context.id.to_string(),
            entity_identifier: Some(context.entity.identifier.to_string()),
            entity_scheme: Some(context.entity.scheme.to_string()),
            period_start,
            period_end,
            period_instant,
            segment_json: context.entity.segment.as_ref().map(segment_to_json),
            scenario_json: context.scenario.as_ref().map(scenario_to_json),
        });
    }
    outputs
}
/// Convert crabrl's columnar fact storage into `ParsedFact` rows.
///
/// A fact is emitted only when:
/// - its concept QName has a `prefix:local` shape,
/// - the prefix is not XBRL infrastructure (xbrli, link, ...),
/// - its value is numeric (see `numeric_fact_value`), and
/// - its context id resolves to a known context.
fn build_facts(document: &Document, source_file: Option<String>) -> Vec<ParsedFact> {
document
.facts
.concept_ids
.iter()
.enumerate()
.filter_map(|(index, concept_id)| {
// `index` addresses the parallel per-fact arrays
// (values, context_ids, decimals, unit_ids).
let qname = document
.concept_names
.get(*concept_id as usize)?
.to_string();
let (prefix, local_name) = split_qname(&qname)?;
if is_xbrl_infrastructure_prefix(&prefix) {
return None;
}
// Non-numeric fact values are skipped entirely.
let value = numeric_fact_value(document.facts.values.get(index)?)?;
let context = document
.contexts
.get(*document.facts.context_ids.get(index)? as usize)?;
// Fall back to a synthetic URN when the prefix has no declared namespace.
let namespace_uri = document
.namespaces
.get(prefix.as_str())
.map(|value| value.to_string())
.unwrap_or_else(|| format!("urn:unknown:{prefix}"));
let (period_start, period_end, period_instant) = convert_period(&context.period);
let dimensions = context_dimensions(context);
Some(ParsedFact {
concept_key: format!("{namespace_uri}#{local_name}"),
qname,
namespace_uri,
local_name,
data_type: None,
context_id: context.id.to_string(),
unit: unit_for_fact(document, index),
decimals: document
.facts
.decimals
.get(index)
.and_then(|value| value.map(|entry| entry.to_string())),
precision: None,
// NOTE(review): `numeric_fact_value` returns None for `FactValue::Nil`,
// so nil facts are filtered out above and this flag appears to always
// be false here — confirm whether nil facts should be emitted at all.
nil: matches!(document.facts.values.get(index), Some(FactValue::Nil)),
value,
period_start,
period_end,
period_instant,
is_dimensionless: dimensions.is_empty(),
dimensions,
source_file: source_file.clone(),
})
})
.collect()
}
/// Extract a numeric value from a fact, widening integers to `f64`.
/// Every other variant (including nil/text values) yields `None`.
fn numeric_fact_value(value: &FactValue) -> Option<f64> {
    if let FactValue::Decimal(number) = value {
        return Some(*number);
    }
    if let FactValue::Integer(number) = value {
        return Some(*number as f64);
    }
    None
}
/// Split a `prefix:localName` QName into trimmed, owned `(prefix, local)`
/// parts. Returns `None` when there is no colon, or when either side is
/// blank after trimming. Only the first colon splits; any later colons stay
/// in the local name.
fn split_qname(qname: &str) -> Option<(String, String)> {
    match qname.split_once(':') {
        Some((raw_prefix, raw_local)) => {
            let prefix = raw_prefix.trim();
            let local_name = raw_local.trim();
            if prefix.is_empty() || local_name.is_empty() {
                None
            } else {
                Some((prefix.to_string(), local_name.to_string()))
            }
        }
        None => None,
    }
}
/// Flatten a crabrl period into `(start, end, instant)` string options.
/// `Forever` carries no dates and maps to three `None`s.
fn convert_period(period: &Period) -> (Option<String>, Option<String>, Option<String>) {
    let mut start = None;
    let mut end = None;
    let mut instant = None;
    match period {
        Period::Instant { date } => instant = Some(date.to_string()),
        Period::Duration { start: from, end: to } => {
            start = Some(from.to_string());
            end = Some(to.to_string());
        }
        Period::Forever => {}
    }
    (start, end, instant)
}
/// Collect the explicit dimension members from a context's segment and
/// scenario (segment members first) as `DimensionOutput` pairs.
fn context_dimensions(context: &crabrl::Context) -> Vec<DimensionOutput> {
    let segment_members = context
        .entity
        .segment
        .as_ref()
        .into_iter()
        .flat_map(|segment| segment.explicit_members.iter());
    let scenario_members = context
        .scenario
        .as_ref()
        .into_iter()
        .flat_map(|scenario| scenario.explicit_members.iter());
    segment_members
        .chain(scenario_members)
        .map(|member| DimensionOutput {
            axis: member.dimension.to_string(),
            member: member.member.to_string(),
        })
        .collect()
}
/// Resolve the unit string for the fact at `fact_index`, if it has one.
fn unit_for_fact(document: &Document, fact_index: usize) -> Option<String> {
let unit_id = *document.facts.unit_ids.get(fact_index)?;
// NOTE(review): assumes crabrl uses unit id 0 as a "no unit" sentinel and
// stores 1-based ids into `document.units` — confirm against crabrl docs.
if unit_id == 0 {
return None;
}
document
.units
.get((unit_id - 1) as usize)
.map(|unit| unit_type_to_string(&unit.unit_type))
}
/// Render a crabrl unit as a compact string, e.g. `usd`, `usd/shares`, or
/// `a*b/c` for divide units with multiple measures on either side.
fn unit_type_to_string(unit_type: &UnitType) -> String {
    match unit_type {
        UnitType::Simple(measures) => join_measures(measures, "/"),
        UnitType::Multiply(measures) => join_measures(measures, "*"),
        UnitType::Divide { numerator, denominator } => {
            let top = join_measures(numerator, "*");
            let bottom = join_measures(denominator, "*");
            format!("{top}/{bottom}")
        }
    }
}
/// Render each measure and join the results with `separator`.
fn join_measures(measures: &[Measure], separator: &str) -> String {
    let rendered: Vec<String> = measures.iter().map(measure_to_string).collect();
    rendered.join(separator)
}
/// Render a measure as `namespace:name`, or just `name` when the namespace
/// is empty.
fn measure_to_string(measure: &Measure) -> String {
    if measure.namespace.is_empty() {
        return measure.name.to_string();
    }
    format!("{}:{}", measure.namespace, measure.name)
}
/// Serialize a segment's explicit and typed members into the JSON shape
/// stored on `ContextOutput::segment_json`.
fn segment_to_json(segment: &crabrl::Segment) -> serde_json::Value {
    let explicit: Vec<serde_json::Value> = segment
        .explicit_members
        .iter()
        .map(|member| {
            json!({
                "axis": member.dimension.to_string(),
                "member": member.member.to_string(),
            })
        })
        .collect();
    let typed: Vec<serde_json::Value> = segment
        .typed_members
        .iter()
        .map(|member| {
            json!({
                "axis": member.dimension.to_string(),
                "value": member.value.to_string(),
            })
        })
        .collect();
    json!({
        "explicitMembers": explicit,
        "typedMembers": typed,
    })
}
/// Serialize a scenario's explicit and typed members into the JSON shape
/// stored on `ContextOutput::scenario_json`.
fn scenario_to_json(scenario: &crabrl::Scenario) -> serde_json::Value {
    let explicit: Vec<serde_json::Value> = scenario
        .explicit_members
        .iter()
        .map(|member| {
            json!({
                "axis": member.dimension.to_string(),
                "member": member.member.to_string(),
            })
        })
        .collect();
    let typed: Vec<serde_json::Value> = scenario
        .typed_members
        .iter()
        .map(|member| {
            json!({
                "axis": member.dimension.to_string(),
                "value": member.value.to_string(),
            })
        })
        .collect();
    json!({
        "explicitMembers": explicit,
        "typedMembers": typed,
    })
}

View File

@@ -9,6 +9,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Mutex;
use std::time::{Duration, Instant};
mod crabrl_adapter;
mod kpi_mapper;
mod metrics;
mod pack_selector;
@@ -54,44 +55,6 @@ where
fetch_fn()
}
static CONTEXT_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?context>"#).unwrap()
});
static UNIT_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?unit>"#).unwrap()
});
static FACT_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'][^>]*)>(.*?)</[a-zA-Z0-9_\-]+:[a-zA-Z0-9_\-.]+>"#).unwrap()
});
static EXPLICIT_MEMBER_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?explicitMember>"#).unwrap()
});
static TYPED_MEMBER_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?typedMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?typedMember>"#).unwrap()
});
static IDENTIFIER_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?identifier\b[^>]*\bscheme=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?identifier>"#).unwrap()
});
static SEGMENT_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?segment\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?segment>"#)
.unwrap()
});
static SCENARIO_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?scenario\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?scenario>"#)
.unwrap()
});
static START_DATE_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?startDate>(.*?)</(?:[a-z0-9_\-]+:)?startDate>"#).unwrap()
});
static END_DATE_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?endDate>(.*?)</(?:[a-z0-9_\-]+:)?endDate>"#).unwrap()
});
static INSTANT_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?instant>(.*?)</(?:[a-z0-9_\-]+:)?instant>"#).unwrap()
});
static MEASURE_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?measure>(.*?)</(?:[a-z0-9_\-]+:)?measure>"#).unwrap()
});
static LABEL_LINK_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?labelLink>"#)
.unwrap()
@@ -465,25 +428,7 @@ pub type SurfaceRowMap = BTreeMap<String, Vec<SurfaceRowOutput>>;
pub type DetailRowStatementMap = BTreeMap<String, BTreeMap<String, Vec<DetailRowOutput>>>;
#[derive(Debug, Clone)]
struct ParsedContext {
id: String,
entity_identifier: Option<String>,
entity_scheme: Option<String>,
period_start: Option<String>,
period_end: Option<String>,
period_instant: Option<String>,
dimensions: Vec<DimensionOutput>,
segment: Option<serde_json::Value>,
scenario: Option<serde_json::Value>,
}
#[derive(Debug, Clone)]
struct ParsedUnit {
measure: Option<String>,
}
#[derive(Debug, Clone)]
struct ParsedFact {
pub(crate) struct ParsedFact {
concept_key: String,
qname: String,
namespace_uri: String,
@@ -593,7 +538,8 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
);
}
let parsed_instance = parse_xbrl_instance(&instance_text, Some(instance_asset.name.clone()));
let parsed_instance = parse_xbrl_instance(&instance_text, Some(instance_asset.name.clone()))
.context("parse failed for XBRL instance")?;
let mut label_by_concept = HashMap::new();
let mut presentation = Vec::new();
@@ -1144,114 +1090,13 @@ fn validate_xbrl_structure(xml: &str, source_file: Option<&str>) -> XbrlValidati
}
}
struct ParsedInstance {
pub(crate) struct ParsedInstance {
contexts: Vec<ContextOutput>,
facts: Vec<ParsedFact>,
}
fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> ParsedInstance {
let namespaces = parse_namespace_map(raw, "xbrl");
let context_by_id = parse_contexts(raw);
let unit_by_id = parse_units(raw);
let mut facts = Vec::new();
for captures in FACT_RE.captures_iter(raw) {
let prefix = captures
.get(1)
.map(|value| value.as_str().trim())
.unwrap_or_default();
let local_name = captures
.get(2)
.map(|value| value.as_str().trim())
.unwrap_or_default();
let attrs = captures
.get(3)
.map(|value| value.as_str())
.unwrap_or_default();
let body = decode_xml_entities(
captures
.get(4)
.map(|value| value.as_str())
.unwrap_or_default()
.trim(),
);
if prefix.is_empty() || local_name.is_empty() || is_xbrl_infrastructure_prefix(prefix) {
continue;
}
let attr_map = parse_attrs(attrs);
let Some(context_id) = attr_map
.get("contextRef")
.cloned()
.or_else(|| attr_map.get("contextref").cloned())
else {
continue;
};
let Some(value) = parse_number(&body) else {
continue;
};
let namespace_uri = namespaces
.get(prefix)
.cloned()
.unwrap_or_else(|| format!("urn:unknown:{prefix}"));
let context = context_by_id.get(&context_id);
let unit_ref = attr_map
.get("unitRef")
.cloned()
.or_else(|| attr_map.get("unitref").cloned());
let unit = unit_ref
.as_ref()
.and_then(|unit_ref| unit_by_id.get(unit_ref))
.and_then(|unit| unit.measure.clone())
.or(unit_ref);
facts.push(ParsedFact {
concept_key: format!("{namespace_uri}#{local_name}"),
qname: format!("{prefix}:{local_name}"),
namespace_uri,
local_name: local_name.to_string(),
data_type: None,
context_id: context_id.clone(),
unit,
decimals: attr_map.get("decimals").cloned(),
precision: attr_map.get("precision").cloned(),
nil: attr_map
.get("xsi:nil")
.or_else(|| attr_map.get("nil"))
.map(|value| value.eq_ignore_ascii_case("true"))
.unwrap_or(false),
value,
period_start: context.and_then(|value| value.period_start.clone()),
period_end: context.and_then(|value| value.period_end.clone()),
period_instant: context.and_then(|value| value.period_instant.clone()),
dimensions: context
.map(|value| value.dimensions.clone())
.unwrap_or_default(),
is_dimensionless: context
.map(|value| value.dimensions.is_empty())
.unwrap_or(true),
source_file: source_file.clone(),
});
}
let contexts = context_by_id
.values()
.map(|context| ContextOutput {
context_id: context.id.clone(),
entity_identifier: context.entity_identifier.clone(),
entity_scheme: context.entity_scheme.clone(),
period_start: context.period_start.clone(),
period_end: context.period_end.clone(),
period_instant: context.period_instant.clone(),
segment_json: context.segment.clone(),
scenario_json: context.scenario.clone(),
})
.collect::<Vec<_>>();
ParsedInstance { contexts, facts }
fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> Result<ParsedInstance> {
crabrl_adapter::parse_xbrl_instance(raw, source_file)
}
fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap<String, String> {
@@ -1277,173 +1122,7 @@ fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap<String, String
map
}
fn parse_contexts(raw: &str) -> HashMap<String, ParsedContext> {
let mut contexts = HashMap::new();
for captures in CONTEXT_RE.captures_iter(raw) {
let Some(context_id) = captures
.get(1)
.map(|value| value.as_str().trim().to_string())
else {
continue;
};
let block = captures
.get(2)
.map(|value| value.as_str())
.unwrap_or_default();
let (entity_identifier, entity_scheme) = IDENTIFIER_RE
.captures(block)
.map(|captures| {
(
captures
.get(2)
.map(|value| decode_xml_entities(value.as_str().trim())),
captures
.get(1)
.map(|value| decode_xml_entities(value.as_str().trim())),
)
})
.unwrap_or((None, None));
let period_start = START_DATE_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| decode_xml_entities(value.as_str().trim()));
let period_end = END_DATE_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| decode_xml_entities(value.as_str().trim()));
let period_instant = INSTANT_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| decode_xml_entities(value.as_str().trim()));
let segment = SEGMENT_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| parse_dimension_container(value.as_str()));
let scenario = SCENARIO_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| parse_dimension_container(value.as_str()));
let mut dimensions = Vec::new();
if let Some(segment_value) = segment.as_ref() {
if let Some(members) = segment_value
.get("explicitMembers")
.and_then(|value| value.as_array())
{
for member in members {
if let (Some(axis), Some(member_value)) = (
member.get("axis").and_then(|value| value.as_str()),
member.get("member").and_then(|value| value.as_str()),
) {
dimensions.push(DimensionOutput {
axis: axis.to_string(),
member: member_value.to_string(),
});
}
}
}
}
if let Some(scenario_value) = scenario.as_ref() {
if let Some(members) = scenario_value
.get("explicitMembers")
.and_then(|value| value.as_array())
{
for member in members {
if let (Some(axis), Some(member_value)) = (
member.get("axis").and_then(|value| value.as_str()),
member.get("member").and_then(|value| value.as_str()),
) {
dimensions.push(DimensionOutput {
axis: axis.to_string(),
member: member_value.to_string(),
});
}
}
}
}
contexts.insert(
context_id.clone(),
ParsedContext {
id: context_id,
entity_identifier,
entity_scheme,
period_start,
period_end,
period_instant,
dimensions,
segment,
scenario,
},
);
}
contexts
}
fn parse_dimension_container(raw: &str) -> serde_json::Value {
let explicit_members = EXPLICIT_MEMBER_RE
.captures_iter(raw)
.filter_map(|captures| {
Some(serde_json::json!({
"axis": decode_xml_entities(captures.get(1)?.as_str().trim()),
"member": decode_xml_entities(captures.get(2)?.as_str().trim())
}))
})
.collect::<Vec<_>>();
let typed_members = TYPED_MEMBER_RE
.captures_iter(raw)
.filter_map(|captures| {
Some(serde_json::json!({
"axis": decode_xml_entities(captures.get(1)?.as_str().trim()),
"value": decode_xml_entities(captures.get(2)?.as_str().trim())
}))
})
.collect::<Vec<_>>();
serde_json::json!({
"explicitMembers": explicit_members,
"typedMembers": typed_members
})
}
fn parse_units(raw: &str) -> HashMap<String, ParsedUnit> {
let mut units = HashMap::new();
for captures in UNIT_RE.captures_iter(raw) {
let Some(id) = captures
.get(1)
.map(|value| value.as_str().trim().to_string())
else {
continue;
};
let block = captures
.get(2)
.map(|value| value.as_str())
.unwrap_or_default();
let measures = MEASURE_RE
.captures_iter(block)
.filter_map(|captures| captures.get(1))
.map(|value| decode_xml_entities(value.as_str().trim()))
.filter(|value| !value.is_empty())
.collect::<Vec<_>>();
let measure = if measures.len() == 1 {
measures.first().cloned()
} else if measures.len() > 1 {
Some(measures.join("/"))
} else {
None
};
units.insert(id, ParsedUnit { measure });
}
units
}
fn is_xbrl_infrastructure_prefix(prefix: &str) -> bool {
pub(crate) fn is_xbrl_infrastructure_prefix(prefix: &str) -> bool {
matches!(
prefix.to_ascii_lowercase().as_str(),
"xbrli" | "xlink" | "link" | "xbrldi" | "xbrldt"
@@ -1474,25 +1153,6 @@ fn decode_xml_entities(value: &str) -> String {
.replace("&nbsp;", " ")
}
fn parse_number(raw: &str) -> Option<f64> {
let trimmed = raw.trim();
if trimmed.is_empty() || trimmed.chars().all(|char| char == '-') {
return None;
}
let negative = trimmed.starts_with('(') && trimmed.ends_with(')');
let normalized = Regex::new(r#"<[^>]+>"#)
.unwrap()
.replace_all(trimmed, " ")
.replace(',', "")
.replace('$', "")
.replace(['(', ')'], "")
.replace('\u{2212}', "-")
.split_whitespace()
.collect::<String>();
let parsed = normalized.parse::<f64>().ok()?;
Some(if negative { -parsed.abs() } else { parsed })
}
fn parse_label_linkbase(raw: &str) -> HashMap<String, String> {
let namespaces = parse_namespace_map(raw, "linkbase");
let mut preferred = HashMap::<String, (String, i64)>::new();
@@ -2543,7 +2203,8 @@ mod tests {
</xbrli:xbrl>
"#;
let parsed = parse_xbrl_instance(raw, Some("test.xml".to_string()));
let parsed = parse_xbrl_instance(raw, Some("test.xml".to_string()))
.expect("crabrl parser should parse test instance");
assert_eq!(parsed.facts.len(), 1);
assert_eq!(
parsed.facts[0].qname,

View File

@@ -3,8 +3,8 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use crate::pack_selector::FiscalPack;
use crate::taxonomy_loader::{
load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition, SurfaceFormula,
SurfaceFormulaOp, SurfaceSignTransform,
load_crosswalk, load_income_bridge, load_surface_pack, CrosswalkFile, IncomeBridgeFile,
IncomeBridgeRow, SurfaceDefinition, SurfaceFormula, SurfaceFormulaOp, SurfaceSignTransform,
};
use crate::{
ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput,
@@ -114,6 +114,7 @@ pub fn build_compact_surface_model(
) -> Result<CompactSurfaceModel> {
let pack = load_surface_pack(fiscal_pack)?;
let crosswalk = load_crosswalk(taxonomy_regime)?;
let income_bridge = load_income_bridge(fiscal_pack).ok();
let mut surface_rows = empty_surface_row_map();
let mut detail_rows = empty_detail_row_map();
let mut concept_mappings = HashMap::<String, MappingAssignment>::new();
@@ -157,14 +158,20 @@ pub fn build_compact_surface_model(
.filter(|matched| matched.match_role == MatchRole::Detail)
.cloned()
.collect::<Vec<_>>();
let bridge_detail_matches = collect_income_bridge_detail_matches(
definition,
&rows,
crosswalk.as_ref(),
income_bridge.as_ref(),
);
let detail_matches = if definition.detail_grouping_policy == "group_all_children" {
if detail_component_matches.is_empty()
&& definition.rollup_policy == "aggregate_children"
{
let detail_matches =
merge_detail_matches(&detail_component_matches, &bridge_detail_matches);
if detail_matches.is_empty() && definition.rollup_policy == "aggregate_children" {
Vec::new()
} else {
detail_component_matches.clone()
detail_matches
}
} else {
Vec::new()
@@ -758,28 +765,123 @@ fn match_statement_row<'a>(
None
}
/// Find statement rows that belong to a surface row via the income-bridge
/// component groups, supplementing the direct crosswalk detail matches.
///
/// Applies only to income-statement surfaces configured with the
/// `aggregate_children` rollup and `group_all_children` detail-grouping
/// policies; otherwise returns an empty list. Rows with no value in any
/// period are skipped.
fn collect_income_bridge_detail_matches<'a>(
definition: &SurfaceDefinition,
rows: &'a [StatementRowOutput],
crosswalk: Option<&CrosswalkFile>,
income_bridge: Option<&IncomeBridgeFile>,
) -> Vec<MatchedStatementRow<'a>> {
if definition.statement != "income"
|| definition.rollup_policy != "aggregate_children"
|| definition.detail_grouping_policy != "group_all_children"
{
return Vec::new();
}
// No bridge file loaded, or no bridge row for this surface key: nothing to add.
let Some(bridge_row) =
income_bridge.and_then(|bridge| bridge.rows.get(&definition.surface_key))
else {
return Vec::new();
};
rows.iter()
.filter(|row| has_any_value(&row.values))
.filter_map(|row| match_income_bridge_detail_row(row, bridge_row, crosswalk))
.collect()
}
/// Match a single statement row against the bridge row's component concept
/// groups (positive and negative alike).
///
/// Candidates are compared against the row's QName, its local name, and —
/// when available — its authoritative concept key: the crosswalk mapping if
/// one exists, else the raw QName for non-extension rows.
/// Returns a detail-role match with rank 2, or `None` when nothing matches.
fn match_income_bridge_detail_row<'a>(
row: &'a StatementRowOutput,
bridge_row: &IncomeBridgeRow,
crosswalk: Option<&CrosswalkFile>,
) -> Option<MatchedStatementRow<'a>> {
let authoritative_concept_key = crosswalk
.and_then(|crosswalk| crosswalk.mappings.get(&row.qname))
.map(|mapping| mapping.authoritative_concept_key.clone())
.or_else(|| {
// Extension (company-specific) concepts get no authoritative key
// unless the crosswalk supplies one.
if !row.is_extension {
Some(row.qname.clone())
} else {
None
}
});
let matches_group = bridge_row
.component_concept_groups
.positive
.iter()
.chain(bridge_row.component_concept_groups.negative.iter())
.any(|group| {
group.concepts.iter().any(|candidate| {
candidate_matches(candidate, &row.qname)
|| candidate_matches(candidate, &row.local_name)
|| authoritative_concept_key
.as_ref()
.map(|concept| candidate_matches(candidate, concept))
.unwrap_or(false)
})
});
if !matches_group {
return None;
}
Some(MatchedStatementRow {
row,
authoritative_concept_key,
mapping_method: MappingMethod::AggregateChildren,
match_role: MatchRole::Detail,
rank: 2,
})
}
fn merge_detail_matches<'a>(
direct_matches: &[MatchedStatementRow<'a>],
bridge_matches: &[MatchedStatementRow<'a>],
) -> Vec<MatchedStatementRow<'a>> {
let mut merged = HashMap::<String, MatchedStatementRow<'a>>::new();
for matched in direct_matches.iter().chain(bridge_matches.iter()) {
merged
.entry(matched.row.key.clone())
.and_modify(|existing| {
if compare_statement_matches(matched, existing).is_lt() {
*existing = matched.clone();
}
})
.or_insert_with(|| matched.clone());
}
merged.into_values().collect()
}
fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
matches
.iter()
.min_by(|left, right| {
left.rank
.cmp(&right.rank)
.then_with(|| {
let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
left_dimension_rank.cmp(&right_dimension_rank)
})
.then_with(|| left.row.order.cmp(&right.row.order))
.then_with(|| {
max_abs_value(&right.row.values)
.partial_cmp(&max_abs_value(&left.row.values))
.unwrap_or(std::cmp::Ordering::Equal)
})
.then_with(|| left.row.label.cmp(&right.row.label))
})
.min_by(|left, right| compare_statement_matches(left, right))
.expect("pick_best_match requires at least one match")
}
/// Ordering used to pick the best statement-row match (lowest wins).
///
/// Preference, in order: lower rank, dimensionless rows before dimensional
/// ones, lower presentation order, larger maximum absolute value (note the
/// reversed operands), then label as the final tie-break.
fn compare_statement_matches(
left: &MatchedStatementRow<'_>,
right: &MatchedStatementRow<'_>,
) -> std::cmp::Ordering {
left.rank
.cmp(&right.rank)
.then_with(|| {
let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
left_dimension_rank.cmp(&right_dimension_rank)
})
.then_with(|| left.row.order.cmp(&right.row.order))
.then_with(|| {
// Operands reversed (right vs left) so larger magnitudes sort first;
// incomparable floats (NaN) are treated as equal.
max_abs_value(&right.row.values)
.partial_cmp(&max_abs_value(&left.row.values))
.unwrap_or(std::cmp::Ordering::Equal)
})
.then_with(|| left.row.label.cmp(&right.row.label))
}
fn build_surface_values(
periods: &[PeriodOutput],
matches: &[MatchedStatementRow<'_>],

View File

@@ -336,22 +336,26 @@ fn build_formula_row(
.positive
.iter()
.filter_map(|surface_key| {
income_surface_rows
.iter()
.find(|row| row.key == *surface_key)
resolve_component_surface_source(
surface_key,
income_statement_rows,
income_surface_rows,
crosswalk,
)
})
.map(surface_source)
.collect::<Vec<_>>();
let negative_surface_sources = bridge_row
.component_surfaces
.negative
.iter()
.filter_map(|surface_key| {
income_surface_rows
.iter()
.find(|row| row.key == *surface_key)
resolve_component_surface_source(
surface_key,
income_statement_rows,
income_surface_rows,
crosswalk,
)
})
.map(surface_source)
.collect::<Vec<_>>();
let (positive_group_sources, positive_group_rows) = collect_group_sources(
@@ -810,6 +814,44 @@ fn collect_group_sources<'a>(
(sources, rows)
}
/// Resolve a bridge component surface key to a value source.
///
/// Prefers an existing surface row with that key; otherwise falls back to
/// merging every income-statement row whose crosswalk mapping points at the
/// surface key. Returns `None` when neither path yields anything.
fn resolve_component_surface_source(
surface_key: &str,
income_statement_rows: &[StatementRowOutput],
income_surface_rows: &[SurfaceRowOutput],
crosswalk: Option<&CrosswalkFile>,
) -> Option<ValueSource> {
if let Some(surface_row) = income_surface_rows
.iter()
.find(|row| row.key == surface_key)
{
return Some(surface_source(surface_row));
}
// Fallback: derive the source directly from matching statement rows that
// actually carry values.
let matches = income_statement_rows
.iter()
.filter(|row| has_any_value(&row.values))
.filter(|row| row_matches_surface_key(row, surface_key, crosswalk))
.map(statement_row_source)
.collect::<Vec<_>>();
if matches.is_empty() {
return None;
}
Some(merge_value_sources(&matches))
}
/// Whether `row` maps, via the crosswalk, to `surface_key` (compared
/// case-insensitively). Rows without a crosswalk mapping never match.
fn row_matches_surface_key(
    row: &StatementRowOutput,
    surface_key: &str,
    crosswalk: Option<&CrosswalkFile>,
) -> bool {
    let Some(crosswalk) = crosswalk else {
        return false;
    };
    match crosswalk.mappings.get(&row.qname) {
        Some(mapping) => mapping.surface_key.eq_ignore_ascii_case(surface_key),
        None => false,
    }
}
fn match_direct_authoritative<'a>(
row: &'a StatementRowOutput,
candidates: &[String],
@@ -1024,6 +1066,52 @@ fn surface_source(row: &SurfaceRowOutput) -> ValueSource {
}
}
/// Combine several value sources into one by summing values per period and
/// unioning their provenance sets.
///
/// For a given period, the merged value is `None` only when every source is
/// missing that period; otherwise missing entries contribute 0.0 to the sum.
fn merge_value_sources(sources: &[ValueSource]) -> ValueSource {
let mut values = BTreeMap::<String, Option<f64>>::new();
for period_id in sources.iter().flat_map(|source| source.values.keys()) {
// `or_insert_with` computes each period only once, even when several
// sources carry the same period id.
values.entry(period_id.clone()).or_insert_with(|| {
let period_values = sources
.iter()
.map(|source| source.values.get(period_id).copied().flatten())
.collect::<Vec<_>>();
if period_values.iter().all(|value| value.is_none()) {
None
} else {
Some(
period_values
.into_iter()
.map(|value| value.unwrap_or(0.0))
.sum(),
)
}
});
}
ValueSource {
values,
source_concepts: unique_sorted_strings(
sources
.iter()
.flat_map(|source| source.source_concepts.clone())
.collect(),
),
source_row_keys: unique_sorted_strings(
sources
.iter()
.flat_map(|source| source.source_row_keys.clone())
.collect(),
),
source_fact_ids: unique_sorted_i64(
sources
.iter()
.flat_map(|source| source.source_fact_ids.clone())
.collect(),
),
has_dimensions: sources.iter().any(|source| source.has_dimensions),
}
}
fn fact_matches_period(fact: &FactOutput, period: &PeriodOutput) -> bool {
if fact.period_end != period.period_end {
return false;