Integrate crabrl parser into taxonomy hydration

This commit is contained in:
2026-03-16 15:18:01 -04:00
parent cf084793ed
commit a58b07456e
23 changed files with 4696 additions and 2466 deletions

View File

@@ -3,8 +3,8 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use crate::pack_selector::FiscalPack;
use crate::taxonomy_loader::{
load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition, SurfaceFormula,
SurfaceFormulaOp, SurfaceSignTransform,
load_crosswalk, load_income_bridge, load_surface_pack, CrosswalkFile, IncomeBridgeFile,
IncomeBridgeRow, SurfaceDefinition, SurfaceFormula, SurfaceFormulaOp, SurfaceSignTransform,
};
use crate::{
ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput,
@@ -114,6 +114,7 @@ pub fn build_compact_surface_model(
) -> Result<CompactSurfaceModel> {
let pack = load_surface_pack(fiscal_pack)?;
let crosswalk = load_crosswalk(taxonomy_regime)?;
let income_bridge = load_income_bridge(fiscal_pack).ok();
let mut surface_rows = empty_surface_row_map();
let mut detail_rows = empty_detail_row_map();
let mut concept_mappings = HashMap::<String, MappingAssignment>::new();
@@ -157,14 +158,20 @@ pub fn build_compact_surface_model(
.filter(|matched| matched.match_role == MatchRole::Detail)
.cloned()
.collect::<Vec<_>>();
let bridge_detail_matches = collect_income_bridge_detail_matches(
definition,
&rows,
crosswalk.as_ref(),
income_bridge.as_ref(),
);
let detail_matches = if definition.detail_grouping_policy == "group_all_children" {
if detail_component_matches.is_empty()
&& definition.rollup_policy == "aggregate_children"
{
let detail_matches =
merge_detail_matches(&detail_component_matches, &bridge_detail_matches);
if detail_matches.is_empty() && definition.rollup_policy == "aggregate_children" {
Vec::new()
} else {
detail_component_matches.clone()
detail_matches
}
} else {
Vec::new()
@@ -758,28 +765,123 @@ fn match_statement_row<'a>(
None
}
/// Gathers the statement rows that the income bridge lists as components of
/// `definition`'s surface.
///
/// An empty vector is returned unless the definition is an `"income"`
/// statement surface using the `"aggregate_children"` rollup policy and the
/// `"group_all_children"` detail grouping policy, and the bridge (when
/// supplied) has an entry for the definition's surface key. Rows for which
/// `has_any_value` is false are ignored.
fn collect_income_bridge_detail_matches<'a>(
    definition: &SurfaceDefinition,
    rows: &'a [StatementRowOutput],
    crosswalk: Option<&CrosswalkFile>,
    income_bridge: Option<&IncomeBridgeFile>,
) -> Vec<MatchedStatementRow<'a>> {
    let bridge_applies = definition.statement == "income"
        && definition.rollup_policy == "aggregate_children"
        && definition.detail_grouping_policy == "group_all_children";
    if !bridge_applies {
        return Vec::new();
    }

    let bridge_row = match income_bridge.and_then(|file| file.rows.get(&definition.surface_key)) {
        Some(found) => found,
        None => return Vec::new(),
    };

    let mut bridge_matches = Vec::new();
    for candidate_row in rows {
        // Rows that carry no values cannot contribute detail.
        if !has_any_value(&candidate_row.values) {
            continue;
        }
        if let Some(matched) = match_income_bridge_detail_row(candidate_row, bridge_row, crosswalk)
        {
            bridge_matches.push(matched);
        }
    }
    bridge_matches
}
/// Tests a single statement row against the component concept groups of an
/// income-bridge row.
///
/// The row's authoritative concept key is taken from the crosswalk mapping for
/// its qname when one exists; otherwise the qname itself is used for
/// non-extension rows, and extension rows get no key. The row matches when any
/// concept in the bridge row's positive or negative groups matches (via
/// `candidate_matches`) the row's qname, its local name, or that authoritative
/// key.
///
/// Returns `None` when nothing matches; otherwise a detail-role match using the
/// `AggregateChildren` mapping method at rank 2.
fn match_income_bridge_detail_row<'a>(
    row: &'a StatementRowOutput,
    bridge_row: &IncomeBridgeRow,
    crosswalk: Option<&CrosswalkFile>,
) -> Option<MatchedStatementRow<'a>> {
    let crosswalk_key = crosswalk
        .and_then(|file| file.mappings.get(&row.qname))
        .map(|mapping| mapping.authoritative_concept_key.clone());
    let authoritative_concept_key = match crosswalk_key {
        Some(key) => Some(key),
        // Extension concepts have no authoritative key without a crosswalk entry.
        None if row.is_extension => None,
        None => Some(row.qname.clone()),
    };

    let groups = &bridge_row.component_concept_groups;
    let matches_group = groups
        .positive
        .iter()
        .chain(groups.negative.iter())
        .flat_map(|group| group.concepts.iter())
        .any(|candidate| {
            candidate_matches(candidate, &row.qname)
                || candidate_matches(candidate, &row.local_name)
                || authoritative_concept_key
                    .as_ref()
                    .map_or(false, |concept| candidate_matches(candidate, concept))
        });

    matches_group.then(|| MatchedStatementRow {
        row,
        authoritative_concept_key,
        mapping_method: MappingMethod::AggregateChildren,
        match_role: MatchRole::Detail,
        rank: 2,
    })
}
/// Merges direct and bridge-derived detail matches, keeping one match per
/// statement row key.
///
/// When the same row key appears more than once, the match that
/// `compare_statement_matches` orders strictly first is kept; on a tie the
/// earlier-seen match stays.
///
/// The result is in first-seen order: direct matches in their input order,
/// followed by bridge matches whose row key was not already present. Collecting
/// straight out of a `HashMap` would instead yield an arbitrary order that can
/// differ from run to run.
fn merge_detail_matches<'a>(
    direct_matches: &[MatchedStatementRow<'a>],
    bridge_matches: &[MatchedStatementRow<'a>],
) -> Vec<MatchedStatementRow<'a>> {
    use std::collections::hash_map::Entry;

    let mut merged: Vec<MatchedStatementRow<'a>> =
        Vec::with_capacity(direct_matches.len() + bridge_matches.len());
    // Maps a row key to the index of its current winner inside `merged`.
    let mut slot_by_key = HashMap::<&str, usize>::new();

    for matched in direct_matches.iter().chain(bridge_matches.iter()) {
        match slot_by_key.entry(matched.row.key.as_str()) {
            Entry::Occupied(occupied) => {
                let slot = *occupied.get();
                // Replace only when the newcomer is strictly preferred.
                if compare_statement_matches(matched, &merged[slot]).is_lt() {
                    merged[slot] = matched.clone();
                }
            }
            Entry::Vacant(vacant) => {
                vacant.insert(merged.len());
                merged.push(matched.clone());
            }
        }
    }

    merged
}
/// Returns the most preferred match in `matches`, i.e. the minimum under
/// `compare_statement_matches`.
///
/// The ordering logic lives solely in `compare_statement_matches`; this
/// function must not carry its own copy of the comparator, so that picking the
/// best match and merging detail matches always agree.
///
/// # Panics
///
/// Panics if `matches` is empty.
fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
    matches
        .iter()
        .min_by(|left, right| compare_statement_matches(left, right))
        .expect("pick_best_match requires at least one match")
}
/// Orders two matched statement rows by preference; the preferred match
/// compares as `Less`.
///
/// Tie-breakers are applied in sequence, each consulted only when all earlier
/// ones compare equal:
/// 1. lower `rank`;
/// 2. rows without dimensions before rows with dimensions;
/// 3. lower row `order`;
/// 4. larger `max_abs_value` of the row's values (incomparable values count as
///    equal);
/// 5. row `label`, ascending.
fn compare_statement_matches(
    left: &MatchedStatementRow<'_>,
    right: &MatchedStatementRow<'_>,
) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    let by_rank = left.rank.cmp(&right.rank);
    if by_rank != Ordering::Equal {
        return by_rank;
    }

    // `false < true`, so undimensioned rows sort first.
    let by_dimensions = left.row.has_dimensions.cmp(&right.row.has_dimensions);
    if by_dimensions != Ordering::Equal {
        return by_dimensions;
    }

    let by_order = left.row.order.cmp(&right.row.order);
    if by_order != Ordering::Equal {
        return by_order;
    }

    // Operands are reversed on purpose: the larger magnitude sorts first.
    let right_magnitude = max_abs_value(&right.row.values);
    let left_magnitude = max_abs_value(&left.row.values);
    let by_magnitude = right_magnitude
        .partial_cmp(&left_magnitude)
        .unwrap_or(Ordering::Equal);
    if by_magnitude != Ordering::Equal {
        return by_magnitude;
    }

    left.row.label.cmp(&right.row.label)
}
fn build_surface_values(
periods: &[PeriodOutput],
matches: &[MatchedStatementRow<'_>],