Integrate crabrl parser into taxonomy hydration
This commit is contained in:
@@ -3,8 +3,8 @@ use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
|
||||
use crate::pack_selector::FiscalPack;
|
||||
use crate::taxonomy_loader::{
|
||||
load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition, SurfaceFormula,
|
||||
SurfaceFormulaOp, SurfaceSignTransform,
|
||||
load_crosswalk, load_income_bridge, load_surface_pack, CrosswalkFile, IncomeBridgeFile,
|
||||
IncomeBridgeRow, SurfaceDefinition, SurfaceFormula, SurfaceFormulaOp, SurfaceSignTransform,
|
||||
};
|
||||
use crate::{
|
||||
ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput,
|
||||
@@ -114,6 +114,7 @@ pub fn build_compact_surface_model(
|
||||
) -> Result<CompactSurfaceModel> {
|
||||
let pack = load_surface_pack(fiscal_pack)?;
|
||||
let crosswalk = load_crosswalk(taxonomy_regime)?;
|
||||
let income_bridge = load_income_bridge(fiscal_pack).ok();
|
||||
let mut surface_rows = empty_surface_row_map();
|
||||
let mut detail_rows = empty_detail_row_map();
|
||||
let mut concept_mappings = HashMap::<String, MappingAssignment>::new();
|
||||
@@ -157,14 +158,20 @@ pub fn build_compact_surface_model(
|
||||
.filter(|matched| matched.match_role == MatchRole::Detail)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
let bridge_detail_matches = collect_income_bridge_detail_matches(
|
||||
definition,
|
||||
&rows,
|
||||
crosswalk.as_ref(),
|
||||
income_bridge.as_ref(),
|
||||
);
|
||||
|
||||
let detail_matches = if definition.detail_grouping_policy == "group_all_children" {
|
||||
if detail_component_matches.is_empty()
|
||||
&& definition.rollup_policy == "aggregate_children"
|
||||
{
|
||||
let detail_matches =
|
||||
merge_detail_matches(&detail_component_matches, &bridge_detail_matches);
|
||||
if detail_matches.is_empty() && definition.rollup_policy == "aggregate_children" {
|
||||
Vec::new()
|
||||
} else {
|
||||
detail_component_matches.clone()
|
||||
detail_matches
|
||||
}
|
||||
} else {
|
||||
Vec::new()
|
||||
@@ -758,28 +765,123 @@ fn match_statement_row<'a>(
|
||||
None
|
||||
}
|
||||
|
||||
fn collect_income_bridge_detail_matches<'a>(
|
||||
definition: &SurfaceDefinition,
|
||||
rows: &'a [StatementRowOutput],
|
||||
crosswalk: Option<&CrosswalkFile>,
|
||||
income_bridge: Option<&IncomeBridgeFile>,
|
||||
) -> Vec<MatchedStatementRow<'a>> {
|
||||
if definition.statement != "income"
|
||||
|| definition.rollup_policy != "aggregate_children"
|
||||
|| definition.detail_grouping_policy != "group_all_children"
|
||||
{
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let Some(bridge_row) =
|
||||
income_bridge.and_then(|bridge| bridge.rows.get(&definition.surface_key))
|
||||
else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
rows.iter()
|
||||
.filter(|row| has_any_value(&row.values))
|
||||
.filter_map(|row| match_income_bridge_detail_row(row, bridge_row, crosswalk))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn match_income_bridge_detail_row<'a>(
|
||||
row: &'a StatementRowOutput,
|
||||
bridge_row: &IncomeBridgeRow,
|
||||
crosswalk: Option<&CrosswalkFile>,
|
||||
) -> Option<MatchedStatementRow<'a>> {
|
||||
let authoritative_concept_key = crosswalk
|
||||
.and_then(|crosswalk| crosswalk.mappings.get(&row.qname))
|
||||
.map(|mapping| mapping.authoritative_concept_key.clone())
|
||||
.or_else(|| {
|
||||
if !row.is_extension {
|
||||
Some(row.qname.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let matches_group = bridge_row
|
||||
.component_concept_groups
|
||||
.positive
|
||||
.iter()
|
||||
.chain(bridge_row.component_concept_groups.negative.iter())
|
||||
.any(|group| {
|
||||
group.concepts.iter().any(|candidate| {
|
||||
candidate_matches(candidate, &row.qname)
|
||||
|| candidate_matches(candidate, &row.local_name)
|
||||
|| authoritative_concept_key
|
||||
.as_ref()
|
||||
.map(|concept| candidate_matches(candidate, concept))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
});
|
||||
|
||||
if !matches_group {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(MatchedStatementRow {
|
||||
row,
|
||||
authoritative_concept_key,
|
||||
mapping_method: MappingMethod::AggregateChildren,
|
||||
match_role: MatchRole::Detail,
|
||||
rank: 2,
|
||||
})
|
||||
}
|
||||
|
||||
fn merge_detail_matches<'a>(
|
||||
direct_matches: &[MatchedStatementRow<'a>],
|
||||
bridge_matches: &[MatchedStatementRow<'a>],
|
||||
) -> Vec<MatchedStatementRow<'a>> {
|
||||
let mut merged = HashMap::<String, MatchedStatementRow<'a>>::new();
|
||||
|
||||
for matched in direct_matches.iter().chain(bridge_matches.iter()) {
|
||||
merged
|
||||
.entry(matched.row.key.clone())
|
||||
.and_modify(|existing| {
|
||||
if compare_statement_matches(matched, existing).is_lt() {
|
||||
*existing = matched.clone();
|
||||
}
|
||||
})
|
||||
.or_insert_with(|| matched.clone());
|
||||
}
|
||||
|
||||
merged.into_values().collect()
|
||||
}
|
||||
|
||||
fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
|
||||
matches
|
||||
.iter()
|
||||
.min_by(|left, right| {
|
||||
left.rank
|
||||
.cmp(&right.rank)
|
||||
.then_with(|| {
|
||||
let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
|
||||
let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
|
||||
left_dimension_rank.cmp(&right_dimension_rank)
|
||||
})
|
||||
.then_with(|| left.row.order.cmp(&right.row.order))
|
||||
.then_with(|| {
|
||||
max_abs_value(&right.row.values)
|
||||
.partial_cmp(&max_abs_value(&left.row.values))
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
})
|
||||
.then_with(|| left.row.label.cmp(&right.row.label))
|
||||
})
|
||||
.min_by(|left, right| compare_statement_matches(left, right))
|
||||
.expect("pick_best_match requires at least one match")
|
||||
}
|
||||
|
||||
fn compare_statement_matches(
|
||||
left: &MatchedStatementRow<'_>,
|
||||
right: &MatchedStatementRow<'_>,
|
||||
) -> std::cmp::Ordering {
|
||||
left.rank
|
||||
.cmp(&right.rank)
|
||||
.then_with(|| {
|
||||
let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
|
||||
let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
|
||||
left_dimension_rank.cmp(&right_dimension_rank)
|
||||
})
|
||||
.then_with(|| left.row.order.cmp(&right.row.order))
|
||||
.then_with(|| {
|
||||
max_abs_value(&right.row.values)
|
||||
.partial_cmp(&max_abs_value(&left.row.values))
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
})
|
||||
.then_with(|| left.row.label.cmp(&right.row.label))
|
||||
}
|
||||
|
||||
fn build_surface_values(
|
||||
periods: &[PeriodOutput],
|
||||
matches: &[MatchedStatementRow<'_>],
|
||||
|
||||
Reference in New Issue
Block a user