feat(taxonomy): add rust sidecar compact surface pipeline
This commit is contained in:
700
rust/fiscal-xbrl-core/src/kpi_mapper.rs
Normal file
700
rust/fiscal-xbrl-core/src/kpi_mapper.rs
Normal file
@@ -0,0 +1,700 @@
|
||||
use anyhow::Result;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
|
||||
use crate::pack_selector::FiscalPack;
|
||||
use crate::surface_mapper::{MappingAssignment, MappingMethod};
|
||||
use crate::taxonomy_loader::{load_kpi_pack, KpiDefinition};
|
||||
use crate::{FactOutput, KpiRowOutput, PeriodOutput};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct KpiExtractionResult {
|
||||
pub rows: Vec<KpiRowOutput>,
|
||||
pub mapping_assignments: HashMap<String, MappingAssignment>,
|
||||
pub warnings: Vec<String>,
|
||||
}
|
||||
|
||||
pub fn build_taxonomy_kpis(
|
||||
periods: &[PeriodOutput],
|
||||
facts: &[FactOutput],
|
||||
fiscal_pack: FiscalPack,
|
||||
) -> Result<KpiExtractionResult> {
|
||||
if fiscal_pack == FiscalPack::Core {
|
||||
return Ok(KpiExtractionResult::default());
|
||||
}
|
||||
|
||||
let kpi_pack = load_kpi_pack(fiscal_pack)?;
|
||||
let mut rows = Vec::<KpiRowOutput>::new();
|
||||
let mut mapping_assignments = HashMap::<String, MappingAssignment>::new();
|
||||
|
||||
for (index, definition) in kpi_pack.kpis.iter().enumerate() {
|
||||
let Some(kpi_row) = build_kpi_row(definition, index as i64, periods, facts) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
for concept_key in unique_sorted_strings(kpi_row.source_concepts.iter().map(|qname| concept_key_from_qname(qname)).collect()) {
|
||||
mapping_assignments.insert(
|
||||
concept_key,
|
||||
MappingAssignment {
|
||||
authoritative_concept_key: None,
|
||||
mapping_method: Some(MappingMethod::TaxonomyKpi),
|
||||
surface_key: None,
|
||||
detail_parent_surface_key: None,
|
||||
kpi_key: Some(kpi_row.key.clone()),
|
||||
residual_flag: false,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
rows.push(kpi_row);
|
||||
}
|
||||
|
||||
rows.sort_by(|left, right| left.order.cmp(&right.order).then_with(|| left.label.cmp(&right.label)));
|
||||
|
||||
Ok(KpiExtractionResult {
|
||||
rows,
|
||||
mapping_assignments,
|
||||
warnings: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
fn build_kpi_row(
|
||||
definition: &KpiDefinition,
|
||||
order_index: i64,
|
||||
periods: &[PeriodOutput],
|
||||
facts: &[FactOutput],
|
||||
) -> Option<KpiRowOutput> {
|
||||
match definition.key.as_str() {
|
||||
"loan_growth" => growth_kpi_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&[
|
||||
"FinancingReceivableRecordedInvestment",
|
||||
"LoansReceivableNetReportedAmount",
|
||||
"FinancingReceivableExcludingAccruedInterestBeforeAllowanceForCreditLoss",
|
||||
"FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss",
|
||||
"FinanceReceivableAllowanceForCreditLossesExcluded",
|
||||
],
|
||||
),
|
||||
"deposit_growth" => growth_kpi_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&["DepositsLiabilities", "Deposits", "DepositsDomestic", "DepositsForeign"],
|
||||
),
|
||||
"premium_growth" => growth_kpi_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&[
|
||||
"Premiums",
|
||||
"PremiumsEarned",
|
||||
"PremiumsWritten",
|
||||
"PremiumsEarnedNet",
|
||||
"PremiumsWrittenNet",
|
||||
"SupplementaryInsuranceInformationPremiumRevenue",
|
||||
],
|
||||
),
|
||||
"net_interest_margin" => direct_or_formula_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&["NetInterestMargin", "NetInterestSpread"],
|
||||
Some((
|
||||
&[
|
||||
"InterestAndDividendIncomeOperating",
|
||||
"InterestIncomeExpenseOperatingNet",
|
||||
"InterestIncomeExpenseNet",
|
||||
],
|
||||
&["Assets", "AverageInterestEarningAssets"],
|
||||
true,
|
||||
)),
|
||||
),
|
||||
"combined_ratio" => direct_or_formula_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&["CombinedRatio"],
|
||||
Some((
|
||||
&[
|
||||
"PolicyholderBenefitsAndClaimsIncurredNet",
|
||||
"BenefitsLossesAndExpenses",
|
||||
"LossesAndLossAdjustmentExpenses",
|
||||
"SupplementaryInsuranceInformationBenefitsClaimsLossesAndSettlementExpense",
|
||||
],
|
||||
&[
|
||||
"Premiums",
|
||||
"PremiumsEarned",
|
||||
"PremiumsWritten",
|
||||
"PremiumsEarnedNet",
|
||||
"PremiumsWrittenNet",
|
||||
"SupplementaryInsuranceInformationPremiumRevenue",
|
||||
],
|
||||
true,
|
||||
)),
|
||||
),
|
||||
"property_count" => direct_or_formula_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&["NumberOfRealEstateProperties", "SECScheduleIIIRealEstateNumberOfUnits"],
|
||||
None,
|
||||
),
|
||||
"investment_property_growth" => growth_kpi_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&[
|
||||
"RealEstateInvestmentPropertyNet",
|
||||
"RealEstateInvestmentPropertyAtCost",
|
||||
"RealEstateGrossAtCarryingValue",
|
||||
],
|
||||
),
|
||||
"aum" => direct_or_formula_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&["AssetsUnderManagementCarryingAmount"],
|
||||
None,
|
||||
),
|
||||
"fee_paying_aum" => direct_or_formula_row(
|
||||
definition,
|
||||
order_index,
|
||||
periods,
|
||||
facts,
|
||||
&["FeePayingAssetUnderManagement"],
|
||||
None,
|
||||
),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn growth_kpi_row(
|
||||
definition: &KpiDefinition,
|
||||
order_index: i64,
|
||||
periods: &[PeriodOutput],
|
||||
facts: &[FactOutput],
|
||||
local_names: &[&str],
|
||||
) -> Option<KpiRowOutput> {
|
||||
let matched = collect_period_values(periods, facts, local_names);
|
||||
if matched.values.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let sorted_periods = sort_periods(periods);
|
||||
let mut values = BTreeMap::<String, Option<f64>>::new();
|
||||
for window in sorted_periods.windows(2) {
|
||||
let previous = window.first()?;
|
||||
let current = window.get(1)?;
|
||||
let current_value = matched.values.get(¤t.id).copied().flatten();
|
||||
let previous_value = matched.values.get(&previous.id).copied().flatten();
|
||||
let growth = match (current_value, previous_value) {
|
||||
(Some(current_value), Some(previous_value)) if previous_value != 0.0 => {
|
||||
Some(current_value / previous_value - 1.0)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
values.insert(current.id.clone(), growth);
|
||||
}
|
||||
|
||||
build_kpi_output(definition, order_index, "operating_kpi", values, matched)
|
||||
}
|
||||
|
||||
fn direct_or_formula_row(
|
||||
definition: &KpiDefinition,
|
||||
order_index: i64,
|
||||
periods: &[PeriodOutput],
|
||||
facts: &[FactOutput],
|
||||
direct_local_names: &[&str],
|
||||
formula: Option<(&[&str], &[&str], bool)>,
|
||||
) -> Option<KpiRowOutput> {
|
||||
let direct = collect_period_values(periods, facts, direct_local_names);
|
||||
if !direct.values.is_empty() {
|
||||
return build_kpi_output(
|
||||
definition,
|
||||
order_index,
|
||||
"operating_kpi",
|
||||
direct.values.clone(),
|
||||
direct,
|
||||
);
|
||||
}
|
||||
|
||||
let direct_by_end_date = collect_end_date_values(facts, direct_local_names);
|
||||
if !direct_by_end_date.values.is_empty() {
|
||||
return build_date_aligned_kpi_output(
|
||||
definition,
|
||||
order_index,
|
||||
"operating_kpi",
|
||||
periods,
|
||||
direct_by_end_date,
|
||||
);
|
||||
}
|
||||
|
||||
let Some((numerator_names, denominator_names, divide)) = formula else {
|
||||
return None;
|
||||
};
|
||||
let numerator = collect_period_values(periods, facts, numerator_names);
|
||||
let denominator = collect_period_values(periods, facts, denominator_names);
|
||||
let mut values = BTreeMap::<String, Option<f64>>::new();
|
||||
let mut sources = PeriodFactValues::default();
|
||||
|
||||
for period in periods {
|
||||
let numerator_value = numerator.values.get(&period.id).copied().flatten();
|
||||
let denominator_value = denominator.values.get(&period.id).copied().flatten();
|
||||
let next_value = if divide {
|
||||
match (numerator_value, denominator_value) {
|
||||
(Some(numerator_value), Some(denominator_value)) if denominator_value != 0.0 => {
|
||||
Some(numerator_value / denominator_value)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
values.insert(period.id.clone(), next_value);
|
||||
|
||||
for qname in numerator.source_concepts.iter().chain(denominator.source_concepts.iter()) {
|
||||
sources.source_concepts.insert(qname.clone());
|
||||
}
|
||||
for fact_id in numerator.source_fact_ids.iter().chain(denominator.source_fact_ids.iter()) {
|
||||
sources.source_fact_ids.insert(*fact_id);
|
||||
}
|
||||
sources.has_dimensions = sources.has_dimensions || numerator.has_dimensions || denominator.has_dimensions;
|
||||
}
|
||||
|
||||
if values.values().any(|value| value.is_some()) {
|
||||
return Some(KpiRowOutput {
|
||||
key: definition.key.clone(),
|
||||
label: definition.label.clone(),
|
||||
category: "operating_kpi".to_string(),
|
||||
unit: definition.unit.clone(),
|
||||
order: (order_index + 1) * 10,
|
||||
segment: None,
|
||||
axis: None,
|
||||
member: None,
|
||||
values,
|
||||
source_concepts: unique_sorted_strings(sources.source_concepts.into_iter().collect()),
|
||||
source_fact_ids: unique_sorted_i64(sources.source_fact_ids.into_iter().collect()),
|
||||
provenance_type: "taxonomy".to_string(),
|
||||
has_dimensions: sources.has_dimensions,
|
||||
});
|
||||
}
|
||||
|
||||
let numerator_by_end_date = collect_end_date_values(facts, numerator_names);
|
||||
let denominator_by_end_date = collect_end_date_values(facts, denominator_names);
|
||||
let mut aligned_values = BTreeMap::<String, Option<f64>>::new();
|
||||
|
||||
for end_date in numerator_by_end_date.values.keys() {
|
||||
let numerator_value = numerator_by_end_date.values.get(end_date).copied().flatten();
|
||||
let denominator_value = denominator_by_end_date.values.get(end_date).copied().flatten();
|
||||
let next_value = if divide {
|
||||
match (numerator_value, denominator_value) {
|
||||
(Some(numerator_value), Some(denominator_value)) if denominator_value != 0.0 => {
|
||||
Some(numerator_value / denominator_value)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let Some(period_id) = select_period_id_for_end_date(periods, end_date, true) else {
|
||||
continue;
|
||||
};
|
||||
aligned_values.insert(period_id, next_value);
|
||||
}
|
||||
|
||||
if aligned_values.values().all(|value| value.is_none()) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut aligned_sources = DateFactValues {
|
||||
prefer_duration: true,
|
||||
..DateFactValues::default()
|
||||
};
|
||||
aligned_sources.source_concepts.extend(numerator_by_end_date.source_concepts);
|
||||
aligned_sources.source_concepts.extend(denominator_by_end_date.source_concepts);
|
||||
aligned_sources.source_fact_ids.extend(numerator_by_end_date.source_fact_ids);
|
||||
aligned_sources.source_fact_ids.extend(denominator_by_end_date.source_fact_ids);
|
||||
aligned_sources.has_dimensions = numerator_by_end_date.has_dimensions || denominator_by_end_date.has_dimensions;
|
||||
|
||||
Some(KpiRowOutput {
|
||||
key: definition.key.clone(),
|
||||
label: definition.label.clone(),
|
||||
category: "operating_kpi".to_string(),
|
||||
unit: definition.unit.clone(),
|
||||
order: (order_index + 1) * 10,
|
||||
segment: None,
|
||||
axis: None,
|
||||
member: None,
|
||||
values: aligned_values,
|
||||
source_concepts: unique_sorted_strings(aligned_sources.source_concepts.into_iter().collect()),
|
||||
source_fact_ids: unique_sorted_i64(aligned_sources.source_fact_ids.into_iter().collect()),
|
||||
provenance_type: "taxonomy".to_string(),
|
||||
has_dimensions: aligned_sources.has_dimensions,
|
||||
})
|
||||
}
|
||||
|
||||
/// Values selected for a set of concepts, keyed by period id, plus provenance.
#[derive(Clone, Debug, Default)]
struct PeriodFactValues {
    /// Selected value per period id.
    values: BTreeMap<String, Option<f64>>,
    /// QNames of the facts that supplied a value.
    source_concepts: HashSet<String>,
    /// Ids of the facts that supplied a value.
    source_fact_ids: HashSet<i64>,
    /// True when at least one contributing fact is not dimensionless.
    has_dimensions: bool,
}
|
||||
|
||||
/// Values selected for a set of concepts, keyed by end date, plus provenance.
#[derive(Clone, Debug, Default)]
struct DateFactValues {
    /// Selected value per end date.
    values: BTreeMap<String, Option<f64>>,
    /// QNames of the facts that supplied a value.
    source_concepts: HashSet<String>,
    /// Ids of the facts that supplied a value.
    source_fact_ids: HashSet<i64>,
    /// True when at least one contributing fact is not dimensionless.
    has_dimensions: bool,
    /// Whether end dates should be mapped onto duration periods in preference
    /// to instant periods.
    prefer_duration: bool,
}
|
||||
|
||||
fn collect_period_values(
|
||||
periods: &[PeriodOutput],
|
||||
facts: &[FactOutput],
|
||||
local_names: &[&str],
|
||||
) -> PeriodFactValues {
|
||||
let mut values = PeriodFactValues::default();
|
||||
let targets = local_names
|
||||
.iter()
|
||||
.map(|name| name.to_ascii_lowercase())
|
||||
.collect::<HashSet<_>>();
|
||||
let mut fact_ids_by_period = HashMap::<String, Vec<(i64, &FactOutput)>>::new();
|
||||
|
||||
for (index, fact) in facts.iter().enumerate() {
|
||||
if !targets.contains(&fact.local_name.to_ascii_lowercase()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(period_id) = period_id_for_fact(periods, fact) else {
|
||||
continue;
|
||||
};
|
||||
fact_ids_by_period
|
||||
.entry(period_id)
|
||||
.or_default()
|
||||
.push((index as i64 + 1, fact));
|
||||
}
|
||||
|
||||
for period in periods {
|
||||
let Some(grouped_facts) = fact_ids_by_period.get(&period.id) else {
|
||||
continue;
|
||||
};
|
||||
let Some((fact_id, fact)) = pick_preferred_fact(grouped_facts) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
values.values.insert(period.id.clone(), Some(fact.value_num));
|
||||
values.source_concepts.insert(fact.qname.clone());
|
||||
values.source_fact_ids.insert(*fact_id);
|
||||
values.has_dimensions = values.has_dimensions || !fact.is_dimensionless;
|
||||
}
|
||||
|
||||
values
|
||||
}
|
||||
|
||||
fn collect_end_date_values(
|
||||
facts: &[FactOutput],
|
||||
local_names: &[&str],
|
||||
) -> DateFactValues {
|
||||
let mut values = DateFactValues::default();
|
||||
let targets = local_names
|
||||
.iter()
|
||||
.map(|name| name.to_ascii_lowercase())
|
||||
.collect::<HashSet<_>>();
|
||||
let mut fact_ids_by_end_date = HashMap::<String, Vec<(i64, &FactOutput)>>::new();
|
||||
|
||||
for (index, fact) in facts.iter().enumerate() {
|
||||
if !targets.contains(&fact.local_name.to_ascii_lowercase()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(end_date) = fact.period_end.clone().or_else(|| fact.period_instant.clone()) else {
|
||||
continue;
|
||||
};
|
||||
fact_ids_by_end_date
|
||||
.entry(end_date)
|
||||
.or_default()
|
||||
.push((index as i64 + 1, fact));
|
||||
}
|
||||
|
||||
for (end_date, grouped_facts) in fact_ids_by_end_date {
|
||||
let Some((fact_id, fact)) = pick_preferred_fact(&grouped_facts) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
values.values.insert(end_date, Some(fact.value_num));
|
||||
values.source_concepts.insert(fact.qname.clone());
|
||||
values.source_fact_ids.insert(*fact_id);
|
||||
values.has_dimensions = values.has_dimensions || !fact.is_dimensionless;
|
||||
values.prefer_duration = values.prefer_duration || fact.period_start.is_some();
|
||||
}
|
||||
|
||||
values
|
||||
}
|
||||
|
||||
/// Returns the id of the first period whose start and end both match the fact.
///
/// The fact's end is its `period_end`, or its `period_instant` when
/// `period_end` is absent. The start must match exactly as well, so a fact
/// without a start only matches a period without a start.
fn period_id_for_fact(periods: &[PeriodOutput], fact: &FactOutput) -> Option<String> {
    let fact_end = fact
        .period_end
        .as_deref()
        .or(fact.period_instant.as_deref());

    for period in periods {
        let same_start = period.period_start == fact.period_start;
        if same_start && period.period_end.as_deref() == fact_end {
            return Some(period.id.clone());
        }
    }
    None
}
|
||||
|
||||
/// Picks the preferred fact out of a group of candidates.
///
/// Dimensionless facts win over dimensional ones; among equals, the fact with
/// the larger absolute value wins. Values that cannot be ordered (NaN) are
/// treated as equal. On a full tie the last candidate is returned, as per
/// `Iterator::max_by`. Returns `None` for an empty group.
fn pick_preferred_fact<'a>(
    grouped_facts: &'a [(i64, &'a FactOutput)],
) -> Option<&'a (i64, &'a FactOutput)> {
    grouped_facts.iter().max_by(|left, right| {
        let (left_fact, right_fact) = (left.1, right.1);
        // `false < true`, so dimensionless facts rank higher.
        left_fact
            .is_dimensionless
            .cmp(&right_fact.is_dimensionless)
            .then_with(|| {
                let left_magnitude = left_fact.value_num.abs();
                let right_magnitude = right_fact.value_num.abs();
                left_magnitude
                    .partial_cmp(&right_magnitude)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
    })
}
|
||||
|
||||
fn select_period_id_for_end_date(
|
||||
periods: &[PeriodOutput],
|
||||
end_date: &str,
|
||||
prefer_duration: bool,
|
||||
) -> Option<String> {
|
||||
periods
|
||||
.iter()
|
||||
.filter(|period| period.period_end.as_deref() == Some(end_date))
|
||||
.max_by(|left, right| {
|
||||
let left_score = if prefer_duration {
|
||||
if left.period_start.is_some() { 1 } else { 0 }
|
||||
} else if left.period_start.is_none() {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let right_score = if prefer_duration {
|
||||
if right.period_start.is_some() { 1 } else { 0 }
|
||||
} else if right.period_start.is_none() {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
left_score
|
||||
.cmp(&right_score)
|
||||
.then_with(|| left.id.cmp(&right.id))
|
||||
})
|
||||
.map(|period| period.id.clone())
|
||||
}
|
||||
|
||||
/// Returns references to the periods ordered chronologically.
///
/// The primary key is `period_end`, falling back to `filing_date` when the
/// period has no end; the period id breaks ties. Keys are compared as strings.
fn sort_periods(periods: &[PeriodOutput]) -> Vec<&PeriodOutput> {
    /// Borrowed `(date, id)` ordering key for one period.
    fn ordering_key(period: &PeriodOutput) -> (&str, &str) {
        let date = period
            .period_end
            .as_deref()
            .unwrap_or(period.filing_date.as_str());
        (date, period.id.as_str())
    }

    let mut ordered: Vec<&PeriodOutput> = periods.iter().collect();
    ordered.sort_by(|left, right| ordering_key(left).cmp(&ordering_key(right)));
    ordered
}
|
||||
|
||||
fn build_kpi_output(
|
||||
definition: &KpiDefinition,
|
||||
order_index: i64,
|
||||
category: &str,
|
||||
values: BTreeMap<String, Option<f64>>,
|
||||
matched: PeriodFactValues,
|
||||
) -> Option<KpiRowOutput> {
|
||||
if values.values().all(|value| value.is_none()) {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(KpiRowOutput {
|
||||
key: definition.key.clone(),
|
||||
label: definition.label.clone(),
|
||||
category: category.to_string(),
|
||||
unit: definition.unit.clone(),
|
||||
order: (order_index + 1) * 10,
|
||||
segment: None,
|
||||
axis: None,
|
||||
member: None,
|
||||
values,
|
||||
source_concepts: unique_sorted_strings(matched.source_concepts.into_iter().collect()),
|
||||
source_fact_ids: unique_sorted_i64(matched.source_fact_ids.into_iter().collect()),
|
||||
provenance_type: "taxonomy".to_string(),
|
||||
has_dimensions: matched.has_dimensions,
|
||||
})
|
||||
}
|
||||
|
||||
fn build_date_aligned_kpi_output(
|
||||
definition: &KpiDefinition,
|
||||
order_index: i64,
|
||||
category: &str,
|
||||
periods: &[PeriodOutput],
|
||||
matched: DateFactValues,
|
||||
) -> Option<KpiRowOutput> {
|
||||
let mut values = BTreeMap::<String, Option<f64>>::new();
|
||||
|
||||
for (end_date, value) in &matched.values {
|
||||
let Some(period_id) = select_period_id_for_end_date(periods, end_date, matched.prefer_duration) else {
|
||||
continue;
|
||||
};
|
||||
values.insert(period_id, *value);
|
||||
}
|
||||
|
||||
if values.values().all(|value| value.is_none()) {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(KpiRowOutput {
|
||||
key: definition.key.clone(),
|
||||
label: definition.label.clone(),
|
||||
category: category.to_string(),
|
||||
unit: definition.unit.clone(),
|
||||
order: (order_index + 1) * 10,
|
||||
segment: None,
|
||||
axis: None,
|
||||
member: None,
|
||||
values,
|
||||
source_concepts: unique_sorted_strings(matched.source_concepts.into_iter().collect()),
|
||||
source_fact_ids: unique_sorted_i64(matched.source_fact_ids.into_iter().collect()),
|
||||
provenance_type: "taxonomy".to_string(),
|
||||
has_dimensions: matched.has_dimensions,
|
||||
})
|
||||
}
|
||||
|
||||
/// Converts a `prefix:LocalName` QName into a `namespace#LocalName` concept key.
///
/// The prefixes `us-gaap` and `ifrs-full` (ASCII case-insensitive) map to fixed
/// namespace URIs; any other prefix maps to `urn:<prefix>`. Only the first `:`
/// separates prefix from local name. A QName without `:` is returned unchanged.
fn concept_key_from_qname(qname: &str) -> String {
    // Prefixes with a fixed namespace URI.
    const KNOWN_NAMESPACES: [(&str, &str); 2] = [
        ("us-gaap", "http://fasb.org/us-gaap/2024"),
        (
            "ifrs-full",
            "http://xbrl.ifrs.org/taxonomy/2024-03-27/ifrs-full",
        ),
    ];

    let Some((prefix, local_name)) = qname.split_once(':') else {
        return qname.to_string();
    };

    let known = KNOWN_NAMESPACES
        .iter()
        .find(|(known_prefix, _)| prefix.eq_ignore_ascii_case(known_prefix));
    match known {
        Some((_, namespace_uri)) => format!("{namespace_uri}#{local_name}"),
        None => format!("urn:{prefix}#{local_name}"),
    }
}
|
||||
|
||||
/// Returns the distinct strings of `values` in ascending order.
fn unique_sorted_strings(values: Vec<String>) -> Vec<String> {
    let mut distinct = values;
    distinct.sort_unstable();
    // After sorting, duplicates are adjacent.
    distinct.dedup();
    distinct
}
|
||||
|
||||
/// Returns the distinct integers of `values` in ascending order.
fn unique_sorted_i64(values: Vec<i64>) -> Vec<i64> {
    let mut distinct = values;
    distinct.sort_unstable();
    // After sorting, duplicates are adjacent.
    distinct.dedup();
    distinct
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pack_selector::FiscalPack;
    use crate::{FactOutput, PeriodOutput};

    /// Builds a 10-Q period fixture; `start` is `None` for an instant period.
    /// The filing date is set to the period end.
    fn period(id: &str, end: &str, start: Option<&str>) -> PeriodOutput {
        PeriodOutput {
            id: id.to_owned(),
            filing_id: 1,
            accession_number: "0000000000-00-000001".to_owned(),
            filing_date: end.to_owned(),
            period_start: start.map(str::to_owned),
            period_end: Some(end.to_owned()),
            filing_type: "10-Q".to_owned(),
            period_label: id.to_owned(),
        }
    }

    /// Builds a dimensionless us-gaap USD fact fixture with the given period and value.
    fn fact(
        local_name: &str,
        period_start: Option<&str>,
        period_end: &str,
        value: f64,
    ) -> FactOutput {
        FactOutput {
            concept_key: format!("http://fasb.org/us-gaap/2024#{local_name}"),
            qname: format!("us-gaap:{local_name}"),
            namespace_uri: "http://fasb.org/us-gaap/2024".to_owned(),
            local_name: local_name.to_owned(),
            data_type: None,
            statement_kind: Some("balance".to_owned()),
            role_uri: Some("balance".to_owned()),
            authoritative_concept_key: None,
            mapping_method: None,
            surface_key: None,
            detail_parent_surface_key: None,
            kpi_key: None,
            residual_flag: false,
            context_id: "c1".to_owned(),
            unit: Some("iso4217:USD".to_owned()),
            decimals: None,
            precision: None,
            nil: false,
            value_num: value,
            period_start: period_start.map(str::to_owned),
            period_end: Some(period_end.to_owned()),
            period_instant: None,
            dimensions: Vec::new(),
            is_dimensionless: true,
            source_file: None,
        }
    }

    #[test]
    fn emits_taxonomy_growth_kpis_for_bank_pack() {
        let periods = [
            period("prev", "2024-12-31", None),
            period("curr", "2025-12-31", None),
        ];
        let facts = [
            fact("FinancingReceivableRecordedInvestment", None, "2024-12-31", 100.0),
            fact("FinancingReceivableRecordedInvestment", None, "2025-12-31", 120.0),
            fact("DepositsLiabilities", None, "2024-12-31", 200.0),
            fact("DepositsLiabilities", None, "2025-12-31", 250.0),
        ];

        let result = build_taxonomy_kpis(&periods, &facts, FiscalPack::BankLender)
            .expect("taxonomy kpis should build");

        let has_row = |key: &str| result.rows.iter().any(|row| row.key == key);
        assert!(result.rows.iter().all(|row| row.provenance_type == "taxonomy"));
        assert!(has_row("loan_growth"));
        assert!(has_row("deposit_growth"));
    }

    #[test]
    fn emits_net_interest_margin_when_duration_and_instant_periods_share_end_date() {
        // Income is reported on duration periods and assets on instant periods,
        // so the ratio can only be formed by aligning on the shared end date.
        let periods = [
            period("dur-prev", "2024-12-31", Some("2024-01-01")),
            period("inst-prev", "2024-12-31", None),
            period("dur-curr", "2025-12-31", Some("2025-01-01")),
            period("inst-curr", "2025-12-31", None),
        ];
        let facts = [
            fact("InterestIncomeExpenseNet", Some("2024-01-01"), "2024-12-31", 90.0),
            fact("InterestIncomeExpenseNet", Some("2025-01-01"), "2025-12-31", 100.0),
            fact("Assets", None, "2024-12-31", 1000.0),
            fact("Assets", None, "2025-12-31", 1200.0),
        ];

        let result = build_taxonomy_kpis(&periods, &facts, FiscalPack::BankLender)
            .expect("taxonomy kpis should build");
        let net_interest_margin = result
            .rows
            .iter()
            .find(|row| row.key == "net_interest_margin")
            .expect("net interest margin should be present");

        let value_for =
            |period_id: &str| net_interest_margin.values.get(period_id).copied().flatten();
        assert_eq!(value_for("dur-prev"), Some(0.09));
        assert_eq!(value_for("dur-curr"), Some(100.0 / 1200.0));
    }
}
|
||||
2069
rust/fiscal-xbrl-core/src/lib.rs
Normal file
2069
rust/fiscal-xbrl-core/src/lib.rs
Normal file
File diff suppressed because it is too large
Load Diff
90
rust/fiscal-xbrl-core/src/metrics.rs
Normal file
90
rust/fiscal-xbrl-core/src/metrics.rs
Normal file
@@ -0,0 +1,90 @@
|
||||
use crate::{FactOutput, FilingMetrics};
|
||||
|
||||
pub fn derive_metrics(facts: &[FactOutput]) -> FilingMetrics {
|
||||
fn pick_best(facts: &[&FactOutput]) -> Option<f64> {
|
||||
facts
|
||||
.iter()
|
||||
.max_by(|left, right| {
|
||||
let left_dimension_score = if left.is_dimensionless { 1 } else { 0 };
|
||||
let right_dimension_score = if right.is_dimensionless { 1 } else { 0 };
|
||||
left_dimension_score
|
||||
.cmp(&right_dimension_score)
|
||||
.then_with(|| {
|
||||
let left_date = left
|
||||
.period_end
|
||||
.as_ref()
|
||||
.or(left.period_instant.as_ref())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
let right_date = right
|
||||
.period_end
|
||||
.as_ref()
|
||||
.or(right.period_instant.as_ref())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
left_date.cmp(&right_date)
|
||||
})
|
||||
.then_with(|| {
|
||||
left.value_num
|
||||
.abs()
|
||||
.partial_cmp(&right.value_num.abs())
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
})
|
||||
})
|
||||
.map(|fact| fact.value_num)
|
||||
}
|
||||
|
||||
fn by_local_names<'a>(facts: &'a [FactOutput], names: &[&str]) -> Vec<&'a FactOutput> {
|
||||
let targets = names.iter().map(|name| name.to_ascii_lowercase()).collect::<Vec<_>>();
|
||||
facts
|
||||
.iter()
|
||||
.filter(|fact| targets.iter().any(|target| fact.local_name.eq_ignore_ascii_case(target)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
let revenue = pick_best(&by_local_names(
|
||||
facts,
|
||||
&[
|
||||
"Revenues",
|
||||
"SalesRevenueNet",
|
||||
"RevenueFromContractWithCustomerExcludingAssessedTax",
|
||||
"TotalRevenuesAndOtherIncome",
|
||||
],
|
||||
));
|
||||
let net_income = pick_best(&by_local_names(facts, &["NetIncomeLoss", "ProfitLoss"]));
|
||||
let total_assets = pick_best(&by_local_names(facts, &["Assets"]));
|
||||
let cash = pick_best(&by_local_names(
|
||||
facts,
|
||||
&[
|
||||
"CashAndCashEquivalentsAtCarryingValue",
|
||||
"CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
|
||||
],
|
||||
));
|
||||
let direct_debt = pick_best(&by_local_names(
|
||||
facts,
|
||||
&[
|
||||
"DebtAndFinanceLeaseLiabilities",
|
||||
"Debt",
|
||||
"LongTermDebtAndCapitalLeaseObligations",
|
||||
],
|
||||
));
|
||||
let current_debt = pick_best(&by_local_names(
|
||||
facts,
|
||||
&["DebtCurrent", "ShortTermBorrowings", "LongTermDebtCurrent"],
|
||||
));
|
||||
let long_term_debt = pick_best(&by_local_names(
|
||||
facts,
|
||||
&["LongTermDebtNoncurrent", "LongTermDebt", "DebtNoncurrent"],
|
||||
));
|
||||
|
||||
FilingMetrics {
|
||||
revenue,
|
||||
net_income,
|
||||
total_assets,
|
||||
cash,
|
||||
debt: direct_debt.or_else(|| match (current_debt, long_term_debt) {
|
||||
(Some(left), Some(right)) => Some(left + right),
|
||||
_ => None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
361
rust/fiscal-xbrl-core/src/pack_selector.rs
Normal file
361
rust/fiscal-xbrl-core/src/pack_selector.rs
Normal file
@@ -0,0 +1,361 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use crate::{FactOutput, StatementRowMap};
|
||||
|
||||
/// Industry-specific taxonomy pack selected for a filing.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum FiscalPack {
    /// Generic pack, used when no specialised pack is selected.
    Core,
    /// Banks and lenders.
    BankLender,
    /// Insurers.
    Insurance,
    /// REITs and real-estate companies.
    ReitRealEstate,
    /// Brokers and asset managers.
    BrokerAssetManager,
}

impl FiscalPack {
    /// Returns the pack's snake_case string identifier.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Core => "core",
            Self::BankLender => "bank_lender",
            Self::Insurance => "insurance",
            Self::ReitRealEstate => "reit_real_estate",
            Self::BrokerAssetManager => "broker_asset_manager",
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PackSelection {
|
||||
pub pack: FiscalPack,
|
||||
pub warnings: Vec<String>,
|
||||
}
|
||||
|
||||
pub fn select_fiscal_pack(statement_rows: &StatementRowMap, facts: &[FactOutput]) -> PackSelection {
|
||||
let concept_names = collect_concept_names(statement_rows, facts);
|
||||
let role_tokens = collect_role_tokens(statement_rows, facts);
|
||||
|
||||
let bank_score = score_bank_lender(&concept_names, &role_tokens);
|
||||
let insurance_score = score_insurance(&concept_names, &role_tokens);
|
||||
let reit_score = score_reit_real_estate(&concept_names, &role_tokens);
|
||||
let broker_score = score_broker_asset_manager(&concept_names, &role_tokens);
|
||||
let mut scored_packs = vec![
|
||||
(FiscalPack::BankLender, bank_score),
|
||||
(FiscalPack::Insurance, insurance_score),
|
||||
(FiscalPack::ReitRealEstate, reit_score),
|
||||
(FiscalPack::BrokerAssetManager, broker_score),
|
||||
];
|
||||
scored_packs.sort_by(|left, right| right.1.cmp(&left.1));
|
||||
|
||||
let (top_pack, top_score) = scored_packs[0];
|
||||
let second_score = scored_packs.get(1).map(|(_, score)| *score).unwrap_or_default();
|
||||
let margin = top_score - second_score;
|
||||
let selected_pack = if top_score >= 10 && margin >= 4 {
|
||||
top_pack
|
||||
} else {
|
||||
FiscalPack::Core
|
||||
};
|
||||
|
||||
let mut warnings = Vec::new();
|
||||
if selected_pack == FiscalPack::Core && top_score > 0 {
|
||||
warnings.push("fiscal_pack_defaulted_to_core".to_string());
|
||||
}
|
||||
|
||||
PackSelection {
|
||||
pack: selected_pack,
|
||||
warnings,
|
||||
}
|
||||
}
|
||||
|
||||
/// Collects the lowercased local names and QNames of all statement rows and facts.
fn collect_concept_names(statement_rows: &StatementRowMap, facts: &[FactOutput]) -> HashSet<String> {
    let row_names = statement_rows.values().flatten().flat_map(|row| {
        [
            row.local_name.to_ascii_lowercase(),
            row.qname.to_ascii_lowercase(),
        ]
    });
    let fact_names = facts.iter().flat_map(|fact| {
        [
            fact.local_name.to_ascii_lowercase(),
            fact.qname.to_ascii_lowercase(),
        ]
    });

    row_names.chain(fact_names).collect()
}
|
||||
|
||||
/// Collects the lowercased role URIs of all statement rows and facts that have one.
fn collect_role_tokens(statement_rows: &StatementRowMap, facts: &[FactOutput]) -> HashSet<String> {
    let row_roles = statement_rows
        .values()
        .flatten()
        .filter_map(|row| row.role_uri.as_ref())
        .map(|role_uri| role_uri.to_ascii_lowercase());
    let fact_roles = facts
        .iter()
        .filter_map(|fact| fact.role_uri.as_ref())
        .map(|role_uri| role_uri.to_ascii_lowercase());

    row_roles.chain(fact_roles).collect()
}
|
||||
|
||||
fn score_bank_lender(concepts: &HashSet<String>, roles: &HashSet<String>) -> i64 {
|
||||
let mut score = 0;
|
||||
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"depositsliabilities",
|
||||
"us-gaap:depositsliabilities",
|
||||
"deposits",
|
||||
],
|
||||
8,
|
||||
);
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"financingreceivablerecordedinvestment",
|
||||
"us-gaap:financingreceivablerecordedinvestment",
|
||||
"loansreceivablenetreportedamount",
|
||||
"us-gaap:loansreceivablenetreportedamount",
|
||||
],
|
||||
8,
|
||||
);
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"allowanceforcreditlosses",
|
||||
"allowanceforloanlosses",
|
||||
"provisionforcreditlosses",
|
||||
"loanlossprovision",
|
||||
"netinterestincome",
|
||||
"interestexpense",
|
||||
"interestanddividendincomeoperating",
|
||||
],
|
||||
4,
|
||||
);
|
||||
score += weighted_role_match(roles, &["deposit", "loan", "credit", "netinterest"], 2);
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
fn score_insurance(concepts: &HashSet<String>, roles: &HashSet<String>) -> i64 {
|
||||
let mut score = 0;
|
||||
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"premiums",
|
||||
"premiumswritten",
|
||||
"premiumsearned",
|
||||
"premiumswrittennet",
|
||||
"premiumsearnednet",
|
||||
"us-gaap:premiums",
|
||||
],
|
||||
8,
|
||||
);
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"policyholderbenefitsandclaimsincurrednet",
|
||||
"futurepolicybenefits",
|
||||
"liabilityforfuturepolicybenefits",
|
||||
"liabilityforunpaidlossesandlossadjustmentexpenses",
|
||||
"liabilityforunpaidclaimsandclaimsadjustmentexpense",
|
||||
"liabilityforfuturepolicybenefits",
|
||||
"deferredpolicyacquisitioncosts",
|
||||
"deferredpolicyacquisitioncostsamortizationexpense",
|
||||
"netinvestmentincome",
|
||||
"underwritingincomeloss",
|
||||
"unearnedpremiumsnet",
|
||||
],
|
||||
6,
|
||||
);
|
||||
score += weighted_role_match(roles, &["insurance", "premium", "policy", "claims"], 2);
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
fn score_reit_real_estate(concepts: &HashSet<String>, roles: &HashSet<String>) -> i64 {
|
||||
let mut score = 0;
|
||||
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"leaseincome",
|
||||
"realestateinvestmentpropertynet",
|
||||
"realestategrossatcarryingvalue",
|
||||
"realestateinvestmentpropertyatcost",
|
||||
],
|
||||
8,
|
||||
);
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"numberofrealestateproperties",
|
||||
"directcostsofleasedandrentedpropertyorequipment",
|
||||
"depreciationdepletionandamortization",
|
||||
"realestateaccumulateddepreciation",
|
||||
"paymentstoacquirecommercialrealestate",
|
||||
],
|
||||
6,
|
||||
);
|
||||
score += weighted_role_match(roles, &["realestate", "property", "lease", "rental"], 2);
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
fn score_broker_asset_manager(concepts: &HashSet<String>, roles: &HashSet<String>) -> i64 {
|
||||
let mut score = 0;
|
||||
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"assetsundermanagementcarryingamount",
|
||||
"feepayingassetundermanagement",
|
||||
],
|
||||
8,
|
||||
);
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"performancefeerevenuerecognized",
|
||||
"subadvisoryandother",
|
||||
"sponsorfees",
|
||||
],
|
||||
6,
|
||||
);
|
||||
score += weighted_match(
|
||||
concepts,
|
||||
&[
|
||||
"totalsalesassetandaccountexpense",
|
||||
"estimatedannualfixedminimumfeesforcurrentlyoutstandingcontracts",
|
||||
"reductioninthevalueofmanagementcontract",
|
||||
],
|
||||
6,
|
||||
);
|
||||
score += weighted_role_match(roles, &["advis", "management", "asset", "distribution"], 2);
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
/// Returns `weight` when at least one candidate is present in `concepts`, otherwise `0`.
///
/// Candidates are ASCII-lower-cased before the lookup; `concepts` is looked up
/// as-is, so callers are expected to store lower-cased names in it.
fn weighted_match(concepts: &HashSet<String>, candidates: &[&str], weight: i64) -> i64 {
    for candidate in candidates {
        let normalized = candidate.to_ascii_lowercase();
        if concepts.contains(&normalized) {
            return weight;
        }
    }
    0
}
|
||||
|
||||
/// Returns `weight` when any role contains any candidate as a substring, otherwise `0`.
///
/// The comparison is case-sensitive; no normalisation happens here.
fn weighted_role_match(roles: &HashSet<String>, candidates: &[&str], weight: i64) -> i64 {
    let any_fragment_found = candidates
        .iter()
        .any(|fragment| roles.iter().any(|role| role.contains(*fragment)));

    match any_fragment_found {
        true => weight,
        false => 0,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{StatementRowMap, StatementRowOutput};
    use std::collections::BTreeMap;

    /// Builds a non-extension us-gaap row named `local_name` carrying one USD
    /// value for period `p1`; the statement name doubles as the role URI.
    fn row(local_name: &str, statement: &str) -> StatementRowOutput {
        let values = BTreeMap::from([("p1".to_string(), Some(1.0))]);
        let units = BTreeMap::from([("p1".to_string(), Some("iso4217:USD".to_string()))]);

        StatementRowOutput {
            key: local_name.to_string(),
            label: local_name.to_string(),
            concept_key: format!("http://fasb.org/us-gaap/2024#{local_name}"),
            qname: format!("us-gaap:{local_name}"),
            namespace_uri: "http://fasb.org/us-gaap/2024".to_string(),
            local_name: local_name.to_string(),
            is_extension: false,
            statement: statement.to_string(),
            role_uri: Some(statement.to_string()),
            order: 1,
            depth: 0,
            parent_key: None,
            values,
            units,
            has_dimensions: false,
            source_fact_ids: vec![1],
        }
    }

    /// Statement map with every known statement present and empty.
    fn empty_map() -> StatementRowMap {
        ["income", "balance", "cash_flow", "equity", "comprehensive_income"]
            .iter()
            .map(|statement| (statement.to_string(), Vec::new()))
            .collect()
    }

    #[test]
    fn chooses_bank_lender_from_loan_and_deposit_signatures() {
        let mut rows = empty_map();
        let balance = rows.get_mut("balance").unwrap();
        balance.push(row("DepositsLiabilities", "balance"));
        balance.push(row("FinancingReceivableRecordedInvestment", "balance"));
        balance.push(row("AllowanceForCreditLosses", "balance"));

        let selection = select_fiscal_pack(&rows, &[]);

        assert_eq!(selection.pack, FiscalPack::BankLender);
        assert!(selection.warnings.is_empty());
    }

    #[test]
    fn chooses_insurance_from_premium_and_claim_signatures() {
        let mut rows = empty_map();
        let income = rows.get_mut("income").unwrap();
        income.push(row("Premiums", "income"));
        income.push(row("PolicyholderBenefitsAndClaimsIncurredNet", "income"));
        rows.get_mut("balance")
            .unwrap()
            .push(row("FuturePolicyBenefits", "balance"));

        let selection = select_fiscal_pack(&rows, &[]);

        assert_eq!(selection.pack, FiscalPack::Insurance);
        assert!(selection.warnings.is_empty());
    }

    #[test]
    fn defaults_to_core_on_low_confidence() {
        let mut rows = empty_map();
        rows.get_mut("income")
            .unwrap()
            .push(row("InterestExpense", "income"));

        let selection = select_fiscal_pack(&rows, &[]);

        assert_eq!(selection.pack, FiscalPack::Core);
        assert_eq!(selection.warnings, vec!["fiscal_pack_defaulted_to_core"]);
    }

    #[test]
    fn chooses_reit_from_property_and_lease_signatures() {
        let mut rows = empty_map();
        rows.get_mut("income")
            .unwrap()
            .push(row("LeaseIncome", "income"));
        let balance = rows.get_mut("balance").unwrap();
        balance.push(row("RealEstateInvestmentPropertyNet", "balance"));
        balance.push(row("NumberOfRealEstateProperties", "balance"));

        let selection = select_fiscal_pack(&rows, &[]);

        assert_eq!(selection.pack, FiscalPack::ReitRealEstate);
    }

    #[test]
    fn chooses_broker_asset_manager_from_aum_and_fee_signatures() {
        let mut rows = empty_map();
        rows.get_mut("income")
            .unwrap()
            .push(row("PerformanceFeeRevenueRecognized", "income"));
        let balance = rows.get_mut("balance").unwrap();
        balance.push(row("AssetsUnderManagementCarryingAmount", "balance"));
        balance.push(row("FeePayingAssetUnderManagement", "balance"));

        let selection = select_fiscal_pack(&rows, &[]);

        assert_eq!(selection.pack, FiscalPack::BrokerAssetManager);
    }
}
|
||||
667
rust/fiscal-xbrl-core/src/surface_mapper.rs
Normal file
667
rust/fiscal-xbrl-core/src/surface_mapper.rs
Normal file
@@ -0,0 +1,667 @@
|
||||
use anyhow::Result;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
|
||||
use crate::pack_selector::FiscalPack;
|
||||
use crate::taxonomy_loader::{load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition};
|
||||
use crate::{
|
||||
ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput,
|
||||
PeriodOutput, StatementRowMap, StatementRowOutput, SurfaceRowMap, SurfaceRowOutput,
|
||||
};
|
||||
|
||||
/// How a source concept ended up associated with a surface, detail or KPI row.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MappingMethod {
    /// Matched through an authoritative concept (crosswalk or allowed authoritative list).
    AuthoritativeDirect,
    /// Matched because the row's own concept is an allowed source concept.
    DirectSourceConcept,
    /// Contributed to a surface value that was summed from child rows.
    AggregateChildren,
    /// Assigned by the taxonomy KPI extraction.
    TaxonomyKpi,
    /// Not matched by any surface definition; kept as a residual detail row.
    UnmappedResidual,
}

impl MappingMethod {
    /// Returns the stable snake_case identifier for this method.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::AuthoritativeDirect => "authoritative_direct",
            Self::DirectSourceConcept => "direct_source_concept",
            Self::AggregateChildren => "aggregate_children",
            Self::TaxonomyKpi => "taxonomy_kpi",
            Self::UnmappedResidual => "unmapped_residual",
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct MappingAssignment {
|
||||
pub authoritative_concept_key: Option<String>,
|
||||
pub mapping_method: Option<MappingMethod>,
|
||||
pub surface_key: Option<String>,
|
||||
pub detail_parent_surface_key: Option<String>,
|
||||
pub kpi_key: Option<String>,
|
||||
pub residual_flag: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct CompactSurfaceModel {
|
||||
pub surface_rows: SurfaceRowMap,
|
||||
pub detail_rows: DetailRowStatementMap,
|
||||
pub normalization_summary: NormalizationSummaryOutput,
|
||||
pub concept_mappings: HashMap<String, MappingAssignment>,
|
||||
}
|
||||
|
||||
/// Role a matched statement row plays for a surface definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchRole {
    /// The row can provide the surface value directly.
    Surface,
    /// The row is a component (child) of the surface.
    Detail,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct MatchedStatementRow<'a> {
|
||||
row: &'a StatementRowOutput,
|
||||
authoritative_concept_key: Option<String>,
|
||||
mapping_method: MappingMethod,
|
||||
match_role: MatchRole,
|
||||
rank: i64,
|
||||
}
|
||||
|
||||
pub fn build_compact_surface_model(
|
||||
periods: &[PeriodOutput],
|
||||
statement_rows: &StatementRowMap,
|
||||
taxonomy_regime: &str,
|
||||
fiscal_pack: FiscalPack,
|
||||
warnings: Vec<String>,
|
||||
) -> Result<CompactSurfaceModel> {
|
||||
let pack = load_surface_pack(fiscal_pack)?;
|
||||
let crosswalk = load_crosswalk(taxonomy_regime)?;
|
||||
let mut surface_rows = empty_surface_row_map();
|
||||
let mut detail_rows = empty_detail_row_map();
|
||||
let mut concept_mappings = HashMap::<String, MappingAssignment>::new();
|
||||
let mut surface_row_count = 0usize;
|
||||
let mut detail_row_count = 0usize;
|
||||
let mut unmapped_row_count = 0usize;
|
||||
let mut material_unmapped_row_count = 0usize;
|
||||
|
||||
for statement in statement_keys() {
|
||||
let rows = statement_rows
|
||||
.get(statement)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
let statement_definitions = pack
|
||||
.surfaces
|
||||
.iter()
|
||||
.filter(|definition| definition.statement == statement)
|
||||
.collect::<Vec<_>>();
|
||||
let mut used_row_keys = HashSet::<String>::new();
|
||||
let mut statement_surface_rows = Vec::<SurfaceRowOutput>::new();
|
||||
let mut statement_detail_rows = BTreeMap::<String, Vec<DetailRowOutput>>::new();
|
||||
|
||||
for definition in statement_definitions {
|
||||
let matches = rows
|
||||
.iter()
|
||||
.filter(|row| !used_row_keys.contains(&row.key))
|
||||
.filter_map(|row| match_statement_row(row, definition, crosswalk.as_ref()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if matches.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let direct_surface_matches = matches
|
||||
.iter()
|
||||
.filter(|matched| matched.match_role == MatchRole::Surface)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
let detail_component_matches = matches
|
||||
.iter()
|
||||
.filter(|matched| matched.match_role == MatchRole::Detail)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut surface_source_matches = if !direct_surface_matches.is_empty() {
|
||||
vec![pick_best_match(&direct_surface_matches).clone()]
|
||||
} else if definition.rollup_policy == "aggregate_children" {
|
||||
detail_component_matches.clone()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
if surface_source_matches.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let detail_matches = if definition.detail_grouping_policy == "group_all_children" {
|
||||
if detail_component_matches.is_empty() && definition.rollup_policy == "aggregate_children" {
|
||||
Vec::new()
|
||||
} else {
|
||||
detail_component_matches.clone()
|
||||
}
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
if definition.rollup_policy == "aggregate_children"
|
||||
&& direct_surface_matches.is_empty()
|
||||
&& !surface_source_matches.is_empty()
|
||||
{
|
||||
for matched in &mut surface_source_matches {
|
||||
matched.mapping_method = MappingMethod::AggregateChildren;
|
||||
}
|
||||
}
|
||||
|
||||
let values = build_surface_values(periods, &surface_source_matches);
|
||||
if !has_any_value(&values) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let resolved_source_row_keys = periods
|
||||
.iter()
|
||||
.map(|period| {
|
||||
let resolved = if surface_source_matches.len() == 1 {
|
||||
surface_source_matches
|
||||
.first()
|
||||
.and_then(|matched| matched.row.values.get(&period.id).copied().flatten().map(|_| matched.row.key.clone()))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
(period.id.clone(), resolved)
|
||||
})
|
||||
.collect::<BTreeMap<_, _>>();
|
||||
|
||||
let source_concepts = unique_sorted_strings(
|
||||
surface_source_matches
|
||||
.iter()
|
||||
.map(|matched| matched.row.qname.clone())
|
||||
.collect::<Vec<_>>(),
|
||||
);
|
||||
let source_row_keys = unique_sorted_strings(
|
||||
surface_source_matches
|
||||
.iter()
|
||||
.map(|matched| matched.row.key.clone())
|
||||
.collect::<Vec<_>>(),
|
||||
);
|
||||
let source_fact_ids = unique_sorted_i64(
|
||||
surface_source_matches
|
||||
.iter()
|
||||
.flat_map(|matched| matched.row.source_fact_ids.clone())
|
||||
.collect::<Vec<_>>(),
|
||||
);
|
||||
let has_dimensions = surface_source_matches.iter().any(|matched| matched.row.has_dimensions);
|
||||
|
||||
for matched in &surface_source_matches {
|
||||
used_row_keys.insert(matched.row.key.clone());
|
||||
concept_mappings.insert(
|
||||
matched.row.concept_key.clone(),
|
||||
MappingAssignment {
|
||||
authoritative_concept_key: matched.authoritative_concept_key.clone(),
|
||||
mapping_method: Some(matched.mapping_method),
|
||||
surface_key: Some(definition.surface_key.clone()),
|
||||
detail_parent_surface_key: None,
|
||||
kpi_key: None,
|
||||
residual_flag: false,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let details = detail_matches
|
||||
.iter()
|
||||
.map(|matched| {
|
||||
used_row_keys.insert(matched.row.key.clone());
|
||||
concept_mappings.insert(
|
||||
matched.row.concept_key.clone(),
|
||||
MappingAssignment {
|
||||
authoritative_concept_key: matched.authoritative_concept_key.clone(),
|
||||
mapping_method: Some(matched.mapping_method),
|
||||
surface_key: Some(definition.surface_key.clone()),
|
||||
detail_parent_surface_key: Some(definition.surface_key.clone()),
|
||||
kpi_key: None,
|
||||
residual_flag: false,
|
||||
},
|
||||
);
|
||||
build_detail_row(matched.row, &definition.surface_key, false)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
if !details.is_empty() {
|
||||
detail_row_count += details.len();
|
||||
statement_detail_rows.insert(definition.surface_key.clone(), details);
|
||||
}
|
||||
|
||||
statement_surface_rows.push(SurfaceRowOutput {
|
||||
key: definition.surface_key.clone(),
|
||||
label: definition.label.clone(),
|
||||
category: definition.category.clone(),
|
||||
template_section: definition.category.clone(),
|
||||
order: definition.order,
|
||||
unit: definition.unit.clone(),
|
||||
values,
|
||||
source_concepts,
|
||||
source_row_keys,
|
||||
source_fact_ids,
|
||||
formula_key: definition.formula_fallback.as_ref().map(|_| definition.surface_key.clone()),
|
||||
has_dimensions,
|
||||
resolved_source_row_keys,
|
||||
statement: Some(definition.statement.clone()),
|
||||
detail_count: statement_detail_rows
|
||||
.get(&definition.surface_key)
|
||||
.map(|rows| rows.len() as i64),
|
||||
resolution_method: None,
|
||||
confidence: None,
|
||||
warning_codes: vec![],
|
||||
});
|
||||
surface_row_count += 1;
|
||||
let _ = &definition.materiality_policy;
|
||||
}
|
||||
|
||||
statement_surface_rows.sort_by(|left, right| left.order.cmp(&right.order).then_with(|| left.label.cmp(&right.label)));
|
||||
let baseline = baseline_for_statement(statement, &statement_surface_rows);
|
||||
let threshold = materiality_threshold(statement, baseline);
|
||||
let residual_rows = rows
|
||||
.iter()
|
||||
.filter(|row| !used_row_keys.contains(&row.key))
|
||||
.filter(|row| has_any_value(&row.values))
|
||||
.map(|row| {
|
||||
concept_mappings.insert(
|
||||
row.concept_key.clone(),
|
||||
MappingAssignment {
|
||||
authoritative_concept_key: None,
|
||||
mapping_method: Some(MappingMethod::UnmappedResidual),
|
||||
surface_key: None,
|
||||
detail_parent_surface_key: Some("unmapped".to_string()),
|
||||
kpi_key: None,
|
||||
residual_flag: true,
|
||||
},
|
||||
);
|
||||
build_detail_row(row, "unmapped", true)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if !residual_rows.is_empty() {
|
||||
unmapped_row_count += residual_rows.len();
|
||||
material_unmapped_row_count += residual_rows
|
||||
.iter()
|
||||
.filter(|row| max_abs_value(&row.values) >= threshold)
|
||||
.count();
|
||||
detail_row_count += residual_rows.len();
|
||||
statement_detail_rows.insert("unmapped".to_string(), residual_rows);
|
||||
}
|
||||
|
||||
surface_rows.insert(statement.to_string(), statement_surface_rows);
|
||||
detail_rows.insert(statement.to_string(), statement_detail_rows);
|
||||
}
|
||||
|
||||
Ok(CompactSurfaceModel {
|
||||
surface_rows,
|
||||
detail_rows,
|
||||
normalization_summary: NormalizationSummaryOutput {
|
||||
surface_row_count,
|
||||
detail_row_count,
|
||||
kpi_row_count: 0,
|
||||
unmapped_row_count,
|
||||
material_unmapped_row_count,
|
||||
warnings,
|
||||
},
|
||||
concept_mappings,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn merge_mapping_assignments(
|
||||
primary: &mut HashMap<String, MappingAssignment>,
|
||||
secondary: HashMap<String, MappingAssignment>,
|
||||
) {
|
||||
for (concept_key, assignment) in secondary {
|
||||
let existing = primary.entry(concept_key).or_default();
|
||||
existing.authoritative_concept_key = existing
|
||||
.authoritative_concept_key
|
||||
.clone()
|
||||
.or(assignment.authoritative_concept_key);
|
||||
if existing.mapping_method.is_none()
|
||||
|| matches!(existing.mapping_method, Some(MappingMethod::UnmappedResidual))
|
||||
{
|
||||
existing.mapping_method = assignment.mapping_method;
|
||||
}
|
||||
if existing.surface_key.is_none() {
|
||||
existing.surface_key = assignment.surface_key;
|
||||
}
|
||||
if existing.detail_parent_surface_key.is_none() {
|
||||
existing.detail_parent_surface_key = assignment.detail_parent_surface_key;
|
||||
}
|
||||
if existing.kpi_key.is_none() {
|
||||
existing.kpi_key = assignment.kpi_key;
|
||||
}
|
||||
existing.residual_flag = existing.residual_flag && assignment.residual_flag;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_mapping_assignments(
|
||||
concepts: &mut [ConceptOutput],
|
||||
facts: &mut [FactOutput],
|
||||
mappings: &HashMap<String, MappingAssignment>,
|
||||
) {
|
||||
for concept in concepts {
|
||||
if let Some(mapping) = mappings.get(&concept.concept_key) {
|
||||
concept.authoritative_concept_key = mapping.authoritative_concept_key.clone();
|
||||
concept.mapping_method = mapping.mapping_method.map(|method| method.as_str().to_string());
|
||||
concept.surface_key = mapping.surface_key.clone();
|
||||
concept.detail_parent_surface_key = mapping.detail_parent_surface_key.clone();
|
||||
concept.kpi_key = mapping.kpi_key.clone();
|
||||
concept.residual_flag = mapping.residual_flag;
|
||||
}
|
||||
}
|
||||
|
||||
for fact in facts {
|
||||
if let Some(mapping) = mappings.get(&fact.concept_key) {
|
||||
fact.authoritative_concept_key = mapping.authoritative_concept_key.clone();
|
||||
fact.mapping_method = mapping.mapping_method.map(|method| method.as_str().to_string());
|
||||
fact.surface_key = mapping.surface_key.clone();
|
||||
fact.detail_parent_surface_key = mapping.detail_parent_surface_key.clone();
|
||||
fact.kpi_key = mapping.kpi_key.clone();
|
||||
fact.residual_flag = mapping.residual_flag;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn match_statement_row<'a>(
|
||||
row: &'a StatementRowOutput,
|
||||
definition: &SurfaceDefinition,
|
||||
crosswalk: Option<&CrosswalkFile>,
|
||||
) -> Option<MatchedStatementRow<'a>> {
|
||||
let authoritative_mapping = crosswalk.and_then(|crosswalk| crosswalk.mappings.get(&row.qname));
|
||||
let authoritative_concept_key = authoritative_mapping
|
||||
.map(|mapping| mapping.authoritative_concept_key.clone())
|
||||
.or_else(|| {
|
||||
if !row.is_extension {
|
||||
Some(row.qname.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let matches_authoritative = authoritative_concept_key.as_ref().map_or(false, |concept| {
|
||||
definition
|
||||
.allowed_authoritative_concepts
|
||||
.iter()
|
||||
.any(|candidate| candidate_matches(candidate, concept))
|
||||
}) || authoritative_mapping
|
||||
.map(|mapping| mapping.surface_key == definition.surface_key)
|
||||
.unwrap_or(false);
|
||||
|
||||
if matches_authoritative {
|
||||
return Some(MatchedStatementRow {
|
||||
row,
|
||||
authoritative_concept_key,
|
||||
mapping_method: MappingMethod::AuthoritativeDirect,
|
||||
match_role: MatchRole::Surface,
|
||||
rank: 0,
|
||||
});
|
||||
}
|
||||
|
||||
let matches_source = definition
|
||||
.allowed_source_concepts
|
||||
.iter()
|
||||
.any(|candidate| candidate_matches(candidate, &row.qname) || candidate_matches(candidate, &row.local_name));
|
||||
if matches_source {
|
||||
return Some(MatchedStatementRow {
|
||||
row,
|
||||
authoritative_concept_key,
|
||||
mapping_method: MappingMethod::DirectSourceConcept,
|
||||
match_role: if definition.rollup_policy == "aggregate_children" {
|
||||
MatchRole::Detail
|
||||
} else {
|
||||
MatchRole::Surface
|
||||
},
|
||||
rank: 1,
|
||||
});
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
|
||||
matches
|
||||
.iter()
|
||||
.min_by(|left, right| {
|
||||
left.rank
|
||||
.cmp(&right.rank)
|
||||
.then_with(|| {
|
||||
let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
|
||||
let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
|
||||
left_dimension_rank.cmp(&right_dimension_rank)
|
||||
})
|
||||
.then_with(|| left.row.order.cmp(&right.row.order))
|
||||
.then_with(|| {
|
||||
max_abs_value(&right.row.values)
|
||||
.partial_cmp(&max_abs_value(&left.row.values))
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
})
|
||||
.then_with(|| left.row.label.cmp(&right.row.label))
|
||||
})
|
||||
.expect("pick_best_match requires at least one match")
|
||||
}
|
||||
|
||||
fn build_surface_values(
|
||||
periods: &[PeriodOutput],
|
||||
matches: &[MatchedStatementRow<'_>],
|
||||
) -> BTreeMap<String, Option<f64>> {
|
||||
periods
|
||||
.iter()
|
||||
.map(|period| {
|
||||
let value = if matches.len() == 1 {
|
||||
matches
|
||||
.first()
|
||||
.and_then(|matched| matched.row.values.get(&period.id).copied())
|
||||
.flatten()
|
||||
} else {
|
||||
sum_nullable_values(
|
||||
matches
|
||||
.iter()
|
||||
.map(|matched| matched.row.values.get(&period.id).copied().flatten())
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
};
|
||||
(period.id.clone(), value)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Sums optional values, treating `None` as zero.
///
/// Returns `None` when there is nothing to sum, i.e. the input is empty or
/// every entry is `None`.
fn sum_nullable_values(values: Vec<Option<f64>>) -> Option<f64> {
    let has_value = values.iter().any(Option::is_some);
    if !has_value {
        return None;
    }

    let total = values
        .into_iter()
        .map(|value| value.unwrap_or(0.0))
        .sum::<f64>();
    Some(total)
}
|
||||
|
||||
fn build_detail_row(
|
||||
row: &StatementRowOutput,
|
||||
parent_surface_key: &str,
|
||||
residual_flag: bool,
|
||||
) -> DetailRowOutput {
|
||||
DetailRowOutput {
|
||||
key: row.key.clone(),
|
||||
parent_surface_key: parent_surface_key.to_string(),
|
||||
label: row.label.clone(),
|
||||
concept_key: row.concept_key.clone(),
|
||||
qname: row.qname.clone(),
|
||||
namespace_uri: row.namespace_uri.clone(),
|
||||
local_name: row.local_name.clone(),
|
||||
unit: row.units.values().find_map(|value| value.clone()),
|
||||
values: row.values.clone(),
|
||||
source_fact_ids: row.source_fact_ids.clone(),
|
||||
is_extension: row.is_extension,
|
||||
dimensions_summary: if row.has_dimensions {
|
||||
vec!["has_dimensions".to_string()]
|
||||
} else {
|
||||
vec![]
|
||||
},
|
||||
residual_flag,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when at least one period carries a non-null value.
fn has_any_value(values: &BTreeMap<String, Option<f64>>) -> bool {
    values.values().any(Option::is_some)
}
|
||||
|
||||
/// Returns the largest absolute value across all periods.
///
/// Null values are ignored; the result is `0.0` when there is no value at all.
fn max_abs_value(values: &BTreeMap<String, Option<f64>>) -> f64 {
    values
        .values()
        .flatten()
        .map(|value| value.abs())
        .fold(0.0_f64, f64::max)
}
|
||||
|
||||
fn baseline_for_statement(statement: &str, surface_rows: &[SurfaceRowOutput]) -> f64 {
|
||||
let anchor_key = if statement == "balance" {
|
||||
"total_assets"
|
||||
} else {
|
||||
"revenue"
|
||||
};
|
||||
|
||||
surface_rows
|
||||
.iter()
|
||||
.find(|row| row.key == anchor_key)
|
||||
.map(|row| max_abs_value(&row.values))
|
||||
.unwrap_or(0.0)
|
||||
}
|
||||
|
||||
/// Returns the materiality threshold for a statement given its baseline.
///
/// Balance sheet: 0.5% of the baseline, with a floor of 5,000,000.
/// Every other statement: 1% of the baseline, with a floor of 1,000,000.
fn materiality_threshold(statement: &str, baseline: f64) -> f64 {
    let (ratio, floor) = match statement {
        "balance" => (0.005, 5_000_000.0),
        _ => (0.01, 1_000_000.0),
    };
    (baseline * ratio).max(floor)
}
|
||||
|
||||
/// Returns the distinct strings of `values` in ascending order.
fn unique_sorted_strings(mut values: Vec<String>) -> Vec<String> {
    // Sorting first makes duplicates adjacent so `dedup` removes them all.
    values.sort_unstable();
    values.dedup();
    values
}
|
||||
|
||||
/// Returns the distinct integers of `values` in ascending order.
fn unique_sorted_i64(mut values: Vec<i64>) -> Vec<i64> {
    // Sorting first makes duplicates adjacent so `dedup` removes them all.
    values.sort_unstable();
    values.dedup();
    values
}
|
||||
|
||||
/// Compares a candidate concept name with an actual one, ignoring ASCII case.
///
/// The names match when they are equal, or when the part after the last `:`
/// of either one equals the whole of the other. Two names that both carry a
/// prefix therefore only match when the full names are equal.
fn candidate_matches(candidate: &str, actual: &str) -> bool {
    if candidate.eq_ignore_ascii_case(actual) {
        return true;
    }

    let candidate_local_matches = matches!(
        candidate.rsplit_once(':'),
        Some((_, local_name)) if local_name.eq_ignore_ascii_case(actual)
    );
    if candidate_local_matches {
        return true;
    }

    matches!(
        actual.rsplit_once(':'),
        Some((_, local_name)) if local_name.eq_ignore_ascii_case(candidate)
    )
}
|
||||
|
||||
/// Returns the statement keys in the order in which statements are processed.
fn statement_keys() -> [&'static str; 5] {
    const STATEMENT_KEYS: [&str; 5] = [
        "income",
        "balance",
        "cash_flow",
        "equity",
        "comprehensive_income",
    ];
    STATEMENT_KEYS
}
|
||||
|
||||
fn empty_surface_row_map() -> SurfaceRowMap {
|
||||
statement_keys()
|
||||
.into_iter()
|
||||
.map(|key| (key.to_string(), Vec::new()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn empty_detail_row_map() -> DetailRowStatementMap {
|
||||
statement_keys()
|
||||
.into_iter()
|
||||
.map(|key| (key.to_string(), BTreeMap::new()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pack_selector::FiscalPack;
    use crate::{PeriodOutput, StatementRowOutput};

    /// Builds a full-year 10-K period whose id and label are both `id`.
    fn period(id: &str) -> PeriodOutput {
        PeriodOutput {
            id: id.to_string(),
            filing_id: 1,
            accession_number: "0000000000-00-000001".to_string(),
            filing_date: "2025-12-31".to_string(),
            period_start: Some("2025-01-01".to_string()),
            period_end: Some("2025-12-31".to_string()),
            filing_type: "10-K".to_string(),
            period_label: id.to_string(),
        }
    }

    /// Builds a non-extension row with a single USD `value` for period `p1`.
    ///
    /// The local name is the part of `qname` after the first `:`; `key` is
    /// used instead when `qname` has no prefix.
    fn row(key: &str, qname: &str, statement: &str, value: f64) -> StatementRowOutput {
        let local_name = qname.split(':').nth(1).unwrap_or(key);

        StatementRowOutput {
            key: key.to_string(),
            label: key.to_string(),
            concept_key: format!("http://fasb.org/us-gaap/2024#{}", local_name),
            qname: qname.to_string(),
            namespace_uri: "http://fasb.org/us-gaap/2024".to_string(),
            local_name: local_name.to_string(),
            is_extension: false,
            statement: statement.to_string(),
            role_uri: Some(statement.to_string()),
            order: 1,
            depth: 0,
            parent_key: None,
            values: BTreeMap::from([("p1".to_string(), Some(value))]),
            units: BTreeMap::from([("p1".to_string(), Some("iso4217:USD".to_string()))]),
            has_dimensions: false,
            source_fact_ids: vec![1],
        }
    }

    /// Statement map with every known statement present and empty.
    fn empty_map() -> StatementRowMap {
        ["income", "balance", "cash_flow", "equity", "comprehensive_income"]
            .iter()
            .map(|statement| (statement.to_string(), Vec::new()))
            .collect()
    }

    #[test]
    fn prefers_direct_authoritative_row_over_child_aggregation() {
        let mut rows = empty_map();
        let income = rows.get_mut("income").unwrap();
        income.push(row("op-expenses", "us-gaap:OperatingExpenses", "income", 40.0));
        income.push(row(
            "sga",
            "us-gaap:SellingGeneralAndAdministrativeExpense",
            "income",
            25.0,
        ));
        income.push(row("rd", "us-gaap:ResearchAndDevelopmentExpense", "income", 15.0));

        let periods = [period("p1")];
        let model = build_compact_surface_model(&periods, &rows, "us-gaap", FiscalPack::Core, vec![])
            .expect("compact model should build");

        let income_surfaces = model.surface_rows.get("income").unwrap();
        let op_expenses = income_surfaces
            .iter()
            .find(|row| row.key == "operating_expenses")
            .unwrap();

        assert_eq!(op_expenses.values.get("p1").copied().flatten(), Some(40.0));
        assert_eq!(op_expenses.detail_count, Some(2));
    }

    #[test]
    fn emits_unmapped_residual_rows() {
        let mut rows = empty_map();
        rows.get_mut("income")
            .unwrap()
            .push(row("custom", "company:CustomMetric", "income", 12.0));

        let periods = [period("p1")];
        let model = build_compact_surface_model(&periods, &rows, "us-gaap", FiscalPack::Core, vec![])
            .expect("compact model should build");

        let income_details = model.detail_rows.get("income").unwrap();
        let residual_rows = income_details.get("unmapped").unwrap();

        assert_eq!(residual_rows.len(), 1);
        assert!(residual_rows[0].residual_flag);
    }
}
|
||||
249
rust/fiscal-xbrl-core/src/taxonomy_loader.rs
Normal file
249
rust/fiscal-xbrl-core/src/taxonomy_loader.rs
Normal file
@@ -0,0 +1,249 @@
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use serde::Deserialize;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::pack_selector::FiscalPack;
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct SurfacePackFile {
|
||||
pub version: String,
|
||||
pub pack: String,
|
||||
pub surfaces: Vec<SurfaceDefinition>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct SurfaceDefinition {
|
||||
pub surface_key: String,
|
||||
pub statement: String,
|
||||
pub label: String,
|
||||
pub category: String,
|
||||
pub order: i64,
|
||||
pub unit: String,
|
||||
pub rollup_policy: String,
|
||||
pub allowed_source_concepts: Vec<String>,
|
||||
pub allowed_authoritative_concepts: Vec<String>,
|
||||
pub formula_fallback: Option<serde_json::Value>,
|
||||
pub detail_grouping_policy: String,
|
||||
pub materiality_policy: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct CrosswalkFile {
|
||||
pub version: String,
|
||||
pub regime: String,
|
||||
pub mappings: std::collections::HashMap<String, CrosswalkMapping>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct CrosswalkMapping {
|
||||
pub surface_key: String,
|
||||
pub authoritative_concept_key: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct KpiPackFile {
|
||||
pub version: String,
|
||||
pub pack: String,
|
||||
pub kpis: Vec<KpiDefinition>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct KpiDefinition {
|
||||
pub key: String,
|
||||
pub label: String,
|
||||
pub unit: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct UniversalIncomeFile {
|
||||
pub version: String,
|
||||
pub rows: Vec<UniversalIncomeDefinition>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct UniversalIncomeDefinition {
|
||||
pub key: String,
|
||||
pub statement: String,
|
||||
pub label: String,
|
||||
pub category: String,
|
||||
pub order: i64,
|
||||
pub unit: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct IncomeBridgeFile {
|
||||
pub version: String,
|
||||
pub pack: String,
|
||||
pub rows: HashMap<String, IncomeBridgeRow>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone, Default)]
|
||||
pub struct IncomeBridgeComponents {
|
||||
#[serde(default)]
|
||||
pub positive: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub negative: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone, Default)]
|
||||
pub struct IncomeBridgeConceptGroups {
|
||||
#[serde(default)]
|
||||
pub positive: Vec<IncomeBridgeConceptGroup>,
|
||||
#[serde(default)]
|
||||
pub negative: Vec<IncomeBridgeConceptGroup>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct IncomeBridgeConceptGroup {
|
||||
pub name: String,
|
||||
pub concepts: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct IncomeBridgeRow {
|
||||
#[serde(default)]
|
||||
pub direct_authoritative_concepts: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub direct_source_concepts: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub component_surfaces: IncomeBridgeComponents,
|
||||
#[serde(default)]
|
||||
pub component_concept_groups: IncomeBridgeConceptGroups,
|
||||
pub formula: String,
|
||||
#[serde(default)]
|
||||
pub not_meaningful_for_pack: bool,
|
||||
#[serde(default)]
|
||||
pub warning_codes_when_used: Vec<String>,
|
||||
}
|
||||
|
||||
pub fn resolve_taxonomy_dir() -> Result<PathBuf> {
|
||||
let mut candidates = Vec::new();
|
||||
|
||||
if let Some(value) = env::var("FISCAL_TAXONOMY_DIR")
|
||||
.ok()
|
||||
.map(|value| value.trim().to_string())
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
candidates.push(PathBuf::from(value));
|
||||
}
|
||||
|
||||
if let Ok(current_dir) = env::current_dir() {
|
||||
candidates.push(current_dir.join("rust").join("taxonomy"));
|
||||
candidates.push(current_dir.join("taxonomy"));
|
||||
}
|
||||
|
||||
candidates.push(PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../taxonomy"));
|
||||
|
||||
if let Ok(executable) = env::current_exe() {
|
||||
if let Some(parent) = executable.parent() {
|
||||
candidates.push(parent.join("../rust/taxonomy"));
|
||||
candidates.push(parent.join("../taxonomy"));
|
||||
}
|
||||
}
|
||||
|
||||
candidates
|
||||
.into_iter()
|
||||
.find(|path| path.is_dir())
|
||||
.ok_or_else(|| anyhow!("taxonomy resolution failed: unable to locate runtime taxonomy directory"))
|
||||
}
|
||||
|
||||
pub fn load_surface_pack(pack: FiscalPack) -> Result<SurfacePackFile> {
|
||||
let taxonomy_dir = resolve_taxonomy_dir()?;
|
||||
let path = taxonomy_dir
|
||||
.join("fiscal")
|
||||
.join("v1")
|
||||
.join(format!("{}.surface.json", pack.as_str()));
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<SurfacePackFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let _ = (&file.version, &file.pack);
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
pub fn load_crosswalk(regime: &str) -> Result<Option<CrosswalkFile>> {
|
||||
let file_name = match regime {
|
||||
"us-gaap" => "us-gaap.json",
|
||||
"ifrs-full" => "ifrs.json",
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
let taxonomy_dir = resolve_taxonomy_dir()?;
|
||||
let path = taxonomy_dir.join("crosswalk").join(file_name);
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<CrosswalkFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let _ = (&file.version, &file.regime);
|
||||
Ok(Some(file))
|
||||
}
|
||||
|
||||
pub fn load_kpi_pack(pack: FiscalPack) -> Result<KpiPackFile> {
|
||||
let taxonomy_dir = resolve_taxonomy_dir()?;
|
||||
let path = taxonomy_dir
|
||||
.join("fiscal")
|
||||
.join("v1")
|
||||
.join("kpis")
|
||||
.join(format!("{}.kpis.json", pack.as_str()));
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<KpiPackFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let _ = (&file.version, &file.pack);
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
pub fn load_universal_income_definitions() -> Result<UniversalIncomeFile> {
|
||||
let taxonomy_dir = resolve_taxonomy_dir()?;
|
||||
let path = taxonomy_dir
|
||||
.join("fiscal")
|
||||
.join("v1")
|
||||
.join("universal_income.surface.json");
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<UniversalIncomeFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let _ = &file.version;
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
pub fn load_income_bridge(pack: FiscalPack) -> Result<IncomeBridgeFile> {
|
||||
let taxonomy_dir = resolve_taxonomy_dir()?;
|
||||
let path = taxonomy_dir
|
||||
.join("fiscal")
|
||||
.join("v1")
|
||||
.join(format!("{}.income-bridge.json", pack.as_str()));
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<IncomeBridgeFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let _ = (&file.version, &file.pack);
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test against the taxonomy files on disk: the directory
    /// resolves, and every loader succeeds for the core pack.
    #[test]
    fn resolves_taxonomy_dir_and_loads_core_pack() {
        let resolved_dir = resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests");
        assert!(resolved_dir.exists());

        // Surface pack: tagged "core" and non-empty.
        let surfaces = load_surface_pack(FiscalPack::Core).expect("core surface pack should load");
        assert_eq!(surfaces.pack, "core");
        assert!(!surfaces.surfaces.is_empty());

        // KPI pack: only the pack tag is checked.
        let kpis = load_kpi_pack(FiscalPack::Core).expect("core kpi pack should load");
        assert_eq!(kpis.pack, "core");

        // Universal income rows: must not be empty.
        let income_rows = load_universal_income_definitions().expect("universal income config should load");
        assert!(!income_rows.rows.is_empty());

        // Income bridge: tagged "core".
        let bridge = load_income_bridge(FiscalPack::Core).expect("core bridge should load");
        assert_eq!(bridge.pack, "core");
    }
}
|
||||
1404
rust/fiscal-xbrl-core/src/universal_income.rs
Normal file
1404
rust/fiscal-xbrl-core/src/universal_income.rs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user