Commit remaining Rust parser updates

This commit is contained in:
2026-03-12 21:17:37 -04:00
parent 7a7a78340f
commit 1efbffa347
4 changed files with 211 additions and 56 deletions

View File

@@ -20,7 +20,9 @@ fn main() {
fn run() -> Result<()> {
let command = env::args().nth(1).unwrap_or_default();
if command != "hydrate-filing" {
return Err(anyhow!("invalid request: expected `hydrate-filing` command"));
return Err(anyhow!(
"invalid request: expected `hydrate-filing` command"
));
}
let mut buffer = String::new();

View File

@@ -31,7 +31,13 @@ pub fn build_taxonomy_kpis(
continue;
};
for concept_key in unique_sorted_strings(kpi_row.source_concepts.iter().map(|qname| concept_key_from_qname(qname)).collect()) {
for concept_key in unique_sorted_strings(
kpi_row
.source_concepts
.iter()
.map(|qname| concept_key_from_qname(qname))
.collect(),
) {
mapping_assignments.insert(
concept_key,
MappingAssignment {
@@ -48,7 +54,11 @@ pub fn build_taxonomy_kpis(
rows.push(kpi_row);
}
rows.sort_by(|left, right| left.order.cmp(&right.order).then_with(|| left.label.cmp(&right.label)));
rows.sort_by(|left, right| {
left.order
.cmp(&right.order)
.then_with(|| left.label.cmp(&right.label))
});
Ok(KpiExtractionResult {
rows,
@@ -82,7 +92,12 @@ fn build_kpi_row(
order_index,
periods,
facts,
&["DepositsLiabilities", "Deposits", "DepositsDomestic", "DepositsForeign"],
&[
"DepositsLiabilities",
"Deposits",
"DepositsDomestic",
"DepositsForeign",
],
),
"premium_growth" => growth_kpi_row(
definition,
@@ -143,7 +158,10 @@ fn build_kpi_row(
order_index,
periods,
facts,
&["NumberOfRealEstateProperties", "SECScheduleIIIRealEstateNumberOfUnits"],
&[
"NumberOfRealEstateProperties",
"SECScheduleIIIRealEstateNumberOfUnits",
],
None,
),
"investment_property_growth" => growth_kpi_row(
@@ -261,13 +279,22 @@ fn direct_or_formula_row(
};
values.insert(period.id.clone(), next_value);
for qname in numerator.source_concepts.iter().chain(denominator.source_concepts.iter()) {
for qname in numerator
.source_concepts
.iter()
.chain(denominator.source_concepts.iter())
{
sources.source_concepts.insert(qname.clone());
}
for fact_id in numerator.source_fact_ids.iter().chain(denominator.source_fact_ids.iter()) {
for fact_id in numerator
.source_fact_ids
.iter()
.chain(denominator.source_fact_ids.iter())
{
sources.source_fact_ids.insert(*fact_id);
}
sources.has_dimensions = sources.has_dimensions || numerator.has_dimensions || denominator.has_dimensions;
sources.has_dimensions =
sources.has_dimensions || numerator.has_dimensions || denominator.has_dimensions;
}
if values.values().any(|value| value.is_some()) {
@@ -293,8 +320,16 @@ fn direct_or_formula_row(
let mut aligned_values = BTreeMap::<String, Option<f64>>::new();
for end_date in numerator_by_end_date.values.keys() {
let numerator_value = numerator_by_end_date.values.get(end_date).copied().flatten();
let denominator_value = denominator_by_end_date.values.get(end_date).copied().flatten();
let numerator_value = numerator_by_end_date
.values
.get(end_date)
.copied()
.flatten();
let denominator_value = denominator_by_end_date
.values
.get(end_date)
.copied()
.flatten();
let next_value = if divide {
match (numerator_value, denominator_value) {
(Some(numerator_value), Some(denominator_value)) if denominator_value != 0.0 => {
@@ -319,11 +354,20 @@ fn direct_or_formula_row(
prefer_duration: true,
..DateFactValues::default()
};
aligned_sources.source_concepts.extend(numerator_by_end_date.source_concepts);
aligned_sources.source_concepts.extend(denominator_by_end_date.source_concepts);
aligned_sources.source_fact_ids.extend(numerator_by_end_date.source_fact_ids);
aligned_sources.source_fact_ids.extend(denominator_by_end_date.source_fact_ids);
aligned_sources.has_dimensions = numerator_by_end_date.has_dimensions || denominator_by_end_date.has_dimensions;
aligned_sources
.source_concepts
.extend(numerator_by_end_date.source_concepts);
aligned_sources
.source_concepts
.extend(denominator_by_end_date.source_concepts);
aligned_sources
.source_fact_ids
.extend(numerator_by_end_date.source_fact_ids);
aligned_sources
.source_fact_ids
.extend(denominator_by_end_date.source_fact_ids);
aligned_sources.has_dimensions =
numerator_by_end_date.has_dimensions || denominator_by_end_date.has_dimensions;
Some(KpiRowOutput {
key: definition.key.clone(),
@@ -335,7 +379,9 @@ fn direct_or_formula_row(
axis: None,
member: None,
values: aligned_values,
source_concepts: unique_sorted_strings(aligned_sources.source_concepts.into_iter().collect()),
source_concepts: unique_sorted_strings(
aligned_sources.source_concepts.into_iter().collect(),
),
source_fact_ids: unique_sorted_i64(aligned_sources.source_fact_ids.into_iter().collect()),
provenance_type: "taxonomy".to_string(),
has_dimensions: aligned_sources.has_dimensions,
@@ -393,7 +439,9 @@ fn collect_period_values(
continue;
};
values.values.insert(period.id.clone(), Some(fact.value_num));
values
.values
.insert(period.id.clone(), Some(fact.value_num));
values.source_concepts.insert(fact.qname.clone());
values.source_fact_ids.insert(*fact_id);
values.has_dimensions = values.has_dimensions || !fact.is_dimensionless;
@@ -402,10 +450,7 @@ fn collect_period_values(
values
}
fn collect_end_date_values(
facts: &[FactOutput],
local_names: &[&str],
) -> DateFactValues {
fn collect_end_date_values(facts: &[FactOutput], local_names: &[&str]) -> DateFactValues {
let mut values = DateFactValues::default();
let targets = local_names
.iter()
@@ -418,7 +463,11 @@ fn collect_end_date_values(
continue;
}
let Some(end_date) = fact.period_end.clone().or_else(|| fact.period_instant.clone()) else {
let Some(end_date) = fact
.period_end
.clone()
.or_else(|| fact.period_instant.clone())
else {
continue;
};
fact_ids_by_end_date
@@ -443,18 +492,22 @@ fn collect_end_date_values(
}
/// Looks up the id of the period that matches `fact`.
///
/// The fact's effective end is its `period_end`, falling back to
/// `period_instant` when `period_end` is `None`. A period matches when its
/// `period_start` equals the fact's `period_start` and its `period_end`
/// equals that effective end. Returns the id of the first match in `periods`
/// order, or `None` when nothing matches.
fn period_id_for_fact(periods: &[PeriodOutput], fact: &FactOutput) -> Option<String> {
    // Borrow the end marker instead of cloning it; only the matched id has to
    // be returned as an owned value.
    let fact_period_end = fact.period_end.as_ref().or(fact.period_instant.as_ref());

    periods
        .iter()
        .find(|period| {
            period.period_start == fact.period_start
                && period.period_end.as_ref() == fact_period_end
        })
        .map(|period| period.id.clone())
}
fn pick_preferred_fact<'a>(grouped_facts: &'a [(i64, &'a FactOutput)]) -> Option<&'a (i64, &'a FactOutput)> {
fn pick_preferred_fact<'a>(
grouped_facts: &'a [(i64, &'a FactOutput)],
) -> Option<&'a (i64, &'a FactOutput)> {
grouped_facts.iter().max_by(|left, right| {
let left_dimension_score = if left.1.is_dimensionless { 1 } else { 0 };
let right_dimension_score = if right.1.is_dimensionless { 1 } else { 0 };
@@ -480,14 +533,22 @@ fn select_period_id_for_end_date(
.filter(|period| period.period_end.as_deref() == Some(end_date))
.max_by(|left, right| {
let left_score = if prefer_duration {
if left.period_start.is_some() { 1 } else { 0 }
if left.period_start.is_some() {
1
} else {
0
}
} else if left.period_start.is_none() {
1
} else {
0
};
let right_score = if prefer_duration {
if right.period_start.is_some() { 1 } else { 0 }
if right.period_start.is_some() {
1
} else {
0
}
} else if right.period_start.is_none() {
1
} else {
@@ -503,9 +564,17 @@ fn select_period_id_for_end_date(
/// Returns references to `periods` in ascending order.
///
/// Each period is keyed by its `period_end`, or by its `filing_date` when
/// `period_end` is `None`. Periods with equal keys are ordered by `id`.
fn sort_periods(periods: &[PeriodOutput]) -> Vec<&PeriodOutput> {
    let mut periods = periods.iter().collect::<Vec<_>>();
    periods.sort_by(|left, right| {
        // Compare borrowed keys; cloning here would allocate on every
        // comparison the sort performs.
        let left_key = left.period_end.as_ref().unwrap_or(&left.filing_date);
        let right_key = right.period_end.as_ref().unwrap_or(&right.filing_date);
        left_key
            .cmp(right_key)
            .then_with(|| left.id.cmp(&right.id))
    });
    periods
}
@@ -548,7 +617,9 @@ fn build_date_aligned_kpi_output(
let mut values = BTreeMap::<String, Option<f64>>::new();
for (end_date, value) in &matched.values {
let Some(period_id) = select_period_id_for_end_date(periods, end_date, matched.prefer_duration) else {
let Some(period_id) =
select_period_id_for_end_date(periods, end_date, matched.prefer_duration)
else {
continue;
};
values.insert(period_id, *value);
@@ -591,13 +662,21 @@ fn concept_key_from_qname(qname: &str) -> String {
}
/// Returns the distinct strings from `values` in ascending order.
///
/// Sorting first and then dropping adjacent duplicates gives the same result
/// as de-duplicating through a hash set and sorting afterwards, without
/// building the intermediate set.
fn unique_sorted_strings(mut values: Vec<String>) -> Vec<String> {
    // Equal strings are indistinguishable, so an unstable sort is sufficient.
    values.sort_unstable();
    values.dedup();
    values
}
/// Returns the distinct integers from `values` in ascending order.
///
/// Sorting first and then dropping adjacent duplicates gives the same result
/// as de-duplicating through a hash set and sorting afterwards, without
/// building the intermediate set.
fn unique_sorted_i64(mut values: Vec<i64>) -> Vec<i64> {
    // Equal integers are indistinguishable, so an unstable sort is sufficient.
    values.sort_unstable();
    values.dedup();
    values
}
@@ -621,7 +700,12 @@ mod tests {
}
}
fn fact(local_name: &str, period_start: Option<&str>, period_end: &str, value: f64) -> FactOutput {
fn fact(
local_name: &str,
period_start: Option<&str>,
period_end: &str,
value: f64,
) -> FactOutput {
FactOutput {
concept_key: format!("http://fasb.org/us-gaap/2024#{local_name}"),
qname: format!("us-gaap:{local_name}"),
@@ -658,15 +742,28 @@ mod tests {
period("curr", "2025-12-31", None),
];
let facts = vec![
fact("FinancingReceivableRecordedInvestment", None, "2024-12-31", 100.0),
fact("FinancingReceivableRecordedInvestment", None, "2025-12-31", 120.0),
fact(
"FinancingReceivableRecordedInvestment",
None,
"2024-12-31",
100.0,
),
fact(
"FinancingReceivableRecordedInvestment",
None,
"2025-12-31",
120.0,
),
fact("DepositsLiabilities", None, "2024-12-31", 200.0),
fact("DepositsLiabilities", None, "2025-12-31", 250.0),
];
let result = build_taxonomy_kpis(&periods, &facts, FiscalPack::BankLender)
.expect("taxonomy kpis should build");
assert!(result.rows.iter().all(|row| row.provenance_type == "taxonomy"));
assert!(result
.rows
.iter()
.all(|row| row.provenance_type == "taxonomy"));
assert!(result.rows.iter().any(|row| row.key == "loan_growth"));
assert!(result.rows.iter().any(|row| row.key == "deposit_growth"));
}
@@ -680,8 +777,18 @@ mod tests {
period("inst-curr", "2025-12-31", None),
];
let facts = vec![
fact("InterestIncomeExpenseNet", Some("2024-01-01"), "2024-12-31", 90.0),
fact("InterestIncomeExpenseNet", Some("2025-01-01"), "2025-12-31", 100.0),
fact(
"InterestIncomeExpenseNet",
Some("2024-01-01"),
"2024-12-31",
90.0,
),
fact(
"InterestIncomeExpenseNet",
Some("2025-01-01"),
"2025-12-31",
100.0,
),
fact("Assets", None, "2024-12-31", 1000.0),
fact("Assets", None, "2025-12-31", 1200.0),
];
@@ -694,7 +801,21 @@ mod tests {
.find(|row| row.key == "net_interest_margin")
.expect("net interest margin should be present");
assert_eq!(net_interest_margin.values.get("dur-prev").copied().flatten(), Some(0.09));
assert_eq!(net_interest_margin.values.get("dur-curr").copied().flatten(), Some(100.0 / 1200.0));
assert_eq!(
net_interest_margin
.values
.get("dur-prev")
.copied()
.flatten(),
Some(0.09)
);
assert_eq!(
net_interest_margin
.values
.get("dur-curr")
.copied()
.flatten(),
Some(100.0 / 1200.0)
);
}
}

View File

@@ -35,10 +35,17 @@ pub fn derive_metrics(facts: &[FactOutput]) -> FilingMetrics {
}
/// Selects the facts whose `local_name` equals any entry of `names`, ignoring
/// ASCII case.
///
/// The returned references keep the order in which the facts appear in
/// `facts`. An empty `names` slice selects nothing.
fn by_local_names<'a>(facts: &'a [FactOutput], names: &[&str]) -> Vec<&'a FactOutput> {
    facts
        .iter()
        .filter(|fact| {
            // `eq_ignore_ascii_case` folds both operands, so the candidate
            // names do not need to be lowercased into a temporary list first.
            names
                .iter()
                .any(|name| fact.local_name.eq_ignore_ascii_case(name))
        })
        .collect()
}

View File

@@ -46,7 +46,10 @@ pub fn select_fiscal_pack(statement_rows: &StatementRowMap, facts: &[FactOutput]
scored_packs.sort_by(|left, right| right.1.cmp(&left.1));
let (top_pack, top_score) = scored_packs[0];
let second_score = scored_packs.get(1).map(|(_, score)| *score).unwrap_or_default();
let second_score = scored_packs
.get(1)
.map(|(_, score)| *score)
.unwrap_or_default();
let margin = top_score - second_score;
let selected_pack = if top_score >= 10 && margin >= 4 {
top_pack
@@ -65,7 +68,10 @@ pub fn select_fiscal_pack(statement_rows: &StatementRowMap, facts: &[FactOutput]
}
}
fn collect_concept_names(statement_rows: &StatementRowMap, facts: &[FactOutput]) -> HashSet<String> {
fn collect_concept_names(
statement_rows: &StatementRowMap,
facts: &[FactOutput],
) -> HashSet<String> {
let mut names = HashSet::new();
for rows in statement_rows.values() {
@@ -255,7 +261,10 @@ fn weighted_match(concepts: &HashSet<String>, candidates: &[&str], weight: i64)
}
fn weighted_role_match(roles: &HashSet<String>, candidates: &[&str], weight: i64) -> i64 {
if roles.iter().any(|role| candidates.iter().any(|candidate| role.contains(candidate))) {
if roles
.iter()
.any(|role| candidates.iter().any(|candidate| role.contains(candidate)))
{
weight
} else {
0
@@ -265,7 +274,7 @@ fn weighted_role_match(roles: &HashSet<String>, candidates: &[&str], weight: i64
#[cfg(test)]
mod tests {
use super::*;
use crate::{StatementRowOutput, StatementRowMap};
use crate::{StatementRowMap, StatementRowOutput};
use std::collections::BTreeMap;
fn row(local_name: &str, statement: &str) -> StatementRowOutput {
@@ -320,7 +329,9 @@ mod tests {
row("Premiums", "income"),
row("PolicyholderBenefitsAndClaimsIncurredNet", "income"),
]);
rows.get_mut("balance").unwrap().push(row("FuturePolicyBenefits", "balance"));
rows.get_mut("balance")
.unwrap()
.push(row("FuturePolicyBenefits", "balance"));
let selection = select_fiscal_pack(&rows, &[]);
assert_eq!(selection.pack, FiscalPack::Insurance);
@@ -330,7 +341,9 @@ mod tests {
#[test]
fn defaults_to_core_on_low_confidence() {
let mut rows = empty_map();
rows.get_mut("income").unwrap().push(row("InterestExpense", "income"));
rows.get_mut("income")
.unwrap()
.push(row("InterestExpense", "income"));
let selection = select_fiscal_pack(&rows, &[]);
assert_eq!(selection.pack, FiscalPack::Core);
@@ -340,9 +353,15 @@ mod tests {
#[test]
fn chooses_reit_from_property_and_lease_signatures() {
let mut rows = empty_map();
rows.get_mut("income").unwrap().push(row("LeaseIncome", "income"));
rows.get_mut("balance").unwrap().push(row("RealEstateInvestmentPropertyNet", "balance"));
rows.get_mut("balance").unwrap().push(row("NumberOfRealEstateProperties", "balance"));
rows.get_mut("income")
.unwrap()
.push(row("LeaseIncome", "income"));
rows.get_mut("balance")
.unwrap()
.push(row("RealEstateInvestmentPropertyNet", "balance"));
rows.get_mut("balance")
.unwrap()
.push(row("NumberOfRealEstateProperties", "balance"));
let selection = select_fiscal_pack(&rows, &[]);
assert_eq!(selection.pack, FiscalPack::ReitRealEstate);
@@ -351,9 +370,15 @@ mod tests {
#[test]
fn chooses_broker_asset_manager_from_aum_and_fee_signatures() {
let mut rows = empty_map();
rows.get_mut("income").unwrap().push(row("PerformanceFeeRevenueRecognized", "income"));
rows.get_mut("balance").unwrap().push(row("AssetsUnderManagementCarryingAmount", "balance"));
rows.get_mut("balance").unwrap().push(row("FeePayingAssetUnderManagement", "balance"));
rows.get_mut("income")
.unwrap()
.push(row("PerformanceFeeRevenueRecognized", "income"));
rows.get_mut("balance")
.unwrap()
.push(row("AssetsUnderManagementCarryingAmount", "balance"));
rows.get_mut("balance")
.unwrap()
.push(row("FeePayingAssetUnderManagement", "balance"));
let selection = select_fiscal_pack(&rows, &[]);
assert_eq!(selection.pack, FiscalPack::BrokerAssetManager);