Attempting to fix the Taxonomy. Stashing changes so far for worktree merge
This commit is contained in:
@@ -12,6 +12,37 @@ use crate::{
|
|||||||
SurfaceRowOutput,
|
SurfaceRowOutput,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Largest absolute residual that the residual builders in this file treat as zero.
///
/// Expressed in whatever unit the surface-row values carry (not visible from here).
const RESIDUAL_TOLERANCE: f64 = 1.0;

/// Surface keys that `collect_residual_component_rows` never accepts as a residual
/// component, even when they are otherwise allowed.
const EXCLUDED_OPERATING_RESIDUAL_KEYS: &[&str] = &[
    "revenue",
    "cost_of_revenue",
    "gross_profit",
    "operating_expenses",
    "selling_general_and_administrative",
    "sales_and_marketing",
    "general_and_administrative",
    "other_operating_expense",
    "operating_income",
    "income_tax_expense",
    "net_income",
];

/// Surface keys that are always eligible as operating-expense components when a residual
/// is computed; the `operating_expenses` bridge row may contribute additional keys.
const COMMON_OPERATING_COMPONENT_KEYS: &[&str] = &[
    "research_and_development",
    "depreciation_and_amortization",
    "depreciation",
    "amortization",
    "restructuring",
    "asset_impairment",
    "stock_based_compensation",
    "labor_and_related_expense",
    "occupancy_net",
    "data_processing_expense",
    "claims_and_benefits",
    "underwriting_expenses",
    "property_operating_expense",
    "administrative_expense",
];
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
enum ResolutionMethod {
|
enum ResolutionMethod {
|
||||||
Direct,
|
Direct,
|
||||||
@@ -154,6 +185,7 @@ pub fn apply_universal_income_rows(
|
|||||||
resolve_universal_row(
|
resolve_universal_row(
|
||||||
definition,
|
definition,
|
||||||
bridge_row,
|
bridge_row,
|
||||||
|
&income_bridge.rows,
|
||||||
periods,
|
periods,
|
||||||
&income_statement_rows,
|
&income_statement_rows,
|
||||||
facts,
|
facts,
|
||||||
@@ -237,6 +269,7 @@ pub fn apply_universal_income_rows(
|
|||||||
fn resolve_universal_row(
|
fn resolve_universal_row(
|
||||||
definition: &UniversalIncomeDefinition,
|
definition: &UniversalIncomeDefinition,
|
||||||
bridge_row: &IncomeBridgeRow,
|
bridge_row: &IncomeBridgeRow,
|
||||||
|
bridge_rows: &HashMap<String, IncomeBridgeRow>,
|
||||||
periods: &[PeriodOutput],
|
periods: &[PeriodOutput],
|
||||||
income_statement_rows: &[StatementRowOutput],
|
income_statement_rows: &[StatementRowOutput],
|
||||||
facts: &[FactOutput],
|
facts: &[FactOutput],
|
||||||
@@ -313,6 +346,46 @@ fn resolve_universal_row(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if definition.key == "selling_general_and_administrative" {
|
||||||
|
let formula_row = build_formula_row(
|
||||||
|
definition,
|
||||||
|
bridge_row,
|
||||||
|
periods,
|
||||||
|
income_statement_rows,
|
||||||
|
income_surface_rows,
|
||||||
|
crosswalk,
|
||||||
|
);
|
||||||
|
if has_any_value(&formula_row.row.values) {
|
||||||
|
return formula_row;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(residual_row) = build_residual_sga_row(
|
||||||
|
definition,
|
||||||
|
bridge_rows,
|
||||||
|
periods,
|
||||||
|
income_statement_rows,
|
||||||
|
facts,
|
||||||
|
income_surface_rows,
|
||||||
|
crosswalk,
|
||||||
|
) {
|
||||||
|
return residual_row;
|
||||||
|
}
|
||||||
|
|
||||||
|
return formula_row;
|
||||||
|
}
|
||||||
|
|
||||||
|
if definition.key == "other_operating_expense" {
|
||||||
|
return build_residual_other_operating_expense_row(
|
||||||
|
definition,
|
||||||
|
bridge_rows,
|
||||||
|
periods,
|
||||||
|
income_statement_rows,
|
||||||
|
facts,
|
||||||
|
income_surface_rows,
|
||||||
|
crosswalk,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
build_formula_row(
|
build_formula_row(
|
||||||
definition,
|
definition,
|
||||||
bridge_row,
|
bridge_row,
|
||||||
@@ -501,6 +574,247 @@ fn build_formula_row(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_residual_sga_row(
|
||||||
|
definition: &UniversalIncomeDefinition,
|
||||||
|
bridge_rows: &HashMap<String, IncomeBridgeRow>,
|
||||||
|
periods: &[PeriodOutput],
|
||||||
|
income_statement_rows: &[StatementRowOutput],
|
||||||
|
facts: &[FactOutput],
|
||||||
|
income_surface_rows: &[SurfaceRowOutput],
|
||||||
|
crosswalk: Option<&CrosswalkFile>,
|
||||||
|
) -> Option<ResolvedUniversalRow> {
|
||||||
|
let operating_expenses = find_surface_row(income_surface_rows, "operating_expenses")?;
|
||||||
|
let research_source = resolve_component_surface_source(
|
||||||
|
"research_and_development",
|
||||||
|
income_statement_rows,
|
||||||
|
income_surface_rows,
|
||||||
|
crosswalk,
|
||||||
|
)
|
||||||
|
.filter(|source| non_formula_value_source_for_any_period(source));
|
||||||
|
let explicit_other_bridge = bridge_rows.get("other_operating_expense");
|
||||||
|
|
||||||
|
let mut values = BTreeMap::<String, Option<f64>>::new();
|
||||||
|
let mut negative_residual = false;
|
||||||
|
let mut used_sources = Vec::<ValueSource>::new();
|
||||||
|
|
||||||
|
for period in periods {
|
||||||
|
let mut period_value = None;
|
||||||
|
|
||||||
|
if !has_explicit_direct_match_for_period(
|
||||||
|
period,
|
||||||
|
income_statement_rows,
|
||||||
|
facts,
|
||||||
|
explicit_other_bridge,
|
||||||
|
crosswalk,
|
||||||
|
) {
|
||||||
|
if let Some(operating_value) =
|
||||||
|
residual_anchor_value_for_period(operating_expenses, &period.id)
|
||||||
|
{
|
||||||
|
let research_value = research_source.as_ref().and_then(|source| {
|
||||||
|
non_formula_value_from_source_for_period(source, &period.id)
|
||||||
|
});
|
||||||
|
|
||||||
|
if let Some(research_value) = research_value {
|
||||||
|
let component_total = research_value;
|
||||||
|
let inferred = operating_value - component_total;
|
||||||
|
|
||||||
|
if inferred.abs() <= RESIDUAL_TOLERANCE {
|
||||||
|
period_value = Some(0.0);
|
||||||
|
} else if inferred.is_sign_negative() {
|
||||||
|
negative_residual = true;
|
||||||
|
} else {
|
||||||
|
period_value = Some(inferred);
|
||||||
|
}
|
||||||
|
|
||||||
|
if period_value.is_some() {
|
||||||
|
used_sources.push(surface_source(operating_expenses));
|
||||||
|
if let Some(source) = research_source.as_ref() {
|
||||||
|
used_sources.push(source.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
values.insert(period.id.clone(), period_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !has_any_value(&values) {
|
||||||
|
if negative_residual {
|
||||||
|
return Some(unresolved_row(
|
||||||
|
definition,
|
||||||
|
periods,
|
||||||
|
&[
|
||||||
|
"selling_general_and_administrative_negative_residual_invalid".to_string(),
|
||||||
|
"selling_general_and_administrative_unresolved".to_string(),
|
||||||
|
],
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut warning_codes =
|
||||||
|
vec!["selling_general_and_administrative_residual_from_operating_expenses".to_string()];
|
||||||
|
if negative_residual {
|
||||||
|
warning_codes
|
||||||
|
.push("selling_general_and_administrative_negative_residual_invalid".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(build_formula_surface_row(
|
||||||
|
definition,
|
||||||
|
periods,
|
||||||
|
values,
|
||||||
|
&used_sources,
|
||||||
|
warning_codes,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_residual_other_operating_expense_row(
|
||||||
|
definition: &UniversalIncomeDefinition,
|
||||||
|
bridge_rows: &HashMap<String, IncomeBridgeRow>,
|
||||||
|
periods: &[PeriodOutput],
|
||||||
|
_income_statement_rows: &[StatementRowOutput],
|
||||||
|
_facts: &[FactOutput],
|
||||||
|
income_surface_rows: &[SurfaceRowOutput],
|
||||||
|
_crosswalk: Option<&CrosswalkFile>,
|
||||||
|
) -> ResolvedUniversalRow {
|
||||||
|
let Some(operating_expenses) = find_surface_row(income_surface_rows, "operating_expenses")
|
||||||
|
else {
|
||||||
|
return unresolved_row(
|
||||||
|
definition,
|
||||||
|
periods,
|
||||||
|
&[format!("{}_unresolved", definition.key)],
|
||||||
|
);
|
||||||
|
};
|
||||||
|
let Some(sga_row) = find_surface_row(income_surface_rows, "selling_general_and_administrative")
|
||||||
|
else {
|
||||||
|
return unresolved_row(
|
||||||
|
definition,
|
||||||
|
periods,
|
||||||
|
&[format!("{}_unresolved", definition.key)],
|
||||||
|
);
|
||||||
|
};
|
||||||
|
let component_rows = collect_residual_component_rows(
|
||||||
|
income_surface_rows,
|
||||||
|
bridge_rows,
|
||||||
|
"other_operating_expense",
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut values = BTreeMap::<String, Option<f64>>::new();
|
||||||
|
let mut suppressed_zero_residual = false;
|
||||||
|
let mut used_sources = Vec::<ValueSource>::new();
|
||||||
|
|
||||||
|
for period in periods {
|
||||||
|
let Some(operating_value) =
|
||||||
|
residual_anchor_value_for_period(operating_expenses, &period.id)
|
||||||
|
else {
|
||||||
|
values.insert(period.id.clone(), None);
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let Some(sga_value) = surface_row_value_for_period(sga_row, &period.id) else {
|
||||||
|
values.insert(period.id.clone(), None);
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
let period_components = component_rows
|
||||||
|
.iter()
|
||||||
|
.filter(|row| non_formula_value_for_period(row, &period.id).is_some())
|
||||||
|
.copied()
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let component_total = period_components
|
||||||
|
.iter()
|
||||||
|
.filter_map(|row| non_formula_value_for_period(row, &period.id))
|
||||||
|
.sum::<f64>();
|
||||||
|
let residual = operating_value - sga_value - component_total;
|
||||||
|
|
||||||
|
let period_value = if residual.abs() <= RESIDUAL_TOLERANCE || residual <= 0.0 {
|
||||||
|
if residual.abs() <= RESIDUAL_TOLERANCE {
|
||||||
|
suppressed_zero_residual = true;
|
||||||
|
}
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
used_sources.push(surface_source(operating_expenses));
|
||||||
|
used_sources.push(surface_source(sga_row));
|
||||||
|
used_sources.extend(period_components.into_iter().map(surface_source));
|
||||||
|
Some(residual)
|
||||||
|
};
|
||||||
|
|
||||||
|
values.insert(period.id.clone(), period_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !has_any_value(&values) {
|
||||||
|
if suppressed_zero_residual {
|
||||||
|
return ResolvedUniversalRow {
|
||||||
|
row: null_surface_row(
|
||||||
|
definition,
|
||||||
|
periods,
|
||||||
|
None,
|
||||||
|
Some(Confidence::Low),
|
||||||
|
vec!["other_operating_expense_suppressed_zero_residual".to_string()],
|
||||||
|
),
|
||||||
|
detail_rows: vec![],
|
||||||
|
mapping_assignments: HashMap::new(),
|
||||||
|
warning_codes: vec!["other_operating_expense_suppressed_zero_residual".to_string()],
|
||||||
|
consumed_sources: ConsumedSources::default(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return unresolved_row(
|
||||||
|
definition,
|
||||||
|
periods,
|
||||||
|
&[format!("{}_unresolved", definition.key)],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut warning_codes = vec!["other_operating_expense_formula_derived".to_string()];
|
||||||
|
if suppressed_zero_residual {
|
||||||
|
warning_codes.push("other_operating_expense_suppressed_zero_residual".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
build_formula_surface_row(definition, periods, values, &used_sources, warning_codes)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_formula_surface_row(
|
||||||
|
definition: &UniversalIncomeDefinition,
|
||||||
|
periods: &[PeriodOutput],
|
||||||
|
values: BTreeMap<String, Option<f64>>,
|
||||||
|
source_rows: &[ValueSource],
|
||||||
|
warning_codes: Vec<String>,
|
||||||
|
) -> ResolvedUniversalRow {
|
||||||
|
let merged_source = merge_value_sources(source_rows);
|
||||||
|
|
||||||
|
ResolvedUniversalRow {
|
||||||
|
row: SurfaceRowOutput {
|
||||||
|
key: definition.key.clone(),
|
||||||
|
label: definition.label.clone(),
|
||||||
|
category: definition.category.clone(),
|
||||||
|
template_section: definition.category.clone(),
|
||||||
|
order: definition.order,
|
||||||
|
unit: definition.unit.clone(),
|
||||||
|
values,
|
||||||
|
source_concepts: merged_source.source_concepts,
|
||||||
|
source_row_keys: merged_source.source_row_keys,
|
||||||
|
source_fact_ids: merged_source.source_fact_ids,
|
||||||
|
formula_key: Some(definition.key.clone()),
|
||||||
|
has_dimensions: merged_source.has_dimensions,
|
||||||
|
resolved_source_row_keys: periods
|
||||||
|
.iter()
|
||||||
|
.map(|period| (period.id.clone(), None))
|
||||||
|
.collect(),
|
||||||
|
statement: Some(definition.statement.clone()),
|
||||||
|
detail_count: Some(0),
|
||||||
|
resolution_method: Some(ResolutionMethod::FormulaDerived.as_str().to_string()),
|
||||||
|
confidence: Some(Confidence::Medium.as_str().to_string()),
|
||||||
|
warning_codes: warning_codes.clone(),
|
||||||
|
},
|
||||||
|
detail_rows: vec![],
|
||||||
|
mapping_assignments: HashMap::new(),
|
||||||
|
warning_codes,
|
||||||
|
consumed_sources: ConsumedSources::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn build_direct_row(
|
fn build_direct_row(
|
||||||
definition: &UniversalIncomeDefinition,
|
definition: &UniversalIncomeDefinition,
|
||||||
periods: &[PeriodOutput],
|
periods: &[PeriodOutput],
|
||||||
@@ -814,6 +1128,112 @@ fn collect_group_sources<'a>(
|
|||||||
(sources, rows)
|
(sources, rows)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the first surface row whose `key` equals `key`, if any.
fn find_surface_row<'a>(
    income_surface_rows: &'a [SurfaceRowOutput],
    key: &str,
) -> Option<&'a SurfaceRowOutput> {
    income_surface_rows
        .iter()
        .find(|candidate| candidate.key.as_str() == key)
}
|
||||||
|
|
||||||
|
fn has_explicit_direct_match_for_period(
|
||||||
|
period: &PeriodOutput,
|
||||||
|
income_statement_rows: &[StatementRowOutput],
|
||||||
|
facts: &[FactOutput],
|
||||||
|
bridge_row: Option<&IncomeBridgeRow>,
|
||||||
|
crosswalk: Option<&CrosswalkFile>,
|
||||||
|
) -> bool {
|
||||||
|
let Some(bridge_row) = bridge_row else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
income_statement_rows.iter().any(|row| {
|
||||||
|
row.values.get(&period.id).copied().flatten().is_some()
|
||||||
|
&& (match_direct_authoritative(
|
||||||
|
row,
|
||||||
|
&bridge_row.direct_authoritative_concepts,
|
||||||
|
crosswalk,
|
||||||
|
)
|
||||||
|
.is_some()
|
||||||
|
|| match_direct_source(row, &bridge_row.direct_source_concepts, crosswalk)
|
||||||
|
.is_some())
|
||||||
|
}) || facts.iter().any(|fact| {
|
||||||
|
fact_matches_period(fact, period)
|
||||||
|
&& (match_direct_fact_authoritative(
|
||||||
|
fact,
|
||||||
|
&bridge_row.direct_authoritative_concepts,
|
||||||
|
crosswalk,
|
||||||
|
)
|
||||||
|
.is_some()
|
||||||
|
|| match_direct_fact_source(fact, &bridge_row.direct_source_concepts, crosswalk)
|
||||||
|
.is_some())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn collect_residual_component_rows<'a>(
|
||||||
|
income_surface_rows: &'a [SurfaceRowOutput],
|
||||||
|
bridge_rows: &HashMap<String, IncomeBridgeRow>,
|
||||||
|
target_key: &str,
|
||||||
|
) -> Vec<&'a SurfaceRowOutput> {
|
||||||
|
let mut allowed_surface_keys = COMMON_OPERATING_COMPONENT_KEYS
|
||||||
|
.iter()
|
||||||
|
.map(|key| key.to_string())
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
|
||||||
|
if let Some(operating_bridge) = bridge_rows.get("operating_expenses") {
|
||||||
|
allowed_surface_keys.extend(operating_bridge.component_surfaces.positive.iter().cloned());
|
||||||
|
}
|
||||||
|
|
||||||
|
income_surface_rows
|
||||||
|
.iter()
|
||||||
|
.filter(|row| row.key != target_key)
|
||||||
|
.filter(|row| !EXCLUDED_OPERATING_RESIDUAL_KEYS.contains(&row.key.as_str()))
|
||||||
|
.filter(|row| allowed_surface_keys.contains(&row.key))
|
||||||
|
.filter(|row| !surface_row_is_formula_derived(row))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn residual_anchor_value_for_period(row: &SurfaceRowOutput, period_id: &str) -> Option<f64> {
|
||||||
|
let confidence = row.confidence.as_deref().unwrap_or_default();
|
||||||
|
let resolution_method = row.resolution_method.as_deref().unwrap_or_default();
|
||||||
|
if resolution_method == ResolutionMethod::NotMeaningful.as_str() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if resolution_method == ResolutionMethod::FormulaDerived.as_str()
|
||||||
|
&& confidence != Confidence::High.as_str()
|
||||||
|
{
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
surface_row_value_for_period(row, period_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn surface_row_value_for_period(row: &SurfaceRowOutput, period_id: &str) -> Option<f64> {
|
||||||
|
row.values.get(period_id).copied().flatten()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn non_formula_value_for_period(row: &SurfaceRowOutput, period_id: &str) -> Option<f64> {
|
||||||
|
if surface_row_is_formula_derived(row) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
surface_row_value_for_period(row, period_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn non_formula_value_source_for_any_period(source: &ValueSource) -> bool {
|
||||||
|
source
|
||||||
|
.values
|
||||||
|
.keys()
|
||||||
|
.any(|period_id| non_formula_value_from_source_for_period(source, period_id).is_some())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn non_formula_value_from_source_for_period(source: &ValueSource, period_id: &str) -> Option<f64> {
|
||||||
|
source.values.get(period_id).copied().flatten()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn surface_row_is_formula_derived(row: &SurfaceRowOutput) -> bool {
|
||||||
|
row.resolution_method.as_deref() == Some(ResolutionMethod::FormulaDerived.as_str())
|
||||||
|
}
|
||||||
|
|
||||||
fn resolve_component_surface_source(
|
fn resolve_component_surface_source(
|
||||||
surface_key: &str,
|
surface_key: &str,
|
||||||
income_statement_rows: &[StatementRowOutput],
|
income_statement_rows: &[StatementRowOutput],
|
||||||
@@ -1628,12 +2048,17 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn derives_other_operating_expense_from_operating_expenses_minus_sga_and_missing_rnd() {
|
fn prefers_direct_sga_over_residual_inference() {
|
||||||
let rows = empty_rows();
|
let mut rows = empty_rows();
|
||||||
|
rows.get_mut("income").unwrap().push(row(
|
||||||
|
"sga-direct",
|
||||||
|
"us-gaap:SellingGeneralAndAdministrativeExpense",
|
||||||
|
70.0,
|
||||||
|
));
|
||||||
let mut model = empty_model();
|
let mut model = empty_model();
|
||||||
model.surface_rows.get_mut("income").unwrap().extend([
|
model.surface_rows.get_mut("income").unwrap().extend([
|
||||||
surface_row("operating_expenses", 100.0),
|
surface_row("operating_expenses", 100.0),
|
||||||
surface_row("selling_general_and_administrative", 60.0),
|
surface_row("research_and_development", 30.0),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
apply_universal_income_rows(
|
apply_universal_income_rows(
|
||||||
@@ -1646,6 +2071,47 @@ mod tests {
|
|||||||
)
|
)
|
||||||
.expect("universal income rows should build");
|
.expect("universal income rows should build");
|
||||||
|
|
||||||
|
let sga = model
|
||||||
|
.surface_rows
|
||||||
|
.get("income")
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.find(|row| row.key == "selling_general_and_administrative")
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(sga.values.get("p1").copied().flatten(), Some(70.0));
|
||||||
|
assert_eq!(sga.resolution_method.as_deref(), Some("direct"));
|
||||||
|
assert!(!sga.warning_codes.contains(
|
||||||
|
&"selling_general_and_administrative_residual_from_operating_expenses".to_string()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn derives_sga_from_operating_expenses_minus_research_and_development() {
|
||||||
|
let rows = empty_rows();
|
||||||
|
let mut model = empty_model();
|
||||||
|
model.surface_rows.get_mut("income").unwrap().extend([
|
||||||
|
surface_row("operating_expenses", 100.0),
|
||||||
|
surface_row("research_and_development", 40.0),
|
||||||
|
]);
|
||||||
|
|
||||||
|
apply_universal_income_rows(
|
||||||
|
&[period("p1")],
|
||||||
|
&rows,
|
||||||
|
&[],
|
||||||
|
"us-gaap",
|
||||||
|
FiscalPack::Core,
|
||||||
|
&mut model,
|
||||||
|
)
|
||||||
|
.expect("universal income rows should build");
|
||||||
|
|
||||||
|
let sga = model
|
||||||
|
.surface_rows
|
||||||
|
.get("income")
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.find(|row| row.key == "selling_general_and_administrative")
|
||||||
|
.unwrap();
|
||||||
let other = model
|
let other = model
|
||||||
.surface_rows
|
.surface_rows
|
||||||
.get("income")
|
.get("income")
|
||||||
@@ -1654,12 +2120,134 @@ mod tests {
|
|||||||
.find(|row| row.key == "other_operating_expense")
|
.find(|row| row.key == "other_operating_expense")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(other.values.get("p1").copied().flatten(), Some(40.0));
|
assert_eq!(sga.values.get("p1").copied().flatten(), Some(60.0));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
other.formula_key.as_deref(),
|
sga.warning_codes,
|
||||||
Some("other_operating_expense")
|
vec!["selling_general_and_administrative_residual_from_operating_expenses".to_string()]
|
||||||
);
|
);
|
||||||
assert_eq!(other.resolution_method.as_deref(), Some("formula_derived"));
|
assert_eq!(other.values.get("p1").copied().flatten(), None);
|
||||||
|
assert!(other
|
||||||
|
.warning_codes
|
||||||
|
.contains(&"other_operating_expense_suppressed_zero_residual".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// With no direct SG&A row, SG&A is inferred as operating expenses minus R&D (100 - 25),
/// and the remaining D&A (15) then leaves no "other operating expense" value.
#[test]
fn derives_sga_from_operating_expenses_before_other_operating_expense_residual() {
    let statement_rows = empty_rows();
    let mut model = empty_model();
    let income_surface = model.surface_rows.get_mut("income").unwrap();
    income_surface.extend([
        surface_row("operating_expenses", 100.0),
        surface_row("research_and_development", 25.0),
        surface_row("depreciation_and_amortization", 15.0),
    ]);

    apply_universal_income_rows(
        &[period("p1")],
        &statement_rows,
        &[],
        "us-gaap",
        FiscalPack::Core,
        &mut model,
    )
    .expect("universal income rows should build");

    let income_rows = model.surface_rows.get("income").unwrap();
    let surface = |key: &str| income_rows.iter().find(|row| row.key == key).unwrap();
    let sga = surface("selling_general_and_administrative");
    let other = surface("other_operating_expense");

    assert_eq!(sga.values.get("p1").copied().flatten(), Some(75.0));
    assert_eq!(other.values.get("p1").copied().flatten(), None);
}
|
||||||
|
|
||||||
|
/// R&D (120) exceeding operating expenses (100) must not yield a negative SG&A: the row
/// stays empty and carries both the negative-residual and the unresolved warnings.
#[test]
fn leaves_sga_unresolved_when_operating_expense_residual_is_negative() {
    let statement_rows = empty_rows();
    let mut model = empty_model();
    let income_surface = model.surface_rows.get_mut("income").unwrap();
    income_surface.extend([
        surface_row("operating_expenses", 100.0),
        surface_row("research_and_development", 120.0),
    ]);

    apply_universal_income_rows(
        &[period("p1")],
        &statement_rows,
        &[],
        "us-gaap",
        FiscalPack::Core,
        &mut model,
    )
    .expect("universal income rows should build");

    let income_rows = model.surface_rows.get("income").unwrap();
    let sga = income_rows
        .iter()
        .find(|row| row.key == "selling_general_and_administrative")
        .unwrap();

    let negative_residual_code =
        "selling_general_and_administrative_negative_residual_invalid".to_string();
    let unresolved_code = "selling_general_and_administrative_unresolved".to_string();

    assert_eq!(sga.values.get("p1").copied().flatten(), None);
    assert!(sga.warning_codes.contains(&negative_residual_code));
    assert!(sga.warning_codes.contains(&unresolved_code));
}
|
||||||
|
|
||||||
|
/// An explicitly reported `us-gaap:OtherOperatingExpense` row stays a direct value, and its
/// presence prevents SG&A from being inferred as a residual for that period.
#[test]
fn keeps_explicit_other_operating_expense_direct() {
    let mut statement_rows = empty_rows();
    let income_statement = statement_rows.get_mut("income").unwrap();
    income_statement.push(row("other-expense", "us-gaap:OtherOperatingExpense", 12.0));

    let mut model = empty_model();
    let income_surface = model.surface_rows.get_mut("income").unwrap();
    income_surface.extend([
        surface_row("operating_expenses", 100.0),
        surface_row("research_and_development", 40.0),
    ]);

    apply_universal_income_rows(
        &[period("p1")],
        &statement_rows,
        &[],
        "us-gaap",
        FiscalPack::Core,
        &mut model,
    )
    .expect("universal income rows should build");

    let income_rows = model.surface_rows.get("income").unwrap();
    let surface = |key: &str| income_rows.iter().find(|row| row.key == key).unwrap();
    let sga = surface("selling_general_and_administrative");
    let other = surface("other_operating_expense");

    assert_eq!(sga.values.get("p1").copied().flatten(), None);
    assert_eq!(other.values.get("p1").copied().flatten(), Some(12.0));
    assert_eq!(other.resolution_method.as_deref(), Some("direct"));
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import {
|
|||||||
normalizeFilingTaxonomySnapshotPayload,
|
normalizeFilingTaxonomySnapshotPayload,
|
||||||
upsertFilingTaxonomySnapshot
|
upsertFilingTaxonomySnapshot
|
||||||
} from '@/lib/server/repos/filing-taxonomy';
|
} from '@/lib/server/repos/filing-taxonomy';
|
||||||
|
import { getIssuerOverlay } from '@/lib/server/repos/issuer-overlays';
|
||||||
|
|
||||||
type ScriptOptions = {
|
type ScriptOptions = {
|
||||||
apply: boolean;
|
apply: boolean;
|
||||||
@@ -22,6 +23,17 @@ type ScriptSummary = {
|
|||||||
failed: number;
|
failed: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Active issuer-overlay data resolved for one ticker.
 *
 * `definition` has the type of `active_revision.definition_json` as returned by
 * `getIssuerOverlay`, or `null`; it collapses to plain `null` when the overlay type
 * does not have that shape. `revisionId` is the overlay's `active_revision_id`, or `null`.
 */
type ActiveOverlayState = {
  definition: Awaited<ReturnType<typeof getIssuerOverlay>> extends infer Overlay
    ? Overlay extends { active_revision: infer Revision | null }
      ? Revision extends { definition_json: infer Definition }
        ? Definition | null
        : null
      : null
    : null;
  revisionId: number | null;
};
|
||||||
|
|
||||||
type FilingRow = {
|
type FilingRow = {
|
||||||
id: number;
|
id: number;
|
||||||
ticker: string;
|
ticker: string;
|
||||||
@@ -36,6 +48,28 @@ type FilingRow = {
|
|||||||
|
|
||||||
const REQUEST_DELAY_MS = 120;
|
const REQUEST_DELAY_MS = 120;
|
||||||
|
|
||||||
|
/**
 * Resolves the active issuer overlay for `ticker`, memoizing the lookup per ticker.
 *
 * The ticker is trimmed and upper-cased before it is used as the cache key and passed to
 * `getIssuerOverlay`. The cache stores the in-flight promise, so concurrent callers for the
 * same ticker share a single lookup.
 *
 * A failed lookup is evicted from the cache so that later filings for the same ticker
 * retry instead of re-throwing the same cached rejection; the failure still propagates to
 * every caller that was awaiting it.
 *
 * @param ticker Ticker symbol as stored on the filing row.
 * @param cache  Per-run cache keyed by normalized ticker.
 * @returns The active overlay definition and revision id (both `null` when absent).
 */
async function loadActiveOverlayState(
  ticker: string,
  cache: Map<string, Promise<ActiveOverlayState>>,
): Promise<ActiveOverlayState> {
  const normalizedTicker = ticker.trim().toUpperCase();
  const existing = cache.get(normalizedTicker);
  if (existing) {
    return await existing;
  }

  const pending = (async (): Promise<ActiveOverlayState> => {
    const overlay = await getIssuerOverlay(normalizedTicker);
    return {
      definition: overlay?.active_revision?.definition_json ?? null,
      revisionId: overlay?.active_revision_id ?? null
    };
  })();
  cache.set(normalizedTicker, pending);

  // Do not let a transient failure poison the cache for the rest of the run. Only evict
  // our own entry, in case a newer lookup has already replaced it.
  pending.catch(() => {
    if (cache.get(normalizedTicker) === pending) {
      cache.delete(normalizedTicker);
    }
  });

  return await pending;
}
|
||||||
|
|
||||||
function parseOptions(argv: string[]): ScriptOptions {
|
function parseOptions(argv: string[]): ScriptOptions {
|
||||||
const options: ScriptOptions = {
|
const options: ScriptOptions = {
|
||||||
apply: false,
|
apply: false,
|
||||||
@@ -147,6 +181,7 @@ async function loadFilings(options: ScriptOptions): Promise<FilingRow[]> {
|
|||||||
|
|
||||||
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
|
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
|
||||||
const rows = await loadFilings(options);
|
const rows = await loadFilings(options);
|
||||||
|
const overlayCache = new Map<string, Promise<ActiveOverlayState>>();
|
||||||
const summary: ScriptSummary = {
|
const summary: ScriptSummary = {
|
||||||
scanned: 0,
|
scanned: 0,
|
||||||
wouldWrite: 0,
|
wouldWrite: 0,
|
||||||
@@ -164,8 +199,13 @@ async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
|
|||||||
summary.scanned += 1;
|
summary.scanned += 1;
|
||||||
console.log(`[backfill-taxonomy-snapshots] [${index + 1}/${rows.length}] ${row.ticker} ${row.filingType} ${row.filingDate} ${row.accessionNumber}`);
|
console.log(`[backfill-taxonomy-snapshots] [${index + 1}/${rows.length}] ${row.ticker} ${row.filingType} ${row.filingDate} ${row.accessionNumber}`);
|
||||||
|
|
||||||
|
const activeOverlay = await loadActiveOverlayState(row.ticker, overlayCache);
|
||||||
const existing = await getFilingTaxonomySnapshotByFilingId(row.id);
|
const existing = await getFilingTaxonomySnapshotByFilingId(row.id);
|
||||||
const isFresh = existing && Date.parse(existing.updated_at) >= Date.parse(row.updatedAt);
|
const isFresh = Boolean(
|
||||||
|
existing
|
||||||
|
&& Date.parse(existing.updated_at) >= Date.parse(row.updatedAt)
|
||||||
|
&& (existing.issuer_overlay_revision_id ?? null) === activeOverlay.revisionId
|
||||||
|
);
|
||||||
|
|
||||||
if (isFresh && !options.refresh) {
|
if (isFresh && !options.refresh) {
|
||||||
summary.skippedFresh += 1;
|
summary.skippedFresh += 1;
|
||||||
@@ -181,7 +221,8 @@ async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
|
|||||||
filingDate: row.filingDate,
|
filingDate: row.filingDate,
|
||||||
filingType: row.filingType,
|
filingType: row.filingType,
|
||||||
filingUrl: row.filingUrl,
|
filingUrl: row.filingUrl,
|
||||||
primaryDocument: row.primaryDocument
|
primaryDocument: row.primaryDocument,
|
||||||
|
issuerOverlay: activeOverlay.definition
|
||||||
});
|
});
|
||||||
|
|
||||||
summary.wouldWrite += 1;
|
summary.wouldWrite += 1;
|
||||||
@@ -189,6 +230,7 @@ async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
|
|||||||
if (options.apply) {
|
if (options.apply) {
|
||||||
const normalizedSnapshot = {
|
const normalizedSnapshot = {
|
||||||
...snapshot,
|
...snapshot,
|
||||||
|
issuer_overlay_revision_id: activeOverlay.revisionId,
|
||||||
...normalizeFilingTaxonomySnapshotPayload(snapshot)
|
...normalizeFilingTaxonomySnapshotPayload(snapshot)
|
||||||
};
|
};
|
||||||
await upsertFilingTaxonomySnapshot(normalizedSnapshot);
|
await upsertFilingTaxonomySnapshot(normalizedSnapshot);
|
||||||
|
|||||||
Reference in New Issue
Block a user