From 7a7a78340f41fb24854fb0f763e9ddf29b588a97 Mon Sep 17 00:00:00 2001 From: francy51 Date: Thu, 12 Mar 2026 21:15:54 -0400 Subject: [PATCH] Expand backend financial statement parsers --- .../BALANCE_SHEET_PARSER_SPEC.md | 144 ++ .../CASH_FLOW_STATEMENT_PARSER_SPEC.md | 155 ++ .../OPERATING_STATEMENT_PARSER_SPEC.md | 103 ++ rust/fiscal-xbrl-core/src/lib.rs | 500 ++++-- rust/fiscal-xbrl-core/src/surface_mapper.rs | 1171 +++++++++++-- rust/fiscal-xbrl-core/src/taxonomy_loader.rs | 172 +- rust/fiscal-xbrl-core/src/universal_income.rs | 805 +++++++-- .../fiscal/v1/bank_lender.surface.json | 12 +- .../v1/broker_asset_manager.surface.json | 6 +- rust/taxonomy/fiscal/v1/core.surface.json | 1475 ++++++++++++++++- .../taxonomy/fiscal/v1/insurance.surface.json | 16 +- .../fiscal/v1/reit_real_estate.surface.json | 27 +- scripts/compare-fiscal-ai-statements.ts | 268 ++- 13 files changed, 4398 insertions(+), 456 deletions(-) create mode 100644 rust/fiscal-xbrl-core/BALANCE_SHEET_PARSER_SPEC.md create mode 100644 rust/fiscal-xbrl-core/CASH_FLOW_STATEMENT_PARSER_SPEC.md create mode 100644 rust/fiscal-xbrl-core/OPERATING_STATEMENT_PARSER_SPEC.md diff --git a/rust/fiscal-xbrl-core/BALANCE_SHEET_PARSER_SPEC.md b/rust/fiscal-xbrl-core/BALANCE_SHEET_PARSER_SPEC.md new file mode 100644 index 0000000..e485655 --- /dev/null +++ b/rust/fiscal-xbrl-core/BALANCE_SHEET_PARSER_SPEC.md @@ -0,0 +1,144 @@ +# Balance Sheet Parser Spec + +## Purpose +This document defines the backend-only balance-sheet parsing rules for `fiscal-xbrl-core`. + +This pass is limited to Rust parser behavior and taxonomy packs. It must not modify frontend files, frontend rendering logic, or frontend response shapes. + +## Hydration Order +1. Load the selected surface pack. +2. For non-core packs, merge in any core balance-sheet surfaces that the selected pack does not override. +3. Resolve direct canonical balance rows from statement rows. +4. 
Resolve aggregate-child rows from detail components when direct canonical rows are absent. +5. Resolve formula-backed balance rows from already-resolved canonical rows. +6. Emit `unmapped` only for rows not consumed by canonical balance parsing. + +## Category Taxonomy +Balance rows use these backend category keys: +- `current_assets` +- `noncurrent_assets` +- `current_liabilities` +- `noncurrent_liabilities` +- `equity` +- `derived` +- `sector_specific` + +Default rule: +- use economic placement first +- reserve `sector_specific` for rows that cannot be expressed economically + +## Canonical Precedence Rule +Canonical balance mappings take precedence over residual classification. + +If a statement row is consumed by a canonical balance row, it must not remain in `detail_rows["balance"]["unmapped"]`. + +## Alias Flattening Rule +Synonymous balance concepts flatten into one canonical surface row. + +Example: +- `AccountsReceivableNetCurrent` +- `ReceivablesNetCurrent` + +These must become one `accounts_receivable` row with period-aware provenance. + +## Per-Period Resolution Rule +Direct balance matching is resolved per period, not by choosing one row globally. + +For each canonical balance row: +1. Collect all direct candidates. +2. For each period, choose the best candidate with a value in that period. +3. Build one canonical row from those period-specific winners. +4. Preserve the union of all consumed aliases in `source_concepts`, `source_row_keys`, and `source_fact_ids`. + +## Formula Evaluation Rule +Structured formulas are evaluated only after their source surface rows have been resolved. + +Supported operators: +- `sum` +- `subtract` + +Formula rules: +- formulas operate period by period +- `sum` may treat nulls as zero when `treat_null_as_zero` is true +- `subtract` requires exactly two sources +- formula rows inherit provenance from the source surface rows they consume + +## Residual Pruning Rule +`balance.unmapped` is a strict remainder set. 
+ +A balance statement row must be excluded from `unmapped` when either of these is true: +- its row key was consumed by a canonical balance row +- its concept key was consumed by a canonical balance row + +## Helper Surface Rule +Some balance rows are parser helpers rather than user-facing canonical output. + +Current helper rows: +- `deferred_revenue_current` +- `deferred_revenue_noncurrent` +- `current_liabilities` +- `leases` + +Behavior: +- they remain available to formulas +- they do not appear in emitted `surface_rows` +- they do not create emitted detail buckets +- they still consume matched backend sources so those rows do not leak into `unmapped` + +## Synonym vs Aggregate Child Rule +Two cases must remain distinct. + +### Synonym aliases +Different concept names for the same canonical balance meaning. + +Behavior: +- flatten into one canonical surface row +- do not emit duplicate detail rows +- do not remain in `unmapped` + +### Aggregate child components +Rows that legitimately roll into a subtotal or total. + +Behavior: +- may remain as detail rows beneath the canonical parent when grouping is enabled +- must not remain in `unmapped` after being consumed + +## Sector Placement Decisions +Sector rows stay inside the same economic taxonomy. + +Mappings in this pass: +- `loans` -> `noncurrent_assets` +- `allowance_for_credit_losses` -> `noncurrent_assets` +- `deposits` -> `current_liabilities` +- `policy_liabilities` -> `noncurrent_liabilities` +- `deferred_acquisition_costs` -> `noncurrent_assets` +- `investment_property` -> `noncurrent_assets` + +`sector_specific` remains unused by default in this pass. + +## Required Invariants +- A consumed balance source must never remain in `balance.unmapped`. +- A synonym alias must never create more than one canonical balance row. +- Hidden helper surfaces may consume sources but must not appear in emitted `surface_rows`. +- Formula-derived rows inherit canonical provenance from their source surfaces. 
+- The frontend response shape remains unchanged. + +## Test Matrix +The parser must cover: +- direct alias flattening for `accounts_receivable` +- period-sparse alias merges into one canonical row +- formula derivation for `total_cash_and_equivalents` +- formula derivation for `unearned_revenue` +- formula derivation for `total_debt` +- formula derivation for `net_cash_position` +- helper rows staying out of emitted balance surfaces +- residual pruning of canonically consumed balance rows +- sector packs receiving merged core balance coverage without changing frontend contracts + +## Learnings Reusable For Other Statements +The same parser rules should later apply to cash flow: +- canonical mapping outranks residual classification +- direct aliases should resolve per period +- helper rows can exist backend-only when formulas need them +- consumed sources must be removed from `unmapped` +- sector packs should inherit common canonical coverage rather than duplicating it diff --git a/rust/fiscal-xbrl-core/CASH_FLOW_STATEMENT_PARSER_SPEC.md b/rust/fiscal-xbrl-core/CASH_FLOW_STATEMENT_PARSER_SPEC.md new file mode 100644 index 0000000..5fdc614 --- /dev/null +++ b/rust/fiscal-xbrl-core/CASH_FLOW_STATEMENT_PARSER_SPEC.md @@ -0,0 +1,155 @@ +# Cash Flow Statement Parser Spec + +## Purpose +This document defines the backend-only cash-flow parsing rules for `fiscal-xbrl-core`. + +This pass is limited to Rust parser behavior, taxonomy packs, and backend comparison tooling. It must not modify frontend files, frontend rendering logic, or frontend response shapes. + +## Hydration Order +1. Load the selected surface pack. +2. For non-core packs, merge in any core balance-sheet and cash-flow surfaces that the selected pack does not override. +3. Resolve direct canonical cash-flow rows from statement rows. +4. Resolve aggregate-child cash-flow rows from matched detail components when direct canonical rows are absent. +5. 
Resolve formula-backed cash-flow rows from already-resolved canonical rows and helper rows. +6. Emit `unmapped` only for rows not consumed by canonical cash-flow parsing. + +## Category Model +Cash-flow rows use these backend category keys: +- `operating` +- `investing` +- `financing` +- `free_cash_flow` +- `helper` + +Rules: +- `helper` rows are backend-only and use `include_in_output: false`. +- Only `operating`, `investing`, `financing`, and `free_cash_flow` should appear in emitted `surface_rows`. + +## Canonical Precedence Rule +Canonical cash-flow mappings take precedence over residual classification. + +If a statement row is consumed by a canonical cash-flow row, it must not remain in `detail_rows["cash_flow"]["unmapped"]`. + +## Alias Flattening Rule +Synonymous cash-flow concepts flatten into one canonical surface row. + +Example: +- `NetCashProvidedByUsedInOperatingActivities` +- `NetCashProvidedByUsedInOperatingActivitiesContinuingOperations` + +These must become one `operating_cash_flow` row with period-aware provenance. + +## Per-Period Resolution Rule +Direct cash-flow matching is resolved per period, not by choosing one row globally. + +For each canonical cash-flow row: +1. Collect all direct candidates. +2. For each period, choose the best candidate with a value in that period. +3. Build one canonical row from those period-specific winners. +4. Preserve the union of all consumed aliases in `source_concepts`, `source_row_keys`, and `source_fact_ids`. + +## Sign Normalization Rule +Some canonical cash-flow rows require sign normalization. 
+ +Supported transform: +- `invert` + +Rule: +- sign transforms are applied after direct or aggregate resolution +- sign transforms are applied before formula evaluation consumes the row +- emitted detail rows inherit the same transform when they belong to the transformed canonical row +- provenance is preserved unchanged + +## Formula Rule +Structured formulas are evaluated only after their source surface rows have been resolved. + +Supported operators: +- `sum` +- `subtract` + +Current formulas: +- `changes_unearned_revenue = contract_liability_incurred - contract_liability_recognized` +- `changes_other_operating_activities = changes_other_current_assets + changes_other_current_liabilities + changes_other_noncurrent_assets + changes_other_noncurrent_liabilities` +- `free_cash_flow = operating_cash_flow + capital_expenditures` + +## Helper Row Rule +Helper rows exist only to support formulas and canonical grouping. + +Current helper rows: +- `contract_liability_incurred` +- `contract_liability_recognized` +- `changes_other_current_assets` +- `changes_other_current_liabilities` +- `changes_other_noncurrent_assets` +- `changes_other_noncurrent_liabilities` + +Behavior: +- helper rows remain available for formula evaluation +- helper rows do not appear in emitted `surface_rows` +- helper rows do not create emitted detail buckets +- helper rows still consume matched backend sources so those rows do not leak into `unmapped` + +## Residual Pruning Rule +`cash_flow.unmapped` is a strict remainder set. + +A cash-flow statement row must be excluded from `unmapped` when either of these is true: +- its row key was consumed by a canonical cash-flow row +- its concept key was consumed by a canonical cash-flow row + +## Sector Inheritance Rule +Sector packs inherit the core cash-flow taxonomy unless they provide an explicit cash-flow override. 
+ +Current behavior: +- bank/lender inherits core cash-flow rows +- broker/asset manager inherits core cash-flow rows +- insurance inherits core cash-flow rows +- REIT/real estate inherits core cash-flow rows + +No first-pass sector-specific cash-flow overrides are required. + +## Synonym vs Aggregate Child Rule +Two cases must remain distinct. + +### Synonym aliases +Different concept names for the same canonical cash-flow meaning. + +Behavior: +- flatten into one canonical surface row +- do not emit duplicate detail rows +- do not remain in `unmapped` + +### Aggregate child components +Rows that legitimately roll into a subtotal or grouped adjustment row. + +Behavior: +- may remain as detail rows beneath the canonical parent when grouping is enabled +- must not remain in `unmapped` after being consumed + +## Required Invariants +- A consumed cash-flow source must never remain in `cash_flow.unmapped`. +- A synonym alias must never create more than one canonical cash-flow row. +- Hidden helper surfaces may consume sources but must not appear in emitted `surface_rows`. +- Formula-derived rows inherit canonical provenance from their source surfaces. +- The frontend response shape remains unchanged. 
+ +## Test Matrix +The parser must cover: +- direct sign inversion for `capital_expenditures` +- direct sign inversion for `debt_repaid` +- direct sign inversion for `share_repurchases` +- direct mapping for `operating_cash_flow` +- formula derivation for `changes_unearned_revenue` +- formula derivation for `changes_other_operating_activities` +- formula derivation for `free_cash_flow` +- helper rows staying out of emitted cash-flow surfaces +- residual pruning of canonically consumed cash-flow rows +- sector packs receiving merged core cash-flow coverage without changing frontend contracts +- fallback classification for fact-only cash-flow concepts such as `IncreaseDecreaseInAccountsReceivable` and `PaymentsOfDividends` + +## Learnings Reusable For Other Statements +The same parser rules now apply consistently across income, balance, and cash flow: +- canonical mapping outranks residual classification +- direct aliases resolve per period +- helper rows may exist backend-only when formulas need them +- consumed sources must be removed from `unmapped` +- sector packs inherit common canonical coverage instead of duplicating it diff --git a/rust/fiscal-xbrl-core/OPERATING_STATEMENT_PARSER_SPEC.md b/rust/fiscal-xbrl-core/OPERATING_STATEMENT_PARSER_SPEC.md new file mode 100644 index 0000000..f4698d7 --- /dev/null +++ b/rust/fiscal-xbrl-core/OPERATING_STATEMENT_PARSER_SPEC.md @@ -0,0 +1,103 @@ +# Operating Statement Parser Spec + +## Purpose +This document defines the backend-only parsing rules for operating statement hydration in `fiscal-xbrl-core`. + +This pass is intentionally limited to Rust parser behavior. It must not change frontend files, frontend rendering logic, or API response shapes. + +## Hydration Order +1. Generic compact surface mapping builds initial `surface_rows`, `detail_rows`, and `unmapped` residuals. +2. Universal income parsing rewrites the income statement into canonical operating-statement rows. +3. 
Canonical income parsing is authoritative for income provenance and must prune any consumed residual rows from `detail_rows["income"]["unmapped"]`. + +## Canonical Precedence Rule +For income rows, canonical universal mappings take precedence over generic residual classification. + +If an income concept is consumed by a canonical operating-statement row, it must not remain in `unmapped`. + +## Alias Flattening Rule +Multiple source aliases for the same canonical operating-statement concept must flatten into a single canonical surface row. + +Examples: +- `us-gaap:OtherOperatingExpense` +- `us-gaap:OtherOperatingExpenses` +- `us-gaap:OtherCostAndExpenseOperating` + +These may differ by filer or period, but they still represent one canonical row such as `other_operating_expense`. + +## Per-Period Resolution Rule +Direct canonical matching is resolved per period, not by selecting one global winner for all periods. + +For each canonical income row: +1. Collect all direct statement-row matches. +2. For each period, keep only candidates with a value in that period. +3. Choose the best candidate for that period using existing ranking rules. +4. Build one canonical row whose `values` and `resolved_source_row_keys` are assembled period-by-period. + +The canonical row's provenance is the union of all consumed aliases, even if a different alias wins in different periods. + +## Residual Pruning Rule +After canonical income rows are resolved: +- collect all consumed source row keys +- collect all consumed concept keys +- remove any residual income detail row from `unmapped` if either identifier matches + +`unmapped` is a strict remainder set after income canonicalization. + +## Synonym vs Aggregate Child Rule +Two cases must remain distinct: + +### Synonym aliases +Different concept names representing the same canonical meaning. 
+ +Behavior: +- flatten into one canonical surface row +- do not emit as detail rows +- do not leave in `unmapped` + +### Aggregate child components +Rows that are true components of a higher-level canonical row, such as: +- `SalesAndMarketingExpense` +- `GeneralAndAdministrativeExpense` +used to derive `selling_general_and_administrative` + +Behavior: +- may appear as detail rows under the canonical parent +- must not also remain in `unmapped` once consumed by that canonical parent + +## Required Invariants +For income parsing, a given source may appear in exactly one of these places: +- canonical surface provenance +- canonical detail provenance +- `unmapped` + +It must never appear in more than one place at the same time. + +Additional invariants: +- canonical surface rows are unique by canonical key +- aliases are flattened into one canonical row +- `resolved_source_row_keys` are period-specific +- normalization counts reflect the post-pruning state + +## Performance Constraints +- Use `HashSet` membership for consumed-source pruning. +- Build candidate collections once per canonical definition. +- Avoid UI-side dedupe or post-processing. +- Keep the parser close to linear in candidate volume per definition. 
+ +## Test Matrix +The parser must cover: +- direct alias dedupe for `other_operating_expense` +- period-sparse alias merge into a single canonical row +- pruning of canonically consumed aliases from `income.unmapped` +- preservation of truly unrelated residual rows +- pruning of formula-consumed component rows from `income.unmapped` + +## Learnings For Other Statements +The same backend rules should later be applied to balance sheet and cash flow: +- canonical mapping must outrank residual classification +- alias resolution should be per-period +- consumed sources must be removed from `unmapped` +- synonym aliases and aggregate child components must be treated differently + +When balance sheet and cash flow are upgraded, they should adopt these invariants without changing frontend response shapes. diff --git a/rust/fiscal-xbrl-core/src/lib.rs b/rust/fiscal-xbrl-core/src/lib.rs index 615f31c..6d78e6d 100644 --- a/rust/fiscal-xbrl-core/src/lib.rs +++ b/rust/fiscal-xbrl-core/src/lib.rs @@ -37,10 +37,12 @@ static IDENTIFIER_RE: Lazy = Lazy::new(|| { Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?identifier\b[^>]*\bscheme=["']([^"']+)["'][^>]*>(.*?)"#).unwrap() }); static SEGMENT_RE: Lazy = Lazy::new(|| { - Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?segment\b[^>]*>(.*?)"#).unwrap() + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?segment\b[^>]*>(.*?)"#) + .unwrap() }); static SCENARIO_RE: Lazy = Lazy::new(|| { - Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?scenario\b[^>]*>(.*?)"#).unwrap() + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?scenario\b[^>]*>(.*?)"#) + .unwrap() }); static START_DATE_RE: Lazy = Lazy::new(|| { Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?startDate>(.*?)"#).unwrap() @@ -55,7 +57,8 @@ static MEASURE_RE: Lazy = Lazy::new(|| { Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?measure>(.*?)"#).unwrap() }); static LABEL_LINK_RE: Lazy = Lazy::new(|| { - Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>(.*?)"#).unwrap() + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>(.*?)"#) + .unwrap() }); 
static PRESENTATION_LINK_RE: Lazy = Lazy::new(|| { Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>(.*?)"#).unwrap() @@ -67,12 +70,14 @@ static LABEL_RESOURCE_RE: Lazy = Lazy::new(|| { Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?label\b([^>]*)>(.*?)"#).unwrap() }); static LABEL_ARC_RE: Lazy = Lazy::new(|| { - Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)/?>(?:)?"#).unwrap() + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)/?>(?:)?"#) + .unwrap() }); static PRESENTATION_ARC_RE: Lazy = Lazy::new(|| { Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)/?>(?:)?"#).unwrap() }); -static ATTR_RE: Lazy = Lazy::new(|| Regex::new(r#"([a-zA-Z0-9:_\-]+)=["']([^"']+)["']"#).unwrap()); +static ATTR_RE: Lazy = + Lazy::new(|| Regex::new(r#"([a-zA-Z0-9:_\-]+)=["']([^"']+)["']"#).unwrap()); #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -451,7 +456,8 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result Result { if asset.asset_type == "presentation" { @@ -515,10 +519,15 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result Result Result>() .len(), assets: discovered.assets, @@ -622,7 +639,10 @@ struct DiscoveredAssets { assets: Vec, } -fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Result { +fn discover_filing_assets( + input: &HydrateFilingRequest, + client: &Client, +) -> Result { let Some(directory_url) = resolve_filing_directory_url( input.filing_url.as_deref(), &input.cik, @@ -631,12 +651,19 @@ fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Resu return Ok(DiscoveredAssets { assets: vec![] }); }; - let payload = fetch_json::(client, &format!("{directory_url}index.json")).ok(); + let payload = + fetch_json::(client, &format!("{directory_url}index.json")).ok(); let mut discovered = Vec::new(); - if let Some(items) = payload.and_then(|payload| payload.directory.and_then(|directory| directory.item)) { + if let Some(items) = + 
payload.and_then(|payload| payload.directory.and_then(|directory| directory.item)) + { for item in items { - let Some(name) = item.name.map(|name| name.trim().to_string()).filter(|name| !name.is_empty()) else { + let Some(name) = item + .name + .map(|name| name.trim().to_string()) + .filter(|name| !name.is_empty()) + else { continue; }; @@ -683,12 +710,19 @@ fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Resu score_instance(&asset.name, input.primary_document.as_deref()), ) }) - .max_by(|left, right| left.1.partial_cmp(&right.1).unwrap_or(std::cmp::Ordering::Equal)) + .max_by(|left, right| { + left.1 + .partial_cmp(&right.1) + .unwrap_or(std::cmp::Ordering::Equal) + }) .map(|entry| entry.0); for asset in &mut discovered { asset.score = if asset.asset_type == "instance" { - Some(score_instance(&asset.name, input.primary_document.as_deref())) + Some(score_instance( + &asset.name, + input.primary_document.as_deref(), + )) } else if asset.asset_type == "pdf" { Some(score_pdf(&asset.name, asset.size_bytes)) } else { @@ -708,7 +742,11 @@ fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Resu Ok(DiscoveredAssets { assets: discovered }) } -fn resolve_filing_directory_url(filing_url: Option<&str>, cik: &str, accession_number: &str) -> Option { +fn resolve_filing_directory_url( + filing_url: Option<&str>, + cik: &str, + accession_number: &str, +) -> Option { if let Some(filing_url) = filing_url.map(str::trim).filter(|value| !value.is_empty()) { if let Some(last_slash) = filing_url.rfind('/') { if last_slash > "https://".len() { @@ -725,7 +763,10 @@ fn resolve_filing_directory_url(filing_url: Option<&str>, cik: &str, accession_n } fn normalize_cik_for_path(value: &str) -> Option { - let digits = value.chars().filter(|char| char.is_ascii_digit()).collect::(); + let digits = value + .chars() + .filter(|char| char.is_ascii_digit()) + .collect::(); if digits.is_empty() { return None; } @@ -741,16 +782,25 @@ fn 
classify_asset_type(name: &str) -> &'static str { return "schema"; } if lower.ends_with(".xml") { - if lower.ends_with("_pre.xml") || lower.ends_with("-pre.xml") || lower.contains("presentation") { + if lower.ends_with("_pre.xml") + || lower.ends_with("-pre.xml") + || lower.contains("presentation") + { return "presentation"; } if lower.ends_with("_lab.xml") || lower.ends_with("-lab.xml") || lower.contains("label") { return "label"; } - if lower.ends_with("_cal.xml") || lower.ends_with("-cal.xml") || lower.contains("calculation") { + if lower.ends_with("_cal.xml") + || lower.ends_with("-cal.xml") + || lower.contains("calculation") + { return "calculation"; } - if lower.ends_with("_def.xml") || lower.ends_with("-def.xml") || lower.contains("definition") { + if lower.ends_with("_def.xml") + || lower.ends_with("-def.xml") + || lower.contains("definition") + { return "definition"; } return "instance"; @@ -779,7 +829,11 @@ fn score_instance(name: &str, primary_document: Option<&str>) -> f64 { score += 5.0; } } - if lower.contains("cal") || lower.contains("def") || lower.contains("lab") || lower.contains("pre") { + if lower.contains("cal") + || lower.contains("def") + || lower.contains("lab") + || lower.contains("pre") + { score -= 3.0; } score @@ -819,7 +873,9 @@ fn fetch_text(client: &Client, url: &str) -> Result { if !response.status().is_success() { return Err(anyhow!("request failed for {url} ({})", response.status())); } - response.text().with_context(|| format!("unable to read response body for {url}")) + response + .text() + .with_context(|| format!("unable to read response body for {url}")) } fn fetch_json Deserialize<'de>>(client: &Client, url: &str) -> Result { @@ -847,17 +903,36 @@ fn parse_xbrl_instance(raw: &str, source_file: Option) -> ParsedInstance let mut facts = Vec::new(); for captures in FACT_RE.captures_iter(raw) { - let prefix = captures.get(1).map(|value| value.as_str().trim()).unwrap_or_default(); - let local_name = captures.get(2).map(|value| 
value.as_str().trim()).unwrap_or_default(); - let attrs = captures.get(3).map(|value| value.as_str()).unwrap_or_default(); - let body = decode_xml_entities(captures.get(4).map(|value| value.as_str()).unwrap_or_default().trim()); + let prefix = captures + .get(1) + .map(|value| value.as_str().trim()) + .unwrap_or_default(); + let local_name = captures + .get(2) + .map(|value| value.as_str().trim()) + .unwrap_or_default(); + let attrs = captures + .get(3) + .map(|value| value.as_str()) + .unwrap_or_default(); + let body = decode_xml_entities( + captures + .get(4) + .map(|value| value.as_str()) + .unwrap_or_default() + .trim(), + ); if prefix.is_empty() || local_name.is_empty() || is_xbrl_infrastructure_prefix(prefix) { continue; } let attr_map = parse_attrs(attrs); - let Some(context_id) = attr_map.get("contextRef").cloned().or_else(|| attr_map.get("contextref").cloned()) else { + let Some(context_id) = attr_map + .get("contextRef") + .cloned() + .or_else(|| attr_map.get("contextref").cloned()) + else { continue; }; @@ -870,7 +945,10 @@ fn parse_xbrl_instance(raw: &str, source_file: Option) -> ParsedInstance .cloned() .unwrap_or_else(|| format!("urn:unknown:{prefix}")); let context = context_by_id.get(&context_id); - let unit_ref = attr_map.get("unitRef").cloned().or_else(|| attr_map.get("unitref").cloned()); + let unit_ref = attr_map + .get("unitRef") + .cloned() + .or_else(|| attr_map.get("unitref").cloned()); let unit = unit_ref .as_ref() .and_then(|unit_ref| unit_by_id.get(unit_ref)) @@ -896,8 +974,12 @@ fn parse_xbrl_instance(raw: &str, source_file: Option) -> ParsedInstance period_start: context.and_then(|value| value.period_start.clone()), period_end: context.and_then(|value| value.period_end.clone()), period_instant: context.and_then(|value| value.period_instant.clone()), - dimensions: context.map(|value| value.dimensions.clone()).unwrap_or_default(), - is_dimensionless: context.map(|value| value.dimensions.is_empty()).unwrap_or(true), + dimensions: context + 
.map(|value| value.dimensions.clone()) + .unwrap_or_default(), + is_dimensionless: context + .map(|value| value.dimensions.is_empty()) + .unwrap_or(true), source_file: source_file.clone(), }); } @@ -916,10 +998,7 @@ fn parse_xbrl_instance(raw: &str, source_file: Option) -> ParsedInstance }) .collect::>(); - ParsedInstance { - contexts, - facts, - } + ParsedInstance { contexts, facts } } fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap { @@ -935,7 +1014,10 @@ fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap HashMap { let mut contexts = HashMap::new(); for captures in CONTEXT_RE.captures_iter(raw) { - let Some(context_id) = captures.get(1).map(|value| value.as_str().trim().to_string()) else { + let Some(context_id) = captures + .get(1) + .map(|value| value.as_str().trim().to_string()) + else { continue; }; - let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default(); + let block = captures + .get(2) + .map(|value| value.as_str()) + .unwrap_or_default(); let (entity_identifier, entity_scheme) = IDENTIFIER_RE .captures(block) .map(|captures| { ( - captures.get(2).map(|value| decode_xml_entities(value.as_str().trim())), - captures.get(1).map(|value| decode_xml_entities(value.as_str().trim())), + captures + .get(2) + .map(|value| decode_xml_entities(value.as_str().trim())), + captures + .get(1) + .map(|value| decode_xml_entities(value.as_str().trim())), ) }) .unwrap_or((None, None)); @@ -984,7 +1076,10 @@ fn parse_contexts(raw: &str) -> HashMap { let mut dimensions = Vec::new(); if let Some(segment_value) = segment.as_ref() { - if let Some(members) = segment_value.get("explicitMembers").and_then(|value| value.as_array()) { + if let Some(members) = segment_value + .get("explicitMembers") + .and_then(|value| value.as_array()) + { for member in members { if let (Some(axis), Some(member_value)) = ( member.get("axis").and_then(|value| value.as_str()), @@ -999,7 +1094,10 @@ fn parse_contexts(raw: &str) -> HashMap { } } if let 
Some(scenario_value) = scenario.as_ref() { - if let Some(members) = scenario_value.get("explicitMembers").and_then(|value| value.as_array()) { + if let Some(members) = scenario_value + .get("explicitMembers") + .and_then(|value| value.as_array()) + { for member in members { if let (Some(axis), Some(member_value)) = ( member.get("axis").and_then(|value| value.as_str()), @@ -1062,10 +1160,16 @@ fn parse_dimension_container(raw: &str) -> serde_json::Value { fn parse_units(raw: &str) -> HashMap { let mut units = HashMap::new(); for captures in UNIT_RE.captures_iter(raw) { - let Some(id) = captures.get(1).map(|value| value.as_str().trim().to_string()) else { + let Some(id) = captures + .get(1) + .map(|value| value.as_str().trim().to_string()) + else { continue; }; - let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default(); + let block = captures + .get(2) + .map(|value| value.as_str()) + .unwrap_or_default(); let measures = MEASURE_RE .captures_iter(block) .filter_map(|captures| captures.get(1)) @@ -1097,7 +1201,10 @@ fn parse_attrs(raw: &str) -> HashMap { let mut map = HashMap::new(); for captures in ATTR_RE.captures_iter(raw) { if let (Some(name), Some(value)) = (captures.get(1), captures.get(2)) { - map.insert(name.as_str().to_string(), decode_xml_entities(value.as_str())); + map.insert( + name.as_str().to_string(), + decode_xml_entities(value.as_str()), + ); } } map @@ -1138,12 +1245,20 @@ fn parse_label_linkbase(raw: &str) -> HashMap { let mut preferred = HashMap::::new(); for captures in LABEL_LINK_RE.captures_iter(raw) { - let block = captures.get(1).map(|value| value.as_str()).unwrap_or_default(); + let block = captures + .get(1) + .map(|value| value.as_str()) + .unwrap_or_default(); let mut loc_by_label = HashMap::::new(); let mut resource_by_label = HashMap::)>::new(); for captures in LOC_RE.captures_iter(block) { - let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let attrs = parse_attrs( + captures 
+ .get(1) + .map(|value| value.as_str()) + .unwrap_or_default(), + ); let Some(label) = attrs.get("xlink:label").cloned() else { continue; }; @@ -1160,14 +1275,24 @@ fn parse_label_linkbase(raw: &str) -> HashMap { } for captures in LABEL_RESOURCE_RE.captures_iter(block) { - let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let attrs = parse_attrs( + captures + .get(1) + .map(|value| value.as_str()) + .unwrap_or_default(), + ); let Some(label) = attrs.get("xlink:label").cloned() else { continue; }; - let body = decode_xml_entities(captures.get(2).map(|value| value.as_str()).unwrap_or_default()) - .split_whitespace() - .collect::>() - .join(" "); + let body = decode_xml_entities( + captures + .get(2) + .map(|value| value.as_str()) + .unwrap_or_default(), + ) + .split_whitespace() + .collect::>() + .join(" "); if body.is_empty() { continue; } @@ -1175,7 +1300,12 @@ fn parse_label_linkbase(raw: &str) -> HashMap { } for captures in LABEL_ARC_RE.captures_iter(block) { - let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let attrs = parse_attrs( + captures + .get(1) + .map(|value| value.as_str()) + .unwrap_or_default(), + ); let Some(from) = attrs.get("xlink:from").cloned() else { continue; }; @@ -1190,7 +1320,11 @@ fn parse_label_linkbase(raw: &str) -> HashMap { }; let priority = label_priority(role.as_deref()); let current = preferred.get(concept_key).cloned(); - if current.as_ref().map(|(_, current_priority)| priority > *current_priority).unwrap_or(true) { + if current + .as_ref() + .map(|(_, current_priority)| priority > *current_priority) + .unwrap_or(true) + { preferred.insert(concept_key.clone(), (label.clone(), priority)); } } @@ -1207,18 +1341,31 @@ fn parse_presentation_linkbase(raw: &str) -> Vec { let mut rows = Vec::new(); for captures in PRESENTATION_LINK_RE.captures_iter(raw) { - let link_attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + 
let link_attrs = parse_attrs( + captures + .get(1) + .map(|value| value.as_str()) + .unwrap_or_default(), + ); let Some(role_uri) = link_attrs.get("xlink:role").cloned() else { continue; }; - let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default(); + let block = captures + .get(2) + .map(|value| value.as_str()) + .unwrap_or_default(); let mut loc_by_label = HashMap::::new(); let mut children_by_label = HashMap::>::new(); let mut incoming = HashSet::::new(); let mut all_referenced = HashSet::::new(); for captures in LOC_RE.captures_iter(block) { - let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let attrs = parse_attrs( + captures + .get(1) + .map(|value| value.as_str()) + .unwrap_or_default(), + ); let Some(label) = attrs.get("xlink:label").cloned() else { continue; }; @@ -1228,14 +1375,27 @@ fn parse_presentation_linkbase(raw: &str) -> Vec { let Some(qname) = qname_from_href(&href) else { continue; }; - let Some((concept_key, qname, local_name)) = concept_from_qname(&qname, &namespaces) else { + let Some((concept_key, qname, local_name)) = concept_from_qname(&qname, &namespaces) + else { continue; }; - loc_by_label.insert(label, (concept_key, qname, local_name.to_ascii_lowercase().contains("abstract"))); + loc_by_label.insert( + label, + ( + concept_key, + qname, + local_name.to_ascii_lowercase().contains("abstract"), + ), + ); } for captures in PRESENTATION_ARC_RE.captures_iter(block) { - let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let attrs = parse_attrs( + captures + .get(1) + .map(|value| value.as_str()) + .unwrap_or_default(), + ); let Some(from) = attrs.get("xlink:from").cloned() else { continue; }; @@ -1248,8 +1408,16 @@ fn parse_presentation_linkbase(raw: &str) -> Vec { let order = attrs .get("order") .and_then(|value| value.parse::().ok()) - .unwrap_or_else(|| children_by_label.get(&from).map(|children| children.len() as f64 + 
1.0).unwrap_or(1.0)); - children_by_label.entry(from.clone()).or_default().push((to.clone(), order)); + .unwrap_or_else(|| { + children_by_label + .get(&from) + .map(|children| children.len() as f64 + 1.0) + .unwrap_or(1.0) + }); + children_by_label + .entry(from.clone()) + .or_default() + .push((to.clone(), order)); incoming.insert(to.clone()); all_referenced.insert(from); all_referenced.insert(to); @@ -1281,7 +1449,11 @@ fn parse_presentation_linkbase(raw: &str) -> Vec { return; } - let parent_concept_key = parent_label.and_then(|parent| loc_by_label.get(parent).map(|(concept_key, _, _)| concept_key.clone())); + let parent_concept_key = parent_label.and_then(|parent| { + loc_by_label + .get(parent) + .map(|(concept_key, _, _)| concept_key.clone()) + }); rows.push(PresentationNode { concept_key: concept_key.clone(), role_uri: role_uri.to_string(), @@ -1292,7 +1464,11 @@ fn parse_presentation_linkbase(raw: &str) -> Vec { }); let mut children = children_by_label.get(label).cloned().unwrap_or_default(); - children.sort_by(|left, right| left.1.partial_cmp(&right.1).unwrap_or(std::cmp::Ordering::Equal)); + children.sort_by(|left, right| { + left.1 + .partial_cmp(&right.1) + .unwrap_or(std::cmp::Ordering::Equal) + }); for (index, (child_label, _)) in children.into_iter().enumerate() { dfs( &child_label, @@ -1400,7 +1576,10 @@ fn materialize_taxonomy_statements( .clone() .or_else(|| fact.period_instant.clone()) .unwrap_or_else(|| filing_date.to_string()); - let id = format!("{date}-{compact_accession}-{}", period_by_signature.len() + 1); + let id = format!( + "{date}-{compact_accession}-{}", + period_by_signature.len() + 1 + ); let period_label = if fact.period_instant.is_some() && fact.period_start.is_none() { "Instant".to_string() } else if fact.period_start.is_some() && fact.period_end.is_some() { @@ -1420,7 +1599,10 @@ fn materialize_taxonomy_statements( accession_number: accession_number.to_string(), filing_date: filing_date.to_string(), period_start: 
fact.period_start.clone(), - period_end: fact.period_end.clone().or_else(|| fact.period_instant.clone()), + period_end: fact + .period_end + .clone() + .or_else(|| fact.period_instant.clone()), filing_type: filing_type.to_string(), period_label, }, @@ -1429,9 +1611,17 @@ fn materialize_taxonomy_statements( let mut periods = period_by_signature.values().cloned().collect::>(); periods.sort_by(|left, right| { - let left_key = left.period_end.clone().unwrap_or_else(|| left.filing_date.clone()); - let right_key = right.period_end.clone().unwrap_or_else(|| right.filing_date.clone()); - left_key.cmp(&right_key).then_with(|| left.id.cmp(&right.id)) + let left_key = left + .period_end + .clone() + .unwrap_or_else(|| left.filing_date.clone()); + let right_key = right + .period_end + .clone() + .unwrap_or_else(|| right.filing_date.clone()); + left_key + .cmp(&right_key) + .then_with(|| left.id.cmp(&right.id)) }); let period_id_by_signature = period_by_signature .iter() @@ -1440,7 +1630,10 @@ fn materialize_taxonomy_statements( let mut presentation_by_concept = HashMap::>::new(); for node in presentation { - presentation_by_concept.entry(node.concept_key.clone()).or_default().push(node); + presentation_by_concept + .entry(node.concept_key.clone()) + .or_default() + .push(node); } let mut grouped_by_statement = empty_parsed_fact_map(); @@ -1502,9 +1695,13 @@ fn materialize_taxonomy_statements( let mut concepts = Vec::::new(); for statement_kind in statement_keys() { - let concept_groups = grouped_by_statement.remove(statement_kind).unwrap_or_default(); + let concept_groups = grouped_by_statement + .remove(statement_kind) + .unwrap_or_default(); let mut concept_keys = HashSet::::new(); - for node in presentation.iter().filter(|node| classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind)) { + for node in presentation.iter().filter(|node| { + classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind) + }) { 
concept_keys.insert(node.concept_key.clone()); } for concept_key in concept_groups.keys() { @@ -1516,12 +1713,21 @@ fn materialize_taxonomy_statements( .map(|concept_key| { let nodes = presentation .iter() - .filter(|node| node.concept_key == concept_key && classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind)) + .filter(|node| { + node.concept_key == concept_key + && classify_statement_role(&node.role_uri).as_deref() + == Some(statement_kind) + }) .collect::>(); - let order = nodes.iter().map(|node| node.order).fold(f64::INFINITY, f64::min); + let order = nodes + .iter() + .map(|node| node.order) + .fold(f64::INFINITY, f64::min); let depth = nodes.iter().map(|node| node.depth).min().unwrap_or(0); let role_uri = nodes.first().map(|node| node.role_uri.clone()); - let parent_concept_key = nodes.first().and_then(|node| node.parent_concept_key.clone()); + let parent_concept_key = nodes + .first() + .and_then(|node| node.parent_concept_key.clone()); (concept_key, order, depth, role_uri, parent_concept_key) }) .collect::>(); @@ -1532,8 +1738,13 @@ fn materialize_taxonomy_statements( .then_with(|| left.0.cmp(&right.0)) }); - for (concept_key, presentation_order, depth, role_uri, parent_concept_key) in ordered_concepts { - let fact_group = concept_groups.get(&concept_key).cloned().unwrap_or_default(); + for (concept_key, presentation_order, depth, role_uri, parent_concept_key) in + ordered_concepts + { + let fact_group = concept_groups + .get(&concept_key) + .cloned() + .unwrap_or_default(); let (namespace_uri, local_name) = split_concept_key(&concept_key); let qname = fact_group .first() @@ -1672,7 +1883,13 @@ fn empty_detail_row_map() -> DetailRowStatementMap { } fn statement_keys() -> [&'static str; 5] { - ["income", "balance", "cash_flow", "equity", "comprehensive_income"] + [ + "income", + "balance", + "cash_flow", + "equity", + "comprehensive_income", + ] } fn statement_key_ref(value: &str) -> Option<&'static str> { @@ -1709,7 +1926,13 @@ fn 
pick_preferred_fact(grouped_facts: &[(i64, ParsedFact)]) -> Option<&(i64, Par .unwrap_or_default(); left_date.cmp(&right_date) }) - .then_with(|| left.1.value.abs().partial_cmp(&right.1.value.abs()).unwrap_or(std::cmp::Ordering::Equal)) + .then_with(|| { + left.1 + .value + .abs() + .partial_cmp(&right.1.value.abs()) + .unwrap_or(std::cmp::Ordering::Equal) + }) }) } @@ -1779,12 +2002,6 @@ fn classify_statement_role(role_uri: &str) -> Option { fn concept_statement_fallback(local_name: &str) -> Option { let normalized = local_name.to_ascii_lowercase(); - if Regex::new(r#"cash|operatingactivities|investingactivities|financingactivities"#) - .unwrap() - .is_match(&normalized) - { - return Some("cash_flow".to_string()); - } if Regex::new(r#"equity|retainedearnings|additionalpaidincapital"#) .unwrap() .is_match(&normalized) @@ -1794,6 +2011,22 @@ fn concept_statement_fallback(local_name: &str) -> Option { if normalized.contains("comprehensiveincome") { return Some("comprehensive_income".to_string()); } + if Regex::new( + r#"deferredpolicyacquisitioncosts(andvalueofbusinessacquired)?$|supplementaryinsuranceinformationdeferredpolicyacquisitioncosts$|deferredacquisitioncosts$"#, + ) + .unwrap() + .is_match(&normalized) + { + return Some("balance".to_string()); + } + if Regex::new( + r#"netcashprovidedbyusedin.*activities|increasedecreasein|paymentstoacquire|paymentsforcapitalimprovements$|paymentsfordepositsonrealestateacquisitions$|paymentsforrepurchase|paymentsofdividends|dividendscommonstockcash$|proceedsfrom|repaymentsofdebt|sharebasedcompensation$|allocatedsharebasedcompensationexpense$|depreciationdepletionandamortization$|depreciationamortizationandaccretionnet$|depreciationandamortization$|depreciationamortizationandother$|otheradjustmentstoreconcilenetincomelosstocashprovidedbyusedinoperatingactivities"#, + ) + .unwrap() + .is_match(&normalized) + { + return Some("cash_flow".to_string()); + } if Regex::new( 
r#"asset|liabilit|debt|financingreceivable|loansreceivable|deposits|allowanceforcreditloss|futurepolicybenefits|policyholderaccountbalances|unearnedpremiums|realestateinvestmentproperty|grossatcarryingvalue|investmentproperty"#, ) @@ -1967,7 +2200,10 @@ mod tests { vec![], ) .expect("core pack should load and map"); - let income_surface_rows = model.surface_rows.get("income").expect("income surface rows"); + let income_surface_rows = model + .surface_rows + .get("income") + .expect("income surface rows"); let op_expenses = income_surface_rows .iter() .find(|row| row.key == "operating_expenses") @@ -1978,7 +2214,10 @@ mod tests { .expect("revenue surface row"); assert_eq!(revenue.values.get("2025").copied().flatten(), Some(120.0)); - assert_eq!(op_expenses.values.get("2024").copied().flatten(), Some(40.0)); + assert_eq!( + op_expenses.values.get("2024").copied().flatten(), + Some(40.0) + ); assert_eq!(op_expenses.detail_count, Some(2)); let operating_expense_details = model @@ -1987,8 +2226,12 @@ mod tests { .and_then(|groups| groups.get("operating_expenses")) .expect("operating expenses details"); assert_eq!(operating_expense_details.len(), 2); - assert!(operating_expense_details.iter().any(|row| row.key == "sga-row")); - assert!(operating_expense_details.iter().any(|row| row.key == "rd-row")); + assert!(operating_expense_details + .iter() + .any(|row| row.key == "sga-row")); + assert!(operating_expense_details + .iter() + .any(|row| row.key == "rd-row")); let residual_rows = model .detail_rows @@ -2003,17 +2246,26 @@ mod tests { .concept_mappings .get("http://fasb.org/us-gaap/2024#ResearchAndDevelopmentExpense") .expect("rd mapping"); - assert_eq!(rd_mapping.detail_parent_surface_key.as_deref(), Some("operating_expenses")); - assert_eq!(rd_mapping.surface_key.as_deref(), Some("operating_expenses")); + assert_eq!( + rd_mapping.detail_parent_surface_key.as_deref(), + Some("operating_expenses") + ); + assert_eq!( + rd_mapping.surface_key.as_deref(), + 
Some("operating_expenses") + ); let residual_mapping = model .concept_mappings .get("urn:company#OtherOperatingCharges") .expect("residual mapping"); assert!(residual_mapping.residual_flag); - assert_eq!(residual_mapping.detail_parent_surface_key.as_deref(), Some("unmapped")); + assert_eq!( + residual_mapping.detail_parent_surface_key.as_deref(), + Some("unmapped") + ); - assert_eq!(model.normalization_summary.surface_row_count, 5); + assert_eq!(model.normalization_summary.surface_row_count, 6); assert_eq!(model.normalization_summary.detail_row_count, 3); assert_eq!(model.normalization_summary.unmapped_row_count, 1); } @@ -2051,18 +2303,60 @@ mod tests { #[test] fn classifies_pack_specific_concepts_without_presentation_roles() { assert_eq!( - concept_statement_fallback("FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss") - .as_deref(), + concept_statement_fallback( + "FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss" + ) + .as_deref(), + Some("balance") + ); + assert_eq!( + concept_statement_fallback("Deposits").as_deref(), Some("balance") ); - assert_eq!(concept_statement_fallback("Deposits").as_deref(), Some("balance")); assert_eq!( concept_statement_fallback("RealEstateInvestmentPropertyNet").as_deref(), Some("balance") ); - assert_eq!(concept_statement_fallback("LeaseIncome").as_deref(), Some("income")); assert_eq!( - concept_statement_fallback("DirectCostsOfLeasedAndRentedPropertyOrEquipment").as_deref(), + concept_statement_fallback("DeferredPolicyAcquisitionCosts").as_deref(), + Some("balance") + ); + assert_eq!( + concept_statement_fallback("DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired") + .as_deref(), + Some("balance") + ); + assert_eq!( + concept_statement_fallback("IncreaseDecreaseInAccountsReceivable").as_deref(), + Some("cash_flow") + ); + assert_eq!( + concept_statement_fallback("PaymentsOfDividends").as_deref(), + Some("cash_flow") + ); + assert_eq!( + 
concept_statement_fallback("RepaymentsOfDebt").as_deref(), + Some("cash_flow") + ); + assert_eq!( + concept_statement_fallback("ShareBasedCompensation").as_deref(), + Some("cash_flow") + ); + assert_eq!( + concept_statement_fallback("PaymentsForCapitalImprovements").as_deref(), + Some("cash_flow") + ); + assert_eq!( + concept_statement_fallback("PaymentsForDepositsOnRealEstateAcquisitions").as_deref(), + Some("cash_flow") + ); + assert_eq!( + concept_statement_fallback("LeaseIncome").as_deref(), + Some("income") + ); + assert_eq!( + concept_statement_fallback("DirectCostsOfLeasedAndRentedPropertyOrEquipment") + .as_deref(), Some("income") ); } diff --git a/rust/fiscal-xbrl-core/src/surface_mapper.rs b/rust/fiscal-xbrl-core/src/surface_mapper.rs index 61d81ce..5e825ab 100644 --- a/rust/fiscal-xbrl-core/src/surface_mapper.rs +++ b/rust/fiscal-xbrl-core/src/surface_mapper.rs @@ -2,7 +2,10 @@ use anyhow::Result; use std::collections::{BTreeMap, HashMap, HashSet}; use crate::pack_selector::FiscalPack; -use crate::taxonomy_loader::{load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition}; +use crate::taxonomy_loader::{ + load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition, SurfaceFormula, + SurfaceFormulaOp, SurfaceSignTransform, +}; use crate::{ ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput, PeriodOutput, StatementRowMap, StatementRowOutput, SurfaceRowMap, SurfaceRowOutput, @@ -62,6 +65,46 @@ struct MatchedStatementRow<'a> { rank: i64, } +#[derive(Debug, Default, Clone)] +struct ConsumedSources { + row_keys: HashSet, + concept_keys: HashSet, +} + +impl ConsumedSources { + fn insert_row(&mut self, row: &StatementRowOutput) { + self.row_keys.insert(row.key.clone()); + self.concept_keys.insert(row.concept_key.clone()); + } + + fn insert_surface_row(&mut self, row: &SurfaceRowOutput) { + self.row_keys.extend(row.source_row_keys.iter().cloned()); + self.concept_keys + 
.extend(row.source_concepts.iter().cloned()); + } + + fn extend(&mut self, other: Self) { + self.row_keys.extend(other.row_keys); + self.concept_keys.extend(other.concept_keys); + } + + fn matches_statement_row(&self, row: &StatementRowOutput) -> bool { + self.row_keys.contains(&row.key) || self.concept_keys.contains(&row.concept_key) + } +} + +#[derive(Debug)] +struct SurfaceResolution { + values: BTreeMap>, + source_concepts: Vec, + source_row_keys: Vec, + source_fact_ids: Vec, + has_dimensions: bool, + resolved_source_row_keys: BTreeMap>, + consumed_sources: ConsumedSources, + formula_used: bool, +} + pub fn build_compact_surface_model( periods: &[PeriodOutput], statement_rows: &StatementRowMap, @@ -80,18 +123,22 @@ pub fn build_compact_surface_model( let mut material_unmapped_row_count = 0usize; for statement in statement_keys() { - let rows = statement_rows - .get(statement) - .cloned() - .unwrap_or_default(); - let statement_definitions = pack + let rows = statement_rows.get(statement).cloned().unwrap_or_default(); + let mut statement_definitions = pack .surfaces .iter() .filter(|definition| definition.statement == statement) .collect::>(); + statement_definitions.sort_by(|left, right| { + left.order + .cmp(&right.order) + .then_with(|| left.label.cmp(&right.label)) + }); let mut used_row_keys = HashSet::::new(); + let mut consumed_sources = ConsumedSources::default(); let mut statement_surface_rows = Vec::::new(); let mut statement_detail_rows = BTreeMap::>::new(); + let mut resolved_statement_surfaces = HashMap::::new(); for definition in statement_definitions { let matches = rows @@ -100,10 +147,6 @@ pub fn build_compact_surface_model( .filter_map(|row| match_statement_row(row, definition, crosswalk.as_ref())) .collect::>(); - if matches.is_empty() { - continue; - } - let direct_surface_matches = matches .iter() .filter(|matched| matched.match_role == MatchRole::Surface) @@ -115,20 +158,10 @@ pub fn build_compact_surface_model( .cloned() .collect::>(); - 
let mut surface_source_matches = if !direct_surface_matches.is_empty() { - vec![pick_best_match(&direct_surface_matches).clone()] - } else if definition.rollup_policy == "aggregate_children" { - detail_component_matches.clone() - } else { - Vec::new() - }; - - if surface_source_matches.is_empty() { - continue; - } - let detail_matches = if definition.detail_grouping_policy == "group_all_children" { - if detail_component_matches.is_empty() && definition.rollup_policy == "aggregate_children" { + if detail_component_matches.is_empty() + && definition.rollup_policy == "aggregate_children" + { Vec::new() } else { detail_component_matches.clone() @@ -137,56 +170,21 @@ pub fn build_compact_surface_model( Vec::new() }; - if definition.rollup_policy == "aggregate_children" - && direct_surface_matches.is_empty() - && !surface_source_matches.is_empty() - { - for matched in &mut surface_source_matches { - matched.mapping_method = MappingMethod::AggregateChildren; - } - } + let surface_resolution = resolve_surface( + definition, + periods, + &direct_surface_matches, + &detail_component_matches, + &resolved_statement_surfaces, + ); - let values = build_surface_values(periods, &surface_source_matches); - if !has_any_value(&values) { + let Some(surface_resolution) = surface_resolution else { continue; - } + }; - let resolved_source_row_keys = periods - .iter() - .map(|period| { - let resolved = if surface_source_matches.len() == 1 { - surface_source_matches - .first() - .and_then(|matched| matched.row.values.get(&period.id).copied().flatten().map(|_| matched.row.key.clone())) - } else { - None - }; - (period.id.clone(), resolved) - }) - .collect::>(); - - let source_concepts = unique_sorted_strings( - surface_source_matches - .iter() - .map(|matched| matched.row.qname.clone()) - .collect::>(), - ); - let source_row_keys = unique_sorted_strings( - surface_source_matches - .iter() - .map(|matched| matched.row.key.clone()) - .collect::>(), - ); - let source_fact_ids = 
unique_sorted_i64( - surface_source_matches - .iter() - .flat_map(|matched| matched.row.source_fact_ids.clone()) - .collect::>(), - ); - let has_dimensions = surface_source_matches.iter().any(|matched| matched.row.has_dimensions); - - for matched in &surface_source_matches { + for matched in &direct_surface_matches { used_row_keys.insert(matched.row.key.clone()); + consumed_sources.insert_row(matched.row); concept_mappings.insert( matched.row.concept_key.clone(), MappingAssignment { @@ -204,6 +202,7 @@ pub fn build_compact_surface_model( .iter() .map(|matched| { used_row_keys.insert(matched.row.key.clone()); + consumed_sources.insert_row(matched.row); concept_mappings.insert( matched.row.concept_key.clone(), MappingAssignment { @@ -215,46 +214,76 @@ pub fn build_compact_surface_model( residual_flag: false, }, ); - build_detail_row(matched.row, &definition.surface_key, false) + build_detail_row( + matched.row, + &definition.surface_key, + false, + definition.sign_transform, + ) }) .collect::>(); - if !details.is_empty() { + if !details.is_empty() && definition.include_in_output { detail_row_count += details.len(); statement_detail_rows.insert(definition.surface_key.clone(), details); } - statement_surface_rows.push(SurfaceRowOutput { + let detail_count = if definition.include_in_output { + statement_detail_rows + .get(&definition.surface_key) + .map(|rows| rows.len() as i64) + } else { + None + }; + + let row = SurfaceRowOutput { key: definition.surface_key.clone(), label: definition.label.clone(), category: definition.category.clone(), template_section: definition.category.clone(), order: definition.order, unit: definition.unit.clone(), - values, - source_concepts, - source_row_keys, - source_fact_ids, - formula_key: definition.formula_fallback.as_ref().map(|_| definition.surface_key.clone()), - has_dimensions, - resolved_source_row_keys, + values: surface_resolution.values, + source_concepts: surface_resolution.source_concepts, + source_row_keys: 
surface_resolution.source_row_keys, + source_fact_ids: surface_resolution.source_fact_ids, + formula_key: if surface_resolution.formula_used + || definition.formula_fallback.is_some() + { + definition + .formula_fallback + .as_ref() + .map(|_| definition.surface_key.clone()) + } else { + None + }, + has_dimensions: surface_resolution.has_dimensions, + resolved_source_row_keys: surface_resolution.resolved_source_row_keys, statement: Some(definition.statement.clone()), - detail_count: statement_detail_rows - .get(&definition.surface_key) - .map(|rows| rows.len() as i64), + detail_count, resolution_method: None, confidence: None, warning_codes: vec![], - }); - surface_row_count += 1; + }; + consumed_sources.extend(surface_resolution.consumed_sources.clone()); + resolved_statement_surfaces.insert(definition.surface_key.clone(), row.clone()); + if definition.include_in_output { + statement_surface_rows.push(row); + surface_row_count += 1; + } let _ = &definition.materiality_policy; } - statement_surface_rows.sort_by(|left, right| left.order.cmp(&right.order).then_with(|| left.label.cmp(&right.label))); + statement_surface_rows.sort_by(|left, right| { + left.order + .cmp(&right.order) + .then_with(|| left.label.cmp(&right.label)) + }); let baseline = baseline_for_statement(statement, &statement_surface_rows); let threshold = materiality_threshold(statement, baseline); let residual_rows = rows .iter() .filter(|row| !used_row_keys.contains(&row.key)) + .filter(|row| !consumed_sources.matches_statement_row(row)) .filter(|row| has_any_value(&row.values)) .map(|row| { concept_mappings.insert( @@ -268,7 +297,7 @@ pub fn build_compact_surface_model( residual_flag: true, }, ); - build_detail_row(row, "unmapped", true) + build_detail_row(row, "unmapped", true, None) }) .collect::>(); @@ -301,6 +330,288 @@ pub fn build_compact_surface_model( }) } +fn resolve_surface( + definition: &SurfaceDefinition, + periods: &[PeriodOutput], + direct_surface_matches: &[MatchedStatementRow<'_>], 
+ detail_component_matches: &[MatchedStatementRow<'_>], + resolved_statement_surfaces: &HashMap, +) -> Option { + if definition.rollup_policy == "formula_only" { + return resolve_formula_surface(definition, periods, resolved_statement_surfaces) + .map(|resolution| apply_sign_transform(resolution, definition.sign_transform)); + } + + if !direct_surface_matches.is_empty() { + return Some(apply_sign_transform( + resolve_direct_surface(periods, direct_surface_matches), + definition.sign_transform, + )); + } + + if definition.rollup_policy == "aggregate_children" && !detail_component_matches.is_empty() { + return Some(apply_sign_transform( + resolve_aggregate_surface(periods, detail_component_matches), + definition.sign_transform, + )); + } + + if definition.rollup_policy == "aggregate_children" && definition.formula_fallback.is_some() { + return resolve_formula_surface(definition, periods, resolved_statement_surfaces) + .map(|resolution| apply_sign_transform(resolution, definition.sign_transform)); + } + + if definition.rollup_policy == "direct_or_formula" { + return resolve_formula_surface(definition, periods, resolved_statement_surfaces) + .map(|resolution| apply_sign_transform(resolution, definition.sign_transform)); + } + + None +} + +fn resolve_direct_surface( + periods: &[PeriodOutput], + matches: &[MatchedStatementRow<'_>], +) -> SurfaceResolution { + let mut values = BTreeMap::new(); + let mut resolved_source_row_keys = BTreeMap::new(); + + for period in periods { + let period_matches = matches + .iter() + .filter(|matched| { + matched + .row + .values + .get(&period.id) + .copied() + .flatten() + .is_some() + }) + .cloned() + .collect::>(); + let chosen = if period_matches.is_empty() { + None + } else { + Some(pick_best_match(&period_matches)) + }; + values.insert( + period.id.clone(), + chosen.and_then(|matched| matched.row.values.get(&period.id).copied().flatten()), + ); + resolved_source_row_keys.insert( + period.id.clone(), + chosen.map(|matched| 
matched.row.key.clone()), + ); + } + + let mut consumed_sources = ConsumedSources::default(); + for matched in matches { + consumed_sources.insert_row(matched.row); + } + + SurfaceResolution { + values, + source_concepts: unique_sorted_strings( + matches + .iter() + .map(|matched| matched.row.qname.clone()) + .collect::>(), + ), + source_row_keys: unique_sorted_strings( + matches + .iter() + .map(|matched| matched.row.key.clone()) + .collect::>(), + ), + source_fact_ids: unique_sorted_i64( + matches + .iter() + .flat_map(|matched| matched.row.source_fact_ids.clone()) + .collect::>(), + ), + has_dimensions: matches.iter().any(|matched| matched.row.has_dimensions), + resolved_source_row_keys, + consumed_sources, + formula_used: false, + } +} + +fn resolve_aggregate_surface( + periods: &[PeriodOutput], + matches: &[MatchedStatementRow<'_>], +) -> SurfaceResolution { + let aggregate_matches = matches + .iter() + .map(|matched| { + let mut aggregate = matched.clone(); + aggregate.mapping_method = MappingMethod::AggregateChildren; + aggregate + }) + .collect::>(); + let mut consumed_sources = ConsumedSources::default(); + for matched in &aggregate_matches { + consumed_sources.insert_row(matched.row); + } + + SurfaceResolution { + values: build_surface_values(periods, &aggregate_matches), + source_concepts: unique_sorted_strings( + aggregate_matches + .iter() + .map(|matched| matched.row.qname.clone()) + .collect::>(), + ), + source_row_keys: unique_sorted_strings( + aggregate_matches + .iter() + .map(|matched| matched.row.key.clone()) + .collect::>(), + ), + source_fact_ids: unique_sorted_i64( + aggregate_matches + .iter() + .flat_map(|matched| matched.row.source_fact_ids.clone()) + .collect::>(), + ), + has_dimensions: aggregate_matches + .iter() + .any(|matched| matched.row.has_dimensions), + resolved_source_row_keys: periods + .iter() + .map(|period| (period.id.clone(), None)) + .collect(), + consumed_sources, + formula_used: false, + } +} + +fn 
resolve_formula_surface( + definition: &SurfaceDefinition, + periods: &[PeriodOutput], + resolved_statement_surfaces: &HashMap, +) -> Option { + let formula = definition + .formula_fallback + .as_ref() + .and_then(|formula| formula.structured())?; + let source_rows = formula + .sources + .iter() + .filter_map(|source_key| resolved_statement_surfaces.get(source_key)) + .collect::>(); + if source_rows.is_empty() { + return None; + } + + let values = periods + .iter() + .map(|period| { + ( + period.id.clone(), + evaluate_formula_for_period(formula, &period.id, &source_rows), + ) + }) + .collect::>(); + if !has_any_value(&values) { + return None; + } + + let mut consumed_sources = ConsumedSources::default(); + for row in &source_rows { + consumed_sources.insert_surface_row(row); + } + + Some(SurfaceResolution { + values, + source_concepts: unique_sorted_strings( + source_rows + .iter() + .flat_map(|row| row.source_concepts.clone()) + .collect::>(), + ), + source_row_keys: unique_sorted_strings( + source_rows + .iter() + .flat_map(|row| row.source_row_keys.clone()) + .collect::>(), + ), + source_fact_ids: unique_sorted_i64( + source_rows + .iter() + .flat_map(|row| row.source_fact_ids.clone()) + .collect::>(), + ), + has_dimensions: source_rows.iter().any(|row| row.has_dimensions), + resolved_source_row_keys: periods + .iter() + .map(|period| (period.id.clone(), None)) + .collect(), + consumed_sources, + formula_used: true, + }) +} + +fn evaluate_formula_for_period( + formula: &SurfaceFormula, + period_id: &str, + source_rows: &[&SurfaceRowOutput], +) -> Option { + let values = formula + .sources + .iter() + .map(|source_key| { + source_rows + .iter() + .find(|row| row.key == *source_key) + .and_then(|row| row.values.get(period_id).copied().flatten()) + }) + .collect::>(); + + match formula.op { + SurfaceFormulaOp::Sum => sum_formula_values(&values, formula.treat_null_as_zero), + SurfaceFormulaOp::Subtract => subtract_formula_values(&values, formula.treat_null_as_zero), 
+ } +} + +fn sum_formula_values(values: &[Option], treat_null_as_zero: bool) -> Option { + if treat_null_as_zero { + if values.iter().all(|value| value.is_none()) { + return None; + } + return Some(values.iter().map(|value| value.unwrap_or(0.0)).sum()); + } + + if values.iter().any(|value| value.is_none()) { + return None; + } + + Some(values.iter().map(|value| value.unwrap_or(0.0)).sum()) +} + +fn subtract_formula_values(values: &[Option], treat_null_as_zero: bool) -> Option { + if values.len() != 2 { + return None; + } + + let left = if treat_null_as_zero { + values[0].unwrap_or(0.0) + } else { + values[0]? + }; + let right = if treat_null_as_zero { + values[1].unwrap_or(0.0) + } else { + values[1]? + }; + + if !treat_null_as_zero && values.iter().all(|value| value.is_none()) { + return None; + } + + Some(left - right) +} + pub fn merge_mapping_assignments( primary: &mut HashMap, secondary: HashMap, @@ -312,7 +623,10 @@ pub fn merge_mapping_assignments( .clone() .or(assignment.authoritative_concept_key); if existing.mapping_method.is_none() - || matches!(existing.mapping_method, Some(MappingMethod::UnmappedResidual)) + || matches!( + existing.mapping_method, + Some(MappingMethod::UnmappedResidual) + ) { existing.mapping_method = assignment.mapping_method; } @@ -337,7 +651,9 @@ pub fn apply_mapping_assignments( for concept in concepts { if let Some(mapping) = mappings.get(&concept.concept_key) { concept.authoritative_concept_key = mapping.authoritative_concept_key.clone(); - concept.mapping_method = mapping.mapping_method.map(|method| method.as_str().to_string()); + concept.mapping_method = mapping + .mapping_method + .map(|method| method.as_str().to_string()); concept.surface_key = mapping.surface_key.clone(); concept.detail_parent_surface_key = mapping.detail_parent_surface_key.clone(); concept.kpi_key = mapping.kpi_key.clone(); @@ -348,7 +664,9 @@ pub fn apply_mapping_assignments( for fact in facts { if let Some(mapping) = mappings.get(&fact.concept_key) { 
fact.authoritative_concept_key = mapping.authoritative_concept_key.clone(); - fact.mapping_method = mapping.mapping_method.map(|method| method.as_str().to_string()); + fact.mapping_method = mapping + .mapping_method + .map(|method| method.as_str().to_string()); fact.surface_key = mapping.surface_key.clone(); fact.detail_parent_surface_key = mapping.detail_parent_surface_key.clone(); fact.kpi_key = mapping.kpi_key.clone(); @@ -392,10 +710,9 @@ fn match_statement_row<'a>( }); } - let matches_source = definition - .allowed_source_concepts - .iter() - .any(|candidate| candidate_matches(candidate, &row.qname) || candidate_matches(candidate, &row.local_name)); + let matches_source = definition.allowed_source_concepts.iter().any(|candidate| { + candidate_matches(candidate, &row.qname) || candidate_matches(candidate, &row.local_name) + }); if matches_source { return Some(MatchedStatementRow { row, @@ -472,6 +789,7 @@ fn build_detail_row( row: &StatementRowOutput, parent_surface_key: &str, residual_flag: bool, + sign_transform: Option, ) -> DetailRowOutput { DetailRowOutput { key: row.key.clone(), @@ -482,7 +800,7 @@ fn build_detail_row( namespace_uri: row.namespace_uri.clone(), local_name: row.local_name.clone(), unit: row.units.values().find_map(|value| value.clone()), - values: row.values.clone(), + values: transform_values(&row.values, sign_transform), source_fact_ids: row.source_fact_ids.clone(), is_extension: row.is_extension, dimensions_summary: if row.has_dimensions { @@ -494,6 +812,32 @@ fn build_detail_row( } } +fn apply_sign_transform( + mut resolution: SurfaceResolution, + sign_transform: Option, +) -> SurfaceResolution { + resolution.values = transform_values(&resolution.values, sign_transform); + resolution +} + +fn transform_values( + values: &BTreeMap>, + sign_transform: Option, +) -> BTreeMap> { + values + .iter() + .map(|(period_id, value)| { + ( + period_id.clone(), + match sign_transform { + Some(SurfaceSignTransform::Invert) => value.map(|amount| 
-amount), + None => *value, + }, + ) + }) + .collect() +} + fn has_any_value(values: &BTreeMap>) -> bool { values.values().any(|value| value.is_some()) } @@ -505,10 +849,10 @@ fn max_abs_value(values: &BTreeMap>) -> f64 { } fn baseline_for_statement(statement: &str, surface_rows: &[SurfaceRowOutput]) -> f64 { - let anchor_key = if statement == "balance" { - "total_assets" - } else { - "revenue" + let anchor_key = match statement { + "balance" => "total_assets", + "cash_flow" => "operating_cash_flow", + _ => "revenue", }; surface_rows @@ -527,13 +871,21 @@ fn materiality_threshold(statement: &str, baseline: f64) -> f64 { } fn unique_sorted_strings(values: Vec) -> Vec { - let mut values = values.into_iter().collect::>().into_iter().collect::>(); + let mut values = values + .into_iter() + .collect::>() + .into_iter() + .collect::>(); values.sort(); values } fn unique_sorted_i64(values: Vec) -> Vec { - let mut values = values.into_iter().collect::>().into_iter().collect::>(); + let mut values = values + .into_iter() + .collect::>() + .into_iter() + .collect::>(); values.sort(); values } @@ -551,7 +903,13 @@ fn candidate_matches(candidate: &str, actual: &str) -> bool { } fn statement_keys() -> [&'static str; 5] { - ["income", "balance", "cash_flow", "equity", "comprehensive_income"] + [ + "income", + "balance", + "cash_flow", + "equity", + "comprehensive_income", + ] } fn empty_surface_row_map() -> SurfaceRowMap { @@ -588,10 +946,27 @@ mod tests { } fn row(key: &str, qname: &str, statement: &str, value: f64) -> StatementRowOutput { + row_with_values( + key, + qname, + statement, + BTreeMap::from([("p1".to_string(), Some(value))]), + ) + } + + fn row_with_values( + key: &str, + qname: &str, + statement: &str, + values: BTreeMap>, + ) -> StatementRowOutput { StatementRowOutput { key: key.to_string(), label: key.to_string(), - concept_key: format!("http://fasb.org/us-gaap/2024#{}", qname.split(':').nth(1).unwrap_or(key)), + concept_key: format!( + 
"http://fasb.org/us-gaap/2024#{}", + qname.split(':').nth(1).unwrap_or(key) + ), qname: qname.to_string(), namespace_uri: "http://fasb.org/us-gaap/2024".to_string(), local_name: qname.split(':').nth(1).unwrap_or(key).to_string(), @@ -601,8 +976,11 @@ mod tests { order: 1, depth: 0, parent_key: None, - values: BTreeMap::from([("p1".to_string(), Some(value))]), - units: BTreeMap::from([("p1".to_string(), Some("iso4217:USD".to_string()))]), + units: values + .keys() + .map(|period_id| (period_id.clone(), Some("iso4217:USD".to_string()))) + .collect(), + values, has_dimensions: false, source_fact_ids: vec![1], } @@ -623,8 +1001,18 @@ mod tests { let mut rows = empty_map(); rows.get_mut("income").unwrap().extend([ row("op-expenses", "us-gaap:OperatingExpenses", "income", 40.0), - row("sga", "us-gaap:SellingGeneralAndAdministrativeExpense", "income", 25.0), - row("rd", "us-gaap:ResearchAndDevelopmentExpense", "income", 15.0), + row( + "sga", + "us-gaap:SellingGeneralAndAdministrativeExpense", + "income", + 25.0, + ), + row( + "rd", + "us-gaap:ResearchAndDevelopmentExpense", + "income", + 15.0, + ), ]); let model = build_compact_surface_model( @@ -650,7 +1038,9 @@ mod tests { #[test] fn emits_unmapped_residual_rows() { let mut rows = empty_map(); - rows.get_mut("income").unwrap().push(row("custom", "company:CustomMetric", "income", 12.0)); + rows.get_mut("income") + .unwrap() + .push(row("custom", "company:CustomMetric", "income", 12.0)); let model = build_compact_surface_model( &[period("p1")], @@ -660,8 +1050,573 @@ mod tests { vec![], ) .expect("compact model should build"); - let residual_rows = model.detail_rows.get("income").unwrap().get("unmapped").unwrap(); + let residual_rows = model + .detail_rows + .get("income") + .unwrap() + .get("unmapped") + .unwrap(); assert_eq!(residual_rows.len(), 1); assert!(residual_rows[0].residual_flag); } + + #[test] + fn flattens_balance_aliases_and_prunes_balance_unmapped_rows() { + let mut rows = empty_map(); + 
rows.get_mut("balance").unwrap().extend([ + row( + "receivable-primary", + "us-gaap:AccountsReceivableNetCurrent", + "balance", + 25.0, + ), + row( + "receivable-alias", + "us-gaap:ReceivablesNetCurrent", + "balance", + 25.0, + ), + row( + "custom-balance", + "company:CustomBalanceMetric", + "balance", + 9.0, + ), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::Core, + vec![], + ) + .expect("compact model should build"); + + let receivables = model + .surface_rows + .get("balance") + .unwrap() + .iter() + .find(|row| row.key == "accounts_receivable") + .unwrap(); + assert_eq!(receivables.values.get("p1").copied().flatten(), Some(25.0)); + assert_eq!( + receivables.source_row_keys, + vec![ + "receivable-alias".to_string(), + "receivable-primary".to_string() + ] + ); + assert_eq!( + receivables + .resolved_source_row_keys + .get("p1") + .cloned() + .flatten(), + Some("receivable-alias".to_string()) + ); + + let unmapped = model + .detail_rows + .get("balance") + .and_then(|groups| groups.get("unmapped")) + .cloned() + .unwrap_or_default(); + assert_eq!(unmapped.len(), 1); + assert_eq!(unmapped[0].key, "custom-balance"); + } + + #[test] + fn merges_period_sparse_balance_aliases_into_one_row() { + let mut rows = empty_map(); + rows.get_mut("balance").unwrap().extend([ + row_with_values( + "receivable-p1", + "us-gaap:AccountsReceivableNetCurrent", + "balance", + BTreeMap::from([("p1".to_string(), Some(10.0)), ("p2".to_string(), None)]), + ), + row_with_values( + "receivable-p2", + "us-gaap:ReceivablesNetCurrent", + "balance", + BTreeMap::from([("p1".to_string(), None), ("p2".to_string(), Some(18.0))]), + ), + ]); + + let periods = vec![ + period("p1"), + PeriodOutput { + id: "p2".to_string(), + filing_id: 2, + accession_number: "0000000000-00-000002".to_string(), + filing_date: "2026-12-31".to_string(), + period_start: Some("2026-01-01".to_string()), + period_end: Some("2026-12-31".to_string()), + filing_type: 
"10-K".to_string(), + period_label: "p2".to_string(), + }, + ]; + + let model = + build_compact_surface_model(&periods, &rows, "us-gaap", FiscalPack::Core, vec![]) + .expect("compact model should build"); + + let receivables = model + .surface_rows + .get("balance") + .unwrap() + .iter() + .find(|row| row.key == "accounts_receivable") + .unwrap(); + + assert_eq!(receivables.values.get("p1").copied().flatten(), Some(10.0)); + assert_eq!(receivables.values.get("p2").copied().flatten(), Some(18.0)); + assert_eq!( + receivables + .resolved_source_row_keys + .get("p1") + .cloned() + .flatten(), + Some("receivable-p1".to_string()) + ); + assert_eq!( + receivables + .resolved_source_row_keys + .get("p2") + .cloned() + .flatten(), + Some("receivable-p2".to_string()) + ); + } + + #[test] + fn derives_balance_formula_rows_and_hides_helper_surfaces() { + let mut rows = empty_map(); + rows.get_mut("balance").unwrap().extend([ + row( + "cash", + "us-gaap:CashAndCashEquivalentsAtCarryingValue", + "balance", + 100.0, + ), + row( + "marketable", + "us-gaap:ShortTermInvestments", + "balance", + 40.0, + ), + row("ap", "us-gaap:AccountsPayableCurrent", "balance", 30.0), + row( + "deferred-current", + "us-gaap:DeferredRevenueCurrent", + "balance", + 15.0, + ), + row( + "deferred-noncurrent", + "us-gaap:DeferredRevenueNoncurrent", + "balance", + 5.0, + ), + row("short-debt", "us-gaap:ShortTermBorrowings", "balance", 10.0), + row( + "long-debt", + "us-gaap:LongTermDebtNoncurrent", + "balance", + 50.0, + ), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::Core, + vec![], + ) + .expect("compact model should build"); + + let balance_rows = model.surface_rows.get("balance").unwrap(); + let total_cash = balance_rows + .iter() + .find(|row| row.key == "total_cash_and_equivalents") + .unwrap(); + let unearned_revenue = balance_rows + .iter() + .find(|row| row.key == "unearned_revenue") + .unwrap(); + let total_debt = balance_rows + 
.iter() + .find(|row| row.key == "total_debt") + .unwrap(); + let net_cash = balance_rows + .iter() + .find(|row| row.key == "net_cash_position") + .unwrap(); + + assert_eq!(total_cash.values.get("p1").copied().flatten(), Some(140.0)); + assert_eq!( + unearned_revenue.values.get("p1").copied().flatten(), + Some(20.0) + ); + assert_eq!(total_debt.values.get("p1").copied().flatten(), Some(60.0)); + assert_eq!(net_cash.values.get("p1").copied().flatten(), Some(80.0)); + assert!(balance_rows + .iter() + .all(|row| row.key != "deferred_revenue_current" + && row.key != "deferred_revenue_noncurrent")); + assert!(model + .detail_rows + .get("balance") + .unwrap() + .get("deferred_revenue_current") + .is_none()); + } + + #[test] + fn merges_core_balance_rows_into_sector_packs() { + let mut rows = empty_map(); + rows.get_mut("balance").unwrap().extend([ + row( + "cash", + "us-gaap:CashAndCashEquivalentsAtCarryingValue", + "balance", + 20.0, + ), + row( + "loans", + "us-gaap:LoansReceivableNetReportedAmount", + "balance", + 80.0, + ), + row("deposits", "us-gaap:DepositsLiabilities", "balance", 70.0), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::BankLender, + vec![], + ) + .expect("compact model should build"); + + let balance_rows = model.surface_rows.get("balance").unwrap(); + let cash = balance_rows + .iter() + .find(|row| row.key == "cash_and_equivalents") + .unwrap(); + let loans = balance_rows.iter().find(|row| row.key == "loans").unwrap(); + let deposits = balance_rows + .iter() + .find(|row| row.key == "deposits") + .unwrap(); + + assert_eq!(cash.category, "current_assets"); + assert_eq!(loans.category, "noncurrent_assets"); + assert_eq!(deposits.category, "current_liabilities"); + } + + #[test] + fn maps_insurance_deferred_acquisition_costs_and_prunes_unmapped_rows() { + let mut rows = empty_map(); + rows.get_mut("balance").unwrap().extend([ + row( + "dac-voba", + 
"us-gaap:DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired", + "balance", + 2106.0, + ), + row( + "custom-balance", + "company:CustomInsuranceBalanceMetric", + "balance", + 12.0, + ), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::Insurance, + vec![], + ) + .expect("compact model should build"); + + let dac = model + .surface_rows + .get("balance") + .unwrap() + .iter() + .find(|row| row.key == "deferred_acquisition_costs") + .unwrap(); + + assert_eq!(dac.category, "noncurrent_assets"); + assert_eq!(dac.values.get("p1").copied().flatten(), Some(2106.0)); + assert_eq!( + dac.source_concepts, + vec![ + "us-gaap:DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired" + .to_string() + ] + ); + assert_eq!(dac.source_row_keys, vec!["dac-voba".to_string()]); + + let unmapped = model + .detail_rows + .get("balance") + .and_then(|groups| groups.get("unmapped")) + .cloned() + .unwrap_or_default(); + assert_eq!(unmapped.len(), 1); + assert_eq!(unmapped[0].key, "custom-balance"); + } + + #[test] + fn derives_cash_flow_rows_applies_signs_and_hides_helper_surfaces() { + let mut rows = empty_map(); + rows.get_mut("cash_flow").unwrap().extend([ + row("cfo", "us-gaap:NetCashProvidedByUsedInOperatingActivities", "cash_flow", 120.0), + row( + "capex", + "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment", + "cash_flow", + 35.0, + ), + row( + "debt-repaid", + "us-gaap:RepaymentsOfDebt", + "cash_flow", + 14.0, + ), + row( + "share-repurchase", + "us-gaap:PaymentsForRepurchaseOfCommonStock", + "cash_flow", + 11.0, + ), + row( + "contract-incurred", + "us-gaap:ContractWithCustomerLiabilityIncurred", + "cash_flow", + 40.0, + ), + row( + "contract-recognized", + "us-gaap:ContractWithCustomerLiabilityRevenueRecognized", + "cash_flow", + 15.0, + ), + row( + "other-current-assets", + "us-gaap:IncreaseDecreaseInOtherCurrentAssets", + "cash_flow", + 6.0, + ), + row( + "other-current-liabilities", + 
"us-gaap:IncreaseDecreaseInOtherCurrentLiabilities", + "cash_flow", + 4.0, + ), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::Core, + vec![], + ) + .expect("compact model should build"); + + let cash_flow_rows = model.surface_rows.get("cash_flow").unwrap(); + let capex = cash_flow_rows + .iter() + .find(|row| row.key == "capital_expenditures") + .unwrap(); + let debt_repaid = cash_flow_rows + .iter() + .find(|row| row.key == "debt_repaid") + .unwrap(); + let repurchases = cash_flow_rows + .iter() + .find(|row| row.key == "share_repurchases") + .unwrap(); + let changes_unearned = cash_flow_rows + .iter() + .find(|row| row.key == "changes_unearned_revenue") + .unwrap(); + let changes_other = cash_flow_rows + .iter() + .find(|row| row.key == "changes_other_operating_activities") + .unwrap(); + let free_cash_flow = cash_flow_rows + .iter() + .find(|row| row.key == "free_cash_flow") + .unwrap(); + + assert_eq!(capex.values.get("p1").copied().flatten(), Some(-35.0)); + assert_eq!(debt_repaid.values.get("p1").copied().flatten(), Some(-14.0)); + assert_eq!(repurchases.values.get("p1").copied().flatten(), Some(-11.0)); + assert_eq!(changes_unearned.values.get("p1").copied().flatten(), Some(25.0)); + assert_eq!(changes_other.values.get("p1").copied().flatten(), Some(-10.0)); + assert_eq!(free_cash_flow.values.get("p1").copied().flatten(), Some(85.0)); + + assert!(cash_flow_rows.iter().all(|row| { + row.key != "contract_liability_incurred" + && row.key != "contract_liability_recognized" + && row.key != "changes_other_current_assets" + && row.key != "changes_other_current_liabilities" + })); + } + + #[test] + fn prunes_consumed_cash_flow_rows_from_unmapped() { + let mut rows = empty_map(); + rows.get_mut("cash_flow").unwrap().extend([ + row( + "capex-primary", + "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment", + "cash_flow", + 20.0, + ), + row( + "capex-alias", + 
"us-gaap:CapitalExpendituresIncurredButNotYetPaid", + "cash_flow", + 20.0, + ), + row( + "custom-cash-flow", + "company:CustomCashFlowMetric", + "cash_flow", + 8.0, + ), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::Core, + vec![], + ) + .expect("compact model should build"); + + let capex = model + .surface_rows + .get("cash_flow") + .unwrap() + .iter() + .find(|row| row.key == "capital_expenditures") + .unwrap(); + assert_eq!(capex.values.get("p1").copied().flatten(), Some(-20.0)); + + let unmapped = model + .detail_rows + .get("cash_flow") + .and_then(|groups| groups.get("unmapped")) + .cloned() + .unwrap_or_default(); + assert_eq!(unmapped.len(), 1); + assert_eq!(unmapped[0].key, "custom-cash-flow"); + } + + #[test] + fn merges_core_cash_flow_rows_into_sector_packs() { + let mut rows = empty_map(); + rows.get_mut("cash_flow").unwrap().extend([ + row( + "cfo", + "us-gaap:NetCashProvidedByUsedInOperatingActivities", + "cash_flow", + 90.0, + ), + row( + "capex", + "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment", + "cash_flow", + 25.0, + ), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::Insurance, + vec![], + ) + .expect("compact model should build"); + + let cash_flow_rows = model.surface_rows.get("cash_flow").unwrap(); + let cfo = cash_flow_rows + .iter() + .find(|row| row.key == "operating_cash_flow") + .unwrap(); + let capex = cash_flow_rows + .iter() + .find(|row| row.key == "capital_expenditures") + .unwrap(); + let free_cash_flow = cash_flow_rows + .iter() + .find(|row| row.key == "free_cash_flow") + .unwrap(); + + assert_eq!(cfo.category, "operating"); + assert_eq!(capex.category, "investing"); + assert_eq!(capex.values.get("p1").copied().flatten(), Some(-25.0)); + assert_eq!(free_cash_flow.values.get("p1").copied().flatten(), Some(65.0)); + } + + #[test] + fn reit_cash_flow_override_keeps_capex_separate_from_acquisitions() { + let mut 
rows = empty_map(); + rows.get_mut("cash_flow").unwrap().extend([ + row( + "reit-capex", + "us-gaap:PaymentsToAcquireCommercialRealEstate", + "cash_flow", + 300.0, + ), + row( + "capital-improvements", + "us-gaap:PaymentsForCapitalImprovements", + "cash_flow", + 20.0, + ), + row( + "reit-acquisition", + "us-gaap:PaymentsToAcquireInterestInSubsidiariesAndAffiliates", + "cash_flow", + 15.0, + ), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::ReitRealEstate, + vec![], + ) + .expect("compact model should build"); + + let cash_flow_rows = model.surface_rows.get("cash_flow").unwrap(); + let capex = cash_flow_rows + .iter() + .find(|row| row.key == "capital_expenditures") + .unwrap(); + let acquisitions = cash_flow_rows + .iter() + .find(|row| row.key == "acquisitions") + .unwrap(); + + assert_eq!(capex.values.get("p1").copied().flatten(), Some(-320.0)); + assert_eq!(acquisitions.values.get("p1").copied().flatten(), Some(-15.0)); + } } diff --git a/rust/fiscal-xbrl-core/src/taxonomy_loader.rs b/rust/fiscal-xbrl-core/src/taxonomy_loader.rs index d568ae9..bcbbe07 100644 --- a/rust/fiscal-xbrl-core/src/taxonomy_loader.rs +++ b/rust/fiscal-xbrl-core/src/taxonomy_loader.rs @@ -1,12 +1,22 @@ use anyhow::{anyhow, Context, Result}; use serde::Deserialize; +use std::collections::HashMap; use std::env; use std::fs; -use std::collections::HashMap; use std::path::PathBuf; use crate::pack_selector::FiscalPack; +fn default_include_in_output() -> bool { + true +} + +#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum SurfaceSignTransform { + Invert, +} + #[derive(Debug, Deserialize, Clone)] pub struct SurfacePackFile { pub version: String, @@ -25,9 +35,44 @@ pub struct SurfaceDefinition { pub rollup_policy: String, pub allowed_source_concepts: Vec, pub allowed_authoritative_concepts: Vec, - pub formula_fallback: Option, + pub formula_fallback: Option, pub 
detail_grouping_policy: String, pub materiality_policy: String, + #[serde(default = "default_include_in_output")] + pub include_in_output: bool, + #[serde(default)] + pub sign_transform: Option, +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(untagged)] +pub enum SurfaceFormulaFallback { + LegacyString(#[allow(dead_code)] String), + Structured(SurfaceFormula), +} + +impl SurfaceFormulaFallback { + pub fn structured(&self) -> Option<&SurfaceFormula> { + match self { + Self::Structured(formula) => Some(formula), + Self::LegacyString(_) => None, + } + } +} + +#[derive(Debug, Deserialize, Clone)] +pub struct SurfaceFormula { + pub op: SurfaceFormulaOp, + pub sources: Vec, + #[serde(default)] + pub treat_null_as_zero: bool, +} + +#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum SurfaceFormulaOp { + Sum, + Subtract, } #[derive(Debug, Deserialize, Clone)] @@ -147,7 +192,9 @@ pub fn resolve_taxonomy_dir() -> Result { candidates .into_iter() .find(|path| path.is_dir()) - .ok_or_else(|| anyhow!("taxonomy resolution failed: unable to locate runtime taxonomy directory")) + .ok_or_else(|| { + anyhow!("taxonomy resolution failed: unable to locate runtime taxonomy directory") + }) } pub fn load_surface_pack(pack: FiscalPack) -> Result { @@ -156,14 +203,52 @@ pub fn load_surface_pack(pack: FiscalPack) -> Result { .join("fiscal") .join("v1") .join(format!("{}.surface.json", pack.as_str())); - let raw = fs::read_to_string(&path) - .with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?; - let file = serde_json::from_str::(&raw) - .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let mut file = load_surface_pack_file(&path)?; + + if !matches!(pack, FiscalPack::Core) { + let core_path = taxonomy_dir + .join("fiscal") + .join("v1") + .join("core.surface.json"); + let core_file = load_surface_pack_file(&core_path)?; + let pack_inherited_keys = 
file + .surfaces + .iter() + .filter(|surface| surface.statement == "balance" || surface.statement == "cash_flow") + .map(|surface| (surface.statement.clone(), surface.surface_key.clone())) + .collect::>(); + + file.surfaces.extend( + core_file + .surfaces + .into_iter() + .filter(|surface| surface.statement == "balance" || surface.statement == "cash_flow") + .filter(|surface| { + !pack_inherited_keys + .contains(&(surface.statement.clone(), surface.surface_key.clone())) + }), + ); + } + let _ = (&file.version, &file.pack); Ok(file) } +fn load_surface_pack_file(path: &PathBuf) -> Result { + let raw = fs::read_to_string(path).with_context(|| { + format!( + "taxonomy resolution failed: unable to read {}", + path.display() + ) + })?; + serde_json::from_str::(&raw).with_context(|| { + format!( + "taxonomy resolution failed: unable to parse {}", + path.display() + ) + }) +} + pub fn load_crosswalk(regime: &str) -> Result> { let file_name = match regime { "us-gaap" => "us-gaap.json", @@ -173,10 +258,18 @@ pub fn load_crosswalk(regime: &str) -> Result> { let taxonomy_dir = resolve_taxonomy_dir()?; let path = taxonomy_dir.join("crosswalk").join(file_name); - let raw = fs::read_to_string(&path) - .with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?; - let file = serde_json::from_str::(&raw) - .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let raw = fs::read_to_string(&path).with_context(|| { + format!( + "taxonomy resolution failed: unable to read {}", + path.display() + ) + })?; + let file = serde_json::from_str::(&raw).with_context(|| { + format!( + "taxonomy resolution failed: unable to parse {}", + path.display() + ) + })?; let _ = (&file.version, &file.regime); Ok(Some(file)) } @@ -188,10 +281,18 @@ pub fn load_kpi_pack(pack: FiscalPack) -> Result { .join("v1") .join("kpis") .join(format!("{}.kpis.json", pack.as_str())); - let raw = fs::read_to_string(&path) - .with_context(|| 
format!("taxonomy resolution failed: unable to read {}", path.display()))?; - let file = serde_json::from_str::(&raw) - .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let raw = fs::read_to_string(&path).with_context(|| { + format!( + "taxonomy resolution failed: unable to read {}", + path.display() + ) + })?; + let file = serde_json::from_str::(&raw).with_context(|| { + format!( + "taxonomy resolution failed: unable to parse {}", + path.display() + ) + })?; let _ = (&file.version, &file.pack); Ok(file) } @@ -202,10 +303,18 @@ pub fn load_universal_income_definitions() -> Result { .join("fiscal") .join("v1") .join("universal_income.surface.json"); - let raw = fs::read_to_string(&path) - .with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?; - let file = serde_json::from_str::(&raw) - .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let raw = fs::read_to_string(&path).with_context(|| { + format!( + "taxonomy resolution failed: unable to read {}", + path.display() + ) + })?; + let file = serde_json::from_str::(&raw).with_context(|| { + format!( + "taxonomy resolution failed: unable to parse {}", + path.display() + ) + })?; let _ = &file.version; Ok(file) } @@ -216,10 +325,18 @@ pub fn load_income_bridge(pack: FiscalPack) -> Result { .join("fiscal") .join("v1") .join(format!("{}.income-bridge.json", pack.as_str())); - let raw = fs::read_to_string(&path) - .with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?; - let file = serde_json::from_str::(&raw) - .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let raw = fs::read_to_string(&path).with_context(|| { + format!( + "taxonomy resolution failed: unable to read {}", + path.display() + ) + })?; + let file = serde_json::from_str::(&raw).with_context(|| { + format!( + "taxonomy resolution failed: unable to 
parse {}", + path.display() + ) + })?; let _ = (&file.version, &file.pack); Ok(file) } @@ -230,17 +347,20 @@ mod tests { #[test] fn resolves_taxonomy_dir_and_loads_core_pack() { - let taxonomy_dir = resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests"); + let taxonomy_dir = + resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests"); assert!(taxonomy_dir.exists()); - let surface_pack = load_surface_pack(FiscalPack::Core).expect("core surface pack should load"); + let surface_pack = + load_surface_pack(FiscalPack::Core).expect("core surface pack should load"); assert_eq!(surface_pack.pack, "core"); assert!(!surface_pack.surfaces.is_empty()); let kpi_pack = load_kpi_pack(FiscalPack::Core).expect("core kpi pack should load"); assert_eq!(kpi_pack.pack, "core"); - let universal_income = load_universal_income_definitions().expect("universal income config should load"); + let universal_income = + load_universal_income_definitions().expect("universal income config should load"); assert!(!universal_income.rows.is_empty()); let core_bridge = load_income_bridge(FiscalPack::Core).expect("core bridge should load"); diff --git a/rust/fiscal-xbrl-core/src/universal_income.rs b/rust/fiscal-xbrl-core/src/universal_income.rs index b993d8a..4881030 100644 --- a/rust/fiscal-xbrl-core/src/universal_income.rs +++ b/rust/fiscal-xbrl-core/src/universal_income.rs @@ -7,7 +7,10 @@ use crate::taxonomy_loader::{ load_crosswalk, load_income_bridge, load_universal_income_definitions, CrosswalkFile, IncomeBridgeConceptGroup, IncomeBridgeRow, UniversalIncomeDefinition, }; -use crate::{DetailRowOutput, FactOutput, PeriodOutput, StatementRowMap, StatementRowOutput, SurfaceRowOutput}; +use crate::{ + DetailRowOutput, FactOutput, PeriodOutput, StatementRowMap, StatementRowOutput, + SurfaceRowOutput, +}; #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum ResolutionMethod { @@ -70,12 +73,44 @@ struct ValueSource { has_dimensions: bool, } +#[derive(Debug, Default)] 
+struct ConsumedSources { + row_keys: HashSet, + concept_keys: HashSet, +} + +impl ConsumedSources { + fn insert_row(&mut self, row: &StatementRowOutput) { + self.row_keys.insert(row.key.clone()); + self.concept_keys.insert(row.concept_key.clone()); + } + + fn insert_fact(&mut self, fact: &FactOutput) { + self.concept_keys.insert(fact.concept_key.clone()); + } + + fn insert_detail_row(&mut self, row: &DetailRowOutput) { + self.row_keys.insert(row.key.clone()); + self.concept_keys.insert(row.concept_key.clone()); + } + + fn extend(&mut self, other: ConsumedSources) { + self.row_keys.extend(other.row_keys); + self.concept_keys.extend(other.concept_keys); + } + + fn matches_detail_row(&self, row: &DetailRowOutput) -> bool { + self.row_keys.contains(&row.key) || self.concept_keys.contains(&row.concept_key) + } +} + #[derive(Debug)] struct ResolvedUniversalRow { row: SurfaceRowOutput, detail_rows: Vec, mapping_assignments: HashMap, warning_codes: Vec, + consumed_sources: ConsumedSources, } pub fn apply_universal_income_rows( @@ -102,6 +137,7 @@ pub fn apply_universal_income_rows( .cloned() .unwrap_or_default(); let mut built_rows = Vec::::new(); + let mut consumed_sources = ConsumedSources::default(); let mut warnings_seen = compact_model .normalization_summary .warnings @@ -109,7 +145,11 @@ pub fn apply_universal_income_rows( .cloned() .collect::>(); - for definition in universal_income.rows.iter().filter(|row| row.statement == "income") { + for definition in universal_income + .rows + .iter() + .filter(|row| row.statement == "income") + { let resolved = if let Some(bridge_row) = income_bridge.rows.get(&definition.key) { resolve_universal_row( definition, @@ -122,12 +162,19 @@ pub fn apply_universal_income_rows( crosswalk.as_ref(), ) } else { - unresolved_row(definition, periods, &[format!("{}_bridge_missing", definition.key)]) + unresolved_row( + definition, + periods, + &[format!("{}_bridge_missing", definition.key)], + ) }; for warning in &resolved.warning_codes { 
if warnings_seen.insert(warning.clone()) { - compact_model.normalization_summary.warnings.push(warning.clone()); + compact_model + .normalization_summary + .warnings + .push(warning.clone()); } } @@ -141,9 +188,13 @@ pub fn apply_universal_income_rows( } for (concept_key, assignment) in resolved.mapping_assignments { - compact_model.concept_mappings.insert(concept_key, assignment); + compact_model + .concept_mappings + .insert(concept_key, assignment); } + consumed_sources.extend(resolved.consumed_sources); + let detail_count = income_detail_rows .get(&definition.key) .map(|rows| rows.len() as i64); @@ -162,10 +213,16 @@ pub fn apply_universal_income_rows( .filter(|row| !universal_keys.contains(&row.key)) .collect::>(); - built_rows.sort_by(|left, right| left.order.cmp(&right.order).then_with(|| left.label.cmp(&right.label))); + built_rows.sort_by(|left, right| { + left.order + .cmp(&right.order) + .then_with(|| left.label.cmp(&right.label)) + }); let mut final_income_rows = built_rows; final_income_rows.extend(remaining_pack_rows); + prune_consumed_unmapped_income_rows(&mut income_detail_rows, &consumed_sources); + compact_model .surface_rows .insert("income".to_string(), final_income_rows); @@ -191,40 +248,29 @@ fn resolve_universal_row( return not_meaningful_row(definition, periods, &bridge_row.warning_codes_when_used); } - if let Some(matched) = pick_best_match( - &income_statement_rows - .iter() - .filter(|row| has_any_value(&row.values)) - .filter_map(|row| { - match_direct_authoritative( - row, - &bridge_row.direct_authoritative_concepts, - crosswalk, - ) - }) - .collect::>(), - ) { + let direct_authoritative_matches = + collect_direct_statement_matches(income_statement_rows, |row| { + match_direct_authoritative(row, &bridge_row.direct_authoritative_concepts, crosswalk) + }); + if !direct_authoritative_matches.is_empty() { return build_direct_row( definition, periods, - matched, + &direct_authoritative_matches, ResolutionMethod::Direct, Confidence::High, 
vec![], ); } - if let Some(matched) = pick_best_match( - &income_statement_rows - .iter() - .filter(|row| has_any_value(&row.values)) - .filter_map(|row| match_direct_source(row, &bridge_row.direct_source_concepts, crosswalk)) - .collect::>(), - ) { + let direct_source_matches = collect_direct_statement_matches(income_statement_rows, |row| { + match_direct_source(row, &bridge_row.direct_source_concepts, crosswalk) + }); + if !direct_source_matches.is_empty() { return build_direct_row( definition, periods, - matched, + &direct_source_matches, ResolutionMethod::Direct, Confidence::Medium, vec![], @@ -241,7 +287,10 @@ fn resolve_universal_row( return build_direct_fact_row(definition, periods, &matched_facts, vec![]); } - if let Some(existing_surface) = income_surface_rows.iter().find(|row| row.key == definition.key) { + if let Some(existing_surface) = income_surface_rows + .iter() + .find(|row| row.key == definition.key) + { let mut row = existing_surface.clone(); row.order = definition.order; row.label = definition.label.clone(); @@ -260,6 +309,7 @@ fn resolve_universal_row( detail_rows: vec![], mapping_assignments: HashMap::new(), warning_codes: vec![], + consumed_sources: ConsumedSources::default(), }; } @@ -285,14 +335,22 @@ fn build_formula_row( .component_surfaces .positive .iter() - .filter_map(|surface_key| income_surface_rows.iter().find(|row| row.key == *surface_key)) + .filter_map(|surface_key| { + income_surface_rows + .iter() + .find(|row| row.key == *surface_key) + }) .map(surface_source) .collect::>(); let negative_surface_sources = bridge_row .component_surfaces .negative .iter() - .filter_map(|surface_key| income_surface_rows.iter().find(|row| row.key == *surface_key)) + .filter_map(|surface_key| { + income_surface_rows + .iter() + .find(|row| row.key == *surface_key) + }) .map(surface_source) .collect::>(); @@ -320,10 +378,13 @@ fn build_formula_row( let value = match bridge_row.formula.as_str() { "sum" | "pack_bridge_sum" => positive_value, 
"subtract" => match (positive_value, negative_value) { - (Some(positive_value), Some(negative_value)) => Some(positive_value - negative_value), + (Some(positive_value), Some(negative_value)) => { + Some(positive_value - negative_value) + } _ => None, }, - "sum_then_subtract" => positive_value.map(|positive_value| positive_value - negative_value.unwrap_or(0.0)), + "sum_then_subtract" => positive_value + .map(|positive_value| positive_value - negative_value.unwrap_or(0.0)), _ => None, }; @@ -373,7 +434,9 @@ fn build_formula_row( let resolved_source_row_keys = periods .iter() .map(|period| { - let resolved = if source_row_keys.len() == 1 && values.get(&period.id).copied().flatten().is_some() { + let resolved = if source_row_keys.len() == 1 + && values.get(&period.id).copied().flatten().is_some() + { source_row_keys.first().cloned() } else { None @@ -387,6 +450,10 @@ fn build_formula_row( .chain(negative_group_rows) .map(|row| build_detail_row(row, &definition.key, false)) .collect::>(); + let mut consumed_sources = ConsumedSources::default(); + for row in &detail_rows { + consumed_sources.insert_detail_row(row); + } let mut mapping_assignments = HashMap::::new(); for row in detail_rows.iter() { mapping_assignments.insert( @@ -426,42 +493,70 @@ fn build_formula_row( detail_rows, mapping_assignments, warning_codes, + consumed_sources, } } fn build_direct_row( definition: &UniversalIncomeDefinition, periods: &[PeriodOutput], - matched: &MatchedStatementRow<'_>, + matches: &[MatchedStatementRow<'_>], resolution_method: ResolutionMethod, confidence: Confidence, warning_codes: Vec, ) -> ResolvedUniversalRow { - let resolved_source_row_keys = periods - .iter() - .map(|period| { - let resolved = matched - .row - .values - .get(&period.id) - .copied() - .flatten() - .map(|_| matched.row.key.clone()); - (period.id.clone(), resolved) - }) - .collect::>(); + let mut consumed_sources = ConsumedSources::default(); let mut mapping_assignments = HashMap::::new(); - 
mapping_assignments.insert( - matched.row.concept_key.clone(), - MappingAssignment { - authoritative_concept_key: matched.authoritative_concept_key.clone(), - mapping_method: Some(matched.mapping_method), - surface_key: Some(definition.key.clone()), - detail_parent_surface_key: None, - kpi_key: None, - residual_flag: false, - }, - ); + let mut values = BTreeMap::>::new(); + let mut resolved_source_row_keys = BTreeMap::>::new(); + let mut source_concepts = Vec::::new(); + let mut source_row_keys = Vec::::new(); + let mut source_fact_ids = Vec::::new(); + let mut has_dimensions = false; + + for matched in matches { + consumed_sources.insert_row(matched.row); + source_concepts.push(matched.row.qname.clone()); + source_row_keys.push(matched.row.key.clone()); + source_fact_ids.extend(matched.row.source_fact_ids.iter().copied()); + has_dimensions = has_dimensions || matched.row.has_dimensions; + mapping_assignments.insert( + matched.row.concept_key.clone(), + MappingAssignment { + authoritative_concept_key: matched.authoritative_concept_key.clone(), + mapping_method: Some(matched.mapping_method), + surface_key: Some(definition.key.clone()), + detail_parent_surface_key: None, + kpi_key: None, + residual_flag: false, + }, + ); + } + + for period in periods { + let period_matches = matches + .iter() + .filter(|matched| { + matched + .row + .values + .get(&period.id) + .copied() + .flatten() + .is_some() + }) + .cloned() + .collect::>(); + let resolved = pick_best_match(&period_matches); + values.insert( + period.id.clone(), + resolved.and_then(|matched| matched.row.values.get(&period.id).copied().flatten()), + ); + resolved_source_row_keys.insert( + period.id.clone(), + resolved.map(|matched| matched.row.key.clone()), + ); + } ResolvedUniversalRow { row: SurfaceRowOutput { @@ -471,12 +566,12 @@ fn build_direct_row( template_section: definition.category.clone(), order: definition.order, unit: definition.unit.clone(), - values: matched.row.values.clone(), - source_concepts: 
vec![matched.row.qname.clone()], - source_row_keys: vec![matched.row.key.clone()], - source_fact_ids: matched.row.source_fact_ids.clone(), + values, + source_concepts: unique_sorted_strings(source_concepts), + source_row_keys: unique_sorted_strings(source_row_keys), + source_fact_ids: unique_sorted_i64(source_fact_ids), formula_key: None, - has_dimensions: matched.row.has_dimensions, + has_dimensions, resolved_source_row_keys, statement: Some(definition.statement.clone()), detail_count: None, @@ -487,6 +582,7 @@ fn build_direct_row( detail_rows: vec![], mapping_assignments, warning_codes, + consumed_sources, } } @@ -501,7 +597,9 @@ fn build_direct_fact_row( .map(|period| { ( period.id.clone(), - matches.get(&period.id).map(|matched| matched.fact.value_num), + matches + .get(&period.id) + .map(|matched| matched.fact.value_num), ) }) .collect::>(); @@ -510,7 +608,9 @@ fn build_direct_fact_row( .map(|period| { ( period.id.clone(), - matches.get(&period.id).map(|matched| matched.fact.local_name.clone()), + matches + .get(&period.id) + .map(|matched| matched.fact.local_name.clone()), ) }) .collect::>(); @@ -526,12 +626,14 @@ fn build_direct_fact_row( .map(|matched| matched.fact.local_name.clone()) .collect::>(), ); - let source_fact_ids = unique_sorted_i64( - vec![], - ); - let has_dimensions = matches.values().any(|matched| !matched.fact.is_dimensionless); + let source_fact_ids = unique_sorted_i64(vec![]); + let has_dimensions = matches + .values() + .any(|matched| !matched.fact.is_dimensionless); + let mut consumed_sources = ConsumedSources::default(); let mut mapping_assignments = HashMap::::new(); for matched in matches.values() { + consumed_sources.insert_fact(matched.fact); mapping_assignments.insert( matched.fact.concept_key.clone(), MappingAssignment { @@ -569,6 +671,7 @@ fn build_direct_fact_row( detail_rows: vec![], mapping_assignments, warning_codes, + consumed_sources, } } @@ -588,6 +691,7 @@ fn not_meaningful_row( detail_rows: vec![], mapping_assignments: 
HashMap::new(), warning_codes: warning_codes.to_vec(), + consumed_sources: ConsumedSources::default(), } } @@ -597,13 +701,48 @@ fn unresolved_row( warning_codes: &[String], ) -> ResolvedUniversalRow { ResolvedUniversalRow { - row: null_surface_row(definition, periods, None, Some(Confidence::Low), warning_codes.to_vec()), + row: null_surface_row( + definition, + periods, + None, + Some(Confidence::Low), + warning_codes.to_vec(), + ), detail_rows: vec![], mapping_assignments: HashMap::new(), warning_codes: warning_codes.to_vec(), + consumed_sources: ConsumedSources::default(), } } +fn collect_direct_statement_matches<'a, F>( + income_statement_rows: &'a [StatementRowOutput], + matcher: F, +) -> Vec> +where + F: Fn(&'a StatementRowOutput) -> Option>, +{ + let mut matches_by_row_key = HashMap::>::new(); + + for row in income_statement_rows + .iter() + .filter(|row| has_any_value(&row.values)) + { + if let Some(candidate) = matcher(row) { + matches_by_row_key + .entry(candidate.row.key.clone()) + .and_modify(|existing| { + if compare_statement_matches(&candidate, existing).is_lt() { + *existing = candidate.clone(); + } + }) + .or_insert(candidate); + } + } + + matches_by_row_key.into_values().collect() +} + fn null_surface_row( definition: &UniversalIncomeDefinition, periods: &[PeriodOutput], @@ -678,7 +817,9 @@ fn match_direct_authoritative<'a>( ) -> Option> { let authoritative_concept_key = authoritative_concept_key(row, crosswalk); let matches = authoritative_concept_key.as_ref().map_or(false, |actual| { - candidates.iter().any(|candidate| candidate_matches(candidate, actual)) + candidates + .iter() + .any(|candidate| candidate_matches(candidate, actual)) }); if !matches { return None; @@ -754,7 +895,9 @@ fn match_direct_fact_authoritative<'a>( ) -> Option> { let authoritative_concept_key = fact_authoritative_concept_key(fact, crosswalk); let matches = authoritative_concept_key.as_ref().map_or(false, |actual| { - candidates.iter().any(|candidate| 
candidate_matches(candidate, actual)) + candidates + .iter() + .any(|candidate| candidate_matches(candidate, actual)) }); if !matches { return None; @@ -814,23 +957,32 @@ fn fact_authoritative_concept_key( .or_else(|| Some(fact.qname.clone())) } -fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> Option<&'a MatchedStatementRow<'a>> { - matches.iter().min_by(|left, right| { - left.rank - .cmp(&right.rank) - .then_with(|| { - let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 }; - let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 }; - left_dimension_rank.cmp(&right_dimension_rank) - }) - .then_with(|| left.row.order.cmp(&right.row.order)) - .then_with(|| { - max_abs_value(&right.row.values) - .partial_cmp(&max_abs_value(&left.row.values)) - .unwrap_or(std::cmp::Ordering::Equal) - }) - .then_with(|| left.row.label.cmp(&right.row.label)) - }) +fn pick_best_match<'a>( + matches: &'a [MatchedStatementRow<'a>], +) -> Option<&'a MatchedStatementRow<'a>> { + matches + .iter() + .min_by(|left, right| compare_statement_matches(left, right)) +} + +fn compare_statement_matches( + left: &MatchedStatementRow<'_>, + right: &MatchedStatementRow<'_>, +) -> std::cmp::Ordering { + left.rank + .cmp(&right.rank) + .then_with(|| { + let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 }; + let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 }; + left_dimension_rank.cmp(&right_dimension_rank) + }) + .then_with(|| left.row.order.cmp(&right.row.order)) + .then_with(|| { + max_abs_value(&right.row.values) + .partial_cmp(&max_abs_value(&left.row.values)) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .then_with(|| left.row.label.cmp(&right.row.label)) } fn compare_fact_matches(left: &MatchedFact<'_>, right: &MatchedFact<'_>) -> std::cmp::Ordering { @@ -979,6 +1131,74 @@ fn recount_normalization_summary(compact_model: &mut CompactSurfaceModel) { .values() .map(|groups| groups.values().map(|rows| 
rows.len()).sum::()) .sum(); + compact_model.normalization_summary.unmapped_row_count = compact_model + .detail_rows + .values() + .map(|groups| groups.get("unmapped").map(|rows| rows.len()).unwrap_or(0)) + .sum(); + compact_model + .normalization_summary + .material_unmapped_row_count = compact_model + .detail_rows + .iter() + .map(|(statement, groups)| { + let baseline = baseline_for_statement( + statement, + compact_model + .surface_rows + .get(statement) + .map(|rows| rows.as_slice()) + .unwrap_or(&[]), + ); + let threshold = materiality_threshold(statement, baseline); + groups + .get("unmapped") + .map(|rows| { + rows.iter() + .filter(|row| max_abs_value(&row.values) >= threshold) + .count() + }) + .unwrap_or(0) + }) + .sum(); +} + +fn prune_consumed_unmapped_income_rows( + income_detail_rows: &mut BTreeMap>, + consumed_sources: &ConsumedSources, +) { + let mut remove_unmapped_bucket = false; + + if let Some(unmapped_rows) = income_detail_rows.get_mut("unmapped") { + unmapped_rows.retain(|row| !consumed_sources.matches_detail_row(row)); + remove_unmapped_bucket = unmapped_rows.is_empty(); + } + + if remove_unmapped_bucket { + income_detail_rows.remove("unmapped"); + } +} + +fn baseline_for_statement(statement: &str, surface_rows: &[SurfaceRowOutput]) -> f64 { + let anchor_key = if statement == "balance" { + "total_assets" + } else { + "revenue" + }; + + surface_rows + .iter() + .find(|row| row.key == anchor_key) + .map(|row| max_abs_value(&row.values)) + .unwrap_or(0.0) +} + +fn materiality_threshold(statement: &str, baseline: f64) -> f64 { + if statement == "balance" { + return (baseline * 0.005).max(5_000_000.0); + } + + (baseline * 0.01).max(1_000_000.0) } fn candidate_matches(candidate: &str, actual: &str) -> bool { @@ -1004,13 +1224,21 @@ fn max_abs_value(values: &BTreeMap>) -> f64 { } fn unique_sorted_strings(values: Vec) -> Vec { - let mut values = values.into_iter().collect::>().into_iter().collect::>(); + let mut values = values + .into_iter() + 
.collect::>() + .into_iter() + .collect::>(); values.sort(); values } fn unique_sorted_i64(values: Vec) -> Vec { - let mut values = values.into_iter().collect::>().into_iter().collect::>(); + let mut values = values + .into_iter() + .collect::>() + .into_iter() + .collect::>(); values.sort(); values } @@ -1035,10 +1263,25 @@ mod tests { } fn row(key: &str, qname: &str, value: f64) -> StatementRowOutput { + row_with_values( + key, + qname, + BTreeMap::from([("p1".to_string(), Some(value))]), + ) + } + + fn row_with_values( + key: &str, + qname: &str, + values: BTreeMap>, + ) -> StatementRowOutput { StatementRowOutput { key: key.to_string(), label: key.to_string(), - concept_key: format!("http://fasb.org/us-gaap/2024#{}", qname.split(':').nth(1).unwrap_or(key)), + concept_key: format!( + "http://fasb.org/us-gaap/2024#{}", + qname.split(':').nth(1).unwrap_or(key) + ), qname: qname.to_string(), namespace_uri: "http://fasb.org/us-gaap/2024".to_string(), local_name: qname.split(':').nth(1).unwrap_or(key).to_string(), @@ -1048,8 +1291,11 @@ mod tests { order: 1, depth: 0, parent_key: None, - values: BTreeMap::from([("p1".to_string(), Some(value))]), - units: BTreeMap::from([("p1".to_string(), Some("iso4217:USD".to_string()))]), + units: values + .keys() + .map(|period_id| (period_id.clone(), Some("iso4217:USD".to_string()))) + .collect(), + values, has_dimensions: false, source_fact_ids: vec![1], } @@ -1143,13 +1389,24 @@ mod tests { fn derives_gross_profit_from_revenue_minus_cost_of_revenue() { let mut rows = empty_rows(); rows.get_mut("income").unwrap().extend([ - row("revenue", "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax", 100.0), + row( + "revenue", + "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax", + 100.0, + ), row("cogs", "us-gaap:CostOfRevenue", 40.0), ]); let mut model = empty_model(); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Core, &mut model) - .expect("universal income rows should build"); 
+ apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); let gross_profit = model .surface_rows @@ -1166,11 +1423,22 @@ mod tests { #[test] fn emits_not_meaningful_bank_gross_profit_row() { let mut rows = empty_rows(); - rows.get_mut("income").unwrap().push(row("net-interest", "us-gaap:NetInterestIncome", 50.0)); + rows.get_mut("income").unwrap().push(row( + "net-interest", + "us-gaap:NetInterestIncome", + 50.0, + )); let mut model = empty_model(); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::BankLender, &mut model) - .expect("universal income rows should build"); + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::BankLender, + &mut model, + ) + .expect("universal income rows should build"); let gross_profit = model .surface_rows @@ -1181,22 +1449,33 @@ mod tests { .unwrap(); assert_eq!(gross_profit.values.get("p1").copied().flatten(), None); - assert_eq!(gross_profit.resolution_method.as_deref(), Some("not_meaningful")); - assert!(gross_profit.warning_codes.contains(&"gross_profit_not_meaningful_bank_pack".to_string())); + assert_eq!( + gross_profit.resolution_method.as_deref(), + Some("not_meaningful") + ); + assert!(gross_profit + .warning_codes + .contains(&"gross_profit_not_meaningful_bank_pack".to_string())); } #[test] fn derives_bank_revenue_from_net_interest_income_and_noninterest_income() { let rows = empty_rows(); let mut model = empty_model(); - model - .surface_rows - .get_mut("income") - .unwrap() - .extend([surface_row("net_interest_income", 60.0), surface_row("noninterest_income", 40.0)]); + model.surface_rows.get_mut("income").unwrap().extend([ + surface_row("net_interest_income", 60.0), + surface_row("noninterest_income", 40.0), + ]); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::BankLender, &mut model) - .expect("universal 
income rows should build"); + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::BankLender, + &mut model, + ) + .expect("universal income rows should build"); let revenue = model .surface_rows @@ -1207,20 +1486,34 @@ mod tests { .unwrap(); assert_eq!(revenue.values.get("p1").copied().flatten(), Some(100.0)); - assert_eq!(revenue.resolution_method.as_deref(), Some("formula_derived")); + assert_eq!( + revenue.resolution_method.as_deref(), + Some("formula_derived") + ); } #[test] fn derives_sga_from_sales_and_marketing_plus_general_and_administrative() { let mut rows = empty_rows(); rows.get_mut("income").unwrap().extend([ - row("sales-and-marketing", "us-gaap:SalesAndMarketingExpense", 30.0), + row( + "sales-and-marketing", + "us-gaap:SalesAndMarketingExpense", + 30.0, + ), row("g-and-a", "us-gaap:GeneralAndAdministrativeExpense", 10.0), ]); let mut model = empty_model(); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Core, &mut model) - .expect("universal income rows should build"); + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); let sga = model .surface_rows @@ -1231,7 +1524,10 @@ mod tests { .unwrap(); assert_eq!(sga.values.get("p1").copied().flatten(), Some(40.0)); - assert_eq!(sga.formula_key.as_deref(), Some("selling_general_and_administrative")); + assert_eq!( + sga.formula_key.as_deref(), + Some("selling_general_and_administrative") + ); assert_eq!(sga.resolution_method.as_deref(), Some("formula_derived")); let detail_rows = model @@ -1247,14 +1543,20 @@ mod tests { fn derives_other_operating_expense_from_operating_expenses_minus_sga_and_missing_rnd() { let rows = empty_rows(); let mut model = empty_model(); - model - .surface_rows - .get_mut("income") - .unwrap() - .extend([surface_row("operating_expenses", 100.0), surface_row("selling_general_and_administrative", 
60.0)]); + model.surface_rows.get_mut("income").unwrap().extend([ + surface_row("operating_expenses", 100.0), + surface_row("selling_general_and_administrative", 60.0), + ]); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Core, &mut model) - .expect("universal income rows should build"); + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); let other = model .surface_rows @@ -1265,7 +1567,10 @@ mod tests { .unwrap(); assert_eq!(other.values.get("p1").copied().flatten(), Some(40.0)); - assert_eq!(other.formula_key.as_deref(), Some("other_operating_expense")); + assert_eq!( + other.formula_key.as_deref(), + Some("other_operating_expense") + ); assert_eq!(other.resolution_method.as_deref(), Some("formula_derived")); } @@ -1273,14 +1578,20 @@ mod tests { fn derives_insurance_operating_expenses_from_claims_and_underwriting() { let rows = empty_rows(); let mut model = empty_model(); - model - .surface_rows - .get_mut("income") - .unwrap() - .extend([surface_row("claims_and_benefits", 80.0), surface_row("underwriting_expenses", 20.0)]); + model.surface_rows.get_mut("income").unwrap().extend([ + surface_row("claims_and_benefits", 80.0), + surface_row("underwriting_expenses", 20.0), + ]); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Insurance, &mut model) - .expect("universal income rows should build"); + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::Insurance, + &mut model, + ) + .expect("universal income rows should build"); let operating_expenses = model .surface_rows @@ -1290,17 +1601,31 @@ mod tests { .find(|row| row.key == "operating_expenses") .unwrap(); - assert_eq!(operating_expenses.values.get("p1").copied().flatten(), Some(100.0)); + assert_eq!( + operating_expenses.values.get("p1").copied().flatten(), + Some(100.0) + ); } #[test] fn 
emits_not_meaningful_bank_expense_breakdown_rows() { let mut rows = empty_rows(); - rows.get_mut("income").unwrap().push(row("net-interest", "us-gaap:NetInterestIncome", 50.0)); + rows.get_mut("income").unwrap().push(row( + "net-interest", + "us-gaap:NetInterestIncome", + 50.0, + )); let mut model = empty_model(); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::BankLender, &mut model) - .expect("universal income rows should build"); + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::BankLender, + &mut model, + ) + .expect("universal income rows should build"); let sga = model .surface_rows @@ -1319,24 +1644,34 @@ mod tests { assert_eq!(sga.values.get("p1").copied().flatten(), None); assert_eq!(sga.resolution_method.as_deref(), Some("not_meaningful")); - assert!(sga.warning_codes.contains(&"selling_general_and_administrative_not_meaningful_bank_pack".to_string())); + assert!(sga + .warning_codes + .contains(&"selling_general_and_administrative_not_meaningful_bank_pack".to_string())); assert_eq!(other.values.get("p1").copied().flatten(), None); assert_eq!(other.resolution_method.as_deref(), Some("not_meaningful")); - assert!(other.warning_codes.contains(&"other_operating_expense_not_meaningful_bank_pack".to_string())); + assert!(other + .warning_codes + .contains(&"other_operating_expense_not_meaningful_bank_pack".to_string())); } #[test] fn derives_reit_gross_profit_from_revenue_minus_property_operating_expense() { let rows = empty_rows(); let mut model = empty_model(); - model - .surface_rows - .get_mut("income") - .unwrap() - .extend([surface_row("revenue", 75.0), surface_row("property_operating_expense", 15.0)]); + model.surface_rows.get_mut("income").unwrap().extend([ + surface_row("revenue", 75.0), + surface_row("property_operating_expense", 15.0), + ]); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::ReitRealEstate, &mut model) - .expect("universal 
income rows should build"); + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::ReitRealEstate, + &mut model, + ) + .expect("universal income rows should build"); let gross_profit = model .surface_rows @@ -1380,7 +1715,11 @@ mod tests { #[test] fn derives_income_tax_expense_from_pretax_income_minus_net_income() { let mut rows = empty_rows(); - rows.get_mut("income").unwrap().push(row("pretax", "us-gaap:IncomeBeforeTaxExpenseBenefit", 100.0)); + rows.get_mut("income").unwrap().push(row( + "pretax", + "us-gaap:IncomeBeforeTaxExpenseBenefit", + 100.0, + )); let mut model = empty_model(); model .surface_rows @@ -1388,8 +1727,15 @@ mod tests { .unwrap() .push(surface_row("net_income", 75.0)); - apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Core, &mut model) - .expect("universal income rows should build"); + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); let tax = model .surface_rows @@ -1401,4 +1747,171 @@ mod tests { assert_eq!(tax.values.get("p1").copied().flatten(), Some(25.0)); } + + #[test] + fn flattens_other_operating_expense_aliases_and_prunes_income_unmapped_rows() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().extend([ + row("other-expense", "us-gaap:OtherOperatingExpense", 15.0), + row("other-expenses", "us-gaap:OtherOperatingExpenses", 15.0), + row("custom-unmapped", "company:CustomOperatingExpense", 7.0), + ]); + let mut model = empty_model(); + model.detail_rows.get_mut("income").unwrap().insert( + "unmapped".to_string(), + rows.get("income") + .unwrap() + .iter() + .map(|row| build_detail_row(row, "unmapped", true)) + .collect(), + ); + model.normalization_summary.unmapped_row_count = 3; + model.normalization_summary.detail_row_count = 3; + + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::Core, + &mut 
model, + ) + .expect("universal income rows should build"); + + let other = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "other_operating_expense") + .unwrap(); + + assert_eq!(other.values.get("p1").copied().flatten(), Some(15.0)); + assert_eq!( + other.source_row_keys, + vec!["other-expense".to_string(), "other-expenses".to_string()] + ); + + let unmapped = model + .detail_rows + .get("income") + .and_then(|groups| groups.get("unmapped")) + .cloned() + .unwrap_or_default(); + assert_eq!(unmapped.len(), 1); + assert_eq!(unmapped[0].key, "custom-unmapped"); + assert_eq!(model.normalization_summary.unmapped_row_count, 1); + } + + #[test] + fn merges_period_sparse_aliases_into_one_canonical_row() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().extend([ + row_with_values( + "other-expense-p1", + "us-gaap:OtherOperatingExpense", + BTreeMap::from([("p1".to_string(), Some(12.0)), ("p2".to_string(), None)]), + ), + row_with_values( + "other-expense-p2", + "us-gaap:OtherOperatingExpenses", + BTreeMap::from([("p1".to_string(), None), ("p2".to_string(), Some(18.0))]), + ), + ]); + let periods = vec![ + period("p1"), + PeriodOutput { + id: "p2".to_string(), + filing_id: 2, + accession_number: "0000000000-00-000002".to_string(), + filing_date: "2026-12-31".to_string(), + period_start: Some("2026-01-01".to_string()), + period_end: Some("2026-12-31".to_string()), + filing_type: "10-K".to_string(), + period_label: "p2".to_string(), + }, + ]; + let mut model = empty_model(); + + apply_universal_income_rows( + &periods, + &rows, + &[], + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); + + let other = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "other_operating_expense") + .unwrap(); + + assert_eq!(other.values.get("p1").copied().flatten(), Some(12.0)); + assert_eq!(other.values.get("p2").copied().flatten(), Some(18.0)); + assert_eq!( 
+ other.resolved_source_row_keys.get("p1").cloned().flatten(), + Some("other-expense-p1".to_string()) + ); + assert_eq!( + other.resolved_source_row_keys.get("p2").cloned().flatten(), + Some("other-expense-p2".to_string()) + ); + } + + #[test] + fn prunes_formula_component_rows_from_income_unmapped_but_keeps_true_residuals() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().extend([ + row( + "sales-and-marketing", + "us-gaap:SalesAndMarketingExpense", + 30.0, + ), + row("g-and-a", "us-gaap:GeneralAndAdministrativeExpense", 10.0), + row("custom-unmapped", "company:CustomOperatingExpense", 6.0), + ]); + let mut model = empty_model(); + model.detail_rows.get_mut("income").unwrap().insert( + "unmapped".to_string(), + rows.get("income") + .unwrap() + .iter() + .map(|row| build_detail_row(row, "unmapped", true)) + .collect(), + ); + model.normalization_summary.unmapped_row_count = 3; + model.normalization_summary.detail_row_count = 3; + + apply_universal_income_rows( + &[period("p1")], + &rows, + &[], + "us-gaap", + FiscalPack::Core, + &mut model, + ) + .expect("universal income rows should build"); + + let sga_details = model + .detail_rows + .get("income") + .and_then(|groups| groups.get("selling_general_and_administrative")) + .cloned() + .unwrap_or_default(); + assert_eq!(sga_details.len(), 2); + + let unmapped = model + .detail_rows + .get("income") + .and_then(|groups| groups.get("unmapped")) + .cloned() + .unwrap_or_default(); + assert_eq!(unmapped.len(), 1); + assert_eq!(unmapped[0].key, "custom-unmapped"); + } } diff --git a/rust/taxonomy/fiscal/v1/bank_lender.surface.json b/rust/taxonomy/fiscal/v1/bank_lender.surface.json index 03fee56..08efb77 100644 --- a/rust/taxonomy/fiscal/v1/bank_lender.surface.json +++ b/rust/taxonomy/fiscal/v1/bank_lender.surface.json @@ -156,7 +156,7 @@ "surface_key": "loans", "statement": "balance", "label": "Loans", - "category": "surface", + "category": "noncurrent_assets", "order": 30, "unit": "currency", 
"rollup_policy": "aggregate_children", @@ -181,7 +181,7 @@ "surface_key": "allowance_for_credit_losses", "statement": "balance", "label": "Allowance for Credit Losses", - "category": "surface", + "category": "noncurrent_assets", "order": 40, "unit": "currency", "rollup_policy": "aggregate_children", @@ -201,7 +201,7 @@ "surface_key": "deposits", "statement": "balance", "label": "Deposits", - "category": "surface", + "category": "current_liabilities", "order": 80, "unit": "currency", "rollup_policy": "aggregate_children", @@ -215,7 +215,7 @@ "surface_key": "total_assets", "statement": "balance", "label": "Total Assets", - "category": "surface", + "category": "derived", "order": 90, "unit": "currency", "rollup_policy": "direct_only", @@ -229,7 +229,7 @@ "surface_key": "total_liabilities", "statement": "balance", "label": "Total Liabilities", - "category": "surface", + "category": "derived", "order": 100, "unit": "currency", "rollup_policy": "direct_only", @@ -243,7 +243,7 @@ "surface_key": "total_equity", "statement": "balance", "label": "Total Equity", - "category": "surface", + "category": "equity", "order": 110, "unit": "currency", "rollup_policy": "direct_only", diff --git a/rust/taxonomy/fiscal/v1/broker_asset_manager.surface.json b/rust/taxonomy/fiscal/v1/broker_asset_manager.surface.json index b51ba94..81e80bd 100644 --- a/rust/taxonomy/fiscal/v1/broker_asset_manager.surface.json +++ b/rust/taxonomy/fiscal/v1/broker_asset_manager.surface.json @@ -63,7 +63,7 @@ "surface_key": "total_assets", "statement": "balance", "label": "Total Assets", - "category": "surface", + "category": "derived", "order": 90, "unit": "currency", "rollup_policy": "direct_only", @@ -77,7 +77,7 @@ "surface_key": "total_liabilities", "statement": "balance", "label": "Total Liabilities", - "category": "surface", + "category": "derived", "order": 100, "unit": "currency", "rollup_policy": "direct_only", @@ -91,7 +91,7 @@ "surface_key": "total_equity", "statement": "balance", "label": "Total 
Equity", - "category": "surface", + "category": "equity", "order": 110, "unit": "currency", "rollup_policy": "direct_only", diff --git a/rust/taxonomy/fiscal/v1/core.surface.json b/rust/taxonomy/fiscal/v1/core.surface.json index 0fabf05..755b471 100644 --- a/rust/taxonomy/fiscal/v1/core.surface.json +++ b/rust/taxonomy/fiscal/v1/core.surface.json @@ -45,32 +45,1485 @@ "materiality_policy": "income_default" }, { - "surface_key": "total_assets", + "surface_key": "cash_and_equivalents", "statement": "balance", - "label": "Total Assets", - "category": "surface", - "order": 70, + "label": "Cash and Cash Equivalents", + "category": "current_assets", + "order": 10, "unit": "currency", "rollup_policy": "direct_only", - "allowed_source_concepts": ["us-gaap:Assets"], - "allowed_authoritative_concepts": ["us-gaap:Assets"], + "allowed_source_concepts": [ + "us-gaap:CashAndCashEquivalentsAtCarryingValue", + "us-gaap:Cash", + "us-gaap:CashCashEquivalentsAndFederalFundsSold" + ], + "allowed_authoritative_concepts": [ + "us-gaap:CashAndCashEquivalentsAtCarryingValue" + ], "formula_fallback": null, "detail_grouping_policy": "top_level_only", "materiality_policy": "balance_default" }, { - "surface_key": "operating_cash_flow", + "surface_key": "short_term_investments", + "statement": "balance", + "label": "Short-Term Investments", + "category": "current_assets", + "order": 20, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:AvailableForSaleSecuritiesCurrent", + "us-gaap:ShortTermInvestments", + "us-gaap:MarketableSecuritiesCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:AvailableForSaleSecuritiesCurrent", + "us-gaap:ShortTermInvestments", + "us-gaap:MarketableSecuritiesCurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_cash_and_equivalents", + "statement": "balance", + "label": "Total Cash and Cash 
Equivalents", + "category": "current_assets", + "order": 30, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:CashCashEquivalentsAndShortTermInvestments", + "us-gaap:CashAndShortTermInvestments" + ], + "allowed_authoritative_concepts": [ + "us-gaap:CashCashEquivalentsAndShortTermInvestments", + "us-gaap:CashAndShortTermInvestments" + ], + "formula_fallback": { + "op": "sum", + "sources": ["cash_and_equivalents", "short_term_investments"], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "accounts_receivable", + "statement": "balance", + "label": "Accounts Receivable", + "category": "current_assets", + "order": 40, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:AccountsReceivableNetCurrent", + "us-gaap:ReceivablesNetCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:AccountsReceivableNetCurrent", + "us-gaap:ReceivablesNetCurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "inventory", + "statement": "balance", + "label": "Inventories", + "category": "current_assets", + "order": 50, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:InventoryNet" + ], + "allowed_authoritative_concepts": [ + "us-gaap:InventoryNet" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "other_current_assets", + "statement": "balance", + "label": "Other Current Assets", + "category": "current_assets", + "order": 60, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:OtherAssetsCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:OtherAssetsCurrent" + ], + "formula_fallback": 
null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "current_assets", + "statement": "balance", + "label": "Total Current Assets", + "category": "current_assets", + "order": 70, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:AssetsCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:AssetsCurrent" + ], + "formula_fallback": { + "op": "sum", + "sources": [ + "cash_and_equivalents", + "short_term_investments", + "accounts_receivable", + "inventory", + "other_current_assets" + ], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "property_plant_equipment", + "statement": "balance", + "label": "Net Property, Plant & Equipment", + "category": "noncurrent_assets", + "order": 80, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:PropertyPlantAndEquipmentNet", + "us-gaap:PropertyPlantAndEquipmentAndFinanceLeaseRightOfUseAssetAfterAccumulatedDepreciationAndAmortization" + ], + "allowed_authoritative_concepts": [ + "us-gaap:PropertyPlantAndEquipmentNet" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "operating_lease_right_of_use_assets", + "statement": "balance", + "label": "Operating Lease Right-of-Use Assets", + "category": "noncurrent_assets", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:OperatingLeaseRightOfUseAsset" + ], + "allowed_authoritative_concepts": [ + "us-gaap:OperatingLeaseRightOfUseAsset" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "intangible_assets", + "statement": "balance", + "label": "Net Intangible Assets", + 
"category": "noncurrent_assets", + "order": 100, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:FiniteLivedIntangibleAssetsNet", + "us-gaap:IntangibleAssetsNetExcludingGoodwill", + "us-gaap:FiniteLivedIntangibleAssetsNetExcludingGoodwill" + ], + "allowed_authoritative_concepts": [ + "us-gaap:FiniteLivedIntangibleAssetsNet", + "us-gaap:IntangibleAssetsNetExcludingGoodwill", + "us-gaap:FiniteLivedIntangibleAssetsNetExcludingGoodwill" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "goodwill", + "statement": "balance", + "label": "Goodwill", + "category": "noncurrent_assets", + "order": 110, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:Goodwill" + ], + "allowed_authoritative_concepts": [ + "us-gaap:Goodwill" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "long_term_investments", + "statement": "balance", + "label": "Long-Term Investments", + "category": "noncurrent_assets", + "order": 120, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:AvailableForSaleSecuritiesDebtMaturitiesSingleMaturityDate", + "us-gaap:AvailableForSaleSecuritiesNoncurrent", + "us-gaap:LongTermInvestments", + "us-gaap:AvailableForSaleSecuritiesDebtSecurities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:AvailableForSaleSecuritiesNoncurrent", + "us-gaap:LongTermInvestments" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "deferred_income_taxes_asset", + "statement": "balance", + "label": "Deferred Income Taxes", + "category": "noncurrent_assets", + "order": 130, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": 
[ + "us-gaap:DeferredIncomeTaxAssetsNet", + "us-gaap:DeferredTaxAssetsNet" + ], + "allowed_authoritative_concepts": [ + "us-gaap:DeferredIncomeTaxAssetsNet", + "us-gaap:DeferredTaxAssetsNet" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "other_long_term_assets", + "statement": "balance", + "label": "Other Long-Term Assets", + "category": "noncurrent_assets", + "order": 140, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:OtherAssetsNoncurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:OtherAssetsNoncurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_assets", + "statement": "balance", + "label": "Total Assets", + "category": "derived", + "order": 150, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:Assets" + ], + "allowed_authoritative_concepts": [ + "us-gaap:Assets" + ], + "formula_fallback": { + "op": "sum", + "sources": [ + "current_assets", + "property_plant_equipment", + "operating_lease_right_of_use_assets", + "intangible_assets", + "goodwill", + "long_term_investments", + "deferred_income_taxes_asset", + "other_long_term_assets" + ], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "accounts_payable", + "statement": "balance", + "label": "Accounts Payable", + "category": "current_liabilities", + "order": 160, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:AccountsPayableCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:AccountsPayableCurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + 
"surface_key": "accrued_liabilities", + "statement": "balance", + "label": "Accrued Expenses", + "category": "current_liabilities", + "order": 170, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:AccruedLiabilitiesCurrent", + "us-gaap:OtherAccruedLiabilitiesCurrent", + "us-gaap:AccruedCompensationCurrent", + "us-gaap:EmployeeRelatedLiabilitiesCurrent", + "us-gaap:OtherLiabilitiesCurrent", + "us-gaap:AccruedPropertyTaxes" + ], + "allowed_authoritative_concepts": [ + "us-gaap:AccruedLiabilitiesCurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default" + }, + { + "surface_key": "short_term_debt", + "statement": "balance", + "label": "Short-Term Debt", + "category": "current_liabilities", + "order": 180, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ShortTermBorrowings", + "us-gaap:DebtCurrent", + "us-gaap:CommercialPaper" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ShortTermBorrowings", + "us-gaap:DebtCurrent", + "us-gaap:CommercialPaper" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "current_debt", + "statement": "balance", + "label": "Current Portion of Long-Term Debt", + "category": "current_liabilities", + "order": 190, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:LongTermDebtCurrent", + "us-gaap:CurrentPortionOfLongTermDebt", + "us-gaap:LongTermDebtAndCapitalLeaseObligationsCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:LongTermDebtCurrent", + "us-gaap:CurrentPortionOfLongTermDebt", + "us-gaap:LongTermDebtAndCapitalLeaseObligationsCurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": 
"deferred_revenue_current", + "statement": "balance", + "label": "Deferred Revenue, Current", + "category": "current_liabilities", + "order": 200, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ContractWithCustomerLiabilityCurrent", + "us-gaap:DeferredRevenueCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ContractWithCustomerLiabilityCurrent", + "us-gaap:DeferredRevenueCurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default", + "include_in_output": false + }, + { + "surface_key": "deferred_revenue_noncurrent", + "statement": "balance", + "label": "Deferred Revenue, Noncurrent", + "category": "noncurrent_liabilities", + "order": 205, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ContractWithCustomerLiabilityNoncurrent", + "us-gaap:DeferredRevenueNoncurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ContractWithCustomerLiabilityNoncurrent", + "us-gaap:DeferredRevenueNoncurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default", + "include_in_output": false + }, + { + "surface_key": "unearned_revenue", + "statement": "balance", + "label": "Unearned Revenue", + "category": "current_liabilities", + "order": 210, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:ContractWithCustomerLiability", + "us-gaap:DeferredRevenue" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ContractWithCustomerLiability", + "us-gaap:DeferredRevenue" + ], + "formula_fallback": { + "op": "sum", + "sources": ["deferred_revenue_current", "deferred_revenue_noncurrent"], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "long_term_debt", + "statement": "balance", + "label": 
"Long-Term Debt", + "category": "noncurrent_liabilities", + "order": 220, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:LongTermDebtNoncurrent", + "us-gaap:DebtNoncurrent", + "us-gaap:LongTermDebtAndCapitalLeaseObligations", + "us-gaap:LongTermDebt", + "us-gaap:DebtInstrumentCarryingAmount" + ], + "allowed_authoritative_concepts": [ + "us-gaap:LongTermDebtNoncurrent", + "us-gaap:DebtNoncurrent", + "us-gaap:LongTermDebtAndCapitalLeaseObligations" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "lease_liabilities", + "statement": "balance", + "label": "Lease Liabilities", + "category": "noncurrent_liabilities", + "order": 240, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:OperatingLeaseLiabilityNoncurrent", + "us-gaap:OperatingLeaseLiability", + "us-gaap:FinanceLeaseLiability", + "us-gaap:FinanceLeaseLiabilityNoncurrent", + "us-gaap:LesseeOperatingLeaseLiability" + ], + "allowed_authoritative_concepts": [ + "us-gaap:OperatingLeaseLiabilityNoncurrent", + "us-gaap:OperatingLeaseLiability", + "us-gaap:FinanceLeaseLiability", + "us-gaap:FinanceLeaseLiabilityNoncurrent", + "us-gaap:LesseeOperatingLeaseLiability" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "deferred_income_taxes_liability", + "statement": "balance", + "label": "Deferred Income Taxes", + "category": "noncurrent_liabilities", + "order": 250, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:DeferredIncomeTaxLiabilitiesNet", + "us-gaap:DeferredIncomeTaxLiabilities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:DeferredIncomeTaxLiabilitiesNet", + "us-gaap:DeferredIncomeTaxLiabilities" + ], + "formula_fallback": null, + "detail_grouping_policy": 
"top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "other_long_term_liabilities", + "statement": "balance", + "label": "Other Long-Term Liabilities", + "category": "noncurrent_liabilities", + "order": 260, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:AssetRetirementObligationsNoncurrent", + "us-gaap:OtherLiabilitiesNoncurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:OtherLiabilitiesNoncurrent" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default" + }, + { + "surface_key": "current_liabilities", + "statement": "balance", + "label": "Current Liabilities", + "category": "current_liabilities", + "order": 270, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:LiabilitiesCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:LiabilitiesCurrent" + ], + "formula_fallback": { + "op": "sum", + "sources": [ + "accounts_payable", + "accrued_liabilities", + "short_term_debt", + "current_debt", + "unearned_revenue" + ], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default", + "include_in_output": false + }, + { + "surface_key": "total_current_liabilities", + "statement": "balance", + "label": "Total Current Liabilities", + "category": "current_liabilities", + "order": 280, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:LiabilitiesCurrent" + ], + "allowed_authoritative_concepts": [ + "us-gaap:LiabilitiesCurrent" + ], + "formula_fallback": { + "op": "sum", + "sources": [ + "accounts_payable", + "accrued_liabilities", + "short_term_debt", + "current_debt", + "unearned_revenue" + ], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + 
"surface_key": "leases", + "statement": "balance", + "label": "Leases", + "category": "noncurrent_liabilities", + "order": 290, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:OperatingLeaseLiability", + "us-gaap:OperatingLeaseLiabilityCurrent", + "us-gaap:OperatingLeaseLiabilityNoncurrent", + "us-gaap:FinanceLeaseLiability", + "us-gaap:FinanceLeaseLiabilityCurrent", + "us-gaap:FinanceLeaseLiabilityNoncurrent", + "us-gaap:LesseeOperatingLeaseLiability" + ], + "allowed_authoritative_concepts": [ + "us-gaap:OperatingLeaseLiability", + "us-gaap:FinanceLeaseLiability", + "us-gaap:LesseeOperatingLeaseLiability" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default", + "include_in_output": false + }, + { + "surface_key": "total_liabilities", + "statement": "balance", + "label": "Total Liabilities", + "category": "derived", + "order": 300, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:Liabilities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:Liabilities" + ], + "formula_fallback": { + "op": "sum", + "sources": [ + "total_current_liabilities", + "long_term_debt", + "lease_liabilities", + "deferred_income_taxes_liability", + "other_long_term_liabilities" + ], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "common_stock", + "statement": "balance", + "label": "Common Stock", + "category": "equity", + "order": 310, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:CommonStocksIncludingAdditionalPaidInCapital", + "us-gaap:CommonStockValue", + "us-gaap:AdditionalPaidInCapitalCommonStock", + "us-gaap:AdditionalPaidInCapital" + ], + "allowed_authoritative_concepts": [ + "us-gaap:CommonStocksIncludingAdditionalPaidInCapital", + 
"us-gaap:CommonStockValue" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "accumulated_other_comprehensive_income", + "statement": "balance", + "label": "Accumulated Other Comprehensive Income", + "category": "equity", + "order": 320, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:AccumulatedOtherComprehensiveIncomeLossNetOfTax" + ], + "allowed_authoritative_concepts": [ + "us-gaap:AccumulatedOtherComprehensiveIncomeLossNetOfTax" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "retained_earnings", + "statement": "balance", + "label": "Retained Earnings", + "category": "equity", + "order": 330, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:RetainedEarningsAccumulatedDeficit" + ], + "allowed_authoritative_concepts": [ + "us-gaap:RetainedEarningsAccumulatedDeficit" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_common_shareholders_equity", + "statement": "balance", + "label": "Total Common Shareholders' Equity", + "category": "equity", + "order": 340, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:StockholdersEquity" + ], + "allowed_authoritative_concepts": [ + "us-gaap:StockholdersEquity" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_equity", + "statement": "balance", + "label": "Total Shareholders' Equity", + "category": "equity", + "order": 350, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:StockholdersEquity", + 
"us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", + "us-gaap:PartnersCapital" + ], + "allowed_authoritative_concepts": [ + "us-gaap:StockholdersEquity", + "us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", + "us-gaap:PartnersCapital" + ], + "formula_fallback": { + "op": "subtract", + "sources": ["total_assets", "total_liabilities"], + "treat_null_as_zero": false + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_liabilities_and_equity", + "statement": "balance", + "label": "Total Liabilities and Shareholders' Equity", + "category": "derived", + "order": 360, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:LiabilitiesAndStockholdersEquity" + ], + "allowed_authoritative_concepts": [ + "us-gaap:LiabilitiesAndStockholdersEquity" + ], + "formula_fallback": { + "op": "sum", + "sources": ["total_liabilities", "total_equity"], + "treat_null_as_zero": false + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_debt", + "statement": "balance", + "label": "Total Debt", + "category": "derived", + "order": 370, + "unit": "currency", + "rollup_policy": "formula_only", + "allowed_source_concepts": [], + "allowed_authoritative_concepts": [], + "formula_fallback": { + "op": "sum", + "sources": [ + "short_term_debt", + "current_debt", + "long_term_debt", + "lease_liabilities" + ], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "net_cash_position", + "statement": "balance", + "label": "Net Cash Position", + "category": "derived", + "order": 380, + "unit": "currency", + "rollup_policy": "formula_only", + "allowed_source_concepts": [], + "allowed_authoritative_concepts": [], + "formula_fallback": { + "op": "subtract", + 
"sources": ["total_cash_and_equivalents", "total_debt"], + "treat_null_as_zero": false + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "net_income", "statement": "cash_flow", - "label": "Operating Cash Flow", - "category": "surface", + "label": "Net Income", + "category": "operating", "order": 10, "unit": "currency", "rollup_policy": "direct_only", - "allowed_source_concepts": ["us-gaap:NetCashProvidedByUsedInOperatingActivities"], - "allowed_authoritative_concepts": ["us-gaap:NetCashProvidedByUsedInOperatingActivities"], + "allowed_source_concepts": [ + "us-gaap:NetIncomeLossAvailableToCommonStockholdersBasic", + "us-gaap:NetIncomeLoss" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NetIncomeLossAvailableToCommonStockholdersBasic", + "us-gaap:NetIncomeLoss" + ], "formula_fallback": null, "detail_grouping_policy": "top_level_only", "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "depreciation_and_amortization", + "statement": "cash_flow", + "label": "Depreciation and Amortization", + "category": "operating", + "order": 20, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:DepreciationDepletionAndAmortization", + "us-gaap:DepreciationAmortizationAndAccretionNet", + "us-gaap:DepreciationAndAmortization", + "us-gaap:DepreciationAmortizationAndOther", + "us-gaap:AmortizationOfIntangibleAssets" + ], + "allowed_authoritative_concepts": [ + "us-gaap:DepreciationDepletionAndAmortization", + "us-gaap:DepreciationAmortizationAndAccretionNet", + "us-gaap:DepreciationAndAmortization", + "us-gaap:DepreciationAmortizationAndOther" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "stock_based_compensation", + "statement": "cash_flow", + "label": "Stock-Based Compensation", + "category": "operating", + "order": 30, + "unit": "currency", + 
"rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ShareBasedCompensation", + "us-gaap:AllocatedShareBasedCompensationExpense" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ShareBasedCompensation", + "us-gaap:AllocatedShareBasedCompensationExpense" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "other_adjustments", + "statement": "cash_flow", + "label": "Other Adjustments", + "category": "operating", + "order": 40, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:OtherAdjustmentsToReconcileNetIncomeLossToCashProvidedByUsedInOperatingActivities", + "us-gaap:IncreaseDecreaseInDeferredIncomeTaxes", + "us-gaap:OtherNoncashIncomeExpense" + ], + "allowed_authoritative_concepts": [], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "changes_trade_receivables", + "statement": "cash_flow", + "label": "Changes in Trade Receivables", + "category": "operating", + "order": 50, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInAccountsReceivable", + "us-gaap:IncreaseDecreaseInReceivables" + ], + "allowed_authoritative_concepts": [ + "us-gaap:IncreaseDecreaseInAccountsReceivable", + "us-gaap:IncreaseDecreaseInReceivables" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "changes_inventories", + "statement": "cash_flow", + "label": "Changes in Inventories", + "category": "operating", + "order": 60, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInInventories" + ], + "allowed_authoritative_concepts": [ + 
"us-gaap:IncreaseDecreaseInInventories" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "changes_accounts_payable", + "statement": "cash_flow", + "label": "Changes in Accounts Payable", + "category": "operating", + "order": 70, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInAccountsPayable" + ], + "allowed_authoritative_concepts": [ + "us-gaap:IncreaseDecreaseInAccountsPayable" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "changes_accrued_expenses", + "statement": "cash_flow", + "label": "Changes in Accrued Expenses", + "category": "operating", + "order": 80, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInAccruedLiabilities", + "us-gaap:IncreaseDecreaseInEmployeeRelatedLiabilitiesCurrent", + "us-gaap:IncreaseDecreaseInOtherLiabilitiesCurrent" + ], + "allowed_authoritative_concepts": [], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "changes_income_taxes_payable", + "statement": "cash_flow", + "label": "Changes in Income Taxes Payable", + "category": "operating", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInAccruedIncomeTaxesPayable", + "us-gaap:IncreaseDecreaseInIncomeTaxes" + ], + "allowed_authoritative_concepts": [ + "us-gaap:IncreaseDecreaseInAccruedIncomeTaxesPayable", + "us-gaap:IncreaseDecreaseInIncomeTaxes" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "changes_unearned_revenue", + "statement": 
"cash_flow", + "label": "Changes in Unearned Revenue", + "category": "operating", + "order": 100, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInDeferredRevenue" + ], + "allowed_authoritative_concepts": [ + "us-gaap:IncreaseDecreaseInDeferredRevenue" + ], + "formula_fallback": { + "op": "subtract", + "sources": [ + "contract_liability_incurred", + "contract_liability_recognized" + ], + "treat_null_as_zero": false + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "contract_liability_incurred", + "statement": "cash_flow", + "label": "Unearned Revenue Incurred", + "category": "helper", + "order": 95, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ContractWithCustomerLiabilityIncurred" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ContractWithCustomerLiabilityIncurred" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "include_in_output": false + }, + { + "surface_key": "contract_liability_recognized", + "statement": "cash_flow", + "label": "Unearned Revenue Recognized", + "category": "helper", + "order": 96, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ContractWithCustomerLiabilityRevenueRecognized" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ContractWithCustomerLiabilityRevenueRecognized" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "include_in_output": false + }, + { + "surface_key": "changes_other_operating_activities", + "statement": "cash_flow", + "label": "Changes in Other Operating Activities", + "category": "operating", + "order": 110, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + 
"us-gaap:IncreaseDecreaseInOtherOperatingAssets", + "us-gaap:IncreaseDecreaseInOtherOperatingLiabilities", + "us-gaap:IncreaseDecreaseInDeferredIncomeTaxes", + "us-gaap:IncreaseDecreaseInPrepaidExpense" + ], + "allowed_authoritative_concepts": [], + "formula_fallback": { + "op": "sum", + "sources": [ + "changes_other_current_assets", + "changes_other_current_liabilities", + "changes_other_noncurrent_assets", + "changes_other_noncurrent_liabilities" + ], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "changes_other_current_assets", + "statement": "cash_flow", + "label": "Other Current Assets", + "category": "helper", + "order": 101, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInOtherCurrentAssets" + ], + "allowed_authoritative_concepts": [ + "us-gaap:IncreaseDecreaseInOtherCurrentAssets" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "include_in_output": false + }, + { + "surface_key": "changes_other_current_liabilities", + "statement": "cash_flow", + "label": "Other Current Liabilities", + "category": "helper", + "order": 102, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInOtherCurrentLiabilities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:IncreaseDecreaseInOtherCurrentLiabilities" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "include_in_output": false + }, + { + "surface_key": "changes_other_noncurrent_assets", + "statement": "cash_flow", + "label": "Other Noncurrent Assets", + "category": "helper", + "order": 103, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + 
"us-gaap:IncreaseDecreaseInOtherNoncurrentAssets" + ], + "allowed_authoritative_concepts": [ + "us-gaap:IncreaseDecreaseInOtherNoncurrentAssets" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "include_in_output": false + }, + { + "surface_key": "changes_other_noncurrent_liabilities", + "statement": "cash_flow", + "label": "Other Noncurrent Liabilities", + "category": "helper", + "order": 104, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:IncreaseDecreaseInOtherNoncurrentLiabilities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:IncreaseDecreaseInOtherNoncurrentLiabilities" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "include_in_output": false + }, + { + "surface_key": "operating_cash_flow", + "statement": "cash_flow", + "label": "Operating Cash Flow", + "category": "operating", + "order": 120, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:NetCashProvidedByUsedInOperatingActivities", + "us-gaap:NetCashProvidedByUsedInOperatingActivitiesContinuingOperations" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NetCashProvidedByUsedInOperatingActivities", + "us-gaap:NetCashProvidedByUsedInOperatingActivitiesContinuingOperations" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "capital_expenditures", + "statement": "cash_flow", + "label": "Capital Expenditures", + "category": "investing", + "order": 130, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:PaymentsToAcquirePropertyPlantAndEquipment", + "us-gaap:CapitalExpendituresIncurredButNotYetPaid", + "us-gaap:PaymentsForCapitalImprovements" + ], + "allowed_authoritative_concepts": [ + 
"us-gaap:PaymentsToAcquirePropertyPlantAndEquipment", + "us-gaap:PaymentsForCapitalImprovements" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "acquisitions", + "statement": "cash_flow", + "label": "Acquisitions", + "category": "investing", + "order": 140, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:PaymentsToAcquireBusinessesNetOfCashAcquired", + "us-gaap:AcquisitionsNetOfCashAcquiredAndPurchasesOfIntangibleAndOtherAssets", + "us-gaap:PaymentsToAcquireInterestInSubsidiariesAndAffiliates" + ], + "allowed_authoritative_concepts": [ + "us-gaap:PaymentsToAcquireBusinessesNetOfCashAcquired", + "us-gaap:AcquisitionsNetOfCashAcquiredAndPurchasesOfIntangibleAndOtherAssets", + "us-gaap:PaymentsToAcquireInterestInSubsidiariesAndAffiliates" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "investments", + "statement": "cash_flow", + "label": "Investments", + "category": "investing", + "order": 150, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:PaymentsForProceedsFromOtherInvestingActivities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:PaymentsForProceedsFromOtherInvestingActivities" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "proceeds_from_sale_of_property_plant_and_equipment", + "statement": "cash_flow", + "label": "Proceeds from Sale of Property, Plant and Equipment", + "category": "investing", + "order": 160, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ProceedsFromSaleOfPropertyPlantAndEquipment" + ], + "allowed_authoritative_concepts": [ + 
"us-gaap:ProceedsFromSaleOfPropertyPlantAndEquipment" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "other_investing_activities", + "statement": "cash_flow", + "label": "Other Investing Activities", + "category": "investing", + "order": 170, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:OtherInvestingActivitiesNet", + "us-gaap:OtherCashFlowFromInvestingActivities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:OtherInvestingActivitiesNet", + "us-gaap:OtherCashFlowFromInvestingActivities" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "investing_cash_flow", + "statement": "cash_flow", + "label": "Investing Cash Flow", + "category": "investing", + "order": 180, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:NetCashProvidedByUsedInInvestingActivities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NetCashProvidedByUsedInInvestingActivities" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "short_term_debt_issued", + "statement": "cash_flow", + "label": "Short-Term Debt Issued", + "category": "financing", + "order": 190, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ProceedsFromShortTermDebt" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ProceedsFromShortTermDebt" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "long_term_debt_issued", + "statement": "cash_flow", + "label": "Long-Term Debt Issued", + "category": "financing", + "order": 200, + "unit": "currency", + 
"rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ProceedsFromIssuanceOfLongTermDebt" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ProceedsFromIssuanceOfLongTermDebt" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "debt_repaid", + "statement": "cash_flow", + "label": "Debt Repaid", + "category": "financing", + "order": 210, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:RepaymentsOfDebt", + "us-gaap:RepaymentsOfLongTermDebt" + ], + "allowed_authoritative_concepts": [ + "us-gaap:RepaymentsOfDebt", + "us-gaap:RepaymentsOfLongTermDebt" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "share_repurchases", + "statement": "cash_flow", + "label": "Share Repurchases", + "category": "financing", + "order": 220, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:PaymentsForRepurchaseOfCommonStock", + "us-gaap:PaymentsForRepurchaseOfEquity" + ], + "allowed_authoritative_concepts": [ + "us-gaap:PaymentsForRepurchaseOfCommonStock", + "us-gaap:PaymentsForRepurchaseOfEquity" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "dividends_paid", + "statement": "cash_flow", + "label": "Dividends Paid", + "category": "financing", + "order": 230, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:DividendsCommonStockCash", + "us-gaap:PaymentsOfDividendsCommonStock", + "us-gaap:PaymentsOfDividends" + ], + "allowed_authoritative_concepts": [ + "us-gaap:DividendsCommonStockCash", + "us-gaap:PaymentsOfDividendsCommonStock", + "us-gaap:PaymentsOfDividends" + ], + 
"formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" + }, + { + "surface_key": "other_financing_activities", + "statement": "cash_flow", + "label": "Other Financing Activities", + "category": "financing", + "order": 240, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:ProceedsFromPaymentsForOtherFinancingActivities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:ProceedsFromPaymentsForOtherFinancingActivities" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "financing_cash_flow", + "statement": "cash_flow", + "label": "Financing Cash Flow", + "category": "financing", + "order": 250, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": [ + "us-gaap:NetCashProvidedByUsedInFinancingActivities" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NetCashProvidedByUsedInFinancingActivities" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + }, + { + "surface_key": "free_cash_flow", + "statement": "cash_flow", + "label": "Free Cash Flow", + "category": "free_cash_flow", + "order": 260, + "unit": "currency", + "rollup_policy": "formula_only", + "allowed_source_concepts": [], + "allowed_authoritative_concepts": [], + "formula_fallback": { + "op": "sum", + "sources": [ + "operating_cash_flow", + "capital_expenditures" + ], + "treat_null_as_zero": true + }, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" } ] } diff --git a/rust/taxonomy/fiscal/v1/insurance.surface.json b/rust/taxonomy/fiscal/v1/insurance.surface.json index 44de828..d48cd3e 100644 --- a/rust/taxonomy/fiscal/v1/insurance.surface.json +++ b/rust/taxonomy/fiscal/v1/insurance.surface.json @@ -119,7 +119,7 @@ 
"surface_key": "policy_liabilities", "statement": "balance", "label": "Policy Liabilities", - "category": "surface", + "category": "noncurrent_liabilities", "order": 80, "unit": "currency", "rollup_policy": "aggregate_children", @@ -145,17 +145,19 @@ "surface_key": "deferred_acquisition_costs", "statement": "balance", "label": "Deferred Acquisition Costs", - "category": "surface", + "category": "noncurrent_assets", "order": 90, "unit": "currency", "rollup_policy": "aggregate_children", "allowed_source_concepts": [ "us-gaap:DeferredPolicyAcquisitionCosts", - "us-gaap:DeferredAcquisitionCosts" + "us-gaap:DeferredAcquisitionCosts", + "us-gaap:DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired" ], "allowed_authoritative_concepts": [ "us-gaap:DeferredPolicyAcquisitionCosts", - "us-gaap:DeferredAcquisitionCosts" + "us-gaap:DeferredAcquisitionCosts", + "us-gaap:DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired" ], "formula_fallback": null, "detail_grouping_policy": "group_all_children", @@ -165,7 +167,7 @@ "surface_key": "total_assets", "statement": "balance", "label": "Total Assets", - "category": "surface", + "category": "derived", "order": 100, "unit": "currency", "rollup_policy": "direct_only", @@ -179,7 +181,7 @@ "surface_key": "total_liabilities", "statement": "balance", "label": "Total Liabilities", - "category": "surface", + "category": "derived", "order": 110, "unit": "currency", "rollup_policy": "direct_only", @@ -193,7 +195,7 @@ "surface_key": "total_equity", "statement": "balance", "label": "Total Equity", - "category": "surface", + "category": "equity", "order": 120, "unit": "currency", "rollup_policy": "direct_only", diff --git a/rust/taxonomy/fiscal/v1/reit_real_estate.surface.json b/rust/taxonomy/fiscal/v1/reit_real_estate.surface.json index 1891ad7..194fb8d 100644 --- a/rust/taxonomy/fiscal/v1/reit_real_estate.surface.json +++ b/rust/taxonomy/fiscal/v1/reit_real_estate.surface.json @@ -78,7 +78,7 @@ "surface_key": "investment_property", 
"statement": "balance", "label": "Investment Property", - "category": "surface", + "category": "noncurrent_assets", "order": 40, "unit": "currency", "rollup_policy": "aggregate_children", @@ -99,7 +99,7 @@ "surface_key": "total_assets", "statement": "balance", "label": "Total Assets", - "category": "surface", + "category": "derived", "order": 90, "unit": "currency", "rollup_policy": "direct_only", @@ -113,7 +113,7 @@ "surface_key": "total_liabilities", "statement": "balance", "label": "Total Liabilities", - "category": "surface", + "category": "derived", "order": 100, "unit": "currency", "rollup_policy": "direct_only", @@ -127,7 +127,7 @@ "surface_key": "total_equity", "statement": "balance", "label": "Total Equity", - "category": "surface", + "category": "equity", "order": 110, "unit": "currency", "rollup_policy": "direct_only", @@ -136,6 +136,25 @@ "formula_fallback": null, "detail_grouping_policy": "top_level_only", "materiality_policy": "balance_default" + }, + { + "surface_key": "capital_expenditures", + "statement": "cash_flow", + "label": "Capital Expenditures", + "category": "investing", + "order": 130, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:PaymentsToAcquireCommercialRealEstate", + "us-gaap:PaymentsForCapitalImprovements", + "us-gaap:PaymentsForDepositsOnRealEstateAcquisitions" + ], + "allowed_authoritative_concepts": [], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "cash_flow_default", + "sign_transform": "invert" } ] } diff --git a/scripts/compare-fiscal-ai-statements.ts b/scripts/compare-fiscal-ai-statements.ts index a6f0283..2ff1e42 100644 --- a/scripts/compare-fiscal-ai-statements.ts +++ b/scripts/compare-fiscal-ai-statements.ts @@ -5,7 +5,7 @@ import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine'; import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types'; type 
ComparisonTarget = { - statement: Extract; + statement: Extract; surfaceKey: string; fiscalAiLabels: string[]; allowNotMeaningful?: boolean; @@ -46,7 +46,7 @@ type FiscalAiTable = { }; type ComparisonRow = { - statement: Extract; + statement: Extract; surfaceKey: string; fiscalAiLabel: string | null; fiscalAiValueM: number | null; @@ -89,6 +89,11 @@ const CASES: CompanyCase[] = [ surfaceKey: 'net_income', fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income'] }, + { statement: 'balance', surfaceKey: 'current_assets', fiscalAiLabels: ['Current Assets', 'Total Current Assets'] }, + { statement: 'balance', surfaceKey: 'total_assets', fiscalAiLabels: ['Total Assets'] }, + { statement: 'cash_flow', surfaceKey: 'operating_cash_flow', fiscalAiLabels: ['Cash from Operating Activities', 'Operating Cash Flow', 'Net Cash from Operations', 'Net Cash Provided by Operating'] }, + { statement: 'cash_flow', surfaceKey: 'capital_expenditures', fiscalAiLabels: ['Capital Expenditures', 'Capital Expenditure'] }, + { statement: 'cash_flow', surfaceKey: 'free_cash_flow', fiscalAiLabels: ['Free Cash Flow', 'Levered Free Cash Flow'] }, ] }, { @@ -113,6 +118,11 @@ const CASES: CompanyCase[] = [ surfaceKey: 'net_income', fiscalAiLabels: ['Net Income to Common', 'Net Income Attributable to Common Shareholders', 'Net Income'] }, + { statement: 'balance', surfaceKey: 'loans', fiscalAiLabels: ['Net Loans', 'Loans', 'Loans Receivable'] }, + { statement: 'balance', surfaceKey: 'total_assets', fiscalAiLabels: ['Total Assets'] }, + { statement: 'cash_flow', surfaceKey: 'operating_cash_flow', fiscalAiLabels: ['Cash from Operating Activities', 'Net Cash from Operating Activities', 'Net Cash Provided by Operating'] }, + { statement: 'cash_flow', surfaceKey: 'investing_cash_flow', fiscalAiLabels: ['Cash from Investing Activities', 'Net Cash from Investing Activities', 'Net Cash Provided by Investing'] }, + { statement: 'cash_flow', surfaceKey: 
'financing_cash_flow', fiscalAiLabels: ['Cash from Financing Activities', 'Net Cash from Financing Activities', 'Net Cash Provided by Financing'] }, ] }, { @@ -137,6 +147,18 @@ const CASES: CompanyCase[] = [ surfaceKey: 'net_income', fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income'] }, + { + statement: 'balance', + surfaceKey: 'deferred_acquisition_costs', + fiscalAiLabels: [ + 'Deferred Acquisition Costs', + 'Deferred Policy Acquisition Costs', + 'Deferred Policy Acquisition Costs and Value of Business Acquired' + ] + }, + { statement: 'balance', surfaceKey: 'total_assets', fiscalAiLabels: ['Total Assets'] }, + { statement: 'cash_flow', surfaceKey: 'operating_cash_flow', fiscalAiLabels: ['Cash from Operating Activities', 'Operating Cash Flow', 'Net Cash from Operations', 'Net Cash Provided by Operating'] }, + { statement: 'cash_flow', surfaceKey: 'free_cash_flow', fiscalAiLabels: ['Free Cash Flow', 'Levered Free Cash Flow'] }, ] }, { @@ -154,7 +176,22 @@ const CASES: CompanyCase[] = [ statement: 'income', surfaceKey: 'net_income', fiscalAiLabels: ['Net Income Attributable to Common Shareholders', 'Consolidated Net Income', 'Net Income'] - } + }, + { + statement: 'balance', + surfaceKey: 'investment_property', + fiscalAiLabels: [ + 'Investment Property', + 'Investment Properties', + 'Real Estate Investment Property, Net', + 'Real Estate Investment Property, at Cost', + 'Total real estate held for investment, at cost' + ] + }, + { statement: 'balance', surfaceKey: 'total_assets', fiscalAiLabels: ['Total Assets'] }, + { statement: 'cash_flow', surfaceKey: 'operating_cash_flow', fiscalAiLabels: ['Cash from Operating Activities', 'Operating Cash Flow', 'Net Cash from Operations', 'Net Cash Provided by Operating'] }, + { statement: 'cash_flow', surfaceKey: 'capital_expenditures', fiscalAiLabels: ['Capital Expenditures', 'Capital Expenditure'] }, + { statement: 'cash_flow', surfaceKey: 'free_cash_flow', 
fiscalAiLabels: ['Free Cash Flow', 'Levered Free Cash Flow'] } ] }, { @@ -184,6 +221,9 @@ const CASES: CompanyCase[] = [ ]; function parseTickerFilter(argv: string[]) { + let ticker: string | null = null; + let statement: Extract | null = null; + for (const arg of argv) { if (arg === '--help' || arg === '-h') { console.log('Compare live Fiscal.ai standardized statement rows against local sidecar output.'); @@ -191,16 +231,26 @@ function parseTickerFilter(argv: string[]) { console.log('Usage:'); console.log(' bun run scripts/compare-fiscal-ai-statements.ts'); console.log(' bun run scripts/compare-fiscal-ai-statements.ts --ticker=MSFT'); + console.log(' bun run scripts/compare-fiscal-ai-statements.ts --statement=balance'); + console.log(' bun run scripts/compare-fiscal-ai-statements.ts --statement=cash_flow'); process.exit(0); } if (arg.startsWith('--ticker=')) { const value = arg.slice('--ticker='.length).trim().toUpperCase(); - return value.length > 0 ? value : null; + ticker = value.length > 0 ? value : null; + continue; + } + + if (arg.startsWith('--statement=')) { + const value = arg.slice('--statement='.length).trim().toLowerCase().replace(/-/g, '_'); + if (value === 'income' || value === 'balance' || value === 'cash_flow') { + statement = value; + } } } - return null; + return { ticker, statement }; } function normalizeLabel(value: string) { @@ -295,10 +345,98 @@ function chooseInstantPeriodId(result: TaxonomyHydrationResult) { return instantPeriods[0]?.id ?? 
null; } +function parseColumnLabelPeriodEnd(columnLabel: string) { + const match = columnLabel.match(/^([A-Za-z]{3})\s+'?(\d{2,4})$/); + if (!match) { + return null; + } + + const [, monthToken, yearToken] = match; + const monthMap: Record = { + jan: 0, + feb: 1, + mar: 2, + apr: 3, + may: 4, + jun: 5, + jul: 6, + aug: 7, + sep: 8, + oct: 9, + nov: 10, + dec: 11 + }; + const month = monthMap[monthToken.toLowerCase()]; + if (month === undefined) { + return null; + } + + const parsedYear = Number.parseInt(yearToken, 10); + if (!Number.isFinite(parsedYear)) { + return null; + } + + const year = yearToken.length === 2 ? 2000 + parsedYear : parsedYear; + return { month, year }; +} + +function choosePeriodIdForColumnLabel( + result: TaxonomyHydrationResult, + statement: Extract, + columnLabel: string +) { + const parsed = parseColumnLabelPeriodEnd(columnLabel); + if (!parsed) { + return null; + } + + const matchingPeriods = result.periods + .filter((period): period is ResultPeriod => { + const end = periodEnd(period as ResultPeriod); + if (!end) { + return false; + } + const endDate = new Date(end); + if (Number.isNaN(endDate.getTime())) { + return false; + } + + const periodMatchesStatement = statement === 'balance' + ? !periodStart(period as ResultPeriod) + : Boolean(periodStart(period as ResultPeriod)); + if (!periodMatchesStatement) { + return false; + } + + return endDate.getUTCFullYear() === parsed.year && endDate.getUTCMonth() === parsed.month; + }) + .sort((left, right) => { + if (statement !== 'balance') { + const leftStart = periodStart(left); + const rightStart = periodStart(right); + const leftDuration = leftStart + ? Math.round((Date.parse(periodEnd(left) as string) - Date.parse(leftStart)) / (1000 * 60 * 60 * 24)) + : -1; + const rightDuration = rightStart + ? 
Math.round((Date.parse(periodEnd(right) as string) - Date.parse(rightStart)) / (1000 * 60 * 60 * 24)) + : -1; + + if (leftDuration !== rightDuration) { + return rightDuration - leftDuration; + } + } + + return Date.parse(periodEnd(right) as string) - Date.parse(periodEnd(left) as string); + }); + + return matchingPeriods[0]?.id ?? null; +} + function findSurfaceValue( result: TaxonomyHydrationResult, - statement: Extract, - surfaceKey: string + statement: Extract, + surfaceKey: string, + referenceColumnLabel?: string ) { const rows = result.surface_rows[statement] ?? []; const row = rows.find((entry) => entry.key === surfaceKey) ?? null; @@ -306,9 +444,11 @@ function findSurfaceValue( return { row: null, value: null }; } - const periodId = statement === 'balance' + const periodId = (referenceColumnLabel + ? choosePeriodIdForColumnLabel(result, statement, referenceColumnLabel) + : null) ?? (statement === 'balance' ? chooseInstantPeriodId(result) - : chooseDurationPeriodId(result); + : chooseDurationPeriodId(result)); if (periodId) { const directValue = row.values[periodId]; @@ -412,14 +552,24 @@ async function fetchLatestAnnualFiling(company: CompanyCase): Promise { - const pagePath = statement === 'income' ? 'income-statement' : 'balance-sheet'; + const pagePath = statement === 'income' + ? 'income-statement' + : statement === 'balance' + ? 
'balance-sheet' + : 'cash-flow-statement'; const url = `https://fiscal.ai/company/${exchangeTicker}/financials/${pagePath}/annual/?templateType=standardized`; await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 120_000 }); await page.waitForSelector('table', { timeout: 120_000 }); await page.waitForTimeout(2_500); + await page.evaluate(async () => { + window.scrollTo(0, document.body.scrollHeight); + await new Promise((resolve) => setTimeout(resolve, 750)); + window.scrollTo(0, 0); + await new Promise((resolve) => setTimeout(resolve, 250)); + }); return await page.evaluate(() => { function normalizeLabel(value: string) { @@ -452,45 +602,52 @@ async function scrapeFiscalAiTable( return Number.isFinite(parsed) ? (negative ? -Math.abs(parsed) : parsed) : null; } - const table = document.querySelector('table'); - if (!table) { + const tables = Array.from(document.querySelectorAll('table')); + if (tables.length === 0) { throw new Error('Fiscal.ai table not found'); } - const headerCells = Array.from(table.querySelectorAll('tr:first-child th, tr:first-child td')) - .map((cell) => cell.textContent?.trim() ?? '') - .filter((value) => value.length > 0); + const rowsByLabel = new Map(); + let columnLabel = 'unknown'; - const annualColumnIndex = headerCells.findIndex((value, index) => index > 0 && value !== 'LTM'); - if (annualColumnIndex < 0) { - throw new Error(`Could not locate latest annual column in headers: ${headerCells.join(' | ')}`); - } + for (const table of tables) { + const headerCells = Array.from(table.querySelectorAll('tr:first-child th, tr:first-child td')) + .map((cell) => cell.textContent?.trim() ?? 
'') + .filter((value) => value.length > 0); + const annualColumnIndex = headerCells.findIndex((value, index) => index > 0 && value !== 'LTM'); + if (annualColumnIndex < 0) { + continue; + } - const rows = Array.from(table.querySelectorAll('tr')) - .slice(1) - .map((row) => { + if (columnLabel === 'unknown') { + columnLabel = headerCells[annualColumnIndex] ?? 'unknown'; + } + + for (const row of Array.from(table.querySelectorAll('tr')).slice(1)) { const cells = Array.from(row.querySelectorAll('td')); if (cells.length <= annualColumnIndex) { - return null; + continue; } const label = cells[0]?.textContent?.trim() ?? ''; const valueText = cells[annualColumnIndex]?.textContent?.trim() ?? ''; if (!label) { - return null; + continue; } - return { + rowsByLabel.set(label, { label, normalizedLabel: normalizeLabel(label), valueText, value: parseDisplayedNumber(valueText) - }; - }) - .filter((entry): entry is FiscalAiTableRow => entry !== null); + }); + } + } + + const rows = Array.from(rowsByLabel.values()); return { - columnLabel: headerCells[annualColumnIndex] ?? 'unknown', + columnLabel, rows }; }); @@ -536,7 +693,7 @@ function compareRow( ): ComparisonRow { const fiscalAiRow = findFiscalAiRow(fiscalAiTable.rows, target.fiscalAiLabels); const fiscalAiValueM = fiscalAiRow?.value ?? null; - const ourSurface = findSurfaceValue(result, target.statement, target.surfaceKey); + const ourSurface = findSurfaceValue(result, target.statement, target.surfaceKey, fiscalAiTable.columnLabel); const ourValueM = roundMillions(ourSurface.value); const absDiffM = absoluteDiff(ourValueM, fiscalAiValueM); const relDiffValue = relativeDiff(ourValueM, fiscalAiValueM); @@ -587,17 +744,34 @@ async function compareCase(page: import('@playwright/test').Page, company: Compa throw new Error(`${company.ticker} parse_status=${result.parse_status}${result.parse_error ? 
` parse_error=${result.parse_error}` : ''}`); } - const incomeTable = await scrapeFiscalAiTable(page, company.exchangeTicker, 'income'); - const balanceTable = await scrapeFiscalAiTable(page, company.exchangeTicker, 'balance'); + const statementKinds = new Set(company.comparisons.map((target) => target.statement)); + const incomeTable = statementKinds.has('income') + ? await scrapeFiscalAiTable(page, company.exchangeTicker, 'income') + : null; + const balanceTable = statementKinds.has('balance') + ? await scrapeFiscalAiTable(page, company.exchangeTicker, 'balance') + : null; + const cashFlowTable = statementKinds.has('cash_flow') + ? await scrapeFiscalAiTable(page, company.exchangeTicker, 'cash_flow') + : null; const rows = company.comparisons.map((target) => { - const table = target.statement === 'income' ? incomeTable : balanceTable; + const table = target.statement === 'income' + ? incomeTable + : target.statement === 'balance' + ? balanceTable + : cashFlowTable; + if (!table) { + throw new Error(`Missing scraped table for ${target.statement}`); + } return compareRow(target, result, table); }); - const failures = rows.filter((row) => row.status === 'fail' || row.status === 'missing_ours'); + const failures = rows.filter( + (row) => row.status === 'fail' || row.status === 'missing_ours' || row.status === 'missing_reference' + ); console.log( - `[compare-fiscal-ai] ${company.ticker} filing=${filing.accessionNumber} fiscal_pack=${result.fiscal_pack ?? 'null'} income_column="${incomeTable.columnLabel}" balance_column="${balanceTable.columnLabel}" pass=${rows.length - failures.length}/${rows.length}` + `[compare-fiscal-ai] ${company.ticker} filing=${filing.accessionNumber} fiscal_pack=${result.fiscal_pack ?? 'null'} income_column="${incomeTable?.columnLabel ?? 'n/a'}" balance_column="${balanceTable?.columnLabel ?? 'n/a'}" cash_flow_column="${cashFlowTable?.columnLabel ?? 
'n/a'}" pass=${rows.length - failures.length}/${rows.length}` ); for (const row of rows) { console.log( @@ -625,18 +799,28 @@ async function compareCase(page: import('@playwright/test').Page, company: Compa async function main() { process.env.XBRL_ENGINE_TIMEOUT_MS = process.env.XBRL_ENGINE_TIMEOUT_MS ?? '180000'; - const tickerFilter = parseTickerFilter(process.argv.slice(2)); - const selectedCases = tickerFilter - ? CASES.filter((entry) => entry.ticker === tickerFilter) - : CASES; + const filters = parseTickerFilter(process.argv.slice(2)); + const selectedCases = (filters.ticker + ? CASES.filter((entry) => entry.ticker === filters.ticker) + : CASES + ) + .map((entry) => ({ + ...entry, + comparisons: filters.statement + ? entry.comparisons.filter((target) => target.statement === filters.statement) + : entry.comparisons + })) + .filter((entry) => entry.comparisons.length > 0); if (selectedCases.length === 0) { - console.error(`[compare-fiscal-ai] unknown ticker: ${tickerFilter}`); + console.error( + `[compare-fiscal-ai] no matching cases for ticker=${filters.ticker ?? 'all'} statement=${filters.statement ?? 'all'}` + ); process.exitCode = 1; return; } - const browser = await chromium.launch({ headless: false }); + const browser = await chromium.launch({ headless: true }); const page = await browser.newPage({ userAgent: BROWSER_USER_AGENT });