Files
Neon-Desk/rust/fiscal-xbrl-core/src/lib.rs
francy51 24aa8e33d4 Consolidate metric definitions with Rust JSON as single source of truth
- Add core.computed.json with 32 ratio definitions (filing + market derived)
- Add Rust types for ComputedDefinition and ComputationSpec
- Create generate-taxonomy.ts to generate TypeScript from Rust JSON
- Generate lib/generated/ (gitignored) with surfaces, computed, kpis
- Update financial-metrics.ts to use generated definitions
- Add build-time generation via 'bun run generate'
- Add taxonomy architecture documentation

Two-phase ratio computation:
- Filing-derived: margins, returns, per-share, growth (Rust computes)
- Market-derived: valuation ratios (TypeScript computes with price data)

All 32 ratios defined in core.computed.json:
- Margins: gross, operating, ebitda, net, fcf
- Returns: roa, roe, roic, roce
- Financial health: debt_to_equity, net_debt_to_ebitda, cash_to_debt, current_ratio
- Per-share: revenue, fcf, book_value
- Growth: yoy metrics + 3y/5y cagr
- Valuation: market_cap, ev, p/e, p/fcf, p/b, ev/sales, ev/ebitda, ev/fcf
2026-03-15 15:22:51 -04:00

2461 lines
81 KiB
Rust

use anyhow::{anyhow, Context, Result};
use once_cell::sync::Lazy;
use regex::Regex;
use reqwest::blocking::Client;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashMap, HashSet};
mod kpi_mapper;
mod metrics;
mod pack_selector;
mod surface_mapper;
mod taxonomy_loader;
mod universal_income;
use taxonomy_loader::{ComputationSpec, ComputedDefinition};
#[cfg(feature = "with-crabrl")]
use crabrl as _;
/// Engine identifier stamped into every response for provenance.
pub const PARSER_ENGINE: &str = "fiscal-xbrl";
/// Crate version, stamped into every response for provenance.
pub const PARSER_VERSION: &str = env!("CARGO_PKG_VERSION");

// Regex-based XBRL/linkbase scanning. A full XML parser is deliberately not
// used here; every tag pattern tolerates an optional namespace prefix
// (`(?:[a-z0-9_\-]+:)?`) and runs case-insensitively with dot-matches-newline
// (`(?is)`) so elements spanning multiple lines are captured.

// `<context id="...">...</context>`: captures (1) the context id, (2) the
// inner block (entity/period/segment/scenario).
static CONTEXT_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?context>"#).unwrap()
});
// `<unit id="...">...</unit>`: captures (1) unit id, (2) inner block.
static UNIT_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?unit>"#).unwrap()
});
// A fact element with a contextRef: captures (1) prefix, (2) local name,
// (3) raw attribute string, (4) element body. Note the closing tag is not
// required to match the opening prefix/name.
static FACT_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'][^>]*)>(.*?)</[a-zA-Z0-9_\-]+:[a-zA-Z0-9_\-.]+>"#).unwrap()
});
// `<explicitMember dimension="...">member</explicitMember>` inside a
// segment/scenario: captures (1) dimension (axis), (2) member QName.
static EXPLICIT_MEMBER_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?explicitMember>"#).unwrap()
});
// `<typedMember dimension="...">value</typedMember>`: captures (1) dimension,
// (2) typed value body.
static TYPED_MEMBER_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?typedMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?typedMember>"#).unwrap()
});
// `<identifier scheme="...">value</identifier>` within a context's entity:
// captures (1) scheme URI, (2) identifier text.
static IDENTIFIER_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?identifier\b[^>]*\bscheme=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?identifier>"#).unwrap()
});
// Segment / scenario containers: capture (1) inner block.
static SEGMENT_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?segment\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?segment>"#)
        .unwrap()
});
static SCENARIO_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?scenario\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?scenario>"#)
        .unwrap()
});
// Period element bodies: capture (1) the date/datetime text.
static START_DATE_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?startDate>(.*?)</(?:[a-z0-9_\-]+:)?startDate>"#).unwrap()
});
static END_DATE_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?endDate>(.*?)</(?:[a-z0-9_\-]+:)?endDate>"#).unwrap()
});
static INSTANT_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?instant>(.*?)</(?:[a-z0-9_\-]+:)?instant>"#).unwrap()
});
// `<measure>` inside a unit: captures (1) the measure QName.
static MEASURE_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?measure>(.*?)</(?:[a-z0-9_\-]+:)?measure>"#).unwrap()
});
// Label-linkbase structure: the link container, locators, label resources,
// and the arcs tying locators to resources.
static LABEL_LINK_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?labelLink>"#)
        .unwrap()
});
// Presentation-linkbase structure: captures (1) link attributes (role), (2)
// inner block of locators and arcs.
static PRESENTATION_LINK_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>(.*?)</(?:[a-z0-9_\-]+:)?presentationLink>"#).unwrap()
});
// `<loc .../>` locator, possibly self-closing: captures (1) attributes.
static LOC_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?loc\b([^>]*)/?>(?:</(?:[a-z0-9_\-]+:)?loc>)?"#).unwrap()
});
// `<label ...>text</label>` resource: captures (1) attributes, (2) label text.
static LABEL_RESOURCE_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?label\b([^>]*)>(.*?)</(?:[a-z0-9_\-]+:)?label>"#).unwrap()
});
// Arcs, possibly self-closing: capture (1) attributes (from/to/order).
static LABEL_ARC_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)/?>(?:</(?:[a-z0-9_\-]+:)?labelArc>)?"#)
        .unwrap()
});
static PRESENTATION_ARC_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)/?>(?:</(?:[a-z0-9_\-]+:)?presentationArc>)?"#).unwrap()
});
// Generic key="value" attribute pair scanner used on raw attribute strings.
static ATTR_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"([a-zA-Z0-9:_\-]+)=["']([^"']+)["']"#).unwrap());
/// Input payload for [`hydrate_filing`], describing one SEC filing to fetch
/// and parse. Deserialized from camelCase JSON.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct HydrateFilingRequest {
    pub filing_id: i64,
    pub ticker: String,
    // May be zero-padded; normalized by `normalize_cik_for_path` before
    // building EDGAR URLs.
    pub cik: String,
    pub accession_number: String,
    pub filing_date: String,
    pub filing_type: String,
    // Optional direct URL to the filing document; used to derive the EDGAR
    // directory URL and as a last-resort asset when the directory listing
    // is unavailable.
    pub filing_url: Option<String>,
    // Primary document filename, used to score candidate instance files.
    pub primary_document: Option<String>,
    // NOTE(review): not read anywhere in this file — presumably consumed by a
    // caller or another module; confirm before removing.
    pub cache_dir: Option<String>,
}
/// Full result of hydrating one filing: parse status, normalized statement
/// models, raw facts/contexts, and provenance metadata.
#[derive(Debug, Serialize)]
pub struct HydrateFilingResponse {
    pub filing_id: i64,
    // Upper-cased copy of the request ticker.
    pub ticker: String,
    pub filing_date: String,
    pub filing_type: String,
    // "ready" | "partial" | "failed" (see `hydrate_filing`).
    pub parse_status: String,
    pub parse_error: Option<String>,
    // Provenance: "xbrl_instance", "xbrl_instance_with_linkbase", or
    // "legacy_html_fallback" (when no instance document was found).
    pub source: String,
    pub parser_engine: String,
    pub parser_version: String,
    // "us-gaap" | "ifrs-full" | "unknown" (see `infer_taxonomy_regime`).
    pub taxonomy_regime: String,
    pub fiscal_pack: Option<String>,
    pub periods: Vec<PeriodOutput>,
    // Currently a clone of `statement_rows` (the un-normalized view).
    pub faithful_rows: StatementRowMap,
    pub statement_rows: StatementRowMap,
    pub surface_rows: SurfaceRowMap,
    pub detail_rows: DetailRowStatementMap,
    pub kpi_rows: Vec<KpiRowOutput>,
    pub computed_definitions: Vec<ComputedDefinitionOutput>,
    pub contexts: Vec<ContextOutput>,
    pub derived_metrics: FilingMetrics,
    pub validation_result: ValidationResultOutput,
    pub facts_count: usize,
    pub concepts_count: usize,
    // Number of distinct axis::member pairs across all facts.
    pub dimensions_count: usize,
    pub assets: Vec<AssetOutput>,
    pub concepts: Vec<ConceptOutput>,
    pub facts: Vec<FactOutput>,
    pub metric_validations: Vec<MetricValidationOutput>,
    pub normalization_summary: NormalizationSummaryOutput,
}
/// Headline metrics derived from parsed facts (see `metrics::derive_metrics`).
#[derive(Debug, Clone, Serialize, Default)]
pub struct FilingMetrics {
    pub revenue: Option<f64>,
    #[serde(rename = "netIncome")]
    pub net_income: Option<f64>,
    #[serde(rename = "totalAssets")]
    pub total_assets: Option<f64>,
    pub cash: Option<f64>,
    pub debt: Option<f64>,
}
/// Validation outcome; this module only ever emits `status: "not_run"`.
#[derive(Debug, Clone, Serialize)]
pub struct ValidationResultOutput {
    pub status: String,
    // Free-form check payloads; the shape is owned by the validator, not here.
    pub checks: Vec<serde_json::Value>,
    #[serde(rename = "validatedAt")]
    pub validated_at: Option<String>,
}
/// One file discovered in the filing's EDGAR directory.
#[derive(Debug, Clone, Serialize)]
pub struct AssetOutput {
    // One of "instance", "presentation", "label", "calculation", "definition",
    // "schema", "pdf", "other" (see `classify_asset_type`).
    pub asset_type: String,
    pub name: String,
    pub url: String,
    pub size_bytes: Option<i64>,
    // Selection score; populated only for "instance" and "pdf" assets.
    pub score: Option<f64>,
    pub is_selected: bool,
}
/// A reporting-period column attached to the filing.
#[derive(Debug, Clone, Serialize)]
pub struct PeriodOutput {
    pub id: String,
    pub filing_id: i64,
    pub accession_number: String,
    pub filing_date: String,
    pub period_start: Option<String>,
    pub period_end: Option<String>,
    pub filing_type: String,
    pub period_label: String,
}
/// Serialized form of one XBRL `<context>` element.
#[derive(Debug, Clone, Serialize)]
pub struct ContextOutput {
    pub context_id: String,
    pub entity_identifier: Option<String>,
    pub entity_scheme: Option<String>,
    // Duration contexts set start/end; instant contexts set `period_instant`.
    pub period_start: Option<String>,
    pub period_end: Option<String>,
    pub period_instant: Option<String>,
    // Raw JSON renderings of the segment/scenario containers (see
    // `parse_dimension_container`).
    pub segment_json: Option<serde_json::Value>,
    pub scenario_json: Option<serde_json::Value>,
}
/// One line item of a materialized ("faithful") financial statement.
#[derive(Debug, Clone, Serialize)]
pub struct StatementRowOutput {
    pub key: String,
    pub label: String,
    pub concept_key: String,
    pub qname: String,
    pub namespace_uri: String,
    pub local_name: String,
    pub is_extension: bool,
    pub statement: String,
    pub role_uri: Option<String>,
    // Presentation-tree position.
    pub order: i64,
    pub depth: i64,
    pub parent_key: Option<String>,
    // Per-period values/units; map keys identify period columns.
    pub values: BTreeMap<String, Option<f64>>,
    pub units: BTreeMap<String, Option<String>>,
    pub has_dimensions: bool,
    pub source_fact_ids: Vec<i64>,
}
/// One row of the compact "surface" model built by `surface_mapper`.
#[derive(Debug, Clone, Serialize)]
pub struct SurfaceRowOutput {
    pub key: String,
    pub label: String,
    pub category: String,
    pub template_section: String,
    pub order: i64,
    pub unit: String,
    // Per-period values; map keys identify period columns.
    pub values: BTreeMap<String, Option<f64>>,
    // Provenance back to concepts / faithful rows / raw facts.
    pub source_concepts: Vec<String>,
    pub source_row_keys: Vec<String>,
    pub source_fact_ids: Vec<i64>,
    pub formula_key: Option<String>,
    pub has_dimensions: bool,
    pub resolved_source_row_keys: BTreeMap<String, Option<String>>,
    pub statement: Option<String>,
    pub detail_count: Option<i64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resolution_method: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<String>,
    // NOTE(review): `#[serde(default)]` only affects Deserialize, which this
    // type does not derive; it is inert here.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub warning_codes: Vec<String>,
}
/// A drill-down row attached to one surface row.
#[derive(Debug, Clone, Serialize)]
pub struct DetailRowOutput {
    pub key: String,
    pub parent_surface_key: String,
    pub label: String,
    pub concept_key: String,
    pub qname: String,
    pub namespace_uri: String,
    pub local_name: String,
    pub unit: Option<String>,
    pub values: BTreeMap<String, Option<f64>>,
    pub source_fact_ids: Vec<i64>,
    pub is_extension: bool,
    pub dimensions_summary: Vec<String>,
    // True when this row is a computed remainder rather than a reported fact.
    pub residual_flag: bool,
}
/// One KPI row built by `kpi_mapper::build_taxonomy_kpis`.
#[derive(Debug, Clone, Serialize)]
pub struct KpiRowOutput {
    pub key: String,
    pub label: String,
    pub category: String,
    pub unit: String,
    pub order: i64,
    // Optional dimensional qualification of the KPI.
    pub segment: Option<String>,
    pub axis: Option<String>,
    pub member: Option<String>,
    pub values: BTreeMap<String, Option<f64>>,
    pub source_concepts: Vec<String>,
    pub source_fact_ids: Vec<i64>,
    pub provenance_type: String,
    pub has_dimensions: bool,
}
/// A computed-metric (ratio) definition surfaced to downstream consumers;
/// mirrors `taxonomy_loader::ComputedDefinition`.
#[derive(Debug, Clone, Serialize)]
pub struct ComputedDefinitionOutput {
    pub key: String,
    pub label: String,
    pub category: String,
    pub order: i64,
    pub unit: String,
    pub computation: ComputationSpecOutput,
    // NOTE(review): `#[serde(default)]` only affects Deserialize, which this
    // type does not derive; it is inert here.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub supported_cadences: Vec<String>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub requires_external_data: Vec<String>,
}
/// How a computed metric is evaluated. Serialized with a `"type"` tag in
/// snake_case, e.g. `{"type": "yoy_growth", "source": "..."}`; mirrors
/// `taxonomy_loader::ComputationSpec`.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ComputationSpecOutput {
    /// Quotient of two other metric keys.
    Ratio {
        numerator: String,
        denominator: String,
    },
    /// Year-over-year growth of the `source` metric.
    YoyGrowth {
        source: String,
    },
    /// Compound annual growth rate of `source` over `years`.
    Cagr {
        source: String,
        years: i64,
    },
    /// `source` divided by the share count found under `shares_key`.
    PerShare {
        source: String,
        shares_key: String,
    },
    /// Free-form formula string, evaluated elsewhere.
    Simple {
        formula: String,
    },
}
impl From<&ComputationSpec> for ComputationSpecOutput {
fn from(spec: &ComputationSpec) -> Self {
match spec {
ComputationSpec::Ratio {
numerator,
denominator,
} => ComputationSpecOutput::Ratio {
numerator: numerator.clone(),
denominator: denominator.clone(),
},
ComputationSpec::YoyGrowth { source } => ComputationSpecOutput::YoyGrowth {
source: source.clone(),
},
ComputationSpec::Cagr { source, years } => ComputationSpecOutput::Cagr {
source: source.clone(),
years: *years,
},
ComputationSpec::PerShare { source, shares_key } => ComputationSpecOutput::PerShare {
source: source.clone(),
shares_key: shares_key.clone(),
},
ComputationSpec::Simple { formula } => ComputationSpecOutput::Simple {
formula: formula.clone(),
},
}
}
}
impl From<&ComputedDefinition> for ComputedDefinitionOutput {
fn from(def: &ComputedDefinition) -> Self {
ComputedDefinitionOutput {
key: def.key.clone(),
label: def.label.clone(),
category: def.category.clone(),
order: def.order,
unit: def.unit.clone(),
computation: ComputationSpecOutput::from(&def.computation),
supported_cadences: def.supported_cadences.clone(),
requires_external_data: def.requires_external_data.clone(),
}
}
}
/// One concept (taxonomy element) observed in the filing, plus the mapping
/// assignments applied by `surface_mapper::apply_mapping_assignments`.
#[derive(Debug, Clone, Serialize)]
pub struct ConceptOutput {
    pub concept_key: String,
    pub qname: String,
    pub namespace_uri: String,
    pub local_name: String,
    pub label: Option<String>,
    pub is_extension: bool,
    pub balance: Option<String>,
    pub period_type: Option<String>,
    pub data_type: Option<String>,
    pub statement_kind: Option<String>,
    pub role_uri: Option<String>,
    // Mapping results: which canonical concept / surface row / KPI this
    // concept was assigned to, and by which method.
    pub authoritative_concept_key: Option<String>,
    pub mapping_method: Option<String>,
    pub surface_key: Option<String>,
    pub detail_parent_surface_key: Option<String>,
    pub kpi_key: Option<String>,
    pub residual_flag: bool,
    // Presentation-linkbase position, when available.
    pub presentation_order: Option<f64>,
    pub presentation_depth: Option<i64>,
    pub parent_concept_key: Option<String>,
    pub is_abstract: bool,
}
/// One numeric fact, flattened with its context's period/dimensions and the
/// concept-level mapping assignments.
#[derive(Debug, Clone, Serialize)]
pub struct FactOutput {
    pub concept_key: String,
    pub qname: String,
    pub namespace_uri: String,
    pub local_name: String,
    pub data_type: Option<String>,
    pub statement_kind: Option<String>,
    pub role_uri: Option<String>,
    pub authoritative_concept_key: Option<String>,
    pub mapping_method: Option<String>,
    pub surface_key: Option<String>,
    pub detail_parent_surface_key: Option<String>,
    pub kpi_key: Option<String>,
    pub residual_flag: bool,
    pub context_id: String,
    pub unit: Option<String>,
    // Raw decimals/precision attributes, kept as strings.
    pub decimals: Option<String>,
    pub precision: Option<String>,
    pub nil: bool,
    pub value_num: f64,
    pub period_start: Option<String>,
    pub period_end: Option<String>,
    pub period_instant: Option<String>,
    pub dimensions: Vec<DimensionOutput>,
    pub is_dimensionless: bool,
    // Name of the instance file the fact came from.
    pub source_file: Option<String>,
}
/// One explicit dimension qualifying a fact: axis QName + member QName.
#[derive(Debug, Clone, Serialize)]
pub struct DimensionOutput {
    pub axis: String,
    pub member: String,
}
/// Comparison of a taxonomy-derived metric against an LLM-extracted value.
/// This module always emits an empty list; the shape is owned downstream.
#[derive(Debug, Clone, Serialize)]
pub struct MetricValidationOutput {
    pub metric_key: String,
    pub taxonomy_value: Option<f64>,
    pub llm_value: Option<f64>,
    pub absolute_diff: Option<f64>,
    pub relative_diff: Option<f64>,
    pub status: String,
    pub evidence_pages: Vec<i64>,
    pub pdf_url: Option<String>,
    pub provider: Option<String>,
    pub model: Option<String>,
    pub error: Option<String>,
}
/// Counters and warnings describing how well the filing normalized into the
/// surface/detail/KPI models.
#[derive(Debug, Clone, Serialize, Default)]
pub struct NormalizationSummaryOutput {
    pub surface_row_count: usize,
    pub detail_row_count: usize,
    pub kpi_row_count: usize,
    pub unmapped_row_count: usize,
    pub material_unmapped_row_count: usize,
    pub warnings: Vec<String>,
}
// Statement name -> rows; surface maps are keyed the same way. Detail rows
// are nested one level deeper (statement -> surface key -> rows).
pub type StatementRowMap = BTreeMap<String, Vec<StatementRowOutput>>;
pub type SurfaceRowMap = BTreeMap<String, Vec<SurfaceRowOutput>>;
pub type DetailRowStatementMap = BTreeMap<String, BTreeMap<String, Vec<DetailRowOutput>>>;
/// Internal representation of a parsed `<context>` element.
#[derive(Debug, Clone)]
struct ParsedContext {
    id: String,
    entity_identifier: Option<String>,
    entity_scheme: Option<String>,
    period_start: Option<String>,
    period_end: Option<String>,
    period_instant: Option<String>,
    // Flattened explicit members from both segment and scenario.
    dimensions: Vec<DimensionOutput>,
    // Raw JSON renderings of the segment/scenario containers.
    segment: Option<serde_json::Value>,
    scenario: Option<serde_json::Value>,
}
/// Internal representation of a `<unit>` element: its `<measure>` value,
/// when one was found.
#[derive(Debug, Clone)]
struct ParsedUnit {
    measure: Option<String>,
}
/// Internal representation of one numeric fact; fields largely mirror
/// [`FactOutput`] before mapping assignments are applied.
#[derive(Debug, Clone)]
struct ParsedFact {
    concept_key: String,
    qname: String,
    namespace_uri: String,
    local_name: String,
    data_type: Option<String>,
    context_id: String,
    unit: Option<String>,
    decimals: Option<String>,
    precision: Option<String>,
    nil: bool,
    value: f64,
    // Period fields denormalized from the referenced context.
    period_start: Option<String>,
    period_end: Option<String>,
    period_instant: Option<String>,
    dimensions: Vec<DimensionOutput>,
    is_dimensionless: bool,
    source_file: Option<String>,
}
/// One concept's position in a presentation-linkbase tree.
#[derive(Debug, Clone)]
struct PresentationNode {
    concept_key: String,
    role_uri: String,
    order: f64,
    depth: i64,
    parent_concept_key: Option<String>,
    is_abstract: bool,
}
/// Fetch, parse, and normalize one SEC XBRL filing end-to-end.
///
/// Pipeline:
/// 1. Discover the filing's EDGAR directory and select the best instance,
///    presentation, and label files.
/// 2. Download and regex-parse the instance into facts and contexts.
/// 3. Merge optional presentation/label linkbases (best-effort).
/// 4. Materialize statement rows, select a fiscal pack, and build the compact
///    surface model, KPI rows, and computed-metric definitions.
///
/// When no selected instance document exists, a fully-formed response with
/// `parse_status == "failed"` is returned instead of an `Err`, so callers can
/// persist the failure.
///
/// # Errors
/// Fails on HTTP-client construction, the instance fetch itself, or when
/// downstream model building (`surface_mapper`, `universal_income`,
/// `kpi_mapper`) returns an error.
pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingResponse> {
    let client = Client::builder()
        .user_agent("Fiscal Clone <support@fiscal.local>")
        .build()
        .context("unable to build HTTP client")?;
    let discovered = discover_filing_assets(&input, &client)?;
    let empty_rows = empty_statement_row_map();
    let empty_surface_rows = empty_surface_row_map();
    let empty_detail_rows = empty_detail_row_map();
    // Validation is not performed at this stage; emit a "not_run" placeholder.
    let validation_result = ValidationResultOutput {
        status: "not_run".to_string(),
        checks: vec![],
        validated_at: None,
    };
    // No selected instance document: nothing to parse, return the "failed"
    // shape with whatever assets were discovered.
    let Some(instance_asset) = discovered
        .assets
        .iter()
        .find(|asset| asset.asset_type == "instance" && asset.is_selected)
        .cloned()
    else {
        return Ok(HydrateFilingResponse {
            filing_id: input.filing_id,
            ticker: input.ticker.to_uppercase(),
            filing_date: input.filing_date,
            filing_type: input.filing_type,
            parse_status: "failed".to_string(),
            parse_error: Some("No XBRL instance found".to_string()),
            source: "legacy_html_fallback".to_string(),
            parser_engine: PARSER_ENGINE.to_string(),
            parser_version: PARSER_VERSION.to_string(),
            taxonomy_regime: "unknown".to_string(),
            fiscal_pack: Some("core".to_string()),
            periods: vec![],
            faithful_rows: empty_rows.clone(),
            statement_rows: empty_rows,
            surface_rows: empty_surface_rows,
            detail_rows: empty_detail_rows,
            kpi_rows: vec![],
            computed_definitions: vec![],
            contexts: vec![],
            derived_metrics: FilingMetrics::default(),
            validation_result,
            facts_count: 0,
            concepts_count: 0,
            dimensions_count: 0,
            assets: discovered.assets,
            concepts: vec![],
            facts: vec![],
            metric_validations: vec![],
            normalization_summary: NormalizationSummaryOutput {
                surface_row_count: 0,
                detail_row_count: 0,
                kpi_row_count: 0,
                unmapped_row_count: 0,
                material_unmapped_row_count: 0,
                warnings: vec![],
            },
        });
    };
    let instance_text = fetch_text(&client, &instance_asset.url)
        .context("fetch request failed for XBRL instance")?;
    let parsed_instance = parse_xbrl_instance(&instance_text, Some(instance_asset.name.clone()));
    // Linkbases are optional enrichments: fetch failures are recorded (first
    // error only) but never abort the parse.
    let mut label_by_concept = HashMap::new();
    let mut presentation = Vec::new();
    let mut source = "xbrl_instance".to_string();
    let mut parse_error = None;
    for asset in discovered.assets.iter().filter(|asset| {
        asset.is_selected && (asset.asset_type == "presentation" || asset.asset_type == "label")
    }) {
        match fetch_text(&client, &asset.url) {
            Ok(content) => {
                if asset.asset_type == "presentation" {
                    let parsed = parse_presentation_linkbase(&content);
                    if !parsed.is_empty() {
                        source = "xbrl_instance_with_linkbase".to_string();
                    }
                    presentation.extend(parsed);
                } else {
                    // First label wins when multiple linkbases define one.
                    for (key, value) in parse_label_linkbase(&content) {
                        label_by_concept.entry(key).or_insert(value);
                    }
                }
            }
            Err(error) => {
                if parse_error.is_none() {
                    parse_error = Some(error.to_string());
                }
            }
        }
    }
    let materialized = materialize_taxonomy_statements(
        input.filing_id,
        &input.accession_number,
        &input.filing_date,
        &input.filing_type,
        &parsed_instance.facts,
        &presentation,
        &label_by_concept,
    );
    let taxonomy_regime = infer_taxonomy_regime(&parsed_instance.facts);
    let mut concepts = materialized.concepts;
    let mut facts = materialized.facts;
    // Choose the fiscal "pack" and build the compact surface model on top of
    // the faithful statement rows.
    let pack_selection = pack_selector::select_fiscal_pack(&materialized.statement_rows, &facts);
    let fiscal_pack = pack_selection.pack.as_str().to_string();
    let mut compact_model = surface_mapper::build_compact_surface_model(
        &materialized.periods,
        &materialized.statement_rows,
        &taxonomy_regime,
        pack_selection.pack,
        pack_selection.warnings,
    )?;
    universal_income::apply_universal_income_rows(
        &materialized.periods,
        &materialized.statement_rows,
        &facts,
        &taxonomy_regime,
        pack_selection.pack,
        &mut compact_model,
    )?;
    let kpi_result =
        kpi_mapper::build_taxonomy_kpis(&materialized.periods, &facts, pack_selection.pack)?;
    compact_model.normalization_summary.kpi_row_count = kpi_result.rows.len();
    // Fold KPI warnings into the shared summary, skipping duplicates.
    for warning in kpi_result.warnings {
        if !compact_model
            .normalization_summary
            .warnings
            .contains(&warning)
        {
            compact_model.normalization_summary.warnings.push(warning);
        }
    }
    surface_mapper::merge_mapping_assignments(
        &mut compact_model.concept_mappings,
        kpi_result.mapping_assignments,
    );
    // Stamp the merged mapping assignments back onto concepts and facts.
    surface_mapper::apply_mapping_assignments(
        &mut concepts,
        &mut facts,
        &compact_model.concept_mappings,
    );
    // Ratio definitions: pack-specific first, then the core pack, then none.
    let computed_pack = taxonomy_loader::load_computed_pack(pack_selection.pack)
        .ok()
        .or_else(|| taxonomy_loader::load_computed_pack(pack_selector::FiscalPack::Core).ok());
    let computed_definitions: Vec<ComputedDefinitionOutput> = computed_pack
        .map(|pack| {
            pack.computed
                .iter()
                .map(ComputedDefinitionOutput::from)
                .collect()
        })
        .unwrap_or_default();
    // ready = rows and facts; partial = facts only; failed = neither.
    let has_rows = materialized
        .statement_rows
        .values()
        .map(|rows| rows.len())
        .sum::<usize>()
        > 0;
    let has_facts = !facts.is_empty();
    let parse_status = if has_rows && has_facts {
        "ready"
    } else if has_facts {
        "partial"
    } else {
        "failed"
    };
    Ok(HydrateFilingResponse {
        filing_id: input.filing_id,
        ticker: input.ticker.to_uppercase(),
        filing_date: input.filing_date,
        filing_type: input.filing_type,
        parse_status: parse_status.to_string(),
        // On outright failure, surface the first linkbase fetch error if one
        // was recorded, otherwise a generic message.
        parse_error: if parse_status == "failed" {
            Some(parse_error.unwrap_or_else(|| "No XBRL facts extracted".to_string()))
        } else {
            parse_error
        },
        source,
        parser_engine: PARSER_ENGINE.to_string(),
        parser_version: PARSER_VERSION.to_string(),
        taxonomy_regime,
        fiscal_pack: Some(fiscal_pack),
        periods: materialized.periods,
        faithful_rows: materialized.statement_rows.clone(),
        statement_rows: materialized.statement_rows,
        surface_rows: compact_model.surface_rows,
        detail_rows: compact_model.detail_rows,
        kpi_rows: kpi_result.rows,
        computed_definitions,
        contexts: parsed_instance.contexts,
        derived_metrics: metrics::derive_metrics(&facts),
        validation_result,
        facts_count: facts.len(),
        concepts_count: concepts.len(),
        // Distinct axis::member pairs across all facts.
        dimensions_count: facts
            .iter()
            .flat_map(|fact| {
                fact.dimensions
                    .iter()
                    .map(|dimension| format!("{}::{}", dimension.axis, dimension.member))
            })
            .collect::<HashSet<_>>()
            .len(),
        assets: discovered.assets,
        concepts,
        facts,
        metric_validations: vec![],
        normalization_summary: compact_model.normalization_summary,
    })
}
/// Classify the accounting taxonomy from fact namespace URIs.
///
/// US GAAP takes precedence over IFRS when both namespaces appear anywhere in
/// the fact set; anything else is reported as "unknown".
fn infer_taxonomy_regime(facts: &[ParsedFact]) -> String {
    let mut saw_ifrs = false;
    for fact in facts {
        let namespace = fact.namespace_uri.to_lowercase();
        if namespace.contains("us-gaap") {
            return "us-gaap".to_string();
        }
        saw_ifrs = saw_ifrs || namespace.contains("ifrs");
    }
    if saw_ifrs {
        "ifrs-full".to_string()
    } else {
        "unknown".to_string()
    }
}
/// Shape of EDGAR's directory `index.json` (only the fields this module
/// reads).
#[derive(Debug, Deserialize)]
struct FilingDirectoryPayload {
    directory: Option<FilingDirectory>,
}
#[derive(Debug, Deserialize)]
struct FilingDirectory {
    item: Option<Vec<FilingDirectoryItem>>,
}
#[derive(Debug, Deserialize)]
struct FilingDirectoryItem {
    name: Option<String>,
    // `size` may arrive as a JSON number or a numeric string; parsed leniently
    // by `parse_size`.
    size: Option<serde_json::Value>,
}
/// Result of directory discovery: all classified assets with scores and
/// selection flags populated.
#[derive(Debug)]
struct DiscoveredAssets {
    assets: Vec<AssetOutput>,
}
/// Enumerate the filing's EDGAR directory and classify/score its files.
///
/// Strategy:
/// - Fetch `<directory>/index.json` and classify each entry by filename.
/// - If the listing is empty or unreachable, fall back to `filing_url` as a
///   single asset.
/// - Score instance candidates, mark the best one selected; presentation and
///   label linkbases are always selected.
fn discover_filing_assets(
    input: &HydrateFilingRequest,
    client: &Client,
) -> Result<DiscoveredAssets> {
    let Some(directory_url) = resolve_filing_directory_url(
        input.filing_url.as_deref(),
        &input.cik,
        &input.accession_number,
    ) else {
        // No way to locate the directory: report no assets rather than error.
        return Ok(DiscoveredAssets { assets: vec![] });
    };
    // Best-effort fetch: a failure just leaves `discovered` empty and
    // triggers the filing_url fallback below.
    let payload =
        fetch_json::<FilingDirectoryPayload>(client, &format!("{directory_url}index.json")).ok();
    let mut discovered = Vec::new();
    if let Some(items) =
        payload.and_then(|payload| payload.directory.and_then(|directory| directory.item))
    {
        for item in items {
            // Skip entries with a missing or blank name.
            let Some(name) = item
                .name
                .map(|name| name.trim().to_string())
                .filter(|name| !name.is_empty())
            else {
                continue;
            };
            let asset_type = classify_asset_type(&name);
            let size_bytes = parse_size(item.size.as_ref());
            discovered.push(AssetOutput {
                asset_type: asset_type.to_string(),
                name: name.clone(),
                url: format!("{directory_url}{}", name.trim_start_matches('/')),
                size_bytes,
                score: None,
                is_selected: false,
            });
        }
    }
    if discovered.is_empty() {
        // Directory listing unavailable: treat the filing URL itself as the
        // only asset, guessing "instance" from the .xml extension.
        // NOTE(review): the selection loop below recomputes `is_selected`;
        // a non-".xml" fallback ("other") therefore ends up unselected.
        if let Some(filing_url) = &input.filing_url {
            discovered.push(AssetOutput {
                asset_type: if filing_url.to_lowercase().ends_with(".xml") {
                    "instance".to_string()
                } else {
                    "other".to_string()
                },
                name: input
                    .primary_document
                    .clone()
                    .or_else(|| filing_url.split('/').last().map(|part| part.to_string()))
                    .unwrap_or_else(|| "primary_document".to_string()),
                url: filing_url.clone(),
                size_bytes: None,
                score: None,
                is_selected: true,
            });
        }
    }
    // Pick the highest-scoring instance candidate by URL.
    let selected_instance_url = discovered
        .iter()
        .filter(|asset| asset.asset_type == "instance")
        .map(|asset| {
            (
                asset.url.clone(),
                score_instance(&asset.name, input.primary_document.as_deref()),
            )
        })
        .max_by(|left, right| {
            left.1
                .partial_cmp(&right.1)
                .unwrap_or(std::cmp::Ordering::Equal)
        })
        .map(|entry| entry.0);
    for asset in &mut discovered {
        // Scores are only meaningful for instance and pdf assets.
        asset.score = if asset.asset_type == "instance" {
            Some(score_instance(
                &asset.name,
                input.primary_document.as_deref(),
            ))
        } else if asset.asset_type == "pdf" {
            Some(score_pdf(&asset.name, asset.size_bytes))
        } else {
            None
        };
        asset.is_selected = match asset.asset_type.as_str() {
            "instance" => selected_instance_url
                .as_ref()
                .map(|url| url == &asset.url)
                .unwrap_or(false),
            // Linkbases are always fetched when present.
            "presentation" | "label" => true,
            _ => false,
        };
    }
    Ok(DiscoveredAssets { assets: discovered })
}
/// Resolve the EDGAR directory URL (with trailing slash) containing a
/// filing's assets.
///
/// Preference order:
/// 1. Strip the final path segment from `filing_url` when it has one.
/// 2. Fall back to the canonical EDGAR layout built from the CIK and the
///    accession number (dashes removed).
///
/// Returns `None` only when no usable `filing_url` exists and the CIK
/// contains no digits.
fn resolve_filing_directory_url(
    filing_url: Option<&str>,
    cik: &str,
    accession_number: &str,
) -> Option<String> {
    if let Some(url) = filing_url.map(str::trim).filter(|value| !value.is_empty()) {
        // Only treat a slash as a path separator when it appears after the
        // scheme's "://" and a non-empty host. The previous check compared
        // the slash index against `"https://".len()`, which wrongly rejected
        // `http://` URLs with short hosts.
        let path = url.split_once("://").map(|(_, rest)| rest).unwrap_or(url);
        if let Some(relative_slash) = path.rfind('/') {
            if relative_slash > 0 {
                let absolute_slash = url.len() - path.len() + relative_slash;
                return Some(url[..=absolute_slash].to_string());
            }
        }
    }
    let cik_path = normalize_cik_for_path(cik)?;
    let accession_path = accession_number.replace('-', "");
    Some(format!(
        "https://www.sec.gov/Archives/edgar/data/{cik_path}/{accession_path}/"
    ))
}
/// Normalize a CIK for use in an EDGAR path: keep only ASCII digits and drop
/// leading zeros (EDGAR directory paths use the unpadded numeric form).
///
/// Returns `None` when the input has no digits or overflows `u64`.
fn normalize_cik_for_path(value: &str) -> Option<String> {
    let digits = value
        .chars()
        .filter(|char| char.is_ascii_digit())
        .collect::<String>();
    if digits.is_empty() {
        return None;
    }
    digits.parse::<u64>().ok().map(|parsed| parsed.to_string())
}
/// Classify a filing-directory filename into an asset category.
///
/// Precedence: pdf, schema, then (for `.xml`) the linkbase kinds in the order
/// presentation, label, calculation, definition; any other `.xml` is assumed
/// to be the XBRL instance, and everything else is "other".
fn classify_asset_type(name: &str) -> &'static str {
    let lower = name.to_lowercase();
    // A linkbase is recognized by its conventional suffix (`_tag.xml` /
    // `-tag.xml`) or by the full word appearing anywhere in the name.
    let is_linkbase = |tag: &str, word: &str| {
        lower.ends_with(&format!("_{tag}.xml"))
            || lower.ends_with(&format!("-{tag}.xml"))
            || lower.contains(word)
    };
    if lower.ends_with(".pdf") {
        "pdf"
    } else if lower.ends_with(".xsd") {
        "schema"
    } else if !lower.ends_with(".xml") {
        "other"
    } else if is_linkbase("pre", "presentation") {
        "presentation"
    } else if is_linkbase("lab", "label") {
        "label"
    } else if is_linkbase("cal", "calculation") {
        "calculation"
    } else if is_linkbase("def", "definition") {
        "definition"
    } else {
        "instance"
    }
}
/// Score an XBRL-instance candidate filename; the highest score wins
/// selection in `discover_filing_assets`.
///
/// Heuristics:
/// - `*_htm.xml` / `*_ins.xml` are the conventional instance suffixes (+4).
/// - Sharing the primary document's base name earns a strong bonus (+5).
/// - Names that look like linkbases (`cal`/`def`/`lab`/`pre`) are
///   penalized (-3).
fn score_instance(name: &str, primary_document: Option<&str>) -> f64 {
    let lower = name.to_lowercase();
    let mut score = 1.0;
    if lower.ends_with("_htm.xml") {
        score += 4.0;
    }
    if lower.ends_with("_ins.xml") {
        score += 4.0;
    }
    if let Some(base_primary) = primary_document
        .map(|value| value.replace(|char: char| char == '.' || char == '-', "_"))
        .map(|value| value.to_lowercase())
    {
        // Drop the extension segment from the normalized primary name
        // ("aapl_20230930_htm" -> "aapl_20230930").
        let base = base_primary
            .rsplit_once('_')
            .map(|(head, _)| head.to_string())
            .unwrap_or(base_primary);
        // Normalize the candidate the same way before comparing. The previous
        // code matched the underscore-normalized base against the RAW
        // candidate name, so "aapl-20230930_htm.xml" never contained
        // "aapl_20230930" and the bonus was effectively dead for the common
        // dashed EDGAR naming scheme.
        let normalized_candidate = lower.replace(|char: char| char == '.' || char == '-', "_");
        if !base.is_empty() && normalized_candidate.contains(&base) {
            score += 5.0;
        }
    }
    if lower.contains("cal")
        || lower.contains("def")
        || lower.contains("lab")
        || lower.contains("pre")
    {
        score -= 3.0;
    }
    score
}
/// Score a PDF asset as a likely financial-statement document.
///
/// Report-like keywords earn +8, "exhibit" costs -2, and anything over
/// ~100 KB earns +1 (stubs tend to be tiny).
fn score_pdf(name: &str, size_bytes: Option<i64>) -> f64 {
    const REPORT_HINTS: [&str; 6] = ["financial", "statement", "annual", "quarter", "10k", "10q"];
    let lower = name.to_lowercase();
    let looks_like_report = REPORT_HINTS.iter().any(|needle| lower.contains(needle));
    let mut score = if looks_like_report { 8.0 } else { 0.0 };
    if lower.contains("exhibit") {
        score -= 2.0;
    }
    if size_bytes.unwrap_or_default() > 100_000 {
        score += 1.0;
    }
    score
}
fn parse_size(value: Option<&serde_json::Value>) -> Option<i64> {
match value {
Some(serde_json::Value::Number(number)) => number.as_i64(),
Some(serde_json::Value::String(raw)) => raw.parse::<i64>().ok(),
_ => None,
}
}
/// GET `url` and return the response body as text.
///
/// # Errors
/// Fails on transport errors, any non-2xx status, or an unreadable body.
fn fetch_text(client: &Client, url: &str) -> Result<String> {
    let response = client
        .get(url)
        .send()
        .with_context(|| format!("request failed for {url}"))?;
    if !response.status().is_success() {
        return Err(anyhow!("request failed for {url} ({})", response.status()));
    }
    response
        .text()
        .with_context(|| format!("unable to read response body for {url}"))
}
/// GET `url` and deserialize the JSON response body into `T`.
///
/// # Errors
/// Fails on transport errors, any non-2xx status, or a body that does not
/// deserialize as `T`.
fn fetch_json<T: for<'de> Deserialize<'de>>(client: &Client, url: &str) -> Result<T> {
    let response = client
        .get(url)
        .send()
        .with_context(|| format!("request failed for {url}"))?;
    if !response.status().is_success() {
        return Err(anyhow!("request failed for {url} ({})", response.status()));
    }
    response
        .json::<T>()
        .with_context(|| format!("unable to parse JSON response for {url}"))
}
/// Everything extracted from one instance document.
struct ParsedInstance {
    contexts: Vec<ContextOutput>,
    facts: Vec<ParsedFact>,
}
/// Regex-parse a raw XBRL instance document into contexts and numeric facts.
///
/// A fact is kept only when it has a contextRef and its body parses as a
/// number via `parse_number`; facts using an XBRL infrastructure prefix
/// (per `is_xbrl_infrastructure_prefix`) are skipped. `source_file` is the
/// instance filename, attached to every fact for provenance.
fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> ParsedInstance {
    let namespaces = parse_namespace_map(raw, "xbrl");
    let context_by_id = parse_contexts(raw);
    let unit_by_id = parse_units(raw);
    let mut facts = Vec::new();
    for captures in FACT_RE.captures_iter(raw) {
        // FACT_RE captures: (1) prefix, (2) local name, (3) raw attribute
        // string, (4) element body.
        let prefix = captures
            .get(1)
            .map(|value| value.as_str().trim())
            .unwrap_or_default();
        let local_name = captures
            .get(2)
            .map(|value| value.as_str().trim())
            .unwrap_or_default();
        let attrs = captures
            .get(3)
            .map(|value| value.as_str())
            .unwrap_or_default();
        let body = decode_xml_entities(
            captures
                .get(4)
                .map(|value| value.as_str())
                .unwrap_or_default()
                .trim(),
        );
        if prefix.is_empty() || local_name.is_empty() || is_xbrl_infrastructure_prefix(prefix) {
            continue;
        }
        let attr_map = parse_attrs(attrs);
        // contextRef is mandatory for facts; lookup tolerates lowercased
        // attribute names.
        let Some(context_id) = attr_map
            .get("contextRef")
            .cloned()
            .or_else(|| attr_map.get("contextref").cloned())
        else {
            continue;
        };
        // Non-numeric facts (text blocks, dates, ...) are dropped here.
        let Some(value) = parse_number(&body) else {
            continue;
        };
        // Unknown prefixes still get a stable synthetic namespace so the
        // concept key stays unique.
        let namespace_uri = namespaces
            .get(prefix)
            .cloned()
            .unwrap_or_else(|| format!("urn:unknown:{prefix}"));
        let context = context_by_id.get(&context_id);
        let unit_ref = attr_map
            .get("unitRef")
            .cloned()
            .or_else(|| attr_map.get("unitref").cloned());
        // Prefer the unit's <measure>; fall back to the raw unitRef id.
        let unit = unit_ref
            .as_ref()
            .and_then(|unit_ref| unit_by_id.get(unit_ref))
            .and_then(|unit| unit.measure.clone())
            .or(unit_ref);
        facts.push(ParsedFact {
            concept_key: format!("{namespace_uri}#{local_name}"),
            qname: format!("{prefix}:{local_name}"),
            namespace_uri,
            local_name: local_name.to_string(),
            data_type: None,
            context_id: context_id.clone(),
            unit,
            decimals: attr_map.get("decimals").cloned(),
            precision: attr_map.get("precision").cloned(),
            nil: attr_map
                .get("xsi:nil")
                .or_else(|| attr_map.get("nil"))
                .map(|value| value.eq_ignore_ascii_case("true"))
                .unwrap_or(false),
            value,
            // Period fields are denormalized from the referenced context.
            period_start: context.and_then(|value| value.period_start.clone()),
            period_end: context.and_then(|value| value.period_end.clone()),
            period_instant: context.and_then(|value| value.period_instant.clone()),
            dimensions: context
                .map(|value| value.dimensions.clone())
                .unwrap_or_default(),
            is_dimensionless: context
                .map(|value| value.dimensions.is_empty())
                .unwrap_or(true),
            source_file: source_file.clone(),
        });
    }
    // Emit every parsed context, not just those referenced by facts.
    let contexts = context_by_id
        .values()
        .map(|context| ContextOutput {
            context_id: context.id.clone(),
            entity_identifier: context.entity_identifier.clone(),
            entity_scheme: context.entity_scheme.clone(),
            period_start: context.period_start.clone(),
            period_end: context.period_end.clone(),
            period_instant: context.period_instant.clone(),
            segment_json: context.segment.clone(),
            scenario_json: context.scenario.clone(),
        })
        .collect::<Vec<_>>();
    ParsedInstance { contexts, facts }
}
fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap<String, String> {
let mut map = HashMap::new();
let root_start = Regex::new(&format!(r#"(?is)<[^>]*{root_tag_hint}[^>]*>"#))
.unwrap()
.find(raw)
.map(|match_| match_.as_str().to_string())
.unwrap_or_else(|| raw.chars().take(1200).collect::<String>());
for captures in Regex::new(r#"xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']"#)
.unwrap()
.captures_iter(&root_start)
{
if let (Some(prefix), Some(uri)) = (captures.get(1), captures.get(2)) {
map.insert(
prefix.as_str().trim().to_string(),
uri.as_str().trim().to_string(),
);
}
}
map
}
fn parse_contexts(raw: &str) -> HashMap<String, ParsedContext> {
let mut contexts = HashMap::new();
for captures in CONTEXT_RE.captures_iter(raw) {
let Some(context_id) = captures
.get(1)
.map(|value| value.as_str().trim().to_string())
else {
continue;
};
let block = captures
.get(2)
.map(|value| value.as_str())
.unwrap_or_default();
let (entity_identifier, entity_scheme) = IDENTIFIER_RE
.captures(block)
.map(|captures| {
(
captures
.get(2)
.map(|value| decode_xml_entities(value.as_str().trim())),
captures
.get(1)
.map(|value| decode_xml_entities(value.as_str().trim())),
)
})
.unwrap_or((None, None));
let period_start = START_DATE_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| decode_xml_entities(value.as_str().trim()));
let period_end = END_DATE_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| decode_xml_entities(value.as_str().trim()));
let period_instant = INSTANT_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| decode_xml_entities(value.as_str().trim()));
let segment = SEGMENT_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| parse_dimension_container(value.as_str()));
let scenario = SCENARIO_RE
.captures(block)
.and_then(|captures| captures.get(1))
.map(|value| parse_dimension_container(value.as_str()));
let mut dimensions = Vec::new();
if let Some(segment_value) = segment.as_ref() {
if let Some(members) = segment_value
.get("explicitMembers")
.and_then(|value| value.as_array())
{
for member in members {
if let (Some(axis), Some(member_value)) = (
member.get("axis").and_then(|value| value.as_str()),
member.get("member").and_then(|value| value.as_str()),
) {
dimensions.push(DimensionOutput {
axis: axis.to_string(),
member: member_value.to_string(),
});
}
}
}
}
if let Some(scenario_value) = scenario.as_ref() {
if let Some(members) = scenario_value
.get("explicitMembers")
.and_then(|value| value.as_array())
{
for member in members {
if let (Some(axis), Some(member_value)) = (
member.get("axis").and_then(|value| value.as_str()),
member.get("member").and_then(|value| value.as_str()),
) {
dimensions.push(DimensionOutput {
axis: axis.to_string(),
member: member_value.to_string(),
});
}
}
}
}
contexts.insert(
context_id.clone(),
ParsedContext {
id: context_id,
entity_identifier,
entity_scheme,
period_start,
period_end,
period_instant,
dimensions,
segment,
scenario,
},
);
}
contexts
}
/// Parses a `<segment>`/`<scenario>` body into a JSON object with two
/// arrays: `explicitMembers` ([{axis, member}]) and `typedMembers`
/// ([{axis, value}]). Entries missing either capture group are skipped.
fn parse_dimension_container(raw: &str) -> serde_json::Value {
    let mut explicit_members = Vec::new();
    for captures in EXPLICIT_MEMBER_RE.captures_iter(raw) {
        if let (Some(axis), Some(member)) = (captures.get(1), captures.get(2)) {
            explicit_members.push(serde_json::json!({
                "axis": decode_xml_entities(axis.as_str().trim()),
                "member": decode_xml_entities(member.as_str().trim())
            }));
        }
    }
    let mut typed_members = Vec::new();
    for captures in TYPED_MEMBER_RE.captures_iter(raw) {
        if let (Some(axis), Some(value)) = (captures.get(1), captures.get(2)) {
            typed_members.push(serde_json::json!({
                "axis": decode_xml_entities(axis.as_str().trim()),
                "value": decode_xml_entities(value.as_str().trim())
            }));
        }
    }
    serde_json::json!({
        "explicitMembers": explicit_members,
        "typedMembers": typed_members
    })
}
/// Collects `<unit>` declarations into a map keyed by unit id.
///
/// A single measure is used verbatim; multiple measures are joined with '/'
/// (e.g. per-share units); a unit with no non-empty measure gets `None`.
fn parse_units(raw: &str) -> HashMap<String, ParsedUnit> {
    let mut units = HashMap::new();
    for captures in UNIT_RE.captures_iter(raw) {
        let id = match captures.get(1) {
            Some(value) => value.as_str().trim().to_string(),
            None => continue,
        };
        let block = captures.get(2).map_or("", |value| value.as_str());
        let mut measures = Vec::new();
        for measure_captures in MEASURE_RE.captures_iter(block) {
            if let Some(value) = measure_captures.get(1) {
                let decoded = decode_xml_entities(value.as_str().trim());
                if !decoded.is_empty() {
                    measures.push(decoded);
                }
            }
        }
        let measure = match measures.len() {
            0 => None,
            1 => measures.into_iter().next(),
            _ => Some(measures.join("/")),
        };
        units.insert(id, ParsedUnit { measure });
    }
    units
}
/// True when `prefix` belongs to XBRL structural machinery (instance,
/// linking, dimension namespaces) rather than a reporting taxonomy.
/// Comparison is case-insensitive.
fn is_xbrl_infrastructure_prefix(prefix: &str) -> bool {
    const INFRASTRUCTURE_PREFIXES: [&str; 5] = ["xbrli", "xlink", "link", "xbrldi", "xbrldt"];
    let normalized = prefix.to_ascii_lowercase();
    INFRASTRUCTURE_PREFIXES
        .iter()
        .any(|candidate| *candidate == normalized)
}
/// Parses `name="value"` attribute pairs from a raw attribute string into a
/// map, XML-decoding each value. A repeated attribute name keeps the last
/// occurrence, matching repeated-insert semantics.
fn parse_attrs(raw: &str) -> HashMap<String, String> {
    ATTR_RE
        .captures_iter(raw)
        .filter_map(|captures| {
            let name = captures.get(1)?;
            let value = captures.get(2)?;
            Some((
                name.as_str().to_string(),
                decode_xml_entities(value.as_str()),
            ))
        })
        .collect()
}
/// Decodes the small set of XML/HTML entities that SEC filings actually use.
///
/// Replacements run sequentially in a fixed order, matching the behavior of
/// the equivalent chain of `str::replace` calls (so a double-encoded
/// `&amp;lt;` decodes all the way down to `<`).
fn decode_xml_entities(value: &str) -> String {
    const REPLACEMENTS: [(&str, &str); 7] = [
        ("&amp;", "&"),
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", "\""),
        ("&#39;", "'"),
        ("&#160;", " "),
        ("&nbsp;", " "),
    ];
    REPLACEMENTS
        .iter()
        .fold(value.to_string(), |decoded, (entity, replacement)| {
            decoded.replace(entity, replacement)
        })
}
fn parse_number(raw: &str) -> Option<f64> {
let trimmed = raw.trim();
if trimmed.is_empty() || trimmed.chars().all(|char| char == '-') {
return None;
}
let negative = trimmed.starts_with('(') && trimmed.ends_with(')');
let normalized = Regex::new(r#"<[^>]+>"#)
.unwrap()
.replace_all(trimmed, " ")
.replace(',', "")
.replace('$', "")
.replace(['(', ')'], "")
.replace('\u{2212}', "-")
.split_whitespace()
.collect::<String>();
let parsed = normalized.parse::<f64>().ok()?;
Some(if negative { -parsed.abs() } else { parsed })
}
/// Parses a label linkbase and returns the preferred human-readable label
/// for each concept key.
///
/// Within each `<labelLink>` block: `<loc>` elements tie xlink labels to
/// concept hrefs, label resources carry the display text (plus an optional
/// role), and label arcs connect the two. Among competing labels for one
/// concept, the highest [`label_priority`] role wins; an equal priority does
/// NOT displace the label already recorded (first-seen wins on ties).
fn parse_label_linkbase(raw: &str) -> HashMap<String, String> {
    let namespaces = parse_namespace_map(raw, "linkbase");
    // concept_key -> (best label text so far, its role priority).
    let mut preferred = HashMap::<String, (String, i64)>::new();
    for captures in LABEL_LINK_RE.captures_iter(raw) {
        let block = captures
            .get(1)
            .map(|value| value.as_str())
            .unwrap_or_default();
        // xlink label -> concept key, built from <loc> elements.
        let mut loc_by_label = HashMap::<String, String>::new();
        // xlink label -> (label text, optional role), from label resources.
        let mut resource_by_label = HashMap::<String, (String, Option<String>)>::new();
        for captures in LOC_RE.captures_iter(block) {
            let attrs = parse_attrs(
                captures
                    .get(1)
                    .map(|value| value.as_str())
                    .unwrap_or_default(),
            );
            let Some(label) = attrs.get("xlink:label").cloned() else {
                continue;
            };
            let Some(href) = attrs.get("xlink:href").cloned() else {
                continue;
            };
            let Some(qname) = qname_from_href(&href) else {
                continue;
            };
            let Some((concept_key, _, _)) = concept_from_qname(&qname, &namespaces) else {
                continue;
            };
            loc_by_label.insert(label, concept_key);
        }
        for captures in LABEL_RESOURCE_RE.captures_iter(block) {
            let attrs = parse_attrs(
                captures
                    .get(1)
                    .map(|value| value.as_str())
                    .unwrap_or_default(),
            );
            let Some(label) = attrs.get("xlink:label").cloned() else {
                continue;
            };
            // Decode entities, then collapse internal whitespace/newlines to
            // single spaces.
            let body = decode_xml_entities(
                captures
                    .get(2)
                    .map(|value| value.as_str())
                    .unwrap_or_default(),
            )
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ");
            if body.is_empty() {
                continue;
            }
            resource_by_label.insert(label, (body, attrs.get("xlink:role").cloned()));
        }
        for captures in LABEL_ARC_RE.captures_iter(block) {
            let attrs = parse_attrs(
                captures
                    .get(1)
                    .map(|value| value.as_str())
                    .unwrap_or_default(),
            );
            let Some(from) = attrs.get("xlink:from").cloned() else {
                continue;
            };
            let Some(to) = attrs.get("xlink:to").cloned() else {
                continue;
            };
            let Some(concept_key) = loc_by_label.get(&from) else {
                continue;
            };
            let Some((label, role)) = resource_by_label.get(&to) else {
                continue;
            };
            let priority = label_priority(role.as_deref());
            let current = preferred.get(concept_key).cloned();
            // Strictly-greater comparison keeps the first label on ties.
            if current
                .as_ref()
                .map(|(_, current_priority)| priority > *current_priority)
                .unwrap_or(true)
            {
                preferred.insert(concept_key.clone(), (label.clone(), priority));
            }
        }
    }
    // Drop the priorities; callers only need concept -> label text.
    preferred
        .into_iter()
        .map(|(key, (value, _))| (key, value))
        .collect()
}
/// Parses a presentation linkbase into a flat list of [`PresentationNode`]
/// rows, one per (role, concept, tree-position).
///
/// For each `<presentationLink>`: `<loc>` elements map xlink labels to
/// concepts, presentation arcs form parent->child edges with an optional
/// `order` attribute. Roots are the labels with no incoming arc; each root's
/// subtree is walked depth-first, producing rows whose fractional `order`
/// encodes sibling position beneath the parent's order.
fn parse_presentation_linkbase(raw: &str) -> Vec<PresentationNode> {
    let namespaces = parse_namespace_map(raw, "linkbase");
    let mut rows = Vec::new();
    for captures in PRESENTATION_LINK_RE.captures_iter(raw) {
        let link_attrs = parse_attrs(
            captures
                .get(1)
                .map(|value| value.as_str())
                .unwrap_or_default(),
        );
        // A link without a role URI cannot be classified; skip it entirely.
        let Some(role_uri) = link_attrs.get("xlink:role").cloned() else {
            continue;
        };
        let block = captures
            .get(2)
            .map(|value| value.as_str())
            .unwrap_or_default();
        // xlink label -> (concept_key, qname, is_abstract heuristic).
        let mut loc_by_label = HashMap::<String, (String, String, bool)>::new();
        // parent label -> [(child label, order)] edges.
        let mut children_by_label = HashMap::<String, Vec<(String, f64)>>::new();
        // Labels with at least one incoming arc (i.e. non-roots).
        let mut incoming = HashSet::<String>::new();
        let mut all_referenced = HashSet::<String>::new();
        for captures in LOC_RE.captures_iter(block) {
            let attrs = parse_attrs(
                captures
                    .get(1)
                    .map(|value| value.as_str())
                    .unwrap_or_default(),
            );
            let Some(label) = attrs.get("xlink:label").cloned() else {
                continue;
            };
            let Some(href) = attrs.get("xlink:href").cloned() else {
                continue;
            };
            let Some(qname) = qname_from_href(&href) else {
                continue;
            };
            let Some((concept_key, qname, local_name)) = concept_from_qname(&qname, &namespaces)
            else {
                continue;
            };
            loc_by_label.insert(
                label,
                (
                    concept_key,
                    qname,
                    // Heuristic: "Abstract" in the local name marks header rows.
                    local_name.to_ascii_lowercase().contains("abstract"),
                ),
            );
        }
        for captures in PRESENTATION_ARC_RE.captures_iter(block) {
            let attrs = parse_attrs(
                captures
                    .get(1)
                    .map(|value| value.as_str())
                    .unwrap_or_default(),
            );
            let Some(from) = attrs.get("xlink:from").cloned() else {
                continue;
            };
            let Some(to) = attrs.get("xlink:to").cloned() else {
                continue;
            };
            // Ignore arcs whose endpoints never appeared as <loc> entries.
            if !loc_by_label.contains_key(&from) || !loc_by_label.contains_key(&to) {
                continue;
            }
            // Missing/unparseable order falls back to append position (1-based).
            let order = attrs
                .get("order")
                .and_then(|value| value.parse::<f64>().ok())
                .unwrap_or_else(|| {
                    children_by_label
                        .get(&from)
                        .map(|children| children.len() as f64 + 1.0)
                        .unwrap_or(1.0)
                });
            children_by_label
                .entry(from.clone())
                .or_default()
                .push((to.clone(), order));
            incoming.insert(to.clone());
            all_referenced.insert(from);
            all_referenced.insert(to);
        }
        // Roots = referenced labels that are never an arc target.
        let roots = all_referenced
            .iter()
            .filter(|label| !incoming.contains(*label))
            .cloned()
            .collect::<Vec<_>>();
        let mut visited = HashSet::<String>::new();
        // Depth-first walk emitting one row per tree position. The visited
        // set keys on (parent, label, depth) so shared nodes can appear under
        // several parents while cycles still terminate.
        fn dfs(
            label: &str,
            depth: i64,
            parent_label: Option<&str>,
            base_order: f64,
            role_uri: &str,
            loc_by_label: &HashMap<String, (String, String, bool)>,
            children_by_label: &HashMap<String, Vec<(String, f64)>>,
            rows: &mut Vec<PresentationNode>,
            visited: &mut HashSet<String>,
        ) {
            let Some((concept_key, _qname, is_abstract)) = loc_by_label.get(label) else {
                return;
            };
            let path_key = format!("{}::{label}::{depth}", parent_label.unwrap_or("root"));
            if !visited.insert(path_key) {
                return;
            }
            let parent_concept_key = parent_label.and_then(|parent| {
                loc_by_label
                    .get(parent)
                    .map(|(concept_key, _, _)| concept_key.clone())
            });
            rows.push(PresentationNode {
                concept_key: concept_key.clone(),
                role_uri: role_uri.to_string(),
                order: base_order,
                depth,
                parent_concept_key,
                is_abstract: *is_abstract,
            });
            let mut children = children_by_label.get(label).cloned().unwrap_or_default();
            children.sort_by(|left, right| {
                left.1
                    .partial_cmp(&right.1)
                    .unwrap_or(std::cmp::Ordering::Equal)
            });
            // Children get fractional orders below the parent's order so a
            // flat sort of rows reproduces the tree's reading order.
            for (index, (child_label, _)) in children.into_iter().enumerate() {
                dfs(
                    &child_label,
                    depth + 1,
                    Some(label),
                    base_order + (index as f64 + 1.0) / 1000.0,
                    role_uri,
                    loc_by_label,
                    children_by_label,
                    rows,
                    visited,
                );
            }
        }
        for (index, root) in roots.iter().enumerate() {
            dfs(
                root,
                0,
                None,
                index as f64 + 1.0,
                &role_uri,
                &loc_by_label,
                &children_by_label,
                &mut rows,
                &mut visited,
            );
        }
    }
    rows
}
/// Derives a `prefix:LocalName` QName from an xlink href fragment such as
/// `schema.xsd#us-gaap_Assets` or `#loc_us-gaap_Assets`.
///
/// With no '#', the whole href is treated as the fragment. A fragment that
/// already contains ':' is returned as-is (after stripping a `loc_` prefix);
/// otherwise the first '_' is converted into the prefix separator.
fn qname_from_href(href: &str) -> Option<String> {
    let fragment = match href.split('#').nth(1) {
        Some(part) => part.trim(),
        None => href.trim(),
    };
    if fragment.is_empty() {
        return None;
    }
    let cleaned = fragment.trim_start_matches("loc_");
    if cleaned.contains(':') {
        Some(cleaned.to_string())
    } else {
        let (prefix, local) = cleaned.split_once('_')?;
        Some(format!("{prefix}:{local}"))
    }
}
/// Resolves a `prefix:LocalName` QName against the namespace map, returning
/// `(concept_key, qname, local_name)` where `concept_key` is
/// `namespace_uri#LocalName`. An unknown prefix maps to
/// `urn:unknown:<prefix>`; a QName with no ':' yields `None`.
fn concept_from_qname(
    qname: &str,
    namespaces: &HashMap<String, String>,
) -> Option<(String, String, String)> {
    let (prefix, local_name) = qname.split_once(':')?;
    let namespace_uri = match namespaces.get(prefix) {
        Some(uri) => uri.clone(),
        None => format!("urn:unknown:{prefix}"),
    };
    let concept_key = format!("{namespace_uri}#{local_name}");
    Some((concept_key, qname.to_string(), local_name.to_string()))
}
/// Ranks label-resource roles so the standard label outranks terse and
/// verbose variants. Unknown roles score above a missing role so any
/// explicit label beats "no role at all".
fn label_priority(role: Option<&str>) -> i64 {
    let normalized = role.unwrap_or_default().to_ascii_lowercase();
    match normalized.as_str() {
        "" => 0,
        value if value.ends_with("/label") => 4,
        value if value.ends_with("/terselabel") => 3,
        value if value.ends_with("/verboselabel") => 2,
        _ => 1,
    }
}
/// Output bundle produced by `materialize_taxonomy_statements`: deduplicated
/// reporting periods, per-statement row tables, the concept catalog, and the
/// enriched fact list.
struct MaterializedStatements {
    // Unique periods found across all facts, sorted by period end date.
    periods: Vec<PeriodOutput>,
    // Statement key ("income", "balance", ...) -> rows in presentation order.
    statement_rows: StatementRowMap,
    // One catalog entry per concept emitted into any statement.
    concepts: Vec<ConceptOutput>,
    // Every parsed fact, annotated with statement/presentation metadata.
    facts: Vec<FactOutput>,
}
/// Assembles the per-filing output bundle from parsed facts and the
/// presentation linkbase.
///
/// Three phases:
/// 1. Deduplicate reporting periods by start/end/instant signature, giving
///    each a stable id derived from date + compact accession number.
/// 2. Annotate each fact with a statement classification — presentation role
///    first, concept-name fallback second — and group classified facts by
///    statement and concept.
/// 3. For each statement, emit rows in presentation order with one value per
///    period (preferring dimensionless facts), plus one concept catalog
///    entry per row.
fn materialize_taxonomy_statements(
    filing_id: i64,
    accession_number: &str,
    filing_date: &str,
    filing_type: &str,
    facts: &[ParsedFact],
    presentation: &[PresentationNode],
    label_by_concept: &HashMap<String, String>,
) -> MaterializedStatements {
    let compact_accession = accession_number.replace('-', "");
    // Phase 1: one PeriodOutput per distinct period signature.
    let mut period_by_signature = HashMap::<String, PeriodOutput>::new();
    for fact in facts {
        let signature = period_signature(fact);
        if period_by_signature.contains_key(&signature) {
            continue;
        }
        // Date component of the period id: end date, else instant, else
        // the filing date.
        let date = fact
            .period_end
            .clone()
            .or_else(|| fact.period_instant.clone())
            .unwrap_or_else(|| filing_date.to_string());
        let id = format!(
            "{date}-{compact_accession}-{}",
            period_by_signature.len() + 1
        );
        let period_label = if fact.period_instant.is_some() && fact.period_start.is_none() {
            "Instant".to_string()
        } else if fact.period_start.is_some() && fact.period_end.is_some() {
            format!(
                "{} to {}",
                fact.period_start.clone().unwrap_or_default(),
                fact.period_end.clone().unwrap_or_default()
            )
        } else {
            "Filing Period".to_string()
        };
        period_by_signature.insert(
            signature,
            PeriodOutput {
                id,
                filing_id,
                accession_number: accession_number.to_string(),
                filing_date: filing_date.to_string(),
                period_start: fact.period_start.clone(),
                period_end: fact
                    .period_end
                    .clone()
                    .or_else(|| fact.period_instant.clone()),
                filing_type: filing_type.to_string(),
                period_label,
            },
        );
    }
    // Sort periods by end date (filing date as fallback), then by id.
    let mut periods = period_by_signature.values().cloned().collect::<Vec<_>>();
    periods.sort_by(|left, right| {
        let left_key = left
            .period_end
            .clone()
            .unwrap_or_else(|| left.filing_date.clone());
        let right_key = right
            .period_end
            .clone()
            .unwrap_or_else(|| right.filing_date.clone());
        left_key
            .cmp(&right_key)
            .then_with(|| left.id.cmp(&right.id))
    });
    let period_id_by_signature = period_by_signature
        .iter()
        .map(|(signature, period)| (signature.clone(), period.id.clone()))
        .collect::<HashMap<_, _>>();
    // Index presentation nodes by concept for O(1) lookup per fact.
    let mut presentation_by_concept = HashMap::<String, Vec<&PresentationNode>>::new();
    for node in presentation {
        presentation_by_concept
            .entry(node.concept_key.clone())
            .or_default()
            .push(node);
    }
    // Phase 2: classify and enrich each fact; group classified facts as
    // statement -> concept -> [(1-based fact id, fact, presentation node)].
    let mut grouped_by_statement = empty_parsed_fact_map();
    let mut enriched_facts = Vec::new();
    for (index, fact) in facts.iter().enumerate() {
        let nodes = presentation_by_concept
            .get(&fact.concept_key)
            .cloned()
            .unwrap_or_default();
        let best_node = nodes.first().copied();
        // Role-based classification wins; local-name heuristics are the
        // fallback for concepts absent from the presentation linkbase.
        let statement_kind = best_node
            .and_then(|node| classify_statement_role(&node.role_uri))
            .or_else(|| concept_statement_fallback(&fact.local_name));
        let fact_output = FactOutput {
            concept_key: fact.concept_key.clone(),
            qname: fact.qname.clone(),
            namespace_uri: fact.namespace_uri.clone(),
            local_name: fact.local_name.clone(),
            data_type: fact.data_type.clone(),
            statement_kind: statement_kind.clone(),
            role_uri: best_node.map(|node| node.role_uri.clone()),
            authoritative_concept_key: None,
            mapping_method: None,
            surface_key: None,
            detail_parent_surface_key: None,
            kpi_key: None,
            residual_flag: false,
            context_id: fact.context_id.clone(),
            unit: fact.unit.clone(),
            decimals: fact.decimals.clone(),
            precision: fact.precision.clone(),
            nil: fact.nil,
            value_num: fact.value,
            period_start: fact.period_start.clone(),
            period_end: fact.period_end.clone(),
            period_instant: fact.period_instant.clone(),
            dimensions: fact.dimensions.clone(),
            is_dimensionless: fact.is_dimensionless,
            source_file: fact.source_file.clone(),
        };
        if let Some(statement_kind) = statement_kind.clone() {
            if let Some(statement_key) = statement_key_ref(&statement_kind) {
                grouped_by_statement
                    .entry(statement_key)
                    .or_default()
                    .entry(fact.concept_key.clone())
                    .or_default()
                    .push((index as i64 + 1, fact.clone(), best_node.cloned()));
            }
        }
        enriched_facts.push(fact_output);
    }
    // Phase 3: build ordered rows and concept catalog per statement.
    let mut statement_rows = empty_statement_row_map();
    let mut concepts = Vec::<ConceptOutput>::new();
    for statement_kind in statement_keys() {
        let concept_groups = grouped_by_statement
            .remove(statement_kind)
            .unwrap_or_default();
        // Union of concepts from this statement's presentation roles and
        // concepts that carried classified facts.
        let mut concept_keys = HashSet::<String>::new();
        for node in presentation.iter().filter(|node| {
            classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind)
        }) {
            concept_keys.insert(node.concept_key.clone());
        }
        for concept_key in concept_groups.keys() {
            concept_keys.insert(concept_key.clone());
        }
        let mut ordered_concepts = concept_keys
            .into_iter()
            .map(|concept_key| {
                let nodes = presentation
                    .iter()
                    .filter(|node| {
                        node.concept_key == concept_key
                            && classify_statement_role(&node.role_uri).as_deref()
                                == Some(statement_kind)
                    })
                    .collect::<Vec<_>>();
                // Earliest presentation order / shallowest depth among the
                // concept's appearances in this statement.
                let order = nodes
                    .iter()
                    .map(|node| node.order)
                    .fold(f64::INFINITY, f64::min);
                let depth = nodes.iter().map(|node| node.depth).min().unwrap_or(0);
                let role_uri = nodes.first().map(|node| node.role_uri.clone());
                let parent_concept_key = nodes
                    .first()
                    .and_then(|node| node.parent_concept_key.clone());
                (concept_key, order, depth, role_uri, parent_concept_key)
            })
            .collect::<Vec<_>>();
        ordered_concepts.sort_by(|left, right| {
            left.1
                .partial_cmp(&right.1)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| left.0.cmp(&right.0))
        });
        for (concept_key, presentation_order, depth, role_uri, parent_concept_key) in
            ordered_concepts
        {
            let fact_group = concept_groups
                .get(&concept_key)
                .cloned()
                .unwrap_or_default();
            let (namespace_uri, local_name) = split_concept_key(&concept_key);
            let qname = fact_group
                .first()
                .map(|(_, fact, _)| fact.qname.clone())
                .unwrap_or_else(|| format!("unknown:{local_name}"));
            // Linkbase label if present, else a label derived from CamelCase.
            let label = label_by_concept
                .get(&concept_key)
                .cloned()
                .unwrap_or_else(|| local_name_to_label(&local_name));
            let mut values = BTreeMap::<String, Option<f64>>::new();
            let mut units = BTreeMap::<String, Option<String>>::new();
            let mut source_fact_ids = Vec::<i64>::new();
            let mut has_dimensions = false;
            // Re-group this concept's facts by period, then pick one
            // representative fact per period.
            let mut fact_groups = HashMap::<String, Vec<(i64, ParsedFact)>>::new();
            for (fact_id, fact, _) in fact_group.iter() {
                fact_groups
                    .entry(period_signature(fact))
                    .or_default()
                    .push((*fact_id, fact.clone()));
            }
            for (signature, grouped_facts) in fact_groups {
                let Some(period_id) = period_id_by_signature.get(&signature) else {
                    continue;
                };
                let preferred = pick_preferred_fact(&grouped_facts);
                if let Some((fact_id, fact)) = preferred {
                    values.insert(period_id.clone(), Some(fact.value));
                    units.insert(period_id.clone(), fact.unit.clone());
                    source_fact_ids.push(*fact_id);
                    has_dimensions = has_dimensions || !fact.is_dimensionless;
                }
            }
            let row = StatementRowOutput {
                key: concept_key.clone(),
                label: label.clone(),
                concept_key: concept_key.clone(),
                qname: qname.clone(),
                namespace_uri: namespace_uri.clone(),
                local_name: local_name.clone(),
                is_extension: !is_standard_namespace(&namespace_uri),
                statement: statement_kind.to_string(),
                role_uri: role_uri.clone(),
                // Integer order: scaled presentation order, or a large
                // sentinel for concepts with no presentation position.
                order: if presentation_order.is_finite() {
                    (presentation_order * 1000.0).round() as i64
                } else {
                    1_000_000
                },
                depth,
                parent_key: parent_concept_key.clone(),
                values,
                units,
                has_dimensions,
                source_fact_ids: {
                    source_fact_ids.sort();
                    source_fact_ids
                },
            };
            if let Some(statement_rows) = statement_rows.get_mut(statement_kind) {
                statement_rows.push(row.clone());
            }
            concepts.push(ConceptOutput {
                concept_key,
                qname,
                namespace_uri,
                local_name,
                label: Some(label),
                is_extension: !is_standard_namespace(&row.namespace_uri),
                balance: None,
                period_type: None,
                data_type: None,
                statement_kind: Some(statement_kind.to_string()),
                role_uri,
                authoritative_concept_key: None,
                mapping_method: None,
                surface_key: None,
                detail_parent_surface_key: None,
                kpi_key: None,
                residual_flag: false,
                presentation_order: if presentation_order.is_finite() {
                    Some(presentation_order)
                } else {
                    None
                },
                presentation_depth: Some(depth),
                parent_concept_key,
                is_abstract: presentation
                    .iter()
                    .find(|node| node.concept_key == row.concept_key)
                    .map(|node| node.is_abstract)
                    .unwrap_or(false),
            });
        }
    }
    MaterializedStatements {
        periods,
        statement_rows,
        concepts,
        facts: enriched_facts,
    }
}
/// Builds an empty fact-grouping map seeded with every statement key.
fn empty_parsed_fact_map(
) -> HashMap<&'static str, HashMap<String, Vec<(i64, ParsedFact, Option<PresentationNode>)>>> {
    statement_keys()
        .into_iter()
        .map(|key| (key, HashMap::new()))
        .collect()
}
/// Builds a statement-row map with an empty row list for each statement key.
fn empty_statement_row_map() -> StatementRowMap {
    let mut entries = Vec::new();
    for key in statement_keys() {
        entries.push((key.to_string(), Vec::new()));
    }
    entries.into_iter().collect()
}
/// Builds a surface-row map with an empty row list for each statement key.
fn empty_surface_row_map() -> SurfaceRowMap {
    let mut entries = Vec::new();
    for key in statement_keys() {
        entries.push((key.to_string(), Vec::new()));
    }
    entries.into_iter().collect()
}
/// Builds a detail-row map with an empty group tree for each statement key.
fn empty_detail_row_map() -> DetailRowStatementMap {
    let mut entries = Vec::new();
    for key in statement_keys() {
        entries.push((key.to_string(), BTreeMap::new()));
    }
    entries.into_iter().collect()
}
/// The five financial statements a concept can be classified into, in
/// canonical display order.
fn statement_keys() -> [&'static str; 5] {
    const KEYS: [&'static str; 5] = [
        "income",
        "balance",
        "cash_flow",
        "equity",
        "comprehensive_income",
    ];
    KEYS
}
/// Maps a runtime statement string onto its canonical `&'static str` key,
/// returning `None` for unrecognized values.
fn statement_key_ref(value: &str) -> Option<&'static str> {
    [
        "income",
        "balance",
        "cash_flow",
        "equity",
        "comprehensive_income",
    ]
    .into_iter()
    .find(|candidate| *candidate == value)
}
/// Chooses the representative fact among duplicates that share one period:
/// dimensionless facts win first, then the latest end/instant date, then the
/// largest absolute value.
fn pick_preferred_fact(grouped_facts: &[(i64, ParsedFact)]) -> Option<&(i64, ParsedFact)> {
    // 1 when the fact has no dimensions (preferred), 0 otherwise.
    fn dimension_rank(fact: &ParsedFact) -> i64 {
        if fact.is_dimensionless {
            1
        } else {
            0
        }
    }
    // End date, else instant, else "" — lexicographic compare works for
    // ISO-formatted dates.
    fn date_key(fact: &ParsedFact) -> String {
        fact.period_end
            .as_ref()
            .or(fact.period_instant.as_ref())
            .cloned()
            .unwrap_or_default()
    }
    grouped_facts.iter().max_by(|left, right| {
        dimension_rank(&left.1)
            .cmp(&dimension_rank(&right.1))
            .then_with(|| date_key(&left.1).cmp(&date_key(&right.1)))
            .then_with(|| {
                left.1
                    .value
                    .abs()
                    .partial_cmp(&right.1.value.abs())
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
    })
}
/// Canonical string identity for a fact's reporting period, used to group
/// facts sharing the same start/end/instant combination.
fn period_signature(fact: &ParsedFact) -> String {
    let start = fact.period_start.as_deref().unwrap_or_default();
    let end = fact.period_end.as_deref().unwrap_or_default();
    let instant = fact.period_instant.as_deref().unwrap_or_default();
    format!("start:{start}|end:{end}|instant:{instant}")
}
/// Splits a `namespace#LocalName` concept key at the LAST '#', falling back
/// to `urn:unknown` when no separator is present.
fn split_concept_key(concept_key: &str) -> (String, String) {
    match concept_key.rsplit_once('#') {
        Some((namespace_uri, local_name)) => {
            (namespace_uri.to_string(), local_name.to_string())
        }
        None => ("urn:unknown".to_string(), concept_key.to_string()),
    }
}
fn local_name_to_label(local_name: &str) -> String {
let spaced = Regex::new(r#"([a-z0-9])([A-Z])"#)
.unwrap()
.replace_all(local_name, "$1 $2")
.to_string();
Regex::new(r#"([A-Z]+)([A-Z][a-z])"#)
.unwrap()
.replace_all(&spaced, "$1 $2")
.replace('_', " ")
.trim()
.to_string()
}
fn classify_statement_role(role_uri: &str) -> Option<String> {
let normalized = role_uri.to_ascii_lowercase();
if Regex::new(r#"cash\s*flow|statementsof?cashflows|netcash"#)
.unwrap()
.is_match(&normalized)
{
return Some("cash_flow".to_string());
}
if Regex::new(r#"shareholders?|stockholders?|equity|retainedearnings"#)
.unwrap()
.is_match(&normalized)
{
return Some("equity".to_string());
}
if Regex::new(r#"comprehensive\s*income"#)
.unwrap()
.is_match(&normalized)
{
return Some("comprehensive_income".to_string());
}
if Regex::new(r#"balance\s*sheet|financial\s*position|assets?andliabilities"#)
.unwrap()
.is_match(&normalized)
{
return Some("balance".to_string());
}
if Regex::new(r#"operations|income\s*statement|statementsofincome|profit"#)
.unwrap()
.is_match(&normalized)
{
return Some("income".to_string());
}
None
}
fn concept_statement_fallback(local_name: &str) -> Option<String> {
let normalized = local_name.to_ascii_lowercase();
if Regex::new(r#"equity|retainedearnings|additionalpaidincapital"#)
.unwrap()
.is_match(&normalized)
{
return Some("equity".to_string());
}
if normalized.contains("comprehensiveincome") {
return Some("comprehensive_income".to_string());
}
if Regex::new(
r#"deferredpolicyacquisitioncosts(andvalueofbusinessacquired)?$|supplementaryinsuranceinformationdeferredpolicyacquisitioncosts$|deferredacquisitioncosts$"#,
)
.unwrap()
.is_match(&normalized)
{
return Some("balance".to_string());
}
if Regex::new(
r#"netcashprovidedbyusedin.*activities|increasedecreasein|paymentstoacquire|paymentsforcapitalimprovements$|paymentsfordepositsonrealestateacquisitions$|paymentsforrepurchase|paymentsofdividends|dividendscommonstockcash$|proceedsfrom|repaymentsofdebt|sharebasedcompensation$|allocatedsharebasedcompensationexpense$|depreciationdepletionandamortization$|depreciationamortizationandaccretionnet$|depreciationandamortization$|depreciationamortizationandother$|otheradjustmentstoreconcilenetincomelosstocashprovidedbyusedinoperatingactivities"#,
)
.unwrap()
.is_match(&normalized)
{
return Some("cash_flow".to_string());
}
if Regex::new(
r#"asset|liabilit|debt|financingreceivable|loansreceivable|deposits|allowanceforcreditloss|futurepolicybenefits|policyholderaccountbalances|unearnedpremiums|realestateinvestmentproperty|grossatcarryingvalue|investmentproperty"#,
)
.unwrap()
.is_match(&normalized)
{
return Some("balance".to_string());
}
if Regex::new(
r#"revenue|income|profit|expense|costof|leaseincome|rental|premiums|claims|underwriting|policyacquisition|interestincome|interestexpense|noninterest|leasedandrentedproperty"#,
)
.unwrap()
.is_match(&normalized)
{
return Some("income".to_string());
}
None
}
/// True when the namespace belongs to a standard taxonomy (US-GAAP, IFRS,
/// SEC DEI) rather than a company-specific extension. Case-insensitive.
fn is_standard_namespace(namespace_uri: &str) -> bool {
    let lower = namespace_uri.to_ascii_lowercase();
    ["us-gaap", "ifrs", "/dei/", "xbrl.sec.gov/dei"]
        .iter()
        .any(|needle| lower.contains(needle))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pack_selector::FiscalPack;
    /// Builds a minimal annual-period fixture with the given id and end date.
    fn period(id: &str, period_end: &str) -> PeriodOutput {
        PeriodOutput {
            id: id.to_string(),
            filing_id: 1,
            accession_number: "0000000000-00-000001".to_string(),
            filing_date: "2025-12-31".to_string(),
            period_start: Some("2025-01-01".to_string()),
            period_end: Some(period_end.to_string()),
            filing_type: "10-K".to_string(),
            period_label: period_end.to_string(),
        }
    }
    /// Builds a statement-row fixture from a QName; `us-gaap`-prefixed
    /// names get the real 2024 namespace URI, everything else `urn:<prefix>`.
    /// `values` are (period id, USD amount) pairs.
    fn row(
        key: &str,
        qname: &str,
        statement: &str,
        order: i64,
        values: &[(&str, f64)],
    ) -> StatementRowOutput {
        let namespace_uri = qname
            .split_once(':')
            .map(|(prefix, _)| {
                if prefix == "us-gaap" {
                    "http://fasb.org/us-gaap/2024".to_string()
                } else {
                    format!("urn:{prefix}")
                }
            })
            .unwrap_or_else(|| "urn:unknown".to_string());
        let local_name = qname
            .split_once(':')
            .map(|(_, local_name)| local_name.to_string())
            .unwrap_or_else(|| qname.to_string());
        StatementRowOutput {
            key: key.to_string(),
            label: local_name_to_label(&local_name),
            concept_key: format!("{namespace_uri}#{local_name}"),
            qname: qname.to_string(),
            namespace_uri,
            local_name,
            is_extension: false,
            statement: statement.to_string(),
            role_uri: Some(statement.to_string()),
            order,
            depth: 0,
            parent_key: None,
            values: values
                .iter()
                .map(|(period_id, value)| (period_id.to_string(), Some(*value)))
                .collect(),
            units: values
                .iter()
                .map(|(period_id, _)| (period_id.to_string(), Some("iso4217:USD".to_string())))
                .collect(),
            has_dimensions: false,
            source_fact_ids: vec![order],
        }
    }
    // End-to-end check of the surface mapper: mapped rows keep their values,
    // SG&A and R&D roll up under operating expenses, and the extension
    // concept lands in the "unmapped" residual bucket.
    #[test]
    fn builds_compact_surface_rows_from_core_pack() {
        let periods = vec![period("2024", "2024-12-31"), period("2025", "2025-12-31")];
        let mut statement_rows = empty_statement_row_map();
        statement_rows.insert(
            "income".to_string(),
            vec![
                row(
                    "revenue-row",
                    "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax",
                    "income",
                    10,
                    &[("2024", 100.0), ("2025", 120.0)],
                ),
                row(
                    "operating-expenses-row",
                    "us-gaap:OperatingExpenses",
                    "income",
                    20,
                    &[("2024", 40.0), ("2025", 50.0)],
                ),
                row(
                    "sga-row",
                    "us-gaap:SellingGeneralAndAdministrativeExpense",
                    "income",
                    30,
                    &[("2024", 25.0), ("2025", 31.0)],
                ),
                row(
                    "rd-row",
                    "us-gaap:ResearchAndDevelopmentExpense",
                    "income",
                    40,
                    &[("2024", 15.0), ("2025", 19.0)],
                ),
                row(
                    "net-income-row",
                    "us-gaap:NetIncomeLoss",
                    "income",
                    50,
                    &[("2024", 22.0), ("2025", 30.0)],
                ),
                row(
                    "unmapped-row",
                    "company:OtherOperatingCharges",
                    "income",
                    60,
                    &[("2024", 3.0), ("2025", 4.0)],
                ),
            ],
        );
        statement_rows.insert(
            "balance".to_string(),
            vec![row(
                "assets-row",
                "us-gaap:Assets",
                "balance",
                70,
                &[("2024", 500.0), ("2025", 550.0)],
            )],
        );
        statement_rows.insert(
            "cash_flow".to_string(),
            vec![row(
                "ocf-row",
                "us-gaap:NetCashProvidedByUsedInOperatingActivities",
                "cash_flow",
                80,
                &[("2024", 60.0), ("2025", 65.0)],
            )],
        );
        let model = surface_mapper::build_compact_surface_model(
            &periods,
            &statement_rows,
            "us-gaap",
            FiscalPack::Core,
            vec![],
        )
        .expect("core pack should load and map");
        let income_surface_rows = model
            .surface_rows
            .get("income")
            .expect("income surface rows");
        let op_expenses = income_surface_rows
            .iter()
            .find(|row| row.key == "operating_expenses")
            .expect("operating expenses surface row");
        let revenue = income_surface_rows
            .iter()
            .find(|row| row.key == "revenue")
            .expect("revenue surface row");
        assert_eq!(revenue.values.get("2025").copied().flatten(), Some(120.0));
        assert_eq!(
            op_expenses.values.get("2024").copied().flatten(),
            Some(40.0)
        );
        assert_eq!(op_expenses.detail_count, Some(2));
        let operating_expense_details = model
            .detail_rows
            .get("income")
            .and_then(|groups| groups.get("operating_expenses"))
            .expect("operating expenses details");
        assert_eq!(operating_expense_details.len(), 2);
        assert!(operating_expense_details
            .iter()
            .any(|row| row.key == "sga-row"));
        assert!(operating_expense_details
            .iter()
            .any(|row| row.key == "rd-row"));
        let residual_rows = model
            .detail_rows
            .get("income")
            .and_then(|groups| groups.get("unmapped"))
            .expect("unmapped detail rows");
        assert_eq!(residual_rows.len(), 1);
        assert_eq!(residual_rows[0].key, "unmapped-row");
        assert!(residual_rows[0].residual_flag);
        let rd_mapping = model
            .concept_mappings
            .get("http://fasb.org/us-gaap/2024#ResearchAndDevelopmentExpense")
            .expect("rd mapping");
        assert_eq!(
            rd_mapping.detail_parent_surface_key.as_deref(),
            Some("operating_expenses")
        );
        assert_eq!(
            rd_mapping.surface_key.as_deref(),
            Some("operating_expenses")
        );
        let residual_mapping = model
            .concept_mappings
            .get("urn:company#OtherOperatingCharges")
            .expect("residual mapping");
        assert!(residual_mapping.residual_flag);
        assert_eq!(
            residual_mapping.detail_parent_surface_key.as_deref(),
            Some("unmapped")
        );
        assert_eq!(model.normalization_summary.surface_row_count, 6);
        assert_eq!(model.normalization_summary.detail_row_count, 3);
        assert_eq!(model.normalization_summary.unmapped_row_count, 1);
    }
    // Smoke test for the instance parser: one context, one unit, one fact.
    #[test]
    fn parses_basic_xbrl_facts_without_regex_backreferences() {
        let raw = r#"
        <xbrli:xbrl xmlns:xbrli="http://www.xbrl.org/2003/instance" xmlns:us-gaap="http://fasb.org/us-gaap/2024">
          <xbrli:context id="c1">
            <xbrli:entity>
              <xbrli:identifier scheme="http://www.sec.gov/CIK">0000320193</xbrli:identifier>
            </xbrli:entity>
            <xbrli:period>
              <xbrli:startDate>2025-01-01</xbrli:startDate>
              <xbrli:endDate>2025-12-31</xbrli:endDate>
            </xbrli:period>
          </xbrli:context>
          <xbrli:unit id="u1">
            <xbrli:measure>iso4217:USD</xbrli:measure>
          </xbrli:unit>
          <us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax contextRef="c1" unitRef="u1" decimals="-6">1000</us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax>
        </xbrli:xbrl>
        "#;
        let parsed = parse_xbrl_instance(raw, Some("test.xml".to_string()));
        assert_eq!(parsed.facts.len(), 1);
        assert_eq!(
            parsed.facts[0].qname,
            "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax"
        );
        assert_eq!(parsed.facts[0].value, 1000.0);
        assert_eq!(parsed.facts[0].unit.as_deref(), Some("iso4217:USD"));
    }
    // Exercises the local-name fallback heuristics used when a concept has
    // no presentation role (banking/insurance/REIT pack concepts).
    #[test]
    fn classifies_pack_specific_concepts_without_presentation_roles() {
        assert_eq!(
            concept_statement_fallback(
                "FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss"
            )
            .as_deref(),
            Some("balance")
        );
        assert_eq!(
            concept_statement_fallback("Deposits").as_deref(),
            Some("balance")
        );
        assert_eq!(
            concept_statement_fallback("RealEstateInvestmentPropertyNet").as_deref(),
            Some("balance")
        );
        assert_eq!(
            concept_statement_fallback("DeferredPolicyAcquisitionCosts").as_deref(),
            Some("balance")
        );
        assert_eq!(
            concept_statement_fallback("DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired")
                .as_deref(),
            Some("balance")
        );
        assert_eq!(
            concept_statement_fallback("IncreaseDecreaseInAccountsReceivable").as_deref(),
            Some("cash_flow")
        );
        assert_eq!(
            concept_statement_fallback("PaymentsOfDividends").as_deref(),
            Some("cash_flow")
        );
        assert_eq!(
            concept_statement_fallback("RepaymentsOfDebt").as_deref(),
            Some("cash_flow")
        );
        assert_eq!(
            concept_statement_fallback("ShareBasedCompensation").as_deref(),
            Some("cash_flow")
        );
        assert_eq!(
            concept_statement_fallback("PaymentsForCapitalImprovements").as_deref(),
            Some("cash_flow")
        );
        assert_eq!(
            concept_statement_fallback("PaymentsForDepositsOnRealEstateAcquisitions").as_deref(),
            Some("cash_flow")
        );
        assert_eq!(
            concept_statement_fallback("LeaseIncome").as_deref(),
            Some("income")
        );
        assert_eq!(
            concept_statement_fallback("DirectCostsOfLeasedAndRentedPropertyOrEquipment")
                .as_deref(),
            Some("income")
        );
    }
}