Integrate crabrl parser into taxonomy hydration
This commit is contained in:
231
rust/fiscal-xbrl-core/src/crabrl_adapter.rs
Normal file
231
rust/fiscal-xbrl-core/src/crabrl_adapter.rs
Normal file
@@ -0,0 +1,231 @@
|
||||
use anyhow::{Context, Result};
|
||||
use crabrl::{Document, FactValue, Measure, Parser, Period, UnitType};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::{
|
||||
is_xbrl_infrastructure_prefix, ContextOutput, DimensionOutput, ParsedFact, ParsedInstance,
|
||||
};
|
||||
|
||||
pub(crate) fn parse_xbrl_instance(
|
||||
raw: &str,
|
||||
source_file: Option<String>,
|
||||
) -> Result<ParsedInstance> {
|
||||
let document = Parser::new()
|
||||
.parse_bytes(raw.as_bytes())
|
||||
.context("crabrl failed to parse XBRL instance")?;
|
||||
|
||||
Ok(ParsedInstance {
|
||||
contexts: build_contexts(&document),
|
||||
facts: build_facts(&document, source_file),
|
||||
})
|
||||
}
|
||||
|
||||
fn build_contexts(document: &Document) -> Vec<ContextOutput> {
|
||||
document
|
||||
.contexts
|
||||
.iter()
|
||||
.map(|context| {
|
||||
let (period_start, period_end, period_instant) = convert_period(&context.period);
|
||||
|
||||
ContextOutput {
|
||||
context_id: context.id.to_string(),
|
||||
entity_identifier: Some(context.entity.identifier.to_string()),
|
||||
entity_scheme: Some(context.entity.scheme.to_string()),
|
||||
period_start,
|
||||
period_end,
|
||||
period_instant,
|
||||
segment_json: context.entity.segment.as_ref().map(segment_to_json),
|
||||
scenario_json: context.scenario.as_ref().map(scenario_to_json),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_facts(document: &Document, source_file: Option<String>) -> Vec<ParsedFact> {
|
||||
document
|
||||
.facts
|
||||
.concept_ids
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(index, concept_id)| {
|
||||
let qname = document
|
||||
.concept_names
|
||||
.get(*concept_id as usize)?
|
||||
.to_string();
|
||||
let (prefix, local_name) = split_qname(&qname)?;
|
||||
if is_xbrl_infrastructure_prefix(&prefix) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let value = numeric_fact_value(document.facts.values.get(index)?)?;
|
||||
let context = document
|
||||
.contexts
|
||||
.get(*document.facts.context_ids.get(index)? as usize)?;
|
||||
let namespace_uri = document
|
||||
.namespaces
|
||||
.get(prefix.as_str())
|
||||
.map(|value| value.to_string())
|
||||
.unwrap_or_else(|| format!("urn:unknown:{prefix}"));
|
||||
let (period_start, period_end, period_instant) = convert_period(&context.period);
|
||||
let dimensions = context_dimensions(context);
|
||||
|
||||
Some(ParsedFact {
|
||||
concept_key: format!("{namespace_uri}#{local_name}"),
|
||||
qname,
|
||||
namespace_uri,
|
||||
local_name,
|
||||
data_type: None,
|
||||
context_id: context.id.to_string(),
|
||||
unit: unit_for_fact(document, index),
|
||||
decimals: document
|
||||
.facts
|
||||
.decimals
|
||||
.get(index)
|
||||
.and_then(|value| value.map(|entry| entry.to_string())),
|
||||
precision: None,
|
||||
nil: matches!(document.facts.values.get(index), Some(FactValue::Nil)),
|
||||
value,
|
||||
period_start,
|
||||
period_end,
|
||||
period_instant,
|
||||
is_dimensionless: dimensions.is_empty(),
|
||||
dimensions,
|
||||
source_file: source_file.clone(),
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn numeric_fact_value(value: &FactValue) -> Option<f64> {
|
||||
match value {
|
||||
FactValue::Decimal(value) => Some(*value),
|
||||
FactValue::Integer(value) => Some(*value as f64),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Split a `prefix:localName` QName into its trimmed `(prefix, local_name)`
/// parts.
///
/// Returns `None` when there is no colon, or when either side is empty after
/// trimming. Only the first colon splits; the local name may itself contain
/// colons.
fn split_qname(qname: &str) -> Option<(String, String)> {
    let (raw_prefix, raw_local) = qname.split_once(':')?;
    let prefix = raw_prefix.trim();
    let local_name = raw_local.trim();

    if prefix.is_empty() || local_name.is_empty() {
        None
    } else {
        Some((prefix.to_string(), local_name.to_string()))
    }
}
|
||||
|
||||
fn convert_period(period: &Period) -> (Option<String>, Option<String>, Option<String>) {
|
||||
match period {
|
||||
Period::Instant { date } => (None, None, Some(date.to_string())),
|
||||
Period::Duration { start, end } => (Some(start.to_string()), Some(end.to_string()), None),
|
||||
Period::Forever => (None, None, None),
|
||||
}
|
||||
}
|
||||
|
||||
fn context_dimensions(context: &crabrl::Context) -> Vec<DimensionOutput> {
|
||||
let mut dimensions = Vec::new();
|
||||
|
||||
if let Some(segment) = context.entity.segment.as_ref() {
|
||||
dimensions.extend(
|
||||
segment
|
||||
.explicit_members
|
||||
.iter()
|
||||
.map(|member| DimensionOutput {
|
||||
axis: member.dimension.to_string(),
|
||||
member: member.member.to_string(),
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(scenario) = context.scenario.as_ref() {
|
||||
dimensions.extend(
|
||||
scenario
|
||||
.explicit_members
|
||||
.iter()
|
||||
.map(|member| DimensionOutput {
|
||||
axis: member.dimension.to_string(),
|
||||
member: member.member.to_string(),
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
dimensions
|
||||
}
|
||||
|
||||
fn unit_for_fact(document: &Document, fact_index: usize) -> Option<String> {
|
||||
let unit_id = *document.facts.unit_ids.get(fact_index)?;
|
||||
if unit_id == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
document
|
||||
.units
|
||||
.get((unit_id - 1) as usize)
|
||||
.map(|unit| unit_type_to_string(&unit.unit_type))
|
||||
}
|
||||
|
||||
fn unit_type_to_string(unit_type: &UnitType) -> String {
|
||||
match unit_type {
|
||||
UnitType::Simple(measures) => join_measures(measures, "/"),
|
||||
UnitType::Multiply(measures) => join_measures(measures, "*"),
|
||||
UnitType::Divide {
|
||||
numerator,
|
||||
denominator,
|
||||
} => format!(
|
||||
"{}/{}",
|
||||
join_measures(numerator, "*"),
|
||||
join_measures(denominator, "*")
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn join_measures(measures: &[Measure], separator: &str) -> String {
|
||||
measures
|
||||
.iter()
|
||||
.map(measure_to_string)
|
||||
.collect::<Vec<_>>()
|
||||
.join(separator)
|
||||
}
|
||||
|
||||
fn measure_to_string(measure: &Measure) -> String {
|
||||
if measure.namespace.is_empty() {
|
||||
measure.name.to_string()
|
||||
} else {
|
||||
format!("{}:{}", measure.namespace, measure.name)
|
||||
}
|
||||
}
|
||||
|
||||
fn segment_to_json(segment: &crabrl::Segment) -> serde_json::Value {
|
||||
json!({
|
||||
"explicitMembers": segment.explicit_members.iter().map(|member| {
|
||||
json!({
|
||||
"axis": member.dimension.to_string(),
|
||||
"member": member.member.to_string(),
|
||||
})
|
||||
}).collect::<Vec<_>>(),
|
||||
"typedMembers": segment.typed_members.iter().map(|member| {
|
||||
json!({
|
||||
"axis": member.dimension.to_string(),
|
||||
"value": member.value.to_string(),
|
||||
})
|
||||
}).collect::<Vec<_>>(),
|
||||
})
|
||||
}
|
||||
|
||||
fn scenario_to_json(scenario: &crabrl::Scenario) -> serde_json::Value {
|
||||
json!({
|
||||
"explicitMembers": scenario.explicit_members.iter().map(|member| {
|
||||
json!({
|
||||
"axis": member.dimension.to_string(),
|
||||
"member": member.member.to_string(),
|
||||
})
|
||||
}).collect::<Vec<_>>(),
|
||||
"typedMembers": scenario.typed_members.iter().map(|member| {
|
||||
json!({
|
||||
"axis": member.dimension.to_string(),
|
||||
"value": member.value.to_string(),
|
||||
})
|
||||
}).collect::<Vec<_>>(),
|
||||
})
|
||||
}
|
||||
@@ -9,6 +9,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::sync::Mutex;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
mod crabrl_adapter;
|
||||
mod kpi_mapper;
|
||||
mod metrics;
|
||||
mod pack_selector;
|
||||
@@ -54,44 +55,6 @@ where
|
||||
fetch_fn()
|
||||
}
|
||||
|
||||
static CONTEXT_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?context>"#).unwrap()
|
||||
});
|
||||
static UNIT_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?unit>"#).unwrap()
|
||||
});
|
||||
static FACT_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'][^>]*)>(.*?)</[a-zA-Z0-9_\-]+:[a-zA-Z0-9_\-.]+>"#).unwrap()
|
||||
});
|
||||
static EXPLICIT_MEMBER_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?explicitMember>"#).unwrap()
|
||||
});
|
||||
static TYPED_MEMBER_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?typedMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?typedMember>"#).unwrap()
|
||||
});
|
||||
static IDENTIFIER_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?identifier\b[^>]*\bscheme=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?identifier>"#).unwrap()
|
||||
});
|
||||
static SEGMENT_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?segment\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?segment>"#)
|
||||
.unwrap()
|
||||
});
|
||||
static SCENARIO_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?scenario\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?scenario>"#)
|
||||
.unwrap()
|
||||
});
|
||||
static START_DATE_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?startDate>(.*?)</(?:[a-z0-9_\-]+:)?startDate>"#).unwrap()
|
||||
});
|
||||
static END_DATE_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?endDate>(.*?)</(?:[a-z0-9_\-]+:)?endDate>"#).unwrap()
|
||||
});
|
||||
static INSTANT_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?instant>(.*?)</(?:[a-z0-9_\-]+:)?instant>"#).unwrap()
|
||||
});
|
||||
static MEASURE_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?measure>(.*?)</(?:[a-z0-9_\-]+:)?measure>"#).unwrap()
|
||||
});
|
||||
static LABEL_LINK_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?labelLink>"#)
|
||||
.unwrap()
|
||||
@@ -465,25 +428,7 @@ pub type SurfaceRowMap = BTreeMap<String, Vec<SurfaceRowOutput>>;
|
||||
pub type DetailRowStatementMap = BTreeMap<String, BTreeMap<String, Vec<DetailRowOutput>>>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ParsedContext {
|
||||
id: String,
|
||||
entity_identifier: Option<String>,
|
||||
entity_scheme: Option<String>,
|
||||
period_start: Option<String>,
|
||||
period_end: Option<String>,
|
||||
period_instant: Option<String>,
|
||||
dimensions: Vec<DimensionOutput>,
|
||||
segment: Option<serde_json::Value>,
|
||||
scenario: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ParsedUnit {
|
||||
measure: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ParsedFact {
|
||||
pub(crate) struct ParsedFact {
|
||||
concept_key: String,
|
||||
qname: String,
|
||||
namespace_uri: String,
|
||||
@@ -593,7 +538,8 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
|
||||
);
|
||||
}
|
||||
|
||||
let parsed_instance = parse_xbrl_instance(&instance_text, Some(instance_asset.name.clone()));
|
||||
let parsed_instance = parse_xbrl_instance(&instance_text, Some(instance_asset.name.clone()))
|
||||
.context("parse failed for XBRL instance")?;
|
||||
|
||||
let mut label_by_concept = HashMap::new();
|
||||
let mut presentation = Vec::new();
|
||||
@@ -1144,114 +1090,13 @@ fn validate_xbrl_structure(xml: &str, source_file: Option<&str>) -> XbrlValidati
|
||||
}
|
||||
}
|
||||
|
||||
struct ParsedInstance {
|
||||
pub(crate) struct ParsedInstance {
|
||||
contexts: Vec<ContextOutput>,
|
||||
facts: Vec<ParsedFact>,
|
||||
}
|
||||
|
||||
fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> ParsedInstance {
|
||||
let namespaces = parse_namespace_map(raw, "xbrl");
|
||||
let context_by_id = parse_contexts(raw);
|
||||
let unit_by_id = parse_units(raw);
|
||||
let mut facts = Vec::new();
|
||||
|
||||
for captures in FACT_RE.captures_iter(raw) {
|
||||
let prefix = captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str().trim())
|
||||
.unwrap_or_default();
|
||||
let local_name = captures
|
||||
.get(2)
|
||||
.map(|value| value.as_str().trim())
|
||||
.unwrap_or_default();
|
||||
let attrs = captures
|
||||
.get(3)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let body = decode_xml_entities(
|
||||
captures
|
||||
.get(4)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default()
|
||||
.trim(),
|
||||
);
|
||||
|
||||
if prefix.is_empty() || local_name.is_empty() || is_xbrl_infrastructure_prefix(prefix) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let attr_map = parse_attrs(attrs);
|
||||
let Some(context_id) = attr_map
|
||||
.get("contextRef")
|
||||
.cloned()
|
||||
.or_else(|| attr_map.get("contextref").cloned())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let Some(value) = parse_number(&body) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let namespace_uri = namespaces
|
||||
.get(prefix)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("urn:unknown:{prefix}"));
|
||||
let context = context_by_id.get(&context_id);
|
||||
let unit_ref = attr_map
|
||||
.get("unitRef")
|
||||
.cloned()
|
||||
.or_else(|| attr_map.get("unitref").cloned());
|
||||
let unit = unit_ref
|
||||
.as_ref()
|
||||
.and_then(|unit_ref| unit_by_id.get(unit_ref))
|
||||
.and_then(|unit| unit.measure.clone())
|
||||
.or(unit_ref);
|
||||
|
||||
facts.push(ParsedFact {
|
||||
concept_key: format!("{namespace_uri}#{local_name}"),
|
||||
qname: format!("{prefix}:{local_name}"),
|
||||
namespace_uri,
|
||||
local_name: local_name.to_string(),
|
||||
data_type: None,
|
||||
context_id: context_id.clone(),
|
||||
unit,
|
||||
decimals: attr_map.get("decimals").cloned(),
|
||||
precision: attr_map.get("precision").cloned(),
|
||||
nil: attr_map
|
||||
.get("xsi:nil")
|
||||
.or_else(|| attr_map.get("nil"))
|
||||
.map(|value| value.eq_ignore_ascii_case("true"))
|
||||
.unwrap_or(false),
|
||||
value,
|
||||
period_start: context.and_then(|value| value.period_start.clone()),
|
||||
period_end: context.and_then(|value| value.period_end.clone()),
|
||||
period_instant: context.and_then(|value| value.period_instant.clone()),
|
||||
dimensions: context
|
||||
.map(|value| value.dimensions.clone())
|
||||
.unwrap_or_default(),
|
||||
is_dimensionless: context
|
||||
.map(|value| value.dimensions.is_empty())
|
||||
.unwrap_or(true),
|
||||
source_file: source_file.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
let contexts = context_by_id
|
||||
.values()
|
||||
.map(|context| ContextOutput {
|
||||
context_id: context.id.clone(),
|
||||
entity_identifier: context.entity_identifier.clone(),
|
||||
entity_scheme: context.entity_scheme.clone(),
|
||||
period_start: context.period_start.clone(),
|
||||
period_end: context.period_end.clone(),
|
||||
period_instant: context.period_instant.clone(),
|
||||
segment_json: context.segment.clone(),
|
||||
scenario_json: context.scenario.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
ParsedInstance { contexts, facts }
|
||||
fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> Result<ParsedInstance> {
|
||||
crabrl_adapter::parse_xbrl_instance(raw, source_file)
|
||||
}
|
||||
|
||||
fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap<String, String> {
|
||||
@@ -1277,173 +1122,7 @@ fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap<String, String
|
||||
map
|
||||
}
|
||||
|
||||
fn parse_contexts(raw: &str) -> HashMap<String, ParsedContext> {
|
||||
let mut contexts = HashMap::new();
|
||||
|
||||
for captures in CONTEXT_RE.captures_iter(raw) {
|
||||
let Some(context_id) = captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str().trim().to_string())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let block = captures
|
||||
.get(2)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let (entity_identifier, entity_scheme) = IDENTIFIER_RE
|
||||
.captures(block)
|
||||
.map(|captures| {
|
||||
(
|
||||
captures
|
||||
.get(2)
|
||||
.map(|value| decode_xml_entities(value.as_str().trim())),
|
||||
captures
|
||||
.get(1)
|
||||
.map(|value| decode_xml_entities(value.as_str().trim())),
|
||||
)
|
||||
})
|
||||
.unwrap_or((None, None));
|
||||
|
||||
let period_start = START_DATE_RE
|
||||
.captures(block)
|
||||
.and_then(|captures| captures.get(1))
|
||||
.map(|value| decode_xml_entities(value.as_str().trim()));
|
||||
let period_end = END_DATE_RE
|
||||
.captures(block)
|
||||
.and_then(|captures| captures.get(1))
|
||||
.map(|value| decode_xml_entities(value.as_str().trim()));
|
||||
let period_instant = INSTANT_RE
|
||||
.captures(block)
|
||||
.and_then(|captures| captures.get(1))
|
||||
.map(|value| decode_xml_entities(value.as_str().trim()));
|
||||
|
||||
let segment = SEGMENT_RE
|
||||
.captures(block)
|
||||
.and_then(|captures| captures.get(1))
|
||||
.map(|value| parse_dimension_container(value.as_str()));
|
||||
let scenario = SCENARIO_RE
|
||||
.captures(block)
|
||||
.and_then(|captures| captures.get(1))
|
||||
.map(|value| parse_dimension_container(value.as_str()));
|
||||
|
||||
let mut dimensions = Vec::new();
|
||||
if let Some(segment_value) = segment.as_ref() {
|
||||
if let Some(members) = segment_value
|
||||
.get("explicitMembers")
|
||||
.and_then(|value| value.as_array())
|
||||
{
|
||||
for member in members {
|
||||
if let (Some(axis), Some(member_value)) = (
|
||||
member.get("axis").and_then(|value| value.as_str()),
|
||||
member.get("member").and_then(|value| value.as_str()),
|
||||
) {
|
||||
dimensions.push(DimensionOutput {
|
||||
axis: axis.to_string(),
|
||||
member: member_value.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(scenario_value) = scenario.as_ref() {
|
||||
if let Some(members) = scenario_value
|
||||
.get("explicitMembers")
|
||||
.and_then(|value| value.as_array())
|
||||
{
|
||||
for member in members {
|
||||
if let (Some(axis), Some(member_value)) = (
|
||||
member.get("axis").and_then(|value| value.as_str()),
|
||||
member.get("member").and_then(|value| value.as_str()),
|
||||
) {
|
||||
dimensions.push(DimensionOutput {
|
||||
axis: axis.to_string(),
|
||||
member: member_value.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
contexts.insert(
|
||||
context_id.clone(),
|
||||
ParsedContext {
|
||||
id: context_id,
|
||||
entity_identifier,
|
||||
entity_scheme,
|
||||
period_start,
|
||||
period_end,
|
||||
period_instant,
|
||||
dimensions,
|
||||
segment,
|
||||
scenario,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
contexts
|
||||
}
|
||||
|
||||
fn parse_dimension_container(raw: &str) -> serde_json::Value {
|
||||
let explicit_members = EXPLICIT_MEMBER_RE
|
||||
.captures_iter(raw)
|
||||
.filter_map(|captures| {
|
||||
Some(serde_json::json!({
|
||||
"axis": decode_xml_entities(captures.get(1)?.as_str().trim()),
|
||||
"member": decode_xml_entities(captures.get(2)?.as_str().trim())
|
||||
}))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let typed_members = TYPED_MEMBER_RE
|
||||
.captures_iter(raw)
|
||||
.filter_map(|captures| {
|
||||
Some(serde_json::json!({
|
||||
"axis": decode_xml_entities(captures.get(1)?.as_str().trim()),
|
||||
"value": decode_xml_entities(captures.get(2)?.as_str().trim())
|
||||
}))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
serde_json::json!({
|
||||
"explicitMembers": explicit_members,
|
||||
"typedMembers": typed_members
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_units(raw: &str) -> HashMap<String, ParsedUnit> {
|
||||
let mut units = HashMap::new();
|
||||
for captures in UNIT_RE.captures_iter(raw) {
|
||||
let Some(id) = captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str().trim().to_string())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let block = captures
|
||||
.get(2)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let measures = MEASURE_RE
|
||||
.captures_iter(block)
|
||||
.filter_map(|captures| captures.get(1))
|
||||
.map(|value| decode_xml_entities(value.as_str().trim()))
|
||||
.filter(|value| !value.is_empty())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let measure = if measures.len() == 1 {
|
||||
measures.first().cloned()
|
||||
} else if measures.len() > 1 {
|
||||
Some(measures.join("/"))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
units.insert(id, ParsedUnit { measure });
|
||||
}
|
||||
units
|
||||
}
|
||||
|
||||
fn is_xbrl_infrastructure_prefix(prefix: &str) -> bool {
|
||||
pub(crate) fn is_xbrl_infrastructure_prefix(prefix: &str) -> bool {
|
||||
matches!(
|
||||
prefix.to_ascii_lowercase().as_str(),
|
||||
"xbrli" | "xlink" | "link" | "xbrldi" | "xbrldt"
|
||||
@@ -1474,25 +1153,6 @@ fn decode_xml_entities(value: &str) -> String {
|
||||
.replace(" ", " ")
|
||||
}
|
||||
|
||||
fn parse_number(raw: &str) -> Option<f64> {
|
||||
let trimmed = raw.trim();
|
||||
if trimmed.is_empty() || trimmed.chars().all(|char| char == '-') {
|
||||
return None;
|
||||
}
|
||||
let negative = trimmed.starts_with('(') && trimmed.ends_with(')');
|
||||
let normalized = Regex::new(r#"<[^>]+>"#)
|
||||
.unwrap()
|
||||
.replace_all(trimmed, " ")
|
||||
.replace(',', "")
|
||||
.replace('$', "")
|
||||
.replace(['(', ')'], "")
|
||||
.replace('\u{2212}', "-")
|
||||
.split_whitespace()
|
||||
.collect::<String>();
|
||||
let parsed = normalized.parse::<f64>().ok()?;
|
||||
Some(if negative { -parsed.abs() } else { parsed })
|
||||
}
|
||||
|
||||
fn parse_label_linkbase(raw: &str) -> HashMap<String, String> {
|
||||
let namespaces = parse_namespace_map(raw, "linkbase");
|
||||
let mut preferred = HashMap::<String, (String, i64)>::new();
|
||||
@@ -2543,7 +2203,8 @@ mod tests {
|
||||
</xbrli:xbrl>
|
||||
"#;
|
||||
|
||||
let parsed = parse_xbrl_instance(raw, Some("test.xml".to_string()));
|
||||
let parsed = parse_xbrl_instance(raw, Some("test.xml".to_string()))
|
||||
.expect("crabrl parser should parse test instance");
|
||||
assert_eq!(parsed.facts.len(), 1);
|
||||
assert_eq!(
|
||||
parsed.facts[0].qname,
|
||||
|
||||
@@ -3,8 +3,8 @@ use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
|
||||
use crate::pack_selector::FiscalPack;
|
||||
use crate::taxonomy_loader::{
|
||||
load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition, SurfaceFormula,
|
||||
SurfaceFormulaOp, SurfaceSignTransform,
|
||||
load_crosswalk, load_income_bridge, load_surface_pack, CrosswalkFile, IncomeBridgeFile,
|
||||
IncomeBridgeRow, SurfaceDefinition, SurfaceFormula, SurfaceFormulaOp, SurfaceSignTransform,
|
||||
};
|
||||
use crate::{
|
||||
ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput,
|
||||
@@ -114,6 +114,7 @@ pub fn build_compact_surface_model(
|
||||
) -> Result<CompactSurfaceModel> {
|
||||
let pack = load_surface_pack(fiscal_pack)?;
|
||||
let crosswalk = load_crosswalk(taxonomy_regime)?;
|
||||
let income_bridge = load_income_bridge(fiscal_pack).ok();
|
||||
let mut surface_rows = empty_surface_row_map();
|
||||
let mut detail_rows = empty_detail_row_map();
|
||||
let mut concept_mappings = HashMap::<String, MappingAssignment>::new();
|
||||
@@ -157,14 +158,20 @@ pub fn build_compact_surface_model(
|
||||
.filter(|matched| matched.match_role == MatchRole::Detail)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
let bridge_detail_matches = collect_income_bridge_detail_matches(
|
||||
definition,
|
||||
&rows,
|
||||
crosswalk.as_ref(),
|
||||
income_bridge.as_ref(),
|
||||
);
|
||||
|
||||
let detail_matches = if definition.detail_grouping_policy == "group_all_children" {
|
||||
if detail_component_matches.is_empty()
|
||||
&& definition.rollup_policy == "aggregate_children"
|
||||
{
|
||||
let detail_matches =
|
||||
merge_detail_matches(&detail_component_matches, &bridge_detail_matches);
|
||||
if detail_matches.is_empty() && definition.rollup_policy == "aggregate_children" {
|
||||
Vec::new()
|
||||
} else {
|
||||
detail_component_matches.clone()
|
||||
detail_matches
|
||||
}
|
||||
} else {
|
||||
Vec::new()
|
||||
@@ -758,28 +765,123 @@ fn match_statement_row<'a>(
|
||||
None
|
||||
}
|
||||
|
||||
fn collect_income_bridge_detail_matches<'a>(
|
||||
definition: &SurfaceDefinition,
|
||||
rows: &'a [StatementRowOutput],
|
||||
crosswalk: Option<&CrosswalkFile>,
|
||||
income_bridge: Option<&IncomeBridgeFile>,
|
||||
) -> Vec<MatchedStatementRow<'a>> {
|
||||
if definition.statement != "income"
|
||||
|| definition.rollup_policy != "aggregate_children"
|
||||
|| definition.detail_grouping_policy != "group_all_children"
|
||||
{
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let Some(bridge_row) =
|
||||
income_bridge.and_then(|bridge| bridge.rows.get(&definition.surface_key))
|
||||
else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
rows.iter()
|
||||
.filter(|row| has_any_value(&row.values))
|
||||
.filter_map(|row| match_income_bridge_detail_row(row, bridge_row, crosswalk))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn match_income_bridge_detail_row<'a>(
|
||||
row: &'a StatementRowOutput,
|
||||
bridge_row: &IncomeBridgeRow,
|
||||
crosswalk: Option<&CrosswalkFile>,
|
||||
) -> Option<MatchedStatementRow<'a>> {
|
||||
let authoritative_concept_key = crosswalk
|
||||
.and_then(|crosswalk| crosswalk.mappings.get(&row.qname))
|
||||
.map(|mapping| mapping.authoritative_concept_key.clone())
|
||||
.or_else(|| {
|
||||
if !row.is_extension {
|
||||
Some(row.qname.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let matches_group = bridge_row
|
||||
.component_concept_groups
|
||||
.positive
|
||||
.iter()
|
||||
.chain(bridge_row.component_concept_groups.negative.iter())
|
||||
.any(|group| {
|
||||
group.concepts.iter().any(|candidate| {
|
||||
candidate_matches(candidate, &row.qname)
|
||||
|| candidate_matches(candidate, &row.local_name)
|
||||
|| authoritative_concept_key
|
||||
.as_ref()
|
||||
.map(|concept| candidate_matches(candidate, concept))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
});
|
||||
|
||||
if !matches_group {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(MatchedStatementRow {
|
||||
row,
|
||||
authoritative_concept_key,
|
||||
mapping_method: MappingMethod::AggregateChildren,
|
||||
match_role: MatchRole::Detail,
|
||||
rank: 2,
|
||||
})
|
||||
}
|
||||
|
||||
fn merge_detail_matches<'a>(
|
||||
direct_matches: &[MatchedStatementRow<'a>],
|
||||
bridge_matches: &[MatchedStatementRow<'a>],
|
||||
) -> Vec<MatchedStatementRow<'a>> {
|
||||
let mut merged = HashMap::<String, MatchedStatementRow<'a>>::new();
|
||||
|
||||
for matched in direct_matches.iter().chain(bridge_matches.iter()) {
|
||||
merged
|
||||
.entry(matched.row.key.clone())
|
||||
.and_modify(|existing| {
|
||||
if compare_statement_matches(matched, existing).is_lt() {
|
||||
*existing = matched.clone();
|
||||
}
|
||||
})
|
||||
.or_insert_with(|| matched.clone());
|
||||
}
|
||||
|
||||
merged.into_values().collect()
|
||||
}
|
||||
|
||||
fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
|
||||
matches
|
||||
.iter()
|
||||
.min_by(|left, right| {
|
||||
left.rank
|
||||
.cmp(&right.rank)
|
||||
.then_with(|| {
|
||||
let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
|
||||
let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
|
||||
left_dimension_rank.cmp(&right_dimension_rank)
|
||||
})
|
||||
.then_with(|| left.row.order.cmp(&right.row.order))
|
||||
.then_with(|| {
|
||||
max_abs_value(&right.row.values)
|
||||
.partial_cmp(&max_abs_value(&left.row.values))
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
})
|
||||
.then_with(|| left.row.label.cmp(&right.row.label))
|
||||
})
|
||||
.min_by(|left, right| compare_statement_matches(left, right))
|
||||
.expect("pick_best_match requires at least one match")
|
||||
}
|
||||
|
||||
fn compare_statement_matches(
|
||||
left: &MatchedStatementRow<'_>,
|
||||
right: &MatchedStatementRow<'_>,
|
||||
) -> std::cmp::Ordering {
|
||||
left.rank
|
||||
.cmp(&right.rank)
|
||||
.then_with(|| {
|
||||
let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
|
||||
let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
|
||||
left_dimension_rank.cmp(&right_dimension_rank)
|
||||
})
|
||||
.then_with(|| left.row.order.cmp(&right.row.order))
|
||||
.then_with(|| {
|
||||
max_abs_value(&right.row.values)
|
||||
.partial_cmp(&max_abs_value(&left.row.values))
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
})
|
||||
.then_with(|| left.row.label.cmp(&right.row.label))
|
||||
}
|
||||
|
||||
fn build_surface_values(
|
||||
periods: &[PeriodOutput],
|
||||
matches: &[MatchedStatementRow<'_>],
|
||||
|
||||
@@ -336,22 +336,26 @@ fn build_formula_row(
|
||||
.positive
|
||||
.iter()
|
||||
.filter_map(|surface_key| {
|
||||
income_surface_rows
|
||||
.iter()
|
||||
.find(|row| row.key == *surface_key)
|
||||
resolve_component_surface_source(
|
||||
surface_key,
|
||||
income_statement_rows,
|
||||
income_surface_rows,
|
||||
crosswalk,
|
||||
)
|
||||
})
|
||||
.map(surface_source)
|
||||
.collect::<Vec<_>>();
|
||||
let negative_surface_sources = bridge_row
|
||||
.component_surfaces
|
||||
.negative
|
||||
.iter()
|
||||
.filter_map(|surface_key| {
|
||||
income_surface_rows
|
||||
.iter()
|
||||
.find(|row| row.key == *surface_key)
|
||||
resolve_component_surface_source(
|
||||
surface_key,
|
||||
income_statement_rows,
|
||||
income_surface_rows,
|
||||
crosswalk,
|
||||
)
|
||||
})
|
||||
.map(surface_source)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let (positive_group_sources, positive_group_rows) = collect_group_sources(
|
||||
@@ -810,6 +814,44 @@ fn collect_group_sources<'a>(
|
||||
(sources, rows)
|
||||
}
|
||||
|
||||
fn resolve_component_surface_source(
|
||||
surface_key: &str,
|
||||
income_statement_rows: &[StatementRowOutput],
|
||||
income_surface_rows: &[SurfaceRowOutput],
|
||||
crosswalk: Option<&CrosswalkFile>,
|
||||
) -> Option<ValueSource> {
|
||||
if let Some(surface_row) = income_surface_rows
|
||||
.iter()
|
||||
.find(|row| row.key == surface_key)
|
||||
{
|
||||
return Some(surface_source(surface_row));
|
||||
}
|
||||
|
||||
let matches = income_statement_rows
|
||||
.iter()
|
||||
.filter(|row| has_any_value(&row.values))
|
||||
.filter(|row| row_matches_surface_key(row, surface_key, crosswalk))
|
||||
.map(statement_row_source)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if matches.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(merge_value_sources(&matches))
|
||||
}
|
||||
|
||||
fn row_matches_surface_key(
|
||||
row: &StatementRowOutput,
|
||||
surface_key: &str,
|
||||
crosswalk: Option<&CrosswalkFile>,
|
||||
) -> bool {
|
||||
crosswalk
|
||||
.and_then(|crosswalk| crosswalk.mappings.get(&row.qname))
|
||||
.map(|mapping| mapping.surface_key.eq_ignore_ascii_case(surface_key))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn match_direct_authoritative<'a>(
|
||||
row: &'a StatementRowOutput,
|
||||
candidates: &[String],
|
||||
@@ -1024,6 +1066,52 @@ fn surface_source(row: &SurfaceRowOutput) -> ValueSource {
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_value_sources(sources: &[ValueSource]) -> ValueSource {
|
||||
let mut values = BTreeMap::<String, Option<f64>>::new();
|
||||
|
||||
for period_id in sources.iter().flat_map(|source| source.values.keys()) {
|
||||
values.entry(period_id.clone()).or_insert_with(|| {
|
||||
let period_values = sources
|
||||
.iter()
|
||||
.map(|source| source.values.get(period_id).copied().flatten())
|
||||
.collect::<Vec<_>>();
|
||||
if period_values.iter().all(|value| value.is_none()) {
|
||||
None
|
||||
} else {
|
||||
Some(
|
||||
period_values
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or(0.0))
|
||||
.sum(),
|
||||
)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
ValueSource {
|
||||
values,
|
||||
source_concepts: unique_sorted_strings(
|
||||
sources
|
||||
.iter()
|
||||
.flat_map(|source| source.source_concepts.clone())
|
||||
.collect(),
|
||||
),
|
||||
source_row_keys: unique_sorted_strings(
|
||||
sources
|
||||
.iter()
|
||||
.flat_map(|source| source.source_row_keys.clone())
|
||||
.collect(),
|
||||
),
|
||||
source_fact_ids: unique_sorted_i64(
|
||||
sources
|
||||
.iter()
|
||||
.flat_map(|source| source.source_fact_ids.clone())
|
||||
.collect(),
|
||||
),
|
||||
has_dimensions: sources.iter().any(|source| source.has_dimensions),
|
||||
}
|
||||
}
|
||||
|
||||
fn fact_matches_period(fact: &FactOutput, period: &PeriodOutput) -> bool {
|
||||
if fact.period_end != period.period_end {
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user