Expand backend financial statement parsers
This commit is contained in:
@@ -37,10 +37,12 @@ static IDENTIFIER_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?identifier\b[^>]*\bscheme=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?identifier>"#).unwrap()
|
||||
});
|
||||
static SEGMENT_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?segment\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?segment>"#).unwrap()
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?segment\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?segment>"#)
|
||||
.unwrap()
|
||||
});
|
||||
static SCENARIO_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?scenario\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?scenario>"#).unwrap()
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?scenario\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?scenario>"#)
|
||||
.unwrap()
|
||||
});
|
||||
static START_DATE_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?startDate>(.*?)</(?:[a-z0-9_\-]+:)?startDate>"#).unwrap()
|
||||
@@ -55,7 +57,8 @@ static MEASURE_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?measure>(.*?)</(?:[a-z0-9_\-]+:)?measure>"#).unwrap()
|
||||
});
|
||||
static LABEL_LINK_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?labelLink>"#).unwrap()
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>(.*?)</(?:[a-z0-9_\-]+:)?labelLink>"#)
|
||||
.unwrap()
|
||||
});
|
||||
static PRESENTATION_LINK_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>(.*?)</(?:[a-z0-9_\-]+:)?presentationLink>"#).unwrap()
|
||||
@@ -67,12 +70,14 @@ static LABEL_RESOURCE_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?label\b([^>]*)>(.*?)</(?:[a-z0-9_\-]+:)?label>"#).unwrap()
|
||||
});
|
||||
static LABEL_ARC_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)/?>(?:</(?:[a-z0-9_\-]+:)?labelArc>)?"#).unwrap()
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)/?>(?:</(?:[a-z0-9_\-]+:)?labelArc>)?"#)
|
||||
.unwrap()
|
||||
});
|
||||
static PRESENTATION_ARC_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)/?>(?:</(?:[a-z0-9_\-]+:)?presentationArc>)?"#).unwrap()
|
||||
});
|
||||
static ATTR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#"([a-zA-Z0-9:_\-]+)=["']([^"']+)["']"#).unwrap());
|
||||
static ATTR_RE: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r#"([a-zA-Z0-9:_\-]+)=["']([^"']+)["']"#).unwrap());
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -451,7 +456,8 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
|
||||
});
|
||||
};
|
||||
|
||||
let instance_text = fetch_text(&client, &instance_asset.url).context("fetch request failed for XBRL instance")?;
|
||||
let instance_text = fetch_text(&client, &instance_asset.url)
|
||||
.context("fetch request failed for XBRL instance")?;
|
||||
let parsed_instance = parse_xbrl_instance(&instance_text, Some(instance_asset.name.clone()));
|
||||
|
||||
let mut label_by_concept = HashMap::new();
|
||||
@@ -459,11 +465,9 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
|
||||
let mut source = "xbrl_instance".to_string();
|
||||
let mut parse_error = None;
|
||||
|
||||
for asset in discovered
|
||||
.assets
|
||||
.iter()
|
||||
.filter(|asset| asset.is_selected && (asset.asset_type == "presentation" || asset.asset_type == "label"))
|
||||
{
|
||||
for asset in discovered.assets.iter().filter(|asset| {
|
||||
asset.is_selected && (asset.asset_type == "presentation" || asset.asset_type == "label")
|
||||
}) {
|
||||
match fetch_text(&client, &asset.url) {
|
||||
Ok(content) => {
|
||||
if asset.asset_type == "presentation" {
|
||||
@@ -515,10 +519,15 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
|
||||
pack_selection.pack,
|
||||
&mut compact_model,
|
||||
)?;
|
||||
let kpi_result = kpi_mapper::build_taxonomy_kpis(&materialized.periods, &facts, pack_selection.pack)?;
|
||||
let kpi_result =
|
||||
kpi_mapper::build_taxonomy_kpis(&materialized.periods, &facts, pack_selection.pack)?;
|
||||
compact_model.normalization_summary.kpi_row_count = kpi_result.rows.len();
|
||||
for warning in kpi_result.warnings {
|
||||
if !compact_model.normalization_summary.warnings.contains(&warning) {
|
||||
if !compact_model
|
||||
.normalization_summary
|
||||
.warnings
|
||||
.contains(&warning)
|
||||
{
|
||||
compact_model.normalization_summary.warnings.push(warning);
|
||||
}
|
||||
}
|
||||
@@ -526,7 +535,11 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
|
||||
&mut compact_model.concept_mappings,
|
||||
kpi_result.mapping_assignments,
|
||||
);
|
||||
surface_mapper::apply_mapping_assignments(&mut concepts, &mut facts, &compact_model.concept_mappings);
|
||||
surface_mapper::apply_mapping_assignments(
|
||||
&mut concepts,
|
||||
&mut facts,
|
||||
&compact_model.concept_mappings,
|
||||
);
|
||||
|
||||
let has_rows = materialized
|
||||
.statement_rows
|
||||
@@ -572,7 +585,11 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
|
||||
concepts_count: concepts.len(),
|
||||
dimensions_count: facts
|
||||
.iter()
|
||||
.flat_map(|fact| fact.dimensions.iter().map(|dimension| format!("{}::{}", dimension.axis, dimension.member)))
|
||||
.flat_map(|fact| {
|
||||
fact.dimensions
|
||||
.iter()
|
||||
.map(|dimension| format!("{}::{}", dimension.axis, dimension.member))
|
||||
})
|
||||
.collect::<HashSet<_>>()
|
||||
.len(),
|
||||
assets: discovered.assets,
|
||||
@@ -622,7 +639,10 @@ struct DiscoveredAssets {
|
||||
assets: Vec<AssetOutput>,
|
||||
}
|
||||
|
||||
fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Result<DiscoveredAssets> {
|
||||
fn discover_filing_assets(
|
||||
input: &HydrateFilingRequest,
|
||||
client: &Client,
|
||||
) -> Result<DiscoveredAssets> {
|
||||
let Some(directory_url) = resolve_filing_directory_url(
|
||||
input.filing_url.as_deref(),
|
||||
&input.cik,
|
||||
@@ -631,12 +651,19 @@ fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Resu
|
||||
return Ok(DiscoveredAssets { assets: vec![] });
|
||||
};
|
||||
|
||||
let payload = fetch_json::<FilingDirectoryPayload>(client, &format!("{directory_url}index.json")).ok();
|
||||
let payload =
|
||||
fetch_json::<FilingDirectoryPayload>(client, &format!("{directory_url}index.json")).ok();
|
||||
let mut discovered = Vec::new();
|
||||
|
||||
if let Some(items) = payload.and_then(|payload| payload.directory.and_then(|directory| directory.item)) {
|
||||
if let Some(items) =
|
||||
payload.and_then(|payload| payload.directory.and_then(|directory| directory.item))
|
||||
{
|
||||
for item in items {
|
||||
let Some(name) = item.name.map(|name| name.trim().to_string()).filter(|name| !name.is_empty()) else {
|
||||
let Some(name) = item
|
||||
.name
|
||||
.map(|name| name.trim().to_string())
|
||||
.filter(|name| !name.is_empty())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
@@ -683,12 +710,19 @@ fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Resu
|
||||
score_instance(&asset.name, input.primary_document.as_deref()),
|
||||
)
|
||||
})
|
||||
.max_by(|left, right| left.1.partial_cmp(&right.1).unwrap_or(std::cmp::Ordering::Equal))
|
||||
.max_by(|left, right| {
|
||||
left.1
|
||||
.partial_cmp(&right.1)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
})
|
||||
.map(|entry| entry.0);
|
||||
|
||||
for asset in &mut discovered {
|
||||
asset.score = if asset.asset_type == "instance" {
|
||||
Some(score_instance(&asset.name, input.primary_document.as_deref()))
|
||||
Some(score_instance(
|
||||
&asset.name,
|
||||
input.primary_document.as_deref(),
|
||||
))
|
||||
} else if asset.asset_type == "pdf" {
|
||||
Some(score_pdf(&asset.name, asset.size_bytes))
|
||||
} else {
|
||||
@@ -708,7 +742,11 @@ fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Resu
|
||||
Ok(DiscoveredAssets { assets: discovered })
|
||||
}
|
||||
|
||||
fn resolve_filing_directory_url(filing_url: Option<&str>, cik: &str, accession_number: &str) -> Option<String> {
|
||||
fn resolve_filing_directory_url(
|
||||
filing_url: Option<&str>,
|
||||
cik: &str,
|
||||
accession_number: &str,
|
||||
) -> Option<String> {
|
||||
if let Some(filing_url) = filing_url.map(str::trim).filter(|value| !value.is_empty()) {
|
||||
if let Some(last_slash) = filing_url.rfind('/') {
|
||||
if last_slash > "https://".len() {
|
||||
@@ -725,7 +763,10 @@ fn resolve_filing_directory_url(filing_url: Option<&str>, cik: &str, accession_n
|
||||
}
|
||||
|
||||
fn normalize_cik_for_path(value: &str) -> Option<String> {
|
||||
let digits = value.chars().filter(|char| char.is_ascii_digit()).collect::<String>();
|
||||
let digits = value
|
||||
.chars()
|
||||
.filter(|char| char.is_ascii_digit())
|
||||
.collect::<String>();
|
||||
if digits.is_empty() {
|
||||
return None;
|
||||
}
|
||||
@@ -741,16 +782,25 @@ fn classify_asset_type(name: &str) -> &'static str {
|
||||
return "schema";
|
||||
}
|
||||
if lower.ends_with(".xml") {
|
||||
if lower.ends_with("_pre.xml") || lower.ends_with("-pre.xml") || lower.contains("presentation") {
|
||||
if lower.ends_with("_pre.xml")
|
||||
|| lower.ends_with("-pre.xml")
|
||||
|| lower.contains("presentation")
|
||||
{
|
||||
return "presentation";
|
||||
}
|
||||
if lower.ends_with("_lab.xml") || lower.ends_with("-lab.xml") || lower.contains("label") {
|
||||
return "label";
|
||||
}
|
||||
if lower.ends_with("_cal.xml") || lower.ends_with("-cal.xml") || lower.contains("calculation") {
|
||||
if lower.ends_with("_cal.xml")
|
||||
|| lower.ends_with("-cal.xml")
|
||||
|| lower.contains("calculation")
|
||||
{
|
||||
return "calculation";
|
||||
}
|
||||
if lower.ends_with("_def.xml") || lower.ends_with("-def.xml") || lower.contains("definition") {
|
||||
if lower.ends_with("_def.xml")
|
||||
|| lower.ends_with("-def.xml")
|
||||
|| lower.contains("definition")
|
||||
{
|
||||
return "definition";
|
||||
}
|
||||
return "instance";
|
||||
@@ -779,7 +829,11 @@ fn score_instance(name: &str, primary_document: Option<&str>) -> f64 {
|
||||
score += 5.0;
|
||||
}
|
||||
}
|
||||
if lower.contains("cal") || lower.contains("def") || lower.contains("lab") || lower.contains("pre") {
|
||||
if lower.contains("cal")
|
||||
|| lower.contains("def")
|
||||
|| lower.contains("lab")
|
||||
|| lower.contains("pre")
|
||||
{
|
||||
score -= 3.0;
|
||||
}
|
||||
score
|
||||
@@ -819,7 +873,9 @@ fn fetch_text(client: &Client, url: &str) -> Result<String> {
|
||||
if !response.status().is_success() {
|
||||
return Err(anyhow!("request failed for {url} ({})", response.status()));
|
||||
}
|
||||
response.text().with_context(|| format!("unable to read response body for {url}"))
|
||||
response
|
||||
.text()
|
||||
.with_context(|| format!("unable to read response body for {url}"))
|
||||
}
|
||||
|
||||
fn fetch_json<T: for<'de> Deserialize<'de>>(client: &Client, url: &str) -> Result<T> {
|
||||
@@ -847,17 +903,36 @@ fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> ParsedInstance
|
||||
let mut facts = Vec::new();
|
||||
|
||||
for captures in FACT_RE.captures_iter(raw) {
|
||||
let prefix = captures.get(1).map(|value| value.as_str().trim()).unwrap_or_default();
|
||||
let local_name = captures.get(2).map(|value| value.as_str().trim()).unwrap_or_default();
|
||||
let attrs = captures.get(3).map(|value| value.as_str()).unwrap_or_default();
|
||||
let body = decode_xml_entities(captures.get(4).map(|value| value.as_str()).unwrap_or_default().trim());
|
||||
let prefix = captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str().trim())
|
||||
.unwrap_or_default();
|
||||
let local_name = captures
|
||||
.get(2)
|
||||
.map(|value| value.as_str().trim())
|
||||
.unwrap_or_default();
|
||||
let attrs = captures
|
||||
.get(3)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let body = decode_xml_entities(
|
||||
captures
|
||||
.get(4)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default()
|
||||
.trim(),
|
||||
);
|
||||
|
||||
if prefix.is_empty() || local_name.is_empty() || is_xbrl_infrastructure_prefix(prefix) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let attr_map = parse_attrs(attrs);
|
||||
let Some(context_id) = attr_map.get("contextRef").cloned().or_else(|| attr_map.get("contextref").cloned()) else {
|
||||
let Some(context_id) = attr_map
|
||||
.get("contextRef")
|
||||
.cloned()
|
||||
.or_else(|| attr_map.get("contextref").cloned())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
@@ -870,7 +945,10 @@ fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> ParsedInstance
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("urn:unknown:{prefix}"));
|
||||
let context = context_by_id.get(&context_id);
|
||||
let unit_ref = attr_map.get("unitRef").cloned().or_else(|| attr_map.get("unitref").cloned());
|
||||
let unit_ref = attr_map
|
||||
.get("unitRef")
|
||||
.cloned()
|
||||
.or_else(|| attr_map.get("unitref").cloned());
|
||||
let unit = unit_ref
|
||||
.as_ref()
|
||||
.and_then(|unit_ref| unit_by_id.get(unit_ref))
|
||||
@@ -896,8 +974,12 @@ fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> ParsedInstance
|
||||
period_start: context.and_then(|value| value.period_start.clone()),
|
||||
period_end: context.and_then(|value| value.period_end.clone()),
|
||||
period_instant: context.and_then(|value| value.period_instant.clone()),
|
||||
dimensions: context.map(|value| value.dimensions.clone()).unwrap_or_default(),
|
||||
is_dimensionless: context.map(|value| value.dimensions.is_empty()).unwrap_or(true),
|
||||
dimensions: context
|
||||
.map(|value| value.dimensions.clone())
|
||||
.unwrap_or_default(),
|
||||
is_dimensionless: context
|
||||
.map(|value| value.dimensions.is_empty())
|
||||
.unwrap_or(true),
|
||||
source_file: source_file.clone(),
|
||||
});
|
||||
}
|
||||
@@ -916,10 +998,7 @@ fn parse_xbrl_instance(raw: &str, source_file: Option<String>) -> ParsedInstance
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
ParsedInstance {
|
||||
contexts,
|
||||
facts,
|
||||
}
|
||||
ParsedInstance { contexts, facts }
|
||||
}
|
||||
|
||||
fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap<String, String> {
|
||||
@@ -935,7 +1014,10 @@ fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap<String, String
|
||||
.captures_iter(&root_start)
|
||||
{
|
||||
if let (Some(prefix), Some(uri)) = (captures.get(1), captures.get(2)) {
|
||||
map.insert(prefix.as_str().trim().to_string(), uri.as_str().trim().to_string());
|
||||
map.insert(
|
||||
prefix.as_str().trim().to_string(),
|
||||
uri.as_str().trim().to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -946,16 +1028,26 @@ fn parse_contexts(raw: &str) -> HashMap<String, ParsedContext> {
|
||||
let mut contexts = HashMap::new();
|
||||
|
||||
for captures in CONTEXT_RE.captures_iter(raw) {
|
||||
let Some(context_id) = captures.get(1).map(|value| value.as_str().trim().to_string()) else {
|
||||
let Some(context_id) = captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str().trim().to_string())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default();
|
||||
let block = captures
|
||||
.get(2)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let (entity_identifier, entity_scheme) = IDENTIFIER_RE
|
||||
.captures(block)
|
||||
.map(|captures| {
|
||||
(
|
||||
captures.get(2).map(|value| decode_xml_entities(value.as_str().trim())),
|
||||
captures.get(1).map(|value| decode_xml_entities(value.as_str().trim())),
|
||||
captures
|
||||
.get(2)
|
||||
.map(|value| decode_xml_entities(value.as_str().trim())),
|
||||
captures
|
||||
.get(1)
|
||||
.map(|value| decode_xml_entities(value.as_str().trim())),
|
||||
)
|
||||
})
|
||||
.unwrap_or((None, None));
|
||||
@@ -984,7 +1076,10 @@ fn parse_contexts(raw: &str) -> HashMap<String, ParsedContext> {
|
||||
|
||||
let mut dimensions = Vec::new();
|
||||
if let Some(segment_value) = segment.as_ref() {
|
||||
if let Some(members) = segment_value.get("explicitMembers").and_then(|value| value.as_array()) {
|
||||
if let Some(members) = segment_value
|
||||
.get("explicitMembers")
|
||||
.and_then(|value| value.as_array())
|
||||
{
|
||||
for member in members {
|
||||
if let (Some(axis), Some(member_value)) = (
|
||||
member.get("axis").and_then(|value| value.as_str()),
|
||||
@@ -999,7 +1094,10 @@ fn parse_contexts(raw: &str) -> HashMap<String, ParsedContext> {
|
||||
}
|
||||
}
|
||||
if let Some(scenario_value) = scenario.as_ref() {
|
||||
if let Some(members) = scenario_value.get("explicitMembers").and_then(|value| value.as_array()) {
|
||||
if let Some(members) = scenario_value
|
||||
.get("explicitMembers")
|
||||
.and_then(|value| value.as_array())
|
||||
{
|
||||
for member in members {
|
||||
if let (Some(axis), Some(member_value)) = (
|
||||
member.get("axis").and_then(|value| value.as_str()),
|
||||
@@ -1062,10 +1160,16 @@ fn parse_dimension_container(raw: &str) -> serde_json::Value {
|
||||
fn parse_units(raw: &str) -> HashMap<String, ParsedUnit> {
|
||||
let mut units = HashMap::new();
|
||||
for captures in UNIT_RE.captures_iter(raw) {
|
||||
let Some(id) = captures.get(1).map(|value| value.as_str().trim().to_string()) else {
|
||||
let Some(id) = captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str().trim().to_string())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default();
|
||||
let block = captures
|
||||
.get(2)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let measures = MEASURE_RE
|
||||
.captures_iter(block)
|
||||
.filter_map(|captures| captures.get(1))
|
||||
@@ -1097,7 +1201,10 @@ fn parse_attrs(raw: &str) -> HashMap<String, String> {
|
||||
let mut map = HashMap::new();
|
||||
for captures in ATTR_RE.captures_iter(raw) {
|
||||
if let (Some(name), Some(value)) = (captures.get(1), captures.get(2)) {
|
||||
map.insert(name.as_str().to_string(), decode_xml_entities(value.as_str()));
|
||||
map.insert(
|
||||
name.as_str().to_string(),
|
||||
decode_xml_entities(value.as_str()),
|
||||
);
|
||||
}
|
||||
}
|
||||
map
|
||||
@@ -1138,12 +1245,20 @@ fn parse_label_linkbase(raw: &str) -> HashMap<String, String> {
|
||||
let mut preferred = HashMap::<String, (String, i64)>::new();
|
||||
|
||||
for captures in LABEL_LINK_RE.captures_iter(raw) {
|
||||
let block = captures.get(1).map(|value| value.as_str()).unwrap_or_default();
|
||||
let block = captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let mut loc_by_label = HashMap::<String, String>::new();
|
||||
let mut resource_by_label = HashMap::<String, (String, Option<String>)>::new();
|
||||
|
||||
for captures in LOC_RE.captures_iter(block) {
|
||||
let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default());
|
||||
let attrs = parse_attrs(
|
||||
captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
let Some(label) = attrs.get("xlink:label").cloned() else {
|
||||
continue;
|
||||
};
|
||||
@@ -1160,14 +1275,24 @@ fn parse_label_linkbase(raw: &str) -> HashMap<String, String> {
|
||||
}
|
||||
|
||||
for captures in LABEL_RESOURCE_RE.captures_iter(block) {
|
||||
let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default());
|
||||
let attrs = parse_attrs(
|
||||
captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
let Some(label) = attrs.get("xlink:label").cloned() else {
|
||||
continue;
|
||||
};
|
||||
let body = decode_xml_entities(captures.get(2).map(|value| value.as_str()).unwrap_or_default())
|
||||
.split_whitespace()
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
let body = decode_xml_entities(
|
||||
captures
|
||||
.get(2)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.split_whitespace()
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
if body.is_empty() {
|
||||
continue;
|
||||
}
|
||||
@@ -1175,7 +1300,12 @@ fn parse_label_linkbase(raw: &str) -> HashMap<String, String> {
|
||||
}
|
||||
|
||||
for captures in LABEL_ARC_RE.captures_iter(block) {
|
||||
let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default());
|
||||
let attrs = parse_attrs(
|
||||
captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
let Some(from) = attrs.get("xlink:from").cloned() else {
|
||||
continue;
|
||||
};
|
||||
@@ -1190,7 +1320,11 @@ fn parse_label_linkbase(raw: &str) -> HashMap<String, String> {
|
||||
};
|
||||
let priority = label_priority(role.as_deref());
|
||||
let current = preferred.get(concept_key).cloned();
|
||||
if current.as_ref().map(|(_, current_priority)| priority > *current_priority).unwrap_or(true) {
|
||||
if current
|
||||
.as_ref()
|
||||
.map(|(_, current_priority)| priority > *current_priority)
|
||||
.unwrap_or(true)
|
||||
{
|
||||
preferred.insert(concept_key.clone(), (label.clone(), priority));
|
||||
}
|
||||
}
|
||||
@@ -1207,18 +1341,31 @@ fn parse_presentation_linkbase(raw: &str) -> Vec<PresentationNode> {
|
||||
let mut rows = Vec::new();
|
||||
|
||||
for captures in PRESENTATION_LINK_RE.captures_iter(raw) {
|
||||
let link_attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default());
|
||||
let link_attrs = parse_attrs(
|
||||
captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
let Some(role_uri) = link_attrs.get("xlink:role").cloned() else {
|
||||
continue;
|
||||
};
|
||||
let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default();
|
||||
let block = captures
|
||||
.get(2)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let mut loc_by_label = HashMap::<String, (String, String, bool)>::new();
|
||||
let mut children_by_label = HashMap::<String, Vec<(String, f64)>>::new();
|
||||
let mut incoming = HashSet::<String>::new();
|
||||
let mut all_referenced = HashSet::<String>::new();
|
||||
|
||||
for captures in LOC_RE.captures_iter(block) {
|
||||
let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default());
|
||||
let attrs = parse_attrs(
|
||||
captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
let Some(label) = attrs.get("xlink:label").cloned() else {
|
||||
continue;
|
||||
};
|
||||
@@ -1228,14 +1375,27 @@ fn parse_presentation_linkbase(raw: &str) -> Vec<PresentationNode> {
|
||||
let Some(qname) = qname_from_href(&href) else {
|
||||
continue;
|
||||
};
|
||||
let Some((concept_key, qname, local_name)) = concept_from_qname(&qname, &namespaces) else {
|
||||
let Some((concept_key, qname, local_name)) = concept_from_qname(&qname, &namespaces)
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
loc_by_label.insert(label, (concept_key, qname, local_name.to_ascii_lowercase().contains("abstract")));
|
||||
loc_by_label.insert(
|
||||
label,
|
||||
(
|
||||
concept_key,
|
||||
qname,
|
||||
local_name.to_ascii_lowercase().contains("abstract"),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
for captures in PRESENTATION_ARC_RE.captures_iter(block) {
|
||||
let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default());
|
||||
let attrs = parse_attrs(
|
||||
captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
let Some(from) = attrs.get("xlink:from").cloned() else {
|
||||
continue;
|
||||
};
|
||||
@@ -1248,8 +1408,16 @@ fn parse_presentation_linkbase(raw: &str) -> Vec<PresentationNode> {
|
||||
let order = attrs
|
||||
.get("order")
|
||||
.and_then(|value| value.parse::<f64>().ok())
|
||||
.unwrap_or_else(|| children_by_label.get(&from).map(|children| children.len() as f64 + 1.0).unwrap_or(1.0));
|
||||
children_by_label.entry(from.clone()).or_default().push((to.clone(), order));
|
||||
.unwrap_or_else(|| {
|
||||
children_by_label
|
||||
.get(&from)
|
||||
.map(|children| children.len() as f64 + 1.0)
|
||||
.unwrap_or(1.0)
|
||||
});
|
||||
children_by_label
|
||||
.entry(from.clone())
|
||||
.or_default()
|
||||
.push((to.clone(), order));
|
||||
incoming.insert(to.clone());
|
||||
all_referenced.insert(from);
|
||||
all_referenced.insert(to);
|
||||
@@ -1281,7 +1449,11 @@ fn parse_presentation_linkbase(raw: &str) -> Vec<PresentationNode> {
|
||||
return;
|
||||
}
|
||||
|
||||
let parent_concept_key = parent_label.and_then(|parent| loc_by_label.get(parent).map(|(concept_key, _, _)| concept_key.clone()));
|
||||
let parent_concept_key = parent_label.and_then(|parent| {
|
||||
loc_by_label
|
||||
.get(parent)
|
||||
.map(|(concept_key, _, _)| concept_key.clone())
|
||||
});
|
||||
rows.push(PresentationNode {
|
||||
concept_key: concept_key.clone(),
|
||||
role_uri: role_uri.to_string(),
|
||||
@@ -1292,7 +1464,11 @@ fn parse_presentation_linkbase(raw: &str) -> Vec<PresentationNode> {
|
||||
});
|
||||
|
||||
let mut children = children_by_label.get(label).cloned().unwrap_or_default();
|
||||
children.sort_by(|left, right| left.1.partial_cmp(&right.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
children.sort_by(|left, right| {
|
||||
left.1
|
||||
.partial_cmp(&right.1)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
for (index, (child_label, _)) in children.into_iter().enumerate() {
|
||||
dfs(
|
||||
&child_label,
|
||||
@@ -1400,7 +1576,10 @@ fn materialize_taxonomy_statements(
|
||||
.clone()
|
||||
.or_else(|| fact.period_instant.clone())
|
||||
.unwrap_or_else(|| filing_date.to_string());
|
||||
let id = format!("{date}-{compact_accession}-{}", period_by_signature.len() + 1);
|
||||
let id = format!(
|
||||
"{date}-{compact_accession}-{}",
|
||||
period_by_signature.len() + 1
|
||||
);
|
||||
let period_label = if fact.period_instant.is_some() && fact.period_start.is_none() {
|
||||
"Instant".to_string()
|
||||
} else if fact.period_start.is_some() && fact.period_end.is_some() {
|
||||
@@ -1420,7 +1599,10 @@ fn materialize_taxonomy_statements(
|
||||
accession_number: accession_number.to_string(),
|
||||
filing_date: filing_date.to_string(),
|
||||
period_start: fact.period_start.clone(),
|
||||
period_end: fact.period_end.clone().or_else(|| fact.period_instant.clone()),
|
||||
period_end: fact
|
||||
.period_end
|
||||
.clone()
|
||||
.or_else(|| fact.period_instant.clone()),
|
||||
filing_type: filing_type.to_string(),
|
||||
period_label,
|
||||
},
|
||||
@@ -1429,9 +1611,17 @@ fn materialize_taxonomy_statements(
|
||||
|
||||
let mut periods = period_by_signature.values().cloned().collect::<Vec<_>>();
|
||||
periods.sort_by(|left, right| {
|
||||
let left_key = left.period_end.clone().unwrap_or_else(|| left.filing_date.clone());
|
||||
let right_key = right.period_end.clone().unwrap_or_else(|| right.filing_date.clone());
|
||||
left_key.cmp(&right_key).then_with(|| left.id.cmp(&right.id))
|
||||
let left_key = left
|
||||
.period_end
|
||||
.clone()
|
||||
.unwrap_or_else(|| left.filing_date.clone());
|
||||
let right_key = right
|
||||
.period_end
|
||||
.clone()
|
||||
.unwrap_or_else(|| right.filing_date.clone());
|
||||
left_key
|
||||
.cmp(&right_key)
|
||||
.then_with(|| left.id.cmp(&right.id))
|
||||
});
|
||||
let period_id_by_signature = period_by_signature
|
||||
.iter()
|
||||
@@ -1440,7 +1630,10 @@ fn materialize_taxonomy_statements(
|
||||
|
||||
let mut presentation_by_concept = HashMap::<String, Vec<&PresentationNode>>::new();
|
||||
for node in presentation {
|
||||
presentation_by_concept.entry(node.concept_key.clone()).or_default().push(node);
|
||||
presentation_by_concept
|
||||
.entry(node.concept_key.clone())
|
||||
.or_default()
|
||||
.push(node);
|
||||
}
|
||||
|
||||
let mut grouped_by_statement = empty_parsed_fact_map();
|
||||
@@ -1502,9 +1695,13 @@ fn materialize_taxonomy_statements(
|
||||
let mut concepts = Vec::<ConceptOutput>::new();
|
||||
|
||||
for statement_kind in statement_keys() {
|
||||
let concept_groups = grouped_by_statement.remove(statement_kind).unwrap_or_default();
|
||||
let concept_groups = grouped_by_statement
|
||||
.remove(statement_kind)
|
||||
.unwrap_or_default();
|
||||
let mut concept_keys = HashSet::<String>::new();
|
||||
for node in presentation.iter().filter(|node| classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind)) {
|
||||
for node in presentation.iter().filter(|node| {
|
||||
classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind)
|
||||
}) {
|
||||
concept_keys.insert(node.concept_key.clone());
|
||||
}
|
||||
for concept_key in concept_groups.keys() {
|
||||
@@ -1516,12 +1713,21 @@ fn materialize_taxonomy_statements(
|
||||
.map(|concept_key| {
|
||||
let nodes = presentation
|
||||
.iter()
|
||||
.filter(|node| node.concept_key == concept_key && classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind))
|
||||
.filter(|node| {
|
||||
node.concept_key == concept_key
|
||||
&& classify_statement_role(&node.role_uri).as_deref()
|
||||
== Some(statement_kind)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let order = nodes.iter().map(|node| node.order).fold(f64::INFINITY, f64::min);
|
||||
let order = nodes
|
||||
.iter()
|
||||
.map(|node| node.order)
|
||||
.fold(f64::INFINITY, f64::min);
|
||||
let depth = nodes.iter().map(|node| node.depth).min().unwrap_or(0);
|
||||
let role_uri = nodes.first().map(|node| node.role_uri.clone());
|
||||
let parent_concept_key = nodes.first().and_then(|node| node.parent_concept_key.clone());
|
||||
let parent_concept_key = nodes
|
||||
.first()
|
||||
.and_then(|node| node.parent_concept_key.clone());
|
||||
(concept_key, order, depth, role_uri, parent_concept_key)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
@@ -1532,8 +1738,13 @@ fn materialize_taxonomy_statements(
|
||||
.then_with(|| left.0.cmp(&right.0))
|
||||
});
|
||||
|
||||
for (concept_key, presentation_order, depth, role_uri, parent_concept_key) in ordered_concepts {
|
||||
let fact_group = concept_groups.get(&concept_key).cloned().unwrap_or_default();
|
||||
for (concept_key, presentation_order, depth, role_uri, parent_concept_key) in
|
||||
ordered_concepts
|
||||
{
|
||||
let fact_group = concept_groups
|
||||
.get(&concept_key)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
let (namespace_uri, local_name) = split_concept_key(&concept_key);
|
||||
let qname = fact_group
|
||||
.first()
|
||||
@@ -1672,7 +1883,13 @@ fn empty_detail_row_map() -> DetailRowStatementMap {
|
||||
}
|
||||
|
||||
fn statement_keys() -> [&'static str; 5] {
|
||||
["income", "balance", "cash_flow", "equity", "comprehensive_income"]
|
||||
[
|
||||
"income",
|
||||
"balance",
|
||||
"cash_flow",
|
||||
"equity",
|
||||
"comprehensive_income",
|
||||
]
|
||||
}
|
||||
|
||||
fn statement_key_ref(value: &str) -> Option<&'static str> {
|
||||
@@ -1709,7 +1926,13 @@ fn pick_preferred_fact(grouped_facts: &[(i64, ParsedFact)]) -> Option<&(i64, Par
|
||||
.unwrap_or_default();
|
||||
left_date.cmp(&right_date)
|
||||
})
|
||||
.then_with(|| left.1.value.abs().partial_cmp(&right.1.value.abs()).unwrap_or(std::cmp::Ordering::Equal))
|
||||
.then_with(|| {
|
||||
left.1
|
||||
.value
|
||||
.abs()
|
||||
.partial_cmp(&right.1.value.abs())
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1779,12 +2002,6 @@ fn classify_statement_role(role_uri: &str) -> Option<String> {
|
||||
|
||||
fn concept_statement_fallback(local_name: &str) -> Option<String> {
|
||||
let normalized = local_name.to_ascii_lowercase();
|
||||
if Regex::new(r#"cash|operatingactivities|investingactivities|financingactivities"#)
|
||||
.unwrap()
|
||||
.is_match(&normalized)
|
||||
{
|
||||
return Some("cash_flow".to_string());
|
||||
}
|
||||
if Regex::new(r#"equity|retainedearnings|additionalpaidincapital"#)
|
||||
.unwrap()
|
||||
.is_match(&normalized)
|
||||
@@ -1794,6 +2011,22 @@ fn concept_statement_fallback(local_name: &str) -> Option<String> {
|
||||
if normalized.contains("comprehensiveincome") {
|
||||
return Some("comprehensive_income".to_string());
|
||||
}
|
||||
if Regex::new(
|
||||
r#"deferredpolicyacquisitioncosts(andvalueofbusinessacquired)?$|supplementaryinsuranceinformationdeferredpolicyacquisitioncosts$|deferredacquisitioncosts$"#,
|
||||
)
|
||||
.unwrap()
|
||||
.is_match(&normalized)
|
||||
{
|
||||
return Some("balance".to_string());
|
||||
}
|
||||
if Regex::new(
|
||||
r#"netcashprovidedbyusedin.*activities|increasedecreasein|paymentstoacquire|paymentsforcapitalimprovements$|paymentsfordepositsonrealestateacquisitions$|paymentsforrepurchase|paymentsofdividends|dividendscommonstockcash$|proceedsfrom|repaymentsofdebt|sharebasedcompensation$|allocatedsharebasedcompensationexpense$|depreciationdepletionandamortization$|depreciationamortizationandaccretionnet$|depreciationandamortization$|depreciationamortizationandother$|otheradjustmentstoreconcilenetincomelosstocashprovidedbyusedinoperatingactivities"#,
|
||||
)
|
||||
.unwrap()
|
||||
.is_match(&normalized)
|
||||
{
|
||||
return Some("cash_flow".to_string());
|
||||
}
|
||||
if Regex::new(
|
||||
r#"asset|liabilit|debt|financingreceivable|loansreceivable|deposits|allowanceforcreditloss|futurepolicybenefits|policyholderaccountbalances|unearnedpremiums|realestateinvestmentproperty|grossatcarryingvalue|investmentproperty"#,
|
||||
)
|
||||
@@ -1967,7 +2200,10 @@ mod tests {
|
||||
vec![],
|
||||
)
|
||||
.expect("core pack should load and map");
|
||||
let income_surface_rows = model.surface_rows.get("income").expect("income surface rows");
|
||||
let income_surface_rows = model
|
||||
.surface_rows
|
||||
.get("income")
|
||||
.expect("income surface rows");
|
||||
let op_expenses = income_surface_rows
|
||||
.iter()
|
||||
.find(|row| row.key == "operating_expenses")
|
||||
@@ -1978,7 +2214,10 @@ mod tests {
|
||||
.expect("revenue surface row");
|
||||
|
||||
assert_eq!(revenue.values.get("2025").copied().flatten(), Some(120.0));
|
||||
assert_eq!(op_expenses.values.get("2024").copied().flatten(), Some(40.0));
|
||||
assert_eq!(
|
||||
op_expenses.values.get("2024").copied().flatten(),
|
||||
Some(40.0)
|
||||
);
|
||||
assert_eq!(op_expenses.detail_count, Some(2));
|
||||
|
||||
let operating_expense_details = model
|
||||
@@ -1987,8 +2226,12 @@ mod tests {
|
||||
.and_then(|groups| groups.get("operating_expenses"))
|
||||
.expect("operating expenses details");
|
||||
assert_eq!(operating_expense_details.len(), 2);
|
||||
assert!(operating_expense_details.iter().any(|row| row.key == "sga-row"));
|
||||
assert!(operating_expense_details.iter().any(|row| row.key == "rd-row"));
|
||||
assert!(operating_expense_details
|
||||
.iter()
|
||||
.any(|row| row.key == "sga-row"));
|
||||
assert!(operating_expense_details
|
||||
.iter()
|
||||
.any(|row| row.key == "rd-row"));
|
||||
|
||||
let residual_rows = model
|
||||
.detail_rows
|
||||
@@ -2003,17 +2246,26 @@ mod tests {
|
||||
.concept_mappings
|
||||
.get("http://fasb.org/us-gaap/2024#ResearchAndDevelopmentExpense")
|
||||
.expect("rd mapping");
|
||||
assert_eq!(rd_mapping.detail_parent_surface_key.as_deref(), Some("operating_expenses"));
|
||||
assert_eq!(rd_mapping.surface_key.as_deref(), Some("operating_expenses"));
|
||||
assert_eq!(
|
||||
rd_mapping.detail_parent_surface_key.as_deref(),
|
||||
Some("operating_expenses")
|
||||
);
|
||||
assert_eq!(
|
||||
rd_mapping.surface_key.as_deref(),
|
||||
Some("operating_expenses")
|
||||
);
|
||||
|
||||
let residual_mapping = model
|
||||
.concept_mappings
|
||||
.get("urn:company#OtherOperatingCharges")
|
||||
.expect("residual mapping");
|
||||
assert!(residual_mapping.residual_flag);
|
||||
assert_eq!(residual_mapping.detail_parent_surface_key.as_deref(), Some("unmapped"));
|
||||
assert_eq!(
|
||||
residual_mapping.detail_parent_surface_key.as_deref(),
|
||||
Some("unmapped")
|
||||
);
|
||||
|
||||
assert_eq!(model.normalization_summary.surface_row_count, 5);
|
||||
assert_eq!(model.normalization_summary.surface_row_count, 6);
|
||||
assert_eq!(model.normalization_summary.detail_row_count, 3);
|
||||
assert_eq!(model.normalization_summary.unmapped_row_count, 1);
|
||||
}
|
||||
@@ -2051,18 +2303,60 @@ mod tests {
|
||||
/// Checks the statement that `concept_statement_fallback` assigns to a set of
/// pack-specific concept local names.
#[test]
fn classifies_pack_specific_concepts_without_presentation_roles() {
    // Balance-sheet concepts.
    assert_eq!(
        concept_statement_fallback(
            "FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss"
        )
        .as_deref(),
        Some("balance")
    );
    assert_eq!(
        concept_statement_fallback("Deposits").as_deref(),
        Some("balance")
    );
    assert_eq!(
        concept_statement_fallback("RealEstateInvestmentPropertyNet").as_deref(),
        Some("balance")
    );
    assert_eq!(
        concept_statement_fallback("DeferredPolicyAcquisitionCosts").as_deref(),
        Some("balance")
    );
    assert_eq!(
        concept_statement_fallback("DeferredPolicyAcquisitionCostsAndValueOfBusinessAcquired")
            .as_deref(),
        Some("balance")
    );

    // Cash-flow concepts.
    assert_eq!(
        concept_statement_fallback("IncreaseDecreaseInAccountsReceivable").as_deref(),
        Some("cash_flow")
    );
    assert_eq!(
        concept_statement_fallback("PaymentsOfDividends").as_deref(),
        Some("cash_flow")
    );
    assert_eq!(
        concept_statement_fallback("RepaymentsOfDebt").as_deref(),
        Some("cash_flow")
    );
    assert_eq!(
        concept_statement_fallback("ShareBasedCompensation").as_deref(),
        Some("cash_flow")
    );
    assert_eq!(
        concept_statement_fallback("PaymentsForCapitalImprovements").as_deref(),
        Some("cash_flow")
    );
    assert_eq!(
        concept_statement_fallback("PaymentsForDepositsOnRealEstateAcquisitions").as_deref(),
        Some("cash_flow")
    );

    // Income-statement concepts.
    assert_eq!(
        concept_statement_fallback("LeaseIncome").as_deref(),
        Some("income")
    );
    assert_eq!(
        concept_statement_fallback("DirectCostsOfLeasedAndRentedPropertyOrEquipment")
            .as_deref(),
        Some("income")
    );
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +1,22 @@
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::pack_selector::FiscalPack;
|
||||
|
||||
/// Serde default for `include_in_output`: the field is `true` when the key is
/// absent from the input.
fn default_include_in_output() -> bool {
    true
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SurfaceSignTransform {
|
||||
Invert,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct SurfacePackFile {
|
||||
pub version: String,
|
||||
@@ -25,9 +35,44 @@ pub struct SurfaceDefinition {
|
||||
pub rollup_policy: String,
|
||||
pub allowed_source_concepts: Vec<String>,
|
||||
pub allowed_authoritative_concepts: Vec<String>,
|
||||
pub formula_fallback: Option<serde_json::Value>,
|
||||
pub formula_fallback: Option<SurfaceFormulaFallback>,
|
||||
pub detail_grouping_policy: String,
|
||||
pub materiality_policy: String,
|
||||
#[serde(default = "default_include_in_output")]
|
||||
pub include_in_output: bool,
|
||||
#[serde(default)]
|
||||
pub sign_transform: Option<SurfaceSignTransform>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
#[serde(untagged)]
|
||||
pub enum SurfaceFormulaFallback {
|
||||
LegacyString(#[allow(dead_code)] String),
|
||||
Structured(SurfaceFormula),
|
||||
}
|
||||
|
||||
impl SurfaceFormulaFallback {
|
||||
pub fn structured(&self) -> Option<&SurfaceFormula> {
|
||||
match self {
|
||||
Self::Structured(formula) => Some(formula),
|
||||
Self::LegacyString(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct SurfaceFormula {
|
||||
pub op: SurfaceFormulaOp,
|
||||
pub sources: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub treat_null_as_zero: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SurfaceFormulaOp {
|
||||
Sum,
|
||||
Subtract,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
@@ -147,7 +192,9 @@ pub fn resolve_taxonomy_dir() -> Result<PathBuf> {
|
||||
candidates
|
||||
.into_iter()
|
||||
.find(|path| path.is_dir())
|
||||
.ok_or_else(|| anyhow!("taxonomy resolution failed: unable to locate runtime taxonomy directory"))
|
||||
.ok_or_else(|| {
|
||||
anyhow!("taxonomy resolution failed: unable to locate runtime taxonomy directory")
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load_surface_pack(pack: FiscalPack) -> Result<SurfacePackFile> {
|
||||
@@ -156,14 +203,52 @@ pub fn load_surface_pack(pack: FiscalPack) -> Result<SurfacePackFile> {
|
||||
.join("fiscal")
|
||||
.join("v1")
|
||||
.join(format!("{}.surface.json", pack.as_str()));
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<SurfacePackFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let mut file = load_surface_pack_file(&path)?;
|
||||
|
||||
if !matches!(pack, FiscalPack::Core) {
|
||||
let core_path = taxonomy_dir
|
||||
.join("fiscal")
|
||||
.join("v1")
|
||||
.join("core.surface.json");
|
||||
let core_file = load_surface_pack_file(&core_path)?;
|
||||
let pack_inherited_keys = file
|
||||
.surfaces
|
||||
.iter()
|
||||
.filter(|surface| surface.statement == "balance" || surface.statement == "cash_flow")
|
||||
.map(|surface| (surface.statement.clone(), surface.surface_key.clone()))
|
||||
.collect::<std::collections::HashSet<_>>();
|
||||
|
||||
file.surfaces.extend(
|
||||
core_file
|
||||
.surfaces
|
||||
.into_iter()
|
||||
.filter(|surface| surface.statement == "balance" || surface.statement == "cash_flow")
|
||||
.filter(|surface| {
|
||||
!pack_inherited_keys
|
||||
.contains(&(surface.statement.clone(), surface.surface_key.clone()))
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
let _ = (&file.version, &file.pack);
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
fn load_surface_pack_file(path: &PathBuf) -> Result<SurfacePackFile> {
|
||||
let raw = fs::read_to_string(path).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to read {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
serde_json::from_str::<SurfacePackFile>(&raw).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to parse {}",
|
||||
path.display()
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load_crosswalk(regime: &str) -> Result<Option<CrosswalkFile>> {
|
||||
let file_name = match regime {
|
||||
"us-gaap" => "us-gaap.json",
|
||||
@@ -173,10 +258,18 @@ pub fn load_crosswalk(regime: &str) -> Result<Option<CrosswalkFile>> {
|
||||
|
||||
let taxonomy_dir = resolve_taxonomy_dir()?;
|
||||
let path = taxonomy_dir.join("crosswalk").join(file_name);
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<CrosswalkFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let raw = fs::read_to_string(&path).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to read {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let file = serde_json::from_str::<CrosswalkFile>(&raw).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to parse {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let _ = (&file.version, &file.regime);
|
||||
Ok(Some(file))
|
||||
}
|
||||
@@ -188,10 +281,18 @@ pub fn load_kpi_pack(pack: FiscalPack) -> Result<KpiPackFile> {
|
||||
.join("v1")
|
||||
.join("kpis")
|
||||
.join(format!("{}.kpis.json", pack.as_str()));
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<KpiPackFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let raw = fs::read_to_string(&path).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to read {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let file = serde_json::from_str::<KpiPackFile>(&raw).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to parse {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let _ = (&file.version, &file.pack);
|
||||
Ok(file)
|
||||
}
|
||||
@@ -202,10 +303,18 @@ pub fn load_universal_income_definitions() -> Result<UniversalIncomeFile> {
|
||||
.join("fiscal")
|
||||
.join("v1")
|
||||
.join("universal_income.surface.json");
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<UniversalIncomeFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let raw = fs::read_to_string(&path).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to read {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let file = serde_json::from_str::<UniversalIncomeFile>(&raw).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to parse {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let _ = &file.version;
|
||||
Ok(file)
|
||||
}
|
||||
@@ -216,10 +325,18 @@ pub fn load_income_bridge(pack: FiscalPack) -> Result<IncomeBridgeFile> {
|
||||
.join("fiscal")
|
||||
.join("v1")
|
||||
.join(format!("{}.income-bridge.json", pack.as_str()));
|
||||
let raw = fs::read_to_string(&path)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?;
|
||||
let file = serde_json::from_str::<IncomeBridgeFile>(&raw)
|
||||
.with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?;
|
||||
let raw = fs::read_to_string(&path).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to read {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let file = serde_json::from_str::<IncomeBridgeFile>(&raw).with_context(|| {
|
||||
format!(
|
||||
"taxonomy resolution failed: unable to parse {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let _ = (&file.version, &file.pack);
|
||||
Ok(file)
|
||||
}
|
||||
@@ -230,17 +347,20 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn resolves_taxonomy_dir_and_loads_core_pack() {
|
||||
let taxonomy_dir = resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests");
|
||||
let taxonomy_dir =
|
||||
resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests");
|
||||
assert!(taxonomy_dir.exists());
|
||||
|
||||
let surface_pack = load_surface_pack(FiscalPack::Core).expect("core surface pack should load");
|
||||
let surface_pack =
|
||||
load_surface_pack(FiscalPack::Core).expect("core surface pack should load");
|
||||
assert_eq!(surface_pack.pack, "core");
|
||||
assert!(!surface_pack.surfaces.is_empty());
|
||||
|
||||
let kpi_pack = load_kpi_pack(FiscalPack::Core).expect("core kpi pack should load");
|
||||
assert_eq!(kpi_pack.pack, "core");
|
||||
|
||||
let universal_income = load_universal_income_definitions().expect("universal income config should load");
|
||||
let universal_income =
|
||||
load_universal_income_definitions().expect("universal income config should load");
|
||||
assert!(!universal_income.rows.is_empty());
|
||||
|
||||
let core_bridge = load_income_bridge(FiscalPack::Core).expect("core bridge should load");
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user