feat(taxonomy): add rust sidecar compact surface pipeline
This commit is contained in:
99
rust/vendor/crabrl/src/simple_parser.rs
vendored
Normal file
99
rust/vendor/crabrl/src/simple_parser.rs
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
//! Simple working XBRL parser
|
||||
|
||||
use crate::{model::*, Result};
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Parser {
|
||||
#[allow(dead_code)]
|
||||
load_linkbases: bool,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn parse_str(&self, content: &str) -> Result<Document> {
|
||||
self.parse_bytes(content.as_bytes())
|
||||
}
|
||||
|
||||
pub fn parse_file<P: AsRef<Path>>(&self, path: P) -> Result<Document> {
|
||||
let content = std::fs::read(path)?;
|
||||
self.parse_bytes(&content)
|
||||
}
|
||||
|
||||
pub fn parse_bytes(&self, data: &[u8]) -> Result<Document> {
|
||||
// Simple XML parsing - just count elements for now
|
||||
let text = String::from_utf8_lossy(data);
|
||||
|
||||
// Count facts (very simplified)
|
||||
let fact_count = text.matches("<us-gaap:").count()
|
||||
+ text.matches("<dei:").count()
|
||||
+ text.matches("<ifrs:").count();
|
||||
|
||||
// Count contexts
|
||||
let context_count =
|
||||
text.matches("<context ").count() + text.matches("<xbrli:context").count();
|
||||
|
||||
// Count units
|
||||
let unit_count = text.matches("<unit ").count() + text.matches("<xbrli:unit").count();
|
||||
|
||||
// Create dummy document with approximate counts
|
||||
let mut doc = Document {
|
||||
facts: FactStorage {
|
||||
concept_ids: vec![0; fact_count],
|
||||
context_ids: vec![0; fact_count],
|
||||
unit_ids: vec![0; fact_count],
|
||||
values: vec![FactValue::Text(String::from("")); fact_count],
|
||||
decimals: vec![None; fact_count],
|
||||
ids: vec![None; fact_count],
|
||||
footnote_refs: vec![],
|
||||
},
|
||||
contexts: Vec::with_capacity(context_count),
|
||||
units: Vec::with_capacity(unit_count),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
};
|
||||
|
||||
// Add dummy contexts
|
||||
for i in 0..context_count {
|
||||
doc.contexts.push(Context {
|
||||
id: String::from(&format!("ctx{}", i)),
|
||||
entity: Entity {
|
||||
identifier: String::from("0000000000"),
|
||||
scheme: String::from("http://www.sec.gov/CIK"),
|
||||
segment: None,
|
||||
},
|
||||
period: Period::Instant {
|
||||
date: String::from("2023-12-31"),
|
||||
},
|
||||
scenario: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Add dummy units
|
||||
for i in 0..unit_count {
|
||||
doc.units.push(Unit {
|
||||
id: String::from(&format!("unit{}", i)),
|
||||
unit_type: UnitType::Simple(vec![Measure {
|
||||
namespace: String::from("iso4217"),
|
||||
name: String::from("USD"),
|
||||
}]),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(doc)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user