feat(taxonomy): add rust sidecar compact surface pipeline
This commit is contained in:
177
rust/vendor/crabrl/src/allocator.rs
vendored
Normal file
177
rust/vendor/crabrl/src/allocator.rs
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
use bumpalo::Bump;
|
||||
use std::cell::RefCell;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::Arc;
|
||||
use parking_lot::Mutex;
|
||||
use string_interner::{DefaultBackend, Symbol};
|
||||
use string_interner::symbol::SymbolU32;
|
||||
|
||||
// Size of each bump arena allocated up front.
const ARENA_SIZE: usize = 64 * 1024 * 1024; // 64MB arenas

// Maximum number of objects an ObjectPool retains on release.
const POOL_SIZE: usize = 1024;

// Cache-line aligned (64 bytes) — presumably to avoid false sharing when
// embedded next to other hot state; TODO confirm this is load-bearing.
#[repr(align(64))]
pub struct ArenaAllocator {
    // Arena currently served by `alloc*`; swapped out by `new_arena`.
    current: RefCell<Bump>,
    // Retired arenas, kept alive (see `new_arena`) so allocations made
    // from them are not freed while the allocator lives.
    arenas: RefCell<Vec<Bump>>,
    // Shared string interner; the Mutex allows interning from multiple
    // threads via the shared Arc.
    string_interner: Arc<Mutex<string_interner::StringInterner<DefaultBackend>>>,
}
|
||||
|
||||
impl ArenaAllocator {
    /// Creates an allocator with one pre-sized arena and an empty interner.
    pub fn new() -> Self {
        Self {
            current: RefCell::new(Bump::with_capacity(ARENA_SIZE)),
            arenas: RefCell::new(Vec::with_capacity(16)),
            string_interner: Arc::new(Mutex::new(string_interner::StringInterner::new())),
        }
    }

    /// Bump-allocates `val` in the current arena and returns a reference
    /// whose lifetime is tied to `&self`.
    ///
    /// NOTE(review): this is unsound as written — `reset()` and
    /// `new_arena()` also take `&self`, so the arena backing this
    /// reference can be reset or replaced while the reference is still
    /// live (use-after-free). TODO: make `reset` take `&mut self` or tie
    /// allocation lifetimes to an explicit scope.
    #[inline(always)]
    pub fn alloc<T>(&self, val: T) -> &T {
        unsafe {
            // The raw-pointer round-trip detaches the result from the
            // temporary `Ref` guard; the allocation itself lives in the
            // Bump, not in the guard, so the pointer stays valid until
            // the arena is reset or dropped.
            let ptr = self.current.borrow().alloc(val) as *const T;
            &*ptr
        }
    }

    /// Copies `slice` into the current arena (see `alloc` for the
    /// lifetime/soundness caveat).
    #[inline(always)]
    pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &[T] {
        unsafe {
            let ptr = self.current.borrow().alloc_slice_copy(slice) as *const [T];
            &*ptr
        }
    }

    /// Copies `s` into the current arena (see `alloc` for the
    /// lifetime/soundness caveat).
    #[inline(always)]
    pub fn alloc_str(&self, s: &str) -> &str {
        unsafe {
            let ptr = self.current.borrow().alloc_str(s) as *const str;
            &*ptr
        }
    }

    /// Interns `s` and returns its id; interning the same string twice
    /// yields the same id (exercised by the tests below).
    #[inline(always)]
    pub fn intern_string(&self, s: &str) -> u32 {
        let mut interner = self.string_interner.lock();
        interner.get_or_intern(s).to_usize() as u32
    }

    /// Resolves an interned id back to an owned String, or `None` if the
    /// id was never produced by `intern_string`.
    #[inline(always)]
    pub fn get_interned(&self, id: u32) -> Option<String> {
        let interner = self.string_interner.lock();
        let symbol = SymbolU32::try_from_usize(id as usize)?;
        interner.resolve(symbol)
            .map(|s| s.to_string())
    }

    /// Resets the current and all retired arenas, reclaiming every bump
    /// allocation. NOTE(review): see `alloc` — any outstanding reference
    /// handed out earlier dangles after this call.
    pub fn reset(&self) {
        let mut current = self.current.borrow_mut();
        current.reset();

        let mut arenas = self.arenas.borrow_mut();
        for arena in arenas.iter_mut() {
            arena.reset();
        }
    }

    /// Retires the current arena into `arenas` (keeping it alive) and
    /// installs a fresh one for subsequent allocations.
    pub fn new_arena(&self) {
        let mut arenas = self.arenas.borrow_mut();
        let old = std::mem::replace(&mut *self.current.borrow_mut(),
            Bump::with_capacity(ARENA_SIZE));
        arenas.push(old);
    }
}
|
||||
|
||||
/// A bounded pool of reusable boxed objects.
///
/// `acquire` hands out a pooled object, falling back to the factory when
/// the pool is empty; `release` returns an object to the pool, dropping
/// it instead when the pool is already at capacity.
pub struct ObjectPool<T> {
    pool: Vec<Box<T>>,
    factory: fn() -> T,
    /// Upper bound on retained objects. Fix: `release` previously capped
    /// at the unrelated global `POOL_SIZE` (1024), so a pool constructed
    /// with a small capacity could silently grow far beyond it.
    capacity: usize,
}

impl<T> ObjectPool<T> {
    /// Creates a pool pre-filled with `capacity` objects built by `factory`.
    pub fn new(capacity: usize, factory: fn() -> T) -> Self {
        let mut pool = Vec::with_capacity(capacity);
        for _ in 0..capacity {
            pool.push(Box::new(factory()));
        }
        Self { pool, factory, capacity }
    }

    /// Takes an object out of the pool, constructing a fresh one when empty.
    #[inline(always)]
    pub fn acquire(&mut self) -> Box<T> {
        self.pool.pop().unwrap_or_else(|| Box::new((self.factory)()))
    }

    /// Returns `obj` to the pool; drops it when the pool is full.
    #[inline(always)]
    pub fn release(&mut self, obj: Box<T>) {
        if self.pool.len() < self.capacity {
            self.pool.push(obj);
        }
    }
}
|
||||
|
||||
/// Fixed-capacity, cache-line-aligned byte buffer with inline storage.
///
/// Holds up to `N` bytes with no heap allocation; `len` counts the
/// initialized prefix of `data` and is always `<= N`.
#[repr(C, align(64))]
pub struct StackBuffer<const N: usize> {
    data: [MaybeUninit<u8>; N],
    len: usize,
}

impl<const N: usize> StackBuffer<N> {
    /// Creates an empty buffer.
    ///
    /// Fix: the original used `unsafe { MaybeUninit::uninit().assume_init() }`
    /// to conjure the array; an array of `MaybeUninit` may be built with the
    /// safe repeat expression instead (`MaybeUninit<u8>` is `Copy`), so no
    /// `unsafe` is needed here.
    #[inline(always)]
    pub const fn new() -> Self {
        Self {
            data: [MaybeUninit::uninit(); N],
            len: 0,
        }
    }

    /// Appends one byte. Returns `false` (leaving the buffer unchanged)
    /// when the buffer is already full.
    #[inline(always)]
    pub fn push(&mut self, byte: u8) -> bool {
        if self.len < N {
            // Initialize before bumping `len`, preserving the invariant
            // that the first `len` slots are always initialized.
            self.data[self.len] = MaybeUninit::new(byte);
            self.len += 1;
            true
        } else {
            false
        }
    }

    /// Views the initialized prefix as a byte slice.
    #[inline(always)]
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: `push` initializes `data[i]` before incrementing `len`
        // and never lets `len` exceed `N`, so the first `len` bytes are
        // initialized and in bounds.
        unsafe {
            std::slice::from_raw_parts(
                self.data.as_ptr() as *const u8,
                self.len
            )
        }
    }

    /// Logically empties the buffer; previously stored bytes are simply
    /// forgotten (no memory is zeroed).
    #[inline(always)]
    pub fn clear(&mut self) {
        self.len = 0;
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Two strings allocated from the same arena keep independent contents.
    #[test]
    fn test_arena_allocator() {
        let arena = ArenaAllocator::new();
        let s1 = arena.alloc_str("hello");
        let s2 = arena.alloc_str("world");
        assert_eq!(s1, "hello");
        assert_eq!(s2, "world");
    }

    // Interning the same string twice yields the same id, and the id
    // round-trips back to the original text.
    #[test]
    fn test_string_interning() {
        let arena = ArenaAllocator::new();
        let id1 = arena.intern_string("test");
        let id2 = arena.intern_string("test");
        assert_eq!(id1, id2);

        let s = arena.get_interned(id1).unwrap();
        assert_eq!(s, "test");
    }
}
|
||||
47
rust/vendor/crabrl/src/cache.rs
vendored
Normal file
47
rust/vendor/crabrl/src/cache.rs
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
use dashmap::DashMap;
|
||||
use std::sync::Arc;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub struct LockFreeCache<K, V> {
|
||||
map: Arc<DashMap<K, V>>,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl<K, V> LockFreeCache<K, V>
|
||||
where
|
||||
K: Eq + Hash + Clone,
|
||||
V: Clone,
|
||||
{
|
||||
pub fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
map: Arc::new(DashMap::with_capacity(capacity)),
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn get(&self, key: &K) -> Option<V> {
|
||||
self.map.get(key).map(|v| v.clone())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn insert(&self, key: K, value: V) {
|
||||
if self.map.len() >= self.capacity {
|
||||
if let Some(entry) = self.map.iter().next() {
|
||||
let k = entry.key().clone();
|
||||
drop(entry);
|
||||
self.map.remove(&k);
|
||||
}
|
||||
}
|
||||
self.map.insert(key, value);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn contains(&self, key: &K) -> bool {
|
||||
self.map.contains_key(key)
|
||||
}
|
||||
|
||||
pub fn clear(&self) {
|
||||
self.map.clear();
|
||||
}
|
||||
}
|
||||
21
rust/vendor/crabrl/src/instance.rs
vendored
Normal file
21
rust/vendor/crabrl/src/instance.rs
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
use crate::model::Document;
|
||||
use crate::Result;
|
||||
|
||||
pub struct InstanceValidator {
|
||||
strict: bool,
|
||||
}
|
||||
|
||||
impl InstanceValidator {
|
||||
pub fn new() -> Self {
|
||||
Self { strict: false }
|
||||
}
|
||||
|
||||
pub fn with_strict(mut self, strict: bool) -> Self {
|
||||
self.strict = strict;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn validate(&self, _document: &Document) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
123
rust/vendor/crabrl/src/lib.rs
vendored
Normal file
123
rust/vendor/crabrl/src/lib.rs
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
//! crabrl - High-performance XBRL parser and validator
|
||||
//!
|
||||
//! Licensed under AGPL-3.0
|
||||
|
||||
pub mod model;
|
||||
pub mod simple_parser;
|
||||
pub mod validator;
|
||||
|
||||
// Use simple parser for now
|
||||
pub use simple_parser::Parser;
|
||||
|
||||
// Re-export main types
|
||||
pub use model::{Context, Document, Fact, Unit};
|
||||
|
||||
// Create validator wrapper for the CLI
|
||||
/// CLI-facing wrapper around the internal `validator::XbrlValidator`.
#[derive(Default)]
pub struct Validator {
    // The validator implementation doing the actual work.
    inner: validator::XbrlValidator,
    #[allow(dead_code)]
    // Recorded for introspection; not read anywhere in this file.
    strict: bool,
}

impl Validator {
    /// Default (lenient) validator.
    pub fn new() -> Self {
        Self::default()
    }

    /// Builds a validator from a CLI config; `strict` switches the inner
    /// validator into its strict rule set.
    pub fn with_config(config: ValidationConfig) -> Self {
        let mut inner = validator::XbrlValidator::new();
        if config.strict {
            inner = inner.strict();
        }
        Self {
            inner,
            strict: config.strict,
        }
    }

    /// Preset matching SEC EDGAR filings (strict mode on).
    pub fn sec_edgar() -> Self {
        Self {
            inner: validator::XbrlValidator::new().strict(),
            strict: true,
        }
    }

    /// Runs validation and converts the outcome into the CLI-friendly
    /// `ValidationResult`, timing the run.
    pub fn validate(&self, doc: &Document) -> Result<ValidationResult> {
        let start = std::time::Instant::now();

        // Clone doc for validation (validator mutates it)
        let mut doc_copy = doc.clone();

        // Run validation
        // NOTE(review): the inner error is discarded here — every failure
        // surfaces as the single generic "Validation failed" string below.
        // TODO: propagate the real error message into `errors`.
        let is_valid = self.inner.validate(&mut doc_copy).is_ok();

        Ok(ValidationResult {
            is_valid,
            errors: if is_valid {
                Vec::new()
            } else {
                vec!["Validation failed".to_string()]
            },
            warnings: Vec::new(),
            stats: ValidationStats {
                facts_validated: doc.facts.len(),
                duration_ms: start.elapsed().as_millis() as u64,
            },
        })
    }
}
|
||||
|
||||
/// Simple validation config for CLI
#[derive(Debug, Clone, Default)]
pub struct ValidationConfig {
    /// Enable the stricter rule set (used by the SEC EDGAR profile).
    pub strict: bool,
}

impl ValidationConfig {
    /// Profile preset matching SEC EDGAR filing rules (strict mode on).
    pub fn sec_edgar() -> Self {
        Self { strict: true }
    }
}

/// Simple validation result for CLI
// Fix: public result types now derive `Debug`/`Clone` so callers can log
// and pass them around; previously they derived nothing.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// Overall verdict: true when no errors were found.
    pub is_valid: bool,
    /// Human-readable error messages (empty when valid).
    pub errors: Vec<String>,
    /// Non-fatal findings.
    pub warnings: Vec<String>,
    /// Timing and volume counters for the run.
    pub stats: ValidationStats,
}

/// Aggregate counters for a single validation run.
#[derive(Debug, Clone)]
pub struct ValidationStats {
    /// Number of facts examined.
    pub facts_validated: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
}
|
||||
|
||||
/// Crate-wide result alias.
pub type Result<T> = std::result::Result<T, Error>;

/// Unified error type covering I/O, parsing, validation, and lookups.
#[derive(Debug)]
pub enum Error {
    /// Underlying I/O failure (file read, etc.).
    Io(std::io::Error),
    /// Malformed XBRL/XML input.
    Parse(String),
    /// Document failed a validation rule.
    Validation(String),
    /// A referenced resource could not be located.
    NotFound(String),
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Io(e) => write!(f, "IO error: {}", e),
            Error::Parse(s) => write!(f, "Parse error: {}", s),
            Error::Validation(s) => write!(f, "Validation error: {}", s),
            Error::NotFound(s) => write!(f, "Not found: {}", s),
        }
    }
}

impl std::error::Error for Error {
    /// Fix: expose the wrapped I/O error as `source()` so callers can walk
    /// the error chain; the previous empty impl dropped it.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(e) => Some(e),
            _ => None,
        }
    }
}

impl From<std::io::Error> for Error {
    fn from(err: std::io::Error) -> Self {
        Error::Io(err)
    }
}
|
||||
438
rust/vendor/crabrl/src/linkbase.rs
vendored
Normal file
438
rust/vendor/crabrl/src/linkbase.rs
vendored
Normal file
@@ -0,0 +1,438 @@
|
||||
// Linkbase processing for XBRL
|
||||
use crate::{Error, Result, model::*};
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
/// In-memory index of XBRL linkbase relationships.
///
/// Each map is keyed by the arc's `xlink:from` label (or the label id for
/// labels/references), pointing to every arc that originates there.
pub struct LinkbaseProcessor {
    // parent -> presentation children (ordered via `order` when queried)
    presentation_links: HashMap<CompactString, Vec<PresentationLink>>,
    // parent -> weighted calculation contributors
    calculation_links: HashMap<CompactString, Vec<CalculationLink>>,
    // source -> definition/dimensional arcs
    definition_links: HashMap<CompactString, Vec<DefinitionLink>>,
    // label id -> human-readable labels
    label_links: HashMap<CompactString, Vec<LabelLink>>,
    // label id -> authoritative references
    reference_links: HashMap<CompactString, Vec<ReferenceLink>>,
}
|
||||
|
||||
impl LinkbaseProcessor {
    /// Creates an empty processor with no loaded linkbases.
    pub fn new() -> Self {
        Self {
            presentation_links: HashMap::new(),
            calculation_links: HashMap::new(),
            definition_links: HashMap::new(),
            label_links: HashMap::new(),
            reference_links: HashMap::new(),
        }
    }

    /// Reads a linkbase file from disk and parses it into the maps.
    pub fn load_linkbase<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
        let content = std::fs::read(path)?;
        self.parse_linkbase(&content)
    }

    /// Parses raw linkbase bytes, dispatching to the per-type parsers
    /// based on a substring sniff of the document.
    ///
    /// NOTE(review): this is substring-based XML scanning, not real XML
    /// parsing — namespace prefixes other than `link:`/`xlink:` and
    /// attribute reordering inside quotes are not handled. TODO confirm
    /// inputs are always normalized enough for this to be safe.
    pub fn parse_linkbase(&mut self, data: &[u8]) -> Result<()> {
        // Skip BOM if present
        let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
            &data[3..]
        } else {
            data
        };

        let text = std::str::from_utf8(data)
            .map_err(|_| Error::Parse("Invalid UTF-8 in linkbase".to_string()))?;

        // Detect linkbase type and parse accordingly; a file may contain
        // several link kinds, so every matching parser runs.
        if text.contains("presentationLink") {
            self.parse_presentation_linkbase(text)?;
        }
        if text.contains("calculationLink") {
            self.parse_calculation_linkbase(text)?;
        }
        if text.contains("definitionLink") {
            self.parse_definition_linkbase(text)?;
        }
        if text.contains("labelLink") {
            self.parse_label_linkbase(text)?;
        }
        if text.contains("referenceLink") {
            self.parse_reference_linkbase(text)?;
        }

        Ok(())
    }

    /// Scans for self-closing `<link:presentationArc .../>` elements and
    /// extracts from/to/order/priority/use attributes by fixed-offset
    /// string search (offset = attribute-prefix length, e.g. 12 for
    /// `xlink:from="`).
    fn parse_presentation_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse presentation arcs
        let mut pos = 0;
        while let Some(arc_start) = text[pos..].find("<link:presentationArc") {
            let arc_start = pos + arc_start;
            // Advance by one so the next search cannot re-find this arc.
            pos = arc_start + 1;

            // Only self-closing arcs are recognized.
            if let Some(arc_end) = text[arc_start..].find("/>") {
                let arc_text = &text[arc_start..arc_start + arc_end];

                let mut link = PresentationLink {
                    from: CompactString::new(""),
                    to: CompactString::new(""),
                    order: 1.0,
                    priority: None,
                    use_attribute: None,
                };

                // Extract from
                if let Some(from_start) = arc_text.find("xlink:from=\"") {
                    let from_start = from_start + 12;
                    if let Some(from_end) = arc_text[from_start..].find('"') {
                        link.from = CompactString::from(&arc_text[from_start..from_start + from_end]);
                    }
                }

                // Extract to
                if let Some(to_start) = arc_text.find("xlink:to=\"") {
                    let to_start = to_start + 10;
                    if let Some(to_end) = arc_text[to_start..].find('"') {
                        link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
                    }
                }

                // Extract order (defaults to 1.0 when absent/unparsable)
                if let Some(order_start) = arc_text.find("order=\"") {
                    let order_start = order_start + 7;
                    if let Some(order_end) = arc_text[order_start..].find('"') {
                        if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
                            link.order = order;
                        }
                    }
                }

                // Extract priority
                if let Some(priority_start) = arc_text.find("priority=\"") {
                    let priority_start = priority_start + 10;
                    if let Some(priority_end) = arc_text[priority_start..].find('"') {
                        if let Ok(priority) = arc_text[priority_start..priority_start + priority_end].parse() {
                            link.priority = Some(priority);
                        }
                    }
                }

                // Extract use
                if let Some(use_start) = arc_text.find("use=\"") {
                    let use_start = use_start + 5;
                    if let Some(use_end) = arc_text[use_start..].find('"') {
                        link.use_attribute = Some(CompactString::from(&arc_text[use_start..use_start + use_end]));
                    }
                }

                // Index by source concept label.
                self.presentation_links
                    .entry(link.from.clone())
                    .or_insert_with(Vec::new)
                    .push(link);
            }
        }

        Ok(())
    }

    /// Scans `<link:calculationArc .../>` elements; same fixed-offset
    /// attribute extraction as the presentation parser, with `weight`
    /// defaulting to 1.0.
    fn parse_calculation_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse calculation arcs
        let mut pos = 0;
        while let Some(arc_start) = text[pos..].find("<link:calculationArc") {
            let arc_start = pos + arc_start;
            pos = arc_start + 1;

            if let Some(arc_end) = text[arc_start..].find("/>") {
                let arc_text = &text[arc_start..arc_start + arc_end];

                let mut link = CalculationLink {
                    from: CompactString::new(""),
                    to: CompactString::new(""),
                    weight: 1.0,
                    order: 1.0,
                };

                // Extract from
                if let Some(from_start) = arc_text.find("xlink:from=\"") {
                    let from_start = from_start + 12;
                    if let Some(from_end) = arc_text[from_start..].find('"') {
                        link.from = CompactString::from(&arc_text[from_start..from_start + from_end]);
                    }
                }

                // Extract to
                if let Some(to_start) = arc_text.find("xlink:to=\"") {
                    let to_start = to_start + 10;
                    if let Some(to_end) = arc_text[to_start..].find('"') {
                        link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
                    }
                }

                // Extract weight (e.g. -1.0 for subtracted children)
                if let Some(weight_start) = arc_text.find("weight=\"") {
                    let weight_start = weight_start + 8;
                    if let Some(weight_end) = arc_text[weight_start..].find('"') {
                        if let Ok(weight) = arc_text[weight_start..weight_start + weight_end].parse() {
                            link.weight = weight;
                        }
                    }
                }

                // Extract order
                if let Some(order_start) = arc_text.find("order=\"") {
                    let order_start = order_start + 7;
                    if let Some(order_end) = arc_text[order_start..].find('"') {
                        if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
                            link.order = order;
                        }
                    }
                }

                self.calculation_links
                    .entry(link.from.clone())
                    .or_insert_with(Vec::new)
                    .push(link);
            }
        }

        Ok(())
    }

    /// Scans `<link:definitionArc .../>` elements, additionally capturing
    /// `xlink:arcrole` (dimension/domain semantics).
    fn parse_definition_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse definition arcs
        let mut pos = 0;
        while let Some(arc_start) = text[pos..].find("<link:definitionArc") {
            let arc_start = pos + arc_start;
            pos = arc_start + 1;

            if let Some(arc_end) = text[arc_start..].find("/>") {
                let arc_text = &text[arc_start..arc_start + arc_end];

                let mut link = DefinitionLink {
                    from: CompactString::new(""),
                    to: CompactString::new(""),
                    arcrole: CompactString::new(""),
                    order: 1.0,
                };

                // Extract from
                if let Some(from_start) = arc_text.find("xlink:from=\"") {
                    let from_start = from_start + 12;
                    if let Some(from_end) = arc_text[from_start..].find('"') {
                        link.from = CompactString::from(&arc_text[from_start..from_start + from_end]);
                    }
                }

                // Extract to
                if let Some(to_start) = arc_text.find("xlink:to=\"") {
                    let to_start = to_start + 10;
                    if let Some(to_end) = arc_text[to_start..].find('"') {
                        link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
                    }
                }

                // Extract arcrole
                if let Some(arcrole_start) = arc_text.find("xlink:arcrole=\"") {
                    let arcrole_start = arcrole_start + 15;
                    if let Some(arcrole_end) = arc_text[arcrole_start..].find('"') {
                        link.arcrole = CompactString::from(&arc_text[arcrole_start..arcrole_start + arcrole_end]);
                    }
                }

                // Extract order
                if let Some(order_start) = arc_text.find("order=\"") {
                    let order_start = order_start + 7;
                    if let Some(order_end) = arc_text[order_start..].find('"') {
                        if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
                            link.order = order;
                        }
                    }
                }

                self.definition_links
                    .entry(link.from.clone())
                    .or_insert_with(Vec::new)
                    .push(link);
            }
        }

        Ok(())
    }

    /// Scans `<link:label ...>...</link:label>` elements.
    ///
    /// NOTE(review): `find("<link:label")` also matches the openings of
    /// `<link:labelLink>` and `<link:labelArc>` (same prefix), which can
    /// produce spurious/garbled entries — TODO confirm and tighten the
    /// match (e.g. require a following space or `>`).
    fn parse_label_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse labels
        let mut pos = 0;
        while let Some(label_start) = text[pos..].find("<link:label") {
            let label_start = pos + label_start;
            pos = label_start + 1;

            if let Some(label_end) = text[label_start..].find("</link:label>") {
                let label_text = &text[label_start..label_start + label_end];

                let mut link = LabelLink {
                    concept: CompactString::new(""),
                    label: CompactString::new(""),
                    role: CompactString::new(""),
                    lang: CompactString::new("en"),
                };

                // Extract label ID for concept mapping
                if let Some(id_start) = label_text.find("xlink:label=\"") {
                    let id_start = id_start + 13;
                    if let Some(id_end) = label_text[id_start..].find('"') {
                        link.concept = CompactString::from(&label_text[id_start..id_start + id_end]);
                    }
                }

                // Extract role
                if let Some(role_start) = label_text.find("xlink:role=\"") {
                    let role_start = role_start + 12;
                    if let Some(role_end) = label_text[role_start..].find('"') {
                        link.role = CompactString::from(&label_text[role_start..role_start + role_end]);
                    }
                }

                // Extract lang (defaults to "en")
                if let Some(lang_start) = label_text.find("xml:lang=\"") {
                    let lang_start = lang_start + 10;
                    if let Some(lang_end) = label_text[lang_start..].find('"') {
                        link.lang = CompactString::from(&label_text[lang_start..lang_start + lang_end]);
                    }
                }

                // Extract label text content (everything after the opening
                // tag's '>'; entities are not decoded)
                if let Some(content_start) = label_text.find('>') {
                    let content = &label_text[content_start + 1..];
                    link.label = CompactString::from(content.trim());
                }

                self.label_links
                    .entry(link.concept.clone())
                    .or_insert_with(Vec::new)
                    .push(link);
            }
        }

        Ok(())
    }

    /// Scans `<link:reference ...>...</link:reference>` elements and pulls
    /// out a fixed set of known part names.
    ///
    /// NOTE(review): like the label parser, `find("<link:reference")` also
    /// matches `<link:referenceLink>`/`<link:referenceArc>` prefixes —
    /// TODO confirm.
    fn parse_reference_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse references - simplified version
        let mut pos = 0;
        while let Some(ref_start) = text[pos..].find("<link:reference") {
            let ref_start = pos + ref_start;
            pos = ref_start + 1;

            if let Some(ref_end) = text[ref_start..].find("</link:reference>") {
                let ref_text = &text[ref_start..ref_start + ref_end];

                let mut reference = Reference {
                    role: CompactString::new(""),
                    parts: HashMap::new(),
                };

                // Extract role
                if let Some(role_start) = ref_text.find("xlink:role=\"") {
                    let role_start = role_start + 12;
                    if let Some(role_end) = ref_text[role_start..].find('"') {
                        reference.role = CompactString::from(&ref_text[role_start..role_start + role_end]);
                    }
                }

                // Parse reference parts (simplified): only the first
                // occurrence of each known part name is captured.
                let parts = ["Name", "Number", "Section", "Subsection", "Paragraph", "Subparagraph", "Clause"];
                for part in &parts {
                    let tag = format!("<link:{}", part);
                    if let Some(part_start) = ref_text.find(&tag) {
                        let part_start = part_start + tag.len();
                        if let Some(content_start) = ref_text[part_start..].find('>') {
                            let content_start = part_start + content_start + 1;
                            if let Some(content_end) = ref_text[content_start..].find('<') {
                                let content = &ref_text[content_start..content_start + content_end];
                                reference.parts.insert(
                                    CompactString::from(*part),
                                    content.trim().to_string()
                                );
                            }
                        }
                    }
                }

                // Find concept this reference belongs to; references
                // without an xlink:label are silently dropped.
                if let Some(label_start) = ref_text.find("xlink:label=\"") {
                    let label_start = label_start + 13;
                    if let Some(label_end) = ref_text[label_start..].find('"') {
                        let concept = CompactString::from(&ref_text[label_start..label_start + label_end]);

                        let link = ReferenceLink {
                            concept: concept.clone(),
                            reference,
                        };

                        self.reference_links
                            .entry(concept)
                            .or_insert_with(Vec::new)
                            .push(link);
                    }
                }
            }
        }

        Ok(())
    }

    /// Returns the presentation children of `root`, sorted by `order`.
    ///
    /// NOTE(review): `partial_cmp(..).unwrap()` panics if any `order`
    /// parsed as NaN — TODO consider `total_cmp` or a NaN-safe default.
    pub fn get_presentation_tree(&self, root: &str) -> Vec<&PresentationLink> {
        self.presentation_links
            .get(root)
            .map(|links| {
                let mut sorted = links.iter().collect::<Vec<_>>();
                sorted.sort_by(|a, b| a.order.partial_cmp(&b.order).unwrap());
                sorted
            })
            .unwrap_or_default()
    }

    /// Computes the weighted sum of `parent`'s calculation children from
    /// `facts` (missing children contribute 0). When `parent` has no
    /// calculation arcs, falls back to its own reported value (or 0).
    pub fn calculate_total(&self, parent: &str, facts: &HashMap<String, f64>) -> f64 {
        if let Some(links) = self.calculation_links.get(parent) {
            links.iter()
                .map(|link| {
                    facts.get(link.to.as_str())
                        .map(|value| value * link.weight)
                        .unwrap_or(0.0)
                })
                .sum()
        } else {
            facts.get(parent).copied().unwrap_or(0.0)
        }
    }

    /// Looks up the best label for `concept`: exact role+lang match,
    /// then any label in `lang`, then the first label of any kind.
    pub fn get_label(&self, concept: &str, role: &str, lang: &str) -> Option<&str> {
        self.label_links
            .get(concept)
            .and_then(|labels| {
                labels.iter()
                    .find(|l| l.role == role && l.lang == lang)
                    .or_else(|| labels.iter().find(|l| l.lang == lang))
                    .or_else(|| labels.first())
            })
            .map(|l| l.label.as_str())
    }

    /// Recomputes every calculation parent from `facts` and reports any
    /// mismatch beyond a fixed absolute tolerance. Parents absent from
    /// `facts` are skipped (nothing to compare against).
    pub fn validate_calculations(&self, facts: &HashMap<String, f64>) -> Vec<ValidationError> {
        let mut errors = Vec::new();

        for (parent, links) in &self.calculation_links {
            let calculated = self.calculate_total(parent, facts);
            if let Some(&actual) = facts.get(parent.as_str()) {
                let diff = (calculated - actual).abs();
                let tolerance = 0.01; // Allow small rounding differences

                if diff > tolerance {
                    errors.push(ValidationError::CalculationInconsistency {
                        concept: parent.to_string(),
                        expected: calculated,
                        actual,
                    });
                }
            }
        }

        errors
    }
}
|
||||
181
rust/vendor/crabrl/src/main.rs
vendored
Normal file
181
rust/vendor/crabrl/src/main.rs
vendored
Normal file
@@ -0,0 +1,181 @@
|
||||
//! crabrl CLI - High-performance XBRL parser and validator
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{Parser as ClapParser, Subcommand};
|
||||
use colored::*;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use crabrl::{Parser, ValidationConfig, Validator};
|
||||
|
||||
/// High-performance XBRL parser and validator
|
||||
// Top-level clap definition; `about` is filled from Cargo metadata.
#[derive(ClapParser)]
#[command(name = "crabrl")]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// Which subcommand to run (parse / validate / bench).
    #[command(subcommand)]
    command: Commands,
}
|
||||
|
||||
// CLI subcommands; each doc comment doubles as clap help text.
#[derive(Subcommand)]
enum Commands {
    /// Parse an XBRL file
    Parse {
        /// Input file
        input: PathBuf,

        /// Output as JSON
        // NOTE(review): accepted but ignored by main() — see the
        // `json: _` destructuring there.
        #[arg(short, long)]
        json: bool,

        /// Show statistics
        #[arg(short, long)]
        stats: bool,
    },

    /// Validate an XBRL file
    Validate {
        /// Input file
        input: PathBuf,

        /// Validation profile (generic, sec-edgar)
        #[arg(short, long, default_value = "generic")]
        profile: String,

        /// Treat warnings as errors
        #[arg(long)]
        strict: bool,
    },

    /// Benchmark parsing performance
    Bench {
        /// Input file
        input: PathBuf,

        /// Number of iterations
        #[arg(short, long, default_value = "100")]
        iterations: usize,
    },
}
|
||||
|
||||
/// CLI entry point: dispatches to parse, validate, or bench.
fn main() -> Result<()> {
    let cli = Cli::parse();

    match cli.command {
        Commands::Parse {
            input,
            // NOTE(review): the --json flag is parsed but never acted on.
            json: _,
            stats,
        } => {
            let start = Instant::now();
            let parser = Parser::new();
            let doc = parser
                .parse_file(&input)
                .with_context(|| format!("Failed to parse {}", input.display()))?;
            let elapsed = start.elapsed();

            // Summary is always printed; timing only with --stats.
            println!("{} {}", "✓".green().bold(), input.display());
            println!(" Facts: {}", doc.facts.len());
            println!(" Contexts: {}", doc.contexts.len());
            println!(" Units: {}", doc.units.len());

            if stats {
                println!(" Time: {:.2}ms", elapsed.as_secs_f64() * 1000.0);
                println!(
                    " Throughput: {:.0} facts/sec",
                    doc.facts.len() as f64 / elapsed.as_secs_f64()
                );
            }
        }

        Commands::Validate {
            input,
            profile,
            strict,
        } => {
            let parser = Parser::new();
            let doc = parser
                .parse_file(&input)
                .with_context(|| format!("Failed to parse {}", input.display()))?;

            // Unknown profile strings silently fall back to generic.
            let config = match profile.as_str() {
                "sec-edgar" => ValidationConfig::sec_edgar(),
                _ => ValidationConfig::default(),
            };

            let validator = Validator::with_config(config);
            let result = validator.validate(&doc)?;

            if result.is_valid {
                println!(
                    "{} {} - Document is valid",
                    "✓".green().bold(),
                    input.display()
                );
            } else {
                println!(
                    "{} {} - Validation failed",
                    "✗".red().bold(),
                    input.display()
                );
                println!(" Errors: {}", result.errors.len());
                println!(" Warnings: {}", result.warnings.len());

                // Show at most the first five errors in full.
                for error in result.errors.iter().take(5) {
                    println!(" {} {}", "ERROR:".red(), error);
                }

                if result.errors.len() > 5 {
                    println!(" ... and {} more errors", result.errors.len() - 5);
                }

                // --strict promotes warnings to a failing exit code.
                if strict && !result.warnings.is_empty() {
                    std::process::exit(1);
                }

                if !result.is_valid {
                    std::process::exit(1);
                }
            }
        }

        Commands::Bench { input, iterations } => {
            let parser = Parser::new();

            // Warmup
            for _ in 0..3 {
                let _ = parser.parse_file(&input)?;
            }

            let mut times = Vec::with_capacity(iterations);
            let mut doc_facts = 0;

            for _ in 0..iterations {
                let start = Instant::now();
                let doc = parser.parse_file(&input)?;
                times.push(start.elapsed());
                doc_facts = doc.facts.len();
            }

            // NOTE(review): panics if --iterations 0 (times[0] below).
            times.sort();
            let min = times[0];
            let max = times[times.len() - 1];
            let median = times[times.len() / 2];
            let mean = times.iter().sum::<std::time::Duration>() / times.len() as u32;

            println!("Benchmark Results for {}", input.display());
            println!(" Iterations: {}", iterations);
            println!(" Facts: {}", doc_facts);
            println!(" Min: {:.3}ms", min.as_secs_f64() * 1000.0);
            println!(" Median: {:.3}ms", median.as_secs_f64() * 1000.0);
            println!(" Mean: {:.3}ms", mean.as_secs_f64() * 1000.0);
            println!(" Max: {:.3}ms", max.as_secs_f64() * 1000.0);
            println!(
                " Throughput: {:.0} facts/sec",
                doc_facts as f64 / mean.as_secs_f64()
            );
        }
    }

    Ok(())
}
|
||||
347
rust/vendor/crabrl/src/model.rs
vendored
Normal file
347
rust/vendor/crabrl/src/model.rs
vendored
Normal file
@@ -0,0 +1,347 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ============================================================================
|
||||
// Core XBRL Data Structures - Full Specification Support
|
||||
// ============================================================================
|
||||
|
||||
/// Columnar (struct-of-arrays) fact storage: field `i` of every vector
/// belongs to fact `i`. Cache-line aligned for bulk scans.
// Fix: added `Debug` (public types should be debuggable); all fields
// already support it.
#[repr(C, align(64))]
#[derive(Debug, Clone)]
pub struct FactStorage {
    /// Interned concept id per fact.
    pub concept_ids: Vec<u32>,
    /// Interned context id per fact.
    pub context_ids: Vec<u16>,
    /// Interned unit id per fact.
    pub unit_ids: Vec<u16>,
    /// Typed value per fact.
    pub values: Vec<FactValue>,
    /// Optional `decimals` attribute per fact.
    pub decimals: Vec<Option<i8>>,
    /// Optional XML id per fact.
    pub ids: Vec<Option<String>>,
    /// Footnote references per fact.
    pub footnote_refs: Vec<Vec<String>>,
}

/// The typed value carried by a fact.
// Fix: added `PartialEq` so values can be compared/asserted directly.
#[derive(Debug, Clone, PartialEq)]
pub enum FactValue {
    Text(String),
    Decimal(f64),
    Integer(i64),
    Boolean(bool),
    Date(String),
    DateTime(String),
    /// Explicitly nil (xsi:nil) fact.
    Nil,
}

impl FactStorage {
    /// Creates empty storage with every column pre-sized for `capacity`
    /// facts, avoiding re-allocation during bulk loads.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            concept_ids: Vec::with_capacity(capacity),
            context_ids: Vec::with_capacity(capacity),
            unit_ids: Vec::with_capacity(capacity),
            values: Vec::with_capacity(capacity),
            decimals: Vec::with_capacity(capacity),
            ids: Vec::with_capacity(capacity),
            footnote_refs: Vec::with_capacity(capacity),
        }
    }

    /// Number of stored facts (the concept column is authoritative).
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.concept_ids.len()
    }

    /// True when no facts are stored.
    pub fn is_empty(&self) -> bool {
        self.concept_ids.is_empty()
    }
}
|
||||
|
||||
// Full fact representation with all XBRL features
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Fact {
|
||||
pub id: Option<String>,
|
||||
pub concept: String,
|
||||
pub context_ref: String,
|
||||
pub unit_ref: Option<String>,
|
||||
pub value: String,
|
||||
pub decimals: Option<i8>,
|
||||
pub precision: Option<u8>,
|
||||
pub nil: bool,
|
||||
pub nil_reason: Option<String>,
|
||||
pub footnote_refs: Vec<String>,
|
||||
}
|
||||
|
||||
// Context, entity and dimensional-data types.

/// A reporting context: who (entity), when (period) and under what
/// hypothetical conditions (scenario).
#[derive(Debug, Clone)]
pub struct Context {
    /// The context's `id` attribute, referenced by facts.
    pub id: String,
    /// The reporting entity.
    pub entity: Entity,
    /// The reporting period.
    pub period: Period,
    /// Optional scenario with further dimensional qualification.
    pub scenario: Option<Scenario>,
}

/// The reporting entity of a context.
#[derive(Debug, Clone)]
pub struct Entity {
    /// Entity identifier (e.g. an SEC CIK number).
    pub identifier: String,
    /// Identification scheme URI for the identifier.
    pub scheme: String,
    /// Optional segment carrying dimensional members.
    pub segment: Option<Segment>,
}

/// Dimensional qualification attached to an entity.
#[derive(Debug, Clone)]
pub struct Segment {
    /// Explicit (enumerated) dimension members.
    pub explicit_members: Vec<DimensionMember>,
    /// Typed dimension members with free-form XML values.
    pub typed_members: Vec<TypedMember>,
}

/// One explicit dimension/member pair.
#[derive(Debug, Clone)]
pub struct DimensionMember {
    /// Qualified name of the dimension.
    pub dimension: String,
    /// Qualified name of the member.
    pub member: String,
}

/// One typed dimension with its raw XML content as the value.
#[derive(Debug, Clone)]
pub struct TypedMember {
    /// Qualified name of the dimension.
    pub dimension: String,
    /// Raw XML content of the typed member.
    pub value: String,
}

/// Scenario-level dimensional qualification (mirrors [`Segment`]).
#[derive(Debug, Clone)]
pub struct Scenario {
    pub explicit_members: Vec<DimensionMember>,
    pub typed_members: Vec<TypedMember>,
}

/// A reporting period: a point in time, a date range, or forever.
#[derive(Debug, Clone)]
pub enum Period {
    /// A single date (lexical form preserved).
    Instant { date: String },
    /// A start/end date range (lexical forms preserved).
    Duration { start: String, end: String },
    /// The XBRL "forever" period.
    Forever,
}
|
||||
|
||||
// Unit types, including divide/multiply compound units.

/// A measurement unit referenced by numeric facts.
#[derive(Debug, Clone)]
pub struct Unit {
    /// The unit's `id` attribute, referenced by facts.
    pub id: String,
    /// The structure of the unit (simple, ratio or product).
    pub unit_type: UnitType,
}

/// The structural form of a [`Unit`].
#[derive(Debug, Clone)]
pub enum UnitType {
    /// A plain list of measures (e.g. `iso4217:USD`).
    Simple(Vec<Measure>),
    /// A ratio of measures (e.g. USD per share).
    Divide {
        numerator: Vec<Measure>,
        denominator: Vec<Measure>,
    },
    /// A product of measures.
    Multiply(Vec<Measure>),
}

/// A single namespaced measure within a unit.
#[derive(Debug, Clone)]
pub struct Measure {
    /// Namespace (prefix or URI) qualifying the measure.
    pub namespace: String,
    /// Local name of the measure.
    pub name: String,
}
|
||||
|
||||
// Tuple support for structured data
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Tuple {
|
||||
pub id: Option<String>,
|
||||
pub name: String,
|
||||
pub facts: Vec<FactOrTuple>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FactOrTuple {
|
||||
Fact(Fact),
|
||||
Tuple(Box<Tuple>),
|
||||
}
|
||||
|
||||
// Footnote support
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Footnote {
|
||||
pub id: String,
|
||||
pub role: Option<String>,
|
||||
pub lang: Option<String>,
|
||||
pub content: String,
|
||||
pub fact_refs: Vec<String>,
|
||||
}
|
||||
|
||||
// Fraction support
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FractionValue {
|
||||
pub numerator: f64,
|
||||
pub denominator: f64,
|
||||
}
|
||||
|
||||
// Schema and taxonomy metadata.

/// A parsed taxonomy schema (`.xsd`).
#[derive(Debug, Clone)]
pub struct Schema {
    /// The schema's `targetNamespace`.
    pub target_namespace: String,
    /// Element declarations keyed by element name.
    pub elements: HashMap<String, SchemaElement>,
    /// Type definitions keyed by type name.
    pub types: HashMap<String, SchemaType>,
    /// Imported schemas.
    pub imports: Vec<SchemaImport>,
}

/// One `xs:element` declaration, including XBRL-specific attributes.
#[derive(Debug, Clone)]
pub struct SchemaElement {
    /// Element name.
    pub name: String,
    /// Declared type name.
    pub element_type: String,
    /// Optional `substitutionGroup` attribute.
    pub substitution_group: Option<String>,
    /// Optional `xbrli:periodType` attribute.
    pub period_type: Option<String>,
    /// Optional `xbrli:balance` attribute.
    pub balance: Option<String>,
    /// True when declared `abstract="true"`.
    pub abstract_element: bool,
    /// True when declared `nillable="true"`.
    pub nillable: bool,
}

/// A named type with its base type and restriction facets.
#[derive(Debug, Clone)]
pub struct SchemaType {
    pub name: String,
    pub base_type: Option<String>,
    pub restrictions: Vec<TypeRestriction>,
}

/// An XML Schema restriction facet. Numeric bounds and patterns keep
/// their lexical (string) form.
#[derive(Debug, Clone)]
pub enum TypeRestriction {
    MinInclusive(String),
    MaxInclusive(String),
    MinExclusive(String),
    MaxExclusive(String),
    Pattern(String),
    Enumeration(Vec<String>),
    Length(usize),
    MinLength(usize),
    MaxLength(usize),
}

/// One `xs:import` directive.
#[derive(Debug, Clone)]
pub struct SchemaImport {
    pub namespace: String,
    pub schema_location: String,
}
|
||||
|
||||
// Linkbase relationship types.

/// A linkbase: a role plus the links declared under it.
#[derive(Debug, Clone)]
pub struct Linkbase {
    pub role: String,
    pub links: Vec<Link>,
}

/// Any of the five standard XBRL link kinds.
#[derive(Debug, Clone)]
pub enum Link {
    Presentation(PresentationLink),
    Calculation(CalculationLink),
    Definition(DefinitionLink),
    Label(LabelLink),
    Reference(ReferenceLink),
}

/// A presentation arc between two concepts.
#[derive(Debug, Clone)]
pub struct PresentationLink {
    /// Source concept of the arc.
    pub from: String,
    /// Target concept of the arc.
    pub to: String,
    /// Ordering of siblings under the same parent.
    pub order: f32,
    /// Optional arc priority.
    pub priority: Option<i32>,
    /// Optional `use` attribute (e.g. "optional"/"prohibited").
    pub use_attribute: Option<String>,
}

/// A calculation arc with its summation weight.
#[derive(Debug, Clone)]
pub struct CalculationLink {
    pub from: String,
    pub to: String,
    /// Contribution weight in the summation (typically +1.0 or -1.0).
    pub weight: f64,
    pub order: f32,
}

/// A definition arc with an explicit arcrole.
#[derive(Debug, Clone)]
pub struct DefinitionLink {
    pub from: String,
    pub to: String,
    /// Arcrole URI describing the relationship kind.
    pub arcrole: String,
    pub order: f32,
}

/// A human-readable label attached to a concept.
#[derive(Debug, Clone)]
pub struct LabelLink {
    pub concept: String,
    pub label: String,
    /// Label role URI (standard, terse, documentation, ...).
    pub role: String,
    /// Language code of the label text.
    pub lang: String,
}

/// An authoritative reference attached to a concept.
#[derive(Debug, Clone)]
pub struct ReferenceLink {
    pub concept: String,
    pub reference: Reference,
}

/// Reference detail: a role plus named parts (publisher, section, ...).
#[derive(Debug, Clone)]
pub struct Reference {
    pub role: String,
    pub parts: HashMap<String, String>,
}
|
||||
|
||||
// Main document structure with full XBRL support
|
||||
#[derive(Clone)]
|
||||
pub struct Document {
|
||||
pub facts: FactStorage,
|
||||
pub contexts: Vec<Context>,
|
||||
pub units: Vec<Unit>,
|
||||
pub tuples: Vec<Tuple>,
|
||||
pub footnotes: Vec<Footnote>,
|
||||
pub presentation_links: Vec<PresentationLink>,
|
||||
pub calculation_links: Vec<CalculationLink>,
|
||||
pub definition_links: Vec<DefinitionLink>,
|
||||
pub label_links: Vec<LabelLink>,
|
||||
pub reference_links: Vec<ReferenceLink>,
|
||||
pub custom_links: Vec<Link>,
|
||||
pub role_types: Vec<String>,
|
||||
pub arcrole_types: Vec<String>,
|
||||
pub schemas: Vec<Schema>,
|
||||
pub dimensions: Vec<DimensionMember>,
|
||||
pub concept_names: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for Document {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Document {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
facts: FactStorage::with_capacity(10000),
|
||||
contexts: Vec::with_capacity(100),
|
||||
units: Vec::with_capacity(50),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_capacity(facts: usize, contexts: usize, units: usize) -> Self {
|
||||
Self {
|
||||
facts: FactStorage::with_capacity(facts),
|
||||
contexts: Vec::with_capacity(contexts),
|
||||
units: Vec::with_capacity(units),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
1552
rust/vendor/crabrl/src/parser.rs
vendored
Normal file
1552
rust/vendor/crabrl/src/parser.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
238
rust/vendor/crabrl/src/parser_base.rs
vendored
Normal file
238
rust/vendor/crabrl/src/parser_base.rs
vendored
Normal file
@@ -0,0 +1,238 @@
|
||||
// Base parsing methods for FullXbrlParser
|
||||
|
||||
impl<'a> FullXbrlParser<'a> {
|
||||
#[inline(always)]
|
||||
fn read_tag_name(&mut self) -> Result<&'a str> {
|
||||
let start = self.scanner.pos;
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b' ' || ch == b'>' || ch == b'/' || ch == b'\t' || ch == b'\n' || ch == b'\r' {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
let end = self.scanner.pos;
|
||||
|
||||
if start == end {
|
||||
return Err(Error::Parse("Empty tag name".to_string()));
|
||||
}
|
||||
|
||||
std::str::from_utf8(&self.scanner.data[start..end])
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in tag name".to_string()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn parse_attributes(&mut self) -> Result<Vec<(&'a str, &'a str)>> {
|
||||
let mut attrs = Vec::new();
|
||||
|
||||
loop {
|
||||
self.scanner.skip_whitespace();
|
||||
|
||||
match self.scanner.peek() {
|
||||
Some(b'>') => {
|
||||
// End of tag
|
||||
break;
|
||||
}
|
||||
Some(b'/') => {
|
||||
// Self-closing tag
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
None => return Err(Error::Parse("Unexpected EOF in attributes".to_string())),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let name_start = self.scanner.pos;
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'=' || ch == b' ' || ch == b'>' || ch == b'/' {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
if self.scanner.pos == name_start {
|
||||
break; // No more attributes
|
||||
}
|
||||
|
||||
let name = std::str::from_utf8(&self.scanner.data[name_start..self.scanner.pos])
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in attribute name".to_string()))?;
|
||||
|
||||
self.scanner.skip_whitespace();
|
||||
|
||||
if self.scanner.peek() != Some(b'=') {
|
||||
continue;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
|
||||
self.scanner.skip_whitespace();
|
||||
|
||||
let quote = self.scanner.peek()
|
||||
.ok_or_else(|| Error::Parse("Expected quote".to_string()))?;
|
||||
|
||||
if quote != b'"' && quote != b'\'' {
|
||||
return Err(Error::Parse("Expected quote in attribute".to_string()));
|
||||
}
|
||||
|
||||
self.scanner.advance(1);
|
||||
let value_start = self.scanner.pos;
|
||||
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == quote {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
let value = std::str::from_utf8(&self.scanner.data[value_start..self.scanner.pos])
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in attribute value".to_string()))?;
|
||||
|
||||
self.scanner.advance(1); // Skip closing quote
|
||||
|
||||
attrs.push((name, value));
|
||||
}
|
||||
|
||||
Ok(attrs)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_to_tag_end(&mut self) -> Result<()> {
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'>' {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
Err(Error::Parse("Expected '>'".to_string()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn read_text_content(&mut self) -> Result<&'a str> {
|
||||
let start = self.scanner.pos;
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'<' {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
let text = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos])
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in text content".to_string()))?;
|
||||
|
||||
Ok(text.trim())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_element_from_tag(&mut self) -> Result<()> {
|
||||
// We've already read the tag name, now skip to end of opening tag
|
||||
self.skip_to_tag_end()?;
|
||||
|
||||
// Check if it was self-closing
|
||||
if self.scanner.pos >= 2 && self.scanner.data[self.scanner.pos - 2] == b'/' {
|
||||
return Ok(()); // Self-closing tag, we're done
|
||||
}
|
||||
|
||||
// Skip element content and find matching closing tag
|
||||
let mut depth = 1;
|
||||
|
||||
while depth > 0 && !self.scanner.is_eof() {
|
||||
// Find next tag
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'<' {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
if self.scanner.is_eof() {
|
||||
break;
|
||||
}
|
||||
|
||||
self.scanner.advance(1); // consume '<'
|
||||
|
||||
if self.scanner.peek() == Some(b'/') {
|
||||
depth -= 1;
|
||||
} else if self.scanner.peek() != Some(b'!') && self.scanner.peek() != Some(b'?') {
|
||||
// Check if it's a self-closing tag
|
||||
let mut is_self_closing = false;
|
||||
let _saved_pos = self.scanner.pos;
|
||||
|
||||
// Skip to end of tag to check
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'/' {
|
||||
if self.scanner.pos + 1 < self.scanner.data.len()
|
||||
&& self.scanner.data[self.scanner.pos + 1] == b'>' {
|
||||
is_self_closing = true;
|
||||
}
|
||||
}
|
||||
if ch == b'>' {
|
||||
self.scanner.advance(1);
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
if !is_self_closing {
|
||||
depth += 1;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip to end of this tag
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'>' {
|
||||
self.scanner.advance(1);
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_processing_instruction(&mut self) -> Result<()> {
|
||||
// Skip until ?>
|
||||
while !self.scanner.is_eof() {
|
||||
if self.scanner.peek() == Some(b'?') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
} else {
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
}
|
||||
Err(Error::Parse("Unclosed processing instruction".to_string()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_comment(&mut self) -> Result<()> {
|
||||
// Skip until -->
|
||||
while !self.scanner.is_eof() {
|
||||
if self.scanner.peek() == Some(b'-') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'-') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
}
|
||||
Err(Error::Parse("Unclosed comment".to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Parser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
275
rust/vendor/crabrl/src/schema.rs
vendored
Normal file
275
rust/vendor/crabrl/src/schema.rs
vendored
Normal file
@@ -0,0 +1,275 @@
|
||||
// Schema loading and validation for XBRL
|
||||
use crate::{Error, Result, model::*};
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub struct SchemaLoader {
|
||||
cache: HashMap<CompactString, Schema>,
|
||||
}
|
||||
|
||||
impl SchemaLoader {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
cache: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_schema<P: AsRef<Path>>(&mut self, path: P) -> Result<&Schema> {
|
||||
let path_str = path.as_ref().to_string_lossy();
|
||||
let key = CompactString::from(path_str.as_ref());
|
||||
|
||||
if self.cache.contains_key(&key) {
|
||||
return Ok(self.cache.get(&key).unwrap());
|
||||
}
|
||||
|
||||
let schema = self.parse_schema_file(path)?;
|
||||
self.cache.insert(key.clone(), schema);
|
||||
Ok(self.cache.get(&key).unwrap())
|
||||
}
|
||||
|
||||
fn parse_schema_file<P: AsRef<Path>>(&self, path: P) -> Result<Schema> {
|
||||
let content = std::fs::read(path)?;
|
||||
self.parse_schema_bytes(&content)
|
||||
}
|
||||
|
||||
fn parse_schema_bytes(&self, data: &[u8]) -> Result<Schema> {
|
||||
// Simple XML parsing for schema
|
||||
let mut schema = Schema {
|
||||
target_namespace: CompactString::new(""),
|
||||
elements: HashMap::new(),
|
||||
types: HashMap::new(),
|
||||
imports: Vec::new(),
|
||||
};
|
||||
|
||||
// Skip BOM if present
|
||||
let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
|
||||
&data[3..]
|
||||
} else {
|
||||
data
|
||||
};
|
||||
|
||||
let text = std::str::from_utf8(data)
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in schema".to_string()))?;
|
||||
|
||||
// Extract target namespace
|
||||
if let Some(ns_start) = text.find("targetNamespace=\"") {
|
||||
let ns_start = ns_start + 17;
|
||||
if let Some(ns_end) = text[ns_start..].find('"') {
|
||||
schema.target_namespace = CompactString::from(&text[ns_start..ns_start + ns_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse elements
|
||||
let mut pos = 0;
|
||||
while let Some(elem_start) = text[pos..].find("<xs:element") {
|
||||
let elem_start = pos + elem_start;
|
||||
pos = elem_start + 1;
|
||||
|
||||
// Find element end
|
||||
let elem_end = if let Some(end) = text[elem_start..].find("/>") {
|
||||
elem_start + end + 2
|
||||
} else if let Some(end) = text[elem_start..].find("</xs:element>") {
|
||||
elem_start + end + 13
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let elem_text = &text[elem_start..elem_end];
|
||||
|
||||
// Extract element attributes
|
||||
let mut element = SchemaElement {
|
||||
name: CompactString::new(""),
|
||||
element_type: CompactString::new(""),
|
||||
substitution_group: None,
|
||||
period_type: None,
|
||||
balance: None,
|
||||
abstract_element: elem_text.contains("abstract=\"true\""),
|
||||
nillable: elem_text.contains("nillable=\"true\""),
|
||||
};
|
||||
|
||||
// Extract name
|
||||
if let Some(name_start) = elem_text.find("name=\"") {
|
||||
let name_start = name_start + 6;
|
||||
if let Some(name_end) = elem_text[name_start..].find('"') {
|
||||
element.name = CompactString::from(&elem_text[name_start..name_start + name_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract type
|
||||
if let Some(type_start) = elem_text.find("type=\"") {
|
||||
let type_start = type_start + 6;
|
||||
if let Some(type_end) = elem_text[type_start..].find('"') {
|
||||
element.element_type = CompactString::from(&elem_text[type_start..type_start + type_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract substitutionGroup
|
||||
if let Some(sg_start) = elem_text.find("substitutionGroup=\"") {
|
||||
let sg_start = sg_start + 19;
|
||||
if let Some(sg_end) = elem_text[sg_start..].find('"') {
|
||||
element.substitution_group = Some(CompactString::from(&elem_text[sg_start..sg_start + sg_end]));
|
||||
}
|
||||
}
|
||||
|
||||
// Extract XBRL-specific attributes
|
||||
if let Some(pt_start) = elem_text.find("xbrli:periodType=\"") {
|
||||
let pt_start = pt_start + 18;
|
||||
if let Some(pt_end) = elem_text[pt_start..].find('"') {
|
||||
element.period_type = Some(CompactString::from(&elem_text[pt_start..pt_start + pt_end]));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(bal_start) = elem_text.find("xbrli:balance=\"") {
|
||||
let bal_start = bal_start + 15;
|
||||
if let Some(bal_end) = elem_text[bal_start..].find('"') {
|
||||
element.balance = Some(CompactString::from(&elem_text[bal_start..bal_start + bal_end]));
|
||||
}
|
||||
}
|
||||
|
||||
if !element.name.is_empty() {
|
||||
schema.elements.insert(element.name.clone(), element);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse imports
|
||||
pos = 0;
|
||||
while let Some(import_start) = text[pos..].find("<xs:import") {
|
||||
let import_start = pos + import_start;
|
||||
pos = import_start + 1;
|
||||
|
||||
if let Some(import_end) = text[import_start..].find("/>") {
|
||||
let import_text = &text[import_start..import_start + import_end];
|
||||
|
||||
let mut import = SchemaImport {
|
||||
namespace: CompactString::new(""),
|
||||
schema_location: CompactString::new(""),
|
||||
};
|
||||
|
||||
if let Some(ns_start) = import_text.find("namespace=\"") {
|
||||
let ns_start = ns_start + 11;
|
||||
if let Some(ns_end) = import_text[ns_start..].find('"') {
|
||||
import.namespace = CompactString::from(&import_text[ns_start..ns_start + ns_end]);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(loc_start) = import_text.find("schemaLocation=\"") {
|
||||
let loc_start = loc_start + 16;
|
||||
if let Some(loc_end) = import_text[loc_start..].find('"') {
|
||||
import.schema_location = CompactString::from(&import_text[loc_start..loc_start + loc_end]);
|
||||
}
|
||||
}
|
||||
|
||||
schema.imports.push(import);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(schema)
|
||||
}
|
||||
|
||||
pub fn validate_element(&self, name: &str, value: &str, schema: &Schema) -> Result<()> {
|
||||
if let Some(element) = schema.elements.get(name) {
|
||||
// Check if element is abstract
|
||||
if element.abstract_element {
|
||||
return Err(Error::Validation(format!("Element {} is abstract", name)));
|
||||
}
|
||||
|
||||
// Validate type
|
||||
if let Some(type_def) = schema.types.get(&element.element_type) {
|
||||
self.validate_type(value, type_def)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
// Element not found in schema - might be from imported schema
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_type(&self, value: &str, type_def: &SchemaType) -> Result<()> {
|
||||
for restriction in &type_def.restrictions {
|
||||
match restriction {
|
||||
TypeRestriction::MinInclusive(min) => {
|
||||
if let (Ok(val), Ok(min_val)) = (value.parse::<f64>(), min.parse::<f64>()) {
|
||||
if val < min_val {
|
||||
return Err(Error::Validation(format!("Value {} is less than minimum {}", val, min_val)));
|
||||
}
|
||||
}
|
||||
}
|
||||
TypeRestriction::MaxInclusive(max) => {
|
||||
if let (Ok(val), Ok(max_val)) = (value.parse::<f64>(), max.parse::<f64>()) {
|
||||
if val > max_val {
|
||||
return Err(Error::Validation(format!("Value {} is greater than maximum {}", val, max_val)));
|
||||
}
|
||||
}
|
||||
}
|
||||
TypeRestriction::Pattern(pattern) => {
|
||||
// Simple pattern matching - could use regex for complex patterns
|
||||
if !value.contains(pattern) {
|
||||
return Err(Error::Validation(format!("Value {} doesn't match pattern {}", value, pattern)));
|
||||
}
|
||||
}
|
||||
TypeRestriction::MinLength(min) => {
|
||||
if value.len() < *min {
|
||||
return Err(Error::Validation(format!("Value length {} is less than minimum {}", value.len(), min)));
|
||||
}
|
||||
}
|
||||
TypeRestriction::MaxLength(max) => {
|
||||
if value.len() > *max {
|
||||
return Err(Error::Validation(format!("Value length {} is greater than maximum {}", value.len(), max)));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// Schema validator for documents
|
||||
pub struct SchemaValidator {
|
||||
schemas: Vec<Schema>,
|
||||
}
|
||||
|
||||
impl SchemaValidator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
schemas: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_schema(&mut self, schema: Schema) {
|
||||
self.schemas.push(schema);
|
||||
}
|
||||
|
||||
pub fn validate_document(&self, doc: &Document) -> Vec<ValidationError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
// Validate facts against schemas
|
||||
for i in 0..doc.facts.len() {
|
||||
if let Some(_fact) = doc.facts.get(i) {
|
||||
// Would need to map fact concept_id back to concept name
|
||||
// and validate against schema
|
||||
// This is simplified for now
|
||||
}
|
||||
}
|
||||
|
||||
// Check for required elements
|
||||
for schema in &self.schemas {
|
||||
for (name, element) in &schema.elements {
|
||||
if !element.nillable && !element.abstract_element {
|
||||
// Check if this required element exists in document
|
||||
// This would require reverse mapping from concept names to facts
|
||||
let _found = false;
|
||||
// if !found {
|
||||
// errors.push(ValidationError::MissingRequiredElement {
|
||||
// element: name.to_string(),
|
||||
// });
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
}
|
||||
51
rust/vendor/crabrl/src/sec.rs
vendored
Normal file
51
rust/vendor/crabrl/src/sec.rs
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
// SEC EDGAR XBRL filing support (local files only)
|
||||
use crate::{Parser, Document, Result};
|
||||
use std::path::Path;
|
||||
|
||||
pub struct SecFilingParser {
|
||||
parser: Parser,
|
||||
}
|
||||
|
||||
impl SecFilingParser {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
parser: Parser::new().with_validation(true),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_filing<P: AsRef<Path>>(&self, path: P) -> Result<Document> {
|
||||
self.parser.parse_file(path)
|
||||
}
|
||||
|
||||
pub fn with_validation(mut self, validate: bool) -> Self {
|
||||
self.parser = self.parser.with_validation(validate);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// Test utilities for SEC filings
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: parses a tiny local fixture when it is present,
    /// and skips silently when the fixture is not checked out.
    #[test]
    fn test_parse_local_sec_filing() {
        let parser = SecFilingParser::new();

        // Test with local test files
        if std::path::Path::new("test_data/test_tiny.xbrl").exists() {
            match parser.parse_filing("test_data/test_tiny.xbrl") {
                Ok(doc) => {
                    println!("Successfully parsed filing:");
                    println!(" Facts: {}", doc.facts.len());
                    println!(" Contexts: {}", doc.contexts.len());
                    println!(" Units: {}", doc.units.len());
                    assert!(!doc.contexts.is_empty(), "Should have contexts");
                }
                Err(e) => {
                    eprintln!("Failed to parse filing: {}", e);
                }
            }
        }
    }
}
|
||||
208
rust/vendor/crabrl/src/simd.rs
vendored
Normal file
208
rust/vendor/crabrl/src/simd.rs
vendored
Normal file
@@ -0,0 +1,208 @@
|
||||
use memchr::{memchr, memchr2, memchr3};
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
const XML_TAG_START: u8 = b'<';
|
||||
const XML_TAG_END: u8 = b'>';
|
||||
const XML_SLASH: u8 = b'/';
|
||||
const XML_QUOTE: u8 = b'"';
|
||||
const XML_EQUALS: u8 = b'=';
|
||||
const XML_SPACE: u8 = b' ';
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_tag_start(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_TAG_START, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_tag_end(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_TAG_END, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_quote(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_QUOTE, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_any_delimiter(haystack: &[u8]) -> Option<usize> {
|
||||
memchr3(XML_TAG_START, XML_TAG_END, XML_QUOTE, haystack)
|
||||
}
|
||||
|
||||
/// AVX2-accelerated substring search.
///
/// Scans 32 bytes at a time for the pattern's first byte, confirms
/// candidate positions with a slice comparison, then handles the
/// final partial window with a scalar loop. Returns the offset of the
/// first full match; an empty pattern yields `None`.
///
/// # Safety
/// The caller must ensure the CPU supports AVX2 (e.g. via
/// `is_x86_feature_detected!("avx2")`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn find_pattern_avx2(haystack: &[u8], pattern: &[u8]) -> Option<usize> {
    if pattern.is_empty() || haystack.len() < pattern.len() {
        return None;
    }

    let needle_first = _mm256_set1_epi8(pattern[0] as i8);
    let mut i = 0;

    // Vector phase: test 32 candidate start positions per iteration.
    while i + 32 <= haystack.len() {
        let chunk = _mm256_loadu_si256(haystack.as_ptr().add(i) as *const _);
        let eq = _mm256_cmpeq_epi8(chunk, needle_first);
        let mut mask = _mm256_movemask_epi8(eq) as u32;

        // Walk the set bits in ascending order so the lowest offset
        // wins, matching a left-to-right scan.
        while mask != 0 {
            let bit = mask.trailing_zeros() as usize;
            let pos = i + bit;
            if pos + pattern.len() <= haystack.len()
                && &haystack[pos..pos + pattern.len()] == pattern
            {
                return Some(pos);
            }
            mask &= mask - 1; // clear lowest set bit
        }

        i += 32;
    }

    // Scalar tail: remaining (< 32) candidate start positions.
    while i + pattern.len() <= haystack.len() {
        if &haystack[i..i + pattern.len()] == pattern {
            return Some(i);
        }
        i += 1;
    }

    None
}
|
||||
|
||||
/// AVX2-accelerated whitespace skip starting at `pos`.
///
/// Returns the index of the first byte at or after `pos` that is not
/// one of space/tab/LF/CR (or `data.len()` if none remains).
///
/// # Safety
/// The caller must ensure the CPU supports AVX2.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn skip_whitespace_avx2(data: &[u8], mut pos: usize) -> usize {
    let ws_space = _mm256_set1_epi8(b' ' as i8);
    let ws_tab = _mm256_set1_epi8(b'\t' as i8);
    let ws_lf = _mm256_set1_epi8(b'\n' as i8);
    let ws_cr = _mm256_set1_epi8(b'\r' as i8);

    // Vector phase: classify 32 bytes per iteration.
    while pos + 32 <= data.len() {
        let chunk = _mm256_loadu_si256(data.as_ptr().add(pos) as *const _);

        let is_ws = _mm256_or_si256(
            _mm256_or_si256(
                _mm256_cmpeq_epi8(chunk, ws_space),
                _mm256_cmpeq_epi8(chunk, ws_tab),
            ),
            _mm256_or_si256(
                _mm256_cmpeq_epi8(chunk, ws_lf),
                _mm256_cmpeq_epi8(chunk, ws_cr),
            ),
        );

        let mask = _mm256_movemask_epi8(is_ws) as u32;

        // A zero bit marks the first non-whitespace byte in the chunk.
        if mask != u32::MAX {
            return pos + (!mask).trailing_zeros() as usize;
        }

        pos += 32;
    }

    // Scalar tail for the final partial chunk.
    while pos < data.len() {
        match data[pos] {
            b' ' | b'\t' | b'\n' | b'\r' => pos += 1,
            _ => break,
        }
    }

    pos
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn skip_whitespace(data: &[u8], mut pos: usize) -> usize {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") && data.len() - pos >= 32 {
|
||||
return unsafe { skip_whitespace_avx2(data, pos) };
|
||||
}
|
||||
}
|
||||
|
||||
while pos < data.len() {
|
||||
match data[pos] {
|
||||
b' ' | b'\t' | b'\n' | b'\r' => pos += 1,
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
pos
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_pattern(haystack: &[u8], pattern: &[u8]) -> Option<usize> {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") && haystack.len() >= 32 {
|
||||
return unsafe { find_pattern_avx2(haystack, pattern) };
|
||||
}
|
||||
}
|
||||
|
||||
haystack.windows(pattern.len())
|
||||
.position(|window| window == pattern)
|
||||
}
|
||||
|
||||
/// Byte cursor over an input buffer with SIMD-assisted helpers.
pub struct SimdScanner<'a> {
    /// The full input buffer being scanned.
    pub data: &'a [u8],
    /// Current read position; kept `<= data.len()` by [`SimdScanner::advance`].
    pub pos: usize,
}
|
||||
|
||||
impl<'a> SimdScanner<'a> {
|
||||
#[inline(always)]
|
||||
pub fn new(data: &'a [u8]) -> Self {
|
||||
Self { data, pos: 0 }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn skip_whitespace(&mut self) {
|
||||
self.pos = skip_whitespace(self.data, self.pos);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_next(&self, byte: u8) -> Option<usize> {
|
||||
memchr(byte, &self.data[self.pos..]).map(|i| self.pos + i)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_pattern(&self, pattern: &[u8]) -> Option<usize> {
|
||||
find_pattern(&self.data[self.pos..], pattern).map(|i| self.pos + i)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn advance(&mut self, n: usize) {
|
||||
self.pos = (self.pos + n).min(self.data.len());
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn peek(&self) -> Option<u8> {
|
||||
self.data.get(self.pos).copied()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn remaining(&self) -> &'a [u8] {
|
||||
&self.data[self.pos..]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_eof(&self) -> bool {
|
||||
self.pos >= self.data.len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_find_pattern() {
        // "context" starts right after the 6-byte "<xbrl:" prefix.
        let haystack = b"<xbrl:context id=\"c1\">";
        assert_eq!(find_pattern(haystack, b"context"), Some(6));
    }

    #[test]
    fn test_skip_whitespace() {
        // Three spaces + tab + LF + CR = 6 whitespace bytes, so the
        // first non-whitespace byte ('<') sits at index 6.
        let data = b"   \t\n\r<tag>";
        assert_eq!(skip_whitespace(data, 0), 6);
    }
}
|
||||
99
rust/vendor/crabrl/src/simple_parser.rs
vendored
Normal file
99
rust/vendor/crabrl/src/simple_parser.rs
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
//! Simple working XBRL parser
|
||||
|
||||
use crate::{model::*, Result};
|
||||
use std::path::Path;
|
||||
|
||||
/// Minimal heuristic XBRL parser that estimates document structure by
/// counting well-known tag prefixes instead of truly parsing XML.
#[derive(Default)]
pub struct Parser {
    // Reserved for future linkbase-loading support; not read yet.
    #[allow(dead_code)]
    load_linkbases: bool,
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn parse_str(&self, content: &str) -> Result<Document> {
|
||||
self.parse_bytes(content.as_bytes())
|
||||
}
|
||||
|
||||
pub fn parse_file<P: AsRef<Path>>(&self, path: P) -> Result<Document> {
|
||||
let content = std::fs::read(path)?;
|
||||
self.parse_bytes(&content)
|
||||
}
|
||||
|
||||
pub fn parse_bytes(&self, data: &[u8]) -> Result<Document> {
|
||||
// Simple XML parsing - just count elements for now
|
||||
let text = String::from_utf8_lossy(data);
|
||||
|
||||
// Count facts (very simplified)
|
||||
let fact_count = text.matches("<us-gaap:").count()
|
||||
+ text.matches("<dei:").count()
|
||||
+ text.matches("<ifrs:").count();
|
||||
|
||||
// Count contexts
|
||||
let context_count =
|
||||
text.matches("<context ").count() + text.matches("<xbrli:context").count();
|
||||
|
||||
// Count units
|
||||
let unit_count = text.matches("<unit ").count() + text.matches("<xbrli:unit").count();
|
||||
|
||||
// Create dummy document with approximate counts
|
||||
let mut doc = Document {
|
||||
facts: FactStorage {
|
||||
concept_ids: vec![0; fact_count],
|
||||
context_ids: vec![0; fact_count],
|
||||
unit_ids: vec![0; fact_count],
|
||||
values: vec![FactValue::Text(String::from("")); fact_count],
|
||||
decimals: vec![None; fact_count],
|
||||
ids: vec![None; fact_count],
|
||||
footnote_refs: vec![],
|
||||
},
|
||||
contexts: Vec::with_capacity(context_count),
|
||||
units: Vec::with_capacity(unit_count),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
};
|
||||
|
||||
// Add dummy contexts
|
||||
for i in 0..context_count {
|
||||
doc.contexts.push(Context {
|
||||
id: String::from(&format!("ctx{}", i)),
|
||||
entity: Entity {
|
||||
identifier: String::from("0000000000"),
|
||||
scheme: String::from("http://www.sec.gov/CIK"),
|
||||
segment: None,
|
||||
},
|
||||
period: Period::Instant {
|
||||
date: String::from("2023-12-31"),
|
||||
},
|
||||
scenario: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Add dummy units
|
||||
for i in 0..unit_count {
|
||||
doc.units.push(Unit {
|
||||
id: String::from(&format!("unit{}", i)),
|
||||
unit_type: UnitType::Simple(vec![Measure {
|
||||
namespace: String::from("iso4217"),
|
||||
name: String::from("USD"),
|
||||
}]),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(doc)
|
||||
}
|
||||
}
|
||||
49
rust/vendor/crabrl/src/taxonomy.rs
vendored
Normal file
49
rust/vendor/crabrl/src/taxonomy.rs
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
use crate::Result;
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// In-memory collection of loaded XBRL schemas and linkbases.
pub struct Taxonomy {
    // Schemas loaded so far (loading is currently stubbed out).
    pub schemas: Vec<Schema>,
    // Linkbases loaded so far (loading is currently stubbed out).
    pub linkbases: Vec<Linkbase>,
}
|
||||
|
||||
/// A single XBRL schema: its target namespace plus the elements it declares,
/// keyed by element name.
pub struct Schema {
    pub target_namespace: CompactString,
    pub elements: HashMap<CompactString, Element>,
}
|
||||
|
||||
/// An element declaration from a taxonomy schema.
pub struct Element {
    pub name: CompactString,
    pub element_type: CompactString,
    // Optional because not every element declares a substitution group.
    pub substitution_group: Option<CompactString>,
    // Typically "instant" or "duration" when present -- TODO confirm the
    // value vocabulary once schema loading is implemented.
    pub period_type: Option<CompactString>,
}
|
||||
|
||||
/// A linkbase: an extended-link role and the arcs defined under it.
pub struct Linkbase {
    pub role: CompactString,
    pub arcs: Vec<Arc>,
}
|
||||
|
||||
/// A relationship (arc) between two concepts in a linkbase.
///
/// NOTE(review): this type shadows `std::sync::Arc` within this module --
/// consider renaming (e.g. `LinkArc`) if `std::sync::Arc` is ever needed here.
pub struct Arc {
    pub from: CompactString,
    pub to: CompactString,
    // Presentation/ordering hint for the arc.
    pub order: f32,
    // Calculation weight; meaningful for calculation linkbases.
    pub weight: f32,
}
|
||||
|
||||
impl Taxonomy {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
schemas: Vec::new(),
|
||||
linkbases: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_schema(&mut self, _path: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_linkbase(&mut self, _path: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
601
rust/vendor/crabrl/src/validator.rs
vendored
Normal file
601
rust/vendor/crabrl/src/validator.rs
vendored
Normal file
@@ -0,0 +1,601 @@
|
||||
// Comprehensive XBRL validation
|
||||
use crate::{model::*, Error, Result};
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// A single validation finding. Findings are collected into a `Vec` (rather
/// than returned as `Err` one at a time) so one pass can report many issues.
#[derive(Debug, Clone)]
pub enum ValidationError {
    /// A fact's context id does not resolve to an entry in `Document::contexts`.
    InvalidContextRef {
        fact_index: usize,
        context_id: u16,
    },
    /// A fact's unit id does not resolve to an entry in `Document::units`.
    InvalidUnitRef {
        fact_index: usize,
        unit_id: u16,
    },
    /// A calculation-linkbase roll-up did not match the reported total.
    /// NOTE(review): not produced by any code in view -- confirm it is
    /// emitted elsewhere or slated for the calculation checks.
    CalculationInconsistency {
        concept: String,
        expected: f64,
        actual: f64,
    },
    /// A value does not conform to the type/format the concept requires.
    InvalidDataType {
        concept: String,
        expected_type: String,
        actual_value: String,
    },
    /// A mandatory element/context/statement is absent from the document.
    MissingRequiredElement {
        element: String,
    },
    /// The same id (context, unit, or fact key) appeared more than once.
    DuplicateId {
        id: String,
    },
}
|
||||
|
||||
/// Configurable XBRL document validator.
///
/// All `check_*` switches are enabled by the `Default` impl; `strict()`
/// promotes collected findings into a hard error from `validate`.
pub struct XbrlValidator {
    // When set, `validate` returns Err on any finding, and duplicate facts
    // are reported (see `check_duplicate_facts`).
    strict_mode: bool,
    // NOTE(review): set by Default but read nowhere in this file.
    #[allow(dead_code)]
    check_calculations: bool,
    // Enables duplicate-fact detection in `validate`.
    check_duplicates: bool,
    // Enables context validation in `validate`.
    check_contexts: bool,
    // Enables unit validation in `validate`.
    check_units: bool,
    // NOTE(review): set by Default but read nowhere in this file.
    #[allow(dead_code)]
    check_datatypes: bool,
    // Tolerance for calculation comparisons (set via `with_tolerance`).
    // NOTE(review): not consumed by any check currently in view.
    decimal_tolerance: f64,
}
|
||||
|
||||
impl Default for XbrlValidator {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
strict_mode: false,
|
||||
check_calculations: true,
|
||||
check_duplicates: true,
|
||||
check_contexts: true,
|
||||
check_units: true,
|
||||
check_datatypes: true,
|
||||
decimal_tolerance: 0.01,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl XbrlValidator {
    /// Creates a validator with the default (lenient) configuration.
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder-style switch: makes `validate` fail on any finding and
    /// enables duplicate-fact reporting.
    pub fn strict(mut self) -> Self {
        self.strict_mode = true;
        self
    }

    /// Builder-style override of the calculation tolerance.
    pub fn with_tolerance(mut self, tolerance: f64) -> Self {
        self.decimal_tolerance = tolerance;
        self
    }

    /// Runs every enabled check over `doc`.
    ///
    /// Findings are accumulated internally; only in strict mode do they
    /// become an `Err`. In lenient mode the collected findings are currently
    /// discarded and `Ok(())` is returned.
    ///
    /// NOTE(review): `doc` is taken by `&mut` but never mutated here --
    /// consider `&Document` unless mutation is planned.
    pub fn validate(&self, doc: &mut Document) -> Result<()> {
        let mut validation_errors = Vec::new();

        // Context validation (duplicate ids, entity, period sanity).
        if self.check_contexts {
            validation_errors.extend(self.validate_contexts(doc));
        }

        // Unit validation (duplicate ids, empty measures).
        if self.check_units {
            validation_errors.extend(self.validate_units(doc));
        }

        // Fact reference validation always runs (no flag guards it).
        validation_errors.extend(self.validate_facts(doc));

        // Duplicate (concept, context) fact detection.
        if self.check_duplicates {
            validation_errors.extend(self.check_duplicate_facts(doc));
        }

        // Strict mode: any finding aborts with a summary error.
        if self.strict_mode && !validation_errors.is_empty() {
            return Err(Error::Validation(format!(
                "Validation failed with {} errors",
                validation_errors.len()
            )));
        }

        Ok(())
    }

    /// Checks contexts for duplicate ids, missing entity identifiers,
    /// and inverted duration periods.
    fn validate_contexts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut context_ids = HashSet::new();

        for ctx in &doc.contexts {
            // `insert` returns false when the id was already present.
            if !context_ids.insert(ctx.id.clone()) {
                errors.push(ValidationError::DuplicateId {
                    id: ctx.id.to_string(),
                });
            }

            // Every context must carry an entity identifier.
            if ctx.entity.identifier.is_empty() {
                errors.push(ValidationError::MissingRequiredElement {
                    element: format!("Entity identifier for context {}", ctx.id),
                });
            }

            // Duration periods must not be inverted.
            // NOTE(review): `start > end` is a lexicographic string compare;
            // it is only correct for ISO-8601 (YYYY-MM-DD) dates -- confirm
            // the period fields are normalized to that format.
            if let Period::Duration { start, end } = &ctx.period {
                if start > end {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("context_{}", ctx.id),
                        expected_type: "valid period".to_string(),
                        actual_value: format!("start {} > end {}", start, end),
                    });
                }
            }
        }

        errors
    }

    /// Checks units for duplicate ids and empty measure lists in every
    /// unit-type variant.
    fn validate_units(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut unit_ids = HashSet::new();

        for unit in &doc.units {
            // Duplicate unit ids.
            if !unit_ids.insert(unit.id.clone()) {
                errors.push(ValidationError::DuplicateId {
                    id: unit.id.to_string(),
                });
            }

            // Each variant must carry at least one measure on every side.
            match &unit.unit_type {
                UnitType::Simple(measures) => {
                    if measures.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Measures for unit {}", unit.id),
                        });
                    }
                }
                UnitType::Divide {
                    numerator,
                    denominator,
                } => {
                    if numerator.is_empty() || denominator.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Numerator/denominator for unit {}", unit.id),
                        });
                    }
                }
                UnitType::Multiply(measures) => {
                    if measures.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Measures for unit {}", unit.id),
                        });
                    }
                }
            }
        }

        errors
    }

    /// Checks that every fact's context/unit reference resolves to an
    /// existing table entry.
    fn validate_facts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();

        for i in 0..doc.facts.len() {
            // Context ids are treated as 0-based indices into `doc.contexts`.
            if i < doc.facts.context_ids.len() {
                let context_id = doc.facts.context_ids[i];
                if context_id as usize >= doc.contexts.len() {
                    errors.push(ValidationError::InvalidContextRef {
                        fact_index: i,
                        context_id,
                    });
                }
            }

            if i < doc.facts.unit_ids.len() {
                let unit_id = doc.facts.unit_ids[i];
                // NOTE(review): this treats unit ids as 1-based (0 = "no
                // unit"; invalid only when id > len), while
                // `sec_validation_rules` indexes `doc.units` 0-based with the
                // same field. One of the two is off by one -- confirm the
                // intended id scheme before changing either comparison.
                if unit_id > 0 && unit_id as usize > doc.units.len() {
                    errors.push(ValidationError::InvalidUnitRef {
                        fact_index: i,
                        unit_id,
                    });
                }
            }
        }

        errors
    }

    /// Flags facts that share the same (concept, context) key.
    /// Duplicates are only reported when strict mode is on -- in lenient
    /// mode the set is still built but nothing is pushed (intentional?
    /// NOTE(review): confirm; the `check_duplicates` flag already gates
    /// this method in `validate`).
    fn check_duplicate_facts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut fact_keys = HashSet::new();

        for i in 0..doc.facts.len() {
            if i < doc.facts.concept_ids.len() && i < doc.facts.context_ids.len() {
                let key = (doc.facts.concept_ids[i], doc.facts.context_ids[i]);
                if !fact_keys.insert(key) && self.strict_mode {
                    errors.push(ValidationError::DuplicateId {
                        id: format!("Duplicate fact at index {}", i),
                    });
                }
            }
        }

        errors
    }
}
|
||||
|
||||
// A boxed, dynamically-dispatched validation rule: inspects a document and
// returns any findings.
type ValidationRule = Box<dyn Fn(&Document) -> Vec<ValidationError>>;

/// Profile-driven validation entry point: runs the profile-specific rule
/// set plus any caller-registered custom rules (see `validate`).
pub struct ValidationContext {
    pub profile: ValidationProfile,
    pub custom_rules: Vec<ValidationRule>,
}
|
||||
|
||||
/// Selects which built-in rule set `ValidationContext::validate` applies.
/// Only `SecEdgar` and `Ifrs` currently have dedicated rules; the other
/// profiles fall through to custom rules only.
#[derive(Debug, Clone, Copy)]
pub enum ValidationProfile {
    Generic,
    SecEdgar,
    Ifrs,
    UsGaap,
}
|
||||
|
||||
impl ValidationContext {
|
||||
pub fn new(profile: ValidationProfile) -> Self {
|
||||
Self {
|
||||
profile,
|
||||
custom_rules: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_rule<F>(&mut self, rule: F)
|
||||
where
|
||||
F: Fn(&Document) -> Vec<ValidationError> + 'static,
|
||||
{
|
||||
self.custom_rules.push(Box::new(rule));
|
||||
}
|
||||
|
||||
pub fn validate(&self, doc: &Document) -> Vec<ValidationError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
// Apply profile-specific rules
|
||||
match self.profile {
|
||||
ValidationProfile::SecEdgar => {
|
||||
errors.extend(sec_validation_rules(doc));
|
||||
}
|
||||
ValidationProfile::Ifrs => {
|
||||
errors.extend(ifrs_validation_rules(doc));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Apply custom rules
|
||||
for rule in &self.custom_rules {
|
||||
errors.extend(rule(doc));
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
}
|
||||
|
||||
/// SEC EDGAR-specific validation rules: DEI presence, CIK format, segment
/// dimension sanity, and basic monetary-value plausibility.
pub fn sec_validation_rules(doc: &Document) -> Vec<ValidationError> {
    let mut errors = Vec::new();

    // Presence flags gathered across the whole document, checked at the end.
    let mut has_current_period = false;
    let mut has_entity_info = false;
    let mut has_dei_elements = false;

    for ctx in &doc.contexts {
        // Heuristic: a "current period" context is recognized by id naming
        // conventions only.
        if ctx.id.contains("CurrentYear")
            || ctx.id.contains("CurrentPeriod")
            || ctx.id.contains("DocumentPeriodEndDate")
        {
            has_current_period = true;
        }

        // CIK identifiers under the SEC scheme must be exactly 10 digits.
        if ctx.entity.scheme.contains("sec.gov/CIK") {
            has_entity_info = true;
            let cik = &ctx.entity.identifier;
            if cik.len() != 10 || !cik.chars().all(|c| c.is_ascii_digit()) {
                errors.push(ValidationError::InvalidDataType {
                    concept: "CIK".to_string(),
                    expected_type: "10-digit number".to_string(),
                    actual_value: cik.to_string(),
                });
            }
        }
    }

    // Look for DEI elements among the facts.
    // NOTE(review): this indexes `concept_names` by fact index, assuming it
    // is parallel to `facts.concept_ids` -- elsewhere concept ids look like
    // indices into a name table; confirm which layout is intended.
    for i in 0..doc.facts.concept_ids.len() {
        if i < doc.concept_names.len() {
            let concept = &doc.concept_names[i];
            if concept.contains("dei:")
                || concept.contains("DocumentType")
                || concept.contains("EntityRegistrantName")
            {
                has_dei_elements = true;
            }
        }
    }

    // Required-element findings from the presence flags above.
    if !has_current_period {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Current period context required for SEC filing".to_string(),
        });
    }

    if !has_entity_info {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Entity CIK information required for SEC filing".to_string(),
        });
    }

    if !has_dei_elements {
        errors.push(ValidationError::MissingRequiredElement {
            element: "DEI (Document and Entity Information) elements required".to_string(),
        });
    }

    // Segment reporting: explicit members must name both a dimension and
    // a member.
    for ctx in &doc.contexts {
        if let Some(segment) = &ctx.entity.segment {
            for member in &segment.explicit_members {
                if member.dimension.is_empty() || member.member.is_empty() {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("segment_{}", ctx.id),
                        expected_type: "valid dimension member".to_string(),
                        actual_value: format!("{}:{}", member.dimension, member.member),
                    });
                }
            }
        }
    }

    // Collect decimal facts measured in USD.
    // NOTE(review): `unit_ids[i]` is used here as a 0-based index into
    // `doc.units`, which conflicts with the 1-based treatment in
    // `XbrlValidator::validate_facts` -- confirm the id scheme.
    let mut monetary_facts: Vec<(usize, f64)> = Vec::new();
    for i in 0..doc.facts.len() {
        if i < doc.facts.values.len() {
            if let FactValue::Decimal(val) = &doc.facts.values[i] {
                if i < doc.facts.unit_ids.len() {
                    let unit_id = doc.facts.unit_ids[i] as usize;
                    if unit_id < doc.units.len() {
                        if let UnitType::Simple(measures) = &doc.units[unit_id].unit_type {
                            if measures.iter().any(|m| m.name == "USD" || m.name == "usd") {
                                monetary_facts.push((i, *val));
                            }
                        }
                    }
                }
            }
        }
    }

    // Plausibility checks on the monetary values collected above.
    for (idx, value) in monetary_facts {
        // NaN/infinity can never be a valid monetary amount.
        if value.is_nan() || value.is_infinite() {
            errors.push(ValidationError::InvalidDataType {
                concept: format!("fact_{}", idx),
                expected_type: "valid monetary amount".to_string(),
                actual_value: format!("{}", value),
            });
        }
        // Heuristic ceiling: flag amounts above $10 trillion as suspicious.
        if value.abs() > 10_000_000_000_000.0 {
            errors.push(ValidationError::InvalidDataType {
                concept: format!("fact_{}", idx),
                expected_type: "reasonable monetary amount".to_string(),
                actual_value: format!("${:.2}", value),
            });
        }
    }

    errors
}
|
||||
|
||||
// IFRS specific validation rules
|
||||
pub fn ifrs_validation_rules(doc: &Document) -> Vec<ValidationError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
// Check for IFRS-required contexts
|
||||
let mut has_reporting_period = false;
|
||||
let mut has_comparative_period = false;
|
||||
let mut has_entity_info = false;
|
||||
|
||||
for ctx in &doc.contexts {
|
||||
// Check for reporting period
|
||||
match &ctx.period {
|
||||
Period::Duration { start, end: _ } => {
|
||||
has_reporting_period = true;
|
||||
// IFRS requires comparative information
|
||||
if start.contains("PY")
|
||||
|| ctx.id.contains("PriorYear")
|
||||
|| ctx.id.contains("Comparative")
|
||||
{
|
||||
has_comparative_period = true;
|
||||
}
|
||||
}
|
||||
Period::Instant { date } => {
|
||||
if !date.is_empty() {
|
||||
has_reporting_period = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Validate entity information
|
||||
if !ctx.entity.identifier.is_empty() {
|
||||
has_entity_info = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Required contexts validation
|
||||
if !has_reporting_period {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Reporting period required for IFRS filing".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_comparative_period {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Comparative period information required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_entity_info {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Entity identification required for IFRS filing".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate dimensional structure
|
||||
let mut dimension_validations = Vec::new();
|
||||
for ctx in &doc.contexts {
|
||||
// Check segment dimensions
|
||||
if let Some(segment) = &ctx.entity.segment {
|
||||
for member in &segment.explicit_members {
|
||||
// IFRS dimensions should follow specific patterns
|
||||
if !member.dimension.contains(":") {
|
||||
dimension_validations
|
||||
.push(format!("Invalid dimension format: {}", member.dimension));
|
||||
}
|
||||
if member.dimension.contains("ifrs") || member.dimension.contains("ifrs-full") {
|
||||
// Valid IFRS dimension
|
||||
if member.member.is_empty() {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("dimension_{}", ctx.id),
|
||||
expected_type: "valid IFRS dimension member".to_string(),
|
||||
actual_value: member.dimension.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check typed members for IFRS compliance
|
||||
for typed in &segment.typed_members {
|
||||
if typed.dimension.contains("ifrs") && typed.value.is_empty() {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("typed_dimension_{}", ctx.id),
|
||||
expected_type: "non-empty typed dimension value".to_string(),
|
||||
actual_value: typed.dimension.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check scenario dimensions (alternative to segment)
|
||||
if let Some(scenario) = &ctx.scenario {
|
||||
for member in &scenario.explicit_members {
|
||||
if member.dimension.contains("ifrs") && member.member.is_empty() {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("scenario_dimension_{}", ctx.id),
|
||||
expected_type: "valid IFRS scenario member".to_string(),
|
||||
actual_value: member.dimension.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for mandatory IFRS disclosures in facts
|
||||
let mut has_financial_position = false;
|
||||
let mut has_comprehensive_income = false;
|
||||
let mut has_cash_flows = false;
|
||||
let mut has_changes_in_equity = false;
|
||||
|
||||
for i in 0..doc.concept_names.len() {
|
||||
let concept = &doc.concept_names[i];
|
||||
let lower = concept.to_lowercase();
|
||||
|
||||
if lower.contains("financialposition")
|
||||
|| lower.contains("balancesheet")
|
||||
|| lower.contains("assets")
|
||||
|| lower.contains("liabilities")
|
||||
{
|
||||
has_financial_position = true;
|
||||
}
|
||||
|
||||
if lower.contains("comprehensiveincome")
|
||||
|| lower.contains("profitorloss")
|
||||
|| lower.contains("income")
|
||||
|| lower.contains("revenue")
|
||||
{
|
||||
has_comprehensive_income = true;
|
||||
}
|
||||
|
||||
if lower.contains("cashflow") || lower.contains("cashflows") {
|
||||
has_cash_flows = true;
|
||||
}
|
||||
|
||||
if lower.contains("changesinequity") || lower.contains("equity") {
|
||||
has_changes_in_equity = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Validate mandatory statements
|
||||
if !has_financial_position {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Statement of Financial Position required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_comprehensive_income {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Statement of Comprehensive Income required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_cash_flows {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Statement of Cash Flows required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_changes_in_equity {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Statement of Changes in Equity required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate presentation linkbase relationships
|
||||
for link in &doc.presentation_links {
|
||||
// Check order is valid (typically 1.0 to 999.0)
|
||||
if link.order < 0.0 || link.order > 1000.0 {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("presentation_link_{}_{}", link.from, link.to),
|
||||
expected_type: "valid presentation order (0-1000)".to_string(),
|
||||
actual_value: format!("{}", link.order),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Validate calculation relationships
|
||||
for link in &doc.calculation_links {
|
||||
// Check weight is reasonable (-1.0 or 1.0 typically)
|
||||
if link.weight != 1.0 && link.weight != -1.0 && link.weight != 0.0 {
|
||||
// Unusual weight, might be an error
|
||||
if link.weight.abs() > 10.0 {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("calculation_link_{}_{}", link.from, link.to),
|
||||
expected_type: "reasonable calculation weight".to_string(),
|
||||
actual_value: format!("{}", link.weight),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
Reference in New Issue
Block a user