feat(taxonomy): add rust sidecar compact surface pipeline
This commit is contained in:
177
rust/vendor/crabrl/src/allocator.rs
vendored
Normal file
177
rust/vendor/crabrl/src/allocator.rs
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
use bumpalo::Bump;
|
||||
use std::cell::RefCell;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::Arc;
|
||||
use parking_lot::Mutex;
|
||||
use string_interner::{DefaultBackend, Symbol};
|
||||
use string_interner::symbol::SymbolU32;
|
||||
|
||||
// Size of each bump arena allocated up front.
const ARENA_SIZE: usize = 64 * 1024 * 1024; // 64MB arenas

// Maximum number of objects an ObjectPool retains on release.
const POOL_SIZE: usize = 1024;

// Cache-line aligned (64 bytes) — presumably to avoid false sharing when
// embedded next to other hot state; TODO confirm this is load-bearing.
#[repr(align(64))]
pub struct ArenaAllocator {
    // Arena currently served by `alloc*`; swapped out by `new_arena`.
    current: RefCell<Bump>,
    // Retired arenas, kept alive (see `new_arena`) so allocations made
    // from them are not freed while the allocator lives.
    arenas: RefCell<Vec<Bump>>,
    // Shared string interner; the Mutex allows interning from multiple
    // threads via the shared Arc.
    string_interner: Arc<Mutex<string_interner::StringInterner<DefaultBackend>>>,
}
|
||||
|
||||
impl ArenaAllocator {
    /// Creates an allocator with one pre-sized arena and an empty interner.
    pub fn new() -> Self {
        Self {
            current: RefCell::new(Bump::with_capacity(ARENA_SIZE)),
            arenas: RefCell::new(Vec::with_capacity(16)),
            string_interner: Arc::new(Mutex::new(string_interner::StringInterner::new())),
        }
    }

    /// Bump-allocates `val` in the current arena and returns a reference
    /// whose lifetime is tied to `&self`.
    ///
    /// NOTE(review): this is unsound as written — `reset()` and
    /// `new_arena()` also take `&self`, so the arena backing this
    /// reference can be reset or replaced while the reference is still
    /// live (use-after-free). TODO: make `reset` take `&mut self` or tie
    /// allocation lifetimes to an explicit scope.
    #[inline(always)]
    pub fn alloc<T>(&self, val: T) -> &T {
        unsafe {
            // The raw-pointer round-trip detaches the result from the
            // temporary `Ref` guard; the allocation itself lives in the
            // Bump, not in the guard, so the pointer stays valid until
            // the arena is reset or dropped.
            let ptr = self.current.borrow().alloc(val) as *const T;
            &*ptr
        }
    }

    /// Copies `slice` into the current arena (see `alloc` for the
    /// lifetime/soundness caveat).
    #[inline(always)]
    pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &[T] {
        unsafe {
            let ptr = self.current.borrow().alloc_slice_copy(slice) as *const [T];
            &*ptr
        }
    }

    /// Copies `s` into the current arena (see `alloc` for the
    /// lifetime/soundness caveat).
    #[inline(always)]
    pub fn alloc_str(&self, s: &str) -> &str {
        unsafe {
            let ptr = self.current.borrow().alloc_str(s) as *const str;
            &*ptr
        }
    }

    /// Interns `s` and returns its id; interning the same string twice
    /// yields the same id (exercised by the tests below).
    #[inline(always)]
    pub fn intern_string(&self, s: &str) -> u32 {
        let mut interner = self.string_interner.lock();
        interner.get_or_intern(s).to_usize() as u32
    }

    /// Resolves an interned id back to an owned String, or `None` if the
    /// id was never produced by `intern_string`.
    #[inline(always)]
    pub fn get_interned(&self, id: u32) -> Option<String> {
        let interner = self.string_interner.lock();
        let symbol = SymbolU32::try_from_usize(id as usize)?;
        interner.resolve(symbol)
            .map(|s| s.to_string())
    }

    /// Resets the current and all retired arenas, reclaiming every bump
    /// allocation. NOTE(review): see `alloc` — any outstanding reference
    /// handed out earlier dangles after this call.
    pub fn reset(&self) {
        let mut current = self.current.borrow_mut();
        current.reset();

        let mut arenas = self.arenas.borrow_mut();
        for arena in arenas.iter_mut() {
            arena.reset();
        }
    }

    /// Retires the current arena into `arenas` (keeping it alive) and
    /// installs a fresh one for subsequent allocations.
    pub fn new_arena(&self) {
        let mut arenas = self.arenas.borrow_mut();
        let old = std::mem::replace(&mut *self.current.borrow_mut(),
            Bump::with_capacity(ARENA_SIZE));
        arenas.push(old);
    }
}
|
||||
|
||||
/// A bounded pool of reusable boxed objects.
///
/// `acquire` hands out a pooled object, falling back to the factory when
/// the pool is empty; `release` returns an object to the pool, dropping
/// it instead when the pool is already at capacity.
pub struct ObjectPool<T> {
    pool: Vec<Box<T>>,
    factory: fn() -> T,
    /// Upper bound on retained objects. Fix: `release` previously capped
    /// at the unrelated global `POOL_SIZE` (1024), so a pool constructed
    /// with a small capacity could silently grow far beyond it.
    capacity: usize,
}

impl<T> ObjectPool<T> {
    /// Creates a pool pre-filled with `capacity` objects built by `factory`.
    pub fn new(capacity: usize, factory: fn() -> T) -> Self {
        let mut pool = Vec::with_capacity(capacity);
        for _ in 0..capacity {
            pool.push(Box::new(factory()));
        }
        Self { pool, factory, capacity }
    }

    /// Takes an object out of the pool, constructing a fresh one when empty.
    #[inline(always)]
    pub fn acquire(&mut self) -> Box<T> {
        self.pool.pop().unwrap_or_else(|| Box::new((self.factory)()))
    }

    /// Returns `obj` to the pool; drops it when the pool is full.
    #[inline(always)]
    pub fn release(&mut self, obj: Box<T>) {
        if self.pool.len() < self.capacity {
            self.pool.push(obj);
        }
    }
}
|
||||
|
||||
/// Fixed-capacity, cache-line-aligned byte buffer with inline storage.
///
/// Holds up to `N` bytes with no heap allocation; `len` counts the
/// initialized prefix of `data` and is always `<= N`.
#[repr(C, align(64))]
pub struct StackBuffer<const N: usize> {
    data: [MaybeUninit<u8>; N],
    len: usize,
}

impl<const N: usize> StackBuffer<N> {
    /// Creates an empty buffer.
    ///
    /// Fix: the original used `unsafe { MaybeUninit::uninit().assume_init() }`
    /// to conjure the array; an array of `MaybeUninit` may be built with the
    /// safe repeat expression instead (`MaybeUninit<u8>` is `Copy`), so no
    /// `unsafe` is needed here.
    #[inline(always)]
    pub const fn new() -> Self {
        Self {
            data: [MaybeUninit::uninit(); N],
            len: 0,
        }
    }

    /// Appends one byte. Returns `false` (leaving the buffer unchanged)
    /// when the buffer is already full.
    #[inline(always)]
    pub fn push(&mut self, byte: u8) -> bool {
        if self.len < N {
            // Initialize before bumping `len`, preserving the invariant
            // that the first `len` slots are always initialized.
            self.data[self.len] = MaybeUninit::new(byte);
            self.len += 1;
            true
        } else {
            false
        }
    }

    /// Views the initialized prefix as a byte slice.
    #[inline(always)]
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: `push` initializes `data[i]` before incrementing `len`
        // and never lets `len` exceed `N`, so the first `len` bytes are
        // initialized and in bounds.
        unsafe {
            std::slice::from_raw_parts(
                self.data.as_ptr() as *const u8,
                self.len
            )
        }
    }

    /// Logically empties the buffer; previously stored bytes are simply
    /// forgotten (no memory is zeroed).
    #[inline(always)]
    pub fn clear(&mut self) {
        self.len = 0;
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Two strings allocated from the same arena keep independent contents.
    #[test]
    fn test_arena_allocator() {
        let arena = ArenaAllocator::new();
        let s1 = arena.alloc_str("hello");
        let s2 = arena.alloc_str("world");
        assert_eq!(s1, "hello");
        assert_eq!(s2, "world");
    }

    // Interning the same string twice yields the same id, and the id
    // round-trips back to the original text.
    #[test]
    fn test_string_interning() {
        let arena = ArenaAllocator::new();
        let id1 = arena.intern_string("test");
        let id2 = arena.intern_string("test");
        assert_eq!(id1, id2);

        let s = arena.get_interned(id1).unwrap();
        assert_eq!(s, "test");
    }
}
|
||||
47
rust/vendor/crabrl/src/cache.rs
vendored
Normal file
47
rust/vendor/crabrl/src/cache.rs
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
use dashmap::DashMap;
|
||||
use std::sync::Arc;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub struct LockFreeCache<K, V> {
|
||||
map: Arc<DashMap<K, V>>,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl<K, V> LockFreeCache<K, V>
|
||||
where
|
||||
K: Eq + Hash + Clone,
|
||||
V: Clone,
|
||||
{
|
||||
pub fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
map: Arc::new(DashMap::with_capacity(capacity)),
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn get(&self, key: &K) -> Option<V> {
|
||||
self.map.get(key).map(|v| v.clone())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn insert(&self, key: K, value: V) {
|
||||
if self.map.len() >= self.capacity {
|
||||
if let Some(entry) = self.map.iter().next() {
|
||||
let k = entry.key().clone();
|
||||
drop(entry);
|
||||
self.map.remove(&k);
|
||||
}
|
||||
}
|
||||
self.map.insert(key, value);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn contains(&self, key: &K) -> bool {
|
||||
self.map.contains_key(key)
|
||||
}
|
||||
|
||||
pub fn clear(&self) {
|
||||
self.map.clear();
|
||||
}
|
||||
}
|
||||
21
rust/vendor/crabrl/src/instance.rs
vendored
Normal file
21
rust/vendor/crabrl/src/instance.rs
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
use crate::model::Document;
|
||||
use crate::Result;
|
||||
|
||||
pub struct InstanceValidator {
|
||||
strict: bool,
|
||||
}
|
||||
|
||||
impl InstanceValidator {
|
||||
pub fn new() -> Self {
|
||||
Self { strict: false }
|
||||
}
|
||||
|
||||
pub fn with_strict(mut self, strict: bool) -> Self {
|
||||
self.strict = strict;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn validate(&self, _document: &Document) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
123
rust/vendor/crabrl/src/lib.rs
vendored
Normal file
123
rust/vendor/crabrl/src/lib.rs
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
//! crabrl - High-performance XBRL parser and validator
|
||||
//!
|
||||
//! Licensed under AGPL-3.0
|
||||
|
||||
pub mod model;
|
||||
pub mod simple_parser;
|
||||
pub mod validator;
|
||||
|
||||
// Use simple parser for now
|
||||
pub use simple_parser::Parser;
|
||||
|
||||
// Re-export main types
|
||||
pub use model::{Context, Document, Fact, Unit};
|
||||
|
||||
// Create validator wrapper for the CLI
|
||||
/// CLI-facing wrapper around the internal `validator::XbrlValidator`.
#[derive(Default)]
pub struct Validator {
    // The validator implementation doing the actual work.
    inner: validator::XbrlValidator,
    #[allow(dead_code)]
    // Recorded for introspection; not read anywhere in this file.
    strict: bool,
}

impl Validator {
    /// Default (lenient) validator.
    pub fn new() -> Self {
        Self::default()
    }

    /// Builds a validator from a CLI config; `strict` switches the inner
    /// validator into its strict rule set.
    pub fn with_config(config: ValidationConfig) -> Self {
        let mut inner = validator::XbrlValidator::new();
        if config.strict {
            inner = inner.strict();
        }
        Self {
            inner,
            strict: config.strict,
        }
    }

    /// Preset matching SEC EDGAR filings (strict mode on).
    pub fn sec_edgar() -> Self {
        Self {
            inner: validator::XbrlValidator::new().strict(),
            strict: true,
        }
    }

    /// Runs validation and converts the outcome into the CLI-friendly
    /// `ValidationResult`, timing the run.
    pub fn validate(&self, doc: &Document) -> Result<ValidationResult> {
        let start = std::time::Instant::now();

        // Clone doc for validation (validator mutates it)
        let mut doc_copy = doc.clone();

        // Run validation
        // NOTE(review): the inner error is discarded here — every failure
        // surfaces as the single generic "Validation failed" string below.
        // TODO: propagate the real error message into `errors`.
        let is_valid = self.inner.validate(&mut doc_copy).is_ok();

        Ok(ValidationResult {
            is_valid,
            errors: if is_valid {
                Vec::new()
            } else {
                vec!["Validation failed".to_string()]
            },
            warnings: Vec::new(),
            stats: ValidationStats {
                facts_validated: doc.facts.len(),
                duration_ms: start.elapsed().as_millis() as u64,
            },
        })
    }
}
|
||||
|
||||
/// Simple validation config for CLI
#[derive(Debug, Clone, Default)]
pub struct ValidationConfig {
    /// Enable the stricter rule set (used by the SEC EDGAR profile).
    pub strict: bool,
}

impl ValidationConfig {
    /// Profile preset matching SEC EDGAR filing rules (strict mode on).
    pub fn sec_edgar() -> Self {
        Self { strict: true }
    }
}

/// Simple validation result for CLI
// Fix: public result types now derive `Debug`/`Clone` so callers can log
// and pass them around; previously they derived nothing.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// Overall verdict: true when no errors were found.
    pub is_valid: bool,
    /// Human-readable error messages (empty when valid).
    pub errors: Vec<String>,
    /// Non-fatal findings.
    pub warnings: Vec<String>,
    /// Timing and volume counters for the run.
    pub stats: ValidationStats,
}

/// Aggregate counters for a single validation run.
#[derive(Debug, Clone)]
pub struct ValidationStats {
    /// Number of facts examined.
    pub facts_validated: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
}
|
||||
|
||||
/// Crate-wide result alias.
pub type Result<T> = std::result::Result<T, Error>;

/// Unified error type covering I/O, parsing, validation, and lookups.
#[derive(Debug)]
pub enum Error {
    /// Underlying I/O failure (file read, etc.).
    Io(std::io::Error),
    /// Malformed XBRL/XML input.
    Parse(String),
    /// Document failed a validation rule.
    Validation(String),
    /// A referenced resource could not be located.
    NotFound(String),
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Io(e) => write!(f, "IO error: {}", e),
            Error::Parse(s) => write!(f, "Parse error: {}", s),
            Error::Validation(s) => write!(f, "Validation error: {}", s),
            Error::NotFound(s) => write!(f, "Not found: {}", s),
        }
    }
}

impl std::error::Error for Error {
    /// Fix: expose the wrapped I/O error as `source()` so callers can walk
    /// the error chain; the previous empty impl dropped it.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(e) => Some(e),
            _ => None,
        }
    }
}

impl From<std::io::Error> for Error {
    fn from(err: std::io::Error) -> Self {
        Error::Io(err)
    }
}
|
||||
438
rust/vendor/crabrl/src/linkbase.rs
vendored
Normal file
438
rust/vendor/crabrl/src/linkbase.rs
vendored
Normal file
@@ -0,0 +1,438 @@
|
||||
// Linkbase processing for XBRL
|
||||
use crate::{Error, Result, model::*};
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
/// In-memory index of XBRL linkbase relationships.
///
/// Each map is keyed by the arc's `xlink:from` label (or the label id for
/// labels/references), pointing to every arc that originates there.
pub struct LinkbaseProcessor {
    // parent -> presentation children (ordered via `order` when queried)
    presentation_links: HashMap<CompactString, Vec<PresentationLink>>,
    // parent -> weighted calculation contributors
    calculation_links: HashMap<CompactString, Vec<CalculationLink>>,
    // source -> definition/dimensional arcs
    definition_links: HashMap<CompactString, Vec<DefinitionLink>>,
    // label id -> human-readable labels
    label_links: HashMap<CompactString, Vec<LabelLink>>,
    // label id -> authoritative references
    reference_links: HashMap<CompactString, Vec<ReferenceLink>>,
}
|
||||
|
||||
impl LinkbaseProcessor {
    /// Creates an empty processor with no loaded linkbases.
    pub fn new() -> Self {
        Self {
            presentation_links: HashMap::new(),
            calculation_links: HashMap::new(),
            definition_links: HashMap::new(),
            label_links: HashMap::new(),
            reference_links: HashMap::new(),
        }
    }

    /// Reads a linkbase file from disk and parses it into the maps.
    pub fn load_linkbase<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
        let content = std::fs::read(path)?;
        self.parse_linkbase(&content)
    }

    /// Parses raw linkbase bytes, dispatching to the per-type parsers
    /// based on a substring sniff of the document.
    ///
    /// NOTE(review): this is substring-based XML scanning, not real XML
    /// parsing — namespace prefixes other than `link:`/`xlink:` and
    /// attribute reordering inside quotes are not handled. TODO confirm
    /// inputs are always normalized enough for this to be safe.
    pub fn parse_linkbase(&mut self, data: &[u8]) -> Result<()> {
        // Skip BOM if present
        let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
            &data[3..]
        } else {
            data
        };

        let text = std::str::from_utf8(data)
            .map_err(|_| Error::Parse("Invalid UTF-8 in linkbase".to_string()))?;

        // Detect linkbase type and parse accordingly; a file may contain
        // several link kinds, so every matching parser runs.
        if text.contains("presentationLink") {
            self.parse_presentation_linkbase(text)?;
        }
        if text.contains("calculationLink") {
            self.parse_calculation_linkbase(text)?;
        }
        if text.contains("definitionLink") {
            self.parse_definition_linkbase(text)?;
        }
        if text.contains("labelLink") {
            self.parse_label_linkbase(text)?;
        }
        if text.contains("referenceLink") {
            self.parse_reference_linkbase(text)?;
        }

        Ok(())
    }

    /// Scans for self-closing `<link:presentationArc .../>` elements and
    /// extracts from/to/order/priority/use attributes by fixed-offset
    /// string search (offset = attribute-prefix length, e.g. 12 for
    /// `xlink:from="`).
    fn parse_presentation_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse presentation arcs
        let mut pos = 0;
        while let Some(arc_start) = text[pos..].find("<link:presentationArc") {
            let arc_start = pos + arc_start;
            // Advance by one so the next search cannot re-find this arc.
            pos = arc_start + 1;

            // Only self-closing arcs are recognized.
            if let Some(arc_end) = text[arc_start..].find("/>") {
                let arc_text = &text[arc_start..arc_start + arc_end];

                let mut link = PresentationLink {
                    from: CompactString::new(""),
                    to: CompactString::new(""),
                    order: 1.0,
                    priority: None,
                    use_attribute: None,
                };

                // Extract from
                if let Some(from_start) = arc_text.find("xlink:from=\"") {
                    let from_start = from_start + 12;
                    if let Some(from_end) = arc_text[from_start..].find('"') {
                        link.from = CompactString::from(&arc_text[from_start..from_start + from_end]);
                    }
                }

                // Extract to
                if let Some(to_start) = arc_text.find("xlink:to=\"") {
                    let to_start = to_start + 10;
                    if let Some(to_end) = arc_text[to_start..].find('"') {
                        link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
                    }
                }

                // Extract order (defaults to 1.0 when absent/unparsable)
                if let Some(order_start) = arc_text.find("order=\"") {
                    let order_start = order_start + 7;
                    if let Some(order_end) = arc_text[order_start..].find('"') {
                        if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
                            link.order = order;
                        }
                    }
                }

                // Extract priority
                if let Some(priority_start) = arc_text.find("priority=\"") {
                    let priority_start = priority_start + 10;
                    if let Some(priority_end) = arc_text[priority_start..].find('"') {
                        if let Ok(priority) = arc_text[priority_start..priority_start + priority_end].parse() {
                            link.priority = Some(priority);
                        }
                    }
                }

                // Extract use
                if let Some(use_start) = arc_text.find("use=\"") {
                    let use_start = use_start + 5;
                    if let Some(use_end) = arc_text[use_start..].find('"') {
                        link.use_attribute = Some(CompactString::from(&arc_text[use_start..use_start + use_end]));
                    }
                }

                // Index by source concept label.
                self.presentation_links
                    .entry(link.from.clone())
                    .or_insert_with(Vec::new)
                    .push(link);
            }
        }

        Ok(())
    }

    /// Scans `<link:calculationArc .../>` elements; same fixed-offset
    /// attribute extraction as the presentation parser, with `weight`
    /// defaulting to 1.0.
    fn parse_calculation_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse calculation arcs
        let mut pos = 0;
        while let Some(arc_start) = text[pos..].find("<link:calculationArc") {
            let arc_start = pos + arc_start;
            pos = arc_start + 1;

            if let Some(arc_end) = text[arc_start..].find("/>") {
                let arc_text = &text[arc_start..arc_start + arc_end];

                let mut link = CalculationLink {
                    from: CompactString::new(""),
                    to: CompactString::new(""),
                    weight: 1.0,
                    order: 1.0,
                };

                // Extract from
                if let Some(from_start) = arc_text.find("xlink:from=\"") {
                    let from_start = from_start + 12;
                    if let Some(from_end) = arc_text[from_start..].find('"') {
                        link.from = CompactString::from(&arc_text[from_start..from_start + from_end]);
                    }
                }

                // Extract to
                if let Some(to_start) = arc_text.find("xlink:to=\"") {
                    let to_start = to_start + 10;
                    if let Some(to_end) = arc_text[to_start..].find('"') {
                        link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
                    }
                }

                // Extract weight (e.g. -1.0 for subtracted children)
                if let Some(weight_start) = arc_text.find("weight=\"") {
                    let weight_start = weight_start + 8;
                    if let Some(weight_end) = arc_text[weight_start..].find('"') {
                        if let Ok(weight) = arc_text[weight_start..weight_start + weight_end].parse() {
                            link.weight = weight;
                        }
                    }
                }

                // Extract order
                if let Some(order_start) = arc_text.find("order=\"") {
                    let order_start = order_start + 7;
                    if let Some(order_end) = arc_text[order_start..].find('"') {
                        if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
                            link.order = order;
                        }
                    }
                }

                self.calculation_links
                    .entry(link.from.clone())
                    .or_insert_with(Vec::new)
                    .push(link);
            }
        }

        Ok(())
    }

    /// Scans `<link:definitionArc .../>` elements, additionally capturing
    /// `xlink:arcrole` (dimension/domain semantics).
    fn parse_definition_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse definition arcs
        let mut pos = 0;
        while let Some(arc_start) = text[pos..].find("<link:definitionArc") {
            let arc_start = pos + arc_start;
            pos = arc_start + 1;

            if let Some(arc_end) = text[arc_start..].find("/>") {
                let arc_text = &text[arc_start..arc_start + arc_end];

                let mut link = DefinitionLink {
                    from: CompactString::new(""),
                    to: CompactString::new(""),
                    arcrole: CompactString::new(""),
                    order: 1.0,
                };

                // Extract from
                if let Some(from_start) = arc_text.find("xlink:from=\"") {
                    let from_start = from_start + 12;
                    if let Some(from_end) = arc_text[from_start..].find('"') {
                        link.from = CompactString::from(&arc_text[from_start..from_start + from_end]);
                    }
                }

                // Extract to
                if let Some(to_start) = arc_text.find("xlink:to=\"") {
                    let to_start = to_start + 10;
                    if let Some(to_end) = arc_text[to_start..].find('"') {
                        link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
                    }
                }

                // Extract arcrole
                if let Some(arcrole_start) = arc_text.find("xlink:arcrole=\"") {
                    let arcrole_start = arcrole_start + 15;
                    if let Some(arcrole_end) = arc_text[arcrole_start..].find('"') {
                        link.arcrole = CompactString::from(&arc_text[arcrole_start..arcrole_start + arcrole_end]);
                    }
                }

                // Extract order
                if let Some(order_start) = arc_text.find("order=\"") {
                    let order_start = order_start + 7;
                    if let Some(order_end) = arc_text[order_start..].find('"') {
                        if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
                            link.order = order;
                        }
                    }
                }

                self.definition_links
                    .entry(link.from.clone())
                    .or_insert_with(Vec::new)
                    .push(link);
            }
        }

        Ok(())
    }

    /// Scans `<link:label ...>...</link:label>` elements.
    ///
    /// NOTE(review): `find("<link:label")` also matches the openings of
    /// `<link:labelLink>` and `<link:labelArc>` (same prefix), which can
    /// produce spurious/garbled entries — TODO confirm and tighten the
    /// match (e.g. require a following space or `>`).
    fn parse_label_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse labels
        let mut pos = 0;
        while let Some(label_start) = text[pos..].find("<link:label") {
            let label_start = pos + label_start;
            pos = label_start + 1;

            if let Some(label_end) = text[label_start..].find("</link:label>") {
                let label_text = &text[label_start..label_start + label_end];

                let mut link = LabelLink {
                    concept: CompactString::new(""),
                    label: CompactString::new(""),
                    role: CompactString::new(""),
                    lang: CompactString::new("en"),
                };

                // Extract label ID for concept mapping
                if let Some(id_start) = label_text.find("xlink:label=\"") {
                    let id_start = id_start + 13;
                    if let Some(id_end) = label_text[id_start..].find('"') {
                        link.concept = CompactString::from(&label_text[id_start..id_start + id_end]);
                    }
                }

                // Extract role
                if let Some(role_start) = label_text.find("xlink:role=\"") {
                    let role_start = role_start + 12;
                    if let Some(role_end) = label_text[role_start..].find('"') {
                        link.role = CompactString::from(&label_text[role_start..role_start + role_end]);
                    }
                }

                // Extract lang (defaults to "en")
                if let Some(lang_start) = label_text.find("xml:lang=\"") {
                    let lang_start = lang_start + 10;
                    if let Some(lang_end) = label_text[lang_start..].find('"') {
                        link.lang = CompactString::from(&label_text[lang_start..lang_start + lang_end]);
                    }
                }

                // Extract label text content (everything after the opening
                // tag's '>'; entities are not decoded)
                if let Some(content_start) = label_text.find('>') {
                    let content = &label_text[content_start + 1..];
                    link.label = CompactString::from(content.trim());
                }

                self.label_links
                    .entry(link.concept.clone())
                    .or_insert_with(Vec::new)
                    .push(link);
            }
        }

        Ok(())
    }

    /// Scans `<link:reference ...>...</link:reference>` elements and pulls
    /// out a fixed set of known part names.
    ///
    /// NOTE(review): like the label parser, `find("<link:reference")` also
    /// matches `<link:referenceLink>`/`<link:referenceArc>` prefixes —
    /// TODO confirm.
    fn parse_reference_linkbase(&mut self, text: &str) -> Result<()> {
        // Parse references - simplified version
        let mut pos = 0;
        while let Some(ref_start) = text[pos..].find("<link:reference") {
            let ref_start = pos + ref_start;
            pos = ref_start + 1;

            if let Some(ref_end) = text[ref_start..].find("</link:reference>") {
                let ref_text = &text[ref_start..ref_start + ref_end];

                let mut reference = Reference {
                    role: CompactString::new(""),
                    parts: HashMap::new(),
                };

                // Extract role
                if let Some(role_start) = ref_text.find("xlink:role=\"") {
                    let role_start = role_start + 12;
                    if let Some(role_end) = ref_text[role_start..].find('"') {
                        reference.role = CompactString::from(&ref_text[role_start..role_start + role_end]);
                    }
                }

                // Parse reference parts (simplified): only the first
                // occurrence of each known part name is captured.
                let parts = ["Name", "Number", "Section", "Subsection", "Paragraph", "Subparagraph", "Clause"];
                for part in &parts {
                    let tag = format!("<link:{}", part);
                    if let Some(part_start) = ref_text.find(&tag) {
                        let part_start = part_start + tag.len();
                        if let Some(content_start) = ref_text[part_start..].find('>') {
                            let content_start = part_start + content_start + 1;
                            if let Some(content_end) = ref_text[content_start..].find('<') {
                                let content = &ref_text[content_start..content_start + content_end];
                                reference.parts.insert(
                                    CompactString::from(*part),
                                    content.trim().to_string()
                                );
                            }
                        }
                    }
                }

                // Find concept this reference belongs to; references
                // without an xlink:label are silently dropped.
                if let Some(label_start) = ref_text.find("xlink:label=\"") {
                    let label_start = label_start + 13;
                    if let Some(label_end) = ref_text[label_start..].find('"') {
                        let concept = CompactString::from(&ref_text[label_start..label_start + label_end]);

                        let link = ReferenceLink {
                            concept: concept.clone(),
                            reference,
                        };

                        self.reference_links
                            .entry(concept)
                            .or_insert_with(Vec::new)
                            .push(link);
                    }
                }
            }
        }

        Ok(())
    }

    /// Returns the presentation children of `root`, sorted by `order`.
    ///
    /// NOTE(review): `partial_cmp(..).unwrap()` panics if any `order`
    /// parsed as NaN — TODO consider `total_cmp` or a NaN-safe default.
    pub fn get_presentation_tree(&self, root: &str) -> Vec<&PresentationLink> {
        self.presentation_links
            .get(root)
            .map(|links| {
                let mut sorted = links.iter().collect::<Vec<_>>();
                sorted.sort_by(|a, b| a.order.partial_cmp(&b.order).unwrap());
                sorted
            })
            .unwrap_or_default()
    }

    /// Computes the weighted sum of `parent`'s calculation children from
    /// `facts` (missing children contribute 0). When `parent` has no
    /// calculation arcs, falls back to its own reported value (or 0).
    pub fn calculate_total(&self, parent: &str, facts: &HashMap<String, f64>) -> f64 {
        if let Some(links) = self.calculation_links.get(parent) {
            links.iter()
                .map(|link| {
                    facts.get(link.to.as_str())
                        .map(|value| value * link.weight)
                        .unwrap_or(0.0)
                })
                .sum()
        } else {
            facts.get(parent).copied().unwrap_or(0.0)
        }
    }

    /// Looks up the best label for `concept`: exact role+lang match,
    /// then any label in `lang`, then the first label of any kind.
    pub fn get_label(&self, concept: &str, role: &str, lang: &str) -> Option<&str> {
        self.label_links
            .get(concept)
            .and_then(|labels| {
                labels.iter()
                    .find(|l| l.role == role && l.lang == lang)
                    .or_else(|| labels.iter().find(|l| l.lang == lang))
                    .or_else(|| labels.first())
            })
            .map(|l| l.label.as_str())
    }

    /// Recomputes every calculation parent from `facts` and reports any
    /// mismatch beyond a fixed absolute tolerance. Parents absent from
    /// `facts` are skipped (nothing to compare against).
    pub fn validate_calculations(&self, facts: &HashMap<String, f64>) -> Vec<ValidationError> {
        let mut errors = Vec::new();

        for (parent, links) in &self.calculation_links {
            let calculated = self.calculate_total(parent, facts);
            if let Some(&actual) = facts.get(parent.as_str()) {
                let diff = (calculated - actual).abs();
                let tolerance = 0.01; // Allow small rounding differences

                if diff > tolerance {
                    errors.push(ValidationError::CalculationInconsistency {
                        concept: parent.to_string(),
                        expected: calculated,
                        actual,
                    });
                }
            }
        }

        errors
    }
}
|
||||
181
rust/vendor/crabrl/src/main.rs
vendored
Normal file
181
rust/vendor/crabrl/src/main.rs
vendored
Normal file
@@ -0,0 +1,181 @@
|
||||
//! crabrl CLI - High-performance XBRL parser and validator
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{Parser as ClapParser, Subcommand};
|
||||
use colored::*;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use crabrl::{Parser, ValidationConfig, Validator};
|
||||
|
||||
/// High-performance XBRL parser and validator
|
||||
// Top-level clap definition; `about` is filled from Cargo metadata.
#[derive(ClapParser)]
#[command(name = "crabrl")]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// Which subcommand to run (parse / validate / bench).
    #[command(subcommand)]
    command: Commands,
}
|
||||
|
||||
// CLI subcommands; each doc comment doubles as clap help text.
#[derive(Subcommand)]
enum Commands {
    /// Parse an XBRL file
    Parse {
        /// Input file
        input: PathBuf,

        /// Output as JSON
        // NOTE(review): accepted but ignored by main() — see the
        // `json: _` destructuring there.
        #[arg(short, long)]
        json: bool,

        /// Show statistics
        #[arg(short, long)]
        stats: bool,
    },

    /// Validate an XBRL file
    Validate {
        /// Input file
        input: PathBuf,

        /// Validation profile (generic, sec-edgar)
        #[arg(short, long, default_value = "generic")]
        profile: String,

        /// Treat warnings as errors
        #[arg(long)]
        strict: bool,
    },

    /// Benchmark parsing performance
    Bench {
        /// Input file
        input: PathBuf,

        /// Number of iterations
        #[arg(short, long, default_value = "100")]
        iterations: usize,
    },
}
|
||||
|
||||
/// CLI entry point: dispatches to parse, validate, or bench.
fn main() -> Result<()> {
    let cli = Cli::parse();

    match cli.command {
        Commands::Parse {
            input,
            // NOTE(review): the --json flag is parsed but never acted on.
            json: _,
            stats,
        } => {
            let start = Instant::now();
            let parser = Parser::new();
            let doc = parser
                .parse_file(&input)
                .with_context(|| format!("Failed to parse {}", input.display()))?;
            let elapsed = start.elapsed();

            // Summary is always printed; timing only with --stats.
            println!("{} {}", "✓".green().bold(), input.display());
            println!(" Facts: {}", doc.facts.len());
            println!(" Contexts: {}", doc.contexts.len());
            println!(" Units: {}", doc.units.len());

            if stats {
                println!(" Time: {:.2}ms", elapsed.as_secs_f64() * 1000.0);
                println!(
                    " Throughput: {:.0} facts/sec",
                    doc.facts.len() as f64 / elapsed.as_secs_f64()
                );
            }
        }

        Commands::Validate {
            input,
            profile,
            strict,
        } => {
            let parser = Parser::new();
            let doc = parser
                .parse_file(&input)
                .with_context(|| format!("Failed to parse {}", input.display()))?;

            // Unknown profile strings silently fall back to generic.
            let config = match profile.as_str() {
                "sec-edgar" => ValidationConfig::sec_edgar(),
                _ => ValidationConfig::default(),
            };

            let validator = Validator::with_config(config);
            let result = validator.validate(&doc)?;

            if result.is_valid {
                println!(
                    "{} {} - Document is valid",
                    "✓".green().bold(),
                    input.display()
                );
            } else {
                println!(
                    "{} {} - Validation failed",
                    "✗".red().bold(),
                    input.display()
                );
                println!(" Errors: {}", result.errors.len());
                println!(" Warnings: {}", result.warnings.len());

                // Show at most the first five errors in full.
                for error in result.errors.iter().take(5) {
                    println!(" {} {}", "ERROR:".red(), error);
                }

                if result.errors.len() > 5 {
                    println!(" ... and {} more errors", result.errors.len() - 5);
                }

                // --strict promotes warnings to a failing exit code.
                if strict && !result.warnings.is_empty() {
                    std::process::exit(1);
                }

                if !result.is_valid {
                    std::process::exit(1);
                }
            }
        }

        Commands::Bench { input, iterations } => {
            let parser = Parser::new();

            // Warmup
            for _ in 0..3 {
                let _ = parser.parse_file(&input)?;
            }

            let mut times = Vec::with_capacity(iterations);
            let mut doc_facts = 0;

            for _ in 0..iterations {
                let start = Instant::now();
                let doc = parser.parse_file(&input)?;
                times.push(start.elapsed());
                doc_facts = doc.facts.len();
            }

            // NOTE(review): panics if --iterations 0 (times[0] below).
            times.sort();
            let min = times[0];
            let max = times[times.len() - 1];
            let median = times[times.len() / 2];
            let mean = times.iter().sum::<std::time::Duration>() / times.len() as u32;

            println!("Benchmark Results for {}", input.display());
            println!(" Iterations: {}", iterations);
            println!(" Facts: {}", doc_facts);
            println!(" Min: {:.3}ms", min.as_secs_f64() * 1000.0);
            println!(" Median: {:.3}ms", median.as_secs_f64() * 1000.0);
            println!(" Mean: {:.3}ms", mean.as_secs_f64() * 1000.0);
            println!(" Max: {:.3}ms", max.as_secs_f64() * 1000.0);
            println!(
                " Throughput: {:.0} facts/sec",
                doc_facts as f64 / mean.as_secs_f64()
            );
        }
    }

    Ok(())
}
|
||||
347
rust/vendor/crabrl/src/model.rs
vendored
Normal file
347
rust/vendor/crabrl/src/model.rs
vendored
Normal file
@@ -0,0 +1,347 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ============================================================================
|
||||
// Core XBRL Data Structures - Full Specification Support
|
||||
// ============================================================================
|
||||
|
||||
/// Columnar (struct-of-arrays) fact storage: field `i` of every vector
/// belongs to fact `i`. Cache-line aligned for bulk scans.
// Fix: added `Debug` (public types should be debuggable); all fields
// already support it.
#[repr(C, align(64))]
#[derive(Debug, Clone)]
pub struct FactStorage {
    /// Interned concept id per fact.
    pub concept_ids: Vec<u32>,
    /// Interned context id per fact.
    pub context_ids: Vec<u16>,
    /// Interned unit id per fact.
    pub unit_ids: Vec<u16>,
    /// Typed value per fact.
    pub values: Vec<FactValue>,
    /// Optional `decimals` attribute per fact.
    pub decimals: Vec<Option<i8>>,
    /// Optional XML id per fact.
    pub ids: Vec<Option<String>>,
    /// Footnote references per fact.
    pub footnote_refs: Vec<Vec<String>>,
}

/// The typed value carried by a fact.
// Fix: added `PartialEq` so values can be compared/asserted directly.
#[derive(Debug, Clone, PartialEq)]
pub enum FactValue {
    Text(String),
    Decimal(f64),
    Integer(i64),
    Boolean(bool),
    Date(String),
    DateTime(String),
    /// Explicitly nil (xsi:nil) fact.
    Nil,
}

impl FactStorage {
    /// Creates empty storage with every column pre-sized for `capacity`
    /// facts, avoiding re-allocation during bulk loads.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            concept_ids: Vec::with_capacity(capacity),
            context_ids: Vec::with_capacity(capacity),
            unit_ids: Vec::with_capacity(capacity),
            values: Vec::with_capacity(capacity),
            decimals: Vec::with_capacity(capacity),
            ids: Vec::with_capacity(capacity),
            footnote_refs: Vec::with_capacity(capacity),
        }
    }

    /// Number of stored facts (the concept column is authoritative).
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.concept_ids.len()
    }

    /// True when no facts are stored.
    pub fn is_empty(&self) -> bool {
        self.concept_ids.is_empty()
    }
}
|
||||
|
||||
// Full fact representation with all XBRL features
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Fact {
|
||||
pub id: Option<String>,
|
||||
pub concept: String,
|
||||
pub context_ref: String,
|
||||
pub unit_ref: Option<String>,
|
||||
pub value: String,
|
||||
pub decimals: Option<i8>,
|
||||
pub precision: Option<u8>,
|
||||
pub nil: bool,
|
||||
pub nil_reason: Option<String>,
|
||||
pub footnote_refs: Vec<String>,
|
||||
}
|
||||
|
||||
// Context, entity and dimensional-data types.

/// A reporting context: who (entity), when (period) and under what
/// hypothetical conditions (scenario).
#[derive(Debug, Clone)]
pub struct Context {
    /// The context's `id` attribute, referenced by facts.
    pub id: String,
    /// The reporting entity.
    pub entity: Entity,
    /// The reporting period.
    pub period: Period,
    /// Optional scenario with further dimensional qualification.
    pub scenario: Option<Scenario>,
}

/// The reporting entity of a context.
#[derive(Debug, Clone)]
pub struct Entity {
    /// Entity identifier (e.g. an SEC CIK number).
    pub identifier: String,
    /// Identification scheme URI for the identifier.
    pub scheme: String,
    /// Optional segment carrying dimensional members.
    pub segment: Option<Segment>,
}

/// Dimensional qualification attached to an entity.
#[derive(Debug, Clone)]
pub struct Segment {
    /// Explicit (enumerated) dimension members.
    pub explicit_members: Vec<DimensionMember>,
    /// Typed dimension members with free-form XML values.
    pub typed_members: Vec<TypedMember>,
}

/// One explicit dimension/member pair.
#[derive(Debug, Clone)]
pub struct DimensionMember {
    /// Qualified name of the dimension.
    pub dimension: String,
    /// Qualified name of the member.
    pub member: String,
}

/// One typed dimension with its raw XML content as the value.
#[derive(Debug, Clone)]
pub struct TypedMember {
    /// Qualified name of the dimension.
    pub dimension: String,
    /// Raw XML content of the typed member.
    pub value: String,
}

/// Scenario-level dimensional qualification (mirrors [`Segment`]).
#[derive(Debug, Clone)]
pub struct Scenario {
    pub explicit_members: Vec<DimensionMember>,
    pub typed_members: Vec<TypedMember>,
}

/// A reporting period: a point in time, a date range, or forever.
#[derive(Debug, Clone)]
pub enum Period {
    /// A single date (lexical form preserved).
    Instant { date: String },
    /// A start/end date range (lexical forms preserved).
    Duration { start: String, end: String },
    /// The XBRL "forever" period.
    Forever,
}
|
||||
|
||||
// Unit types, including divide/multiply compound units.

/// A measurement unit referenced by numeric facts.
#[derive(Debug, Clone)]
pub struct Unit {
    /// The unit's `id` attribute, referenced by facts.
    pub id: String,
    /// The structure of the unit (simple, ratio or product).
    pub unit_type: UnitType,
}

/// The structural form of a [`Unit`].
#[derive(Debug, Clone)]
pub enum UnitType {
    /// A plain list of measures (e.g. `iso4217:USD`).
    Simple(Vec<Measure>),
    /// A ratio of measures (e.g. USD per share).
    Divide {
        numerator: Vec<Measure>,
        denominator: Vec<Measure>,
    },
    /// A product of measures.
    Multiply(Vec<Measure>),
}

/// A single namespaced measure within a unit.
#[derive(Debug, Clone)]
pub struct Measure {
    /// Namespace (prefix or URI) qualifying the measure.
    pub namespace: String,
    /// Local name of the measure.
    pub name: String,
}
|
||||
|
||||
// Tuple support for structured data
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Tuple {
|
||||
pub id: Option<String>,
|
||||
pub name: String,
|
||||
pub facts: Vec<FactOrTuple>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FactOrTuple {
|
||||
Fact(Fact),
|
||||
Tuple(Box<Tuple>),
|
||||
}
|
||||
|
||||
// Footnote support
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Footnote {
|
||||
pub id: String,
|
||||
pub role: Option<String>,
|
||||
pub lang: Option<String>,
|
||||
pub content: String,
|
||||
pub fact_refs: Vec<String>,
|
||||
}
|
||||
|
||||
// Fraction support
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FractionValue {
|
||||
pub numerator: f64,
|
||||
pub denominator: f64,
|
||||
}
|
||||
|
||||
// Schema and taxonomy metadata.

/// A parsed taxonomy schema (`.xsd`).
#[derive(Debug, Clone)]
pub struct Schema {
    /// The schema's `targetNamespace`.
    pub target_namespace: String,
    /// Element declarations keyed by element name.
    pub elements: HashMap<String, SchemaElement>,
    /// Type definitions keyed by type name.
    pub types: HashMap<String, SchemaType>,
    /// Imported schemas.
    pub imports: Vec<SchemaImport>,
}

/// One `xs:element` declaration, including XBRL-specific attributes.
#[derive(Debug, Clone)]
pub struct SchemaElement {
    /// Element name.
    pub name: String,
    /// Declared type name.
    pub element_type: String,
    /// Optional `substitutionGroup` attribute.
    pub substitution_group: Option<String>,
    /// Optional `xbrli:periodType` attribute.
    pub period_type: Option<String>,
    /// Optional `xbrli:balance` attribute.
    pub balance: Option<String>,
    /// True when declared `abstract="true"`.
    pub abstract_element: bool,
    /// True when declared `nillable="true"`.
    pub nillable: bool,
}

/// A named type with its base type and restriction facets.
#[derive(Debug, Clone)]
pub struct SchemaType {
    pub name: String,
    pub base_type: Option<String>,
    pub restrictions: Vec<TypeRestriction>,
}

/// An XML Schema restriction facet. Numeric bounds and patterns keep
/// their lexical (string) form.
#[derive(Debug, Clone)]
pub enum TypeRestriction {
    MinInclusive(String),
    MaxInclusive(String),
    MinExclusive(String),
    MaxExclusive(String),
    Pattern(String),
    Enumeration(Vec<String>),
    Length(usize),
    MinLength(usize),
    MaxLength(usize),
}

/// One `xs:import` directive.
#[derive(Debug, Clone)]
pub struct SchemaImport {
    pub namespace: String,
    pub schema_location: String,
}
|
||||
|
||||
// Linkbase relationship types.

/// A linkbase: a role plus the links declared under it.
#[derive(Debug, Clone)]
pub struct Linkbase {
    pub role: String,
    pub links: Vec<Link>,
}

/// Any of the five standard XBRL link kinds.
#[derive(Debug, Clone)]
pub enum Link {
    Presentation(PresentationLink),
    Calculation(CalculationLink),
    Definition(DefinitionLink),
    Label(LabelLink),
    Reference(ReferenceLink),
}

/// A presentation arc between two concepts.
#[derive(Debug, Clone)]
pub struct PresentationLink {
    /// Source concept of the arc.
    pub from: String,
    /// Target concept of the arc.
    pub to: String,
    /// Ordering of siblings under the same parent.
    pub order: f32,
    /// Optional arc priority.
    pub priority: Option<i32>,
    /// Optional `use` attribute (e.g. "optional"/"prohibited").
    pub use_attribute: Option<String>,
}

/// A calculation arc with its summation weight.
#[derive(Debug, Clone)]
pub struct CalculationLink {
    pub from: String,
    pub to: String,
    /// Contribution weight in the summation (typically +1.0 or -1.0).
    pub weight: f64,
    pub order: f32,
}

/// A definition arc with an explicit arcrole.
#[derive(Debug, Clone)]
pub struct DefinitionLink {
    pub from: String,
    pub to: String,
    /// Arcrole URI describing the relationship kind.
    pub arcrole: String,
    pub order: f32,
}

/// A human-readable label attached to a concept.
#[derive(Debug, Clone)]
pub struct LabelLink {
    pub concept: String,
    pub label: String,
    /// Label role URI (standard, terse, documentation, ...).
    pub role: String,
    /// Language code of the label text.
    pub lang: String,
}

/// An authoritative reference attached to a concept.
#[derive(Debug, Clone)]
pub struct ReferenceLink {
    pub concept: String,
    pub reference: Reference,
}

/// Reference detail: a role plus named parts (publisher, section, ...).
#[derive(Debug, Clone)]
pub struct Reference {
    pub role: String,
    pub parts: HashMap<String, String>,
}
|
||||
|
||||
// Main document structure with full XBRL support
|
||||
#[derive(Clone)]
|
||||
pub struct Document {
|
||||
pub facts: FactStorage,
|
||||
pub contexts: Vec<Context>,
|
||||
pub units: Vec<Unit>,
|
||||
pub tuples: Vec<Tuple>,
|
||||
pub footnotes: Vec<Footnote>,
|
||||
pub presentation_links: Vec<PresentationLink>,
|
||||
pub calculation_links: Vec<CalculationLink>,
|
||||
pub definition_links: Vec<DefinitionLink>,
|
||||
pub label_links: Vec<LabelLink>,
|
||||
pub reference_links: Vec<ReferenceLink>,
|
||||
pub custom_links: Vec<Link>,
|
||||
pub role_types: Vec<String>,
|
||||
pub arcrole_types: Vec<String>,
|
||||
pub schemas: Vec<Schema>,
|
||||
pub dimensions: Vec<DimensionMember>,
|
||||
pub concept_names: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for Document {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Document {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
facts: FactStorage::with_capacity(10000),
|
||||
contexts: Vec::with_capacity(100),
|
||||
units: Vec::with_capacity(50),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_capacity(facts: usize, contexts: usize, units: usize) -> Self {
|
||||
Self {
|
||||
facts: FactStorage::with_capacity(facts),
|
||||
contexts: Vec::with_capacity(contexts),
|
||||
units: Vec::with_capacity(units),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
1552
rust/vendor/crabrl/src/parser.rs
vendored
Normal file
1552
rust/vendor/crabrl/src/parser.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
238
rust/vendor/crabrl/src/parser_base.rs
vendored
Normal file
238
rust/vendor/crabrl/src/parser_base.rs
vendored
Normal file
@@ -0,0 +1,238 @@
|
||||
// Base parsing methods for FullXbrlParser
|
||||
|
||||
impl<'a> FullXbrlParser<'a> {
|
||||
#[inline(always)]
|
||||
fn read_tag_name(&mut self) -> Result<&'a str> {
|
||||
let start = self.scanner.pos;
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b' ' || ch == b'>' || ch == b'/' || ch == b'\t' || ch == b'\n' || ch == b'\r' {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
let end = self.scanner.pos;
|
||||
|
||||
if start == end {
|
||||
return Err(Error::Parse("Empty tag name".to_string()));
|
||||
}
|
||||
|
||||
std::str::from_utf8(&self.scanner.data[start..end])
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in tag name".to_string()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn parse_attributes(&mut self) -> Result<Vec<(&'a str, &'a str)>> {
|
||||
let mut attrs = Vec::new();
|
||||
|
||||
loop {
|
||||
self.scanner.skip_whitespace();
|
||||
|
||||
match self.scanner.peek() {
|
||||
Some(b'>') => {
|
||||
// End of tag
|
||||
break;
|
||||
}
|
||||
Some(b'/') => {
|
||||
// Self-closing tag
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
None => return Err(Error::Parse("Unexpected EOF in attributes".to_string())),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let name_start = self.scanner.pos;
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'=' || ch == b' ' || ch == b'>' || ch == b'/' {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
if self.scanner.pos == name_start {
|
||||
break; // No more attributes
|
||||
}
|
||||
|
||||
let name = std::str::from_utf8(&self.scanner.data[name_start..self.scanner.pos])
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in attribute name".to_string()))?;
|
||||
|
||||
self.scanner.skip_whitespace();
|
||||
|
||||
if self.scanner.peek() != Some(b'=') {
|
||||
continue;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
|
||||
self.scanner.skip_whitespace();
|
||||
|
||||
let quote = self.scanner.peek()
|
||||
.ok_or_else(|| Error::Parse("Expected quote".to_string()))?;
|
||||
|
||||
if quote != b'"' && quote != b'\'' {
|
||||
return Err(Error::Parse("Expected quote in attribute".to_string()));
|
||||
}
|
||||
|
||||
self.scanner.advance(1);
|
||||
let value_start = self.scanner.pos;
|
||||
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == quote {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
let value = std::str::from_utf8(&self.scanner.data[value_start..self.scanner.pos])
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in attribute value".to_string()))?;
|
||||
|
||||
self.scanner.advance(1); // Skip closing quote
|
||||
|
||||
attrs.push((name, value));
|
||||
}
|
||||
|
||||
Ok(attrs)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_to_tag_end(&mut self) -> Result<()> {
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'>' {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
Err(Error::Parse("Expected '>'".to_string()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn read_text_content(&mut self) -> Result<&'a str> {
|
||||
let start = self.scanner.pos;
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'<' {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
let text = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos])
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in text content".to_string()))?;
|
||||
|
||||
Ok(text.trim())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_element_from_tag(&mut self) -> Result<()> {
|
||||
// We've already read the tag name, now skip to end of opening tag
|
||||
self.skip_to_tag_end()?;
|
||||
|
||||
// Check if it was self-closing
|
||||
if self.scanner.pos >= 2 && self.scanner.data[self.scanner.pos - 2] == b'/' {
|
||||
return Ok(()); // Self-closing tag, we're done
|
||||
}
|
||||
|
||||
// Skip element content and find matching closing tag
|
||||
let mut depth = 1;
|
||||
|
||||
while depth > 0 && !self.scanner.is_eof() {
|
||||
// Find next tag
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'<' {
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
if self.scanner.is_eof() {
|
||||
break;
|
||||
}
|
||||
|
||||
self.scanner.advance(1); // consume '<'
|
||||
|
||||
if self.scanner.peek() == Some(b'/') {
|
||||
depth -= 1;
|
||||
} else if self.scanner.peek() != Some(b'!') && self.scanner.peek() != Some(b'?') {
|
||||
// Check if it's a self-closing tag
|
||||
let mut is_self_closing = false;
|
||||
let _saved_pos = self.scanner.pos;
|
||||
|
||||
// Skip to end of tag to check
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'/' {
|
||||
if self.scanner.pos + 1 < self.scanner.data.len()
|
||||
&& self.scanner.data[self.scanner.pos + 1] == b'>' {
|
||||
is_self_closing = true;
|
||||
}
|
||||
}
|
||||
if ch == b'>' {
|
||||
self.scanner.advance(1);
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
|
||||
if !is_self_closing {
|
||||
depth += 1;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip to end of this tag
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'>' {
|
||||
self.scanner.advance(1);
|
||||
break;
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_processing_instruction(&mut self) -> Result<()> {
|
||||
// Skip until ?>
|
||||
while !self.scanner.is_eof() {
|
||||
if self.scanner.peek() == Some(b'?') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
} else {
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
}
|
||||
Err(Error::Parse("Unclosed processing instruction".to_string()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_comment(&mut self) -> Result<()> {
|
||||
// Skip until -->
|
||||
while !self.scanner.is_eof() {
|
||||
if self.scanner.peek() == Some(b'-') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'-') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
}
|
||||
Err(Error::Parse("Unclosed comment".to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Parser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
275
rust/vendor/crabrl/src/schema.rs
vendored
Normal file
275
rust/vendor/crabrl/src/schema.rs
vendored
Normal file
@@ -0,0 +1,275 @@
|
||||
// Schema loading and validation for XBRL
|
||||
use crate::{Error, Result, model::*};
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub struct SchemaLoader {
|
||||
cache: HashMap<CompactString, Schema>,
|
||||
}
|
||||
|
||||
impl SchemaLoader {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
cache: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_schema<P: AsRef<Path>>(&mut self, path: P) -> Result<&Schema> {
|
||||
let path_str = path.as_ref().to_string_lossy();
|
||||
let key = CompactString::from(path_str.as_ref());
|
||||
|
||||
if self.cache.contains_key(&key) {
|
||||
return Ok(self.cache.get(&key).unwrap());
|
||||
}
|
||||
|
||||
let schema = self.parse_schema_file(path)?;
|
||||
self.cache.insert(key.clone(), schema);
|
||||
Ok(self.cache.get(&key).unwrap())
|
||||
}
|
||||
|
||||
fn parse_schema_file<P: AsRef<Path>>(&self, path: P) -> Result<Schema> {
|
||||
let content = std::fs::read(path)?;
|
||||
self.parse_schema_bytes(&content)
|
||||
}
|
||||
|
||||
fn parse_schema_bytes(&self, data: &[u8]) -> Result<Schema> {
|
||||
// Simple XML parsing for schema
|
||||
let mut schema = Schema {
|
||||
target_namespace: CompactString::new(""),
|
||||
elements: HashMap::new(),
|
||||
types: HashMap::new(),
|
||||
imports: Vec::new(),
|
||||
};
|
||||
|
||||
// Skip BOM if present
|
||||
let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
|
||||
&data[3..]
|
||||
} else {
|
||||
data
|
||||
};
|
||||
|
||||
let text = std::str::from_utf8(data)
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in schema".to_string()))?;
|
||||
|
||||
// Extract target namespace
|
||||
if let Some(ns_start) = text.find("targetNamespace=\"") {
|
||||
let ns_start = ns_start + 17;
|
||||
if let Some(ns_end) = text[ns_start..].find('"') {
|
||||
schema.target_namespace = CompactString::from(&text[ns_start..ns_start + ns_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse elements
|
||||
let mut pos = 0;
|
||||
while let Some(elem_start) = text[pos..].find("<xs:element") {
|
||||
let elem_start = pos + elem_start;
|
||||
pos = elem_start + 1;
|
||||
|
||||
// Find element end
|
||||
let elem_end = if let Some(end) = text[elem_start..].find("/>") {
|
||||
elem_start + end + 2
|
||||
} else if let Some(end) = text[elem_start..].find("</xs:element>") {
|
||||
elem_start + end + 13
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let elem_text = &text[elem_start..elem_end];
|
||||
|
||||
// Extract element attributes
|
||||
let mut element = SchemaElement {
|
||||
name: CompactString::new(""),
|
||||
element_type: CompactString::new(""),
|
||||
substitution_group: None,
|
||||
period_type: None,
|
||||
balance: None,
|
||||
abstract_element: elem_text.contains("abstract=\"true\""),
|
||||
nillable: elem_text.contains("nillable=\"true\""),
|
||||
};
|
||||
|
||||
// Extract name
|
||||
if let Some(name_start) = elem_text.find("name=\"") {
|
||||
let name_start = name_start + 6;
|
||||
if let Some(name_end) = elem_text[name_start..].find('"') {
|
||||
element.name = CompactString::from(&elem_text[name_start..name_start + name_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract type
|
||||
if let Some(type_start) = elem_text.find("type=\"") {
|
||||
let type_start = type_start + 6;
|
||||
if let Some(type_end) = elem_text[type_start..].find('"') {
|
||||
element.element_type = CompactString::from(&elem_text[type_start..type_start + type_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract substitutionGroup
|
||||
if let Some(sg_start) = elem_text.find("substitutionGroup=\"") {
|
||||
let sg_start = sg_start + 19;
|
||||
if let Some(sg_end) = elem_text[sg_start..].find('"') {
|
||||
element.substitution_group = Some(CompactString::from(&elem_text[sg_start..sg_start + sg_end]));
|
||||
}
|
||||
}
|
||||
|
||||
// Extract XBRL-specific attributes
|
||||
if let Some(pt_start) = elem_text.find("xbrli:periodType=\"") {
|
||||
let pt_start = pt_start + 18;
|
||||
if let Some(pt_end) = elem_text[pt_start..].find('"') {
|
||||
element.period_type = Some(CompactString::from(&elem_text[pt_start..pt_start + pt_end]));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(bal_start) = elem_text.find("xbrli:balance=\"") {
|
||||
let bal_start = bal_start + 15;
|
||||
if let Some(bal_end) = elem_text[bal_start..].find('"') {
|
||||
element.balance = Some(CompactString::from(&elem_text[bal_start..bal_start + bal_end]));
|
||||
}
|
||||
}
|
||||
|
||||
if !element.name.is_empty() {
|
||||
schema.elements.insert(element.name.clone(), element);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse imports
|
||||
pos = 0;
|
||||
while let Some(import_start) = text[pos..].find("<xs:import") {
|
||||
let import_start = pos + import_start;
|
||||
pos = import_start + 1;
|
||||
|
||||
if let Some(import_end) = text[import_start..].find("/>") {
|
||||
let import_text = &text[import_start..import_start + import_end];
|
||||
|
||||
let mut import = SchemaImport {
|
||||
namespace: CompactString::new(""),
|
||||
schema_location: CompactString::new(""),
|
||||
};
|
||||
|
||||
if let Some(ns_start) = import_text.find("namespace=\"") {
|
||||
let ns_start = ns_start + 11;
|
||||
if let Some(ns_end) = import_text[ns_start..].find('"') {
|
||||
import.namespace = CompactString::from(&import_text[ns_start..ns_start + ns_end]);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(loc_start) = import_text.find("schemaLocation=\"") {
|
||||
let loc_start = loc_start + 16;
|
||||
if let Some(loc_end) = import_text[loc_start..].find('"') {
|
||||
import.schema_location = CompactString::from(&import_text[loc_start..loc_start + loc_end]);
|
||||
}
|
||||
}
|
||||
|
||||
schema.imports.push(import);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(schema)
|
||||
}
|
||||
|
||||
pub fn validate_element(&self, name: &str, value: &str, schema: &Schema) -> Result<()> {
|
||||
if let Some(element) = schema.elements.get(name) {
|
||||
// Check if element is abstract
|
||||
if element.abstract_element {
|
||||
return Err(Error::Validation(format!("Element {} is abstract", name)));
|
||||
}
|
||||
|
||||
// Validate type
|
||||
if let Some(type_def) = schema.types.get(&element.element_type) {
|
||||
self.validate_type(value, type_def)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
// Element not found in schema - might be from imported schema
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_type(&self, value: &str, type_def: &SchemaType) -> Result<()> {
|
||||
for restriction in &type_def.restrictions {
|
||||
match restriction {
|
||||
TypeRestriction::MinInclusive(min) => {
|
||||
if let (Ok(val), Ok(min_val)) = (value.parse::<f64>(), min.parse::<f64>()) {
|
||||
if val < min_val {
|
||||
return Err(Error::Validation(format!("Value {} is less than minimum {}", val, min_val)));
|
||||
}
|
||||
}
|
||||
}
|
||||
TypeRestriction::MaxInclusive(max) => {
|
||||
if let (Ok(val), Ok(max_val)) = (value.parse::<f64>(), max.parse::<f64>()) {
|
||||
if val > max_val {
|
||||
return Err(Error::Validation(format!("Value {} is greater than maximum {}", val, max_val)));
|
||||
}
|
||||
}
|
||||
}
|
||||
TypeRestriction::Pattern(pattern) => {
|
||||
// Simple pattern matching - could use regex for complex patterns
|
||||
if !value.contains(pattern) {
|
||||
return Err(Error::Validation(format!("Value {} doesn't match pattern {}", value, pattern)));
|
||||
}
|
||||
}
|
||||
TypeRestriction::MinLength(min) => {
|
||||
if value.len() < *min {
|
||||
return Err(Error::Validation(format!("Value length {} is less than minimum {}", value.len(), min)));
|
||||
}
|
||||
}
|
||||
TypeRestriction::MaxLength(max) => {
|
||||
if value.len() > *max {
|
||||
return Err(Error::Validation(format!("Value length {} is greater than maximum {}", value.len(), max)));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// Schema validator for documents
|
||||
pub struct SchemaValidator {
|
||||
schemas: Vec<Schema>,
|
||||
}
|
||||
|
||||
impl SchemaValidator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
schemas: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_schema(&mut self, schema: Schema) {
|
||||
self.schemas.push(schema);
|
||||
}
|
||||
|
||||
pub fn validate_document(&self, doc: &Document) -> Vec<ValidationError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
// Validate facts against schemas
|
||||
for i in 0..doc.facts.len() {
|
||||
if let Some(_fact) = doc.facts.get(i) {
|
||||
// Would need to map fact concept_id back to concept name
|
||||
// and validate against schema
|
||||
// This is simplified for now
|
||||
}
|
||||
}
|
||||
|
||||
// Check for required elements
|
||||
for schema in &self.schemas {
|
||||
for (name, element) in &schema.elements {
|
||||
if !element.nillable && !element.abstract_element {
|
||||
// Check if this required element exists in document
|
||||
// This would require reverse mapping from concept names to facts
|
||||
let _found = false;
|
||||
// if !found {
|
||||
// errors.push(ValidationError::MissingRequiredElement {
|
||||
// element: name.to_string(),
|
||||
// });
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
}
|
||||
51
rust/vendor/crabrl/src/sec.rs
vendored
Normal file
51
rust/vendor/crabrl/src/sec.rs
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
// SEC EDGAR XBRL filing support (local files only)
|
||||
use crate::{Parser, Document, Result};
|
||||
use std::path::Path;
|
||||
|
||||
pub struct SecFilingParser {
|
||||
parser: Parser,
|
||||
}
|
||||
|
||||
impl SecFilingParser {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
parser: Parser::new().with_validation(true),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_filing<P: AsRef<Path>>(&self, path: P) -> Result<Document> {
|
||||
self.parser.parse_file(path)
|
||||
}
|
||||
|
||||
pub fn with_validation(mut self, validate: bool) -> Self {
|
||||
self.parser = self.parser.with_validation(validate);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// Test utilities for SEC filings
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: parses a tiny local fixture when it is present,
    /// and skips silently when the fixture is not checked out.
    #[test]
    fn test_parse_local_sec_filing() {
        let parser = SecFilingParser::new();

        // Test with local test files
        if std::path::Path::new("test_data/test_tiny.xbrl").exists() {
            match parser.parse_filing("test_data/test_tiny.xbrl") {
                Ok(doc) => {
                    println!("Successfully parsed filing:");
                    println!(" Facts: {}", doc.facts.len());
                    println!(" Contexts: {}", doc.contexts.len());
                    println!(" Units: {}", doc.units.len());
                    assert!(!doc.contexts.is_empty(), "Should have contexts");
                }
                Err(e) => {
                    eprintln!("Failed to parse filing: {}", e);
                }
            }
        }
    }
}
|
||||
208
rust/vendor/crabrl/src/simd.rs
vendored
Normal file
208
rust/vendor/crabrl/src/simd.rs
vendored
Normal file
@@ -0,0 +1,208 @@
|
||||
use memchr::{memchr, memchr2, memchr3};
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
const XML_TAG_START: u8 = b'<';
|
||||
const XML_TAG_END: u8 = b'>';
|
||||
const XML_SLASH: u8 = b'/';
|
||||
const XML_QUOTE: u8 = b'"';
|
||||
const XML_EQUALS: u8 = b'=';
|
||||
const XML_SPACE: u8 = b' ';
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_tag_start(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_TAG_START, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_tag_end(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_TAG_END, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_quote(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_QUOTE, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_any_delimiter(haystack: &[u8]) -> Option<usize> {
|
||||
memchr3(XML_TAG_START, XML_TAG_END, XML_QUOTE, haystack)
|
||||
}
|
||||
|
||||
/// AVX2-accelerated substring search.
///
/// Scans 32 bytes at a time for the pattern's first byte, confirms
/// candidate positions with a slice comparison, then handles the
/// final partial window with a scalar loop. Returns the offset of the
/// first full match; an empty pattern yields `None`.
///
/// # Safety
/// The caller must ensure the CPU supports AVX2 (e.g. via
/// `is_x86_feature_detected!("avx2")`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn find_pattern_avx2(haystack: &[u8], pattern: &[u8]) -> Option<usize> {
    if pattern.is_empty() || haystack.len() < pattern.len() {
        return None;
    }

    let needle_first = _mm256_set1_epi8(pattern[0] as i8);
    let mut i = 0;

    // Vector phase: test 32 candidate start positions per iteration.
    while i + 32 <= haystack.len() {
        let chunk = _mm256_loadu_si256(haystack.as_ptr().add(i) as *const _);
        let eq = _mm256_cmpeq_epi8(chunk, needle_first);
        let mut mask = _mm256_movemask_epi8(eq) as u32;

        // Walk the set bits in ascending order so the lowest offset
        // wins, matching a left-to-right scan.
        while mask != 0 {
            let bit = mask.trailing_zeros() as usize;
            let pos = i + bit;
            if pos + pattern.len() <= haystack.len()
                && &haystack[pos..pos + pattern.len()] == pattern
            {
                return Some(pos);
            }
            mask &= mask - 1; // clear lowest set bit
        }

        i += 32;
    }

    // Scalar tail: remaining (< 32) candidate start positions.
    while i + pattern.len() <= haystack.len() {
        if &haystack[i..i + pattern.len()] == pattern {
            return Some(i);
        }
        i += 1;
    }

    None
}
|
||||
|
||||
/// AVX2-accelerated whitespace skip starting at `pos`.
///
/// Returns the index of the first byte at or after `pos` that is not
/// one of space/tab/LF/CR (or `data.len()` if none remains).
///
/// # Safety
/// The caller must ensure the CPU supports AVX2.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn skip_whitespace_avx2(data: &[u8], mut pos: usize) -> usize {
    let ws_space = _mm256_set1_epi8(b' ' as i8);
    let ws_tab = _mm256_set1_epi8(b'\t' as i8);
    let ws_lf = _mm256_set1_epi8(b'\n' as i8);
    let ws_cr = _mm256_set1_epi8(b'\r' as i8);

    // Vector phase: classify 32 bytes per iteration.
    while pos + 32 <= data.len() {
        let chunk = _mm256_loadu_si256(data.as_ptr().add(pos) as *const _);

        let is_ws = _mm256_or_si256(
            _mm256_or_si256(
                _mm256_cmpeq_epi8(chunk, ws_space),
                _mm256_cmpeq_epi8(chunk, ws_tab),
            ),
            _mm256_or_si256(
                _mm256_cmpeq_epi8(chunk, ws_lf),
                _mm256_cmpeq_epi8(chunk, ws_cr),
            ),
        );

        let mask = _mm256_movemask_epi8(is_ws) as u32;

        // A zero bit marks the first non-whitespace byte in the chunk.
        if mask != u32::MAX {
            return pos + (!mask).trailing_zeros() as usize;
        }

        pos += 32;
    }

    // Scalar tail for the final partial chunk.
    while pos < data.len() {
        match data[pos] {
            b' ' | b'\t' | b'\n' | b'\r' => pos += 1,
            _ => break,
        }
    }

    pos
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn skip_whitespace(data: &[u8], mut pos: usize) -> usize {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") && data.len() - pos >= 32 {
|
||||
return unsafe { skip_whitespace_avx2(data, pos) };
|
||||
}
|
||||
}
|
||||
|
||||
while pos < data.len() {
|
||||
match data[pos] {
|
||||
b' ' | b'\t' | b'\n' | b'\r' => pos += 1,
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
pos
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_pattern(haystack: &[u8], pattern: &[u8]) -> Option<usize> {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") && haystack.len() >= 32 {
|
||||
return unsafe { find_pattern_avx2(haystack, pattern) };
|
||||
}
|
||||
}
|
||||
|
||||
haystack.windows(pattern.len())
|
||||
.position(|window| window == pattern)
|
||||
}
|
||||
|
||||
/// Byte cursor over an input buffer with SIMD-assisted helpers.
pub struct SimdScanner<'a> {
    /// The full input buffer being scanned.
    pub data: &'a [u8],
    /// Current read position; kept `<= data.len()` by [`SimdScanner::advance`].
    pub pos: usize,
}
|
||||
|
||||
impl<'a> SimdScanner<'a> {
|
||||
#[inline(always)]
|
||||
pub fn new(data: &'a [u8]) -> Self {
|
||||
Self { data, pos: 0 }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn skip_whitespace(&mut self) {
|
||||
self.pos = skip_whitespace(self.data, self.pos);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_next(&self, byte: u8) -> Option<usize> {
|
||||
memchr(byte, &self.data[self.pos..]).map(|i| self.pos + i)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_pattern(&self, pattern: &[u8]) -> Option<usize> {
|
||||
find_pattern(&self.data[self.pos..], pattern).map(|i| self.pos + i)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn advance(&mut self, n: usize) {
|
||||
self.pos = (self.pos + n).min(self.data.len());
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn peek(&self) -> Option<u8> {
|
||||
self.data.get(self.pos).copied()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn remaining(&self) -> &'a [u8] {
|
||||
&self.data[self.pos..]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_eof(&self) -> bool {
|
||||
self.pos >= self.data.len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_find_pattern() {
        // "context" starts right after the 6-byte "<xbrl:" prefix.
        let haystack = b"<xbrl:context id=\"c1\">";
        assert_eq!(find_pattern(haystack, b"context"), Some(6));
    }

    #[test]
    fn test_skip_whitespace() {
        // Three spaces + tab + LF + CR = 6 whitespace bytes, so the
        // first non-whitespace byte ('<') sits at index 6.
        let data = b"   \t\n\r<tag>";
        assert_eq!(skip_whitespace(data, 0), 6);
    }
}
|
||||
99
rust/vendor/crabrl/src/simple_parser.rs
vendored
Normal file
99
rust/vendor/crabrl/src/simple_parser.rs
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
//! Simple working XBRL parser
|
||||
|
||||
use crate::{model::*, Result};
|
||||
use std::path::Path;
|
||||
|
||||
/// Minimal heuristic XBRL parser that estimates document structure by
/// counting well-known tag prefixes instead of truly parsing XML.
#[derive(Default)]
pub struct Parser {
    // Reserved for future linkbase-loading support; not read yet.
    #[allow(dead_code)]
    load_linkbases: bool,
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn parse_str(&self, content: &str) -> Result<Document> {
|
||||
self.parse_bytes(content.as_bytes())
|
||||
}
|
||||
|
||||
pub fn parse_file<P: AsRef<Path>>(&self, path: P) -> Result<Document> {
|
||||
let content = std::fs::read(path)?;
|
||||
self.parse_bytes(&content)
|
||||
}
|
||||
|
||||
pub fn parse_bytes(&self, data: &[u8]) -> Result<Document> {
|
||||
// Simple XML parsing - just count elements for now
|
||||
let text = String::from_utf8_lossy(data);
|
||||
|
||||
// Count facts (very simplified)
|
||||
let fact_count = text.matches("<us-gaap:").count()
|
||||
+ text.matches("<dei:").count()
|
||||
+ text.matches("<ifrs:").count();
|
||||
|
||||
// Count contexts
|
||||
let context_count =
|
||||
text.matches("<context ").count() + text.matches("<xbrli:context").count();
|
||||
|
||||
// Count units
|
||||
let unit_count = text.matches("<unit ").count() + text.matches("<xbrli:unit").count();
|
||||
|
||||
// Create dummy document with approximate counts
|
||||
let mut doc = Document {
|
||||
facts: FactStorage {
|
||||
concept_ids: vec![0; fact_count],
|
||||
context_ids: vec![0; fact_count],
|
||||
unit_ids: vec![0; fact_count],
|
||||
values: vec![FactValue::Text(String::from("")); fact_count],
|
||||
decimals: vec![None; fact_count],
|
||||
ids: vec![None; fact_count],
|
||||
footnote_refs: vec![],
|
||||
},
|
||||
contexts: Vec::with_capacity(context_count),
|
||||
units: Vec::with_capacity(unit_count),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
};
|
||||
|
||||
// Add dummy contexts
|
||||
for i in 0..context_count {
|
||||
doc.contexts.push(Context {
|
||||
id: String::from(&format!("ctx{}", i)),
|
||||
entity: Entity {
|
||||
identifier: String::from("0000000000"),
|
||||
scheme: String::from("http://www.sec.gov/CIK"),
|
||||
segment: None,
|
||||
},
|
||||
period: Period::Instant {
|
||||
date: String::from("2023-12-31"),
|
||||
},
|
||||
scenario: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Add dummy units
|
||||
for i in 0..unit_count {
|
||||
doc.units.push(Unit {
|
||||
id: String::from(&format!("unit{}", i)),
|
||||
unit_type: UnitType::Simple(vec![Measure {
|
||||
namespace: String::from("iso4217"),
|
||||
name: String::from("USD"),
|
||||
}]),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(doc)
|
||||
}
|
||||
}
|
||||
49
rust/vendor/crabrl/src/taxonomy.rs
vendored
Normal file
49
rust/vendor/crabrl/src/taxonomy.rs
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
use crate::Result;
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// In-memory collection of loaded XBRL schemas and linkbases.
pub struct Taxonomy {
    // Schemas loaded so far (loading is currently stubbed out).
    pub schemas: Vec<Schema>,
    // Linkbases loaded so far (loading is currently stubbed out).
    pub linkbases: Vec<Linkbase>,
}
|
||||
|
||||
/// A single XBRL schema: its target namespace plus the elements it declares,
/// keyed by element name.
pub struct Schema {
    pub target_namespace: CompactString,
    pub elements: HashMap<CompactString, Element>,
}
|
||||
|
||||
/// An element declaration from a taxonomy schema.
pub struct Element {
    pub name: CompactString,
    pub element_type: CompactString,
    // Optional because not every element declares a substitution group.
    pub substitution_group: Option<CompactString>,
    // Typically "instant" or "duration" when present -- TODO confirm the
    // value vocabulary once schema loading is implemented.
    pub period_type: Option<CompactString>,
}
|
||||
|
||||
/// A linkbase: an extended-link role and the arcs defined under it.
pub struct Linkbase {
    pub role: CompactString,
    pub arcs: Vec<Arc>,
}
|
||||
|
||||
/// A relationship (arc) between two concepts in a linkbase.
///
/// NOTE(review): this type shadows `std::sync::Arc` within this module --
/// consider renaming (e.g. `LinkArc`) if `std::sync::Arc` is ever needed here.
pub struct Arc {
    pub from: CompactString,
    pub to: CompactString,
    // Presentation/ordering hint for the arc.
    pub order: f32,
    // Calculation weight; meaningful for calculation linkbases.
    pub weight: f32,
}
|
||||
|
||||
impl Taxonomy {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
schemas: Vec::new(),
|
||||
linkbases: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_schema(&mut self, _path: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_linkbase(&mut self, _path: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
601
rust/vendor/crabrl/src/validator.rs
vendored
Normal file
601
rust/vendor/crabrl/src/validator.rs
vendored
Normal file
@@ -0,0 +1,601 @@
|
||||
// Comprehensive XBRL validation
|
||||
use crate::{model::*, Error, Result};
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// A single validation finding. Findings are collected into a `Vec` (rather
/// than returned as `Err` one at a time) so one pass can report many issues.
#[derive(Debug, Clone)]
pub enum ValidationError {
    /// A fact's context id does not resolve to an entry in `Document::contexts`.
    InvalidContextRef {
        fact_index: usize,
        context_id: u16,
    },
    /// A fact's unit id does not resolve to an entry in `Document::units`.
    InvalidUnitRef {
        fact_index: usize,
        unit_id: u16,
    },
    /// A calculation-linkbase roll-up did not match the reported total.
    /// NOTE(review): not produced by any code in view -- confirm it is
    /// emitted elsewhere or slated for the calculation checks.
    CalculationInconsistency {
        concept: String,
        expected: f64,
        actual: f64,
    },
    /// A value does not conform to the type/format the concept requires.
    InvalidDataType {
        concept: String,
        expected_type: String,
        actual_value: String,
    },
    /// A mandatory element/context/statement is absent from the document.
    MissingRequiredElement {
        element: String,
    },
    /// The same id (context, unit, or fact key) appeared more than once.
    DuplicateId {
        id: String,
    },
}
|
||||
|
||||
/// Configurable XBRL document validator.
///
/// All `check_*` switches are enabled by the `Default` impl; `strict()`
/// promotes collected findings into a hard error from `validate`.
pub struct XbrlValidator {
    // When set, `validate` returns Err on any finding, and duplicate facts
    // are reported (see `check_duplicate_facts`).
    strict_mode: bool,
    // NOTE(review): set by Default but read nowhere in this file.
    #[allow(dead_code)]
    check_calculations: bool,
    // Enables duplicate-fact detection in `validate`.
    check_duplicates: bool,
    // Enables context validation in `validate`.
    check_contexts: bool,
    // Enables unit validation in `validate`.
    check_units: bool,
    // NOTE(review): set by Default but read nowhere in this file.
    #[allow(dead_code)]
    check_datatypes: bool,
    // Tolerance for calculation comparisons (set via `with_tolerance`).
    // NOTE(review): not consumed by any check currently in view.
    decimal_tolerance: f64,
}
|
||||
|
||||
impl Default for XbrlValidator {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
strict_mode: false,
|
||||
check_calculations: true,
|
||||
check_duplicates: true,
|
||||
check_contexts: true,
|
||||
check_units: true,
|
||||
check_datatypes: true,
|
||||
decimal_tolerance: 0.01,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl XbrlValidator {
    /// Creates a validator with the default (lenient) configuration.
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder-style switch: makes `validate` fail on any finding and
    /// enables duplicate-fact reporting.
    pub fn strict(mut self) -> Self {
        self.strict_mode = true;
        self
    }

    /// Builder-style override of the calculation tolerance.
    pub fn with_tolerance(mut self, tolerance: f64) -> Self {
        self.decimal_tolerance = tolerance;
        self
    }

    /// Runs every enabled check over `doc`.
    ///
    /// Findings are accumulated internally; only in strict mode do they
    /// become an `Err`. In lenient mode the collected findings are currently
    /// discarded and `Ok(())` is returned.
    ///
    /// NOTE(review): `doc` is taken by `&mut` but never mutated here --
    /// consider `&Document` unless mutation is planned.
    pub fn validate(&self, doc: &mut Document) -> Result<()> {
        let mut validation_errors = Vec::new();

        // Context validation (duplicate ids, entity, period sanity).
        if self.check_contexts {
            validation_errors.extend(self.validate_contexts(doc));
        }

        // Unit validation (duplicate ids, empty measures).
        if self.check_units {
            validation_errors.extend(self.validate_units(doc));
        }

        // Fact reference validation always runs (no flag guards it).
        validation_errors.extend(self.validate_facts(doc));

        // Duplicate (concept, context) fact detection.
        if self.check_duplicates {
            validation_errors.extend(self.check_duplicate_facts(doc));
        }

        // Strict mode: any finding aborts with a summary error.
        if self.strict_mode && !validation_errors.is_empty() {
            return Err(Error::Validation(format!(
                "Validation failed with {} errors",
                validation_errors.len()
            )));
        }

        Ok(())
    }

    /// Checks contexts for duplicate ids, missing entity identifiers,
    /// and inverted duration periods.
    fn validate_contexts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut context_ids = HashSet::new();

        for ctx in &doc.contexts {
            // `insert` returns false when the id was already present.
            if !context_ids.insert(ctx.id.clone()) {
                errors.push(ValidationError::DuplicateId {
                    id: ctx.id.to_string(),
                });
            }

            // Every context must carry an entity identifier.
            if ctx.entity.identifier.is_empty() {
                errors.push(ValidationError::MissingRequiredElement {
                    element: format!("Entity identifier for context {}", ctx.id),
                });
            }

            // Duration periods must not be inverted.
            // NOTE(review): `start > end` is a lexicographic string compare;
            // it is only correct for ISO-8601 (YYYY-MM-DD) dates -- confirm
            // the period fields are normalized to that format.
            if let Period::Duration { start, end } = &ctx.period {
                if start > end {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("context_{}", ctx.id),
                        expected_type: "valid period".to_string(),
                        actual_value: format!("start {} > end {}", start, end),
                    });
                }
            }
        }

        errors
    }

    /// Checks units for duplicate ids and empty measure lists in every
    /// unit-type variant.
    fn validate_units(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut unit_ids = HashSet::new();

        for unit in &doc.units {
            // Duplicate unit ids.
            if !unit_ids.insert(unit.id.clone()) {
                errors.push(ValidationError::DuplicateId {
                    id: unit.id.to_string(),
                });
            }

            // Each variant must carry at least one measure on every side.
            match &unit.unit_type {
                UnitType::Simple(measures) => {
                    if measures.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Measures for unit {}", unit.id),
                        });
                    }
                }
                UnitType::Divide {
                    numerator,
                    denominator,
                } => {
                    if numerator.is_empty() || denominator.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Numerator/denominator for unit {}", unit.id),
                        });
                    }
                }
                UnitType::Multiply(measures) => {
                    if measures.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Measures for unit {}", unit.id),
                        });
                    }
                }
            }
        }

        errors
    }

    /// Checks that every fact's context/unit reference resolves to an
    /// existing table entry.
    fn validate_facts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();

        for i in 0..doc.facts.len() {
            // Context ids are treated as 0-based indices into `doc.contexts`.
            if i < doc.facts.context_ids.len() {
                let context_id = doc.facts.context_ids[i];
                if context_id as usize >= doc.contexts.len() {
                    errors.push(ValidationError::InvalidContextRef {
                        fact_index: i,
                        context_id,
                    });
                }
            }

            if i < doc.facts.unit_ids.len() {
                let unit_id = doc.facts.unit_ids[i];
                // NOTE(review): this treats unit ids as 1-based (0 = "no
                // unit"; invalid only when id > len), while
                // `sec_validation_rules` indexes `doc.units` 0-based with the
                // same field. One of the two is off by one -- confirm the
                // intended id scheme before changing either comparison.
                if unit_id > 0 && unit_id as usize > doc.units.len() {
                    errors.push(ValidationError::InvalidUnitRef {
                        fact_index: i,
                        unit_id,
                    });
                }
            }
        }

        errors
    }

    /// Flags facts that share the same (concept, context) key.
    /// Duplicates are only reported when strict mode is on -- in lenient
    /// mode the set is still built but nothing is pushed (intentional?
    /// NOTE(review): confirm; the `check_duplicates` flag already gates
    /// this method in `validate`).
    fn check_duplicate_facts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut fact_keys = HashSet::new();

        for i in 0..doc.facts.len() {
            if i < doc.facts.concept_ids.len() && i < doc.facts.context_ids.len() {
                let key = (doc.facts.concept_ids[i], doc.facts.context_ids[i]);
                if !fact_keys.insert(key) && self.strict_mode {
                    errors.push(ValidationError::DuplicateId {
                        id: format!("Duplicate fact at index {}", i),
                    });
                }
            }
        }

        errors
    }
}
|
||||
|
||||
// A boxed, dynamically-dispatched validation rule: inspects a document and
// returns any findings.
type ValidationRule = Box<dyn Fn(&Document) -> Vec<ValidationError>>;

/// Profile-driven validation entry point: runs the profile-specific rule
/// set plus any caller-registered custom rules (see `validate`).
pub struct ValidationContext {
    pub profile: ValidationProfile,
    pub custom_rules: Vec<ValidationRule>,
}
|
||||
|
||||
/// Selects which built-in rule set `ValidationContext::validate` applies.
/// Only `SecEdgar` and `Ifrs` currently have dedicated rules; the other
/// profiles fall through to custom rules only.
#[derive(Debug, Clone, Copy)]
pub enum ValidationProfile {
    Generic,
    SecEdgar,
    Ifrs,
    UsGaap,
}
|
||||
|
||||
impl ValidationContext {
|
||||
pub fn new(profile: ValidationProfile) -> Self {
|
||||
Self {
|
||||
profile,
|
||||
custom_rules: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_rule<F>(&mut self, rule: F)
|
||||
where
|
||||
F: Fn(&Document) -> Vec<ValidationError> + 'static,
|
||||
{
|
||||
self.custom_rules.push(Box::new(rule));
|
||||
}
|
||||
|
||||
pub fn validate(&self, doc: &Document) -> Vec<ValidationError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
// Apply profile-specific rules
|
||||
match self.profile {
|
||||
ValidationProfile::SecEdgar => {
|
||||
errors.extend(sec_validation_rules(doc));
|
||||
}
|
||||
ValidationProfile::Ifrs => {
|
||||
errors.extend(ifrs_validation_rules(doc));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Apply custom rules
|
||||
for rule in &self.custom_rules {
|
||||
errors.extend(rule(doc));
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
}
|
||||
|
||||
/// SEC EDGAR-specific validation rules: DEI presence, CIK format, segment
/// dimension sanity, and basic monetary-value plausibility.
pub fn sec_validation_rules(doc: &Document) -> Vec<ValidationError> {
    let mut errors = Vec::new();

    // Presence flags gathered across the whole document, checked at the end.
    let mut has_current_period = false;
    let mut has_entity_info = false;
    let mut has_dei_elements = false;

    for ctx in &doc.contexts {
        // Heuristic: a "current period" context is recognized by id naming
        // conventions only.
        if ctx.id.contains("CurrentYear")
            || ctx.id.contains("CurrentPeriod")
            || ctx.id.contains("DocumentPeriodEndDate")
        {
            has_current_period = true;
        }

        // CIK identifiers under the SEC scheme must be exactly 10 digits.
        if ctx.entity.scheme.contains("sec.gov/CIK") {
            has_entity_info = true;
            let cik = &ctx.entity.identifier;
            if cik.len() != 10 || !cik.chars().all(|c| c.is_ascii_digit()) {
                errors.push(ValidationError::InvalidDataType {
                    concept: "CIK".to_string(),
                    expected_type: "10-digit number".to_string(),
                    actual_value: cik.to_string(),
                });
            }
        }
    }

    // Look for DEI elements among the facts.
    // NOTE(review): this indexes `concept_names` by fact index, assuming it
    // is parallel to `facts.concept_ids` -- elsewhere concept ids look like
    // indices into a name table; confirm which layout is intended.
    for i in 0..doc.facts.concept_ids.len() {
        if i < doc.concept_names.len() {
            let concept = &doc.concept_names[i];
            if concept.contains("dei:")
                || concept.contains("DocumentType")
                || concept.contains("EntityRegistrantName")
            {
                has_dei_elements = true;
            }
        }
    }

    // Required-element findings from the presence flags above.
    if !has_current_period {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Current period context required for SEC filing".to_string(),
        });
    }

    if !has_entity_info {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Entity CIK information required for SEC filing".to_string(),
        });
    }

    if !has_dei_elements {
        errors.push(ValidationError::MissingRequiredElement {
            element: "DEI (Document and Entity Information) elements required".to_string(),
        });
    }

    // Segment reporting: explicit members must name both a dimension and
    // a member.
    for ctx in &doc.contexts {
        if let Some(segment) = &ctx.entity.segment {
            for member in &segment.explicit_members {
                if member.dimension.is_empty() || member.member.is_empty() {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("segment_{}", ctx.id),
                        expected_type: "valid dimension member".to_string(),
                        actual_value: format!("{}:{}", member.dimension, member.member),
                    });
                }
            }
        }
    }

    // Collect decimal facts measured in USD.
    // NOTE(review): `unit_ids[i]` is used here as a 0-based index into
    // `doc.units`, which conflicts with the 1-based treatment in
    // `XbrlValidator::validate_facts` -- confirm the id scheme.
    let mut monetary_facts: Vec<(usize, f64)> = Vec::new();
    for i in 0..doc.facts.len() {
        if i < doc.facts.values.len() {
            if let FactValue::Decimal(val) = &doc.facts.values[i] {
                if i < doc.facts.unit_ids.len() {
                    let unit_id = doc.facts.unit_ids[i] as usize;
                    if unit_id < doc.units.len() {
                        if let UnitType::Simple(measures) = &doc.units[unit_id].unit_type {
                            if measures.iter().any(|m| m.name == "USD" || m.name == "usd") {
                                monetary_facts.push((i, *val));
                            }
                        }
                    }
                }
            }
        }
    }

    // Plausibility checks on the monetary values collected above.
    for (idx, value) in monetary_facts {
        // NaN/infinity can never be a valid monetary amount.
        if value.is_nan() || value.is_infinite() {
            errors.push(ValidationError::InvalidDataType {
                concept: format!("fact_{}", idx),
                expected_type: "valid monetary amount".to_string(),
                actual_value: format!("{}", value),
            });
        }
        // Heuristic ceiling: flag amounts above $10 trillion as suspicious.
        if value.abs() > 10_000_000_000_000.0 {
            errors.push(ValidationError::InvalidDataType {
                concept: format!("fact_{}", idx),
                expected_type: "reasonable monetary amount".to_string(),
                actual_value: format!("${:.2}", value),
            });
        }
    }

    errors
}
|
||||
|
||||
// IFRS specific validation rules
|
||||
pub fn ifrs_validation_rules(doc: &Document) -> Vec<ValidationError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
// Check for IFRS-required contexts
|
||||
let mut has_reporting_period = false;
|
||||
let mut has_comparative_period = false;
|
||||
let mut has_entity_info = false;
|
||||
|
||||
for ctx in &doc.contexts {
|
||||
// Check for reporting period
|
||||
match &ctx.period {
|
||||
Period::Duration { start, end: _ } => {
|
||||
has_reporting_period = true;
|
||||
// IFRS requires comparative information
|
||||
if start.contains("PY")
|
||||
|| ctx.id.contains("PriorYear")
|
||||
|| ctx.id.contains("Comparative")
|
||||
{
|
||||
has_comparative_period = true;
|
||||
}
|
||||
}
|
||||
Period::Instant { date } => {
|
||||
if !date.is_empty() {
|
||||
has_reporting_period = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Validate entity information
|
||||
if !ctx.entity.identifier.is_empty() {
|
||||
has_entity_info = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Required contexts validation
|
||||
if !has_reporting_period {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Reporting period required for IFRS filing".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_comparative_period {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Comparative period information required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_entity_info {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Entity identification required for IFRS filing".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate dimensional structure
|
||||
let mut dimension_validations = Vec::new();
|
||||
for ctx in &doc.contexts {
|
||||
// Check segment dimensions
|
||||
if let Some(segment) = &ctx.entity.segment {
|
||||
for member in &segment.explicit_members {
|
||||
// IFRS dimensions should follow specific patterns
|
||||
if !member.dimension.contains(":") {
|
||||
dimension_validations
|
||||
.push(format!("Invalid dimension format: {}", member.dimension));
|
||||
}
|
||||
if member.dimension.contains("ifrs") || member.dimension.contains("ifrs-full") {
|
||||
// Valid IFRS dimension
|
||||
if member.member.is_empty() {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("dimension_{}", ctx.id),
|
||||
expected_type: "valid IFRS dimension member".to_string(),
|
||||
actual_value: member.dimension.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check typed members for IFRS compliance
|
||||
for typed in &segment.typed_members {
|
||||
if typed.dimension.contains("ifrs") && typed.value.is_empty() {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("typed_dimension_{}", ctx.id),
|
||||
expected_type: "non-empty typed dimension value".to_string(),
|
||||
actual_value: typed.dimension.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check scenario dimensions (alternative to segment)
|
||||
if let Some(scenario) = &ctx.scenario {
|
||||
for member in &scenario.explicit_members {
|
||||
if member.dimension.contains("ifrs") && member.member.is_empty() {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("scenario_dimension_{}", ctx.id),
|
||||
expected_type: "valid IFRS scenario member".to_string(),
|
||||
actual_value: member.dimension.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for mandatory IFRS disclosures in facts
|
||||
let mut has_financial_position = false;
|
||||
let mut has_comprehensive_income = false;
|
||||
let mut has_cash_flows = false;
|
||||
let mut has_changes_in_equity = false;
|
||||
|
||||
for i in 0..doc.concept_names.len() {
|
||||
let concept = &doc.concept_names[i];
|
||||
let lower = concept.to_lowercase();
|
||||
|
||||
if lower.contains("financialposition")
|
||||
|| lower.contains("balancesheet")
|
||||
|| lower.contains("assets")
|
||||
|| lower.contains("liabilities")
|
||||
{
|
||||
has_financial_position = true;
|
||||
}
|
||||
|
||||
if lower.contains("comprehensiveincome")
|
||||
|| lower.contains("profitorloss")
|
||||
|| lower.contains("income")
|
||||
|| lower.contains("revenue")
|
||||
{
|
||||
has_comprehensive_income = true;
|
||||
}
|
||||
|
||||
if lower.contains("cashflow") || lower.contains("cashflows") {
|
||||
has_cash_flows = true;
|
||||
}
|
||||
|
||||
if lower.contains("changesinequity") || lower.contains("equity") {
|
||||
has_changes_in_equity = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Validate mandatory statements
|
||||
if !has_financial_position {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Statement of Financial Position required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_comprehensive_income {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Statement of Comprehensive Income required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_cash_flows {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Statement of Cash Flows required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if !has_changes_in_equity {
|
||||
errors.push(ValidationError::MissingRequiredElement {
|
||||
element: "Statement of Changes in Equity required by IFRS".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate presentation linkbase relationships
|
||||
for link in &doc.presentation_links {
|
||||
// Check order is valid (typically 1.0 to 999.0)
|
||||
if link.order < 0.0 || link.order > 1000.0 {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("presentation_link_{}_{}", link.from, link.to),
|
||||
expected_type: "valid presentation order (0-1000)".to_string(),
|
||||
actual_value: format!("{}", link.order),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Validate calculation relationships
|
||||
for link in &doc.calculation_links {
|
||||
// Check weight is reasonable (-1.0 or 1.0 typically)
|
||||
if link.weight != 1.0 && link.weight != -1.0 && link.weight != 0.0 {
|
||||
// Unusual weight, might be an error
|
||||
if link.weight.abs() > 10.0 {
|
||||
errors.push(ValidationError::InvalidDataType {
|
||||
concept: format!("calculation_link_{}_{}", link.from, link.to),
|
||||
expected_type: "reasonable calculation weight".to_string(),
|
||||
actual_value: format!("{}", link.weight),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
Reference in New Issue
Block a user