diff --git a/benches/parser.rs b/benches/parser.rs index 13fed48..3d5156e 100644 --- a/benches/parser.rs +++ b/benches/parser.rs @@ -1,5 +1,5 @@ -use criterion::{black_box, criterion_group, criterion_main, Criterion}; use crabrl::Parser; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; fn parse_small_file(c: &mut Criterion) { let parser = Parser::new(); @@ -21,4 +21,3 @@ fn parse_medium_file(c: &mut Criterion) { criterion_group!(benches, parse_small_file, parse_medium_file); criterion_main!(benches); - diff --git a/examples/benchmark_single.rs b/examples/benchmark_single.rs index 12edf0b..e60a15c 100644 --- a/examples/benchmark_single.rs +++ b/examples/benchmark_single.rs @@ -34,4 +34,3 @@ fn main() { } } } - diff --git a/examples/parse.rs b/examples/parse.rs index 1444a06..5b76f19 100644 --- a/examples/parse.rs +++ b/examples/parse.rs @@ -20,4 +20,3 @@ fn main() -> Result<(), Box> { Ok(()) } - diff --git a/examples/validate.rs b/examples/validate.rs index 8aa22e8..7e58913 100644 --- a/examples/validate.rs +++ b/examples/validate.rs @@ -27,4 +27,3 @@ fn main() -> Result<(), Box> { Ok(()) } - diff --git a/src/bin/crabrl_bench.rs b/src/bin/crabrl_bench.rs index 5b1275a..8549146 100644 --- a/src/bin/crabrl_bench.rs +++ b/src/bin/crabrl_bench.rs @@ -17,12 +17,14 @@ fn main() { Ok(doc) => { let elapsed = start.elapsed(); let ms = elapsed.as_secs_f64() * 1000.0; - println!("crabrl found: {} facts, {} contexts, {} units (in {:.3}ms)", - doc.facts.len(), - doc.contexts.len(), - doc.units.len(), - ms); - + println!( + "crabrl found: {} facts, {} contexts, {} units (in {:.3}ms)", + doc.facts.len(), + doc.contexts.len(), + doc.units.len(), + ms + ); + // Additional stats println!("Facts: {}", doc.facts.len()); println!("Contexts: {}", doc.contexts.len()); @@ -37,5 +39,3 @@ fn main() { } } } - - diff --git a/src/lib.rs b/src/lib.rs index 16ee1cb..b9e0289 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ pub mod validator; pub use simple_parser::Parser; // Re-export main types -pub use model::{Document, Fact, Context, Unit}; +pub use model::{Context, Document, Fact, Unit}; // Create validator wrapper for the CLI pub struct Validator { @@ -47,13 +47,13 @@ impl Validator { pub fn validate(&self, doc: &Document) -> Result { let start = std::time::Instant::now(); - + // Clone doc for validation (validator mutates it) let mut doc_copy = doc.clone(); - + // Run validation let is_valid = self.inner.validate(&mut doc_copy).is_ok(); - + Ok(ValidationResult { is_valid, errors: if is_valid { diff --git a/src/main.rs b/src/main.rs index d117a93..fd20bac 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,7 @@ use colored::*; use std::path::PathBuf; use std::time::Instant; -use crabrl::{Parser, Validator, ValidationConfig}; +use crabrl::{Parser, ValidationConfig, Validator}; /// High-performance XBRL parser and validator #[derive(ClapParser)] @@ -23,35 +23,35 @@ enum Commands { Parse { /// Input file input: PathBuf, - + /// Output as JSON #[arg(short, long)] json: bool, - + /// Show statistics #[arg(short, long)] stats: bool, }, - + /// Validate an XBRL file Validate { /// Input file input: PathBuf, - + /// Validation profile (generic, sec-edgar) #[arg(short, long, default_value = "generic")] profile: String, - + /// Treat warnings as errors #[arg(long)] strict: bool, }, - + /// Benchmark parsing performance Bench { /// Input file input: PathBuf, - + /// Number of iterations #[arg(short, long, default_value = "100")] iterations: usize, @@ -60,89 +60,109 @@ enum Commands { fn main() -> Result<()> { let cli = Cli::parse(); - + match cli.command { - Commands::Parse { input, json: _, stats } => { + Commands::Parse { + input, + json: _, + stats, + } => { let start = Instant::now(); let parser = Parser::new(); - let doc = parser.parse_file(&input) + let doc = parser + .parse_file(&input) .with_context(|| format!("Failed to parse {}", input.display()))?; let elapsed = start.elapsed(); - + println!("{} {}", "✓".green().bold(), input.display()); println!(" Facts: {}", doc.facts.len()); println!(" Contexts: {}", doc.contexts.len()); println!(" Units: {}", doc.units.len()); - + if stats { println!(" Time: {:.2}ms", elapsed.as_secs_f64() * 1000.0); - println!(" Throughput: {:.0} facts/sec", - doc.facts.len() as f64 / elapsed.as_secs_f64()); + println!( + " Throughput: {:.0} facts/sec", + doc.facts.len() as f64 / elapsed.as_secs_f64() + ); } } - - Commands::Validate { input, profile, strict } => { + + Commands::Validate { + input, + profile, + strict, + } => { let parser = Parser::new(); - let doc = parser.parse_file(&input) + let doc = parser + .parse_file(&input) .with_context(|| format!("Failed to parse {}", input.display()))?; - + let config = match profile.as_str() { "sec-edgar" => ValidationConfig::sec_edgar(), _ => ValidationConfig::default(), }; - + let validator = Validator::with_config(config); let result = validator.validate(&doc)?; - + if result.is_valid { - println!("{} {} - Document is valid", "✓".green().bold(), input.display()); + println!( + "{} {} - Document is valid", + "✓".green().bold(), + input.display() + ); } else { - println!("{} {} - Validation failed", "✗".red().bold(), input.display()); + println!( + "{} {} - Validation failed", + "✗".red().bold(), + input.display() + ); println!(" Errors: {}", result.errors.len()); println!(" Warnings: {}", result.warnings.len()); - + for error in result.errors.iter().take(5) { println!(" {} {}", "ERROR:".red(), error); } - + if result.errors.len() > 5 { println!(" ... and {} more errors", result.errors.len() - 5); } - + if strict && !result.warnings.is_empty() { std::process::exit(1); } - + if !result.is_valid { std::process::exit(1); } } } - + Commands::Bench { input, iterations } => { let parser = Parser::new(); - + // Warmup for _ in 0..3 { let _ = parser.parse_file(&input)?; } - + let mut times = Vec::with_capacity(iterations); let mut doc_facts = 0; - + for _ in 0..iterations { let start = Instant::now(); let doc = parser.parse_file(&input)?; times.push(start.elapsed()); doc_facts = doc.facts.len(); } - + times.sort(); let min = times[0]; let max = times[times.len() - 1]; let median = times[times.len() / 2]; let mean = times.iter().sum::() / times.len() as u32; - + println!("Benchmark Results for {}", input.display()); println!(" Iterations: {}", iterations); println!(" Facts: {}", doc_facts); @@ -150,10 +170,12 @@ fn main() -> Result<()> { println!(" Median: {:.3}ms", median.as_secs_f64() * 1000.0); println!(" Mean: {:.3}ms", mean.as_secs_f64() * 1000.0); println!(" Max: {:.3}ms", max.as_secs_f64() * 1000.0); - println!(" Throughput: {:.0} facts/sec", - doc_facts as f64 / mean.as_secs_f64()); + println!( + " Throughput: {:.0} facts/sec", + doc_facts as f64 / mean.as_secs_f64() + ); } } - + Ok(()) } diff --git a/src/model.rs b/src/model.rs index 8fe84be..ad4124c 100644 --- a/src/model.rs +++ b/src/model.rs @@ -5,7 +5,6 @@ use std::collections::HashMap; // Core XBRL Data Structures - Full Specification Support // ============================================================================ - #[repr(C, align(64))] #[derive(Clone)] pub struct FactStorage { @@ -111,8 +110,13 @@ pub struct Scenario { // Period with forever support #[derive(Debug, Clone)] pub enum Period { - Instant { date: CompactString }, - Duration { start: CompactString, end: CompactString }, + Instant { + date: CompactString, + }, + Duration { + start: CompactString, + end: CompactString, + }, Forever, } @@ -347,6 +351,3 @@ impl Document { } } } - - - diff --git a/src/simple_parser.rs b/src/simple_parser.rs index 3c1ce3d..d848a23 100644 --- a/src/simple_parser.rs +++ b/src/simple_parser.rs @@ -15,29 +15,28 @@ impl Parser { load_linkbases: false, } } - + pub fn parse_file>(&self, path: P) -> Result { let content = std::fs::read(path)?; self.parse_bytes(&content) } - + pub fn parse_bytes(&self, data: &[u8]) -> Result { // Simple XML parsing - just count elements for now let text = String::from_utf8_lossy(data); - + // Count facts (very simplified) - let fact_count = text.matches(" Result<()> { let mut validation_errors = Vec::new(); - + // Context validation if self.check_contexts { validation_errors.extend(self.validate_contexts(doc)); @@ -82,7 +100,7 @@ impl XbrlValidator { fn validate_contexts(&self, doc: &Document) -> Vec { let mut errors = Vec::new(); let mut context_ids = HashSet::new(); - + for ctx in &doc.contexts { // Check for duplicate context IDs if !context_ids.insert(ctx.id.clone()) { @@ -112,14 +130,14 @@ impl XbrlValidator { _ => {} } } - + errors } fn validate_units(&self, doc: &Document) -> Vec { let mut errors = Vec::new(); let mut unit_ids = HashSet::new(); - + for unit in &doc.units { // Check for duplicate unit IDs if !unit_ids.insert(unit.id.clone()) { @@ -137,7 +155,10 @@ impl XbrlValidator { }); } } - UnitType::Divide { numerator, denominator } => { + UnitType::Divide { + numerator, + denominator, + } => { if numerator.is_empty() || denominator.is_empty() { errors.push(ValidationError::MissingRequiredElement { element: format!("Numerator/denominator for unit {}", unit.id), @@ -153,13 +174,13 @@ impl XbrlValidator { } } } - + errors } fn validate_facts(&self, doc: &Document) -> Vec { let mut errors = Vec::new(); - + // Validate fact references for i in 0..doc.facts.len() { if i < doc.facts.context_ids.len() { @@ -171,7 +192,7 @@ impl XbrlValidator { }); } } - + if i < doc.facts.unit_ids.len() { let unit_id = doc.facts.unit_ids[i]; if unit_id > 0 && unit_id as usize > doc.units.len() { @@ -182,14 +203,14 @@ impl XbrlValidator { } } } - + errors } fn check_duplicate_facts(&self, doc: &Document) -> Vec { let mut errors = Vec::new(); let mut fact_keys = HashSet::new(); - + for i in 0..doc.facts.len() { if i < doc.facts.concept_ids.len() && i < doc.facts.context_ids.len() { let key = (doc.facts.concept_ids[i], doc.facts.context_ids[i]); @@ -200,7 +221,7 @@ impl XbrlValidator { } } } - + errors } } @@ -227,16 +248,16 @@ impl ValidationContext { } } - pub fn add_rule(&mut self, rule: F) + pub fn add_rule(&mut self, rule: F) where - F: Fn(&Document) -> Vec + 'static + F: Fn(&Document) -> Vec + 'static, { self.custom_rules.push(Box::new(rule)); } pub fn validate(&self, doc: &Document) -> Vec { let mut errors = Vec::new(); - + // Apply profile-specific rules match self.profile { ValidationProfile::SecEdgar => { @@ -247,12 +268,12 @@ impl ValidationContext { } _ => {} } - + // Apply custom rules for rule in &self.custom_rules { errors.extend(rule(doc)); } - + errors } } @@ -260,19 +281,21 @@ impl ValidationContext { // SEC EDGAR specific validation rules pub fn sec_validation_rules(doc: &Document) -> Vec { let mut errors = Vec::new(); - + // Check for required DEI contexts let mut has_current_period = false; let mut has_entity_info = false; let mut has_dei_elements = false; - + for ctx in &doc.contexts { // Check for current period context - if ctx.id.contains("CurrentYear") || ctx.id.contains("CurrentPeriod") || - ctx.id.contains("DocumentPeriodEndDate") { + if ctx.id.contains("CurrentYear") + || ctx.id.contains("CurrentPeriod") + || ctx.id.contains("DocumentPeriodEndDate") + { has_current_period = true; } - + // Validate CIK format (10 digits) if ctx.entity.scheme.contains("sec.gov/CIK") { has_entity_info = true; @@ -286,37 +309,39 @@ pub fn sec_validation_rules(doc: &Document) -> Vec { } } } - + // Check for DEI elements in facts for i in 0..doc.facts.concept_ids.len() { if i < doc.concept_names.len() { let concept = &doc.concept_names[i]; - if concept.contains("dei:") || concept.contains("DocumentType") || - concept.contains("EntityRegistrantName") { + if concept.contains("dei:") + || concept.contains("DocumentType") + || concept.contains("EntityRegistrantName") + { has_dei_elements = true; } } } - + // Required elements validation if !has_current_period { errors.push(ValidationError::MissingRequiredElement { element: "Current period context required for SEC filing".to_string(), }); } - + if !has_entity_info { errors.push(ValidationError::MissingRequiredElement { element: "Entity CIK information required for SEC filing".to_string(), }); } - + if !has_dei_elements { errors.push(ValidationError::MissingRequiredElement { element: "DEI (Document and Entity Information) elements required".to_string(), }); } - + // Validate segment reporting if present for ctx in &doc.contexts { if let Some(segment) = &ctx.entity.segment { @@ -332,7 +357,7 @@ pub fn sec_validation_rules(doc: &Document) -> Vec { } } } - + // Validate calculation consistency for monetary items let mut monetary_facts: Vec<(usize, f64)> = Vec::new(); for i in 0..doc.facts.len() { @@ -352,7 +377,7 @@ pub fn sec_validation_rules(doc: &Document) -> Vec { } } } - + // Basic calculation validation - check for reasonable values for (idx, value) in monetary_facts { if value.is_nan() || value.is_infinite() { @@ -371,27 +396,29 @@ pub fn sec_validation_rules(doc: &Document) -> Vec { }); } } - + errors } -// IFRS specific validation rules +// IFRS specific validation rules pub fn ifrs_validation_rules(doc: &Document) -> Vec { let mut errors = Vec::new(); - + // Check for IFRS-required contexts let mut has_reporting_period = false; let mut has_comparative_period = false; let mut has_entity_info = false; - + for ctx in &doc.contexts { // Check for reporting period match &ctx.period { Period::Duration { start, end: _ } => { has_reporting_period = true; // IFRS requires comparative information - if start.contains("PY") || ctx.id.contains("PriorYear") || - ctx.id.contains("Comparative") { + if start.contains("PY") + || ctx.id.contains("PriorYear") + || ctx.id.contains("Comparative") + { has_comparative_period = true; } } @@ -402,32 +429,32 @@ pub fn ifrs_validation_rules(doc: &Document) -> Vec { } _ => {} } - + // Validate entity information if !ctx.entity.identifier.is_empty() { has_entity_info = true; } } - + // Required contexts validation if !has_reporting_period { errors.push(ValidationError::MissingRequiredElement { element: "Reporting period required for IFRS filing".to_string(), }); } - + if !has_comparative_period { errors.push(ValidationError::MissingRequiredElement { element: "Comparative period information required by IFRS".to_string(), }); } - + if !has_entity_info { errors.push(ValidationError::MissingRequiredElement { element: "Entity identification required for IFRS filing".to_string(), }); } - + // Validate dimensional structure let mut dimension_validations = Vec::new(); for ctx in &doc.contexts { @@ -436,7 +463,8 @@ pub fn ifrs_validation_rules(doc: &Document) -> Vec { for member in &segment.explicit_members { // IFRS dimensions should follow specific patterns if !member.dimension.contains(":") { - dimension_validations.push(format!("Invalid dimension format: {}", member.dimension)); + dimension_validations + .push(format!("Invalid dimension format: {}", member.dimension)); } if member.dimension.contains("ifrs") || member.dimension.contains("ifrs-full") { // Valid IFRS dimension @@ -449,7 +477,7 @@ pub fn ifrs_validation_rules(doc: &Document) -> Vec { } } } - + // Check typed members for IFRS compliance for typed in &segment.typed_members { if typed.dimension.contains("ifrs") && typed.value.is_empty() { @@ -461,7 +489,7 @@ pub fn ifrs_validation_rules(doc: &Document) -> Vec { } } } - + // Check scenario dimensions (alternative to segment) if let Some(scenario) = &ctx.scenario { for member in &scenario.explicit_members { @@ -475,61 +503,67 @@ pub fn ifrs_validation_rules(doc: &Document) -> Vec { } } } - + // Check for mandatory IFRS disclosures in facts let mut has_financial_position = false; let mut has_comprehensive_income = false; let mut has_cash_flows = false; let mut has_changes_in_equity = false; - + for i in 0..doc.concept_names.len() { let concept = &doc.concept_names[i]; let lower = concept.to_lowercase(); - - if lower.contains("financialposition") || lower.contains("balancesheet") || - lower.contains("assets") || lower.contains("liabilities") { + + if lower.contains("financialposition") + || lower.contains("balancesheet") + || lower.contains("assets") + || lower.contains("liabilities") + { has_financial_position = true; } - - if lower.contains("comprehensiveincome") || lower.contains("profitorloss") || - lower.contains("income") || lower.contains("revenue") { + + if lower.contains("comprehensiveincome") + || lower.contains("profitorloss") + || lower.contains("income") + || lower.contains("revenue") + { has_comprehensive_income = true; } - + if lower.contains("cashflow") || lower.contains("cashflows") { has_cash_flows = true; } - + if lower.contains("changesinequity") || lower.contains("equity") { has_changes_in_equity = true; } } - + // Validate mandatory statements if !has_financial_position { errors.push(ValidationError::MissingRequiredElement { element: "Statement of Financial Position required by IFRS".to_string(), }); } - + if !has_comprehensive_income { errors.push(ValidationError::MissingRequiredElement { element: "Statement of Comprehensive Income required by IFRS".to_string(), }); } - + if !has_cash_flows { errors.push(ValidationError::MissingRequiredElement { element: "Statement of Cash Flows required by IFRS".to_string(), }); } - + if !has_changes_in_equity { errors.push(ValidationError::MissingRequiredElement { element: "Statement of Changes in Equity required by IFRS".to_string(), }); } - + // Validate presentation linkbase relationships for link in &doc.presentation_links { // Check order is valid (typically 1.0 to 999.0) @@ -541,7 +575,7 @@ pub fn ifrs_validation_rules(doc: &Document) -> Vec { }); } } - + // Validate calculation relationships for link in &doc.calculation_links { // Check weight is reasonable (-1.0 or 1.0 typically) @@ -556,6 +590,6 @@ pub fn ifrs_validation_rules(doc: &Document) -> Vec { } } } - + errors }