// Schema loading and validation for XBRL use crate::model::*; use crate::validator::ValidationError; use crate::{Error, Result}; use compact_str::CompactString; use std::collections::HashMap; use std::path::Path; pub struct SchemaLoader { cache: HashMap, } impl Default for SchemaLoader { fn default() -> Self { Self::new() } } impl SchemaLoader { pub fn new() -> Self { Self { cache: HashMap::new(), } } pub fn load_schema>(&mut self, path: P) -> Result<&Schema> { let path_str = path.as_ref().to_string_lossy(); let key = CompactString::from(path_str.as_ref()); if self.cache.contains_key(&key) { return Ok(self.cache.get(&key).unwrap()); } let schema = self.parse_schema_file(path)?; self.cache.insert(key.clone(), schema); Ok(self.cache.get(&key).unwrap()) } fn parse_schema_file>(&self, path: P) -> Result { let content = std::fs::read(path)?; self.parse_schema_bytes(&content) } fn parse_schema_bytes(&self, data: &[u8]) -> Result { // Simple XML parsing for schema let mut schema = Schema { target_namespace: CompactString::new(""), elements: HashMap::new(), types: HashMap::new(), imports: Vec::new(), }; // Skip BOM if present let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) { &data[3..] } else { data }; let text = std::str::from_utf8(data) .map_err(|_| Error::Parse("Invalid UTF-8 in schema".to_string()))?; // Extract target namespace if let Some(ns_start) = text.find("targetNamespace=\"") { let ns_start = ns_start + 17; if let Some(ns_end) = text[ns_start..].find('"') { schema.target_namespace = CompactString::from(&text[ns_start..ns_start + ns_end]); } } // Parse elements let mut pos = 0; while let Some(elem_start) = text[pos..].find("") { elem_start + end + 2 } else if let Some(end) = text[elem_start..].find("") { elem_start + end + 13 } else { continue; }; let elem_text = &text[elem_start..elem_end]; // Extract element attributes let mut element = SchemaElement { name: CompactString::new(""), element_type: CompactString::new(""), substitution_group: None, period_type: None, balance: None, abstract_element: elem_text.contains("abstract=\"true\""), nillable: elem_text.contains("nillable=\"true\""), }; // Extract name if let Some(name_start) = elem_text.find("name=\"") { let name_start = name_start + 6; if let Some(name_end) = elem_text[name_start..].find('"') { element.name = CompactString::from(&elem_text[name_start..name_start + name_end]); } } // Extract type if let Some(type_start) = elem_text.find("type=\"") { let type_start = type_start + 6; if let Some(type_end) = elem_text[type_start..].find('"') { element.element_type = CompactString::from(&elem_text[type_start..type_start + type_end]); } } // Extract substitutionGroup if let Some(sg_start) = elem_text.find("substitutionGroup=\"") { let sg_start = sg_start + 19; if let Some(sg_end) = elem_text[sg_start..].find('"') { element.substitution_group = Some(CompactString::from(&elem_text[sg_start..sg_start + sg_end])); } } // Extract XBRL-specific attributes if let Some(pt_start) = elem_text.find("xbrli:periodType=\"") { let pt_start = pt_start + 18; if let Some(pt_end) = elem_text[pt_start..].find('"') { element.period_type = Some(CompactString::from(&elem_text[pt_start..pt_start + pt_end])); } } if let Some(bal_start) = elem_text.find("xbrli:balance=\"") { let bal_start = bal_start + 15; if let Some(bal_end) = elem_text[bal_start..].find('"') { element.balance = Some(CompactString::from( &elem_text[bal_start..bal_start + bal_end], )); } } if !element.name.is_empty() { schema.elements.insert(element.name.clone(), element); } } // Parse imports pos = 0; while let Some(import_start) = text[pos..].find("") { let import_text = &text[import_start..import_start + import_end]; let mut import = SchemaImport { namespace: CompactString::new(""), schema_location: CompactString::new(""), }; if let Some(ns_start) = import_text.find("namespace=\"") { let ns_start = ns_start + 11; if let Some(ns_end) = import_text[ns_start..].find('"') { import.namespace = CompactString::from(&import_text[ns_start..ns_start + ns_end]); } } if let Some(loc_start) = import_text.find("schemaLocation=\"") { let loc_start = loc_start + 16; if let Some(loc_end) = import_text[loc_start..].find('"') { import.schema_location = CompactString::from(&import_text[loc_start..loc_start + loc_end]); } } schema.imports.push(import); } } Ok(schema) } pub fn validate_element(&self, name: &str, value: &str, schema: &Schema) -> Result<()> { if let Some(element) = schema.elements.get(name) { // Check if element is abstract if element.abstract_element { return Err(Error::Validation(format!("Element {} is abstract", name))); } // Validate type if let Some(type_def) = schema.types.get(&element.element_type) { self.validate_type(value, type_def)?; } Ok(()) } else { // Element not found in schema - might be from imported schema Ok(()) } } fn validate_type(&self, value: &str, type_def: &SchemaType) -> Result<()> { for restriction in &type_def.restrictions { match restriction { TypeRestriction::MinInclusive(min) => { if let (Ok(val), Ok(min_val)) = (value.parse::(), min.parse::()) { if val < min_val { return Err(Error::Validation(format!( "Value {} is less than minimum {}", val, min_val ))); } } } TypeRestriction::MaxInclusive(max) => { if let (Ok(val), Ok(max_val)) = (value.parse::(), max.parse::()) { if val > max_val { return Err(Error::Validation(format!( "Value {} is greater than maximum {}", val, max_val ))); } } } TypeRestriction::Pattern(pattern) => { if !value.contains(pattern.as_str()) { return Err(Error::Validation(format!( "Value {} doesn't match pattern {}", value, pattern ))); } } TypeRestriction::MinLength(min) => { if value.len() < *min { return Err(Error::Validation(format!( "Value length {} is less than minimum {}", value.len(), min ))); } } TypeRestriction::MaxLength(max) => { if value.len() > *max { return Err(Error::Validation(format!( "Value length {} is greater than maximum {}", value.len(), max ))); } } _ => {} } } Ok(()) } } // Schema validator for documents pub struct SchemaValidator { schemas: Vec, } impl Default for SchemaValidator { fn default() -> Self { Self::new() } } impl SchemaValidator { pub fn new() -> Self { Self { schemas: Vec::new(), } } pub fn add_schema(&mut self, schema: Schema) { self.schemas.push(schema); } pub fn validate_document(&self, doc: &Document) -> Vec { let errors = Vec::new(); for i in 0..doc.facts.len() { let _concept_id = doc.facts.concept_ids.get(i); let _value = doc.facts.values.get(i); } for schema in &self.schemas { for element in schema.elements.values() { if !element.nillable && !element.abstract_element { // Check if this required element exists in document // This would require reverse mapping from concept names to facts let _found = false; // if !found { // errors.push(ValidationError::MissingRequiredElement { // element: name.to_string(), // }); // } } } } errors } }