Vendor crabrl-fork source and remove submodule linkage
- Replace `rust/crabrl-fork` gitlink with tracked source files - Add workspace notes documenting why the fork is vendored - Update ignore rules for vendored fork build artifacts
This commit is contained in:
308
rust/crabrl-fork/src/schema.rs
Normal file
308
rust/crabrl-fork/src/schema.rs
Normal file
@@ -0,0 +1,308 @@
|
||||
// Schema loading and validation for XBRL
|
||||
use crate::model::*;
|
||||
use crate::validator::ValidationError;
|
||||
use crate::{Error, Result};
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub struct SchemaLoader {
|
||||
cache: HashMap<CompactString, Schema>,
|
||||
}
|
||||
|
||||
impl Default for SchemaLoader {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SchemaLoader {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
cache: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_schema<P: AsRef<Path>>(&mut self, path: P) -> Result<&Schema> {
|
||||
let path_str = path.as_ref().to_string_lossy();
|
||||
let key = CompactString::from(path_str.as_ref());
|
||||
|
||||
if self.cache.contains_key(&key) {
|
||||
return Ok(self.cache.get(&key).unwrap());
|
||||
}
|
||||
|
||||
let schema = self.parse_schema_file(path)?;
|
||||
self.cache.insert(key.clone(), schema);
|
||||
Ok(self.cache.get(&key).unwrap())
|
||||
}
|
||||
|
||||
fn parse_schema_file<P: AsRef<Path>>(&self, path: P) -> Result<Schema> {
|
||||
let content = std::fs::read(path)?;
|
||||
self.parse_schema_bytes(&content)
|
||||
}
|
||||
|
||||
fn parse_schema_bytes(&self, data: &[u8]) -> Result<Schema> {
|
||||
// Simple XML parsing for schema
|
||||
let mut schema = Schema {
|
||||
target_namespace: CompactString::new(""),
|
||||
elements: HashMap::new(),
|
||||
types: HashMap::new(),
|
||||
imports: Vec::new(),
|
||||
};
|
||||
|
||||
// Skip BOM if present
|
||||
let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
|
||||
&data[3..]
|
||||
} else {
|
||||
data
|
||||
};
|
||||
|
||||
let text = std::str::from_utf8(data)
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in schema".to_string()))?;
|
||||
|
||||
// Extract target namespace
|
||||
if let Some(ns_start) = text.find("targetNamespace=\"") {
|
||||
let ns_start = ns_start + 17;
|
||||
if let Some(ns_end) = text[ns_start..].find('"') {
|
||||
schema.target_namespace = CompactString::from(&text[ns_start..ns_start + ns_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse elements
|
||||
let mut pos = 0;
|
||||
while let Some(elem_start) = text[pos..].find("<xs:element") {
|
||||
let elem_start = pos + elem_start;
|
||||
pos = elem_start + 1;
|
||||
|
||||
// Find element end
|
||||
let elem_end = if let Some(end) = text[elem_start..].find("/>") {
|
||||
elem_start + end + 2
|
||||
} else if let Some(end) = text[elem_start..].find("</xs:element>") {
|
||||
elem_start + end + 13
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let elem_text = &text[elem_start..elem_end];
|
||||
|
||||
// Extract element attributes
|
||||
let mut element = SchemaElement {
|
||||
name: CompactString::new(""),
|
||||
element_type: CompactString::new(""),
|
||||
substitution_group: None,
|
||||
period_type: None,
|
||||
balance: None,
|
||||
abstract_element: elem_text.contains("abstract=\"true\""),
|
||||
nillable: elem_text.contains("nillable=\"true\""),
|
||||
};
|
||||
|
||||
// Extract name
|
||||
if let Some(name_start) = elem_text.find("name=\"") {
|
||||
let name_start = name_start + 6;
|
||||
if let Some(name_end) = elem_text[name_start..].find('"') {
|
||||
element.name =
|
||||
CompactString::from(&elem_text[name_start..name_start + name_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract type
|
||||
if let Some(type_start) = elem_text.find("type=\"") {
|
||||
let type_start = type_start + 6;
|
||||
if let Some(type_end) = elem_text[type_start..].find('"') {
|
||||
element.element_type =
|
||||
CompactString::from(&elem_text[type_start..type_start + type_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract substitutionGroup
|
||||
if let Some(sg_start) = elem_text.find("substitutionGroup=\"") {
|
||||
let sg_start = sg_start + 19;
|
||||
if let Some(sg_end) = elem_text[sg_start..].find('"') {
|
||||
element.substitution_group =
|
||||
Some(CompactString::from(&elem_text[sg_start..sg_start + sg_end]));
|
||||
}
|
||||
}
|
||||
|
||||
// Extract XBRL-specific attributes
|
||||
if let Some(pt_start) = elem_text.find("xbrli:periodType=\"") {
|
||||
let pt_start = pt_start + 18;
|
||||
if let Some(pt_end) = elem_text[pt_start..].find('"') {
|
||||
element.period_type =
|
||||
Some(CompactString::from(&elem_text[pt_start..pt_start + pt_end]));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(bal_start) = elem_text.find("xbrli:balance=\"") {
|
||||
let bal_start = bal_start + 15;
|
||||
if let Some(bal_end) = elem_text[bal_start..].find('"') {
|
||||
element.balance = Some(CompactString::from(
|
||||
&elem_text[bal_start..bal_start + bal_end],
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if !element.name.is_empty() {
|
||||
schema.elements.insert(element.name.clone(), element);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse imports
|
||||
pos = 0;
|
||||
while let Some(import_start) = text[pos..].find("<xs:import") {
|
||||
let import_start = pos + import_start;
|
||||
pos = import_start + 1;
|
||||
|
||||
if let Some(import_end) = text[import_start..].find("/>") {
|
||||
let import_text = &text[import_start..import_start + import_end];
|
||||
|
||||
let mut import = SchemaImport {
|
||||
namespace: CompactString::new(""),
|
||||
schema_location: CompactString::new(""),
|
||||
};
|
||||
|
||||
if let Some(ns_start) = import_text.find("namespace=\"") {
|
||||
let ns_start = ns_start + 11;
|
||||
if let Some(ns_end) = import_text[ns_start..].find('"') {
|
||||
import.namespace =
|
||||
CompactString::from(&import_text[ns_start..ns_start + ns_end]);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(loc_start) = import_text.find("schemaLocation=\"") {
|
||||
let loc_start = loc_start + 16;
|
||||
if let Some(loc_end) = import_text[loc_start..].find('"') {
|
||||
import.schema_location =
|
||||
CompactString::from(&import_text[loc_start..loc_start + loc_end]);
|
||||
}
|
||||
}
|
||||
|
||||
schema.imports.push(import);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(schema)
|
||||
}
|
||||
|
||||
pub fn validate_element(&self, name: &str, value: &str, schema: &Schema) -> Result<()> {
|
||||
if let Some(element) = schema.elements.get(name) {
|
||||
// Check if element is abstract
|
||||
if element.abstract_element {
|
||||
return Err(Error::Validation(format!("Element {} is abstract", name)));
|
||||
}
|
||||
|
||||
// Validate type
|
||||
if let Some(type_def) = schema.types.get(&element.element_type) {
|
||||
self.validate_type(value, type_def)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
// Element not found in schema - might be from imported schema
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_type(&self, value: &str, type_def: &SchemaType) -> Result<()> {
|
||||
for restriction in &type_def.restrictions {
|
||||
match restriction {
|
||||
TypeRestriction::MinInclusive(min) => {
|
||||
if let (Ok(val), Ok(min_val)) = (value.parse::<f64>(), min.parse::<f64>()) {
|
||||
if val < min_val {
|
||||
return Err(Error::Validation(format!(
|
||||
"Value {} is less than minimum {}",
|
||||
val, min_val
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
TypeRestriction::MaxInclusive(max) => {
|
||||
if let (Ok(val), Ok(max_val)) = (value.parse::<f64>(), max.parse::<f64>()) {
|
||||
if val > max_val {
|
||||
return Err(Error::Validation(format!(
|
||||
"Value {} is greater than maximum {}",
|
||||
val, max_val
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
TypeRestriction::Pattern(pattern) => {
|
||||
if !value.contains(pattern.as_str()) {
|
||||
return Err(Error::Validation(format!(
|
||||
"Value {} doesn't match pattern {}",
|
||||
value, pattern
|
||||
)));
|
||||
}
|
||||
}
|
||||
TypeRestriction::MinLength(min) => {
|
||||
if value.len() < *min {
|
||||
return Err(Error::Validation(format!(
|
||||
"Value length {} is less than minimum {}",
|
||||
value.len(),
|
||||
min
|
||||
)));
|
||||
}
|
||||
}
|
||||
TypeRestriction::MaxLength(max) => {
|
||||
if value.len() > *max {
|
||||
return Err(Error::Validation(format!(
|
||||
"Value length {} is greater than maximum {}",
|
||||
value.len(),
|
||||
max
|
||||
)));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// Schema validator for documents
|
||||
pub struct SchemaValidator {
|
||||
schemas: Vec<Schema>,
|
||||
}
|
||||
|
||||
impl Default for SchemaValidator {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SchemaValidator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
schemas: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_schema(&mut self, schema: Schema) {
|
||||
self.schemas.push(schema);
|
||||
}
|
||||
|
||||
pub fn validate_document(&self, doc: &Document) -> Vec<ValidationError> {
|
||||
let errors = Vec::new();
|
||||
|
||||
for i in 0..doc.facts.len() {
|
||||
let _concept_id = doc.facts.concept_ids.get(i);
|
||||
let _value = doc.facts.values.get(i);
|
||||
}
|
||||
|
||||
for schema in &self.schemas {
|
||||
for element in schema.elements.values() {
|
||||
if !element.nillable && !element.abstract_element {
|
||||
// Check if this required element exists in document
|
||||
// This would require reverse mapping from concept names to facts
|
||||
let _found = false;
|
||||
// if !found {
|
||||
// errors.push(ValidationError::MissingRequiredElement {
|
||||
// element: name.to_string(),
|
||||
// });
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user