Vendor crabrl-fork source and remove submodule linkage

- Replace `rust/crabrl-fork` gitlink with tracked source files
- Add workspace notes documenting why the fork is vendored
- Update ignore rules for vendored fork build artifacts
This commit is contained in:
2026-03-17 19:55:38 -04:00
parent f4a0014572
commit ea130d3299
35 changed files with 6451 additions and 1 deletions

View File

@@ -0,0 +1,308 @@
// Schema loading and validation for XBRL
use crate::model::*;
use crate::validator::ValidationError;
use crate::{Error, Result};
use compact_str::CompactString;
use std::collections::HashMap;
use std::path::Path;
/// Loads XBRL schema files from disk and caches the parsed results in memory.
///
/// The cache is keyed by the (lossy) string form of the path handed to
/// `load_schema`, so a path string that has been parsed successfully once is
/// served from memory on later calls.
pub struct SchemaLoader {
// Parsed schemas, keyed by the path string they were loaded from.
cache: HashMap<CompactString, Schema>,
}
impl Default for SchemaLoader {
fn default() -> Self {
Self::new()
}
}
impl SchemaLoader {
/// Creates a loader whose schema cache starts out empty.
pub fn new() -> Self {
    let cache = HashMap::new();
    Self { cache }
}
/// Returns the schema stored at `path`, reading and parsing the file only if
/// this path string has not been cached yet.
///
/// The cache key is the lossy UTF-8 rendering of `path`; no normalisation is
/// applied, so two different spellings of one file are cached separately.
///
/// # Errors
/// Propagates any error from reading or parsing the file. Nothing is cached
/// in that case.
pub fn load_schema<P: AsRef<Path>>(&mut self, path: P) -> Result<&Schema> {
    let lossy = path.as_ref().to_string_lossy();
    let key = CompactString::from(&*lossy);

    // Populate the cache on a miss; hits fall straight through to the lookup.
    if !self.cache.contains_key(&key) {
        let parsed = self.parse_schema_file(path)?;
        self.cache.insert(key.clone(), parsed);
    }

    // The key is guaranteed to be present at this point.
    Ok(&self.cache[&key])
}
/// Reads the whole file at `path` into memory and parses it as a schema.
///
/// # Errors
/// Propagates the I/O error if the file cannot be read, or the parse error
/// reported by `parse_schema_bytes`.
fn parse_schema_file<P: AsRef<Path>>(&self, path: P) -> Result<Schema> {
    let raw = std::fs::read(path)?;
    self.parse_schema_bytes(raw.as_slice())
}
fn parse_schema_bytes(&self, data: &[u8]) -> Result<Schema> {
// Simple XML parsing for schema
let mut schema = Schema {
target_namespace: CompactString::new(""),
elements: HashMap::new(),
types: HashMap::new(),
imports: Vec::new(),
};
// Skip BOM if present
let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
&data[3..]
} else {
data
};
let text = std::str::from_utf8(data)
.map_err(|_| Error::Parse("Invalid UTF-8 in schema".to_string()))?;
// Extract target namespace
if let Some(ns_start) = text.find("targetNamespace=\"") {
let ns_start = ns_start + 17;
if let Some(ns_end) = text[ns_start..].find('"') {
schema.target_namespace = CompactString::from(&text[ns_start..ns_start + ns_end]);
}
}
// Parse elements
let mut pos = 0;
while let Some(elem_start) = text[pos..].find("<xs:element") {
let elem_start = pos + elem_start;
pos = elem_start + 1;
// Find element end
let elem_end = if let Some(end) = text[elem_start..].find("/>") {
elem_start + end + 2
} else if let Some(end) = text[elem_start..].find("</xs:element>") {
elem_start + end + 13
} else {
continue;
};
let elem_text = &text[elem_start..elem_end];
// Extract element attributes
let mut element = SchemaElement {
name: CompactString::new(""),
element_type: CompactString::new(""),
substitution_group: None,
period_type: None,
balance: None,
abstract_element: elem_text.contains("abstract=\"true\""),
nillable: elem_text.contains("nillable=\"true\""),
};
// Extract name
if let Some(name_start) = elem_text.find("name=\"") {
let name_start = name_start + 6;
if let Some(name_end) = elem_text[name_start..].find('"') {
element.name =
CompactString::from(&elem_text[name_start..name_start + name_end]);
}
}
// Extract type
if let Some(type_start) = elem_text.find("type=\"") {
let type_start = type_start + 6;
if let Some(type_end) = elem_text[type_start..].find('"') {
element.element_type =
CompactString::from(&elem_text[type_start..type_start + type_end]);
}
}
// Extract substitutionGroup
if let Some(sg_start) = elem_text.find("substitutionGroup=\"") {
let sg_start = sg_start + 19;
if let Some(sg_end) = elem_text[sg_start..].find('"') {
element.substitution_group =
Some(CompactString::from(&elem_text[sg_start..sg_start + sg_end]));
}
}
// Extract XBRL-specific attributes
if let Some(pt_start) = elem_text.find("xbrli:periodType=\"") {
let pt_start = pt_start + 18;
if let Some(pt_end) = elem_text[pt_start..].find('"') {
element.period_type =
Some(CompactString::from(&elem_text[pt_start..pt_start + pt_end]));
}
}
if let Some(bal_start) = elem_text.find("xbrli:balance=\"") {
let bal_start = bal_start + 15;
if let Some(bal_end) = elem_text[bal_start..].find('"') {
element.balance = Some(CompactString::from(
&elem_text[bal_start..bal_start + bal_end],
));
}
}
if !element.name.is_empty() {
schema.elements.insert(element.name.clone(), element);
}
}
// Parse imports
pos = 0;
while let Some(import_start) = text[pos..].find("<xs:import") {
let import_start = pos + import_start;
pos = import_start + 1;
if let Some(import_end) = text[import_start..].find("/>") {
let import_text = &text[import_start..import_start + import_end];
let mut import = SchemaImport {
namespace: CompactString::new(""),
schema_location: CompactString::new(""),
};
if let Some(ns_start) = import_text.find("namespace=\"") {
let ns_start = ns_start + 11;
if let Some(ns_end) = import_text[ns_start..].find('"') {
import.namespace =
CompactString::from(&import_text[ns_start..ns_start + ns_end]);
}
}
if let Some(loc_start) = import_text.find("schemaLocation=\"") {
let loc_start = loc_start + 16;
if let Some(loc_end) = import_text[loc_start..].find('"') {
import.schema_location =
CompactString::from(&import_text[loc_start..loc_start + loc_end]);
}
}
schema.imports.push(import);
}
}
Ok(schema)
}
/// Validates `value` for the element called `name` against `schema`.
///
/// An element that `schema` does not declare is accepted, since it may be
/// declared by an imported schema. A declared element is rejected when it
/// is abstract; otherwise its value is checked against the restrictions of
/// its type, provided that type is present in `schema.types`.
///
/// # Errors
/// Returns `Error::Validation` for an abstract element or a value that
/// violates a type restriction.
pub fn validate_element(&self, name: &str, value: &str, schema: &Schema) -> Result<()> {
    let element = match schema.elements.get(name) {
        Some(found) => found,
        // Not declared here - might come from an imported schema.
        None => return Ok(()),
    };

    if element.abstract_element {
        return Err(Error::Validation(format!("Element {} is abstract", name)));
    }

    match schema.types.get(&element.element_type) {
        Some(type_def) => self.validate_type(value, type_def),
        None => Ok(()),
    }
}
/// Checks `value` against every restriction listed on `type_def`, stopping
/// at the first violation.
///
/// - `MinInclusive` / `MaxInclusive`: enforced only when both the value and
///   the bound parse as `f64`; otherwise the restriction is skipped.
/// - `Pattern`: the value must contain the pattern text as a substring.
/// - `MinLength` / `MaxLength`: compared against the number of characters
///   in `value` (not its UTF-8 byte length), so non-ASCII text is measured
///   correctly.
/// - Any other restriction kind is ignored.
///
/// # Errors
/// Returns `Error::Validation` describing the first restriction that fails.
fn validate_type(&self, value: &str, type_def: &SchemaType) -> Result<()> {
    // Parse once; numeric restrictions are skipped for non-numeric values.
    let numeric = value.parse::<f64>().ok();
    // Length facets count characters, whereas `str::len` counts bytes.
    let char_len = value.chars().count();

    for restriction in &type_def.restrictions {
        match restriction {
            TypeRestriction::MinInclusive(min) => {
                if let (Some(val), Ok(min_val)) = (numeric, min.parse::<f64>()) {
                    if val < min_val {
                        return Err(Error::Validation(format!(
                            "Value {} is less than minimum {}",
                            val, min_val
                        )));
                    }
                }
            }
            TypeRestriction::MaxInclusive(max) => {
                if let (Some(val), Ok(max_val)) = (numeric, max.parse::<f64>()) {
                    if val > max_val {
                        return Err(Error::Validation(format!(
                            "Value {} is greater than maximum {}",
                            val, max_val
                        )));
                    }
                }
            }
            TypeRestriction::Pattern(pattern) => {
                // NOTE(review): this is a plain substring test, not regular
                // expression matching as XSD `pattern` facets define it.
                // Proper matching would need a regex engine.
                if !value.contains(pattern.as_str()) {
                    return Err(Error::Validation(format!(
                        "Value {} doesn't match pattern {}",
                        value, pattern
                    )));
                }
            }
            TypeRestriction::MinLength(min) => {
                if char_len < *min {
                    return Err(Error::Validation(format!(
                        "Value length {} is less than minimum {}",
                        char_len, min
                    )));
                }
            }
            TypeRestriction::MaxLength(max) => {
                if char_len > *max {
                    return Err(Error::Validation(format!(
                        "Value length {} is greater than maximum {}",
                        char_len, max
                    )));
                }
            }
            // Remaining restriction kinds are not enforced here.
            _ => {}
        }
    }
    Ok(())
}
}
// Schema validator for documents
/// Holds a collection of schemas against which documents are validated.
pub struct SchemaValidator {
// Schemas registered through `add_schema`, in the order they were added.
schemas: Vec<Schema>,
}
impl Default for SchemaValidator {
fn default() -> Self {
Self::new()
}
}
impl SchemaValidator {
    /// Creates a validator with no schemas registered.
    pub fn new() -> Self {
        let schemas = Vec::new();
        Self { schemas }
    }

    /// Registers `schema` so that it is consulted by later validations.
    pub fn add_schema(&mut self, schema: Schema) {
        self.schemas.push(schema);
    }

    /// Validates `doc` against the registered schemas.
    ///
    /// This is currently a placeholder: it walks the document's facts and
    /// the non-nillable, non-abstract schema elements but records nothing,
    /// so the returned list is always empty.
    pub fn validate_document(&self, doc: &Document) -> Vec<ValidationError> {
        // Walk every fact; the looked-up concept id and value are not used yet.
        let fact_count = doc.facts.len();
        for idx in 0..fact_count {
            let _concept_id = doc.facts.concept_ids.get(idx);
            let _value = doc.facts.values.get(idx);
        }

        // Elements that are neither nillable nor abstract are the ones the
        // intended "required element" check would apply to.
        let candidates = self
            .schemas
            .iter()
            .flat_map(|schema| schema.elements.values())
            .filter(|element| !(element.nillable || element.abstract_element));
        for _element in candidates {
            // TODO: look the element up among the document's facts (needs a
            // reverse mapping from concept names to facts) and report a
            // missing-required-element error when it is absent.
            let _found = false;
        }

        Vec::new()
    }
}