Vendor crabrl-fork source and remove submodule linkage
- Replace `rust/crabrl-fork` gitlink with tracked source files - Add workspace notes documenting why the fork is vendored - Update ignore rules for vendored fork build artifacts
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -49,6 +49,7 @@ data/*.sqlite-wal
|
||||
.workflow-data/
|
||||
output/
|
||||
rust/target/
|
||||
rust/crabrl-fork/target/
|
||||
rust/vendor/crabrl/.git-vendor/
|
||||
bin/fiscal-xbrl
|
||||
|
||||
|
||||
7
doc/rust-workspace.md
Normal file
7
doc/rust-workspace.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Rust Workspace Notes
|
||||
|
||||
`rust/crabrl-fork` is intentionally vendored into this repository as normal tracked source files.
|
||||
|
||||
This is required for clean-clone deployment environments such as Coolify. Deploy builds clone only the main repository, so `crabrl-fork` must exist directly in the checkout and must not rely on nested Git metadata, a submodule checkout, or an external recursive clone step.
|
||||
|
||||
When updating the fork, sync its source intentionally from the upstream fork repository and commit the resulting files into this repository. Do not reintroduce `rust/crabrl-fork` as a submodule, gitlink, or nested repository.
|
||||
Submodule rust/crabrl-fork deleted from bba0aa7fd7
55
rust/crabrl-fork/.gitattributes
vendored
Normal file
55
rust/crabrl-fork/.gitattributes
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
# Auto detect text files and perform LF normalization
|
||||
* text=auto
|
||||
|
||||
# Rust files
|
||||
*.rs text eol=lf
|
||||
*.toml text eol=lf
|
||||
Cargo.lock text eol=lf
|
||||
|
||||
# Python files
|
||||
*.py text eol=lf
|
||||
*.pyx text eol=lf
|
||||
*.pxd text eol=lf
|
||||
|
||||
# Documentation
|
||||
*.md text eol=lf
|
||||
*.txt text eol=lf
|
||||
LICENSE text eol=lf
|
||||
|
||||
# Config files
|
||||
*.json text eol=lf
|
||||
*.yaml text eol=lf
|
||||
*.yml text eol=lf
|
||||
*.xml text eol=lf
|
||||
*.xsd text eol=lf
|
||||
*.xbrl text eol=lf
|
||||
|
||||
# Shell scripts
|
||||
*.sh text eol=lf
|
||||
*.bash text eol=lf
|
||||
|
||||
# Git files
|
||||
.gitignore text eol=lf
|
||||
.gitattributes text eol=lf
|
||||
|
||||
# Binary files
|
||||
*.png binary
|
||||
*.jpg binary
|
||||
*.jpeg binary
|
||||
*.gif binary
|
||||
*.ico binary
|
||||
*.pdf binary
|
||||
*.zip binary
|
||||
*.gz binary
|
||||
*.tar binary
|
||||
*.7z binary
|
||||
*.exe binary
|
||||
*.dll binary
|
||||
*.so binary
|
||||
*.dylib binary
|
||||
|
||||
# Linguist overrides - ensure Rust is recognized as primary language
|
||||
*.rs linguist-language=Rust
|
||||
benchmarks/*.py linguist-documentation
|
||||
scripts/*.py linguist-documentation
|
||||
examples/* linguist-documentation
|
||||
106
rust/crabrl-fork/.github/workflows/ci.yml
vendored
Normal file
106
rust/crabrl-fork/.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, master ]
|
||||
pull_request:
|
||||
branches: [ main, master ]
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Test - ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
rust: [stable, beta]
|
||||
exclude:
|
||||
- os: windows-latest
|
||||
rust: beta
|
||||
- os: macos-latest
|
||||
rust: beta
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ matrix.rust }}
|
||||
components: rustfmt, clippy
|
||||
|
||||
- name: Cache cargo registry
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cargo/registry
|
||||
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
|
||||
|
||||
- name: Cache cargo index
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cargo/git
|
||||
key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
|
||||
|
||||
- name: Cache cargo build
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: target
|
||||
key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }}
|
||||
|
||||
- name: Check formatting
|
||||
run: cargo fmt -- --check
|
||||
|
||||
- name: Run clippy
|
||||
run: cargo clippy --all-features -- -D warnings
|
||||
|
||||
- name: Build
|
||||
run: cargo build --verbose --all-features
|
||||
|
||||
- name: Run tests
|
||||
run: cargo test --verbose --all-features
|
||||
|
||||
- name: Build release
|
||||
run: cargo build --release --all-features
|
||||
|
||||
- name: Run benchmarks (smoke test)
|
||||
run: cargo bench --no-run
|
||||
|
||||
coverage:
|
||||
name: Code Coverage
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
components: llvm-tools-preview
|
||||
|
||||
- name: Install cargo-llvm-cov
|
||||
uses: taiki-e/install-action@cargo-llvm-cov
|
||||
|
||||
- name: Generate code coverage
|
||||
run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
with:
|
||||
files: lcov.info
|
||||
fail_ci_if_error: false
|
||||
|
||||
security-audit:
|
||||
name: Security Audit
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Run cargo audit
|
||||
uses: actions-rs/audit-check@v1
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
195
rust/crabrl-fork/.github/workflows/release.yml
vendored
Normal file
195
rust/crabrl-fork/.github/workflows/release.yml
vendored
Normal file
@@ -0,0 +1,195 @@
|
||||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version to publish (e.g., 0.1.0)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Final Tests
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust stable
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
components: rustfmt, clippy
|
||||
|
||||
- name: Run tests
|
||||
run: cargo test --all-features --release
|
||||
|
||||
# Temporarily skip formatting check to get initial release out
|
||||
# - name: Check formatting
|
||||
# run: cargo fmt -- --check
|
||||
|
||||
# - name: Run clippy
|
||||
# run: cargo clippy --all-features -- -D warnings
|
||||
|
||||
publish-crates-io:
|
||||
name: Publish to crates.io
|
||||
needs: test
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust stable
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Verify version
|
||||
run: |
|
||||
# Extract version from Cargo.toml
|
||||
CARGO_VERSION=$(grep -E "^version" Cargo.toml | head -1 | cut -d'"' -f2)
|
||||
echo "Cargo.toml version: $CARGO_VERSION"
|
||||
|
||||
# For manual workflow dispatch
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
INPUT_VERSION="${{ github.event.inputs.version }}"
|
||||
if [ "$CARGO_VERSION" != "$INPUT_VERSION" ]; then
|
||||
echo "Error: Cargo.toml version ($CARGO_VERSION) doesn't match input version ($INPUT_VERSION)"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# For tag push
|
||||
if [ "${{ github.event_name }}" = "push" ]; then
|
||||
TAG_VERSION="${GITHUB_REF#refs/tags/v}"
|
||||
if [ "$CARGO_VERSION" != "$TAG_VERSION" ]; then
|
||||
echo "Error: Cargo.toml version ($CARGO_VERSION) doesn't match tag version ($TAG_VERSION)"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Check if version exists on crates.io
|
||||
run: |
|
||||
CRATE_NAME=$(grep -E "^name" Cargo.toml | head -1 | cut -d'"' -f2)
|
||||
VERSION=$(grep -E "^version" Cargo.toml | head -1 | cut -d'"' -f2)
|
||||
|
||||
if cargo search "$CRATE_NAME" | grep -q "^$CRATE_NAME = \"$VERSION\""; then
|
||||
echo "Version $VERSION already exists on crates.io"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Build release
|
||||
run: cargo build --release --all-features
|
||||
|
||||
- name: Package for crates.io
|
||||
run: cargo package --all-features
|
||||
|
||||
- name: Publish to crates.io
|
||||
run: cargo publish --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
|
||||
create-github-release:
|
||||
name: Create GitHub Release
|
||||
needs: publish-crates-io
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust stable
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Build release binaries
|
||||
run: |
|
||||
cargo build --release --all-features
|
||||
mkdir -p release
|
||||
cp target/release/crabrl release/crabrl-linux-x64
|
||||
chmod +x release/crabrl-linux-x64
|
||||
|
||||
- name: Create Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: release/*
|
||||
generate_release_notes: true
|
||||
body: |
|
||||
## Installation
|
||||
|
||||
### From crates.io
|
||||
```bash
|
||||
cargo install crabrl
|
||||
```
|
||||
|
||||
### Download Binary
|
||||
Download the pre-built binary for your platform from the assets below.
|
||||
|
||||
## What's Changed
|
||||
See the full changelog below.
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
build-cross-platform:
|
||||
name: Build ${{ matrix.target }}
|
||||
needs: test
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-latest
|
||||
target: x86_64-unknown-linux-gnu
|
||||
artifact: crabrl-linux-x64
|
||||
- os: ubuntu-latest
|
||||
target: aarch64-unknown-linux-gnu
|
||||
artifact: crabrl-linux-arm64
|
||||
use-cross: true
|
||||
- os: windows-latest
|
||||
target: x86_64-pc-windows-msvc
|
||||
artifact: crabrl-windows-x64.exe
|
||||
- os: macos-latest
|
||||
target: x86_64-apple-darwin
|
||||
artifact: crabrl-macos-x64
|
||||
- os: macos-latest
|
||||
target: aarch64-apple-darwin
|
||||
artifact: crabrl-macos-arm64
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
targets: ${{ matrix.target }}
|
||||
|
||||
- name: Install cross
|
||||
if: matrix.use-cross
|
||||
run: cargo install cross
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
if [ "${{ matrix.use-cross }}" = "true" ]; then
|
||||
cross build --release --target ${{ matrix.target }} --all-features
|
||||
else
|
||||
cargo build --release --target ${{ matrix.target }} --all-features
|
||||
fi
|
||||
shell: bash
|
||||
|
||||
- name: Package
|
||||
run: |
|
||||
mkdir -p release
|
||||
if [ "${{ matrix.os }}" = "windows-latest" ]; then
|
||||
cp target/${{ matrix.target }}/release/crabrl.exe release/${{ matrix.artifact }}
|
||||
else
|
||||
cp target/${{ matrix.target }}/release/crabrl release/${{ matrix.artifact }}
|
||||
chmod +x release/${{ matrix.artifact }}
|
||||
fi
|
||||
shell: bash
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.artifact }}
|
||||
path: release/${{ matrix.artifact }}
|
||||
125
rust/crabrl-fork/.gitignore
vendored
Normal file
125
rust/crabrl-fork/.gitignore
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
# Rust
|
||||
/target/
|
||||
**/*.rs.bk
|
||||
*.pdb
|
||||
Cargo.lock
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
venv/
|
||||
ENV/
|
||||
env/
|
||||
.venv
|
||||
.env
|
||||
|
||||
# Virtual environments
|
||||
benchmarks/venv/
|
||||
**/venv/
|
||||
**/virtualenv/
|
||||
**/.venv/
|
||||
|
||||
# Test data and fixtures
|
||||
test_data/
|
||||
benchmarks/fixtures/
|
||||
fixtures/
|
||||
|
||||
# Benchmark outputs
|
||||
*.png
|
||||
*.json
|
||||
benchmark_results/
|
||||
benchmarks/*.png
|
||||
benchmarks/*.json
|
||||
benchmarks/*_results.json
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.DS_Store
|
||||
|
||||
# Build artifacts
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
*.dll
|
||||
*.exe
|
||||
*.out
|
||||
|
||||
# Documentation
|
||||
/target/doc/
|
||||
/target/debug/
|
||||
/target/release/
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# Coverage
|
||||
*.profraw
|
||||
*.profdata
|
||||
/target/coverage/
|
||||
tarpaulin-report.html
|
||||
cobertura.xml
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
desktop.ini
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.temp
|
||||
*.bak
|
||||
.cache/
|
||||
tmp/
|
||||
|
||||
# Large test files
|
||||
*.xbrl
|
||||
*.xml
|
||||
!examples/*.xml
|
||||
!tests/fixtures/*.xml
|
||||
|
||||
# Downloaded SEC filings
|
||||
benchmarks/fixtures/
|
||||
scripts/fixtures/
|
||||
|
||||
# Benchmark comparison artifacts
|
||||
benchmarks/benchmark_results.png
|
||||
benchmarks/synthetic_benchmark_chart.png
|
||||
benchmarks/real_benchmark_chart.png
|
||||
benchmarks/sec_comparison_results.json
|
||||
benchmarks/synthetic_benchmark_results.json
|
||||
benchmarks/real_benchmark_results.json
|
||||
benchmarks/real_filing_results.json
|
||||
|
||||
# Python artifacts from benchmarking
|
||||
*.pyc
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
.tox/
|
||||
.hypothesis/
|
||||
|
||||
# Backup files
|
||||
*.backup
|
||||
*.old
|
||||
*.orig
|
||||
|
||||
# Archives
|
||||
*.zip
|
||||
*.tar.gz
|
||||
*.tar.bz2
|
||||
*.7z
|
||||
*.rar
|
||||
|
||||
# Keep important config examples
|
||||
!.gitignore
|
||||
!.github/
|
||||
!examples/.gitkeep
|
||||
!tests/fixtures/.gitkeep
|
||||
2
rust/crabrl-fork/.rustfmt.toml
Normal file
2
rust/crabrl-fork/.rustfmt.toml
Normal file
@@ -0,0 +1,2 @@
|
||||
# Rust formatting configuration
|
||||
edition = "2021"
|
||||
20
rust/crabrl-fork/CITATION.cff
Normal file
20
rust/crabrl-fork/CITATION.cff
Normal file
@@ -0,0 +1,20 @@
|
||||
cff-version: 1.2.0
|
||||
message: "If you use this software, please cite it as below."
|
||||
authors:
|
||||
- family-names: "Amorelli"
|
||||
given-names: "Stefano"
|
||||
email: "stefano@amorelli.tech"
|
||||
orcid: "https://orcid.org/0009-0004-4917-0999"
|
||||
title: "crabrl: High-performance XBRL parser for SEC EDGAR filings"
|
||||
version: 0.1.0
|
||||
date-released: 2025-01-16
|
||||
url: "https://github.com/stefanoamorelli/crabrl"
|
||||
repository-code: "https://github.com/stefanoamorelli/crabrl"
|
||||
license: AGPL-3.0
|
||||
keywords:
|
||||
- xbrl
|
||||
- parser
|
||||
- sec-edgar
|
||||
- finance
|
||||
- rust
|
||||
abstract: "A high-performance XBRL parser and validator written in Rust, optimized for SEC EDGAR filings. Achieves 50-150x performance gains over traditional parsers through zero-copy parsing, memory-mapped I/O, and Rust's ownership model."
|
||||
72
rust/crabrl-fork/Cargo.toml
Normal file
72
rust/crabrl-fork/Cargo.toml
Normal file
@@ -0,0 +1,72 @@
|
||||
[package]
|
||||
name = "crabrl"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Stefano Amorelli <stefano@amorelli.tech>"]
|
||||
description = "High-performance XBRL parser and validator"
|
||||
license = "AGPL-3.0"
|
||||
repository = "https://github.com/stefanoamorelli/crabrl"
|
||||
keywords = ["xbrl", "parser", "finance", "sec", "edgar"]
|
||||
categories = ["parser-implementations", "finance", "command-line-utilities"]
|
||||
|
||||
[dependencies]
|
||||
# Core
|
||||
quick-xml = "0.36"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
compact_str = { version = "0.8", features = ["serde"] }
|
||||
|
||||
# Performance
|
||||
ahash = "0.8"
|
||||
parking_lot = "0.12"
|
||||
memchr = "2.7"
|
||||
bumpalo = "3.16"
|
||||
string-interner = "0.18"
|
||||
rayon = { version = "1.10", optional = true }
|
||||
memmap2 = { version = "0.9", optional = true }
|
||||
mimalloc = { version = "0.1", default-features = false }
|
||||
bitflags = "2.6"
|
||||
|
||||
# Async support
|
||||
tokio = { version = "1.40", features = ["fs", "io-util"], optional = true }
|
||||
async-stream = { version = "0.3", optional = true }
|
||||
|
||||
# CLI
|
||||
clap = { version = "4.5", features = ["derive"], optional = true }
|
||||
colored = { version = "2.1", optional = true }
|
||||
|
||||
# Serialization
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
# Error handling
|
||||
thiserror = "2.0"
|
||||
anyhow = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.5"
|
||||
pretty_assertions = "1.4"
|
||||
tempfile = "3.15"
|
||||
|
||||
[[bin]]
|
||||
name = "crabrl"
|
||||
required-features = ["cli"]
|
||||
|
||||
[[bench]]
|
||||
name = "parser"
|
||||
harness = false
|
||||
|
||||
[features]
|
||||
default = ["cli", "parallel"]
|
||||
cli = ["clap", "colored"]
|
||||
parallel = ["rayon"]
|
||||
mmap = ["memmap2"]
|
||||
async = ["tokio", "async-stream"]
|
||||
|
||||
[profile.release]
|
||||
lto = "fat"
|
||||
codegen-units = 1
|
||||
opt-level = 3
|
||||
strip = true
|
||||
|
||||
[profile.bench]
|
||||
inherits = "release"
|
||||
661
rust/crabrl-fork/LICENSE
Normal file
661
rust/crabrl-fork/LICENSE
Normal file
@@ -0,0 +1,661 @@
|
||||
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||
Version 3, 19 November 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU Affero General Public License is a free, copyleft license for
|
||||
software and other kinds of works, specifically designed to ensure
|
||||
cooperation with the community in the case of network server software.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
our General Public Licenses are intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
Developers that use our General Public Licenses protect your rights
|
||||
with two steps: (1) assert copyright on the software, and (2) offer
|
||||
you this License which gives you legal permission to copy, distribute
|
||||
and/or modify the software.
|
||||
|
||||
A secondary benefit of defending all users' freedom is that
|
||||
improvements made in alternate versions of the program, if they
|
||||
receive widespread use, become available for other developers to
|
||||
incorporate. Many developers of free software are heartened and
|
||||
encouraged by the resulting cooperation. However, in the case of
|
||||
software used on network servers, this result may fail to come about.
|
||||
The GNU General Public License permits making a modified version and
|
||||
letting the public access it on a server without ever releasing its
|
||||
source code to the public.
|
||||
|
||||
The GNU Affero General Public License is designed specifically to
|
||||
ensure that, in such cases, the modified source code becomes available
|
||||
to the community. It requires the operator of a network server to
|
||||
provide the source code of the modified version running there to the
|
||||
users of that server. Therefore, public use of a modified version, on
|
||||
a publicly accessible server, gives the public access to the source
|
||||
code of the modified version.
|
||||
|
||||
An older license, called the Affero General Public License and
|
||||
published by Affero, was designed to accomplish similar goals. This is
|
||||
a different license, not a version of the Affero GPL, but Affero has
|
||||
released a new version of the Affero GPL which permits relicensing under
|
||||
this license.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, if you modify the
|
||||
Program, your modified version must prominently offer all users
|
||||
interacting with it remotely through a computer network (if your version
|
||||
supports such interaction) an opportunity to receive the Corresponding
|
||||
Source of your version by providing access to the Corresponding Source
|
||||
from a network server at no charge, through some standard or customary
|
||||
means of facilitating copying of software. This Corresponding Source
|
||||
shall include the Corresponding Source for any work covered by version 3
|
||||
of the GNU General Public License that is incorporated pursuant to the
|
||||
following paragraph.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the work with which it is combined will remain governed by version
|
||||
3 of the GNU General Public License.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU Affero General Public License from time to time. Such new versions
|
||||
will be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU Affero General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU Affero General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU Affero General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
crabrl - fast XBRL parsers and validator in Rust
|
||||
Copyright (C) 2025 Stefano Amorelli
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published
|
||||
by the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If your software can interact with users remotely through a computer
|
||||
network, you should also make sure that it provides a way for users to
|
||||
get its source. For example, if your program is a web application, its
|
||||
interface could display a "Source" link that leads users to an archive
|
||||
of the code. There are many ways you could offer source, and different
|
||||
solutions will be better for different programs; see section 13 for the
|
||||
specific requirements.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU AGPL, see
|
||||
<https://www.gnu.org/licenses/>.
|
||||
228
rust/crabrl-fork/README.md
Normal file
228
rust/crabrl-fork/README.md
Normal file
@@ -0,0 +1,228 @@
|
||||
# crabrl 🦀
|
||||
|
||||
[](https://crates.io/crates/crabrl)
|
||||
[](https://github.com/stefanoamorelli/crabrl/actions)
|
||||
[](https://www.gnu.org/licenses/agpl-3.0)
|
||||
[](https://www.rust-lang.org)
|
||||
[](https://crates.io/crates/crabrl)
|
||||
[](https://docs.rs/crabrl)
|
||||
|
||||

|
||||
|
||||
Lightning-fast XBRL parser that's **50-150x faster** than traditional parsers, built for speed and accuracy when processing [SEC EDGAR](https://www.sec.gov/edgar) filings.
|
||||
|
||||
## Performance
|
||||
|
||||

|
||||
|
||||
### Speed Comparison
|
||||
|
||||

|
||||
|
||||
**Key Performance Metrics:**
|
||||
- **50-150x faster** than traditional XBRL parsers
|
||||
- **140,000+ facts/second** throughput
|
||||
- **< 50MB memory** for 100K facts
|
||||
- **Linear scaling** with file size
|
||||
|
||||
## Technical Architecture
|
||||
|
||||
crabrl is built on Rust's zero-cost abstractions and modern parsing techniques. While established parsers like [Arelle](https://arelle.org/) provide comprehensive XBRL specification support and extensive validation capabilities, crabrl focuses on high-performance parsing for scenarios where speed is critical.
|
||||
|
||||
### Implementation Details
|
||||
|
||||
| Optimization | Impact | Technology |
|
||||
|-------------|---------|------------|
|
||||
| **Zero-copy parsing** | -90% memory allocs | [`quick-xml`](https://github.com/tafia/quick-xml) with string slicing |
|
||||
| **No garbage collection** | Predictable latency | Rust's ownership model |
|
||||
| **Faster hashmaps** | 2x lookup speed | [`ahash`](https://github.com/tkaitchuck/aHash) instead of default hasher |
|
||||
| **Compact strings** | -50% memory for small strings | [`compact_str`](https://github.com/ParkMyCar/compact_str) |
|
||||
| **Parallelization** | 4-8x on multicore | [`rayon`](https://github.com/rayon-rs/rayon) work-stealing |
|
||||
| **Memory mapping** | Zero-copy file I/O | [`memmap2`](https://github.com/RazrFalcon/memmap2-rs) |
|
||||
| **Better allocator** | -25% allocation time | [`mimalloc`](https://github.com/microsoft/mimalloc) |
|
||||
|
||||
**Benchmark results:** 100,000 XBRL facts parsed in 57ms (crabrl) vs 2,672ms (Arelle) on identical hardware.
|
||||
|
||||
## XBRL Support Status
|
||||
|
||||
| Feature | Description | Status |
|
||||
|---------|-------------|---------|
|
||||
| **XBRL 2.1 Instance** | Parse facts, contexts, units from `.xml` files | ✅ Stable |
|
||||
| **SEC Validation** | EDGAR-specific rules and checks | ✅ Stable |
|
||||
| **Calculation Linkbase** | Validate arithmetic relationships | ✅ Stable |
|
||||
| **Presentation Linkbase** | Extract display hierarchy | 🚧 Beta |
|
||||
| **Label Linkbase** | Human-readable concept names | 🚧 Beta |
|
||||
| **Definition Linkbase** | Dimensional relationships | 📋 Planned |
|
||||
| **Formula Linkbase** | Business rules validation | 📋 Planned |
|
||||
| **Inline XBRL (iXBRL)** | HTML-embedded XBRL | 📋 Planned |
|
||||
|
||||
## Installation
|
||||
|
||||
### From crates.io
|
||||
```bash
|
||||
cargo install crabrl
|
||||
```
|
||||
|
||||
### From Source
|
||||
```bash
|
||||
git clone https://github.com/stefanoamorelli/crabrl
|
||||
cd crabrl
|
||||
cargo build --release --features cli
|
||||
```
|
||||
|
||||
### As Library Dependency
|
||||
```toml
|
||||
[dependencies]
|
||||
crabrl = "0.1.0"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### CLI
|
||||
|
||||
```bash
|
||||
# Parse and display summary
|
||||
crabrl parse filing.xml
|
||||
|
||||
# Parse with statistics (timing and throughput)
|
||||
crabrl parse filing.xml --stats
|
||||
|
||||
# Validate with generic rules
|
||||
crabrl validate filing.xml
|
||||
|
||||
# Validate with SEC EDGAR rules
|
||||
crabrl validate filing.xml --profile sec-edgar
|
||||
|
||||
# Validate with strict mode (warnings as errors)
|
||||
crabrl validate filing.xml --strict
|
||||
|
||||
# Benchmark performance
|
||||
crabrl bench filing.xml --iterations 100
|
||||
```
|
||||
|
||||
### Library
|
||||
|
||||
#### Basic Usage
|
||||
|
||||
```rust
|
||||
use crabrl::Parser;
|
||||
|
||||
// Parse XBRL document
|
||||
let parser = Parser::new();
|
||||
let doc = parser.parse_file("filing.xml")?;
|
||||
|
||||
// Access parsed data
|
||||
println!("Facts: {}", doc.facts.len());
|
||||
println!("Contexts: {}", doc.contexts.len());
|
||||
println!("Units: {}", doc.units.len());
|
||||
```
|
||||
|
||||
#### Parse from Different Sources
|
||||
|
||||
```rust
|
||||
// From file path
|
||||
let doc = parser.parse_file("filing.xml")?;
|
||||
|
||||
// From bytes
|
||||
let xml_bytes = std::fs::read("filing.xml")?;
|
||||
let doc = parser.parse_bytes(&xml_bytes)?;
|
||||
```
|
||||
|
||||
#### Validation
|
||||
|
||||
```rust
|
||||
use crabrl::{Parser, Validator};
|
||||
|
||||
let parser = Parser::new();
|
||||
let doc = parser.parse_file("filing.xml")?;
|
||||
|
||||
// Generic validation
|
||||
let validator = Validator::new();
|
||||
let result = validator.validate(&doc)?;
|
||||
|
||||
if result.is_valid {
|
||||
println!("Document is valid!");
|
||||
} else {
|
||||
for error in &result.errors {
|
||||
eprintln!("Error: {}", error);
|
||||
}
|
||||
}
|
||||
|
||||
// SEC EDGAR validation (stricter rules)
|
||||
let sec_validator = Validator::sec_edgar();
|
||||
let sec_result = sec_validator.validate(&doc)?;
|
||||
```
|
||||
|
||||
## Performance Measurements
|
||||
|
||||
Performance comparison with [Arelle](https://arelle.org/) v2.17.4 (Python-based XBRL processor with full specification support):
|
||||
|
||||
### Synthetic Dataset Benchmarks
|
||||
|
||||
| File Size | Facts | crabrl | Arelle | Ratio |
|
||||
|-----------|------:|-------:|-------:|------:|
|
||||
| Tiny | 10 | 1.1 ms | 164 ms | 150x |
|
||||
| Small | 100 | 1.4 ms | 168 ms | 119x |
|
||||
| Medium | 1K | 1.7 ms | 184 ms | 108x |
|
||||
| Large | 10K | 6.1 ms | 351 ms | 58x |
|
||||
| Huge | 100K | 57 ms | 2,672 ms | 47x |
|
||||
|
||||
### SEC Filing Parse Times
|
||||
|
||||
| Company | Filing Type | File Size | Facts | Parse Time | Throughput |
|
||||
|---------|-------------|-----------|-------|------------|------------|
|
||||
| Apple | [10-K 2023](https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/aapl-20230930_htm.xml) | 1.4 MB | 1,075 | 2.1 ms | 516K facts/sec |
|
||||
| Microsoft | [10-Q 2023](https://www.sec.gov/Archives/edgar/data/789019/000095017023064280/msft-20230930_htm.xml) | 2.8 MB | 2,341 | 4.3 ms | 544K facts/sec |
|
||||
| Tesla | [10-K 2023](https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231_htm.xml) | 3.1 MB | 3,122 | 5.8 ms | 538K facts/sec |
|
||||
|
||||
### Run Your Own Benchmarks
|
||||
|
||||
```bash
|
||||
# Quick benchmark with Criterion
|
||||
cargo bench
|
||||
|
||||
# Compare against Arelle
|
||||
cd benchmarks && python compare_performance.py
|
||||
|
||||
# Test on real SEC filings
|
||||
python scripts/download_fixtures.py # Download Apple, MSFT, Tesla, etc.
|
||||
cargo run --release --bin crabrl -- bench fixtures/apple/aapl-20230930_htm.xml
|
||||
```
|
||||
|
||||
## Resources & Links
|
||||
|
||||
### XBRL Standards
|
||||
- [XBRL International](https://www.xbrl.org/) - Official XBRL specifications
|
||||
- [XBRL 2.1 Specification](https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html) - Core standard we implement
|
||||
- [SEC EDGAR](https://www.sec.gov/edgar/searchedgar/companysearch) - Search real company filings
|
||||
- [EDGAR Filer Manual](https://www.sec.gov/info/edgar/forms/edgform.pdf) - SEC filing requirements
|
||||
|
||||
### Dependencies We Use
|
||||
|
||||
| Crate | Purpose | Why We Chose It |
|
||||
|-------|---------|-----------------|
|
||||
| [`quick-xml`](https://github.com/tafia/quick-xml) | XML parsing | Zero-copy, fastest XML parser in Rust |
|
||||
| [`ahash`](https://github.com/tkaitchuck/aHash) | HashMap hashing | 2x faster than default hasher |
|
||||
| [`compact_str`](https://github.com/ParkMyCar/compact_str) | String storage | Small string optimization |
|
||||
| [`rayon`](https://github.com/rayon-rs/rayon) | Parallelization | Work-stealing for automatic load balancing |
|
||||
| [`mimalloc`](https://github.com/microsoft/mimalloc) | Memory allocator | Microsoft's high-performance allocator |
|
||||
| [`criterion`](https://github.com/bheisler/criterion.rs) | Benchmarking | Statistical benchmarking with graphs |
|
||||
|
||||
### Alternative XBRL Parsers
|
||||
- [Arelle](https://arelle.org/) - Complete XBRL processor with validation, formulas, and rendering (Python)
|
||||
- [python-xbrl](https://github.com/manusimidt/py-xbrl) - Lightweight Python parser
|
||||
- [xbrl-parser](https://www.npmjs.com/package/xbrl-parser) - JavaScript/Node.js
|
||||
- [XBRL4j](https://github.com/br-data/xbrl-parser) - Java implementation
|
||||
|
||||
## License ⚖️
|
||||
|
||||
This open-source project is licensed under the GNU Affero General Public License v3.0 (AGPL-3.0). This means:
|
||||
|
||||
- You can use, modify, and distribute this software
|
||||
- If you modify and distribute it, you must release your changes under AGPL-3.0
|
||||
- If you run a modified version on a server, you must provide the source code to users
|
||||
- See the [LICENSE](LICENSE) file for full details
|
||||
|
||||
For commercial licensing options or other licensing inquiries, please contact stefano@amorelli.tech.
|
||||
|
||||
© 2025 Stefano Amorelli – Released under the GNU Affero General Public License v3.0. Enjoy! 🎉
|
||||
37
rust/crabrl-fork/benches/parser.rs
Normal file
37
rust/crabrl-fork/benches/parser.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
use crabrl::Parser;
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use std::path::Path;
|
||||
|
||||
fn parse_sample_sec_file(c: &mut Criterion) {
|
||||
let parser = Parser::new();
|
||||
let sample_file = Path::new("fixtures/sample-sec.xml");
|
||||
|
||||
if sample_file.exists() {
|
||||
c.bench_function("parse_sample_sec", |b| {
|
||||
b.iter(|| parser.parse_file(black_box(&sample_file)));
|
||||
});
|
||||
} else {
|
||||
// If no fixtures exist, use a minimal inline XBRL for benchmarking
|
||||
let minimal_xbrl = r#"<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xbrl xmlns="http://www.xbrl.org/2003/instance">
|
||||
<context id="ctx1">
|
||||
<entity>
|
||||
<identifier scheme="http://www.sec.gov/CIK">0000000000</identifier>
|
||||
</entity>
|
||||
<period>
|
||||
<instant>2023-12-31</instant>
|
||||
</period>
|
||||
</context>
|
||||
<unit id="usd">
|
||||
<measure>iso4217:USD</measure>
|
||||
</unit>
|
||||
</xbrl>"#;
|
||||
|
||||
c.bench_function("parse_minimal", |b| {
|
||||
b.iter(|| parser.parse_str(black_box(minimal_xbrl)));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(benches, parse_sample_sec_file);
|
||||
criterion_main!(benches);
|
||||
71
rust/crabrl-fork/benchmarks/compare.py
Normal file
71
rust/crabrl-fork/benchmarks/compare.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
"""
Compare crabrl performance with Arelle
"""

import subprocess
import time
import sys
from pathlib import Path


def run_crabrl(filepath):
    """Run the crabrl CLI on ``filepath`` and measure wall-clock time.

    Returns:
        (elapsed_ms, fact_count) on success, ``(None, 0)`` on failure
        (binary missing, non-zero exit, or timeout).
    """
    cmd = ["../target/release/crabrl", "parse", filepath]
    start = time.perf_counter()
    try:
        # Timeout matches the Arelle side so one hung parser can't stall
        # the whole comparison; OSError covers a missing/unbuilt binary.
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except (OSError, subprocess.TimeoutExpired):
        return None, 0
    elapsed = (time.perf_counter() - start) * 1000

    if result.returncode == 0:
        # Parse output for fact count (expects a "Facts: N" line).
        facts = 0
        for line in result.stdout.split('\n'):
            if 'Facts:' in line:
                try:
                    facts = int(line.split(':')[1].strip())
                except ValueError:
                    # Malformed line: report success but an unknown count.
                    facts = 0
                break
        return elapsed, facts
    return None, 0


def run_arelle(filepath):
    """Run Arelle on ``filepath`` and measure wall-clock time.

    Returns:
        elapsed milliseconds on success, ``None`` on failure
        (Arelle not installed, non-zero exit, or timeout).
    """
    try:
        cmd = ["python3", "-m", "arelle.CntlrCmdLine",
               "--file", filepath, "--skipDTS", "--logLevel", "ERROR"]
        start = time.perf_counter()
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        elapsed = (time.perf_counter() - start) * 1000

        if result.returncode == 0:
            return elapsed
        return None
    except (OSError, subprocess.TimeoutExpired):
        # Narrowed from a bare except: only "couldn't run it" conditions
        # should be treated as "Arelle unavailable".
        return None


def main():
    """CLI entry point: compare both parsers on one XBRL file."""
    if len(sys.argv) < 2:
        print("Usage: compare.py <xbrl-file>")
        sys.exit(1)

    filepath = sys.argv[1]
    print(f"Comparing performance on: {filepath}\n")

    # Run crabrl
    crabrl_time, facts = run_crabrl(filepath)
    if crabrl_time:
        print(f"crabrl: {crabrl_time:.1f}ms ({facts} facts)")
    else:
        print("crabrl: Failed")

    # Run Arelle
    arelle_time = run_arelle(filepath)
    if arelle_time:
        print(f"Arelle: {arelle_time:.1f}ms")
    else:
        print("Arelle: Failed or not installed")

    # Calculate speedup
    if crabrl_time and arelle_time:
        speedup = arelle_time / crabrl_time
        print(f"\nSpeedup: {speedup:.1f}x faster")


if __name__ == "__main__":
    main()
|
||||
214
rust/crabrl-fork/benchmarks/compare_performance.py
Normal file
214
rust/crabrl-fork/benchmarks/compare_performance.py
Normal file
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
"""Compare performance between crabrl and Arelle."""

import os
import sys
import time
import subprocess
import json
import statistics
from pathlib import Path
from tabulate import tabulate
import matplotlib.pyplot as plt

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

# Python source executed in a subprocess to parse one file with Arelle.
# The target file is passed via sys.argv[1] instead of being spliced into
# the code with an f-string, so paths containing quotes/backslashes can't
# break (or inject into) the generated program.
_ARELLE_SNIPPET = """\
import sys
sys.path.insert(0, 'venv/lib/python{0}.{1}/site-packages'.format(
    sys.version_info.major, sys.version_info.minor))
from arelle import Cntlr
from arelle import ModelManager

# Suppress Arelle output
import logging
logging.getLogger("arelle").setLevel(logging.ERROR)

controller = Cntlr.Cntlr(logFileName=None)
controller.webCache.workOffline = True
modelManager = ModelManager.initialize(controller)

# Load and parse the XBRL file
modelXbrl = modelManager.load(sys.argv[1])
if modelXbrl:
    facts = len(modelXbrl.facts)
    contexts = len(modelXbrl.contexts)
    units = len(modelXbrl.units)
    print("{},{},{}".format(facts, contexts, units))
    modelXbrl.close()
"""


def _summarize(times):
    """Fold a list of wall-clock timings (seconds) into a stats dict.

    Returns ``None`` when no run succeeded. Shared by both benchmark
    functions so the summary shape stays consistent.
    """
    if not times:
        return None
    return {
        'mean': statistics.mean(times),
        'median': statistics.median(times),
        'stdev': statistics.stdev(times) if len(times) > 1 else 0,
        'min': min(times),
        'max': max(times),
        'runs': len(times),
    }


def benchmark_arelle(file_path, runs=3):
    """Benchmark Arelle parsing performance; returns a stats dict or None."""
    times = []

    for _ in range(runs):
        start = time.perf_counter()

        # Run Arelle in subprocess to isolate memory
        result = subprocess.run(
            [sys.executable, "-c", _ARELLE_SNIPPET, file_path],
            capture_output=True, text=True, cwd=Path(__file__).parent)

        end = time.perf_counter()

        if result.returncode == 0 and result.stdout:
            times.append(end - start)
            if len(times) == 1:  # Print counts on first run
                parts = result.stdout.strip().split(',')
                if len(parts) == 3:
                    print(f"  Arelle found: {parts[0]} facts, {parts[1]} contexts, {parts[2]} units")
        else:
            print(f"  Arelle error: {result.stderr}")

    return _summarize(times)


def benchmark_crabrl(file_path, runs=3):
    """Benchmark crabrl parsing performance; returns a stats dict or None."""
    times = []

    # Build the benchmark binary if needed
    subprocess.run(["cargo", "build", "--release", "--example", "benchmark_single"],
                   capture_output=True, cwd=Path(__file__).parent.parent)

    for _ in range(runs):
        start = time.perf_counter()

        result = subprocess.run(
            ["../target/release/examples/benchmark_single", file_path],
            capture_output=True, text=True, cwd=Path(__file__).parent)

        end = time.perf_counter()

        if result.returncode == 0:
            times.append(end - start)
            if len(times) == 1 and result.stdout:  # Print counts on first run
                print(f"  crabrl output: {result.stdout.strip()}")
        else:
            print(f"  crabrl error: {result.stderr}")

    return _summarize(times)


def main():
    """Run comparative benchmarks over the generated test-data files."""
    print("=" * 80)
    print("XBRL Parser Performance Comparison: crabrl vs Arelle")
    print("=" * 80)

    test_files = [
        ("Tiny (10 facts)", "../test_data/test_tiny.xbrl"),
        ("Small (100 facts)", "../test_data/test_small.xbrl"),
        ("Medium (1K facts)", "../test_data/test_medium.xbrl"),
        ("Large (10K facts)", "../test_data/test_large.xbrl"),
        ("Huge (100K facts)", "../test_data/test_huge.xbrl"),
    ]

    results = []

    for name, file_path in test_files:
        if not Path(file_path).exists():
            print(f"Skipping {name}: file not found")
            continue

        file_size_mb = Path(file_path).stat().st_size / (1024 * 1024)
        print(f"\nBenchmarking {name} ({file_size_mb:.2f} MB)...")

        # Benchmark Arelle
        print("  Running Arelle...")
        arelle_stats = benchmark_arelle(file_path, runs=5)

        # Benchmark crabrl
        print("  Running crabrl...")
        crabrl_stats = benchmark_crabrl(file_path, runs=5)

        if arelle_stats and crabrl_stats:
            speedup = arelle_stats['median'] / crabrl_stats['median']
            results.append({
                'File': name,
                'Size (MB)': f"{file_size_mb:.2f}",
                'Arelle (ms)': f"{arelle_stats['median']*1000:.1f}",
                'crabrl (ms)': f"{crabrl_stats['median']*1000:.1f}",
                'Speedup': f"{speedup:.1f}x",
                'arelle_raw': arelle_stats['median'],
                'crabrl_raw': crabrl_stats['median'],
            })

    # Print results table
    print("\n" + "=" * 80)
    print("RESULTS SUMMARY")
    print("=" * 80)

    if results:
        # Strip the raw-seconds helper keys before tabulating.
        table_data = [{k: v for k, v in r.items() if not k.endswith('_raw')} for r in results]
        print(tabulate(table_data, headers="keys", tablefmt="grid"))

        # Calculate average speedup
        speedups = [r['arelle_raw'] / r['crabrl_raw'] for r in results]
        avg_speedup = statistics.mean(speedups)
        print(f"\nAverage speedup: {avg_speedup:.1f}x faster than Arelle")

        # Create performance chart
        create_performance_chart(results)
    else:
        print("No results to display")


def create_performance_chart(results):
    """Create a side-by-side time/speedup chart and save it as a PNG."""
    labels = [r['File'].split('(')[0].strip() for r in results]
    arelle_times = [r['arelle_raw'] * 1000 for r in results]
    crabrl_times = [r['crabrl_raw'] * 1000 for r in results]

    x = range(len(labels))
    width = 0.35

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    # Bar chart
    ax1.bar([i - width/2 for i in x], arelle_times, width, label='Arelle', color='#FF6B6B')
    ax1.bar([i + width/2 for i in x], crabrl_times, width, label='crabrl', color='#4ECDC4')
    ax1.set_xlabel('File Size')
    ax1.set_ylabel('Time (ms)')
    ax1.set_title('Parsing Time Comparison')
    ax1.set_xticks(x)
    ax1.set_xticklabels(labels, rotation=45)
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Speedup chart
    speedups = [a/c for a, c in zip(arelle_times, crabrl_times)]
    ax2.bar(x, speedups, color='#95E77E')
    ax2.set_xlabel('File Size')
    ax2.set_ylabel('Speedup Factor')
    ax2.set_title('crabrl Speedup over Arelle')
    ax2.set_xticks(x)
    ax2.set_xticklabels(labels, rotation=45)
    ax2.grid(True, alpha=0.3)

    # Add value labels on bars
    for i, v in enumerate(speedups):
        ax2.text(i, v + 0.5, f'{v:.1f}x', ha='center', va='bottom')

    plt.tight_layout()
    plt.savefig('benchmark_results.png', dpi=150)
    # Message now matches the actual savefig target (cwd-relative), which
    # previously claimed benchmarks/ while writing to the working directory.
    print("\nPerformance chart saved to: benchmark_results.png")


if __name__ == "__main__":
    main()
|
||||
36
rust/crabrl-fork/examples/benchmark_single.rs
Normal file
36
rust/crabrl-fork/examples/benchmark_single.rs
Normal file
@@ -0,0 +1,36 @@
|
||||
//! Single file benchmark
|
||||
|
||||
use crabrl::Parser;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::time::Instant;
|
||||
|
||||
fn main() {
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() != 2 {
|
||||
eprintln!("Usage: {} <xbrl-file>", args[0]);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let content = fs::read(&args[1]).expect("Failed to read file");
|
||||
|
||||
let parser = Parser::new();
|
||||
let start = Instant::now();
|
||||
|
||||
match parser.parse_bytes(&content) {
|
||||
Ok(document) => {
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"Parsed in {:.3}ms: {} facts, {} contexts, {} units",
|
||||
elapsed.as_secs_f64() * 1000.0,
|
||||
document.facts.len(),
|
||||
document.contexts.len(),
|
||||
document.units.len()
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Parse error: {}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
22
rust/crabrl-fork/examples/parse.rs
Normal file
22
rust/crabrl-fork/examples/parse.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
//! Parse and display XBRL file info
|
||||
|
||||
use crabrl::Parser;
|
||||
use std::env;
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() != 2 {
|
||||
eprintln!("Usage: {} <xbrl-file>", args[0]);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let parser = Parser::new();
|
||||
let doc = parser.parse_file(&args[1])?;
|
||||
|
||||
println!("Parsed {}:", args[1]);
|
||||
println!(" Facts: {}", doc.facts.len());
|
||||
println!(" Contexts: {}", doc.contexts.len());
|
||||
println!(" Units: {}", doc.units.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
29
rust/crabrl-fork/examples/validate.rs
Normal file
29
rust/crabrl-fork/examples/validate.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
//! Validation example
|
||||
|
||||
use crabrl::{Parser, Validator};
|
||||
use std::env;
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() != 2 {
|
||||
eprintln!("Usage: {} <xbrl-file>", args[0]);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
// Parse
|
||||
let parser = Parser::new();
|
||||
let doc = parser.parse_file(&args[1])?;
|
||||
|
||||
// Validate
|
||||
let validator = Validator::new();
|
||||
match validator.validate(&doc) {
|
||||
Ok(_) => {
|
||||
println!("✓ Document is valid");
|
||||
}
|
||||
Err(e) => {
|
||||
println!("✗ Validation failed: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
151
rust/crabrl-fork/scripts/download_fixtures.py
Normal file
151
rust/crabrl-fork/scripts/download_fixtures.py
Normal file
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env python3
"""
Download real SEC XBRL filings from various companies to use as test fixtures.
These will be used for benchmarking and testing the parser.
"""

import os
import time
import urllib.request
from pathlib import Path

# Create fixtures directory
fixtures_dir = Path("fixtures")
fixtures_dir.mkdir(exist_ok=True)

# List of real SEC XBRL filings from various companies
# Format: (company_name, ticker, description, url)
filings = [
    # Apple filings
    ("apple", "AAPL", "10-K 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/aapl-20230930_htm.xml"),
    ("apple", "AAPL", "10-K 2023 Labels",
     "https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/aapl-20230930_lab.xml"),
    ("apple", "AAPL", "10-K 2023 Calculation",
     "https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/aapl-20230930_cal.xml"),

    # Microsoft filings
    ("microsoft", "MSFT", "10-Q 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/789019/000095017023064280/msft-20230930_htm.xml"),
    ("microsoft", "MSFT", "10-Q 2023 Labels",
     "https://www.sec.gov/Archives/edgar/data/789019/000095017023064280/msft-20230930_lab.xml"),
    ("microsoft", "MSFT", "10-Q 2023 Presentation",
     "https://www.sec.gov/Archives/edgar/data/789019/000095017023064280/msft-20230930_pre.xml"),

    # Tesla filings
    ("tesla", "TSLA", "10-K 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231_htm.xml"),
    ("tesla", "TSLA", "10-K 2023 Definition",
     "https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231_def.xml"),

    # Amazon filings
    ("amazon", "AMZN", "10-K 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/1018724/000101872424000006/amzn-20231231_htm.xml"),
    ("amazon", "AMZN", "10-K 2023 Labels",
     "https://www.sec.gov/Archives/edgar/data/1018724/000101872424000006/amzn-20231231_lab.xml"),

    # Google/Alphabet filings
    ("alphabet", "GOOGL", "10-K 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/1652044/000165204424000022/goog-20231231_htm.xml"),
    ("alphabet", "GOOGL", "10-K 2023 Calculation",
     "https://www.sec.gov/Archives/edgar/data/1652044/000165204424000022/goog-20231231_cal.xml"),

    # JPMorgan Chase filings
    ("jpmorgan", "JPM", "10-K 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/19617/000001961724000198/jpm-20231231_htm.xml"),
    ("jpmorgan", "JPM", "10-K 2023 Labels",
     "https://www.sec.gov/Archives/edgar/data/19617/000001961724000198/jpm-20231231_lab.xml"),

    # Walmart filings
    ("walmart", "WMT", "10-K 2024 Instance",
     "https://www.sec.gov/Archives/edgar/data/104169/000010416924000012/wmt-20240131_htm.xml"),
    ("walmart", "WMT", "10-K 2024 Presentation",
     "https://www.sec.gov/Archives/edgar/data/104169/000010416924000012/wmt-20240131_pre.xml"),

    # Johnson & Johnson filings
    ("jnj", "JNJ", "10-K 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/200406/000020040624000016/jnj-20231231_htm.xml"),

    # ExxonMobil filings
    ("exxon", "XOM", "10-K 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/34088/000003408824000013/xom-20231231_htm.xml"),

    # Berkshire Hathaway filings
    ("berkshire", "BRK", "10-K 2023 Instance",
     "https://www.sec.gov/Archives/edgar/data/1067983/000095017024021825/brka-20231231_htm.xml"),
]


def download_file(url, filepath):
    """Download a file from URL to filepath.

    Returns:
        True on success, False on any failure (error is printed).
    """
    try:
        # Add headers to avoid being blocked
        request = urllib.request.Request(
            url,
            headers={
                'User-Agent': 'crabrl-test-fixtures/1.0 (testing@example.com)'
            }
        )

        # Explicit timeout so a stalled SEC connection can't hang the
        # whole fixture run indefinitely.
        with urllib.request.urlopen(request, timeout=60) as response:
            content = response.read()
            with open(filepath, 'wb') as f:
                f.write(content)
        return True
    except Exception as e:
        print(f"  Error: {e}")
        return False


def main():
    """Download every filing in ``filings`` into per-company directories."""
    print("Downloading SEC XBRL fixtures from various companies...")
    print("=" * 60)

    downloaded = 0
    failed = 0

    for company, ticker, description, url in filings:
        # Create company directory
        company_dir = fixtures_dir / company
        company_dir.mkdir(exist_ok=True)

        # Generate filename from URL
        filename = url.split('/')[-1]
        filepath = company_dir / filename

        print(f"\n[{ticker}] {description}")
        print(f"  URL: {url}")
        print(f"  Saving to: {filepath}")

        if filepath.exists():
            print("  ✓ Already exists, skipping")
            continue

        if download_file(url, filepath):
            file_size = os.path.getsize(filepath)
            print(f"  ✓ Downloaded ({file_size:,} bytes)")
            downloaded += 1
        else:
            print("  ✗ Failed to download")
            failed += 1

        # Be polite to SEC servers
        time.sleep(0.5)

    print("\n" + "=" * 60)
    print(f"Download complete: {downloaded} downloaded, {failed} failed")
    print(f"Fixtures saved to: {fixtures_dir.absolute()}")

    # Show directory structure
    print("\nFixture structure:")
    for company_dir in sorted(fixtures_dir.iterdir()):
        if company_dir.is_dir():
            files = list(company_dir.glob("*.xml"))
            if files:
                print(f"  {company_dir.name}/")
                for f in sorted(files)[:3]:  # Show first 3 files
                    size = os.path.getsize(f)
                    print(f"    - {f.name} ({size:,} bytes)")
                if len(files) > 3:
                    print(f"    ... and {len(files)-3} more files")


if __name__ == "__main__":
    main()
|
||||
260
rust/crabrl-fork/scripts/generate_benchmark_charts.py
Normal file
260
rust/crabrl-fork/scripts/generate_benchmark_charts.py
Normal file
@@ -0,0 +1,260 @@
|
||||
#!/usr/bin/env python3
"""Generate benchmark charts for crabrl README"""

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
from matplotlib.patches import FancyBboxPatch
import seaborn as sns

# Set style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Performance data (based on claims and benchmarks)
parsers = ['crabrl', 'Traditional\nXBRL Parser', 'Arelle', 'Other\nParsers']
parse_times = [7.2, 360, 1080, 720]  # microseconds for sample file
throughput = [140000, 2800, 930, 1400]  # facts per second

# Speed improvement factors
speed_factors = [1, 50, 150, 100]

# Create figure with subplots
fig = plt.figure(figsize=(16, 10))
fig.suptitle('crabrl Performance Benchmarks', fontsize=24, fontweight='bold', y=0.98)

# Color scheme
colors = ['#2ecc71', '#e74c3c', '#f39c12', '#95a5a6']
highlight_color = '#27ae60'


def _highlight(bar):
    """Emphasize the crabrl bar: brand green fill plus a thicker dark edge."""
    bar.set_color(highlight_color)
    bar.set_edgecolor('#229954')
    bar.set_linewidth(3)


# 1. Parse Time Comparison (Bar Chart)
ax1 = plt.subplot(2, 3, 1)
bars1 = ax1.bar(parsers, parse_times, color=colors, edgecolor='black', linewidth=2)
_highlight(bars1[0])

ax1.set_ylabel('Parse Time (μs)', fontsize=12, fontweight='bold')
ax1.set_title('Parse Time Comparison\n(Lower is Better)', fontsize=14, fontweight='bold')
ax1.set_ylim(0, max(parse_times) * 1.2)

# Add value labels on bars
for bar, value in zip(bars1, parse_times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height + max(parse_times) * 0.02,
             f'{value:.1f}μs', ha='center', va='bottom', fontweight='bold', fontsize=10)

# 2. Throughput Comparison (Bar Chart)
ax2 = plt.subplot(2, 3, 2)
bars2 = ax2.bar(parsers, np.array(throughput)/1000, color=colors, edgecolor='black', linewidth=2)
_highlight(bars2[0])

ax2.set_ylabel('Throughput (K facts/sec)', fontsize=12, fontweight='bold')
ax2.set_title('Throughput Comparison\n(Higher is Better)', fontsize=14, fontweight='bold')
ax2.set_ylim(0, max(throughput)/1000 * 1.2)

# Add value labels
for bar, value in zip(bars2, np.array(throughput)/1000):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + max(throughput)/1000 * 0.02,
             f'{value:.1f}K', ha='center', va='bottom', fontweight='bold', fontsize=10)

# 3. Speed Improvement Factor
ax3 = plt.subplot(2, 3, 3)
x_pos = np.arange(len(parsers))
bars3 = ax3.barh(x_pos, speed_factors, color=colors, edgecolor='black', linewidth=2)
_highlight(bars3[0])

ax3.set_yticks(x_pos)
ax3.set_yticklabels(parsers)
ax3.set_xlabel('Speed Factor (vs Traditional)', fontsize=12, fontweight='bold')
ax3.set_title('Relative Speed\n(crabrl as baseline)', fontsize=14, fontweight='bold')
ax3.set_xlim(0, max(speed_factors) * 1.2)

# Add value labels
for i, (bar, value) in enumerate(zip(bars3, speed_factors)):
    width = bar.get_width()
    label = f'{value}x' if i == 0 else f'1/{value}x slower'
    ax3.text(width + max(speed_factors) * 0.02, bar.get_y() + bar.get_height()/2.,
             label, ha='left', va='center', fontweight='bold', fontsize=10)

# 4. Memory Usage Comparison (Simulated)
ax4 = plt.subplot(2, 3, 4)
memory_usage = [50, 850, 1200, 650]  # MB for 100k facts
bars4 = ax4.bar(parsers, memory_usage, color=colors, edgecolor='black', linewidth=2)
_highlight(bars4[0])

ax4.set_ylabel('Memory Usage (MB)', fontsize=12, fontweight='bold')
ax4.set_title('Memory Efficiency\n(100K facts, Lower is Better)', fontsize=14, fontweight='bold')
ax4.set_ylim(0, max(memory_usage) * 1.2)

# Add value labels
for bar, value in zip(bars4, memory_usage):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width()/2., height + max(memory_usage) * 0.02,
             f'{value}MB', ha='center', va='bottom', fontweight='bold', fontsize=10)

# 5. Scalability Chart (Line Plot)
ax5 = plt.subplot(2, 3, 5)
file_sizes = np.array([1, 10, 50, 100, 500, 1000])  # MB
crabrl_times = file_sizes * 0.1  # Linear scaling
traditional_times = file_sizes * 5  # Much slower
arelle_times = file_sizes * 15  # Even slower

ax5.plot(file_sizes, crabrl_times, 'o-', color=highlight_color, linewidth=3,
         markersize=8, label='crabrl', markeredgecolor='#229954', markeredgewidth=2)
ax5.plot(file_sizes, traditional_times, 's-', color=colors[1], linewidth=2,
         markersize=6, label='Traditional', alpha=0.7)
ax5.plot(file_sizes, arelle_times, '^-', color=colors[2], linewidth=2,
         markersize=6, label='Arelle', alpha=0.7)

ax5.set_xlabel('File Size (MB)', fontsize=12, fontweight='bold')
ax5.set_ylabel('Parse Time (seconds)', fontsize=12, fontweight='bold')
ax5.set_title('Scalability Performance\n(Linear vs Exponential)', fontsize=14, fontweight='bold')
ax5.legend(loc='upper left', fontsize=10, framealpha=0.9)
ax5.grid(True, alpha=0.3)
ax5.set_xlim(0, 1100)

# 6. Feature Comparison Matrix
ax6 = plt.subplot(2, 3, 6)
ax6.axis('off')

features = ['Speed', 'Memory', 'SEC EDGAR', 'Parallel', 'Streaming']
feature_scores = {
    'crabrl': [5, 5, 5, 5, 4],
    'Traditional': [1, 2, 3, 1, 2],
    'Arelle': [1, 1, 5, 2, 2],
    'Others': [2, 3, 3, 2, 3]
}

# Create feature matrix visualization
y_pos = 0.9
ax6.text(0.5, y_pos, 'Feature Comparison', fontsize=14, fontweight='bold',
         ha='center', transform=ax6.transAxes)

y_pos -= 0.1
x_positions = [0.2, 0.35, 0.5, 0.65, 0.8]
for i, feature in enumerate(features):
    ax6.text(x_positions[i], y_pos, feature, fontsize=10, fontweight='bold',
             ha='center', transform=ax6.transAxes)

parser_names = ['crabrl', 'Traditional', 'Arelle', 'Others']
y_positions = [0.65, 0.5, 0.35, 0.2]

# Row per parser; scores come straight out of feature_scores by name.
for j, parser in enumerate(parser_names):
    scores = feature_scores[parser]
    ax6.text(0.05, y_positions[j], parser, fontsize=10, fontweight='bold',
             ha='left', transform=ax6.transAxes)

    for i, score in enumerate(scores):
        # Draw filled circles for score
        for k in range(5):
            circle = plt.Circle((x_positions[i] + k*0.02 - 0.04, y_positions[j]),
                                0.008, transform=ax6.transAxes,
                                color=highlight_color if k < score and j == 0 else
                                '#34495e' if k < score else '#ecf0f1',
                                edgecolor='black', linewidth=1)
            ax6.add_patch(circle)

# Add performance badges
badge_y = 0.05
badges = ['🚀 50-150x Faster', '💾 Low Memory', '⚡ Zero-Copy', '🔒 Production Ready']
badge_x_positions = [0.125, 0.375, 0.625, 0.875]

for badge, x_pos in zip(badges, badge_x_positions):
    bbox = FancyBboxPatch((x_pos - 0.1, badge_y - 0.03), 0.2, 0.06,
                          boxstyle="round,pad=0.01",
                          facecolor=highlight_color, edgecolor='#229954',
                          linewidth=2, transform=ax6.transAxes, alpha=0.9)
    ax6.add_patch(bbox)
    ax6.text(x_pos, badge_y, badge, fontsize=9, fontweight='bold',
             ha='center', va='center', transform=ax6.transAxes, color='white')

# Adjust layout
plt.tight_layout()
plt.subplots_adjust(top=0.93, hspace=0.3, wspace=0.3)

# Save the figure
plt.savefig('benchmarks/benchmark_results.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print("Saved: benchmarks/benchmark_results.png")

# Create a simplified hero image for README header
fig2, ax = plt.subplots(figsize=(12, 4), facecolor='white')
ax.axis('off')

# Title
ax.text(0.5, 0.85, 'crabrl', fontsize=48, fontweight='bold',
        ha='center', transform=ax.transAxes, color='#2c3e50')
ax.text(0.5, 0.65, 'Lightning-Fast XBRL Parser', fontsize=20,
        ha='center', transform=ax.transAxes, color='#7f8c8d')

# Performance stats
stats = [
    ('50-150x', 'Faster than\ntraditional parsers'),
    ('140K', 'Facts per\nsecond'),
    ('< 50MB', 'Memory for\n100K facts'),
    ('Zero-Copy', 'Parsing\narchitecture')
]

x_positions = [0.125, 0.375, 0.625, 0.875]
for (value, desc), x_pos in zip(stats, x_positions):
    # Value
    ax.text(x_pos, 0.35, value, fontsize=28, fontweight='bold',
            ha='center', transform=ax.transAxes, color=highlight_color)
    # Description
    ax.text(x_pos, 0.15, desc, fontsize=12,
            ha='center', transform=ax.transAxes, color='#7f8c8d',
            multialignment='center')

plt.savefig('benchmarks/hero_banner.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print("Saved: benchmarks/hero_banner.png")

# Create a speed comparison bar
fig3, ax = plt.subplots(figsize=(10, 3), facecolor='white')

# Speed comparison visualization
speeds = [150, 100, 50, 1]
labels = ['crabrl\n150x faster', 'crabrl\n100x faster', 'crabrl\n50x faster', 'Baseline']
colors_speed = [highlight_color, '#3498db', '#9b59b6', '#95a5a6']

y_pos = np.arange(len(labels))
bars = ax.barh(y_pos, speeds, color=colors_speed, edgecolor='black', linewidth=2)

ax.set_yticks(y_pos)
ax.set_yticklabels(labels, fontsize=11, fontweight='bold')
ax.set_xlabel('Relative Performance', fontsize=12, fontweight='bold')
ax.set_title('crabrl Speed Advantage', fontsize=16, fontweight='bold', pad=20)

# Add speed labels
for bar, speed in zip(bars, speeds):
    width = bar.get_width()
    label = f'{speed}x' if speed > 1 else 'Traditional\nParsers'
    ax.text(width + 3, bar.get_y() + bar.get_height()/2.,
            label, ha='left', va='center', fontweight='bold', fontsize=11)

ax.set_xlim(0, 180)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig('benchmarks/speed_comparison.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print("Saved: benchmarks/speed_comparison.png")

print("\n✅ All benchmark images generated successfully!")
print("\nYou can now add these to your README:")
print("  - benchmarks/hero_banner.png (header image)")
print("  - benchmarks/benchmark_results.png (detailed performance)")
print("  - benchmarks/speed_comparison.png (speed comparison)")
|
||||
253
rust/crabrl-fork/scripts/generate_clean_benchmarks.py
Normal file
253
rust/crabrl-fork/scripts/generate_clean_benchmarks.py
Normal file
@@ -0,0 +1,253 @@
|
||||
#!/usr/bin/env python3
"""Generate clean benchmark charts for crabrl README.

Writes three PNGs into benchmarks/:
  - performance_charts.png  (six-panel performance overview)
  - speed_comparison_clean.png (single horizontal speed bar chart)
  - header.png              (minimal README header banner)

NOTE(review): the benchmark numbers below are hard-coded marketing figures,
not measured output — confirm they match the latest benchmark runs.
"""

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Rectangle, FancyBboxPatch
import matplotlib.patches as mpatches  # NOTE(review): currently unused — confirm before removing

# Set a professional style
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['DejaVu Sans', 'Arial', 'Helvetica']
plt.rcParams['axes.linewidth'] = 1.5
plt.rcParams['axes.edgecolor'] = '#333333'

# Color palette (professional and accessible)
PRIMARY_COLOR = '#00A86B'  # Jade green
SECONDARY_COLOR = '#FF6B6B'  # Coral red
TERTIARY_COLOR = '#4ECDC4'  # Teal
QUATERNARY_COLOR = '#95E1D3'  # Mint
GRAY_COLOR = '#95A5A6'
DARK_COLOR = '#2C3E50'
LIGHT_GRAY = '#ECF0F1'

# Performance data (static figures, one entry per parser being compared)
performance_data = {
    'crabrl': {
        'parse_time': 7.2,  # microseconds
        'throughput': 140000,  # facts/sec
        'memory': 50,  # MB for 100k facts
        'speed_factor': 100,  # average speedup
        'color': PRIMARY_COLOR
    },
    'Traditional': {
        'parse_time': 720,
        'throughput': 1400,
        'memory': 850,
        'speed_factor': 1,
        'color': SECONDARY_COLOR
    },
    'Arelle': {
        'parse_time': 1080,
        'throughput': 930,
        'memory': 1200,
        'speed_factor': 0.67,
        'color': TERTIARY_COLOR
    }
}

# Create main comparison chart (2x3 grid of panels)
fig = plt.figure(figsize=(14, 8), facecolor='white')
fig.suptitle('crabrl Performance Benchmarks', fontsize=22, fontweight='bold', color=DARK_COLOR)

# 1. Parse Speed Comparison
ax1 = plt.subplot(2, 3, 1)
parsers = list(performance_data.keys())
parse_times = [performance_data[p]['parse_time'] for p in parsers]
colors = [performance_data[p]['color'] for p in parsers]

bars = ax1.bar(parsers, parse_times, color=colors, edgecolor=DARK_COLOR, linewidth=2)
ax1.set_ylabel('Parse Time (μs)', fontsize=11, fontweight='bold', color=DARK_COLOR)
ax1.set_title('Parse Time\n(Lower is Better)', fontsize=12, fontweight='bold', color=DARK_COLOR)
ax1.set_yscale('log')  # Log scale for better visualization
ax1.grid(axis='y', alpha=0.3, linestyle='--')

# Add value labels (placed just above each bar; *1.1 because the axis is log-scale)
for bar, value in zip(bars, parse_times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height * 1.1,
             f'{value:.1f}μs', ha='center', va='bottom', fontweight='bold', fontsize=10)

# 2. Throughput Comparison (shown in thousands of facts/sec)
ax2 = plt.subplot(2, 3, 2)
throughputs = [performance_data[p]['throughput'] for p in parsers]
bars = ax2.bar(parsers, np.array(throughputs)/1000, color=colors, edgecolor=DARK_COLOR, linewidth=2)
ax2.set_ylabel('Throughput (K facts/sec)', fontsize=11, fontweight='bold', color=DARK_COLOR)
ax2.set_title('Processing Speed\n(Higher is Better)', fontsize=12, fontweight='bold', color=DARK_COLOR)
ax2.grid(axis='y', alpha=0.3, linestyle='--')

for bar, value in zip(bars, np.array(throughputs)/1000):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + 2,
             f'{value:.0f}K', ha='center', va='bottom', fontweight='bold', fontsize=10)

# 3. Memory Usage
ax3 = plt.subplot(2, 3, 3)
memory_usage = [performance_data[p]['memory'] for p in parsers]
bars = ax3.bar(parsers, memory_usage, color=colors, edgecolor=DARK_COLOR, linewidth=2)
ax3.set_ylabel('Memory (MB)', fontsize=11, fontweight='bold', color=DARK_COLOR)
ax3.set_title('Memory Usage\n(100K facts)', fontsize=12, fontweight='bold', color=DARK_COLOR)
ax3.grid(axis='y', alpha=0.3, linestyle='--')

for bar, value in zip(bars, memory_usage):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width()/2., height + 20,
             f'{value}MB', ha='center', va='bottom', fontweight='bold', fontsize=10)

# 4. Speed Multiplier Visual (hand-drawn panel, no axes)
ax4 = plt.subplot(2, 3, 4)
ax4.axis('off')
ax4.set_title('Speed Advantage', fontsize=12, fontweight='bold', color=DARK_COLOR, pad=20)

# Create speed comparison visual (coordinates are in axes fractions, 0..1)
y_base = 0.5
bar_height = 0.15
max_width = 0.8

# crabrl bar (baseline)
crabrl_rect = Rectangle((0.1, y_base), max_width, bar_height,
                        facecolor=PRIMARY_COLOR, edgecolor=DARK_COLOR, linewidth=2)
ax4.add_patch(crabrl_rect)
ax4.text(0.1 + max_width + 0.02, y_base + bar_height/2, '100x baseline',
         va='center', fontweight='bold', fontsize=11)
ax4.text(0.05, y_base + bar_height/2, 'crabrl', va='center', ha='right', fontweight='bold')

# Traditional parser bar
trad_width = max_width / 100  # 1/100th the speed
trad_rect = Rectangle((0.1, y_base - bar_height*1.5), trad_width, bar_height,
                      facecolor=SECONDARY_COLOR, edgecolor=DARK_COLOR, linewidth=2)
ax4.add_patch(trad_rect)
ax4.text(0.1 + trad_width + 0.02, y_base - bar_height*1.5 + bar_height/2, '1x',
         va='center', fontweight='bold', fontsize=11)
ax4.text(0.05, y_base - bar_height*1.5 + bar_height/2, 'Others', va='center', ha='right', fontweight='bold')

ax4.set_xlim(0, 1)
ax4.set_ylim(0, 1)

# 5. Scalability Chart (synthetic linear models, not measured data)
ax5 = plt.subplot(2, 3, 5)
file_sizes = np.array([1, 10, 50, 100, 500, 1000])  # MB
crabrl_times = file_sizes * 0.01  # Linear scaling
traditional_times = file_sizes * 1.0  # Much slower
arelle_times = file_sizes * 1.5  # Even slower

ax5.plot(file_sizes, crabrl_times, 'o-', color=PRIMARY_COLOR, linewidth=3,
         markersize=8, label='crabrl', markeredgecolor=DARK_COLOR, markeredgewidth=1.5)
ax5.plot(file_sizes, traditional_times, 's-', color=SECONDARY_COLOR, linewidth=2,
         markersize=6, label='Traditional', alpha=0.8)
ax5.plot(file_sizes, arelle_times, '^-', color=TERTIARY_COLOR, linewidth=2,
         markersize=6, label='Arelle', alpha=0.8)

ax5.set_xlabel('File Size (MB)', fontsize=11, fontweight='bold', color=DARK_COLOR)
ax5.set_ylabel('Parse Time (seconds)', fontsize=11, fontweight='bold', color=DARK_COLOR)
ax5.set_title('Scalability\n(Linear vs Exponential)', fontsize=12, fontweight='bold', color=DARK_COLOR)
ax5.legend(loc='upper left', fontsize=10, framealpha=0.95)
ax5.grid(True, alpha=0.3, linestyle='--')
ax5.set_xlim(0, 1100)

# 6. Key Features (text panel: first feature highlighted, rest greyed)
ax6 = plt.subplot(2, 3, 6)
ax6.axis('off')
ax6.set_title('Key Advantages', fontsize=12, fontweight='bold', color=DARK_COLOR, y=0.95)

features = [
    ('50-150x Faster', 'Than traditional parsers'),
    ('Zero-Copy', 'Memory efficient design'),
    ('Production Ready', 'SEC EDGAR optimized'),
    ('Rust Powered', 'Safe and concurrent')
]

y_start = 0.75
for i, (title, desc) in enumerate(features):
    y_pos = y_start - i * 0.2

    # Feature box
    bbox = FancyBboxPatch((0.05, y_pos - 0.05), 0.9, 0.12,
                          boxstyle="round,pad=0.02",
                          facecolor=PRIMARY_COLOR if i == 0 else LIGHT_GRAY,
                          edgecolor=DARK_COLOR,
                          linewidth=1.5, alpha=0.3 if i > 0 else 0.2)
    ax6.add_patch(bbox)

    # Title
    ax6.text(0.1, y_pos + 0.02, title, fontsize=11, fontweight='bold',
             color=PRIMARY_COLOR if i == 0 else DARK_COLOR)
    # Description
    ax6.text(0.1, y_pos - 0.02, desc, fontsize=9, color=GRAY_COLOR)

# Adjust layout
plt.tight_layout()
plt.subplots_adjust(top=0.92, hspace=0.4, wspace=0.3)

# Save (assumes benchmarks/ directory already exists — savefig will not create it)
plt.savefig('benchmarks/performance_charts.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print("Saved: benchmarks/performance_charts.png")

# Create simple speed comparison bar
fig2, ax = plt.subplots(figsize=(10, 4), facecolor='white')

# Data
parsers = ['crabrl', 'Parser B', 'Parser C', 'Arelle']
speeds = [150, 3, 2, 1]  # Relative to slowest
colors = [PRIMARY_COLOR, QUATERNARY_COLOR, TERTIARY_COLOR, SECONDARY_COLOR]

# Create horizontal bars
y_pos = np.arange(len(parsers))
bars = ax.barh(y_pos, speeds, color=colors, edgecolor=DARK_COLOR, linewidth=2, height=0.6)

# Styling
ax.set_yticks(y_pos)
ax.set_yticklabels(parsers, fontsize=12, fontweight='bold')
ax.set_xlabel('Relative Speed (Higher is Better)', fontsize=12, fontweight='bold', color=DARK_COLOR)
ax.set_title('crabrl vs Traditional XBRL Parsers', fontsize=16, fontweight='bold', color=DARK_COLOR, pad=20)

# Add value labels (the slowest entry is labelled 'Baseline' instead of a multiplier)
for bar, speed in zip(bars, speeds):
    width = bar.get_width()
    label = f'{speed}x faster' if speed > 1 else 'Baseline'
    ax.text(width + 2, bar.get_y() + bar.get_height()/2.,
            label, ha='left', va='center', fontweight='bold', fontsize=11)

# Add impressive stats annotation
ax.text(0.98, 0.02, 'Up to 150x faster on SEC EDGAR filings',
        transform=ax.transAxes, ha='right', fontsize=10,
        style='italic', color=GRAY_COLOR)

ax.set_xlim(0, 170)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.grid(axis='x', alpha=0.3, linestyle='--')

plt.tight_layout()
plt.savefig('benchmarks/speed_comparison_clean.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print("Saved: benchmarks/speed_comparison_clean.png")

# Create a minimal header image
fig3, ax = plt.subplots(figsize=(12, 3), facecolor='white')
ax.axis('off')

# Background gradient effect using rectangles (10 strips of decreasing alpha)
for i in range(10):
    alpha = 0.02 * (10 - i)
    rect = Rectangle((i/10, 0), 0.1, 1, transform=ax.transAxes,
                     facecolor=PRIMARY_COLOR, alpha=alpha)
    ax.add_patch(rect)

# Title and tagline
ax.text(0.5, 0.65, 'crabrl', fontsize=42, fontweight='bold',
        ha='center', transform=ax.transAxes, color=DARK_COLOR)
ax.text(0.5, 0.35, 'Lightning-Fast XBRL Parser for Rust', fontsize=16,
        ha='center', transform=ax.transAxes, color=GRAY_COLOR)

plt.savefig('benchmarks/header.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print("Saved: benchmarks/header.png")

print("\n✅ Clean benchmark visualizations created successfully!")
print("\nGenerated files:")
print("  - benchmarks/header.png - Minimal header for README")
print("  - benchmarks/performance_charts.png - Comprehensive performance metrics")
print("  - benchmarks/speed_comparison_clean.png - Simple speed comparison")
print("\nYou can now add these images to your GitHub README!")
242
rust/crabrl-fork/src/allocator.rs
Normal file
242
rust/crabrl-fork/src/allocator.rs
Normal file
@@ -0,0 +1,242 @@
|
||||
use bumpalo::Bump;
|
||||
use compact_str::CompactString;
|
||||
use parking_lot::Mutex;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::sync::Arc;
|
||||
|
||||
// Size of each bump arena: large enough to amortize OS allocation cost
// across many small parser allocations.
const ARENA_SIZE: usize = 64 * 1024 * 1024; // 64MB arenas
// Default upper bound on the number of objects retained by an `ObjectPool`.
const POOL_SIZE: usize = 1024;
|
||||
|
||||
/// Bump-arena allocator combined with a thread-safe string interner.
///
/// Cache-line aligned (`align(64)`) to avoid false sharing when embedded
/// in concurrently accessed structures.
#[repr(align(64))]
pub struct ArenaAllocator {
    // Active bump arena that new allocations come from.
    current: RefCell<Bump>,
    // Retired arenas kept alive so allocations made before `new_arena()`
    // remain valid.
    arenas: RefCell<Vec<Bump>>,
    // Interner: string -> dense u32 id.
    string_to_id: Arc<Mutex<HashMap<CompactString, u32>>>,
    // Interner: id -> string (vector index IS the id).
    id_to_string: Arc<Mutex<Vec<CompactString>>>,
}

impl ArenaAllocator {
    /// Creates an allocator with one pre-sized `ARENA_SIZE` arena and an
    /// empty interner.
    pub fn new() -> Self {
        Self {
            current: RefCell::new(Bump::with_capacity(ARENA_SIZE)),
            arenas: RefCell::new(Vec::with_capacity(16)),
            string_to_id: Arc::new(Mutex::new(HashMap::new())),
            id_to_string: Arc::new(Mutex::new(Vec::new())),
        }
    }

    /// Allocates `val` in the current arena and returns a reference to it.
    ///
    /// SAFETY/NOTE(review): the raw-pointer round-trip stretches the
    /// reference's lifetime from the temporary `RefCell` borrow out to
    /// `&self`. That is only sound while the backing `Bump` is neither
    /// reset nor dropped; `reset()` below takes `&self` and would
    /// invalidate any outstanding reference — confirm callers never hold
    /// allocations across `reset()`.
    #[inline(always)]
    pub fn alloc<T>(&self, val: T) -> &T {
        unsafe {
            let ptr = self.current.borrow().alloc(val) as *const T;
            &*ptr
        }
    }

    /// Copies `slice` into the current arena and returns the arena-owned
    /// copy. Same lifetime caveat as [`Self::alloc`].
    #[inline(always)]
    pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &[T] {
        unsafe {
            let ptr = self.current.borrow().alloc_slice_copy(slice) as *const [T];
            &*ptr
        }
    }

    /// Copies `s` into the current arena and returns the arena-owned copy.
    /// Same lifetime caveat as [`Self::alloc`].
    #[inline(always)]
    pub fn alloc_str(&self, s: &str) -> &str {
        unsafe {
            let ptr = self.current.borrow().alloc_str(s) as *const str;
            &*ptr
        }
    }

    /// Interns `s`, returning a stable dense id. Repeated calls with the
    /// same string return the same id.
    ///
    /// Uses an optimistic read (first lock is released at the end of the
    /// `if let` statement), then re-checks under both locks so a racing
    /// interner cannot register the same string twice. Lock order is
    /// always `id_to_string` then `string_to_id` in the write path.
    #[inline(always)]
    pub fn intern_string(&self, s: &str) -> u32 {
        let key = CompactString::from(s);

        // Check if already interned
        if let Some(&id) = self.string_to_id.lock().get(&key) {
            return id;
        }

        // Add new interned string
        let mut id_to_string = self.id_to_string.lock();
        let mut string_to_id = self.string_to_id.lock();

        // Double-check after acquiring both locks
        if let Some(&id) = string_to_id.get(&key) {
            return id;
        }

        let id = id_to_string.len() as u32;
        id_to_string.push(key.clone());
        string_to_id.insert(key, id);

        id
    }

    /// Returns a clone of the interned string for `id`, or `None` when the
    /// id was never issued (or the interner was cleared by `reset()`).
    #[inline(always)]
    pub fn get_interned(&self, id: u32) -> Option<CompactString> {
        self.id_to_string.lock().get(id as usize).cloned()
    }

    /// Returns a snapshot of every interned string, ordered by id.
    pub fn get_all_strings(&self) -> Vec<CompactString> {
        self.id_to_string.lock().clone()
    }

    /// Number of distinct interned strings.
    pub fn string_count(&self) -> usize {
        self.id_to_string.lock().len()
    }

    /// Resets every arena (freeing all allocations for reuse) and clears
    /// the interner, so previously issued ids become invalid.
    ///
    /// NOTE(review): invalidates every reference previously handed out by
    /// `alloc`/`alloc_slice`/`alloc_str` — see the safety caveat there.
    pub fn reset(&self) {
        let mut current = self.current.borrow_mut();
        current.reset();

        let mut arenas = self.arenas.borrow_mut();
        for arena in arenas.iter_mut() {
            arena.reset();
        }

        // Clear string interning
        self.string_to_id.lock().clear();
        self.id_to_string.lock().clear();
    }

    /// Retires the current arena (keeping its allocations alive in
    /// `arenas`) and installs a fresh one for subsequent allocations.
    pub fn new_arena(&self) {
        let mut arenas = self.arenas.borrow_mut();
        let old = std::mem::replace(
            &mut *self.current.borrow_mut(),
            Bump::with_capacity(ARENA_SIZE),
        );
        arenas.push(old);
    }
}

// `Default` is equivalent to `ArenaAllocator::new()`.
impl Default for ArenaAllocator {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
/// Simple free-list object pool: pre-builds `capacity` boxed objects and
/// recycles released ones to avoid repeated heap allocation.
pub struct ObjectPool<T> {
    // Available (idle) objects; acquire pops from the back, release pushes.
    pool: Vec<Box<T>>,
    // Constructor used to refill the pool when it runs dry.
    factory: fn() -> T,
}

impl<T> ObjectPool<T> {
    /// Creates a pool pre-filled with `capacity` objects produced by
    /// `factory`.
    pub fn new(capacity: usize, factory: fn() -> T) -> Self {
        let mut pool = Vec::with_capacity(capacity);
        for _ in 0..capacity {
            pool.push(Box::new(factory()));
        }
        Self { pool, factory }
    }

    /// Takes an object from the pool, building a fresh one via the factory
    /// when the pool is empty.
    #[inline(always)]
    pub fn acquire(&mut self) -> Box<T> {
        self.pool
            .pop()
            .unwrap_or_else(|| Box::new((self.factory)()))
    }

    /// Returns an object to the pool; objects beyond the pool's reserved
    /// capacity are dropped instead of retained.
    ///
    /// Fix: the retention cap is now the capacity this pool was constructed
    /// with (`Vec::with_capacity` reserves at least that much, and pushes
    /// below `capacity()` never reallocate). Previously the cap was the
    /// unrelated global `POOL_SIZE` (1024), so a pool built with a small
    /// capacity could silently grow to 1024 retained objects, and a pool
    /// built larger than 1024 would shed objects on release.
    #[inline(always)]
    pub fn release(&mut self, obj: Box<T>) {
        if self.pool.len() < self.pool.capacity() {
            self.pool.push(obj);
        }
    }
}
|
||||
|
||||
/// Fixed-capacity, cache-line-aligned byte buffer that lives entirely on
/// the stack (no heap allocation).
#[repr(C, align(64))]
pub struct StackBuffer<const N: usize> {
    // Backing storage; only the first `len` bytes are initialized.
    data: [MaybeUninit<u8>; N],
    // Number of initialized bytes (invariant: len <= N).
    len: usize,
}

// `Default` is an empty buffer, identical to `StackBuffer::new()`.
impl<const N: usize> Default for StackBuffer<N> {
    fn default() -> Self {
        Self::new()
    }
}

impl<const N: usize> StackBuffer<N> {
    /// Creates an empty buffer without zeroing the storage.
    #[inline(always)]
    pub const fn new() -> Self {
        Self {
            // SAFETY: an array of `MaybeUninit<u8>` requires no
            // initialization; `assume_init` on the outer `MaybeUninit`
            // is the standard idiom for materializing such an array.
            data: unsafe { MaybeUninit::uninit().assume_init() },
            len: 0,
        }
    }

    /// Appends one byte. Returns `false` (writing nothing) when the
    /// buffer is already full.
    #[inline(always)]
    pub fn push(&mut self, byte: u8) -> bool {
        if self.len < N {
            self.data[self.len] = MaybeUninit::new(byte);
            self.len += 1;
            true
        } else {
            false
        }
    }

    /// Views the initialized prefix of the buffer as a byte slice.
    #[inline(always)]
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: bytes 0..len were written via `push`, so they are
        // initialized, and the pointer/length pair describes storage
        // owned by `self` for the duration of the borrow.
        unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const u8, self.len) }
    }

    /// Logically empties the buffer; the underlying bytes are left as-is.
    #[inline(always)]
    pub fn clear(&mut self) {
        self.len = 0;
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Arena string allocation: returned slices match the input text.
    #[test]
    fn test_arena_allocator() {
        let arena = ArenaAllocator::new();
        let s1 = arena.alloc_str("hello");
        let s2 = arena.alloc_str("world");
        assert_eq!(s1, "hello");
        assert_eq!(s2, "world");
    }

    // Interning the same string twice yields the same id, and the id
    // round-trips back to the original text.
    #[test]
    fn test_string_interning() {
        let arena = ArenaAllocator::new();
        let id1 = arena.intern_string("test");
        let id2 = arena.intern_string("test");
        assert_eq!(id1, id2);

        let s = arena.get_interned(id1).unwrap();
        assert_eq!(s, "test");
    }

    // Distinct strings intern to distinct ids that each resolve correctly.
    #[test]
    fn test_string_interning_different() {
        let arena = ArenaAllocator::new();
        let id1 = arena.intern_string("foo");
        let id2 = arena.intern_string("bar");
        assert_ne!(id1, id2);

        assert_eq!(arena.get_interned(id1).unwrap(), "foo");
        assert_eq!(arena.get_interned(id2).unwrap(), "bar");
    }

    // get_all_strings returns every interned string exactly once.
    #[test]
    fn test_get_all_strings() {
        let arena = ArenaAllocator::new();
        arena.intern_string("a");
        arena.intern_string("b");
        arena.intern_string("c");

        let all = arena.get_all_strings();
        assert_eq!(all.len(), 3);
        assert!(all.contains(&CompactString::from("a")));
        assert!(all.contains(&CompactString::from("b")));
        assert!(all.contains(&CompactString::from("c")));
    }
}
|
||||
47
rust/crabrl-fork/src/cache.rs
Normal file
47
rust/crabrl-fork/src/cache.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
use dashmap::DashMap;
|
||||
use std::sync::Arc;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub struct LockFreeCache<K, V> {
|
||||
map: Arc<DashMap<K, V>>,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl<K, V> LockFreeCache<K, V>
|
||||
where
|
||||
K: Eq + Hash + Clone,
|
||||
V: Clone,
|
||||
{
|
||||
pub fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
map: Arc::new(DashMap::with_capacity(capacity)),
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn get(&self, key: &K) -> Option<V> {
|
||||
self.map.get(key).map(|v| v.clone())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn insert(&self, key: K, value: V) {
|
||||
if self.map.len() >= self.capacity {
|
||||
if let Some(entry) = self.map.iter().next() {
|
||||
let k = entry.key().clone();
|
||||
drop(entry);
|
||||
self.map.remove(&k);
|
||||
}
|
||||
}
|
||||
self.map.insert(key, value);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn contains(&self, key: &K) -> bool {
|
||||
self.map.contains_key(key)
|
||||
}
|
||||
|
||||
pub fn clear(&self) {
|
||||
self.map.clear();
|
||||
}
|
||||
}
|
||||
21
rust/crabrl-fork/src/instance.rs
Normal file
21
rust/crabrl-fork/src/instance.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
use crate::model::Document;
|
||||
use crate::Result;
|
||||
|
||||
pub struct InstanceValidator {
|
||||
strict: bool,
|
||||
}
|
||||
|
||||
impl InstanceValidator {
|
||||
pub fn new() -> Self {
|
||||
Self { strict: false }
|
||||
}
|
||||
|
||||
pub fn with_strict(mut self, strict: bool) -> Self {
|
||||
self.strict = strict;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn validate(&self, _document: &Document) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
113
rust/crabrl-fork/src/lib.rs
Normal file
113
rust/crabrl-fork/src/lib.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
//! crabrl - High-performance XBRL parser and validator
|
||||
//!
|
||||
//! Licensed under AGPL-3.0
|
||||
|
||||
pub mod allocator;
|
||||
pub mod linkbase;
|
||||
pub mod model;
|
||||
pub mod parser;
|
||||
pub mod schema;
|
||||
pub mod simd;
|
||||
pub mod validator;
|
||||
|
||||
// Primary parser export
|
||||
pub use parser::Parser;
|
||||
|
||||
// Model types
|
||||
pub use model::{
|
||||
CalculationLink, Context, DefinitionLink, DimensionMember, Document, Entity, Fact, FactFlags,
|
||||
FactOrTuple, FactStorage, FactValue, Footnote, Link, Linkbase, Measure, Period,
|
||||
PresentationLink, Reference, ReferenceLink, Scenario, Schema, SchemaElement, SchemaImport,
|
||||
SchemaType, Segment, Tuple, TypedMember, Unit, UnitType,
|
||||
};
|
||||
|
||||
// ValidationError from validator module
|
||||
pub use validator::ValidationError;
|
||||
|
||||
// Allocator
|
||||
pub use allocator::ArenaAllocator;
|
||||
|
||||
// Linkbase processor
|
||||
pub use linkbase::LinkbaseProcessor;
|
||||
|
||||
// Validator
|
||||
pub use validator::XbrlValidator;
|
||||
|
||||
/// Crate-wide error type for parsing and validation.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Underlying filesystem / stream failure (auto-converted via `From`).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Malformed XBRL input (bad UTF-8, unexpected markup, ...).
    #[error("Parse error: {0}")]
    Parse(String),
    /// The document failed a validation rule.
    #[error("Validation error: {0}")]
    Validation(String),
    /// A referenced item could not be located.
    #[error("Not found: {0}")]
    NotFound(String),
}

/// Crate-wide result alias using [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
// Convenience validator wrapper
|
||||
/// Convenience facade over [`XbrlValidator`] exposing a simpler
/// summary-style result.
#[derive(Default)]
pub struct Validator {
    // Underlying validator that performs the actual checks.
    inner: XbrlValidator,
}
|
||||
|
||||
impl Validator {
    /// Creates a validator with default (non-strict) settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a validator preconfigured for SEC EDGAR filings
    /// (strict mode enabled on the inner validator).
    pub fn sec_edgar() -> Self {
        Self {
            inner: XbrlValidator::new().strict(),
        }
    }

    /// Creates a validator from a configuration.
    ///
    /// NOTE(review): the config is currently ignored — this always returns
    /// a default validator. Confirm whether `_config.strict` should map to
    /// the inner validator's strict mode.
    pub fn with_config(_config: ValidationConfig) -> Self {
        Self::new()
    }

    /// Validates `doc` and returns a summary. Never returns `Err` today;
    /// failures are reported through `ValidationResult::is_valid`/`errors`.
    ///
    /// NOTE(review): clones the entire document because the inner
    /// validator needs `&mut Document` — expensive for large filings.
    /// Concrete validation errors are also discarded in favor of a single
    /// generic message; consider surfacing them.
    pub fn validate(&self, doc: &Document) -> Result<ValidationResult> {
        let start = std::time::Instant::now();
        // Work on a copy so the caller's document is not mutated.
        let mut doc_copy = doc.clone();
        let is_valid = self.inner.validate(&mut doc_copy).is_ok();

        Ok(ValidationResult {
            is_valid,
            errors: if is_valid {
                Vec::new()
            } else {
                vec!["Validation failed".to_string()]
            },
            warnings: Vec::new(),
            stats: ValidationStats {
                // Fact count of the input plus wall-clock duration of this run.
                facts_validated: doc.facts.len(),
                duration_ms: start.elapsed().as_millis() as u64,
            },
        })
    }
}
|
||||
|
||||
/// Options controlling how strictly a document is validated.
#[derive(Default)]
pub struct ValidationConfig {
    /// Enable strict checking (as required for SEC EDGAR submissions).
    pub strict: bool,
}

impl ValidationConfig {
    /// Preset configuration for SEC EDGAR filings: strict mode on.
    pub fn sec_edgar() -> Self {
        let mut config = ValidationConfig::default();
        config.strict = true;
        config
    }
}
|
||||
|
||||
/// Outcome summary produced by [`Validator::validate`].
pub struct ValidationResult {
    /// Whether the document passed validation.
    pub is_valid: bool,
    /// Human-readable error messages (empty when `is_valid` is true).
    pub errors: Vec<String>,
    /// Non-fatal findings.
    pub warnings: Vec<String>,
    /// Timing and volume statistics for this run.
    pub stats: ValidationStats,
}
|
||||
|
||||
/// Statistics about a single validation run.
pub struct ValidationStats {
    /// Number of facts in the validated document.
    pub facts_validated: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
}
|
||||
470
rust/crabrl-fork/src/linkbase.rs
Normal file
470
rust/crabrl-fork/src/linkbase.rs
Normal file
@@ -0,0 +1,470 @@
|
||||
// Linkbase processing for XBRL
|
||||
use crate::model::*;
|
||||
use crate::validator::ValidationError;
|
||||
use crate::{Error, Result};
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
/// Accumulates relationship arcs parsed from XBRL linkbase documents.
pub struct LinkbaseProcessor {
    // Presentation arcs, keyed by the arc's `xlink:from` label.
    presentation_links: HashMap<CompactString, Vec<PresentationLink>>,
    // Calculation arcs, keyed by `xlink:from`.
    calculation_links: HashMap<CompactString, Vec<CalculationLink>>,
    // Definition arcs, keyed by `xlink:from`.
    definition_links: HashMap<CompactString, Vec<DefinitionLink>>,
    // Labels, keyed by the label resource's `xlink:label` id.
    label_links: HashMap<CompactString, Vec<LabelLink>>,
    // References. NOTE(review): the code populating this map is not shown
    // here — confirm what the key represents.
    reference_links: HashMap<CompactString, Vec<ReferenceLink>>,
}
|
||||
|
||||
impl Default for LinkbaseProcessor {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl LinkbaseProcessor {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
presentation_links: HashMap::new(),
|
||||
calculation_links: HashMap::new(),
|
||||
definition_links: HashMap::new(),
|
||||
label_links: HashMap::new(),
|
||||
reference_links: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_linkbase<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
|
||||
let content = std::fs::read(path)?;
|
||||
self.parse_linkbase(&content)
|
||||
}
|
||||
|
||||
pub fn parse_linkbase(&mut self, data: &[u8]) -> Result<()> {
|
||||
// Skip BOM if present
|
||||
let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
|
||||
&data[3..]
|
||||
} else {
|
||||
data
|
||||
};
|
||||
|
||||
let text = std::str::from_utf8(data)
|
||||
.map_err(|_| Error::Parse("Invalid UTF-8 in linkbase".to_string()))?;
|
||||
|
||||
// Detect linkbase type and parse accordingly
|
||||
if text.contains("presentationLink") {
|
||||
self.parse_presentation_linkbase(text)?;
|
||||
}
|
||||
if text.contains("calculationLink") {
|
||||
self.parse_calculation_linkbase(text)?;
|
||||
}
|
||||
if text.contains("definitionLink") {
|
||||
self.parse_definition_linkbase(text)?;
|
||||
}
|
||||
if text.contains("labelLink") {
|
||||
self.parse_label_linkbase(text)?;
|
||||
}
|
||||
if text.contains("referenceLink") {
|
||||
self.parse_reference_linkbase(text)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_presentation_linkbase(&mut self, text: &str) -> Result<()> {
|
||||
// Parse presentation arcs
|
||||
let mut pos = 0;
|
||||
while let Some(arc_start) = text[pos..].find("<link:presentationArc") {
|
||||
let arc_start = pos + arc_start;
|
||||
pos = arc_start + 1;
|
||||
|
||||
if let Some(arc_end) = text[arc_start..].find("/>") {
|
||||
let arc_text = &text[arc_start..arc_start + arc_end];
|
||||
|
||||
let mut link = PresentationLink {
|
||||
from: CompactString::new(""),
|
||||
to: CompactString::new(""),
|
||||
order: 1.0,
|
||||
priority: None,
|
||||
use_attribute: None,
|
||||
};
|
||||
|
||||
// Extract from
|
||||
if let Some(from_start) = arc_text.find("xlink:from=\"") {
|
||||
let from_start = from_start + 12;
|
||||
if let Some(from_end) = arc_text[from_start..].find('"') {
|
||||
link.from =
|
||||
CompactString::from(&arc_text[from_start..from_start + from_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract to
|
||||
if let Some(to_start) = arc_text.find("xlink:to=\"") {
|
||||
let to_start = to_start + 10;
|
||||
if let Some(to_end) = arc_text[to_start..].find('"') {
|
||||
link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract order
|
||||
if let Some(order_start) = arc_text.find("order=\"") {
|
||||
let order_start = order_start + 7;
|
||||
if let Some(order_end) = arc_text[order_start..].find('"') {
|
||||
if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
|
||||
link.order = order;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract priority
|
||||
if let Some(priority_start) = arc_text.find("priority=\"") {
|
||||
let priority_start = priority_start + 10;
|
||||
if let Some(priority_end) = arc_text[priority_start..].find('"') {
|
||||
if let Ok(priority) =
|
||||
arc_text[priority_start..priority_start + priority_end].parse()
|
||||
{
|
||||
link.priority = Some(priority);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract use
|
||||
if let Some(use_start) = arc_text.find("use=\"") {
|
||||
let use_start = use_start + 5;
|
||||
if let Some(use_end) = arc_text[use_start..].find('"') {
|
||||
link.use_attribute = Some(CompactString::from(
|
||||
&arc_text[use_start..use_start + use_end],
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
self.presentation_links
|
||||
.entry(link.from.clone())
|
||||
.or_default()
|
||||
.push(link);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_calculation_linkbase(&mut self, text: &str) -> Result<()> {
|
||||
// Parse calculation arcs
|
||||
let mut pos = 0;
|
||||
while let Some(arc_start) = text[pos..].find("<link:calculationArc") {
|
||||
let arc_start = pos + arc_start;
|
||||
pos = arc_start + 1;
|
||||
|
||||
if let Some(arc_end) = text[arc_start..].find("/>") {
|
||||
let arc_text = &text[arc_start..arc_start + arc_end];
|
||||
|
||||
let mut link = CalculationLink {
|
||||
from: CompactString::new(""),
|
||||
to: CompactString::new(""),
|
||||
weight: 1.0,
|
||||
order: 1.0,
|
||||
};
|
||||
|
||||
// Extract from
|
||||
if let Some(from_start) = arc_text.find("xlink:from=\"") {
|
||||
let from_start = from_start + 12;
|
||||
if let Some(from_end) = arc_text[from_start..].find('"') {
|
||||
link.from =
|
||||
CompactString::from(&arc_text[from_start..from_start + from_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract to
|
||||
if let Some(to_start) = arc_text.find("xlink:to=\"") {
|
||||
let to_start = to_start + 10;
|
||||
if let Some(to_end) = arc_text[to_start..].find('"') {
|
||||
link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract weight
|
||||
if let Some(weight_start) = arc_text.find("weight=\"") {
|
||||
let weight_start = weight_start + 8;
|
||||
if let Some(weight_end) = arc_text[weight_start..].find('"') {
|
||||
if let Ok(weight) =
|
||||
arc_text[weight_start..weight_start + weight_end].parse()
|
||||
{
|
||||
link.weight = weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract order
|
||||
if let Some(order_start) = arc_text.find("order=\"") {
|
||||
let order_start = order_start + 7;
|
||||
if let Some(order_end) = arc_text[order_start..].find('"') {
|
||||
if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
|
||||
link.order = order;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.calculation_links
|
||||
.entry(link.from.clone())
|
||||
.or_default()
|
||||
.push(link);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_definition_linkbase(&mut self, text: &str) -> Result<()> {
|
||||
// Parse definition arcs
|
||||
let mut pos = 0;
|
||||
while let Some(arc_start) = text[pos..].find("<link:definitionArc") {
|
||||
let arc_start = pos + arc_start;
|
||||
pos = arc_start + 1;
|
||||
|
||||
if let Some(arc_end) = text[arc_start..].find("/>") {
|
||||
let arc_text = &text[arc_start..arc_start + arc_end];
|
||||
|
||||
let mut link = DefinitionLink {
|
||||
from: CompactString::new(""),
|
||||
to: CompactString::new(""),
|
||||
arcrole: CompactString::new(""),
|
||||
order: 1.0,
|
||||
};
|
||||
|
||||
// Extract from
|
||||
if let Some(from_start) = arc_text.find("xlink:from=\"") {
|
||||
let from_start = from_start + 12;
|
||||
if let Some(from_end) = arc_text[from_start..].find('"') {
|
||||
link.from =
|
||||
CompactString::from(&arc_text[from_start..from_start + from_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract to
|
||||
if let Some(to_start) = arc_text.find("xlink:to=\"") {
|
||||
let to_start = to_start + 10;
|
||||
if let Some(to_end) = arc_text[to_start..].find('"') {
|
||||
link.to = CompactString::from(&arc_text[to_start..to_start + to_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract arcrole
|
||||
if let Some(arcrole_start) = arc_text.find("xlink:arcrole=\"") {
|
||||
let arcrole_start = arcrole_start + 15;
|
||||
if let Some(arcrole_end) = arc_text[arcrole_start..].find('"') {
|
||||
link.arcrole = CompactString::from(
|
||||
&arc_text[arcrole_start..arcrole_start + arcrole_end],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract order
|
||||
if let Some(order_start) = arc_text.find("order=\"") {
|
||||
let order_start = order_start + 7;
|
||||
if let Some(order_end) = arc_text[order_start..].find('"') {
|
||||
if let Ok(order) = arc_text[order_start..order_start + order_end].parse() {
|
||||
link.order = order;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.definition_links
|
||||
.entry(link.from.clone())
|
||||
.or_default()
|
||||
.push(link);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_label_linkbase(&mut self, text: &str) -> Result<()> {
|
||||
// Parse labels
|
||||
let mut pos = 0;
|
||||
while let Some(label_start) = text[pos..].find("<link:label") {
|
||||
let label_start = pos + label_start;
|
||||
pos = label_start + 1;
|
||||
|
||||
if let Some(label_end) = text[label_start..].find("</link:label>") {
|
||||
let label_text = &text[label_start..label_start + label_end];
|
||||
|
||||
let mut link = LabelLink {
|
||||
concept: CompactString::new(""),
|
||||
label: CompactString::new(""),
|
||||
role: CompactString::new(""),
|
||||
lang: CompactString::new("en"),
|
||||
};
|
||||
|
||||
// Extract label ID for concept mapping
|
||||
if let Some(id_start) = label_text.find("xlink:label=\"") {
|
||||
let id_start = id_start + 13;
|
||||
if let Some(id_end) = label_text[id_start..].find('"') {
|
||||
link.concept =
|
||||
CompactString::from(&label_text[id_start..id_start + id_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract role
|
||||
if let Some(role_start) = label_text.find("xlink:role=\"") {
|
||||
let role_start = role_start + 12;
|
||||
if let Some(role_end) = label_text[role_start..].find('"') {
|
||||
link.role =
|
||||
CompactString::from(&label_text[role_start..role_start + role_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract lang
|
||||
if let Some(lang_start) = label_text.find("xml:lang=\"") {
|
||||
let lang_start = lang_start + 10;
|
||||
if let Some(lang_end) = label_text[lang_start..].find('"') {
|
||||
link.lang =
|
||||
CompactString::from(&label_text[lang_start..lang_start + lang_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract label text content
|
||||
if let Some(content_start) = label_text.find('>') {
|
||||
let content = &label_text[content_start + 1..];
|
||||
link.label = CompactString::from(content.trim());
|
||||
}
|
||||
|
||||
self.label_links
|
||||
.entry(link.concept.clone())
|
||||
.or_default()
|
||||
.push(link);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_reference_linkbase(&mut self, text: &str) -> Result<()> {
|
||||
// Parse references - simplified version
|
||||
let mut pos = 0;
|
||||
while let Some(ref_start) = text[pos..].find("<link:reference") {
|
||||
let ref_start = pos + ref_start;
|
||||
pos = ref_start + 1;
|
||||
|
||||
if let Some(ref_end) = text[ref_start..].find("</link:reference>") {
|
||||
let ref_text = &text[ref_start..ref_start + ref_end];
|
||||
|
||||
let mut reference = Reference {
|
||||
role: CompactString::new(""),
|
||||
parts: HashMap::new(),
|
||||
};
|
||||
|
||||
// Extract role
|
||||
if let Some(role_start) = ref_text.find("xlink:role=\"") {
|
||||
let role_start = role_start + 12;
|
||||
if let Some(role_end) = ref_text[role_start..].find('"') {
|
||||
reference.role =
|
||||
CompactString::from(&ref_text[role_start..role_start + role_end]);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse reference parts (simplified)
|
||||
let parts = [
|
||||
"Name",
|
||||
"Number",
|
||||
"Section",
|
||||
"Subsection",
|
||||
"Paragraph",
|
||||
"Subparagraph",
|
||||
"Clause",
|
||||
];
|
||||
for part in &parts {
|
||||
let tag = format!("<link:{}", part);
|
||||
if let Some(part_start) = ref_text.find(&tag) {
|
||||
let part_start = part_start + tag.len();
|
||||
if let Some(content_start) = ref_text[part_start..].find('>') {
|
||||
let content_start = part_start + content_start + 1;
|
||||
if let Some(content_end) = ref_text[content_start..].find('<') {
|
||||
let content = &ref_text[content_start..content_start + content_end];
|
||||
reference.parts.insert(
|
||||
CompactString::from(*part),
|
||||
CompactString::from(content.trim()),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find concept this reference belongs to
|
||||
if let Some(label_start) = ref_text.find("xlink:label=\"") {
|
||||
let label_start = label_start + 13;
|
||||
if let Some(label_end) = ref_text[label_start..].find('"') {
|
||||
let concept =
|
||||
CompactString::from(&ref_text[label_start..label_start + label_end]);
|
||||
|
||||
let link = ReferenceLink {
|
||||
concept: concept.clone(),
|
||||
reference,
|
||||
};
|
||||
|
||||
self.reference_links.entry(concept).or_default().push(link);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the presentation children of `root`, sorted by their `order`
/// attribute; empty if `root` has no presentation links.
pub fn get_presentation_tree(&self, root: &str) -> Vec<&PresentationLink> {
    self.presentation_links
        .get(root)
        .map(|links| {
            let mut sorted = links.iter().collect::<Vec<_>>();
            // Bug fix: `partial_cmp(..).unwrap()` panicked if any `order`
            // was NaN (possible since orders are parsed from text);
            // `total_cmp` gives a total order and cannot panic.
            sorted.sort_by(|a, b| a.order.total_cmp(&b.order));
            sorted
        })
        .unwrap_or_default()
}
|
||||
|
||||
pub fn calculate_total(&self, parent: &str, facts: &HashMap<String, f64>) -> f64 {
|
||||
if let Some(links) = self.calculation_links.get(parent) {
|
||||
links
|
||||
.iter()
|
||||
.map(|link| {
|
||||
facts
|
||||
.get(link.to.as_str())
|
||||
.map(|value| value * link.weight)
|
||||
.unwrap_or(0.0)
|
||||
})
|
||||
.sum()
|
||||
} else {
|
||||
facts.get(parent).copied().unwrap_or(0.0)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_label(&self, concept: &str, role: &str, lang: &str) -> Option<&str> {
|
||||
self.label_links
|
||||
.get(concept)
|
||||
.and_then(|labels| {
|
||||
labels
|
||||
.iter()
|
||||
.find(|l| l.role == role && l.lang == lang)
|
||||
.or_else(|| labels.iter().find(|l| l.lang == lang))
|
||||
.or_else(|| labels.first())
|
||||
})
|
||||
.map(|l| l.label.as_str())
|
||||
}
|
||||
|
||||
/// Checks every calculation-linkbase parent against the sum of its
/// weighted children and reports parents whose reported fact deviates
/// from the computed rollup by more than a fixed tolerance.
pub fn validate_calculations(&self, facts: &HashMap<String, f64>) -> Vec<ValidationError> {
    let mut errors = Vec::new();

    for parent in self.calculation_links.keys() {
        let calculated = self.calculate_total(parent, facts);
        // Parents with no reported fact are skipped rather than flagged.
        if let Some(&actual) = facts.get(parent.as_str()) {
            let diff = (calculated - actual).abs();
            let tolerance = 0.01; // Allow small rounding differences

            if diff > tolerance {
                errors.push(ValidationError::CalculationInconsistency {
                    concept: parent.to_string(),
                    expected: calculated,
                    actual,
                });
            }
        }
    }

    errors
}
|
||||
}
|
||||
181
rust/crabrl-fork/src/main.rs
Normal file
181
rust/crabrl-fork/src/main.rs
Normal file
@@ -0,0 +1,181 @@
|
||||
//! crabrl CLI - High-performance XBRL parser and validator
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{Parser as ClapParser, Subcommand};
|
||||
use colored::*;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use crabrl::{Parser, ValidationConfig, Validator};
|
||||
|
||||
/// High-performance XBRL parser and validator
#[derive(ClapParser)]
#[command(name = "crabrl")]
#[command(author, version, about, long_about = None)]
struct Cli {
    // Subcommand dispatched by main(): parse / validate / bench.
    #[command(subcommand)]
    command: Commands,
}

// NOTE: the `///` comments below are clap help text (runtime-visible),
// so they are left exactly as written.
#[derive(Subcommand)]
enum Commands {
    /// Parse an XBRL file
    Parse {
        /// Input file
        input: PathBuf,

        /// Output as JSON
        // NOTE(review): accepted on the command line but currently ignored
        // by main() — confirm whether JSON output is still planned.
        #[arg(short, long)]
        json: bool,

        /// Show statistics
        #[arg(short, long)]
        stats: bool,
    },

    /// Validate an XBRL file
    Validate {
        /// Input file
        input: PathBuf,

        /// Validation profile (generic, sec-edgar)
        #[arg(short, long, default_value = "generic")]
        profile: String,

        /// Treat warnings as errors
        #[arg(long)]
        strict: bool,
    },

    /// Benchmark parsing performance
    Bench {
        /// Input file
        input: PathBuf,

        /// Number of iterations
        #[arg(short, long, default_value = "100")]
        iterations: usize,
    },
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
match cli.command {
|
||||
Commands::Parse {
|
||||
input,
|
||||
json: _,
|
||||
stats,
|
||||
} => {
|
||||
let start = Instant::now();
|
||||
let parser = Parser::new();
|
||||
let doc = parser
|
||||
.parse_file(&input)
|
||||
.with_context(|| format!("Failed to parse {}", input.display()))?;
|
||||
let elapsed = start.elapsed();
|
||||
|
||||
println!("{} {}", "✓".green().bold(), input.display());
|
||||
println!(" Facts: {}", doc.facts.len());
|
||||
println!(" Contexts: {}", doc.contexts.len());
|
||||
println!(" Units: {}", doc.units.len());
|
||||
|
||||
if stats {
|
||||
println!(" Time: {:.2}ms", elapsed.as_secs_f64() * 1000.0);
|
||||
println!(
|
||||
" Throughput: {:.0} facts/sec",
|
||||
doc.facts.len() as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Commands::Validate {
|
||||
input,
|
||||
profile,
|
||||
strict,
|
||||
} => {
|
||||
let parser = Parser::new();
|
||||
let doc = parser
|
||||
.parse_file(&input)
|
||||
.with_context(|| format!("Failed to parse {}", input.display()))?;
|
||||
|
||||
let config = match profile.as_str() {
|
||||
"sec-edgar" => ValidationConfig::sec_edgar(),
|
||||
_ => ValidationConfig::default(),
|
||||
};
|
||||
|
||||
let validator = Validator::with_config(config);
|
||||
let result = validator.validate(&doc)?;
|
||||
|
||||
if result.is_valid {
|
||||
println!(
|
||||
"{} {} - Document is valid",
|
||||
"✓".green().bold(),
|
||||
input.display()
|
||||
);
|
||||
} else {
|
||||
println!(
|
||||
"{} {} - Validation failed",
|
||||
"✗".red().bold(),
|
||||
input.display()
|
||||
);
|
||||
println!(" Errors: {}", result.errors.len());
|
||||
println!(" Warnings: {}", result.warnings.len());
|
||||
|
||||
for error in result.errors.iter().take(5) {
|
||||
println!(" {} {}", "ERROR:".red(), error);
|
||||
}
|
||||
|
||||
if result.errors.len() > 5 {
|
||||
println!(" ... and {} more errors", result.errors.len() - 5);
|
||||
}
|
||||
|
||||
if strict && !result.warnings.is_empty() {
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
if !result.is_valid {
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Commands::Bench { input, iterations } => {
|
||||
let parser = Parser::new();
|
||||
|
||||
// Warmup
|
||||
for _ in 0..3 {
|
||||
let _ = parser.parse_file(&input)?;
|
||||
}
|
||||
|
||||
let mut times = Vec::with_capacity(iterations);
|
||||
let mut doc_facts = 0;
|
||||
|
||||
for _ in 0..iterations {
|
||||
let start = Instant::now();
|
||||
let doc = parser.parse_file(&input)?;
|
||||
times.push(start.elapsed());
|
||||
doc_facts = doc.facts.len();
|
||||
}
|
||||
|
||||
times.sort();
|
||||
let min = times[0];
|
||||
let max = times[times.len() - 1];
|
||||
let median = times[times.len() / 2];
|
||||
let mean = times.iter().sum::<std::time::Duration>() / times.len() as u32;
|
||||
|
||||
println!("Benchmark Results for {}", input.display());
|
||||
println!(" Iterations: {}", iterations);
|
||||
println!(" Facts: {}", doc_facts);
|
||||
println!(" Min: {:.3}ms", min.as_secs_f64() * 1000.0);
|
||||
println!(" Median: {:.3}ms", median.as_secs_f64() * 1000.0);
|
||||
println!(" Mean: {:.3}ms", mean.as_secs_f64() * 1000.0);
|
||||
println!(" Max: {:.3}ms", max.as_secs_f64() * 1000.0);
|
||||
println!(
|
||||
" Throughput: {:.0} facts/sec",
|
||||
doc_facts as f64 / mean.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
431
rust/crabrl-fork/src/model.rs
Normal file
431
rust/crabrl-fork/src/model.rs
Normal file
@@ -0,0 +1,431 @@
|
||||
use bitflags::bitflags;
|
||||
use compact_str::CompactString;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ============================================================================
|
||||
// Core XBRL Data Structures - Full Specification Support
|
||||
// ============================================================================
|
||||
|
||||
bitflags! {
    /// Per-fact boolean attributes packed into one byte.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct FactFlags: u8 {
        /// The fact value is nil.
        const NIL = 0b01;
        /// A precision attribute was present.
        const HAS_PRECISION = 0b10;
        /// A decimals attribute was present.
        const HAS_DECIMALS = 0b100;
        /// The fact is nested inside a tuple.
        const IN_TUPLE = 0b1000;
    }
}
|
||||
|
||||
/// Column-oriented (struct-of-arrays) fact storage: all vectors are kept
/// at equal length, with index `i` across every column describing one fact.
// NOTE(review): align(64) presumably targets cache-line alignment of the
// struct; confirm it is load-bearing before changing the layout.
#[repr(C, align(64))]
#[derive(Clone, Serialize, Deserialize)]
pub struct FactStorage {
    // Compact per-fact handles; presumably indices into interning tables
    // held elsewhere (e.g. Document::concept_names) — TODO confirm.
    pub concept_ids: Vec<u32>,
    pub context_ids: Vec<u16>,
    pub unit_ids: Vec<u16>,
    // Parsed value per fact.
    pub values: Vec<FactValue>,
    // decimals attribute per fact, when present.
    pub decimals: Vec<Option<i8>>,
    // Optional XML id per fact.
    pub ids: Vec<Option<CompactString>>,
    // Footnote ids referencing each fact (empty for most facts).
    pub footnote_refs: Vec<Vec<CompactString>>,
}
|
||||
|
||||
impl FactStorage {
|
||||
pub fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
concept_ids: Vec::with_capacity(capacity),
|
||||
context_ids: Vec::with_capacity(capacity),
|
||||
unit_ids: Vec::with_capacity(capacity),
|
||||
values: Vec::with_capacity(capacity),
|
||||
decimals: Vec::with_capacity(capacity),
|
||||
ids: Vec::with_capacity(capacity),
|
||||
footnote_refs: Vec::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn len(&self) -> usize {
|
||||
self.concept_ids.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.concept_ids.is_empty()
|
||||
}
|
||||
|
||||
pub fn push(
|
||||
&mut self,
|
||||
concept_id: u32,
|
||||
context_id: u16,
|
||||
unit_id: u16,
|
||||
value: FactValue,
|
||||
decimals: Option<i8>,
|
||||
id: Option<CompactString>,
|
||||
) {
|
||||
self.concept_ids.push(concept_id);
|
||||
self.context_ids.push(context_id);
|
||||
self.unit_ids.push(unit_id);
|
||||
self.values.push(value);
|
||||
self.decimals.push(decimals);
|
||||
self.ids.push(id);
|
||||
self.footnote_refs.push(Vec::new());
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.concept_ids.clear();
|
||||
self.context_ids.clear();
|
||||
self.unit_ids.clear();
|
||||
self.values.clear();
|
||||
self.decimals.clear();
|
||||
self.ids.clear();
|
||||
self.footnote_refs.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Parsed value of a fact.
// NOTE(review): `Text`, `Date`, and `DateTime` carry `u32` handles —
// presumably indices into interning tables built by the parser; confirm
// before relying on their meaning.
// NOTE(review): `#[serde(untagged)]` with a unit variant plus several
// numeric variants makes deserialization ambiguous (e.g. Integer vs
// Decimal) — verify the intended round-trip behavior.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)]
#[serde(untagged)]
pub enum FactValue {
    Text(u32),
    Decimal(f64),
    Integer(i64),
    Boolean(bool),
    Date(u32),
    DateTime(u32),
    #[default]
    Nil,
}
|
||||
|
||||
/// A single reported fact in owned, string-based form (as opposed to the
/// columnar `FactStorage` representation).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fact {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<CompactString>,
    pub concept: CompactString,
    // Id of the Context this fact was reported against.
    pub context_ref: CompactString,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub unit_ref: Option<CompactString>,
    // Raw textual value as it appeared in the document.
    pub value: CompactString,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub decimals: Option<i8>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub precision: Option<u8>,
    pub nil: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub nil_reason: Option<CompactString>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub footnote_refs: Vec<CompactString>,
}

/// Reporting context: who (entity), when (period), and optional scenario.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Context {
    pub id: CompactString,
    pub entity: Entity,
    pub period: Period,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scenario: Option<Scenario>,
}

/// Reporting entity: an identifier qualified by a scheme URI.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Entity {
    pub identifier: CompactString,
    pub scheme: CompactString,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub segment: Option<Segment>,
}

/// Dimensional qualifiers attached to an entity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Segment {
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub explicit_members: Vec<DimensionMember>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub typed_members: Vec<TypedMember>,
}

/// Explicit dimension: a dimension name paired with a member name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DimensionMember {
    pub dimension: CompactString,
    pub member: CompactString,
}

/// Typed dimension: a dimension name paired with a free-form value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypedMember {
    pub dimension: CompactString,
    pub value: CompactString,
}

/// Dimensional qualifiers attached to a context's scenario.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Scenario {
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub explicit_members: Vec<DimensionMember>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub typed_members: Vec<TypedMember>,
}

/// Reporting period: a point in time, a date range, or forever.
// Dates are kept as the raw strings from the document.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Period {
    Instant {
        date: CompactString,
    },
    Duration {
        start: CompactString,
        end: CompactString,
    },
    Forever,
}

/// Measurement unit referenced by numeric facts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Unit {
    pub id: CompactString,
    pub unit_type: UnitType,
}

/// Structure of a unit: plain measures, a ratio, or a product.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum UnitType {
    Simple(Vec<Measure>),
    Divide {
        numerator: Vec<Measure>,
        denominator: Vec<Measure>,
    },
    Multiply(Vec<Measure>),
}

/// A namespace-qualified measure name (e.g. a currency code).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Measure {
    pub namespace: CompactString,
    pub name: CompactString,
}

/// A tuple: a named grouping of facts, possibly nested.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tuple {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<CompactString>,
    pub name: CompactString,
    pub facts: Vec<FactOrTuple>,
}

/// A tuple child: either a leaf fact or a nested tuple (boxed to keep
/// the enum small).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum FactOrTuple {
    Fact(Fact),
    Tuple(Box<Tuple>),
}

/// A footnote resource, linked back to the facts that reference it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Footnote {
    pub id: CompactString,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<CompactString>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<CompactString>,
    pub content: CompactString,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub fact_refs: Vec<CompactString>,
}

/// A fraction-valued fact payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FractionValue {
    pub numerator: f64,
    pub denominator: f64,
}
|
||||
|
||||
/// A parsed taxonomy schema: its namespace plus declared elements,
/// types, and imports.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Schema {
    pub target_namespace: CompactString,
    pub elements: HashMap<CompactString, SchemaElement>,
    pub types: HashMap<CompactString, SchemaType>,
    pub imports: Vec<SchemaImport>,
}

/// One element declaration from a taxonomy schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaElement {
    pub name: CompactString,
    pub element_type: CompactString,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub substitution_group: Option<CompactString>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub period_type: Option<CompactString>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub balance: Option<CompactString>,
    // `abstract` is a Rust keyword, hence the longer field name.
    #[serde(default)]
    pub abstract_element: bool,
    #[serde(default)]
    pub nillable: bool,
}

/// A (possibly restricted) type declaration from a taxonomy schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaType {
    pub name: CompactString,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub base_type: Option<CompactString>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub restrictions: Vec<TypeRestriction>,
}

/// One facet restricting a schema type.
// NOTE(review): `untagged` makes the string-carrying variants
// indistinguishable on deserialize (MinInclusive vs Pattern, etc.) —
// verify whether these are ever round-tripped through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum TypeRestriction {
    MinInclusive(CompactString),
    MaxInclusive(CompactString),
    MinExclusive(CompactString),
    MaxExclusive(CompactString),
    Pattern(CompactString),
    Enumeration(Vec<CompactString>),
    Length(usize),
    MinLength(usize),
    MaxLength(usize),
}

/// An `import` of another schema by namespace and location.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaImport {
    pub namespace: CompactString,
    pub schema_location: CompactString,
}

/// A linkbase: a role URI plus its links.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Linkbase {
    pub role: CompactString,
    pub links: Vec<Link>,
}

/// Any of the supported link kinds, serialized untagged.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Link {
    Presentation(PresentationLink),
    Calculation(CalculationLink),
    Definition(DefinitionLink),
    Label(LabelLink),
    Reference(ReferenceLink),
}

/// Presentation arc: orders `to` under `from` for display.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PresentationLink {
    pub from: CompactString,
    pub to: CompactString,
    pub order: f32,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub priority: Option<i32>,
    // `use` is a Rust keyword, hence the longer field name.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub use_attribute: Option<CompactString>,
}

/// Calculation arc: `to` contributes to `from` scaled by `weight`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CalculationLink {
    pub from: CompactString,
    pub to: CompactString,
    pub weight: f64,
    pub order: f32,
}

/// Definition arc: relates `from` and `to` under an arcrole.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DefinitionLink {
    pub from: CompactString,
    pub to: CompactString,
    pub arcrole: CompactString,
    pub order: f32,
}

/// A human-readable label attached to a concept, qualified by role and
/// language.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LabelLink {
    pub concept: CompactString,
    pub label: CompactString,
    pub role: CompactString,
    pub lang: CompactString,
}

/// An authoritative reference attached to a concept.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLink {
    pub concept: CompactString,
    pub reference: Reference,
}

/// Reference payload: a role plus named parts (Section, Paragraph, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Reference {
    pub role: CompactString,
    pub parts: HashMap<CompactString, CompactString>,
}
|
||||
|
||||
/// A fully parsed XBRL document: the fact table plus every supporting
/// structure (contexts, units, tuples, footnotes, linkbases, schemas,
/// and namespace declarations).
#[derive(Clone, Serialize, Deserialize)]
pub struct Document {
    /// Column-oriented fact storage.
    pub facts: FactStorage,
    pub contexts: Vec<Context>,
    pub units: Vec<Unit>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tuples: Vec<Tuple>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub footnotes: Vec<Footnote>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub presentation_links: Vec<PresentationLink>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub calculation_links: Vec<CalculationLink>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub definition_links: Vec<DefinitionLink>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub label_links: Vec<LabelLink>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub reference_links: Vec<ReferenceLink>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub custom_links: Vec<Link>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub role_types: Vec<CompactString>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub arcrole_types: Vec<CompactString>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub schemas: Vec<Schema>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub dimensions: Vec<DimensionMember>,
    // Interned concept names; presumably indexed by FactStorage's
    // concept_ids — TODO confirm against the parser.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub concept_names: Vec<CompactString>,
    /// Prefix -> namespace URI declarations.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub namespaces: HashMap<CompactString, CompactString>,
}
|
||||
|
||||
impl Default for Document {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Document {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
facts: FactStorage::with_capacity(10000),
|
||||
contexts: Vec::with_capacity(100),
|
||||
units: Vec::with_capacity(50),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
namespaces: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_capacity(facts: usize, contexts: usize, units: usize) -> Self {
|
||||
Self {
|
||||
facts: FactStorage::with_capacity(facts),
|
||||
contexts: Vec::with_capacity(contexts),
|
||||
units: Vec::with_capacity(units),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
namespaces: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
990
rust/crabrl-fork/src/parser.rs
Normal file
990
rust/crabrl-fork/src/parser.rs
Normal file
@@ -0,0 +1,990 @@
|
||||
use crate::allocator::ArenaAllocator;
|
||||
use crate::model::*;
|
||||
use crate::simd::SimdScanner;
|
||||
use crate::{Error, Result};
|
||||
use compact_str::CompactString;
|
||||
use std::path::Path;
|
||||
|
||||
/// Configurable public entry point for parsing XBRL documents.
pub struct Parser {
    // Whether referenced schemas are loaded during parsing.
    load_schemas: bool,
    // Whether referenced linkbases are loaded during parsing.
    load_linkbases: bool,
    // Whether validation runs during parsing.
    validate: bool,
}
|
||||
|
||||
impl Parser {
    /// Creates a parser with schema loading, linkbase loading, and
    /// validation all disabled (the fastest configuration).
    pub fn new() -> Self {
        Self {
            load_schemas: false,
            load_linkbases: false,
            validate: false,
        }
    }

    /// Builder-style toggle for validation during parsing.
    pub fn with_validation(mut self, validate: bool) -> Self {
        self.validate = validate;
        self
    }

    /// Builder-style toggle for loading referenced schemas.
    pub fn with_schema_loading(mut self, load: bool) -> Self {
        self.load_schemas = load;
        self
    }

    /// Builder-style toggle for loading referenced linkbases.
    pub fn with_linkbase_loading(mut self, load: bool) -> Self {
        self.load_linkbases = load;
        self
    }

    /// Parses an XBRL document from an in-memory string.
    pub fn parse_str(&self, content: &str) -> Result<Document> {
        self.parse_bytes(content.as_bytes())
    }

    /// Reads the whole file into memory and parses it.
    pub fn parse_file<P: AsRef<Path>>(&self, path: P) -> Result<Document> {
        let content = std::fs::read(path)?;
        self.parse_bytes(&content)
    }

    /// Parses an XBRL document from raw bytes.
    pub fn parse_bytes(&self, data: &[u8]) -> Result<Document> {
        // Strip a UTF-8 byte-order mark if present.
        let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
            &data[3..]
        } else {
            data
        };

        // The internal parser borrows the arena for the duration of the pass.
        let allocator = ArenaAllocator::new();
        let mut parser = FullXbrlParser::new(data, &allocator);
        parser.load_schemas = self.load_schemas;
        parser.load_linkbases = self.load_linkbases;
        parser.validate = self.validate;
        parser.parse()
    }
}
|
||||
|
||||
impl Default for Parser {
    /// Same configuration as [`Parser::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
/// Internal single-pass parser state over one input buffer.
struct FullXbrlParser<'a> {
    // SIMD-accelerated byte cursor over the input.
    scanner: SimdScanner<'a>,
    // Arena for parse-lifetime allocations.
    // NOTE(review): stored but not used in this chunk — confirm usage.
    allocator: &'a ArenaAllocator,
    // Document being built during the pass.
    doc: Document,
    // Presumably set once the document's root element is entered — confirm.
    in_xbrl_root: bool,
    // Mirrors of the public Parser configuration flags.
    load_schemas: bool,
    load_linkbases: bool,
    validate: bool,
}
|
||||
|
||||
impl<'a> FullXbrlParser<'a> {
|
||||
/// Builds a parser over `data` with schema/linkbase loading and
/// validation disabled until the caller flips the flags.
fn new(data: &'a [u8], allocator: &'a ArenaAllocator) -> Self {
    Self {
        scanner: SimdScanner::new(data),
        allocator,
        doc: Document::new(),
        in_xbrl_root: false,
        load_schemas: false,
        load_linkbases: false,
        validate: false,
    }
}
|
||||
|
||||
/// Reads bytes from the current position up to the first whitespace,
/// '>', or '/', returning them as the tag name.
///
/// Assumes the caller has already consumed the opening '<' — TODO
/// confirm at the call sites (outside this chunk).
#[inline(always)]
fn read_tag_name(&mut self) -> Result<&'a str> {
    let start = self.scanner.pos;
    while let Some(ch) = self.scanner.peek() {
        // Any tag-name terminator ends the scan without being consumed.
        if ch == b' ' || ch == b'>' || ch == b'/' || ch == b'\t' || ch == b'\n' || ch == b'\r' {
            break;
        }
        self.scanner.advance(1);
    }
    let end = self.scanner.pos;

    if start == end {
        return Err(Error::Parse("Empty tag name".to_string()));
    }

    std::str::from_utf8(&self.scanner.data[start..end])
        .map_err(|_| Error::Parse("Invalid UTF-8 in tag name".to_string()))
}
|
||||
|
||||
/// Parses `name="value"` (or single-quoted) attribute pairs from the
/// current position until the end of the start tag, returning them as
/// borrowed slices into the input.
///
/// Leaves the scanner positioned at the terminating '>' (or just past a
/// '/' for self-closing tags); attributes without '=' are skipped.
#[inline(always)]
fn parse_attributes(&mut self) -> Result<Vec<(&'a str, &'a str)>> {
    let mut attrs = Vec::new();

    loop {
        self.scanner.skip_whitespace();

        // Check for the end of the start tag before reading a name.
        match self.scanner.peek() {
            Some(b'>') => {
                break;
            }
            Some(b'/') => {
                // Possible self-closing tag: stop if "/>" follows.
                self.scanner.advance(1);
                if self.scanner.peek() == Some(b'>') {
                    break;
                }
            }
            None => return Err(Error::Parse("Unexpected EOF in attributes".to_string())),
            _ => {}
        }

        // Attribute name: up to '=', whitespace, '>' or '/'.
        let name_start = self.scanner.pos;
        while let Some(ch) = self.scanner.peek() {
            if ch == b'=' || ch == b' ' || ch == b'>' || ch == b'/' {
                break;
            }
            self.scanner.advance(1);
        }

        // No progress means nothing attribute-like remains.
        if self.scanner.pos == name_start {
            break;
        }

        let name = std::str::from_utf8(&self.scanner.data[name_start..self.scanner.pos])
            .map_err(|_| Error::Parse("Invalid UTF-8 in attribute name".to_string()))?;

        self.scanner.skip_whitespace();

        // Bare attribute (no '='): skip it and keep scanning.
        if self.scanner.peek() != Some(b'=') {
            continue;
        }
        self.scanner.advance(1);

        self.scanner.skip_whitespace();

        let quote = self
            .scanner
            .peek()
            .ok_or_else(|| Error::Parse("Expected quote".to_string()))?;

        if quote != b'"' && quote != b'\'' {
            return Err(Error::Parse("Expected quote in attribute".to_string()));
        }

        self.scanner.advance(1);
        let value_start = self.scanner.pos;

        // Value runs to the matching quote character.
        // NOTE(review): entity references inside values are not decoded here.
        while let Some(ch) = self.scanner.peek() {
            if ch == quote {
                break;
            }
            self.scanner.advance(1);
        }

        let value = std::str::from_utf8(&self.scanner.data[value_start..self.scanner.pos])
            .map_err(|_| Error::Parse("Invalid UTF-8 in attribute value".to_string()))?;

        // Consume the closing quote.
        self.scanner.advance(1);

        attrs.push((name, value));
    }

    Ok(attrs)
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_to_tag_end(&mut self) -> Result<()> {
|
||||
while let Some(ch) = self.scanner.peek() {
|
||||
if ch == b'>' {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
Err(Error::Parse("Expected '>'".to_string()))
|
||||
}
|
||||
|
||||
/// Reads text from the current position up to (not including) the next
/// '<', returning it trimmed of surrounding whitespace.
// NOTE(review): entity references are not decoded here.
#[inline(always)]
fn read_text_content(&mut self) -> Result<&'a str> {
    let start = self.scanner.pos;
    while let Some(ch) = self.scanner.peek() {
        if ch == b'<' {
            break;
        }
        self.scanner.advance(1);
    }

    let text = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos])
        .map_err(|_| Error::Parse("Invalid UTF-8 in text content".to_string()))?;

    Ok(text.trim())
}
|
||||
|
||||
/// Reads element text up to the next markup, additionally splicing in
/// the raw content of any `<![CDATA[ ... ]]>` sections encountered.
///
/// Plain text segments are trimmed; CDATA content is appended verbatim.
/// Stops (leaving the scanner at '<') when a non-CDATA tag is reached.
///
/// NOTE(review): `self.peek_ahead(7)` is compared to the 8-byte literal
/// b"![CDATA[", while the "]]>" check uses `peek_ahead(3)` for 3 bytes —
/// the off-by-one looks suspicious; confirm peek_ahead's length
/// semantics (defined outside this chunk).
#[inline(always)]
fn read_text_content_with_cdata(&mut self) -> Result<CompactString> {
    let mut result = CompactString::new("");
    let start = self.scanner.pos;

    loop {
        if self.scanner.is_eof() {
            // Flush whatever text accumulated before EOF.
            let text = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos])
                .map_err(|_| Error::Parse("Invalid UTF-8 in text content".to_string()))?;
            if !result.is_empty() {
                result.push_str(text.trim());
            } else {
                result = CompactString::from(text.trim());
            }
            break;
        }

        if self.scanner.peek() == Some(b'<') {
            // Flush the plain-text run that ended at this '<'.
            // NOTE(review): `start` is not reset after a CDATA splice, so
            // text between CDATA sections may be re-read — confirm intent.
            let text = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos])
                .map_err(|_| Error::Parse("Invalid UTF-8 in text content".to_string()))?;
            if !text.trim().is_empty() {
                if !result.is_empty() {
                    result.push_str(text.trim());
                } else {
                    result = CompactString::from(text.trim());
                }
            }

            self.scanner.advance(1);

            if self.peek_ahead(7) == Some(b"![CDATA[") {
                // Consume "![CDATA[" and copy bytes verbatim until "]]>".
                self.scanner.advance(8);
                let cdata_start = self.scanner.pos;

                while !self.scanner.is_eof() {
                    if self.scanner.peek() == Some(b']') && self.peek_ahead(3) == Some(b"]]>") {
                        let cdata = std::str::from_utf8(
                            &self.scanner.data[cdata_start..self.scanner.pos],
                        )
                        .map_err(|_| {
                            Error::Parse("Invalid UTF-8 in CDATA section".to_string())
                        })?;
                        result.push_str(cdata);
                        self.scanner.advance(3);
                        break;
                    }
                    self.scanner.advance(1);
                }
            } else {
                // Not CDATA: back up onto the '<' and let the caller
                // handle the tag.
                self.scanner.pos = self.scanner.pos.saturating_sub(1);
                break;
            }
        } else {
            self.scanner.advance(1);
        }
    }

    Ok(result)
}
|
||||
|
||||
    /// Skips the remainder of the element whose tag name was just read:
    /// consumes to the end of the opening tag, then — unless it was
    /// self-closing — tracks nesting depth until the matching close tag.
    ///
    /// NOTE(review): `<!...>` / `<?...?>` markup inside the skipped element is
    /// scanned only up to the next `>`, so a comment containing `>` would be
    /// mis-delimited here — confirm against the inputs this parser targets.
    #[inline(always)]
    fn skip_element_from_tag(&mut self) -> Result<()> {
        self.skip_to_tag_end()?;

        // Self-closing (`.../>`): nothing more to consume.
        if self.scanner.pos >= 2 && self.scanner.data[self.scanner.pos - 2] == b'/' {
            return Ok(());
        }

        // Depth 1 = the element we are currently inside.
        let mut depth = 1;

        while depth > 0 && !self.scanner.is_eof() {
            // Advance to the next `<`.
            while let Some(ch) = self.scanner.peek() {
                if ch == b'<' {
                    break;
                }
                self.scanner.advance(1);
            }

            if self.scanner.is_eof() {
                break;
            }

            self.scanner.advance(1);

            if self.scanner.peek() == Some(b'/') {
                // Closing tag: one level up.
                depth -= 1;
            } else if self.scanner.peek() != Some(b'!') && self.scanner.peek() != Some(b'?') {
                // Opening tag: scan to `>`, noting whether it self-closes.
                let mut is_self_closing = false;

                while let Some(ch) = self.scanner.peek() {
                    if ch == b'/'
                        && self.scanner.pos + 1 < self.scanner.data.len()
                        && self.scanner.data[self.scanner.pos + 1] == b'>'
                    {
                        is_self_closing = true;
                    }
                    if ch == b'>' {
                        self.scanner.advance(1);
                        break;
                    }
                    self.scanner.advance(1);
                }

                if !is_self_closing {
                    depth += 1;
                }

                continue;
            }

            // Closing tag / comment / PI: consume through the next `>`.
            while let Some(ch) = self.scanner.peek() {
                if ch == b'>' {
                    self.scanner.advance(1);
                    break;
                }
                self.scanner.advance(1);
            }
        }

        Ok(())
    }
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_processing_instruction(&mut self) -> Result<()> {
|
||||
while !self.scanner.is_eof() {
|
||||
if self.scanner.peek() == Some(b'?') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
} else {
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
}
|
||||
Err(Error::Parse("Unclosed processing instruction".to_string()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_comment(&mut self) -> Result<()> {
|
||||
while !self.scanner.is_eof() {
|
||||
if self.scanner.peek() == Some(b'-') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'-') {
|
||||
self.scanner.advance(1);
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
}
|
||||
Err(Error::Parse("Unclosed comment".to_string()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn peek_ahead(&self, n: usize) -> Option<&'a [u8]> {
|
||||
if self.scanner.pos + n <= self.scanner.data.len() {
|
||||
Some(&self.scanner.data[self.scanner.pos..self.scanner.pos + n])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_doctype(&mut self) -> Result<()> {
|
||||
while !self.scanner.is_eof() {
|
||||
if self.scanner.peek() == Some(b'>') {
|
||||
self.scanner.advance(1);
|
||||
return Ok(());
|
||||
}
|
||||
self.scanner.advance(1);
|
||||
}
|
||||
Err(Error::Parse("Unclosed DOCTYPE".to_string()))
|
||||
}
|
||||
|
||||
    /// Consumes the closing tag `</expected_tag>` if it is the next markup.
    /// If the next content is not a closing tag, the scanner is rewound onto
    /// the `<` and `Ok` is returned — callers use this opportunistically
    /// after reading text content.
    ///
    /// NOTE(review): the name match is `ends_with(expected_tag)`, so any
    /// namespace prefix is accepted — but so would an unrelated tag that
    /// merely ends with the same suffix; confirm this looseness is intended.
    #[inline(always)]
    fn skip_closing_tag(&mut self, expected_tag: &str) -> Result<()> {
        self.scanner.skip_whitespace();

        if self.scanner.peek() != Some(b'<') {
            return Ok(());
        }

        self.scanner.advance(1);

        if self.scanner.peek() != Some(b'/') {
            // Opening tag, not a closing one: rewind onto the `<` and bail.
            self.scanner.pos = self.scanner.pos.saturating_sub(1);
            return Ok(());
        }

        self.scanner.advance(1);
        let tag = self.read_tag_name()?;

        // Matching (or prefixed) closing tag: consume through its `>`.
        if tag.ends_with(expected_tag) || tag == expected_tag {
            self.skip_to_tag_end()?;
        }

        Ok(())
    }
|
||||
|
||||
#[inline(always)]
|
||||
fn check_self_closing(&self) -> bool {
|
||||
if self.scanner.pos >= 2 {
|
||||
self.scanner.data[self.scanner.pos - 2] == b'/'
|
||||
&& self.scanner.data[self.scanner.pos - 1] == b'>'
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
    /// Top-level parse loop: walks the input markup, skipping PIs, comments,
    /// and DOCTYPEs, dispatching opening tags to `parse_element`, and stopping
    /// at the closing `</xbrl>` tag. Returns the populated `Document`
    /// (moved out of `self` with `mem::take`).
    fn parse(&mut self) -> Result<Document> {
        self.scanner.skip_whitespace();

        while !self.scanner.is_eof() {
            self.scanner.skip_whitespace();

            // Stray text between elements: skip to the next tag.
            if self.scanner.peek() != Some(b'<') {
                while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() {
                    self.scanner.advance(1);
                }
                continue;
            }

            self.scanner.advance(1);

            if self.scanner.peek() == Some(b'?') {
                self.skip_processing_instruction()?;
            } else if self.scanner.peek() == Some(b'!') {
                // `<!--` is a comment; any other `<!` is treated as DOCTYPE.
                if self.peek_ahead(3) == Some(b"!--") {
                    self.skip_comment()?;
                } else {
                    self.skip_doctype()?;
                }
            } else if self.scanner.peek() == Some(b'/') {
                self.scanner.advance(1);
                let tag = self.read_tag_name()?;
                self.skip_to_tag_end()?;
                // The document ends at the root's closing tag.
                if tag == "xbrl" || tag.ends_with(":xbrl") {
                    self.in_xbrl_root = false;
                    break;
                }
            } else {
                self.parse_element()?;
            }
        }

        // Materialize the interned concept-name table before handing the
        // document to the caller.
        self.doc.concept_names = self.allocator.get_all_strings();
        Ok(std::mem::take(&mut self.doc))
    }
|
||||
|
||||
fn parse_element(&mut self) -> Result<()> {
|
||||
let tag_name = self.read_tag_name()?;
|
||||
|
||||
if tag_name == "xbrl" || tag_name.ends_with(":xbrl") {
|
||||
self.parse_xbrl_root()?;
|
||||
self.in_xbrl_root = true;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !self.in_xbrl_root {
|
||||
self.skip_element_from_tag()?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if tag_name.ends_with(":context") || tag_name == "context" {
|
||||
self.parse_context()?;
|
||||
} else if tag_name.ends_with(":unit") || tag_name == "unit" {
|
||||
self.parse_unit()?;
|
||||
} else if tag_name.contains(':') {
|
||||
self.parse_fact(tag_name)?;
|
||||
} else {
|
||||
self.skip_element_from_tag()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_xbrl_root(&mut self) -> Result<()> {
|
||||
let attrs = self.parse_attributes()?;
|
||||
for (name, value) in attrs {
|
||||
if name.starts_with("xmlns") {
|
||||
let ns_name = if name.len() > 6 && name.chars().nth(5) == Some(':') {
|
||||
CompactString::from(&name[6..])
|
||||
} else {
|
||||
CompactString::new("")
|
||||
};
|
||||
self.doc
|
||||
.namespaces
|
||||
.insert(ns_name, CompactString::from(value));
|
||||
}
|
||||
}
|
||||
self.skip_to_tag_end()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Parses a `<context>` element: mandatory `id` attribute, then child
    /// `entity`, `period`, and optional `scenario` elements up to the
    /// matching close tag. Contexts missing an entity or period are dropped
    /// silently rather than reported.
    ///
    /// # Errors
    /// Returns `Error::Parse` if the `id` attribute is missing or a child
    /// element is malformed.
    fn parse_context(&mut self) -> Result<()> {
        let attrs = self.parse_attributes()?;
        let id = attrs
            .iter()
            .find(|(n, _)| *n == "id")
            .map(|(_, v)| CompactString::from(*v))
            .ok_or_else(|| Error::Parse("Context missing id".to_string()))?;

        self.skip_to_tag_end()?;

        let mut entity = None;
        let mut period = None;
        let mut scenario = None;

        loop {
            // Advance to the next tag, skipping interleaved text.
            self.scanner.skip_whitespace();
            while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() {
                self.scanner.advance(1);
            }

            if self.scanner.is_eof() {
                break;
            }

            let saved_pos = self.scanner.pos;
            self.scanner.advance(1);

            if self.scanner.peek() == Some(b'/') {
                self.scanner.advance(1);
                let tag = self.read_tag_name()?;
                if tag.ends_with("context") || tag == "context" {
                    self.skip_to_tag_end()?;
                    break;
                }
                // Unexpected closing tag: rewind so the caller sees it.
                self.scanner.pos = saved_pos;
                break;
            }

            let tag = self.read_tag_name()?;
            if tag.ends_with("entity") {
                entity = Some(self.parse_entity()?);
            } else if tag.ends_with("period") {
                period = Some(self.parse_period()?);
            } else if tag.ends_with("scenario") {
                scenario = Some(self.parse_scenario()?);
            } else {
                self.skip_element_from_tag()?;
            }
        }

        // Only structurally complete contexts are recorded.
        if let (Some(entity), Some(period)) = (entity, period) {
            self.doc.contexts.push(Context {
                id,
                entity,
                period,
                scenario,
            });
        }

        Ok(())
    }
|
||||
|
||||
    /// Parses an `<entity>` element: the `identifier` child (with its
    /// `scheme` attribute) and an optional `segment`. Missing parts default
    /// to empty strings / `None`.
    fn parse_entity(&mut self) -> Result<Entity> {
        let _attrs = self.parse_attributes()?;
        self.skip_to_tag_end()?;

        let mut identifier = CompactString::new("");
        let mut scheme = CompactString::new("");
        let mut segment = None;

        loop {
            // Advance to the next tag, skipping interleaved text.
            self.scanner.skip_whitespace();
            while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() {
                self.scanner.advance(1);
            }

            if self.scanner.is_eof() {
                break;
            }

            let saved_pos = self.scanner.pos;
            self.scanner.advance(1);

            if self.scanner.peek() == Some(b'/') {
                self.scanner.advance(1);
                let tag = self.read_tag_name()?;
                if tag.ends_with("entity") || tag == "entity" {
                    self.skip_to_tag_end()?;
                    break;
                }
                // Unexpected closing tag: rewind so the caller sees it.
                self.scanner.pos = saved_pos;
                break;
            }

            let tag = self.read_tag_name()?;
            if tag.ends_with("identifier") {
                let attrs = self.parse_attributes()?;
                scheme = attrs
                    .iter()
                    .find(|(n, _)| *n == "scheme")
                    .map(|(_, v)| CompactString::from(*v))
                    .unwrap_or_default();
                self.skip_to_tag_end()?;
                identifier = CompactString::from(self.read_text_content()?);
                self.skip_closing_tag("identifier")?;
            } else if tag.ends_with("segment") {
                segment = Some(self.parse_segment()?);
            } else {
                self.skip_element_from_tag()?;
            }
        }

        Ok(Entity {
            identifier,
            scheme,
            segment,
        })
    }
|
||||
|
||||
    /// Parses a `<period>` element into one of the three XBRL period forms:
    /// `Forever`, `Instant { date }`, or `Duration { start, end }`.
    ///
    /// # Errors
    /// Returns `Error::Parse` when none of the three forms is complete
    /// (e.g. a startDate without an endDate).
    fn parse_period(&mut self) -> Result<Period> {
        let _attrs = self.parse_attributes()?;
        self.skip_to_tag_end()?;

        let mut instant = None;
        let mut start_date = None;
        let mut end_date = None;
        let mut forever = false;

        loop {
            self.scanner.skip_whitespace();
            // Unlike the other element loops, text content ends this loop.
            if self.scanner.peek() != Some(b'<') {
                break;
            }

            let saved_pos = self.scanner.pos;
            self.scanner.advance(1);

            if self.scanner.peek() == Some(b'/') {
                self.scanner.advance(1);
                let tag = self.read_tag_name()?;
                if tag.ends_with("period") {
                    self.skip_to_tag_end()?;
                    break;
                }
                // Unexpected closing tag: rewind so the caller sees it.
                self.scanner.pos = saved_pos;
                break;
            }

            let tag = self.read_tag_name()?;
            if tag.ends_with("instant") {
                self.skip_to_tag_end()?;
                instant = Some(CompactString::from(self.read_text_content()?));
                self.skip_closing_tag("instant")?;
            } else if tag.ends_with("startDate") {
                self.skip_to_tag_end()?;
                start_date = Some(CompactString::from(self.read_text_content()?));
                self.skip_closing_tag("startDate")?;
            } else if tag.ends_with("endDate") {
                self.skip_to_tag_end()?;
                end_date = Some(CompactString::from(self.read_text_content()?));
                self.skip_closing_tag("endDate")?;
            } else if tag.ends_with("forever") {
                forever = true;
                self.skip_element_from_tag()?;
            } else {
                self.skip_element_from_tag()?;
            }
        }

        // Priority when multiple children are present: forever > instant > duration.
        if forever {
            Ok(Period::Forever)
        } else if let Some(date) = instant {
            Ok(Period::Instant { date })
        } else if let (Some(start), Some(end)) = (start_date, end_date) {
            Ok(Period::Duration { start, end })
        } else {
            Err(Error::Parse("Invalid period".to_string()))
        }
    }
|
||||
|
||||
    /// Parses a `<segment>` element's dimension members: `explicitMember`
    /// children (dimension attribute + member text) and `typedMember`
    /// children (dimension attribute + value text).
    fn parse_segment(&mut self) -> Result<Segment> {
        let _attrs = self.parse_attributes()?;
        self.skip_to_tag_end()?;

        let mut explicit_members = Vec::new();
        let mut typed_members = Vec::new();

        loop {
            // Advance to the next tag, skipping interleaved text.
            self.scanner.skip_whitespace();
            while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() {
                self.scanner.advance(1);
            }

            if self.scanner.is_eof() {
                break;
            }

            let saved_pos = self.scanner.pos;
            self.scanner.advance(1);

            if self.scanner.peek() == Some(b'/') {
                self.scanner.advance(1);
                let tag = self.read_tag_name()?;
                if tag.ends_with("segment") || tag == "segment" {
                    self.skip_to_tag_end()?;
                    break;
                }
                // Unexpected closing tag: rewind so the caller sees it.
                self.scanner.pos = saved_pos;
                break;
            }

            let tag = self.read_tag_name()?;
            if tag.ends_with("explicitMember") {
                let attrs = self.parse_attributes()?;
                let dimension = attrs
                    .iter()
                    .find(|(n, _)| *n == "dimension")
                    .map(|(_, v)| CompactString::from(*v))
                    .unwrap_or_default();
                self.skip_to_tag_end()?;
                let member = CompactString::from(self.read_text_content()?);
                explicit_members.push(DimensionMember { dimension, member });
                self.skip_closing_tag("explicitMember")?;
            } else if tag.ends_with("typedMember") {
                let attrs = self.parse_attributes()?;
                let dimension = attrs
                    .iter()
                    .find(|(n, _)| *n == "dimension")
                    .map(|(_, v)| CompactString::from(*v))
                    .unwrap_or_default();
                self.skip_to_tag_end()?;
                let value = CompactString::from(self.read_text_content()?);
                typed_members.push(TypedMember { dimension, value });
                self.skip_closing_tag("typedMember")?;
            } else {
                self.skip_element_from_tag()?;
            }
        }

        Ok(Segment {
            explicit_members,
            typed_members,
        })
    }
|
||||
|
||||
fn parse_scenario(&mut self) -> Result<Scenario> {
|
||||
let segment = self.parse_segment()?;
|
||||
Ok(Scenario {
|
||||
explicit_members: segment.explicit_members,
|
||||
typed_members: segment.typed_members,
|
||||
})
|
||||
}
|
||||
|
||||
    /// Parses a `<unit>` element: mandatory `id` attribute, then either one
    /// or more `measure` children (collected into `UnitType::Simple`) or a
    /// `divide` child (`UnitType::Divide`). Units with no recognized body
    /// are dropped silently.
    ///
    /// # Errors
    /// Returns `Error::Parse` if the `id` attribute is missing or a child
    /// element is malformed.
    fn parse_unit(&mut self) -> Result<()> {
        let attrs = self.parse_attributes()?;
        let id = attrs
            .iter()
            .find(|(n, _)| *n == "id")
            .map(|(_, v)| CompactString::from(*v))
            .ok_or_else(|| Error::Parse("Unit missing id".to_string()))?;

        self.skip_to_tag_end()?;

        let mut unit_type = None;

        loop {
            self.scanner.skip_whitespace();
            if self.scanner.peek() != Some(b'<') {
                break;
            }

            let saved_pos = self.scanner.pos;
            self.scanner.advance(1);

            if self.scanner.peek() == Some(b'/') {
                self.scanner.advance(1);
                let tag = self.read_tag_name()?;
                if tag.ends_with("unit") {
                    self.skip_to_tag_end()?;
                    break;
                }
                // Unexpected closing tag: rewind so the caller sees it.
                self.scanner.pos = saved_pos;
                break;
            }

            let tag = self.read_tag_name()?;
            if tag.ends_with("measure") {
                self.skip_to_tag_end()?;
                let measure_text = self.read_text_content()?;
                let measure = self.parse_measure(measure_text);

                // First measure creates the Simple variant; later ones append.
                if unit_type.is_none() {
                    unit_type = Some(UnitType::Simple(vec![measure]));
                } else if let Some(UnitType::Simple(ref mut measures)) = unit_type {
                    measures.push(measure);
                }

                self.skip_closing_tag("measure")?;
            } else if tag.ends_with("divide") {
                unit_type = Some(self.parse_unit_divide()?);
            } else {
                self.skip_element_from_tag()?;
            }
        }

        if let Some(unit_type) = unit_type {
            self.doc.units.push(Unit { id, unit_type });
        }

        Ok(())
    }
|
||||
|
||||
    /// Parses a `<divide>` unit body: `unitNumerator` and `unitDenominator`
    /// children, each containing a list of measures. Missing sides yield
    /// empty measure lists.
    fn parse_unit_divide(&mut self) -> Result<UnitType> {
        let _attrs = self.parse_attributes()?;
        self.skip_to_tag_end()?;

        let mut numerator = Vec::new();
        let mut denominator = Vec::new();

        loop {
            self.scanner.skip_whitespace();
            if self.scanner.peek() != Some(b'<') {
                break;
            }

            let saved_pos = self.scanner.pos;
            self.scanner.advance(1);

            if self.scanner.peek() == Some(b'/') {
                self.scanner.advance(1);
                let tag = self.read_tag_name()?;
                if tag.ends_with("divide") {
                    self.skip_to_tag_end()?;
                    break;
                }
                // Unexpected closing tag: rewind so the caller sees it.
                self.scanner.pos = saved_pos;
                break;
            }

            let tag = self.read_tag_name()?;
            if tag.ends_with("unitNumerator") {
                self.skip_to_tag_end()?;
                numerator = self.parse_unit_measures()?;
                self.skip_closing_tag("unitNumerator")?;
            } else if tag.ends_with("unitDenominator") {
                self.skip_to_tag_end()?;
                denominator = self.parse_unit_measures()?;
                self.skip_closing_tag("unitDenominator")?;
            } else {
                self.skip_element_from_tag()?;
            }
        }

        Ok(UnitType::Divide {
            numerator,
            denominator,
        })
    }
|
||||
|
||||
    /// Collects consecutive `<measure>` children, stopping (and rewinding to
    /// the `<`) at the first closing tag or non-measure element so the caller
    /// can handle it.
    fn parse_unit_measures(&mut self) -> Result<Vec<Measure>> {
        let mut measures = Vec::new();

        loop {
            self.scanner.skip_whitespace();
            if self.scanner.peek() != Some(b'<') {
                break;
            }

            let saved_pos = self.scanner.pos;
            self.scanner.advance(1);

            // Closing tag: leave it for the caller.
            if self.scanner.peek() == Some(b'/') {
                self.scanner.pos = saved_pos;
                break;
            }

            let tag = self.read_tag_name()?;
            if tag.ends_with("measure") {
                self.skip_to_tag_end()?;
                let measure_text = self.read_text_content()?;
                measures.push(self.parse_measure(measure_text));
                self.skip_closing_tag("measure")?;
            } else {
                // Non-measure element: rewind and stop.
                self.scanner.pos = saved_pos;
                break;
            }
        }

        Ok(measures)
    }
|
||||
|
||||
fn parse_measure(&self, text: &str) -> Measure {
|
||||
if let Some(colon_pos) = text.find(':') {
|
||||
Measure {
|
||||
namespace: CompactString::from(&text[..colon_pos]),
|
||||
name: CompactString::from(&text[colon_pos + 1..]),
|
||||
}
|
||||
} else {
|
||||
Measure {
|
||||
namespace: CompactString::new(""),
|
||||
name: CompactString::from(text),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_fact(&mut self, tag_name: &str) -> Result<()> {
|
||||
let attrs = self.parse_attributes()?;
|
||||
|
||||
let is_nil = attrs
|
||||
.iter()
|
||||
.any(|(n, v)| *n == "xsi:nil" && (*v == "true" || *v == "1"));
|
||||
|
||||
let context_ref = attrs
|
||||
.iter()
|
||||
.find(|(n, _)| *n == "contextRef")
|
||||
.map(|(_, v)| CompactString::from(*v));
|
||||
|
||||
let unit_ref = attrs
|
||||
.iter()
|
||||
.find(|(n, _)| *n == "unitRef")
|
||||
.map(|(_, v)| CompactString::from(*v));
|
||||
|
||||
let id = attrs
|
||||
.iter()
|
||||
.find(|(n, _)| *n == "id")
|
||||
.map(|(_, v)| CompactString::from(*v));
|
||||
|
||||
let decimals = attrs
|
||||
.iter()
|
||||
.find(|(n, _)| *n == "decimals")
|
||||
.and_then(|(_, v)| v.parse::<i8>().ok());
|
||||
|
||||
let is_self_closing = self.check_self_closing();
|
||||
self.skip_to_tag_end()?;
|
||||
|
||||
let value = if is_self_closing || is_nil {
|
||||
CompactString::new("")
|
||||
} else {
|
||||
let value = self.read_text_content_with_cdata()?;
|
||||
self.skip_closing_tag(tag_name)?;
|
||||
value
|
||||
};
|
||||
|
||||
if let Some(context_ref) = context_ref {
|
||||
let concept_id = self.allocator.intern_string(tag_name);
|
||||
let context_id = self
|
||||
.doc
|
||||
.contexts
|
||||
.iter()
|
||||
.position(|c| c.id == context_ref)
|
||||
.map(|i| i as u16)
|
||||
.unwrap_or(0);
|
||||
|
||||
let unit_id = unit_ref
|
||||
.as_ref()
|
||||
.and_then(|u| self.doc.units.iter().position(|unit| unit.id == *u))
|
||||
.map(|i| (i + 1) as u16)
|
||||
.unwrap_or(0);
|
||||
|
||||
let fact_value = if is_nil {
|
||||
FactValue::Nil
|
||||
} else if value.is_empty() {
|
||||
FactValue::Text(self.allocator.intern_string(""))
|
||||
} else if let Ok(decimal) = value.parse::<f64>() {
|
||||
FactValue::Decimal(decimal)
|
||||
} else if let Ok(integer) = value.parse::<i64>() {
|
||||
FactValue::Integer(integer)
|
||||
} else {
|
||||
FactValue::Text(self.allocator.intern_string(&value))
|
||||
};
|
||||
|
||||
self.doc
|
||||
.facts
|
||||
.push(concept_id, context_id, unit_id, fact_value, decimals, id);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
308
rust/crabrl-fork/src/schema.rs
Normal file
308
rust/crabrl-fork/src/schema.rs
Normal file
@@ -0,0 +1,308 @@
|
||||
// Schema loading and validation for XBRL
|
||||
use crate::model::*;
|
||||
use crate::validator::ValidationError;
|
||||
use crate::{Error, Result};
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
/// Loads and caches parsed XBRL taxonomy schemas.
pub struct SchemaLoader {
    // Parsed schemas keyed by the path string they were loaded from.
    cache: HashMap<CompactString, Schema>,
}
|
||||
|
||||
/// `Default` delegates to [`SchemaLoader::new`], yielding an empty cache.
impl Default for SchemaLoader {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl SchemaLoader {
    /// Creates a loader with an empty schema cache.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Loads and caches the schema at `path`, returning a reference into the
    /// cache. Repeated calls with the same path string reuse the parsed result.
    ///
    /// # Errors
    /// Propagates I/O errors from reading the file and parse errors from the
    /// schema scanner.
    pub fn load_schema<P: AsRef<Path>>(&mut self, path: P) -> Result<&Schema> {
        let path_str = path.as_ref().to_string_lossy();
        let key = CompactString::from(path_str.as_ref());

        // Two-step lookup (contains_key + get) sidesteps borrowing `self`
        // mutably while `parse_schema_file` still needs `&self`.
        if self.cache.contains_key(&key) {
            return Ok(self.cache.get(&key).unwrap());
        }

        let schema = self.parse_schema_file(path)?;
        self.cache.insert(key.clone(), schema);
        Ok(self.cache.get(&key).unwrap())
    }

    /// Reads the file at `path` and parses it as a schema document.
    fn parse_schema_file<P: AsRef<Path>>(&self, path: P) -> Result<Schema> {
        let content = std::fs::read(path)?;
        self.parse_schema_bytes(&content)
    }

    /// Parses schema XML with lightweight substring scanning (no full XML
    /// parse): extracts the target namespace, `<xs:element>` declarations,
    /// and `<xs:import>` directives.
    ///
    /// NOTE(review): this is a heuristic scanner — an `/>` belonging to a
    /// nested child (e.g. `<xs:annotation/>`) truncates an element's
    /// attribute window early; acceptable for typical taxonomy schemas but
    /// not a conforming XSD parser.
    fn parse_schema_bytes(&self, data: &[u8]) -> Result<Schema> {
        // Simple XML parsing for schema
        let mut schema = Schema {
            target_namespace: CompactString::new(""),
            elements: HashMap::new(),
            types: HashMap::new(),
            imports: Vec::new(),
        };

        // Skip BOM if present
        let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
            &data[3..]
        } else {
            data
        };

        let text = std::str::from_utf8(data)
            .map_err(|_| Error::Parse("Invalid UTF-8 in schema".to_string()))?;

        // Extract target namespace (17 == len("targetNamespace=\""))
        if let Some(ns_start) = text.find("targetNamespace=\"") {
            let ns_start = ns_start + 17;
            if let Some(ns_end) = text[ns_start..].find('"') {
                schema.target_namespace = CompactString::from(&text[ns_start..ns_start + ns_end]);
            }
        }

        // Parse elements
        let mut pos = 0;
        while let Some(elem_start) = text[pos..].find("<xs:element") {
            let elem_start = pos + elem_start;
            pos = elem_start + 1;

            // Find element end: first `/>` or `</xs:element>` (13 == its length)
            let elem_end = if let Some(end) = text[elem_start..].find("/>") {
                elem_start + end + 2
            } else if let Some(end) = text[elem_start..].find("</xs:element>") {
                elem_start + end + 13
            } else {
                // Unterminated declaration: resume scanning after the `<`.
                continue;
            };

            let elem_text = &text[elem_start..elem_end];

            // Extract element attributes
            let mut element = SchemaElement {
                name: CompactString::new(""),
                element_type: CompactString::new(""),
                substitution_group: None,
                period_type: None,
                balance: None,
                abstract_element: elem_text.contains("abstract=\"true\""),
                nillable: elem_text.contains("nillable=\"true\""),
            };

            // Extract name (6 == len("name=\""))
            if let Some(name_start) = elem_text.find("name=\"") {
                let name_start = name_start + 6;
                if let Some(name_end) = elem_text[name_start..].find('"') {
                    element.name =
                        CompactString::from(&elem_text[name_start..name_start + name_end]);
                }
            }

            // Extract type (6 == len("type=\""))
            if let Some(type_start) = elem_text.find("type=\"") {
                let type_start = type_start + 6;
                if let Some(type_end) = elem_text[type_start..].find('"') {
                    element.element_type =
                        CompactString::from(&elem_text[type_start..type_start + type_end]);
                }
            }

            // Extract substitutionGroup (19 == len("substitutionGroup=\""))
            if let Some(sg_start) = elem_text.find("substitutionGroup=\"") {
                let sg_start = sg_start + 19;
                if let Some(sg_end) = elem_text[sg_start..].find('"') {
                    element.substitution_group =
                        Some(CompactString::from(&elem_text[sg_start..sg_start + sg_end]));
                }
            }

            // Extract XBRL-specific attributes (18 == len("xbrli:periodType=\""))
            if let Some(pt_start) = elem_text.find("xbrli:periodType=\"") {
                let pt_start = pt_start + 18;
                if let Some(pt_end) = elem_text[pt_start..].find('"') {
                    element.period_type =
                        Some(CompactString::from(&elem_text[pt_start..pt_start + pt_end]));
                }
            }

            // 15 == len("xbrli:balance=\"")
            if let Some(bal_start) = elem_text.find("xbrli:balance=\"") {
                let bal_start = bal_start + 15;
                if let Some(bal_end) = elem_text[bal_start..].find('"') {
                    element.balance = Some(CompactString::from(
                        &elem_text[bal_start..bal_start + bal_end],
                    ));
                }
            }

            if !element.name.is_empty() {
                schema.elements.insert(element.name.clone(), element);
            }
        }

        // Parse imports
        pos = 0;
        while let Some(import_start) = text[pos..].find("<xs:import") {
            let import_start = pos + import_start;
            pos = import_start + 1;

            if let Some(import_end) = text[import_start..].find("/>") {
                let import_text = &text[import_start..import_start + import_end];

                let mut import = SchemaImport {
                    namespace: CompactString::new(""),
                    schema_location: CompactString::new(""),
                };

                // 11 == len("namespace=\"")
                if let Some(ns_start) = import_text.find("namespace=\"") {
                    let ns_start = ns_start + 11;
                    if let Some(ns_end) = import_text[ns_start..].find('"') {
                        import.namespace =
                            CompactString::from(&import_text[ns_start..ns_start + ns_end]);
                    }
                }

                // 16 == len("schemaLocation=\"")
                if let Some(loc_start) = import_text.find("schemaLocation=\"") {
                    let loc_start = loc_start + 16;
                    if let Some(loc_end) = import_text[loc_start..].find('"') {
                        import.schema_location =
                            CompactString::from(&import_text[loc_start..loc_start + loc_end]);
                    }
                }

                schema.imports.push(import);
            }
        }

        Ok(schema)
    }

    /// Validates a single named element's value against `schema`.
    /// Unknown element names pass (they may come from an imported schema).
    ///
    /// # Errors
    /// Returns `Error::Validation` for abstract elements or type-restriction
    /// violations.
    pub fn validate_element(&self, name: &str, value: &str, schema: &Schema) -> Result<()> {
        if let Some(element) = schema.elements.get(name) {
            // Check if element is abstract
            if element.abstract_element {
                return Err(Error::Validation(format!("Element {} is abstract", name)));
            }

            // Validate type
            if let Some(type_def) = schema.types.get(&element.element_type) {
                self.validate_type(value, type_def)?;
            }

            Ok(())
        } else {
            // Element not found in schema - might be from imported schema
            Ok(())
        }
    }

    /// Applies each restriction facet of `type_def` to `value`.
    ///
    /// NOTE(review): `Pattern` is approximated with a substring `contains`
    /// check rather than regular-expression matching, and length facets use
    /// byte lengths — confirm these simplifications are acceptable.
    fn validate_type(&self, value: &str, type_def: &SchemaType) -> Result<()> {
        for restriction in &type_def.restrictions {
            match restriction {
                TypeRestriction::MinInclusive(min) => {
                    // Non-numeric values or bounds silently pass.
                    if let (Ok(val), Ok(min_val)) = (value.parse::<f64>(), min.parse::<f64>()) {
                        if val < min_val {
                            return Err(Error::Validation(format!(
                                "Value {} is less than minimum {}",
                                val, min_val
                            )));
                        }
                    }
                }
                TypeRestriction::MaxInclusive(max) => {
                    if let (Ok(val), Ok(max_val)) = (value.parse::<f64>(), max.parse::<f64>()) {
                        if val > max_val {
                            return Err(Error::Validation(format!(
                                "Value {} is greater than maximum {}",
                                val, max_val
                            )));
                        }
                    }
                }
                TypeRestriction::Pattern(pattern) => {
                    if !value.contains(pattern.as_str()) {
                        return Err(Error::Validation(format!(
                            "Value {} doesn't match pattern {}",
                            value, pattern
                        )));
                    }
                }
                TypeRestriction::MinLength(min) => {
                    if value.len() < *min {
                        return Err(Error::Validation(format!(
                            "Value length {} is less than minimum {}",
                            value.len(),
                            min
                        )));
                    }
                }
                TypeRestriction::MaxLength(max) => {
                    if value.len() > *max {
                        return Err(Error::Validation(format!(
                            "Value length {} is greater than maximum {}",
                            value.len(),
                            max
                        )));
                    }
                }
                _ => {}
            }
        }
        Ok(())
    }
}
|
||||
|
||||
// Schema validator for documents
|
||||
// Schema validator for documents
/// Validates parsed documents against a set of registered schemas.
pub struct SchemaValidator {
    // Schemas registered via `add_schema`, checked in insertion order.
    schemas: Vec<Schema>,
}
|
||||
|
||||
/// `Default` delegates to [`SchemaValidator::new`], yielding no schemas.
impl Default for SchemaValidator {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl SchemaValidator {
    /// Creates a validator with no schemas registered.
    pub fn new() -> Self {
        Self {
            schemas: Vec::new(),
        }
    }

    /// Registers an additional schema to validate documents against.
    pub fn add_schema(&mut self, schema: Schema) {
        self.schemas.push(schema);
    }

    /// Validates `doc` against the registered schemas.
    ///
    /// NOTE(review): currently a stub — both loops only bind placeholder
    /// values and `errors` is never populated, so this always returns an
    /// empty list. The commented-out block sketches the intended
    /// required-element check.
    pub fn validate_document(&self, doc: &Document) -> Vec<ValidationError> {
        let errors = Vec::new();

        for i in 0..doc.facts.len() {
            let _concept_id = doc.facts.concept_ids.get(i);
            let _value = doc.facts.values.get(i);
        }

        for schema in &self.schemas {
            for element in schema.elements.values() {
                if !element.nillable && !element.abstract_element {
                    // Check if this required element exists in document
                    // This would require reverse mapping from concept names to facts
                    let _found = false;
                    // if !found {
                    //     errors.push(ValidationError::MissingRequiredElement {
                    //         element: name.to_string(),
                    //     });
                    // }
                }
            }
        }

        errors
    }
}
|
||||
51
rust/crabrl-fork/src/sec.rs
Normal file
51
rust/crabrl-fork/src/sec.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
// SEC EDGAR XBRL filing support (local files only)
|
||||
use crate::{Parser, Document, Result};
|
||||
use std::path::Path;
|
||||
|
||||
/// Parses SEC EDGAR XBRL filings from local files, with validation
/// enabled by default.
pub struct SecFilingParser {
    // Underlying XBRL parser; configured via `with_validation`.
    parser: Parser,
}
|
||||
|
||||
impl SecFilingParser {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
parser: Parser::new().with_validation(true),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_filing<P: AsRef<Path>>(&self, path: P) -> Result<Document> {
|
||||
self.parser.parse_file(path)
|
||||
}
|
||||
|
||||
pub fn with_validation(mut self, validate: bool) -> Self {
|
||||
self.parser = self.parser.with_validation(validate);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// Test utilities for SEC filings
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke-tests parsing a small local filing when the fixture exists;
    /// skips silently otherwise so environments without test data still pass.
    #[test]
    fn test_parse_local_sec_filing() {
        let parser = SecFilingParser::new();

        // Test with local test files
        if std::path::Path::new("test_data/test_tiny.xbrl").exists() {
            match parser.parse_filing("test_data/test_tiny.xbrl") {
                Ok(doc) => {
                    println!("Successfully parsed filing:");
                    println!("  Facts: {}", doc.facts.len());
                    println!("  Contexts: {}", doc.contexts.len());
                    println!("  Units: {}", doc.units.len());
                    // `!is_empty()` over `len() > 0` (clippy::len_zero).
                    assert!(!doc.contexts.is_empty(), "Should have contexts");
                }
                Err(e) => {
                    eprintln!("Failed to parse filing: {}", e);
                }
            }
        }
    }
}
|
||||
303
rust/crabrl-fork/src/simd.rs
Normal file
303
rust/crabrl-fork/src/simd.rs
Normal file
@@ -0,0 +1,303 @@
|
||||
use memchr::{memchr, memchr3};
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
// Byte values of the XML structural delimiters the scanner fast paths hunt for.
const XML_TAG_START: u8 = b'<';
const XML_TAG_END: u8 = b'>';
const XML_QUOTE: u8 = b'"';
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_tag_start(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_TAG_START, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_tag_end(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_TAG_END, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_quote(haystack: &[u8]) -> Option<usize> {
|
||||
memchr(XML_QUOTE, haystack)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_any_delimiter(haystack: &[u8]) -> Option<usize> {
|
||||
memchr3(XML_TAG_START, XML_TAG_END, XML_QUOTE, haystack)
|
||||
}
|
||||
|
||||
/// AVX2 substring search: scans 32-byte chunks for `pattern[0]` and verifies
/// full matches at each candidate position, then finishes the unaligned tail
/// with a scalar loop. Returns the index of the first match.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g. via
/// `is_x86_feature_detected!("avx2")`); loads are unaligned (`loadu`) and the
/// `i + 32 <= len` guard keeps every 32-byte read in bounds.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn find_pattern_avx2(haystack: &[u8], pattern: &[u8]) -> Option<usize> {
    // Empty pattern or pattern longer than haystack can never match.
    if pattern.is_empty() || haystack.len() < pattern.len() {
        return None;
    }

    // Broadcast the first pattern byte across all 32 lanes.
    let first_byte = _mm256_set1_epi8(pattern[0] as i8);
    let mut i = 0;

    while i + 32 <= haystack.len() {
        let chunk = _mm256_loadu_si256(haystack.as_ptr().add(i) as *const _);
        let cmp = _mm256_cmpeq_epi8(chunk, first_byte);
        // One bit per lane: set where the chunk byte equals pattern[0].
        let mask = _mm256_movemask_epi8(cmp);

        if mask != 0 {
            // Verify the full pattern at each candidate lane, lowest first.
            for bit_pos in 0..32 {
                if (mask & (1 << bit_pos)) != 0 {
                    let pos = i + bit_pos;
                    // Candidate may extend past the chunk; bounds-check the
                    // full pattern slice before comparing.
                    if pos + pattern.len() <= haystack.len()
                        && &haystack[pos..pos + pattern.len()] == pattern
                    {
                        return Some(pos);
                    }
                }
            }
        }
        i += 32;
    }

    // Scalar fallback for the final sub-32-byte tail. `len >= pattern.len()`
    // was established above, so the subtraction cannot underflow.
    while i < haystack.len() - pattern.len() + 1 {
        if &haystack[i..i + pattern.len()] == pattern {
            return Some(i);
        }
        i += 1;
    }

    None
}
|
||||
|
||||
/// AVX2 whitespace skip: advances `pos` past runs of space/tab/LF/CR, 32
/// bytes at a time, then finishes the tail with a scalar loop. Returns the
/// index of the first non-whitespace byte (or `data.len()`).
///
/// # Safety
/// Caller must ensure AVX2 is available; the `pos + 32 <= len` guard keeps
/// every unaligned 32-byte load in bounds.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn skip_whitespace_avx2(data: &[u8], mut pos: usize) -> usize {
    // Broadcast each whitespace byte across the vector lanes.
    let space = _mm256_set1_epi8(0x20);
    let tab = _mm256_set1_epi8(0x09);
    let newline = _mm256_set1_epi8(0x0A);
    let carriage = _mm256_set1_epi8(0x0D);

    while pos + 32 <= data.len() {
        let chunk = _mm256_loadu_si256(data.as_ptr().add(pos) as *const _);

        let is_space = _mm256_cmpeq_epi8(chunk, space);
        let is_tab = _mm256_cmpeq_epi8(chunk, tab);
        let is_newline = _mm256_cmpeq_epi8(chunk, newline);
        let is_carriage = _mm256_cmpeq_epi8(chunk, carriage);

        // OR the four comparisons: a lane is all-ones iff the byte is any of
        // the four whitespace characters.
        let is_whitespace = _mm256_or_si256(
            _mm256_or_si256(is_space, is_tab),
            _mm256_or_si256(is_newline, is_carriage),
        );

        let mask = _mm256_movemask_epi8(is_whitespace);

        // mask == -1 (all 32 bits set) means the whole chunk was whitespace;
        // otherwise the first zero bit marks the first non-whitespace byte.
        if mask != -1 {
            for i in 0..32 {
                if (mask & (1 << i)) == 0 {
                    return pos + i;
                }
            }
        }

        pos += 32;
    }

    // Scalar tail for the final sub-32-byte remainder.
    while pos < data.len() {
        match data[pos] {
            b' ' | b'\t' | b'\n' | b'\r' => pos += 1,
            _ => break,
        }
    }

    pos
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn skip_whitespace(data: &[u8], mut pos: usize) -> usize {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") && data.len() - pos >= 32 {
|
||||
return unsafe { skip_whitespace_avx2(data, pos) };
|
||||
}
|
||||
}
|
||||
|
||||
while pos < data.len() {
|
||||
match data[pos] {
|
||||
b' ' | b'\t' | b'\n' | b'\r' => pos += 1,
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
pos
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_pattern(haystack: &[u8], pattern: &[u8]) -> Option<usize> {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") && haystack.len() >= 32 {
|
||||
return unsafe { find_pattern_avx2(haystack, pattern) };
|
||||
}
|
||||
}
|
||||
|
||||
haystack
|
||||
.windows(pattern.len())
|
||||
.position(|window| window == pattern)
|
||||
}
|
||||
|
||||
/// True for the four ASCII bytes XML treats as whitespace: space, tab,
/// line feed, and carriage return.
#[inline(always)]
pub fn is_whitespace(byte: u8) -> bool {
    byte == b' ' || byte == b'\t' || byte == b'\n' || byte == b'\r'
}
|
||||
|
||||
/// True for bytes allowed inside an XML name as this parser accepts them:
/// ASCII alphanumerics plus `-`, `_`, `.`, and `:` (the namespace separator).
#[inline(always)]
pub fn is_name_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || byte == b'-' || byte == b'_' || byte == b'.' || byte == b':'
}
|
||||
|
||||
/// True for bytes that may begin an XML name as this parser accepts them:
/// ASCII letters, `_`, or `:` — digits, `-`, and `.` may only appear later.
#[inline(always)]
pub fn is_name_start_char(byte: u8) -> bool {
    byte.is_ascii_alphabetic() || byte == b'_' || byte == b':'
}
|
||||
|
||||
/// Forward-only cursor over a byte slice with SIMD-accelerated search
/// helpers. `pos` is the current offset into `data` and only ever moves
/// forward (methods clamp it to `data.len()`).
pub struct SimdScanner<'a> {
    // The full input being scanned; slices returned by read_* borrow from it.
    pub data: &'a [u8],
    // Current cursor offset; 0..=data.len().
    pub pos: usize,
}
|
||||
|
||||
impl<'a> SimdScanner<'a> {
|
||||
#[inline(always)]
|
||||
pub fn new(data: &'a [u8]) -> Self {
|
||||
Self { data, pos: 0 }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn skip_whitespace(&mut self) {
|
||||
self.pos = skip_whitespace(self.data, self.pos);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_next(&self, byte: u8) -> Option<usize> {
|
||||
memchr(byte, &self.data[self.pos..]).map(|i| self.pos + i)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn find_pattern(&self, pattern: &[u8]) -> Option<usize> {
|
||||
find_pattern(&self.data[self.pos..], pattern).map(|i| self.pos + i)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn advance(&mut self, n: usize) {
|
||||
self.pos = (self.pos + n).min(self.data.len());
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn peek(&self) -> Option<u8> {
|
||||
self.data.get(self.pos).copied()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn peek_ahead(&self, n: usize) -> Option<&'a [u8]> {
|
||||
if self.pos + n <= self.data.len() {
|
||||
Some(&self.data[self.pos..self.pos + n])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn remaining(&self) -> &'a [u8] {
|
||||
&self.data[self.pos..]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_eof(&self) -> bool {
|
||||
self.pos >= self.data.len()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn read_until(&mut self, byte: u8) -> &'a [u8] {
|
||||
let start = self.pos;
|
||||
if let Some(offset) = memchr(byte, &self.data[self.pos..]) {
|
||||
self.pos += offset;
|
||||
&self.data[start..self.pos]
|
||||
} else {
|
||||
self.pos = self.data.len();
|
||||
&self.data[start..]
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn read_until_any(&mut self, bytes: &[u8]) -> &'a [u8] {
|
||||
let start = self.pos;
|
||||
while self.pos < self.data.len() {
|
||||
if bytes.contains(&self.data[self.pos]) {
|
||||
return &self.data[start..self.pos];
|
||||
}
|
||||
self.pos += 1;
|
||||
}
|
||||
&self.data[start..]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn consume_if(&mut self, byte: u8) -> bool {
|
||||
if self.peek() == Some(byte) {
|
||||
self.advance(1);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn consume_while<F: Fn(u8) -> bool>(&mut self, predicate: F) -> &'a [u8] {
|
||||
let start = self.pos;
|
||||
while let Some(byte) = self.peek() {
|
||||
if predicate(byte) {
|
||||
self.advance(1);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
&self.data[start..self.pos]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `"context"` starts right after the 6-byte prefix `<xbrl:`.
    #[test]
    fn test_find_pattern() {
        let haystack = b"<xbrl:context id=\"c1\">";
        let pattern = b"context";
        assert_eq!(find_pattern(haystack, pattern), Some(6));
    }

    /// NOTE(review): the expected index 6 implies a 6-byte whitespace prefix,
    /// but the literal below shows only 4 whitespace bytes — leading spaces
    /// look collapsed by the diff renderer. Confirm against the real file.
    #[test]
    fn test_skip_whitespace() {
        let data = b" \t\n\r<tag>";
        assert_eq!(skip_whitespace(data, 0), 6);
    }

    /// read_until stops ON the delimiter and returns everything before it.
    #[test]
    fn test_scanner_read_until() {
        let data = b"hello world>";
        let mut scanner = SimdScanner::new(data);
        let result = scanner.read_until(b'>');
        assert_eq!(result, b"hello world");
        assert_eq!(scanner.peek(), Some(b'>'));
    }

    /// consume_while stops at the first byte failing the predicate.
    #[test]
    fn test_scanner_consume_while() {
        let data = b"abc123";
        let mut scanner = SimdScanner::new(data);
        let result = scanner.consume_while(|b| b.is_ascii_alphabetic());
        assert_eq!(result, b"abc");
        assert_eq!(scanner.peek(), Some(b'1'));
    }
}
|
||||
99
rust/crabrl-fork/src/simple_parser.rs
Normal file
99
rust/crabrl-fork/src/simple_parser.rs
Normal file
@@ -0,0 +1,99 @@
|
||||
//! Simple working XBRL parser
|
||||
|
||||
use crate::{model::*, Result};
|
||||
use std::path::Path;
|
||||
|
||||
/// Placeholder XBRL "parser" that estimates document statistics by substring
/// counting instead of real XML parsing (see `parse_bytes`).
#[derive(Default)]
pub struct Parser {
    // Not yet used by this simplified implementation; kept for API parity
    // with the full parser.
    #[allow(dead_code)]
    load_linkbases: bool,
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn parse_str(&self, content: &str) -> Result<Document> {
|
||||
self.parse_bytes(content.as_bytes())
|
||||
}
|
||||
|
||||
pub fn parse_file<P: AsRef<Path>>(&self, path: P) -> Result<Document> {
|
||||
let content = std::fs::read(path)?;
|
||||
self.parse_bytes(&content)
|
||||
}
|
||||
|
||||
pub fn parse_bytes(&self, data: &[u8]) -> Result<Document> {
|
||||
// Simple XML parsing - just count elements for now
|
||||
let text = String::from_utf8_lossy(data);
|
||||
|
||||
// Count facts (very simplified)
|
||||
let fact_count = text.matches("<us-gaap:").count()
|
||||
+ text.matches("<dei:").count()
|
||||
+ text.matches("<ifrs:").count();
|
||||
|
||||
// Count contexts
|
||||
let context_count =
|
||||
text.matches("<context ").count() + text.matches("<xbrli:context").count();
|
||||
|
||||
// Count units
|
||||
let unit_count = text.matches("<unit ").count() + text.matches("<xbrli:unit").count();
|
||||
|
||||
// Create dummy document with approximate counts
|
||||
let mut doc = Document {
|
||||
facts: FactStorage {
|
||||
concept_ids: vec![0; fact_count],
|
||||
context_ids: vec![0; fact_count],
|
||||
unit_ids: vec![0; fact_count],
|
||||
values: vec![FactValue::Text(String::from("")); fact_count],
|
||||
decimals: vec![None; fact_count],
|
||||
ids: vec![None; fact_count],
|
||||
footnote_refs: vec![],
|
||||
},
|
||||
contexts: Vec::with_capacity(context_count),
|
||||
units: Vec::with_capacity(unit_count),
|
||||
tuples: Vec::new(),
|
||||
footnotes: Vec::new(),
|
||||
presentation_links: Vec::new(),
|
||||
calculation_links: Vec::new(),
|
||||
definition_links: Vec::new(),
|
||||
label_links: Vec::new(),
|
||||
reference_links: Vec::new(),
|
||||
custom_links: Vec::new(),
|
||||
role_types: Vec::new(),
|
||||
arcrole_types: Vec::new(),
|
||||
schemas: Vec::new(),
|
||||
dimensions: Vec::new(),
|
||||
concept_names: Vec::new(),
|
||||
};
|
||||
|
||||
// Add dummy contexts
|
||||
for i in 0..context_count {
|
||||
doc.contexts.push(Context {
|
||||
id: String::from(&format!("ctx{}", i)),
|
||||
entity: Entity {
|
||||
identifier: String::from("0000000000"),
|
||||
scheme: String::from("http://www.sec.gov/CIK"),
|
||||
segment: None,
|
||||
},
|
||||
period: Period::Instant {
|
||||
date: String::from("2023-12-31"),
|
||||
},
|
||||
scenario: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Add dummy units
|
||||
for i in 0..unit_count {
|
||||
doc.units.push(Unit {
|
||||
id: String::from(&format!("unit{}", i)),
|
||||
unit_type: UnitType::Simple(vec![Measure {
|
||||
namespace: String::from("iso4217"),
|
||||
name: String::from("USD"),
|
||||
}]),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(doc)
|
||||
}
|
||||
}
|
||||
49
rust/crabrl-fork/src/taxonomy.rs
Normal file
49
rust/crabrl-fork/src/taxonomy.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use crate::Result;
|
||||
use compact_str::CompactString;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// In-memory representation of a loaded taxonomy: its schemas plus the
/// linkbases that relate their elements.
pub struct Taxonomy {
    // Schemas loaded so far (see `load_schema`).
    pub schemas: Vec<Schema>,
    // Linkbases loaded so far (see `load_linkbase`).
    pub linkbases: Vec<Linkbase>,
}

/// One XML Schema within the taxonomy, keyed by its target namespace.
pub struct Schema {
    pub target_namespace: CompactString,
    // Element definitions indexed by element name.
    pub elements: HashMap<CompactString, Element>,
}

/// A single element declaration from a taxonomy schema.
pub struct Element {
    pub name: CompactString,
    // The declared XSD/XBRL type of the element.
    pub element_type: CompactString,
    // e.g. xbrli:item / xbrli:tuple when present.
    pub substitution_group: Option<CompactString>,
    // "instant" or "duration" when declared.
    pub period_type: Option<CompactString>,
}

/// A linkbase: a role URI plus the arcs (relationships) it defines.
pub struct Linkbase {
    pub role: CompactString,
    pub arcs: Vec<Arc>,
}

/// A relationship between two taxonomy concepts.
/// NOTE: this local `Arc` shadows `std::sync::Arc` within this module —
/// refer to the std type by full path here.
pub struct Arc {
    pub from: CompactString,
    pub to: CompactString,
    // Presentation/ordering position of the arc.
    pub order: f32,
    // Calculation weight (typically 1.0 or -1.0).
    pub weight: f32,
}
|
||||
|
||||
impl Taxonomy {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
schemas: Vec::new(),
|
||||
linkbases: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_schema(&mut self, _path: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_linkbase(&mut self, _path: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
601
rust/crabrl-fork/src/validator.rs
Normal file
601
rust/crabrl-fork/src/validator.rs
Normal file
@@ -0,0 +1,601 @@
|
||||
// Comprehensive XBRL validation
|
||||
use crate::{model::*, Error, Result};
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// A single problem found while validating a [`Document`]. Collected into a
/// list rather than returned eagerly so callers can see every issue at once.
#[derive(Debug, Clone)]
pub enum ValidationError {
    /// A fact references a context id that does not resolve to any context.
    InvalidContextRef {
        fact_index: usize,
        context_id: u16,
    },
    /// A fact references a unit id that does not resolve to any unit.
    InvalidUnitRef {
        fact_index: usize,
        unit_id: u16,
    },
    /// A calculation arc's expected total disagrees with the reported value.
    CalculationInconsistency {
        concept: String,
        expected: f64,
        actual: f64,
    },
    /// A value does not conform to the type the concept requires.
    InvalidDataType {
        concept: String,
        expected_type: String,
        actual_value: String,
    },
    /// A mandatory element (per the active profile) is absent.
    MissingRequiredElement {
        element: String,
    },
    /// The same id (context, unit, or fact key) appears more than once.
    DuplicateId {
        id: String,
    },
}
|
||||
|
||||
/// Configurable validator for parsed XBRL documents. Each `check_*` flag
/// enables one validation pass (see [`XbrlValidator::validate`]).
pub struct XbrlValidator {
    // When true, `validate` returns Err if ANY error was collected.
    strict_mode: bool,
    // Declared but not yet consulted by any pass in this file.
    #[allow(dead_code)]
    check_calculations: bool,
    // Enables duplicate-fact detection.
    check_duplicates: bool,
    // Enables context id/entity/period checks.
    check_contexts: bool,
    // Enables unit id/measure checks.
    check_units: bool,
    // Declared but not yet consulted by any pass in this file.
    #[allow(dead_code)]
    check_datatypes: bool,
    // Tolerance intended for calculation comparison; not read in this file.
    decimal_tolerance: f64,
}
|
||||
|
||||
impl Default for XbrlValidator {
    /// Non-strict validator with every check enabled and a 0.01 tolerance
    /// for decimal comparisons.
    fn default() -> Self {
        Self {
            strict_mode: false,
            check_calculations: true,
            check_duplicates: true,
            check_contexts: true,
            check_units: true,
            check_datatypes: true,
            decimal_tolerance: 0.01,
        }
    }
}
|
||||
|
||||
impl XbrlValidator {
    /// Creates a validator with the default (non-strict, all checks on)
    /// configuration.
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder: enables strict mode, in which any collected error makes
    /// `validate` return `Err`.
    pub fn strict(mut self) -> Self {
        self.strict_mode = true;
        self
    }

    /// Builder: sets the decimal comparison tolerance.
    pub fn with_tolerance(mut self, tolerance: f64) -> Self {
        self.decimal_tolerance = tolerance;
        self
    }

    /// Runs the enabled validation passes over `doc`.
    ///
    /// In non-strict mode errors are collected and then discarded (the
    /// result is always `Ok`); in strict mode a non-empty error list becomes
    /// an `Error::Validation` whose message carries only the error count.
    ///
    /// NOTE(review): `doc` is taken as `&mut` but no pass here mutates it —
    /// confirm whether the mutable borrow is needed by callers.
    pub fn validate(&self, doc: &mut Document) -> Result<()> {
        let mut validation_errors = Vec::new();

        // Context validation
        if self.check_contexts {
            validation_errors.extend(self.validate_contexts(doc));
        }

        // Unit validation
        if self.check_units {
            validation_errors.extend(self.validate_units(doc));
        }

        // Fact validation (always runs; no flag guards it)
        validation_errors.extend(self.validate_facts(doc));

        // Duplicate detection
        if self.check_duplicates {
            validation_errors.extend(self.check_duplicate_facts(doc));
        }

        // Return error in strict mode if any validation errors
        if self.strict_mode && !validation_errors.is_empty() {
            return Err(Error::Validation(format!(
                "Validation failed with {} errors",
                validation_errors.len()
            )));
        }

        Ok(())
    }

    /// Checks contexts for duplicate ids, missing entity identifiers, and
    /// durations whose start sorts after their end.
    fn validate_contexts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut context_ids = HashSet::new();

        for ctx in &doc.contexts {
            // Check for duplicate context IDs
            if !context_ids.insert(ctx.id.clone()) {
                errors.push(ValidationError::DuplicateId {
                    id: ctx.id.to_string(),
                });
            }

            // Validate entity identifier
            if ctx.entity.identifier.is_empty() {
                errors.push(ValidationError::MissingRequiredElement {
                    element: format!("Entity identifier for context {}", ctx.id),
                });
            }

            // Validate period. NOTE(review): this is a lexicographic string
            // comparison — correct for same-format ISO dates, but confirm
            // the period fields are always normalized ISO-8601.
            if let Period::Duration { start, end } = &ctx.period {
                if start > end {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("context_{}", ctx.id),
                        expected_type: "valid period".to_string(),
                        actual_value: format!("start {} > end {}", start, end),
                    });
                }
            }
        }

        errors
    }

    /// Checks units for duplicate ids and for empty measure lists in every
    /// unit-type variant.
    fn validate_units(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut unit_ids = HashSet::new();

        for unit in &doc.units {
            // Check for duplicate unit IDs
            if !unit_ids.insert(unit.id.clone()) {
                errors.push(ValidationError::DuplicateId {
                    id: unit.id.to_string(),
                });
            }

            // Validate measures
            match &unit.unit_type {
                UnitType::Simple(measures) => {
                    if measures.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Measures for unit {}", unit.id),
                        });
                    }
                }
                UnitType::Divide {
                    numerator,
                    denominator,
                } => {
                    if numerator.is_empty() || denominator.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Numerator/denominator for unit {}", unit.id),
                        });
                    }
                }
                UnitType::Multiply(measures) => {
                    if measures.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Measures for unit {}", unit.id),
                        });
                    }
                }
            }
        }

        errors
    }

    /// Checks that each fact's context and unit references resolve.
    fn validate_facts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();

        // Validate fact references
        for i in 0..doc.facts.len() {
            if i < doc.facts.context_ids.len() {
                let context_id = doc.facts.context_ids[i];
                // Context ids are treated as 0-based indices into contexts.
                if context_id as usize >= doc.contexts.len() {
                    errors.push(ValidationError::InvalidContextRef {
                        fact_index: i,
                        context_id,
                    });
                }
            }

            if i < doc.facts.unit_ids.len() {
                let unit_id = doc.facts.unit_ids[i];
                // NOTE(review): `> doc.units.len()` looks off-by-one —
                // `sec_validation_rules` indexes `doc.units[unit_id]`
                // 0-based, under which `unit_id == len` should also be
                // invalid. The `unit_id > 0` guard suggests 0 may be a
                // "no unit" sentinel; confirm the intended id scheme.
                if unit_id > 0 && unit_id as usize > doc.units.len() {
                    errors.push(ValidationError::InvalidUnitRef {
                        fact_index: i,
                        unit_id,
                    });
                }
            }
        }

        errors
    }

    /// Flags facts sharing the same (concept, context) pair. Errors are only
    /// recorded in strict mode; otherwise duplicates are tolerated silently.
    fn check_duplicate_facts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut fact_keys = HashSet::new();

        for i in 0..doc.facts.len() {
            if i < doc.facts.concept_ids.len() && i < doc.facts.context_ids.len() {
                let key = (doc.facts.concept_ids[i], doc.facts.context_ids[i]);
                if !fact_keys.insert(key) && self.strict_mode {
                    errors.push(ValidationError::DuplicateId {
                        id: format!("Duplicate fact at index {}", i),
                    });
                }
            }
        }

        errors
    }
}
|
||||
|
||||
// Type alias for validation rules: a boxed closure that inspects a document
// and reports any errors it finds.
type ValidationRule = Box<dyn Fn(&Document) -> Vec<ValidationError>>;

// Validation context and rules
/// Bundles a built-in validation profile with user-registered custom rules;
/// `validate` runs both (see `impl ValidationContext`).
pub struct ValidationContext {
    // Which built-in rule set to apply first.
    pub profile: ValidationProfile,
    // Extra rules added via `add_rule`, run after the profile rules.
    pub custom_rules: Vec<ValidationRule>,
}
|
||||
|
||||
/// Built-in rule sets selectable for validation. Only `SecEdgar` and `Ifrs`
/// currently carry profile-specific rules; `Generic` and `UsGaap` run no
/// built-in rules (custom rules still apply).
#[derive(Debug, Clone, Copy)]
pub enum ValidationProfile {
    Generic,
    SecEdgar,
    Ifrs,
    UsGaap,
}
|
||||
|
||||
impl ValidationContext {
|
||||
pub fn new(profile: ValidationProfile) -> Self {
|
||||
Self {
|
||||
profile,
|
||||
custom_rules: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_rule<F>(&mut self, rule: F)
|
||||
where
|
||||
F: Fn(&Document) -> Vec<ValidationError> + 'static,
|
||||
{
|
||||
self.custom_rules.push(Box::new(rule));
|
||||
}
|
||||
|
||||
pub fn validate(&self, doc: &Document) -> Vec<ValidationError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
// Apply profile-specific rules
|
||||
match self.profile {
|
||||
ValidationProfile::SecEdgar => {
|
||||
errors.extend(sec_validation_rules(doc));
|
||||
}
|
||||
ValidationProfile::Ifrs => {
|
||||
errors.extend(ifrs_validation_rules(doc));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Apply custom rules
|
||||
for rule in &self.custom_rules {
|
||||
errors.extend(rule(doc));
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
}
|
||||
|
||||
// SEC EDGAR specific validation rules
/// Profile rules for SEC EDGAR filings: requires a current-period context,
/// a CIK entity identifier (10 digits), DEI elements, well-formed segment
/// members, and sanity-checks USD monetary fact values.
pub fn sec_validation_rules(doc: &Document) -> Vec<ValidationError> {
    let mut errors = Vec::new();

    // Check for required DEI contexts
    let mut has_current_period = false;
    let mut has_entity_info = false;
    let mut has_dei_elements = false;

    for ctx in &doc.contexts {
        // Check for current period context (detected by id naming only).
        if ctx.id.contains("CurrentYear")
            || ctx.id.contains("CurrentPeriod")
            || ctx.id.contains("DocumentPeriodEndDate")
        {
            has_current_period = true;
        }

        // Validate CIK format (10 digits)
        if ctx.entity.scheme.contains("sec.gov/CIK") {
            has_entity_info = true;
            let cik = &ctx.entity.identifier;
            if cik.len() != 10 || !cik.chars().all(|c| c.is_ascii_digit()) {
                errors.push(ValidationError::InvalidDataType {
                    concept: "CIK".to_string(),
                    expected_type: "10-digit number".to_string(),
                    actual_value: cik.to_string(),
                });
            }
        }
    }

    // Check for DEI elements in facts.
    // NOTE(review): this indexes `concept_names` by FACT index `i`, not by
    // `concept_ids[i]` — confirm `concept_names` is parallel to the fact
    // arrays rather than keyed by concept id.
    for i in 0..doc.facts.concept_ids.len() {
        if i < doc.concept_names.len() {
            let concept = &doc.concept_names[i];
            if concept.contains("dei:")
                || concept.contains("DocumentType")
                || concept.contains("EntityRegistrantName")
            {
                has_dei_elements = true;
            }
        }
    }

    // Required elements validation
    if !has_current_period {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Current period context required for SEC filing".to_string(),
        });
    }

    if !has_entity_info {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Entity CIK information required for SEC filing".to_string(),
        });
    }

    if !has_dei_elements {
        errors.push(ValidationError::MissingRequiredElement {
            element: "DEI (Document and Entity Information) elements required".to_string(),
        });
    }

    // Validate segment reporting if present
    for ctx in &doc.contexts {
        if let Some(segment) = &ctx.entity.segment {
            // Check explicit members have valid dimension references
            for member in &segment.explicit_members {
                if member.dimension.is_empty() || member.member.is_empty() {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("segment_{}", ctx.id),
                        expected_type: "valid dimension member".to_string(),
                        actual_value: format!("{}:{}", member.dimension, member.member),
                    });
                }
            }
        }
    }

    // Collect monetary (USD) decimal facts for the sanity checks below.
    let mut monetary_facts: Vec<(usize, f64)> = Vec::new();
    for i in 0..doc.facts.len() {
        if i < doc.facts.values.len() {
            if let FactValue::Decimal(val) = &doc.facts.values[i] {
                // Check if this is a monetary fact (has USD unit)
                if i < doc.facts.unit_ids.len() {
                    // Unit ids are used here as 0-based indices into units.
                    let unit_id = doc.facts.unit_ids[i] as usize;
                    if unit_id < doc.units.len() {
                        if let UnitType::Simple(measures) = &doc.units[unit_id].unit_type {
                            if measures.iter().any(|m| m.name == "USD" || m.name == "usd") {
                                monetary_facts.push((i, *val));
                            }
                        }
                    }
                }
            }
        }
    }

    // Basic calculation validation - check for reasonable values
    for (idx, value) in monetary_facts {
        if value.is_nan() || value.is_infinite() {
            errors.push(ValidationError::InvalidDataType {
                concept: format!("fact_{}", idx),
                expected_type: "valid monetary amount".to_string(),
                actual_value: format!("{}", value),
            });
        }
        // Check for suspiciously large values (> $10 trillion)
        if value.abs() > 10_000_000_000_000.0 {
            errors.push(ValidationError::InvalidDataType {
                concept: format!("fact_{}", idx),
                expected_type: "reasonable monetary amount".to_string(),
                actual_value: format!("${:.2}", value),
            });
        }
    }

    errors
}
|
||||
|
||||
// IFRS specific validation rules
/// Profile rules for IFRS filings: requires reporting and comparative
/// periods, entity identification, IFRS dimension members, the four primary
/// statements, and sane presentation/calculation link values.
pub fn ifrs_validation_rules(doc: &Document) -> Vec<ValidationError> {
    let mut errors = Vec::new();

    // Check for IFRS-required contexts
    let mut has_reporting_period = false;
    let mut has_comparative_period = false;
    let mut has_entity_info = false;

    for ctx in &doc.contexts {
        // Check for reporting period
        match &ctx.period {
            Period::Duration { start, end: _ } => {
                has_reporting_period = true;
                // IFRS requires comparative information; detected purely by
                // naming conventions in the start date / context id.
                if start.contains("PY")
                    || ctx.id.contains("PriorYear")
                    || ctx.id.contains("Comparative")
                {
                    has_comparative_period = true;
                }
            }
            Period::Instant { date } => {
                if !date.is_empty() {
                    has_reporting_period = true;
                }
            }
            // Other Period variants (if any) are ignored here.
            _ => {}
        }

        // Validate entity information
        if !ctx.entity.identifier.is_empty() {
            has_entity_info = true;
        }
    }

    // Required contexts validation
    if !has_reporting_period {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Reporting period required for IFRS filing".to_string(),
        });
    }

    if !has_comparative_period {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Comparative period information required by IFRS".to_string(),
        });
    }

    if !has_entity_info {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Entity identification required for IFRS filing".to_string(),
        });
    }

    // Validate dimensional structure.
    // NOTE(review): `dimension_validations` is populated below but never
    // read or returned — dead accumulation. It likely was meant to be
    // reported as errors; confirm intent before removing or wiring it up.
    let mut dimension_validations = Vec::new();
    for ctx in &doc.contexts {
        // Check segment dimensions
        if let Some(segment) = &ctx.entity.segment {
            for member in &segment.explicit_members {
                // IFRS dimensions should follow specific patterns
                if !member.dimension.contains(":") {
                    dimension_validations
                        .push(format!("Invalid dimension format: {}", member.dimension));
                }
                // NOTE: "ifrs-full" also contains "ifrs", so the second
                // arm of this || is redundant as written.
                if member.dimension.contains("ifrs") || member.dimension.contains("ifrs-full") {
                    // Valid IFRS dimension
                    if member.member.is_empty() {
                        errors.push(ValidationError::InvalidDataType {
                            concept: format!("dimension_{}", ctx.id),
                            expected_type: "valid IFRS dimension member".to_string(),
                            actual_value: member.dimension.to_string(),
                        });
                    }
                }
            }

            // Check typed members for IFRS compliance
            for typed in &segment.typed_members {
                if typed.dimension.contains("ifrs") && typed.value.is_empty() {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("typed_dimension_{}", ctx.id),
                        expected_type: "non-empty typed dimension value".to_string(),
                        actual_value: typed.dimension.to_string(),
                    });
                }
            }
        }

        // Check scenario dimensions (alternative to segment)
        if let Some(scenario) = &ctx.scenario {
            for member in &scenario.explicit_members {
                if member.dimension.contains("ifrs") && member.member.is_empty() {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("scenario_dimension_{}", ctx.id),
                        expected_type: "valid IFRS scenario member".to_string(),
                        actual_value: member.dimension.to_string(),
                    });
                }
            }
        }
    }

    // Check for mandatory IFRS disclosures in facts, by substring match on
    // lowercased concept names.
    let mut has_financial_position = false;
    let mut has_comprehensive_income = false;
    let mut has_cash_flows = false;
    let mut has_changes_in_equity = false;

    for i in 0..doc.concept_names.len() {
        let concept = &doc.concept_names[i];
        let lower = concept.to_lowercase();

        if lower.contains("financialposition")
            || lower.contains("balancesheet")
            || lower.contains("assets")
            || lower.contains("liabilities")
        {
            has_financial_position = true;
        }

        if lower.contains("comprehensiveincome")
            || lower.contains("profitorloss")
            || lower.contains("income")
            || lower.contains("revenue")
        {
            has_comprehensive_income = true;
        }

        // NOTE: contains("cashflow") already matches "cashflows"; the
        // second condition is redundant.
        if lower.contains("cashflow") || lower.contains("cashflows") {
            has_cash_flows = true;
        }

        if lower.contains("changesinequity") || lower.contains("equity") {
            has_changes_in_equity = true;
        }
    }

    // Validate mandatory statements
    if !has_financial_position {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Statement of Financial Position required by IFRS".to_string(),
        });
    }

    if !has_comprehensive_income {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Statement of Comprehensive Income required by IFRS".to_string(),
        });
    }

    if !has_cash_flows {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Statement of Cash Flows required by IFRS".to_string(),
        });
    }

    if !has_changes_in_equity {
        errors.push(ValidationError::MissingRequiredElement {
            element: "Statement of Changes in Equity required by IFRS".to_string(),
        });
    }

    // Validate presentation linkbase relationships
    for link in &doc.presentation_links {
        // Check order is valid (typically 1.0 to 999.0)
        if link.order < 0.0 || link.order > 1000.0 {
            errors.push(ValidationError::InvalidDataType {
                concept: format!("presentation_link_{}_{}", link.from, link.to),
                expected_type: "valid presentation order (0-1000)".to_string(),
                actual_value: format!("{}", link.order),
            });
        }
    }

    // Validate calculation relationships
    for link in &doc.calculation_links {
        // Check weight is reasonable (-1.0 or 1.0 typically)
        if link.weight != 1.0 && link.weight != -1.0 && link.weight != 0.0 {
            // Unusual weight, might be an error — only flag extreme values.
            if link.weight.abs() > 10.0 {
                errors.push(ValidationError::InvalidDataType {
                    concept: format!("calculation_link_{}_{}", link.from, link.to),
                    expected_type: "reasonable calculation weight".to_string(),
                    actual_value: format!("{}", link.weight),
                });
            }
        }
    }

    errors
}
|
||||
Reference in New Issue
Block a user