Fix P0 issues in financial ingestion architecture
Some checks failed
PR Checks / typecheck-and-build (pull_request) Has been cancelled
Taxonomy Sidecar / taxonomy-sidecar (pull_request) Has been cancelled

- Wrap snapshot updates in transactions with error context for each child table
- Add sidecar retry with exponential backoff (3 attempts, 2s base, 10s max, 30% jitter)
- Add HTTP timeout (30s per request) and SEC rate limiting (10 req/s) in Rust
- Add XBRL validation with status reporting (checks root element, tag balance)
This commit is contained in:
2026-03-15 16:51:32 -04:00
parent edf1cfb421
commit 4313058d65
9 changed files with 468 additions and 142 deletions

View File

@@ -4,6 +4,8 @@ use regex::Regex;
use reqwest::blocking::Client;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Mutex;
use std::time::{Duration, Instant};
mod kpi_mapper;
mod metrics;
@@ -20,6 +22,39 @@ use crabrl as _;
/// Name of this parser engine (presumably reported alongside parse output — confirm against callers).
pub const PARSER_ENGINE: &str = "fiscal-xbrl";
/// Crate version, captured from the `CARGO_PKG_VERSION` environment variable at compile time.
pub const PARSER_VERSION: &str = env!("CARGO_PKG_VERSION");
/// Fallback minimum gap between outbound requests, in milliseconds.
/// Used by `sec_rate_limit_delay` when `SEC_RATE_LIMIT_MS` is unset or does not parse.
const DEFAULT_SEC_RATE_LIMIT_MS: u64 = 100;
/// Per-request timeout, in seconds, applied by `fetch_text` and `fetch_json`.
const HTTP_TIMEOUT_SECS: u64 = 30;
/// Timestamp of the most recent request issued through `rate_limited_fetch`.
/// Lazily initialised to the time of first access; guarded by a mutex so callers
/// update it one at a time.
static RATE_LIMITER: Lazy<Mutex<Instant>> = Lazy::new(|| Mutex::new(Instant::now()));
fn sec_rate_limit_delay() -> u64 {
std::env::var("SEC_RATE_LIMIT_MS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(DEFAULT_SEC_RATE_LIMIT_MS)
}
/// Runs `fetch_fn` after enforcing the configured minimum gap since the
/// previous rate-limited request.
///
/// The shared `RATE_LIMITER` timestamp is locked while the remaining wait (if
/// any) is slept off and while the timestamp is refreshed, so concurrent
/// callers are spaced out one after another. The lock is released before
/// `fetch_fn` runs, so the request itself is not serialised.
///
/// Returns whatever `fetch_fn` returns.
///
/// # Panics
///
/// Panics if the `RATE_LIMITER` mutex is poisoned.
fn rate_limited_fetch<T, F>(fetch_fn: F) -> Result<T>
where
    F: FnOnce() -> Result<T>,
{
    let min_interval = Duration::from_millis(sec_rate_limit_delay());

    {
        let mut previous = RATE_LIMITER.lock().unwrap();

        // `checked_sub` yields `None` once the interval has already elapsed;
        // a zero remainder means "exactly elapsed", which also needs no sleep.
        match min_interval.checked_sub(previous.elapsed()) {
            Some(remaining) if !remaining.is_zero() => std::thread::sleep(remaining),
            _ => {}
        }

        *previous = Instant::now();
    }

    fetch_fn()
}
/// Matches a complete `context` element, with or without a namespace prefix
/// on the opening and closing tags.
///
/// Flags: `i` (case-insensitive) and `s` (`.` also matches newlines, so the
/// element body may span lines).
/// Capture group 1 is the value of the `id` attribute (single- or
/// double-quoted); capture group 2 is the element's inner content, matched
/// non-greedily up to the first closing `context` tag.
static CONTEXT_RE: Lazy<Regex> = Lazy::new(|| {
// The pattern is a literal, so a compile failure would be a programming error.
Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)</(?:[a-z0-9_\-]+:)?context>"#).unwrap()
});
@@ -118,6 +153,7 @@ pub struct HydrateFilingResponse {
pub contexts: Vec<ContextOutput>,
pub derived_metrics: FilingMetrics,
pub validation_result: ValidationResultOutput,
pub xbrl_validation: XbrlValidationResult,
pub facts_count: usize,
pub concepts_count: usize,
pub dimensions_count: usize,
@@ -481,6 +517,7 @@ struct PresentationNode {
pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingResponse> {
let client = Client::builder()
.user_agent("Fiscal Clone <support@fiscal.local>")
.timeout(Duration::from_secs(120))
.build()
.context("unable to build HTTP client")?;
@@ -521,7 +558,11 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
computed_definitions: vec![],
contexts: vec![],
derived_metrics: FilingMetrics::default(),
validation_result,
validation_result: validation_result.clone(),
xbrl_validation: XbrlValidationResult {
status: "error".to_string(),
message: Some("No XBRL instance found".to_string()),
},
facts_count: 0,
concepts_count: 0,
dimensions_count: 0,
@@ -542,6 +583,17 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
let instance_text = fetch_text(&client, &instance_asset.url)
.context("fetch request failed for XBRL instance")?;
let xbrl_validation = validate_xbrl_structure(&instance_text, Some(&instance_asset.name));
if xbrl_validation.status != "passed" {
eprintln!(
"[xbrl] Validation {} for {:?}: {}",
xbrl_validation.status,
instance_asset.name,
xbrl_validation.message.as_deref().unwrap_or("unknown")
);
}
let parsed_instance = parse_xbrl_instance(&instance_text, Some(instance_asset.name.clone()));
let mut label_by_concept = HashMap::new();
@@ -678,6 +730,7 @@ pub fn hydrate_filing(input: HydrateFilingRequest) -> Result<HydrateFilingRespon
contexts: parsed_instance.contexts,
derived_metrics: metrics::derive_metrics(&facts),
validation_result,
xbrl_validation,
facts_count: facts.len(),
concepts_count: concepts.len(),
dimensions_count: facts
@@ -963,29 +1016,78 @@ fn parse_size(value: Option<&serde_json::Value>) -> Option<i64> {
}
/// Fetches `url` with `client` and returns the response body as text.
///
/// The request is issued through `rate_limited_fetch`, so it is spaced out
/// from the previous request, and carries a per-request timeout of
/// `HTTP_TIMEOUT_SECS` seconds.
///
/// # Errors
///
/// Returns an error when the request cannot be sent, when the response status
/// is not a success status, or when the body cannot be read as text. Each
/// error message includes `url`.
fn fetch_text(client: &Client, url: &str) -> Result<String> {
    // Only the rate-limited path is kept: the earlier unthrottled request
    // sequence that preceded it has been superseded and must not run as well.
    rate_limited_fetch(|| {
        let response = client
            .get(url)
            .timeout(Duration::from_secs(HTTP_TIMEOUT_SECS))
            .send()
            .with_context(|| format!("request failed for {url}"))?;

        if !response.status().is_success() {
            return Err(anyhow!("request failed for {url} ({})", response.status()));
        }

        response
            .text()
            .with_context(|| format!("unable to read response body for {url}"))
    })
}
fn fetch_json<T: for<'de> Deserialize<'de>>(client: &Client, url: &str) -> Result<T> {
let response = client
.get(url)
.send()
.with_context(|| format!("request failed for {url}"))?;
if !response.status().is_success() {
return Err(anyhow!("request failed for {url} ({})", response.status()));
rate_limited_fetch(|| {
let response = client
.get(url)
.timeout(Duration::from_secs(HTTP_TIMEOUT_SECS))
.send()
.with_context(|| format!("request failed for {url}"))?;
if !response.status().is_success() {
return Err(anyhow!("request failed for {url} ({})", response.status()));
}
response
.json::<T>()
.with_context(|| format!("unable to parse JSON response for {url}"))
})
}
/// Outcome of the lightweight structural check performed on an XBRL instance
/// document.
#[derive(Debug, Clone, Serialize)]
pub struct XbrlValidationResult {
/// Result category. The values produced in this file are `"passed"`,
/// `"warning"` and `"error"`.
pub status: String,
/// Human-readable detail for non-passing results; `None` when the check
/// passed. Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
}
/// Returns `true` when `xml` contains an opening tag whose local name is
/// exactly `xbrl`, with or without a namespace prefix (`<xbrl …>`,
/// `<xbrli:xbrl …>`).
///
/// Closing tags, processing instructions, comments, and elements that merely
/// start with the same letters (`<xbrli:context>`, `<xbrldi:explicitMember>`)
/// do not count. This does not verify that the tag is the outermost element.
fn has_xbrl_element(xml: &str) -> bool {
    xml.match_indices('<').any(|(pos, _)| {
        // '<' is a single byte, so `pos + 1` is always a char boundary.
        let after = &xml[pos + 1..];
        let name_len = after
            .find(|c: char| c == '>' || c == '/' || c.is_whitespace())
            .unwrap_or(after.len());
        let qualified = &after[..name_len];
        // Drop an optional namespace prefix: `xbrli:xbrl` -> `xbrl`.
        let local = qualified.rsplit(':').next().unwrap_or(qualified);
        local == "xbrl"
    })
}

/// Performs a cheap structural sanity check on an XBRL instance document.
///
/// This is a heuristic, not an XML parser. Checks run in this order:
///
/// 1. Empty or whitespace-only content -> `"error"`.
/// 2. No opening `xbrl` element (optionally namespace-prefixed) -> `"error"`.
/// 3. Unequal numbers of `<` and `>` characters -> `"warning"`. This is only
///    a warning because a raw `>` is legal in XML text content, so a mismatch
///    does not prove the document is malformed.
///
/// Otherwise the status is `"passed"` with no message.
///
/// `source_file` is used only to label the warning message; `"unknown"` is
/// substituted when it is `None`.
fn validate_xbrl_structure(xml: &str, source_file: Option<&str>) -> XbrlValidationResult {
    if xml.trim().is_empty() {
        return XbrlValidationResult {
            status: "error".to_string(),
            message: Some("XBRL content is empty".to_string()),
        };
    }

    // A plain `contains("<xbrl")` would also accept `<xbrli:context>` and
    // friends, so compare the tag's local name instead.
    if !has_xbrl_element(xml) {
        return XbrlValidationResult {
            status: "error".to_string(),
            message: Some("Invalid XBRL: missing root element".to_string()),
        };
    }

    let open_count = xml.matches('<').count();
    let close_count = xml.matches('>').count();
    if open_count != close_count {
        return XbrlValidationResult {
            status: "warning".to_string(),
            message: Some(format!(
                "Malformed XML detected in {:?} ({} open, {} close tags)",
                source_file.unwrap_or("unknown"),
                open_count,
                close_count
            )),
        };
    }

    XbrlValidationResult {
        status: "passed".to_string(),
        message: None,
    }
}
struct ParsedInstance {