Fix P0 issues in financial ingestion architecture
- Wrap snapshot updates in transactions with error context for each child table
- Add sidecar retry with exponential backoff (3 attempts, 2s base, 10s max, 30% jitter)
- Add HTTP timeout (30s per request) and SEC rate limiting (10 req/s) in Rust
- Add XBRL validation with status reporting (checks root element, tag balance)
This commit is contained in:
@@ -962,10 +962,8 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
|
||||
const now = new Date().toISOString();
|
||||
const normalized = normalizeFilingTaxonomySnapshotPayload(input);
|
||||
|
||||
const [saved] = await withFinancialIngestionSchemaRetry({
|
||||
client: getSqliteClient(),
|
||||
context: 'filing-taxonomy-snapshot-upsert',
|
||||
operation: async () => await db
|
||||
return db.transaction(async (tx) => {
|
||||
const [saved] = await tx
|
||||
.insert(filingTaxonomySnapshot)
|
||||
.values({
|
||||
filing_id: input.filing_id,
|
||||
@@ -1022,125 +1020,149 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
|
||||
updated_at: now
|
||||
}
|
||||
})
|
||||
.returning()
|
||||
.returning();
|
||||
|
||||
const snapshotId = saved.id;
|
||||
|
||||
try {
|
||||
await tx.delete(filingTaxonomyAsset).where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
|
||||
await tx.delete(filingTaxonomyContext).where(eq(filingTaxonomyContext.snapshot_id, snapshotId));
|
||||
await tx.delete(filingTaxonomyConcept).where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
|
||||
await tx.delete(filingTaxonomyFact).where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
|
||||
await tx.delete(filingTaxonomyMetricValidation).where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to delete child records for snapshot ${snapshotId}: ${error}`);
|
||||
}
|
||||
|
||||
if (input.contexts.length > 0) {
|
||||
try {
|
||||
await tx.insert(filingTaxonomyContext).values(input.contexts.map((context) => ({
|
||||
snapshot_id: snapshotId,
|
||||
context_id: context.context_id,
|
||||
entity_identifier: context.entity_identifier,
|
||||
entity_scheme: context.entity_scheme,
|
||||
period_start: context.period_start,
|
||||
period_end: context.period_end,
|
||||
period_instant: context.period_instant,
|
||||
segment_json: context.segment_json,
|
||||
scenario_json: context.scenario_json,
|
||||
created_at: now
|
||||
})));
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to insert ${input.contexts.length} contexts for snapshot ${snapshotId}: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (input.assets.length > 0) {
|
||||
try {
|
||||
await tx.insert(filingTaxonomyAsset).values(input.assets.map((asset) => ({
|
||||
snapshot_id: snapshotId,
|
||||
asset_type: asset.asset_type,
|
||||
name: asset.name,
|
||||
url: asset.url,
|
||||
size_bytes: asset.size_bytes,
|
||||
score: asNumericText(asset.score),
|
||||
is_selected: asset.is_selected,
|
||||
created_at: now
|
||||
})));
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to insert ${input.assets.length} assets for snapshot ${snapshotId}: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (input.concepts.length > 0) {
|
||||
try {
|
||||
await tx.insert(filingTaxonomyConcept).values(input.concepts.map((concept) => ({
|
||||
snapshot_id: snapshotId,
|
||||
concept_key: concept.concept_key,
|
||||
qname: concept.qname,
|
||||
namespace_uri: concept.namespace_uri,
|
||||
local_name: concept.local_name,
|
||||
label: concept.label,
|
||||
is_extension: concept.is_extension,
|
||||
balance: concept.balance,
|
||||
period_type: concept.period_type,
|
||||
data_type: concept.data_type,
|
||||
statement_kind: concept.statement_kind,
|
||||
role_uri: concept.role_uri,
|
||||
authoritative_concept_key: concept.authoritative_concept_key,
|
||||
mapping_method: concept.mapping_method,
|
||||
surface_key: concept.surface_key,
|
||||
detail_parent_surface_key: concept.detail_parent_surface_key,
|
||||
kpi_key: concept.kpi_key,
|
||||
residual_flag: concept.residual_flag,
|
||||
presentation_order: asNumericText(concept.presentation_order),
|
||||
presentation_depth: concept.presentation_depth,
|
||||
parent_concept_key: concept.parent_concept_key,
|
||||
is_abstract: concept.is_abstract,
|
||||
created_at: now
|
||||
})));
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to insert ${input.concepts.length} concepts for snapshot ${snapshotId}: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (input.facts.length > 0) {
|
||||
try {
|
||||
await tx.insert(filingTaxonomyFact).values(input.facts.map((fact) => ({
|
||||
snapshot_id: snapshotId,
|
||||
concept_key: fact.concept_key,
|
||||
qname: fact.qname,
|
||||
namespace_uri: fact.namespace_uri,
|
||||
local_name: fact.local_name,
|
||||
data_type: fact.data_type,
|
||||
statement_kind: fact.statement_kind,
|
||||
role_uri: fact.role_uri,
|
||||
authoritative_concept_key: fact.authoritative_concept_key,
|
||||
mapping_method: fact.mapping_method,
|
||||
surface_key: fact.surface_key,
|
||||
detail_parent_surface_key: fact.detail_parent_surface_key,
|
||||
kpi_key: fact.kpi_key,
|
||||
residual_flag: fact.residual_flag,
|
||||
context_id: fact.context_id,
|
||||
unit: fact.unit,
|
||||
decimals: fact.decimals,
|
||||
precision: fact.precision,
|
||||
nil: fact.nil,
|
||||
value_num: String(fact.value_num),
|
||||
period_start: fact.period_start,
|
||||
period_end: fact.period_end,
|
||||
period_instant: fact.period_instant,
|
||||
dimensions: fact.dimensions,
|
||||
is_dimensionless: fact.is_dimensionless,
|
||||
source_file: fact.source_file,
|
||||
created_at: now
|
||||
})));
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to insert ${input.facts.length} facts for snapshot ${snapshotId}: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (input.metric_validations.length > 0) {
|
||||
try {
|
||||
await tx.insert(filingTaxonomyMetricValidation).values(input.metric_validations.map((check) => ({
|
||||
snapshot_id: snapshotId,
|
||||
metric_key: check.metric_key,
|
||||
taxonomy_value: asNumericText(check.taxonomy_value),
|
||||
llm_value: asNumericText(check.llm_value),
|
||||
absolute_diff: asNumericText(check.absolute_diff),
|
||||
relative_diff: asNumericText(check.relative_diff),
|
||||
status: check.status,
|
||||
evidence_pages: check.evidence_pages,
|
||||
pdf_url: check.pdf_url,
|
||||
provider: check.provider,
|
||||
model: check.model,
|
||||
error: check.error,
|
||||
created_at: now,
|
||||
updated_at: now
|
||||
})));
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to insert ${input.metric_validations.length} metric validations for snapshot ${snapshotId}: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
return toSnapshotRecord(saved);
|
||||
});
|
||||
|
||||
const snapshotId = saved.id;
|
||||
|
||||
await db.delete(filingTaxonomyAsset).where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
|
||||
await db.delete(filingTaxonomyContext).where(eq(filingTaxonomyContext.snapshot_id, snapshotId));
|
||||
await db.delete(filingTaxonomyConcept).where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
|
||||
await db.delete(filingTaxonomyFact).where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
|
||||
await db.delete(filingTaxonomyMetricValidation).where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));
|
||||
|
||||
if (input.contexts.length > 0) {
|
||||
await db.insert(filingTaxonomyContext).values(input.contexts.map((context) => ({
|
||||
snapshot_id: snapshotId,
|
||||
context_id: context.context_id,
|
||||
entity_identifier: context.entity_identifier,
|
||||
entity_scheme: context.entity_scheme,
|
||||
period_start: context.period_start,
|
||||
period_end: context.period_end,
|
||||
period_instant: context.period_instant,
|
||||
segment_json: context.segment_json,
|
||||
scenario_json: context.scenario_json,
|
||||
created_at: now
|
||||
})));
|
||||
}
|
||||
|
||||
if (input.assets.length > 0) {
|
||||
await db.insert(filingTaxonomyAsset).values(input.assets.map((asset) => ({
|
||||
snapshot_id: snapshotId,
|
||||
asset_type: asset.asset_type,
|
||||
name: asset.name,
|
||||
url: asset.url,
|
||||
size_bytes: asset.size_bytes,
|
||||
score: asNumericText(asset.score),
|
||||
is_selected: asset.is_selected,
|
||||
created_at: now
|
||||
})));
|
||||
}
|
||||
|
||||
if (input.concepts.length > 0) {
|
||||
await db.insert(filingTaxonomyConcept).values(input.concepts.map((concept) => ({
|
||||
snapshot_id: snapshotId,
|
||||
concept_key: concept.concept_key,
|
||||
qname: concept.qname,
|
||||
namespace_uri: concept.namespace_uri,
|
||||
local_name: concept.local_name,
|
||||
label: concept.label,
|
||||
is_extension: concept.is_extension,
|
||||
balance: concept.balance,
|
||||
period_type: concept.period_type,
|
||||
data_type: concept.data_type,
|
||||
statement_kind: concept.statement_kind,
|
||||
role_uri: concept.role_uri,
|
||||
authoritative_concept_key: concept.authoritative_concept_key,
|
||||
mapping_method: concept.mapping_method,
|
||||
surface_key: concept.surface_key,
|
||||
detail_parent_surface_key: concept.detail_parent_surface_key,
|
||||
kpi_key: concept.kpi_key,
|
||||
residual_flag: concept.residual_flag,
|
||||
presentation_order: asNumericText(concept.presentation_order),
|
||||
presentation_depth: concept.presentation_depth,
|
||||
parent_concept_key: concept.parent_concept_key,
|
||||
is_abstract: concept.is_abstract,
|
||||
created_at: now
|
||||
})));
|
||||
}
|
||||
|
||||
if (input.facts.length > 0) {
|
||||
await db.insert(filingTaxonomyFact).values(input.facts.map((fact) => ({
|
||||
snapshot_id: snapshotId,
|
||||
concept_key: fact.concept_key,
|
||||
qname: fact.qname,
|
||||
namespace_uri: fact.namespace_uri,
|
||||
local_name: fact.local_name,
|
||||
data_type: fact.data_type,
|
||||
statement_kind: fact.statement_kind,
|
||||
role_uri: fact.role_uri,
|
||||
authoritative_concept_key: fact.authoritative_concept_key,
|
||||
mapping_method: fact.mapping_method,
|
||||
surface_key: fact.surface_key,
|
||||
detail_parent_surface_key: fact.detail_parent_surface_key,
|
||||
kpi_key: fact.kpi_key,
|
||||
residual_flag: fact.residual_flag,
|
||||
context_id: fact.context_id,
|
||||
unit: fact.unit,
|
||||
decimals: fact.decimals,
|
||||
precision: fact.precision,
|
||||
nil: fact.nil,
|
||||
value_num: String(fact.value_num),
|
||||
period_start: fact.period_start,
|
||||
period_end: fact.period_end,
|
||||
period_instant: fact.period_instant,
|
||||
dimensions: fact.dimensions,
|
||||
is_dimensionless: fact.is_dimensionless,
|
||||
source_file: fact.source_file,
|
||||
created_at: now
|
||||
})));
|
||||
}
|
||||
|
||||
if (input.metric_validations.length > 0) {
|
||||
await db.insert(filingTaxonomyMetricValidation).values(input.metric_validations.map((check) => ({
|
||||
snapshot_id: snapshotId,
|
||||
metric_key: check.metric_key,
|
||||
taxonomy_value: asNumericText(check.taxonomy_value),
|
||||
llm_value: asNumericText(check.llm_value),
|
||||
absolute_diff: asNumericText(check.absolute_diff),
|
||||
relative_diff: asNumericText(check.relative_diff),
|
||||
status: check.status,
|
||||
evidence_pages: check.evidence_pages,
|
||||
pdf_url: check.pdf_url,
|
||||
provider: check.provider,
|
||||
model: check.model,
|
||||
error: check.error,
|
||||
created_at: now,
|
||||
updated_at: now
|
||||
})));
|
||||
}
|
||||
|
||||
return toSnapshotRecord(saved);
|
||||
}
|
||||
|
||||
export async function listFilingTaxonomySnapshotsByTicker(input: {
|
||||
|
||||
Reference in New Issue
Block a user