feat(taxonomy): add rust sidecar compact surface pipeline

This commit is contained in:
2026-03-12 15:23:10 -04:00
parent f2c25fb9c6
commit 58061af006
84 changed files with 19350 additions and 265 deletions

View File

@@ -169,6 +169,67 @@ type TaxonomyStatementBundle = {
statements: Record<FinancialStatementKind, TaxonomyStatementSnapshotRow[]>;
};
/**
 * Shape of one row stored in the `surface_rows` JSON column of
 * `filing_taxonomy_snapshot`, where rows are grouped per
 * `FinancialStatementKind`.
 *
 * NOTE(review): the group headings below are inferred from field names only;
 * confirm the exact semantics against the producer of these rows.
 */
type TaxonomySurfaceSnapshotRow = {
  // Identity and presentation.
  key: string;
  label: string;
  category: string;
  order: number;
  unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio';
  hasDimensions: boolean;

  // String-keyed payloads; an entry may be `null` instead of a value.
  // Presumably both maps share the same key space (period ids?) - TODO confirm.
  values: Record<string, number | null>;
  resolvedSourceRowKeys: Record<string, string | null>;

  // Provenance: what the row was built from.
  sourceConcepts: string[];
  sourceRowKeys: string[];
  sourceFactIds: number[];
  formulaKey: string | null;

  // Optional attributes; absent when the producer does not supply them.
  templateSection?: string;
  statement?: 'income' | 'balance' | 'cash_flow';
  detailCount?: number;
};
/**
 * Shape of one detail row. Arrays of these rows are the values of
 * `TaxonomyDetailStatementMap`, which in turn is persisted per
 * `FinancialStatementKind` in the `detail_rows` JSON column.
 *
 * NOTE(review): the group headings below are inferred from field names only;
 * confirm the exact semantics against the producer of these rows.
 */
type TaxonomyDetailSnapshotRow = {
  // Identity and link to a surface row (by key).
  key: string;
  parentSurfaceKey: string;
  label: string;

  // Concept identification.
  conceptKey: string;
  qname: string;
  namespaceUri: string;
  localName: string;
  isExtension: boolean;

  // Payload. Unlike the surface row type, `unit` is a free-form string and may be `null`.
  unit: string | null;
  values: Record<string, number | null>;

  // Provenance and flags.
  sourceFactIds: number[];
  dimensionsSummary: string[];
  residualFlag: boolean;
};
/**
 * String-keyed lookup of detail rows; one such map is stored per
 * `FinancialStatementKind` in the `detail_rows` JSON column.
 *
 * NOTE(review): keys are presumably the parent surface row key - confirm
 * against the code that builds this map.
 */
type TaxonomyDetailStatementMap = {
  [groupKey: string]: TaxonomyDetailSnapshotRow[];
};
/**
 * Shape of one KPI row stored (as a flat array) in the `kpi_rows` JSON column
 * of `filing_taxonomy_snapshot`.
 *
 * NOTE(review): the group headings below are inferred from field names only;
 * confirm the exact semantics against the producer of these rows.
 */
type StructuredKpiSnapshotRow = {
  // Identity and presentation.
  key: string;
  label: string;
  category: string;
  order: number;
  unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio';

  // Dimensional qualifiers; each one is independently nullable.
  segment: string | null;
  axis: string | null;
  member: string | null;
  hasDimensions: boolean;

  // Payload; an entry may be `null` instead of a number.
  values: Record<string, number | null>;

  // Provenance.
  sourceConcepts: string[];
  sourceFactIds: number[];
  provenanceType: 'taxonomy' | 'structured_note';
};
/**
 * Shape of the value stored in the `normalization_summary` JSON column of
 * `filing_taxonomy_snapshot`: five numeric counters plus a list of warnings.
 *
 * NOTE(review): what qualifies a row as "unmapped" or "material" is not
 * visible here - confirm against the code that fills this summary.
 */
type TaxonomyNormalizationSummary = {
  // Row counts per output kind.
  surfaceRowCount: number;
  detailRowCount: number;
  kpiRowCount: number;

  // Counts of rows left unmapped.
  unmappedRowCount: number;
  materialUnmappedRowCount: number;

  // Free-form warning messages.
  warnings: string[];
};
type TaxonomyMetricValidationCheck = {
metricKey: keyof FilingMetrics;
taxonomyValue: number | null;
@@ -380,10 +441,19 @@ export const filingTaxonomySnapshot = sqliteTable('filing_taxonomy_snapshot', {
parse_status: text('parse_status').$type<TaxonomyParseStatus>().notNull(),
parse_error: text('parse_error'),
source: text('source').$type<'xbrl_instance' | 'xbrl_instance_with_linkbase' | 'legacy_html_fallback'>().notNull(),
parser_engine: text('parser_engine').notNull().default('fiscal-xbrl'),
parser_version: text('parser_version').notNull().default('unknown'),
taxonomy_regime: text('taxonomy_regime').$type<'us-gaap' | 'ifrs-full' | 'unknown'>().notNull().default('unknown'),
fiscal_pack: text('fiscal_pack'),
periods: text('periods', { mode: 'json' }).$type<FilingStatementPeriod[]>(),
faithful_rows: text('faithful_rows', { mode: 'json' }).$type<TaxonomyStatementBundle['statements'] | null>(),
statement_rows: text('statement_rows', { mode: 'json' }).$type<TaxonomyStatementBundle['statements'] | null>(),
surface_rows: text('surface_rows', { mode: 'json' }).$type<Record<FinancialStatementKind, TaxonomySurfaceSnapshotRow[]> | null>(),
detail_rows: text('detail_rows', { mode: 'json' }).$type<Record<FinancialStatementKind, TaxonomyDetailStatementMap> | null>(),
kpi_rows: text('kpi_rows', { mode: 'json' }).$type<StructuredKpiSnapshotRow[] | null>(),
derived_metrics: text('derived_metrics', { mode: 'json' }).$type<FilingMetrics | null>(),
validation_result: text('validation_result', { mode: 'json' }).$type<TaxonomyMetricValidationResult | null>(),
normalization_summary: text('normalization_summary', { mode: 'json' }).$type<TaxonomyNormalizationSummary | null>(),
facts_count: integer('facts_count').notNull().default(0),
concepts_count: integer('concepts_count').notNull().default(0),
dimensions_count: integer('dimensions_count').notNull().default(0),
@@ -395,6 +465,23 @@ export const filingTaxonomySnapshot = sqliteTable('filing_taxonomy_snapshot', {
filingTaxonomySnapshotStatusIndex: index('filing_taxonomy_snapshot_status_idx').on(table.parse_status)
}));
/**
 * `filing_taxonomy_context` table: context records belonging to a taxonomy
 * snapshot.
 *
 * - `snapshot_id` references `filing_taxonomy_snapshot.id`; deleting the
 *   snapshot cascades to its context rows.
 * - `(snapshot_id, context_id)` is unique, so a context id can appear at most
 *   once per snapshot.
 * - The entity and period columns are all nullable text.
 *   NOTE(review): presumably a context carries either start/end or instant -
 *   nothing here enforces that; confirm in the writer.
 * - `segment_json` / `scenario_json` are JSON-mode text columns typed as
 *   string-keyed objects or `null`.
 */
export const filingTaxonomyContext = sqliteTable(
  'filing_taxonomy_context',
  {
    id: integer('id').primaryKey({ autoIncrement: true }),
    snapshot_id: integer('snapshot_id')
      .notNull()
      .references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
    context_id: text('context_id').notNull(),

    entity_identifier: text('entity_identifier'),
    entity_scheme: text('entity_scheme'),

    period_start: text('period_start'),
    period_end: text('period_end'),
    period_instant: text('period_instant'),

    segment_json: text('segment_json', { mode: 'json' }).$type<Record<string, unknown> | null>(),
    scenario_json: text('scenario_json', { mode: 'json' }).$type<Record<string, unknown> | null>(),

    created_at: text('created_at').notNull()
  },
  (columns) => ({
    // NOTE(review): `snapshot_id` is also the leading column of the unique
    // index below, so this single-column index may be redundant. Left
    // unchanged because dropping it would need a migration.
    filingTaxonomyContextSnapshotIndex: index('filing_taxonomy_context_snapshot_idx').on(
      columns.snapshot_id
    ),
    filingTaxonomyContextUnique: uniqueIndex('filing_taxonomy_context_uidx').on(
      columns.snapshot_id,
      columns.context_id
    )
  })
);
export const filingTaxonomyAsset = sqliteTable('filing_taxonomy_asset', {
id: integer('id').primaryKey({ autoIncrement: true }),
snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
@@ -419,8 +506,17 @@ export const filingTaxonomyConcept = sqliteTable('filing_taxonomy_concept', {
local_name: text('local_name').notNull(),
label: text('label'),
is_extension: integer('is_extension', { mode: 'boolean' }).notNull().default(false),
balance: text('balance'),
period_type: text('period_type'),
data_type: text('data_type'),
statement_kind: text('statement_kind').$type<FinancialStatementKind>(),
role_uri: text('role_uri'),
authoritative_concept_key: text('authoritative_concept_key'),
mapping_method: text('mapping_method'),
surface_key: text('surface_key'),
detail_parent_surface_key: text('detail_parent_surface_key'),
kpi_key: text('kpi_key'),
residual_flag: integer('residual_flag', { mode: 'boolean' }).notNull().default(false),
presentation_order: numeric('presentation_order'),
presentation_depth: integer('presentation_depth'),
parent_concept_key: text('parent_concept_key'),
@@ -444,11 +540,20 @@ export const filingTaxonomyFact = sqliteTable('filing_taxonomy_fact', {
qname: text('qname').notNull(),
namespace_uri: text('namespace_uri').notNull(),
local_name: text('local_name').notNull(),
data_type: text('data_type'),
statement_kind: text('statement_kind').$type<FinancialStatementKind>(),
role_uri: text('role_uri'),
authoritative_concept_key: text('authoritative_concept_key'),
mapping_method: text('mapping_method'),
surface_key: text('surface_key'),
detail_parent_surface_key: text('detail_parent_surface_key'),
kpi_key: text('kpi_key'),
residual_flag: integer('residual_flag', { mode: 'boolean' }).notNull().default(false),
context_id: text('context_id').notNull(),
unit: text('unit'),
decimals: text('decimals'),
precision: text('precision'),
nil: integer('nil', { mode: 'boolean' }).notNull().default(false),
value_num: numeric('value_num').notNull(),
period_start: text('period_start'),
period_end: text('period_end'),