feat(taxonomy): add rust sidecar compact surface pipeline

This commit is contained in:
2026-03-12 15:23:10 -04:00
parent f2c25fb9c6
commit 58061af006
84 changed files with 19350 additions and 265 deletions

View File

@@ -1,9 +1,21 @@
import { and, desc, eq, gte, inArray, lt, sql } from 'drizzle-orm';
import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyDimensionMember, TaxonomyFactRow, TaxonomyStatementRow } from '@/lib/types';
import type {
Filing,
FinancialStatementKind,
MetricValidationResult,
NormalizationSummary,
StructuredKpiRow,
SurfaceDetailMap,
SurfaceFinancialRow,
TaxonomyDimensionMember,
TaxonomyFactRow,
TaxonomyStatementRow
} from '@/lib/types';
import { db } from '@/lib/server/db';
import {
filingTaxonomyAsset,
filingTaxonomyConcept,
filingTaxonomyContext,
filingTaxonomyFact,
filingTaxonomyMetricValidation,
filingTaxonomySnapshot
@@ -41,10 +53,19 @@ export type FilingTaxonomySnapshotRecord = {
parse_status: FilingTaxonomyParseStatus;
parse_error: string | null;
source: FilingTaxonomySource;
parser_engine: string;
parser_version: string;
taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown';
fiscal_pack: string | null;
periods: FilingTaxonomyPeriod[];
faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
kpi_rows: StructuredKpiRow[];
derived_metrics: Filing['metrics'];
validation_result: MetricValidationResult | null;
normalization_summary: NormalizationSummary | null;
facts_count: number;
concepts_count: number;
dimensions_count: number;
@@ -52,6 +73,20 @@ export type FilingTaxonomySnapshotRecord = {
updated_at: string;
};
/**
 * Plain-object view of one row of the `filingTaxonomyContext` table, as
 * produced by `toContextRecord` and returned by `listFilingTaxonomyContexts`.
 *
 * NOTE(review): the field names suggest this mirrors an XBRL context
 * (entity, period, segment/scenario) — confirm against the parser that fills it.
 */
export type FilingTaxonomyContextRecord = {
// Identifier of the stored context row; listings are ordered by it, descending.
id: number;
// Id of the owning snapshot; context rows are listed and deleted by this key.
snapshot_id: number;
// Context identifier string supplied by the upsert input.
context_id: string;
// Entity identifier and its scheme; either may be absent (null).
entity_identifier: string | null;
entity_scheme: string | null;
// Period bounds as strings — presumably ISO dates; TODO confirm the format.
period_start: string | null;
period_end: string | null;
// Presumably set for point-in-time contexts instead of start/end — verify against caller.
period_instant: string | null;
// Free-form JSON payloads; a missing value is normalized to null when the row is read.
segment_json: Record<string, unknown> | null;
scenario_json: Record<string, unknown> | null;
// Timestamp string written when the snapshot is upserted (format not visible here).
created_at: string;
};
export type FilingTaxonomyAssetRecord = {
id: number;
snapshot_id: number;
@@ -73,8 +108,17 @@ export type FilingTaxonomyConceptRecord = {
local_name: string;
label: string | null;
is_extension: boolean;
balance: string | null;
period_type: string | null;
data_type: string | null;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
authoritative_concept_key: string | null;
mapping_method: string | null;
surface_key: string | null;
detail_parent_surface_key: string | null;
kpi_key: string | null;
residual_flag: boolean;
presentation_order: number | null;
presentation_depth: number | null;
parent_concept_key: string | null;
@@ -89,11 +133,20 @@ export type FilingTaxonomyFactRecord = {
qname: string;
namespace_uri: string;
local_name: string;
data_type: string | null;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
authoritative_concept_key: string | null;
mapping_method: string | null;
surface_key: string | null;
detail_parent_surface_key: string | null;
kpi_key: string | null;
residual_flag: boolean;
context_id: string;
unit: string | null;
decimals: string | null;
precision: string | null;
nil: boolean;
value_num: number;
period_start: string | null;
period_end: string | null;
@@ -130,13 +183,32 @@ export type UpsertFilingTaxonomySnapshotInput = {
parse_status: FilingTaxonomyParseStatus;
parse_error: string | null;
source: FilingTaxonomySource;
parser_engine: string;
parser_version: string;
taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown';
fiscal_pack: string | null;
periods: FilingTaxonomyPeriod[];
faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
kpi_rows: StructuredKpiRow[];
derived_metrics: Filing['metrics'];
validation_result: MetricValidationResult | null;
normalization_summary: NormalizationSummary | null;
facts_count: number;
concepts_count: number;
dimensions_count: number;
contexts: Array<{
context_id: string;
entity_identifier: string | null;
entity_scheme: string | null;
period_start: string | null;
period_end: string | null;
period_instant: string | null;
segment_json: Record<string, unknown> | null;
scenario_json: Record<string, unknown> | null;
}>;
assets: Array<{
asset_type: FilingTaxonomyAssetType;
name: string;
@@ -152,8 +224,17 @@ export type UpsertFilingTaxonomySnapshotInput = {
local_name: string;
label: string | null;
is_extension: boolean;
balance: string | null;
period_type: string | null;
data_type: string | null;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
authoritative_concept_key: string | null;
mapping_method: string | null;
surface_key: string | null;
detail_parent_surface_key: string | null;
kpi_key: string | null;
residual_flag: boolean;
presentation_order: number | null;
presentation_depth: number | null;
parent_concept_key: string | null;
@@ -164,11 +245,20 @@ export type UpsertFilingTaxonomySnapshotInput = {
qname: string;
namespace_uri: string;
local_name: string;
data_type: string | null;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
authoritative_concept_key: string | null;
mapping_method: string | null;
surface_key: string | null;
detail_parent_surface_key: string | null;
kpi_key: string | null;
residual_flag: boolean;
context_id: string;
unit: string | null;
decimals: string | null;
precision: string | null;
nil: boolean;
value_num: number;
period_start: string | null;
period_end: string | null;
@@ -229,7 +319,29 @@ function emptyStatementRows(): Record<FinancialStatementKind, TaxonomyStatementR
};
}
/**
 * Builds the default `surface_rows` value: one empty row list per financial
 * statement kind.
 *
 * @returns A new object on every call, with a distinct empty array under each
 * statement-kind key.
 */
function emptySurfaceRows(): Record<FinancialStatementKind, SurfaceFinancialRow[]> {
  // Spelled out as a literal so the compiler checks that every statement kind is present.
  const rowsByStatement: Record<FinancialStatementKind, SurfaceFinancialRow[]> = {
    income: [],
    balance: [],
    cash_flow: [],
    equity: [],
    comprehensive_income: []
  };

  return rowsByStatement;
}
/**
 * Builds the default `detail_rows` value: one empty detail map per financial
 * statement kind.
 *
 * @returns A new object on every call, with a distinct empty map under each
 * statement-kind key.
 */
function emptyDetailRows(): Record<FinancialStatementKind, SurfaceDetailMap> {
  // Spelled out as a literal so the compiler checks that every statement kind is present.
  const detailsByStatement: Record<FinancialStatementKind, SurfaceDetailMap> = {
    income: {},
    balance: {},
    cash_flow: {},
    equity: {},
    comprehensive_income: {}
  };

  return detailsByStatement;
}
function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): FilingTaxonomySnapshotRecord {
const faithfulRows = row.faithful_rows ?? row.statement_rows ?? emptyStatementRows();
return {
id: row.id,
filing_id: row.filing_id,
@@ -239,10 +351,19 @@ function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): Fili
parse_status: row.parse_status,
parse_error: row.parse_error,
source: row.source,
parser_engine: row.parser_engine,
parser_version: row.parser_version,
taxonomy_regime: row.taxonomy_regime,
fiscal_pack: row.fiscal_pack,
periods: row.periods ?? [],
statement_rows: row.statement_rows ?? emptyStatementRows(),
faithful_rows: faithfulRows,
statement_rows: faithfulRows,
surface_rows: row.surface_rows ?? emptySurfaceRows(),
detail_rows: row.detail_rows ?? emptyDetailRows(),
kpi_rows: row.kpi_rows ?? [],
derived_metrics: row.derived_metrics ?? null,
validation_result: row.validation_result ?? null,
normalization_summary: row.normalization_summary ?? null,
facts_count: row.facts_count,
concepts_count: row.concepts_count,
dimensions_count: row.dimensions_count,
@@ -251,6 +372,22 @@ function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): Fili
};
}
/**
 * Converts a selected `filingTaxonomyContext` row into a
 * `FilingTaxonomyContextRecord`.
 *
 * Every column is copied through unchanged, except that a missing
 * (`null`/`undefined`) `segment_json` or `scenario_json` is normalized to `null`.
 *
 * @param row - A row as selected from the `filingTaxonomyContext` table.
 * @returns The record with exactly the fields of `FilingTaxonomyContextRecord`.
 */
function toContextRecord(row: typeof filingTaxonomyContext.$inferSelect): FilingTaxonomyContextRecord {
  const {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    segment_json,
    scenario_json,
    created_at
  } = row;

  return {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    // Collapse undefined into null so consumers only deal with one "absent" value.
    segment_json: segment_json ?? null,
    scenario_json: scenario_json ?? null,
    created_at
  };
}
function toAssetRecord(row: typeof filingTaxonomyAsset.$inferSelect): FilingTaxonomyAssetRecord {
return {
id: row.id,
@@ -275,8 +412,17 @@ function toConceptRecord(row: typeof filingTaxonomyConcept.$inferSelect): Filing
local_name: row.local_name,
label: row.label,
is_extension: row.is_extension,
balance: row.balance,
period_type: row.period_type,
data_type: row.data_type,
statement_kind: row.statement_kind ?? null,
role_uri: row.role_uri,
authoritative_concept_key: row.authoritative_concept_key,
mapping_method: row.mapping_method,
surface_key: row.surface_key,
detail_parent_surface_key: row.detail_parent_surface_key,
kpi_key: row.kpi_key,
residual_flag: row.residual_flag,
presentation_order: asNumber(row.presentation_order),
presentation_depth: row.presentation_depth,
parent_concept_key: row.parent_concept_key,
@@ -298,11 +444,20 @@ function toFactRecord(row: typeof filingTaxonomyFact.$inferSelect): FilingTaxono
qname: row.qname,
namespace_uri: row.namespace_uri,
local_name: row.local_name,
data_type: row.data_type,
statement_kind: row.statement_kind ?? null,
role_uri: row.role_uri,
authoritative_concept_key: row.authoritative_concept_key,
mapping_method: row.mapping_method,
surface_key: row.surface_key,
detail_parent_surface_key: row.detail_parent_surface_key,
kpi_key: row.kpi_key,
residual_flag: row.residual_flag,
context_id: row.context_id,
unit: row.unit,
decimals: row.decimals,
precision: row.precision,
nil: row.nil,
value_num: value,
period_start: row.period_start,
period_end: row.period_end,
@@ -354,6 +509,16 @@ export async function listFilingTaxonomyAssets(snapshotId: number) {
return rows.map(toAssetRecord);
}
/**
 * Loads the context rows stored for one taxonomy snapshot.
 *
 * @param snapshotId - Id of the snapshot whose contexts should be returned.
 * @returns The matching rows converted with `toContextRecord`, ordered by
 * row id descending.
 */
export async function listFilingTaxonomyContexts(snapshotId: number) {
  const forSnapshot = eq(filingTaxonomyContext.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyContext.id);

  const contextRows = await db
    .select()
    .from(filingTaxonomyContext)
    .where(forSnapshot)
    .orderBy(byIdDescending);

  return contextRows.map((contextRow) => toContextRecord(contextRow));
}
export async function listFilingTaxonomyConcepts(snapshotId: number) {
const rows = await db
.select()
@@ -397,10 +562,19 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
parse_status: input.parse_status,
parse_error: input.parse_error,
source: input.source,
parser_engine: input.parser_engine,
parser_version: input.parser_version,
taxonomy_regime: input.taxonomy_regime,
fiscal_pack: input.fiscal_pack,
periods: input.periods,
faithful_rows: input.faithful_rows,
statement_rows: input.statement_rows,
surface_rows: input.surface_rows,
detail_rows: input.detail_rows,
kpi_rows: input.kpi_rows,
derived_metrics: input.derived_metrics,
validation_result: input.validation_result,
normalization_summary: input.normalization_summary,
facts_count: input.facts_count,
concepts_count: input.concepts_count,
dimensions_count: input.dimensions_count,
@@ -416,10 +590,19 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
parse_status: input.parse_status,
parse_error: input.parse_error,
source: input.source,
parser_engine: input.parser_engine,
parser_version: input.parser_version,
taxonomy_regime: input.taxonomy_regime,
fiscal_pack: input.fiscal_pack,
periods: input.periods,
faithful_rows: input.faithful_rows,
statement_rows: input.statement_rows,
surface_rows: input.surface_rows,
detail_rows: input.detail_rows,
kpi_rows: input.kpi_rows,
derived_metrics: input.derived_metrics,
validation_result: input.validation_result,
normalization_summary: input.normalization_summary,
facts_count: input.facts_count,
concepts_count: input.concepts_count,
dimensions_count: input.dimensions_count,
@@ -431,10 +614,26 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
const snapshotId = saved.id;
await db.delete(filingTaxonomyAsset).where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
await db.delete(filingTaxonomyContext).where(eq(filingTaxonomyContext.snapshot_id, snapshotId));
await db.delete(filingTaxonomyConcept).where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
await db.delete(filingTaxonomyFact).where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
await db.delete(filingTaxonomyMetricValidation).where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));
if (input.contexts.length > 0) {
await db.insert(filingTaxonomyContext).values(input.contexts.map((context) => ({
snapshot_id: snapshotId,
context_id: context.context_id,
entity_identifier: context.entity_identifier,
entity_scheme: context.entity_scheme,
period_start: context.period_start,
period_end: context.period_end,
period_instant: context.period_instant,
segment_json: context.segment_json,
scenario_json: context.scenario_json,
created_at: now
})));
}
if (input.assets.length > 0) {
await db.insert(filingTaxonomyAsset).values(input.assets.map((asset) => ({
snapshot_id: snapshotId,
@@ -457,8 +656,17 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
local_name: concept.local_name,
label: concept.label,
is_extension: concept.is_extension,
balance: concept.balance,
period_type: concept.period_type,
data_type: concept.data_type,
statement_kind: concept.statement_kind,
role_uri: concept.role_uri,
authoritative_concept_key: concept.authoritative_concept_key,
mapping_method: concept.mapping_method,
surface_key: concept.surface_key,
detail_parent_surface_key: concept.detail_parent_surface_key,
kpi_key: concept.kpi_key,
residual_flag: concept.residual_flag,
presentation_order: asNumericText(concept.presentation_order),
presentation_depth: concept.presentation_depth,
parent_concept_key: concept.parent_concept_key,
@@ -474,11 +682,20 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
qname: fact.qname,
namespace_uri: fact.namespace_uri,
local_name: fact.local_name,
data_type: fact.data_type,
statement_kind: fact.statement_kind,
role_uri: fact.role_uri,
authoritative_concept_key: fact.authoritative_concept_key,
mapping_method: fact.mapping_method,
surface_key: fact.surface_key,
detail_parent_surface_key: fact.detail_parent_surface_key,
kpi_key: fact.kpi_key,
residual_flag: fact.residual_flag,
context_id: fact.context_id,
unit: fact.unit,
decimals: fact.decimals,
precision: fact.precision,
nil: fact.nil,
value_num: String(fact.value_num),
period_start: fact.period_start,
period_end: fact.period_end,