feat(taxonomy): add rust sidecar compact surface pipeline

This commit is contained in:
2026-03-12 15:23:10 -04:00
parent f2c25fb9c6
commit 58061af006
84 changed files with 19350 additions and 265 deletions

View File

@@ -7,6 +7,7 @@ import type { FilingTaxonomySnapshotRecord } from './repos/filing-taxonomy';
import type {
FinancialStatementKind,
FinancialStatementPeriod,
StructuredKpiRow,
TaxonomyFactRow,
TaxonomyStatementRow
} from '@/lib/types';
@@ -67,6 +68,13 @@ function createSnapshot(input: {
statement: input.statement,
values: Object.fromEntries(input.periods.map((period, index) => [period.id, 100 + index]))
});
const faithfulRows = {
income: input.statement === 'income' ? (input.rows ?? [defaultRow]) : [],
balance: input.statement === 'balance' ? (input.rows ?? [{ ...defaultRow, statement: 'balance' }]) : [],
cash_flow: input.statement === 'cash_flow' ? (input.rows ?? [{ ...defaultRow, statement: 'cash_flow' }]) : [],
equity: [],
comprehensive_income: []
} satisfies FilingTaxonomySnapshotRecord['faithful_rows'];
return {
id: input.filingId,
@@ -77,6 +85,10 @@ function createSnapshot(input: {
parse_status: 'ready',
parse_error: null,
source: 'xbrl_instance',
parser_engine: 'fiscal-xbrl',
parser_version: '0.1.0',
taxonomy_regime: 'us-gaap',
fiscal_pack: 'core',
periods: input.periods.map((period) => ({
id: period.id,
filingId: input.filingId,
@@ -87,15 +99,26 @@ function createSnapshot(input: {
filingType: input.filingType,
periodLabel: period.periodLabel
})),
statement_rows: {
income: input.statement === 'income' ? (input.rows ?? [defaultRow]) : [],
balance: input.statement === 'balance' ? (input.rows ?? [{ ...defaultRow, statement: 'balance' }]) : [],
cash_flow: input.statement === 'cash_flow' ? (input.rows ?? [{ ...defaultRow, statement: 'cash_flow' }]) : [],
faithful_rows: faithfulRows,
statement_rows: faithfulRows,
surface_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
detail_rows: {
income: {},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {}
},
kpi_rows: [],
derived_metrics: null,
validation_result: null,
normalization_summary: null,
facts_count: 0,
concepts_count: 0,
dimensions_count: 0,
@@ -203,6 +226,30 @@ function createFact(input: {
};
}
/**
 * Test fixture factory for a structured KPI row.
 *
 * Only `key` and `values` are required. The label mirrors the key, the
 * descriptive fields (category, unit, order, segment/axis/member,
 * hasDimensions) are fixed, and the provenance fields fall back to empty
 * lists and `'taxonomy'` when omitted.
 *
 * @param input - Key, per-period values, and optional provenance overrides.
 * @returns A fully populated KPI row built from `input` and the fixed defaults.
 */
function createKpiRow(input: {
  key: string;
  values: Record<string, number | null>;
  provenanceType?: StructuredKpiRow['provenanceType'];
  sourceConcepts?: string[];
  sourceFactIds?: number[];
}): StructuredKpiRow {
  const { key, values } = input;

  // Resolve the optional provenance inputs up front so the literal below stays flat.
  const provenance = input.provenanceType ?? 'taxonomy';
  const concepts = input.sourceConcepts ?? [];
  const factIds = input.sourceFactIds ?? [];

  const row: StructuredKpiRow = {
    key,
    label: key,
    category: 'operating_kpi',
    unit: 'percent',
    order: 10,
    segment: null,
    axis: null,
    member: null,
    values,
    sourceConcepts: concepts,
    sourceFactIds: factIds,
    provenanceType: provenance,
    hasDimensions: false
  };

  return row;
}
function findRow(rows: ReturnType<typeof __financialTaxonomyInternals.buildStandardizedRows>, key: string) {
const row = rows.find((entry) => entry.key === key);
expect(row).toBeDefined();
@@ -1500,4 +1547,174 @@ describe('financial taxonomy internals', () => {
expect(findStandardizedResponseRow(cash, 'changes_accrued_expenses').values[cashPeriod2025]).toBe(21_525_000);
expect(findStandardizedResponseRow(cash, 'other_adjustments').values[cashPeriod2025]).toBe(55_904_000);
});
it('merges KPI rows by priority without overwriting higher-priority periods', () => {
  // Three groups report the same KPI key with overlapping periods. The
  // assertions below expect the earlier group to win whenever a period
  // appears in more than one group.
  const firstGroup = [
    createKpiRow({
      key: 'loan_growth',
      values: { p1: 0.12 },
      sourceConcepts: ['us-gaap:LoansReceivableNetReportedAmount'],
      sourceFactIds: [1]
    })
  ];
  const secondGroup = [
    createKpiRow({
      key: 'loan_growth',
      values: { p1: 0.11, p2: 0.09 },
      sourceConcepts: ['us-gaap:FinancingReceivableRecordedInvestment'],
      sourceFactIds: [2]
    })
  ];
  const thirdGroup = [
    createKpiRow({
      key: 'loan_growth',
      values: { p2: 0.08, p3: 0.07 },
      provenanceType: 'structured_note',
      sourceFactIds: [3]
    })
  ];

  const merged = __financialTaxonomyInternals.mergeStructuredKpiRowsByPriority([
    firstGroup,
    secondGroup,
    thirdGroup
  ]);

  // All three inputs share one key, so exactly one merged row is expected.
  expect(merged).toHaveLength(1);

  const combined = merged[0];
  // p1 comes from the first group, p2 from the second, p3 from the third.
  expect(combined?.values).toEqual({ p1: 0.12, p2: 0.09, p3: 0.07 });
  // Concepts from every contributing row, in this order (not insertion order).
  expect(combined?.sourceConcepts).toEqual([
    'us-gaap:FinancingReceivableRecordedInvestment',
    'us-gaap:LoansReceivableNetReportedAmount'
  ]);
  expect(combined?.sourceFactIds).toEqual([1, 2, 3]);
  // The first group's provenance is expected to survive the merge.
  expect(combined?.provenanceType).toBe('taxonomy');
});
it('builds normalization metadata from snapshot fiscal pack and counts', () => {
  // Start from a single-quarter 10-Q income snapshot built by the shared factory.
  const baseSnapshot = createSnapshot({
    filingId: 15,
    filingType: '10-Q',
    filingDate: '2026-01-28',
    statement: 'income',
    periods: [
      { id: 'quarter', periodStart: '2025-10-01', periodEnd: '2025-12-31', periodLabel: '2025-10-01 to 2025-12-31' }
    ]
  });

  // Override the pack and attach a normalization summary; the expected
  // metadata below echoes only the two unmapped counts from this summary.
  const snapshot = {
    ...baseSnapshot,
    parser_version: '0.1.0',
    fiscal_pack: 'bank_lender',
    normalization_summary: {
      surfaceRowCount: 5,
      detailRowCount: 3,
      kpiRowCount: 2,
      unmappedRowCount: 4,
      materialUnmappedRowCount: 1,
      warnings: []
    }
  } satisfies FilingTaxonomySnapshotRecord;

  const metadata = __financialTaxonomyInternals.buildNormalizationMetadata([snapshot]);

  expect(metadata).toEqual({
    regime: 'us-gaap',
    fiscalPack: 'bank_lender',
    parserVersion: '0.1.0',
    unmappedRowCount: 4,
    materialUnmappedRowCount: 1
  });
});
// Checks that aggregateSurfaceRows still returns income surface rows whose only
// period value is null and whose resolutionMethod is 'not_meaningful', with
// their warning codes intact.
it('retains pinned income surface rows even when they are intentionally null', () => {
// Fixture: a single-period 10-K income snapshot from the shared factory, with
// the fiscal pack and the surface rows overridden below.
const snapshot = {
...createSnapshot({
filingId: 16,
filingType: '10-K',
filingDate: '2026-02-13',
statement: 'income',
periods: [
{ id: '2025-fy', periodStart: '2025-01-01', periodEnd: '2025-12-31', periodLabel: '2025 FY' }
]
}),
fiscal_pack: 'bank_lender',
surface_rows: {
income: [
// Row with a real value, resolved directly from one source row.
{
key: 'revenue',
label: 'Revenue',
category: 'surface',
templateSection: 'surface',
order: 10,
unit: 'currency',
values: { '2025-fy': 100_000_000 },
sourceConcepts: ['us-gaap:TotalNetRevenues'],
sourceRowKeys: ['revenue'],
sourceFactIds: [1],
formulaKey: null,
hasDimensions: false,
resolvedSourceRowKeys: { '2025-fy': 'revenue' },
statement: 'income',
detailCount: 0,
resolutionMethod: 'direct',
confidence: 'high',
warningCodes: []
},
// Null-valued row with no sources, marked 'not_meaningful' and carrying a warning code.
{
key: 'gross_profit',
label: 'Gross Profit',
category: 'surface',
templateSection: 'surface',
order: 20,
unit: 'currency',
values: { '2025-fy': null },
sourceConcepts: [],
sourceRowKeys: [],
sourceFactIds: [],
formulaKey: null,
hasDimensions: false,
resolvedSourceRowKeys: { '2025-fy': null },
statement: 'income',
detailCount: 0,
resolutionMethod: 'not_meaningful',
confidence: 'low',
warningCodes: ['gross_profit_not_meaningful_bank_pack']
},
// Second null-valued 'not_meaningful' row, same shape as gross_profit.
{
key: 'selling_general_and_administrative',
label: 'SG&A',
category: 'surface',
templateSection: 'surface',
order: 31,
unit: 'currency',
values: { '2025-fy': null },
sourceConcepts: [],
sourceRowKeys: [],
sourceFactIds: [],
formulaKey: null,
hasDimensions: false,
resolvedSourceRowKeys: { '2025-fy': null },
statement: 'income',
detailCount: 0,
resolutionMethod: 'not_meaningful',
confidence: 'low',
warningCodes: ['selling_general_and_administrative_not_meaningful_bank_pack']
}
],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
}
} satisfies FilingTaxonomySnapshotRecord;
// Aggregate the income statement for the snapshot's only period.
const rows = __financialTaxonomyInternals.aggregateSurfaceRows({
snapshots: [snapshot],
statement: 'income',
selectedPeriodIds: new Set(['2025-fy'])
});
const grossProfit = rows.find((row) => row.key === 'gross_profit');
const sga = rows.find((row) => row.key === 'selling_general_and_administrative');
// Both null-valued rows must be present with value, method, and warnings unchanged.
expect(grossProfit).toBeDefined();
expect(grossProfit?.values['2025-fy']).toBeNull();
expect(grossProfit?.resolutionMethod).toBe('not_meaningful');
expect(grossProfit?.warningCodes).toEqual(['gross_profit_not_meaningful_bank_pack']);
expect(sga).toBeDefined();
expect(sga?.values['2025-fy']).toBeNull();
expect(sga?.resolutionMethod).toBe('not_meaningful');
expect(sga?.warningCodes).toEqual(['selling_general_and_administrative_not_meaningful_bank_pack']);
});
});