diff --git a/app/financials/page.tsx b/app/financials/page.tsx index 0d77d37..f49f325 100644 --- a/app/financials/page.tsx +++ b/app/financials/page.tsx @@ -45,6 +45,7 @@ import { type NumberScaleUnit } from '@/lib/format'; import { buildGraphingHref } from '@/lib/graphing/catalog'; +import { mergeFinancialPages } from '@/lib/financials/page-merge'; import { buildStatementTree, resolveStatementSelection, @@ -63,7 +64,6 @@ import type { RatioRow, StandardizedFinancialRow, StructuredKpiRow, - SurfaceDetailMap, SurfaceFinancialRow, TaxonomyStatementRow, TrendSeries @@ -345,90 +345,6 @@ function groupRows(rows: FlatDisplayRow[], categories: CompanyFinancialStatement .filter((group) => group.rows.length > 0); } -function mergeDetailMaps(base: SurfaceDetailMap | null, next: SurfaceDetailMap | null) { - if (!base) { - return next; - } - - if (!next) { - return base; - } - - const merged: SurfaceDetailMap = structuredClone(base); - - for (const [surfaceKey, detailRows] of Object.entries(next)) { - const existingRows = merged[surfaceKey] ?? []; - const rowMap = new Map(existingRows.map((row) => [row.key, row])); - - for (const detailRow of detailRows) { - const existing = rowMap.get(detailRow.key); - if (!existing) { - rowMap.set(detailRow.key, structuredClone(detailRow)); - continue; - } - - existing.values = { - ...existing.values, - ...detailRow.values - }; - existing.sourceFactIds = [...new Set([...existing.sourceFactIds, ...detailRow.sourceFactIds])]; - existing.dimensionsSummary = [...new Set([...existing.dimensionsSummary, ...detailRow.dimensionsSummary])]; - } - - merged[surfaceKey] = [...rowMap.values()]; - } - - return merged; -} - -function mergeFinancialPages( - base: CompanyFinancialStatementsResponse | null, - next: CompanyFinancialStatementsResponse -) { - if (!base) { - return next; - } - - const periods = [...base.periods, ...next.periods] - .filter((period, index, list) => list.findIndex((item) => item.id === period.id) === index) - .sort((left, right) => Date.parse(left.periodEnd ?? left.filingDate) - Date.parse(right.periodEnd ?? right.filingDate)); - - const mergeRows = }>(rows: T[]) => { - const map = new Map(); - for (const row of rows) { - const existing = map.get(row.key); - if (!existing) { - map.set(row.key, structuredClone(row)); - continue; - } - - existing.values = { - ...existing.values, - ...row.values - }; - } - - return [...map.values()]; - }; - - return { - ...next, - periods, - statementRows: next.statementRows && base.statementRows - ? { - faithful: mergeRows([...base.statementRows.faithful, ...next.statementRows.faithful]), - standardized: mergeRows([...base.statementRows.standardized, ...next.statementRows.standardized]) - } - : next.statementRows, - statementDetails: mergeDetailMaps(base.statementDetails, next.statementDetails), - ratioRows: next.ratioRows && base.ratioRows ? mergeRows([...base.ratioRows, ...next.ratioRows]) : next.ratioRows, - kpiRows: next.kpiRows && base.kpiRows ? mergeRows([...base.kpiRows, ...next.kpiRows]) : next.kpiRows, - trendSeries: next.trendSeries, - categories: next.categories, - dimensionBreakdown: next.dimensionBreakdown ?? base.dimensionBreakdown - }; -} - function ChartFrame({ children }: { children: React.ReactNode }) { const containerRef = useRef(null); const [ready, setReady] = useState(false); @@ -1185,11 +1101,11 @@ function FinancialsPageContent() { {isDerivedRow(selectedRow) ? (

Source Row Keys

-

{selectedRow.sourceRowKeys.join(', ') || 'n/a'}

+

{(selectedRow.sourceRowKeys ?? []).join(', ') || 'n/a'}

Source Concepts

-

{selectedRow.sourceConcepts.join(', ') || 'n/a'}

+

{(selectedRow.sourceConcepts ?? []).join(', ') || 'n/a'}

Source Fact IDs

-

{selectedRow.sourceFactIds.join(', ') || 'n/a'}

+

{(selectedRow.sourceFactIds ?? []).join(', ') || 'n/a'}

) : null} @@ -1271,7 +1187,7 @@ function FinancialsPageContent() { surfaceKind })} {check.status} - {check.evidencePages.join(', ') || 'n/a'} + {(check.evidencePages ?? []).join(', ') || 'n/a'} ))} diff --git a/components/financials/statement-row-inspector.tsx b/components/financials/statement-row-inspector.tsx index d3d0082..bc80ab9 100644 --- a/components/financials/statement-row-inspector.tsx +++ b/components/financials/statement-row-inspector.tsx @@ -31,8 +31,8 @@ function InspectorCard(props: { ); } -function renderList(values: string[]) { - return values.length > 0 ? values.join(', ') : 'n/a'; +function renderList(values: string[] | null | undefined) { + return (values ?? []).length > 0 ? (values ?? []).join(', ') : 'n/a'; } export function StatementRowInspector(props: StatementRowInspectorProps) { @@ -64,7 +64,7 @@ export function StatementRowInspector(props: StatementRowInspectorProps) {
- 0 ? selection.row.sourceFactIds.join(', ') : 'n/a'} /> + 0 ? (selection.row.sourceFactIds ?? []).join(', ') : 'n/a'} />
@@ -136,7 +136,7 @@ export function StatementRowInspector(props: StatementRowInspectorProps) {
- 0 ? selection.row.sourceFactIds.join(', ') : 'n/a'} /> + 0 ? (selection.row.sourceFactIds ?? []).join(', ') : 'n/a'} />
diff --git a/lib/financials/page-merge.test.ts b/lib/financials/page-merge.test.ts new file mode 100644 index 0000000..c9a5021 --- /dev/null +++ b/lib/financials/page-merge.test.ts @@ -0,0 +1,185 @@ +import { describe, expect, it } from 'bun:test'; +import { __financialPageMergeInternals } from './page-merge'; +import type { CompanyFinancialStatementsResponse } from '@/lib/types'; + +function createResponse(partial: Partial): CompanyFinancialStatementsResponse { + return { + company: { + ticker: 'MSFT', + companyName: 'Microsoft Corporation', + cik: null + }, + surfaceKind: 'income_statement', + cadence: 'annual', + displayModes: ['standardized', 'faithful'], + defaultDisplayMode: 'standardized', + periods: [], + statementRows: { + faithful: [], + standardized: [] + }, + statementDetails: null, + ratioRows: null, + kpiRows: null, + trendSeries: [], + categories: [], + availability: { + adjusted: false, + customMetrics: false + }, + nextCursor: null, + facts: null, + coverage: { + filings: 0, + rows: 0, + dimensions: 0, + facts: 0 + }, + dataSourceStatus: { + enabled: true, + hydratedFilings: 0, + partialFilings: 0, + failedFilings: 0, + pendingFilings: 0, + queuedSync: false + }, + metrics: { + taxonomy: null, + validation: null + }, + normalization: { + parserEngine: 'fiscal-xbrl', + regime: 'us-gaap', + fiscalPack: 'core', + parserVersion: '0.1.0', + surfaceRowCount: 0, + detailRowCount: 0, + kpiRowCount: 0, + unmappedRowCount: 0, + materialUnmappedRowCount: 0, + warnings: [] + }, + dimensionBreakdown: null, + ...partial + }; +} + +describe('financial page merge helpers', () => { + it('merges detail maps safely when legacy detail rows are missing arrays', () => { + const merged = __financialPageMergeInternals.mergeDetailMaps( + { + revenue: [{ + key: 'detail', + parentSurfaceKey: 'revenue', + label: 'Detail', + conceptKey: 'detail', + qname: 'us-gaap:Detail', + namespaceUri: 'http://fasb.org/us-gaap/2024', + localName: 'Detail', + unit: 'iso4217:USD', + values: { p1: 1 }, + sourceFactIds: undefined, + isExtension: false, + dimensionsSummary: undefined, + residualFlag: false + } as never] + }, + { + revenue: [{ + key: 'detail', + parentSurfaceKey: 'revenue', + label: 'Detail', + conceptKey: 'detail', + qname: 'us-gaap:Detail', + namespaceUri: 'http://fasb.org/us-gaap/2024', + localName: 'Detail', + unit: 'iso4217:USD', + values: { p2: 2 }, + sourceFactIds: [2], + isExtension: false, + dimensionsSummary: ['region:americas'], + residualFlag: false + }] + } + ); + + expect(merged?.revenue?.[0]).toMatchObject({ + values: { p1: 1, p2: 2 }, + sourceFactIds: [2], + dimensionsSummary: ['region:americas'] + }); + }); + + it('merges paged financial responses safely when row arrays are partially missing', () => { + const base = createResponse({ + periods: [{ + id: 'p1', + filingId: 1, + accessionNumber: '0001', + filingDate: '2025-01-01', + periodStart: '2024-01-01', + periodEnd: '2024-12-31', + filingType: '10-K', + periodLabel: 'FY 2024' + }], + statementRows: { + faithful: undefined as never, + standardized: [{ + key: 'revenue', + label: 'Revenue', + category: 'revenue', + order: 10, + unit: 'currency', + values: { p1: 1 }, + sourceConcepts: [], + sourceRowKeys: [], + sourceFactIds: [], + formulaKey: null, + hasDimensions: false, + resolvedSourceRowKeys: { p1: 'revenue' } + }] + } + }); + const next = createResponse({ + periods: [{ + id: 'p2', + filingId: 2, + accessionNumber: '0002', + filingDate: '2026-01-01', + periodStart: '2025-01-01', + periodEnd: '2025-12-31', + filingType: '10-K', + periodLabel: 'FY 2025' + }], + statementRows: { + faithful: [{ + key: 'rev', + label: 'Revenue', + conceptKey: 'rev', + qname: 'us-gaap:Revenue', + namespaceUri: 'http://fasb.org/us-gaap/2024', + localName: 'Revenue', + isExtension: false, + statement: 'income', + roleUri: 'income', + order: 10, + depth: 0, + parentKey: null, + values: { p2: 2 }, + units: { p2: 'iso4217:USD' }, + hasDimensions: false, + sourceFactIds: [] + }], + standardized: undefined as never + } + }); + + const merged = __financialPageMergeInternals.mergeFinancialPages(base, next); + + expect(merged.periods.map((period) => period.id)).toEqual(['p1', 'p2']); + expect(merged.statementRows).toMatchObject({ + faithful: [{ key: 'rev' }], + standardized: [{ key: 'revenue', values: { p1: 1 } }] + }); + }); +}); diff --git a/lib/financials/page-merge.ts b/lib/financials/page-merge.ts new file mode 100644 index 0000000..9595316 --- /dev/null +++ b/lib/financials/page-merge.ts @@ -0,0 +1,98 @@ +import type { + CompanyFinancialStatementsResponse +} from '@/lib/types'; + +export function mergeDetailMaps( + base: CompanyFinancialStatementsResponse['statementDetails'], + next: CompanyFinancialStatementsResponse['statementDetails'] +) { + if (!base) { + return next; + } + + if (!next) { + return base; + } + + const merged: NonNullable = structuredClone(base); + for (const [surfaceKey, detailRows] of Object.entries(next)) { + const existingRows = merged[surfaceKey] ?? []; + const rowMap = new Map(existingRows.map((row) => [row.key, row])); + + for (const detailRow of detailRows) { + const existing = rowMap.get(detailRow.key); + if (!existing) { + rowMap.set(detailRow.key, structuredClone(detailRow)); + continue; + } + + existing.values = { + ...existing.values, + ...detailRow.values + }; + existing.sourceFactIds = [...new Set([...(existing.sourceFactIds ?? []), ...(detailRow.sourceFactIds ?? [])])]; + existing.dimensionsSummary = [...new Set([...(existing.dimensionsSummary ?? []), ...(detailRow.dimensionsSummary ?? [])])]; + } + + merged[surfaceKey] = [...rowMap.values()]; + } + + return merged; +} + +export function mergeFinancialPages( + base: CompanyFinancialStatementsResponse | null, + next: CompanyFinancialStatementsResponse +) { + if (!base) { + return next; + } + + const periods = [...base.periods, ...next.periods] + .filter((period, index, list) => list.findIndex((item) => item.id === period.id) === index) + .sort((left, right) => Date.parse(left.periodEnd ?? left.filingDate) - Date.parse(right.periodEnd ?? right.filingDate)); + + const mergeRows = }>(rows: T[]) => { + const map = new Map(); + for (const row of rows) { + const existing = map.get(row.key); + if (!existing) { + map.set(row.key, structuredClone(row)); + continue; + } + + existing.values = { + ...existing.values, + ...row.values + }; + } + + return [...map.values()]; + }; + + return { + ...next, + periods, + statementRows: next.statementRows && base.statementRows + ? { + faithful: mergeRows([...(base.statementRows.faithful ?? []), ...(next.statementRows.faithful ?? [])]), + standardized: mergeRows([...(base.statementRows.standardized ?? []), ...(next.statementRows.standardized ?? [])]) + } + : next.statementRows, + statementDetails: mergeDetailMaps(base.statementDetails, next.statementDetails), + ratioRows: next.ratioRows && base.ratioRows + ? mergeRows([...(base.ratioRows ?? []), ...(next.ratioRows ?? [])]) + : next.ratioRows, + kpiRows: next.kpiRows && base.kpiRows + ? mergeRows([...(base.kpiRows ?? []), ...(next.kpiRows ?? [])]) + : next.kpiRows, + trendSeries: next.trendSeries, + categories: next.categories, + dimensionBreakdown: next.dimensionBreakdown ?? base.dimensionBreakdown + }; +} + +export const __financialPageMergeInternals = { + mergeDetailMaps, + mergeFinancialPages +}; diff --git a/lib/financials/statement-view-model.test.ts b/lib/financials/statement-view-model.test.ts index 2e0dd09..2286ec0 100644 --- a/lib/financials/statement-view-model.test.ts +++ b/lib/financials/statement-view-model.test.ts @@ -160,6 +160,28 @@ describe('statement view model', () => { expect(child?.kind === 'surface' && child.expanded).toBe(true); }); + it('does not throw when legacy surface rows are missing source arrays', () => { + const malformedRow = { + ...createSurfaceRow({ key: 'revenue', label: 'Revenue', category: 'revenue', values: { p1: 100 } }), + sourceConcepts: undefined, + sourceRowKeys: undefined + } as unknown as SurfaceFinancialRow; + + const model = buildStatementTree({ + surfaceKind: 'income_statement', + rows: [malformedRow], + statementDetails: null, + categories: [], + searchQuery: 'revenue', + expandedRowKeys: new Set() + }); + + expect(model.sections[0]?.nodes[0]).toMatchObject({ + kind: 'surface', + row: { key: 'revenue' } + }); + }); + it('keeps not meaningful rows visible and resolves selections for surface and detail nodes', () => { const rows = [ createSurfaceRow({ diff --git a/lib/financials/statement-view-model.ts b/lib/financials/statement-view-model.ts index b253f21..fa882df 100644 --- a/lib/financials/statement-view-model.ts +++ b/lib/financials/statement-view-model.ts @@ -99,8 +99,8 @@ function searchTextForSurface(row: SurfaceFinancialRow) { return [ row.label, row.key, - ...row.sourceConcepts, - ...row.sourceRowKeys, + ...(row.sourceConcepts ?? []), + ...(row.sourceRowKeys ?? []), ...(row.warningCodes ?? []) ] .join(' ') @@ -115,7 +115,7 @@ function searchTextForDetail(row: DetailFinancialRow) { row.conceptKey, row.qname, row.localName, - ...row.dimensionsSummary + ...(row.dimensionsSummary ?? []) ] .join(' ') .toLowerCase(); diff --git a/lib/server/financial-taxonomy.test.ts b/lib/server/financial-taxonomy.test.ts index ad51db5..994ec17 100644 --- a/lib/server/financial-taxonomy.test.ts +++ b/lib/server/financial-taxonomy.test.ts @@ -1811,6 +1811,56 @@ describe('financial taxonomy internals', () => { }); }); + it('aggregates persisted detail rows when legacy snapshots are missing dimension arrays', () => { + const snapshot = { + ...createSnapshot({ + filingId: 21, + filingType: '10-K', + filingDate: '2026-02-22', + statement: 'income', + periods: [ + { id: '2025-fy', periodStart: '2025-01-01', periodEnd: '2025-12-31', periodLabel: '2025 FY' } + ] + }), + detail_rows: { + income: { + revenue: [{ + key: 'revenue_detail', + parentSurfaceKey: 'revenue', + label: 'Revenue Detail', + conceptKey: 'us-gaap:RevenueDetail', + qname: 'us-gaap:RevenueDetail', + namespaceUri: 'http://fasb.org/us-gaap/2024', + localName: 'RevenueDetail', + unit: 'iso4217:USD', + values: { '2025-fy': 123_000_000 }, + sourceFactIds: undefined, + isExtension: false, + dimensionsSummary: undefined, + residualFlag: false + } as unknown as FilingTaxonomySnapshotRecord['detail_rows']['income'][string][number]] + }, + balance: {}, + cash_flow: {}, + equity: {}, + comprehensive_income: {} + } + } satisfies FilingTaxonomySnapshotRecord; + + const rows = __financialTaxonomyInternals.aggregateDetailRows({ + snapshots: [snapshot], + statement: 'income', + selectedPeriodIds: new Set(['2025-fy']) + }); + + expect(rows.revenue).toHaveLength(1); + expect(rows.revenue?.[0]).toMatchObject({ + key: 'revenue_detail', + sourceFactIds: [], + dimensionsSummary: [] + }); + }); + it('builds normalization metadata from snapshot fiscal pack and counts', () => { const snapshot = { ...createSnapshot({ diff --git a/lib/server/financial-taxonomy.ts b/lib/server/financial-taxonomy.ts index b6e87c8..2b15b0c 100644 --- a/lib/server/financial-taxonomy.ts +++ b/lib/server/financial-taxonomy.ts @@ -78,6 +78,49 @@ type FilingDocumentRef = { primaryDocument: string | null; }; +function isRecord(value: unknown): value is Record { + return value !== null && typeof value === 'object' && !Array.isArray(value); +} + +function hasRequiredDerivedRowArrays(row: unknown) { + return isRecord(row) + && Array.isArray(row.sourceConcepts) + && Array.isArray(row.sourceRowKeys) + && Array.isArray(row.sourceFactIds); +} + +function hasRequiredDetailRowArrays(row: unknown) { + return isRecord(row) + && Array.isArray(row.sourceFactIds) + && Array.isArray(row.dimensionsSummary); +} + +function isValidStatementBundlePayload(value: unknown): value is StandardizedStatementBundlePayload { + if (!isRecord(value) || !Array.isArray(value.rows) || !isRecord(value.detailRows)) { + return false; + } + + if (!value.rows.every((row) => hasRequiredDerivedRowArrays(row))) { + return false; + } + + return Object.values(value.detailRows).every((rows) => ( + Array.isArray(rows) && rows.every((row) => hasRequiredDetailRowArrays(row)) + )); +} + +function isValidRatioBundlePayload(value: unknown): value is Pick { + return isRecord(value) + && Array.isArray(value.ratioRows) + && value.ratioRows.every((row) => hasRequiredDerivedRowArrays(row)); +} + +function isValidKpiBundlePayload(value: unknown): value is Pick { + return isRecord(value) + && Array.isArray(value.kpiRows) + && value.kpiRows.every((row) => isRecord(row) && Array.isArray(row.sourceConcepts) && Array.isArray(row.sourceFactIds)); +} + function safeTicker(input: string) { return input.trim().toUpperCase(); } @@ -350,7 +393,7 @@ function detailConceptIdentity(row: DetailFinancialRow) { } function detailMergeKey(row: DetailFinancialRow) { - const dimensionsKey = [...row.dimensionsSummary] + const dimensionsKey = [...(row.dimensionsSummary ?? [])] .map((value) => value.trim().toLowerCase()) .filter((value) => value.length > 0) .sort((left, right) => left.localeCompare(right)) @@ -713,10 +756,9 @@ async function buildStatementSurfaceBundle(input: { if ( cached - && Array.isArray((cached as Partial).rows) - && typeof (cached as Partial).detailRows === 'object' + && isValidStatementBundlePayload(cached) ) { - return cached as StandardizedStatementBundlePayload; + return cached; } const statement = surfaceToStatementKind(input.surfaceKind); @@ -794,8 +836,8 @@ async function buildRatioSurfaceBundle(input: { snapshots: input.snapshots }); - if (cached) { - return cached as Pick; + if (cached && isValidRatioBundlePayload(cached)) { + return cached; } const pricesByDate = await getHistoricalClosingPrices(input.ticker, input.periods.map((period) => latestPeriodDate(period))); @@ -846,8 +888,8 @@ async function buildKpiSurfaceBundle(input: { snapshots: input.snapshots }); - if (cached) { - return cached as Pick; + if (cached && isValidKpiBundlePayload(cached)) { + return cached; } const persistedRows = aggregatePersistedKpiRows({ diff --git a/lib/server/repos/company-financial-bundles.ts b/lib/server/repos/company-financial-bundles.ts index fa07a19..4c0e6da 100644 --- a/lib/server/repos/company-financial-bundles.ts +++ b/lib/server/repos/company-financial-bundles.ts @@ -7,7 +7,7 @@ import { db, getSqliteClient } from '@/lib/server/db'; import { withFinancialIngestionSchemaRetry } from '@/lib/server/db/financial-ingestion-schema'; import { companyFinancialBundle } from '@/lib/server/db/schema'; -export const CURRENT_COMPANY_FINANCIAL_BUNDLE_VERSION = 14; +export const CURRENT_COMPANY_FINANCIAL_BUNDLE_VERSION = 15; export type CompanyFinancialBundleRecord = { id: number; diff --git a/lib/server/repos/filing-taxonomy.test.ts b/lib/server/repos/filing-taxonomy.test.ts new file mode 100644 index 0000000..eb2e329 --- /dev/null +++ b/lib/server/repos/filing-taxonomy.test.ts @@ -0,0 +1,291 @@ +import { describe, expect, it } from 'bun:test'; +import { __filingTaxonomyInternals } from './filing-taxonomy'; + +describe('filing taxonomy snapshot normalization', () => { + it('normalizes legacy snake_case nested snapshot payloads in toSnapshotRecord', () => { + const record = __filingTaxonomyInternals.toSnapshotRecord({ + id: 1, + filing_id: 10, + ticker: 'MSFT', + filing_date: '2026-01-28', + filing_type: '10-Q', + parse_status: 'ready', + parse_error: null, + source: 'xbrl_instance', + parser_engine: 'fiscal-xbrl', + parser_version: '0.1.0', + taxonomy_regime: 'us-gaap', + fiscal_pack: 'core', + periods: [{ + id: 'fy-2025', + filing_id: 10, + accession_number: '0001', + filing_date: '2026-01-28', + period_start: '2025-01-01', + period_end: '2025-12-31', + filing_type: '10-Q', + period_label: 'FY 2025' + }], + faithful_rows: { + income: [{ + key: 'revenue', + label: 'Revenue', + concept_key: 'us-gaap:Revenue', + qname: 'us-gaap:Revenue', + namespace_uri: 'http://fasb.org/us-gaap/2025', + local_name: 'Revenue', + is_extension: false, + statement: 'income', + role_uri: 'income', + order: 10, + depth: 0, + parent_key: null, + values: { 'fy-2025': 10 }, + units: { 'fy-2025': 'iso4217:USD' }, + has_dimensions: false, + source_fact_ids: [1] + }], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] + }, + statement_rows: null, + surface_rows: { + income: [{ + key: 'revenue', + label: 'Revenue', + category: 'revenue', + template_section: 'revenue', + order: 10, + unit: 'currency', + values: { 'fy-2025': 10 }, + source_concepts: ['us-gaap:Revenue'], + source_row_keys: ['revenue'], + source_fact_ids: [1], + formula_key: null, + has_dimensions: false, + resolved_source_row_keys: { 'fy-2025': 'revenue' }, + statement: 'income', + detail_count: 1, + resolution_method: 'direct', + confidence: 'high', + warning_codes: ['legacy_surface'] + }], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] + }, + detail_rows: { + income: { + revenue: [{ + key: 'revenue_detail', + parent_surface_key: 'revenue', + label: 'Revenue Detail', + concept_key: 'us-gaap:RevenueDetail', + qname: 'us-gaap:RevenueDetail', + namespace_uri: 'http://fasb.org/us-gaap/2025', + local_name: 'RevenueDetail', + unit: 'iso4217:USD', + values: { 'fy-2025': 10 }, + source_fact_ids: [2], + is_extension: false, + dimensions_summary: ['region:americas'], + residual_flag: false + }] + }, + balance: {}, + cash_flow: {}, + equity: {}, + comprehensive_income: {} + }, + kpi_rows: [{ + key: 'cloud_growth', + label: 'Cloud Growth', + category: 'operating_kpi', + unit: 'percent', + order: 10, + segment: null, + axis: null, + member: null, + values: { 'fy-2025': 0.25 }, + source_concepts: ['msft:CloudGrowth'], + source_fact_ids: [3], + provenance_type: 'taxonomy', + has_dimensions: false + }], + derived_metrics: null, + validation_result: null, + normalization_summary: { + surface_row_count: 1, + detail_row_count: 1, + kpi_row_count: 1, + unmapped_row_count: 0, + material_unmapped_row_count: 0, + warnings: ['legacy_warning'] + }, + facts_count: 3, + concepts_count: 3, + dimensions_count: 1, + created_at: '2026-01-28T00:00:00.000Z', + updated_at: '2026-01-28T00:00:00.000Z' + } as never); + + expect(record.periods[0]).toMatchObject({ + filingId: 10, + accessionNumber: '0001', + filingDate: '2026-01-28', + periodStart: '2025-01-01', + periodEnd: '2025-12-31', + periodLabel: 'FY 2025' + }); + expect(record.faithful_rows.income[0]).toMatchObject({ + conceptKey: 'us-gaap:Revenue', + namespaceUri: 'http://fasb.org/us-gaap/2025', + localName: 'Revenue', + roleUri: 'income', + parentKey: null, + hasDimensions: false, + sourceFactIds: [1] + }); + expect(record.surface_rows.income[0]).toMatchObject({ + templateSection: 'revenue', + sourceConcepts: ['us-gaap:Revenue'], + sourceRowKeys: ['revenue'], + sourceFactIds: [1], + formulaKey: null, + hasDimensions: false, + resolvedSourceRowKeys: { 'fy-2025': 'revenue' }, + detailCount: 1, + resolutionMethod: 'direct', + warningCodes: ['legacy_surface'] + }); + expect(record.detail_rows.income.revenue?.[0]).toMatchObject({ + parentSurfaceKey: 'revenue', + conceptKey: 'us-gaap:RevenueDetail', + namespaceUri: 'http://fasb.org/us-gaap/2025', + sourceFactIds: [2], + dimensionsSummary: ['region:americas'], + residualFlag: false + }); + expect(record.kpi_rows[0]).toMatchObject({ + sourceConcepts: ['msft:CloudGrowth'], + sourceFactIds: [3], + provenanceType: 'taxonomy', + hasDimensions: false + }); + expect(record.normalization_summary).toEqual({ + surfaceRowCount: 1, + detailRowCount: 1, + kpiRowCount: 1, + unmappedRowCount: 0, + materialUnmappedRowCount: 0, + warnings: ['legacy_warning'] + }); + }); + + it('keeps mixed camelCase and snake_case payloads compatible', () => { + const normalized = __filingTaxonomyInternals.normalizeFilingTaxonomySnapshotPayload({ + periods: [{ + id: 'fy-2025', + filingId: 10, + accessionNumber: '0001', + filingDate: '2026-01-28', + periodStart: '2025-01-01', + periodEnd: '2025-12-31', + filingType: '10-K', + periodLabel: 'FY 2025' + }], + faithful_rows: { + income: [{ + key: 'revenue', + label: 'Revenue', + conceptKey: 'us-gaap:Revenue', + qname: 'us-gaap:Revenue', + namespaceUri: 'http://fasb.org/us-gaap/2025', + localName: 'Revenue', + isExtension: false, + statement: 'income', + roleUri: 'income', + order: 10, + depth: 0, + parentKey: null, + values: { 'fy-2025': 10 }, + units: { 'fy-2025': 'iso4217:USD' }, + hasDimensions: false, + sourceFactIds: [1] + }], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] + }, + statement_rows: null, + surface_rows: { + income: [{ + key: 'revenue', + label: 'Revenue', + category: 'revenue', + order: 10, + unit: 'currency', + values: { 'fy-2025': 10 }, + source_concepts: ['us-gaap:Revenue'], + source_row_keys: ['revenue'], + source_fact_ids: [1], + formula_key: null, + has_dimensions: false, + resolved_source_row_keys: { 'fy-2025': 'revenue' } + }], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] + }, + detail_rows: { + income: { + revenue: [{ + key: 'revenue_detail', + parentSurfaceKey: 'revenue', + label: 'Revenue Detail', + conceptKey: 'us-gaap:RevenueDetail', + qname: 'us-gaap:RevenueDetail', + namespaceUri: 'http://fasb.org/us-gaap/2025', + localName: 'RevenueDetail', + unit: 'iso4217:USD', + values: { 'fy-2025': 10 }, + sourceFactIds: [2], + isExtension: false, + dimensionsSummary: [], + residualFlag: false + }] + }, + balance: {}, + cash_flow: {}, + equity: {}, + comprehensive_income: {} + }, + kpi_rows: [], + normalization_summary: { + surfaceRowCount: 1, + detail_row_count: 1, + kpiRowCount: 0, + unmapped_row_count: 0, + materialUnmappedRowCount: 0, + warnings: [] + } + }); + + expect(normalized.periods[0]?.filingId).toBe(10); + expect(normalized.surface_rows.income[0]?.sourceConcepts).toEqual(['us-gaap:Revenue']); + expect(normalized.detail_rows.income.revenue?.[0]?.parentSurfaceKey).toBe('revenue'); + expect(normalized.normalization_summary).toEqual({ + surfaceRowCount: 1, + detailRowCount: 1, + kpiRowCount: 0, + unmappedRowCount: 0, + materialUnmappedRowCount: 0, + warnings: [] + }); + }); +}); diff --git a/lib/server/repos/filing-taxonomy.ts b/lib/server/repos/filing-taxonomy.ts index b52ab77..7cf35a0 100644 --- a/lib/server/repos/filing-taxonomy.ts +++ b/lib/server/repos/filing-taxonomy.ts @@ -1,5 +1,6 @@ import { and, desc, eq, gte, inArray, lt, sql } from 'drizzle-orm'; import type { + DetailFinancialRow, Filing, FinancialStatementKind, MetricValidationResult, @@ -283,6 +284,18 @@ export type UpsertFilingTaxonomySnapshotInput = { }>; }; +const FINANCIAL_STATEMENT_KINDS = [ + 'income', + 'balance', + 'cash_flow', + 'equity', + 'comprehensive_income' +] as const satisfies FinancialStatementKind[]; + +type StatementRowMap = Record; +type SurfaceRowMap = Record; +type DetailRowMap = Record; + function tenYearsAgoIso() { const date = new Date(); date.setUTCFullYear(date.getUTCFullYear() - 10); @@ -310,7 +323,394 @@ function asNumericText(value: number | null) { return String(value); } -function emptyStatementRows(): Record { +function asObject(value: unknown) { + return value !== null && typeof value === 'object' && !Array.isArray(value) + ? value as Record + : null; +} + +function asString(value: unknown) { + return typeof value === 'string' ? value : null; +} + +function asNullableString(value: unknown) { + return typeof value === 'string' + ? value + : value === null + ? null + : null; +} + +function asBoolean(value: unknown) { + return typeof value === 'boolean' ? value : Boolean(value); +} + +function asStatementKind(value: unknown): FinancialStatementKind | null { + return value === 'income' + || value === 'balance' + || value === 'cash_flow' + || value === 'equity' + || value === 'comprehensive_income' + ? value + : null; +} + +function normalizeNumberMap(value: unknown) { + const object = asObject(value); + if (!object) { + return {}; + } + + return Object.fromEntries( + Object.entries(object).map(([key, entry]) => [key, asNumber(entry)]) + ); +} + +function normalizeNullableStringMap(value: unknown) { + const object = asObject(value); + if (!object) { + return {}; + } + + return Object.fromEntries( + Object.entries(object).map(([key, entry]) => [key, asNullableString(entry)]) + ); +} + +function normalizeStringArray(value: unknown) { + return Array.isArray(value) + ? value.filter((entry): entry is string => typeof entry === 'string') + : []; +} + +function normalizeNumberArray(value: unknown) { + if (!Array.isArray(value)) { + return []; + } + + return value + .map((entry) => asNumber(entry)) + .filter((entry): entry is number => entry !== null); +} + +function normalizePeriods(value: unknown): FilingTaxonomyPeriod[] { + if (!Array.isArray(value)) { + return []; + } + + return value + .map((entry) => { + const row = asObject(entry); + if (!row) { + return null; + } + + const id = asString(row.id); + const filingId = asNumber(row.filingId ?? row.filing_id); + const accessionNumber = asString(row.accessionNumber ?? row.accession_number); + const filingDate = asString(row.filingDate ?? row.filing_date); + const filingType = row.filingType === '10-K' || row.filing_type === '10-K' + ? '10-K' + : row.filingType === '10-Q' || row.filing_type === '10-Q' + ? '10-Q' + : null; + const periodLabel = asString(row.periodLabel ?? row.period_label); + + if (!id || filingId === null || !accessionNumber || !filingDate || !filingType || !periodLabel) { + return null; + } + + return { + id, + filingId, + accessionNumber, + filingDate, + periodStart: asNullableString(row.periodStart ?? row.period_start), + periodEnd: asNullableString(row.periodEnd ?? row.period_end), + filingType, + periodLabel + } satisfies FilingTaxonomyPeriod; + }) + .filter((entry): entry is FilingTaxonomyPeriod => entry !== null); +} + +function normalizeStatementRows( + value: unknown, + fallbackRows: StatementRowMap = emptyStatementRows() +): StatementRowMap { + const object = asObject(value); + if (!object) { + return fallbackRows; + } + + const normalized = emptyStatementRows(); + for (const statement of FINANCIAL_STATEMENT_KINDS) { + const rows = Array.isArray(object[statement]) ? object[statement] : []; + normalized[statement] = rows + .map((entry) => { + const row = asObject(entry); + if (!row) { + return null; + } + + const key = asString(row.key) ?? asString(row.conceptKey ?? row.concept_key); + const label = asString(row.label); + const conceptKey = asString(row.conceptKey ?? row.concept_key); + const qname = asString(row.qname); + const namespaceUri = asString(row.namespaceUri ?? row.namespace_uri); + const localName = asString(row.localName ?? row.local_name); + if (!key || !label || !conceptKey || !qname || !namespaceUri || !localName) { + return null; + } + + return { + key, + label, + conceptKey, + qname, + namespaceUri, + localName, + isExtension: asBoolean(row.isExtension ?? row.is_extension), + statement: asStatementKind(row.statement) ?? statement, + roleUri: asNullableString(row.roleUri ?? row.role_uri), + order: asNumber(row.order) ?? Number.MAX_SAFE_INTEGER, + depth: asNumber(row.depth) ?? 0, + parentKey: asNullableString(row.parentKey ?? row.parent_key), + values: normalizeNumberMap(row.values), + units: normalizeNullableStringMap(row.units), + hasDimensions: asBoolean(row.hasDimensions ?? row.has_dimensions), + sourceFactIds: normalizeNumberArray(row.sourceFactIds ?? row.source_fact_ids) + }; + }) + .filter((entry): entry is TaxonomyStatementRow => entry !== null); + } + + return normalized; +} + +function normalizeSurfaceRows( + value: unknown, + fallbackRows: SurfaceRowMap = emptySurfaceRows() +): SurfaceRowMap { + const object = asObject(value); + if (!object) { + return fallbackRows; + } + + const normalized = emptySurfaceRows(); + for (const statement of FINANCIAL_STATEMENT_KINDS) { + const rows = Array.isArray(object[statement]) ? object[statement] : []; + normalized[statement] = rows + .map((entry) => { + const row = asObject(entry); + if (!row) { + return null; + } + + const key = asString(row.key); + const label = asString(row.label); + const category = asString(row.category); + const unit = asString(row.unit); + if (!key || !label || !category || !unit) { + return null; + } + + const normalizedStatement = asStatementKind(row.statement); + const resolutionMethod = row.resolutionMethod ?? row.resolution_method; + const confidence = row.confidence; + const normalizedRow: SurfaceFinancialRow = { + key, + label, + category: category as SurfaceFinancialRow['category'], + order: asNumber(row.order) ?? Number.MAX_SAFE_INTEGER, + unit: unit as SurfaceFinancialRow['unit'], + values: normalizeNumberMap(row.values), + sourceConcepts: normalizeStringArray(row.sourceConcepts ?? row.source_concepts), + sourceRowKeys: normalizeStringArray(row.sourceRowKeys ?? row.source_row_keys), + sourceFactIds: normalizeNumberArray(row.sourceFactIds ?? row.source_fact_ids), + formulaKey: asNullableString(row.formulaKey ?? row.formula_key), + hasDimensions: asBoolean(row.hasDimensions ?? row.has_dimensions), + resolvedSourceRowKeys: normalizeNullableStringMap(row.resolvedSourceRowKeys ?? row.resolved_source_row_keys) + }; + + const templateSection = asString(row.templateSection ?? row.template_section); + if (templateSection) { + normalizedRow.templateSection = templateSection as SurfaceFinancialRow['templateSection']; + } + if (normalizedStatement === 'income' || normalizedStatement === 'balance' || normalizedStatement === 'cash_flow') { + normalizedRow.statement = normalizedStatement; + } + + const detailCount = asNumber(row.detailCount ?? row.detail_count); + if (detailCount !== null) { + normalizedRow.detailCount = detailCount; + } + + if ( + resolutionMethod === 'direct' + || resolutionMethod === 'surface_bridge' + || resolutionMethod === 'formula_derived' + || resolutionMethod === 'not_meaningful' + ) { + normalizedRow.resolutionMethod = resolutionMethod; + } + + if (confidence === 'high' || confidence === 'medium' || confidence === 'low') { + normalizedRow.confidence = confidence; + } + + const warningCodes = normalizeStringArray(row.warningCodes ?? row.warning_codes); + if (warningCodes.length > 0) { + normalizedRow.warningCodes = warningCodes; + } + + return normalizedRow; + }) + .filter((entry): entry is SurfaceFinancialRow => entry !== null); + } + + return normalized; +} + +function normalizeDetailRows( + value: unknown, + fallbackRows: DetailRowMap = emptyDetailRows() +): DetailRowMap { + const object = asObject(value); + if (!object) { + return fallbackRows; + } + + const normalized = emptyDetailRows(); + for (const statement of FINANCIAL_STATEMENT_KINDS) { + const groups = asObject(object[statement]) ?? {}; + normalized[statement] = Object.fromEntries( + Object.entries(groups).map(([surfaceKey, rows]) => { + const normalizedRows = Array.isArray(rows) + ? rows + .map((entry) => { + const row = asObject(entry); + if (!row) { + return null; + } + + const key = asString(row.key) ?? asString(row.conceptKey ?? row.concept_key); + const label = asString(row.label); + const conceptKey = asString(row.conceptKey ?? row.concept_key); + const qname = asString(row.qname); + const namespaceUri = asString(row.namespaceUri ?? row.namespace_uri); + const localName = asString(row.localName ?? row.local_name); + if (!key || !label || !conceptKey || !qname || !namespaceUri || !localName) { + return null; + } + + return { + key, + parentSurfaceKey: asString(row.parentSurfaceKey ?? row.parent_surface_key) ?? surfaceKey, + label, + conceptKey, + qname, + namespaceUri, + localName, + unit: asNullableString(row.unit), + values: normalizeNumberMap(row.values), + sourceFactIds: normalizeNumberArray(row.sourceFactIds ?? row.source_fact_ids), + isExtension: asBoolean(row.isExtension ?? row.is_extension), + dimensionsSummary: normalizeStringArray(row.dimensionsSummary ?? row.dimensions_summary), + residualFlag: asBoolean(row.residualFlag ?? row.residual_flag) + }; + }) + .filter((entry): entry is DetailFinancialRow => entry !== null) + : []; + + return [surfaceKey, normalizedRows]; + }) + ); + } + + return normalized; +} + +function normalizeKpiRows(value: unknown) { + if (!Array.isArray(value)) { + return []; + } + + return value + .map((entry) => { + const row = asObject(entry); + if (!row) { + return null; + } + + const key = asString(row.key); + const label = asString(row.label); + const category = asString(row.category); + const unit = asString(row.unit); + const provenanceType = row.provenanceType ?? row.provenance_type; + if (!key || !label || !category || !unit || (provenanceType !== 'taxonomy' && provenanceType !== 'structured_note')) { + return null; + } + + return { + key, + label, + category: category as StructuredKpiRow['category'], + unit: unit as StructuredKpiRow['unit'], + order: asNumber(row.order) ?? Number.MAX_SAFE_INTEGER, + segment: asNullableString(row.segment), + axis: asNullableString(row.axis), + member: asNullableString(row.member), + values: normalizeNumberMap(row.values), + sourceConcepts: normalizeStringArray(row.sourceConcepts ?? row.source_concepts), + sourceFactIds: normalizeNumberArray(row.sourceFactIds ?? row.source_fact_ids), + provenanceType, + hasDimensions: asBoolean(row.hasDimensions ?? row.has_dimensions) + } satisfies StructuredKpiRow; + }) + .filter((entry): entry is StructuredKpiRow => entry !== null); +} + +function normalizeNormalizationSummary(value: unknown) { + const row = asObject(value); + if (!row) { + return null; + } + + return { + surfaceRowCount: asNumber(row.surfaceRowCount ?? row.surface_row_count) ?? 0, + detailRowCount: asNumber(row.detailRowCount ?? row.detail_row_count) ?? 0, + kpiRowCount: asNumber(row.kpiRowCount ?? row.kpi_row_count) ?? 0, + unmappedRowCount: asNumber(row.unmappedRowCount ?? row.unmapped_row_count) ?? 0, + materialUnmappedRowCount: asNumber(row.materialUnmappedRowCount ?? row.material_unmapped_row_count) ?? 0, + warnings: normalizeStringArray(row.warnings) + } satisfies NormalizationSummary; +} + +export function normalizeFilingTaxonomySnapshotPayload(input: { + periods: unknown; + faithful_rows: unknown; + statement_rows: unknown; + surface_rows: unknown; + detail_rows: unknown; + kpi_rows: unknown; + normalization_summary: unknown; +}) { + const faithfulRows = normalizeStatementRows(input.faithful_rows); + const statementRows = normalizeStatementRows(input.statement_rows, faithfulRows); + + return { + periods: normalizePeriods(input.periods), + faithful_rows: faithfulRows, + statement_rows: statementRows, + surface_rows: normalizeSurfaceRows(input.surface_rows), + detail_rows: normalizeDetailRows(input.detail_rows), + kpi_rows: normalizeKpiRows(input.kpi_rows), + normalization_summary: normalizeNormalizationSummary(input.normalization_summary) + }; +} + +function emptyStatementRows(): StatementRowMap { return { income: [], balance: [], @@ -320,7 +720,7 @@ function emptyStatementRows(): Record { +function emptySurfaceRows(): SurfaceRowMap { return { income: [], balance: [], @@ -330,7 +730,7 @@ function emptySurfaceRows(): Record { +function emptyDetailRows(): DetailRowMap { return { income: {}, balance: {}, @@ -341,7 +741,15 @@ function emptyDetailRows(): Record { } function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): FilingTaxonomySnapshotRecord { - const faithfulRows = row.faithful_rows ?? row.statement_rows ?? emptyStatementRows(); + const normalized = normalizeFilingTaxonomySnapshotPayload({ + periods: row.periods, + faithful_rows: row.faithful_rows, + statement_rows: row.statement_rows, + surface_rows: row.surface_rows, + detail_rows: row.detail_rows, + kpi_rows: row.kpi_rows, + normalization_summary: row.normalization_summary + }); return { id: row.id, @@ -356,15 +764,15 @@ function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): Fili parser_version: row.parser_version, taxonomy_regime: row.taxonomy_regime, fiscal_pack: row.fiscal_pack, - periods: row.periods ?? [], - faithful_rows: faithfulRows, - statement_rows: faithfulRows, - surface_rows: row.surface_rows ?? emptySurfaceRows(), - detail_rows: row.detail_rows ?? emptyDetailRows(), - kpi_rows: row.kpi_rows ?? [], + periods: normalized.periods, + faithful_rows: normalized.faithful_rows, + statement_rows: normalized.statement_rows, + surface_rows: normalized.surface_rows, + detail_rows: normalized.detail_rows, + kpi_rows: normalized.kpi_rows, derived_metrics: row.derived_metrics ?? null, validation_result: row.validation_result ?? null, - normalization_summary: row.normalization_summary ?? null, + normalization_summary: normalized.normalization_summary, facts_count: row.facts_count, concepts_count: row.concepts_count, dimensions_count: row.dimensions_count, @@ -552,6 +960,7 @@ export async function listFilingTaxonomyMetricValidations(snapshotId: number) { export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySnapshotInput) { const now = new Date().toISOString(); + const normalized = normalizeFilingTaxonomySnapshotPayload(input); const [saved] = await withFinancialIngestionSchemaRetry({ client: getSqliteClient(), @@ -570,15 +979,15 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn parser_version: input.parser_version, taxonomy_regime: input.taxonomy_regime, fiscal_pack: input.fiscal_pack, - periods: input.periods, - faithful_rows: input.faithful_rows, - statement_rows: input.statement_rows, - surface_rows: input.surface_rows, - detail_rows: input.detail_rows, - kpi_rows: input.kpi_rows, + periods: normalized.periods, + faithful_rows: normalized.faithful_rows, + statement_rows: normalized.statement_rows, + surface_rows: normalized.surface_rows, + detail_rows: normalized.detail_rows, + kpi_rows: normalized.kpi_rows, derived_metrics: input.derived_metrics, validation_result: input.validation_result, - normalization_summary: input.normalization_summary, + normalization_summary: normalized.normalization_summary, facts_count: input.facts_count, concepts_count: input.concepts_count, dimensions_count: input.dimensions_count, @@ -598,15 +1007,15 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn parser_version: input.parser_version, taxonomy_regime: input.taxonomy_regime, fiscal_pack: input.fiscal_pack, - periods: input.periods, - faithful_rows: input.faithful_rows, - statement_rows: input.statement_rows, - surface_rows: input.surface_rows, - detail_rows: input.detail_rows, - kpi_rows: input.kpi_rows, + periods: normalized.periods, + faithful_rows: normalized.faithful_rows, + statement_rows: normalized.statement_rows, + surface_rows: normalized.surface_rows, + detail_rows: normalized.detail_rows, + kpi_rows: normalized.kpi_rows, derived_metrics: input.derived_metrics, validation_result: input.validation_result, - normalization_summary: input.normalization_summary, + normalization_summary: normalized.normalization_summary, facts_count: input.facts_count, concepts_count: input.concepts_count, dimensions_count: input.dimensions_count, @@ -906,3 +1315,8 @@ export async function listTaxonomyAssetsBySnapshotIds(snapshotIds: number[]) { return rows.map(toAssetRecord); } + +export const __filingTaxonomyInternals = { + normalizeFilingTaxonomySnapshotPayload, + toSnapshotRecord +}; diff --git a/lib/server/task-processors.ts b/lib/server/task-processors.ts index 2c34991..ae631a2 100644 --- a/lib/server/task-processors.ts +++ b/lib/server/task-processors.ts @@ -23,6 +23,7 @@ import { } from '@/lib/server/repos/company-financial-bundles'; import { getFilingTaxonomySnapshotByFilingId, + normalizeFilingTaxonomySnapshotPayload, upsertFilingTaxonomySnapshot } from '@/lib/server/repos/filing-taxonomy'; import { @@ -726,6 +727,10 @@ async function processSyncFilings(task: Task) { filingUrl: filing.filing_url, primaryDocument: filing.primary_document ?? null }); + const normalizedSnapshot = { + ...snapshot, + ...normalizeFilingTaxonomySnapshotPayload(snapshot) + }; await setProjectionStage( task, @@ -752,8 +757,8 @@ async function processSyncFilings(task: Task) { stageContext('sync.persist_taxonomy') ); - await upsertFilingTaxonomySnapshot(snapshot); - await updateFilingMetricsById(filing.id, snapshot.derived_metrics); + await upsertFilingTaxonomySnapshot(normalizedSnapshot); + await updateFilingMetricsById(filing.id, normalizedSnapshot.derived_metrics); await deleteCompanyFinancialBundlesForTicker(filing.ticker); taxonomySnapshotsHydrated += 1; } catch (error) { diff --git a/lib/server/taxonomy/engine.test.ts b/lib/server/taxonomy/engine.test.ts index 785b8c9..8821156 100644 --- a/lib/server/taxonomy/engine.test.ts +++ b/lib/server/taxonomy/engine.test.ts @@ -43,11 +43,11 @@ function createHydrationResult(): TaxonomyHydrationResult { facts: [], metric_validations: [], normalization_summary: { - surfaceRowCount: 0, - detailRowCount: 0, - kpiRowCount: 0, - unmappedRowCount: 0, - materialUnmappedRowCount: 0, + surface_row_count: 0, + detail_row_count: 0, + kpi_row_count: 0, + unmapped_row_count: 0, + material_unmapped_row_count: 0, warnings: ['rust_warning'] } }; diff --git a/lib/server/taxonomy/types.ts b/lib/server/taxonomy/types.ts index 7f7de56..b37e81c 100644 --- a/lib/server/taxonomy/types.ts +++ b/lib/server/taxonomy/types.ts @@ -1,12 +1,7 @@ import type { Filing, FinancialStatementKind, - MetricValidationResult, - NormalizationSummary, - StructuredKpiRow, - SurfaceDetailMap, - SurfaceFinancialRow, - TaxonomyStatementRow + MetricValidationResult } from '@/lib/types'; import type { FilingTaxonomyAssetType, @@ -117,6 +112,98 @@ export type TaxonomyMetricValidationCheck = { error: string | null; }; +export type TaxonomyHydrationPeriod = { + id: string; + filing_id: number; + accession_number: string; + filing_date: string; + period_start: string | null; + period_end: string | null; + filing_type: '10-K' | '10-Q'; + period_label: string; +}; + +export type TaxonomyHydrationStatementRow = { + key: string; + label: string; + concept_key: string; + qname: string; + namespace_uri: string; + local_name: string; + is_extension: boolean; + statement: FinancialStatementKind; + role_uri: string | null; + order: number; + depth: number; + parent_key: string | null; + values: Record; + units: Record; + has_dimensions: boolean; + source_fact_ids: number[]; +}; + +export type TaxonomyHydrationSurfaceRow = { + key: string; + label: string; + category: string; + template_section?: string; + order: number; + unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio'; + values: Record; + source_concepts: string[]; + source_row_keys: string[]; + source_fact_ids: number[]; + formula_key: string | null; + has_dimensions: boolean; + resolved_source_row_keys: Record; + statement?: 'income' | 'balance' | 'cash_flow'; + detail_count?: number; + resolution_method?: 'direct' | 'surface_bridge' | 'formula_derived' | 'not_meaningful'; + confidence?: 'high' | 'medium' | 'low'; + warning_codes?: string[]; +}; + +export type TaxonomyHydrationDetailRow = { + key: string; + parent_surface_key: string; + label: string; + concept_key: string; + qname: string; + namespace_uri: string; + local_name: string; + unit: string | null; + values: Record; + source_fact_ids: number[]; + is_extension: boolean; + dimensions_summary: string[]; + residual_flag: boolean; +}; + +export type TaxonomyHydrationStructuredKpiRow = { + key: string; + label: string; + category: string; + unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio'; + order: number; + segment: string | null; + axis: string | null; + member: string | null; + values: Record; + source_concepts: string[]; + source_fact_ids: number[]; + provenance_type: 'taxonomy' | 'structured_note'; + has_dimensions: boolean; +}; + +export type TaxonomyHydrationNormalizationSummary = { + surface_row_count: number; + detail_row_count: number; + kpi_row_count: number; + unmapped_row_count: number; + material_unmapped_row_count: number; + warnings: string[]; +}; + export type TaxonomyHydrationInput = { filingId: number; ticker: string; @@ -140,12 +227,12 @@ export type TaxonomyHydrationResult = { parser_version: string; taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown'; fiscal_pack: string | null; - periods: FilingTaxonomyPeriod[]; - faithful_rows: Record; - statement_rows: Record; - surface_rows: Record; - detail_rows: Record; - kpi_rows: StructuredKpiRow[]; + periods: TaxonomyHydrationPeriod[]; + faithful_rows: Record; + statement_rows: Record; + surface_rows: Record; + detail_rows: Record>; + kpi_rows: TaxonomyHydrationStructuredKpiRow[]; contexts: Array<{ context_id: string; entity_identifier: string | null; @@ -191,5 +278,5 @@ export type TaxonomyHydrationResult = { source_file: string | null; }>; metric_validations: TaxonomyMetricValidationCheck[]; - normalization_summary: NormalizationSummary; + normalization_summary: TaxonomyHydrationNormalizationSummary; }; diff --git a/scripts/backfill-taxonomy-snapshots.ts b/scripts/backfill-taxonomy-snapshots.ts index b110e6e..3010d64 100644 --- a/scripts/backfill-taxonomy-snapshots.ts +++ b/scripts/backfill-taxonomy-snapshots.ts @@ -2,6 +2,7 @@ import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine'; import { listFilingsRecords, updateFilingMetricsById } from '@/lib/server/repos/filings'; import { getFilingTaxonomySnapshotByFilingId, + normalizeFilingTaxonomySnapshotPayload, upsertFilingTaxonomySnapshot } from '@/lib/server/repos/filing-taxonomy'; @@ -186,8 +187,12 @@ async function runBackfill(options: ScriptOptions): Promise { summary.wouldWrite += 1; if (options.apply) { - await upsertFilingTaxonomySnapshot(snapshot); - await updateFilingMetricsById(row.id, snapshot.derived_metrics); + const normalizedSnapshot = { + ...snapshot, + ...normalizeFilingTaxonomySnapshotPayload(snapshot) + }; + await upsertFilingTaxonomySnapshot(normalizedSnapshot); + await updateFilingMetricsById(row.id, normalizedSnapshot.derived_metrics); summary.written += 1; } } catch (error) { diff --git a/scripts/compare-fiscal-ai-statements.ts b/scripts/compare-fiscal-ai-statements.ts index 2ff1e42..8cbb951 100644 --- a/scripts/compare-fiscal-ai-statements.ts +++ b/scripts/compare-fiscal-ai-statements.ts @@ -312,12 +312,14 @@ function relativeDiff(left: number | null, right: number | null) { return Math.abs(left - right) / baseline; } -function periodStart(period: ResultPeriod) { - return period.periodStart ?? period.period_start ?? null; +function periodStart(period: ResultPeriod): string | null { + const start = ('periodStart' in period ? period.periodStart : undefined) ?? period.period_start ?? null; + return typeof start === 'string' ? start : null; } -function periodEnd(period: ResultPeriod) { - return period.periodEnd ?? period.period_end ?? null; +function periodEnd(period: ResultPeriod): string | null { + const end = ('periodEnd' in period ? period.periodEnd : undefined) ?? period.period_end ?? null; + return typeof end === 'string' ? end : null; } function chooseDurationPeriodId(result: TaxonomyHydrationResult) {