Fix financial taxonomy snapshot normalization

This commit is contained in:
2026-03-13 19:01:56 -04:00
parent b1c9c0ef08
commit 30977dc15f
16 changed files with 1273 additions and 156 deletions

View File

@@ -45,6 +45,7 @@ import {
type NumberScaleUnit
} from '@/lib/format';
import { buildGraphingHref } from '@/lib/graphing/catalog';
import { mergeFinancialPages } from '@/lib/financials/page-merge';
import {
buildStatementTree,
resolveStatementSelection,
@@ -63,7 +64,6 @@ import type {
RatioRow,
StandardizedFinancialRow,
StructuredKpiRow,
SurfaceDetailMap,
SurfaceFinancialRow,
TaxonomyStatementRow,
TrendSeries
@@ -345,90 +345,6 @@ function groupRows(rows: FlatDisplayRow[], categories: CompanyFinancialStatement
.filter((group) => group.rows.length > 0);
}
/**
 * Merges two surface-detail maps (surface key -> detail rows) into a new map.
 *
 * - If either side is null, the other side is returned as-is (not cloned).
 * - Otherwise `base` is deep-cloned and every row of `next` is folded in:
 *   rows are matched by `key`; unmatched rows are appended as clones, matched
 *   rows get their `values` overlaid (`next` wins per period) and their
 *   `sourceFactIds` / `dimensionsSummary` unioned without duplicates.
 *
 * Rows coming from legacy payloads may lack the array fields, and a surface
 * entry may not be an array at all; both are treated as empty instead of
 * throwing while spreading or iterating.
 *
 * @param base Previously accumulated detail map, or null.
 * @param next Detail map of the page being merged in, or null.
 * @returns The merged map, or whichever input is non-null.
 */
function mergeDetailMaps(base: SurfaceDetailMap | null, next: SurfaceDetailMap | null) {
  if (!base) {
    return next;
  }
  if (!next) {
    return base;
  }

  // Clone so that neither input is mutated by the in-place row updates below.
  const merged: SurfaceDetailMap = structuredClone(base);

  for (const [surfaceKey, detailRows] of Object.entries(next)) {
    const existingRows = merged[surfaceKey] ?? [];
    const rowMap = new Map(existingRows.map((row) => [row.key, row]));

    // A missing / non-array entry contributes no rows rather than crashing the merge.
    for (const detailRow of Array.isArray(detailRows) ? detailRows : []) {
      const existing = rowMap.get(detailRow.key);
      if (!existing) {
        rowMap.set(detailRow.key, structuredClone(detailRow));
        continue;
      }

      existing.values = {
        ...existing.values,
        ...detailRow.values
      };
      existing.sourceFactIds = [...new Set([...(existing.sourceFactIds ?? []), ...(detailRow.sourceFactIds ?? [])])];
      existing.dimensionsSummary = [...new Set([...(existing.dimensionsSummary ?? []), ...(detailRow.dimensionsSummary ?? [])])];
    }

    merged[surfaceKey] = [...rowMap.values()];
  }

  return merged;
}
/**
 * Folds a newly loaded page of financial statement data into the pages
 * accumulated so far.
 *
 * - With no `base`, `next` is returned unchanged.
 * - Periods from both pages are de-duplicated by `id` (first occurrence wins)
 *   and sorted ascending by `periodEnd`, falling back to `filingDate`.
 * - Statement, ratio and KPI rows are merged by `key` only when BOTH pages
 *   carry them; otherwise the value from `next` is used as-is.
 * - All remaining fields come from `next`, except `dimensionBreakdown`, which
 *   falls back to `base` when `next` has none.
 *
 * Missing `periods` / `faithful` / `standardized` arrays are treated as empty
 * so that partially populated payloads do not throw while spreading.
 *
 * @param base Accumulated response from earlier pages, or null for the first page.
 * @param next The page to merge in.
 * @returns The combined response.
 */
function mergeFinancialPages(
  base: CompanyFinancialStatementsResponse | null,
  next: CompanyFinancialStatementsResponse
) {
  if (!base) {
    return next;
  }

  // Set-based de-duplication keeps the first occurrence of each period id in linear time.
  const seenPeriodIds = new Set<string>();
  const periods = [...(base.periods ?? []), ...(next.periods ?? [])]
    .filter((period) => {
      if (seenPeriodIds.has(period.id)) {
        return false;
      }
      seenPeriodIds.add(period.id);
      return true;
    })
    .sort((left, right) => Date.parse(left.periodEnd ?? left.filingDate) - Date.parse(right.periodEnd ?? right.filingDate));

  // Collapses rows sharing a key: the first row is cloned, later rows only overlay `values`.
  const mergeRows = <T extends { key: string; values: Record<string, number | null> }>(rows: T[]) => {
    const map = new Map<string, T>();
    for (const row of rows) {
      const existing = map.get(row.key);
      if (!existing) {
        map.set(row.key, structuredClone(row));
        continue;
      }

      existing.values = {
        ...existing.values,
        ...row.values
      };
    }
    return [...map.values()];
  };

  return {
    ...next,
    periods,
    statementRows: next.statementRows && base.statementRows
      ? {
          faithful: mergeRows([...(base.statementRows.faithful ?? []), ...(next.statementRows.faithful ?? [])]),
          standardized: mergeRows([...(base.statementRows.standardized ?? []), ...(next.statementRows.standardized ?? [])])
        }
      : next.statementRows,
    statementDetails: mergeDetailMaps(base.statementDetails, next.statementDetails),
    ratioRows: next.ratioRows && base.ratioRows ? mergeRows([...base.ratioRows, ...next.ratioRows]) : next.ratioRows,
    kpiRows: next.kpiRows && base.kpiRows ? mergeRows([...base.kpiRows, ...next.kpiRows]) : next.kpiRows,
    trendSeries: next.trendSeries,
    categories: next.categories,
    dimensionBreakdown: next.dimensionBreakdown ?? base.dimensionBreakdown
  };
}
function ChartFrame({ children }: { children: React.ReactNode }) {
const containerRef = useRef<HTMLDivElement | null>(null);
const [ready, setReady] = useState(false);
@@ -1185,11 +1101,11 @@ function FinancialsPageContent() {
{isDerivedRow(selectedRow) ? (
<div className="rounded-lg border border-[color:var(--line-weak)] bg-[color:var(--panel-soft)] px-3 py-2">
<p className="text-[color:var(--terminal-muted)]">Source Row Keys</p>
<p className="font-semibold text-[color:var(--terminal-bright)]">{selectedRow.sourceRowKeys.join(', ') || 'n/a'}</p>
<p className="font-semibold text-[color:var(--terminal-bright)]">{(selectedRow.sourceRowKeys ?? []).join(', ') || 'n/a'}</p>
<p className="mt-2 text-[color:var(--terminal-muted)]">Source Concepts</p>
<p className="font-semibold text-[color:var(--terminal-bright)]">{selectedRow.sourceConcepts.join(', ') || 'n/a'}</p>
<p className="font-semibold text-[color:var(--terminal-bright)]">{(selectedRow.sourceConcepts ?? []).join(', ') || 'n/a'}</p>
<p className="mt-2 text-[color:var(--terminal-muted)]">Source Fact IDs</p>
<p className="font-semibold text-[color:var(--terminal-bright)]">{selectedRow.sourceFactIds.join(', ') || 'n/a'}</p>
<p className="font-semibold text-[color:var(--terminal-bright)]">{(selectedRow.sourceFactIds ?? []).join(', ') || 'n/a'}</p>
</div>
) : null}
@@ -1271,7 +1187,7 @@ function FinancialsPageContent() {
surfaceKind
})}</td>
<td>{check.status}</td>
<td>{check.evidencePages.join(', ') || 'n/a'}</td>
<td>{(check.evidencePages ?? []).join(', ') || 'n/a'}</td>
</tr>
))}
</tbody>

View File

@@ -31,8 +31,8 @@ function InspectorCard(props: {
);
}
function renderList(values: string[]) {
return values.length > 0 ? values.join(', ') : 'n/a';
/**
 * Formats a list of strings for display.
 *
 * @param values Strings to show; may be null or undefined.
 * @returns The entries joined with ", ", or 'n/a' when the list is missing or empty.
 */
function renderList(values: string[] | null | undefined) {
  const items = values ?? [];
  if (items.length === 0) {
    return 'n/a';
  }
  return items.join(', ');
}
export function StatementRowInspector(props: StatementRowInspectorProps) {
@@ -64,7 +64,7 @@ export function StatementRowInspector(props: StatementRowInspectorProps) {
</div>
<div className="grid grid-cols-1 gap-3 md:grid-cols-2">
<InspectorCard label="Source Fact IDs" value={selection.row.sourceFactIds.length > 0 ? selection.row.sourceFactIds.join(', ') : 'n/a'} />
<InspectorCard label="Source Fact IDs" value={(selection.row.sourceFactIds ?? []).length > 0 ? (selection.row.sourceFactIds ?? []).join(', ') : 'n/a'} />
<InspectorCard label="Warning Codes" value={renderList(selection.row.warningCodes ?? [])} />
</div>
@@ -136,7 +136,7 @@ export function StatementRowInspector(props: StatementRowInspectorProps) {
<div className="grid grid-cols-1 gap-3 md:grid-cols-2">
<InspectorCard label="Local Name" value={selection.row.localName} />
<InspectorCard label="Source Fact IDs" value={selection.row.sourceFactIds.length > 0 ? selection.row.sourceFactIds.join(', ') : 'n/a'} />
<InspectorCard label="Source Fact IDs" value={(selection.row.sourceFactIds ?? []).length > 0 ? (selection.row.sourceFactIds ?? []).join(', ') : 'n/a'} />
</div>
<div className="rounded-lg border border-[color:var(--line-weak)] bg-[color:var(--panel-soft)] px-3 py-2">

View File

@@ -0,0 +1,185 @@
import { describe, expect, it } from 'bun:test';
import { __financialPageMergeInternals } from './page-merge';
import type { CompanyFinancialStatementsResponse } from '@/lib/types';
/**
 * Test fixture factory: builds a complete financial statements response with
 * empty/neutral defaults and applies `partial` on top (shallow override, so a
 * provided top-level field replaces the default wholesale).
 *
 * The defaults are rebuilt on every call so tests never share nested objects.
 */
function createResponse(partial: Partial<CompanyFinancialStatementsResponse>): CompanyFinancialStatementsResponse {
  const defaults: CompanyFinancialStatementsResponse = {
    company: { ticker: 'MSFT', companyName: 'Microsoft Corporation', cik: null },
    surfaceKind: 'income_statement',
    cadence: 'annual',
    displayModes: ['standardized', 'faithful'],
    defaultDisplayMode: 'standardized',
    periods: [],
    statementRows: { faithful: [], standardized: [] },
    statementDetails: null,
    ratioRows: null,
    kpiRows: null,
    trendSeries: [],
    categories: [],
    availability: { adjusted: false, customMetrics: false },
    nextCursor: null,
    facts: null,
    coverage: { filings: 0, rows: 0, dimensions: 0, facts: 0 },
    dataSourceStatus: {
      enabled: true,
      hydratedFilings: 0,
      partialFilings: 0,
      failedFilings: 0,
      pendingFilings: 0,
      queuedSync: false
    },
    metrics: { taxonomy: null, validation: null },
    normalization: {
      parserEngine: 'fiscal-xbrl',
      regime: 'us-gaap',
      fiscalPack: 'core',
      parserVersion: '0.1.0',
      surfaceRowCount: 0,
      detailRowCount: 0,
      kpiRowCount: 0,
      unmappedRowCount: 0,
      materialUnmappedRowCount: 0,
      warnings: []
    },
    dimensionBreakdown: null
  };

  return { ...defaults, ...partial };
}
describe('financial page merge helpers', () => {
it('merges detail maps safely when legacy detail rows are missing arrays', () => {
const merged = __financialPageMergeInternals.mergeDetailMaps(
{
revenue: [{
key: 'detail',
parentSurfaceKey: 'revenue',
label: 'Detail',
conceptKey: 'detail',
qname: 'us-gaap:Detail',
namespaceUri: 'http://fasb.org/us-gaap/2024',
localName: 'Detail',
unit: 'iso4217:USD',
values: { p1: 1 },
sourceFactIds: undefined,
isExtension: false,
dimensionsSummary: undefined,
residualFlag: false
} as never]
},
{
revenue: [{
key: 'detail',
parentSurfaceKey: 'revenue',
label: 'Detail',
conceptKey: 'detail',
qname: 'us-gaap:Detail',
namespaceUri: 'http://fasb.org/us-gaap/2024',
localName: 'Detail',
unit: 'iso4217:USD',
values: { p2: 2 },
sourceFactIds: [2],
isExtension: false,
dimensionsSummary: ['region:americas'],
residualFlag: false
}]
}
);
expect(merged?.revenue?.[0]).toMatchObject({
values: { p1: 1, p2: 2 },
sourceFactIds: [2],
dimensionsSummary: ['region:americas']
});
});
it('merges paged financial responses safely when row arrays are partially missing', () => {
const base = createResponse({
periods: [{
id: 'p1',
filingId: 1,
accessionNumber: '0001',
filingDate: '2025-01-01',
periodStart: '2024-01-01',
periodEnd: '2024-12-31',
filingType: '10-K',
periodLabel: 'FY 2024'
}],
statementRows: {
faithful: undefined as never,
standardized: [{
key: 'revenue',
label: 'Revenue',
category: 'revenue',
order: 10,
unit: 'currency',
values: { p1: 1 },
sourceConcepts: [],
sourceRowKeys: [],
sourceFactIds: [],
formulaKey: null,
hasDimensions: false,
resolvedSourceRowKeys: { p1: 'revenue' }
}]
}
});
const next = createResponse({
periods: [{
id: 'p2',
filingId: 2,
accessionNumber: '0002',
filingDate: '2026-01-01',
periodStart: '2025-01-01',
periodEnd: '2025-12-31',
filingType: '10-K',
periodLabel: 'FY 2025'
}],
statementRows: {
faithful: [{
key: 'rev',
label: 'Revenue',
conceptKey: 'rev',
qname: 'us-gaap:Revenue',
namespaceUri: 'http://fasb.org/us-gaap/2024',
localName: 'Revenue',
isExtension: false,
statement: 'income',
roleUri: 'income',
order: 10,
depth: 0,
parentKey: null,
values: { p2: 2 },
units: { p2: 'iso4217:USD' },
hasDimensions: false,
sourceFactIds: []
}],
standardized: undefined as never
}
});
const merged = __financialPageMergeInternals.mergeFinancialPages(base, next);
expect(merged.periods.map((period) => period.id)).toEqual(['p1', 'p2']);
expect(merged.statementRows).toMatchObject({
faithful: [{ key: 'rev' }],
standardized: [{ key: 'revenue', values: { p1: 1 } }]
});
});
});

View File

@@ -0,0 +1,98 @@
import type {
CompanyFinancialStatementsResponse
} from '@/lib/types';
/**
 * Merges two statement-detail maps (surface key -> detail rows) into a new map.
 *
 * - If either side is null/empty-ish, the other side is returned as-is (not cloned).
 * - Otherwise `base` is deep-cloned and every row of `next` is folded in:
 *   rows are matched by `key`; unmatched rows are appended as clones, matched
 *   rows get their `values` overlaid (`next` wins per period) and their
 *   `sourceFactIds` / `dimensionsSummary` unioned without duplicates.
 *
 * Legacy payloads may lack the array fields on a row, or carry a surface entry
 * that is not an array at all; both are treated as empty instead of throwing.
 *
 * @param base Previously accumulated detail map, or null.
 * @param next Detail map of the page being merged in, or null.
 * @returns The merged map, or whichever input is non-null.
 */
export function mergeDetailMaps(
  base: CompanyFinancialStatementsResponse['statementDetails'],
  next: CompanyFinancialStatementsResponse['statementDetails']
) {
  if (!base) {
    return next;
  }
  if (!next) {
    return base;
  }

  // Clone so that neither input is mutated by the in-place row updates below.
  const merged: NonNullable<CompanyFinancialStatementsResponse['statementDetails']> = structuredClone(base);

  for (const [surfaceKey, detailRows] of Object.entries(next)) {
    const existingRows = merged[surfaceKey] ?? [];
    const rowMap = new Map(existingRows.map((row) => [row.key, row]));

    // A missing / non-array entry contributes no rows rather than crashing the merge.
    for (const detailRow of Array.isArray(detailRows) ? detailRows : []) {
      const existing = rowMap.get(detailRow.key);
      if (!existing) {
        rowMap.set(detailRow.key, structuredClone(detailRow));
        continue;
      }

      existing.values = {
        ...existing.values,
        ...detailRow.values
      };
      existing.sourceFactIds = [...new Set([...(existing.sourceFactIds ?? []), ...(detailRow.sourceFactIds ?? [])])];
      existing.dimensionsSummary = [...new Set([...(existing.dimensionsSummary ?? []), ...(detailRow.dimensionsSummary ?? [])])];
    }

    merged[surfaceKey] = [...rowMap.values()];
  }

  return merged;
}
/**
 * Folds a newly loaded page of financial statement data into the pages
 * accumulated so far.
 *
 * - With no `base`, `next` is returned unchanged.
 * - Periods from both pages are de-duplicated by `id` (first occurrence wins)
 *   and sorted ascending by `periodEnd`, falling back to `filingDate`.
 * - Statement, ratio and KPI rows are merged by `key` only when BOTH pages
 *   carry them; otherwise the value from `next` is used as-is.
 * - All remaining fields come from `next`, except `dimensionBreakdown`, which
 *   falls back to `base` when `next` has none.
 *
 * Missing `periods` / `faithful` / `standardized` arrays are treated as empty
 * so that partially populated payloads do not throw while spreading.
 *
 * @param base Accumulated response from earlier pages, or null for the first page.
 * @param next The page to merge in.
 * @returns The combined response.
 */
export function mergeFinancialPages(
  base: CompanyFinancialStatementsResponse | null,
  next: CompanyFinancialStatementsResponse
) {
  if (!base) {
    return next;
  }

  // Set-based de-duplication keeps the first occurrence of each period id in linear time.
  const seenPeriodIds = new Set<string>();
  const periods = [...(base.periods ?? []), ...(next.periods ?? [])]
    .filter((period) => {
      if (seenPeriodIds.has(period.id)) {
        return false;
      }
      seenPeriodIds.add(period.id);
      return true;
    })
    .sort((left, right) => Date.parse(left.periodEnd ?? left.filingDate) - Date.parse(right.periodEnd ?? right.filingDate));

  // Collapses rows sharing a key: the first row is cloned, later rows only overlay `values`.
  const mergeRows = <T extends { key: string; values: Record<string, number | null> }>(rows: T[]) => {
    const map = new Map<string, T>();
    for (const row of rows) {
      const existing = map.get(row.key);
      if (!existing) {
        map.set(row.key, structuredClone(row));
        continue;
      }

      existing.values = {
        ...existing.values,
        ...row.values
      };
    }
    return [...map.values()];
  };

  return {
    ...next,
    periods,
    statementRows: next.statementRows && base.statementRows
      ? {
          faithful: mergeRows([...(base.statementRows.faithful ?? []), ...(next.statementRows.faithful ?? [])]),
          standardized: mergeRows([...(base.statementRows.standardized ?? []), ...(next.statementRows.standardized ?? [])])
        }
      : next.statementRows,
    statementDetails: mergeDetailMaps(base.statementDetails, next.statementDetails),
    // Both operands are known to be truthy inside the merge branch, so no extra fallback is needed.
    ratioRows: next.ratioRows && base.ratioRows
      ? mergeRows([...base.ratioRows, ...next.ratioRows])
      : next.ratioRows,
    kpiRows: next.kpiRows && base.kpiRows
      ? mergeRows([...base.kpiRows, ...next.kpiRows])
      : next.kpiRows,
    trendSeries: next.trendSeries,
    categories: next.categories,
    dimensionBreakdown: next.dimensionBreakdown ?? base.dimensionBreakdown
  };
}
/** Bundles the merge helpers under one export so tests can reach them via a single import. */
export const __financialPageMergeInternals = {
  mergeDetailMaps: mergeDetailMaps,
  mergeFinancialPages: mergeFinancialPages
};

View File

@@ -160,6 +160,28 @@ describe('statement view model', () => {
expect(child?.kind === 'surface' && child.expanded).toBe(true);
});
it('does not throw when legacy surface rows are missing source arrays', () => {
const malformedRow = {
...createSurfaceRow({ key: 'revenue', label: 'Revenue', category: 'revenue', values: { p1: 100 } }),
sourceConcepts: undefined,
sourceRowKeys: undefined
} as unknown as SurfaceFinancialRow;
const model = buildStatementTree({
surfaceKind: 'income_statement',
rows: [malformedRow],
statementDetails: null,
categories: [],
searchQuery: 'revenue',
expandedRowKeys: new Set()
});
expect(model.sections[0]?.nodes[0]).toMatchObject({
kind: 'surface',
row: { key: 'revenue' }
});
});
it('keeps not meaningful rows visible and resolves selections for surface and detail nodes', () => {
const rows = [
createSurfaceRow({

View File

@@ -99,8 +99,8 @@ function searchTextForSurface(row: SurfaceFinancialRow) {
return [
row.label,
row.key,
...row.sourceConcepts,
...row.sourceRowKeys,
...(row.sourceConcepts ?? []),
...(row.sourceRowKeys ?? []),
...(row.warningCodes ?? [])
]
.join(' ')
@@ -115,7 +115,7 @@ function searchTextForDetail(row: DetailFinancialRow) {
row.conceptKey,
row.qname,
row.localName,
...row.dimensionsSummary
...(row.dimensionsSummary ?? [])
]
.join(' ')
.toLowerCase();

View File

@@ -1811,6 +1811,56 @@ describe('financial taxonomy internals', () => {
});
});
it('aggregates persisted detail rows when legacy snapshots are missing dimension arrays', () => {
const snapshot = {
...createSnapshot({
filingId: 21,
filingType: '10-K',
filingDate: '2026-02-22',
statement: 'income',
periods: [
{ id: '2025-fy', periodStart: '2025-01-01', periodEnd: '2025-12-31', periodLabel: '2025 FY' }
]
}),
detail_rows: {
income: {
revenue: [{
key: 'revenue_detail',
parentSurfaceKey: 'revenue',
label: 'Revenue Detail',
conceptKey: 'us-gaap:RevenueDetail',
qname: 'us-gaap:RevenueDetail',
namespaceUri: 'http://fasb.org/us-gaap/2024',
localName: 'RevenueDetail',
unit: 'iso4217:USD',
values: { '2025-fy': 123_000_000 },
sourceFactIds: undefined,
isExtension: false,
dimensionsSummary: undefined,
residualFlag: false
} as unknown as FilingTaxonomySnapshotRecord['detail_rows']['income'][string][number]]
},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {}
}
} satisfies FilingTaxonomySnapshotRecord;
const rows = __financialTaxonomyInternals.aggregateDetailRows({
snapshots: [snapshot],
statement: 'income',
selectedPeriodIds: new Set(['2025-fy'])
});
expect(rows.revenue).toHaveLength(1);
expect(rows.revenue?.[0]).toMatchObject({
key: 'revenue_detail',
sourceFactIds: [],
dimensionsSummary: []
});
});
it('builds normalization metadata from snapshot fiscal pack and counts', () => {
const snapshot = {
...createSnapshot({

View File

@@ -78,6 +78,49 @@ type FilingDocumentRef = {
primaryDocument: string | null;
};
/** Type guard: true only for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
/**
 * Checks that a cached derived row is an object whose `sourceConcepts`,
 * `sourceRowKeys` and `sourceFactIds` fields are all arrays.
 */
function hasRequiredDerivedRowArrays(row: unknown) {
  if (!isRecord(row)) {
    return false;
  }

  const requiredArrays = [row.sourceConcepts, row.sourceRowKeys, row.sourceFactIds];
  return requiredArrays.every((field) => Array.isArray(field));
}
/**
 * Checks that a cached detail row is an object whose `sourceFactIds` and
 * `dimensionsSummary` fields are both arrays.
 */
function hasRequiredDetailRowArrays(row: unknown) {
  if (!isRecord(row)) {
    return false;
  }

  const requiredArrays = [row.sourceFactIds, row.dimensionsSummary];
  return requiredArrays.every((field) => Array.isArray(field));
}
/**
 * Type guard for a cached statement bundle.
 *
 * Accepts the payload only when `rows` is an array of derived rows with all
 * required source arrays, and `detailRows` is an object whose every value is
 * an array of detail rows with their required arrays.
 */
function isValidStatementBundlePayload(value: unknown): value is StandardizedStatementBundlePayload {
  if (!isRecord(value)) {
    return false;
  }

  const { rows, detailRows } = value;
  if (!Array.isArray(rows) || !isRecord(detailRows)) {
    return false;
  }

  if (rows.some((row) => !hasRequiredDerivedRowArrays(row))) {
    return false;
  }

  for (const group of Object.values(detailRows)) {
    if (!Array.isArray(group) || group.some((row) => !hasRequiredDetailRowArrays(row))) {
      return false;
    }
  }

  return true;
}
/**
 * Type guard for a cached ratio bundle.
 *
 * The predicate narrows to `ratioRows`, `trendSeries` and `categories`, so all
 * three are verified to be arrays; previously only `ratioRows` was inspected,
 * letting a payload without the other two pass as fully typed. Each ratio row
 * must additionally carry the required source arrays.
 *
 * NOTE(review): assumes `trendSeries` and `categories` are always arrays in a
 * well-formed bundle — confirm against the bundle builder.
 */
function isValidRatioBundlePayload(value: unknown): value is Pick<CompanyFinancialStatementsResponse, 'ratioRows' | 'trendSeries' | 'categories'> {
  return isRecord(value)
    && Array.isArray(value.ratioRows)
    && Array.isArray(value.trendSeries)
    && Array.isArray(value.categories)
    && value.ratioRows.every((row) => hasRequiredDerivedRowArrays(row));
}
/**
 * Type guard for a cached KPI bundle.
 *
 * The predicate narrows to `kpiRows`, `trendSeries` and `categories`, so all
 * three are verified to be arrays; previously only `kpiRows` was inspected.
 * Each KPI row must be an object with array `sourceConcepts` and
 * `sourceFactIds` fields.
 *
 * NOTE(review): assumes `trendSeries` and `categories` are always arrays in a
 * well-formed bundle — confirm against the bundle builder.
 */
function isValidKpiBundlePayload(value: unknown): value is Pick<CompanyFinancialStatementsResponse, 'kpiRows' | 'trendSeries' | 'categories'> {
  return isRecord(value)
    && Array.isArray(value.kpiRows)
    && Array.isArray(value.trendSeries)
    && Array.isArray(value.categories)
    && value.kpiRows.every((row) => isRecord(row) && Array.isArray(row.sourceConcepts) && Array.isArray(row.sourceFactIds));
}
function safeTicker(input: string) {
return input.trim().toUpperCase();
}
@@ -350,7 +393,7 @@ function detailConceptIdentity(row: DetailFinancialRow) {
}
function detailMergeKey(row: DetailFinancialRow) {
const dimensionsKey = [...row.dimensionsSummary]
const dimensionsKey = [...(row.dimensionsSummary ?? [])]
.map((value) => value.trim().toLowerCase())
.filter((value) => value.length > 0)
.sort((left, right) => left.localeCompare(right))
@@ -713,10 +756,9 @@ async function buildStatementSurfaceBundle(input: {
if (
cached
&& Array.isArray((cached as Partial<StandardizedStatementBundlePayload>).rows)
&& typeof (cached as Partial<StandardizedStatementBundlePayload>).detailRows === 'object'
&& isValidStatementBundlePayload(cached)
) {
return cached as StandardizedStatementBundlePayload;
return cached;
}
const statement = surfaceToStatementKind(input.surfaceKind);
@@ -794,8 +836,8 @@ async function buildRatioSurfaceBundle(input: {
snapshots: input.snapshots
});
if (cached) {
return cached as Pick<CompanyFinancialStatementsResponse, 'ratioRows' | 'trendSeries' | 'categories'>;
if (cached && isValidRatioBundlePayload(cached)) {
return cached;
}
const pricesByDate = await getHistoricalClosingPrices(input.ticker, input.periods.map((period) => latestPeriodDate(period)));
@@ -846,8 +888,8 @@ async function buildKpiSurfaceBundle(input: {
snapshots: input.snapshots
});
if (cached) {
return cached as Pick<CompanyFinancialStatementsResponse, 'kpiRows' | 'trendSeries' | 'categories'>;
if (cached && isValidKpiBundlePayload(cached)) {
return cached;
}
const persistedRows = aggregatePersistedKpiRows({

View File

@@ -7,7 +7,7 @@ import { db, getSqliteClient } from '@/lib/server/db';
import { withFinancialIngestionSchemaRetry } from '@/lib/server/db/financial-ingestion-schema';
import { companyFinancialBundle } from '@/lib/server/db/schema';
export const CURRENT_COMPANY_FINANCIAL_BUNDLE_VERSION = 14;
export const CURRENT_COMPANY_FINANCIAL_BUNDLE_VERSION = 15;
export type CompanyFinancialBundleRecord = {
id: number;

View File

@@ -0,0 +1,291 @@
import { describe, expect, it } from 'bun:test';
import { __filingTaxonomyInternals } from './filing-taxonomy';
describe('filing taxonomy snapshot normalization', () => {
it('normalizes legacy snake_case nested snapshot payloads in toSnapshotRecord', () => {
const record = __filingTaxonomyInternals.toSnapshotRecord({
id: 1,
filing_id: 10,
ticker: 'MSFT',
filing_date: '2026-01-28',
filing_type: '10-Q',
parse_status: 'ready',
parse_error: null,
source: 'xbrl_instance',
parser_engine: 'fiscal-xbrl',
parser_version: '0.1.0',
taxonomy_regime: 'us-gaap',
fiscal_pack: 'core',
periods: [{
id: 'fy-2025',
filing_id: 10,
accession_number: '0001',
filing_date: '2026-01-28',
period_start: '2025-01-01',
period_end: '2025-12-31',
filing_type: '10-Q',
period_label: 'FY 2025'
}],
faithful_rows: {
income: [{
key: 'revenue',
label: 'Revenue',
concept_key: 'us-gaap:Revenue',
qname: 'us-gaap:Revenue',
namespace_uri: 'http://fasb.org/us-gaap/2025',
local_name: 'Revenue',
is_extension: false,
statement: 'income',
role_uri: 'income',
order: 10,
depth: 0,
parent_key: null,
values: { 'fy-2025': 10 },
units: { 'fy-2025': 'iso4217:USD' },
has_dimensions: false,
source_fact_ids: [1]
}],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
statement_rows: null,
surface_rows: {
income: [{
key: 'revenue',
label: 'Revenue',
category: 'revenue',
template_section: 'revenue',
order: 10,
unit: 'currency',
values: { 'fy-2025': 10 },
source_concepts: ['us-gaap:Revenue'],
source_row_keys: ['revenue'],
source_fact_ids: [1],
formula_key: null,
has_dimensions: false,
resolved_source_row_keys: { 'fy-2025': 'revenue' },
statement: 'income',
detail_count: 1,
resolution_method: 'direct',
confidence: 'high',
warning_codes: ['legacy_surface']
}],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
detail_rows: {
income: {
revenue: [{
key: 'revenue_detail',
parent_surface_key: 'revenue',
label: 'Revenue Detail',
concept_key: 'us-gaap:RevenueDetail',
qname: 'us-gaap:RevenueDetail',
namespace_uri: 'http://fasb.org/us-gaap/2025',
local_name: 'RevenueDetail',
unit: 'iso4217:USD',
values: { 'fy-2025': 10 },
source_fact_ids: [2],
is_extension: false,
dimensions_summary: ['region:americas'],
residual_flag: false
}]
},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {}
},
kpi_rows: [{
key: 'cloud_growth',
label: 'Cloud Growth',
category: 'operating_kpi',
unit: 'percent',
order: 10,
segment: null,
axis: null,
member: null,
values: { 'fy-2025': 0.25 },
source_concepts: ['msft:CloudGrowth'],
source_fact_ids: [3],
provenance_type: 'taxonomy',
has_dimensions: false
}],
derived_metrics: null,
validation_result: null,
normalization_summary: {
surface_row_count: 1,
detail_row_count: 1,
kpi_row_count: 1,
unmapped_row_count: 0,
material_unmapped_row_count: 0,
warnings: ['legacy_warning']
},
facts_count: 3,
concepts_count: 3,
dimensions_count: 1,
created_at: '2026-01-28T00:00:00.000Z',
updated_at: '2026-01-28T00:00:00.000Z'
} as never);
expect(record.periods[0]).toMatchObject({
filingId: 10,
accessionNumber: '0001',
filingDate: '2026-01-28',
periodStart: '2025-01-01',
periodEnd: '2025-12-31',
periodLabel: 'FY 2025'
});
expect(record.faithful_rows.income[0]).toMatchObject({
conceptKey: 'us-gaap:Revenue',
namespaceUri: 'http://fasb.org/us-gaap/2025',
localName: 'Revenue',
roleUri: 'income',
parentKey: null,
hasDimensions: false,
sourceFactIds: [1]
});
expect(record.surface_rows.income[0]).toMatchObject({
templateSection: 'revenue',
sourceConcepts: ['us-gaap:Revenue'],
sourceRowKeys: ['revenue'],
sourceFactIds: [1],
formulaKey: null,
hasDimensions: false,
resolvedSourceRowKeys: { 'fy-2025': 'revenue' },
detailCount: 1,
resolutionMethod: 'direct',
warningCodes: ['legacy_surface']
});
expect(record.detail_rows.income.revenue?.[0]).toMatchObject({
parentSurfaceKey: 'revenue',
conceptKey: 'us-gaap:RevenueDetail',
namespaceUri: 'http://fasb.org/us-gaap/2025',
sourceFactIds: [2],
dimensionsSummary: ['region:americas'],
residualFlag: false
});
expect(record.kpi_rows[0]).toMatchObject({
sourceConcepts: ['msft:CloudGrowth'],
sourceFactIds: [3],
provenanceType: 'taxonomy',
hasDimensions: false
});
expect(record.normalization_summary).toEqual({
surfaceRowCount: 1,
detailRowCount: 1,
kpiRowCount: 1,
unmappedRowCount: 0,
materialUnmappedRowCount: 0,
warnings: ['legacy_warning']
});
});
it('keeps mixed camelCase and snake_case payloads compatible', () => {
const normalized = __filingTaxonomyInternals.normalizeFilingTaxonomySnapshotPayload({
periods: [{
id: 'fy-2025',
filingId: 10,
accessionNumber: '0001',
filingDate: '2026-01-28',
periodStart: '2025-01-01',
periodEnd: '2025-12-31',
filingType: '10-K',
periodLabel: 'FY 2025'
}],
faithful_rows: {
income: [{
key: 'revenue',
label: 'Revenue',
conceptKey: 'us-gaap:Revenue',
qname: 'us-gaap:Revenue',
namespaceUri: 'http://fasb.org/us-gaap/2025',
localName: 'Revenue',
isExtension: false,
statement: 'income',
roleUri: 'income',
order: 10,
depth: 0,
parentKey: null,
values: { 'fy-2025': 10 },
units: { 'fy-2025': 'iso4217:USD' },
hasDimensions: false,
sourceFactIds: [1]
}],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
statement_rows: null,
surface_rows: {
income: [{
key: 'revenue',
label: 'Revenue',
category: 'revenue',
order: 10,
unit: 'currency',
values: { 'fy-2025': 10 },
source_concepts: ['us-gaap:Revenue'],
source_row_keys: ['revenue'],
source_fact_ids: [1],
formula_key: null,
has_dimensions: false,
resolved_source_row_keys: { 'fy-2025': 'revenue' }
}],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
detail_rows: {
income: {
revenue: [{
key: 'revenue_detail',
parentSurfaceKey: 'revenue',
label: 'Revenue Detail',
conceptKey: 'us-gaap:RevenueDetail',
qname: 'us-gaap:RevenueDetail',
namespaceUri: 'http://fasb.org/us-gaap/2025',
localName: 'RevenueDetail',
unit: 'iso4217:USD',
values: { 'fy-2025': 10 },
sourceFactIds: [2],
isExtension: false,
dimensionsSummary: [],
residualFlag: false
}]
},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {}
},
kpi_rows: [],
normalization_summary: {
surfaceRowCount: 1,
detail_row_count: 1,
kpiRowCount: 0,
unmapped_row_count: 0,
materialUnmappedRowCount: 0,
warnings: []
}
});
expect(normalized.periods[0]?.filingId).toBe(10);
expect(normalized.surface_rows.income[0]?.sourceConcepts).toEqual(['us-gaap:Revenue']);
expect(normalized.detail_rows.income.revenue?.[0]?.parentSurfaceKey).toBe('revenue');
expect(normalized.normalization_summary).toEqual({
surfaceRowCount: 1,
detailRowCount: 1,
kpiRowCount: 0,
unmappedRowCount: 0,
materialUnmappedRowCount: 0,
warnings: []
});
});
});

View File

@@ -1,5 +1,6 @@
import { and, desc, eq, gte, inArray, lt, sql } from 'drizzle-orm';
import type {
DetailFinancialRow,
Filing,
FinancialStatementKind,
MetricValidationResult,
@@ -283,6 +284,18 @@ export type UpsertFilingTaxonomySnapshotInput = {
}>;
};
// Statement kinds that the snapshot normalizers iterate over when rebuilding
// per-statement row maps. `as const satisfies` keeps the literal tuple type
// while checking every entry against FinancialStatementKind.
const FINANCIAL_STATEMENT_KINDS = [
'income',
'balance',
'cash_flow',
'equity',
'comprehensive_income'
] as const satisfies FinancialStatementKind[];
// Faithful taxonomy statement rows, grouped by statement kind.
type StatementRowMap = Record<FinancialStatementKind, TaxonomyStatementRow[]>;
// Surface rows, grouped by statement kind.
type SurfaceRowMap = Record<FinancialStatementKind, SurfaceFinancialRow[]>;
// Surface detail maps, grouped by statement kind.
type DetailRowMap = Record<FinancialStatementKind, SurfaceDetailMap>;
function tenYearsAgoIso() {
const date = new Date();
date.setUTCFullYear(date.getUTCFullYear() - 10);
@@ -310,7 +323,394 @@ function asNumericText(value: number | null) {
return String(value);
}
function emptyStatementRows(): Record<FinancialStatementKind, TaxonomyStatementRow[]> {
/**
 * Returns `value` viewed as a string-keyed record when it is a non-null,
 * non-array object; otherwise returns null.
 */
function asObject(value: unknown) {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
/** Returns `value` when it is a string (including the empty string); otherwise null. */
function asString(value: unknown) {
  if (typeof value !== 'string') {
    return null;
  }
  return value;
}
/**
 * Coerces a value destined for a nullable string field.
 *
 * @returns `value` when it is a string; null for everything else (null,
 * undefined and non-string values alike). The former nested ternary returned
 * null on both of its branches, so it has been collapsed.
 */
function asNullableString(value: unknown) {
  return typeof value === 'string' ? value : null;
}
/**
 * Coerces any value to a boolean: booleans pass through, everything else is
 * converted by JavaScript truthiness.
 *
 * NOTE(review): truthiness means non-empty strings such as 'false' or '0'
 * become true — confirm no legacy payload encodes flags as strings.
 */
function asBoolean(value: unknown) {
  if (typeof value === 'boolean') {
    return value;
  }
  return Boolean(value);
}
/**
 * Returns `value` when it is exactly one of the five known statement kind
 * strings; otherwise null.
 */
function asStatementKind(value: unknown): FinancialStatementKind | null {
  const knownKinds = ['income', 'balance', 'cash_flow', 'equity', 'comprehensive_income'] as const;
  const match = knownKinds.find((kind) => kind === value);
  return match ?? null;
}
/**
 * Rebuilds a keyed map with every entry passed through `asNumber`.
 * A value that is not a plain object yields an empty map.
 */
function normalizeNumberMap(value: unknown) {
  const source = asObject(value);
  return source
    ? Object.fromEntries(Object.keys(source).map((key) => [key, asNumber(source[key])]))
    : {};
}
/**
 * Rebuilds a keyed map with every entry passed through `asNullableString`,
 * so non-string entries become null. A value that is not a plain object
 * yields an empty map.
 */
function normalizeNullableStringMap(value: unknown) {
  const source = asObject(value);
  return source
    ? Object.fromEntries(Object.keys(source).map((key) => [key, asNullableString(source[key])]))
    : {};
}
/**
 * Returns the string entries of `value` in their original order.
 * Non-array input yields an empty array; non-string entries are dropped.
 */
function normalizeStringArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  const strings: string[] = [];
  for (const entry of value) {
    if (typeof entry === 'string') {
      strings.push(entry);
    }
  }
  return strings;
}
/**
 * Passes every entry of `value` through `asNumber` and keeps the non-null
 * results in order. Non-array input yields an empty array.
 */
function normalizeNumberArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  return value.flatMap((entry) => {
    const parsed = asNumber(entry);
    return parsed === null ? [] : [parsed];
  });
}
/**
 * Normalizes a persisted `periods` payload into `FilingTaxonomyPeriod` objects.
 *
 * Each field is read from its camelCase key first and its snake_case key
 * second. An entry is dropped when it is not an object or when any of id,
 * filing id, accession number, filing date, filing type or period label is
 * missing. The filing type is '10-K' if either key holds '10-K', else '10-Q'
 * if either key holds '10-Q'; any other value invalidates the entry.
 *
 * @returns The valid periods in input order; an empty array for non-array input.
 */
function normalizePeriods(value: unknown): FilingTaxonomyPeriod[] {
  if (!Array.isArray(value)) {
    return [];
  }

  const toPeriod = (entry: unknown) => {
    const raw = asObject(entry);
    if (!raw) {
      return null;
    }

    const id = asString(raw.id);
    const filingId = asNumber(raw.filingId ?? raw.filing_id);
    const accessionNumber = asString(raw.accessionNumber ?? raw.accession_number);
    const filingDate = asString(raw.filingDate ?? raw.filing_date);
    const periodLabel = asString(raw.periodLabel ?? raw.period_label);

    // '10-K' under either key takes priority over '10-Q' under either key.
    let filingType: '10-K' | '10-Q' | null = null;
    if (raw.filingType === '10-K' || raw.filing_type === '10-K') {
      filingType = '10-K';
    } else if (raw.filingType === '10-Q' || raw.filing_type === '10-Q') {
      filingType = '10-Q';
    }

    if (!id || filingId === null || !accessionNumber || !filingDate || !filingType || !periodLabel) {
      return null;
    }

    return {
      id,
      filingId,
      accessionNumber,
      filingDate,
      periodStart: asNullableString(raw.periodStart ?? raw.period_start),
      periodEnd: asNullableString(raw.periodEnd ?? raw.period_end),
      filingType,
      periodLabel
    } satisfies FilingTaxonomyPeriod;
  };

  return value
    .map((entry) => toPeriod(entry))
    .filter((period): period is FilingTaxonomyPeriod => period !== null);
}
/**
 * Normalizes persisted faithful statement rows, grouped by statement kind.
 *
 * Row fields are read from their camelCase key first and snake_case key
 * second. A row is dropped when it is not an object or lacks any of key,
 * label, concept key, qname, namespace URI or local name; the row key falls
 * back to the concept key. Missing order sorts last (MAX_SAFE_INTEGER),
 * missing depth is 0, and an unrecognized `statement` falls back to the
 * group the row was stored under.
 *
 * @param value Raw persisted payload.
 * @param fallbackRows Returned unchanged when `value` is not a plain object.
 * @returns A map with an array (possibly empty) for every statement kind.
 */
function normalizeStatementRows(
  value: unknown,
  fallbackRows: StatementRowMap = emptyStatementRows()
): StatementRowMap {
  const source = asObject(value);
  if (!source) {
    return fallbackRows;
  }

  // Converts one raw entry into a statement row, or null when it is unusable.
  const toStatementRow = (entry: unknown, groupStatement: (typeof FINANCIAL_STATEMENT_KINDS)[number]) => {
    const raw = asObject(entry);
    if (!raw) {
      return null;
    }

    const conceptKey = asString(raw.conceptKey ?? raw.concept_key);
    const key = asString(raw.key) ?? conceptKey;
    const label = asString(raw.label);
    const qname = asString(raw.qname);
    const namespaceUri = asString(raw.namespaceUri ?? raw.namespace_uri);
    const localName = asString(raw.localName ?? raw.local_name);

    if (!key || !label || !conceptKey || !qname || !namespaceUri || !localName) {
      return null;
    }

    return {
      key,
      label,
      conceptKey,
      qname,
      namespaceUri,
      localName,
      isExtension: asBoolean(raw.isExtension ?? raw.is_extension),
      statement: asStatementKind(raw.statement) ?? groupStatement,
      roleUri: asNullableString(raw.roleUri ?? raw.role_uri),
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      depth: asNumber(raw.depth) ?? 0,
      parentKey: asNullableString(raw.parentKey ?? raw.parent_key),
      values: normalizeNumberMap(raw.values),
      units: normalizeNullableStringMap(raw.units),
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
      sourceFactIds: normalizeNumberArray(raw.sourceFactIds ?? raw.source_fact_ids)
    };
  };

  const result = emptyStatementRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const candidates = source[statement];
    const entries: unknown[] = Array.isArray(candidates) ? candidates : [];
    result[statement] = entries
      .map((entry) => toStatementRow(entry, statement))
      .filter((row): row is TaxonomyStatementRow => row !== null);
  }

  return result;
}
/**
 * Normalizes persisted surface rows, grouped by statement kind.
 *
 * Row fields are read from their camelCase key first and snake_case key
 * second. A row is dropped when it is not an object or lacks key, label,
 * category or unit. Source arrays default to empty arrays and a missing
 * order sorts last (MAX_SAFE_INTEGER). Optional fields (template section,
 * statement, detail count, resolution method, confidence, warning codes) are
 * only set when the stored value is present and recognized; `statement` is
 * kept only for income, balance and cash_flow.
 *
 * @param value Raw persisted payload.
 * @param fallbackRows Returned unchanged when `value` is not a plain object.
 * @returns A map with an array (possibly empty) for every statement kind.
 */
function normalizeSurfaceRows(
  value: unknown,
  fallbackRows: SurfaceRowMap = emptySurfaceRows()
): SurfaceRowMap {
  const source = asObject(value);
  if (!source) {
    return fallbackRows;
  }

  const surfaceStatements = ['income', 'balance', 'cash_flow'] as const;
  const resolutionMethods = ['direct', 'surface_bridge', 'formula_derived', 'not_meaningful'] as const;
  const confidenceLevels = ['high', 'medium', 'low'] as const;

  // Converts one raw entry into a surface row, or null when it is unusable.
  const toSurfaceRow = (entry: unknown) => {
    const raw = asObject(entry);
    if (!raw) {
      return null;
    }

    const key = asString(raw.key);
    const label = asString(raw.label);
    const category = asString(raw.category);
    const unit = asString(raw.unit);
    if (!key || !label || !category || !unit) {
      return null;
    }

    const surfaceRow: SurfaceFinancialRow = {
      key,
      label,
      category: category as SurfaceFinancialRow['category'],
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      unit: unit as SurfaceFinancialRow['unit'],
      values: normalizeNumberMap(raw.values),
      sourceConcepts: normalizeStringArray(raw.sourceConcepts ?? raw.source_concepts),
      sourceRowKeys: normalizeStringArray(raw.sourceRowKeys ?? raw.source_row_keys),
      sourceFactIds: normalizeNumberArray(raw.sourceFactIds ?? raw.source_fact_ids),
      formulaKey: asNullableString(raw.formulaKey ?? raw.formula_key),
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
      resolvedSourceRowKeys: normalizeNullableStringMap(raw.resolvedSourceRowKeys ?? raw.resolved_source_row_keys)
    };

    // Optional fields are attached in a fixed order, each only when valid.
    const templateSection = asString(raw.templateSection ?? raw.template_section);
    if (templateSection) {
      surfaceRow.templateSection = templateSection as SurfaceFinancialRow['templateSection'];
    }

    const rowStatement = asStatementKind(raw.statement);
    const surfaceStatement = surfaceStatements.find((kind) => kind === rowStatement);
    if (surfaceStatement) {
      surfaceRow.statement = surfaceStatement;
    }

    const detailCount = asNumber(raw.detailCount ?? raw.detail_count);
    if (detailCount !== null) {
      surfaceRow.detailCount = detailCount;
    }

    const rawResolutionMethod = raw.resolutionMethod ?? raw.resolution_method;
    const resolutionMethod = resolutionMethods.find((method) => method === rawResolutionMethod);
    if (resolutionMethod) {
      surfaceRow.resolutionMethod = resolutionMethod;
    }

    const confidence = confidenceLevels.find((level) => level === raw.confidence);
    if (confidence) {
      surfaceRow.confidence = confidence;
    }

    const warningCodes = normalizeStringArray(raw.warningCodes ?? raw.warning_codes);
    if (warningCodes.length > 0) {
      surfaceRow.warningCodes = warningCodes;
    }

    return surfaceRow;
  };

  const result = emptySurfaceRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const candidates = source[statement];
    const entries: unknown[] = Array.isArray(candidates) ? candidates : [];
    result[statement] = entries
      .map((entry) => toSurfaceRow(entry))
      .filter((row): row is SurfaceFinancialRow => row !== null);
  }

  return result;
}
/**
 * Coerces an untrusted `detail_rows` payload into a `DetailRowMap`.
 *
 * The payload is read as `statement kind -> surface key -> row[]`. Field
 * names are read in camelCase first and snake_case second. An entry is
 * dropped when it is not an object or when any of `key`, `label`,
 * `conceptKey`, `qname`, `namespaceUri`, `localName` is falsy after
 * `asString`. A surface key whose value is not an array maps to `[]`.
 *
 * @param value - Raw payload.
 * @param fallbackRows - Returned unchanged when `asObject(value)` is falsy.
 * @returns A map seeded by `emptyDetailRows()` with every kind in
 *   `FINANCIAL_STATEMENT_KINDS` replaced by its normalized groups.
 */
function normalizeDetailRows(
  value: unknown,
  fallbackRows: DetailRowMap = emptyDetailRows()
): DetailRowMap {
  const payload = asObject(value);
  if (!payload) {
    return fallbackRows;
  }

  // Builds one detail row from a raw entry, or null when the entry is unusable.
  const toDetailRow = (candidate: unknown, surfaceKey: string) => {
    const raw = asObject(candidate);
    if (!raw) {
      return null;
    }

    const conceptKey = asString(raw.conceptKey ?? raw.concept_key);
    // `key` falls back to the concept key when the entry has no key of its own.
    const key = asString(raw.key) ?? conceptKey;
    const label = asString(raw.label);
    const qname = asString(raw.qname);
    const namespaceUri = asString(raw.namespaceUri ?? raw.namespace_uri);
    const localName = asString(raw.localName ?? raw.local_name);
    if (!key || !label || !conceptKey || !qname || !namespaceUri || !localName) {
      return null;
    }

    return {
      key,
      // The enclosing group key is used when the entry names no parent itself.
      parentSurfaceKey: asString(raw.parentSurfaceKey ?? raw.parent_surface_key) ?? surfaceKey,
      label,
      conceptKey,
      qname,
      namespaceUri,
      localName,
      unit: asNullableString(raw.unit),
      values: normalizeNumberMap(raw.values),
      sourceFactIds: normalizeNumberArray(raw.sourceFactIds ?? raw.source_fact_ids),
      isExtension: asBoolean(raw.isExtension ?? raw.is_extension),
      dimensionsSummary: normalizeStringArray(raw.dimensionsSummary ?? raw.dimensions_summary),
      residualFlag: asBoolean(raw.residualFlag ?? raw.residual_flag)
    };
  };

  const result = emptyDetailRows();

  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const groups = asObject(payload[statement]) ?? {};
    const pairs: Array<[string, DetailFinancialRow[]]> = [];

    for (const [surfaceKey, rawRows] of Object.entries(groups)) {
      let detailRows: DetailFinancialRow[] = [];
      if (Array.isArray(rawRows)) {
        detailRows = rawRows
          .map((candidate) => toDetailRow(candidate, surfaceKey))
          .filter((row): row is DetailFinancialRow => row !== null);
      }
      pairs.push([surfaceKey, detailRows]);
    }

    result[statement] = Object.fromEntries(pairs);
  }

  return result;
}
/**
 * Coerces an untrusted `kpi_rows` payload into `StructuredKpiRow` objects.
 *
 * Field names are read in camelCase first and snake_case second. An entry is
 * dropped when it is not an object, when `key`, `label`, `category`, or
 * `unit` is falsy after `asString`, or when its provenance type is neither
 * `'taxonomy'` nor `'structured_note'`.
 *
 * @param value - Raw payload; anything that is not an array yields `[]`.
 * @returns The accepted rows, in input order.
 */
function normalizeKpiRows(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  const kpiRows: StructuredKpiRow[] = [];

  for (const candidate of value) {
    const raw = asObject(candidate);
    if (!raw) {
      continue;
    }

    const key = asString(raw.key);
    const label = asString(raw.label);
    const category = asString(raw.category);
    const unit = asString(raw.unit);
    if (!key || !label || !category || !unit) {
      continue;
    }

    // Checked against the literals so the value narrows to the union type.
    const provenanceType = raw.provenanceType ?? raw.provenance_type;
    if (provenanceType !== 'taxonomy' && provenanceType !== 'structured_note') {
      continue;
    }

    // NOTE: `category` and `unit` are cast, not validated against their unions.
    kpiRows.push({
      key,
      label,
      category: category as StructuredKpiRow['category'],
      unit: unit as StructuredKpiRow['unit'],
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      segment: asNullableString(raw.segment),
      axis: asNullableString(raw.axis),
      member: asNullableString(raw.member),
      values: normalizeNumberMap(raw.values),
      sourceConcepts: normalizeStringArray(raw.sourceConcepts ?? raw.source_concepts),
      sourceFactIds: normalizeNumberArray(raw.sourceFactIds ?? raw.source_fact_ids),
      provenanceType,
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions)
    } satisfies StructuredKpiRow);
  }

  return kpiRows;
}
/**
 * Coerces an untrusted `normalization_summary` payload into a
 * `NormalizationSummary`.
 *
 * Every counter is read in camelCase first and snake_case second, and falls
 * back to `0` when `asNumber` yields a nullish result.
 *
 * @param value - Raw payload.
 * @returns The normalized summary, or `null` when `asObject(value)` is falsy.
 */
function normalizeNormalizationSummary(value: unknown) {
  const raw = asObject(value);
  if (!raw) {
    return null;
  }

  // Reads one counter, preferring the camelCase value over the snake_case one.
  const countOf = (camelCase: unknown, snakeCase: unknown) => asNumber(camelCase ?? snakeCase) ?? 0;

  const summary = {
    surfaceRowCount: countOf(raw.surfaceRowCount, raw.surface_row_count),
    detailRowCount: countOf(raw.detailRowCount, raw.detail_row_count),
    kpiRowCount: countOf(raw.kpiRowCount, raw.kpi_row_count),
    unmappedRowCount: countOf(raw.unmappedRowCount, raw.unmapped_row_count),
    materialUnmappedRowCount: countOf(raw.materialUnmappedRowCount, raw.material_unmapped_row_count),
    warnings: normalizeStringArray(raw.warnings)
  } satisfies NormalizationSummary;

  return summary;
}
/**
 * Runs every row-bearing field of a taxonomy snapshot payload through its
 * normalizer and returns the results under the same snake_case keys.
 *
 * `faithful_rows` is normalized first; the result is then passed as the
 * second argument when normalizing `statement_rows`.
 *
 * @param input - Untyped snapshot fields, e.g. from a DB row or a hydration result.
 * @returns An object with `periods`, `faithful_rows`, `statement_rows`,
 *   `surface_rows`, `detail_rows`, `kpi_rows`, and `normalization_summary`.
 */
export function normalizeFilingTaxonomySnapshotPayload(input: {
  periods: unknown;
  faithful_rows: unknown;
  statement_rows: unknown;
  surface_rows: unknown;
  detail_rows: unknown;
  kpi_rows: unknown;
  normalization_summary: unknown;
}) {
  const faithful_rows = normalizeStatementRows(input.faithful_rows);
  const statement_rows = normalizeStatementRows(input.statement_rows, faithful_rows);
  const periods = normalizePeriods(input.periods);
  const surface_rows = normalizeSurfaceRows(input.surface_rows);
  const detail_rows = normalizeDetailRows(input.detail_rows);
  const kpi_rows = normalizeKpiRows(input.kpi_rows);
  const normalization_summary = normalizeNormalizationSummary(input.normalization_summary);

  return {
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    normalization_summary
  };
}
function emptyStatementRows(): StatementRowMap {
return {
income: [],
balance: [],
@@ -320,7 +720,7 @@ function emptyStatementRows(): Record<FinancialStatementKind, TaxonomyStatementR
};
}
function emptySurfaceRows(): Record<FinancialStatementKind, SurfaceFinancialRow[]> {
function emptySurfaceRows(): SurfaceRowMap {
return {
income: [],
balance: [],
@@ -330,7 +730,7 @@ function emptySurfaceRows(): Record<FinancialStatementKind, SurfaceFinancialRow[
};
}
function emptyDetailRows(): Record<FinancialStatementKind, SurfaceDetailMap> {
function emptyDetailRows(): DetailRowMap {
return {
income: {},
balance: {},
@@ -341,7 +741,15 @@ function emptyDetailRows(): Record<FinancialStatementKind, SurfaceDetailMap> {
}
function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): FilingTaxonomySnapshotRecord {
const faithfulRows = row.faithful_rows ?? row.statement_rows ?? emptyStatementRows();
const normalized = normalizeFilingTaxonomySnapshotPayload({
periods: row.periods,
faithful_rows: row.faithful_rows,
statement_rows: row.statement_rows,
surface_rows: row.surface_rows,
detail_rows: row.detail_rows,
kpi_rows: row.kpi_rows,
normalization_summary: row.normalization_summary
});
return {
id: row.id,
@@ -356,15 +764,15 @@ function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): Fili
parser_version: row.parser_version,
taxonomy_regime: row.taxonomy_regime,
fiscal_pack: row.fiscal_pack,
periods: row.periods ?? [],
faithful_rows: faithfulRows,
statement_rows: faithfulRows,
surface_rows: row.surface_rows ?? emptySurfaceRows(),
detail_rows: row.detail_rows ?? emptyDetailRows(),
kpi_rows: row.kpi_rows ?? [],
periods: normalized.periods,
faithful_rows: normalized.faithful_rows,
statement_rows: normalized.statement_rows,
surface_rows: normalized.surface_rows,
detail_rows: normalized.detail_rows,
kpi_rows: normalized.kpi_rows,
derived_metrics: row.derived_metrics ?? null,
validation_result: row.validation_result ?? null,
normalization_summary: row.normalization_summary ?? null,
normalization_summary: normalized.normalization_summary,
facts_count: row.facts_count,
concepts_count: row.concepts_count,
dimensions_count: row.dimensions_count,
@@ -552,6 +960,7 @@ export async function listFilingTaxonomyMetricValidations(snapshotId: number) {
export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySnapshotInput) {
const now = new Date().toISOString();
const normalized = normalizeFilingTaxonomySnapshotPayload(input);
const [saved] = await withFinancialIngestionSchemaRetry({
client: getSqliteClient(),
@@ -570,15 +979,15 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
parser_version: input.parser_version,
taxonomy_regime: input.taxonomy_regime,
fiscal_pack: input.fiscal_pack,
periods: input.periods,
faithful_rows: input.faithful_rows,
statement_rows: input.statement_rows,
surface_rows: input.surface_rows,
detail_rows: input.detail_rows,
kpi_rows: input.kpi_rows,
periods: normalized.periods,
faithful_rows: normalized.faithful_rows,
statement_rows: normalized.statement_rows,
surface_rows: normalized.surface_rows,
detail_rows: normalized.detail_rows,
kpi_rows: normalized.kpi_rows,
derived_metrics: input.derived_metrics,
validation_result: input.validation_result,
normalization_summary: input.normalization_summary,
normalization_summary: normalized.normalization_summary,
facts_count: input.facts_count,
concepts_count: input.concepts_count,
dimensions_count: input.dimensions_count,
@@ -598,15 +1007,15 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
parser_version: input.parser_version,
taxonomy_regime: input.taxonomy_regime,
fiscal_pack: input.fiscal_pack,
periods: input.periods,
faithful_rows: input.faithful_rows,
statement_rows: input.statement_rows,
surface_rows: input.surface_rows,
detail_rows: input.detail_rows,
kpi_rows: input.kpi_rows,
periods: normalized.periods,
faithful_rows: normalized.faithful_rows,
statement_rows: normalized.statement_rows,
surface_rows: normalized.surface_rows,
detail_rows: normalized.detail_rows,
kpi_rows: normalized.kpi_rows,
derived_metrics: input.derived_metrics,
validation_result: input.validation_result,
normalization_summary: input.normalization_summary,
normalization_summary: normalized.normalization_summary,
facts_count: input.facts_count,
concepts_count: input.concepts_count,
dimensions_count: input.dimensions_count,
@@ -906,3 +1315,8 @@ export async function listTaxonomyAssetsBySnapshotIds(snapshotIds: number[]) {
return rows.map(toAssetRecord);
}
/**
 * Groups `normalizeFilingTaxonomySnapshotPayload` and `toSnapshotRecord`
 * under a single exported object.
 *
 * NOTE(review): presumably exposed so tests can reach `toSnapshotRecord`,
 * which is not exported on its own — confirm against callers.
 */
export const __filingTaxonomyInternals = {
normalizeFilingTaxonomySnapshotPayload,
toSnapshotRecord
};

View File

@@ -23,6 +23,7 @@ import {
} from '@/lib/server/repos/company-financial-bundles';
import {
getFilingTaxonomySnapshotByFilingId,
normalizeFilingTaxonomySnapshotPayload,
upsertFilingTaxonomySnapshot
} from '@/lib/server/repos/filing-taxonomy';
import {
@@ -726,6 +727,10 @@ async function processSyncFilings(task: Task) {
filingUrl: filing.filing_url,
primaryDocument: filing.primary_document ?? null
});
const normalizedSnapshot = {
...snapshot,
...normalizeFilingTaxonomySnapshotPayload(snapshot)
};
await setProjectionStage(
task,
@@ -752,8 +757,8 @@ async function processSyncFilings(task: Task) {
stageContext('sync.persist_taxonomy')
);
await upsertFilingTaxonomySnapshot(snapshot);
await updateFilingMetricsById(filing.id, snapshot.derived_metrics);
await upsertFilingTaxonomySnapshot(normalizedSnapshot);
await updateFilingMetricsById(filing.id, normalizedSnapshot.derived_metrics);
await deleteCompanyFinancialBundlesForTicker(filing.ticker);
taxonomySnapshotsHydrated += 1;
} catch (error) {

View File

@@ -43,11 +43,11 @@ function createHydrationResult(): TaxonomyHydrationResult {
facts: [],
metric_validations: [],
normalization_summary: {
surfaceRowCount: 0,
detailRowCount: 0,
kpiRowCount: 0,
unmappedRowCount: 0,
materialUnmappedRowCount: 0,
surface_row_count: 0,
detail_row_count: 0,
kpi_row_count: 0,
unmapped_row_count: 0,
material_unmapped_row_count: 0,
warnings: ['rust_warning']
}
};

View File

@@ -1,12 +1,7 @@
import type {
Filing,
FinancialStatementKind,
MetricValidationResult,
NormalizationSummary,
StructuredKpiRow,
SurfaceDetailMap,
SurfaceFinancialRow,
TaxonomyStatementRow
MetricValidationResult
} from '@/lib/types';
import type {
FilingTaxonomyAssetType,
@@ -117,6 +112,98 @@ export type TaxonomyMetricValidationCheck = {
error: string | null;
};
/**
 * One reporting period in snake_case form; the element type of
 * `TaxonomyHydrationResult.periods`.
 *
 * `period_start` and `period_end` may be `null`; `filing_type` is limited to
 * `'10-K'` or `'10-Q'`.
 */
export type TaxonomyHydrationPeriod = {
id: string;
filing_id: number;
accession_number: string;
filing_date: string;
period_start: string | null;
period_end: string | null;
filing_type: '10-K' | '10-Q';
period_label: string;
};
/**
 * One statement row in snake_case form; the element type of both
 * `TaxonomyHydrationResult.faithful_rows` and
 * `TaxonomyHydrationResult.statement_rows`.
 *
 * NOTE(review): `values` and `units` are string-keyed maps — presumably keyed
 * by period id; confirm against the producer.
 */
export type TaxonomyHydrationStatementRow = {
key: string;
label: string;
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
is_extension: boolean;
statement: FinancialStatementKind;
role_uri: string | null;
order: number;
depth: number;
parent_key: string | null;
values: Record<string, number | null>;
units: Record<string, string | null>;
has_dimensions: boolean;
source_fact_ids: number[];
};
/**
 * One surface row in snake_case form; the element type of
 * `TaxonomyHydrationResult.surface_rows`.
 *
 * `template_section`, `statement`, `detail_count`, `resolution_method`,
 * `confidence`, and `warning_codes` are optional; all other fields are
 * required.
 */
export type TaxonomyHydrationSurfaceRow = {
key: string;
label: string;
category: string;
template_section?: string;
order: number;
unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio';
values: Record<string, number | null>;
source_concepts: string[];
source_row_keys: string[];
source_fact_ids: number[];
formula_key: string | null;
has_dimensions: boolean;
resolved_source_row_keys: Record<string, string | null>;
statement?: 'income' | 'balance' | 'cash_flow';
detail_count?: number;
resolution_method?: 'direct' | 'surface_bridge' | 'formula_derived' | 'not_meaningful';
confidence?: 'high' | 'medium' | 'low';
warning_codes?: string[];
};
/**
 * One detail row in snake_case form. `TaxonomyHydrationResult.detail_rows`
 * holds arrays of these, grouped first by statement kind and then by a
 * string key.
 *
 * NOTE(review): that string key presumably matches `parent_surface_key` —
 * confirm against the producer.
 */
export type TaxonomyHydrationDetailRow = {
key: string;
parent_surface_key: string;
label: string;
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
unit: string | null;
values: Record<string, number | null>;
source_fact_ids: number[];
is_extension: boolean;
dimensions_summary: string[];
residual_flag: boolean;
};
/**
 * One structured KPI row in snake_case form; the element type of
 * `TaxonomyHydrationResult.kpi_rows`.
 *
 * `provenance_type` is limited to `'taxonomy'` or `'structured_note'`;
 * `segment`, `axis`, and `member` may be `null`.
 */
export type TaxonomyHydrationStructuredKpiRow = {
key: string;
label: string;
category: string;
unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio';
order: number;
segment: string | null;
axis: string | null;
member: string | null;
values: Record<string, number | null>;
source_concepts: string[];
source_fact_ids: number[];
provenance_type: 'taxonomy' | 'structured_note';
has_dimensions: boolean;
};
/**
 * Row counters and warnings in snake_case form; the type of
 * `TaxonomyHydrationResult.normalization_summary`.
 */
export type TaxonomyHydrationNormalizationSummary = {
surface_row_count: number;
detail_row_count: number;
kpi_row_count: number;
unmapped_row_count: number;
material_unmapped_row_count: number;
warnings: string[];
};
export type TaxonomyHydrationInput = {
filingId: number;
ticker: string;
@@ -140,12 +227,12 @@ export type TaxonomyHydrationResult = {
parser_version: string;
taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown';
fiscal_pack: string | null;
periods: FilingTaxonomyPeriod[];
faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
kpi_rows: StructuredKpiRow[];
periods: TaxonomyHydrationPeriod[];
faithful_rows: Record<FinancialStatementKind, TaxonomyHydrationStatementRow[]>;
statement_rows: Record<FinancialStatementKind, TaxonomyHydrationStatementRow[]>;
surface_rows: Record<FinancialStatementKind, TaxonomyHydrationSurfaceRow[]>;
detail_rows: Record<FinancialStatementKind, Record<string, TaxonomyHydrationDetailRow[]>>;
kpi_rows: TaxonomyHydrationStructuredKpiRow[];
contexts: Array<{
context_id: string;
entity_identifier: string | null;
@@ -191,5 +278,5 @@ export type TaxonomyHydrationResult = {
source_file: string | null;
}>;
metric_validations: TaxonomyMetricValidationCheck[];
normalization_summary: NormalizationSummary;
normalization_summary: TaxonomyHydrationNormalizationSummary;
};

View File

@@ -2,6 +2,7 @@ import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
import { listFilingsRecords, updateFilingMetricsById } from '@/lib/server/repos/filings';
import {
getFilingTaxonomySnapshotByFilingId,
normalizeFilingTaxonomySnapshotPayload,
upsertFilingTaxonomySnapshot
} from '@/lib/server/repos/filing-taxonomy';
@@ -186,8 +187,12 @@ async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
summary.wouldWrite += 1;
if (options.apply) {
await upsertFilingTaxonomySnapshot(snapshot);
await updateFilingMetricsById(row.id, snapshot.derived_metrics);
const normalizedSnapshot = {
...snapshot,
...normalizeFilingTaxonomySnapshotPayload(snapshot)
};
await upsertFilingTaxonomySnapshot(normalizedSnapshot);
await updateFilingMetricsById(row.id, normalizedSnapshot.derived_metrics);
summary.written += 1;
}
} catch (error) {

View File

@@ -312,12 +312,14 @@ function relativeDiff(left: number | null, right: number | null) {
return Math.abs(left - right) / baseline;
}
function periodStart(period: ResultPeriod) {
return period.periodStart ?? period.period_start ?? null;
/**
 * Reads a period's start date from either field spelling.
 *
 * `periodStart` is preferred when the property exists and is not nullish;
 * otherwise `period_start` is used.
 *
 * @param period - Period object carrying `periodStart` and/or `period_start`.
 * @returns The start date when it is a string, otherwise `null`.
 */
function periodStart(period: ResultPeriod): string | null {
  const camelCase = 'periodStart' in period ? period.periodStart : undefined;
  const resolved = camelCase ?? period.period_start ?? null;
  if (typeof resolved === 'string') {
    return resolved;
  }
  return null;
}
function periodEnd(period: ResultPeriod) {
return period.periodEnd ?? period.period_end ?? null;
/**
 * Reads a period's end date from either field spelling.
 *
 * `periodEnd` is preferred when the property exists and is not nullish;
 * otherwise `period_end` is used.
 *
 * @param period - Period object carrying `periodEnd` and/or `period_end`.
 * @returns The end date when it is a string, otherwise `null`.
 */
function periodEnd(period: ResultPeriod): string | null {
  const camelCase = 'periodEnd' in period ? period.periodEnd : undefined;
  const resolved = camelCase ?? period.period_end ?? null;
  if (typeof resolved === 'string') {
    return resolved;
  }
  return null;
}
function chooseDurationPeriodId(result: TaxonomyHydrationResult) {