feat(taxonomy): add rust sidecar compact surface pipeline
This commit is contained in:
@@ -92,7 +92,9 @@ function applySqlMigrations(client: { exec: (query: string) => void }) {
|
||||
'0006_coverage_journal_tracking.sql',
|
||||
'0007_company_financial_bundles.sql',
|
||||
'0008_research_workspace.sql',
|
||||
'0009_task_notification_context.sql'
|
||||
'0009_task_notification_context.sql',
|
||||
'0010_taxonomy_surface_sidecar.sql',
|
||||
'0011_remove_legacy_xbrl_defaults.sql'
|
||||
];
|
||||
|
||||
for (const file of migrationFiles) {
|
||||
|
||||
@@ -169,6 +169,67 @@ type TaxonomyStatementBundle = {
|
||||
statements: Record<FinancialStatementKind, TaxonomyStatementSnapshotRow[]>;
|
||||
};
|
||||
|
||||
/**
 * One compact "surface" line item as persisted inside the `surface_rows`
 * JSON column of `filing_taxonomy_snapshot`.
 */
type TaxonomySurfaceSnapshotRow = {
  /** Identifier of the surface row. */
  key: string;
  /** Display text for the row. */
  label: string;
  category: string;
  templateSection?: string;
  /** Numeric sort position of the row. */
  order: number;
  unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio';
  /** Value per period id; `null` marks a period without a value. */
  values: { [periodId: string]: number | null };
  /** Provenance: the concepts, source rows and fact ids the row refers to. */
  sourceConcepts: Array<string>;
  sourceRowKeys: Array<string>;
  sourceFactIds: Array<number>;
  formulaKey: string | null;
  hasDimensions: boolean;
  /** Per period id, the source row key recorded for that period (or `null`). */
  resolvedSourceRowKeys: { [periodId: string]: string | null };
  statement?: 'income' | 'balance' | 'cash_flow';
  detailCount?: number;
};
|
||||
|
||||
/**
 * One detail line item stored under a surface row in the `detail_rows`
 * JSON column of `filing_taxonomy_snapshot`.
 */
type TaxonomyDetailSnapshotRow = {
  key: string;
  /** Key of the surface row this detail row is attached to. */
  parentSurfaceKey: string;
  label: string;
  /** Concept identity of the row. */
  conceptKey: string;
  qname: string;
  namespaceUri: string;
  localName: string;
  unit: string | null;
  /** Value per period id; `null` marks a period without a value. */
  values: { [periodId: string]: number | null };
  sourceFactIds: Array<number>;
  isExtension: boolean;
  dimensionsSummary: Array<string>;
  residualFlag: boolean;
};
|
||||
|
||||
/** Detail rows of one statement, grouped under a string key (NOTE(review): presumably the parent surface key — confirm). */
type TaxonomyDetailStatementMap = { [groupKey: string]: TaxonomyDetailSnapshotRow[] };
|
||||
|
||||
/**
 * One KPI row as persisted in the `kpi_rows` JSON column of
 * `filing_taxonomy_snapshot`.
 */
type StructuredKpiSnapshotRow = {
  key: string;
  label: string;
  category: string;
  unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio';
  /** Numeric sort position of the row. */
  order: number;
  /** Optional segment / axis / member qualifiers; `null` when absent. */
  segment: string | null;
  axis: string | null;
  member: string | null;
  /** Value per period id; `null` marks a period without a value. */
  values: { [periodId: string]: number | null };
  sourceConcepts: Array<string>;
  sourceFactIds: Array<number>;
  /** Origin of the KPI. */
  provenanceType: 'taxonomy' | 'structured_note';
  hasDimensions: boolean;
};
|
||||
|
||||
/**
 * Row counts and warnings persisted in the `normalization_summary` JSON
 * column of `filing_taxonomy_snapshot`.
 */
type TaxonomyNormalizationSummary = {
  /** Number of surface, detail and KPI rows. */
  surfaceRowCount: number;
  detailRowCount: number;
  kpiRowCount: number;
  /** Number of unmapped rows, and how many of those are flagged material. */
  unmappedRowCount: number;
  materialUnmappedRowCount: number;
  /** Warning strings attached to the summary. */
  warnings: Array<string>;
};
|
||||
|
||||
type TaxonomyMetricValidationCheck = {
|
||||
metricKey: keyof FilingMetrics;
|
||||
taxonomyValue: number | null;
|
||||
@@ -380,10 +441,19 @@ export const filingTaxonomySnapshot = sqliteTable('filing_taxonomy_snapshot', {
|
||||
parse_status: text('parse_status').$type<TaxonomyParseStatus>().notNull(),
|
||||
parse_error: text('parse_error'),
|
||||
source: text('source').$type<'xbrl_instance' | 'xbrl_instance_with_linkbase' | 'legacy_html_fallback'>().notNull(),
|
||||
parser_engine: text('parser_engine').notNull().default('fiscal-xbrl'),
|
||||
parser_version: text('parser_version').notNull().default('unknown'),
|
||||
taxonomy_regime: text('taxonomy_regime').$type<'us-gaap' | 'ifrs-full' | 'unknown'>().notNull().default('unknown'),
|
||||
fiscal_pack: text('fiscal_pack'),
|
||||
periods: text('periods', { mode: 'json' }).$type<FilingStatementPeriod[]>(),
|
||||
faithful_rows: text('faithful_rows', { mode: 'json' }).$type<TaxonomyStatementBundle['statements'] | null>(),
|
||||
statement_rows: text('statement_rows', { mode: 'json' }).$type<TaxonomyStatementBundle['statements'] | null>(),
|
||||
surface_rows: text('surface_rows', { mode: 'json' }).$type<Record<FinancialStatementKind, TaxonomySurfaceSnapshotRow[]> | null>(),
|
||||
detail_rows: text('detail_rows', { mode: 'json' }).$type<Record<FinancialStatementKind, TaxonomyDetailStatementMap> | null>(),
|
||||
kpi_rows: text('kpi_rows', { mode: 'json' }).$type<StructuredKpiSnapshotRow[] | null>(),
|
||||
derived_metrics: text('derived_metrics', { mode: 'json' }).$type<FilingMetrics | null>(),
|
||||
validation_result: text('validation_result', { mode: 'json' }).$type<TaxonomyMetricValidationResult | null>(),
|
||||
normalization_summary: text('normalization_summary', { mode: 'json' }).$type<TaxonomyNormalizationSummary | null>(),
|
||||
facts_count: integer('facts_count').notNull().default(0),
|
||||
concepts_count: integer('concepts_count').notNull().default(0),
|
||||
dimensions_count: integer('dimensions_count').notNull().default(0),
|
||||
@@ -395,6 +465,23 @@ export const filingTaxonomySnapshot = sqliteTable('filing_taxonomy_snapshot', {
|
||||
filingTaxonomySnapshotStatusIndex: index('filing_taxonomy_snapshot_status_idx').on(table.parse_status)
|
||||
}));
|
||||
|
||||
/**
 * `filing_taxonomy_context` table: one row per context id within a taxonomy
 * snapshot. Rows reference `filing_taxonomy_snapshot.id` with
 * `onDelete: 'cascade'`.
 */
export const filingTaxonomyContext = sqliteTable(
  'filing_taxonomy_context',
  {
    id: integer('id').primaryKey({ autoIncrement: true }),
    // Owning snapshot (foreign key, cascade on delete).
    snapshot_id: integer('snapshot_id')
      .notNull()
      .references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
    context_id: text('context_id').notNull(),
    // Entity identification; both columns are nullable.
    entity_identifier: text('entity_identifier'),
    entity_scheme: text('entity_scheme'),
    // Period bounds stored as text; all three columns are nullable.
    period_start: text('period_start'),
    period_end: text('period_end'),
    period_instant: text('period_instant'),
    // Segment / scenario payloads stored as JSON text.
    segment_json: text('segment_json', { mode: 'json' }).$type<Record<string, unknown> | null>(),
    scenario_json: text('scenario_json', { mode: 'json' }).$type<Record<string, unknown> | null>(),
    created_at: text('created_at').notNull()
  },
  (columns) => ({
    // Lookup by snapshot.
    filingTaxonomyContextSnapshotIndex: index('filing_taxonomy_context_snapshot_idx').on(columns.snapshot_id),
    // A context id may appear at most once per snapshot.
    filingTaxonomyContextUnique: uniqueIndex('filing_taxonomy_context_uidx').on(columns.snapshot_id, columns.context_id)
  })
);
|
||||
|
||||
export const filingTaxonomyAsset = sqliteTable('filing_taxonomy_asset', {
|
||||
id: integer('id').primaryKey({ autoIncrement: true }),
|
||||
snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
|
||||
@@ -419,8 +506,17 @@ export const filingTaxonomyConcept = sqliteTable('filing_taxonomy_concept', {
|
||||
local_name: text('local_name').notNull(),
|
||||
label: text('label'),
|
||||
is_extension: integer('is_extension', { mode: 'boolean' }).notNull().default(false),
|
||||
balance: text('balance'),
|
||||
period_type: text('period_type'),
|
||||
data_type: text('data_type'),
|
||||
statement_kind: text('statement_kind').$type<FinancialStatementKind>(),
|
||||
role_uri: text('role_uri'),
|
||||
authoritative_concept_key: text('authoritative_concept_key'),
|
||||
mapping_method: text('mapping_method'),
|
||||
surface_key: text('surface_key'),
|
||||
detail_parent_surface_key: text('detail_parent_surface_key'),
|
||||
kpi_key: text('kpi_key'),
|
||||
residual_flag: integer('residual_flag', { mode: 'boolean' }).notNull().default(false),
|
||||
presentation_order: numeric('presentation_order'),
|
||||
presentation_depth: integer('presentation_depth'),
|
||||
parent_concept_key: text('parent_concept_key'),
|
||||
@@ -444,11 +540,20 @@ export const filingTaxonomyFact = sqliteTable('filing_taxonomy_fact', {
|
||||
qname: text('qname').notNull(),
|
||||
namespace_uri: text('namespace_uri').notNull(),
|
||||
local_name: text('local_name').notNull(),
|
||||
data_type: text('data_type'),
|
||||
statement_kind: text('statement_kind').$type<FinancialStatementKind>(),
|
||||
role_uri: text('role_uri'),
|
||||
authoritative_concept_key: text('authoritative_concept_key'),
|
||||
mapping_method: text('mapping_method'),
|
||||
surface_key: text('surface_key'),
|
||||
detail_parent_surface_key: text('detail_parent_surface_key'),
|
||||
kpi_key: text('kpi_key'),
|
||||
residual_flag: integer('residual_flag', { mode: 'boolean' }).notNull().default(false),
|
||||
context_id: text('context_id').notNull(),
|
||||
unit: text('unit'),
|
||||
decimals: text('decimals'),
|
||||
precision: text('precision'),
|
||||
nil: integer('nil', { mode: 'boolean' }).notNull().default(false),
|
||||
value_num: numeric('value_num').notNull(),
|
||||
period_start: text('period_start'),
|
||||
period_end: text('period_end'),
|
||||
|
||||
@@ -7,6 +7,7 @@ import type { FilingTaxonomySnapshotRecord } from './repos/filing-taxonomy';
|
||||
import type {
|
||||
FinancialStatementKind,
|
||||
FinancialStatementPeriod,
|
||||
StructuredKpiRow,
|
||||
TaxonomyFactRow,
|
||||
TaxonomyStatementRow
|
||||
} from '@/lib/types';
|
||||
@@ -67,6 +68,13 @@ function createSnapshot(input: {
|
||||
statement: input.statement,
|
||||
values: Object.fromEntries(input.periods.map((period, index) => [period.id, 100 + index]))
|
||||
});
|
||||
const faithfulRows = {
|
||||
income: input.statement === 'income' ? (input.rows ?? [defaultRow]) : [],
|
||||
balance: input.statement === 'balance' ? (input.rows ?? [{ ...defaultRow, statement: 'balance' }]) : [],
|
||||
cash_flow: input.statement === 'cash_flow' ? (input.rows ?? [{ ...defaultRow, statement: 'cash_flow' }]) : [],
|
||||
equity: [],
|
||||
comprehensive_income: []
|
||||
} satisfies FilingTaxonomySnapshotRecord['faithful_rows'];
|
||||
|
||||
return {
|
||||
id: input.filingId,
|
||||
@@ -77,6 +85,10 @@ function createSnapshot(input: {
|
||||
parse_status: 'ready',
|
||||
parse_error: null,
|
||||
source: 'xbrl_instance',
|
||||
parser_engine: 'fiscal-xbrl',
|
||||
parser_version: '0.1.0',
|
||||
taxonomy_regime: 'us-gaap',
|
||||
fiscal_pack: 'core',
|
||||
periods: input.periods.map((period) => ({
|
||||
id: period.id,
|
||||
filingId: input.filingId,
|
||||
@@ -87,15 +99,26 @@ function createSnapshot(input: {
|
||||
filingType: input.filingType,
|
||||
periodLabel: period.periodLabel
|
||||
})),
|
||||
statement_rows: {
|
||||
income: input.statement === 'income' ? (input.rows ?? [defaultRow]) : [],
|
||||
balance: input.statement === 'balance' ? (input.rows ?? [{ ...defaultRow, statement: 'balance' }]) : [],
|
||||
cash_flow: input.statement === 'cash_flow' ? (input.rows ?? [{ ...defaultRow, statement: 'cash_flow' }]) : [],
|
||||
faithful_rows: faithfulRows,
|
||||
statement_rows: faithfulRows,
|
||||
surface_rows: {
|
||||
income: [],
|
||||
balance: [],
|
||||
cash_flow: [],
|
||||
equity: [],
|
||||
comprehensive_income: []
|
||||
},
|
||||
detail_rows: {
|
||||
income: {},
|
||||
balance: {},
|
||||
cash_flow: {},
|
||||
equity: {},
|
||||
comprehensive_income: {}
|
||||
},
|
||||
kpi_rows: [],
|
||||
derived_metrics: null,
|
||||
validation_result: null,
|
||||
normalization_summary: null,
|
||||
facts_count: 0,
|
||||
concepts_count: 0,
|
||||
dimensions_count: 0,
|
||||
@@ -203,6 +226,30 @@ function createFact(input: {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Test fixture factory: builds a `StructuredKpiRow` from a key and its
 * per-period values, filling every other field with a fixed default.
 *
 * @param input.key Used for both `key` and `label`.
 * @param input.values Per-period values, stored as given (not copied).
 * @param input.provenanceType Defaults to `'taxonomy'`.
 * @param input.sourceConcepts Defaults to an empty list.
 * @param input.sourceFactIds Defaults to an empty list.
 */
function createKpiRow(input: {
  key: string;
  values: Record<string, number | null>;
  provenanceType?: StructuredKpiRow['provenanceType'];
  sourceConcepts?: string[];
  sourceFactIds?: number[];
}): StructuredKpiRow {
  const provenanceType = input.provenanceType ?? 'taxonomy';
  const sourceConcepts = input.sourceConcepts ?? [];
  const sourceFactIds = input.sourceFactIds ?? [];

  return {
    key: input.key,
    label: input.key,
    category: 'operating_kpi',
    unit: 'percent',
    order: 10,
    segment: null,
    axis: null,
    member: null,
    values: input.values,
    sourceConcepts,
    sourceFactIds,
    provenanceType,
    hasDimensions: false
  };
}
|
||||
|
||||
function findRow(rows: ReturnType<typeof __financialTaxonomyInternals.buildStandardizedRows>, key: string) {
|
||||
const row = rows.find((entry) => entry.key === key);
|
||||
expect(row).toBeDefined();
|
||||
@@ -1500,4 +1547,174 @@ describe('financial taxonomy internals', () => {
|
||||
expect(findStandardizedResponseRow(cash, 'changes_accrued_expenses').values[cashPeriod2025]).toBe(21_525_000);
|
||||
expect(findStandardizedResponseRow(cash, 'other_adjustments').values[cashPeriod2025]).toBe(55_904_000);
|
||||
});
|
||||
|
||||
it('merges KPI rows by priority without overwriting higher-priority periods', () => {
|
||||
const merged = __financialTaxonomyInternals.mergeStructuredKpiRowsByPriority([
|
||||
[
|
||||
createKpiRow({
|
||||
key: 'loan_growth',
|
||||
values: { p1: 0.12 },
|
||||
sourceConcepts: ['us-gaap:LoansReceivableNetReportedAmount'],
|
||||
sourceFactIds: [1]
|
||||
})
|
||||
],
|
||||
[
|
||||
createKpiRow({
|
||||
key: 'loan_growth',
|
||||
values: { p1: 0.11, p2: 0.09 },
|
||||
sourceConcepts: ['us-gaap:FinancingReceivableRecordedInvestment'],
|
||||
sourceFactIds: [2]
|
||||
})
|
||||
],
|
||||
[
|
||||
createKpiRow({
|
||||
key: 'loan_growth',
|
||||
values: { p2: 0.08, p3: 0.07 },
|
||||
provenanceType: 'structured_note',
|
||||
sourceFactIds: [3]
|
||||
})
|
||||
]
|
||||
]);
|
||||
|
||||
expect(merged).toHaveLength(1);
|
||||
expect(merged[0]?.values).toEqual({ p1: 0.12, p2: 0.09, p3: 0.07 });
|
||||
expect(merged[0]?.sourceConcepts).toEqual([
|
||||
'us-gaap:FinancingReceivableRecordedInvestment',
|
||||
'us-gaap:LoansReceivableNetReportedAmount'
|
||||
]);
|
||||
expect(merged[0]?.sourceFactIds).toEqual([1, 2, 3]);
|
||||
expect(merged[0]?.provenanceType).toBe('taxonomy');
|
||||
});
|
||||
|
||||
it('builds normalization metadata from snapshot fiscal pack and counts', () => {
|
||||
const snapshot = {
|
||||
...createSnapshot({
|
||||
filingId: 15,
|
||||
filingType: '10-Q',
|
||||
filingDate: '2026-01-28',
|
||||
statement: 'income',
|
||||
periods: [
|
||||
{ id: 'quarter', periodStart: '2025-10-01', periodEnd: '2025-12-31', periodLabel: '2025-10-01 to 2025-12-31' }
|
||||
]
|
||||
}),
|
||||
parser_version: '0.1.0',
|
||||
fiscal_pack: 'bank_lender',
|
||||
normalization_summary: {
|
||||
surfaceRowCount: 5,
|
||||
detailRowCount: 3,
|
||||
kpiRowCount: 2,
|
||||
unmappedRowCount: 4,
|
||||
materialUnmappedRowCount: 1,
|
||||
warnings: []
|
||||
}
|
||||
} satisfies FilingTaxonomySnapshotRecord;
|
||||
|
||||
expect(__financialTaxonomyInternals.buildNormalizationMetadata([snapshot])).toEqual({
|
||||
regime: 'us-gaap',
|
||||
fiscalPack: 'bank_lender',
|
||||
parserVersion: '0.1.0',
|
||||
unmappedRowCount: 4,
|
||||
materialUnmappedRowCount: 1
|
||||
});
|
||||
});
|
||||
|
||||
it('retains pinned income surface rows even when they are intentionally null', () => {
|
||||
const snapshot = {
|
||||
...createSnapshot({
|
||||
filingId: 16,
|
||||
filingType: '10-K',
|
||||
filingDate: '2026-02-13',
|
||||
statement: 'income',
|
||||
periods: [
|
||||
{ id: '2025-fy', periodStart: '2025-01-01', periodEnd: '2025-12-31', periodLabel: '2025 FY' }
|
||||
]
|
||||
}),
|
||||
fiscal_pack: 'bank_lender',
|
||||
surface_rows: {
|
||||
income: [
|
||||
{
|
||||
key: 'revenue',
|
||||
label: 'Revenue',
|
||||
category: 'surface',
|
||||
templateSection: 'surface',
|
||||
order: 10,
|
||||
unit: 'currency',
|
||||
values: { '2025-fy': 100_000_000 },
|
||||
sourceConcepts: ['us-gaap:TotalNetRevenues'],
|
||||
sourceRowKeys: ['revenue'],
|
||||
sourceFactIds: [1],
|
||||
formulaKey: null,
|
||||
hasDimensions: false,
|
||||
resolvedSourceRowKeys: { '2025-fy': 'revenue' },
|
||||
statement: 'income',
|
||||
detailCount: 0,
|
||||
resolutionMethod: 'direct',
|
||||
confidence: 'high',
|
||||
warningCodes: []
|
||||
},
|
||||
{
|
||||
key: 'gross_profit',
|
||||
label: 'Gross Profit',
|
||||
category: 'surface',
|
||||
templateSection: 'surface',
|
||||
order: 20,
|
||||
unit: 'currency',
|
||||
values: { '2025-fy': null },
|
||||
sourceConcepts: [],
|
||||
sourceRowKeys: [],
|
||||
sourceFactIds: [],
|
||||
formulaKey: null,
|
||||
hasDimensions: false,
|
||||
resolvedSourceRowKeys: { '2025-fy': null },
|
||||
statement: 'income',
|
||||
detailCount: 0,
|
||||
resolutionMethod: 'not_meaningful',
|
||||
confidence: 'low',
|
||||
warningCodes: ['gross_profit_not_meaningful_bank_pack']
|
||||
},
|
||||
{
|
||||
key: 'selling_general_and_administrative',
|
||||
label: 'SG&A',
|
||||
category: 'surface',
|
||||
templateSection: 'surface',
|
||||
order: 31,
|
||||
unit: 'currency',
|
||||
values: { '2025-fy': null },
|
||||
sourceConcepts: [],
|
||||
sourceRowKeys: [],
|
||||
sourceFactIds: [],
|
||||
formulaKey: null,
|
||||
hasDimensions: false,
|
||||
resolvedSourceRowKeys: { '2025-fy': null },
|
||||
statement: 'income',
|
||||
detailCount: 0,
|
||||
resolutionMethod: 'not_meaningful',
|
||||
confidence: 'low',
|
||||
warningCodes: ['selling_general_and_administrative_not_meaningful_bank_pack']
|
||||
}
|
||||
],
|
||||
balance: [],
|
||||
cash_flow: [],
|
||||
equity: [],
|
||||
comprehensive_income: []
|
||||
}
|
||||
} satisfies FilingTaxonomySnapshotRecord;
|
||||
|
||||
const rows = __financialTaxonomyInternals.aggregateSurfaceRows({
|
||||
snapshots: [snapshot],
|
||||
statement: 'income',
|
||||
selectedPeriodIds: new Set(['2025-fy'])
|
||||
});
|
||||
|
||||
const grossProfit = rows.find((row) => row.key === 'gross_profit');
|
||||
const sga = rows.find((row) => row.key === 'selling_general_and_administrative');
|
||||
expect(grossProfit).toBeDefined();
|
||||
expect(grossProfit?.values['2025-fy']).toBeNull();
|
||||
expect(grossProfit?.resolutionMethod).toBe('not_meaningful');
|
||||
expect(grossProfit?.warningCodes).toEqual(['gross_profit_not_meaningful_bank_pack']);
|
||||
expect(sga).toBeDefined();
|
||||
expect(sga?.values['2025-fy']).toBeNull();
|
||||
expect(sga?.resolutionMethod).toBe('not_meaningful');
|
||||
expect(sga?.warningCodes).toEqual(['selling_general_and_administrative_not_meaningful_bank_pack']);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
import type {
|
||||
CompanyFinancialStatementsResponse,
|
||||
DetailFinancialRow,
|
||||
FinancialCadence,
|
||||
FinancialDisplayMode,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementPeriod,
|
||||
FinancialSurfaceKind,
|
||||
NormalizationMetadata,
|
||||
StandardizedFinancialRow,
|
||||
StructuredKpiRow,
|
||||
SurfaceDetailMap,
|
||||
SurfaceFinancialRow,
|
||||
TaxonomyFactRow,
|
||||
TaxonomyStatementRow
|
||||
} from '@/lib/types';
|
||||
@@ -59,9 +63,11 @@ type GetCompanyFinancialsInput = {
|
||||
};
|
||||
|
||||
/**
 * Payload produced (and cached) for a statement surface bundle.
 *
 * `rows` holds the compact surface rows; the stale
 * `rows: StandardizedFinancialRow[]` member left over from the previous
 * shape is removed so the property is declared exactly once.
 */
type StandardizedStatementBundlePayload = {
  /** Surface rows for the requested statement. */
  rows: SurfaceFinancialRow[];
  /** Detail rows grouped per surface row. */
  detailRows: SurfaceDetailMap;
  trendSeries: CompanyFinancialStatementsResponse['trendSeries'];
  categories: CompanyFinancialStatementsResponse['categories'];
  /** Normalization metadata derived from the contributing snapshots. */
  normalization: NormalizationMetadata;
};
|
||||
|
||||
type FilingDocumentRef = {
|
||||
@@ -204,6 +210,354 @@ function latestPeriodDate(period: FinancialStatementPeriod) {
|
||||
return period.periodEnd ?? period.filingDate;
|
||||
}
|
||||
|
||||
/**
 * Copies a KPI row so that its `values`, `sourceConcepts` and
 * `sourceFactIds` containers can be mutated without touching the input.
 * All other fields are copied shallowly.
 */
function cloneStructuredKpiRow(row: StructuredKpiRow): StructuredKpiRow {
  const copy: StructuredKpiRow = { ...row };
  copy.values = { ...row.values };
  copy.sourceConcepts = Array.from(row.sourceConcepts);
  copy.sourceFactIds = Array.from(row.sourceFactIds);
  return copy;
}
|
||||
|
||||
/**
 * Merges groups of KPI rows by `key`, where earlier groups take priority.
 *
 * For a key seen before, a period value from a later row is only used when
 * the merged row has no own value for that period or the value is `null`.
 * Source concepts and fact ids are unioned and sorted, `hasDimensions` is
 * OR-ed, and `segment` / `axis` / `member` are filled only when still
 * null/undefined. Every other field (e.g. `provenanceType`) comes from the
 * first row seen. The result is sorted by `order`, then `label`.
 */
function mergeStructuredKpiRowsByPriority(groups: StructuredKpiRow[][]) {
  const mergedByKey = new Map<string, StructuredKpiRow>();

  for (const candidate of groups.flat()) {
    const merged = mergedByKey.get(candidate.key);
    if (merged === undefined) {
      // First (highest-priority) occurrence: keep an independent copy.
      mergedByKey.set(candidate.key, cloneStructuredKpiRow(candidate));
      continue;
    }

    for (const [periodId, value] of Object.entries(candidate.values)) {
      const periodIsOpen = !Object.prototype.hasOwnProperty.call(merged.values, periodId)
        || merged.values[periodId] === null;
      if (periodIsOpen) {
        merged.values[periodId] = value;
      }
    }

    const conceptUnion = new Set([...merged.sourceConcepts, ...candidate.sourceConcepts]);
    merged.sourceConcepts = [...conceptUnion].sort((left, right) => left.localeCompare(right));

    const factIdUnion = new Set([...merged.sourceFactIds, ...candidate.sourceFactIds]);
    merged.sourceFactIds = [...factIdUnion].sort((left, right) => left - right);

    merged.hasDimensions = merged.hasDimensions || candidate.hasDimensions;
    merged.segment ??= candidate.segment;
    merged.axis ??= candidate.axis;
    merged.member ??= candidate.member;
  }

  return [...mergedByKey.values()].sort((left, right) => (
    left.order === right.order
      ? left.label.localeCompare(right.label)
      : left.order - right.order
  ));
}
|
||||
|
||||
/**
 * Returns a fresh placeholder `NormalizationMetadata`: unknown regime, no
 * fiscal pack, parser version `'0.0.0'` and zero unmapped-row counts.
 */
function emptyNormalizationMetadata(): NormalizationMetadata {
  const placeholder: NormalizationMetadata = {
    regime: 'unknown',
    fiscalPack: null,
    parserVersion: '0.0.0',
    unmappedRowCount: 0,
    materialUnmappedRowCount: 0
  };
  return placeholder;
}
|
||||
|
||||
/**
 * Derives response-level normalization metadata from a list of snapshots.
 *
 * Regime, fiscal pack and parser version are read from the LAST element of
 * `snapshots`; the unmapped-row counters are summed over all snapshots,
 * treating a missing `normalization_summary` as zero. An empty list yields
 * the placeholder from `emptyNormalizationMetadata()`.
 */
function buildNormalizationMetadata(
  snapshots: FilingTaxonomySnapshotRecord[]
): NormalizationMetadata {
  const lastSnapshot = snapshots[snapshots.length - 1];
  if (!lastSnapshot) {
    return emptyNormalizationMetadata();
  }

  let unmappedRowCount = 0;
  let materialUnmappedRowCount = 0;
  for (const snapshot of snapshots) {
    const summary = snapshot.normalization_summary;
    unmappedRowCount += summary?.unmappedRowCount ?? 0;
    materialUnmappedRowCount += summary?.materialUnmappedRowCount ?? 0;
  }

  return {
    regime: lastSnapshot.taxonomy_regime,
    fiscalPack: lastSnapshot.fiscal_pack,
    parserVersion: lastSnapshot.parser_version,
    unmappedRowCount,
    materialUnmappedRowCount
  };
}
|
||||
|
||||
/** Returns `true` when at least one entry of `values` is not `null`. */
function rowHasValues(values: Record<string, number | null>) {
  for (const value of Object.values(values)) {
    if (value !== null) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Keys of income-statement surface rows that `shouldRetainSurfaceRow` keeps
 * even when every selected period value is `null`.
 */
const PINNED_INCOME_SURFACE_ROWS = new Set<string>([
  // top line
  'revenue',
  'gross_profit',
  // operating expense lines
  'operating_expenses',
  'selling_general_and_administrative',
  'research_and_development',
  'other_operating_expense',
  // bottom lines
  'operating_income',
  'income_tax_expense',
  'net_income'
]);
|
||||
|
||||
/**
 * Decides whether a surface row survives aggregation.
 *
 * A row is kept when `values` contains at least one non-null entry, or when
 * it belongs to the income statement and its key is pinned in
 * `PINNED_INCOME_SURFACE_ROWS`.
 */
function shouldRetainSurfaceRow(
  statement: FinancialStatementKind,
  row: SurfaceFinancialRow,
  values: Record<string, number | null>
) {
  const isPinnedIncomeRow = () => statement === 'income' && PINNED_INCOME_SURFACE_ROWS.has(row.key);
  return rowHasValues(values) || isPinnedIncomeRow();
}
|
||||
|
||||
/**
 * Collapses the persisted surface rows of several snapshots into one row per
 * `key` for the requested statement.
 *
 * Only periods in `selectedPeriodIds` are kept. Rows rejected by
 * `shouldRetainSurfaceRow` are skipped. When a key appears more than once,
 * the first occurrence wins for scalar fields and for any period it already
 * carries (including a `null` value); later occurrences only contribute
 * periods not yet present, extra provenance entries, and warning codes. The
 * result is sorted by `order`, then `label`.
 */
function aggregateSurfaceRows(input: {
  snapshots: FilingTaxonomySnapshotRecord[];
  statement: FinancialStatementKind;
  selectedPeriodIds: Set<string>;
}) {
  const isSelected = ([periodId]: [string, unknown]) => input.selectedPeriodIds.has(periodId);
  const unionStrings = (left: string[], right: string[]) =>
    [...new Set([...left, ...right])].sort((a, b) => a.localeCompare(b));
  const unionNumbers = (left: number[], right: number[]) =>
    [...new Set([...left, ...right])].sort((a, b) => a - b);

  const mergedByKey = new Map<string, SurfaceFinancialRow>();

  for (const snapshot of input.snapshots) {
    for (const row of snapshot.surface_rows?.[input.statement] ?? []) {
      const values = Object.fromEntries(Object.entries(row.values).filter(isSelected));
      const resolvedKeys = Object.fromEntries(
        Object.entries(row.resolvedSourceRowKeys ?? {}).filter(isSelected)
      );

      if (!shouldRetainSurfaceRow(input.statement, row, values)) {
        continue;
      }

      const merged = mergedByKey.get(row.key);
      if (!merged) {
        // First occurrence: store a copy with its own containers.
        mergedByKey.set(row.key, {
          ...row,
          values,
          resolvedSourceRowKeys: resolvedKeys,
          sourceConcepts: [...row.sourceConcepts],
          sourceRowKeys: [...row.sourceRowKeys],
          sourceFactIds: [...row.sourceFactIds],
          warningCodes: row.warningCodes ? [...row.warningCodes] : undefined
        });
        continue;
      }

      // Periods already present are never overwritten.
      for (const [periodId, value] of Object.entries(values)) {
        if (periodId in merged.values) {
          continue;
        }
        merged.values[periodId] = value;
      }
      for (const [periodId, sourceRowKey] of Object.entries(resolvedKeys)) {
        if (periodId in merged.resolvedSourceRowKeys) {
          continue;
        }
        merged.resolvedSourceRowKeys[periodId] = sourceRowKey;
      }

      merged.sourceConcepts = unionStrings(merged.sourceConcepts, row.sourceConcepts);
      merged.sourceRowKeys = unionStrings(merged.sourceRowKeys, row.sourceRowKeys);
      merged.sourceFactIds = unionNumbers(merged.sourceFactIds, row.sourceFactIds);
      merged.warningCodes = unionStrings(merged.warningCodes ?? [], row.warningCodes ?? []);

      merged.hasDimensions = merged.hasDimensions || row.hasDimensions;
      merged.order = Math.min(merged.order, row.order);
      merged.detailCount = Math.max(merged.detailCount ?? 0, row.detailCount ?? 0);

      // Scalar metadata: keep what is already set, otherwise adopt the newcomer's.
      merged.formulaKey ??= row.formulaKey;
      merged.statement ??= row.statement;
      merged.resolutionMethod ??= row.resolutionMethod;
      merged.confidence ??= row.confidence;
    }
  }

  return [...mergedByKey.values()].sort((left, right) => (
    left.order === right.order
      ? left.label.localeCompare(right.label)
      : left.order - right.order
  ));
}
|
||||
|
||||
/**
 * Collapses the persisted detail rows of several snapshots into one list per
 * group key for the requested statement.
 *
 * Only periods in `selectedPeriodIds` are kept, and rows with no non-null
 * value among them are skipped. Within a group, rows are merged by `key`:
 * the first occurrence wins for any period it already carries, later ones
 * add missing periods, fact ids and dimension summaries, and the
 * `isExtension` / `residualFlag` booleans are OR-ed. Each list is sorted by
 * `label`. A group key seen in any snapshot appears in the result even when
 * all of its rows were skipped.
 */
function aggregateDetailRows(input: {
  snapshots: FilingTaxonomySnapshotRecord[];
  statement: FinancialStatementKind;
  selectedPeriodIds: Set<string>;
}) {
  const bucketsBySurface = new Map<string, Map<string, DetailFinancialRow>>();

  for (const snapshot of input.snapshots) {
    const grouped = snapshot.detail_rows?.[input.statement] ?? {};

    for (const [surfaceKey, rows] of Object.entries(grouped)) {
      const bucket = bucketsBySurface.get(surfaceKey) ?? new Map<string, DetailFinancialRow>();
      bucketsBySurface.set(surfaceKey, bucket);

      for (const row of rows) {
        const values = Object.fromEntries(
          Object.entries(row.values).filter(([periodId]) => input.selectedPeriodIds.has(periodId))
        );
        if (!rowHasValues(values)) {
          continue;
        }

        const merged = bucket.get(row.key);
        if (!merged) {
          // First occurrence: store a copy with its own containers.
          bucket.set(row.key, {
            ...row,
            values,
            sourceFactIds: [...row.sourceFactIds],
            dimensionsSummary: [...row.dimensionsSummary]
          });
          continue;
        }

        // Periods already present are never overwritten.
        for (const [periodId, value] of Object.entries(values)) {
          if (periodId in merged.values) {
            continue;
          }
          merged.values[periodId] = value;
        }

        const factIds = new Set([...merged.sourceFactIds, ...row.sourceFactIds]);
        merged.sourceFactIds = [...factIds].sort((left, right) => left - right);

        const dimensions = new Set([...merged.dimensionsSummary, ...row.dimensionsSummary]);
        merged.dimensionsSummary = [...dimensions].sort((left, right) => left.localeCompare(right));

        merged.isExtension = merged.isExtension || row.isExtension;
        merged.residualFlag = merged.residualFlag || row.residualFlag;
      }
    }
  }

  return Object.fromEntries(
    Array.from(bucketsBySurface, ([surfaceKey, bucket]): [string, DetailFinancialRow[]] => [
      surfaceKey,
      [...bucket.values()].sort((left, right) => left.label.localeCompare(right.label))
    ])
  ) satisfies SurfaceDetailMap;
}
|
||||
|
||||
/**
 * Derives LTM detail rows from quarterly detail rows.
 *
 * An LTM period with id `ltm:<quarterId>` is anchored on that quarter in the
 * `periodSorter`-sorted quarterly list and spans the anchor plus the three
 * quarters before it. LTM periods whose anchor is missing or has fewer than
 * three preceding quarters are left out. For `'balance'` the LTM value is the
 * anchor quarter's value; otherwise it is the sum of the four quarters, or
 * `null` as soon as one of them is missing or `null`. Rows that end up with
 * no non-null LTM value are dropped.
 *
 * The four-quarter window of each LTM period depends only on the period
 * lists, so it is resolved once up front rather than once per row.
 *
 * @param input.detailRows Quarterly detail rows grouped per surface key.
 * @param input.quarterlyPeriods Quarterly periods (any order; not mutated).
 * @param input.ltmPeriods LTM periods to compute values for.
 * @param input.statement Statement kind, which selects the roll-up rule.
 * @returns A map with the same keys as `input.detailRows`.
 */
function buildLtmDetailRows(input: {
  detailRows: SurfaceDetailMap;
  quarterlyPeriods: FinancialStatementPeriod[];
  ltmPeriods: FinancialStatementPeriod[];
  statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>;
}) {
  const sortedQuarterlyPeriods = [...input.quarterlyPeriods].sort(periodSorter);

  // Resolve every LTM period to its four source quarter ids exactly once.
  const ltmWindows: Array<{ ltmPeriodId: string; quarterIds: string[] }> = [];
  for (const ltmPeriod of input.ltmPeriods) {
    const anchorIndex = sortedQuarterlyPeriods.findIndex((period) => `ltm:${period.id}` === ltmPeriod.id);
    if (anchorIndex < 3) {
      // Anchor not found (-1) or fewer than three earlier quarters available.
      continue;
    }

    ltmWindows.push({
      ltmPeriodId: ltmPeriod.id,
      quarterIds: sortedQuarterlyPeriods
        .slice(anchorIndex - 3, anchorIndex + 1)
        .map((period) => period.id)
    });
  }

  const usesLatestQuarter = input.statement === 'balance';

  return Object.fromEntries(
    Object.entries(input.detailRows).map(([surfaceKey, rows]) => {
      const ltmRows = rows
        .map((row) => {
          const values: Record<string, number | null> = {};

          for (const span of ltmWindows) {
            const sourceValues = span.quarterIds.map((quarterId) => row.values[quarterId] ?? null);
            values[span.ltmPeriodId] = usesLatestQuarter
              ? sourceValues[sourceValues.length - 1] ?? null
              : sourceValues.some((value) => value === null)
                ? null
                : sourceValues.reduce<number>((sum, value) => sum + (value ?? 0), 0);
          }

          return {
            ...row,
            values
          };
        })
        .filter((row) => rowHasValues(row.values));

      return [surfaceKey, ltmRows];
    })
  ) satisfies SurfaceDetailMap;
}
|
||||
|
||||
/**
 * Produces the surface rows for a statement.
 *
 * The rows aggregated from the snapshots' persisted `surface_rows` are
 * returned when there are any. Otherwise the rows are rebuilt with
 * `buildStandardizedRows` from the faithful rows, source periods and facts,
 * and that result is returned cast to `SurfaceFinancialRow[]`.
 */
function buildQuarterlyStatementSurfaceRows(input: {
  statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>;
  sourcePeriods: FinancialStatementPeriod[];
  selectedPeriodIds: Set<string>;
  faithfulRows: TaxonomyStatementRow[];
  facts: TaxonomyFactRow[];
  snapshots: FilingTaxonomySnapshotRecord[];
}) {
  const { statement, snapshots, selectedPeriodIds } = input;
  const persistedRows = aggregateSurfaceRows({ snapshots, statement, selectedPeriodIds });

  // Fallback is evaluated only when no persisted surface rows were found.
  return persistedRows.length > 0
    ? persistedRows
    : (buildStandardizedRows({
        rows: input.faithfulRows,
        statement,
        periods: input.sourcePeriods,
        facts: input.facts
      }) as SurfaceFinancialRow[]);
}
|
||||
|
||||
/**
 * Collapses the persisted KPI rows of several snapshots into one row per
 * `key`.
 *
 * Only periods in `selectedPeriodIds` are kept, and rows with no non-null
 * value among them are skipped. When a key appears in more than one
 * snapshot, a later non-null value replaces the earlier one for the same
 * period. A later `null` never replaces a non-null value that has already
 * been collected; previously the object spread let such a `null` erase it.
 * Source concepts and fact ids are unioned and sorted, `hasDimensions` is
 * OR-ed, and all other fields come from the first occurrence. The result is
 * sorted by `order`, then `label`.
 *
 * @param input.snapshots Snapshots in merge order; `kpi_rows` may be null.
 * @param input.selectedPeriodIds Period ids to keep.
 */
function aggregatePersistedKpiRows(input: {
  snapshots: FilingTaxonomySnapshotRecord[];
  selectedPeriodIds: Set<string>;
}) {
  const rowMap = new Map<string, StructuredKpiRow>();

  for (const snapshot of input.snapshots) {
    for (const row of snapshot.kpi_rows ?? []) {
      const filteredValues = Object.fromEntries(
        Object.entries(row.values).filter(([periodId]) => input.selectedPeriodIds.has(periodId))
      );
      if (!rowHasValues(filteredValues)) {
        continue;
      }

      const existing = rowMap.get(row.key);
      if (!existing) {
        rowMap.set(row.key, {
          ...row,
          values: filteredValues,
          sourceConcepts: [...row.sourceConcepts],
          sourceFactIds: [...row.sourceFactIds]
        });
        continue;
      }

      const mergedValues = { ...existing.values };
      for (const [periodId, value] of Object.entries(filteredValues)) {
        const alreadyHasValue = Object.prototype.hasOwnProperty.call(mergedValues, periodId)
          && mergedValues[periodId] != null;
        if (value === null && alreadyHasValue) {
          // Keep the known value instead of blanking it out.
          continue;
        }
        mergedValues[periodId] = value;
      }
      existing.values = mergedValues;

      existing.sourceConcepts = [...new Set([...existing.sourceConcepts, ...row.sourceConcepts])].sort((left, right) => left.localeCompare(right));
      existing.sourceFactIds = [...new Set([...existing.sourceFactIds, ...row.sourceFactIds])].sort((left, right) => left - right);
      existing.hasDimensions = existing.hasDimensions || row.hasDimensions;
    }
  }

  return [...rowMap.values()].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }

    return left.label.localeCompare(right.label);
  });
}
|
||||
|
||||
function buildEmptyResponse(input: {
|
||||
ticker: string;
|
||||
companyName: string;
|
||||
@@ -230,6 +584,7 @@ function buildEmptyResponse(input: {
|
||||
statementRows: isStatementSurface(input.surfaceKind)
|
||||
? { faithful: [], standardized: [] }
|
||||
: null,
|
||||
statementDetails: null,
|
||||
ratioRows: input.surfaceKind === 'ratios' ? [] : null,
|
||||
kpiRows: input.surfaceKind === 'segments_kpis' ? [] : null,
|
||||
trendSeries: [],
|
||||
@@ -255,6 +610,7 @@ function buildEmptyResponse(input: {
|
||||
queuedSync: input.queuedSync
|
||||
},
|
||||
metrics: input.metrics,
|
||||
normalization: emptyNormalizationMetadata(),
|
||||
dimensionBreakdown: null
|
||||
} satisfies CompanyFinancialStatementsResponse;
|
||||
}
|
||||
@@ -262,7 +618,9 @@ function buildEmptyResponse(input: {
|
||||
async function buildStatementSurfaceBundle(input: {
|
||||
surfaceKind: Extract<FinancialSurfaceKind, 'income_statement' | 'balance_sheet' | 'cash_flow_statement'>;
|
||||
cadence: FinancialCadence;
|
||||
periods: FinancialStatementPeriod[];
|
||||
sourcePeriods: FinancialStatementPeriod[];
|
||||
targetPeriods: FinancialStatementPeriod[];
|
||||
selectedPeriodIds: Set<string>;
|
||||
faithfulRows: TaxonomyStatementRow[];
|
||||
facts: TaxonomyFactRow[];
|
||||
snapshots: FilingTaxonomySnapshotRecord[];
|
||||
@@ -274,7 +632,11 @@ async function buildStatementSurfaceBundle(input: {
|
||||
snapshots: input.snapshots
|
||||
});
|
||||
|
||||
if (cached) {
|
||||
if (
|
||||
cached
|
||||
&& Array.isArray((cached as Partial<StandardizedStatementBundlePayload>).rows)
|
||||
&& typeof (cached as Partial<StandardizedStatementBundlePayload>).detailRows === 'object'
|
||||
) {
|
||||
return cached as StandardizedStatementBundlePayload;
|
||||
}
|
||||
|
||||
@@ -282,25 +644,48 @@ async function buildStatementSurfaceBundle(input: {
|
||||
if (!statement || (statement !== 'income' && statement !== 'balance' && statement !== 'cash_flow')) {
|
||||
return {
|
||||
rows: [],
|
||||
detailRows: {},
|
||||
trendSeries: [],
|
||||
categories: []
|
||||
categories: [],
|
||||
normalization: buildNormalizationMetadata(input.snapshots)
|
||||
} satisfies StandardizedStatementBundlePayload;
|
||||
}
|
||||
|
||||
const standardizedRows = buildStandardizedRows({
|
||||
rows: input.faithfulRows,
|
||||
const quarterlyRows = buildQuarterlyStatementSurfaceRows({
|
||||
statement,
|
||||
periods: input.periods,
|
||||
facts: input.facts
|
||||
sourcePeriods: input.sourcePeriods,
|
||||
selectedPeriodIds: input.selectedPeriodIds,
|
||||
faithfulRows: input.faithfulRows,
|
||||
facts: input.facts,
|
||||
snapshots: input.snapshots
|
||||
});
|
||||
const quarterlyDetailRows = aggregateDetailRows({
|
||||
snapshots: input.snapshots,
|
||||
statement,
|
||||
selectedPeriodIds: input.selectedPeriodIds
|
||||
});
|
||||
const rows = input.cadence === 'ltm'
|
||||
? buildLtmStandardizedRows(quarterlyRows, input.sourcePeriods, input.targetPeriods, statement) as SurfaceFinancialRow[]
|
||||
: quarterlyRows;
|
||||
const detailRows = input.cadence === 'ltm'
|
||||
? buildLtmDetailRows({
|
||||
detailRows: quarterlyDetailRows,
|
||||
quarterlyPeriods: input.sourcePeriods,
|
||||
ltmPeriods: input.targetPeriods,
|
||||
statement
|
||||
})
|
||||
: quarterlyDetailRows;
|
||||
const normalization = buildNormalizationMetadata(input.snapshots);
|
||||
|
||||
const payload = {
|
||||
rows: standardizedRows,
|
||||
rows,
|
||||
detailRows,
|
||||
trendSeries: buildTrendSeries({
|
||||
surfaceKind: input.surfaceKind,
|
||||
statementRows: standardizedRows
|
||||
statementRows: rows
|
||||
}),
|
||||
categories: buildFinancialCategories(standardizedRows, input.surfaceKind)
|
||||
categories: buildFinancialCategories(rows, input.surfaceKind),
|
||||
normalization
|
||||
} satisfies StandardizedStatementBundlePayload;
|
||||
|
||||
await writeFinancialBundle({
|
||||
@@ -386,12 +771,19 @@ async function buildKpiSurfaceBundle(input: {
|
||||
return cached as Pick<CompanyFinancialStatementsResponse, 'kpiRows' | 'trendSeries' | 'categories'>;
|
||||
}
|
||||
|
||||
const persistedRows = aggregatePersistedKpiRows({
|
||||
snapshots: input.snapshots,
|
||||
selectedPeriodIds: new Set(input.periods.map((period) => period.id))
|
||||
});
|
||||
const resolved = resolveKpiDefinitions(input.ticker);
|
||||
if (!resolved.template) {
|
||||
return {
|
||||
kpiRows: [],
|
||||
trendSeries: [],
|
||||
categories: []
|
||||
kpiRows: persistedRows,
|
||||
trendSeries: buildTrendSeries({
|
||||
surfaceKind: 'segments_kpis',
|
||||
kpiRows: persistedRows
|
||||
}),
|
||||
categories: buildFinancialCategories(persistedRows, 'segments_kpis')
|
||||
};
|
||||
}
|
||||
|
||||
@@ -408,27 +800,11 @@ async function buildKpiSurfaceBundle(input: {
|
||||
definitions: resolved.definitions
|
||||
});
|
||||
|
||||
const rowsByKey = new Map<string, StructuredKpiRow>();
|
||||
for (const row of [...taxonomyRows, ...noteRows]) {
|
||||
const existing = rowsByKey.get(row.key);
|
||||
if (existing) {
|
||||
existing.values = {
|
||||
...existing.values,
|
||||
...row.values
|
||||
};
|
||||
continue;
|
||||
}
|
||||
|
||||
rowsByKey.set(row.key, row);
|
||||
}
|
||||
|
||||
const kpiRows = [...rowsByKey.values()].sort((left, right) => {
|
||||
if (left.order !== right.order) {
|
||||
return left.order - right.order;
|
||||
}
|
||||
|
||||
return left.label.localeCompare(right.label);
|
||||
});
|
||||
const kpiRows = mergeStructuredKpiRowsByPriority([
|
||||
persistedRows,
|
||||
taxonomyRows,
|
||||
noteRows
|
||||
]);
|
||||
|
||||
const payload = {
|
||||
kpiRows,
|
||||
@@ -515,7 +891,8 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
failedFilings: statuses.failed,
|
||||
pendingFilings: Math.max(0, financialFilings.filter((filing) => filingTypes.includes(filing.filing_type as '10-K' | '10-Q')).length - statuses.ready - statuses.partial - statuses.failed),
|
||||
queuedSync: input.queuedSync
|
||||
}
|
||||
},
|
||||
normalization: buildNormalizationMetadata(snapshotResult.snapshots)
|
||||
};
|
||||
}
|
||||
|
||||
@@ -539,48 +916,39 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
const periods = input.cadence === 'ltm'
|
||||
? buildLtmPeriods(selection.periods)
|
||||
: selection.periods;
|
||||
const baseFaithfulRows = buildRows(selection.snapshots, statement, selection.selectedPeriodIds);
|
||||
const faithfulRows = input.cadence === 'ltm'
|
||||
? buildLtmFaithfulRows(
|
||||
buildRows(selection.snapshots, statement, selection.selectedPeriodIds),
|
||||
baseFaithfulRows,
|
||||
selection.periods,
|
||||
periods,
|
||||
statement
|
||||
)
|
||||
: buildRows(selection.snapshots, statement, selection.selectedPeriodIds);
|
||||
: baseFaithfulRows;
|
||||
|
||||
const factsForStatement = allFacts.facts.filter((fact) => fact.statement === statement);
|
||||
const factsForStandardization = allFacts.facts;
|
||||
const standardizedPayload = await buildStatementSurfaceBundle({
|
||||
surfaceKind: input.surfaceKind as Extract<FinancialSurfaceKind, 'income_statement' | 'balance_sheet' | 'cash_flow_statement'>,
|
||||
cadence: input.cadence,
|
||||
periods,
|
||||
faithfulRows,
|
||||
sourcePeriods: selection.periods,
|
||||
targetPeriods: periods,
|
||||
selectedPeriodIds: selection.selectedPeriodIds,
|
||||
faithfulRows: baseFaithfulRows,
|
||||
facts: factsForStandardization,
|
||||
snapshots: selection.snapshots
|
||||
});
|
||||
|
||||
const standardizedRows = input.cadence === 'ltm'
|
||||
? buildLtmStandardizedRows(
|
||||
buildStandardizedRows({
|
||||
rows: buildRows(selection.snapshots, statement, selection.selectedPeriodIds),
|
||||
statement: statement as Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>,
|
||||
periods: selection.periods,
|
||||
facts: factsForStandardization
|
||||
}),
|
||||
selection.periods,
|
||||
periods,
|
||||
statement as Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>
|
||||
)
|
||||
: standardizedPayload.rows;
|
||||
const standardizedRows = standardizedPayload.rows;
|
||||
|
||||
const rawFacts = input.includeFacts
|
||||
? await listTaxonomyFactsByTicker({
|
||||
ticker,
|
||||
window: 'all',
|
||||
filingTypes: [...filingTypes],
|
||||
statement,
|
||||
cursor: input.factsCursor,
|
||||
limit: input.factsLimit
|
||||
statement,
|
||||
cursor: input.factsCursor,
|
||||
limit: input.factsLimit
|
||||
})
|
||||
: { facts: [], nextCursor: null };
|
||||
|
||||
@@ -603,12 +971,10 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
faithful: faithfulRows,
|
||||
standardized: standardizedRows
|
||||
},
|
||||
statementDetails: standardizedPayload.detailRows,
|
||||
ratioRows: null,
|
||||
kpiRows: null,
|
||||
trendSeries: buildTrendSeries({
|
||||
surfaceKind: input.surfaceKind,
|
||||
statementRows: standardizedRows
|
||||
}),
|
||||
trendSeries: standardizedPayload.trendSeries,
|
||||
categories: standardizedPayload.categories,
|
||||
availability: {
|
||||
adjusted: false,
|
||||
@@ -636,6 +1002,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
queuedSync: input.queuedSync
|
||||
},
|
||||
metrics,
|
||||
normalization: standardizedPayload.normalization,
|
||||
dimensionBreakdown
|
||||
};
|
||||
}
|
||||
@@ -654,23 +1021,29 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
? buildLtmPeriods(incomeSelection.periods)
|
||||
: incomeSelection.periods;
|
||||
|
||||
const incomeQuarterlyRows = buildStandardizedRows({
|
||||
rows: buildRows(incomeSelection.snapshots, 'income', incomeSelection.selectedPeriodIds),
|
||||
const incomeQuarterlyRows = buildQuarterlyStatementSurfaceRows({
|
||||
statement: 'income',
|
||||
periods: incomeSelection.periods,
|
||||
facts: allFacts.facts
|
||||
sourcePeriods: incomeSelection.periods,
|
||||
selectedPeriodIds: incomeSelection.selectedPeriodIds,
|
||||
faithfulRows: buildRows(incomeSelection.snapshots, 'income', incomeSelection.selectedPeriodIds),
|
||||
facts: allFacts.facts,
|
||||
snapshots: incomeSelection.snapshots
|
||||
});
|
||||
const balanceQuarterlyRows = rekeyRowsByFilingId(buildStandardizedRows({
|
||||
rows: buildRows(balanceSelection.snapshots, 'balance', balanceSelection.selectedPeriodIds),
|
||||
const balanceQuarterlyRows = rekeyRowsByFilingId(buildQuarterlyStatementSurfaceRows({
|
||||
statement: 'balance',
|
||||
periods: balanceSelection.periods,
|
||||
facts: allFacts.facts
|
||||
sourcePeriods: balanceSelection.periods,
|
||||
selectedPeriodIds: balanceSelection.selectedPeriodIds,
|
||||
faithfulRows: buildRows(balanceSelection.snapshots, 'balance', balanceSelection.selectedPeriodIds),
|
||||
facts: allFacts.facts,
|
||||
snapshots: balanceSelection.snapshots
|
||||
}), balanceSelection.periods, incomeSelection.periods);
|
||||
const cashFlowQuarterlyRows = rekeyRowsByFilingId(buildStandardizedRows({
|
||||
rows: buildRows(cashFlowSelection.snapshots, 'cash_flow', cashFlowSelection.selectedPeriodIds),
|
||||
const cashFlowQuarterlyRows = rekeyRowsByFilingId(buildQuarterlyStatementSurfaceRows({
|
||||
statement: 'cash_flow',
|
||||
periods: cashFlowSelection.periods,
|
||||
facts: allFacts.facts
|
||||
sourcePeriods: cashFlowSelection.periods,
|
||||
selectedPeriodIds: cashFlowSelection.selectedPeriodIds,
|
||||
faithfulRows: buildRows(cashFlowSelection.snapshots, 'cash_flow', cashFlowSelection.selectedPeriodIds),
|
||||
facts: allFacts.facts,
|
||||
snapshots: cashFlowSelection.snapshots
|
||||
}), cashFlowSelection.periods, incomeSelection.periods);
|
||||
|
||||
const incomeRows = input.cadence === 'ltm'
|
||||
@@ -706,6 +1079,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
defaultDisplayMode: 'standardized',
|
||||
periods: basePeriods,
|
||||
statementRows: null,
|
||||
statementDetails: null,
|
||||
ratioRows: ratioBundle.ratioRows,
|
||||
kpiRows: null,
|
||||
trendSeries: ratioBundle.trendSeries,
|
||||
@@ -731,6 +1105,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
queuedSync: input.queuedSync
|
||||
},
|
||||
metrics,
|
||||
normalization: buildNormalizationMetadata(incomeSelection.snapshots),
|
||||
dimensionBreakdown: null
|
||||
};
|
||||
}
|
||||
@@ -770,6 +1145,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
defaultDisplayMode: 'standardized',
|
||||
periods: basePeriods,
|
||||
statementRows: null,
|
||||
statementDetails: null,
|
||||
ratioRows: null,
|
||||
kpiRows: kpiBundle.kpiRows,
|
||||
trendSeries: kpiBundle.trendSeries,
|
||||
@@ -795,6 +1171,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr
|
||||
queuedSync: input.queuedSync
|
||||
},
|
||||
metrics,
|
||||
normalization: buildNormalizationMetadata(incomeSelection.snapshots),
|
||||
dimensionBreakdown: mergeDimensionBreakdownMaps(kpiBreakdown)
|
||||
};
|
||||
}
|
||||
@@ -807,6 +1184,9 @@ export const __financialTaxonomyInternals = {
|
||||
buildRows,
|
||||
buildStandardizedRows,
|
||||
buildDimensionBreakdown,
|
||||
buildNormalizationMetadata,
|
||||
aggregateSurfaceRows,
|
||||
mergeStructuredKpiRowsByPriority,
|
||||
periodSorter,
|
||||
selectPrimaryPeriodsByCadence,
|
||||
buildLtmPeriods,
|
||||
|
||||
320
lib/server/financials/surface.ts
Normal file
320
lib/server/financials/surface.ts
Normal file
@@ -0,0 +1,320 @@
|
||||
import type {
|
||||
DetailFinancialRow,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementPeriod,
|
||||
NormalizationSummary,
|
||||
StructuredKpiRow,
|
||||
SurfaceDetailMap,
|
||||
SurfaceFinancialRow,
|
||||
TaxonomyFactRow,
|
||||
TaxonomyStatementRow
|
||||
} from '@/lib/types';
|
||||
import { buildStandardizedRows } from '@/lib/server/financials/standardize';
|
||||
|
||||
/** The statement kinds for which compact surface definitions exist. */
type CompactStatement = Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>;

/**
 * Describes one compact surface row and how its values are obtained.
 *
 * At most one of `rowKey`, `componentKeys` or `formula` is expected to drive
 * the value: a direct lookup, a sum of component rows, or a subtraction of
 * two rows respectively.
 */
type SurfaceDefinition = {
  /** Key of the emitted surface row. */
  key: string;
  /** Display label of the emitted surface row. */
  label: string;
  category: string;
  /** Sort position of the row within its statement. */
  order: number;
  unit: SurfaceFinancialRow['unit'];
  /** Key of the single standardized row whose values are used as-is. */
  rowKey?: string;
  /** Keys of the standardized rows whose values are summed. */
  componentKeys?: string[];
  /** Derives the value as `left - right` from two standardized rows. */
  formula?: {
    kind: 'subtract';
    left: string;
    right: string;
  };
};
|
||||
|
||||
/**
 * Template with an empty surface-row list for every statement kind.
 * Clone it before filling it in so the shared template stays empty.
 */
const EMPTY_SURFACE_ROWS: Record<FinancialStatementKind, SurfaceFinancialRow[]> = {
  income: [],
  balance: [],
  cash_flow: [],
  equity: [],
  comprehensive_income: []
};

/**
 * Template with an empty detail map for every statement kind.
 * Clone it before filling it in so the shared template stays empty.
 */
const EMPTY_DETAIL_ROWS: Record<FinancialStatementKind, SurfaceDetailMap> = {
  income: {},
  balance: {},
  cash_flow: {},
  equity: {},
  comprehensive_income: {}
};
|
||||
|
||||
/**
 * Compact surface rows per statement, in display order.
 *
 * Entries with `rowKey` read a single standardized row, entries with
 * `componentKeys` sum several rows, and entries with `formula` subtract one
 * row from another.
 */
const SURFACE_DEFINITIONS: Record<CompactStatement, SurfaceDefinition[]> = {
  income: [
    { key: 'revenue', label: 'Revenue', category: 'surface', order: 10, unit: 'currency', rowKey: 'revenue' },
    { key: 'cost_of_revenue', label: 'Cost of Revenue', category: 'surface', order: 20, unit: 'currency', rowKey: 'cost_of_revenue' },
    { key: 'gross_profit', label: 'Gross Profit', category: 'surface', order: 30, unit: 'currency', rowKey: 'gross_profit' },
    {
      key: 'operating_expenses',
      label: 'Operating Expenses',
      category: 'surface',
      order: 40,
      unit: 'currency',
      componentKeys: ['selling_general_and_administrative', 'research_and_development', 'depreciation_and_amortization']
    },
    { key: 'operating_income', label: 'Operating Income', category: 'surface', order: 50, unit: 'currency', rowKey: 'operating_income' },
    {
      key: 'interest_and_other',
      label: 'Interest and Other',
      category: 'surface',
      order: 60,
      unit: 'currency',
      formula: {
        kind: 'subtract',
        left: 'pretax_income',
        right: 'operating_income'
      }
    },
    { key: 'pretax_income', label: 'Pretax Income', category: 'surface', order: 70, unit: 'currency', rowKey: 'pretax_income' },
    { key: 'income_taxes', label: 'Income Taxes', category: 'surface', order: 80, unit: 'currency', rowKey: 'income_tax_expense' },
    { key: 'net_income', label: 'Net Income', category: 'surface', order: 90, unit: 'currency', rowKey: 'net_income' }
  ],
  balance: [
    { key: 'cash_and_equivalents', label: 'Cash and Equivalents', category: 'surface', order: 10, unit: 'currency', rowKey: 'cash_and_equivalents' },
    { key: 'receivables', label: 'Receivables', category: 'surface', order: 20, unit: 'currency', rowKey: 'accounts_receivable' },
    { key: 'inventory', label: 'Inventory', category: 'surface', order: 30, unit: 'currency', rowKey: 'inventory' },
    { key: 'current_assets', label: 'Current Assets', category: 'surface', order: 40, unit: 'currency', rowKey: 'current_assets' },
    { key: 'ppe', label: 'Property, Plant & Equipment', category: 'surface', order: 50, unit: 'currency', rowKey: 'property_plant_equipment' },
    {
      key: 'goodwill_and_intangibles',
      label: 'Goodwill and Intangibles',
      category: 'surface',
      order: 60,
      unit: 'currency',
      componentKeys: ['goodwill', 'intangible_assets']
    },
    { key: 'total_assets', label: 'Total Assets', category: 'surface', order: 70, unit: 'currency', rowKey: 'total_assets' },
    { key: 'current_liabilities', label: 'Current Liabilities', category: 'surface', order: 80, unit: 'currency', rowKey: 'current_liabilities' },
    { key: 'debt', label: 'Debt', category: 'surface', order: 90, unit: 'currency', rowKey: 'total_debt' },
    { key: 'total_liabilities', label: 'Total Liabilities', category: 'surface', order: 100, unit: 'currency', rowKey: 'total_liabilities' },
    { key: 'shareholders_equity', label: 'Shareholders Equity', category: 'surface', order: 110, unit: 'currency', rowKey: 'total_equity' }
  ],
  cash_flow: [
    { key: 'operating_cash_flow', label: 'Operating Cash Flow', category: 'surface', order: 10, unit: 'currency', rowKey: 'operating_cash_flow' },
    { key: 'capital_expenditures', label: 'Capital Expenditures', category: 'surface', order: 20, unit: 'currency', rowKey: 'capital_expenditures' },
    { key: 'acquisitions', label: 'Acquisitions', category: 'surface', order: 30, unit: 'currency', rowKey: 'acquisitions' },
    { key: 'investing_cash_flow', label: 'Investing Cash Flow', category: 'surface', order: 40, unit: 'currency', rowKey: 'investing_cash_flow' },
    { key: 'financing_cash_flow', label: 'Financing Cash Flow', category: 'surface', order: 50, unit: 'currency', rowKey: 'financing_cash_flow' },
    { key: 'free_cash_flow', label: 'Free Cash Flow', category: 'surface', order: 60, unit: 'currency', rowKey: 'free_cash_flow' }
  ]
};
|
||||
|
||||
/**
 * Reports whether a row carries at least one period value that is not `null`.
 *
 * @param row Any object exposing a period-id to value map.
 * @returns `true` when some entry differs from `null`; `false` for an empty
 *   map or a map containing only `null` values.
 */
function rowHasAnyValue(row: { values: Record<string, number | null> }) {
  return Object.values(row.values).some((value) => value !== null);
}
|
||||
|
||||
/**
 * Adds up a list of nullable numbers.
 *
 * @param values Values to add; `null` entries count as zero.
 * @returns `null` when every entry is `null` (including the empty list),
 *   otherwise the sum of the non-null entries.
 */
function sumValues(values: Array<number | null>) {
  // An all-null (or empty) input means "no data", which is distinct from a sum of 0.
  if (values.every((value) => value === null)) {
    return null;
  }

  return values.reduce<number>((sum, value) => sum + (value ?? 0), 0);
}
|
||||
|
||||
/**
 * Looks up the value a row reports for one period.
 *
 * @param rowByKey Rows indexed by row key.
 * @param rowKey Key of the row to read.
 * @param periodId Period whose value is wanted.
 * @returns The stored value, or `null` when the row is missing or has no
 *   (or a nullish) value for that period.
 */
function valueForPeriod(
  rowByKey: Map<string, SurfaceFinancialRow>,
  rowKey: string,
  periodId: string
) {
  return rowByKey.get(rowKey)?.values[periodId] ?? null;
}
|
||||
|
||||
/**
 * Finds the largest magnitude among a row's period values.
 *
 * @param values Period-id to value map; nullish values count as zero.
 * @returns The maximum absolute value, or `0` for an empty map.
 */
function maxAbsValue(values: Record<string, number | null>) {
  return Object.values(values).reduce<number>((max, value) => Math.max(max, Math.abs(value ?? 0)), 0);
}
|
||||
|
||||
/**
 * Chooses the unit string to report on a detail row.
 *
 * @param row Surface row whose unit category is the fallback source.
 * @param faithfulRow Matching faithful row, if one was found.
 * @returns When a faithful row is given, the first entry of its `units` map
 *   (or `null` if the map is empty). Otherwise `'USD'` for `currency`,
 *   `'shares'` for `shares`, `'pure'` for `percent`, and `null` for any other
 *   unit category.
 */
function detailUnit(row: SurfaceFinancialRow, faithfulRow: TaxonomyStatementRow | undefined) {
  // A faithful row always takes precedence, even when it reports no units.
  if (faithfulRow) {
    return Object.values(faithfulRow.units)[0] ?? null;
  }

  switch (row.unit) {
    case 'currency':
      return 'USD';
    case 'shares':
      return 'shares';
    case 'percent':
      return 'pure';
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Converts a standardized row into a detail row attached to a surface row.
 *
 * Concept metadata is taken from the matching faithful row when one exists.
 * The faithful row is located through the first of the row's `sourceRowKeys`
 * that is present in `faithfulRowByKey`, falling back to the first source row
 * key and then to the row's own key. Without a faithful row, the qname falls
 * back to the first source concept (then the row key), and the namespace and
 * local name are derived from that qname.
 *
 * @param input.row Standardized row to convert.
 * @param input.parentSurfaceKey Surface row the detail belongs to; the value
 *   `'unmapped'` marks the detail as residual.
 * @param input.faithfulRowByKey Faithful rows indexed by key.
 * @returns The detail row, with `values` and `sourceFactIds` copied from `row`.
 */
function buildDetailRow(input: {
  row: SurfaceFinancialRow;
  parentSurfaceKey: string;
  faithfulRowByKey: Map<string, TaxonomyStatementRow>;
}): DetailFinancialRow {
  const sourceRowKey = input.row.sourceRowKeys.find((key) => input.faithfulRowByKey.has(key)) ?? input.row.sourceRowKeys[0] ?? input.row.key;
  const faithfulRow = sourceRowKey ? input.faithfulRowByKey.get(sourceRowKey) : undefined;
  const qname = faithfulRow?.qname ?? input.row.sourceConcepts[0] ?? input.row.key;
  // Split "prefix:local"; everything after the first colon is the local part.
  const [prefix, ...rest] = qname.split(':');
  const localName = faithfulRow?.localName ?? (rest.length > 0 ? rest.join(':') : qname);

  return {
    key: input.row.key,
    parentSurfaceKey: input.parentSurfaceKey,
    label: input.row.label,
    conceptKey: faithfulRow?.conceptKey ?? sourceRowKey,
    qname,
    // Placeholder namespaces are used when no faithful row supplies the real URI.
    namespaceUri: faithfulRow?.namespaceUri ?? (prefix && rest.length > 0 ? `urn:unknown:${prefix}` : 'urn:surface'),
    localName,
    unit: detailUnit(input.row, faithfulRow),
    values: { ...input.row.values },
    sourceFactIds: [...input.row.sourceFactIds],
    isExtension: faithfulRow?.isExtension ?? false,
    dimensionsSummary: faithfulRow?.hasDimensions ? ['has_dimensions'] : [],
    residualFlag: input.parentSurfaceKey === 'unmapped'
  };
}
|
||||
|
||||
/**
 * Computes the reference magnitude used to scale materiality checks.
 *
 * The anchor row is `total_assets` for the balance sheet and `revenue` for
 * every other statement.
 *
 * NOTE(review): for `cash_flow` this looks up `revenue` in the map it is
 * given; confirm that map actually contains a revenue row, otherwise the
 * baseline is always 0.
 *
 * @param statement Statement being evaluated.
 * @param rowByKey Rows indexed by row key.
 * @returns The largest absolute value of the anchor row, or `0` when the
 *   anchor row is absent.
 */
function baselineForStatement(statement: CompactStatement, rowByKey: Map<string, SurfaceFinancialRow>) {
  const anchorKey = statement === 'balance' ? 'total_assets' : 'revenue';
  return maxAbsValue(rowByKey.get(anchorKey)?.values ?? {});
}
|
||||
|
||||
/**
 * Returns the magnitude at or above which an unmapped row counts as material.
 *
 * @param statement Statement being evaluated.
 * @param baseline Reference magnitude for the statement.
 * @returns For the balance sheet, the larger of 5,000,000 and 0.5% of the
 *   baseline; for other statements, the larger of 1,000,000 and 1% of the
 *   baseline.
 */
function materialityThreshold(statement: CompactStatement, baseline: number) {
  if (statement === 'balance') {
    return Math.max(5_000_000, baseline * 0.005);
  }

  return Math.max(1_000_000, baseline * 0.01);
}
|
||||
|
||||
/**
 * Builds the compact surface rows, their detail rows and a normalization
 * summary for the income, balance and cash-flow statements.
 *
 * For each statement the faithful rows are standardized with
 * `buildStandardizedRows` (using only the facts tagged with that statement),
 * then every entry of `SURFACE_DEFINITIONS` is evaluated per period:
 * a `rowKey` entry reads one standardized row, a `formula` entry subtracts
 * two rows (yielding `null` if either side is `null`), and any other entry
 * sums its component rows. Definitions without any non-null value are
 * omitted. Only `componentKeys` definitions produce detail rows; a definition
 * that has neither `rowKey` nor `componentKeys` (for example a pure formula)
 * ends up with empty source-concept, source-row-key and source-fact lists.
 *
 * Standardized rows whose key starts with `other:` and that carry a value are
 * emitted as details under the `unmapped` key and counted as unmapped; those
 * whose largest magnitude reaches the statement's materiality threshold are
 * additionally counted as material.
 *
 * @param input.periods Periods to compute a value for.
 * @param input.faithfulRows Faithful rows per statement kind.
 * @param input.facts Facts for all statements; filtered per statement here.
 * @param input.kpiRows Optional KPI rows; only their count is used.
 * @returns Surface rows and detail maps for every statement kind (kinds
 *   without definitions stay empty) plus the normalization summary.
 */
export function buildCompactHydrationModel(input: {
  periods: FinancialStatementPeriod[];
  faithfulRows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  facts: TaxonomyFactRow[];
  kpiRows?: StructuredKpiRow[];
}) {
  // Clone the templates so the shared module-level constants are never mutated.
  const surfaceRows = structuredClone(EMPTY_SURFACE_ROWS);
  const detailRows = structuredClone(EMPTY_DETAIL_ROWS);
  let surfaceRowCount = 0;
  let detailRowCount = 0;
  let unmappedRowCount = 0;
  let materialUnmappedRowCount = 0;

  for (const statement of Object.keys(SURFACE_DEFINITIONS) as CompactStatement[]) {
    const faithfulRows = input.faithfulRows[statement] ?? [];
    const facts = input.facts.filter((fact) => fact.statement === statement);
    const fullRows = buildStandardizedRows({
      rows: faithfulRows,
      statement,
      periods: input.periods,
      facts
    });
    const rowByKey = new Map(fullRows.map((row) => [row.key, row]));
    const faithfulRowByKey = new Map(faithfulRows.map((row) => [row.key, row]));
    const statementDetails: SurfaceDetailMap = {};

    for (const definition of SURFACE_DEFINITIONS[statement]) {
      const { rowKey, formula } = definition;
      const contributingRows = rowKey
        ? [rowByKey.get(rowKey)].filter((row): row is SurfaceFinancialRow => row !== undefined)
        : (definition.componentKeys ?? [])
          .map((key) => rowByKey.get(key))
          .filter((row): row is SurfaceFinancialRow => row !== undefined);

      const values = Object.fromEntries(input.periods.map((period) => {
        let nextValue: number | null;
        if (rowKey) {
          nextValue = valueForPeriod(rowByKey, rowKey, period.id);
        } else if (formula) {
          const left = valueForPeriod(rowByKey, formula.left, period.id);
          const right = valueForPeriod(rowByKey, formula.right, period.id);
          // A difference is only meaningful when both operands are known.
          nextValue = left === null || right === null ? null : left - right;
        } else {
          nextValue = sumValues(contributingRows.map((row) => row.values[period.id] ?? null));
        }

        return [period.id, nextValue];
      })) satisfies Record<string, number | null>;

      if (!rowHasAnyValue({ values })) {
        continue;
      }

      const sourceConcepts = [...new Set(contributingRows.flatMap((row) => row.sourceConcepts))].sort((left, right) => left.localeCompare(right));
      const sourceRowKeys = [...new Set(contributingRows.flatMap((row) => row.sourceRowKeys))].sort((left, right) => left.localeCompare(right));
      const sourceFactIds = [...new Set(contributingRows.flatMap((row) => row.sourceFactIds))].sort((left, right) => left - right);
      const hasDimensions = contributingRows.some((row) => row.hasDimensions);
      // Per-period source rows are only known for direct (rowKey) definitions.
      const resolvedSourceRowKeys = Object.fromEntries(input.periods.map((period) => [
        period.id,
        rowKey
          ? rowByKey.get(rowKey)?.resolvedSourceRowKeys[period.id] ?? null
          : null
      ]));

      const rowsForDetail = definition.componentKeys
        ? contributingRows
        : [];
      const details = rowsForDetail
        .filter((row) => rowHasAnyValue(row))
        .map((row) => buildDetailRow({
          row,
          parentSurfaceKey: definition.key,
          faithfulRowByKey
        }));

      statementDetails[definition.key] = details;
      detailRowCount += details.length;

      surfaceRows[statement].push({
        key: definition.key,
        label: definition.label,
        category: definition.category,
        templateSection: definition.category,
        order: definition.order,
        unit: definition.unit,
        values,
        sourceConcepts,
        sourceRowKeys,
        sourceFactIds,
        formulaKey: formula ? definition.key : null,
        hasDimensions,
        resolvedSourceRowKeys,
        statement,
        detailCount: details.length
      });
      surfaceRowCount += 1;
    }

    const baseline = baselineForStatement(statement, rowByKey);
    const threshold = materialityThreshold(statement, baseline);
    const residualRows = fullRows
      .filter((row) => row.key.startsWith('other:'))
      .filter((row) => rowHasAnyValue(row))
      .map((row) => buildDetailRow({
        row,
        parentSurfaceKey: 'unmapped',
        faithfulRowByKey
      }));

    if (residualRows.length > 0) {
      statementDetails.unmapped = residualRows;
      detailRowCount += residualRows.length;
      unmappedRowCount += residualRows.length;
      materialUnmappedRowCount += residualRows.filter((row) => maxAbsValue(row.values) >= threshold).length;
    }

    detailRows[statement] = statementDetails;
  }

  const normalizationSummary: NormalizationSummary = {
    surfaceRowCount,
    detailRowCount,
    kpiRowCount: input.kpiRows?.length ?? 0,
    unmappedRowCount,
    materialUnmappedRowCount,
    warnings: []
  };

  return {
    surfaceRows,
    detailRows,
    normalizationSummary
  };
}
|
||||
@@ -1,9 +1,21 @@
|
||||
import { and, desc, eq, gte, inArray, lt, sql } from 'drizzle-orm';
|
||||
import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyDimensionMember, TaxonomyFactRow, TaxonomyStatementRow } from '@/lib/types';
|
||||
import type {
|
||||
Filing,
|
||||
FinancialStatementKind,
|
||||
MetricValidationResult,
|
||||
NormalizationSummary,
|
||||
StructuredKpiRow,
|
||||
SurfaceDetailMap,
|
||||
SurfaceFinancialRow,
|
||||
TaxonomyDimensionMember,
|
||||
TaxonomyFactRow,
|
||||
TaxonomyStatementRow
|
||||
} from '@/lib/types';
|
||||
import { db } from '@/lib/server/db';
|
||||
import {
|
||||
filingTaxonomyAsset,
|
||||
filingTaxonomyConcept,
|
||||
filingTaxonomyContext,
|
||||
filingTaxonomyFact,
|
||||
filingTaxonomyMetricValidation,
|
||||
filingTaxonomySnapshot
|
||||
@@ -41,10 +53,19 @@ export type FilingTaxonomySnapshotRecord = {
|
||||
parse_status: FilingTaxonomyParseStatus;
|
||||
parse_error: string | null;
|
||||
source: FilingTaxonomySource;
|
||||
parser_engine: string;
|
||||
parser_version: string;
|
||||
taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown';
|
||||
fiscal_pack: string | null;
|
||||
periods: FilingTaxonomyPeriod[];
|
||||
faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
|
||||
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
|
||||
surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
|
||||
detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
|
||||
kpi_rows: StructuredKpiRow[];
|
||||
derived_metrics: Filing['metrics'];
|
||||
validation_result: MetricValidationResult | null;
|
||||
normalization_summary: NormalizationSummary | null;
|
||||
facts_count: number;
|
||||
concepts_count: number;
|
||||
dimensions_count: number;
|
||||
@@ -52,6 +73,20 @@ export type FilingTaxonomySnapshotRecord = {
|
||||
updated_at: string;
|
||||
};
|
||||
|
||||
/**
 * A context record stored for a filing taxonomy snapshot.
 *
 * NOTE(review): the field names suggest this mirrors an XBRL context
 * (entity, period, segment, scenario); confirm against the table schema.
 */
export type FilingTaxonomyContextRecord = {
  id: number;
  /** Id of the snapshot this context belongs to. */
  snapshot_id: number;
  context_id: string;
  entity_identifier: string | null;
  entity_scheme: string | null;
  period_start: string | null;
  period_end: string | null;
  period_instant: string | null;
  segment_json: Record<string, unknown> | null;
  scenario_json: Record<string, unknown> | null;
  created_at: string;
};
|
||||
|
||||
export type FilingTaxonomyAssetRecord = {
|
||||
id: number;
|
||||
snapshot_id: number;
|
||||
@@ -73,8 +108,17 @@ export type FilingTaxonomyConceptRecord = {
|
||||
local_name: string;
|
||||
label: string | null;
|
||||
is_extension: boolean;
|
||||
balance: string | null;
|
||||
period_type: string | null;
|
||||
data_type: string | null;
|
||||
statement_kind: FinancialStatementKind | null;
|
||||
role_uri: string | null;
|
||||
authoritative_concept_key: string | null;
|
||||
mapping_method: string | null;
|
||||
surface_key: string | null;
|
||||
detail_parent_surface_key: string | null;
|
||||
kpi_key: string | null;
|
||||
residual_flag: boolean;
|
||||
presentation_order: number | null;
|
||||
presentation_depth: number | null;
|
||||
parent_concept_key: string | null;
|
||||
@@ -89,11 +133,20 @@ export type FilingTaxonomyFactRecord = {
|
||||
qname: string;
|
||||
namespace_uri: string;
|
||||
local_name: string;
|
||||
data_type: string | null;
|
||||
statement_kind: FinancialStatementKind | null;
|
||||
role_uri: string | null;
|
||||
authoritative_concept_key: string | null;
|
||||
mapping_method: string | null;
|
||||
surface_key: string | null;
|
||||
detail_parent_surface_key: string | null;
|
||||
kpi_key: string | null;
|
||||
residual_flag: boolean;
|
||||
context_id: string;
|
||||
unit: string | null;
|
||||
decimals: string | null;
|
||||
precision: string | null;
|
||||
nil: boolean;
|
||||
value_num: number;
|
||||
period_start: string | null;
|
||||
period_end: string | null;
|
||||
@@ -130,13 +183,32 @@ export type UpsertFilingTaxonomySnapshotInput = {
|
||||
parse_status: FilingTaxonomyParseStatus;
|
||||
parse_error: string | null;
|
||||
source: FilingTaxonomySource;
|
||||
parser_engine: string;
|
||||
parser_version: string;
|
||||
taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown';
|
||||
fiscal_pack: string | null;
|
||||
periods: FilingTaxonomyPeriod[];
|
||||
faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
|
||||
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
|
||||
surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
|
||||
detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
|
||||
kpi_rows: StructuredKpiRow[];
|
||||
derived_metrics: Filing['metrics'];
|
||||
validation_result: MetricValidationResult | null;
|
||||
normalization_summary: NormalizationSummary | null;
|
||||
facts_count: number;
|
||||
concepts_count: number;
|
||||
dimensions_count: number;
|
||||
contexts: Array<{
|
||||
context_id: string;
|
||||
entity_identifier: string | null;
|
||||
entity_scheme: string | null;
|
||||
period_start: string | null;
|
||||
period_end: string | null;
|
||||
period_instant: string | null;
|
||||
segment_json: Record<string, unknown> | null;
|
||||
scenario_json: Record<string, unknown> | null;
|
||||
}>;
|
||||
assets: Array<{
|
||||
asset_type: FilingTaxonomyAssetType;
|
||||
name: string;
|
||||
@@ -152,8 +224,17 @@ export type UpsertFilingTaxonomySnapshotInput = {
|
||||
local_name: string;
|
||||
label: string | null;
|
||||
is_extension: boolean;
|
||||
balance: string | null;
|
||||
period_type: string | null;
|
||||
data_type: string | null;
|
||||
statement_kind: FinancialStatementKind | null;
|
||||
role_uri: string | null;
|
||||
authoritative_concept_key: string | null;
|
||||
mapping_method: string | null;
|
||||
surface_key: string | null;
|
||||
detail_parent_surface_key: string | null;
|
||||
kpi_key: string | null;
|
||||
residual_flag: boolean;
|
||||
presentation_order: number | null;
|
||||
presentation_depth: number | null;
|
||||
parent_concept_key: string | null;
|
||||
@@ -164,11 +245,20 @@ export type UpsertFilingTaxonomySnapshotInput = {
|
||||
qname: string;
|
||||
namespace_uri: string;
|
||||
local_name: string;
|
||||
data_type: string | null;
|
||||
statement_kind: FinancialStatementKind | null;
|
||||
role_uri: string | null;
|
||||
authoritative_concept_key: string | null;
|
||||
mapping_method: string | null;
|
||||
surface_key: string | null;
|
||||
detail_parent_surface_key: string | null;
|
||||
kpi_key: string | null;
|
||||
residual_flag: boolean;
|
||||
context_id: string;
|
||||
unit: string | null;
|
||||
decimals: string | null;
|
||||
precision: string | null;
|
||||
nil: boolean;
|
||||
value_num: number;
|
||||
period_start: string | null;
|
||||
period_end: string | null;
|
||||
@@ -229,7 +319,29 @@ function emptyStatementRows(): Record<FinancialStatementKind, TaxonomyStatementR
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a fresh surface-row container with one empty array per statement kind.
 *
 * A new object (and new arrays) is created on every call, so callers may
 * mutate the result without affecting other snapshots.
 */
function emptySurfaceRows(): Record<FinancialStatementKind, SurfaceFinancialRow[]> {
  const rowsByStatement: Record<FinancialStatementKind, SurfaceFinancialRow[]> = {
    income: [],
    balance: [],
    cash_flow: [],
    equity: [],
    comprehensive_income: []
  };

  return rowsByStatement;
}
|
||||
|
||||
/**
 * Builds a fresh detail-row container with one empty detail map per statement kind.
 *
 * Every call returns new objects, so the result is safe to mutate.
 */
function emptyDetailRows(): Record<FinancialStatementKind, SurfaceDetailMap> {
  const detailsByStatement: Record<FinancialStatementKind, SurfaceDetailMap> = {
    income: {},
    balance: {},
    cash_flow: {},
    equity: {},
    comprehensive_income: {}
  };

  return detailsByStatement;
}
|
||||
|
||||
function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): FilingTaxonomySnapshotRecord {
|
||||
const faithfulRows = row.faithful_rows ?? row.statement_rows ?? emptyStatementRows();
|
||||
|
||||
return {
|
||||
id: row.id,
|
||||
filing_id: row.filing_id,
|
||||
@@ -239,10 +351,19 @@ function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): Fili
|
||||
parse_status: row.parse_status,
|
||||
parse_error: row.parse_error,
|
||||
source: row.source,
|
||||
parser_engine: row.parser_engine,
|
||||
parser_version: row.parser_version,
|
||||
taxonomy_regime: row.taxonomy_regime,
|
||||
fiscal_pack: row.fiscal_pack,
|
||||
periods: row.periods ?? [],
|
||||
statement_rows: row.statement_rows ?? emptyStatementRows(),
|
||||
faithful_rows: faithfulRows,
|
||||
statement_rows: faithfulRows,
|
||||
surface_rows: row.surface_rows ?? emptySurfaceRows(),
|
||||
detail_rows: row.detail_rows ?? emptyDetailRows(),
|
||||
kpi_rows: row.kpi_rows ?? [],
|
||||
derived_metrics: row.derived_metrics ?? null,
|
||||
validation_result: row.validation_result ?? null,
|
||||
normalization_summary: row.normalization_summary ?? null,
|
||||
facts_count: row.facts_count,
|
||||
concepts_count: row.concepts_count,
|
||||
dimensions_count: row.dimensions_count,
|
||||
@@ -251,6 +372,22 @@ function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): Fili
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Maps a selected `filingTaxonomyContext` row onto the repository-level
 * context record.
 *
 * All listed columns are copied through unchanged; the two JSON columns are
 * normalized so that a missing value (`null` or `undefined`) is always
 * returned as `null`.
 */
function toContextRecord(row: typeof filingTaxonomyContext.$inferSelect): FilingTaxonomyContextRecord {
  const {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    created_at
  } = row;

  // Coalesce absent JSON payloads to an explicit null.
  const segment_json = row.segment_json ?? null;
  const scenario_json = row.scenario_json ?? null;

  return {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    segment_json,
    scenario_json,
    created_at
  };
}
|
||||
|
||||
function toAssetRecord(row: typeof filingTaxonomyAsset.$inferSelect): FilingTaxonomyAssetRecord {
|
||||
return {
|
||||
id: row.id,
|
||||
@@ -275,8 +412,17 @@ function toConceptRecord(row: typeof filingTaxonomyConcept.$inferSelect): Filing
|
||||
local_name: row.local_name,
|
||||
label: row.label,
|
||||
is_extension: row.is_extension,
|
||||
balance: row.balance,
|
||||
period_type: row.period_type,
|
||||
data_type: row.data_type,
|
||||
statement_kind: row.statement_kind ?? null,
|
||||
role_uri: row.role_uri,
|
||||
authoritative_concept_key: row.authoritative_concept_key,
|
||||
mapping_method: row.mapping_method,
|
||||
surface_key: row.surface_key,
|
||||
detail_parent_surface_key: row.detail_parent_surface_key,
|
||||
kpi_key: row.kpi_key,
|
||||
residual_flag: row.residual_flag,
|
||||
presentation_order: asNumber(row.presentation_order),
|
||||
presentation_depth: row.presentation_depth,
|
||||
parent_concept_key: row.parent_concept_key,
|
||||
@@ -298,11 +444,20 @@ function toFactRecord(row: typeof filingTaxonomyFact.$inferSelect): FilingTaxono
|
||||
qname: row.qname,
|
||||
namespace_uri: row.namespace_uri,
|
||||
local_name: row.local_name,
|
||||
data_type: row.data_type,
|
||||
statement_kind: row.statement_kind ?? null,
|
||||
role_uri: row.role_uri,
|
||||
authoritative_concept_key: row.authoritative_concept_key,
|
||||
mapping_method: row.mapping_method,
|
||||
surface_key: row.surface_key,
|
||||
detail_parent_surface_key: row.detail_parent_surface_key,
|
||||
kpi_key: row.kpi_key,
|
||||
residual_flag: row.residual_flag,
|
||||
context_id: row.context_id,
|
||||
unit: row.unit,
|
||||
decimals: row.decimals,
|
||||
precision: row.precision,
|
||||
nil: row.nil,
|
||||
value_num: value,
|
||||
period_start: row.period_start,
|
||||
period_end: row.period_end,
|
||||
@@ -354,6 +509,16 @@ export async function listFilingTaxonomyAssets(snapshotId: number) {
|
||||
return rows.map(toAssetRecord);
|
||||
}
|
||||
|
||||
/**
 * Loads every stored taxonomy context belonging to one snapshot.
 *
 * @param snapshotId Value matched against `filingTaxonomyContext.snapshot_id`.
 * @returns The matching rows, ordered by descending row id and converted with
 *   `toContextRecord`.
 */
export async function listFilingTaxonomyContexts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyContext.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyContext.id);

  const contextRows = await db
    .select()
    .from(filingTaxonomyContext)
    .where(belongsToSnapshot)
    .orderBy(byIdDescending);

  return contextRows.map((contextRow) => toContextRecord(contextRow));
}
|
||||
|
||||
export async function listFilingTaxonomyConcepts(snapshotId: number) {
|
||||
const rows = await db
|
||||
.select()
|
||||
@@ -397,10 +562,19 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
|
||||
parse_status: input.parse_status,
|
||||
parse_error: input.parse_error,
|
||||
source: input.source,
|
||||
parser_engine: input.parser_engine,
|
||||
parser_version: input.parser_version,
|
||||
taxonomy_regime: input.taxonomy_regime,
|
||||
fiscal_pack: input.fiscal_pack,
|
||||
periods: input.periods,
|
||||
faithful_rows: input.faithful_rows,
|
||||
statement_rows: input.statement_rows,
|
||||
surface_rows: input.surface_rows,
|
||||
detail_rows: input.detail_rows,
|
||||
kpi_rows: input.kpi_rows,
|
||||
derived_metrics: input.derived_metrics,
|
||||
validation_result: input.validation_result,
|
||||
normalization_summary: input.normalization_summary,
|
||||
facts_count: input.facts_count,
|
||||
concepts_count: input.concepts_count,
|
||||
dimensions_count: input.dimensions_count,
|
||||
@@ -416,10 +590,19 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
|
||||
parse_status: input.parse_status,
|
||||
parse_error: input.parse_error,
|
||||
source: input.source,
|
||||
parser_engine: input.parser_engine,
|
||||
parser_version: input.parser_version,
|
||||
taxonomy_regime: input.taxonomy_regime,
|
||||
fiscal_pack: input.fiscal_pack,
|
||||
periods: input.periods,
|
||||
faithful_rows: input.faithful_rows,
|
||||
statement_rows: input.statement_rows,
|
||||
surface_rows: input.surface_rows,
|
||||
detail_rows: input.detail_rows,
|
||||
kpi_rows: input.kpi_rows,
|
||||
derived_metrics: input.derived_metrics,
|
||||
validation_result: input.validation_result,
|
||||
normalization_summary: input.normalization_summary,
|
||||
facts_count: input.facts_count,
|
||||
concepts_count: input.concepts_count,
|
||||
dimensions_count: input.dimensions_count,
|
||||
@@ -431,10 +614,26 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
|
||||
const snapshotId = saved.id;
|
||||
|
||||
await db.delete(filingTaxonomyAsset).where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
|
||||
await db.delete(filingTaxonomyContext).where(eq(filingTaxonomyContext.snapshot_id, snapshotId));
|
||||
await db.delete(filingTaxonomyConcept).where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
|
||||
await db.delete(filingTaxonomyFact).where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
|
||||
await db.delete(filingTaxonomyMetricValidation).where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));
|
||||
|
||||
if (input.contexts.length > 0) {
|
||||
await db.insert(filingTaxonomyContext).values(input.contexts.map((context) => ({
|
||||
snapshot_id: snapshotId,
|
||||
context_id: context.context_id,
|
||||
entity_identifier: context.entity_identifier,
|
||||
entity_scheme: context.entity_scheme,
|
||||
period_start: context.period_start,
|
||||
period_end: context.period_end,
|
||||
period_instant: context.period_instant,
|
||||
segment_json: context.segment_json,
|
||||
scenario_json: context.scenario_json,
|
||||
created_at: now
|
||||
})));
|
||||
}
|
||||
|
||||
if (input.assets.length > 0) {
|
||||
await db.insert(filingTaxonomyAsset).values(input.assets.map((asset) => ({
|
||||
snapshot_id: snapshotId,
|
||||
@@ -457,8 +656,17 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
|
||||
local_name: concept.local_name,
|
||||
label: concept.label,
|
||||
is_extension: concept.is_extension,
|
||||
balance: concept.balance,
|
||||
period_type: concept.period_type,
|
||||
data_type: concept.data_type,
|
||||
statement_kind: concept.statement_kind,
|
||||
role_uri: concept.role_uri,
|
||||
authoritative_concept_key: concept.authoritative_concept_key,
|
||||
mapping_method: concept.mapping_method,
|
||||
surface_key: concept.surface_key,
|
||||
detail_parent_surface_key: concept.detail_parent_surface_key,
|
||||
kpi_key: concept.kpi_key,
|
||||
residual_flag: concept.residual_flag,
|
||||
presentation_order: asNumericText(concept.presentation_order),
|
||||
presentation_depth: concept.presentation_depth,
|
||||
parent_concept_key: concept.parent_concept_key,
|
||||
@@ -474,11 +682,20 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn
|
||||
qname: fact.qname,
|
||||
namespace_uri: fact.namespace_uri,
|
||||
local_name: fact.local_name,
|
||||
data_type: fact.data_type,
|
||||
statement_kind: fact.statement_kind,
|
||||
role_uri: fact.role_uri,
|
||||
authoritative_concept_key: fact.authoritative_concept_key,
|
||||
mapping_method: fact.mapping_method,
|
||||
surface_key: fact.surface_key,
|
||||
detail_parent_surface_key: fact.detail_parent_surface_key,
|
||||
kpi_key: fact.kpi_key,
|
||||
residual_flag: fact.residual_flag,
|
||||
context_id: fact.context_id,
|
||||
unit: fact.unit,
|
||||
decimals: fact.decimals,
|
||||
precision: fact.precision,
|
||||
nil: fact.nil,
|
||||
value_num: String(fact.value_num),
|
||||
period_start: fact.period_start,
|
||||
period_end: fact.period_end,
|
||||
|
||||
@@ -766,7 +766,18 @@ async function processSyncFilings(task: Task) {
|
||||
parse_status: 'failed',
|
||||
parse_error: error instanceof Error ? error.message : 'Taxonomy hydration failed',
|
||||
source: 'legacy_html_fallback',
|
||||
parser_engine: 'fiscal-xbrl',
|
||||
parser_version: 'unknown',
|
||||
taxonomy_regime: 'unknown',
|
||||
fiscal_pack: 'core',
|
||||
periods: [],
|
||||
faithful_rows: {
|
||||
income: [],
|
||||
balance: [],
|
||||
cash_flow: [],
|
||||
equity: [],
|
||||
comprehensive_income: []
|
||||
},
|
||||
statement_rows: {
|
||||
income: [],
|
||||
balance: [],
|
||||
@@ -774,12 +785,36 @@ async function processSyncFilings(task: Task) {
|
||||
equity: [],
|
||||
comprehensive_income: []
|
||||
},
|
||||
surface_rows: {
|
||||
income: [],
|
||||
balance: [],
|
||||
cash_flow: [],
|
||||
equity: [],
|
||||
comprehensive_income: []
|
||||
},
|
||||
detail_rows: {
|
||||
income: {},
|
||||
balance: {},
|
||||
cash_flow: {},
|
||||
equity: {},
|
||||
comprehensive_income: {}
|
||||
},
|
||||
kpi_rows: [],
|
||||
contexts: [],
|
||||
derived_metrics: filing.metrics ?? null,
|
||||
validation_result: {
|
||||
status: 'error',
|
||||
checks: [],
|
||||
validatedAt: now
|
||||
},
|
||||
normalization_summary: {
|
||||
surfaceRowCount: 0,
|
||||
detailRowCount: 0,
|
||||
kpiRowCount: 0,
|
||||
unmappedRowCount: 0,
|
||||
materialUnmappedRowCount: 0,
|
||||
warnings: []
|
||||
},
|
||||
facts_count: 0,
|
||||
concepts_count: 0,
|
||||
dimensions_count: 0,
|
||||
|
||||
53
lib/server/taxonomy/classifiers.ts
Normal file
53
lib/server/taxonomy/classifiers.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
|
||||
/**
 * Guesses which financial statement a presentation role URI belongs to.
 *
 * The URI is lower-cased and tested against an ordered list of patterns; the
 * first pattern that matches decides the result. Order matters: for example a
 * role mentioning both "balance sheet" and "equity" is classified as `equity`
 * because the equity rule is checked first.
 *
 * @param roleUri Role URI (or any role label) to classify.
 * @returns The matched statement kind, or `null` when no pattern matches.
 */
export function classifyStatementRole(roleUri: string): FinancialStatementKind | null {
  const haystack = roleUri.toLowerCase();

  // Checked top to bottom; keep this order to preserve precedence.
  const rules: Array<[RegExp, FinancialStatementKind]> = [
    [/cash\s*flow|statementsof?cashflows|netcash/, 'cash_flow'],
    [/shareholders?|stockholders?|equity|retainedearnings/, 'equity'],
    [/comprehensive\s*income/, 'comprehensive_income'],
    [/balance\s*sheet|financial\s*position|assets?andliabilities/, 'balance'],
    [/operations|income\s*statement|statementsofincome|profit/, 'income']
  ];

  for (const [pattern, kind] of rules) {
    if (pattern.test(haystack)) {
      return kind;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Guesses a statement kind from a concept's local name alone.
 *
 * The name is lower-cased and tested against an ordered list of keyword
 * patterns; the first match wins. Because the cash rule is checked first, any
 * name containing "cash" is classified as `cash_flow`.
 *
 * @param localName Concept local name, e.g. `Revenues`.
 * @returns The matched statement kind, or `null` when no keyword matches.
 */
export function conceptStatementFallback(localName: string): FinancialStatementKind | null {
  const haystack = localName.toLowerCase();

  // Checked top to bottom; keep this order to preserve precedence.
  const rules: Array<[RegExp, FinancialStatementKind]> = [
    [/cash|operatingactivities|investingactivities|financingactivities/, 'cash_flow'],
    [/equity|retainedearnings|additionalpaidincapital/, 'equity'],
    [/comprehensiveincome/, 'comprehensive_income'],
    [/asset|liabilit|debt/, 'balance'],
    [/revenue|income|profit|expense|costof/, 'income']
  ];

  for (const [pattern, kind] of rules) {
    if (pattern.test(haystack)) {
      return kind;
    }
  }

  return null;
}
|
||||
@@ -1,185 +1,8 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
|
||||
import { parseLabelLinkbase, parsePresentationLinkbase } from '@/lib/server/taxonomy/linkbase-parser';
|
||||
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
|
||||
import { materializeTaxonomyStatements } from '@/lib/server/taxonomy/materialize';
|
||||
import { validateMetricsWithPdfLlm } from '@/lib/server/taxonomy/pdf-validation';
|
||||
import { hydrateFilingTaxonomySnapshotFromSidecar } from '@/lib/server/taxonomy/parser-client';
|
||||
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
|
||||
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';
|
||||
|
||||
/**
 * Creates a record with one value per statement kind, each produced by a
 * separate call to `factory`.
 *
 * The factory is invoked five times, in the order income, balance, cash_flow,
 * equity, comprehensive_income.
 *
 * @param factory Produces the value stored under each statement kind.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const income = factory();
  const balance = factory();
  const cash_flow = factory();
  const equity = factory();
  const comprehensive_income = factory();

  return { income, balance, cash_flow, equity, comprehensive_income };
}
|
||||
|
||||
/**
 * Returns the User-Agent string used for SEC requests.
 *
 * Reads `SEC_USER_AGENT` from the environment; an unset or empty value falls
 * back to the built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;

  if (configured) {
    return configured;
  }

  return 'Fiscal Clone <support@fiscal.local>';
}
|
||||
|
||||
/**
 * Downloads `url` and returns the response body as text.
 *
 * The request carries the configured User-Agent and an Accept header that
 * prefers XML/plain text, and opts out of caching via `cache: 'no-store'`.
 *
 * @param url Resource to fetch.
 * @param fetchImpl `fetch`-compatible function used to perform the request.
 * @throws Error with the HTTP status when the response is not `ok`.
 */
async function fetchText(url: string, fetchImpl: typeof fetch) {
  const headers = {
    'User-Agent': envUserAgent(),
    Accept: 'text/xml, text/plain, text/html;q=0.8, */*;q=0.5'
  };

  const response = await fetchImpl(url, { headers, cache: 'no-store' });

  if (response.ok) {
    return await response.text();
  }

  throw new Error(`SEC request failed (${response.status})`);
}
|
||||
|
||||
export async function hydrateFilingTaxonomySnapshot(
|
||||
input: TaxonomyHydrationInput,
|
||||
options?: {
|
||||
fetchImpl?: typeof fetch;
|
||||
}
|
||||
input: TaxonomyHydrationInput
|
||||
): Promise<TaxonomyHydrationResult> {
|
||||
const fetchImpl = options?.fetchImpl ?? fetch;
|
||||
|
||||
const discovered = await discoverFilingAssets({
|
||||
cik: input.cik,
|
||||
accessionNumber: input.accessionNumber,
|
||||
filingUrl: input.filingUrl,
|
||||
primaryDocument: input.primaryDocument,
|
||||
fetchImpl
|
||||
});
|
||||
|
||||
const emptyResult: TaxonomyHydrationResult = {
|
||||
filing_id: input.filingId,
|
||||
ticker: input.ticker.trim().toUpperCase(),
|
||||
filing_date: input.filingDate,
|
||||
filing_type: input.filingType,
|
||||
parse_status: 'failed',
|
||||
parse_error: 'No XBRL instance found',
|
||||
source: 'legacy_html_fallback',
|
||||
periods: [],
|
||||
statement_rows: createStatementRecord(() => []),
|
||||
derived_metrics: null,
|
||||
validation_result: {
|
||||
status: 'not_run',
|
||||
checks: [],
|
||||
validatedAt: null
|
||||
},
|
||||
facts_count: 0,
|
||||
concepts_count: 0,
|
||||
dimensions_count: 0,
|
||||
assets: discovered.assets,
|
||||
concepts: [],
|
||||
facts: [],
|
||||
metric_validations: []
|
||||
};
|
||||
|
||||
const selectedInstance = discovered.assets.find((asset) => asset.asset_type === 'instance' && asset.is_selected)
|
||||
?? discovered.assets.find((asset) => asset.asset_type === 'instance')
|
||||
?? null;
|
||||
|
||||
if (!selectedInstance) {
|
||||
return emptyResult;
|
||||
}
|
||||
|
||||
let parseError: string | null = null;
|
||||
let source: TaxonomyHydrationResult['source'] = 'xbrl_instance';
|
||||
|
||||
let instanceText = '';
|
||||
try {
|
||||
instanceText = await fetchText(selectedInstance.url, fetchImpl);
|
||||
} catch (error) {
|
||||
parseError = error instanceof Error ? error.message : 'Unable to fetch instance file';
|
||||
return {
|
||||
...emptyResult,
|
||||
parse_error: parseError
|
||||
};
|
||||
}
|
||||
|
||||
const parsedInstance = parseXbrlInstance(instanceText, selectedInstance.name);
|
||||
|
||||
const labelByConcept = new Map<string, string>();
|
||||
const presentation: ReturnType<typeof parsePresentationLinkbase> = [];
|
||||
|
||||
for (const asset of discovered.assets) {
|
||||
if (!asset.is_selected) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (asset.asset_type !== 'presentation' && asset.asset_type !== 'label') {
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
const content = await fetchText(asset.url, fetchImpl);
|
||||
if (asset.asset_type === 'presentation') {
|
||||
const parsed = parsePresentationLinkbase(content);
|
||||
if (parsed.length > 0) {
|
||||
source = 'xbrl_instance_with_linkbase';
|
||||
}
|
||||
|
||||
presentation.push(...parsed);
|
||||
} else if (asset.asset_type === 'label') {
|
||||
const parsed = parseLabelLinkbase(content);
|
||||
for (const [conceptKey, label] of parsed.entries()) {
|
||||
if (!labelByConcept.has(conceptKey)) {
|
||||
labelByConcept.set(conceptKey, label);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
parseError = parseError ?? (error instanceof Error ? error.message : 'Failed to parse taxonomy linkbase');
|
||||
}
|
||||
}
|
||||
|
||||
const materialized = materializeTaxonomyStatements({
|
||||
filingId: input.filingId,
|
||||
accessionNumber: input.accessionNumber,
|
||||
filingDate: input.filingDate,
|
||||
filingType: input.filingType,
|
||||
facts: parsedInstance.facts,
|
||||
presentation,
|
||||
labelByConcept
|
||||
});
|
||||
|
||||
const derivedMetrics = deriveTaxonomyMetrics(parsedInstance.facts);
|
||||
const llmValidation = await validateMetricsWithPdfLlm({
|
||||
metrics: derivedMetrics,
|
||||
assets: discovered.assets,
|
||||
fetchImpl
|
||||
});
|
||||
|
||||
const hasRows = (Object.values(materialized.statement_rows).reduce((total, rows) => total + rows.length, 0)) > 0;
|
||||
const hasFacts = materialized.facts.length > 0;
|
||||
|
||||
const parseStatus: TaxonomyHydrationResult['parse_status'] = hasRows && hasFacts
|
||||
? 'ready'
|
||||
: hasFacts
|
||||
? 'partial'
|
||||
: 'failed';
|
||||
|
||||
return {
|
||||
filing_id: input.filingId,
|
||||
ticker: input.ticker.trim().toUpperCase(),
|
||||
filing_date: input.filingDate,
|
||||
filing_type: input.filingType,
|
||||
parse_status: parseStatus,
|
||||
parse_error: parseStatus === 'failed' ? (parseError ?? 'No XBRL facts extracted') : parseError,
|
||||
source,
|
||||
periods: materialized.periods,
|
||||
statement_rows: materialized.statement_rows,
|
||||
derived_metrics: derivedMetrics,
|
||||
validation_result: llmValidation.validation_result,
|
||||
facts_count: materialized.facts.length,
|
||||
concepts_count: materialized.concepts.length,
|
||||
dimensions_count: materialized.dimensionsCount,
|
||||
assets: discovered.assets,
|
||||
concepts: materialized.concepts,
|
||||
facts: materialized.facts,
|
||||
metric_validations: llmValidation.metric_validations
|
||||
};
|
||||
return await hydrateFilingTaxonomySnapshotFromSidecar(input);
|
||||
}
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
|
||||
import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
||||
import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy';
|
||||
import { classifyStatementRole } from '@/lib/server/taxonomy/linkbase-parser';
|
||||
import { conceptStatementFallback } from '@/lib/server/taxonomy/xbrl-parser';
|
||||
import { classifyStatementRole, conceptStatementFallback } from '@/lib/server/taxonomy/classifiers';
|
||||
|
||||
function compactAccessionNumber(value: string) {
|
||||
return value.replace(/-/g, '');
|
||||
@@ -308,8 +307,17 @@ export function materializeTaxonomyStatements(input: {
|
||||
local_name: localName,
|
||||
label,
|
||||
is_extension: !isUsGaapNamespace(namespaceUri),
|
||||
balance: null,
|
||||
period_type: null,
|
||||
data_type: null,
|
||||
statement_kind: statement,
|
||||
role_uri: orderedConcept.roleUri,
|
||||
authoritative_concept_key: null,
|
||||
mapping_method: null,
|
||||
surface_key: null,
|
||||
detail_parent_surface_key: null,
|
||||
kpi_key: null,
|
||||
residual_flag: false,
|
||||
presentation_order: row.order,
|
||||
presentation_depth: row.depth,
|
||||
parent_concept_key: row.parentKey,
|
||||
@@ -331,8 +339,17 @@ export function materializeTaxonomyStatements(input: {
|
||||
local_name: fact.localName,
|
||||
label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName),
|
||||
is_extension: !isUsGaapNamespace(fact.namespaceUri),
|
||||
balance: null,
|
||||
period_type: null,
|
||||
data_type: fact.dataType,
|
||||
statement_kind: fact.statement_kind,
|
||||
role_uri: fact.role_uri,
|
||||
authoritative_concept_key: null,
|
||||
mapping_method: null,
|
||||
surface_key: null,
|
||||
detail_parent_surface_key: null,
|
||||
kpi_key: null,
|
||||
residual_flag: false,
|
||||
presentation_order: null,
|
||||
presentation_depth: null,
|
||||
parent_concept_key: null,
|
||||
@@ -346,11 +363,20 @@ export function materializeTaxonomyStatements(input: {
|
||||
qname: fact.qname,
|
||||
namespace_uri: fact.namespaceUri,
|
||||
local_name: fact.localName,
|
||||
data_type: fact.dataType,
|
||||
statement_kind: fact.statement_kind,
|
||||
role_uri: fact.role_uri,
|
||||
authoritative_concept_key: null,
|
||||
mapping_method: null,
|
||||
surface_key: null,
|
||||
detail_parent_surface_key: null,
|
||||
kpi_key: null,
|
||||
residual_flag: false,
|
||||
context_id: fact.contextId,
|
||||
unit: fact.unit,
|
||||
decimals: fact.decimals,
|
||||
precision: fact.precision,
|
||||
nil: fact.nil,
|
||||
value_num: fact.value,
|
||||
period_start: fact.periodStart,
|
||||
period_end: fact.periodEnd,
|
||||
|
||||
@@ -8,9 +8,12 @@ function fact(localName: string, value: number, overrides?: Partial<TaxonomyFact
|
||||
qname: `us-gaap:${localName}`,
|
||||
namespaceUri: 'http://fasb.org/us-gaap/2024',
|
||||
localName,
|
||||
dataType: null,
|
||||
contextId: 'c1',
|
||||
unit: 'iso4217:USD',
|
||||
decimals: '-6',
|
||||
precision: null,
|
||||
nil: false,
|
||||
value,
|
||||
periodStart: '2025-01-01',
|
||||
periodEnd: '2025-12-31',
|
||||
|
||||
76
lib/server/taxonomy/parser-client.ts
Normal file
76
lib/server/taxonomy/parser-client.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import { existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/**
 * Lists the locations where the `fiscal-xbrl` sidecar binary may live, in
 * lookup priority order.
 *
 * The trimmed `FISCAL_XBRL_BIN` environment value comes first when it is
 * non-empty, followed by `bin/`, `rust/target/release/` and
 * `rust/target/debug/` under the current working directory.
 */
function candidateBinaryPaths() {
  const candidates: string[] = [];

  // An explicit override wins, but only if it is not blank after trimming.
  const override = process.env.FISCAL_XBRL_BIN?.trim();
  if (typeof override === 'string' && override.length > 0) {
    candidates.push(override);
  }

  const workingDir = process.cwd();
  candidates.push(
    join(workingDir, 'bin', 'fiscal-xbrl'),
    join(workingDir, 'rust', 'target', 'release', 'fiscal-xbrl'),
    join(workingDir, 'rust', 'target', 'debug', 'fiscal-xbrl')
  );

  return candidates;
}
|
||||
|
||||
/**
 * Resolves the path of the Rust XBRL sidecar binary.
 *
 * Walks the candidate locations in priority order and returns the first one
 * that exists on disk.
 *
 * @throws Error when none of the candidate paths exists.
 */
export function resolveFiscalXbrlBinary() {
  for (const candidate of candidateBinaryPaths()) {
    if (existsSync(candidate)) {
      return candidate;
    }
  }

  throw new Error('Rust XBRL sidecar binary is required but was not found. Set FISCAL_XBRL_BIN or build `fiscal-xbrl` under rust/target.');
}
|
||||
|
||||
/**
 * Hydrates a filing taxonomy snapshot by running the Rust `fiscal-xbrl`
 * sidecar with the `hydrate-filing` sub-command.
 *
 * The request is written to the child's stdin as one JSON document and the
 * child's stdout is parsed as the JSON result. Anything the child writes to
 * stderr is forwarded to `console.warn`.
 *
 * Environment:
 * - `XBRL_ENGINE_TIMEOUT_MS`: wall-clock budget for the child. Defaults to
 *   45 000 ms, is never lower than 1 000 ms, and falls back to the default when
 *   the value is not a finite number.
 * - `FISCAL_XBRL_CACHE_DIR`: forwarded as `cacheDir`; defaults to
 *   `.cache/xbrl` under the current working directory.
 *
 * @param input Filing identifiers and URLs forwarded to the sidecar.
 * @returns The sidecar's stdout parsed as JSON. NOTE(review): the payload is
 *   cast, not validated, against `TaxonomyHydrationResult`.
 * @throws Error when the binary cannot be resolved, the child times out, the
 *   child exits with a non-zero code, or stdout is not valid JSON.
 */
export async function hydrateFilingTaxonomySnapshotFromSidecar(
  input: TaxonomyHydrationInput
): Promise<TaxonomyHydrationResult> {
  const binary = resolveFiscalXbrlBinary();

  const defaultTimeoutMs = 45_000;
  const minTimeoutMs = 1_000;
  // Node documents that timer delays above this value (or NaN) are coerced to
  // 1 ms, which would kill the child immediately; Bun is assumed to match.
  const maxTimeoutMs = 2_147_483_647;
  const configuredTimeoutMs = Number(process.env.XBRL_ENGINE_TIMEOUT_MS ?? defaultTimeoutMs);
  const timeoutMs = Number.isFinite(configuredTimeoutMs)
    ? Math.min(Math.max(configuredTimeoutMs, minTimeoutMs), maxTimeoutMs)
    : defaultTimeoutMs;

  const command = [binary, 'hydrate-filing'];
  const requestBody = JSON.stringify({
    filingId: input.filingId,
    ticker: input.ticker,
    cik: input.cik,
    accessionNumber: input.accessionNumber,
    filingDate: input.filingDate,
    filingType: input.filingType,
    filingUrl: input.filingUrl,
    primaryDocument: input.primaryDocument,
    cacheDir: process.env.FISCAL_XBRL_CACHE_DIR ?? join(process.cwd(), '.cache', 'xbrl')
  });

  const child = Bun.spawn(command, {
    stdin: 'pipe',
    stdout: 'pipe',
    stderr: 'pipe',
    env: {
      ...process.env
    }
  });

  child.stdin.write(new TextEncoder().encode(requestBody));
  child.stdin.end();

  // Remember that we killed the child so the failure is reported as a timeout
  // instead of an unexplained non-zero exit.
  let timedOut = false;
  const timeout = setTimeout(() => {
    timedOut = true;
    child.kill();
  }, timeoutMs);

  try {
    const [stdout, stderr, exitCode] = await Promise.all([
      new Response(child.stdout).text(),
      new Response(child.stderr).text(),
      child.exited
    ]);

    if (stderr.trim().length > 0) {
      console.warn(`[fiscal-xbrl] ${stderr.trim()}`);
    }

    if (timedOut) {
      throw new Error(`Rust XBRL sidecar timed out after ${timeoutMs}ms`);
    }

    if (exitCode !== 0) {
      throw new Error(`Rust XBRL sidecar failed with exit code ${exitCode}: ${stderr.trim() || stdout.trim() || 'no error output'}`);
    }

    try {
      return JSON.parse(stdout) as TaxonomyHydrationResult;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Rust XBRL sidecar returned invalid JSON: ${reason}`);
    }
  } finally {
    clearTimeout(timeout);
  }
}
|
||||
@@ -1,4 +1,13 @@
|
||||
import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyStatementRow } from '@/lib/types';
|
||||
import type {
|
||||
Filing,
|
||||
FinancialStatementKind,
|
||||
MetricValidationResult,
|
||||
NormalizationSummary,
|
||||
StructuredKpiRow,
|
||||
SurfaceDetailMap,
|
||||
SurfaceFinancialRow,
|
||||
TaxonomyStatementRow
|
||||
} from '@/lib/types';
|
||||
import type {
|
||||
FilingTaxonomyAssetType,
|
||||
FilingTaxonomyParseStatus,
|
||||
@@ -19,10 +28,20 @@ export type TaxonomyNamespaceMap = Record<string, string>;
|
||||
|
||||
/**
 * A parsed XBRL context: its id, reporting entity, period bounds and the
 * dimensional members attached to it.
 *
 * `segment` and `scenario` share the same shape (explicit and typed members)
 * and are `null` when absent.
 */
export type TaxonomyContext = {
  id: string;
  entityIdentifier: string | null;
  entityScheme: string | null;
  periodStart: string | null;
  periodEnd: string | null;
  periodInstant: string | null;
  /** Flat list of axis/member pairs. */
  dimensions: { axis: string; member: string }[];
  segment: null | {
    explicitMembers: { axis: string; member: string }[];
    typedMembers: { axis: string; value: string }[];
  };
  scenario: null | {
    explicitMembers: { axis: string; member: string }[];
    typedMembers: { axis: string; value: string }[];
  };
};
|
||||
|
||||
export type TaxonomyUnit = {
|
||||
@@ -35,9 +54,12 @@ export type TaxonomyFact = {
|
||||
qname: string;
|
||||
namespaceUri: string;
|
||||
localName: string;
|
||||
dataType: string | null;
|
||||
contextId: string;
|
||||
unit: string | null;
|
||||
decimals: string | null;
|
||||
precision: string | null;
|
||||
nil: boolean;
|
||||
value: number;
|
||||
periodStart: string | null;
|
||||
periodEnd: string | null;
|
||||
@@ -64,8 +86,17 @@ export type TaxonomyConcept = {
|
||||
local_name: string;
|
||||
label: string | null;
|
||||
is_extension: boolean;
|
||||
balance: string | null;
|
||||
period_type: string | null;
|
||||
data_type: string | null;
|
||||
statement_kind: FinancialStatementKind | null;
|
||||
role_uri: string | null;
|
||||
authoritative_concept_key: string | null;
|
||||
mapping_method: string | null;
|
||||
surface_key: string | null;
|
||||
detail_parent_surface_key: string | null;
|
||||
kpi_key: string | null;
|
||||
residual_flag: boolean;
|
||||
presentation_order: number | null;
|
||||
presentation_depth: number | null;
|
||||
parent_concept_key: string | null;
|
||||
@@ -105,8 +136,26 @@ export type TaxonomyHydrationResult = {
|
||||
parse_status: FilingTaxonomyParseStatus;
|
||||
parse_error: string | null;
|
||||
source: FilingTaxonomySource;
|
||||
parser_engine: string;
|
||||
parser_version: string;
|
||||
taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown';
|
||||
fiscal_pack: string | null;
|
||||
periods: FilingTaxonomyPeriod[];
|
||||
faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
|
||||
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
|
||||
surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
|
||||
detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
|
||||
kpi_rows: StructuredKpiRow[];
|
||||
contexts: Array<{
|
||||
context_id: string;
|
||||
entity_identifier: string | null;
|
||||
entity_scheme: string | null;
|
||||
period_start: string | null;
|
||||
period_end: string | null;
|
||||
period_instant: string | null;
|
||||
segment_json: Record<string, unknown> | null;
|
||||
scenario_json: Record<string, unknown> | null;
|
||||
}>;
|
||||
derived_metrics: Filing['metrics'];
|
||||
validation_result: MetricValidationResult | null;
|
||||
facts_count: number;
|
||||
@@ -119,11 +168,20 @@ export type TaxonomyHydrationResult = {
|
||||
qname: string;
|
||||
namespace_uri: string;
|
||||
local_name: string;
|
||||
data_type: string | null;
|
||||
statement_kind: FinancialStatementKind | null;
|
||||
role_uri: string | null;
|
||||
authoritative_concept_key: string | null;
|
||||
mapping_method: string | null;
|
||||
surface_key: string | null;
|
||||
detail_parent_surface_key: string | null;
|
||||
kpi_key: string | null;
|
||||
residual_flag: boolean;
|
||||
context_id: string;
|
||||
unit: string | null;
|
||||
decimals: string | null;
|
||||
precision: string | null;
|
||||
nil: boolean;
|
||||
value_num: number;
|
||||
period_start: string | null;
|
||||
period_end: string | null;
|
||||
@@ -133,4 +191,5 @@ export type TaxonomyHydrationResult = {
|
||||
source_file: string | null;
|
||||
}>;
|
||||
metric_validations: TaxonomyMetricValidationCheck[];
|
||||
normalization_summary: NormalizationSummary;
|
||||
};
|
||||
|
||||
46
lib/types.ts
46
lib/types.ts
@@ -462,6 +462,48 @@ export type DerivedFinancialRow = {
|
||||
|
||||
export type StandardizedFinancialRow = DerivedFinancialRow;
|
||||
export type StandardizedStatementRow = StandardizedFinancialRow;
|
||||
export type SurfaceFinancialRow = StandardizedFinancialRow & {
|
||||
statement?: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>;
|
||||
detailCount?: number;
|
||||
resolutionMethod?: 'direct' | 'surface_bridge' | 'formula_derived' | 'not_meaningful';
|
||||
confidence?: 'high' | 'medium' | 'low';
|
||||
warningCodes?: string[];
|
||||
};
|
||||
|
||||
export type DetailFinancialRow = {
|
||||
key: string;
|
||||
parentSurfaceKey: string;
|
||||
label: string;
|
||||
conceptKey: string;
|
||||
qname: string;
|
||||
namespaceUri: string;
|
||||
localName: string;
|
||||
unit: string | null;
|
||||
values: Record<string, number | null>;
|
||||
sourceFactIds: number[];
|
||||
isExtension: boolean;
|
||||
dimensionsSummary: string[];
|
||||
residualFlag: boolean;
|
||||
};
|
||||
|
||||
export type SurfaceDetailMap = Record<string, DetailFinancialRow[]>;
|
||||
|
||||
export type NormalizationSummary = {
|
||||
surfaceRowCount: number;
|
||||
detailRowCount: number;
|
||||
kpiRowCount: number;
|
||||
unmappedRowCount: number;
|
||||
materialUnmappedRowCount: number;
|
||||
warnings: string[];
|
||||
};
|
||||
|
||||
export type NormalizationMetadata = {
|
||||
regime: 'us-gaap' | 'ifrs-full' | 'unknown';
|
||||
fiscalPack: string | null;
|
||||
parserVersion: string;
|
||||
unmappedRowCount: number;
|
||||
materialUnmappedRowCount: number;
|
||||
};
|
||||
|
||||
export type RatioRow = DerivedFinancialRow & {
|
||||
denominatorKey: string | null;
|
||||
@@ -571,8 +613,9 @@ export type CompanyFinancialStatementsResponse = {
|
||||
periods: FinancialStatementPeriod[];
|
||||
statementRows: {
|
||||
faithful: TaxonomyStatementRow[];
|
||||
standardized: StandardizedFinancialRow[];
|
||||
standardized: SurfaceFinancialRow[];
|
||||
} | null;
|
||||
statementDetails: SurfaceDetailMap | null;
|
||||
ratioRows: RatioRow[] | null;
|
||||
kpiRows: StructuredKpiRow[] | null;
|
||||
trendSeries: TrendSeries[];
|
||||
@@ -608,6 +651,7 @@ export type CompanyFinancialStatementsResponse = {
|
||||
taxonomy: Filing['metrics'];
|
||||
validation: MetricValidationResult | null;
|
||||
};
|
||||
normalization: NormalizationMetadata;
|
||||
dimensionBreakdown: Record<string, DimensionBreakdownRow[]> | null;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user