From 3f3182310b83aa9c2c589a8978b59774c37e12a7 Mon Sep 17 00:00:00 2001 From: francy51 Date: Mon, 2 Mar 2026 09:33:58 -0500 Subject: [PATCH] feat(financials-v2): hydrate filing statements and aggregate history --- lib/server/financial-statements.ts | 315 +++++++++ lib/server/repos/filing-statements.ts | 229 +++++++ lib/server/sec.ts | 923 +++++++++++++++++++++++++- lib/server/task-processors.ts | 68 +- 4 files changed, 1532 insertions(+), 3 deletions(-) create mode 100644 lib/server/financial-statements.ts create mode 100644 lib/server/repos/filing-statements.ts diff --git a/lib/server/financial-statements.ts b/lib/server/financial-statements.ts new file mode 100644 index 0000000..1abfa57 --- /dev/null +++ b/lib/server/financial-statements.ts @@ -0,0 +1,315 @@ +import type { + CompanyFinancialStatementsResponse, + DimensionBreakdownRow, + FilingFaithfulStatementRow, + FinancialHistoryWindow, + FinancialStatementKind, + FinancialStatementMode, + FinancialStatementPeriod, + StandardizedStatementRow +} from '@/lib/types'; +import { listFilingsRecords } from '@/lib/server/repos/filings'; +import { + countFilingStatementSnapshotStatuses, + type DimensionStatementSnapshotRow, + type FilingFaithfulStatementSnapshotRow, + type FilingStatementSnapshotRecord, + listFilingStatementSnapshotsByTicker, + type StandardizedStatementSnapshotRow +} from '@/lib/server/repos/filing-statements'; + +type GetCompanyFinancialStatementsInput = { + ticker: string; + mode: FinancialStatementMode; + statement: FinancialStatementKind; + window: FinancialHistoryWindow; + includeDimensions: boolean; + cursor?: string | null; + limit?: number; + v2Enabled: boolean; + queuedSync: boolean; +}; + +type FinancialStatementRowByMode = StandardizedStatementRow | FilingFaithfulStatementRow; + +function safeTicker(input: string) { + return input.trim().toUpperCase(); +} + +function isFinancialForm(type: string): type is '10-K' | '10-Q' { + return type === '10-K' || type === '10-Q'; +} + +function 
rowDimensionMatcher(row: { key: string; concept: string | null }, item: DimensionStatementSnapshotRow) { + const concept = row.concept?.toLowerCase() ?? ''; + const itemConcept = item.concept?.toLowerCase() ?? ''; + if (item.rowKey === row.key) { + return true; + } + + return Boolean(concept && itemConcept && concept === itemConcept); +} + +function periodSorter(left: FinancialStatementPeriod, right: FinancialStatementPeriod) { + const byDate = Date.parse(left.filingDate) - Date.parse(right.filingDate); + if (Number.isFinite(byDate) && byDate !== 0) { + return byDate; + } + + return left.id.localeCompare(right.id); +} + +function resolveDimensionPeriodId(rawPeriodId: string, periods: FinancialStatementPeriod[]) { + const exact = periods.find((period) => period.id === rawPeriodId); + if (exact) { + return exact.id; + } + + const byDate = periods.find((period) => period.filingDate === rawPeriodId || period.periodEnd === rawPeriodId); + return byDate?.id ?? null; +} + +function getRowsForSnapshot( + snapshot: FilingStatementSnapshotRecord, + mode: FinancialStatementMode, + statement: FinancialStatementKind +) { + if (mode === 'standardized') { + return snapshot.standardized_bundle?.statements?.[statement] ?? []; + } + + return snapshot.statement_bundle?.statements?.[statement] ?? []; +} + +function buildPeriods( + snapshots: FilingStatementSnapshotRecord[], + mode: FinancialStatementMode, + statement: FinancialStatementKind +) { + const map = new Map(); + + for (const snapshot of snapshots) { + const rows = getRowsForSnapshot(snapshot, mode, statement); + if (rows.length === 0) { + continue; + } + + const sourcePeriods = mode === 'standardized' + ? snapshot.standardized_bundle?.periods + : snapshot.statement_bundle?.periods; + + for (const period of sourcePeriods ?? 
[]) { + if (!map.has(period.id)) { + map.set(period.id, { + id: period.id, + filingId: period.filingId, + accessionNumber: period.accessionNumber, + filingDate: period.filingDate, + periodEnd: period.periodEnd, + filingType: period.filingType, + periodLabel: period.periodLabel + }); + } + } + } + + return [...map.values()].sort(periodSorter); +} + +function buildRows( + snapshots: FilingStatementSnapshotRecord[], + periods: FinancialStatementPeriod[], + mode: FinancialStatementMode, + statement: FinancialStatementKind, + includeDimensions: boolean +) { + const rowMap = new Map(); + const dimensionMap = includeDimensions + ? new Map() + : null; + + for (const snapshot of snapshots) { + const rows = getRowsForSnapshot(snapshot, mode, statement); + const dimensions = snapshot.dimension_bundle?.statements?.[statement] ?? []; + + if (mode === 'standardized') { + for (const sourceRow of rows as StandardizedStatementSnapshotRow[]) { + const existing = rowMap.get(sourceRow.key) as StandardizedStatementRow | undefined; + const hasDimensions = dimensions.some((item) => rowDimensionMatcher(sourceRow, item)); + + if (!existing) { + rowMap.set(sourceRow.key, { + key: sourceRow.key, + label: sourceRow.label, + concept: sourceRow.concept, + category: sourceRow.category, + sourceConcepts: [...sourceRow.sourceConcepts], + values: { ...sourceRow.values }, + hasDimensions + }); + continue; + } + + existing.hasDimensions = existing.hasDimensions || hasDimensions; + for (const concept of sourceRow.sourceConcepts) { + if (!existing.sourceConcepts.includes(concept)) { + existing.sourceConcepts.push(concept); + } + } + + for (const [periodId, value] of Object.entries(sourceRow.values)) { + if (!(periodId in existing.values)) { + existing.values[periodId] = value; + } + } + } + } else { + for (const sourceRow of rows as FilingFaithfulStatementSnapshotRow[]) { + const rowKey = sourceRow.concept ? 
`concept-${sourceRow.concept.toLowerCase()}` : `label-${sourceRow.key}`; + const existing = rowMap.get(rowKey) as FilingFaithfulStatementRow | undefined; + const hasDimensions = dimensions.some((item) => rowDimensionMatcher(sourceRow, item)); + + if (!existing) { + rowMap.set(rowKey, { + key: rowKey, + label: sourceRow.label, + concept: sourceRow.concept, + order: sourceRow.order, + depth: sourceRow.depth, + isSubtotal: sourceRow.isSubtotal, + values: { ...sourceRow.values }, + hasDimensions + }); + continue; + } + + existing.hasDimensions = existing.hasDimensions || hasDimensions; + existing.order = Math.min(existing.order, sourceRow.order); + existing.depth = Math.min(existing.depth, sourceRow.depth); + existing.isSubtotal = existing.isSubtotal || sourceRow.isSubtotal; + for (const [periodId, value] of Object.entries(sourceRow.values)) { + if (!(periodId in existing.values)) { + existing.values[periodId] = value; + } + } + } + } + + if (dimensionMap) { + for (const item of dimensions) { + const periodId = resolveDimensionPeriodId(item.periodId, periods); + if (!periodId) { + continue; + } + + const entry: DimensionBreakdownRow = { + rowKey: item.rowKey, + concept: item.concept, + periodId, + axis: item.axis, + member: item.member, + value: item.value, + unit: item.unit + }; + + const group = dimensionMap.get(item.rowKey); + if (group) { + group.push(entry); + } else { + dimensionMap.set(item.rowKey, [entry]); + } + } + } + } + + const rows = [...rowMap.values()].sort((a, b) => { + const left = mode === 'standardized' ? a.label : `${(a as FilingFaithfulStatementRow).order.toString().padStart(5, '0')}::${a.label}`; + const right = mode === 'standardized' ? 
b.label : `${(b as FilingFaithfulStatementRow).order.toString().padStart(5, '0')}::${b.label}`; + return left.localeCompare(right); + }); + + if (mode === 'standardized') { + const standardized = rows as StandardizedStatementRow[]; + const core = standardized.filter((row) => row.category === 'core'); + const nonCore = standardized.filter((row) => row.category !== 'core'); + const orderedRows = [...core, ...nonCore]; + + return { + rows: orderedRows, + dimensions: dimensionMap ? Object.fromEntries(dimensionMap.entries()) : null + }; + } + + return { + rows: rows as FilingFaithfulStatementRow[], + dimensions: dimensionMap ? Object.fromEntries(dimensionMap.entries()) : null + }; +} + +export function defaultFinancialSyncLimit(window: FinancialHistoryWindow) { + return window === 'all' ? 120 : 60; +} + +export async function getCompanyFinancialStatements(input: GetCompanyFinancialStatementsInput): Promise { + const ticker = safeTicker(input.ticker); + const snapshotResult = await listFilingStatementSnapshotsByTicker({ + ticker, + window: input.window, + limit: input.limit, + cursor: input.cursor + }); + + const statuses = await countFilingStatementSnapshotStatuses(ticker); + const filings = await listFilingsRecords({ + ticker, + limit: input.window === 'all' ? 250 : 120 + }); + + const financialFilings = filings.filter((filing) => isFinancialForm(filing.filing_type)); + const periods = buildPeriods(snapshotResult.snapshots, input.mode, input.statement); + const rowResult = buildRows( + snapshotResult.snapshots, + periods, + input.mode, + input.statement, + input.includeDimensions + ); + + const latestFiling = filings[0] ?? null; + + return { + company: { + ticker, + companyName: latestFiling?.company_name ?? ticker, + cik: latestFiling?.cik ?? 
null + }, + mode: input.mode, + statement: input.statement, + window: input.window, + periods, + rows: rowResult.rows, + nextCursor: snapshotResult.nextCursor, + coverage: { + filings: periods.length, + rows: rowResult.rows.length, + dimensions: rowResult.dimensions + ? Object.values(rowResult.dimensions).reduce((total, rows) => total + rows.length, 0) + : 0 + }, + dataSourceStatus: { + enabled: input.v2Enabled, + hydratedFilings: statuses.ready, + partialFilings: statuses.partial, + failedFilings: statuses.failed, + pendingFilings: Math.max(0, financialFilings.length - statuses.ready - statuses.partial - statuses.failed), + queuedSync: input.queuedSync + }, + dimensionBreakdown: rowResult.dimensions + }; +} + +export const __financialStatementsInternals = { + buildPeriods, + buildRows, + defaultFinancialSyncLimit +}; diff --git a/lib/server/repos/filing-statements.ts b/lib/server/repos/filing-statements.ts new file mode 100644 index 0000000..c808025 --- /dev/null +++ b/lib/server/repos/filing-statements.ts @@ -0,0 +1,229 @@ +import { and, desc, eq, gte, lt, sql } from 'drizzle-orm'; +import { db } from '@/lib/server/db'; +import { filingStatementSnapshot } from '@/lib/server/db/schema'; + +type FilingStatementSnapshotRow = typeof filingStatementSnapshot.$inferSelect; + +type ParseStatus = 'ready' | 'partial' | 'failed'; +type SnapshotSource = 'sec_filing_summary' | 'xbrl_instance' | 'companyfacts_fallback'; + +type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'equity' | 'comprehensive_income'; + +type StatementValuesByPeriod = Record; + +export type FilingStatementSnapshotPeriod = { + id: string; + filingId: number; + accessionNumber: string; + filingDate: string; + periodEnd: string | null; + filingType: '10-K' | '10-Q'; + periodLabel: string; +}; + +export type FilingFaithfulStatementSnapshotRow = { + key: string; + label: string; + concept: string | null; + order: number; + depth: number; + isSubtotal: boolean; + values: 
StatementValuesByPeriod; +}; + +export type StandardizedStatementSnapshotRow = { + key: string; + label: string; + concept: string; + category: string; + sourceConcepts: string[]; + values: StatementValuesByPeriod; +}; + +export type DimensionStatementSnapshotRow = { + rowKey: string; + concept: string | null; + periodId: string; + axis: string; + member: string; + value: number | null; + unit: string | null; +}; + +export type FilingStatementBundle = { + periods: FilingStatementSnapshotPeriod[]; + statements: Record; +}; + +export type StandardizedStatementBundle = { + periods: FilingStatementSnapshotPeriod[]; + statements: Record; +}; + +export type DimensionStatementBundle = { + statements: Record; +}; + +export type FilingStatementSnapshotRecord = { + id: number; + filing_id: number; + ticker: string; + filing_date: string; + filing_type: '10-K' | '10-Q'; + period_end: string | null; + statement_bundle: FilingStatementBundle | null; + standardized_bundle: StandardizedStatementBundle | null; + dimension_bundle: DimensionStatementBundle | null; + parse_status: ParseStatus; + parse_error: string | null; + source: SnapshotSource; + created_at: string; + updated_at: string; +}; + +export type UpsertFilingStatementSnapshotInput = { + filing_id: number; + ticker: string; + filing_date: string; + filing_type: '10-K' | '10-Q'; + period_end: string | null; + statement_bundle: FilingStatementBundle | null; + standardized_bundle: StandardizedStatementBundle | null; + dimension_bundle: DimensionStatementBundle | null; + parse_status: ParseStatus; + parse_error: string | null; + source: SnapshotSource; +}; + +function toSnapshotRecord(row: FilingStatementSnapshotRow): FilingStatementSnapshotRecord { + return { + id: row.id, + filing_id: row.filing_id, + ticker: row.ticker, + filing_date: row.filing_date, + filing_type: row.filing_type, + period_end: row.period_end, + statement_bundle: row.statement_bundle ?? null, + standardized_bundle: row.standardized_bundle ?? 
null, + dimension_bundle: row.dimension_bundle ?? null, + parse_status: row.parse_status, + parse_error: row.parse_error, + source: row.source, + created_at: row.created_at, + updated_at: row.updated_at + }; +} + +function tenYearsAgoIso() { + const date = new Date(); + date.setUTCFullYear(date.getUTCFullYear() - 10); + return date.toISOString().slice(0, 10); +} + +export async function getFilingStatementSnapshotByFilingId(filingId: number) { + const [row] = await db + .select() + .from(filingStatementSnapshot) + .where(eq(filingStatementSnapshot.filing_id, filingId)) + .limit(1); + + return row ? toSnapshotRecord(row) : null; +} + +export async function upsertFilingStatementSnapshot(input: UpsertFilingStatementSnapshotInput) { + const now = new Date().toISOString(); + + const [saved] = await db + .insert(filingStatementSnapshot) + .values({ + filing_id: input.filing_id, + ticker: input.ticker, + filing_date: input.filing_date, + filing_type: input.filing_type, + period_end: input.period_end, + statement_bundle: input.statement_bundle, + standardized_bundle: input.standardized_bundle, + dimension_bundle: input.dimension_bundle, + parse_status: input.parse_status, + parse_error: input.parse_error, + source: input.source, + created_at: now, + updated_at: now + }) + .onConflictDoUpdate({ + target: filingStatementSnapshot.filing_id, + set: { + ticker: input.ticker, + filing_date: input.filing_date, + filing_type: input.filing_type, + period_end: input.period_end, + statement_bundle: input.statement_bundle, + standardized_bundle: input.standardized_bundle, + dimension_bundle: input.dimension_bundle, + parse_status: input.parse_status, + parse_error: input.parse_error, + source: input.source, + updated_at: now + } + }) + .returning(); + + return toSnapshotRecord(saved); +} + +export async function listFilingStatementSnapshotsByTicker(input: { + ticker: string; + window: '10y' | 'all'; + limit?: number; + cursor?: string | null; +}) { + const safeLimit = 
Math.min(Math.max(Math.trunc(input.limit ?? 40), 1), 120); + const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null; + const constraints = [eq(filingStatementSnapshot.ticker, input.ticker.trim().toUpperCase())]; + + if (input.window === '10y') { + constraints.push(gte(filingStatementSnapshot.filing_date, tenYearsAgoIso())); + } + + if (cursorId && Number.isFinite(cursorId) && cursorId > 0) { + constraints.push(lt(filingStatementSnapshot.id, cursorId)); + } + + const rows = await db + .select() + .from(filingStatementSnapshot) + .where(and(...constraints)) + .orderBy(desc(filingStatementSnapshot.filing_date), desc(filingStatementSnapshot.id)) + .limit(safeLimit + 1); + + const hasMore = rows.length > safeLimit; + const usedRows = hasMore ? rows.slice(0, safeLimit) : rows; + const nextCursor = hasMore + ? String(usedRows[usedRows.length - 1]?.id ?? '') + : null; + + return { + snapshots: usedRows.map(toSnapshotRecord), + nextCursor + }; +} + +export async function countFilingStatementSnapshotStatuses(ticker: string) { + const rows = await db + .select({ + status: filingStatementSnapshot.parse_status, + count: sql`count(*)` + }) + .from(filingStatementSnapshot) + .where(eq(filingStatementSnapshot.ticker, ticker.trim().toUpperCase())) + .groupBy(filingStatementSnapshot.parse_status); + + return rows.reduce>((acc, row) => { + acc[row.status] = Number(row.count); + return acc; + }, { + ready: 0, + partial: 0, + failed: 0 + }); +} diff --git a/lib/server/sec.ts b/lib/server/sec.ts index 00c7e1e..b712944 100644 --- a/lib/server/sec.ts +++ b/lib/server/sec.ts @@ -1,4 +1,13 @@ -import type { Filing } from '@/lib/types'; +import type { Filing, FinancialStatementKind } from '@/lib/types'; +import type { + DimensionStatementBundle, + DimensionStatementSnapshotRow, + FilingFaithfulStatementSnapshotRow, + FilingStatementBundle, + FilingStatementSnapshotPeriod, + StandardizedStatementBundle, + StandardizedStatementSnapshotRow +} from 
'@/lib/server/repos/filing-statements'; type FilingType = Filing['filing_type']; type FilingMetrics = NonNullable; @@ -579,3 +588,915 @@ export async function fetchFilingMetricsForFilings( return metricsByAccession; } } + +type FilingStatementHydrationInput = { + filingId: number; + ticker: string; + cik: string; + accessionNumber: string; + filingDate: string; + filingType: '10-K' | '10-Q'; + filingUrl: string | null; + primaryDocument: string | null; + metrics: Filing['metrics']; +}; + +type FilingStatementHydrationResult = { + filing_id: number; + ticker: string; + filing_date: string; + filing_type: '10-K' | '10-Q'; + period_end: string | null; + statement_bundle: FilingStatementBundle | null; + standardized_bundle: StandardizedStatementBundle | null; + dimension_bundle: DimensionStatementBundle | null; + parse_status: 'ready' | 'partial' | 'failed'; + parse_error: string | null; + source: 'sec_filing_summary' | 'xbrl_instance' | 'companyfacts_fallback'; +}; + +type StatementReportDescriptor = { + shortName: string; + longName: string; + htmlFileName: string | null; + xmlFileName: string | null; +}; + +type StatementParseRow = { + key: string; + label: string; + concept: string | null; + order: number; + depth: number; + isSubtotal: boolean; + value: number | null; +}; + +type DimensionContext = { + endDate: string | null; + dimensions: Array<{ axis: string; member: string }>; +}; + +type CanonicalRowDefinition = { + key: string; + label: string; + category: string; + conceptPatterns: RegExp[]; + labelPatterns: RegExp[]; +}; + +const FINANCIAL_STATEMENT_KINDS: FinancialStatementKind[] = [ + 'income', + 'balance', + 'cash_flow', + 'equity', + 'comprehensive_income' +]; + +const STATEMENT_REPORT_PATTERNS: Record = { + income: [ + /\bstatements?\s+of\s+operations?\b/i, + /\bstatements?\s+of\s+income\b/i, + /\bincome\s+statement/i + ], + balance: [ + /\bbalance\s+sheets?\b/i, + /\bstatement\s+of\s+financial\s+position\b/i + ], + cash_flow: [ + 
/\bstatements?\s+of\s+cash\s+flows?\b/i, + /\bcash\s+flows?\b/i + ], + equity: [ + /\bstatements?\s+of\s+(stockholders|shareholders)['’]?\s+equity\b/i, + /\bchanges\s+in\s+equity\b/i + ], + comprehensive_income: [ + /\bstatements?\s+of\s+comprehensive\s+income\b/i, + /\bcomprehensive\s+income\b/i + ] +}; + +const STANDARDIZED_ROW_DEFINITIONS: Record = { + income: [ + { + key: 'revenue', + label: 'Revenue', + category: 'core', + conceptPatterns: [/revenue/i, /salesrevenuenet/i], + labelPatterns: [/\brevenue\b/i, /\bsales\b/i] + }, + { + key: 'cost-of-revenue', + label: 'Cost of Revenue', + category: 'core', + conceptPatterns: [/costofrevenue/i, /costofgoods/i], + labelPatterns: [/\bcost of revenue\b/i, /\bcost of sales\b/i] + }, + { + key: 'gross-profit', + label: 'Gross Profit', + category: 'core', + conceptPatterns: [/grossprofit/i], + labelPatterns: [/\bgross profit\b/i] + }, + { + key: 'operating-income', + label: 'Operating Income', + category: 'core', + conceptPatterns: [/operatingincome/i, /incomefromoperations/i], + labelPatterns: [/\boperating income\b/i, /\bincome from operations\b/i] + }, + { + key: 'net-income', + label: 'Net Income', + category: 'core', + conceptPatterns: [/netincomeloss/i, /profitloss/i], + labelPatterns: [/\bnet income\b/i, /\bnet earnings\b/i] + } + ], + balance: [ + { + key: 'total-assets', + label: 'Total Assets', + category: 'core', + conceptPatterns: [/^assets$/i], + labelPatterns: [/\btotal assets\b/i] + }, + { + key: 'total-liabilities', + label: 'Total Liabilities', + category: 'core', + conceptPatterns: [/liabilities/i], + labelPatterns: [/\btotal liabilities\b/i] + }, + { + key: 'stockholders-equity', + label: 'Stockholders Equity', + category: 'core', + conceptPatterns: [/stockholdersequity/i, /shareholdersequity/i, /equity/i], + labelPatterns: [/\bequity\b/i] + }, + { + key: 'cash-and-equivalents', + label: 'Cash and Equivalents', + category: 'liquidity', + conceptPatterns: [/cashandcashequivalents/i, 
/cashandequivalents/i], + labelPatterns: [/\bcash\b/i, /\bcash equivalents\b/i] + }, + { + key: 'total-debt', + label: 'Total Debt', + category: 'leverage', + conceptPatterns: [/longtermdebt/i, /debt/i, /borrowings/i], + labelPatterns: [/\btotal debt\b/i, /\blong-term debt\b/i, /\bdebt\b/i] + } + ], + cash_flow: [ + { + key: 'net-cash-operating', + label: 'Net Cash from Operating Activities', + category: 'core', + conceptPatterns: [/netcashprovidedbyusedinoperatingactivities/i, /netcashfromoperatingactivities/i], + labelPatterns: [/\boperating activities\b/i] + }, + { + key: 'net-cash-investing', + label: 'Net Cash from Investing Activities', + category: 'core', + conceptPatterns: [/netcashprovidedbyusedininvestingactivities/i], + labelPatterns: [/\binvesting activities\b/i] + }, + { + key: 'net-cash-financing', + label: 'Net Cash from Financing Activities', + category: 'core', + conceptPatterns: [/netcashprovidedbyusedinfinancingactivities/i], + labelPatterns: [/\bfinancing activities\b/i] + }, + { + key: 'net-change-cash', + label: 'Net Change in Cash', + category: 'core', + conceptPatterns: [/cashandcashequivalentsperiodincrease/i, /increase.*cash/i], + labelPatterns: [/\bnet change\b/i, /\bincrease.*cash\b/i] + } + ], + equity: [ + { + key: 'equity-balance', + label: 'Total Equity', + category: 'core', + conceptPatterns: [/stockholdersequity/i, /shareholdersequity/i, /equity/i], + labelPatterns: [/\btotal equity\b/i, /\bequity\b/i] + }, + { + key: 'retained-earnings', + label: 'Retained Earnings', + category: 'core', + conceptPatterns: [/retainedearnings/i], + labelPatterns: [/\bretained earnings\b/i] + } + ], + comprehensive_income: [ + { + key: 'comprehensive-income', + label: 'Comprehensive Income', + category: 'core', + conceptPatterns: [/comprehensiveincome/i], + labelPatterns: [/\bcomprehensive income\b/i] + }, + { + key: 'other-comprehensive-income', + label: 'Other Comprehensive Income', + category: 'core', + conceptPatterns: 
[/othercomprehensiveincome/i], + labelPatterns: [/\bother comprehensive income\b/i] + } + ] +}; + +function createStatementRecord(factory: () => T): Record { + return FINANCIAL_STATEMENT_KINDS.reduce((acc, kind) => { + acc[kind] = factory(); + return acc; + }, {} as Record); +} + +function statementKindLabel(kind: FinancialStatementKind) { + switch (kind) { + case 'income': + return 'Income Statement'; + case 'balance': + return 'Balance Sheet'; + case 'cash_flow': + return 'Cash Flow Statement'; + case 'equity': + return 'Statement of Equity'; + case 'comprehensive_income': + return 'Comprehensive Income'; + default: + return kind; + } +} + +function resolveFilingDirectoryUrl(input: { + filingUrl: string | null; + cik: string; + accessionNumber: string; +}) { + const direct = input.filingUrl?.trim(); + if (direct) { + const lastSlash = direct.lastIndexOf('/'); + if (lastSlash > 'https://'.length) { + return direct.slice(0, lastSlash + 1); + } + } + + const cikPath = normalizeCikForPath(input.cik); + const accessionPath = compactAccessionNumber(input.accessionNumber); + if (!cikPath || !accessionPath) { + return null; + } + + return `https://www.sec.gov/Archives/edgar/data/${cikPath}/${accessionPath}/`; +} + +function toAbsoluteArchiveUrl(baseUrl: string, relativePath: string | null) { + const normalized = (relativePath ?? 
'').trim(); + if (!normalized) { + return null; + } + + if (/^https?:\/\//i.test(normalized)) { + return normalized; + } + + return `${baseUrl}${normalized.replace(/^\/+/, '')}`; +} + +async function fetchText(url: string, fetchImpl: typeof fetch) { + const response = await fetchImpl(url, { + headers: { + 'User-Agent': envUserAgent(), + Accept: 'text/xml, text/html, text/plain;q=0.9, */*;q=0.8' + }, + cache: 'no-store' + }); + + if (!response.ok) { + throw new Error(`SEC request failed (${response.status})`); + } + + return await response.text(); +} + +function xmlTextValue(block: string, tagName: string) { + const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const pattern = new RegExp(`<${escaped}>([\\s\\S]*?)<\\/${escaped}>`, 'i'); + const match = block.match(pattern); + if (!match) { + return ''; + } + + return decodeHtmlEntities(match[1] ?? '').trim(); +} + +function parseFilingSummaryReports(xml: string) { + const reports: StatementReportDescriptor[] = []; + const reportPattern = /([\s\S]*?)<\/Report>/gi; + + for (const match of xml.matchAll(reportPattern)) { + const block = match[1] ?? 
''; + reports.push({ + shortName: xmlTextValue(block, 'ShortName'), + longName: xmlTextValue(block, 'LongName'), + htmlFileName: xmlTextValue(block, 'HtmlFileName') || null, + xmlFileName: xmlTextValue(block, 'XmlFileName') || null + }); + } + + return reports; +} + +function scoreReport(kind: FinancialStatementKind, report: StatementReportDescriptor) { + const haystack = `${report.shortName} ${report.longName}`.trim(); + if (!haystack) { + return 0; + } + + let score = 0; + for (const pattern of STATEMENT_REPORT_PATTERNS[kind]) { + if (pattern.test(haystack)) { + score += 2; + } + } + + if (/\bparenthetical\b/i.test(haystack) || /\bdetail\b/i.test(haystack)) { + score -= 1; + } + + return score; +} + +function chooseStatementReport(kind: FinancialStatementKind, reports: StatementReportDescriptor[]) { + let best: StatementReportDescriptor | null = null; + let bestScore = 0; + + for (const report of reports) { + const score = scoreReport(kind, report); + if (score > bestScore) { + best = report; + bestScore = score; + } + } + + return bestScore > 0 ? best : null; +} + +function sanitizeCellText(raw: string) { + return decodeHtmlEntities( + raw + .replace(//gi, '\n') + .replace(/<[^>]+>/g, ' ') + ) + .replace(/[ \t]+/g, ' ') + .replace(/\n+/g, ' ') + .trim(); +} + +function extractConceptFromMarkup(markup: string) { + const defref = markup.match(/defref[_:-]([a-z0-9_:.:-]+)/i); + if (defref?.[1]) { + return defref[1].replace(/_/g, ':'); + } + + const nameAttr = markup.match(/\bname=[\"']([a-z0-9_:.:-]+)[\"']/i); + if (nameAttr?.[1]) { + return nameAttr[1]; + } + + return null; +} + +function parseIndentDepth(attrs: string) { + const style = attrs.match(/\bstyle=[\"']([^\"']+)[\"']/i)?.[1] ?? 
''; + const padding = style.match(/padding-left:\s*([0-9.]+)px/i)?.[1]; + if (padding) { + const numeric = Number.parseFloat(padding); + if (Number.isFinite(numeric) && numeric > 0) { + return Math.max(0, Math.round(numeric / 12)); + } + } + + const margin = style.match(/margin-left:\s*([0-9.]+)px/i)?.[1]; + if (margin) { + const numeric = Number.parseFloat(margin); + if (Number.isFinite(numeric) && numeric > 0) { + return Math.max(0, Math.round(numeric / 12)); + } + } + + return 0; +} + +function parseStatementNumber(raw: string): number | null { + const trimmed = raw.trim(); + if (!trimmed || /^n\/a$/i.test(trimmed) || /^--+$/.test(trimmed)) { + return null; + } + + if (/%$/.test(trimmed)) { + return null; + } + + const negative = trimmed.startsWith('(') && trimmed.endsWith(')'); + const cleaned = trimmed + .replace(/[$,\s]/g, '') + .replace(/[()]/g, '') + .replace(/\u2212/g, '-'); + + const value = Number.parseFloat(cleaned); + if (!Number.isFinite(value)) { + return null; + } + + return negative ? -Math.abs(value) : value; +} + +function slug(value: string) { + return value + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); +} + +function parseStatementRowsFromReport(content: string): StatementParseRow[] { + const tables = [...content.matchAll(/]*>([\s\S]*?)<\/table>/gi)]; + if (tables.length === 0) { + return []; + } + + let bestRows: StatementParseRow[] = []; + + for (const tableMatch of tables) { + const table = tableMatch[0] ?? ''; + const rows: StatementParseRow[] = []; + let order = 0; + + for (const rowMatch of table.matchAll(/]*>([\s\S]*?)<\/tr>/gi)) { + const rowMarkup = rowMatch[0] ?? ''; + const cells = [...rowMarkup.matchAll(/]*)>([\s\S]*?)<\/t[dh]>/gi)]; + if (cells.length < 2) { + continue; + } + + const labelCell = cells[0]; + const labelAttrs = labelCell?.[1] ?? ''; + const labelRaw = labelCell?.[2] ?? 
'';
+      const label = sanitizeCellText(labelRaw);
+      if (!label || /^(years ended|assets|liabilities|equity)$/i.test(label)) {
+        continue;
+      }
+
+      let value: number | null = null;
+      for (let i = 1; i < cells.length; i += 1) {
+        const text = sanitizeCellText(cells[i]?.[2] ?? '');
+        const parsed = parseStatementNumber(text);
+        if (parsed !== null) {
+          value = parsed;
+          break;
+        }
+      }
+
+      if (value === null) {
+        continue;
+      }
+
+      order += 1;
+
+      const concept = extractConceptFromMarkup(rowMarkup);
+      rows.push({
+        key: concept ? slug(concept) : `${slug(label)}-${order}`,
+        label,
+        concept,
+        order,
+        depth: parseIndentDepth(labelAttrs),
+        isSubtotal: /^total\b/i.test(label) || /\bsubtotal\b/i.test(label),
+        value
+      });
+    }
+
+    if (rows.length > bestRows.length) {
+      bestRows = rows;
+    }
+  }
+
+  return bestRows;
+}
+
+/**
+ * Converts parsed statement rows into filing-faithful snapshot rows.
+ *
+ * Every field is copied through unchanged; the single parsed `value` is
+ * stored in `values` under `periodId`.
+ */
+function toSnapshotRows(periodId: string, rows: StatementParseRow[]): FilingFaithfulStatementSnapshotRow[] {
+  return rows.map((row) => ({
+    key: row.key,
+    label: row.label,
+    concept: row.concept,
+    order: row.order,
+    depth: row.depth,
+    isSubtotal: row.isSubtotal,
+    values: {
+      [periodId]: row.value
+    }
+  }));
+}
+
+/**
+ * Tests whether a filing-faithful row satisfies a canonical row definition.
+ *
+ * @returns `true` when any concept pattern matches the row's concept (a null
+ * concept is tested as the empty string) or any label pattern matches the
+ * row's label.
+ */
+function matchStandardizedDefinition(
+  row: FilingFaithfulStatementSnapshotRow,
+  definition: CanonicalRowDefinition
+) {
+  const concept = row.concept ?? '';
+  return definition.conceptPatterns.some((pattern) => pattern.test(concept))
+    || definition.labelPatterns.some((pattern) => pattern.test(row.label));
+}
+
+/**
+ * Looks up a substitute value in the filing's metrics for the canonical rows
+ * that have a metric counterpart (revenue, net income, total assets, cash,
+ * debt).
+ *
+ * @returns The metric value, or `null` when `metrics` is absent, the metric is
+ * unset, or the (kind, rowKey) pair has no counterpart.
+ */
+function fallbackMetricValue(
+  kind: FinancialStatementKind,
+  rowKey: string,
+  metrics: Filing['metrics']
+) {
+  if (!metrics) {
+    return null;
+  }
+
+  if (kind === 'income' && rowKey === 'revenue') {
+    return metrics.revenue ?? null;
+  }
+
+  if (kind === 'income' && rowKey === 'net-income') {
+    return metrics.netIncome ?? null;
+  }
+
+  if (kind === 'balance' && rowKey === 'total-assets') {
+    return metrics.totalAssets ?? null;
+  }
+
+  if (kind === 'balance' && rowKey === 'cash-and-equivalents') {
+    return metrics.cash ?? null;
+  }
+
+  if (kind === 'balance' && rowKey === 'total-debt') {
+    return metrics.debt ?? null;
+  }
+
+  return null;
+}
+
+/**
+ * Projects filing-faithful rows onto the canonical row definitions for `kind`.
+ *
+ * One output row is emitted for every definition, in definition order, whether
+ * or not a source row matched it. A definition takes the first not-yet-used
+ * source row that matches; when that yields no value, the filing metrics are
+ * consulted through `fallbackMetricValue`. Source rows that no definition
+ * claimed are appended afterwards under an `other-` key with category
+ * `'other'`.
+ */
+function toStandardizedRows(
+  kind: FinancialStatementKind,
+  periodId: string,
+  rows: FilingFaithfulStatementSnapshotRow[],
+  metrics: Filing['metrics']
+): StandardizedStatementSnapshotRow[] {
+  const definitions = STANDARDIZED_ROW_DEFINITIONS[kind];
+  const normalizedRows = [...rows];
+  const usedKeys = new Set<string>();
+  const standardizedRows: StandardizedStatementSnapshotRow[] = [];
+
+  for (const definition of definitions) {
+    const matched = normalizedRows.find((row) => !usedKeys.has(row.key) && matchStandardizedDefinition(row, definition));
+    const matchedValue = matched?.values[periodId] ?? null;
+    // The metrics fallback is only consulted when the matched row gave no value.
+    const fallbackValue = matchedValue === null
+      ? fallbackMetricValue(kind, definition.key, metrics)
+      : null;
+
+    if (matched) {
+      usedKeys.add(matched.key);
+    }
+
+    standardizedRows.push({
+      key: definition.key,
+      label: definition.label,
+      concept: matched?.concept ?? definition.key,
+      category: definition.category,
+      sourceConcepts: matched?.concept ? [matched.concept] : [],
+      values: {
+        [periodId]: matchedValue ?? fallbackValue
+      }
+    });
+  }
+
+  for (const row of normalizedRows) {
+    if (usedKeys.has(row.key)) {
+      continue;
+    }
+
+    standardizedRows.push({
+      key: `other-${row.key}`,
+      label: row.label,
+      concept: row.concept ?? row.key,
+      category: 'other',
+      sourceConcepts: row.concept ? [row.concept] : [],
+      values: {
+        [periodId]: row.values[periodId] ?? null
+      }
+    });
+  }
+
+  return standardizedRows;
+}
+
+/**
+ * Collects the `context` elements of a raw document that carry at least one
+ * `explicitMember` dimension.
+ *
+ * @returns A map from context id to its `endDate` text (or `null` when no
+ * `endDate` element is present) and its axis/member pairs. Contexts without
+ * an id or without any dimension are left out.
+ */
+function parseContextsWithDimensions(raw: string) {
+  const contexts = new Map<string, { endDate: string | null; dimensions: Array<{ axis: string; member: string }> }>();
+  const contextPattern = /<(?:[a-z0-9]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9]+:)?context>/gi;
+
+  for (const match of raw.matchAll(contextPattern)) {
+    const contextId = match[1] ?? '';
+    const block = match[2] ?? '';
+    if (!contextId) {
+      continue;
+    }
+
+    const endDate = block.match(/<(?:[a-z0-9]+:)?endDate>([^<]+)<\/(?:[a-z0-9]+:)?endDate>/i)?.[1]?.trim() ?? null;
+    const dimensions: Array<{ axis: string; member: string }> = [];
+
+    const dimPattern = /<(?:[a-z0-9]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9]+:)?explicitMember>/gi;
+    for (const dimMatch of block.matchAll(dimPattern)) {
+      const axis = (dimMatch[1] ?? '').trim();
+      const member = (dimMatch[2] ?? '').trim();
+      if (!axis || !member) {
+        continue;
+      }
+
+      dimensions.push({ axis, member });
+    }
+
+    if (dimensions.length === 0) {
+      continue;
+    }
+
+    contexts.set(contextId, { endDate, dimensions });
+  }
+
+  return contexts;
+}
+
+/**
+ * Guesses which statement a concept belongs to from keywords in its
+ * lower-cased name.
+ *
+ * The checks run in the order written and the first hit wins, so for example
+ * any name containing "cash" is classified as `'cash_flow'` before the
+ * balance-sheet keywords are tried.
+ *
+ * @returns The statement kind, or `null` when no keyword matches.
+ */
+function statementKindFromConcept(concept: string): FinancialStatementKind | null {
+  const normalized = concept.toLowerCase();
+
+  if (/cash|operatingactivities|investingactivities|financingactivities/.test(normalized)) {
+    return 'cash_flow';
+  }
+
+  if (/equity|retainedearnings|additionalpaidincapital/.test(normalized)) {
+    return 'equity';
+  }
+
+  if (/comprehensiveincome/.test(normalized)) {
+    return 'comprehensive_income';
+  }
+
+  if (/asset|liabilit|debt/.test(normalized)) {
+    return 'balance';
+  }
+
+  if (/revenue|income|profit|expense|costof/.test(normalized)) {
+    return 'income';
+  }
+
+  return null;
+}
+
+/**
+ * Extracts dimensional facts from the `ix:nonfraction` elements of a raw
+ * document, grouped by statement kind.
+ *
+ * A fact is kept only when its `contextref` resolves to a context that has
+ * dimensions, its `name` maps to a statement kind, and its text parses as a
+ * number. One row is emitted per dimension of the context. The row's period is
+ * the context's `endDate`, or `fallbackPeriodId` when the context has none.
+ *
+ * @returns A per-kind record of dimension rows; every list is empty when the
+ * document has no dimensional contexts.
+ */
+function parseDimensionFacts(
+  raw: string,
+  fallbackPeriodId: string
+) {
+  const contexts = parseContextsWithDimensions(raw);
+  if (contexts.size === 0) {
+    return createStatementRecord(() => []);
+  }
+
+  // NOTE(review): an explicit type argument may have been lost on these
+  // createStatementRecord calls; confirm against the helper's signature.
+  const rows = createStatementRecord(() => []);
+
+  // Group 1 captures the opening tag's attributes, group 2 the element body.
+  const ixPattern = /<ix:nonfraction\b([^>]*)>([\s\S]*?)<\/ix:nonfraction>/gi;
+  let guard = 0;
+
+  for (const match of raw.matchAll(ixPattern)) {
+    // Stop after 8,000 matched elements so a very large document stays bounded.
+    guard += 1;
+    if (guard > 8_000) {
+      break;
+    }
+
+    const attrs = match[1] ?? '';
+    const body = sanitizeCellText(match[2] ?? '');
+
+    const contextRef = attrs.match(/\bcontextref=["']([^"']+)["']/i)?.[1] ?? '';
+    const concept = attrs.match(/\bname=["']([^"']+)["']/i)?.[1] ?? '';
+    const unit = attrs.match(/\bunitref=["']([^"']+)["']/i)?.[1] ?? null;
+
+    if (!contextRef || !concept) {
+      continue;
+    }
+
+    const context = contexts.get(contextRef);
+    if (!context || context.dimensions.length === 0) {
+      continue;
+    }
+
+    const kind = statementKindFromConcept(concept);
+    if (!kind) {
+      continue;
+    }
+
+    const value = parseStatementNumber(body);
+    if (value === null) {
+      continue;
+    }
+
+    const periodId = context.endDate ?? fallbackPeriodId;
+    const rowKey = slug(concept);
+    for (const dimension of context.dimensions) {
+      rows[kind].push({
+        rowKey,
+        concept,
+        periodId,
+        axis: dimension.axis,
+        member: dimension.member,
+        value,
+        unit
+      });
+    }
+  }
+
+  return rows;
+}
+
+/**
+ * Returns copies of `rows` with a `hasDimensions` flag added.
+ *
+ * A row is flagged when some dimension entry shares its key (`rowKey`) or,
+ * compared case-insensitively, its concept. The input rows are not mutated.
+ */
+function markHasDimensions<T extends { key: string; concept: string | null }>(
+  rows: T[],
+  dimensions: DimensionStatementSnapshotRow[]
+) {
+  const dimensionConcepts = new Set(dimensions.map((item) => item.concept?.toLowerCase() ?? '').filter(Boolean));
+  const dimensionRowKeys = new Set(dimensions.map((item) => item.rowKey));
+
+  return rows.map((row) => {
+    const concept = row.concept?.toLowerCase() ?? '';
+    const hasDimensions = dimensionRowKeys.has(row.key) || (concept ? dimensionConcepts.has(concept) : false);
+    return {
+      ...row,
+      hasDimensions
+    };
+  });
+}
+
+/** Builds a filing-faithful bundle holding `period` and an empty row list per statement kind. */
+function emptyStatementBundle(period: FilingStatementSnapshotPeriod): FilingStatementBundle {
+  return {
+    periods: [period],
+    statements: createStatementRecord(() => [])
+  };
+}
+
+/** Builds a standardized bundle holding `period` and an empty row list per statement kind. */
+function emptyStandardizedBundle(period: FilingStatementSnapshotPeriod): StandardizedStatementBundle {
+  return {
+    periods: [period],
+    statements: createStatementRecord(() => [])
+  };
+}
+
+/** Builds a dimension bundle with an empty row list per statement kind. */
+function emptyDimensionBundle(): DimensionStatementBundle {
+  return {
+    statements: createStatementRecord(() => [])
+  };
+}
+
+/**
+ * Builds the statement snapshot for one filing.
+ *
+ * Steps, each best-effort:
+ * 1. Fetch `FilingSummary.xml` from the filing directory, pick one report per
+ *    statement kind and parse its rows. A report that fails to fetch or parse
+ *    is skipped so the remaining statements can still be read.
+ * 2. Fetch the primary document and extract dimensional facts.
+ * 3. Derive standardized rows from the faithful rows (falling back to
+ *    `input.metrics`) and flag rows that have dimension data.
+ *
+ * The snapshot period id is `<filingDate>-<compact accession number>`, and
+ * both the period's `periodEnd` and the result's `period_end` are set to the
+ * filing date.
+ *
+ * @param input Filing identity, URLs and metrics to hydrate from.
+ * @param options `fetchImpl` replaces the global `fetch` when provided.
+ * @returns The snapshot with `parse_status` `'ready'` when every statement
+ * kind produced rows, `'partial'` when at least one statement produced rows
+ * or a standardized value, and `'failed'` otherwise. Errors from steps 1
+ * and 2 are captured in `parse_error` (first one wins) instead of thrown.
+ */
+export async function hydrateFilingStatementSnapshot(
+  input: FilingStatementHydrationInput,
+  options?: {
+    fetchImpl?: typeof fetch;
+  }
+): Promise<FilingStatementHydrationResult> {
+  const periodId = `${input.filingDate}-${compactAccessionNumber(input.accessionNumber)}`;
+  const period: FilingStatementSnapshotPeriod = {
+    id: periodId,
+    filingId: input.filingId,
+    accessionNumber: input.accessionNumber,
+    filingDate: input.filingDate,
+    periodEnd: input.filingDate,
+    filingType: input.filingType,
+    periodLabel: input.filingType === '10-Q' ? 'Quarter End' : 'Fiscal Year End'
+  };
+
+  const fetchImpl = options?.fetchImpl ?? fetch;
+  const statementBundle = emptyStatementBundle(period);
+  const standardizedBundle = emptyStandardizedBundle(period);
+  const dimensionBundle = emptyDimensionBundle();
+  let source: FilingStatementHydrationResult['source'] = 'companyfacts_fallback';
+  let parseError: string | null = null;
+
+  try {
+    const filingDirectory = resolveFilingDirectoryUrl({
+      filingUrl: input.filingUrl,
+      cik: input.cik,
+      accessionNumber: input.accessionNumber
+    });
+
+    if (filingDirectory) {
+      const summaryXml = await fetchText(`${filingDirectory}FilingSummary.xml`, fetchImpl);
+      const reports = parseFilingSummaryReports(summaryXml);
+
+      for (const kind of FINANCIAL_STATEMENT_KINDS) {
+        const report = chooseStatementReport(kind, reports);
+        if (!report) {
+          continue;
+        }
+
+        const reportUrl = toAbsoluteArchiveUrl(filingDirectory, report.htmlFileName ?? report.xmlFileName);
+        if (!reportUrl) {
+          continue;
+        }
+
+        try {
+          const reportText = await fetchText(reportUrl, fetchImpl);
+          const parsedRows = parseStatementRowsFromReport(reportText);
+          if (parsedRows.length === 0) {
+            continue;
+          }
+
+          source = 'sec_filing_summary';
+          statementBundle.statements[kind] = toSnapshotRows(periodId, parsedRows);
+        } catch {
+          // Continue to other statements when one report fails.
+        }
+      }
+    }
+  } catch (error) {
+    parseError = error instanceof Error ? error.message : 'Failed to parse filing summary';
+  }
+
+  try {
+    const primaryUrl = resolvePrimaryFilingUrl({
+      filingUrl: input.filingUrl,
+      cik: input.cik,
+      accessionNumber: input.accessionNumber,
+      primaryDocument: input.primaryDocument
+    });
+
+    if (primaryUrl) {
+      const rawDocument = await fetchText(primaryUrl, fetchImpl);
+      const dimensions = parseDimensionFacts(rawDocument, periodId);
+      for (const kind of FINANCIAL_STATEMENT_KINDS) {
+        dimensionBundle.statements[kind] = dimensions[kind];
+      }
+
+      // Only upgrade the source when the filing summary produced nothing.
+      const hasAnyDimensions = FINANCIAL_STATEMENT_KINDS.some((kind) => dimensionBundle.statements[kind].length > 0);
+      if (hasAnyDimensions && source === 'companyfacts_fallback') {
+        source = 'xbrl_instance';
+      }
+    }
+  } catch (error) {
+    // Keep the first error; a filing-summary failure is reported in preference.
+    if (!parseError) {
+      parseError = error instanceof Error ? error.message : 'Failed to parse inline XBRL dimensions';
+    }
+  }
+
+  for (const kind of FINANCIAL_STATEMENT_KINDS) {
+    const faithfulRows = statementBundle.statements[kind];
+    standardizedBundle.statements[kind] = toStandardizedRows(kind, periodId, faithfulRows, input.metrics);
+
+    statementBundle.statements[kind] = markHasDimensions(
+      faithfulRows,
+      dimensionBundle.statements[kind]
+    );
+
+    standardizedBundle.statements[kind] = markHasDimensions(
+      standardizedBundle.statements[kind],
+      dimensionBundle.statements[kind]
+    );
+  }
+
+  const statementCount = FINANCIAL_STATEMENT_KINDS.filter((kind) => statementBundle.statements[kind].length > 0).length;
+  // toStandardizedRows emits a row for every canonical definition even when
+  // nothing matched, so a non-empty list proves nothing. Count a statement
+  // only when it holds at least one real value (parsed or metrics fallback);
+  // otherwise 'failed' could never be reported.
+  const standardizedCount = FINANCIAL_STATEMENT_KINDS.filter((kind) => {
+    return standardizedBundle.statements[kind].some((row) => (row.values[periodId] ?? null) !== null);
+  }).length;
+  const parseStatus: FilingStatementHydrationResult['parse_status'] = statementCount === FINANCIAL_STATEMENT_KINDS.length
+    ? 'ready'
+    : (statementCount > 0 || standardizedCount > 0)
+      ? 'partial'
+      : 'failed';
+
+  return {
+    filing_id: input.filingId,
+    ticker: input.ticker.trim().toUpperCase(),
+    filing_date: input.filingDate,
+    filing_type: input.filingType,
+    period_end: input.filingDate,
+    statement_bundle: statementBundle,
+    standardized_bundle: standardizedBundle,
+    dimension_bundle: dimensionBundle,
+    parse_status: parseStatus,
+    parse_error: parseStatus === 'failed' ? (parseError ?? 'No financial statement tables found') : parseError,
+    source,
+  };
+}
+
+/** Module-private helpers re-exported under one object (presumably for tests; confirm against callers). */
+export const __statementInternals = {
+  parseFilingSummaryReports,
+  parseStatementRowsFromReport,
+  parseDimensionFacts,
+  statementKindLabel
+};
diff --git a/lib/server/task-processors.ts b/lib/server/task-processors.ts
index f1ce8b9..a7c4bec 100644
--- a/lib/server/task-processors.ts
+++ b/lib/server/task-processors.ts
@@ -10,9 +10,14 @@ import { buildPortfolioSummary } from '@/lib/server/portfolio';
 import { getQuote } from '@/lib/server/prices';
 import {
   getFilingByAccession,
+  listFilingsRecords,
   saveFilingAnalysis,
   upsertFilingsRecords
 } from '@/lib/server/repos/filings';
+import {
+  getFilingStatementSnapshotByFilingId,
+  upsertFilingStatementSnapshot
+} from '@/lib/server/repos/filing-statements';
 import {
   applyRefreshedPrices,
   listHoldingsForPriceRefresh,
@@ -22,7 +27,8 @@ import { createPortfolioInsight } from '@/lib/server/repos/insights';
 import {
   fetchFilingMetricsForFilings,
   fetchPrimaryFilingText,
-  fetchRecentFilings
+  fetchRecentFilings,
+  hydrateFilingStatementSnapshot
 } from '@/lib/server/sec';
 
 const EXTRACTION_REQUIRED_KEYS = [
@@ -40,6 +46,8 @@ const EXTRACTION_REQUIRED_KEYS = [
 const EXTRACTION_MAX_ITEMS = 6;
 const EXTRACTION_ITEM_MAX_LENGTH = 280;
 const EXTRACTION_SUMMARY_MAX_LENGTH = 900;
+const STATEMENT_HYDRATION_DELAY_MS = 120;
+const STATEMENT_HYDRATION_MAX_FILINGS = 80;
 const SEGMENT_PATTERNS = [
   /\boperating segment\b/i,
   /\bsegment revenue\b/i,
@@ -556,11 +564,67 @@ async function processSyncFilings(task: Task) {
     }))
   );
 
+  let statementSnapshotsHydrated = 0;
+  let statementSnapshotsFailed = 0;
+  const hydrateCandidates = (await listFilingsRecords({
+    ticker,
+    limit: Math.min(Math.max(limit * 3, 40), STATEMENT_HYDRATION_MAX_FILINGS)
+  }))
+    .filter((filing): filing is Filing & { filing_type: '10-K' | '10-Q' } => {
+      return filing.filing_type === '10-K' || filing.filing_type === '10-Q';
+    });
+
+  for (const filing of hydrateCandidates) {
+    const existingSnapshot = await getFilingStatementSnapshotByFilingId(filing.id);
+    const shouldRefresh = !existingSnapshot
+      || Date.parse(existingSnapshot.updated_at) < Date.parse(filing.updated_at);
+
+    if (!shouldRefresh) {
+      continue;
+    }
+
+    try {
+      const snapshot = await hydrateFilingStatementSnapshot({
+        filingId: filing.id,
+        ticker: filing.ticker,
+        cik: filing.cik,
+        accessionNumber: filing.accession_number,
+        filingDate: filing.filing_date,
+        filingType: filing.filing_type,
+        filingUrl: filing.filing_url,
+        primaryDocument: filing.primary_document ?? null,
+        metrics: filing.metrics
+      });
+
+      await upsertFilingStatementSnapshot(snapshot);
+      statementSnapshotsHydrated += 1;
+    } catch (error) {
+      await upsertFilingStatementSnapshot({
+        filing_id: filing.id,
+        ticker: filing.ticker,
+        filing_date: filing.filing_date,
+        filing_type: filing.filing_type,
+        period_end: filing.filing_date,
+        statement_bundle: null,
+        standardized_bundle: null,
+        dimension_bundle: null,
+        parse_status: 'failed',
+        parse_error: error instanceof Error ? error.message : 'Statement hydration failed',
+        source: 'companyfacts_fallback'
+      });
+      statementSnapshotsFailed += 1;
+    }
+
+    await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
+  }
+
   return {
     ticker,
     fetched: filings.length,
     inserted: saveResult.inserted,
-    updated: saveResult.updated
+    updated: saveResult.updated,
+    statementSnapshotsHydrated,
+    statementSnapshotsFailed
   };
 }
 