Implement dual-surface financials and db bootstrap

This commit is contained in:
2026-03-06 16:24:56 -05:00
parent 8e62c66677
commit 8b1fff4130
7 changed files with 1207 additions and 179 deletions

View File

@@ -4,6 +4,7 @@ import type {
FinancialHistoryWindow,
FinancialStatementKind,
FinancialStatementPeriod,
StandardizedStatementRow,
TaxonomyStatementRow
} from '@/lib/types';
import { listFilingsRecords } from '@/lib/server/repos/filings';
@@ -28,6 +29,19 @@ type GetCompanyFinancialTaxonomyInput = {
queuedSync: boolean;
};
/**
 * Describes one canonical (standardized) statement line and how to populate it.
 *
 * A definition is satisfied by matching faithful taxonomy rows (via
 * `localNames` / `labelIncludes`), by a `formula` over previously built
 * canonical rows, or by both.
 */
type CanonicalRowDefinition = {
  /** Key of the resulting standardized row; also its key in the `rowsByKey` map handed to formulas. */
  key: string;
  /** Label copied onto the standardized row. */
  label: string;
  /** Category copied onto the standardized row. */
  category: string;
  /** Sort position copied onto the standardized row. */
  order: number;
  /** Concept local names that map to this row; compared after trimming and lower-casing. */
  localNames?: ReadonlyArray<string>;
  /** Substrings that map a row to this definition when found in its trimmed, lower-cased label. */
  labelIncludes?: ReadonlyArray<string>;
  /**
   * Optional derivation from canonical rows built so far.
   * Returns the derived per-period values and source keys, or `null` when the
   * row cannot be derived.
   */
  formula?: (
    rowsByKey: Map<string, StandardizedStatementRow>,
    periodIds: string[]
  ) => Pick<StandardizedStatementRow, 'values' | 'resolvedSourceRowKeys'> | null;
};
/**
 * Canonicalises a ticker symbol: surrounding whitespace is removed and the
 * remainder is upper-cased.
 */
function safeTicker(input: string) {
  const trimmed = input.trim();
  return trimmed.toUpperCase();
}
@@ -215,16 +229,419 @@ function buildRows(
});
}
/**
 * Produces the comparison form of a token: trimmed and lower-cased, so that
 * lookups ignore case and surrounding whitespace.
 */
function normalizeToken(value: string) {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
/**
 * Adds two nullable amounts. The result is `null` as soon as either operand
 * is `null`, so a missing input is never treated as zero.
 */
function sumValues(left: number | null, right: number | null) {
  const hasBothOperands = left !== null && right !== null;
  return hasBothOperands ? left + right : null;
}
/**
 * Subtracts `right` from `left`. The result is `null` as soon as either
 * operand is `null`, so a missing input is never treated as zero.
 */
function subtractValues(left: number | null, right: number | null) {
  const hasBothOperands = left !== null && right !== null;
  return hasBothOperands ? left - right : null;
}
/**
 * Creates a `formula` callback that derives a row by combining two already
 * built canonical rows, period by period.
 *
 * The callback returns `null` when either operand row is absent from
 * `rowsByKey`. Otherwise every requested period gets `combine(left, right)`
 * (a value missing for a period is passed as `null`) and a `null` resolved
 * source key, since a derived figure has no single source row.
 */
function pairwiseFormula(
  leftKey: string,
  rightKey: string,
  combine: (left: number | null, right: number | null) => number | null
): NonNullable<CanonicalRowDefinition['formula']> {
  return (rowsByKey, periodIds) => {
    const leftRow = rowsByKey.get(leftKey);
    const rightRow = rowsByKey.get(rightKey);
    if (!leftRow || !rightRow) {
      return null;
    }

    const valueEntries = periodIds.map((periodId) => [
      periodId,
      combine(leftRow.values[periodId] ?? null, rightRow.values[periodId] ?? null)
    ]);
    const sourceEntries = periodIds.map((periodId) => [periodId, null]);

    return {
      values: Object.fromEntries(valueEntries),
      resolvedSourceRowKeys: Object.fromEntries(sourceEntries)
    };
  };
}

/**
 * Canonical row catalogue per statement kind.
 *
 * Definitions are processed in array order, and a formula can only see rows
 * built before it, so operand rows must be listed ahead of the rows derived
 * from them (e.g. `revenue` and `cost-of-revenue` before `gross-profit`).
 */
const STANDARDIZED_ROW_DEFINITIONS: Record<FinancialStatementKind, CanonicalRowDefinition[]> = {
  income: [
    {
      key: 'revenue',
      label: 'Revenue',
      category: 'revenue',
      order: 10,
      localNames: [
        'RevenueFromContractWithCustomerExcludingAssessedTax',
        'Revenues',
        'SalesRevenueNet',
        'TotalRevenuesAndOtherIncome'
      ]
    },
    {
      key: 'cost-of-revenue',
      label: 'Cost of Revenue',
      category: 'expense',
      order: 20,
      localNames: [
        'CostOfRevenue',
        'CostOfGoodsSold',
        'CostOfSales',
        'CostOfProductsSold',
        'CostOfServices'
      ]
    },
    {
      key: 'gross-profit',
      label: 'Gross Profit',
      category: 'profit',
      order: 30,
      localNames: ['GrossProfit'],
      // revenue - cost of revenue
      formula: pairwiseFormula('revenue', 'cost-of-revenue', subtractValues)
    },
    {
      key: 'research-and-development',
      label: 'Research & Development',
      category: 'opex',
      order: 40,
      localNames: ['ResearchAndDevelopmentExpense']
    },
    {
      key: 'selling-general-and-administrative',
      label: 'Selling, General & Administrative',
      category: 'opex',
      order: 50,
      localNames: [
        'SellingGeneralAndAdministrativeExpense',
        'SellingAndMarketingExpense',
        'GeneralAndAdministrativeExpense'
      ],
      labelIncludes: ['selling, general', 'selling general', 'general and administrative']
    },
    {
      key: 'operating-income',
      label: 'Operating Income',
      category: 'profit',
      order: 60,
      localNames: ['OperatingIncomeLoss', 'IncomeLossFromOperations']
    },
    {
      key: 'net-income',
      label: 'Net Income',
      category: 'profit',
      order: 70,
      localNames: ['NetIncomeLoss', 'ProfitLoss']
    }
  ],

  balance: [
    {
      key: 'cash-and-equivalents',
      label: 'Cash & Equivalents',
      category: 'asset',
      order: 10,
      localNames: [
        'CashAndCashEquivalentsAtCarryingValue',
        'CashCashEquivalentsAndShortTermInvestments',
        'CashAndShortTermInvestments'
      ]
    },
    {
      key: 'accounts-receivable',
      label: 'Accounts Receivable',
      category: 'asset',
      order: 20,
      localNames: ['AccountsReceivableNetCurrent', 'ReceivablesNetCurrent']
    },
    {
      key: 'inventory',
      label: 'Inventory',
      category: 'asset',
      order: 30,
      localNames: ['InventoryNet']
    },
    {
      key: 'total-assets',
      label: 'Total Assets',
      category: 'asset',
      order: 40,
      localNames: ['Assets']
    },
    {
      key: 'current-liabilities',
      label: 'Current Liabilities',
      category: 'liability',
      order: 50,
      localNames: ['LiabilitiesCurrent']
    },
    {
      key: 'long-term-debt',
      label: 'Long-Term Debt',
      category: 'liability',
      order: 60,
      localNames: [
        'LongTermDebtNoncurrent',
        'LongTermDebt',
        'DebtNoncurrent',
        'LongTermDebtAndCapitalLeaseObligations'
      ]
    },
    {
      key: 'current-debt',
      label: 'Current Debt',
      category: 'liability',
      order: 70,
      localNames: ['DebtCurrent', 'ShortTermBorrowings', 'LongTermDebtCurrent']
    },
    {
      key: 'total-debt',
      label: 'Total Debt',
      category: 'liability',
      order: 80,
      localNames: ['DebtAndFinanceLeaseLiabilities', 'Debt'],
      // long-term debt + current debt
      formula: pairwiseFormula('long-term-debt', 'current-debt', sumValues)
    },
    {
      key: 'total-equity',
      label: 'Total Equity',
      category: 'equity',
      order: 90,
      localNames: [
        'StockholdersEquity',
        'StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest',
        'PartnersCapital'
      ]
    }
  ],

  cash_flow: [
    {
      key: 'operating-cash-flow',
      label: 'Operating Cash Flow',
      category: 'cash-flow',
      order: 10,
      localNames: [
        'NetCashProvidedByUsedInOperatingActivities',
        'NetCashProvidedByUsedInOperatingActivitiesContinuingOperations'
      ]
    },
    {
      key: 'capital-expenditures',
      label: 'Capital Expenditures',
      category: 'cash-flow',
      order: 20,
      // NOTE(review): the second concept's name suggests an accrued, not-yet-paid
      // amount rather than a cash outflow; confirm it belongs in this mapping.
      localNames: ['PaymentsToAcquirePropertyPlantAndEquipment', 'CapitalExpendituresIncurredButNotYetPaid']
    },
    {
      key: 'free-cash-flow',
      label: 'Free Cash Flow',
      category: 'cash-flow',
      order: 30,
      // operating cash flow - capital expenditures
      formula: pairwiseFormula('operating-cash-flow', 'capital-expenditures', subtractValues)
    }
  ],

  equity: [
    {
      key: 'total-equity',
      label: 'Total Equity',
      category: 'equity',
      order: 10,
      localNames: [
        'StockholdersEquity',
        'StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest',
        'PartnersCapital'
      ]
    }
  ],

  comprehensive_income: [
    {
      key: 'comprehensive-income',
      label: 'Comprehensive Income',
      category: 'profit',
      order: 10,
      localNames: [
        'ComprehensiveIncomeNetOfTax',
        'ComprehensiveIncomeNetOfTaxIncludingPortionAttributableToNoncontrollingInterest'
      ]
    }
  ]
};
/**
 * Decides whether a faithful taxonomy row feeds a canonical definition.
 *
 * A row matches when its local name equals one of the definition's
 * `localNames`, or, failing that, when its label contains one of the
 * definition's `labelIncludes` tokens. Both comparisons run on trimmed,
 * lower-cased text.
 */
function matchesDefinition(row: TaxonomyStatementRow, definition: CanonicalRowDefinition) {
  const targetLocalName = normalizeToken(row.localName);
  for (const candidate of definition.localNames ?? []) {
    if (normalizeToken(candidate) === targetLocalName) {
      return true;
    }
  }

  // Label matching is only a fallback once the local name has not matched.
  const normalizedLabel = normalizeToken(row.label);
  for (const token of definition.labelIncludes ?? []) {
    if (normalizedLabel.includes(normalizeToken(token))) {
      return true;
    }
  }

  return false;
}
/**
 * Collapses the faithful rows that matched one canonical definition into a
 * single standardized row.
 *
 * Matches are ranked by their own `order`, ties broken by label. For each
 * requested period the value is taken from the highest-ranked match that
 * reports a non-null number. If no match reports a number, the highest-ranked
 * match that merely has an entry for the period is used (its value is then
 * `null`), and if none has an entry both the value and the resolved source
 * key are `null`.
 *
 * @param definition - Supplies the key, label, category and order of the result.
 * @param matches - Faithful rows mapped to the definition; the array is not mutated.
 * @param periodIds - Periods to populate; each id gets an entry in `values`
 *   and in `resolvedSourceRowKeys`.
 * @returns The standardized row, with the concepts, row keys and fact ids of
 *   all matches de-duplicated and sorted.
 */
function buildCanonicalRow(
  definition: CanonicalRowDefinition,
  matches: TaxonomyStatementRow[],
  periodIds: string[]
) {
  const sortedMatches = [...matches].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }
    return left.label.localeCompare(right.label);
  });

  const sourceConcepts = new Set<string>();
  const sourceRowKeys = new Set<string>();
  const sourceFactIds = new Set<number>();
  for (const row of sortedMatches) {
    sourceConcepts.add(row.qname);
    sourceRowKeys.add(row.key);
    for (const factId of row.sourceFactIds) {
      sourceFactIds.add(factId);
    }
  }

  const values: Record<string, number | null> = {};
  const resolvedSourceRowKeys: Record<string, string | null> = {};
  for (const periodId of periodIds) {
    // A key that is present but holds null must not shadow a lower-ranked row
    // that has a real number for the same period.
    const match =
      sortedMatches.find((row) => row.values[periodId] != null) ??
      sortedMatches.find((row) => periodId in row.values);
    values[periodId] = match?.values[periodId] ?? null;
    resolvedSourceRowKeys[periodId] = match?.key ?? null;
  }

  return {
    key: definition.key,
    label: definition.label,
    category: definition.category,
    order: definition.order,
    values,
    hasDimensions: sortedMatches.some((row) => row.hasDimensions),
    sourceConcepts: [...sourceConcepts].sort((left, right) => left.localeCompare(right)),
    sourceRowKeys: [...sourceRowKeys].sort((left, right) => left.localeCompare(right)),
    sourceFactIds: [...sourceFactIds].sort((left, right) => left - right),
    resolvedSourceRowKeys
  } satisfies StandardizedStatementRow;
}
/**
 * Builds the standardized surface for one statement from its faithful rows.
 *
 * Each canonical definition for `statement` is processed in order:
 * - Faithful rows matching the definition are merged into one canonical row.
 * - If the definition has a formula, it runs against the canonical rows built
 *   so far. A derived value wins for every period where the formula produced
 *   a number; for periods where it produced `null`, the value reported by the
 *   matched source rows (and its resolved source key) is kept.
 * - A definition with neither matches nor a usable formula result produces no
 *   row at all.
 *
 * Faithful rows that matched no definition are appended as `other:`-prefixed
 * rows ordered after the canonical ones (`10_000 + row.order`).
 *
 * @param rows - Faithful rows of the statement.
 * @param statement - Statement kind used to select the canonical definitions.
 * @param periods - Periods whose ids are populated on every canonical row.
 * @returns Canonical and leftover rows sorted by `order`, then by label.
 */
function buildStandardizedRows(
  rows: TaxonomyStatementRow[],
  statement: FinancialStatementKind,
  periods: FinancialStatementPeriod[]
) {
  const definitions = STANDARDIZED_ROW_DEFINITIONS[statement] ?? [];
  const periodIds = periods.map((period) => period.id);
  const rowsByKey = new Map<string, StandardizedStatementRow>();
  const matchedRowKeys = new Set<string>();

  for (const definition of definitions) {
    const matches = rows.filter((row) => matchesDefinition(row, definition));
    if (matches.length === 0 && !definition.formula) {
      continue;
    }

    for (const row of matches) {
      matchedRowKeys.add(row.key);
    }

    const canonicalRow = buildCanonicalRow(definition, matches, periodIds);
    // Registered before the formula runs, as the formula receives the full map.
    rowsByKey.set(definition.key, canonicalRow);

    const derived = definition.formula?.(rowsByKey, periodIds) ?? null;
    if (!derived) {
      // Formula-only definition that could not be derived: do not emit an
      // all-null row with no sources.
      if (matches.length === 0) {
        rowsByKey.delete(definition.key);
      }
      continue;
    }

    const values: Record<string, number | null> = {};
    const resolvedSourceRowKeys: Record<string, string | null> = {};
    for (const periodId of periodIds) {
      const derivedValue = derived.values[periodId] ?? null;
      if (derivedValue !== null) {
        values[periodId] = derivedValue;
        resolvedSourceRowKeys[periodId] = derived.resolvedSourceRowKeys[periodId] ?? null;
      } else {
        // The formula has nothing for this period; keep what was reported.
        values[periodId] = canonicalRow.values[periodId] ?? null;
        resolvedSourceRowKeys[periodId] = canonicalRow.resolvedSourceRowKeys[periodId] ?? null;
      }
    }

    rowsByKey.set(definition.key, {
      ...canonicalRow,
      values,
      resolvedSourceRowKeys
    });
  }

  const unmatchedRows = rows
    .filter((row) => !matchedRowKeys.has(row.key))
    .map((row) => ({
      key: `other:${row.key}`,
      label: row.label,
      category: 'other',
      order: 10_000 + row.order,
      values: { ...row.values },
      hasDimensions: row.hasDimensions,
      sourceConcepts: [row.qname],
      sourceRowKeys: [row.key],
      sourceFactIds: [...row.sourceFactIds],
      resolvedSourceRowKeys: Object.fromEntries(
        periodIds.map((periodId) => [periodId, periodId in row.values ? row.key : null])
      )
    } satisfies StandardizedStatementRow));

  return [...rowsByKey.values(), ...unmatchedRows].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }
    return left.label.localeCompare(right.label);
  });
}
function buildDimensionBreakdown(
facts: Awaited<ReturnType<typeof listTaxonomyFactsByTicker>>['facts'],
periods: FinancialStatementPeriod[]
periods: FinancialStatementPeriod[],
faithfulRows: TaxonomyStatementRow[],
standardizedRows: StandardizedStatementRow[]
) {
const periodByFilingId = new Map<number, FinancialStatementPeriod>();
for (const period of periods) {
periodByFilingId.set(period.filingId, period);
}
const faithfulRowByKey = new Map(faithfulRows.map((row) => [row.key, row]));
const standardizedRowsBySource = new Map<string, StandardizedStatementRow[]>();
for (const row of standardizedRows) {
for (const sourceRowKey of row.sourceRowKeys) {
const existing = standardizedRowsBySource.get(sourceRowKey);
if (existing) {
existing.push(row);
} else {
standardizedRowsBySource.set(sourceRowKey, [row]);
}
}
}
const map = new Map<string, DimensionBreakdownRow[]>();
const pushRow = (key: string, row: DimensionBreakdownRow) => {
const existing = map.get(key);
if (existing) {
existing.push(row);
} else {
map.set(key, [row]);
}
};
for (const fact of facts) {
if (fact.dimensions.length === 0) {
@@ -244,10 +661,15 @@ function buildDimensionBreakdown(
continue;
}
const faithfulRow = faithfulRowByKey.get(fact.conceptKey) ?? null;
const standardizedMatches = standardizedRowsBySource.get(fact.conceptKey) ?? [];
for (const dimension of fact.dimensions) {
const row: DimensionBreakdownRow = {
const faithfulDimensionRow: DimensionBreakdownRow = {
rowKey: fact.conceptKey,
concept: fact.qname,
sourceRowKey: fact.conceptKey,
sourceLabel: faithfulRow?.label ?? null,
periodId: period.id,
axis: dimension.axis,
member: dimension.member,
@@ -255,11 +677,13 @@ function buildDimensionBreakdown(
unit: fact.unit
};
const existing = map.get(fact.conceptKey);
if (existing) {
existing.push(row);
} else {
map.set(fact.conceptKey, [row]);
pushRow(fact.conceptKey, faithfulDimensionRow);
for (const standardizedRow of standardizedMatches) {
pushRow(standardizedRow.key, {
...faithfulDimensionRow,
rowKey: standardizedRow.key
});
}
}
}
@@ -305,7 +729,8 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
const financialFilings = filings.filter((filing) => isFinancialForm(filing.filing_type));
const selection = selectPrimaryPeriods(snapshotResult.snapshots, input.statement);
const periods = selection.periods;
const rows = buildRows(snapshotResult.snapshots, input.statement, selection.selectedPeriodIds);
const faithfulRows = buildRows(snapshotResult.snapshots, input.statement, selection.selectedPeriodIds);
const standardizedRows = buildStandardizedRows(faithfulRows, input.statement, periods);
const factsResult = input.includeFacts
? await listTaxonomyFactsByTicker({
@@ -329,11 +754,11 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
const latestFiling = filings[0] ?? null;
const metrics = latestMetrics(snapshotResult.snapshots);
const dimensionBreakdown = input.includeDimensions
? buildDimensionBreakdown(dimensionFacts.facts, periods)
? buildDimensionBreakdown(dimensionFacts.facts, periods, faithfulRows, standardizedRows)
: null;
const dimensionsCount = dimensionBreakdown
? Object.values(dimensionBreakdown).reduce((total, entries) => total + entries.length, 0)
const dimensionsCount = input.includeDimensions
? dimensionFacts.facts.reduce((total, fact) => total + fact.dimensions.length, 0)
: 0;
const factsCoverage = input.includeFacts
@@ -348,8 +773,18 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
},
statement: input.statement,
window: input.window,
defaultSurface: 'standardized',
periods,
rows,
surfaces: {
faithful: {
kind: 'faithful',
rows: faithfulRows
},
standardized: {
kind: 'standardized',
rows: standardizedRows
}
},
nextCursor: snapshotResult.nextCursor,
facts: input.includeFacts
? {
@@ -359,7 +794,7 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
: null,
coverage: {
filings: periods.length,
rows: rows.length,
rows: faithfulRows.length,
dimensions: dimensionsCount,
facts: factsCoverage
},
@@ -378,7 +813,11 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
/**
 * Module-private helpers gathered on a single exported object.
 * NOTE(review): presumably exported so unit tests can call these helpers
 * directly — confirm before using it from production code.
 */
export const __financialTaxonomyInternals = {
buildPeriods,
buildRows,
buildStandardizedRows,
buildDimensionBreakdown,
isInstantPeriod,
matchesDefinition,
periodDurationDays,
selectPrimaryPeriods
};