// Files
// Neon-Desk/lib/server/financial-taxonomy.ts
//
// 824 lines
// 24 KiB
// TypeScript

import type {
CompanyFinancialStatementsResponse,
DimensionBreakdownRow,
FinancialHistoryWindow,
FinancialStatementKind,
FinancialStatementPeriod,
StandardizedStatementRow,
TaxonomyStatementRow
} from '@/lib/types';
import { listFilingsRecords } from '@/lib/server/repos/filings';
import {
countFilingTaxonomySnapshotStatuses,
listFilingTaxonomySnapshotsByTicker,
listTaxonomyFactsByTicker,
type FilingTaxonomySnapshotRecord
} from '@/lib/server/repos/filing-taxonomy';
/**
 * Options accepted by `getCompanyFinancialTaxonomy`.
 */
type GetCompanyFinancialTaxonomyInput = {
// Raw ticker symbol; it is trimmed and upper-cased before any lookup.
ticker: string;
// Which statement's rows are read from each snapshot and requested for facts.
statement: FinancialStatementKind;
// History window forwarded to the snapshot/fact repositories; 'all' also raises the filings lookup limit.
window: FinancialHistoryWindow;
// When true, facts are loaded (up to 1200) and a per-row dimension breakdown is built.
includeDimensions: boolean;
// When true, a page of raw facts is loaded and returned under `facts`.
includeFacts: boolean;
// Paging cursor forwarded to the facts query (only used when `includeFacts` is true).
factsCursor?: string | null;
// Page size forwarded to the facts query (only used when `includeFacts` is true).
factsLimit?: number;
// Paging cursor forwarded to the snapshot query.
cursor?: string | null;
// Page size forwarded to the snapshot query.
limit?: number;
// Echoed back as `dataSourceStatus.enabled`.
v3Enabled: boolean;
// Echoed back as `dataSourceStatus.queuedSync`.
queuedSync: boolean;
};
/**
 * Describes one standardized (canonical) statement row and how source taxonomy
 * rows are mapped onto it.
 */
type CanonicalRowDefinition = {
// Key of the standardized row; formulas look other rows up by this key.
key: string;
// Display label copied onto the standardized row.
label: string;
// Category copied onto the standardized row.
category: string;
// Sort position of the standardized row within its statement.
order: number;
// Concept local names that map to this row; compared after trim + lower-casing.
localNames?: readonly string[];
// Substrings that, when found in a source row's normalized label, map that row here.
labelIncludes?: readonly string[];
// Optional derivation from already-built standardized rows. Receives the rows built
// so far (by key) and the period ids to fill; returns null when it cannot derive.
formula?: (
rowsByKey: Map<string, StandardizedStatementRow>,
periodIds: string[]
) => Pick<StandardizedStatementRow, 'values' | 'resolvedSourceRowKeys'> | null;
};
/**
 * Canonicalizes a ticker symbol: surrounding whitespace is removed and the
 * remainder is upper-cased.
 */
function safeTicker(input: string) {
  const trimmed = input.trim();
  return trimmed.toUpperCase();
}
/**
 * Type guard: true only for the exact form types '10-K' and '10-Q'.
 */
function isFinancialForm(type: string): type is '10-K' | '10-Q' {
  switch (type) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
/**
 * Parses a date string into epoch milliseconds via `Date.parse`.
 *
 * @returns NaN for null/empty input (and for anything `Date.parse` rejects).
 */
function parseEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
/**
 * Ascending comparator for statement periods.
 *
 * Periods are ordered by `periodEnd` (falling back to `filingDate` when
 * `periodEnd` is null/undefined). When either date cannot be parsed, or both
 * dates are equal, the period ids are compared instead.
 */
function periodSorter(left: FinancialStatementPeriod, right: FinancialStatementPeriod) {
  const leftTime = parseEpoch(left.periodEnd ?? left.filingDate);
  const rightTime = parseEpoch(right.periodEnd ?? right.filingDate);
  const bothParsed = Number.isFinite(leftTime) && Number.isFinite(rightTime);

  if (!bothParsed || leftTime === rightTime) {
    // No usable date ordering: fall back to a stable id-based order.
    return left.id.localeCompare(right.id);
  }

  return leftTime - rightTime;
}
/**
 * A period counts as "instant" when its `periodStart` is exactly null.
 */
function isInstantPeriod(period: FinancialStatementPeriod) {
  const { periodStart } = period;
  return periodStart === null;
}
/**
 * Inclusive length of a period in days (start and end day both counted).
 *
 * @returns null when either bound is missing or unparseable, or when the end
 *          precedes the start.
 */
function periodDurationDays(period: FinancialStatementPeriod) {
  const { periodStart, periodEnd } = period;

  if (periodStart && periodEnd) {
    const startMs = Date.parse(periodStart);
    const endMs = Date.parse(periodEnd);
    const usable = Number.isFinite(startMs) && Number.isFinite(endMs) && endMs >= startMs;

    if (usable) {
      const millisPerDay = 86_400_000;
      // +1 makes the span inclusive of the end day.
      return Math.round((endMs - startMs) / millisPerDay) + 1;
    }
  }

  return null;
}
/**
 * Target period length, in days, used to rank duration periods of a filing:
 * 365 for a '10-K', 90 for anything else.
 */
function preferredDurationDays(filingType: FinancialStatementPeriod['filingType']) {
  if (filingType === '10-K') {
    return 365;
  }
  return 90;
}
/**
 * Picks one "primary" period per filing for the requested statement.
 *
 * For each snapshot only periods referenced by that statement's row values are
 * considered. Selection rules:
 * - 'balance': the newest instant period (or the newest period of any kind
 *   when the snapshot has no instant periods);
 * - other statements: among duration periods, the newest end date wins; ties
 *   go to the duration closest to the filing type's preferred length, then to
 *   the lowest id. Without duration periods the newest candidate is used.
 *
 * A later snapshot that selects a period with the same `filingId` replaces the
 * earlier selection.
 *
 * @returns the selected periods in ascending order, plus lookup structures
 *          keyed by period id and by filing id.
 */
function selectPrimaryPeriods(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind
) {
  const newestFirst = (a: FinancialStatementPeriod, b: FinancialStatementPeriod) => periodSorter(b, a);
  const selectedByFilingId = new Map<number, FinancialStatementPeriod>();

  for (const snapshot of snapshots) {
    const statementRows = snapshot.statement_rows?.[statement] ?? [];
    if (statementRows.length === 0) {
      continue;
    }

    // Collect every period id that at least one row of this statement reports on.
    const referencedPeriodIds = new Set<string>();
    statementRows.forEach((row) => {
      Object.keys(row.values).forEach((periodId) => referencedPeriodIds.add(periodId));
    });

    const candidates = (snapshot.periods ?? []).filter((period) => referencedPeriodIds.has(period.id));
    if (candidates.length === 0) {
      continue;
    }

    let selected: FinancialStatementPeriod | null;

    if (statement === 'balance') {
      const instants = candidates.filter(isInstantPeriod);
      const pool = instants.length > 0 ? instants : candidates;
      pool.sort(newestFirst);
      selected = pool[0] ?? null;
    } else {
      const durations = candidates.filter((period) => !isInstantPeriod(period));

      if (durations.length === 0) {
        candidates.sort(newestFirst);
        selected = candidates[0] ?? null;
      } else {
        const targetDays = preferredDurationDays(snapshot.filing_type);
        // Unknown durations are treated as a perfect fit (distance 0).
        const distanceFromTarget = (period: FinancialStatementPeriod) =>
          Math.abs((periodDurationDays(period) ?? targetDays) - targetDays);

        durations.sort((left, right) => {
          const leftEnd = parseEpoch(left.periodEnd ?? left.filingDate);
          const rightEnd = parseEpoch(right.periodEnd ?? right.filingDate);
          const datesComparable = Number.isFinite(leftEnd) && Number.isFinite(rightEnd);
          if (datesComparable && leftEnd !== rightEnd) {
            return rightEnd - leftEnd;
          }

          const gap = distanceFromTarget(left) - distanceFromTarget(right);
          if (gap !== 0) {
            return gap;
          }

          return left.id.localeCompare(right.id);
        });
        selected = durations[0] ?? null;
      }
    }

    if (selected) {
      selectedByFilingId.set(selected.filingId, selected);
    }
  }

  const periods = Array.from(selectedByFilingId.values()).sort(periodSorter);
  const selectedPeriodIds = new Set(periods.map((period) => period.id));
  const periodByFilingId = new Map(periods.map((period) => [period.filingId, period]));

  return { periods, selectedPeriodIds, periodByFilingId };
}
/**
 * Convenience wrapper: returns only the ordered period list produced by
 * `selectPrimaryPeriods` for the given snapshots and statement.
 */
function buildPeriods(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind
) {
  const { periods } = selectPrimaryPeriods(snapshots, statement);
  return periods;
}
/**
 * Merges the rows of one statement across snapshots into a single row per key,
 * keeping only values/units that belong to the selected periods.
 *
 * Merge rules for a key seen more than once:
 * - the first snapshot that contributes at least one selected-period value
 *   provides the base row (label, qname, ...);
 * - `hasDimensions` is OR-ed, `order` and `depth` take the minimum, and
 *   `parentKey` is filled in only if still missing;
 * - values/units already present for a period are never overwritten;
 * - source fact ids are appended in first-seen order without repeating ids
 *   already present.
 *
 * A first occurrence with no selected-period values is skipped entirely.
 * Input rows are not mutated.
 *
 * @param snapshots snapshots to merge, processed in array order
 * @param statement statement whose rows are read from each snapshot
 * @param selectedPeriodIds ids of the periods to keep
 * @returns merged rows sorted by `order`, then by label
 */
function buildRows(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind,
  selectedPeriodIds: Set<string>
) {
  const rowMap = new Map<string, TaxonomyStatementRow>();
  // Per-row set of fact ids already collected, so merging stays linear
  // instead of rescanning the array with `includes` for every id.
  const seenFactIdsByKey = new Map<string, Set<number>>();

  const keepSelectedPeriods = <T>(record: Record<string, T>): Record<string, T> =>
    Object.fromEntries(
      Object.entries(record).filter(([periodId]) => selectedPeriodIds.has(periodId))
    );

  for (const snapshot of snapshots) {
    const rows = snapshot.statement_rows?.[statement] ?? [];

    for (const row of rows) {
      const existing = rowMap.get(row.key);

      if (!existing) {
        const values = keepSelectedPeriods(row.values);
        if (Object.keys(values).length === 0) {
          // Nothing to show for the selected periods; a later snapshot may still add this key.
          continue;
        }

        rowMap.set(row.key, {
          ...row,
          values,
          units: keepSelectedPeriods(row.units),
          sourceFactIds: [...row.sourceFactIds]
        });
        seenFactIdsByKey.set(row.key, new Set(row.sourceFactIds));
        continue;
      }

      existing.hasDimensions = existing.hasDimensions || row.hasDimensions;
      existing.order = Math.min(existing.order, row.order);
      existing.depth = Math.min(existing.depth, row.depth);
      if (!existing.parentKey && row.parentKey) {
        existing.parentKey = row.parentKey;
      }

      for (const [periodId, value] of Object.entries(row.values)) {
        if (selectedPeriodIds.has(periodId) && !(periodId in existing.values)) {
          existing.values[periodId] = value;
        }
      }

      for (const [periodId, unit] of Object.entries(row.units)) {
        if (selectedPeriodIds.has(periodId) && !(periodId in existing.units)) {
          existing.units[periodId] = unit;
        }
      }

      let seenFactIds = seenFactIdsByKey.get(row.key);
      if (!seenFactIds) {
        seenFactIds = new Set(existing.sourceFactIds);
        seenFactIdsByKey.set(row.key, seenFactIds);
      }
      for (const factId of row.sourceFactIds) {
        if (!seenFactIds.has(factId)) {
          seenFactIds.add(factId);
          existing.sourceFactIds.push(factId);
        }
      }
    }
  }

  return [...rowMap.values()].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }
    return left.label.localeCompare(right.label);
  });
}
/**
 * Normalizes a token for case-insensitive comparison: trims surrounding
 * whitespace, then lower-cases.
 */
function normalizeToken(value: string) {
  const stripped = value.trim();
  return stripped.toLowerCase();
}
/**
 * Adds two nullable numbers; the result is null as soon as either operand is null.
 */
function sumValues(left: number | null, right: number | null) {
  return left !== null && right !== null ? left + right : null;
}
/**
 * Subtracts `right` from `left`; the result is null as soon as either operand is null.
 */
function subtractValues(left: number | null, right: number | null) {
  return left !== null && right !== null ? left - right : null;
}
/**
 * Canonical row catalogue, per statement kind.
 *
 * Each entry lists the concept local names (and, for SG&A, label fragments)
 * that map onto the canonical row. Three rows additionally carry a formula
 * that derives the row from two other canonical rows:
 * - gross-profit   = revenue - cost-of-revenue
 * - total-debt     = long-term-debt + current-debt
 * - free-cash-flow = operating-cash-flow - capital-expenditures
 *
 * A formula returns null when either operand row has not been built; derived
 * values carry no resolved source row key (null for every period).
 */
const STANDARDIZED_ROW_DEFINITIONS: Record<FinancialStatementKind, CanonicalRowDefinition[]> = (() => {
  type Formula = NonNullable<CanonicalRowDefinition['formula']>;
  type Combine = (left: number | null, right: number | null) => number | null;

  // Builds a formula that combines two canonical rows period by period.
  const fromPair = (leftKey: string, rightKey: string, combine: Combine): Formula =>
    (rowsByKey, periodIds) => {
      const leftRow = rowsByKey.get(leftKey);
      const rightRow = rowsByKey.get(rightKey);
      if (!leftRow || !rightRow) {
        return null;
      }

      const valueEntries = periodIds.map((periodId) => [
        periodId,
        combine(leftRow.values[periodId] ?? null, rightRow.values[periodId] ?? null)
      ]);
      const sourceKeyEntries = periodIds.map((periodId) => [periodId, null]);

      return {
        values: Object.fromEntries(valueEntries),
        resolvedSourceRowKeys: Object.fromEntries(sourceKeyEntries)
      };
    };

  // Helpers are resolved lazily so the catalogue can be declared before they run.
  const difference: Combine = (left, right) => subtractValues(left, right);
  const total: Combine = (left, right) => sumValues(left, right);

  // 'Total Equity' appears on both the balance sheet and the equity statement.
  const totalEquity = (order: number): CanonicalRowDefinition => ({
    key: 'total-equity',
    label: 'Total Equity',
    category: 'equity',
    order,
    localNames: [
      'StockholdersEquity',
      'StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest',
      'PartnersCapital'
    ]
  });

  const income: CanonicalRowDefinition[] = [
    {
      key: 'revenue',
      label: 'Revenue',
      category: 'revenue',
      order: 10,
      localNames: [
        'RevenueFromContractWithCustomerExcludingAssessedTax',
        'Revenues',
        'SalesRevenueNet',
        'TotalRevenuesAndOtherIncome'
      ]
    },
    {
      key: 'cost-of-revenue',
      label: 'Cost of Revenue',
      category: 'expense',
      order: 20,
      localNames: [
        'CostOfRevenue',
        'CostOfGoodsSold',
        'CostOfSales',
        'CostOfProductsSold',
        'CostOfServices'
      ]
    },
    {
      key: 'gross-profit',
      label: 'Gross Profit',
      category: 'profit',
      order: 30,
      localNames: ['GrossProfit'],
      formula: fromPair('revenue', 'cost-of-revenue', difference)
    },
    {
      key: 'research-and-development',
      label: 'Research & Development',
      category: 'opex',
      order: 40,
      localNames: ['ResearchAndDevelopmentExpense']
    },
    {
      key: 'selling-general-and-administrative',
      label: 'Selling, General & Administrative',
      category: 'opex',
      order: 50,
      localNames: [
        'SellingGeneralAndAdministrativeExpense',
        'SellingAndMarketingExpense',
        'GeneralAndAdministrativeExpense'
      ],
      labelIncludes: ['selling, general', 'selling general', 'general and administrative']
    },
    {
      key: 'operating-income',
      label: 'Operating Income',
      category: 'profit',
      order: 60,
      localNames: ['OperatingIncomeLoss', 'IncomeLossFromOperations']
    },
    {
      key: 'net-income',
      label: 'Net Income',
      category: 'profit',
      order: 70,
      localNames: ['NetIncomeLoss', 'ProfitLoss']
    }
  ];

  const balance: CanonicalRowDefinition[] = [
    {
      key: 'cash-and-equivalents',
      label: 'Cash & Equivalents',
      category: 'asset',
      order: 10,
      localNames: [
        'CashAndCashEquivalentsAtCarryingValue',
        'CashCashEquivalentsAndShortTermInvestments',
        'CashAndShortTermInvestments'
      ]
    },
    {
      key: 'accounts-receivable',
      label: 'Accounts Receivable',
      category: 'asset',
      order: 20,
      localNames: [
        'AccountsReceivableNetCurrent',
        'ReceivablesNetCurrent'
      ]
    },
    {
      key: 'inventory',
      label: 'Inventory',
      category: 'asset',
      order: 30,
      localNames: ['InventoryNet']
    },
    {
      key: 'total-assets',
      label: 'Total Assets',
      category: 'asset',
      order: 40,
      localNames: ['Assets']
    },
    {
      key: 'current-liabilities',
      label: 'Current Liabilities',
      category: 'liability',
      order: 50,
      localNames: ['LiabilitiesCurrent']
    },
    {
      key: 'long-term-debt',
      label: 'Long-Term Debt',
      category: 'liability',
      order: 60,
      localNames: [
        'LongTermDebtNoncurrent',
        'LongTermDebt',
        'DebtNoncurrent',
        'LongTermDebtAndCapitalLeaseObligations'
      ]
    },
    {
      key: 'current-debt',
      label: 'Current Debt',
      category: 'liability',
      order: 70,
      localNames: ['DebtCurrent', 'ShortTermBorrowings', 'LongTermDebtCurrent']
    },
    {
      key: 'total-debt',
      label: 'Total Debt',
      category: 'liability',
      order: 80,
      localNames: ['DebtAndFinanceLeaseLiabilities', 'Debt'],
      formula: fromPair('long-term-debt', 'current-debt', total)
    },
    totalEquity(90)
  ];

  const cashFlow: CanonicalRowDefinition[] = [
    {
      key: 'operating-cash-flow',
      label: 'Operating Cash Flow',
      category: 'cash-flow',
      order: 10,
      localNames: [
        'NetCashProvidedByUsedInOperatingActivities',
        'NetCashProvidedByUsedInOperatingActivitiesContinuingOperations'
      ]
    },
    {
      key: 'capital-expenditures',
      label: 'Capital Expenditures',
      category: 'cash-flow',
      order: 20,
      localNames: ['PaymentsToAcquirePropertyPlantAndEquipment', 'CapitalExpendituresIncurredButNotYetPaid']
    },
    {
      key: 'free-cash-flow',
      label: 'Free Cash Flow',
      category: 'cash-flow',
      order: 30,
      formula: fromPair('operating-cash-flow', 'capital-expenditures', difference)
    }
  ];

  const comprehensiveIncome: CanonicalRowDefinition[] = [
    {
      key: 'comprehensive-income',
      label: 'Comprehensive Income',
      category: 'profit',
      order: 10,
      localNames: ['ComprehensiveIncomeNetOfTax', 'ComprehensiveIncomeNetOfTaxIncludingPortionAttributableToNoncontrollingInterest']
    }
  ];

  return {
    income,
    balance,
    cash_flow: cashFlow,
    equity: [totalEquity(10)],
    comprehensive_income: comprehensiveIncome
  };
})();
/**
 * Decides whether a source taxonomy row maps onto a canonical definition.
 *
 * A row matches when its normalized local name equals one of the definition's
 * `localNames`, or, failing that, when its normalized label contains one of
 * the definition's `labelIncludes` fragments.
 */
function matchesDefinition(row: TaxonomyStatementRow, definition: CanonicalRowDefinition) {
  const wantedName = normalizeToken(row.localName);
  const nameHit = (definition.localNames ?? []).some(
    (candidate) => normalizeToken(candidate) === wantedName
  );
  if (nameHit) {
    return true;
  }

  const normalizedLabel = normalizeToken(row.label);
  return (definition.labelIncludes ?? []).some(
    (fragment) => normalizedLabel.includes(normalizeToken(fragment))
  );
}
/**
 * Collapses all source rows matched by a definition into one standardized row.
 *
 * Matches are prioritized by `order`, then label. For every period the value
 * comes from the highest-priority match that reports a non-null value for that
 * period; if no match has a non-null value, the highest-priority match that
 * merely lists the period is recorded as the source (with a null value).
 * Previously an explicit null in a higher-priority row hid a real number in a
 * lower-priority one.
 *
 * @param definition canonical row metadata (key, label, category, order)
 * @param matches source rows mapped to this definition; not mutated
 * @param periodIds periods to fill; every id gets an entry in the result
 * @returns the standardized row, with sorted, de-duplicated source concepts,
 *          source row keys and source fact ids
 */
function buildCanonicalRow(
  definition: CanonicalRowDefinition,
  matches: TaxonomyStatementRow[],
  periodIds: string[]
) {
  const sortedMatches = [...matches].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }
    return left.label.localeCompare(right.label);
  });

  const sourceConcepts = new Set<string>();
  const sourceRowKeys = new Set<string>();
  const sourceFactIds = new Set<number>();
  for (const row of sortedMatches) {
    sourceConcepts.add(row.qname);
    sourceRowKeys.add(row.key);
    for (const factId of row.sourceFactIds) {
      sourceFactIds.add(factId);
    }
  }

  const values: Record<string, number | null> = {};
  const resolvedSourceRowKeys: Record<string, string | null> = {};
  for (const periodId of periodIds) {
    const reporting = sortedMatches.filter((row) => periodId in row.values);
    // `!= null` deliberately rejects both null and undefined values.
    const match = reporting.find((row) => row.values[periodId] != null) ?? reporting[0];
    values[periodId] = match?.values[periodId] ?? null;
    resolvedSourceRowKeys[periodId] = match?.key ?? null;
  }

  return {
    key: definition.key,
    label: definition.label,
    category: definition.category,
    order: definition.order,
    values,
    hasDimensions: sortedMatches.some((row) => row.hasDimensions),
    sourceConcepts: [...sourceConcepts].sort((left, right) => left.localeCompare(right)),
    sourceRowKeys: [...sourceRowKeys].sort((left, right) => left.localeCompare(right)),
    sourceFactIds: [...sourceFactIds].sort((left, right) => left - right),
    resolvedSourceRowKeys
  } satisfies StandardizedStatementRow;
}
/**
 * Projects faithful taxonomy rows onto the canonical row catalogue of a statement.
 *
 * For each definition of the statement:
 * - source rows matching the definition are collapsed into one canonical row;
 * - definitions without matches are skipped unless they carry a formula;
 * - when a formula is present and can be evaluated, it only fills periods for
 *   which no value was reported directly. Reported values and their source row
 *   keys are kept. Previously the derived series replaced the reported one
 *   wholesale, so a reported figure became null whenever one formula operand
 *   was missing for that period.
 *
 * Source rows that match no definition are appended as `other:<key>` rows with
 * category 'other' and an order offset of 10 000.
 *
 * @param rows faithful rows (already restricted to the selected periods)
 * @param statement statement kind selecting the definition list
 * @param periods periods whose ids are filled on every canonical row
 * @returns canonical and 'other' rows sorted by `order`, then by label
 */
function buildStandardizedRows(
  rows: TaxonomyStatementRow[],
  statement: FinancialStatementKind,
  periods: FinancialStatementPeriod[]
) {
  const definitions = STANDARDIZED_ROW_DEFINITIONS[statement] ?? [];
  const periodIds = periods.map((period) => period.id);
  const rowsByKey = new Map<string, StandardizedStatementRow>();
  const matchedRowKeys = new Set<string>();

  for (const definition of definitions) {
    const matches = rows.filter((row) => matchesDefinition(row, definition));
    if (matches.length === 0 && !definition.formula) {
      continue;
    }

    for (const row of matches) {
      matchedRowKeys.add(row.key);
    }

    const canonicalRow = buildCanonicalRow(definition, matches, periodIds);
    rowsByKey.set(definition.key, canonicalRow);

    const derived = definition.formula?.(rowsByKey, periodIds) ?? null;
    if (!derived) {
      continue;
    }

    // Reported figures win; the formula is only a fallback for gaps.
    const values: Record<string, number | null> = { ...canonicalRow.values };
    const resolvedSourceRowKeys: Record<string, string | null> = {
      ...canonicalRow.resolvedSourceRowKeys
    };
    for (const periodId of periodIds) {
      const reported = values[periodId] ?? null;
      const computed = derived.values[periodId] ?? null;
      if (reported === null && computed !== null) {
        values[periodId] = computed;
        resolvedSourceRowKeys[periodId] = derived.resolvedSourceRowKeys[periodId] ?? null;
      }
    }

    rowsByKey.set(definition.key, {
      ...canonicalRow,
      values,
      resolvedSourceRowKeys
    });
  }

  const unmatchedRows = rows
    .filter((row) => !matchedRowKeys.has(row.key))
    .map((row) => ({
      key: `other:${row.key}`,
      label: row.label,
      category: 'other',
      order: 10_000 + row.order,
      values: { ...row.values },
      hasDimensions: row.hasDimensions,
      sourceConcepts: [row.qname],
      sourceRowKeys: [row.key],
      sourceFactIds: [...row.sourceFactIds],
      resolvedSourceRowKeys: Object.fromEntries(
        periodIds.map((periodId) => [periodId, periodId in row.values ? row.key : null])
      )
    } satisfies StandardizedStatementRow));

  return [...rowsByKey.values(), ...unmatchedRows].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }
    return left.label.localeCompare(right.label);
  });
}
/**
 * Groups dimensional facts by the rows they belong to.
 *
 * A fact contributes only when it has at least one dimension, its filing has a
 * selected period, and its dates line up with that period (start and end for
 * duration periods; instant, or end when no instant is set, for periods
 * without a start). Each dimension of such a fact yields one entry under the
 * fact's concept key and one under every standardized row that lists that
 * concept key among its source rows.
 *
 * @returns a plain object keyed by row key, or null when nothing qualified
 */
function buildDimensionBreakdown(
  facts: Awaited<ReturnType<typeof listTaxonomyFactsByTicker>>['facts'],
  periods: FinancialStatementPeriod[],
  faithfulRows: TaxonomyStatementRow[],
  standardizedRows: StandardizedStatementRow[]
) {
  // Later periods with the same filing id replace earlier ones.
  const periodByFilingId = new Map<number, FinancialStatementPeriod>(
    periods.map((period): [number, FinancialStatementPeriod] => [period.filingId, period])
  );
  const faithfulRowByKey = new Map(faithfulRows.map((row) => [row.key, row]));

  // Reverse index: source row key -> standardized rows fed by it.
  const standardizedRowsBySource = new Map<string, StandardizedStatementRow[]>();
  for (const standardizedRow of standardizedRows) {
    for (const sourceRowKey of standardizedRow.sourceRowKeys) {
      let bucket = standardizedRowsBySource.get(sourceRowKey);
      if (!bucket) {
        bucket = [];
        standardizedRowsBySource.set(sourceRowKey, bucket);
      }
      bucket.push(standardizedRow);
    }
  }

  const breakdown = new Map<string, DimensionBreakdownRow[]>();
  const append = (key: string, entry: DimensionBreakdownRow) => {
    const bucket = breakdown.get(key);
    if (bucket) {
      bucket.push(entry);
      return;
    }
    breakdown.set(key, [entry]);
  };

  for (const fact of facts) {
    if (fact.dimensions.length === 0) {
      continue;
    }

    const period = periodByFilingId.get(fact.filingId) ?? null;
    if (!period) {
      continue;
    }

    let alignsWithPeriod: boolean;
    if (period.periodStart) {
      alignsWithPeriod = fact.periodStart === period.periodStart && fact.periodEnd === period.periodEnd;
    } else {
      alignsWithPeriod = (fact.periodInstant ?? fact.periodEnd) === period.periodEnd;
    }
    if (!alignsWithPeriod) {
      continue;
    }

    const sourceLabel = (faithfulRowByKey.get(fact.conceptKey) ?? null)?.label ?? null;
    const linkedStandardizedRows = standardizedRowsBySource.get(fact.conceptKey) ?? [];

    for (const dimension of fact.dimensions) {
      const faithfulEntry: DimensionBreakdownRow = {
        rowKey: fact.conceptKey,
        concept: fact.qname,
        sourceRowKey: fact.conceptKey,
        sourceLabel,
        periodId: period.id,
        axis: dimension.axis,
        member: dimension.member,
        value: fact.value,
        unit: fact.unit
      };
      append(fact.conceptKey, faithfulEntry);

      for (const standardizedRow of linkedStandardizedRows) {
        append(standardizedRow.key, { ...faithfulEntry, rowKey: standardizedRow.key });
      }
    }
  }

  if (breakdown.size === 0) {
    return null;
  }
  return Object.fromEntries(breakdown.entries());
}
/**
 * Returns the derived metrics and validation result of the first snapshot (in
 * the given order) that has derived metrics; both fields are null when no
 * snapshot has any.
 */
function latestMetrics(snapshots: FilingTaxonomySnapshotRecord[]) {
  const source = snapshots.find((snapshot) => snapshot.derived_metrics);

  if (!source) {
    return { taxonomy: null, validation: null };
  }

  return {
    taxonomy: source.derived_metrics,
    validation: source.validation_result
  };
}
/**
 * Default sync limit for a history window: 120 for the 'all' window, 60 for
 * every other window.
 */
export function defaultFinancialSyncLimit(window: FinancialHistoryWindow) {
  if (window === 'all') {
    return 120;
  }
  return 60;
}
/**
 * Assembles the financial-statement response for one company and statement.
 *
 * Loads taxonomy snapshots, snapshot status counts and filing records for the
 * normalized ticker, selects one primary period per filing, and builds both the
 * faithful and the standardized row surfaces. Raw facts and the dimension
 * breakdown are loaded only when requested through `includeFacts` /
 * `includeDimensions`.
 *
 * The repository reads do not depend on each other's results, so they are
 * issued together rather than awaited one after another.
 * NOTE(review): this assumes the repository functions are read-only and safe
 * to run concurrently — confirm against the repo layer.
 *
 * @param input ticker, statement, window, paging options and status flags
 * @returns the statement response, including coverage and data-source status
 * @throws whatever any of the underlying repository calls rejects with
 */
export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxonomyInput): Promise<CompanyFinancialStatementsResponse> {
  const ticker = safeTicker(input.ticker);
  const noFacts = { facts: [], nextCursor: null };

  const [snapshotResult, statuses, filings, factsResult, dimensionFacts] = await Promise.all([
    listFilingTaxonomySnapshotsByTicker({
      ticker,
      window: input.window,
      limit: input.limit,
      cursor: input.cursor
    }),
    countFilingTaxonomySnapshotStatuses(ticker),
    listFilingsRecords({
      ticker,
      limit: input.window === 'all' ? 250 : 120
    }),
    input.includeFacts
      ? listTaxonomyFactsByTicker({
          ticker,
          window: input.window,
          statement: input.statement,
          cursor: input.factsCursor,
          limit: input.factsLimit
        })
      : noFacts,
    // Dimension breakdowns use their own, larger, unpaged fact sample.
    input.includeDimensions
      ? listTaxonomyFactsByTicker({
          ticker,
          window: input.window,
          statement: input.statement,
          limit: 1200
        })
      : noFacts
  ]);

  const financialFilings = filings.filter((filing) => isFinancialForm(filing.filing_type));
  const selection = selectPrimaryPeriods(snapshotResult.snapshots, input.statement);
  const periods = selection.periods;
  const faithfulRows = buildRows(snapshotResult.snapshots, input.statement, selection.selectedPeriodIds);
  const standardizedRows = buildStandardizedRows(faithfulRows, input.statement, periods);

  // The first filing record supplies the company name and CIK.
  const latestFiling = filings[0] ?? null;
  const metrics = latestMetrics(snapshotResult.snapshots);

  const dimensionBreakdown = input.includeDimensions
    ? buildDimensionBreakdown(dimensionFacts.facts, periods, faithfulRows, standardizedRows)
    : null;
  const dimensionsCount = input.includeDimensions
    ? dimensionFacts.facts.reduce((total, fact) => total + fact.dimensions.length, 0)
    : 0;
  // With facts loaded, coverage reflects the returned page; otherwise the snapshots' own counts.
  const factsCoverage = input.includeFacts
    ? factsResult.facts.length
    : snapshotResult.snapshots.reduce((total, snapshot) => total + snapshot.facts_count, 0);

  return {
    company: {
      ticker,
      companyName: latestFiling?.company_name ?? ticker,
      cik: latestFiling?.cik ?? null
    },
    statement: input.statement,
    window: input.window,
    defaultSurface: 'standardized',
    periods,
    surfaces: {
      faithful: {
        kind: 'faithful',
        rows: faithfulRows
      },
      standardized: {
        kind: 'standardized',
        rows: standardizedRows
      }
    },
    nextCursor: snapshotResult.nextCursor,
    facts: input.includeFacts
      ? {
          rows: factsResult.facts,
          nextCursor: factsResult.nextCursor
        }
      : null,
    coverage: {
      filings: periods.length,
      rows: faithfulRows.length,
      dimensions: dimensionsCount,
      facts: factsCoverage
    },
    dataSourceStatus: {
      enabled: input.v3Enabled,
      hydratedFilings: statuses.ready,
      partialFilings: statuses.partial,
      failedFilings: statuses.failed,
      // Clamped at zero: status counts may exceed the (limited) filings page.
      pendingFilings: Math.max(0, financialFilings.length - statuses.ready - statuses.partial - statuses.failed),
      queuedSync: input.queuedSync
    },
    metrics,
    dimensionBreakdown
  };
}
/**
 * Module-private helpers re-exported under one object so they can be reached
 * from outside this module without widening its regular export surface.
 */
export const __financialTaxonomyInternals = {
  buildPeriods: buildPeriods,
  buildRows: buildRows,
  buildStandardizedRows: buildStandardizedRows,
  buildDimensionBreakdown: buildDimensionBreakdown,
  isInstantPeriod: isInstantPeriod,
  matchesDefinition: matchesDefinition,
  periodDurationDays: periodDurationDays,
  selectPrimaryPeriods: selectPrimaryPeriods
};