Implement dual-surface financials and db bootstrap
This commit is contained in:
@@ -4,6 +4,7 @@ import type {
|
||||
FinancialHistoryWindow,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementPeriod,
|
||||
StandardizedStatementRow,
|
||||
TaxonomyStatementRow
|
||||
} from '@/lib/types';
|
||||
import { listFilingsRecords } from '@/lib/server/repos/filings';
|
||||
@@ -28,6 +29,19 @@ type GetCompanyFinancialTaxonomyInput = {
|
||||
queuedSync: boolean;
|
||||
};
|
||||
|
||||
/**
 * Describes one canonical ("standardized") statement row and how it is
 * resolved from the rows of a filing.
 */
type CanonicalRowDefinition = {
  /** Key of the canonical row; also used to look rows up inside formulas. */
  key: string;
  /** Display label of the canonical row. */
  label: string;
  /** Grouping bucket copied onto the produced row (e.g. 'revenue', 'asset'). */
  category: string;
  /** Sort position of the canonical row within its statement. */
  order: number;
  /** Concept local names that map onto this row (compared case-insensitively). */
  localNames?: ReadonlyArray<string>;
  /** Label fragments that map a source row onto this row when no local name matches. */
  labelIncludes?: ReadonlyArray<string>;
  /**
   * Optional derivation from canonical rows that were built earlier.
   * Returns `null` when the derivation cannot be performed.
   */
  formula?(
    rowsByKey: Map<string, StandardizedStatementRow>,
    periodIds: string[]
  ): Pick<StandardizedStatementRow, 'values' | 'resolvedSourceRowKeys'> | null;
};
|
||||
|
||||
/**
 * Canonicalizes a ticker symbol: surrounding whitespace is removed and the
 * remainder is upper-cased.
 */
function safeTicker(input: string): string {
  const trimmed = input.trim();
  return trimmed.toUpperCase();
}
|
||||
@@ -215,16 +229,419 @@ function buildRows(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Produces the comparison form of a token: trimmed and lower-cased, so that
 * matching ignores case and surrounding whitespace.
 */
function normalizeToken(value: string): string {
  const withoutPadding = value.trim();
  return withoutPadding.toLowerCase();
}
|
||||
|
||||
/**
 * Adds two optional amounts.
 *
 * @returns the sum, or `null` as soon as either operand is `null`
 *          (a missing input makes the total unknown rather than partial).
 */
function sumValues(left: number | null, right: number | null) {
  const bothPresent = left !== null && right !== null;
  return bothPresent ? left + right : null;
}
|
||||
|
||||
/**
 * Subtracts `right` from `left` for two optional amounts.
 *
 * @returns the difference, or `null` as soon as either operand is `null`.
 */
function subtractValues(left: number | null, right: number | null) {
  const bothPresent = left !== null && right !== null;
  return bothPresent ? left - right : null;
}
|
||||
|
||||
/**
 * Creates a formula that derives a row from two canonical rows built earlier,
 * combining their values period by period.
 *
 * The formula returns `null` when either input row is absent. Derived periods
 * never point at a single source row, so every resolved source key is `null`.
 */
function pairwiseFormula(
  leftKey: string,
  rightKey: string,
  operation: 'add' | 'subtract'
): NonNullable<CanonicalRowDefinition['formula']> {
  return (rowsByKey, periodIds) => {
    const leftRow = rowsByKey.get(leftKey);
    const rightRow = rowsByKey.get(rightKey);
    if (leftRow === undefined || rightRow === undefined) {
      return null;
    }

    // Resolved lazily so the helpers are only needed when a formula actually runs.
    const combine = operation === 'add' ? sumValues : subtractValues;
    const valueEntries = periodIds.map((periodId) => [
      periodId,
      combine(leftRow.values[periodId] ?? null, rightRow.values[periodId] ?? null)
    ]);
    const sourceKeyEntries = periodIds.map((periodId) => [periodId, null]);

    return {
      values: Object.fromEntries(valueEntries),
      resolvedSourceRowKeys: Object.fromEntries(sourceKeyEntries)
    };
  };
}

/**
 * Canonical rows per statement kind.
 *
 * Each entry lists the concept local names (and optionally label fragments)
 * that map a source row onto the canonical row. Entries with a `formula` are
 * derived from canonical rows defined earlier in the same statement, so the
 * array order matters: inputs must precede the rows computed from them.
 */
const STANDARDIZED_ROW_DEFINITIONS: Record<FinancialStatementKind, CanonicalRowDefinition[]> = {
  // ---- Income statement ----
  income: [
    {
      key: 'revenue',
      label: 'Revenue',
      category: 'revenue',
      order: 10,
      localNames: [
        'RevenueFromContractWithCustomerExcludingAssessedTax',
        'Revenues',
        'SalesRevenueNet',
        'TotalRevenuesAndOtherIncome'
      ]
    },
    {
      key: 'cost-of-revenue',
      label: 'Cost of Revenue',
      category: 'expense',
      order: 20,
      localNames: ['CostOfRevenue', 'CostOfGoodsSold', 'CostOfSales', 'CostOfProductsSold', 'CostOfServices']
    },
    {
      key: 'gross-profit',
      label: 'Gross Profit',
      category: 'profit',
      order: 30,
      localNames: ['GrossProfit'],
      // revenue - cost of revenue
      formula: pairwiseFormula('revenue', 'cost-of-revenue', 'subtract')
    },
    {
      key: 'research-and-development',
      label: 'Research & Development',
      category: 'opex',
      order: 40,
      localNames: ['ResearchAndDevelopmentExpense']
    },
    {
      key: 'selling-general-and-administrative',
      label: 'Selling, General & Administrative',
      category: 'opex',
      order: 50,
      localNames: [
        'SellingGeneralAndAdministrativeExpense',
        'SellingAndMarketingExpense',
        'GeneralAndAdministrativeExpense'
      ],
      labelIncludes: ['selling, general', 'selling general', 'general and administrative']
    },
    {
      key: 'operating-income',
      label: 'Operating Income',
      category: 'profit',
      order: 60,
      localNames: ['OperatingIncomeLoss', 'IncomeLossFromOperations']
    },
    {
      key: 'net-income',
      label: 'Net Income',
      category: 'profit',
      order: 70,
      localNames: ['NetIncomeLoss', 'ProfitLoss']
    }
  ],

  // ---- Balance sheet ----
  balance: [
    {
      key: 'cash-and-equivalents',
      label: 'Cash & Equivalents',
      category: 'asset',
      order: 10,
      localNames: [
        'CashAndCashEquivalentsAtCarryingValue',
        'CashCashEquivalentsAndShortTermInvestments',
        'CashAndShortTermInvestments'
      ]
    },
    {
      key: 'accounts-receivable',
      label: 'Accounts Receivable',
      category: 'asset',
      order: 20,
      localNames: ['AccountsReceivableNetCurrent', 'ReceivablesNetCurrent']
    },
    {
      key: 'inventory',
      label: 'Inventory',
      category: 'asset',
      order: 30,
      localNames: ['InventoryNet']
    },
    {
      key: 'total-assets',
      label: 'Total Assets',
      category: 'asset',
      order: 40,
      localNames: ['Assets']
    },
    {
      key: 'current-liabilities',
      label: 'Current Liabilities',
      category: 'liability',
      order: 50,
      localNames: ['LiabilitiesCurrent']
    },
    {
      key: 'long-term-debt',
      label: 'Long-Term Debt',
      category: 'liability',
      order: 60,
      localNames: [
        'LongTermDebtNoncurrent',
        'LongTermDebt',
        'DebtNoncurrent',
        'LongTermDebtAndCapitalLeaseObligations'
      ]
    },
    {
      key: 'current-debt',
      label: 'Current Debt',
      category: 'liability',
      order: 70,
      localNames: ['DebtCurrent', 'ShortTermBorrowings', 'LongTermDebtCurrent']
    },
    {
      key: 'total-debt',
      label: 'Total Debt',
      category: 'liability',
      order: 80,
      localNames: ['DebtAndFinanceLeaseLiabilities', 'Debt'],
      // long-term debt + current debt
      formula: pairwiseFormula('long-term-debt', 'current-debt', 'add')
    },
    {
      key: 'total-equity',
      label: 'Total Equity',
      category: 'equity',
      order: 90,
      localNames: [
        'StockholdersEquity',
        'StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest',
        'PartnersCapital'
      ]
    }
  ],

  // ---- Cash flow statement ----
  cash_flow: [
    {
      key: 'operating-cash-flow',
      label: 'Operating Cash Flow',
      category: 'cash-flow',
      order: 10,
      localNames: [
        'NetCashProvidedByUsedInOperatingActivities',
        'NetCashProvidedByUsedInOperatingActivitiesContinuingOperations'
      ]
    },
    {
      key: 'capital-expenditures',
      label: 'Capital Expenditures',
      category: 'cash-flow',
      order: 20,
      localNames: ['PaymentsToAcquirePropertyPlantAndEquipment', 'CapitalExpendituresIncurredButNotYetPaid']
    },
    {
      key: 'free-cash-flow',
      label: 'Free Cash Flow',
      category: 'cash-flow',
      order: 30,
      // operating cash flow - capital expenditures; purely derived, no local names
      formula: pairwiseFormula('operating-cash-flow', 'capital-expenditures', 'subtract')
    }
  ],

  // ---- Statement of equity ----
  equity: [
    {
      key: 'total-equity',
      label: 'Total Equity',
      category: 'equity',
      order: 10,
      localNames: [
        'StockholdersEquity',
        'StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest',
        'PartnersCapital'
      ]
    }
  ],

  // ---- Statement of comprehensive income ----
  comprehensive_income: [
    {
      key: 'comprehensive-income',
      label: 'Comprehensive Income',
      category: 'profit',
      order: 10,
      localNames: [
        'ComprehensiveIncomeNetOfTax',
        'ComprehensiveIncomeNetOfTaxIncludingPortionAttributableToNoncontrollingInterest'
      ]
    }
  ]
};
|
||||
|
||||
/**
 * Decides whether a source row belongs to a canonical row definition.
 *
 * A row matches when its local name equals one of the definition's
 * `localNames`, or, failing that, when its label contains one of the
 * definition's `labelIncludes` fragments. Both comparisons go through
 * `normalizeToken`.
 */
function matchesDefinition(row: TaxonomyStatementRow, definition: CanonicalRowDefinition) {
  const candidateName = normalizeToken(row.localName);
  for (const knownName of definition.localNames ?? []) {
    if (normalizeToken(knownName) === candidateName) {
      return true;
    }
  }

  // Label fragments are only a fallback once the exact concept names failed.
  const candidateLabel = normalizeToken(row.label);
  for (const fragment of definition.labelIncludes ?? []) {
    if (candidateLabel.includes(normalizeToken(fragment))) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Collapses every source row that matched a canonical definition into one
 * standardized row.
 *
 * Source rows are ranked by `order`, then by label. For each period the value
 * is taken from the highest-ranked row that carries a non-null value for that
 * period. The previous lookup used `periodId in row.values`, so a
 * higher-ranked row that merely contained the period with a `null` value
 * shadowed lower-ranked rows holding a real number. When no row has a value,
 * the first row that at least contains the period is still reported as the
 * resolved source, exactly as before.
 *
 * @param definition canonical row being produced (key, label, category, order)
 * @param matches source rows mapped to the definition; may be empty
 * @param periodIds periods to populate; each one receives an entry in
 *                  `values` and `resolvedSourceRowKeys`
 * @returns the standardized row, with provenance lists sorted for stable output
 */
function buildCanonicalRow(
  definition: CanonicalRowDefinition,
  matches: TaxonomyStatementRow[],
  periodIds: string[]
) {
  // Copy before sorting so the caller's array is left untouched.
  const sortedMatches = [...matches].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }

    return left.label.localeCompare(right.label);
  });

  // Provenance is the union over all matches, not just the rows that won a period.
  const sourceConcepts = new Set<string>();
  const sourceRowKeys = new Set<string>();
  const sourceFactIds = new Set<number>();

  for (const row of sortedMatches) {
    sourceConcepts.add(row.qname);
    sourceRowKeys.add(row.key);
    for (const factId of row.sourceFactIds) {
      sourceFactIds.add(factId);
    }
  }

  const values: Record<string, number | null> = {};
  const resolvedSourceRowKeys: Record<string, string | null> = {};

  for (const periodId of periodIds) {
    // Prefer a row with an actual number; only fall back to a row that lists
    // the period without a value when nothing better exists.
    const match =
      sortedMatches.find((row) => (row.values[periodId] ?? null) !== null) ??
      sortedMatches.find((row) => periodId in row.values);
    values[periodId] = match?.values[periodId] ?? null;
    resolvedSourceRowKeys[periodId] = match?.key ?? null;
  }

  return {
    key: definition.key,
    label: definition.label,
    category: definition.category,
    order: definition.order,
    values,
    hasDimensions: sortedMatches.some((row) => row.hasDimensions),
    sourceConcepts: [...sourceConcepts].sort((left, right) => left.localeCompare(right)),
    sourceRowKeys: [...sourceRowKeys].sort((left, right) => left.localeCompare(right)),
    sourceFactIds: [...sourceFactIds].sort((left, right) => left - right),
    resolvedSourceRowKeys
  } satisfies StandardizedStatementRow;
}
|
||||
|
||||
/**
 * Projects the faithful (as-filed) rows of one statement onto the canonical
 * row set for that statement kind.
 *
 * For every canonical definition, in definition order:
 * 1. source rows that match the definition are collapsed into a canonical row;
 * 2. if the definition has a formula, the derived values are applied on top.
 *    For periods where the derivation yields no value but a matching source
 *    row reported one, the reported value (and its source row key) is kept
 *    rather than being overwritten with `null`;
 * 3. a definition with neither matching rows nor a usable formula result
 *    produces no row, so all-null placeholder rows are never emitted.
 *
 * Source rows that matched no definition are appended as `other:<key>` rows
 * ordered after all canonical rows (`10_000 + row.order`).
 *
 * @param rows faithful rows of the statement
 * @param statement statement kind used to pick the definition list
 * @param periods periods whose ids become the value keys of each row
 * @returns canonical rows followed by unmatched rows, sorted by order then label
 */
function buildStandardizedRows(
  rows: TaxonomyStatementRow[],
  statement: FinancialStatementKind,
  periods: FinancialStatementPeriod[]
) {
  const definitions = STANDARDIZED_ROW_DEFINITIONS[statement] ?? [];
  const periodIds = periods.map((period) => period.id);
  const rowsByKey = new Map<string, StandardizedStatementRow>();
  const matchedRowKeys = new Set<string>();

  for (const definition of definitions) {
    const matches = rows.filter((row) => matchesDefinition(row, definition));
    if (matches.length === 0 && !definition.formula) {
      continue;
    }

    for (const row of matches) {
      matchedRowKeys.add(row.key);
    }

    const canonicalRow = buildCanonicalRow(definition, matches, periodIds);
    rowsByKey.set(definition.key, canonicalRow);

    const derived = definition.formula?.(rowsByKey, periodIds) ?? null;
    if (!derived) {
      // Nothing reported and nothing derivable: drop the empty placeholder so
      // formula-only rows behave like plain rows without matches.
      if (matches.length === 0) {
        rowsByKey.delete(definition.key);
      }
      continue;
    }

    const values: Record<string, number | null> = {};
    const resolvedSourceRowKeys: Record<string, string | null> = {};
    for (const periodId of periodIds) {
      const derivedValue = derived.values[periodId] ?? null;
      const reportedValue = canonicalRow.values[periodId] ?? null;
      // The derived figure stays authoritative; a reported figure only fills
      // periods the derivation could not compute.
      const useReported = derivedValue === null && reportedValue !== null;

      values[periodId] = useReported ? reportedValue : derivedValue;
      resolvedSourceRowKeys[periodId] = useReported
        ? canonicalRow.resolvedSourceRowKeys[periodId] ?? null
        : derived.resolvedSourceRowKeys[periodId] ?? null;
    }

    rowsByKey.set(definition.key, {
      ...canonicalRow,
      values,
      resolvedSourceRowKeys
    });
  }

  const unmatchedRows = rows
    .filter((row) => !matchedRowKeys.has(row.key))
    .map((row) => ({
      key: `other:${row.key}`,
      label: row.label,
      category: 'other',
      order: 10_000 + row.order,
      values: { ...row.values },
      hasDimensions: row.hasDimensions,
      sourceConcepts: [row.qname],
      sourceRowKeys: [row.key],
      sourceFactIds: [...row.sourceFactIds],
      resolvedSourceRowKeys: Object.fromEntries(
        periodIds.map((periodId) => [periodId, periodId in row.values ? row.key : null])
      )
    } satisfies StandardizedStatementRow));

  return [...rowsByKey.values(), ...unmatchedRows].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }

    return left.label.localeCompare(right.label);
  });
}
|
||||
|
||||
function buildDimensionBreakdown(
|
||||
facts: Awaited<ReturnType<typeof listTaxonomyFactsByTicker>>['facts'],
|
||||
periods: FinancialStatementPeriod[]
|
||||
periods: FinancialStatementPeriod[],
|
||||
faithfulRows: TaxonomyStatementRow[],
|
||||
standardizedRows: StandardizedStatementRow[]
|
||||
) {
|
||||
const periodByFilingId = new Map<number, FinancialStatementPeriod>();
|
||||
for (const period of periods) {
|
||||
periodByFilingId.set(period.filingId, period);
|
||||
}
|
||||
|
||||
const faithfulRowByKey = new Map(faithfulRows.map((row) => [row.key, row]));
|
||||
const standardizedRowsBySource = new Map<string, StandardizedStatementRow[]>();
|
||||
for (const row of standardizedRows) {
|
||||
for (const sourceRowKey of row.sourceRowKeys) {
|
||||
const existing = standardizedRowsBySource.get(sourceRowKey);
|
||||
if (existing) {
|
||||
existing.push(row);
|
||||
} else {
|
||||
standardizedRowsBySource.set(sourceRowKey, [row]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const map = new Map<string, DimensionBreakdownRow[]>();
|
||||
const pushRow = (key: string, row: DimensionBreakdownRow) => {
|
||||
const existing = map.get(key);
|
||||
if (existing) {
|
||||
existing.push(row);
|
||||
} else {
|
||||
map.set(key, [row]);
|
||||
}
|
||||
};
|
||||
|
||||
for (const fact of facts) {
|
||||
if (fact.dimensions.length === 0) {
|
||||
@@ -244,10 +661,15 @@ function buildDimensionBreakdown(
|
||||
continue;
|
||||
}
|
||||
|
||||
const faithfulRow = faithfulRowByKey.get(fact.conceptKey) ?? null;
|
||||
const standardizedMatches = standardizedRowsBySource.get(fact.conceptKey) ?? [];
|
||||
|
||||
for (const dimension of fact.dimensions) {
|
||||
const row: DimensionBreakdownRow = {
|
||||
const faithfulDimensionRow: DimensionBreakdownRow = {
|
||||
rowKey: fact.conceptKey,
|
||||
concept: fact.qname,
|
||||
sourceRowKey: fact.conceptKey,
|
||||
sourceLabel: faithfulRow?.label ?? null,
|
||||
periodId: period.id,
|
||||
axis: dimension.axis,
|
||||
member: dimension.member,
|
||||
@@ -255,11 +677,13 @@ function buildDimensionBreakdown(
|
||||
unit: fact.unit
|
||||
};
|
||||
|
||||
const existing = map.get(fact.conceptKey);
|
||||
if (existing) {
|
||||
existing.push(row);
|
||||
} else {
|
||||
map.set(fact.conceptKey, [row]);
|
||||
pushRow(fact.conceptKey, faithfulDimensionRow);
|
||||
|
||||
for (const standardizedRow of standardizedMatches) {
|
||||
pushRow(standardizedRow.key, {
|
||||
...faithfulDimensionRow,
|
||||
rowKey: standardizedRow.key
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -305,7 +729,8 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
|
||||
const financialFilings = filings.filter((filing) => isFinancialForm(filing.filing_type));
|
||||
const selection = selectPrimaryPeriods(snapshotResult.snapshots, input.statement);
|
||||
const periods = selection.periods;
|
||||
const rows = buildRows(snapshotResult.snapshots, input.statement, selection.selectedPeriodIds);
|
||||
const faithfulRows = buildRows(snapshotResult.snapshots, input.statement, selection.selectedPeriodIds);
|
||||
const standardizedRows = buildStandardizedRows(faithfulRows, input.statement, periods);
|
||||
|
||||
const factsResult = input.includeFacts
|
||||
? await listTaxonomyFactsByTicker({
|
||||
@@ -329,11 +754,11 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
|
||||
const latestFiling = filings[0] ?? null;
|
||||
const metrics = latestMetrics(snapshotResult.snapshots);
|
||||
const dimensionBreakdown = input.includeDimensions
|
||||
? buildDimensionBreakdown(dimensionFacts.facts, periods)
|
||||
? buildDimensionBreakdown(dimensionFacts.facts, periods, faithfulRows, standardizedRows)
|
||||
: null;
|
||||
|
||||
const dimensionsCount = dimensionBreakdown
|
||||
? Object.values(dimensionBreakdown).reduce((total, entries) => total + entries.length, 0)
|
||||
const dimensionsCount = input.includeDimensions
|
||||
? dimensionFacts.facts.reduce((total, fact) => total + fact.dimensions.length, 0)
|
||||
: 0;
|
||||
|
||||
const factsCoverage = input.includeFacts
|
||||
@@ -348,8 +773,18 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
|
||||
},
|
||||
statement: input.statement,
|
||||
window: input.window,
|
||||
defaultSurface: 'standardized',
|
||||
periods,
|
||||
rows,
|
||||
surfaces: {
|
||||
faithful: {
|
||||
kind: 'faithful',
|
||||
rows: faithfulRows
|
||||
},
|
||||
standardized: {
|
||||
kind: 'standardized',
|
||||
rows: standardizedRows
|
||||
}
|
||||
},
|
||||
nextCursor: snapshotResult.nextCursor,
|
||||
facts: input.includeFacts
|
||||
? {
|
||||
@@ -359,7 +794,7 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
|
||||
: null,
|
||||
coverage: {
|
||||
filings: periods.length,
|
||||
rows: rows.length,
|
||||
rows: faithfulRows.length,
|
||||
dimensions: dimensionsCount,
|
||||
facts: factsCoverage
|
||||
},
|
||||
@@ -378,7 +813,11 @@ export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxo
|
||||
|
||||
/**
 * Module-private helpers gathered under a single export so they can be
 * reached from outside this module without becoming individual exports.
 */
export const __financialTaxonomyInternals = {
  // Period selection
  buildPeriods,
  // Row construction for both surfaces
  buildRows, buildStandardizedRows, buildDimensionBreakdown,
  // Predicates and measurements
  isInstantPeriod, matchesDefinition, periodDurationDays,
  // Primary period choice per statement
  selectPrimaryPeriods
};
|
||||
|
||||
Reference in New Issue
Block a user