Expand financials surfaces with ratios, KPIs, and cadence support
- Add bundled financial modeling pipeline (ratios, KPI dimensions/notes, trend series, standardization)
- Introduce company financial bundles storage (Drizzle migration + repo wiring)
- Refactor financials page/API/query flow to use surfaceKind + cadence and new response shapes
This commit is contained in:
450
lib/server/financials/standardize.ts
Normal file
450
lib/server/financials/standardize.ts
Normal file
@@ -0,0 +1,450 @@
|
||||
import type {
|
||||
DerivedFinancialRow,
|
||||
DimensionBreakdownRow,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementPeriod,
|
||||
FinancialUnit,
|
||||
StandardizedFinancialRow,
|
||||
TaxonomyFactRow,
|
||||
TaxonomyStatementRow
|
||||
} from '@/lib/types';
|
||||
import {
|
||||
CANONICAL_ROW_DEFINITIONS,
|
||||
type CanonicalRowDefinition
|
||||
} from '@/lib/server/financials/canonical-definitions';
|
||||
|
||||
/** Canonicalises a token so comparisons ignore surrounding whitespace and letter case. */
function normalizeToken(value: string) {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
||||
|
||||
/**
 * Looks up the value recorded for a period.
 *
 * @param values   Period id → value map.
 * @param periodId Period to read.
 * @returns The stored number, or `null` when the period is absent or holds no value.
 */
function valueOrNull(values: Record<string, number | null>, periodId: string) {
  // Own-property check: `in` would also match inherited keys such as "toString".
  if (!Object.prototype.hasOwnProperty.call(values, periodId)) {
    return null;
  }

  // A present-but-undefined entry is treated as missing as well.
  return values[periodId] ?? null;
}
|
||||
|
||||
/**
 * Adds up a list of values.
 *
 * @returns The total (0 for an empty list), or `null` as soon as any entry is `null`.
 */
function sumValues(values: Array<number | null>) {
  let total = 0;

  for (const entry of values) {
    if (entry === null) {
      return null;
    }

    total += entry ?? 0;
  }

  return total;
}
|
||||
|
||||
/** Returns `left - right`, or `null` when either operand is `null`. */
function subtractValues(left: number | null, right: number | null) {
  return left !== null && right !== null ? left - right : null;
}
|
||||
|
||||
/** Returns `left / right`, or `null` when either operand is `null` or the divisor is zero. */
function divideValues(left: number | null, right: number | null) {
  const divisible = left !== null && right !== null && right !== 0;
  if (divisible) {
    return left / right;
  }

  return null;
}
|
||||
|
||||
/**
 * Decides whether a statement row belongs to a canonical definition.
 *
 * A row matches when its local name equals one of the definition's `localNames`
 * (compared after normalisation), or failing that when its label contains one of
 * the definition's `labelIncludes` tokens.
 */
function matchesDefinition(row: TaxonomyStatementRow, definition: CanonicalRowDefinition) {
  const normalizedLocalName = normalizeToken(row.localName);
  for (const candidate of definition.localNames ?? []) {
    if (normalizeToken(candidate) === normalizedLocalName) {
      return true;
    }
  }

  const normalizedLabel = normalizeToken(row.label);
  const labelTokens = definition.labelIncludes ?? [];
  return labelTokens.some((token) => normalizedLabel.includes(normalizeToken(token)));
}
|
||||
|
||||
/** Matches a fact to a canonical definition by normalised local name only (labels are not consulted). */
function matchesDefinitionFact(fact: TaxonomyFactRow, definition: CanonicalRowDefinition) {
  const factLocalName = normalizeToken(fact.localName);
  const candidates = definition.localNames;
  if (candidates == null) {
    return false;
  }

  return candidates.some((candidate) => normalizeToken(candidate) === factLocalName);
}
|
||||
|
||||
/**
 * Maps a raw unit string onto a financial unit by substring matching.
 *
 * @param rawUnit  Raw unit text, or `null` when none is known.
 * @param fallback Unit returned when the raw text is empty or unrecognised.
 */
function inferUnit(rawUnit: string | null, fallback: FinancialUnit) {
  const unitText = rawUnit?.toLowerCase() ?? '';
  if (unitText === '') {
    return fallback;
  }

  const mentions = (needle: string) => unitText.includes(needle);

  // Currency is checked first, so a compound unit such as "USD/shares" is currency.
  if (mentions('usd') || mentions('iso4217')) {
    return 'currency';
  }

  if (mentions('shares')) {
    return 'shares';
  }

  if (mentions('pure') || mentions('percent')) {
    // Dimensionless units are reported as percent only when the caller expects percent.
    if (fallback === 'percent') {
      return 'percent';
    }

    return 'ratio';
  }

  return fallback;
}
|
||||
|
||||
/**
 * Tests whether a fact belongs to a statement period.
 *
 * When the period has a start date, both the fact's start and end must equal the
 * period's. Otherwise the fact's instant (or, failing that, its end date) must
 * equal the period end.
 */
export function factMatchesPeriod(fact: TaxonomyFactRow, period: FinancialStatementPeriod) {
  if (!period.periodStart) {
    const factDate = fact.periodInstant ?? fact.periodEnd;
    return factDate === period.periodEnd;
  }

  const sameStart = fact.periodStart === period.periodStart;
  return sameStart && fact.periodEnd === period.periodEnd;
}
|
||||
|
||||
/**
 * Builds one canonical row from the statement rows and facts matched to a definition.
 *
 * Per period, the value is resolved in this order:
 *  1. the first matching statement row (sorted by `order`, then label) that holds a
 *     non-null value for the period;
 *  2. the first dimensionless fact matching the definition and the period;
 *  3. `null`.
 *
 * @param definition Canonical definition supplying key, label, category, order and default unit.
 * @param matches    Statement rows already matched to `definition`.
 * @param facts      All facts; only dimensionless ones matching `definition` are considered.
 * @param periods    Periods to populate; every period id gets an entry in `values`.
 * @returns The canonical row with `formulaKey` set to `null`.
 */
function buildCanonicalRow(
  definition: CanonicalRowDefinition,
  matches: TaxonomyStatementRow[],
  facts: TaxonomyFactRow[],
  periods: FinancialStatementPeriod[]
) {
  const sortedMatches = [...matches].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }

    return left.label.localeCompare(right.label);
  });
  const matchedFacts = facts.filter((fact) => matchesDefinitionFact(fact, definition) && fact.isDimensionless);

  // Provenance always covers every matched row, whether or not it ends up supplying a value.
  const sourceConcepts = new Set<string>();
  const sourceRowKeys = new Set<string>();
  const sourceFactIds = new Set<number>();
  for (const row of sortedMatches) {
    sourceConcepts.add(row.qname);
    sourceRowKeys.add(row.key);
    for (const factId of row.sourceFactIds) {
      sourceFactIds.add(factId);
    }
  }

  const values: Record<string, number | null> = {};
  const resolvedSourceRowKeys: Record<string, string | null> = {};
  let unit = definition.unit;

  for (const period of periods) {
    // Prefer a row that actually carries a value: a higher-priority row that merely
    // lists the period with `null` must not hide a lower-priority row or a fact.
    const rowWithValue = sortedMatches.find((row) => (row.values[period.id] ?? null) !== null);
    if (rowWithValue) {
      values[period.id] = rowWithValue.values[period.id] ?? null;
      resolvedSourceRowKeys[period.id] = rowWithValue.key;

      // Only refine the unit when this period reports one; otherwise a period
      // without unit data would reset a unit inferred from another period.
      const rawUnit = rowWithValue.units[period.id] ?? null;
      if (rawUnit) {
        unit = inferUnit(rawUnit, definition.unit);
      }
      continue;
    }

    const factMatch = matchedFacts.find((fact) => factMatchesPeriod(fact, period));
    if (factMatch) {
      values[period.id] = factMatch.value ?? null;
      resolvedSourceRowKeys[period.id] = factMatch.conceptKey;
      if (factMatch.unit) {
        unit = inferUnit(factMatch.unit, definition.unit);
      }

      sourceConcepts.add(factMatch.qname);
      sourceRowKeys.add(factMatch.conceptKey);
      sourceFactIds.add(factMatch.id);
      continue;
    }

    // Nothing supplies a value. Still attribute the period to a row that lists it
    // (with a null value), when there is one.
    const placeholderRow = sortedMatches.find((row) => period.id in row.values);
    values[period.id] = null;
    resolvedSourceRowKeys[period.id] = placeholderRow?.key ?? null;
  }

  return {
    key: definition.key,
    label: definition.label,
    category: definition.category,
    order: definition.order,
    unit,
    values,
    sourceConcepts: [...sourceConcepts].sort((left, right) => left.localeCompare(right)),
    sourceRowKeys: [...sourceRowKeys].sort((left, right) => left.localeCompare(right)),
    sourceFactIds: [...sourceFactIds].sort((left, right) => left - right),
    formulaKey: null,
    hasDimensions: sortedMatches.some((row) => row.hasDimensions),
    resolvedSourceRowKeys
  } satisfies StandardizedFinancialRow;
}
|
||||
|
||||
/** Describes how to derive one canonical row from other canonical rows. */
interface FormulaDefinition {
  /** Key of the canonical row the formula fills in. */
  key: string;
  /** Identifier stored on the row as `formulaKey` once the formula has been used. */
  formulaKey: string;
  /** Computes the value for one period; returns `null` when it cannot be derived. */
  compute: (rowsByKey: Map<string, StandardizedFinancialRow>, periodId: string) => number | null;
}
|
||||
|
||||
/** Reads one period's value from a canonical row; `null` when the row or the value is missing. */
function readRowValue(rowsByKey: Map<string, StandardizedFinancialRow>, rowKey: string, periodId: string) {
  return valueOrNull(rowsByKey.get(rowKey)?.values ?? {}, periodId);
}

/**
 * Derived-row formulas per statement kind.
 *
 * Entries are listed so that a formula reading another formula's target comes after it
 * (`gross_margin` reads `gross_profit`; `net_cash_position` reads `total_debt`).
 * Every formula yields `null` when any of its inputs is `null`.
 */
const FORMULAS: Record<Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>, FormulaDefinition[]> = {
  income: [
    {
      key: 'gross_profit',
      formulaKey: 'gross_profit',
      // revenue - cost of revenue
      compute: (rowsByKey, periodId) => subtractValues(
        readRowValue(rowsByKey, 'revenue', periodId),
        readRowValue(rowsByKey, 'cost_of_revenue', periodId)
      )
    },
    {
      key: 'gross_margin',
      formulaKey: 'gross_margin',
      // gross profit / revenue
      compute: (rowsByKey, periodId) => divideValues(
        readRowValue(rowsByKey, 'gross_profit', periodId),
        readRowValue(rowsByKey, 'revenue', periodId)
      )
    },
    {
      key: 'operating_margin',
      formulaKey: 'operating_margin',
      // operating income / revenue
      compute: (rowsByKey, periodId) => divideValues(
        readRowValue(rowsByKey, 'operating_income', periodId),
        readRowValue(rowsByKey, 'revenue', periodId)
      )
    },
    {
      key: 'effective_tax_rate',
      formulaKey: 'effective_tax_rate',
      // income tax expense / pretax income
      compute: (rowsByKey, periodId) => divideValues(
        readRowValue(rowsByKey, 'income_tax_expense', periodId),
        readRowValue(rowsByKey, 'pretax_income', periodId)
      )
    },
    {
      key: 'ebitda',
      formulaKey: 'ebitda',
      // operating income + depreciation and amortization
      compute: (rowsByKey, periodId) => sumValues([
        readRowValue(rowsByKey, 'operating_income', periodId),
        readRowValue(rowsByKey, 'depreciation_and_amortization', periodId)
      ])
    }
  ],
  balance: [
    {
      key: 'total_debt',
      formulaKey: 'total_debt',
      // long-term debt + current debt + lease liabilities
      compute: (rowsByKey, periodId) => sumValues([
        readRowValue(rowsByKey, 'long_term_debt', periodId),
        readRowValue(rowsByKey, 'current_debt', periodId),
        readRowValue(rowsByKey, 'lease_liabilities', periodId)
      ])
    },
    {
      key: 'net_cash_position',
      formulaKey: 'net_cash_position',
      // (cash and equivalents + short-term investments) - total debt
      compute: (rowsByKey, periodId) => subtractValues(
        sumValues([
          readRowValue(rowsByKey, 'cash_and_equivalents', periodId),
          readRowValue(rowsByKey, 'short_term_investments', periodId)
        ]),
        readRowValue(rowsByKey, 'total_debt', periodId)
      )
    }
  ],
  cash_flow: [
    {
      key: 'free_cash_flow',
      formulaKey: 'free_cash_flow',
      // operating cash flow - capital expenditures
      compute: (rowsByKey, periodId) => subtractValues(
        readRowValue(rowsByKey, 'operating_cash_flow', periodId),
        readRowValue(rowsByKey, 'capital_expenditures', periodId)
      )
    }
  ]
};
|
||||
|
||||
/**
 * Fills gaps in formula-backed rows, mutating the rows in `rowsByKey` in place.
 *
 * For each formula of the statement whose target row exists, every period that has
 * no value yet (`null` or absent) is computed from the other rows. Values that are
 * already present are never overwritten. Formulas run in declaration order, so a
 * later formula sees the values produced by an earlier one.
 *
 * @param rowsByKey Canonical rows keyed by row key; targets are updated in place.
 * @param statement Statement kind selecting the formula list.
 * @param periods   Periods to fill.
 */
function applyFormulas(
  rowsByKey: Map<string, StandardizedFinancialRow>,
  statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>,
  periods: FinancialStatementPeriod[]
) {
  for (const formula of FORMULAS[statement]) {
    const target = rowsByKey.get(formula.key);
    if (!target) {
      continue;
    }

    let usedFormula = target.formulaKey !== null;
    for (const period of periods) {
      // Treat an absent entry like an explicit null so the gap still gets filled.
      const existing = target.values[period.id] ?? null;
      if (existing !== null) {
        continue;
      }

      const computed = formula.compute(rowsByKey, period.id);
      if (computed === null) {
        continue;
      }

      target.values[period.id] = computed;
      // A computed value has no single source row.
      target.resolvedSourceRowKeys[period.id] = null;
      usedFormula = true;
    }

    if (usedFormula) {
      target.formulaKey = formula.formulaKey;
    }
  }
}
|
||||
|
||||
/**
 * Converts a statement's rows into standardized rows.
 *
 * Each canonical definition of the statement becomes a row built from the rows and
 * facts that match it. A canonical row is kept when it has at least one value, or
 * when it is the target of a formula for this statement (so the formula can fill it).
 * Formulas are then applied. Rows that matched no definition are appended under an
 * `other:`-prefixed key in the `other` category, ordered after the canonical rows.
 *
 * @param input.rows      Statement rows to standardize.
 * @param input.statement Statement kind selecting definitions and formulas.
 * @param input.periods   Periods to populate.
 * @param input.facts     Facts used as a fallback when no row supplies a value.
 * @returns Rows sorted by `order`, then by label.
 */
export function buildStandardizedRows(input: {
  rows: TaxonomyStatementRow[];
  statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>;
  periods: FinancialStatementPeriod[];
  facts: TaxonomyFactRow[];
}) {
  const definitions = CANONICAL_ROW_DEFINITIONS[input.statement];
  // Derived from FORMULAS so a newly added formula automatically gets its placeholder row.
  const formulaTargetKeys = new Set(FORMULAS[input.statement].map((formula) => formula.key));
  const rowsByKey = new Map<string, StandardizedFinancialRow>();
  const matchedRowKeys = new Set<string>();

  for (const definition of definitions) {
    const matches = input.rows.filter((row) => matchesDefinition(row, definition));
    for (const row of matches) {
      matchedRowKeys.add(row.key);
    }

    const canonical = buildCanonicalRow(definition, matches, input.facts, input.periods);
    const hasAnyValue = Object.values(canonical.values).some((value) => value !== null);
    if (hasAnyValue || formulaTargetKeys.has(definition.key)) {
      rowsByKey.set(definition.key, canonical);
    }
  }

  applyFormulas(rowsByKey, input.statement, input.periods);

  const unmatchedRows = input.rows
    .filter((row) => !matchedRowKeys.has(row.key))
    .map((row) => ({
      key: `other:${row.key}`,
      label: row.label,
      category: 'other',
      // Offset keeps unmatched rows after the canonical ones.
      order: 10_000 + row.order,
      unit: inferUnit(Object.values(row.units)[0] ?? null, 'currency'),
      values: { ...row.values },
      sourceConcepts: [row.qname],
      sourceRowKeys: [row.key],
      sourceFactIds: [...row.sourceFactIds],
      formulaKey: null,
      hasDimensions: row.hasDimensions,
      resolvedSourceRowKeys: Object.fromEntries(
        input.periods.map((period) => [period.id, period.id in row.values ? row.key : null])
      )
    } satisfies StandardizedFinancialRow));

  return [...rowsByKey.values(), ...unmatchedRows].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }

    return left.label.localeCompare(right.label);
  });
}
|
||||
|
||||
/**
 * Groups dimensional facts into per-row breakdowns.
 *
 * Every fact that carries dimensions and matches one of the periods of its filing
 * yields one breakdown row per dimension. Each breakdown row is filed under the
 * fact's concept key and, additionally, under the key of every standardized row
 * that lists that concept key among its `sourceRowKeys`.
 *
 * @param facts            Facts to scan; facts without dimensions are ignored.
 * @param periods          Periods being displayed; facts are attributed via `filingId`.
 * @param faithfulRows     Statement rows, used to resolve the source label.
 * @param standardizedRows Standardized rows, used to mirror breakdowns onto canonical keys.
 * @returns Row key → breakdown rows, or `null` when nothing was collected.
 */
export function buildDimensionBreakdown(
  facts: TaxonomyFactRow[],
  periods: FinancialStatementPeriod[],
  faithfulRows: TaxonomyStatementRow[],
  standardizedRows: StandardizedFinancialRow[]
) {
  // A filing can back several periods, so keep all of them. Later-listed periods
  // are stored first so they win when more than one matches a fact.
  const periodsByFilingId = new Map<number, FinancialStatementPeriod[]>();
  for (const period of periods) {
    const bucket = periodsByFilingId.get(period.filingId);
    if (bucket) {
      bucket.unshift(period);
    } else {
      periodsByFilingId.set(period.filingId, [period]);
    }
  }

  const faithfulRowByKey = new Map(faithfulRows.map((row) => [row.key, row]));
  const standardizedRowsBySource = new Map<string, StandardizedFinancialRow[]>();
  for (const row of standardizedRows) {
    for (const sourceRowKey of row.sourceRowKeys) {
      const existing = standardizedRowsBySource.get(sourceRowKey);
      if (existing) {
        existing.push(row);
      } else {
        standardizedRowsBySource.set(sourceRowKey, [row]);
      }
    }
  }

  const map = new Map<string, DimensionBreakdownRow[]>();
  const pushRow = (key: string, row: DimensionBreakdownRow) => {
    const existing = map.get(key);
    if (existing) {
      existing.push(row);
    } else {
      map.set(key, [row]);
    }
  };

  for (const fact of facts) {
    if (fact.dimensions.length === 0) {
      continue;
    }

    const candidates = periodsByFilingId.get(fact.filingId) ?? [];
    const period = candidates.find((candidate) => factMatchesPeriod(fact, candidate));
    if (!period) {
      continue;
    }

    const faithfulRow = faithfulRowByKey.get(fact.conceptKey) ?? null;
    const standardizedMatches = standardizedRowsBySource.get(fact.conceptKey) ?? [];

    for (const dimension of fact.dimensions) {
      const faithfulDimensionRow: DimensionBreakdownRow = {
        rowKey: fact.conceptKey,
        concept: fact.qname,
        sourceRowKey: fact.conceptKey,
        sourceLabel: faithfulRow?.label ?? null,
        periodId: period.id,
        axis: dimension.axis,
        member: dimension.member,
        value: fact.value,
        unit: fact.unit,
        provenanceType: 'taxonomy'
      };

      pushRow(fact.conceptKey, faithfulDimensionRow);
      // Mirror the breakdown onto each standardized row sourced from this concept.
      for (const standardizedRow of standardizedMatches) {
        pushRow(standardizedRow.key, {
          ...faithfulDimensionRow,
          rowKey: standardizedRow.key
        });
      }
    }
  }

  return map.size > 0 ? Object.fromEntries(map.entries()) : null;
}
|
||||
|
||||
/**
 * Copies standardized rows so the copies can be mutated without touching the originals.
 *
 * The per-period maps and the source lists are duplicated; all other fields are
 * carried over as-is.
 */
export function cloneStandardizedRows(rows: StandardizedFinancialRow[]) {
  const copyRow = (original: StandardizedFinancialRow) => {
    const values = { ...original.values };
    const resolvedSourceRowKeys = { ...original.resolvedSourceRowKeys };

    return {
      ...original,
      values,
      sourceConcepts: original.sourceConcepts.slice(),
      sourceRowKeys: original.sourceRowKeys.slice(),
      sourceFactIds: original.sourceFactIds.slice(),
      resolvedSourceRowKeys
    };
  };

  return rows.map((row) => copyRow(row));
}
|
||||
|
||||
/**
 * Derives last-twelve-months rows from quarterly rows.
 *
 * An LTM period with id `ltm:<quarterId>` is anchored on that quarter and covers it
 * plus the three quarters before it (ordered by period end, falling back to filing
 * date). LTM periods with fewer than three preceding quarters get no entry.
 *
 * Per row and LTM period:
 *  - balance statement: the anchor quarter's value;
 *  - percent/ratio rows on other statements: recomputed from the LTM values of the
 *    rows the matching formula reads, or `null` when no formula exists. Adding four
 *    quarterly rates together does not produce a rate;
 *  - every other row: the sum of the four quarters, `null` if any quarter is `null`.
 *
 * @param quarterlyRows    Standardized quarterly rows; not mutated.
 * @param quarterlyPeriods Quarterly periods the rows are keyed by.
 * @param ltmPeriods       LTM periods to populate.
 * @param statement        Statement kind the rows belong to.
 * @returns One new row per quarterly row, keyed by LTM period ids.
 */
export function buildLtmStandardizedRows(
  quarterlyRows: StandardizedFinancialRow[],
  quarterlyPeriods: FinancialStatementPeriod[],
  ltmPeriods: FinancialStatementPeriod[],
  statement: Extract<FinancialStatementKind, 'income' | 'balance' | 'cash_flow'>
) {
  const sortedQuarterlyPeriods = [...quarterlyPeriods].sort((left, right) => {
    return Date.parse(left.periodEnd ?? left.filingDate) - Date.parse(right.periodEnd ?? right.filingDate);
  });

  // The four-quarter window depends only on the LTM period, so resolve it once
  // instead of once per row.
  const windows = ltmPeriods.flatMap((ltmPeriod) => {
    const anchorIndex = sortedQuarterlyPeriods.findIndex((period) => `ltm:${period.id}` === ltmPeriod.id);
    if (anchorIndex < 3) {
      return [];
    }

    return [{
      ltmPeriodId: ltmPeriod.id,
      quarters: sortedQuarterlyPeriods.slice(anchorIndex - 3, anchorIndex + 1)
    }];
  });

  const isRateUnit = (unit: StandardizedFinancialRow['unit']) => unit === 'percent' || unit === 'ratio';

  const result = cloneStandardizedRows(quarterlyRows).map((row) => ({
    ...row,
    values: {} as Record<string, number | null>,
    resolvedSourceRowKeys: {} as Record<string, string | null>
  }));

  result.forEach((row, index) => {
    // `result` is index-aligned with `quarterlyRows`.
    const source = quarterlyRows[index];
    if (!source) {
      return;
    }

    const recomputeAsRate = statement !== 'balance' && isRateUnit(row.unit);

    for (const { ltmPeriodId, quarters } of windows) {
      const sourceValues = quarters.map((period) => source.values[period.id] ?? null);
      const anchorQuarter = quarters[quarters.length - 1];

      if (statement === 'balance') {
        row.values[ltmPeriodId] = sourceValues[sourceValues.length - 1] ?? null;
      } else if (recomputeAsRate) {
        // Filled in below from the LTM component rows.
        row.values[ltmPeriodId] = null;
      } else {
        // NOTE(review): rows with unit 'shares' are summed here as well — confirm
        // that is the intended LTM treatment for share counts.
        row.values[ltmPeriodId] = sumValues(sourceValues);
      }

      row.resolvedSourceRowKeys[ltmPeriodId] = source.formulaKey || recomputeAsRate
        ? null
        : source.resolvedSourceRowKeys[anchorQuarter?.id ?? ''] ?? null;
    }
  });

  if (statement !== 'balance') {
    const rowsByKey = new Map<string, StandardizedFinancialRow>();
    for (const row of result) {
      if (!rowsByKey.has(row.key)) {
        rowsByKey.set(row.key, row);
      }
    }

    // Formulas run in declaration order, so a rate that reads another derived row
    // sees that row's LTM value.
    for (const formula of FORMULAS[statement]) {
      const target = rowsByKey.get(formula.key);
      if (!target || !isRateUnit(target.unit)) {
        continue;
      }

      let derived = false;
      for (const { ltmPeriodId } of windows) {
        const computed = formula.compute(rowsByKey, ltmPeriodId);
        target.values[ltmPeriodId] = computed;
        if (computed !== null) {
          derived = true;
        }
      }

      if (derived) {
        target.formulaKey = formula.formulaKey;
      }
    }
  }

  return result;
}
|
||||
Reference in New Issue
Block a user