Files
Neon-Desk/lib/server/financials/kpi-notes.ts
francy51 db01f207a5 Expand financials surfaces with ratios, KPIs, and cadence support
- Add bundled financial modeling pipeline (ratios, KPI dimensions/notes, trend series, standardization)
- Introduce company financial bundles storage (Drizzle migration + repo wiring)
- Refactor financials page/API/query flow to use surfaceKind + cadence and new response shapes
2026-03-07 15:16:35 -05:00

132 lines
3.5 KiB
TypeScript

import { load } from 'cheerio';
import type {
FinancialStatementPeriod,
StructuredKpiRow
} from '@/lib/types';
import { resolvePrimaryFilingUrl } from '@/lib/server/sec';
import type { KpiDefinition } from '@/lib/server/financials/kpi-registry';
/**
 * Minimal reference to a filing document, carrying the fields needed to
 * resolve and download its primary document.
 */
type FilingDocumentRef = {
// Identifier matched against `period.filingId` to pair a period with its filing.
filingId: number;
// Forwarded to `resolvePrimaryFilingUrl` together with the fields below.
cik: string;
accessionNumber: string;
// May be null; URL resolution is delegated to `resolvePrimaryFilingUrl`.
filingUrl: string | null;
primaryDocument: string | null;
};
/**
 * Parses the text of a table cell into a number.
 *
 * Currency symbols, percent signs and thousands separators are discarded.
 * A value wrapped in parentheses — the accounting notation for a negative
 * amount — is returned as a negative number. An opening parenthesis without
 * its closing partner is treated the same way, because filing tables often
 * place the closing ")" in a separate cell.
 *
 * @param value Raw cell text.
 * @returns The parsed number, or `null` when the cell holds no finite number
 *          (empty cell, dash placeholder, free text, ...).
 */
function parseNumericCell(value: string): number | null {
  const stripped = value.replace(/[$,%]/g, '').trim();
  // Accounting convention: "(1,234)" means -1234.
  const parenthesized = stripped.startsWith('(');
  const normalized = stripped.replace(/[()]/g, '').trim();
  if (!normalized) {
    return null;
  }
  const numeric = Number(normalized);
  if (!Number.isFinite(numeric)) {
    return null;
  }
  // Only flip strictly positive magnitudes so "(0)" stays 0 (not -0) and an
  // explicitly signed value such as "(-5)" is not flipped back to positive.
  return parenthesized && numeric > 0 ? -numeric : numeric;
}
/**
 * Derives the map key for a KPI row found in a filing note.
 *
 * The label is slugified (lower-cased, runs of non-alphanumerics collapsed to
 * a single underscore, leading/trailing underscores removed). A non-empty slug
 * is appended to the definition key after a `__note__` separator; an empty
 * slug yields the bare definition key.
 */
function buildRowKey(definition: KpiDefinition, label: string) {
  const slug = label
    .trim()
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '_')
    .replace(/^_+|_+$/g, '');

  if (!slug) {
    return definition.key;
  }
  return `${definition.key}__note__${slug}`;
}
/**
 * Downloads the primary document of a filing as text.
 *
 * Best-effort: resolves to `null` when no URL can be resolved for the filing,
 * when the response status is not OK, or when the request throws.
 */
async function fetchHtml(ref: FilingDocumentRef) {
  const { filingUrl, cik, accessionNumber, primaryDocument } = ref;
  const documentUrl = resolvePrimaryFilingUrl({
    filingUrl,
    cik,
    accessionNumber,
    primaryDocument
  });
  if (!documentUrl) {
    return null;
  }

  // Falls back to the built-in identifier when the env var is unset or empty.
  const userAgent = process.env.SEC_USER_AGENT || 'Fiscal Clone <support@fiscal.local>';

  try {
    const reply = await fetch(documentUrl, {
      headers: { 'User-Agent': userAgent },
      cache: 'no-store'
    });
    return reply.ok ? await reply.text() : null;
  } catch {
    return null;
  }
}
/** A table row reduced to its label and the first numeric value that follows it. */
type ParsedNoteRow = {
  label: string;
  normalizedLabel: string;
  value: number;
};

/**
 * Fetches one filing document and reduces every `<tr>` inside a `<table>` to a
 * label (first non-empty cell) plus the first later cell that parses as a
 * number. Rows with fewer than two non-empty cells or without any numeric
 * cell are dropped. Resolves to an empty list when the document cannot be
 * fetched.
 */
async function loadNoteRows(filing: FilingDocumentRef): Promise<ParsedNoteRow[]> {
  const html = await fetchHtml(filing);
  if (!html) {
    return [];
  }

  const $ = load(html);
  const parsed: ParsedNoteRow[] = [];
  $('table tr').each((_index, element) => {
    const cells = $(element)
      .find('th,td')
      .toArray()
      .map((node) => $(node).text().replace(/\s+/g, ' ').trim())
      .filter(Boolean);
    if (cells.length < 2) {
      return;
    }
    const label = cells[0] ?? '';
    const value = cells.slice(1).map(parseNumericCell).find((cell) => cell !== null) ?? null;
    if (value === null) {
      return;
    }
    parsed.push({ label, normalizedLabel: label.toLowerCase(), value });
  });
  return parsed;
}

/**
 * Extracts KPI rows from the tables of filing documents.
 *
 * For every definition that declares `noteLabelIncludes` tokens, each period's
 * filing is scanned for table rows whose label contains one of the tokens
 * (case-insensitive). The first numeric cell of a matching row becomes the
 * value for that period. Rows are keyed by definition key plus slugified
 * label, so the same label found in several periods is merged into one row;
 * when several rows map to the same key within one period, the last one wins.
 *
 * Each filing is downloaded and parsed at most once per call, no matter how
 * many definitions or periods refer to it.
 *
 * @param input.periods     Periods to populate; each is paired with the filing
 *                          whose `filingId` equals `period.filingId`.
 * @param input.filings     Candidate filing documents.
 * @param input.definitions KPI definitions; those without `noteLabelIncludes`
 *                          tokens are skipped.
 * @returns The extracted rows sorted by label.
 */
export async function extractStructuredKpisFromNotes(input: {
  ticker: string;
  periods: FinancialStatementPeriod[];
  filings: FilingDocumentRef[];
  definitions: KpiDefinition[];
}): Promise<StructuredKpiRow[]> {
  const rows = new Map<string, StructuredKpiRow>();
  // Parsed rows per filing id. Caching the promise keeps every definition from
  // re-downloading and re-parsing the same document.
  const noteRowsByFiling = new Map<number, Promise<ParsedNoteRow[]>>();

  for (const definition of input.definitions) {
    const tokens = definition.noteLabelIncludes;
    if (!tokens || tokens.length === 0) {
      continue;
    }
    const loweredTokens = tokens.map((token) => token.toLowerCase());

    for (const period of input.periods) {
      const filing = input.filings.find((entry) => entry.filingId === period.filingId);
      if (!filing) {
        continue;
      }

      let pending = noteRowsByFiling.get(filing.filingId);
      if (!pending) {
        pending = loadNoteRows(filing);
        noteRowsByFiling.set(filing.filingId, pending);
      }
      const noteRows = await pending;

      for (const noteRow of noteRows) {
        if (!loweredTokens.some((token) => noteRow.normalizedLabel.includes(token))) {
          continue;
        }

        // A label identical to the definition's own label maps onto the bare definition key.
        const key = buildRowKey(definition, noteRow.label === definition.label ? '' : noteRow.label);
        const existing = rows.get(key);
        if (existing) {
          existing.values[period.id] = noteRow.value;
          continue;
        }

        rows.set(key, {
          key,
          label: noteRow.label || definition.label,
          category: definition.category,
          unit: definition.unit,
          order: 500,
          segment: null,
          axis: null,
          member: null,
          values: { [period.id]: noteRow.value },
          sourceConcepts: [],
          sourceFactIds: [],
          provenanceType: 'structured_note',
          hasDimensions: false
        });
      }
    }
  }

  return [...rows.values()].sort((left, right) => left.label.localeCompare(right.label));
}