- Add bundled financial modeling pipeline (ratios, KPI dimensions/notes, trend series, standardization)
- Introduce company financial bundles storage (Drizzle migration + repo wiring)
- Refactor financials page/API/query flow to use surfaceKind + cadence and new response shapes
132 lines
3.5 KiB
TypeScript
132 lines
3.5 KiB
TypeScript
import { load } from 'cheerio';
|
|
import type {
|
|
FinancialStatementPeriod,
|
|
StructuredKpiRow
|
|
} from '@/lib/types';
|
|
import { resolvePrimaryFilingUrl } from '@/lib/server/sec';
|
|
import type { KpiDefinition } from '@/lib/server/financials/kpi-registry';
|
|
|
|
/**
 * Reference to one filing document that can be downloaded and scanned.
 *
 * `filingId` is what `extractStructuredKpisFromNotes` matches against
 * `period.filingId`; the remaining fields are forwarded unchanged to
 * `resolvePrimaryFilingUrl` by `fetchHtml` to locate the document.
 */
type FilingDocumentRef = {
  /** Identifier used to pair a filing with a statement period. */
  filingId: number;
  /** Filer identifier passed to the URL resolver. */
  cik: string;
  /** Accession number passed to the URL resolver. */
  accessionNumber: string;
  /** Filing URL passed to the URL resolver, or `null` when absent. */
  filingUrl: string | null;
  /** Primary document value passed to the URL resolver, or `null` when absent. */
  primaryDocument: string | null;
};
|
|
|
|
/**
 * Parses the text of a table cell into a number.
 *
 * Currency signs, thousands separators and percent signs are stripped before
 * parsing. A value that opens with a parenthesis — the accounting notation for
 * a negative amount, e.g. `(1,234)` — is returned as a negative number. The
 * closing parenthesis is optional because filings frequently place it in a
 * separate cell.
 *
 * @param value Raw cell text.
 * @returns The parsed number, or `null` when the cell is empty or not numeric.
 */
function parseNumericCell(value: string): number | null {
  const withoutSymbols = value.replace(/[$,%]/g, '').trim();
  // Detect the accounting-negative marker before the parentheses are removed.
  const isParenthesized = withoutSymbols.startsWith('(');
  const normalized = withoutSymbols.replace(/[()]/g, '').trim();
  if (!normalized) {
    return null;
  }

  const numeric = Number(normalized);
  if (!Number.isFinite(numeric)) {
    return null;
  }

  // Only flip strictly positive values: avoids producing -0 for "(0)" and
  // double-negating an explicit "(-5)".
  return isParenthesized && numeric > 0 ? -numeric : numeric;
}
|
|
|
|
/**
 * Derives the row key for a KPI row found under a given label.
 *
 * The label is trimmed, lower-cased, and every run of characters outside
 * `a-z0-9` is collapsed to a single underscore; leading and trailing
 * underscores are then removed. When nothing remains, the definition's own
 * key is returned; otherwise the slug is appended after `__note__`.
 *
 * @param definition KPI definition supplying the base key.
 * @param label Row label to turn into a key suffix (may be empty).
 * @returns The composed row key.
 */
function buildRowKey(definition: KpiDefinition, label: string) {
  const slug = label
    .trim()
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '_')
    .replace(/^_+|_+$/g, '');

  if (!slug) {
    return definition.key;
  }

  return `${definition.key}__note__${slug}`;
}
|
|
|
|
/**
 * Downloads the body of a filing's primary document as text.
 *
 * The URL is obtained from `resolvePrimaryFilingUrl`. The request carries a
 * `User-Agent` header taken from `SEC_USER_AGENT` (with a built-in fallback
 * when that variable is unset or empty) and is issued with
 * `cache: 'no-store'`.
 *
 * This is best-effort: every failure mode is reported as `null` rather than
 * thrown.
 *
 * @param ref Filing whose document should be fetched.
 * @returns The response body, or `null` when no URL could be resolved, the
 *   response status was not OK, or the request threw.
 */
async function fetchHtml(ref: FilingDocumentRef) {
  const { filingUrl, cik, accessionNumber, primaryDocument } = ref;
  const documentUrl = resolvePrimaryFilingUrl({
    filingUrl,
    cik,
    accessionNumber,
    primaryDocument
  });

  if (!documentUrl) {
    return null;
  }

  try {
    const userAgent = process.env.SEC_USER_AGENT || 'Fiscal Clone <support@fiscal.local>';
    const reply = await fetch(documentUrl, {
      headers: { 'User-Agent': userAgent },
      cache: 'no-store'
    });

    return reply.ok ? await reply.text() : null;
  } catch {
    // Network and body-read failures are intentionally treated as "no document".
    return null;
  }
}
|
|
|
|
/**
 * Collects, for every `<tr>` inside a `<table>` of `html`, the
 * whitespace-normalized text of its non-empty `<th>`/`<td>` cells. Rows with
 * fewer than two such cells are dropped because they cannot hold both a label
 * and a value.
 */
function collectNoteTableRows(html: string): string[][] {
  const $ = load(html);
  const tableRows: string[][] = [];

  $('table tr').each((_index, element) => {
    const cells = $(element)
      .find('th,td')
      .toArray()
      .map((node) => $(node).text().replace(/\s+/g, ' ').trim())
      .filter(Boolean);

    if (cells.length >= 2) {
      tableRows.push(cells);
    }
  });

  return tableRows;
}

/**
 * Scans filing documents for table rows whose label matches a KPI definition
 * and turns them into structured KPI rows.
 *
 * For each definition that declares `noteLabelIncludes` tokens, and for each
 * period that has a matching filing (by `filingId`), every table row whose
 * first cell contains one of the tokens (case-insensitive) contributes the
 * first numeric cell after the label as that period's value. Rows are keyed by
 * `buildRowKey`, so the same label seen in several periods accumulates into a
 * single row; within one period a later matching row overwrites an earlier one.
 *
 * Each filing document is fetched and parsed at most once per call, however
 * many definitions and periods reference it. A filing that cannot be fetched
 * is skipped and is not retried within the call.
 *
 * @param input.ticker Not read by this function.
 * @param input.periods Statement periods; `id` keys the values and `filingId`
 *   selects the filing to scan.
 * @param input.filings Candidate filing documents.
 * @param input.definitions KPI definitions; those without `noteLabelIncludes`
 *   tokens are ignored.
 * @returns The collected rows sorted by label.
 */
export async function extractStructuredKpisFromNotes(input: {
  ticker: string;
  periods: FinancialStatementPeriod[];
  filings: FilingDocumentRef[];
  definitions: KpiDefinition[];
}): Promise<StructuredKpiRow[]> {
  const rows = new Map<string, StructuredKpiRow>();

  // Per-call cache of parsed table rows keyed by filing id. `null` records a
  // filing whose document could not be fetched, so it is not requested again.
  const tableRowsByFilingId = new Map<number, string[][] | null>();
  const loadTableRows = async (filing: FilingDocumentRef): Promise<string[][] | null> => {
    const cached = tableRowsByFilingId.get(filing.filingId);
    if (cached !== undefined) {
      return cached;
    }

    const html = await fetchHtml(filing);
    const parsed = html ? collectNoteTableRows(html) : null;
    tableRowsByFilingId.set(filing.filingId, parsed);
    return parsed;
  };

  for (const definition of input.definitions) {
    // Lower-case the tokens once per definition instead of once per table row.
    const tokens = (definition.noteLabelIncludes ?? []).map((token) => token.toLowerCase());
    if (tokens.length === 0) {
      continue;
    }

    for (const period of input.periods) {
      const filing = input.filings.find((entry) => entry.filingId === period.filingId);
      if (!filing) {
        continue;
      }

      const tableRows = await loadTableRows(filing);
      if (!tableRows) {
        continue;
      }

      for (const cells of tableRows) {
        const label = cells[0] ?? '';
        const normalizedLabel = label.toLowerCase();
        if (!tokens.some((token) => normalizedLabel.includes(token))) {
          continue;
        }

        const numericCell = cells.slice(1).map(parseNumericCell).find((value) => value !== null) ?? null;
        if (numericCell === null) {
          continue;
        }

        // A label identical to the definition's own label maps to the bare
        // definition key; any other label gets a label-derived suffix.
        const key = buildRowKey(definition, label === definition.label ? '' : label);
        const existing = rows.get(key);
        if (existing) {
          existing.values[period.id] = numericCell;
          continue;
        }

        rows.set(key, {
          key,
          label: label || definition.label,
          category: definition.category,
          unit: definition.unit,
          order: 500,
          segment: null,
          axis: null,
          member: null,
          values: { [period.id]: numericCell },
          sourceConcepts: [],
          sourceFactIds: [],
          provenanceType: 'structured_note',
          hasDimensions: false
        });
      }
    }
  }

  return [...rows.values()].sort((left, right) => left.label.localeCompare(right.label));
}
|