Files
Neon-Desk/lib/server/sec.ts

582 lines
15 KiB
TypeScript

import type { Filing } from '@/lib/types';
/** Filing form identifier, derived from the `filing_type` field of the shared `Filing` model. */
type FilingType = Filing['filing_type'];
/** The `metrics` object of the shared `Filing` model with `null`/`undefined` excluded. */
type FilingMetrics = NonNullable<Filing['metrics']>;
/** One record of the ticker directory JSON that `ensureTickerCache` loads and indexes by ticker. */
type TickerDirectoryRecord = {
// CIK as a number; `resolveTicker` converts it to a string.
cik_str: number;
// Ticker symbol; upper-cased to form the cache key.
ticker: string;
// Company title; returned by `resolveTicker` as `companyName`.
title: string;
};
/**
 * Subset of the submissions JSON that `fetchRecentFilings` reads.
 *
 * The arrays under `filings.recent` are consumed as parallel arrays: the same
 * index across them describes one filing. Every field is optional because the
 * payload is not validated before use.
 */
type RecentFilingsPayload = {
filings?: {
recent?: {
accessionNumber?: string[];
filingDate?: string[];
form?: string[];
primaryDocument?: string[];
};
};
// Declared but not read in this file.
cik?: string;
// Preferred company name; `fetchRecentFilings` falls back to the ticker directory title when absent.
name?: string;
};
/**
 * Subset of the companyfacts JSON read by the metric helpers.
 *
 * Only the `us-gaap` section is typed: it is keyed by tag name, and each tag
 * holds `units`, a map from unit name to a series of fact points.
 */
type CompanyFactsPayload = {
facts?: {
'us-gaap'?: Record<string, { units?: Record<string, CompanyFactPoint[]> }>;
};
};
/** A single reported value inside a companyfacts unit series. All fields are optional. */
type CompanyFactPoint = {
// Reported value; `collectFactSeries` drops points whose `val` is not a finite number.
val?: number;
// Date string; used as the fallback sort/lookup date when `filed` is missing.
end?: string;
// Date string; the primary date used when ordering or matching points.
filed?: string;
// Accession number; compared digits-only against the filing being looked up.
accn?: string;
// Form name; compared after `normalizeForm`.
form?: string;
// The remaining fields are declared but not read in this file.
fy?: number;
fp?: string;
frame?: string;
};
/** One filing as returned by `fetchRecentFilings`. */
type SecFiling = {
// Upper-cased, trimmed ticker as resolved by `resolveTicker`.
ticker: string;
// CIK as a string (not zero-padded).
cik: string;
companyName: string;
// Normalized form name (any trailing `/A` removed).
filingType: FilingType;
// Filing date string from the feed; `fetchRecentFilings` substitutes today's date when the feed has none at that index.
filingDate: string;
accessionNumber: string;
// URL of the primary document, or null when the feed names no primary document.
filingUrl: string | null;
// URL of the submissions JSON the filing was read from.
submissionUrl: string | null;
primaryDocument: string | null;
};
/**
 * Identifies the primary document of a filing for `resolvePrimaryFilingUrl`.
 *
 * A non-blank `filingUrl` is used as-is; otherwise the URL is derived from
 * `cik`, `accessionNumber` and `primaryDocument`.
 */
type FilingDocumentInput = {
filingUrl: string | null;
cik: string;
accessionNumber: string;
primaryDocument: string | null;
};
/** Optional overrides for `fetchPrimaryFilingText`. */
type FetchPrimaryFilingTextOptions = {
// Replacement for the global `fetch` (used instead of it when provided).
fetchImpl?: typeof fetch;
// Character budget for the returned text; defaults to `FILING_TEXT_MAX_CHARS`.
maxChars?: number;
};
/** Result of `fetchPrimaryFilingText`: the normalized, possibly clipped text of a filing document. */
export type FilingDocumentText = {
// Origin of the text; only the primary document is produced by this file.
source: 'primary_document';
// URL the text was fetched from.
url: string;
// Plain text after markup removal, entity decoding and clipping.
text: string;
// True when the text was clipped to fit the character budget.
truncated: boolean;
};
/**
 * Describes the filing a metric value should be matched to.
 *
 * `pickFactForFiling` tries `accessionNumber` first, then `filingType` together
 * with `filingDate`, then `filingDate` alone.
 */
type FilingMetricsLookupInput = {
accessionNumber: string;
filingDate: string;
filingType: FilingType;
};
/** Forms kept by `fetchRecentFilings`; compared after `normalizeForm`, so `/A` variants match their base form. */
const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];
/** Maximum age of the in-memory ticker directory before it is re-fetched: 12 hours, in milliseconds. */
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12;
/** Default character budget used when clipping filing text. */
const FILING_TEXT_MAX_CHARS = 24_000;
/**
 * Candidate `us-gaap` tag names for each metric.
 *
 * `pickFactByTags` tries the tags of a metric in the order listed here and
 * returns the first one that yields a value, so order expresses preference.
 */
const METRIC_TAGS = {
revenue: [
'Revenues',
'SalesRevenueNet',
'RevenueFromContractWithCustomerExcludingAssessedTax',
'TotalRevenuesAndOtherIncome'
],
netIncome: ['NetIncomeLoss', 'ProfitLoss'],
totalAssets: ['Assets'],
cash: [
'CashAndCashEquivalentsAtCarryingValue',
'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'
],
debt: [
'LongTermDebtAndCapitalLeaseObligations',
'LongTermDebtNoncurrent',
'LongTermDebt',
'DebtAndFinanceLeaseLiabilities'
]
} as const;
// Module-level ticker directory keyed by upper-cased ticker; replaced wholesale by `ensureTickerCache`.
let tickerCache = new Map<string, TickerDirectoryRecord>();
// `Date.now()` timestamp of the last successful directory load; 0 means never loaded.
let tickerCacheLoadedAt = 0;
/**
 * Returns the `User-Agent` header value sent with SEC requests.
 *
 * The `SEC_USER_AGENT` environment variable wins when it is set to a non-empty
 * string; an unset or empty variable yields the built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;
  if (configured) {
    return configured;
  }
  return 'Fiscal Clone <support@fiscal.local>';
}
/** Returns the current date as the first ten characters (`YYYY-MM-DD`) of the UTC ISO timestamp. */
function todayIso() {
  const isoTimestamp = new Date().toISOString();
  return isoTimestamp.substring(0, 10);
}
/**
 * Decodes a small set of HTML character references in a single pass.
 *
 * Supported: the named entities `&nbsp;`, `&amp;`, `&lt;`, `&gt;`, `&quot;`
 * and `&apos;` (case-insensitive), decimal references (`&#NN;`) and
 * hexadecimal references (`&#xHH;`). Any other `&...;` sequence is returned
 * unchanged.
 *
 * Every reference is decoded exactly once. Decoding in one pass matters:
 * replacing `&amp;` first and the other entities afterwards would turn the
 * escaped text `&amp;lt;` into `<` instead of the intended `&lt;`.
 *
 * @param value Text that may contain character references.
 * @returns The text with supported references replaced. A no-break space in
 *   any spelling (`&nbsp;`, `&#160;`, `&#xA0;`) becomes a plain space, and
 *   numeric references outside the Unicode range become a single space.
 */
function decodeHtmlEntities(value: string) {
  const namedEntities: Record<string, string> = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: '\''
  };

  const decodeCodePoint = (code: number) => {
    if (!Number.isFinite(code) || code < 0 || code > 0x10ffff) {
      return ' ';
    }
    // Keep numeric no-break spaces consistent with `&nbsp;`, whatever the radix.
    if (code === 0xa0) {
      return ' ';
    }
    try {
      return String.fromCodePoint(code);
    } catch {
      return ' ';
    }
  };

  return value.replace(
    /&(?:#x([0-9a-f]+)|#([0-9]+)|(nbsp|amp|lt|gt|quot|apos));/gi,
    (match: string, hex?: string, dec?: string, named?: string) => {
      if (hex !== undefined) {
        return decodeCodePoint(Number.parseInt(hex, 16));
      }
      if (dec !== undefined) {
        return decodeCodePoint(Number.parseInt(dec, 10));
      }
      if (named !== undefined) {
        return namedEntities[named.toLowerCase()] ?? match;
      }
      return match;
    }
  );
}
/**
 * Converts a raw filing document (HTML or plain text) into compact plain text.
 *
 * Steps, in order:
 * 1. Line endings are unified to `\n`. A CRLF pair counts as ONE line break;
 *    mapping each `\r` to `\n` on its own would double every line break of a
 *    CRLF document.
 * 2. `<script>`, `<style>`, `<noscript>` elements and HTML comments are removed.
 * 3. Block-level tags (paragraphs, divs, list items, table cells, headings,
 *    `<br>`, `<hr>`, ...) become line breaks; every other tag becomes a space.
 * 4. Character references are decoded. This happens after tag removal so that
 *    escaped markup such as `&lt;b&gt;` survives as literal text.
 * 5. Whitespace is tidied: spaces/tabs around line breaks are dropped, runs of
 *    spaces/tabs collapse to one space, and three or more consecutive line
 *    breaks collapse to two.
 *
 * @param raw Document body as fetched.
 * @returns Trimmed plain text (may be an empty string).
 */
export function normalizeSecDocumentText(raw: string) {
  const withoutMarkup = raw
    .replace(/\r\n?/g, '\n')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, ' ')
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<\/?(p|div|section|article|li|tr|td|th|h[1-6]|br|hr)[^>]*>/gi, '\n')
    .replace(/<[^>]+>/g, ' ');

  return decodeHtmlEntities(withoutMarkup)
    .replace(/[ \t]+\n/g, '\n')
    .replace(/\n[ \t]+/g, '\n')
    .replace(/[ \t]{2,}/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
/**
 * Clips text to a character budget, preferring to cut at a word or line boundary.
 *
 * @param text Text to clip.
 * @param maxChars Requested budget in UTF-16 code units. It is truncated to an
 *   integer and raised to a floor of 1,000; `NaN` falls back to the default
 *   budget instead of producing an empty result.
 * @returns `{ text, truncated }`. When the input fits, it is returned unchanged
 *   with `truncated: false`. Otherwise the text is cut at the last space or
 *   line break, provided that boundary lies beyond 70% of the budget (so a
 *   single long token cannot discard most of the budget); failing that it is
 *   cut hard at the budget. Trailing whitespace is removed from the result.
 */
export function trimSecDocumentTextForPrompt(text: string, maxChars = FILING_TEXT_MAX_CHARS) {
  // Math.max(NaN, 1000) is NaN, which would make every comparison below false.
  const requested = Number.isNaN(maxChars) ? FILING_TEXT_MAX_CHARS : Math.trunc(maxChars);
  const safeMax = Math.max(requested, 1_000);
  if (text.length <= safeMax) {
    return { text, truncated: false };
  }

  let slice = text.slice(0, safeMax);
  // A hard cut may land between the two halves of a surrogate pair; drop the
  // dangling high surrogate so the result stays well-formed.
  const lastUnit = slice.charCodeAt(slice.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    slice = slice.slice(0, -1);
  }

  const newlineBoundary = slice.lastIndexOf('\n');
  const wordBoundary = slice.lastIndexOf(' ');
  const boundary = Math.max(newlineBoundary, wordBoundary);
  const clipped = (boundary > safeMax * 0.7 ? slice.slice(0, boundary) : slice).trimEnd();
  return { text: clipped, truncated: true };
}
/** Removes every hyphen from an accession number, leaving all other characters untouched. */
function compactAccessionNumber(value: string) {
  return value.split('-').join('');
}
/**
 * Reduces an accession number to its digits so differently formatted values compare equal.
 *
 * @returns The digits of `value`, or an empty string for `null`/`undefined`.
 */
function normalizeAccessionKey(value: string | undefined | null) {
  if (value == null) {
    return '';
  }
  return value.replace(/[^0-9]/g, '');
}
/**
 * Canonicalizes a form name: trims it, upper-cases it and removes one trailing `/A`.
 *
 * @returns The canonical form name, or an empty string for missing/blank input.
 */
function normalizeForm(value: string | undefined | null) {
  const amendmentSuffix = '/A';
  const upper = (value ?? '').trim().toUpperCase();
  if (upper.endsWith(amendmentSuffix)) {
    return upper.slice(0, upper.length - amendmentSuffix.length);
  }
  return upper;
}
/**
 * Parses a date string into a millisecond timestamp via `Date.parse`.
 *
 * @returns The timestamp, or `NaN` when the value is missing, empty or unparseable.
 */
function parseDate(value: string | undefined | null) {
  return value ? Date.parse(value) : Number.NaN;
}
/**
 * Produces the CIK form used inside archive URLs: digits only, without leading zeros.
 *
 * @returns The numeric CIK as a string, or `null` when the input contains no
 *   digits or the digits do not convert to a finite number.
 */
function normalizeCikForPath(value: string) {
  const digitsOnly = value.replace(/[^0-9]/g, '');
  if (digitsOnly.length === 0) {
    return null;
  }
  const asNumber = Number(digitsOnly);
  return Number.isFinite(asNumber) ? String(asNumber) : null;
}
/**
 * Determines the URL of a filing's primary document.
 *
 * A non-blank `filingUrl` is returned trimmed. Otherwise the URL is assembled
 * from the normalized CIK, the hyphen-free accession number and the primary
 * document name.
 *
 * @returns The URL, or `null` when there is no usable `filingUrl` and the
 *   primary document name, CIK or accession number is missing.
 */
export function resolvePrimaryFilingUrl(input: FilingDocumentInput) {
  const { filingUrl, primaryDocument, cik, accessionNumber } = input;

  const explicitUrl = filingUrl?.trim();
  if (explicitUrl) {
    return explicitUrl;
  }
  if (!primaryDocument) {
    return null;
  }

  const cikSegment = normalizeCikForPath(cik);
  const accessionSegment = compactAccessionNumber(accessionNumber);
  if (cikSegment && accessionSegment) {
    return `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/${primaryDocument}`;
  }
  return null;
}
/**
 * Downloads a filing's primary document and returns it as clipped plain text.
 *
 * @param input Identifies the document; see `resolvePrimaryFilingUrl`.
 * @param options Optional `fetchImpl` (used instead of the global `fetch`) and
 *   `maxChars` (character budget, defaulting to `FILING_TEXT_MAX_CHARS`).
 * @returns The document text with its URL and a truncation flag, or `null` when
 *   no URL can be resolved or the document contains no text after normalization.
 * @throws Error when the response status is not OK.
 */
export async function fetchPrimaryFilingText(
  input: FilingDocumentInput,
  options?: FetchPrimaryFilingTextOptions
): Promise<FilingDocumentText | null> {
  const documentUrl = resolvePrimaryFilingUrl(input);
  if (!documentUrl) {
    return null;
  }

  const request = options?.fetchImpl ?? fetch;
  const response = await request(documentUrl, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/html, text/plain;q=0.9, */*;q=0.8'
    },
    cache: 'no-store'
  });
  if (!response.ok) {
    throw new Error(`SEC filing request failed (${response.status})`);
  }

  const plainText = normalizeSecDocumentText(await response.text());
  if (!plainText) {
    return null;
  }

  const charBudget = options?.maxChars ?? FILING_TEXT_MAX_CHARS;
  const { text, truncated } = trimSecDocumentTextForPrompt(plainText, charBudget);
  if (!text) {
    return null;
  }

  return { source: 'primary_document', url: documentUrl, text, truncated };
}
/**
 * Performs a GET request for JSON and returns the parsed body typed as `T`.
 *
 * The body is not validated; `T` is an unchecked assertion by the caller.
 *
 * @throws Error when the response status is not OK.
 */
async function fetchJson<T>(url: string): Promise<T> {
  const headers = {
    'User-Agent': envUserAgent(),
    Accept: 'application/json'
  };
  const response = await fetch(url, { headers, cache: 'no-store' });
  if (response.ok) {
    return (await response.json()) as T;
  }
  throw new Error(`SEC request failed (${response.status})`);
}
/**
 * Makes sure the module-level ticker directory is loaded and not older than
 * `TICKER_CACHE_TTL_MS`.
 *
 * When the cache is empty or stale, the directory is fetched again and the
 * cache is replaced as a whole, keyed by upper-cased ticker. A failed fetch
 * propagates and leaves the previous cache and timestamp untouched.
 */
async function ensureTickerCache() {
  const ageMs = Date.now() - tickerCacheLoadedAt;
  if (ageMs < TICKER_CACHE_TTL_MS && tickerCache.size > 0) {
    return;
  }

  const directory = await fetchJson<Record<string, TickerDirectoryRecord>>('https://www.sec.gov/files/company_tickers.json');
  const entries = Object.values(directory).map(
    (record): [string, TickerDirectoryRecord] => [record.ticker.toUpperCase(), record]
  );

  tickerCache = new Map<string, TickerDirectoryRecord>(entries);
  tickerCacheLoadedAt = Date.now();
}
/**
 * Looks a ticker up in the ticker directory (loading or refreshing it first).
 *
 * @param ticker Ticker symbol; surrounding whitespace and letter case are ignored.
 * @returns The normalized ticker, its CIK as a string and the company title.
 * @throws Error when the directory has no entry for the ticker.
 */
async function resolveTicker(ticker: string) {
  await ensureTickerCache();

  const symbol = ticker.trim().toUpperCase();
  const entry = tickerCache.get(symbol);
  if (entry) {
    return {
      ticker: symbol,
      cik: String(entry.cik_str),
      companyName: entry.title
    };
  }
  throw new Error(`Ticker ${symbol} not found in SEC directory`);
}
/**
 * Returns the most recent value reported for a tag, or `null` when there is none.
 *
 * Delegates to `pickFactForFiling` with an empty accession number and an empty
 * filing date, which skips accession matching and makes the date lookup fall
 * back to "most recent". Because the lookup names form `10-Q`, points reported
 * on that form are preferred; other forms are only considered when no 10-Q
 * point yields a value.
 */
function pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null {
  const unanchoredLookup: FilingMetricsLookupInput = {
    accessionNumber: '',
    filingDate: '',
    filingType: '10-Q'
  };
  return pickFactForFiling(payload, tag, unanchoredLookup);
}
/**
 * Gathers the usable fact points reported for a `us-gaap` tag.
 *
 * Points are bucketed by unit name: units starting with `USD` (case-insensitive)
 * that are not immediately followed by `/shares` go into the dollar bucket,
 * everything else into the fallback bucket. The dollar bucket is used whenever
 * it is non-empty; otherwise the fallback bucket is used.
 *
 * @returns The chosen points whose `val` is a finite number, in payload order;
 *   an empty array when the tag or its units are missing.
 */
function collectFactSeries(payload: CompanyFactsPayload, tag: string): CompanyFactPoint[] {
  const unitsByName = payload.facts?.['us-gaap']?.[tag]?.units;
  if (!unitsByName) {
    return [];
  }

  // Also matches the bare unit name "USD".
  const dollarUnit = /^USD(?!\/shares)/i;
  const dollarPoints: CompanyFactPoint[] = [];
  const otherPoints: CompanyFactPoint[] = [];

  for (const [unitName, series] of Object.entries(unitsByName)) {
    if (!Array.isArray(series) || series.length === 0) {
      continue;
    }
    const bucket = dollarUnit.test(unitName) ? dollarPoints : otherPoints;
    bucket.push(...series);
  }

  const chosen = dollarPoints.length > 0 ? dollarPoints : otherPoints;
  return chosen.filter((point) => typeof point.val === 'number' && Number.isFinite(point.val));
}
/**
 * Picks the most recent point from a series without mutating the input.
 *
 * Ordering:
 * 1. By `filed` (falling back to `end` when `filed` is missing), newest first;
 *    points without a parseable date sort last.
 * 2. Ties are broken by `end`, newest first. This matters because one filing
 *    can report the current period alongside comparative prior-period values,
 *    all sharing the same `filed` date; without the tie-break the winner would
 *    depend on input order and could be a prior-period value.
 *
 * NOTE(review): points with identical `filed` and `end` (for example figures
 * covering different durations that end on the same day) still resolve by
 * input order; the point type carries no period start to separate them.
 *
 * @returns The selected point, or `null` for an empty series.
 */
function pickMostRecentFact(points: CompanyFactPoint[]) {
  // Newest-first comparison of two timestamps; non-finite timestamps sort last.
  const compareNewestFirst = (aTime: number, bTime: number) => {
    const aValid = Number.isFinite(aTime);
    const bValid = Number.isFinite(bTime);
    if (aValid && bValid) {
      return bTime - aTime;
    }
    if (bValid) {
      return 1;
    }
    if (aValid) {
      return -1;
    }
    return 0;
  };

  return [...points].sort((a, b) => {
    const byFiled = compareNewestFirst(parseDate(a.filed ?? a.end), parseDate(b.filed ?? b.end));
    if (byFiled !== 0) {
      return byFiled;
    }
    return compareNewestFirst(parseDate(a.end), parseDate(b.end));
  })[0] ?? null;
}
/**
 * Picks the point that best matches a target date.
 *
 * Each point is dated by `filed`, falling back to `end`. Selection rules:
 * - Empty series: `null`.
 * - Target date not finite, or no point has a parseable date: defer to
 *   `pickMostRecentFact`.
 * - Otherwise prefer the latest point dated on or before the target.
 * - If every dated point lies after the target, take the one closest to it.
 *
 * Points sharing the same date are ordered by `end`, newest first. One filing
 * can report several periods under the same `filed` date, so without this
 * tie-break the winner would depend on input order and could be a
 * prior-period comparative value.
 *
 * @param points Candidate points; not mutated.
 * @param targetDate Target timestamp in milliseconds (may be `NaN`).
 * @returns The selected point, or `null`.
 */
function pickClosestByDate(points: CompanyFactPoint[], targetDate: number) {
  if (points.length === 0) {
    return null;
  }
  if (!Number.isFinite(targetDate)) {
    return pickMostRecentFact(points);
  }

  const dated = points
    .map((point) => ({
      point,
      date: parseDate(point.filed ?? point.end),
      periodEnd: parseDate(point.end)
    }))
    .filter((entry) => Number.isFinite(entry.date));
  if (dated.length === 0) {
    return pickMostRecentFact(points);
  }

  // Newest date first; equal dates fall back to the newest period end, with
  // entries lacking a parseable period end sorted last.
  const newestFirst = (a: (typeof dated)[number], b: (typeof dated)[number]) => {
    if (a.date !== b.date) {
      return b.date - a.date;
    }
    const aValid = Number.isFinite(a.periodEnd);
    const bValid = Number.isFinite(b.periodEnd);
    if (aValid && bValid) {
      return b.periodEnd - a.periodEnd;
    }
    if (bValid) {
      return 1;
    }
    if (aValid) {
      return -1;
    }
    return 0;
  };

  const beforeTarget = dated.filter((entry) => entry.date <= targetDate);
  if (beforeTarget.length > 0) {
    return beforeTarget.sort(newestFirst)[0]?.point ?? null;
  }

  return dated.sort((a, b) => {
    const distance = Math.abs(a.date - targetDate) - Math.abs(b.date - targetDate);
    if (distance !== 0) {
      return distance;
    }
    return newestFirst(a, b);
  })[0]?.point ?? null;
}
/**
 * Finds the value of a tag that best corresponds to a given filing.
 *
 * Lookup order, stopping at the first finite value:
 * 1. Points whose accession number matches the filing's (digits-only
 *    comparison); the most recent of them is used.
 * 2. Points reported on the same normalized form, matched by filing date.
 * 3. Any point of the tag, matched by filing date.
 *
 * @returns The value, or `null` when the tag has no usable points.
 */
function pickFactForFiling(
  payload: CompanyFactsPayload,
  tag: string,
  filing: FilingMetricsLookupInput
): number | null {
  const series = collectFactSeries(payload, tag);
  if (series.length === 0) {
    return null;
  }

  // Extracts a finite numeric value from an optional point.
  const finiteValue = (point: CompanyFactPoint | null | undefined): number | null =>
    typeof point?.val === 'number' && Number.isFinite(point.val) ? point.val : null;

  const wantedAccession = normalizeAccessionKey(filing.accessionNumber);
  if (wantedAccession) {
    const sameAccession = series.filter((point) => normalizeAccessionKey(point.accn) === wantedAccession);
    if (sameAccession.length > 0) {
      const accessionValue = finiteValue(pickMostRecentFact(sameAccession));
      if (accessionValue !== null) {
        return accessionValue;
      }
    }
  }

  const wantedForm = normalizeForm(filing.filingType);
  const sameForm = wantedForm
    ? series.filter((point) => normalizeForm(point.form) === wantedForm)
    : series;
  const filedAt = parseDate(filing.filingDate);

  const formValue = finiteValue(pickClosestByDate(sameForm, filedAt));
  if (formValue !== null) {
    return formValue;
  }
  return finiteValue(pickClosestByDate(series, filedAt));
}
/**
 * Tries several candidate tags in order and returns the first value found.
 *
 * @param tags Candidate tag names, most preferred first.
 * @returns The first non-null result of `pickFactForFiling`, or `null` when no
 *   tag yields a value.
 */
function pickFactByTags(
  payload: CompanyFactsPayload,
  tags: readonly string[],
  filing: FilingMetricsLookupInput
) {
  for (const candidate of tags) {
    const found = pickFactForFiling(payload, candidate, filing);
    if (found === null) {
      continue;
    }
    return found;
  }
  return null;
}
/** Creates a fresh metrics object in which every metric is `null`. */
function emptyMetrics(): FilingMetrics {
  const blank: FilingMetrics = {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };
  return blank;
}
/**
 * Fetches the most recent supported filings (see `SUPPORTED_FORMS`) for a ticker.
 *
 * Form names are normalized first, so `/A` variants are reported under their
 * base form. Entries without an accession number are skipped. A missing filing
 * date is replaced by today's date.
 *
 * @param ticker Ticker symbol; resolved through the ticker directory.
 * @param limit Maximum number of filings to return. Truncated to an integer
 *   and clamped to 1..50; `NaN` falls back to the default of 20 (previously a
 *   `NaN` limit disabled the cap altogether).
 * @returns Filings in feed order; an empty array when the feed has no recent
 *   filings section.
 * @throws Error when the ticker is unknown or a request fails.
 */
export async function fetchRecentFilings(ticker: string, limit = 20): Promise<SecFiling[]> {
  // Math.max/Math.min propagate NaN, and `length >= NaN` is never true.
  const requestedLimit = Number.isNaN(limit) ? 20 : Math.trunc(limit);
  const safeLimit = Math.min(Math.max(requestedLimit, 1), 50);

  const company = await resolveTicker(ticker);
  const cikPadded = company.cik.padStart(10, '0');
  const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`;
  const payload = await fetchJson<RecentFilingsPayload>(submissionUrl);

  const recent = payload.filings?.recent;
  if (!recent) {
    return [];
  }

  // Parallel arrays: index i across them describes one filing.
  const forms = recent.form ?? [];
  const accessionNumbers = recent.accessionNumber ?? [];
  const filingDates = recent.filingDate ?? [];
  const primaryDocuments = recent.primaryDocument ?? [];

  const filings: SecFiling[] = [];
  for (let i = 0; i < forms.length && filings.length < safeLimit; i += 1) {
    const normalizedForm = normalizeForm(forms[i]);
    // Looking the form up yields a properly typed value without a cast.
    const filingType = SUPPORTED_FORMS.find((form) => form === normalizedForm);
    if (!filingType) {
      continue;
    }

    const accessionNumber = accessionNumbers[i];
    if (!accessionNumber) {
      continue;
    }

    const primaryDocument = primaryDocuments[i] ?? null;
    filings.push({
      ticker: company.ticker,
      cik: company.cik,
      companyName: payload.name ?? company.companyName,
      filingType,
      filingDate: filingDates[i] ?? todayIso(),
      accessionNumber,
      // Shared helper keeps URL construction in one place; it yields null when
      // there is no primary document name.
      filingUrl: resolvePrimaryFilingUrl({
        filingUrl: null,
        cik: company.cik,
        accessionNumber,
        primaryDocument
      }),
      submissionUrl,
      primaryDocument
    });
  }

  return filings;
}
/**
 * Fetches a company's facts and returns the latest value of each headline metric.
 *
 * Each metric is read from a single fixed tag through `pickLatestFact`; a
 * metric is `null` when that tag has no usable value.
 *
 * @param cik Company CIK; left-padded with zeros to ten characters for the request.
 * @throws Error when the request fails.
 */
export async function fetchLatestFilingMetrics(cik: string) {
  const paddedCik = cik.padStart(10, '0');
  const facts = await fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${paddedCik}.json`);
  const latest = (tag: string) => pickLatestFact(facts, tag);

  return {
    revenue: latest('Revenues'),
    netIncome: latest('NetIncomeLoss'),
    totalAssets: latest('Assets'),
    cash: latest('CashAndCashEquivalentsAtCarryingValue'),
    debt: latest('LongTermDebt')
  } satisfies FilingMetrics;
}
/**
 * Resolves headline metrics for a batch of filings of one company.
 *
 * The company's facts are fetched once and each filing's metrics are looked up
 * with the candidate tags in `METRIC_TAGS`. This is best-effort: if the request
 * or any lookup throws, every filing is mapped to all-null metrics and no error
 * is surfaced to the caller.
 *
 * @param cik Company CIK; left-padded with zeros to ten characters for the request.
 * @param _ticker Unused.
 * @param filings Filings to resolve metrics for.
 * @returns Map from each filing's accession number (as given) to its metrics;
 *   empty when `filings` is empty (no request is made in that case).
 */
export async function fetchFilingMetricsForFilings(
  cik: string,
  _ticker: string,
  filings: FilingMetricsLookupInput[]
) {
  const result = new Map<string, FilingMetrics>();
  if (filings.length === 0) {
    return result;
  }

  try {
    const paddedCik = cik.padStart(10, '0');
    const facts = await fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${paddedCik}.json`);

    for (const filing of filings) {
      const lookup = (tags: readonly string[]) => pickFactByTags(facts, tags, filing);
      result.set(filing.accessionNumber, {
        revenue: lookup(METRIC_TAGS.revenue),
        netIncome: lookup(METRIC_TAGS.netIncome),
        totalAssets: lookup(METRIC_TAGS.totalAssets),
        cash: lookup(METRIC_TAGS.cash),
        debt: lookup(METRIC_TAGS.debt)
      });
    }
  } catch {
    // Deliberate fallback: overwrite any partial results with all-null metrics.
    filings.forEach((filing) => {
      result.set(filing.accessionNumber, emptyMetrics());
    });
  }

  return result;
}