// SEC EDGAR helpers: ticker lookup, recent filings, primary-document text, and XBRL company-facts metrics.
import type { Filing } from '@/lib/types';

// Payload and record shapes used by the SEC helpers in this file.
/** The `filing_type` field of the shared `Filing` model. */
type FilingType = Filing['filing_type'];
/** The `metrics` field of the shared `Filing` model with `null`/`undefined` excluded. */
type FilingMetrics = NonNullable<Filing['metrics']>;

/** One entry of the SEC `company_tickers.json` directory, as loaded by `ensureTickerCache`. */
type TickerDirectoryRecord = {
  /** CIK as a number; stringified by `resolveTicker`. */
  cik_str: number;
  /** Ticker symbol; upper-cased to form the cache key. */
  ticker: string;
  /** Company name; surfaced as `companyName` by `resolveTicker`. */
  title: string;
};

/**
 * The parts of the `data.sec.gov/submissions` response that this file reads.
 * Every field is optional because the JSON is cast to this type without validation.
 */
type RecentFilingsPayload = {
  filings?: {
    /** Parallel arrays: `fetchRecentFilings` reads index `i` of each array as one filing. */
    recent?: {
      accessionNumber?: string[];
      filingDate?: string[];
      form?: string[];
      primaryDocument?: string[];
    };
  };
  cik?: string;
  /** Preferred over the ticker directory's title when present. */
  name?: string;
};

/**
 * The parts of the `data.sec.gov/api/xbrl/companyfacts` response that this file reads:
 * `facts['us-gaap'][tag].units[unit]` is a list of reported data points.
 */
type CompanyFactsPayload = {
  facts?: {
    'us-gaap'?: Record<string, { units?: Record<string, CompanyFactPoint[]> }>;
  };
};

/** A single reported value inside a company-facts unit series. All fields are optional. */
type CompanyFactPoint = {
  /** The reported number; points without a finite `val` are dropped by `collectFactSeries`. */
  val?: number;
  /** Used for dating a point only when `filed` is absent. */
  end?: string;
  /** Preferred date when ordering points. */
  filed?: string;
  /** Accession number; compared digit-for-digit with the filing's accession number. */
  accn?: string;
  /** Form type; compared with the filing's type after `normalizeForm`. */
  form?: string;
  // The remaining fields are declared but not read anywhere in this file.
  fy?: number;
  fp?: string;
  frame?: string;
};

/** One filing row produced by `fetchRecentFilings`. */
type SecFiling = {
  /** Upper-cased, trimmed ticker the lookup was made with. */
  ticker: string;
  /** CIK as an unpadded string. */
  cik: string;
  companyName: string;
  /** Base form type; amendment suffixes have been stripped by `normalizeForm`. */
  filingType: FilingType;
  filingDate: string;
  /** Accession number as reported by the SEC (dashes included). */
  accessionNumber: string;
  /** Archive URL of the primary document, or `null` when no document name was reported. */
  filingUrl: string | null;
  /** URL of the submissions JSON the row was read from. */
  submissionUrl: string | null;
  primaryDocument: string | null;
};

/** Identifying fields needed to locate a filing's primary document. */
type FilingDocumentInput = {
  /** Used verbatim (after trimming) when non-empty; otherwise the URL is rebuilt from the other fields. */
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
  primaryDocument: string | null;
};

/** Optional overrides for `fetchPrimaryFilingText`. */
type FetchPrimaryFilingTextOptions = {
  /** Replacement for the global `fetch`. */
  fetchImpl?: typeof fetch;
  /** Character budget for the returned text; defaults to `FILING_TEXT_MAX_CHARS`. */
  maxChars?: number;
};

/** Plain-text rendering of a filing's primary document, as returned by `fetchPrimaryFilingText`. */
export type FilingDocumentText = {
  source: 'primary_document';
  /** URL the text was downloaded from. */
  url: string;
  /** Normalized text, clipped to the character budget. */
  text: string;
  /** `true` when the normalized text was longer than the budget and had to be cut. */
  truncated: boolean;
};

/** The fields of a filing that are used to select matching company-facts data points. */
type FilingMetricsLookupInput = {
  /** Matched against each fact's `accn`; an empty string disables accession matching. */
  accessionNumber: string;
  /** Target date for the closest-date fallback; an unparseable value selects the most recent fact. */
  filingDate: string;
  /** Facts reported on this form are preferred over facts from other forms. */
  filingType: FilingType;
};

/** Form types kept by `fetchRecentFilings`; every other form is skipped. */
const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];
/** Maximum age of the in-memory ticker directory before it is re-fetched (12 hours). */
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12;
/** Default character budget for filing text. */
const FILING_TEXT_MAX_CHARS = 24_000;

/**
 * us-gaap tags tried for each metric, in priority order: the first tag that
 * yields a value wins (see `pickFactByTags`).
 */
const METRIC_TAGS = {
  revenue: [
    'Revenues',
    'SalesRevenueNet',
    'RevenueFromContractWithCustomerExcludingAssessedTax',
    'TotalRevenuesAndOtherIncome'
  ],
  netIncome: ['NetIncomeLoss', 'ProfitLoss'],
  totalAssets: ['Assets'],
  cash: [
    'CashAndCashEquivalentsAtCarryingValue',
    'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'
  ],
  debt: [
    'LongTermDebtAndCapitalLeaseObligations',
    'LongTermDebtNoncurrent',
    'LongTermDebt',
    'DebtAndFinanceLeaseLiabilities'
  ]
} as const;

/** In-memory ticker directory keyed by upper-cased ticker; replaced wholesale by `ensureTickerCache`. */
let tickerCache = new Map<string, TickerDirectoryRecord>();
/** `Date.now()` timestamp of the last successful directory load; `0` means never loaded. */
let tickerCacheLoadedAt = 0;

/**
 * User-Agent header value for SEC requests.
 *
 * @returns `SEC_USER_AGENT` from the environment, or a built-in default when
 * the variable is unset or empty (`||` is intentional so `''` also falls back).
 */
function envUserAgent() {
  return process.env.SEC_USER_AGENT || 'Fiscal Clone <support@fiscal.local>';
}

/** Current date as `YYYY-MM-DD`, taken from the UTC ISO timestamp. */
function todayIso() {
  return new Date().toISOString().slice(0, 10);
}

/**
 * Decodes the HTML entities that commonly appear in filing text: the named
 * entities nbsp, amp, lt, gt, quot and apos (case-insensitive) plus decimal
 * and hexadecimal numeric references.
 *
 * All entities are decoded in a single pass, so text produced by one
 * replacement is never re-scanned: an escaped ampersand followed by `lt;`
 * stays a literal entity instead of collapsing into `<`.
 *
 * @param value Text that may contain entities.
 * @returns The text with recognised entities replaced. Non-breaking spaces
 * become a regular space; numeric references outside the Unicode range
 * become a single space. Unrecognised entities are left untouched.
 */
function decodeHtmlEntities(value: string) {
  const namedEntities: Record<string, string> = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: '\''
  };

  const decodeCodePoint = (code: number) => {
    if (!Number.isFinite(code) || code < 0 || code > 0x10ffff) {
      return ' ';
    }

    // U+00A0 (non-breaking space) is flattened to a plain space, like the named form.
    if (code === 0xa0) {
      return ' ';
    }

    try {
      return String.fromCodePoint(code);
    } catch {
      return ' ';
    }
  };

  // Capture groups: 1 = hex digits, 2 = decimal digits, 3 = entity name.
  const entityPattern = /&(?:#x([0-9a-f]+)|#([0-9]+)|(nbsp|amp|lt|gt|quot|apos));/gi;

  return value.replace(
    entityPattern,
    (match: string, hex: string | undefined, decimal: string | undefined, name: string | undefined) => {
      if (hex !== undefined) {
        return decodeCodePoint(Number.parseInt(hex, 16));
      }

      if (decimal !== undefined) {
        return decodeCodePoint(Number.parseInt(decimal, 10));
      }

      return namedEntities[(name ?? '').toLowerCase()] ?? match;
    }
  );
}

/**
 * Converts a raw filing document (HTML or plain text) into compact plain text.
 *
 * Steps, in order: normalise line endings, drop script/style/noscript blocks
 * and comments, turn block-level tags into newlines and all other tags into
 * spaces, decode HTML entities, then collapse redundant whitespace.
 * Entities are decoded only after tags are stripped, so an escaped `<` in the
 * text is never mistaken for markup.
 *
 * @param raw Document body as downloaded.
 * @returns Trimmed text with at most one blank line between paragraphs.
 */
export function normalizeSecDocumentText(raw: string) {
  const withoutMarkup = raw
    // CRLF and lone CR both become a single newline, so Windows line endings
    // do not turn every line break into a blank line.
    .replace(/\r\n?/g, '\n')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, ' ')
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<\/?(p|div|section|article|li|tr|td|th|h[1-6]|br|hr)[^>]*>/gi, '\n')
    .replace(/<[^>]+>/g, ' ');

  return decodeHtmlEntities(withoutMarkup)
    .replace(/[ \t]+\n/g, '\n')
    .replace(/\n[ \t]+/g, '\n')
    .replace(/[ \t]{2,}/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}

/**
 * Clips text to a character budget, preferring to cut at a whitespace boundary.
 *
 * @param text Text to clip.
 * @param maxChars Requested budget. Fractions are truncated and the effective
 * budget is never below 1,000; `NaN` is treated as the 1,000 floor.
 * @returns The original text with `truncated: false` when it fits. Otherwise
 * the clipped text with `truncated: true`: the cut moves back to the last
 * newline or space if that lies beyond 70% of the budget, trailing whitespace
 * is removed, and a surrogate pair is never split.
 */
export function trimSecDocumentTextForPrompt(text: string, maxChars = FILING_TEXT_MAX_CHARS) {
  // Math.max(NaN, 1000) is NaN, which would make every slice empty; pin NaN to the floor instead.
  const requested = Number.isNaN(maxChars) ? 0 : Math.trunc(maxChars);
  const safeMax = Math.max(requested, 1_000);
  if (text.length <= safeMax) {
    return { text, truncated: false };
  }

  let slice = text.slice(0, safeMax);

  // A cut between the two halves of a surrogate pair leaves a lone high surrogate; drop it.
  const lastUnit = slice.charCodeAt(slice.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    slice = slice.slice(0, -1);
  }

  const boundary = Math.max(slice.lastIndexOf('\n'), slice.lastIndexOf(' '));
  // Only back up to the boundary when that does not discard more than 30% of the budget.
  const clipped = (boundary > safeMax * 0.7 ? slice.slice(0, boundary) : slice).trimEnd();

  return { text: clipped, truncated: true };
}

/** Removes every dash from an accession number, giving the form used in archive URL paths. */
function compactAccessionNumber(value: string) {
  return value.replace(/-/g, '');
}

/**
 * Reduces an accession number to its digits so that differently formatted
 * values can be compared.
 *
 * @returns The digits only; `''` for `null`, `undefined`, or digit-free input.
 */
function normalizeAccessionKey(value: string | undefined | null) {
  return (value ?? '').replace(/\D/g, '');
}

/**
 * Canonicalises a form type: trims, upper-cases, and strips a trailing `/A`
 * amendment suffix so an amendment compares equal to its base form.
 *
 * @returns The canonical form, or `''` for missing or blank input.
 */
function normalizeForm(value: string | undefined | null) {
  const normalized = (value ?? '').trim().toUpperCase();
  if (!normalized) {
    return '';
  }

  return normalized.endsWith('/A') ? normalized.slice(0, -2) : normalized;
}

/**
 * Parses a date string into epoch milliseconds with `Date.parse`.
 *
 * @returns The timestamp, or `NaN` when the value is missing, empty, or unparseable.
 */
function parseDate(value: string | undefined | null) {
  if (!value) {
    return Number.NaN;
  }

  return Date.parse(value);
}

/**
 * Converts a CIK in any formatting (zero-padded, prefixed, dashed) into the
 * unpadded digit string used in archive URL paths.
 *
 * Leading zeros are stripped textually rather than through `Number`, so long
 * digit strings are not rounded or rendered in exponent notation.
 *
 * @returns The digits without leading zeros (`'0'` when all digits are zero),
 * or `null` when the value contains no digits.
 */
function normalizeCikForPath(value: string) {
  const digits = value.replace(/\D/g, '');
  if (!digits) {
    return null;
  }

  // The lookahead keeps the final digit so an all-zero input yields '0', not ''.
  return digits.replace(/^0+(?=\d)/, '');
}

/**
 * Determines the URL of a filing's primary document.
 *
 * @param input Filing identifiers.
 * @returns `input.filingUrl` (trimmed) when it is non-empty. Otherwise an
 * `Archives/edgar/data` URL built from the CIK, the dash-free accession number
 * and the primary document name; `null` when any of those parts is missing.
 */
export function resolvePrimaryFilingUrl(input: FilingDocumentInput) {
  const directUrl = input.filingUrl?.trim();
  if (directUrl) {
    return directUrl;
  }

  // A blank document name cannot form a usable URL, so treat it like a missing one.
  const documentName = input.primaryDocument?.trim();
  if (!documentName) {
    return null;
  }

  const cikPath = normalizeCikForPath(input.cik);
  const accessionPath = compactAccessionNumber(input.accessionNumber);
  if (!cikPath || !accessionPath) {
    return null;
  }

  return `https://www.sec.gov/Archives/edgar/data/${cikPath}/${accessionPath}/${documentName}`;
}

/**
 * Downloads a filing's primary document and returns it as clipped plain text.
 *
 * @param input Filing identifiers used to resolve the document URL.
 * @param options Optional `fetch` replacement and character budget
 * (defaults to `FILING_TEXT_MAX_CHARS`).
 * @returns The text with its source URL and a truncation flag, or `null` when
 * no URL can be resolved or the document contains no text after normalisation.
 * @throws Error when the response status is not OK; rejections from the
 * `fetch` call itself are not caught here.
 */
export async function fetchPrimaryFilingText(
  input: FilingDocumentInput,
  options?: FetchPrimaryFilingTextOptions
): Promise<FilingDocumentText | null> {
  const url = resolvePrimaryFilingUrl(input);
  if (!url) {
    return null;
  }

  const doFetch = options?.fetchImpl ?? fetch;
  const response = await doFetch(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/html, text/plain;q=0.9, */*;q=0.8'
    },
    cache: 'no-store'
  });

  if (!response.ok) {
    throw new Error(`SEC filing request failed (${response.status})`);
  }

  const raw = await response.text();
  const normalized = normalizeSecDocumentText(raw);
  if (!normalized) {
    return null;
  }

  const clipped = trimSecDocumentTextForPrompt(normalized, options?.maxChars ?? FILING_TEXT_MAX_CHARS);
  if (!clipped.text) {
    return null;
  }

  return {
    source: 'primary_document',
    url,
    text: clipped.text,
    truncated: clipped.truncated
  };
}

/**
 * GETs a URL and parses the body as JSON, sending the configured User-Agent
 * and bypassing the fetch cache.
 *
 * @typeParam T Shape the caller expects. The body is cast to it, not validated.
 * @throws Error when the response status is not OK.
 */
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'application/json'
    },
    cache: 'no-store'
  });

  if (!response.ok) {
    throw new Error(`SEC request failed (${response.status})`);
  }

  return (await response.json()) as T;
}

/**
 * Loads the SEC ticker directory into `tickerCache` unless a non-empty copy
 * younger than `TICKER_CACHE_TTL_MS` is already held.
 *
 * The new map is built completely before it replaces the old one, so a failed
 * download leaves the previous cache in place.
 *
 * @throws Error when the directory request fails (from `fetchJson`).
 */
async function ensureTickerCache() {
  const isFresh = Date.now() - tickerCacheLoadedAt < TICKER_CACHE_TTL_MS;
  if (isFresh && tickerCache.size > 0) {
    return;
  }

  const payload = await fetchJson<Record<string, TickerDirectoryRecord>>('https://www.sec.gov/files/company_tickers.json');
  const next = new Map<string, TickerDirectoryRecord>();

  for (const record of Object.values(payload ?? {})) {
    // The payload is unvalidated JSON: skip malformed entries rather than abort the whole load.
    if (typeof record?.ticker !== 'string' || record.ticker.length === 0) {
      continue;
    }

    next.set(record.ticker.toUpperCase(), record);
  }

  tickerCache = next;
  tickerCacheLoadedAt = Date.now();
}

/**
 * Looks a ticker up in the SEC directory.
 *
 * @param ticker Symbol in any case, optionally surrounded by whitespace.
 * @returns The normalised ticker, the CIK as an unpadded string, and the company name.
 * @throws Error when the ticker is not in the directory, or when the
 * directory cannot be loaded.
 */
async function resolveTicker(ticker: string) {
  await ensureTickerCache();

  const normalized = ticker.trim().toUpperCase();
  const record = tickerCache.get(normalized);
  if (!record) {
    throw new Error(`Ticker ${normalized} not found in SEC directory`);
  }

  return {
    ticker: normalized,
    cik: String(record.cik_str),
    companyName: record.title
  };
}

/**
 * Returns the most recent value reported for a tag, without targeting a
 * particular filing.
 *
 * Implemented by calling `pickFactForFiling` with an empty accession number
 * and date, which disables accession matching and makes the date step pick
 * the most recent point.
 *
 * NOTE(review): the placeholder form `'10-Q'` means points reported on a 10-Q
 * are preferred, and other forms are used only when no 10-Q point exists.
 * Confirm this bias is intended.
 *
 * @returns The value, or `null` when the tag has no usable data points.
 */
function pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null {
  return pickFactForFiling(payload, tag, {
    accessionNumber: '',
    filingDate: '',
    filingType: '10-Q'
  });
}

/**
 * Gathers the data points reported for one us-gaap tag.
 *
 * Units whose name starts with `USD` (case-insensitive) are preferred, except
 * `USD/shares`. Points from all other units are returned only when no such
 * USD unit has data.
 *
 * @returns The selected points that carry a finite numeric `val`; an empty
 * array when the tag is absent or has no units.
 */
function collectFactSeries(payload: CompanyFactsPayload, tag: string): CompanyFactPoint[] {
  const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units;
  if (!unitCollections) {
    return [];
  }

  // Matches 'USD' itself as well as longer USD-prefixed units, but not 'USD/shares'.
  const usdUnit = /^USD(?!\/shares)/i;
  const usdSeries: CompanyFactPoint[] = [];
  const fallbackSeries: CompanyFactPoint[] = [];

  for (const [unit, series] of Object.entries(unitCollections)) {
    if (!Array.isArray(series) || series.length === 0) {
      continue;
    }

    const target = usdUnit.test(unit) ? usdSeries : fallbackSeries;
    target.push(...series);
  }

  const points = usdSeries.length > 0 ? usdSeries : fallbackSeries;

  return points.filter((point) => typeof point.val === 'number' && Number.isFinite(point.val));
}

/**
 * Picks the point with the latest date, where a point's date is `filed`, or
 * `end` when `filed` is absent.
 *
 * Points with a parseable date always beat points without one. Among equally
 * ranked points the earliest in the input wins. The input array is not modified.
 *
 * @returns The chosen point, or `null` for an empty array.
 */
function pickMostRecentFact(points: CompanyFactPoint[]) {
  let best: CompanyFactPoint | null = null;
  let bestDate = Number.NaN;

  // A single pass is enough: only the top-ranked point is needed, not a full ordering.
  for (const point of points) {
    const date = parseDate(point.filed ?? point.end);

    if (best === null) {
      best = point;
      bestDate = date;
      continue;
    }

    // Strict comparison keeps the first of any tied points.
    const isNewer = Number.isFinite(date) && (!Number.isFinite(bestDate) || date > bestDate);
    if (isNewer) {
      best = point;
      bestDate = date;
    }
  }

  return best;
}

/**
 * Picks the point that best matches a target date, where a point's date is
 * `filed`, or `end` when `filed` is absent.
 *
 * Selection order:
 * 1. No usable target date, or no point has a parseable date: the most recent point.
 * 2. Otherwise the latest point dated on or before the target.
 * 3. If every dated point is after the target: the one nearest to it.
 *
 * @param targetDate Epoch milliseconds; a non-finite value means "no target".
 * @returns The chosen point, or `null` for an empty array.
 */
function pickClosestByDate(points: CompanyFactPoint[], targetDate: number) {
  if (points.length === 0) {
    return null;
  }

  if (!Number.isFinite(targetDate)) {
    return pickMostRecentFact(points);
  }

  const dated = points
    .map((point) => ({ point, date: parseDate(point.filed ?? point.end) }))
    .filter((entry) => Number.isFinite(entry.date));

  if (dated.length === 0) {
    return pickMostRecentFact(points);
  }

  const beforeTarget = dated.filter((entry) => entry.date <= targetDate);
  if (beforeTarget.length > 0) {
    // Newest first: the head is the latest point not after the target.
    return beforeTarget.sort((a, b) => b.date - a.date)[0]?.point ?? null;
  }

  return dated.sort((a, b) => {
    const distance = Math.abs(a.date - targetDate) - Math.abs(b.date - targetDate);
    if (distance !== 0) {
      return distance;
    }

    // Equal distance: prefer the later date.
    return b.date - a.date;
  })[0]?.point ?? null;
}

/**
 * Selects the value of one us-gaap tag that best corresponds to a filing.
 *
 * Tried in order, returning the first finite value found:
 * 1. Points whose accession number matches the filing's (most recent of them).
 * 2. Points reported on the same form type, closest to the filing date.
 * 3. Any point, closest to the filing date.
 *
 * @returns The value, or `null` when the tag has no usable data points.
 */
function pickFactForFiling(
  payload: CompanyFactsPayload,
  tag: string,
  filing: FilingMetricsLookupInput
): number | null {
  const points = collectFactSeries(payload, tag);
  if (points.length === 0) {
    return null;
  }

  const finiteValue = (point: CompanyFactPoint | null) =>
    typeof point?.val === 'number' && Number.isFinite(point.val) ? point.val : null;

  // An empty accession key would match nothing useful, so the step is skipped entirely.
  const accessionKey = normalizeAccessionKey(filing.accessionNumber);
  if (accessionKey) {
    const byAccession = points.filter((point) => normalizeAccessionKey(point.accn) === accessionKey);
    const matched = finiteValue(pickMostRecentFact(byAccession));
    if (matched !== null) {
      return matched;
    }
  }

  const filingForm = normalizeForm(filing.filingType);
  const byForm = filingForm
    ? points.filter((point) => normalizeForm(point.form) === filingForm)
    : points;

  const targetDate = parseDate(filing.filingDate);

  return finiteValue(pickClosestByDate(byForm, targetDate))
    ?? finiteValue(pickClosestByDate(points, targetDate));
}

/**
 * Tries several us-gaap tags in order and returns the first value found for the filing.
 *
 * @param tags Candidate tags, highest priority first.
 * @returns The first non-null result of `pickFactForFiling`, or `null` when no tag has data.
 */
function pickFactByTags(
  payload: CompanyFactsPayload,
  tags: readonly string[],
  filing: FilingMetricsLookupInput
) {
  for (const tag of tags) {
    const value = pickFactForFiling(payload, tag, filing);
    if (value !== null) {
      return value;
    }
  }

  return null;
}

/** Builds a fresh metrics object with every metric set to `null`. */
function emptyMetrics(): FilingMetrics {
  return {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };
}

/**
 * Lists a company's most recent 10-K, 10-Q and 8-K filings (amendments
 * included, reported under their base form type).
 *
 * @param ticker Ticker symbol; case and surrounding whitespace are ignored.
 * @param limit Maximum number of filings, truncated to an integer and clamped
 * to 1..50. `NaN` falls back to the default of 20.
 * @returns Filings in the order the SEC lists them; an empty array when the
 * response has no recent-filings section. Entries without an accession number
 * are skipped, and a missing filing date is replaced by today's date.
 * @throws Error when the ticker is unknown or an SEC request fails.
 */
export async function fetchRecentFilings(ticker: string, limit = 20): Promise<SecFiling[]> {
  // Math.max/Math.min propagate NaN, which would silently disable the cap below.
  const requestedLimit = Number.isNaN(limit) ? 20 : Math.trunc(limit);
  const safeLimit = Math.min(Math.max(requestedLimit, 1), 50);

  const company = await resolveTicker(ticker);
  const cikPadded = company.cik.padStart(10, '0');
  const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`;
  const payload = await fetchJson<RecentFilingsPayload>(submissionUrl);
  const recent = payload.filings?.recent;

  if (!recent) {
    return [];
  }

  // Parallel arrays: index i of each describes the same filing.
  const forms = recent.form ?? [];
  const accessionNumbers = recent.accessionNumber ?? [];
  const filingDates = recent.filingDate ?? [];
  const primaryDocuments = recent.primaryDocument ?? [];
  const filings: SecFiling[] = [];

  for (let i = 0; i < forms.length; i += 1) {
    const normalizedForm = normalizeForm(forms[i]);
    // Looking the form up in SUPPORTED_FORMS yields a checked FilingType without a type assertion.
    const filingType = SUPPORTED_FORMS.find((form) => form === normalizedForm);
    if (!filingType) {
      continue;
    }

    const accessionNumber = accessionNumbers[i];
    if (!accessionNumber) {
      continue;
    }

    const documentName = primaryDocuments[i];
    const filingUrl = documentName
      ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccessionNumber(accessionNumber)}/${documentName}`
      : null;

    filings.push({
      ticker: company.ticker,
      cik: company.cik,
      companyName: payload.name ?? company.companyName,
      filingType,
      filingDate: filingDates[i] ?? todayIso(),
      accessionNumber,
      filingUrl,
      submissionUrl,
      primaryDocument: documentName ?? null
    });

    if (filings.length >= safeLimit) {
      break;
    }
  }

  return filings;
}

/**
 * Fetches a company's XBRL company facts and returns the latest value of each
 * headline metric.
 *
 * Each metric is read from its primary tag first. When that tag has no data,
 * the alternates listed for the metric in `METRIC_TAGS` are tried in order,
 * so companies that report under a different tag still get a value.
 *
 * @param cik CIK as a digit string; it is left-padded with zeros to 10 characters.
 * @returns The metrics object; a metric is `null` when none of its tags has data.
 * @throws Error when the company-facts request fails.
 */
export async function fetchLatestFilingMetrics(cik: string) {
  const normalized = cik.padStart(10, '0');
  const payload = await fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`);

  const latest = (primaryTag: string, alternates: readonly string[]) => {
    const candidates = [primaryTag, ...alternates.filter((tag) => tag !== primaryTag)];
    for (const tag of candidates) {
      const value = pickLatestFact(payload, tag);
      if (value !== null) {
        return value;
      }
    }

    return null;
  };

  return {
    revenue: latest('Revenues', METRIC_TAGS.revenue),
    netIncome: latest('NetIncomeLoss', METRIC_TAGS.netIncome),
    totalAssets: latest('Assets', METRIC_TAGS.totalAssets),
    cash: latest('CashAndCashEquivalentsAtCarryingValue', METRIC_TAGS.cash),
    debt: latest('LongTermDebt', METRIC_TAGS.debt)
  } satisfies FilingMetrics;
}

/**
 * Resolves headline metrics for a batch of filings from one company-facts download.
 *
 * This is best-effort: if the download or the extraction throws, every filing
 * is mapped to all-`null` metrics instead of the error propagating.
 *
 * @param cik CIK as a digit string; it is left-padded with zeros to 10 characters.
 * @param _ticker Unused by this function.
 * @param filings Filings to resolve. An empty list returns an empty map without any request.
 * @returns Map from each filing's accession number (as given) to its metrics.
 */
export async function fetchFilingMetricsForFilings(
  cik: string,
  _ticker: string,
  filings: FilingMetricsLookupInput[]
) {
  const metricsByAccession = new Map<string, FilingMetrics>();
  if (filings.length === 0) {
    return metricsByAccession;
  }

  try {
    const normalized = cik.padStart(10, '0');
    const payload = await fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`);

    for (const filing of filings) {
      metricsByAccession.set(filing.accessionNumber, {
        revenue: pickFactByTags(payload, METRIC_TAGS.revenue, filing),
        netIncome: pickFactByTags(payload, METRIC_TAGS.netIncome, filing),
        totalAssets: pickFactByTags(payload, METRIC_TAGS.totalAssets, filing),
        cash: pickFactByTags(payload, METRIC_TAGS.cash, filing),
        debt: pickFactByTags(payload, METRIC_TAGS.debt, filing)
      });
    }

    return metricsByAccession;
  } catch {
    // Deliberate fallback: callers get a complete map with empty metrics rather than an exception.
    for (const filing of filings) {
      metricsByAccession.set(filing.accessionNumber, emptyMetrics());
    }

    return metricsByAccession;
  }
}