import type { Filing } from '@/lib/types';

type FilingType = Filing['filing_type'];
// NOTE(review): the type argument was missing in the source text; `Filing['metrics']`
// is assumed from the shape returned by emptyMetrics() — confirm against '@/lib/types'.
type FilingMetrics = NonNullable<Filing['metrics']>;

/** One entry of the ticker directory payload fetched by ensureTickerCache(). */
type TickerDirectoryRecord = {
  cik_str: number;
  ticker: string;
  title: string;
};

/** Subset of the submissions payload read by fetchRecentFilings(). */
type RecentFilingsPayload = {
  filings?: {
    recent?: {
      accessionNumber?: string[];
      filingDate?: string[];
      form?: string[];
      primaryDocument?: string[];
    };
  };
  cik?: string;
  name?: string;
};

/** Subset of the company-facts payload read by collectFactSeries(). */
type CompanyFactsPayload = {
  facts?: {
    'us-gaap'?: Record<string, { units?: Record<string, CompanyFactPoint[]> }>;
  };
};

/** A single reported data point inside a fact's unit series. */
type CompanyFactPoint = {
  val?: number;
  end?: string;
  filed?: string;
  accn?: string;
  form?: string;
  fy?: number;
  fp?: string;
  frame?: string;
};

type SecFiling = {
  ticker: string;
  cik: string;
  companyName: string;
  filingType: FilingType;
  filingDate: string;
  accessionNumber: string;
  filingUrl: string | null;
  submissionUrl: string | null;
  primaryDocument: string | null;
};

type FilingDocumentInput = {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
  primaryDocument: string | null;
};

type FetchPrimaryFilingTextOptions = {
  fetchImpl?: typeof fetch;
  maxChars?: number;
};

export type FilingDocumentText = {
  source: 'primary_document';
  url: string;
  text: string;
  truncated: boolean;
};

type FilingMetricsLookupInput = {
  accessionNumber: string;
  filingDate: string;
  filingType: FilingType;
};

const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12;
const FILING_TEXT_MAX_CHARS = 24_000;
const DEFAULT_RECENT_FILINGS_LIMIT = 20;
const MAX_RECENT_FILINGS_LIMIT = 50;

/** Candidate us-gaap tags per metric, tried in order until one yields a value. */
const METRIC_TAGS = {
  revenue: [
    'Revenues',
    'SalesRevenueNet',
    'RevenueFromContractWithCustomerExcludingAssessedTax',
    'TotalRevenuesAndOtherIncome'
  ],
  netIncome: ['NetIncomeLoss', 'ProfitLoss'],
  totalAssets: ['Assets'],
  cash: [
    'CashAndCashEquivalentsAtCarryingValue',
    'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'
  ],
  debt: [
    'LongTermDebtAndCapitalLeaseObligations',
    'LongTermDebtNoncurrent',
    'LongTermDebt',
    'DebtAndFinanceLeaseLiabilities'
  ]
} as const;

/** Named entities understood by decodeHtmlEntities(); keys are lower-case. */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['nbsp', ' '],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', '\'']
]);

/** Matches hex (`&#x41;`), decimal (`&#65;`) and the supported named entities. */
const HTML_ENTITY_PATTERN = /&(?:#x([0-9a-f]+)|#([0-9]+)|(nbsp|amp|lt|gt|quot|apos));/gi;

// Ticker directory keyed by upper-cased ticker, refreshed after TICKER_CACHE_TTL_MS.
let tickerCache = new Map<string, TickerDirectoryRecord>();
let tickerCacheLoadedAt = 0;

/** Returns the User-Agent header value, preferring the SEC_USER_AGENT env variable. */
function envUserAgent(): string {
  // NOTE(review): the fallback literal ends with a trailing space, which suggests a
  // bracketed contact address was lost from it — confirm the intended value.
  return process.env.SEC_USER_AGENT || 'Fiscal Clone ';
}

/** Current date as `YYYY-MM-DD` (taken from the UTC ISO timestamp). */
function todayIso(): string {
  return new Date().toISOString().slice(0, 10);
}

/** Converts a numeric character reference to text; invalid code points become a space. */
function decodeCodePoint(code: number): string {
  if (!Number.isFinite(code) || code < 0 || code > 0x10ffff) {
    return ' ';
  }
  // A non-breaking space is emitted as a plain space so the whitespace collapsing in
  // normalizeSecDocumentText() (which only targets ' ' and '\t') can handle it.
  if (code === 0xa0) {
    return ' ';
  }
  try {
    return String.fromCodePoint(code);
  } catch {
    return ' ';
  }
}

/**
 * Decodes the HTML entities commonly found in filing documents.
 *
 * Decoding happens in a single pass, so already-escaped text such as `&amp;lt;`
 * yields `&lt;` rather than being decoded twice into `<`.
 *
 * @param value Text that may contain named or numeric entities.
 * @returns The text with supported entities replaced; unknown entities are left as-is.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    HTML_ENTITY_PATTERN,
    (match: string, hex?: string, decimal?: string, name?: string) => {
      if (hex !== undefined) {
        return decodeCodePoint(Number.parseInt(hex, 16));
      }
      if (decimal !== undefined) {
        return decodeCodePoint(Number.parseInt(decimal, 10));
      }
      return NAMED_HTML_ENTITIES.get((name ?? '').toLowerCase()) ?? match;
    }
  );
}

/**
 * Turns raw filing markup into plain text.
 *
 * Block-level tags become newlines, every other tag becomes a space, entities are
 * decoded, and runs of blanks / empty lines are collapsed.
 *
 * @param raw Raw document body (HTML or plain text).
 * @returns Trimmed plain text; may be an empty string.
 */
export function normalizeSecDocumentText(raw: string): string {
  // NOTE(review): the four removal patterns below were unreadable in the source text and
  // are reconstructed as script/style/noscript/comment stripping — confirm against VCS.
  const withoutMarkup = raw
    .replace(/\r/g, '\n')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, ' ')
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<\/?(p|div|section|article|li|tr|td|th|h[1-6]|br|hr)[^>]*>/gi, '\n')
    .replace(/<[^>]+>/g, ' ');

  return decodeHtmlEntities(withoutMarkup)
    .replace(/[ \t]+\n/g, '\n')
    .replace(/\n[ \t]+/g, '\n')
    .replace(/[ \t]{2,}/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}

/**
 * Clips text to a character budget, preferring to cut at a newline or space.
 *
 * @param text Text to clip.
 * @param maxChars Requested budget; values below 1,000 are raised to 1,000 and NaN
 *   falls back to the default budget.
 * @returns The (possibly clipped) text and whether clipping happened.
 */
export function trimSecDocumentTextForPrompt(text: string, maxChars = FILING_TEXT_MAX_CHARS) {
  // Math.max(NaN, 1_000) is NaN, which would make slice() return an empty string.
  const requested = Number.isNaN(maxChars) ? FILING_TEXT_MAX_CHARS : maxChars;
  const safeMax = Math.max(Math.trunc(requested), 1_000);
  if (text.length <= safeMax) {
    return { text, truncated: false };
  }

  const slice = text.slice(0, safeMax);
  const boundary = Math.max(slice.lastIndexOf('\n'), slice.lastIndexOf(' '));
  // Only back up to the boundary when that keeps more than 70% of the budget.
  const clipped = (boundary > safeMax * 0.7 ? slice.slice(0, boundary) : slice).trimEnd();
  return { text: clipped, truncated: true };
}

/** Removes the dashes from an accession number. */
function compactAccessionNumber(value: string): string {
  return value.replace(/-/g, '');
}

/** Reduces an accession number to its digits so differently formatted values compare equal. */
function normalizeAccessionKey(value: string | undefined | null): string {
  return (value ?? '').replace(/\D/g, '');
}

/** Upper-cases and trims a form name and drops a trailing `/A` suffix. */
function normalizeForm(value: string | undefined | null): string {
  const normalized = (value ?? '').trim().toUpperCase();
  if (!normalized) {
    return '';
  }
  return normalized.endsWith('/A') ? normalized.slice(0, -2) : normalized;
}

/** Narrows a normalized form name to one of SUPPORTED_FORMS. */
function isSupportedForm(form: string): form is FilingType {
  return (SUPPORTED_FORMS as readonly string[]).includes(form);
}

/** Parses a date string to epoch milliseconds; returns NaN for empty or unparsable input. */
function parseDate(value: string | undefined | null): number {
  if (!value) {
    return Number.NaN;
  }
  return Date.parse(value);
}

/** Strips non-digits and leading zeros from a CIK; returns null when no digits remain. */
function normalizeCikForPath(value: string): string | null {
  const digits = value.replace(/\D/g, '');
  if (!digits) {
    return null;
  }
  const numeric = Number(digits);
  if (!Number.isFinite(numeric)) {
    return null;
  }
  return String(numeric);
}

/**
 * Resolves the URL of a filing's primary document.
 *
 * @param input Filing identifiers; a non-blank `filingUrl` wins over the derived URL.
 * @returns The trimmed `filingUrl`, a URL built from CIK / accession number / document
 *   name, or null when there is not enough information to build one.
 */
export function resolvePrimaryFilingUrl(input: FilingDocumentInput): string | null {
  const directUrl = input.filingUrl?.trim();
  if (directUrl) {
    return directUrl;
  }
  if (!input.primaryDocument) {
    return null;
  }

  const cikPath = normalizeCikForPath(input.cik);
  const accessionPath = compactAccessionNumber(input.accessionNumber);
  if (!cikPath || !accessionPath) {
    return null;
  }
  return `https://www.sec.gov/Archives/edgar/data/${cikPath}/${accessionPath}/${input.primaryDocument}`;
}

/**
 * Downloads a filing's primary document and returns it as prompt-sized plain text.
 *
 * @param input Filing identifiers used to resolve the document URL.
 * @param options Optional fetch implementation and character budget.
 * @returns The normalized text, or null when no URL can be resolved or the document
 *   contains no text.
 * @throws Error when the HTTP response status is not OK.
 */
export async function fetchPrimaryFilingText(
  input: FilingDocumentInput,
  options?: FetchPrimaryFilingTextOptions
): Promise<FilingDocumentText | null> {
  const url = resolvePrimaryFilingUrl(input);
  if (!url) {
    return null;
  }

  const doFetch = options?.fetchImpl ?? fetch;
  const response = await doFetch(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/html, text/plain;q=0.9, */*;q=0.8'
    },
    cache: 'no-store'
  });
  if (!response.ok) {
    throw new Error(`SEC filing request failed (${response.status})`);
  }

  const normalized = normalizeSecDocumentText(await response.text());
  if (!normalized) {
    return null;
  }

  const clipped = trimSecDocumentTextForPrompt(normalized, options?.maxChars ?? FILING_TEXT_MAX_CHARS);
  if (!clipped.text) {
    return null;
  }
  return {
    source: 'primary_document',
    url,
    text: clipped.text,
    truncated: clipped.truncated
  };
}

/**
 * Fetches a URL and parses the body as JSON.
 *
 * The payload is not validated; `T` is the caller's assumption about its shape.
 *
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'application/json'
    },
    cache: 'no-store'
  });
  if (!response.ok) {
    throw new Error(`SEC request failed (${response.status})`);
  }
  return (await response.json()) as T;
}

/** Reloads the ticker directory when the cache is empty or older than the TTL. */
async function ensureTickerCache(): Promise<void> {
  const isFresh = Date.now() - tickerCacheLoadedAt < TICKER_CACHE_TTL_MS;
  if (isFresh && tickerCache.size > 0) {
    return;
  }

  const payload = await fetchJson<Record<string, TickerDirectoryRecord>>(
    'https://www.sec.gov/files/company_tickers.json'
  );
  const next = new Map<string, TickerDirectoryRecord>();
  for (const record of Object.values(payload)) {
    // The payload is unvalidated JSON; skip entries that lack a usable ticker
    // instead of failing the whole refresh.
    if (typeof record?.ticker !== 'string') {
      continue;
    }
    next.set(record.ticker.toUpperCase(), record);
  }
  tickerCache = next;
  tickerCacheLoadedAt = Date.now();
}

/**
 * Looks up a ticker in the cached directory.
 *
 * @throws Error when the ticker is not present in the directory.
 */
async function resolveTicker(ticker: string) {
  await ensureTickerCache();
  const normalized = ticker.trim().toUpperCase();
  const record = tickerCache.get(normalized);
  if (!record) {
    throw new Error(`Ticker ${normalized} not found in SEC directory`);
  }
  return {
    ticker: normalized,
    cik: String(record.cik_str),
    companyName: record.title
  };
}

/**
 * Picks the most recently dated value for a tag.
 *
 * The lookup is delegated with an empty accession number and date and a '10-Q' filing
 * type, so points whose form is 10-Q are preferred and any other point is only used
 * when no 10-Q point exists.
 */
function pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null {
  return pickFactForFiling(payload, tag, {
    accessionNumber: '',
    filingDate: '',
    filingType: '10-Q'
  });
}

/**
 * Collects the data points for a us-gaap tag.
 *
 * Series whose unit is USD-based (excluding `USD/shares`) are preferred; series in
 * other units are only used when no USD series exists. Points without a finite
 * numeric `val` are dropped.
 */
function collectFactSeries(payload: CompanyFactsPayload, tag: string): CompanyFactPoint[] {
  const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units;
  if (!unitCollections) {
    return [];
  }

  const usdSeries: CompanyFactPoint[] = [];
  const fallbackSeries: CompanyFactPoint[] = [];
  for (const [unit, series] of Object.entries(unitCollections)) {
    if (!Array.isArray(series) || series.length === 0) {
      continue;
    }
    const isUsd = unit === 'USD' || /^USD(?!\/shares)/i.test(unit);
    const target = isUsd ? usdSeries : fallbackSeries;
    // Appended one by one rather than via spread so that a very long series cannot
    // exceed the engine's argument-count limit.
    for (const point of series) {
      target.push(point);
    }
  }

  const points = usdSeries.length > 0 ? usdSeries : fallbackSeries;
  return points.filter((point) => typeof point.val === 'number' && Number.isFinite(point.val));
}

/** Returns the point with the latest `filed` (or `end`) date; undated points sort last. */
function pickMostRecentFact(points: CompanyFactPoint[]): CompanyFactPoint | null {
  const sorted = [...points].sort((a, b) => {
    const aDate = parseDate(a.filed ?? a.end);
    const bDate = parseDate(b.filed ?? b.end);
    if (Number.isFinite(aDate) && Number.isFinite(bDate)) {
      return bDate - aDate;
    }
    if (Number.isFinite(bDate)) {
      return 1;
    }
    if (Number.isFinite(aDate)) {
      return -1;
    }
    return 0;
  });
  return sorted[0] ?? null;
}

/**
 * Picks the point that best matches a target date.
 *
 * Preference order: the latest point dated on or before the target; otherwise the
 * point closest to the target (ties resolved toward the later date). Falls back to
 * pickMostRecentFact() when the target or every point date is unparsable.
 */
function pickClosestByDate(points: CompanyFactPoint[], targetDate: number): CompanyFactPoint | null {
  if (points.length === 0) {
    return null;
  }
  if (!Number.isFinite(targetDate)) {
    return pickMostRecentFact(points);
  }

  const dated = points
    .map((point) => ({ point, date: parseDate(point.filed ?? point.end) }))
    .filter((entry) => Number.isFinite(entry.date));
  if (dated.length === 0) {
    return pickMostRecentFact(points);
  }

  const beforeTarget = dated.filter((entry) => entry.date <= targetDate);
  if (beforeTarget.length > 0) {
    return beforeTarget.sort((a, b) => b.date - a.date)[0]?.point ?? null;
  }

  const byDistance = dated.sort((a, b) => {
    const distance = Math.abs(a.date - targetDate) - Math.abs(b.date - targetDate);
    return distance !== 0 ? distance : b.date - a.date;
  });
  return byDistance[0]?.point ?? null;
}

/** Returns a point's value when it is a finite number, otherwise null. */
function finiteValueOf(point: CompanyFactPoint | null): number | null {
  const value = point?.val;
  return typeof value === 'number' && Number.isFinite(value) ? value : null;
}

/**
 * Picks the value of a tag that best corresponds to a given filing.
 *
 * Resolution order:
 * 1. points whose accession number matches the filing (most recent one wins);
 * 2. points reported on the same form type, closest to the filing date;
 * 3. any point, closest to the filing date.
 *
 * @returns The matched value, or null when the tag has no usable points.
 */
function pickFactForFiling(
  payload: CompanyFactsPayload,
  tag: string,
  filing: FilingMetricsLookupInput
): number | null {
  const points = collectFactSeries(payload, tag);
  if (points.length === 0) {
    return null;
  }

  const accessionKey = normalizeAccessionKey(filing.accessionNumber);
  if (accessionKey) {
    const byAccession = points.filter((point) => normalizeAccessionKey(point.accn) === accessionKey);
    const matched = byAccession.length > 0 ? finiteValueOf(pickMostRecentFact(byAccession)) : null;
    if (matched !== null) {
      return matched;
    }
  }

  const filingForm = normalizeForm(filing.filingType);
  const byForm = filingForm
    ? points.filter((point) => normalizeForm(point.form) === filingForm)
    : points;
  const targetDate = parseDate(filing.filingDate);

  const bestByForm = finiteValueOf(pickClosestByDate(byForm, targetDate));
  if (bestByForm !== null) {
    return bestByForm;
  }
  return finiteValueOf(pickClosestByDate(points, targetDate));
}

/** Tries each tag in order and returns the first non-null value for the filing. */
function pickFactByTags(
  payload: CompanyFactsPayload,
  tags: readonly string[],
  filing: FilingMetricsLookupInput
): number | null {
  for (const tag of tags) {
    const value = pickFactForFiling(payload, tag, filing);
    if (value !== null) {
      return value;
    }
  }
  return null;
}

/** Metrics object with every field set to null. */
function emptyMetrics(): FilingMetrics {
  return {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };
}

/**
 * Lists a company's recent 10-K / 10-Q / 8-K filings (amendments included, reported
 * under their base form type).
 *
 * @param ticker Ticker symbol; matched case-insensitively.
 * @param limit Maximum number of filings, clamped to 1..50; NaN falls back to 20.
 * @returns Filings in the order they appear in the submissions payload.
 * @throws Error when the ticker is unknown or a request fails.
 */
export async function fetchRecentFilings(
  ticker: string,
  limit = DEFAULT_RECENT_FILINGS_LIMIT
): Promise<SecFiling[]> {
  // A NaN limit would make the clamp below NaN and silently disable the cap.
  const requested = Number.isNaN(limit) ? DEFAULT_RECENT_FILINGS_LIMIT : Math.trunc(limit);
  const safeLimit = Math.min(Math.max(requested, 1), MAX_RECENT_FILINGS_LIMIT);

  const company = await resolveTicker(ticker);
  const cikPadded = company.cik.padStart(10, '0');
  const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`;
  const payload = await fetchJson<RecentFilingsPayload>(submissionUrl);

  const recent = payload.filings?.recent;
  if (!recent) {
    return [];
  }

  // The payload stores filings as parallel arrays indexed by position.
  const forms = recent.form ?? [];
  const accessionNumbers = recent.accessionNumber ?? [];
  const filingDates = recent.filingDate ?? [];
  const primaryDocuments = recent.primaryDocument ?? [];

  const filings: SecFiling[] = [];
  for (let i = 0; i < forms.length; i += 1) {
    const normalizedForm = normalizeForm(forms[i]);
    if (!isSupportedForm(normalizedForm)) {
      continue;
    }

    const accessionNumber = accessionNumbers[i];
    if (!accessionNumber) {
      continue;
    }

    const documentName = primaryDocuments[i];
    const filingUrl = documentName
      ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccessionNumber(accessionNumber)}/${documentName}`
      : null;

    filings.push({
      ticker: company.ticker,
      cik: company.cik,
      companyName: payload.name ?? company.companyName,
      filingType: normalizedForm,
      // A missing date is replaced with today's date rather than left empty.
      filingDate: filingDates[i] ?? todayIso(),
      accessionNumber,
      filingUrl,
      submissionUrl,
      primaryDocument: documentName ?? null
    });

    if (filings.length >= safeLimit) {
      break;
    }
  }
  return filings;
}

/**
 * Fetches a company's facts and returns the latest value of one fixed tag per metric.
 *
 * @param cik Company CIK; left-padded with zeros to 10 characters.
 * @throws Error when the request fails.
 */
export async function fetchLatestFilingMetrics(cik: string) {
  const normalized = cik.padStart(10, '0');
  const payload = await fetchJson<CompanyFactsPayload>(
    `https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`
  );

  return {
    revenue: pickLatestFact(payload, 'Revenues'),
    netIncome: pickLatestFact(payload, 'NetIncomeLoss'),
    totalAssets: pickLatestFact(payload, 'Assets'),
    cash: pickLatestFact(payload, 'CashAndCashEquivalentsAtCarryingValue'),
    debt: pickLatestFact(payload, 'LongTermDebt')
  } satisfies FilingMetrics;
}

/**
 * Resolves metrics for several filings of one company with a single facts request.
 *
 * @param cik Company CIK; left-padded with zeros to 10 characters.
 * @param _ticker Unused.
 * @param filings Filings to resolve metrics for.
 * @returns Map from each filing's accession number to its metrics. When the request or
 *   the lookup fails, every filing maps to all-null metrics instead of throwing.
 */
export async function fetchFilingMetricsForFilings(
  cik: string,
  _ticker: string,
  filings: FilingMetricsLookupInput[]
) {
  const metricsByAccession = new Map<string, FilingMetrics>();
  if (filings.length === 0) {
    return metricsByAccession;
  }

  try {
    const normalized = cik.padStart(10, '0');
    const payload = await fetchJson<CompanyFactsPayload>(
      `https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`
    );

    for (const filing of filings) {
      metricsByAccession.set(filing.accessionNumber, {
        revenue: pickFactByTags(payload, METRIC_TAGS.revenue, filing),
        netIncome: pickFactByTags(payload, METRIC_TAGS.netIncome, filing),
        totalAssets: pickFactByTags(payload, METRIC_TAGS.totalAssets, filing),
        cash: pickFactByTags(payload, METRIC_TAGS.cash, filing),
        debt: pickFactByTags(payload, METRIC_TAGS.debt, filing)
      });
    }
    return metricsByAccession;
  } catch {
    // Best effort: metrics are optional enrichment, so a failure degrades to nulls.
    for (const filing of filings) {
      metricsByAccession.set(filing.accessionNumber, emptyMetrics());
    }
    return metricsByAccession;
  }
}