import type { Filing } from '@/lib/types';
import { fetchPrimaryFilingText } from '@/lib/server/sec';

/** A cached value plus the epoch-millisecond timestamp after which it is stale. */
type CacheEntry<T> = {
  expiresAt: number;
  value: T;
};

/** How long a cached description (including a cached `null`) stays valid: 6 hours. */
const DESCRIPTION_CACHE_TTL_MS = 1000 * 60 * 60 * 6;

/** Default upper bound, in characters, for a returned description. */
const DESCRIPTION_MAX_CHARS = 1_600;

/** An extracted "Item 1. Business" section shorter than this is treated as a miss. */
const DESCRIPTION_MIN_SECTION_CHARS = 120;

/**
 * In-memory cache keyed by accession number. Entries are never evicted; a stale
 * entry is only overwritten the next time the same key is requested.
 */
const descriptionCache = new Map<string, CacheEntry<string | null>>();

/**
 * Normalizes line endings to `\n`, collapses runs of spaces/tabs to one space,
 * collapses three or more consecutive newlines to a blank line, and trims.
 */
function normalizeWhitespace(value: string): string {
  return value
    // Without this, CRLF input would never match the `\n`-based patterns below
    // (nor the paragraph split in `fallbackDescription`).
    .replace(/\r\n?/g, '\n')
    .replace(/[ \t]+/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}

/**
 * Shortens `value` to at most `maxChars` characters, preferring to cut at the
 * end of a sentence and otherwise at a word boundary.
 *
 * @param value - Text to shorten; returned unchanged when already short enough.
 * @param maxChars - Maximum length of the result.
 * @returns The (possibly shortened) text.
 */
function clipAtSentenceBoundary(value: string, maxChars = DESCRIPTION_MAX_CHARS): string {
  if (value.length <= maxChars) {
    return value;
  }

  const slice = value.slice(0, maxChars);
  const sentenceBoundary = Math.max(
    slice.lastIndexOf('. '),
    slice.lastIndexOf('! '),
    slice.lastIndexOf('? ')
  );

  // Only cut at a sentence end if that keeps more than 60% of the budget.
  if (sentenceBoundary > maxChars * 0.6) {
    // +1 keeps the terminating punctuation mark.
    return slice.slice(0, sentenceBoundary + 1).trim();
  }

  // Otherwise cut at the last space, provided that keeps more than 70% of the
  // budget; failing that, hard-cut at `maxChars`.
  const wordBoundary = slice.lastIndexOf(' ');
  return (wordBoundary > maxChars * 0.7 ? slice.slice(0, wordBoundary) : slice).trim();
}

/**
 * Removes heading boilerplate ("table of contents", "Item 1. Business",
 * "Part I") from an extracted section, normalizes whitespace, and clips the
 * result to the default description length.
 */
function cleanupExtractedSection(value: string): string {
  return clipAtSentenceBoundary(
    normalizeWhitespace(
      value
        .replace(/\btable of contents\b/gi, ' ')
        .replace(/\bitem\s+1\.?\s+business\b/gi, ' ')
        .replace(/\bpart\s+i\b/gi, ' ')
    )
  );
}

/**
 * Builds a description from the first (up to three) blank-line-separated
 * paragraphs that are at least 80 characters long and do not start with an
 * "Item N" heading.
 *
 * @returns The joined, clipped paragraphs, or `null` when none qualify.
 */
function fallbackDescription(text: string): string | null {
  const paragraphs = text
    .split(/\n{2,}/)
    .map((paragraph) => normalizeWhitespace(paragraph))
    .filter((paragraph) => paragraph.length >= 80)
    .filter((paragraph) => !/^item\s+\d+[a-z]?\.?/i.test(paragraph))
    .slice(0, 3);

  if (paragraphs.length === 0) {
    return null;
  }

  return clipAtSentenceBoundary(paragraphs.join(' '));
}

/**
 * Extracts a business description from filing text.
 *
 * Looks for an "Item 1. Business" heading and takes the text up to the next
 * "Item 1A. Risk Factors" / "Item 2" heading (or to the end of the text when no
 * such heading follows). Every occurrence of the heading is tried in order,
 * because an early occurrence (e.g. a table-of-contents entry) may be followed
 * almost immediately by the next heading and yield nothing useful. The first
 * occurrence whose cleaned-up section is long enough wins; if none qualifies,
 * the paragraph-based fallback is used.
 *
 * @param text - Raw filing text.
 * @returns The description, or `null` when the text is empty or nothing usable
 *   is found.
 */
export function extractBusinessDescription(text: string): string | null {
  const normalized = normalizeWhitespace(text);
  if (!normalized) {
    return null;
  }

  // Created per call: a regex with the `g` flag carries `lastIndex` state
  // between `exec` calls, so it must not be shared across invocations.
  const headingPattern = /\bitem\s+1\.?\s+business\b/gi;
  const sectionEndPattern =
    /\bitem\s+1a\.?\s+risk factors\b|\bitem\s+2\.?\s+properties\b|\bitem\s+2\.?\b/i;

  for (
    let heading = headingPattern.exec(normalized);
    heading !== null;
    heading = headingPattern.exec(normalized)
  ) {
    const afterHeading = normalized.slice(heading.index + heading[0].length);
    const sectionEnd = sectionEndPattern.exec(afterHeading);
    const section = sectionEnd ? afterHeading.slice(0, sectionEnd.index) : afterHeading;

    const extracted = cleanupExtractedSection(section);
    if (extracted.length >= DESCRIPTION_MIN_SECTION_CHARS) {
      return extracted;
    }
  }

  return fallbackDescription(normalized);
}

/**
 * Returns a business description for the given filing, using a per-accession
 * in-memory cache.
 *
 * Fetches the filing's primary document text via `fetchPrimaryFilingText` and
 * runs `extractBusinessDescription` on it. Both successful results and failures
 * (cached as `null`) are stored for `DESCRIPTION_CACHE_TTL_MS`, so a failed
 * fetch is not retried until the entry expires.
 *
 * @param filing - The filing to describe, or `null`.
 * @returns The description, or `null` when `filing` is `null`, no document is
 *   returned, nothing usable is extracted, or fetching/extraction throws.
 */
export async function getCompanyDescription(
  filing: Pick<Filing, 'accession_number' | 'filing_url' | 'cik' | 'primary_document'> | null
): Promise<string | null> {
  if (!filing) {
    return null;
  }

  const cached = descriptionCache.get(filing.accession_number);
  if (cached && cached.expiresAt > Date.now()) {
    return cached.value;
  }

  let description: string | null;
  try {
    const document = await fetchPrimaryFilingText(
      {
        filingUrl: filing.filing_url,
        cik: filing.cik,
        accessionNumber: filing.accession_number,
        primaryDocument: filing.primary_document ?? null
      },
      // NOTE(review): presumably caps how much document text is returned — confirm
      // against fetchPrimaryFilingText.
      { maxChars: 40_000 }
    );

    description = document ? extractBusinessDescription(document.text) : null;
  } catch {
    // Best effort: any failure is reported to the caller as "no description"
    // and cached like a normal result.
    description = null;
  }

  descriptionCache.set(filing.accession_number, {
    value: description,
    expiresAt: Date.now() + DESCRIPTION_CACHE_TTL_MS
  });

  return description;
}

/** Internal helpers exposed for use outside this module (e.g. by tests). */
export const __secDescriptionInternals = {
  cleanupExtractedSection,
  clipAtSentenceBoundary,
  fallbackDescription
};