Rebuild company overview analysis page
This commit is contained in:
134
lib/server/sec-description.ts
Normal file
134
lib/server/sec-description.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
import type { Filing } from '@/lib/types';
|
||||
import { fetchPrimaryFilingText } from '@/lib/server/sec';
|
||||
|
||||
/** A cached value together with the epoch-millisecond timestamp after which it is stale. */
interface CacheEntry<T> {
  value: T;
  expiresAt: number;
}

/** Lifetime of a cache entry: six hours, expressed in milliseconds. */
const DESCRIPTION_CACHE_TTL_MS = 6 * 60 * 60 * 1000;

/** Default upper bound, in characters, applied when clipping a description. */
const DESCRIPTION_MAX_CHARS = 1600;

/**
 * Module-level, in-memory cache of descriptions. `getCompanyDescription` keys it by
 * filing accession number; a `null` value records that no description was obtained.
 */
const descriptionCache: Map<string, CacheEntry<string | null>> = new Map();
|
||||
|
||||
/**
 * Normalizes whitespace in extracted filing text.
 *
 * - CRLF and lone CR line endings become LF, so the newline-based rules below
 *   (and paragraph splitting done by callers) see a single line-ending style.
 * - Every run of non-newline whitespace (spaces, tabs, non-breaking spaces,
 *   form feeds, ...) collapses to a single ASCII space.
 * - Three or more consecutive newlines collapse to exactly two.
 * - Leading and trailing whitespace is removed.
 *
 * @param value Raw text to normalize.
 * @returns The normalized text; an empty string when `value` is only whitespace.
 */
function normalizeWhitespace(value: string): string {
  return value
    // Unify line endings first so '\r' is never left behind as stray whitespace.
    .replace(/\r\n?/g, '\n')
    // `[^\S\n]` = any whitespace character except LF (covers NBSP, tab, etc.).
    .replace(/[^\S\n]+/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
||||
|
||||
/**
 * Shortens `value` to at most `maxChars` characters, preferring a clean cut.
 *
 * Cut preference, in order:
 * 1. The last sentence terminator (`.`, `!`, `?`) that is followed by whitespace,
 *    provided it lies beyond 60% of `maxChars`.
 * 2. The last whitespace character, provided it lies beyond 70% of `maxChars`.
 * 3. A hard cut at `maxChars`.
 *
 * Any whitespace (including newlines) counts as a boundary, and the single
 * character just past the cut is inspected, so a sentence or word that ends
 * exactly at `maxChars` is kept whole instead of being dropped.
 *
 * @param value Text to clip.
 * @param maxChars Maximum length of the result; defaults to `DESCRIPTION_MAX_CHARS`.
 * @returns `value` unchanged when it already fits, otherwise the trimmed, clipped text.
 */
function clipAtSentenceBoundary(value: string, maxChars = DESCRIPTION_MAX_CHARS): string {
  if (value.length <= maxChars) {
    return value;
  }

  const slice = value.slice(0, maxChars);
  // One character of lookahead: lets a terminator or word that ends exactly at
  // the cut be recognised by the whitespace that follows it.
  const lookaheadWindow = value.slice(0, slice.length + 1);

  // Greedy prefix => the match ends at the LAST terminator followed by whitespace.
  const sentenceMatch = /^[\s\S]*[.!?](?=\s)/.exec(lookaheadWindow);
  const sentenceBoundary = sentenceMatch ? sentenceMatch[0].length - 1 : -1;

  if (sentenceBoundary > maxChars * 0.6) {
    return slice.slice(0, sentenceBoundary + 1).trim();
  }

  // Greedy prefix => the match length is the index of the LAST whitespace character.
  const wordMatch = /^[\s\S]*(?=\s)/.exec(lookaheadWindow);
  const wordBoundary = wordMatch ? wordMatch[0].length : -1;

  return (wordBoundary > maxChars * 0.7 ? slice.slice(0, wordBoundary) : slice).trim();
}
|
||||
|
||||
/**
 * Tidies a raw section of filing text for display.
 *
 * Blanks out leftover heading fragments ("table of contents", "Item 1. Business",
 * "Part I", matched case-insensitively), then normalizes whitespace and clips the
 * result with `clipAtSentenceBoundary` using its default length limit.
 *
 * @param value Raw section text.
 * @returns The cleaned and clipped section text.
 */
function cleanupExtractedSection(value: string) {
  // Applied in this order; each hit is replaced by a single space so the
  // neighbouring words do not fuse together.
  const headingNoise = [
    /\btable of contents\b/gi,
    /\bitem\s+1\.?\s+business\b/gi,
    /\bpart\s+i\b/gi
  ];

  let stripped = value;
  for (const pattern of headingNoise) {
    stripped = stripped.replace(pattern, ' ');
  }

  const tidied = normalizeWhitespace(stripped);
  return clipAtSentenceBoundary(tidied);
}
|
||||
|
||||
/**
 * Builds a description from the opening paragraphs of `text`; used when no usable
 * "Item 1. Business" section is available.
 *
 * Paragraphs are separated by two or more newlines. After whitespace
 * normalization, a paragraph is kept only if it has at least 80 characters and
 * does not start with an "Item <number>" heading. The first three kept
 * paragraphs are joined with spaces and clipped.
 *
 * @param text Filing text to scan.
 * @returns The clipped description, or `null` when no paragraph qualifies.
 */
function fallbackDescription(text: string) {
  const kept: string[] = [];

  for (const rawParagraph of text.split(/\n{2,}/)) {
    const paragraph = normalizeWhitespace(rawParagraph);
    const tooShort = paragraph.length < 80;
    const isItemHeading = /^item\s+\d+[a-z]?\.?/i.test(paragraph);

    if (tooShort || isItemHeading) {
      continue;
    }

    kept.push(paragraph);
    if (kept.length === 3) {
      // Only the first three qualifying paragraphs are ever used.
      break;
    }
  }

  return kept.length > 0 ? clipAtSentenceBoundary(kept.join(' ')) : null;
}
|
||||
|
||||
/**
 * Extracts a short business description from the text of a filing.
 *
 * Every occurrence of an "Item 1. Business" heading is tried in document order.
 * For each one, the text up to the next "Item 1A. Risk Factors" / "Item 2"
 * heading (or to the end of the text) is cleaned and clipped; the first
 * candidate of at least 120 characters is returned.
 *
 * Trying every occurrence matters because the first hit is often only a
 * table-of-contents entry, immediately followed by the next item's entry, which
 * yields a section too short to use.
 *
 * When no occurrence produces a usable section, the result of
 * `fallbackDescription` on the normalized text is returned.
 *
 * @param text Filing text.
 * @returns The description, or `null` when the text is empty or nothing usable is found.
 */
export function extractBusinessDescription(text: string): string | null {
  const normalized = normalizeWhitespace(text);
  if (!normalized) {
    return null;
  }

  // Created per call: a global regex carries `lastIndex` state between `exec` calls.
  const startPattern = /\bitem\s+1\.?\s+business\b/gi;
  const endPattern = /\bitem\s+1a\.?\s+risk factors\b|\bitem\s+2\.?\s+properties\b|\bitem\s+2\.?\b/i;

  let startMatch: RegExpExecArray | null;
  while ((startMatch = startPattern.exec(normalized)) !== null) {
    const afterStart = normalized.slice(startMatch.index + startMatch[0].length);
    const endMatch = endPattern.exec(afterStart);
    const section = endMatch ? afterStart.slice(0, endMatch.index) : afterStart;
    const extracted = cleanupExtractedSection(section);

    // Anything shorter is treated as a heading stub (e.g. a table-of-contents
    // row); move on to the next occurrence.
    if (extracted.length >= 120) {
      return extracted;
    }
  }

  return fallbackDescription(normalized);
}
|
||||
|
||||
/**
 * Resolves a business description for a filing, memoized in `descriptionCache`.
 *
 * - Returns `null` immediately when `filing` is `null`.
 * - Returns the cached value (which may itself be `null`) while the entry for the
 *   filing's accession number has not expired.
 * - Otherwise fetches the primary filing text and runs `extractBusinessDescription`
 *   on it. The outcome is cached for `DESCRIPTION_CACHE_TTL_MS`; a missing document
 *   or any thrown error is cached as `null`, so failures are not retried until the
 *   entry expires.
 *
 * @param filing Identifying fields of the filing, or `null`.
 * @returns The description, or `null` when none could be obtained.
 */
export async function getCompanyDescription(
  filing: Pick<Filing, 'accession_number' | 'cik' | 'filing_url' | 'primary_document'> | null
) {
  if (!filing) {
    return null;
  }

  const cacheKey = filing.accession_number;
  const hit = descriptionCache.get(cacheKey);
  if (hit !== undefined && Date.now() < hit.expiresAt) {
    return hit.value;
  }

  let description: string | null;
  try {
    const source = {
      filingUrl: filing.filing_url,
      cik: filing.cik,
      accessionNumber: filing.accession_number,
      primaryDocument: filing.primary_document ?? null
    };
    // NOTE(review): `maxChars` presumably caps how much document text is
    // returned — confirm against `fetchPrimaryFilingText`.
    const document = await fetchPrimaryFilingText(source, { maxChars: 40_000 });
    description = document ? extractBusinessDescription(document.text) : null;
  } catch {
    // Best effort: any failure is recorded as "no description".
    description = null;
  }

  descriptionCache.set(cacheKey, {
    value: description,
    expiresAt: Date.now() + DESCRIPTION_CACHE_TTL_MS
  });

  return description;
}
|
||||
|
||||
/**
 * Bundle of this module's otherwise-private helpers, exported under a single
 * double-underscore name.
 * NOTE(review): presumably exists so tests can reach the helpers — confirm.
 */
export const __secDescriptionInternals = {
  cleanupExtractedSection: cleanupExtractedSection,
  clipAtSentenceBoundary: clipAtSentenceBoundary,
  fallbackDescription: fallbackDescription
};
|
||||
Reference in New Issue
Block a user