Prioritize SEC financials for 10-K/10-Q and keep other filings qualitative
This commit is contained in:
@@ -20,7 +20,7 @@ import {
|
||||
} from '@/lib/server/repos/holdings';
|
||||
import { createPortfolioInsight } from '@/lib/server/repos/insights';
|
||||
import {
|
||||
fetchFilingMetrics,
|
||||
fetchFilingMetricsForFilings,
|
||||
fetchPrimaryFilingText,
|
||||
fetchRecentFilings
|
||||
} from '@/lib/server/sec';
|
||||
@@ -31,11 +31,88 @@ const EXTRACTION_REQUIRED_KEYS = [
|
||||
'redFlags',
|
||||
'followUpQuestions',
|
||||
'portfolioSignals',
|
||||
'segmentSpecificData',
|
||||
'geographicRevenueBreakdown',
|
||||
'companySpecificData',
|
||||
'secApiCrossChecks',
|
||||
'confidence'
|
||||
] as const;
|
||||
/** Upper bound on the number of entries kept in any extraction list. */
const EXTRACTION_MAX_ITEMS = 6;

/** Upper bound on the character length of a single extraction list entry. */
const EXTRACTION_ITEM_MAX_LENGTH = 280;

/** Upper bound on the character length of the extraction summary. */
const EXTRACTION_SUMMARY_MAX_LENGTH = 900;
|
||||
/**
 * Case-insensitive phrases used to pick out filing-text lines that mention
 * segment-level disclosures.
 */
const SEGMENT_PATTERNS = [
  /\boperating segment\b/i,
  /\bsegment revenue\b/i,
  /\bsegment margin\b/i,
  /\bsegment profit\b/i,
  /\bbusiness segment\b/i,
  /\breportable segment\b/i
];
|
||||
/**
 * Case-insensitive region keywords used to pick out filing-text lines that
 * mention geographic disclosures.
 */
const GEOGRAPHIC_PATTERNS = [
  /\bgeographic\b/i,
  /\bamericas\b/i,
  /\bemea\b/i,
  /\bapac\b/i,
  /\basia pacific\b/i,
  /\bnorth america\b/i,
  /\beurope\b/i,
  /\bchina\b/i,
  /\binternational\b/i
];
|
||||
/**
 * Case-insensitive operating-KPI keywords used to pick out filing-text lines
 * that mention company-specific metrics.
 */
const COMPANY_SPECIFIC_PATTERNS = [
  /\bsame[- ]store\b/i,
  /\bcomparable[- ]store\b/i,
  /\bcomp sales\b/i,
  /\borganic sales\b/i,
  /\bbookings\b/i,
  /\bbacklog\b/i,
  /\barpu\b/i,
  /\bmau\b/i,
  /\bdau\b/i,
  /\bsubscriber\b/i,
  /\boccupancy\b/i,
  /\brevpar\b/i,
  /\bretention\b/i,
  /\bchurn\b/i
];
|
||||
|
||||
/** Name of any field on a filing's (non-null) metrics object. */
type FilingMetricKey = keyof NonNullable<Filing['metrics']>;

/** One metric to cross-check: its metrics key, display label, and mention keywords. */
type MetricCheckDescriptor = {
  key: FilingMetricKey;
  label: string;
  patterns: RegExp[];
};

/**
 * Metrics that are cross-checked against filing text. For each metric the
 * patterns are keyword probes for a mention of the metric by name; they do
 * not match the numeric value itself.
 */
const METRIC_CHECK_PATTERNS: MetricCheckDescriptor[] = [
  { key: 'revenue', label: 'Revenue', patterns: [/\brevenue\b/i, /\bsales\b/i] },
  { key: 'netIncome', label: 'Net income', patterns: [/\bnet income\b/i, /\bprofit\b/i] },
  { key: 'totalAssets', label: 'Total assets', patterns: [/\btotal assets\b/i, /\bassets\b/i] },
  { key: 'cash', label: 'Cash', patterns: [/\bcash\b/i, /\bcash equivalents\b/i] },
  { key: 'debt', label: 'Debt', patterns: [/\bdebt\b/i, /\bborrowings\b/i, /\bliabilit(?:y|ies)\b/i] }
];
|
||||
|
||||
/**
 * Tells whether a filing form type is one for which financial metrics are
 * requested.
 *
 * @param form - The filing's form type.
 * @returns `true` only for `'10-K'` and `'10-Q'`; `false` for every other form.
 */
function isFinancialMetricsForm(form: Filing['filing_type']) {
  switch (form) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
function toTaskResult(value: unknown): Record<string, unknown> {
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
@@ -99,6 +176,55 @@ function sanitizeExtractionList(value: unknown) {
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
/**
 * Builds a de-duplicated extraction list from loosely-typed candidates.
 *
 * Each candidate is passed through `sanitizeExtractionText` with the per-item
 * length limit; candidates that come back empty are dropped. Duplicates are
 * detected case-insensitively and the first occurrence wins. Collection stops
 * as soon as `EXTRACTION_MAX_ITEMS` entries have been gathered.
 *
 * @param items - Candidate strings; `null` and `undefined` entries are allowed.
 * @returns The sanitized, unique entries in first-seen order.
 */
function uniqueExtractionList(items: Array<string | null | undefined>) {
  // Map iteration follows insertion order, so it doubles as the ordered result.
  const bySignature = new Map<string, string>();

  for (const candidate of items) {
    const cleaned = sanitizeExtractionText(candidate, EXTRACTION_ITEM_MAX_LENGTH);
    if (!cleaned) {
      continue;
    }

    const key = cleaned.toLowerCase();
    if (bySignature.has(key)) {
      continue;
    }

    bySignature.set(key, cleaned);
    if (bySignature.size >= EXTRACTION_MAX_ITEMS) {
      break;
    }
  }

  return Array.from(bySignature.values());
}
|
||||
|
||||
/**
 * Scans filing text line by line and returns the lines that match any of the
 * given patterns.
 *
 * Lines are split on CR/LF, internal whitespace is collapsed, and lines
 * shorter than 24 characters are ignored. At most `EXTRACTION_MAX_ITEMS * 2`
 * distinct matching lines are collected before the result is handed to
 * `uniqueExtractionList` for sanitizing and final capping.
 *
 * @param filingText - Raw filing text.
 * @param patterns - Patterns to probe each line with; any match selects the line.
 * @returns The sanitized, de-duplicated matching lines in document order.
 */
function collectTextSignals(filingText: string, patterns: RegExp[]) {
  const scanLimit = EXTRACTION_MAX_ITEMS * 2;
  const seen = new Set<string>();
  const matches: string[] = [];

  for (const rawLine of filingText.split(/[\r\n]+/)) {
    const line = rawLine.replace(/\s+/g, ' ').trim();
    if (line.length < 24) {
      continue;
    }

    // Skip repeats (e.g. recurring table headers) so they do not consume the
    // scan budget before enough distinct lines have been found.
    const signature = line.toLowerCase();
    if (seen.has(signature)) {
      continue;
    }

    const isMatch = patterns.some((pattern) => {
      // `test` is stateful for global/sticky patterns; reset so a
      // caller-supplied /g regex cannot skip matches on later lines.
      pattern.lastIndex = 0;
      return pattern.test(line);
    });
    if (!isMatch) {
      continue;
    }

    seen.add(signature);
    matches.push(line);
    if (matches.length >= scanLimit) {
      break;
    }
  }

  return uniqueExtractionList(matches);
}
|
||||
|
||||
function parseExtractionPayload(raw: string): FilingExtraction | null {
|
||||
const fencedJson = raw.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1];
|
||||
const candidate = fencedJson ?? (() => {
|
||||
@@ -145,11 +271,26 @@ function parseExtractionPayload(raw: string): FilingExtraction | null {
|
||||
const redFlags = sanitizeExtractionList(payload.redFlags);
|
||||
const followUpQuestions = sanitizeExtractionList(payload.followUpQuestions);
|
||||
const portfolioSignals = sanitizeExtractionList(payload.portfolioSignals);
|
||||
const segmentSpecificData = sanitizeExtractionList(payload.segmentSpecificData);
|
||||
const geographicRevenueBreakdown = sanitizeExtractionList(payload.geographicRevenueBreakdown);
|
||||
const companySpecificData = sanitizeExtractionList(payload.companySpecificData);
|
||||
const secApiCrossChecks = sanitizeExtractionList(payload.secApiCrossChecks);
|
||||
const confidenceRaw = typeof payload.confidence === 'number'
|
||||
? payload.confidence
|
||||
: Number(payload.confidence);
|
||||
|
||||
if (!summary || !keyPoints || !redFlags || !followUpQuestions || !portfolioSignals || !Number.isFinite(confidenceRaw)) {
|
||||
if (
|
||||
!summary
|
||||
|| !keyPoints
|
||||
|| !redFlags
|
||||
|| !followUpQuestions
|
||||
|| !portfolioSignals
|
||||
|| !segmentSpecificData
|
||||
|| !geographicRevenueBreakdown
|
||||
|| !companySpecificData
|
||||
|| !secApiCrossChecks
|
||||
|| !Number.isFinite(confidenceRaw)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -159,6 +300,10 @@ function parseExtractionPayload(raw: string): FilingExtraction | null {
|
||||
redFlags,
|
||||
followUpQuestions,
|
||||
portfolioSignals,
|
||||
segmentSpecificData,
|
||||
geographicRevenueBreakdown,
|
||||
companySpecificData,
|
||||
secApiCrossChecks,
|
||||
confidence: Math.min(Math.max(confidenceRaw, 0), 1)
|
||||
};
|
||||
}
|
||||
@@ -171,11 +316,37 @@ function metricSnapshotLine(label: string, value: number | null | undefined) {
|
||||
return `${label}: ${Math.round(value).toLocaleString('en-US')}`;
|
||||
}
|
||||
|
||||
/**
 * Produces one status line per entry in `METRIC_CHECK_PATTERNS`, describing
 * how the filing's SEC API metric relates to the filing text.
 *
 * A metric that is null, undefined or non-finite is reported as unavailable.
 * Otherwise the lower-cased text is probed with the metric's keyword patterns:
 * a keyword hit yields the "appears referenced" line, no hit yields the
 * "not confidently located" line. Only the keywords are searched for, not the
 * numeric value.
 *
 * @param filing - Filing whose `metrics` are checked.
 * @param filingText - Filing text to search.
 * @returns The status lines after passing through `uniqueExtractionList`.
 */
function buildSecApiCrossChecks(filing: Filing, filingText: string) {
  const haystack = filingText.toLowerCase();

  const statusLines = METRIC_CHECK_PATTERNS.map(({ key, label, patterns }) => {
    const metric = filing.metrics?.[key];
    if (metric === null || metric === undefined || !Number.isFinite(metric)) {
      return `${label}: SEC API metric unavailable for this filing.`;
    }

    const formatted = Math.round(metric).toLocaleString('en-US');
    const mentioned = patterns.some((pattern) => pattern.test(haystack));

    return mentioned
      ? `${label}: SEC API value ${formatted} appears referenced in filing narrative.`
      : `${label}: SEC API value ${formatted} was not confidently located in sampled filing text.`;
  });

  return uniqueExtractionList(statusLines);
}
|
||||
|
||||
function deterministicExtractionFallback(filing: Filing): FilingExtraction {
|
||||
const metrics = filing.metrics;
|
||||
|
||||
return {
|
||||
summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. Deterministic extraction fallback used due unavailable or invalid local parsing output.`,
|
||||
summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. Deterministic extraction fallback was used because filing text parsing was unavailable or invalid.`,
|
||||
keyPoints: [
|
||||
`${filing.filing_type} filing recorded for ${filing.ticker}.`,
|
||||
metricSnapshotLine('Revenue', metrics?.revenue),
|
||||
@@ -197,19 +368,101 @@ function deterministicExtractionFallback(filing: Filing): FilingExtraction {
|
||||
'Cross-check leverage and liquidity metrics against position sizing rules.',
|
||||
'Track language shifts around guidance or demand assumptions.'
|
||||
],
|
||||
segmentSpecificData: [
|
||||
'Segment-level disclosures were not parsed in deterministic fallback mode.'
|
||||
],
|
||||
geographicRevenueBreakdown: [
|
||||
'Geographic revenue disclosures were not parsed in deterministic fallback mode.'
|
||||
],
|
||||
companySpecificData: [
|
||||
'Company-specific operating KPIs (for example same-store sales) were not parsed in deterministic fallback mode.'
|
||||
],
|
||||
secApiCrossChecks: [
|
||||
`${metricSnapshotLine('Revenue', metrics?.revenue)} (SEC API baseline; text verification unavailable).`,
|
||||
`${metricSnapshotLine('Net income', metrics?.netIncome)} (SEC API baseline; text verification unavailable).`
|
||||
],
|
||||
confidence: 0.2
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds an extraction without any model call: starts from the deterministic
 * fallback for the filing and layers on lines found in the filing text by
 * keyword scanning, plus the SEC API cross-check lines.
 *
 * For each of the segment / geographic / company-specific / cross-check lists,
 * the scanned result is used when non-empty and the fallback's list otherwise.
 * Confidence is 0.4 when any segment, geographic or company-specific line was
 * found, and 0.3 when none were.
 *
 * @param filing - Filing being analysed.
 * @param filingText - Text of the filing's primary document.
 * @returns The rule-based extraction.
 */
function buildRuleBasedExtraction(filing: Filing, filingText: string): FilingExtraction {
  const baseline = deterministicExtractionFallback(filing);
  const segments = collectTextSignals(filingText, SEGMENT_PATTERNS);
  const geographies = collectTextSignals(filingText, GEOGRAPHIC_PATTERNS);
  const companyKpis = collectTextSignals(filingText, COMPANY_SPECIFIC_PATTERNS);
  const crossChecks = buildSecApiCrossChecks(filing, filingText);

  const orBaseline = (found: string[], fallback: string[]) => (found.length > 0 ? found : fallback);

  // First hit of each scan is promoted into the key points.
  // NOTE(review): uniqueExtractionList stops at EXTRACTION_MAX_ITEMS, so these
  // appended leads may be dropped if the baseline list already fills the cap.
  // Confirm against the baseline list sizes.
  const keyPoints = uniqueExtractionList([
    ...baseline.keyPoints,
    segments[0] ? `Segment detail: ${segments[0]}` : null,
    geographies[0] ? `Geographic detail: ${geographies[0]}` : null,
    companyKpis[0] ? `Company-specific KPI: ${companyKpis[0]}` : null
  ]);

  // The first cross-check that could not be matched in the text becomes a red flag.
  const unmatchedCheck = crossChecks.find((line) => /not confidently located/i.test(line));
  const redFlags = uniqueExtractionList([...baseline.redFlags, unmatchedCheck]);

  let segmentQuestion: string;
  if (segments.length > 0) {
    segmentQuestion = 'How do segment trends change the consolidated margin outlook?';
  } else {
    segmentQuestion = 'Does management provide segment-level KPIs in supplemental exhibits?';
  }
  const followUpQuestions = uniqueExtractionList([...baseline.followUpQuestions, segmentQuestion]);

  let kpiSignal: string;
  if (companyKpis.length > 0) {
    kpiSignal = 'Incorporate company-specific KPI direction into near-term position sizing.';
  } else {
    kpiSignal = 'Track future filings for explicit operating KPI disclosures.';
  }
  const portfolioSignals = uniqueExtractionList([...baseline.portfolioSignals, kpiSignal]);

  const foundTextSignals = segments.length + geographies.length + companyKpis.length > 0;

  return {
    summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. SEC API metrics were retained as the baseline and filing text was scanned for segment and company-specific disclosures.`,
    keyPoints,
    redFlags,
    followUpQuestions,
    portfolioSignals,
    segmentSpecificData: orBaseline(segments, baseline.segmentSpecificData),
    geographicRevenueBreakdown: orBaseline(geographies, baseline.geographicRevenueBreakdown),
    companySpecificData: orBaseline(companyKpis, baseline.companySpecificData),
    secApiCrossChecks: orBaseline(crossChecks, baseline.secApiCrossChecks),
    confidence: foundTextSignals ? 0.4 : 0.3
  };
}
|
||||
|
||||
/**
 * Chooses between two extraction lists.
 *
 * @param primary - Preferred list.
 * @param fallback - List to use when `primary` has no entries.
 * @returns `primary` itself when it is non-empty, otherwise `fallback` itself
 *   (no copy is made in either case).
 */
function preferExtractionList(primary: string[], fallback: string[]) {
  if (primary.length === 0) {
    return fallback;
  }

  return primary;
}
|
||||
|
||||
/**
 * Combines a parsed extraction with a fallback extraction, field by field.
 *
 * The summary and every list come from `primary` unless the primary value is
 * empty, in which case the fallback's value is used. Confidence always comes
 * from `primary`, clamped to the range 0..1.
 *
 * @param primary - Preferred extraction.
 * @param fallback - Extraction that fills in empty fields of `primary`.
 * @returns A new extraction object; property order matches the original shape.
 */
function mergeExtractionWithFallback(primary: FilingExtraction, fallback: FilingExtraction): FilingExtraction {
  type ListField =
    | 'keyPoints'
    | 'redFlags'
    | 'followUpQuestions'
    | 'portfolioSignals'
    | 'segmentSpecificData'
    | 'geographicRevenueBreakdown'
    | 'companySpecificData'
    | 'secApiCrossChecks';

  const pick = (field: ListField) => preferExtractionList(primary[field], fallback[field]);
  const clampedConfidence = Math.min(Math.max(primary.confidence, 0), 1);

  return {
    summary: primary.summary ? primary.summary : fallback.summary,
    keyPoints: pick('keyPoints'),
    redFlags: pick('redFlags'),
    followUpQuestions: pick('followUpQuestions'),
    portfolioSignals: pick('portfolioSignals'),
    segmentSpecificData: pick('segmentSpecificData'),
    geographicRevenueBreakdown: pick('geographicRevenueBreakdown'),
    companySpecificData: pick('companySpecificData'),
    secApiCrossChecks: pick('secApiCrossChecks'),
    confidence: clampedConfidence
  };
}
|
||||
|
||||
function extractionPrompt(filing: Filing, filingText: string) {
|
||||
return [
|
||||
'Extract structured signals from the SEC filing text.',
|
||||
`Company: ${filing.company_name} (${filing.ticker})`,
|
||||
`Form: ${filing.filing_type}`,
|
||||
`Filed: ${filing.filing_date}`,
|
||||
`SEC API baseline metrics: ${JSON.stringify(filing.metrics ?? {})}`,
|
||||
'Use SEC API metrics as canonical numeric values and validate whether each appears consistent with filing text context.',
|
||||
'Prioritize company-specific and segment-specific disclosures not covered by SEC endpoint fields (for example same-store sales, geographic mix, segment margin).',
|
||||
'Return ONLY valid JSON with exactly these keys and no extra keys:',
|
||||
'{"summary":"string","keyPoints":["string"],"redFlags":["string"],"followUpQuestions":["string"],"portfolioSignals":["string"],"confidence":0}',
|
||||
`Rules: keyPoints/redFlags/followUpQuestions/portfolioSignals arrays max ${EXTRACTION_MAX_ITEMS} items; each item <= ${EXTRACTION_ITEM_MAX_LENGTH} chars; summary <= ${EXTRACTION_SUMMARY_MAX_LENGTH} chars; confidence between 0 and 1.`,
|
||||
'{"summary":"string","keyPoints":["string"],"redFlags":["string"],"followUpQuestions":["string"],"portfolioSignals":["string"],"segmentSpecificData":["string"],"geographicRevenueBreakdown":["string"],"companySpecificData":["string"],"secApiCrossChecks":["string"],"confidence":0}',
|
||||
`Rules: every array max ${EXTRACTION_MAX_ITEMS} items; each item <= ${EXTRACTION_ITEM_MAX_LENGTH} chars; summary <= ${EXTRACTION_SUMMARY_MAX_LENGTH} chars; confidence between 0 and 1.`,
|
||||
'Filing text follows:',
|
||||
filingText
|
||||
].join('\n\n');
|
||||
@@ -225,8 +478,9 @@ function reportPrompt(
|
||||
`Analyze this SEC filing from ${filing.company_name} (${filing.ticker}).`,
|
||||
`Form: ${filing.filing_type}`,
|
||||
`Filed: ${filing.filing_date}`,
|
||||
`Metrics: ${JSON.stringify(filing.metrics ?? {})}`,
|
||||
`SEC API baseline metrics: ${JSON.stringify(filing.metrics ?? {})}`,
|
||||
`Structured extraction context (${extractionMeta.source}): ${JSON.stringify(extraction)}`,
|
||||
'Use SEC API values as the baseline financials and explicitly reference segment/company-specific details from extraction.',
|
||||
'Return concise sections: Thesis, Red Flags, Follow-up Questions, Portfolio Impact.'
|
||||
].join('\n');
|
||||
}
|
||||
@@ -252,12 +506,37 @@ async function processSyncFilings(task: Task) {
|
||||
const ticker = parseTicker(task.payload.ticker);
|
||||
const limit = parseLimit(task.payload.limit, 20, 1, 50);
|
||||
const filings = await fetchRecentFilings(ticker, limit);
|
||||
const metricsByCik = new Map<string, Filing['metrics']>();
|
||||
const metricsByAccession = new Map<string, Filing['metrics']>();
|
||||
const filingsByCik = new Map<string, typeof filings>();
|
||||
|
||||
for (const filing of filings) {
|
||||
if (!metricsByCik.has(filing.cik)) {
|
||||
const metrics = await fetchFilingMetrics(filing.cik, filing.ticker);
|
||||
metricsByCik.set(filing.cik, metrics);
|
||||
const group = filingsByCik.get(filing.cik);
|
||||
if (group) {
|
||||
group.push(filing);
|
||||
continue;
|
||||
}
|
||||
|
||||
filingsByCik.set(filing.cik, [filing]);
|
||||
}
|
||||
|
||||
for (const [cik, filingsForCik] of filingsByCik) {
|
||||
const filingsForFinancialMetrics = filingsForCik.filter((filing) => isFinancialMetricsForm(filing.filingType));
|
||||
if (filingsForFinancialMetrics.length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const metricsMap = await fetchFilingMetricsForFilings(
|
||||
cik,
|
||||
filingsForCik[0]?.ticker ?? ticker,
|
||||
filingsForFinancialMetrics.map((filing) => ({
|
||||
accessionNumber: filing.accessionNumber,
|
||||
filingDate: filing.filingDate,
|
||||
filingType: filing.filingType
|
||||
}))
|
||||
);
|
||||
|
||||
for (const [accessionNumber, metrics] of metricsMap.entries()) {
|
||||
metricsByAccession.set(accessionNumber, metrics);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -272,7 +551,7 @@ async function processSyncFilings(task: Task) {
|
||||
filing_url: filing.filingUrl,
|
||||
submission_url: filing.submissionUrl,
|
||||
primary_document: filing.primaryDocument,
|
||||
metrics: metricsByCik.get(filing.cik) ?? null,
|
||||
metrics: metricsByAccession.get(filing.accessionNumber) ?? null,
|
||||
links: filingLinks(filing)
|
||||
}))
|
||||
);
|
||||
@@ -341,6 +620,15 @@ async function processAnalyzeFiling(task: Task) {
|
||||
});
|
||||
|
||||
if (filingDocument?.text) {
|
||||
const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text);
|
||||
extraction = ruleBasedExtraction;
|
||||
extractionMeta = {
|
||||
provider: 'deterministic-fallback',
|
||||
model: 'filing-rule-based',
|
||||
source: filingDocument.source,
|
||||
generatedAt: new Date().toISOString()
|
||||
};
|
||||
|
||||
const extractionResult = await runAiAnalysis(
|
||||
extractionPrompt(filing, filingDocument.text),
|
||||
'Return strict JSON only.',
|
||||
@@ -349,7 +637,7 @@ async function processAnalyzeFiling(task: Task) {
|
||||
|
||||
const parsed = parseExtractionPayload(extractionResult.text);
|
||||
if (parsed) {
|
||||
extraction = parsed;
|
||||
extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction);
|
||||
extractionMeta = {
|
||||
provider: extractionResult.provider === 'local-fallback' ? 'deterministic-fallback' : 'ollama',
|
||||
model: extractionResult.model,
|
||||
@@ -360,6 +648,12 @@ async function processAnalyzeFiling(task: Task) {
|
||||
}
|
||||
} catch {
|
||||
extraction = defaultExtraction;
|
||||
extractionMeta = {
|
||||
provider: 'deterministic-fallback',
|
||||
model: 'metadata-fallback',
|
||||
source: 'metadata_fallback',
|
||||
generatedAt: new Date().toISOString()
|
||||
};
|
||||
}
|
||||
|
||||
const analysis = await runAiAnalysis(
|
||||
@@ -435,7 +729,8 @@ async function processPortfolioInsights(task: Task) {
|
||||
|
||||
/**
 * Bundle of module-private helpers exported under a single name.
 * NOTE(review): presumably consumed by unit tests; confirm against callers.
 */
export const __taskProcessorInternals = {
  parseExtractionPayload,
  deterministicExtractionFallback,
  isFinancialMetricsForm
};
|
||||
|
||||
export async function runTaskProcessor(task: Task) {
|
||||
|
||||
Reference in New Issue
Block a user