From 2a5b548d89024f6547e3e530c7819503842fd964 Mon Sep 17 00:00:00 2001 From: francy51 Date: Sun, 1 Mar 2026 00:37:47 -0500 Subject: [PATCH] Prioritize SEC financials for 10-K/10-Q and keep other filings qualitative --- app/analysis/page.tsx | 14 +- app/filings/page.tsx | 16 +- app/financials/page.tsx | 4 +- lib/server/api/app.ts | 22 +- lib/server/api/filing-redaction.test.ts | 4 + lib/server/db/schema.ts | 4 + lib/server/sec.test.ts | 108 +++++++ lib/server/sec.ts | 407 +++++++++++++++++------- lib/server/task-processors.test.ts | 18 ++ lib/server/task-processors.ts | 321 ++++++++++++++++++- lib/types.ts | 4 + 11 files changed, 773 insertions(+), 149 deletions(-) diff --git a/app/analysis/page.tsx b/app/analysis/page.tsx index a4c0835..cad8a9f 100644 --- a/app/analysis/page.tsx +++ b/app/analysis/page.tsx @@ -30,6 +30,10 @@ function formatShortDate(value: string) { return format(new Date(value), 'MMM yyyy'); } +function hasFinancialSnapshot(filingType: CompanyAnalysis['filings'][number]['filing_type']) { + return filingType === '10-K' || filingType === '10-Q'; +} + export default function AnalysisPage() { return ( Loading analysis desk...}> @@ -110,7 +114,7 @@ function AnalysisPageContent() { return ( void loadAnalysis(ticker)}> @@ -246,10 +250,10 @@ function AnalysisPageContent() { {analysis.filings.map((filing) => ( {format(new Date(filing.filing_date), 'MMM dd, yyyy')} - {filing.filing_type} - {filing.metrics?.revenue ? formatCompactCurrency(filing.metrics.revenue) : 'n/a'} - {filing.metrics?.netIncome ? formatCompactCurrency(filing.metrics.netIncome) : 'n/a'} - {filing.metrics?.totalAssets ? formatCompactCurrency(filing.metrics.totalAssets) : 'n/a'} + {filing.filing_type}{hasFinancialSnapshot(filing.filing_type) ? '' : ' (Qualitative)'} + {hasFinancialSnapshot(filing.filing_type) ? (filing.metrics?.revenue ? formatCompactCurrency(filing.metrics.revenue) : 'n/a') : 'qualitative only'} + {hasFinancialSnapshot(filing.filing_type) ? (filing.metrics?.netIncome ? formatCompactCurrency(filing.metrics.netIncome) : 'n/a') : 'qualitative only'} + {hasFinancialSnapshot(filing.filing_type) ? (filing.metrics?.totalAssets ? formatCompactCurrency(filing.metrics.totalAssets) : 'n/a') : 'qualitative only'} {filing.filing_url ? ( diff --git a/app/filings/page.tsx b/app/filings/page.tsx index 6d90c4c..f1ef90d 100644 --- a/app/filings/page.tsx +++ b/app/filings/page.tsx @@ -35,6 +35,10 @@ function formatFilingDate(value: string) { return format(date, 'MMM dd, yyyy'); } +function hasFinancialSnapshot(filing: Filing) { + return filing.filing_type === '10-K' || filing.filing_type === '10-Q'; +} + function resolveOriginalFilingUrl(filing: Filing) { if (filing.filing_url) { return filing.filing_url; @@ -171,7 +175,7 @@ function FilingsPageContent() { return ( void loadFilings(searchTicker || undefined)}> @@ -254,6 +258,7 @@ function FilingsPageContent() {
{filings.map((filing) => { + const financialForm = hasFinancialSnapshot(filing); const revenue = filing.metrics?.revenue; const hasAnalysis = Boolean(filing.analysis?.text || filing.analysis?.legacyInsights); const originalFilingUrl = resolveOriginalFilingUrl(filing); @@ -275,8 +280,10 @@ function FilingsPageContent() {
-
Revenue Snapshot
-
{revenue ? formatCompactCurrency(revenue) : 'n/a'}
+
Financial Snapshot
+
+ {financialForm ? (revenue ? formatCompactCurrency(revenue) : 'n/a') : 'Qualitative filing'} +
Accession
@@ -331,6 +338,7 @@ function FilingsPageContent() { {filings.map((filing) => { + const financialForm = hasFinancialSnapshot(filing); const revenue = filing.metrics?.revenue; const hasAnalysis = Boolean(filing.analysis?.text || filing.analysis?.legacyInsights); const originalFilingUrl = resolveOriginalFilingUrl(filing); @@ -343,7 +351,7 @@ function FilingsPageContent() { {filing.filing_type} {formatFilingDate(filing.filing_date)} - {revenue ? formatCompactCurrency(revenue) : 'n/a'} + {financialForm ? (revenue ? formatCompactCurrency(revenue) : 'n/a') : 'Qualitative filing'} {filing.company_name} {hasAnalysis ? 'Ready' : 'Not generated'} diff --git a/app/financials/page.tsx b/app/financials/page.tsx index f46f780..cdaf77e 100644 --- a/app/financials/page.tsx +++ b/app/financials/page.tsx @@ -220,7 +220,7 @@ function FinancialsPageContent() { return ( void loadFinancials(ticker)}> @@ -228,7 +228,7 @@ function FinancialsPageContent() { )} > - +
{ diff --git a/lib/server/api/app.ts b/lib/server/api/app.ts index 392b582..3fa64f0 100644 --- a/lib/server/api/app.ts +++ b/lib/server/api/app.ts @@ -1,5 +1,5 @@ import { Elysia, t } from 'elysia'; -import type { TaskStatus } from '@/lib/types'; +import type { Filing, TaskStatus } from '@/lib/types'; import { auth } from '@/lib/auth'; import { requireAuthenticatedSession } from '@/lib/server/auth-session'; import { asErrorMessage, jsonError } from '@/lib/server/http'; @@ -27,6 +27,7 @@ import { } from '@/lib/server/tasks'; const ALLOWED_STATUSES: TaskStatus[] = ['queued', 'running', 'completed', 'failed']; +const FINANCIAL_FORMS: ReadonlySet = new Set(['10-K', '10-Q']); function asRecord(value: unknown): Record { if (!value || typeof value !== 'object' || Array.isArray(value)) { @@ -41,6 +42,17 @@ function asPositiveNumber(value: unknown) { return Number.isFinite(parsed) && parsed > 0 ? parsed : null; } +function withFinancialMetricsPolicy(filing: Filing): Filing { + if (FINANCIAL_FORMS.has(filing.filing_type)) { + return filing; + } + + return { + ...filing, + metrics: null + }; +} + const authHandler = ({ request }: { request: Request }) => auth.handler(request); export const app = new Elysia({ prefix: '/api' }) @@ -333,7 +345,9 @@ export const app = new Elysia({ prefix: '/api' }) getQuote(ticker), getPriceHistory(ticker) ]); - const redactedFilings = filings.map(redactInternalFilingAnalysisFields); + const redactedFilings = filings + .map(redactInternalFilingAnalysisFields) + .map(withFinancialMetricsPolicy); const latestFiling = redactedFilings[0] ?? null; const holding = holdings.find((entry) => entry.ticker === ticker) ?? null; @@ -344,7 +358,7 @@ export const app = new Elysia({ prefix: '/api' }) ?? ticker; const financials = redactedFilings - .filter((entry) => entry.metrics) + .filter((entry) => entry.metrics && FINANCIAL_FORMS.has(entry.filing_type)) .map((entry) => ({ filingDate: entry.filing_date, filingType: entry.filing_type, @@ -448,7 +462,7 @@ export const app = new Elysia({ prefix: '/api' }) limit: Number.isFinite(limit) ? limit : 50 }); - return Response.json({ filings: filings.map(redactInternalFilingAnalysisFields) }); + return Response.json({ filings: filings.map(redactInternalFilingAnalysisFields).map(withFinancialMetricsPolicy) }); }, { query: t.Object({ ticker: t.Optional(t.String()), diff --git a/lib/server/api/filing-redaction.test.ts b/lib/server/api/filing-redaction.test.ts index 1567d45..2d6032e 100644 --- a/lib/server/api/filing-redaction.test.ts +++ b/lib/server/api/filing-redaction.test.ts @@ -25,6 +25,10 @@ function filingWithExtraction(): Filing { redFlags: ['b'], followUpQuestions: ['c'], portfolioSignals: ['d'], + segmentSpecificData: ['e'], + geographicRevenueBreakdown: ['f'], + companySpecificData: ['g'], + secApiCrossChecks: ['h'], confidence: 0.4 }, extractionMeta: { diff --git a/lib/server/db/schema.ts b/lib/server/db/schema.ts index cfafb77..d450b63 100644 --- a/lib/server/db/schema.ts +++ b/lib/server/db/schema.ts @@ -26,6 +26,10 @@ type FilingAnalysis = { redFlags: string[]; followUpQuestions: string[]; portfolioSignals: string[]; + segmentSpecificData: string[]; + geographicRevenueBreakdown: string[]; + companySpecificData: string[]; + secApiCrossChecks: string[]; confidence: number; }; extractionMeta?: { diff --git a/lib/server/sec.test.ts b/lib/server/sec.test.ts index 6fbc1e3..8fa3c75 100644 --- a/lib/server/sec.test.ts +++ b/lib/server/sec.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it, mock } from 'bun:test'; import { + fetchFilingMetricsForFilings, fetchPrimaryFilingText, normalizeSecDocumentText, resolvePrimaryFilingUrl, @@ -81,4 +82,111 @@ describe('sec filing text helpers', () => { expect(result?.truncated).toBe(true); expect(result?.text.length).toBeLessThanOrEqual(1_000); }); + + it('maps SEC companyfacts metrics to each filing by accession', async () => { + const fetchMock = mock(async (_input: RequestInfo | URL, _init?: RequestInit) => { + return new Response(JSON.stringify({ + facts: { + 'us-gaap': { + Revenues: { + units: { + USD: [ + { accn: '0000320193-25-000010', val: 101_000, filed: '2025-11-01', form: '10-Q' }, + { accn: '0000320193-25-000020', val: 111_000, filed: '2026-02-01', form: '10-Q' } + ] + } + }, + NetIncomeLoss: { + units: { + USD: [ + { accn: '0000320193-25-000010', val: 21_000, filed: '2025-11-01', form: '10-Q' }, + { accn: '0000320193-25-000020', val: 25_000, filed: '2026-02-01', form: '10-Q' } + ] + } + }, + Assets: { + units: { + USD: [ + { accn: '0000320193-25-000010', val: 405_000, filed: '2025-11-01', form: '10-Q' }, + { accn: '0000320193-25-000020', val: 410_000, filed: '2026-02-01', form: '10-Q' } + ] + } + }, + CashAndCashEquivalentsAtCarryingValue: { + units: { + USD: [ + { accn: '0000320193-25-000010', val: 65_000, filed: '2025-11-01', form: '10-Q' }, + { accn: '0000320193-25-000020', val: 70_000, filed: '2026-02-01', form: '10-Q' } + ] + } + }, + LongTermDebt: { + units: { + USD: [ + { accn: '0000320193-25-000010', val: 95_000, filed: '2025-11-01', form: '10-Q' }, + { accn: '0000320193-25-000020', val: 98_000, filed: '2026-02-01', form: '10-Q' } + ] + } + } + } + } + }), { status: 200 }); + }) as unknown as typeof fetch; + + const originalFetch = globalThis.fetch; + globalThis.fetch = fetchMock; + + try { + const map = await fetchFilingMetricsForFilings('0000320193', 'AAPL', [ + { + accessionNumber: '0000320193-25-000010', + filingDate: '2025-11-01', + filingType: '10-Q' + }, + { + accessionNumber: '0000320193-25-000020', + filingDate: '2026-02-01', + filingType: '10-Q' + } + ]); + + expect(fetchMock).toHaveBeenCalledTimes(1); + expect(map.get('0000320193-25-000010')?.revenue).toBe(101_000); + expect(map.get('0000320193-25-000010')?.netIncome).toBe(21_000); + expect(map.get('0000320193-25-000020')?.revenue).toBe(111_000); + expect(map.get('0000320193-25-000020')?.cash).toBe(70_000); + } finally { + globalThis.fetch = originalFetch; + } + }); + + it('returns null-valued metrics when companyfacts lookup fails', async () => { + const fetchMock = mock(async (_input: RequestInfo | URL, _init?: RequestInit) => { + return new Response('error', { status: 500 }); + }) as unknown as typeof fetch; + + const originalFetch = globalThis.fetch; + globalThis.fetch = fetchMock; + + try { + const map = await fetchFilingMetricsForFilings('0000320193', 'AAPL', [ + { + accessionNumber: '0000320193-25-000010', + filingDate: '2025-11-01', + filingType: '10-Q' + } + ]); + + expect(fetchMock).toHaveBeenCalledTimes(1); + expect(map.get('0000320193-25-000010')).toEqual({ + revenue: null, + netIncome: null, + totalAssets: null, + cash: null, + debt: null + }); + } finally { + globalThis.fetch = originalFetch; + } + }); }); diff --git a/lib/server/sec.ts b/lib/server/sec.ts index 437b149..00c7e1e 100644 --- a/lib/server/sec.ts +++ b/lib/server/sec.ts @@ -1,6 +1,7 @@ import type { Filing } from '@/lib/types'; type FilingType = Filing['filing_type']; +type FilingMetrics = NonNullable; type TickerDirectoryRecord = { cik_str: number; @@ -23,10 +24,21 @@ type RecentFilingsPayload = { type CompanyFactsPayload = { facts?: { - 'us-gaap'?: Record> }>; + 'us-gaap'?: Record }>; }; }; +type CompanyFactPoint = { + val?: number; + end?: string; + filed?: string; + accn?: string; + form?: string; + fy?: number; + fp?: string; + frame?: string; +}; + type SecFiling = { ticker: string; cik: string; @@ -58,9 +70,35 @@ export type FilingDocumentText = { truncated: boolean; }; +type FilingMetricsLookupInput = { + accessionNumber: string; + filingDate: string; + filingType: FilingType; +}; + const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K']; const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12; const FILING_TEXT_MAX_CHARS = 24_000; +const METRIC_TAGS = { + revenue: [ + 'Revenues', + 'SalesRevenueNet', + 'RevenueFromContractWithCustomerExcludingAssessedTax', + 'TotalRevenuesAndOtherIncome' + ], + netIncome: ['NetIncomeLoss', 'ProfitLoss'], + totalAssets: ['Assets'], + cash: [ + 'CashAndCashEquivalentsAtCarryingValue', + 'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents' + ], + debt: [ + 'LongTermDebtAndCapitalLeaseObligations', + 'LongTermDebtNoncurrent', + 'LongTermDebt', + 'DebtAndFinanceLeaseLiabilities' + ] +} as const; let tickerCache = new Map(); let tickerCacheLoadedAt = 0; @@ -140,6 +178,30 @@ function compactAccessionNumber(value: string) { return value.replace(/-/g, ''); } +function normalizeAccessionKey(value: string | undefined | null) { + return (value ?? '').replace(/\D/g, ''); +} + +function normalizeForm(value: string | undefined | null) { + const normalized = (value ?? '').trim().toUpperCase(); + + if (!normalized) { + return ''; + } + + return normalized.endsWith('/A') + ? normalized.slice(0, -2) + : normalized; +} + +function parseDate(value: string | undefined | null) { + if (!value) { + return Number.NaN; + } + + return Date.parse(value); +} + function normalizeCikForPath(value: string) { const digits = value.replace(/\D/g, ''); if (!digits) { @@ -214,42 +276,6 @@ export async function fetchPrimaryFilingText( }; } -function pseudoMetric(seed: string, min: number, max: number) { - let hash = 0; - for (const char of seed) { - hash = (hash * 33 + char.charCodeAt(0)) % 100000; - } - - const fraction = (hash % 10000) / 10000; - return min + (max - min) * fraction; -} - -function fallbackFilings(ticker: string, limit: number): SecFiling[] { - const normalized = ticker.trim().toUpperCase(); - const companyName = `${normalized} Holdings Inc.`; - const filings: SecFiling[] = []; - - for (let i = 0; i < limit; i += 1) { - const filingType = SUPPORTED_FORMS[i % SUPPORTED_FORMS.length]; - const date = new Date(Date.now() - i * 1000 * 60 * 60 * 24 * 35).toISOString().slice(0, 10); - const accessionNumber = `${Date.now()}-${i}`; - - filings.push({ - ticker: normalized, - cik: String(100000 + i), - companyName, - filingType, - filingDate: date, - accessionNumber, - filingUrl: null, - submissionUrl: null, - primaryDocument: null - }); - } - - return filings; -} - async function fetchJson(url: string): Promise { const response = await fetch(url, { headers: { @@ -301,116 +327,255 @@ async function resolveTicker(ticker: string) { } function pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null { - const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units; + return pickFactForFiling(payload, tag, { + accessionNumber: '', + filingDate: '', + filingType: '10-Q' + }); +} +function collectFactSeries(payload: CompanyFactsPayload, tag: string): CompanyFactPoint[] { + const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units; if (!unitCollections) { - return null; + return []; } - const preferredUnits = ['USD', 'USD/shares']; + const usdSeries: CompanyFactPoint[] = []; + const fallbackSeries: CompanyFactPoint[] = []; - for (const unit of preferredUnits) { - const series = unitCollections[unit]; - if (!series?.length) { + for (const [unit, series] of Object.entries(unitCollections)) { + if (!Array.isArray(series) || series.length === 0) { continue; } - const best = [...series] - .filter((item) => typeof item.val === 'number') - .sort((a, b) => { - const aDate = Date.parse(a.filed ?? a.end ?? '1970-01-01'); - const bDate = Date.parse(b.filed ?? b.end ?? '1970-01-01'); - return bDate - aDate; - })[0]; + if (unit === 'USD' || /^USD(?!\/shares)/i.test(unit)) { + usdSeries.push(...series); + continue; + } - if (best?.val !== undefined) { - return best.val; + fallbackSeries.push(...series); + } + + const points = usdSeries.length > 0 ? usdSeries : fallbackSeries; + + return points.filter((point) => typeof point.val === 'number' && Number.isFinite(point.val)); +} + +function pickMostRecentFact(points: CompanyFactPoint[]) { + return [...points].sort((a, b) => { + const aDate = parseDate(a.filed ?? a.end); + const bDate = parseDate(b.filed ?? b.end); + + if (Number.isFinite(aDate) && Number.isFinite(bDate)) { + return bDate - aDate; + } + + if (Number.isFinite(bDate)) { + return 1; + } + + if (Number.isFinite(aDate)) { + return -1; + } + + return 0; + })[0] ?? null; +} + +function pickClosestByDate(points: CompanyFactPoint[], targetDate: number) { + if (points.length === 0) { + return null; + } + + if (!Number.isFinite(targetDate)) { + return pickMostRecentFact(points); + } + + const dated = points + .map((point) => ({ point, date: parseDate(point.filed ?? point.end) })) + .filter((entry) => Number.isFinite(entry.date)); + + if (dated.length === 0) { + return pickMostRecentFact(points); + } + + const beforeTarget = dated.filter((entry) => entry.date <= targetDate); + if (beforeTarget.length > 0) { + return beforeTarget.sort((a, b) => b.date - a.date)[0]?.point ?? null; + } + + return dated.sort((a, b) => { + const distance = Math.abs(a.date - targetDate) - Math.abs(b.date - targetDate); + if (distance !== 0) { + return distance; + } + + return b.date - a.date; + })[0]?.point ?? null; +} + +function pickFactForFiling( + payload: CompanyFactsPayload, + tag: string, + filing: FilingMetricsLookupInput +): number | null { + const points = collectFactSeries(payload, tag); + if (points.length === 0) { + return null; + } + + const accessionKey = normalizeAccessionKey(filing.accessionNumber); + if (accessionKey) { + const byAccession = points.filter((point) => normalizeAccessionKey(point.accn) === accessionKey); + if (byAccession.length > 0) { + const matched = pickMostRecentFact(byAccession); + if (typeof matched?.val === 'number' && Number.isFinite(matched.val)) { + return matched.val; + } + } + } + + const filingForm = normalizeForm(filing.filingType); + const byForm = filingForm + ? points.filter((point) => normalizeForm(point.form) === filingForm) + : points; + + const targetDate = parseDate(filing.filingDate); + const bestByForm = pickClosestByDate(byForm, targetDate); + if (typeof bestByForm?.val === 'number' && Number.isFinite(bestByForm.val)) { + return bestByForm.val; + } + + const bestAny = pickClosestByDate(points, targetDate); + return typeof bestAny?.val === 'number' && Number.isFinite(bestAny.val) + ? bestAny.val + : null; +} + +function pickFactByTags( + payload: CompanyFactsPayload, + tags: readonly string[], + filing: FilingMetricsLookupInput +) { + for (const tag of tags) { + const value = pickFactForFiling(payload, tag, filing); + if (value !== null) { + return value; } } return null; } +function emptyMetrics(): FilingMetrics { + return { + revenue: null, + netIncome: null, + totalAssets: null, + cash: null, + debt: null + }; +} + export async function fetchRecentFilings(ticker: string, limit = 20): Promise { const safeLimit = Math.min(Math.max(Math.trunc(limit), 1), 50); - try { - const company = await resolveTicker(ticker); - const cikPadded = company.cik.padStart(10, '0'); - const payload = await fetchJson(`https://data.sec.gov/submissions/CIK${cikPadded}.json`); - const recent = payload.filings?.recent; - const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`; + const company = await resolveTicker(ticker); + const cikPadded = company.cik.padStart(10, '0'); + const payload = await fetchJson(`https://data.sec.gov/submissions/CIK${cikPadded}.json`); + const recent = payload.filings?.recent; + const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`; - if (!recent) { - return fallbackFilings(company.ticker, safeLimit); - } - - const forms = recent.form ?? []; - const accessionNumbers = recent.accessionNumber ?? []; - const filingDates = recent.filingDate ?? []; - const primaryDocuments = recent.primaryDocument ?? []; - const filings: SecFiling[] = []; - - for (let i = 0; i < forms.length; i += 1) { - const filingType = forms[i] as FilingType; - - if (!SUPPORTED_FORMS.includes(filingType)) { - continue; - } - - const accessionNumber = accessionNumbers[i]; - if (!accessionNumber) { - continue; - } - - const compactAccession = accessionNumber.replace(/-/g, ''); - const documentName = primaryDocuments[i]; - const filingUrl = documentName - ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccession}/${documentName}` - : null; - - filings.push({ - ticker: company.ticker, - cik: company.cik, - companyName: payload.name ?? company.companyName, - filingType, - filingDate: filingDates[i] ?? todayIso(), - accessionNumber, - filingUrl, - submissionUrl, - primaryDocument: documentName ?? null - }); - - if (filings.length >= safeLimit) { - break; - } - } - - return filings.length > 0 ? filings : fallbackFilings(company.ticker, safeLimit); - } catch { - return fallbackFilings(ticker, safeLimit); + if (!recent) { + return []; } + + const forms = recent.form ?? []; + const accessionNumbers = recent.accessionNumber ?? []; + const filingDates = recent.filingDate ?? []; + const primaryDocuments = recent.primaryDocument ?? []; + const filings: SecFiling[] = []; + + for (let i = 0; i < forms.length; i += 1) { + const normalizedForm = normalizeForm(forms[i]) as FilingType; + if (!SUPPORTED_FORMS.includes(normalizedForm)) { + continue; + } + + const accessionNumber = accessionNumbers[i]; + if (!accessionNumber) { + continue; + } + + const compactAccession = accessionNumber.replace(/-/g, ''); + const documentName = primaryDocuments[i]; + const filingUrl = documentName + ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccession}/${documentName}` + : null; + + filings.push({ + ticker: company.ticker, + cik: company.cik, + companyName: payload.name ?? company.companyName, + filingType: normalizedForm, + filingDate: filingDates[i] ?? todayIso(), + accessionNumber, + filingUrl, + submissionUrl, + primaryDocument: documentName ?? null + }); + + if (filings.length >= safeLimit) { + break; + } + } + + return filings; } -export async function fetchFilingMetrics(cik: string, ticker: string) { +export async function fetchLatestFilingMetrics(cik: string) { + const normalized = cik.padStart(10, '0'); + const payload = await fetchJson(`https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`); + + return { + revenue: pickLatestFact(payload, 'Revenues'), + netIncome: pickLatestFact(payload, 'NetIncomeLoss'), + totalAssets: pickLatestFact(payload, 'Assets'), + cash: pickLatestFact(payload, 'CashAndCashEquivalentsAtCarryingValue'), + debt: pickLatestFact(payload, 'LongTermDebt') + } satisfies FilingMetrics; +} + +export async function fetchFilingMetricsForFilings( + cik: string, + _ticker: string, + filings: FilingMetricsLookupInput[] +) { + const metricsByAccession = new Map(); + if (filings.length === 0) { + return metricsByAccession; + } + try { const normalized = cik.padStart(10, '0'); const payload = await fetchJson(`https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`); - return { - revenue: pickLatestFact(payload, 'Revenues'), - netIncome: pickLatestFact(payload, 'NetIncomeLoss'), - totalAssets: pickLatestFact(payload, 'Assets'), - cash: pickLatestFact(payload, 'CashAndCashEquivalentsAtCarryingValue'), - debt: pickLatestFact(payload, 'LongTermDebt') - }; + for (const filing of filings) { + metricsByAccession.set(filing.accessionNumber, { + revenue: pickFactByTags(payload, METRIC_TAGS.revenue, filing), + netIncome: pickFactByTags(payload, METRIC_TAGS.netIncome, filing), + totalAssets: pickFactByTags(payload, METRIC_TAGS.totalAssets, filing), + cash: pickFactByTags(payload, METRIC_TAGS.cash, filing), + debt: pickFactByTags(payload, METRIC_TAGS.debt, filing) + }); + } + + return metricsByAccession; } catch { - return { - revenue: Math.round(pseudoMetric(`${ticker}-revenue`, 2_000_000_000, 350_000_000_000)), - netIncome: Math.round(pseudoMetric(`${ticker}-net`, 150_000_000, 40_000_000_000)), - totalAssets: Math.round(pseudoMetric(`${ticker}-assets`, 4_000_000_000, 500_000_000_000)), - cash: Math.round(pseudoMetric(`${ticker}-cash`, 200_000_000, 180_000_000_000)), - debt: Math.round(pseudoMetric(`${ticker}-debt`, 300_000_000, 220_000_000_000)) - }; + for (const filing of filings) { + metricsByAccession.set(filing.accessionNumber, emptyMetrics()); + } + + return metricsByAccession; } } diff --git a/lib/server/task-processors.test.ts b/lib/server/task-processors.test.ts index d95eee4..6eed37a 100644 --- a/lib/server/task-processors.test.ts +++ b/lib/server/task-processors.test.ts @@ -35,6 +35,10 @@ describe('task processor extraction helpers', () => { redFlags: ['Debt service burden is rising'], followUpQuestions: ['Is margin guidance sustainable?'], portfolioSignals: ['Monitor leverage trend'], + segmentSpecificData: ['Services segment outgrew hardware segment.'], + geographicRevenueBreakdown: ['EMEA revenue grew faster than Americas.'], + companySpecificData: ['Same-store sales increased 4.2%.'], + secApiCrossChecks: ['Revenue from SEC API aligns with filing narrative.'], confidence: 0.72 }); @@ -52,6 +56,10 @@ describe('task processor extraction helpers', () => { redFlags: [], followUpQuestions: [], portfolioSignals: [], + segmentSpecificData: [], + geographicRevenueBreakdown: [], + companySpecificData: [], + secApiCrossChecks: [], confidence: 0.2, extra: 'not-allowed' }); @@ -66,6 +74,16 @@ describe('task processor extraction helpers', () => { expect(fallback.summary).toContain('Deterministic extraction fallback'); expect(fallback.keyPoints.length).toBeGreaterThan(0); expect(fallback.redFlags.length).toBeGreaterThan(0); + expect(fallback.segmentSpecificData.length).toBeGreaterThan(0); + expect(fallback.geographicRevenueBreakdown.length).toBeGreaterThan(0); + expect(fallback.companySpecificData.length).toBeGreaterThan(0); + expect(fallback.secApiCrossChecks.length).toBeGreaterThan(0); expect(fallback.confidence).toBe(0.2); }); + + it('treats only 10-K and 10-Q as financial metric filings', () => { + expect(__taskProcessorInternals.isFinancialMetricsForm('10-K')).toBe(true); + expect(__taskProcessorInternals.isFinancialMetricsForm('10-Q')).toBe(true); + expect(__taskProcessorInternals.isFinancialMetricsForm('8-K')).toBe(false); + }); }); diff --git a/lib/server/task-processors.ts b/lib/server/task-processors.ts index 11a5af9..f1ce8b9 100644 --- a/lib/server/task-processors.ts +++ b/lib/server/task-processors.ts @@ -20,7 +20,7 @@ import { } from '@/lib/server/repos/holdings'; import { createPortfolioInsight } from '@/lib/server/repos/insights'; import { - fetchFilingMetrics, + fetchFilingMetricsForFilings, fetchPrimaryFilingText, fetchRecentFilings } from '@/lib/server/sec'; @@ -31,11 +31,88 @@ const EXTRACTION_REQUIRED_KEYS = [ 'redFlags', 'followUpQuestions', 'portfolioSignals', + 'segmentSpecificData', + 'geographicRevenueBreakdown', + 'companySpecificData', + 'secApiCrossChecks', 'confidence' ] as const; const EXTRACTION_MAX_ITEMS = 6; const EXTRACTION_ITEM_MAX_LENGTH = 280; const EXTRACTION_SUMMARY_MAX_LENGTH = 900; +const SEGMENT_PATTERNS = [ + /\boperating segment\b/i, + /\bsegment revenue\b/i, + /\bsegment margin\b/i, + /\bsegment profit\b/i, + /\bbusiness segment\b/i, + /\breportable segment\b/i +]; +const GEOGRAPHIC_PATTERNS = [ + /\bgeographic\b/i, + /\bamericas\b/i, + /\bemea\b/i, + /\bapac\b/i, + /\basia pacific\b/i, + /\bnorth america\b/i, + /\beurope\b/i, + /\bchina\b/i, + /\binternational\b/i +]; +const COMPANY_SPECIFIC_PATTERNS = [ + /\bsame[- ]store\b/i, + /\bcomparable[- ]store\b/i, + /\bcomp sales\b/i, + /\borganic sales\b/i, + /\bbookings\b/i, + /\bbacklog\b/i, + /\barpu\b/i, + /\bmau\b/i, + /\bdau\b/i, + /\bsubscriber\b/i, + /\boccupancy\b/i, + /\brevpar\b/i, + /\bretention\b/i, + /\bchurn\b/i +]; + +type FilingMetricKey = keyof NonNullable; + +const METRIC_CHECK_PATTERNS: Array<{ + key: FilingMetricKey; + label: string; + patterns: RegExp[]; +}> = [ + { + key: 'revenue', + label: 'Revenue', + patterns: [/\brevenue\b/i, /\bsales\b/i] + }, + { + key: 'netIncome', + label: 'Net income', + patterns: [/\bnet income\b/i, /\bprofit\b/i] + }, + { + key: 'totalAssets', + label: 'Total assets', + patterns: [/\btotal assets\b/i, /\bassets\b/i] + }, + { + key: 'cash', + label: 'Cash', + patterns: [/\bcash\b/i, /\bcash equivalents\b/i] + }, + { + key: 'debt', + label: 'Debt', + patterns: [/\bdebt\b/i, /\bborrowings\b/i, /\bliabilit(?:y|ies)\b/i] + } +]; + +function isFinancialMetricsForm(form: Filing['filing_type']) { + return form === '10-K' || form === '10-Q'; +} function toTaskResult(value: unknown): Record { if (!value || typeof value !== 'object' || Array.isArray(value)) { @@ -99,6 +176,55 @@ function sanitizeExtractionList(value: unknown) { return cleaned; } +function uniqueExtractionList(items: Array) { + const seen = new Set(); + const unique: string[] = []; + + for (const item of items) { + const normalized = sanitizeExtractionText(item, EXTRACTION_ITEM_MAX_LENGTH); + if (!normalized) { + continue; + } + + const signature = normalized.toLowerCase(); + if (seen.has(signature)) { + continue; + } + + seen.add(signature); + unique.push(normalized); + + if (unique.length >= EXTRACTION_MAX_ITEMS) { + break; + } + } + + return unique; +} + +function collectTextSignals(filingText: string, patterns: RegExp[]) { + const lines = filingText + .replace(/\r/g, '\n') + .split(/\n+/) + .map((line) => line.replace(/\s+/g, ' ').trim()) + .filter((line) => line.length >= 24); + + const matches: string[] = []; + + for (const line of lines) { + if (!patterns.some((pattern) => pattern.test(line))) { + continue; + } + + matches.push(line); + if (matches.length >= EXTRACTION_MAX_ITEMS * 2) { + break; + } + } + + return uniqueExtractionList(matches); +} + function parseExtractionPayload(raw: string): FilingExtraction | null { const fencedJson = raw.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1]; const candidate = fencedJson ?? (() => { @@ -145,11 +271,26 @@ function parseExtractionPayload(raw: string): FilingExtraction | null { const redFlags = sanitizeExtractionList(payload.redFlags); const followUpQuestions = sanitizeExtractionList(payload.followUpQuestions); const portfolioSignals = sanitizeExtractionList(payload.portfolioSignals); + const segmentSpecificData = sanitizeExtractionList(payload.segmentSpecificData); + const geographicRevenueBreakdown = sanitizeExtractionList(payload.geographicRevenueBreakdown); + const companySpecificData = sanitizeExtractionList(payload.companySpecificData); + const secApiCrossChecks = sanitizeExtractionList(payload.secApiCrossChecks); const confidenceRaw = typeof payload.confidence === 'number' ? payload.confidence : Number(payload.confidence); - if (!summary || !keyPoints || !redFlags || !followUpQuestions || !portfolioSignals || !Number.isFinite(confidenceRaw)) { + if ( + !summary + || !keyPoints + || !redFlags + || !followUpQuestions + || !portfolioSignals + || !segmentSpecificData + || !geographicRevenueBreakdown + || !companySpecificData + || !secApiCrossChecks + || !Number.isFinite(confidenceRaw) + ) { return null; } @@ -159,6 +300,10 @@ function parseExtractionPayload(raw: string): FilingExtraction | null { redFlags, followUpQuestions, portfolioSignals, + segmentSpecificData, + geographicRevenueBreakdown, + companySpecificData, + secApiCrossChecks, confidence: Math.min(Math.max(confidenceRaw, 0), 1) }; } @@ -171,11 +316,37 @@ function metricSnapshotLine(label: string, value: number | null | undefined) { return `${label}: ${Math.round(value).toLocaleString('en-US')}`; } +function buildSecApiCrossChecks(filing: Filing, filingText: string) { + const normalizedText = filingText.toLowerCase(); + const checks: string[] = []; + + for (const descriptor of METRIC_CHECK_PATTERNS) { + const value = filing.metrics?.[descriptor.key]; + if (value === null || value === undefined || !Number.isFinite(value)) { + checks.push(`${descriptor.label}: SEC API metric unavailable for this filing.`); + continue; + } + + const hasMention = descriptor.patterns.some((pattern) => pattern.test(normalizedText)); + if (hasMention) { + checks.push( + `${descriptor.label}: SEC API value ${Math.round(value).toLocaleString('en-US')} appears referenced in filing narrative.` + ); + } else { + checks.push( + `${descriptor.label}: SEC API value ${Math.round(value).toLocaleString('en-US')} was not confidently located in sampled filing text.` + ); + } + } + + return uniqueExtractionList(checks); +} + function deterministicExtractionFallback(filing: Filing): FilingExtraction { const metrics = filing.metrics; return { - summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. Deterministic extraction fallback used due unavailable or invalid local parsing output.`, + summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. Deterministic extraction fallback was used because filing text parsing was unavailable or invalid.`, keyPoints: [ `${filing.filing_type} filing recorded for ${filing.ticker}.`, metricSnapshotLine('Revenue', metrics?.revenue), @@ -197,19 +368,101 @@ function deterministicExtractionFallback(filing: Filing): FilingExtraction { 'Cross-check leverage and liquidity metrics against position sizing rules.', 'Track language shifts around guidance or demand assumptions.' ], + segmentSpecificData: [ + 'Segment-level disclosures were not parsed in deterministic fallback mode.' + ], + geographicRevenueBreakdown: [ + 'Geographic revenue disclosures were not parsed in deterministic fallback mode.' + ], + companySpecificData: [ + 'Company-specific operating KPIs (for example same-store sales) were not parsed in deterministic fallback mode.' + ], + secApiCrossChecks: [ + `${metricSnapshotLine('Revenue', metrics?.revenue)} (SEC API baseline; text verification unavailable).`, + `${metricSnapshotLine('Net income', metrics?.netIncome)} (SEC API baseline; text verification unavailable).` + ], confidence: 0.2 }; } +function buildRuleBasedExtraction(filing: Filing, filingText: string): FilingExtraction { + const baseline = deterministicExtractionFallback(filing); + const segmentSpecificData = collectTextSignals(filingText, SEGMENT_PATTERNS); + const geographicRevenueBreakdown = collectTextSignals(filingText, GEOGRAPHIC_PATTERNS); + const companySpecificData = collectTextSignals(filingText, COMPANY_SPECIFIC_PATTERNS); + const secApiCrossChecks = buildSecApiCrossChecks(filing, filingText); + + const segmentLead = segmentSpecificData[0] ? `Segment detail: ${segmentSpecificData[0]}` : null; + const geographicLead = geographicRevenueBreakdown[0] ? `Geographic detail: ${geographicRevenueBreakdown[0]}` : null; + const companyLead = companySpecificData[0] ? `Company-specific KPI: ${companySpecificData[0]}` : null; + + return { + summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. SEC API metrics were retained as the baseline and filing text was scanned for segment and company-specific disclosures.`, + keyPoints: uniqueExtractionList([ + ...baseline.keyPoints, + segmentLead, + geographicLead, + companyLead + ]), + redFlags: uniqueExtractionList([ + ...baseline.redFlags, + secApiCrossChecks.find((line) => /not confidently located/i.test(line)) + ]), + followUpQuestions: uniqueExtractionList([ + ...baseline.followUpQuestions, + segmentSpecificData.length > 0 ? 'How do segment trends change the consolidated margin outlook?' : 'Does management provide segment-level KPIs in supplemental exhibits?' + ]), + portfolioSignals: uniqueExtractionList([ + ...baseline.portfolioSignals, + companySpecificData.length > 0 ? 'Incorporate company-specific KPI direction into near-term position sizing.' : 'Track future filings for explicit operating KPI disclosures.' + ]), + segmentSpecificData: segmentSpecificData.length > 0 + ? segmentSpecificData + : baseline.segmentSpecificData, + geographicRevenueBreakdown: geographicRevenueBreakdown.length > 0 + ? geographicRevenueBreakdown + : baseline.geographicRevenueBreakdown, + companySpecificData: companySpecificData.length > 0 + ? companySpecificData + : baseline.companySpecificData, + secApiCrossChecks: secApiCrossChecks.length > 0 + ? secApiCrossChecks + : baseline.secApiCrossChecks, + confidence: segmentSpecificData.length + geographicRevenueBreakdown.length + companySpecificData.length > 0 ? 0.4 : 0.3 + }; +} + +function preferExtractionList(primary: string[], fallback: string[]) { + return primary.length > 0 ? primary : fallback; +} + +function mergeExtractionWithFallback(primary: FilingExtraction, fallback: FilingExtraction): FilingExtraction { + return { + summary: primary.summary || fallback.summary, + keyPoints: preferExtractionList(primary.keyPoints, fallback.keyPoints), + redFlags: preferExtractionList(primary.redFlags, fallback.redFlags), + followUpQuestions: preferExtractionList(primary.followUpQuestions, fallback.followUpQuestions), + portfolioSignals: preferExtractionList(primary.portfolioSignals, fallback.portfolioSignals), + segmentSpecificData: preferExtractionList(primary.segmentSpecificData, fallback.segmentSpecificData), + geographicRevenueBreakdown: preferExtractionList(primary.geographicRevenueBreakdown, fallback.geographicRevenueBreakdown), + companySpecificData: preferExtractionList(primary.companySpecificData, fallback.companySpecificData), + secApiCrossChecks: preferExtractionList(primary.secApiCrossChecks, fallback.secApiCrossChecks), + confidence: Math.min(Math.max(primary.confidence, 0), 1) + }; +} + function extractionPrompt(filing: Filing, filingText: string) { return [ 'Extract structured signals from the SEC filing text.', `Company: ${filing.company_name} (${filing.ticker})`, `Form: ${filing.filing_type}`, `Filed: ${filing.filing_date}`, + `SEC API baseline metrics: ${JSON.stringify(filing.metrics ?? {})}`, + 'Use SEC API metrics as canonical numeric values and validate whether each appears consistent with filing text context.', + 'Prioritize company-specific and segment-specific disclosures not covered by SEC endpoint fields (for example same-store sales, geographic mix, segment margin).', 'Return ONLY valid JSON with exactly these keys and no extra keys:', - '{"summary":"string","keyPoints":["string"],"redFlags":["string"],"followUpQuestions":["string"],"portfolioSignals":["string"],"confidence":0}', - `Rules: keyPoints/redFlags/followUpQuestions/portfolioSignals arrays max ${EXTRACTION_MAX_ITEMS} items; each item <= ${EXTRACTION_ITEM_MAX_LENGTH} chars; summary <= ${EXTRACTION_SUMMARY_MAX_LENGTH} chars; confidence between 0 and 1.`, + '{"summary":"string","keyPoints":["string"],"redFlags":["string"],"followUpQuestions":["string"],"portfolioSignals":["string"],"segmentSpecificData":["string"],"geographicRevenueBreakdown":["string"],"companySpecificData":["string"],"secApiCrossChecks":["string"],"confidence":0}', + `Rules: every array max ${EXTRACTION_MAX_ITEMS} items; each item <= ${EXTRACTION_ITEM_MAX_LENGTH} chars; summary <= ${EXTRACTION_SUMMARY_MAX_LENGTH} chars; confidence between 0 and 1.`, 'Filing text follows:', filingText ].join('\n\n'); @@ -225,8 +478,9 @@ function reportPrompt( `Analyze this SEC filing from ${filing.company_name} (${filing.ticker}).`, `Form: ${filing.filing_type}`, `Filed: ${filing.filing_date}`, - `Metrics: ${JSON.stringify(filing.metrics ?? {})}`, + `SEC API baseline metrics: ${JSON.stringify(filing.metrics ?? {})}`, `Structured extraction context (${extractionMeta.source}): ${JSON.stringify(extraction)}`, + 'Use SEC API values as the baseline financials and explicitly reference segment/company-specific details from extraction.', 'Return concise sections: Thesis, Red Flags, Follow-up Questions, Portfolio Impact.' ].join('\n'); } @@ -252,12 +506,37 @@ async function processSyncFilings(task: Task) { const ticker = parseTicker(task.payload.ticker); const limit = parseLimit(task.payload.limit, 20, 1, 50); const filings = await fetchRecentFilings(ticker, limit); - const metricsByCik = new Map(); + const metricsByAccession = new Map(); + const filingsByCik = new Map(); for (const filing of filings) { - if (!metricsByCik.has(filing.cik)) { - const metrics = await fetchFilingMetrics(filing.cik, filing.ticker); - metricsByCik.set(filing.cik, metrics); + const group = filingsByCik.get(filing.cik); + if (group) { + group.push(filing); + continue; + } + + filingsByCik.set(filing.cik, [filing]); + } + + for (const [cik, filingsForCik] of filingsByCik) { + const filingsForFinancialMetrics = filingsForCik.filter((filing) => isFinancialMetricsForm(filing.filingType)); + if (filingsForFinancialMetrics.length === 0) { + continue; + } + + const metricsMap = await fetchFilingMetricsForFilings( + cik, + filingsForCik[0]?.ticker ?? ticker, + filingsForFinancialMetrics.map((filing) => ({ + accessionNumber: filing.accessionNumber, + filingDate: filing.filingDate, + filingType: filing.filingType + })) + ); + + for (const [accessionNumber, metrics] of metricsMap.entries()) { + metricsByAccession.set(accessionNumber, metrics); } } @@ -272,7 +551,7 @@ async function processSyncFilings(task: Task) { filing_url: filing.filingUrl, submission_url: filing.submissionUrl, primary_document: filing.primaryDocument, - metrics: metricsByCik.get(filing.cik) ?? null, + metrics: metricsByAccession.get(filing.accessionNumber) ?? null, links: filingLinks(filing) })) ); @@ -341,6 +620,15 @@ async function processAnalyzeFiling(task: Task) { }); if (filingDocument?.text) { + const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text); + extraction = ruleBasedExtraction; + extractionMeta = { + provider: 'deterministic-fallback', + model: 'filing-rule-based', + source: filingDocument.source, + generatedAt: new Date().toISOString() + }; + const extractionResult = await runAiAnalysis( extractionPrompt(filing, filingDocument.text), 'Return strict JSON only.', @@ -349,7 +637,7 @@ async function processAnalyzeFiling(task: Task) { const parsed = parseExtractionPayload(extractionResult.text); if (parsed) { - extraction = parsed; + extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction); extractionMeta = { provider: extractionResult.provider === 'local-fallback' ? 'deterministic-fallback' : 'ollama', model: extractionResult.model, @@ -360,6 +648,12 @@ async function processAnalyzeFiling(task: Task) { } } catch { extraction = defaultExtraction; + extractionMeta = { + provider: 'deterministic-fallback', + model: 'metadata-fallback', + source: 'metadata_fallback', + generatedAt: new Date().toISOString() + }; } const analysis = await runAiAnalysis( @@ -435,7 +729,8 @@ async function processPortfolioInsights(task: Task) { export const __taskProcessorInternals = { parseExtractionPayload, - deterministicExtractionFallback + deterministicExtractionFallback, + isFinancialMetricsForm }; export async function runTaskProcessor(task: Task) { diff --git a/lib/types.ts b/lib/types.ts index 5c59828..0604170 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -43,6 +43,10 @@ export type FilingExtraction = { redFlags: string[]; followUpQuestions: string[]; portfolioSignals: string[]; + segmentSpecificData: string[]; + geographicRevenueBreakdown: string[]; + companySpecificData: string[]; + secApiCrossChecks: string[]; confidence: number; };