Prioritize SEC financials for 10-K/10-Q and keep other filings qualitative

This commit is contained in:
2026-03-01 00:37:47 -05:00
parent 953d7c0099
commit 2a5b548d89
11 changed files with 773 additions and 149 deletions

View File

@@ -30,6 +30,10 @@ function formatShortDate(value: string) {
return format(new Date(value), 'MMM yyyy');
}
/**
 * Tells whether a filing type is one of the two periodic report forms
 * ('10-K' or '10-Q') for which this page renders numeric metrics.
 * Every other form is rendered as "qualitative only" by the callers.
 */
function hasFinancialSnapshot(filingType: CompanyAnalysis['filings'][number]['filing_type']) {
  switch (filingType) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
export default function AnalysisPage() {
return (
<Suspense fallback={<div className="flex min-h-screen items-center justify-center text-sm text-[color:var(--terminal-muted)]">Loading analysis desk...</div>}>
@@ -110,7 +114,7 @@ function AnalysisPageContent() {
return (
<AppShell
title="Company Analysis"
subtitle="Research a single ticker across pricing, reported financials, filings, and generated AI reports."
subtitle="Research a single ticker across pricing, 10-K/10-Q financials, qualitative filings, and generated AI reports."
actions={(
<Button variant="secondary" onClick={() => void loadAnalysis(ticker)}>
<RefreshCcw className="size-4" />
@@ -246,10 +250,10 @@ function AnalysisPageContent() {
{analysis.filings.map((filing) => (
<tr key={filing.accession_number}>
<td>{format(new Date(filing.filing_date), 'MMM dd, yyyy')}</td>
<td>{filing.filing_type}</td>
<td>{filing.metrics?.revenue ? formatCompactCurrency(filing.metrics.revenue) : 'n/a'}</td>
<td>{filing.metrics?.netIncome ? formatCompactCurrency(filing.metrics.netIncome) : 'n/a'}</td>
<td>{filing.metrics?.totalAssets ? formatCompactCurrency(filing.metrics.totalAssets) : 'n/a'}</td>
<td>{filing.filing_type}{hasFinancialSnapshot(filing.filing_type) ? '' : ' (Qualitative)'}</td>
<td>{hasFinancialSnapshot(filing.filing_type) ? (filing.metrics?.revenue ? formatCompactCurrency(filing.metrics.revenue) : 'n/a') : 'qualitative only'}</td>
<td>{hasFinancialSnapshot(filing.filing_type) ? (filing.metrics?.netIncome ? formatCompactCurrency(filing.metrics.netIncome) : 'n/a') : 'qualitative only'}</td>
<td>{hasFinancialSnapshot(filing.filing_type) ? (filing.metrics?.totalAssets ? formatCompactCurrency(filing.metrics.totalAssets) : 'n/a') : 'qualitative only'}</td>
<td>
{filing.filing_url ? (
<a href={filing.filing_url} target="_blank" rel="noreferrer" className="text-xs text-[color:var(--accent)] hover:text-[color:var(--accent-strong)]">

View File

@@ -35,6 +35,10 @@ function formatFilingDate(value: string) {
return format(date, 'MMM dd, yyyy');
}
/**
 * Returns true when the filing's form type is '10-K' or '10-Q', i.e. the
 * forms for which the filings list shows a numeric snapshot instead of the
 * "Qualitative filing" placeholder.
 */
function hasFinancialSnapshot(filing: Filing) {
  const { filing_type: form } = filing;
  return form === '10-K' || form === '10-Q';
}
function resolveOriginalFilingUrl(filing: Filing) {
if (filing.filing_url) {
return filing.filing_url;
@@ -171,7 +175,7 @@ function FilingsPageContent() {
return (
<AppShell
title="Filings Stream"
subtitle="Sync SEC submissions and generate AI red-flag analysis asynchronously."
subtitle="Sync SEC submissions, keep 10-K/10-Q financial snapshots, and analyze qualitative signals from other forms."
actions={(
<Button variant="secondary" className="w-full sm:w-auto" onClick={() => void loadFilings(searchTicker || undefined)}>
<TimerReset className="size-4" />
@@ -254,6 +258,7 @@ function FilingsPageContent() {
<div className="space-y-3">
<div className="space-y-3 lg:hidden">
{filings.map((filing) => {
const financialForm = hasFinancialSnapshot(filing);
const revenue = filing.metrics?.revenue;
const hasAnalysis = Boolean(filing.analysis?.text || filing.analysis?.legacyInsights);
const originalFilingUrl = resolveOriginalFilingUrl(filing);
@@ -275,8 +280,10 @@ function FilingsPageContent() {
<dl className="mt-3 grid grid-cols-1 gap-2 text-xs sm:grid-cols-2">
<div className="rounded-md border border-[color:var(--line-weak)] px-2 py-1.5">
<dt className="text-[color:var(--terminal-muted)]">Revenue Snapshot</dt>
<dd className="mt-1 text-[color:var(--terminal-bright)]">{revenue ? formatCompactCurrency(revenue) : 'n/a'}</dd>
<dt className="text-[color:var(--terminal-muted)]">Financial Snapshot</dt>
<dd className="mt-1 text-[color:var(--terminal-bright)]">
{financialForm ? (revenue ? formatCompactCurrency(revenue) : 'n/a') : 'Qualitative filing'}
</dd>
</div>
<div className="rounded-md border border-[color:var(--line-weak)] px-2 py-1.5">
<dt className="text-[color:var(--terminal-muted)]">Accession</dt>
@@ -331,6 +338,7 @@ function FilingsPageContent() {
</thead>
<tbody>
{filings.map((filing) => {
const financialForm = hasFinancialSnapshot(filing);
const revenue = filing.metrics?.revenue;
const hasAnalysis = Boolean(filing.analysis?.text || filing.analysis?.legacyInsights);
const originalFilingUrl = resolveOriginalFilingUrl(filing);
@@ -343,7 +351,7 @@ function FilingsPageContent() {
</td>
<td>{filing.filing_type}</td>
<td>{formatFilingDate(filing.filing_date)}</td>
<td>{revenue ? formatCompactCurrency(revenue) : 'n/a'}</td>
<td>{financialForm ? (revenue ? formatCompactCurrency(revenue) : 'n/a') : 'Qualitative filing'}</td>
<td className="max-w-[18rem]">{filing.company_name}</td>
<td>{hasAnalysis ? 'Ready' : 'Not generated'}</td>
<td>

View File

@@ -220,7 +220,7 @@ function FinancialsPageContent() {
return (
<AppShell
title="Financials"
subtitle="Explore filing-derived fundamentals, profitability, and balance sheet dynamics by ticker."
subtitle="Explore 10-K and 10-Q fundamentals, profitability, and balance sheet dynamics by ticker."
actions={(
<Button variant="secondary" onClick={() => void loadFinancials(ticker)}>
<RefreshCcw className="size-4" />
@@ -228,7 +228,7 @@ function FinancialsPageContent() {
</Button>
)}
>
<Panel title="Company Selector" subtitle="Load the latest financial statement trend available in your filings index.">
<Panel title="Company Selector" subtitle="Load the latest 10-K / 10-Q financial statement trend available in your filings index.">
<form
className="flex flex-wrap items-center gap-3"
onSubmit={(event) => {

View File

@@ -1,5 +1,5 @@
import { Elysia, t } from 'elysia';
import type { TaskStatus } from '@/lib/types';
import type { Filing, TaskStatus } from '@/lib/types';
import { auth } from '@/lib/auth';
import { requireAuthenticatedSession } from '@/lib/server/auth-session';
import { asErrorMessage, jsonError } from '@/lib/server/http';
@@ -27,6 +27,7 @@ import {
} from '@/lib/server/tasks';
const ALLOWED_STATUSES: TaskStatus[] = ['queued', 'running', 'completed', 'failed'];
const FINANCIAL_FORMS: ReadonlySet<Filing['filing_type']> = new Set(['10-K', '10-Q']);
function asRecord(value: unknown): Record<string, unknown> {
if (!value || typeof value !== 'object' || Array.isArray(value)) {
@@ -41,6 +42,17 @@ function asPositiveNumber(value: unknown) {
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}
/**
 * Applies the API-level metrics policy to a single filing.
 *
 * Filings whose type is in FINANCIAL_FORMS are returned untouched (same
 * object reference). Any other filing is shallow-copied with `metrics`
 * forced to `null`, so the input object is never mutated.
 */
function withFinancialMetricsPolicy(filing: Filing): Filing {
  const keepsMetrics = FINANCIAL_FORMS.has(filing.filing_type);
  return keepsMetrics ? filing : { ...filing, metrics: null };
}
const authHandler = ({ request }: { request: Request }) => auth.handler(request);
export const app = new Elysia({ prefix: '/api' })
@@ -333,7 +345,9 @@ export const app = new Elysia({ prefix: '/api' })
getQuote(ticker),
getPriceHistory(ticker)
]);
const redactedFilings = filings.map(redactInternalFilingAnalysisFields);
const redactedFilings = filings
.map(redactInternalFilingAnalysisFields)
.map(withFinancialMetricsPolicy);
const latestFiling = redactedFilings[0] ?? null;
const holding = holdings.find((entry) => entry.ticker === ticker) ?? null;
@@ -344,7 +358,7 @@ export const app = new Elysia({ prefix: '/api' })
?? ticker;
const financials = redactedFilings
.filter((entry) => entry.metrics)
.filter((entry) => entry.metrics && FINANCIAL_FORMS.has(entry.filing_type))
.map((entry) => ({
filingDate: entry.filing_date,
filingType: entry.filing_type,
@@ -448,7 +462,7 @@ export const app = new Elysia({ prefix: '/api' })
limit: Number.isFinite(limit) ? limit : 50
});
return Response.json({ filings: filings.map(redactInternalFilingAnalysisFields) });
return Response.json({ filings: filings.map(redactInternalFilingAnalysisFields).map(withFinancialMetricsPolicy) });
}, {
query: t.Object({
ticker: t.Optional(t.String()),

View File

@@ -25,6 +25,10 @@ function filingWithExtraction(): Filing {
redFlags: ['b'],
followUpQuestions: ['c'],
portfolioSignals: ['d'],
segmentSpecificData: ['e'],
geographicRevenueBreakdown: ['f'],
companySpecificData: ['g'],
secApiCrossChecks: ['h'],
confidence: 0.4
},
extractionMeta: {

View File

@@ -26,6 +26,10 @@ type FilingAnalysis = {
redFlags: string[];
followUpQuestions: string[];
portfolioSignals: string[];
segmentSpecificData: string[];
geographicRevenueBreakdown: string[];
companySpecificData: string[];
secApiCrossChecks: string[];
confidence: number;
};
extractionMeta?: {

View File

@@ -1,5 +1,6 @@
import { describe, expect, it, mock } from 'bun:test';
import {
fetchFilingMetricsForFilings,
fetchPrimaryFilingText,
normalizeSecDocumentText,
resolvePrimaryFilingUrl,
@@ -81,4 +82,111 @@ describe('sec filing text helpers', () => {
expect(result?.truncated).toBe(true);
expect(result?.text.length).toBeLessThanOrEqual(1_000);
});
// Happy path: a single companyfacts response is fetched once and its facts are
// attributed to each requested filing via the matching accession number.
it('maps SEC companyfacts metrics to each filing by accession', async () => {
// Stub for global fetch that always returns the same companyfacts-shaped payload:
// five us-gaap tags, each with one USD fact per accession number.
const fetchMock = mock(async (_input: RequestInfo | URL, _init?: RequestInit) => {
return new Response(JSON.stringify({
facts: {
'us-gaap': {
Revenues: {
units: {
USD: [
{ accn: '0000320193-25-000010', val: 101_000, filed: '2025-11-01', form: '10-Q' },
{ accn: '0000320193-25-000020', val: 111_000, filed: '2026-02-01', form: '10-Q' }
]
}
},
NetIncomeLoss: {
units: {
USD: [
{ accn: '0000320193-25-000010', val: 21_000, filed: '2025-11-01', form: '10-Q' },
{ accn: '0000320193-25-000020', val: 25_000, filed: '2026-02-01', form: '10-Q' }
]
}
},
Assets: {
units: {
USD: [
{ accn: '0000320193-25-000010', val: 405_000, filed: '2025-11-01', form: '10-Q' },
{ accn: '0000320193-25-000020', val: 410_000, filed: '2026-02-01', form: '10-Q' }
]
}
},
CashAndCashEquivalentsAtCarryingValue: {
units: {
USD: [
{ accn: '0000320193-25-000010', val: 65_000, filed: '2025-11-01', form: '10-Q' },
{ accn: '0000320193-25-000020', val: 70_000, filed: '2026-02-01', form: '10-Q' }
]
}
},
LongTermDebt: {
units: {
USD: [
{ accn: '0000320193-25-000010', val: 95_000, filed: '2025-11-01', form: '10-Q' },
{ accn: '0000320193-25-000020', val: 98_000, filed: '2026-02-01', form: '10-Q' }
]
}
}
}
}
}), { status: 200 });
}) as unknown as typeof fetch;
// Swap the global fetch for the stub; the original is restored in `finally`
// so a failing assertion cannot leak the stub into other tests.
const originalFetch = globalThis.fetch;
globalThis.fetch = fetchMock;
try {
const map = await fetchFilingMetricsForFilings('0000320193', 'AAPL', [
{
accessionNumber: '0000320193-25-000010',
filingDate: '2025-11-01',
filingType: '10-Q'
},
{
accessionNumber: '0000320193-25-000020',
filingDate: '2026-02-01',
filingType: '10-Q'
}
]);
// One network call serves every filing in the batch.
expect(fetchMock).toHaveBeenCalledTimes(1);
// Each filing receives the values tagged with its own accession number.
expect(map.get('0000320193-25-000010')?.revenue).toBe(101_000);
expect(map.get('0000320193-25-000010')?.netIncome).toBe(21_000);
expect(map.get('0000320193-25-000020')?.revenue).toBe(111_000);
expect(map.get('0000320193-25-000020')?.cash).toBe(70_000);
} finally {
globalThis.fetch = originalFetch;
}
});
// Failure path: when the companyfacts request responds with HTTP 500, every
// requested filing must still get an entry, with all five metrics set to null.
it('returns null-valued metrics when companyfacts lookup fails', async () => {
// Stub for global fetch that always answers with a server error.
const fetchMock = mock(async (_input: RequestInfo | URL, _init?: RequestInit) => {
return new Response('error', { status: 500 });
}) as unknown as typeof fetch;
// Swap the global fetch for the stub; restored in `finally`.
const originalFetch = globalThis.fetch;
globalThis.fetch = fetchMock;
try {
const map = await fetchFilingMetricsForFilings('0000320193', 'AAPL', [
{
accessionNumber: '0000320193-25-000010',
filingDate: '2025-11-01',
filingType: '10-Q'
}
]);
// The lookup is attempted exactly once.
expect(fetchMock).toHaveBeenCalledTimes(1);
// The entry exists and carries only nulls, not fabricated numbers.
expect(map.get('0000320193-25-000010')).toEqual({
revenue: null,
netIncome: null,
totalAssets: null,
cash: null,
debt: null
});
} finally {
globalThis.fetch = originalFetch;
}
});
});

View File

@@ -1,6 +1,7 @@
import type { Filing } from '@/lib/types';
type FilingType = Filing['filing_type'];
type FilingMetrics = NonNullable<Filing['metrics']>;
type TickerDirectoryRecord = {
cik_str: number;
@@ -23,10 +24,21 @@ type RecentFilingsPayload = {
type CompanyFactsPayload = {
facts?: {
'us-gaap'?: Record<string, { units?: Record<string, Array<{ val?: number; end?: string; filed?: string }>> }>;
'us-gaap'?: Record<string, { units?: Record<string, CompanyFactPoint[]> }>;
};
};
/**
 * One entry of a companyfacts unit series
 * (`facts['us-gaap'][tag].units[unit][]` in CompanyFactsPayload).
 * Every field is optional, so consumers must check for presence before use.
 */
type CompanyFactPoint = {
// Reported numeric value of the fact.
val?: number;
// Date string; used as the ordering date when `filed` is absent.
end?: string;
// Date string; preferred ordering date when ranking facts by recency.
filed?: string;
// Accession number of the reporting filing; compared (digits only) against the filing's accession number.
accn?: string;
// Form type of the reporting filing; compared after amendment-suffix normalization.
form?: string;
// NOTE(review): not read by the code visible here — presumably fiscal year; confirm against the SEC payload.
fy?: number;
// NOTE(review): not read by the code visible here — presumably fiscal period (e.g. quarter); confirm.
fp?: string;
// NOTE(review): not read by the code visible here — presumably the SEC "frame" identifier; confirm.
frame?: string;
};
type SecFiling = {
ticker: string;
cik: string;
@@ -58,9 +70,35 @@ export type FilingDocumentText = {
truncated: boolean;
};
/**
 * Identifies the filing for which metrics should be resolved from a
 * companyfacts payload.
 */
type FilingMetricsLookupInput = {
// Accession number of the filing; also used as the key of the returned metrics map.
accessionNumber: string;
// Filing date string; parsed with Date.parse when falling back to date-based matching.
filingDate: string;
// Form type of the filing; used to prefer facts reported on the same form.
filingType: FilingType;
};
const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12;
const FILING_TEXT_MAX_CHARS = 24_000;
/**
 * Candidate us-gaap tag names for each metric. The tags of a metric are
 * tried in the listed order and the first tag that yields a value wins, so
 * the order expresses preference.
 */
const METRIC_TAGS = {
// Revenue tag candidates, most preferred first.
revenue: [
'Revenues',
'SalesRevenueNet',
'RevenueFromContractWithCustomerExcludingAssessedTax',
'TotalRevenuesAndOtherIncome'
],
// Net income tag candidates.
netIncome: ['NetIncomeLoss', 'ProfitLoss'],
// Total assets has a single candidate tag.
totalAssets: ['Assets'],
// Cash tag candidates; the second one also includes restricted cash.
cash: [
'CashAndCashEquivalentsAtCarryingValue',
'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'
],
// Debt tag candidates.
debt: [
'LongTermDebtAndCapitalLeaseObligations',
'LongTermDebtNoncurrent',
'LongTermDebt',
'DebtAndFinanceLeaseLiabilities'
]
} as const;
let tickerCache = new Map<string, TickerDirectoryRecord>();
let tickerCacheLoadedAt = 0;
@@ -140,6 +178,30 @@ function compactAccessionNumber(value: string) {
return value.replace(/-/g, '');
}
/**
 * Reduces an accession number to its ASCII digits so that differently
 * formatted values (with or without dashes) compare equal.
 *
 * @param value Raw accession number; may be missing.
 * @returns The digits of `value` in order, or '' when there are none.
 */
function normalizeAccessionKey(value: string | undefined | null) {
  if (value == null) {
    return '';
  }

  const digits = value.match(/\d/g);
  return digits === null ? '' : digits.join('');
}
/**
 * Canonicalizes a form type: trims whitespace, upper-cases, and strips a
 * single trailing amendment suffix ('/A'), so '10-K/A' is treated as '10-K'.
 *
 * @param value Raw form type; may be missing.
 * @returns The canonical form, or '' when the input is missing or blank.
 */
function normalizeForm(value: string | undefined | null) {
  const amendmentSuffix = '/A';
  const upper = (value ?? '').trim().toUpperCase();

  if (upper.length > 0 && upper.endsWith(amendmentSuffix)) {
    return upper.slice(0, upper.length - amendmentSuffix.length);
  }

  return upper;
}
/**
 * Parses a date string into epoch milliseconds via Date.parse.
 *
 * @param value Date string; may be missing or empty.
 * @returns The timestamp, or NaN when the value is missing, empty, or unparseable.
 */
function parseDate(value: string | undefined | null) {
  return value ? Date.parse(value) : Number.NaN;
}
function normalizeCikForPath(value: string) {
const digits = value.replace(/\D/g, '');
if (!digits) {
@@ -214,42 +276,6 @@ export async function fetchPrimaryFilingText(
};
}
/**
 * Derives a deterministic pseudo-value in [min, max) from a seed string.
 *
 * The seed is folded into a rolling hash (multiplier 33, modulo 100000); the
 * hash's last four decimal digits become a fraction in [0, 1) that is
 * linearly mapped onto the requested range.
 *
 * @param seed Text that selects the value; equal seeds give equal results.
 * @param min Lower bound of the range (returned for an empty seed).
 * @param max Upper bound of the range.
 */
function pseudoMetric(seed: string, min: number, max: number) {
  const rolled = Array.from(seed).reduce(
    (acc, symbol) => (acc * 33 + symbol.charCodeAt(0)) % 100000,
    0
  );
  const unitFraction = (rolled % 10000) / 10000;

  return min + (max - min) * unitFraction;
}
/**
 * Fabricates `limit` placeholder filings for a ticker.
 *
 * Form types cycle through SUPPORTED_FORMS, filing dates step back 35 days
 * per entry from the current time, and accession numbers are built from the
 * current timestamp plus the entry index. All URL fields are null.
 *
 * @param ticker Ticker symbol; trimmed and upper-cased.
 * @param limit Number of placeholder filings to create.
 */
function fallbackFilings(ticker: string, limit: number): SecFiling[] {
  const symbol = ticker.trim().toUpperCase();
  const placeholders: SecFiling[] = [];

  let index = 0;
  while (index < limit) {
    // Each entry is dated 35 days before the previous one.
    const filingDate = new Date(Date.now() - index * 1000 * 60 * 60 * 24 * 35)
      .toISOString()
      .slice(0, 10);

    placeholders.push({
      ticker: symbol,
      cik: String(100000 + index),
      companyName: `${symbol} Holdings Inc.`,
      filingType: SUPPORTED_FORMS[index % SUPPORTED_FORMS.length],
      filingDate,
      accessionNumber: `${Date.now()}-${index}`,
      filingUrl: null,
      submissionUrl: null,
      primaryDocument: null
    });

    index += 1;
  }

  return placeholders;
}
async function fetchJson<T>(url: string): Promise<T> {
const response = await fetch(url, {
headers: {
@@ -301,40 +327,159 @@ async function resolveTicker(ticker: string) {
}
function pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null {
const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units;
return pickFactForFiling(payload, tag, {
accessionNumber: '',
filingDate: '',
filingType: '10-Q'
});
}
function collectFactSeries(payload: CompanyFactsPayload, tag: string): CompanyFactPoint[] {
const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units;
if (!unitCollections) {
return null;
return [];
}
const preferredUnits = ['USD', 'USD/shares'];
const usdSeries: CompanyFactPoint[] = [];
const fallbackSeries: CompanyFactPoint[] = [];
for (const unit of preferredUnits) {
const series = unitCollections[unit];
if (!series?.length) {
for (const [unit, series] of Object.entries(unitCollections)) {
if (!Array.isArray(series) || series.length === 0) {
continue;
}
const best = [...series]
.filter((item) => typeof item.val === 'number')
.sort((a, b) => {
const aDate = Date.parse(a.filed ?? a.end ?? '1970-01-01');
const bDate = Date.parse(b.filed ?? b.end ?? '1970-01-01');
return bDate - aDate;
})[0];
if (unit === 'USD' || /^USD(?!\/shares)/i.test(unit)) {
usdSeries.push(...series);
continue;
}
if (best?.val !== undefined) {
return best.val;
fallbackSeries.push(...series);
}
const points = usdSeries.length > 0 ? usdSeries : fallbackSeries;
return points.filter((point) => typeof point.val === 'number' && Number.isFinite(point.val));
}
/**
 * Returns the fact with the latest date, where a fact's date is `filed`
 * or, when that is absent, `end`. Facts without a parseable date rank
 * after dated ones. The input array is not mutated.
 *
 * @returns The most recent fact, or null for an empty list.
 */
function pickMostRecentFact(points: CompanyFactPoint[]) {
  const newestFirst = (left: CompanyFactPoint, right: CompanyFactPoint) => {
    const leftTime = parseDate(left.filed ?? left.end);
    const rightTime = parseDate(right.filed ?? right.end);
    const leftDated = Number.isFinite(leftTime);
    const rightDated = Number.isFinite(rightTime);

    if (leftDated && rightDated) {
      return rightTime - leftTime;
    }
    if (rightDated) {
      // Only the right-hand fact has a date, so it goes first.
      return 1;
    }
    return leftDated ? -1 : 0;
  };

  const ordered = points.slice().sort(newestFirst);
  return ordered[0] ?? null;
}
/**
 * Picks the fact that best corresponds to a target date.
 *
 * A fact's date is `filed` or, when that is absent, `end`. Selection order:
 *  1. No points: null.
 *  2. Target date not finite, or no point has a parseable date: the most
 *     recent fact according to pickMostRecentFact.
 *  3. The latest fact dated on or before the target.
 *  4. Otherwise the fact closest to the target (all remaining facts are
 *     dated after it).
 *
 * Facts that share the same date are ranked by later `end`. A single filing
 * can report several periods for one tag with an identical `filed` date, and
 * without this tie-break the winner would depend on series order and could
 * be an older comparative period.
 *
 * @param points Candidate facts; not mutated.
 * @param targetDate Epoch milliseconds to match against; may be NaN.
 * @returns The selected fact, or null when `points` is empty.
 */
function pickClosestByDate(points: CompanyFactPoint[], targetDate: number) {
  type DatedPoint = { point: CompanyFactPoint; date: number; periodEnd: number };

  if (points.length === 0) {
    return null;
  }

  if (!Number.isFinite(targetDate)) {
    return pickMostRecentFact(points);
  }

  const dated: DatedPoint[] = points
    .map((point) => ({
      point,
      date: parseDate(point.filed ?? point.end),
      periodEnd: parseDate(point.end)
    }))
    .filter((entry) => Number.isFinite(entry.date));

  if (dated.length === 0) {
    return pickMostRecentFact(points);
  }

  // Tie-break: later period end first; entries without a parseable `end` rank last.
  const byLaterPeriodEnd = (a: DatedPoint, b: DatedPoint) => {
    const aEnd = Number.isFinite(a.periodEnd) ? a.periodEnd : Number.NEGATIVE_INFINITY;
    const bEnd = Number.isFinite(b.periodEnd) ? b.periodEnd : Number.NEGATIVE_INFINITY;
    if (aEnd === bEnd) {
      return 0;
    }
    return bEnd > aEnd ? 1 : -1;
  };

  const beforeTarget = dated.filter((entry) => entry.date <= targetDate);
  if (beforeTarget.length > 0) {
    return beforeTarget.sort((a, b) => (b.date - a.date) || byLaterPeriodEnd(a, b))[0]?.point ?? null;
  }

  return dated.sort((a, b) => {
    const distance = Math.abs(a.date - targetDate) - Math.abs(b.date - targetDate);
    if (distance !== 0) {
      return distance;
    }

    return (b.date - a.date) || byLaterPeriodEnd(a, b);
  })[0]?.point ?? null;
}
/**
 * Resolves the value of one us-gaap tag for a specific filing.
 *
 * Resolution order:
 *  1. Facts whose accession number (digits only) matches the filing. Among
 *     those, only facts with the latest `end` date are considered, because a
 *     filing can report several periods for one tag and they normally share
 *     one `filed` date, so ranking by filing date alone cannot separate the
 *     current period from comparatives.
 *  2. Facts reported on the same form type (amendment suffix ignored),
 *     chosen relative to the filing date by pickClosestByDate.
 *  3. Any fact for the tag, chosen by the same date rule.
 *
 * NOTE(review): facts with the same `end` but different durations (e.g.
 * quarterly vs. year-to-date) cannot be told apart here, since
 * CompanyFactPoint carries no period start.
 *
 * @param payload Parsed companyfacts payload.
 * @param tag us-gaap tag name to look up.
 * @param filing Accession number, date, and form type of the filing.
 * @returns A finite number, or null when no usable fact exists.
 */
function pickFactForFiling(
  payload: CompanyFactsPayload,
  tag: string,
  filing: FilingMetricsLookupInput
): number | null {
  const points = collectFactSeries(payload, tag);
  if (points.length === 0) {
    return null;
  }

  const accessionKey = normalizeAccessionKey(filing.accessionNumber);
  if (accessionKey) {
    const byAccession = points.filter((point) => normalizeAccessionKey(point.accn) === accessionKey);
    if (byAccession.length > 0) {
      // Keep only the facts for the latest reported period end. When no
      // matching fact has a parseable `end`, all matches stay in play.
      const endTimes = byAccession.map((point) => parseDate(point.end));
      const latestEnd = endTimes.reduce(
        (latest, time) => (Number.isFinite(time) && time > latest ? time : latest),
        Number.NEGATIVE_INFINITY
      );
      const currentPeriod = Number.isFinite(latestEnd)
        ? byAccession.filter((_point, index) => endTimes[index] === latestEnd)
        : byAccession;

      const matched = pickMostRecentFact(currentPeriod);
      if (typeof matched?.val === 'number' && Number.isFinite(matched.val)) {
        return matched.val;
      }
    }
  }

  // No accession match: prefer facts reported on the same form type.
  const filingForm = normalizeForm(filing.filingType);
  const byForm = filingForm
    ? points.filter((point) => normalizeForm(point.form) === filingForm)
    : points;

  const targetDate = parseDate(filing.filingDate);
  const bestByForm = pickClosestByDate(byForm, targetDate);
  if (typeof bestByForm?.val === 'number' && Number.isFinite(bestByForm.val)) {
    return bestByForm.val;
  }

  // Last resort: any fact for the tag, regardless of form.
  const bestAny = pickClosestByDate(points, targetDate);
  return typeof bestAny?.val === 'number' && Number.isFinite(bestAny.val)
    ? bestAny.val
    : null;
}
/**
 * Tries each candidate tag in order and returns the first value that
 * pickFactForFiling resolves for the filing.
 *
 * @param payload Parsed companyfacts payload.
 * @param tags Candidate us-gaap tag names, most preferred first.
 * @param filing The filing to resolve the value for.
 * @returns The first non-null value, or null when no tag yields one.
 */
function pickFactByTags(
  payload: CompanyFactsPayload,
  tags: readonly string[],
  filing: FilingMetricsLookupInput
) {
  return tags.reduce<number | null>(
    // Once a value has been found, later tags are not looked up.
    (found, tag) => (found !== null ? found : pickFactForFiling(payload, tag, filing)),
    null
  );
}
/**
 * Builds a fresh metrics record in which every metric is null, used when no
 * figures could be resolved for a filing.
 */
function emptyMetrics(): FilingMetrics {
  const blank: FilingMetrics = {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };

  return blank;
}
export async function fetchRecentFilings(ticker: string, limit = 20): Promise<SecFiling[]> {
const safeLimit = Math.min(Math.max(Math.trunc(limit), 1), 50);
try {
const company = await resolveTicker(ticker);
const cikPadded = company.cik.padStart(10, '0');
const payload = await fetchJson<RecentFilingsPayload>(`https://data.sec.gov/submissions/CIK${cikPadded}.json`);
@@ -342,7 +487,7 @@ export async function fetchRecentFilings(ticker: string, limit = 20): Promise<Se
const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`;
if (!recent) {
return fallbackFilings(company.ticker, safeLimit);
return [];
}
const forms = recent.form ?? [];
@@ -352,9 +497,8 @@ export async function fetchRecentFilings(ticker: string, limit = 20): Promise<Se
const filings: SecFiling[] = [];
for (let i = 0; i < forms.length; i += 1) {
const filingType = forms[i] as FilingType;
if (!SUPPORTED_FORMS.includes(filingType)) {
const normalizedForm = normalizeForm(forms[i]) as FilingType;
if (!SUPPORTED_FORMS.includes(normalizedForm)) {
continue;
}
@@ -373,7 +517,7 @@ export async function fetchRecentFilings(ticker: string, limit = 20): Promise<Se
ticker: company.ticker,
cik: company.cik,
companyName: payload.name ?? company.companyName,
filingType,
filingType: normalizedForm,
filingDate: filingDates[i] ?? todayIso(),
accessionNumber,
filingUrl,
@@ -386,14 +530,10 @@ export async function fetchRecentFilings(ticker: string, limit = 20): Promise<Se
}
}
return filings.length > 0 ? filings : fallbackFilings(company.ticker, safeLimit);
} catch {
return fallbackFilings(ticker, safeLimit);
}
return filings;
}
export async function fetchFilingMetrics(cik: string, ticker: string) {
try {
export async function fetchLatestFilingMetrics(cik: string) {
const normalized = cik.padStart(10, '0');
const payload = await fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`);
@@ -403,14 +543,39 @@ export async function fetchFilingMetrics(cik: string, ticker: string) {
totalAssets: pickLatestFact(payload, 'Assets'),
cash: pickLatestFact(payload, 'CashAndCashEquivalentsAtCarryingValue'),
debt: pickLatestFact(payload, 'LongTermDebt')
};
} satisfies FilingMetrics;
}
export async function fetchFilingMetricsForFilings(
cik: string,
_ticker: string,
filings: FilingMetricsLookupInput[]
) {
const metricsByAccession = new Map<string, FilingMetrics>();
if (filings.length === 0) {
return metricsByAccession;
}
try {
const normalized = cik.padStart(10, '0');
const payload = await fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`);
for (const filing of filings) {
metricsByAccession.set(filing.accessionNumber, {
revenue: pickFactByTags(payload, METRIC_TAGS.revenue, filing),
netIncome: pickFactByTags(payload, METRIC_TAGS.netIncome, filing),
totalAssets: pickFactByTags(payload, METRIC_TAGS.totalAssets, filing),
cash: pickFactByTags(payload, METRIC_TAGS.cash, filing),
debt: pickFactByTags(payload, METRIC_TAGS.debt, filing)
});
}
return metricsByAccession;
} catch {
return {
revenue: Math.round(pseudoMetric(`${ticker}-revenue`, 2_000_000_000, 350_000_000_000)),
netIncome: Math.round(pseudoMetric(`${ticker}-net`, 150_000_000, 40_000_000_000)),
totalAssets: Math.round(pseudoMetric(`${ticker}-assets`, 4_000_000_000, 500_000_000_000)),
cash: Math.round(pseudoMetric(`${ticker}-cash`, 200_000_000, 180_000_000_000)),
debt: Math.round(pseudoMetric(`${ticker}-debt`, 300_000_000, 220_000_000_000))
};
for (const filing of filings) {
metricsByAccession.set(filing.accessionNumber, emptyMetrics());
}
return metricsByAccession;
}
}

View File

@@ -35,6 +35,10 @@ describe('task processor extraction helpers', () => {
redFlags: ['Debt service burden is rising'],
followUpQuestions: ['Is margin guidance sustainable?'],
portfolioSignals: ['Monitor leverage trend'],
segmentSpecificData: ['Services segment outgrew hardware segment.'],
geographicRevenueBreakdown: ['EMEA revenue grew faster than Americas.'],
companySpecificData: ['Same-store sales increased 4.2%.'],
secApiCrossChecks: ['Revenue from SEC API aligns with filing narrative.'],
confidence: 0.72
});
@@ -52,6 +56,10 @@ describe('task processor extraction helpers', () => {
redFlags: [],
followUpQuestions: [],
portfolioSignals: [],
segmentSpecificData: [],
geographicRevenueBreakdown: [],
companySpecificData: [],
secApiCrossChecks: [],
confidence: 0.2,
extra: 'not-allowed'
});
@@ -66,6 +74,16 @@ describe('task processor extraction helpers', () => {
expect(fallback.summary).toContain('Deterministic extraction fallback');
expect(fallback.keyPoints.length).toBeGreaterThan(0);
expect(fallback.redFlags.length).toBeGreaterThan(0);
expect(fallback.segmentSpecificData.length).toBeGreaterThan(0);
expect(fallback.geographicRevenueBreakdown.length).toBeGreaterThan(0);
expect(fallback.companySpecificData.length).toBeGreaterThan(0);
expect(fallback.secApiCrossChecks.length).toBeGreaterThan(0);
expect(fallback.confidence).toBe(0.2);
});
// Pins the form policy: only the 10-K and 10-Q forms count as financial-metric
// filings; 8-K must be rejected.
it('treats only 10-K and 10-Q as financial metric filings', () => {
expect(__taskProcessorInternals.isFinancialMetricsForm('10-K')).toBe(true);
expect(__taskProcessorInternals.isFinancialMetricsForm('10-Q')).toBe(true);
expect(__taskProcessorInternals.isFinancialMetricsForm('8-K')).toBe(false);
});
});

View File

@@ -20,7 +20,7 @@ import {
} from '@/lib/server/repos/holdings';
import { createPortfolioInsight } from '@/lib/server/repos/insights';
import {
fetchFilingMetrics,
fetchFilingMetricsForFilings,
fetchPrimaryFilingText,
fetchRecentFilings
} from '@/lib/server/sec';
@@ -31,11 +31,88 @@ const EXTRACTION_REQUIRED_KEYS = [
'redFlags',
'followUpQuestions',
'portfolioSignals',
'segmentSpecificData',
'geographicRevenueBreakdown',
'companySpecificData',
'secApiCrossChecks',
'confidence'
] as const;
const EXTRACTION_MAX_ITEMS = 6;
const EXTRACTION_ITEM_MAX_LENGTH = 280;
const EXTRACTION_SUMMARY_MAX_LENGTH = 900;
// Case-insensitive, whole-word phrases that mark a line of filing text as a
// segment-level disclosure. A line is selected when any one pattern matches.
const SEGMENT_PATTERNS = [
/\boperating segment\b/i,
/\bsegment revenue\b/i,
/\bsegment margin\b/i,
/\bsegment profit\b/i,
/\bbusiness segment\b/i,
/\breportable segment\b/i
];
// Case-insensitive, whole-word region names and terms that mark a line of
// filing text as a geographic disclosure. A line is selected when any one
// pattern matches.
// NOTE(review): these are plain keyword matches, so a line is selected even
// when the region is mentioned outside a revenue context.
const GEOGRAPHIC_PATTERNS = [
/\bgeographic\b/i,
/\bamericas\b/i,
/\bemea\b/i,
/\bapac\b/i,
/\basia pacific\b/i,
/\bnorth america\b/i,
/\beurope\b/i,
/\bchina\b/i,
/\binternational\b/i
];
// Case-insensitive, whole-word operating-KPI terms (e.g. same-store sales,
// backlog, churn) that mark a line of filing text as a company-specific
// disclosure. A line is selected when any one pattern matches.
const COMPANY_SPECIFIC_PATTERNS = [
// `[- ]` accepts both the hyphenated and the spaced spelling.
/\bsame[- ]store\b/i,
/\bcomparable[- ]store\b/i,
/\bcomp sales\b/i,
/\borganic sales\b/i,
/\bbookings\b/i,
/\bbacklog\b/i,
/\barpu\b/i,
/\bmau\b/i,
/\bdau\b/i,
/\bsubscriber\b/i,
/\boccupancy\b/i,
/\brevpar\b/i,
/\bretention\b/i,
/\bchurn\b/i
];
// Names of the metric fields on a filing's (non-null) `metrics` object.
type FilingMetricKey = keyof NonNullable<Filing['metrics']>;
// One descriptor per metric: `key` selects the value on `filing.metrics`,
// `label` is the text used in generated cross-check lines, and `patterns` are
// the keywords whose presence in the filing text counts as a mention of the
// metric. Only keywords are matched; the numeric value itself is not searched.
const METRIC_CHECK_PATTERNS: Array<{
key: FilingMetricKey;
label: string;
patterns: RegExp[];
}> = [
{
key: 'revenue',
label: 'Revenue',
patterns: [/\brevenue\b/i, /\bsales\b/i]
},
{
key: 'netIncome',
label: 'Net income',
patterns: [/\bnet income\b/i, /\bprofit\b/i]
},
{
key: 'totalAssets',
label: 'Total assets',
// The second pattern also matches any text the first one matches.
patterns: [/\btotal assets\b/i, /\bassets\b/i]
},
{
key: 'cash',
label: 'Cash',
// The first pattern also matches any text the second one matches.
patterns: [/\bcash\b/i, /\bcash equivalents\b/i]
},
{
key: 'debt',
label: 'Debt',
patterns: [/\bdebt\b/i, /\bborrowings\b/i, /\bliabilit(?:y|ies)\b/i]
}
];
/**
 * Returns true only for the '10-K' and '10-Q' form types, the forms treated
 * as carrying financial metrics; every other form yields false.
 */
function isFinancialMetricsForm(form: Filing['filing_type']) {
  switch (form) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
function toTaskResult(value: unknown): Record<string, unknown> {
if (!value || typeof value !== 'object' || Array.isArray(value)) {
@@ -99,6 +176,55 @@ function sanitizeExtractionList(value: unknown) {
return cleaned;
}
/**
 * Sanitizes, de-duplicates, and caps a list of extraction items.
 *
 * Each item is passed through sanitizeExtractionText (bounded by
 * EXTRACTION_ITEM_MAX_LENGTH); items that come back falsy are dropped.
 * Duplicates are detected case-insensitively and the first spelling is kept.
 * Processing stops once EXTRACTION_MAX_ITEMS entries have been collected.
 *
 * @param items Candidate strings; null and undefined entries are allowed.
 * @returns The surviving items in their original order.
 */
function uniqueExtractionList(items: Array<string | null | undefined>) {
  // Map preserves insertion order: key = lower-cased text, value = kept text.
  const kept = new Map<string, string>();

  for (const candidate of items) {
    const cleaned = sanitizeExtractionText(candidate, EXTRACTION_ITEM_MAX_LENGTH);
    if (!cleaned) {
      continue;
    }

    const signature = cleaned.toLowerCase();
    if (kept.has(signature)) {
      continue;
    }

    kept.set(signature, cleaned);
    if (kept.size >= EXTRACTION_MAX_ITEMS) {
      break;
    }
  }

  return Array.from(kept.values());
}
/**
 * Scans filing text line by line and returns the lines that match at least
 * one of the given patterns.
 *
 * Lines are split on newlines (carriage returns count as newlines), have
 * their whitespace collapsed and trimmed, and are ignored when shorter than
 * 24 characters. Scanning stops after twice EXTRACTION_MAX_ITEMS matches;
 * the matches are then de-duplicated and capped by uniqueExtractionList.
 *
 * @param filingText Raw filing text.
 * @param patterns Patterns of which any one selects a line.
 */
function collectTextSignals(filingText: string, patterns: RegExp[]) {
  const hits: string[] = [];
  const rawLines = filingText.replace(/\r/g, '\n').split(/\n+/);

  for (const rawLine of rawLines) {
    const line = rawLine.replace(/\s+/g, ' ').trim();
    if (line.length < 24) {
      continue;
    }

    const isSignal = patterns.some((pattern) => pattern.test(line));
    if (!isSignal) {
      continue;
    }

    hits.push(line);
    // Collect some surplus so that de-duplication can still fill the list.
    if (hits.length >= EXTRACTION_MAX_ITEMS * 2) {
      break;
    }
  }

  return uniqueExtractionList(hits);
}
function parseExtractionPayload(raw: string): FilingExtraction | null {
const fencedJson = raw.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1];
const candidate = fencedJson ?? (() => {
@@ -145,11 +271,26 @@ function parseExtractionPayload(raw: string): FilingExtraction | null {
const redFlags = sanitizeExtractionList(payload.redFlags);
const followUpQuestions = sanitizeExtractionList(payload.followUpQuestions);
const portfolioSignals = sanitizeExtractionList(payload.portfolioSignals);
const segmentSpecificData = sanitizeExtractionList(payload.segmentSpecificData);
const geographicRevenueBreakdown = sanitizeExtractionList(payload.geographicRevenueBreakdown);
const companySpecificData = sanitizeExtractionList(payload.companySpecificData);
const secApiCrossChecks = sanitizeExtractionList(payload.secApiCrossChecks);
const confidenceRaw = typeof payload.confidence === 'number'
? payload.confidence
: Number(payload.confidence);
if (!summary || !keyPoints || !redFlags || !followUpQuestions || !portfolioSignals || !Number.isFinite(confidenceRaw)) {
if (
!summary
|| !keyPoints
|| !redFlags
|| !followUpQuestions
|| !portfolioSignals
|| !segmentSpecificData
|| !geographicRevenueBreakdown
|| !companySpecificData
|| !secApiCrossChecks
|| !Number.isFinite(confidenceRaw)
) {
return null;
}
@@ -159,6 +300,10 @@ function parseExtractionPayload(raw: string): FilingExtraction | null {
redFlags,
followUpQuestions,
portfolioSignals,
segmentSpecificData,
geographicRevenueBreakdown,
companySpecificData,
secApiCrossChecks,
confidence: Math.min(Math.max(confidenceRaw, 0), 1)
};
}
@@ -171,11 +316,37 @@ function metricSnapshotLine(label: string, value: number | null | undefined) {
return `${label}: ${Math.round(value).toLocaleString('en-US')}`;
}
/**
 * Produces one cross-check line per metric in METRIC_CHECK_PATTERNS.
 *
 * For each metric the line states either that the value is unavailable, or
 * the rounded value together with whether any of the metric's keywords occur
 * in the filing text. Only keywords are searched for; the numeric value
 * itself is not looked up in the text.
 *
 * @param filing Filing whose `metrics` supply the values.
 * @param filingText Filing text to scan for keyword mentions.
 * @returns The lines after de-duplication and capping by uniqueExtractionList.
 */
function buildSecApiCrossChecks(filing: Filing, filingText: string) {
  const haystack = filingText.toLowerCase();

  const lines = METRIC_CHECK_PATTERNS.map(({ key, label, patterns }) => {
    const metric = filing.metrics?.[key];
    if (metric === null || metric === undefined || !Number.isFinite(metric)) {
      return `${label}: SEC API metric unavailable for this filing.`;
    }

    const mentioned = patterns.some((pattern) => pattern.test(haystack));
    const formatted = Math.round(metric).toLocaleString('en-US');

    return mentioned
      ? `${label}: SEC API value ${formatted} appears referenced in filing narrative.`
      : `${label}: SEC API value ${formatted} was not confidently located in sampled filing text.`;
  });

  return uniqueExtractionList(lines);
}
function deterministicExtractionFallback(filing: Filing): FilingExtraction {
const metrics = filing.metrics;
return {
summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. Deterministic extraction fallback used due unavailable or invalid local parsing output.`,
summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. Deterministic extraction fallback was used because filing text parsing was unavailable or invalid.`,
keyPoints: [
`${filing.filing_type} filing recorded for ${filing.ticker}.`,
metricSnapshotLine('Revenue', metrics?.revenue),
@@ -197,19 +368,101 @@ function deterministicExtractionFallback(filing: Filing): FilingExtraction {
'Cross-check leverage and liquidity metrics against position sizing rules.',
'Track language shifts around guidance or demand assumptions.'
],
segmentSpecificData: [
'Segment-level disclosures were not parsed in deterministic fallback mode.'
],
geographicRevenueBreakdown: [
'Geographic revenue disclosures were not parsed in deterministic fallback mode.'
],
companySpecificData: [
'Company-specific operating KPIs (for example same-store sales) were not parsed in deterministic fallback mode.'
],
secApiCrossChecks: [
`${metricSnapshotLine('Revenue', metrics?.revenue)} (SEC API baseline; text verification unavailable).`,
`${metricSnapshotLine('Net income', metrics?.netIncome)} (SEC API baseline; text verification unavailable).`
],
confidence: 0.2
};
}
/**
 * Builds an extraction from pattern matching alone.
 *
 * Starts from the deterministic fallback extraction, then scans the filing
 * text for segment, geographic, and company-specific lines and generates the
 * SEC API cross-check lines. The first hit of each scan is promoted into the
 * key points, and the first "not confidently located" cross-check becomes a
 * red flag. Any scan that finds nothing keeps the fallback's placeholder
 * list. Confidence is 0.4 when at least one scan found something, else 0.3.
 *
 * @param filing Filing that supplies identity fields and metrics.
 * @param filingText Filing text to scan.
 */
function buildRuleBasedExtraction(filing: Filing, filingText: string): FilingExtraction {
  const fallback = deterministicExtractionFallback(filing);
  const segments = collectTextSignals(filingText, SEGMENT_PATTERNS);
  const regions = collectTextSignals(filingText, GEOGRAPHIC_PATTERNS);
  const kpis = collectTextSignals(filingText, COMPANY_SPECIFIC_PATTERNS);
  const crossChecks = buildSecApiCrossChecks(filing, filingText);

  const hasSegments = segments.length > 0;
  const hasRegions = regions.length > 0;
  const hasKpis = kpis.length > 0;
  const hasCrossChecks = crossChecks.length > 0;

  // The top hit of each scan, labelled for the key-points list; null when absent.
  const leads = [
    segments[0] ? `Segment detail: ${segments[0]}` : null,
    regions[0] ? `Geographic detail: ${regions[0]}` : null,
    kpis[0] ? `Company-specific KPI: ${kpis[0]}` : null
  ];

  const unverifiedMetric = crossChecks.find((line) => /not confidently located/i.test(line));

  const segmentQuestion = hasSegments
    ? 'How do segment trends change the consolidated margin outlook?'
    : 'Does management provide segment-level KPIs in supplemental exhibits?';
  const kpiSignal = hasKpis
    ? 'Incorporate company-specific KPI direction into near-term position sizing.'
    : 'Track future filings for explicit operating KPI disclosures.';

  const foundAnySignal = segments.length + regions.length + kpis.length > 0;

  return {
    summary: `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. SEC API metrics were retained as the baseline and filing text was scanned for segment and company-specific disclosures.`,
    keyPoints: uniqueExtractionList([...fallback.keyPoints, ...leads]),
    redFlags: uniqueExtractionList([...fallback.redFlags, unverifiedMetric]),
    followUpQuestions: uniqueExtractionList([...fallback.followUpQuestions, segmentQuestion]),
    portfolioSignals: uniqueExtractionList([...fallback.portfolioSignals, kpiSignal]),
    segmentSpecificData: hasSegments ? segments : fallback.segmentSpecificData,
    geographicRevenueBreakdown: hasRegions ? regions : fallback.geographicRevenueBreakdown,
    companySpecificData: hasKpis ? kpis : fallback.companySpecificData,
    secApiCrossChecks: hasCrossChecks ? crossChecks : fallback.secApiCrossChecks,
    confidence: foundAnySignal ? 0.4 : 0.3
  };
}
/**
 * Picks between two extraction lists.
 *
 * @param primary - Preferred list; returned whenever it has at least one item.
 * @param fallback - List returned when `primary` is empty.
 * @returns `primary` if it is non-empty, otherwise `fallback` (same array
 *   instance in both cases, no copy is made).
 */
function preferExtractionList(primary: string[], fallback: string[]) {
  if (primary.length > 0) {
    return primary;
  }

  return fallback;
}
/**
 * Combines a primary extraction with a fallback extraction, field by field.
 *
 * - `summary`: the primary summary, unless it is empty/falsy, in which case
 *   the fallback summary is used.
 * - every list field: the primary list when it has items, otherwise the
 *   fallback list (via `preferExtractionList`).
 * - `confidence`: always the primary confidence, clamped into [0, 1]; the
 *   fallback confidence is never consulted.
 *
 * @param primary - Extraction whose values take precedence.
 * @param fallback - Extraction that fills in empty primary fields.
 * @returns A new FilingExtraction object built from the two inputs.
 */
function mergeExtractionWithFallback(primary: FilingExtraction, fallback: FilingExtraction): FilingExtraction {
  const summary = primary.summary || fallback.summary;

  const keyPoints = preferExtractionList(primary.keyPoints, fallback.keyPoints);
  const redFlags = preferExtractionList(primary.redFlags, fallback.redFlags);
  const followUpQuestions = preferExtractionList(primary.followUpQuestions, fallback.followUpQuestions);
  const portfolioSignals = preferExtractionList(primary.portfolioSignals, fallback.portfolioSignals);
  const segmentSpecificData = preferExtractionList(primary.segmentSpecificData, fallback.segmentSpecificData);
  const geographicRevenueBreakdown = preferExtractionList(
    primary.geographicRevenueBreakdown,
    fallback.geographicRevenueBreakdown
  );
  const companySpecificData = preferExtractionList(primary.companySpecificData, fallback.companySpecificData);
  const secApiCrossChecks = preferExtractionList(primary.secApiCrossChecks, fallback.secApiCrossChecks);

  // Clamp from below first, then from above, so the result stays within 0..1.
  const nonNegativeConfidence = Math.max(primary.confidence, 0);
  const confidence = Math.min(nonNegativeConfidence, 1);

  return {
    summary,
    keyPoints,
    redFlags,
    followUpQuestions,
    portfolioSignals,
    segmentSpecificData,
    geographicRevenueBreakdown,
    companySpecificData,
    secApiCrossChecks,
    confidence
  };
}
function extractionPrompt(filing: Filing, filingText: string) {
return [
'Extract structured signals from the SEC filing text.',
`Company: ${filing.company_name} (${filing.ticker})`,
`Form: ${filing.filing_type}`,
`Filed: ${filing.filing_date}`,
`SEC API baseline metrics: ${JSON.stringify(filing.metrics ?? {})}`,
'Use SEC API metrics as canonical numeric values and validate whether each appears consistent with filing text context.',
'Prioritize company-specific and segment-specific disclosures not covered by SEC endpoint fields (for example same-store sales, geographic mix, segment margin).',
'Return ONLY valid JSON with exactly these keys and no extra keys:',
'{"summary":"string","keyPoints":["string"],"redFlags":["string"],"followUpQuestions":["string"],"portfolioSignals":["string"],"confidence":0}',
`Rules: keyPoints/redFlags/followUpQuestions/portfolioSignals arrays max ${EXTRACTION_MAX_ITEMS} items; each item <= ${EXTRACTION_ITEM_MAX_LENGTH} chars; summary <= ${EXTRACTION_SUMMARY_MAX_LENGTH} chars; confidence between 0 and 1.`,
'{"summary":"string","keyPoints":["string"],"redFlags":["string"],"followUpQuestions":["string"],"portfolioSignals":["string"],"segmentSpecificData":["string"],"geographicRevenueBreakdown":["string"],"companySpecificData":["string"],"secApiCrossChecks":["string"],"confidence":0}',
`Rules: every array max ${EXTRACTION_MAX_ITEMS} items; each item <= ${EXTRACTION_ITEM_MAX_LENGTH} chars; summary <= ${EXTRACTION_SUMMARY_MAX_LENGTH} chars; confidence between 0 and 1.`,
'Filing text follows:',
filingText
].join('\n\n');
@@ -225,8 +478,9 @@ function reportPrompt(
`Analyze this SEC filing from ${filing.company_name} (${filing.ticker}).`,
`Form: ${filing.filing_type}`,
`Filed: ${filing.filing_date}`,
`Metrics: ${JSON.stringify(filing.metrics ?? {})}`,
`SEC API baseline metrics: ${JSON.stringify(filing.metrics ?? {})}`,
`Structured extraction context (${extractionMeta.source}): ${JSON.stringify(extraction)}`,
'Use SEC API values as the baseline financials and explicitly reference segment/company-specific details from extraction.',
'Return concise sections: Thesis, Red Flags, Follow-up Questions, Portfolio Impact.'
].join('\n');
}
@@ -252,12 +506,37 @@ async function processSyncFilings(task: Task) {
const ticker = parseTicker(task.payload.ticker);
const limit = parseLimit(task.payload.limit, 20, 1, 50);
const filings = await fetchRecentFilings(ticker, limit);
const metricsByCik = new Map<string, Filing['metrics']>();
const metricsByAccession = new Map<string, Filing['metrics']>();
const filingsByCik = new Map<string, typeof filings>();
for (const filing of filings) {
if (!metricsByCik.has(filing.cik)) {
const metrics = await fetchFilingMetrics(filing.cik, filing.ticker);
metricsByCik.set(filing.cik, metrics);
const group = filingsByCik.get(filing.cik);
if (group) {
group.push(filing);
continue;
}
filingsByCik.set(filing.cik, [filing]);
}
for (const [cik, filingsForCik] of filingsByCik) {
const filingsForFinancialMetrics = filingsForCik.filter((filing) => isFinancialMetricsForm(filing.filingType));
if (filingsForFinancialMetrics.length === 0) {
continue;
}
const metricsMap = await fetchFilingMetricsForFilings(
cik,
filingsForCik[0]?.ticker ?? ticker,
filingsForFinancialMetrics.map((filing) => ({
accessionNumber: filing.accessionNumber,
filingDate: filing.filingDate,
filingType: filing.filingType
}))
);
for (const [accessionNumber, metrics] of metricsMap.entries()) {
metricsByAccession.set(accessionNumber, metrics);
}
}
@@ -272,7 +551,7 @@ async function processSyncFilings(task: Task) {
filing_url: filing.filingUrl,
submission_url: filing.submissionUrl,
primary_document: filing.primaryDocument,
metrics: metricsByCik.get(filing.cik) ?? null,
metrics: metricsByAccession.get(filing.accessionNumber) ?? null,
links: filingLinks(filing)
}))
);
@@ -341,6 +620,15 @@ async function processAnalyzeFiling(task: Task) {
});
if (filingDocument?.text) {
const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text);
extraction = ruleBasedExtraction;
extractionMeta = {
provider: 'deterministic-fallback',
model: 'filing-rule-based',
source: filingDocument.source,
generatedAt: new Date().toISOString()
};
const extractionResult = await runAiAnalysis(
extractionPrompt(filing, filingDocument.text),
'Return strict JSON only.',
@@ -349,7 +637,7 @@ async function processAnalyzeFiling(task: Task) {
const parsed = parseExtractionPayload(extractionResult.text);
if (parsed) {
extraction = parsed;
extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction);
extractionMeta = {
provider: extractionResult.provider === 'local-fallback' ? 'deterministic-fallback' : 'ollama',
model: extractionResult.model,
@@ -360,6 +648,12 @@ async function processAnalyzeFiling(task: Task) {
}
} catch {
extraction = defaultExtraction;
extractionMeta = {
provider: 'deterministic-fallback',
model: 'metadata-fallback',
source: 'metadata_fallback',
generatedAt: new Date().toISOString()
};
}
const analysis = await runAiAnalysis(
@@ -435,7 +729,8 @@ async function processPortfolioInsights(task: Task) {
export const __taskProcessorInternals = {
parseExtractionPayload,
deterministicExtractionFallback
deterministicExtractionFallback,
isFinancialMetricsForm
};
export async function runTaskProcessor(task: Task) {

View File

@@ -43,6 +43,10 @@ export type FilingExtraction = {
redFlags: string[];
followUpQuestions: string[];
portfolioSignals: string[];
segmentSpecificData: string[];
geographicRevenueBreakdown: string[];
companySpecificData: string[];
secApiCrossChecks: string[];
confidence: number;
};