Files
Neon-Desk/lib/server/sec.test.ts

325 lines
11 KiB
TypeScript

import { describe, expect, it, mock } from 'bun:test';
import {
__statementInternals,
fetchFilingMetricsForFilings,
hydrateFilingStatementSnapshot,
fetchPrimaryFilingText,
normalizeSecDocumentText,
resolvePrimaryFilingUrl,
trimSecDocumentTextForPrompt
} from './sec';
/**
 * Suite covering the filing-text utilities (normalising, trimming, URL
 * resolution, primary-document fetching) and the companyfacts metrics lookup
 * imported from './sec'.
 */
describe('sec filing text helpers', () => {
  // Identifiers shared by several cases below.
  const sampleCik = '0000320193';
  const olderAccession = '0000320193-25-000010';
  const newerAccession = '0000320193-25-000020';

  /**
   * Runs `body` while `globalThis.fetch` is replaced by `stub`, then puts the
   * previous implementation back even if `body` throws.
   */
  const withGlobalFetch = async (stub: typeof fetch, body: () => Promise<void>): Promise<void> => {
    const previousFetch = globalThis.fetch;
    globalThis.fetch = stub;
    try {
      await body();
    } finally {
      globalThis.fetch = previousFetch;
    }
  };

  // Entities in the markup must come out decoded; the script tag and its body must not appear.
  it('normalizes html filing content into plain text', () => {
    // Markup is kept flush-left so the template literal's contents are exactly the fixture text.
    const rawHtml = `
<html>
<head>
<style>.x { color: red; }</style>
<script>console.log("ignore")</script>
</head>
<body>
<h1>Quarterly&nbsp;Report</h1>
<p>Revenue &amp; margin improved.</p>
<div>See &#39;Risk Factors&#39; section.</div>
</body>
</html>
`;
    const plainText = normalizeSecDocumentText(rawHtml);

    const expectedFragments = [
      'Quarterly Report',
      'Revenue & margin improved.',
      'See \'Risk Factors\' section.'
    ];
    for (const fragment of expectedFragments) {
      expect(plainText).toContain(fragment);
    }

    const forbiddenFragments = ['<script>', 'console.log'];
    for (const fragment of forbiddenFragments) {
      expect(plainText).not.toContain(fragment);
    }
  });

  // A 4,500-character input with a 2,000-character budget must be flagged as truncated and fit the budget.
  it('trims filing text to prompt budget boundaries', () => {
    const oversized = 'A'.repeat(4_500);
    const { truncated, text } = trimSecDocumentTextForPrompt(oversized, 2_000);

    expect(truncated).toBe(true);
    expect(text.length).toBeLessThanOrEqual(2_000);
  });

  // When `filingUrl` is supplied it is returned unchanged.
  it('prefers explicit filing url when available', () => {
    const resolved = resolvePrimaryFilingUrl({
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/123/x.htm',
      cik: '123',
      accessionNumber: '0000-00-00',
      primaryDocument: 'x.htm'
    });

    expect(resolved).toBe('https://www.sec.gov/Archives/edgar/data/123/x.htm');
  });

  // Without `filingUrl`, the expected URL carries the CIK without leading zeros
  // and the accession number without dashes, followed by the primary document.
  it('reconstructs primary filing url when filing url is absent', () => {
    const resolved = resolvePrimaryFilingUrl({
      filingUrl: null,
      cik: sampleCik,
      accessionNumber: '0000320193-24-000001',
      primaryDocument: 'a10q.htm'
    });

    expect(resolved).toBe('https://www.sec.gov/Archives/edgar/data/320193/000032019324000001/a10q.htm');
  });

  // An injected fetch serves a long HTML body; the result must be clipped to `maxChars`.
  it('fetches, normalizes, and clips primary filing text', async () => {
    const longHtml = `<html><body><p>${'Alpha '.repeat(600)}</p></body></html>`;
    const fetchImpl = mock(async () => new Response(longHtml, { status: 200 })) as unknown as typeof fetch;

    const fetched = await fetchPrimaryFilingText(
      {
        filingUrl: null,
        cik: sampleCik,
        accessionNumber: '0000320193-24-000001',
        primaryDocument: 'a10q.htm'
      },
      { fetchImpl, maxChars: 1_000 }
    );

    expect(fetchImpl).toHaveBeenCalledTimes(1);
    expect(fetched).not.toBeNull();
    expect(fetched?.source).toBe('primary_document');
    expect(fetched?.truncated).toBe(true);
    expect(fetched?.text.length).toBeLessThanOrEqual(1_000);
  });

  // One companyfacts response holds values for two accessions; each filing must
  // receive the values tagged with its own accession number.
  it('maps SEC companyfacts metrics to each filing by accession', async () => {
    // Builds one concept entry carrying a USD value for each of the two filings.
    const usdSeries = (olderVal: number, newerVal: number) => ({
      units: {
        USD: [
          { accn: olderAccession, val: olderVal, filed: '2025-11-01', form: '10-Q' },
          { accn: newerAccession, val: newerVal, filed: '2026-02-01', form: '10-Q' }
        ]
      }
    });
    const companyFacts = {
      facts: {
        'us-gaap': {
          Revenues: usdSeries(101_000, 111_000),
          NetIncomeLoss: usdSeries(21_000, 25_000),
          Assets: usdSeries(405_000, 410_000),
          CashAndCashEquivalentsAtCarryingValue: usdSeries(65_000, 70_000),
          LongTermDebt: usdSeries(95_000, 98_000)
        }
      }
    };
    const fetchMock = mock(async (_input: RequestInfo | URL, _init?: RequestInit) =>
      new Response(JSON.stringify(companyFacts), { status: 200 })
    ) as unknown as typeof fetch;

    await withGlobalFetch(fetchMock, async () => {
      const metricsByAccession = await fetchFilingMetricsForFilings(sampleCik, 'AAPL', [
        { accessionNumber: olderAccession, filingDate: '2025-11-01', filingType: '10-Q' },
        { accessionNumber: newerAccession, filingDate: '2026-02-01', filingType: '10-Q' }
      ]);

      // A single request is expected to serve both filings.
      expect(fetchMock).toHaveBeenCalledTimes(1);

      const olderMetrics = metricsByAccession.get(olderAccession);
      const newerMetrics = metricsByAccession.get(newerAccession);
      expect(olderMetrics?.revenue).toBe(101_000);
      expect(olderMetrics?.netIncome).toBe(21_000);
      expect(newerMetrics?.revenue).toBe(111_000);
      expect(newerMetrics?.cash).toBe(70_000);
    });
  });

  // An HTTP 500 from the lookup must still yield an entry for the filing, with every metric null.
  it('returns null-valued metrics when companyfacts lookup fails', async () => {
    const failingFetch = mock(async (_input: RequestInfo | URL, _init?: RequestInit) =>
      new Response('error', { status: 500 })
    ) as unknown as typeof fetch;

    await withGlobalFetch(failingFetch, async () => {
      const metricsByAccession = await fetchFilingMetricsForFilings(sampleCik, 'AAPL', [
        { accessionNumber: olderAccession, filingDate: '2025-11-01', filingType: '10-Q' }
      ]);

      expect(failingFetch).toHaveBeenCalledTimes(1);
      expect(metricsByAccession.get(olderAccession)).toEqual({
        revenue: null,
        netIncome: null,
        totalAssets: null,
        cash: null,
        debt: null
      });
    });
  });
});
/**
 * Suite covering the statement parsing internals exposed through
 * `__statementInternals` and the end-to-end `hydrateFilingStatementSnapshot`
 * call, driven by canned responses.
 */
describe('statement snapshot parsing', () => {
  // FilingSummary listing a single report; used by both the parser case and the hydration case.
  // Fixture text is kept flush-left so the template literal's contents are exactly as authored.
  const filingSummaryXml = `
<FilingSummary>
<Report>
<ShortName>Statements of Operations</ShortName>
<LongName>Consolidated Statements of Operations</LongName>
<HtmlFileName>income.htm</HtmlFileName>
</Report>
</FilingSummary>
`;

  it('parses FilingSummary reports and statement rows with order/depth/subtotals', () => {
    const reports = __statementInternals.parseFilingSummaryReports(filingSummaryXml);
    expect(reports.length).toBe(1);
    expect(reports[0]?.htmlFileName).toBe('income.htm');

    // Three rows: a top-level concept, a row indented by 24px with a
    // parenthesised amount, and a "Total ..." row without a concept anchor.
    const statementHtml = `
<html>
<table>
<tr>
<td style="padding-left: 0px"><a id="defref_us-gaap_Revenues">Revenue</a></td>
<td>$120,000</td>
</tr>
<tr>
<td style="padding-left: 24px"><a id="defref_us-gaap_CostOfRevenue">Cost of Revenue</a></td>
<td>(50,000)</td>
</tr>
<tr>
<td style="padding-left: 0px">Total Net Income</td>
<td>25,000</td>
</tr>
</table>
</html>
`;
    const rows = __statementInternals.parseStatementRowsFromReport(statementHtml);
    expect(rows.length).toBe(3);

    const [revenueRow, costRow, totalRow] = rows;
    expect(revenueRow?.label).toBe('Revenue');
    expect(revenueRow?.order).toBe(1);
    expect(costRow?.depth).toBe(2);
    // The parenthesised amount is expected to parse as a negative number.
    expect(costRow?.value).toBe(-50_000);
    expect(totalRow?.isSubtotal).toBe(true);
  });

  it('extracts dimensional facts from inline XBRL contexts', () => {
    // One context with an explicit member plus one fact that references it.
    const inlineXbrl = `
<xbrli:context id="ctx_seg">
<xbrli:period><xbrli:endDate>2025-12-31</xbrli:endDate></xbrli:period>
<xbrli:scenario>
<xbrldi:explicitMember dimension="srt:ProductOrServiceAxis">us-gaap:ProductMember</xbrldi:explicitMember>
</xbrli:scenario>
</xbrli:context>
<ix:nonFraction name="us-gaap:Revenues" contextRef="ctx_seg" unitRef="USD">50000</ix:nonFraction>
`;
    const { income } = __statementInternals.parseDimensionFacts(inlineXbrl, 'fallback-period');

    expect(income.length).toBe(1);
    const incomeFact = income[0];
    expect(incomeFact?.axis).toContain('ProductOrServiceAxis');
    expect(incomeFact?.member).toContain('ProductMember');
    // The context's end date is expected as the period id, not the fallback argument.
    expect(incomeFact?.periodId).toBe('2025-12-31');
  });

  it('hydrates a filing snapshot with partial status when only one statement is found', async () => {
    const incomeReportHtml = `
<html>
<table>
<tr>
<td><a id="defref_us-gaap_Revenues">Revenue</a></td>
<td>120000</td>
</tr>
<tr>
<td><a id="defref_us-gaap_NetIncomeLoss">Net Income</a></td>
<td>24000</td>
</tr>
</table>
</html>
`;
    const inlineXbrl = `
<xbrli:context id="ctx_seg">
<xbrli:period><xbrli:endDate>2025-12-31</xbrli:endDate></xbrli:period>
<xbrli:scenario>
<xbrldi:explicitMember dimension="srt:StatementBusinessSegmentsAxis">acme:EnterpriseMember</xbrldi:explicitMember>
</xbrli:scenario>
</xbrli:context>
<ix:nonFraction name="us-gaap:Revenues" contextRef="ctx_seg" unitRef="USD">120000</ix:nonFraction>
`;
    const ok = (body: string) => new Response(body, { status: 200 });

    // Routes by URL suffix; any URL that matches neither suffix receives the inline XBRL fragment.
    const fetchImpl = mock(async (input: RequestInfo | URL, _init?: RequestInit) => {
      const requested = String(input);
      if (requested.endsWith('FilingSummary.xml')) {
        return ok(filingSummaryXml);
      }
      if (requested.endsWith('income.htm')) {
        return ok(incomeReportHtml);
      }
      return ok(inlineXbrl);
    }) as unknown as typeof fetch;

    const snapshot = await hydrateFilingStatementSnapshot(
      {
        filingId: 99,
        ticker: 'MSFT',
        cik: '0000789019',
        accessionNumber: '0000789019-25-000001',
        filingDate: '2025-12-31',
        filingType: '10-K',
        filingUrl: 'https://www.sec.gov/Archives/edgar/data/789019/000078901925000001/msft10k.htm',
        primaryDocument: 'msft10k.htm',
        metrics: {
          revenue: 120_000,
          netIncome: 24_000,
          totalAssets: 450_000,
          cash: 90_000,
          debt: 110_000
        }
      },
      { fetchImpl }
    );

    // Only an income report is listed in the FilingSummary fixture, hence 'partial'.
    expect(snapshot.parse_status).toBe('partial');
    expect(snapshot.statement_bundle?.statements.income.length).toBeGreaterThan(0);
    expect(
      snapshot.standardized_bundle?.statements.income.find((entry) => entry.key === 'revenue')?.values
    ).toBeDefined();
    expect(snapshot.dimension_bundle?.statements.income.length).toBeGreaterThan(0);
  });
});