import { describe, expect, it } from 'bun:test';
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';

/**
 * Tests for `discoverFilingAssets`, driven entirely through an injected
 * `fetchImpl` stub so no network request is made.
 */
describe('taxonomy asset discovery', () => {
  // Filing identifiers shared by both scenarios.
  const cik = '0000123456';
  const accessionNumber = '0000123456-26-000001';
  const filingDirectoryUrl = 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/';

  /**
   * Wraps a Response factory in an async function typed as `fetch`.
   *
   * The stub ignores whatever arguments it is called with and builds a new
   * Response on every invocation. It does not model the real `fetch`
   * signature, which is why the double cast is required.
   */
  function stubFetch(makeResponse: () => Response): typeof fetch {
    const stub = async () => makeResponse();
    return stub as unknown as typeof fetch;
  }

  it('classifies assets and selects ranked instance/pdf candidates', async () => {
    // Directory listing fixture: three XML files and four PDFs.
    const directoryItems = [
      { name: 'abc_htm.xml', size: '900000' },
      { name: 'abc_pre.xml', size: '250000' },
      { name: 'abc_lab.xml', size: '120000' },
      { name: '10k_financial_statements.pdf', size: '400000' },
      { name: 'annual_report.pdf', size: '300000' },
      { name: 'quarter_statement.pdf', size: '200000' },
      { name: 'exhibit99.pdf', size: '500000' }
    ];

    const fetchImpl = stubFetch(() => {
      const payload = JSON.stringify({ directory: { item: directoryItems } });
      return new Response(payload, {
        status: 200,
        headers: { 'content-type': 'application/json' }
      });
    });

    const result = await discoverFilingAssets({
      cik,
      accessionNumber,
      filingUrl: `${filingDirectoryUrl}abc.htm`,
      primaryDocument: 'abc.htm',
      fetchImpl
    });

    expect(result.directoryUrl).toBe(filingDirectoryUrl);

    // The selected instance document is expected to be abc_htm.xml.
    const chosenInstance = result.assets.find((candidate) => {
      return candidate.asset_type === 'instance' && candidate.is_selected;
    });
    expect(chosenInstance?.name).toBe('abc_htm.xml');

    // Exactly three PDFs are expected to be selected; exhibit99.pdf is not one of them.
    const chosenPdfAssets = result.assets.filter((candidate) => {
      return candidate.asset_type === 'pdf' && candidate.is_selected;
    });
    const chosenPdfNames = chosenPdfAssets.map((candidate) => candidate.name);

    expect(chosenPdfNames.length).toBe(3);
    expect(chosenPdfNames).toContain('10k_financial_statements.pdf');
    expect(chosenPdfNames).toContain('annual_report.pdf');
    expect(chosenPdfNames).toContain('quarter_statement.pdf');
    expect(chosenPdfNames).not.toContain('exhibit99.pdf');
  });

  it('falls back to filing url when SEC directory assets are unavailable', async () => {
    // Every request answers 404, so no directory listing can be read.
    const fetchImpl = stubFetch(() => new Response('not found', { status: 404 }));

    const primaryDocument = 'abc.xml';
    const filingUrl = `${filingDirectoryUrl}${primaryDocument}`;

    const result = await discoverFilingAssets({
      cik,
      accessionNumber,
      filingUrl,
      primaryDocument,
      fetchImpl
    });

    // The only asset reported is the filing URL itself, as a selected instance.
    const expectedFallbackAsset = {
      asset_type: 'instance',
      name: 'abc.xml',
      url: 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/abc.xml',
      size_bytes: null,
      score: 6,
      is_selected: true
    };

    expect(result.assets.length).toBe(1);
    expect(result.assets[0]).toEqual(expectedFallbackAsset);
  });
});