diff --git a/.env.example b/.env.example index 1aacdfa..5b888fb 100644 --- a/.env.example +++ b/.env.example @@ -19,6 +19,12 @@ ZHIPU_API_KEY= ZHIPU_MODEL=glm-4.7-flashx AI_TEMPERATURE=0.2 +# Local extraction model (Ollama, OpenAI-compatible API) +# For host Ollama from Docker, use http://host.docker.internal:11434 +OLLAMA_BASE_URL=http://127.0.0.1:11434 +OLLAMA_MODEL=qwen3:8b +OLLAMA_API_KEY=ollama + # SEC API etiquette SEC_USER_AGENT=Fiscal Clone diff --git a/README.md b/README.md index 11b88b0..c8d22d5 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,9 @@ Turbopack-first rebuild of a fiscal.ai-style terminal with Vercel AI SDK integra - Eden Treaty for type-safe frontend API calls - Workflow DevKit Local World for background task execution - SQLite-backed domain storage (watchlist, holdings, filings, tasks, insights) -- Vercel AI SDK (`ai`) + Zhipu community provider (`zhipu-ai-provider`) for analysis tasks (hardcoded to `https://api.z.ai/api/coding/paas/v4`) +- Vercel AI SDK (`ai`) with dual-model routing: + - Ollama (`@ai-sdk/openai`) for lightweight filing extraction/parsing + - Zhipu (`zhipu-ai-provider`) for heavyweight narrative reports (`https://api.z.ai/api/coding/paas/v4`) ## Run locally @@ -45,7 +47,9 @@ docker compose up --build -d ``` For local Docker, host port mapping comes from `docker-compose.override.yml` (default `http://localhost:3000` via `APP_PORT`). -The app calls Zhipu directly via AI SDK and always targets the Coding API endpoint (`https://api.z.ai/api/coding/paas/v4`), so no extra AI gateway container is required. +The app calls Zhipu directly via AI SDK for heavy reports and calls Ollama for lightweight filing extraction. +When running in Docker and Ollama runs on the host, set `OLLAMA_BASE_URL=http://host.docker.internal:11434`. +Zhipu always targets the Coding API endpoint (`https://api.z.ai/api/coding/paas/v4`). On container startup, the app applies Drizzle migrations automatically before launching Next.js. The app stores SQLite data in Docker volume `fiscal_sqlite_data` (mounted to `/app/data`) and workflow local data in `fiscal_workflow_data` (mounted to `/app/.workflow-data`). @@ -90,6 +94,10 @@ ZHIPU_API_KEY= ZHIPU_MODEL=glm-4.7-flashx # optional generation tuning AI_TEMPERATURE=0.2 + +OLLAMA_BASE_URL=http://127.0.0.1:11434 +OLLAMA_MODEL=qwen3:8b +OLLAMA_API_KEY=ollama SEC_USER_AGENT=Fiscal Clone WORKFLOW_TARGET_WORLD=local @@ -98,6 +106,7 @@ WORKFLOW_LOCAL_QUEUE_CONCURRENCY=100 ``` If `ZHIPU_API_KEY` is unset, the app uses local fallback analysis so task workflows still run. +If Ollama is unavailable, filing extraction falls back to deterministic metadata-based extraction and still proceeds to heavy report generation. `ZHIPU_BASE_URL` is deprecated and ignored; runtime always uses `https://api.z.ai/api/coding/paas/v4`. ## API surface diff --git a/bun.lock b/bun.lock index 6d9f963..a823711 100644 --- a/bun.lock +++ b/bun.lock @@ -5,6 +5,7 @@ "": { "name": "fiscal-frontend", "dependencies": { + "@ai-sdk/openai": "^2.0.62", "@elysiajs/eden": "^1.4.8", "@libsql/client": "^0.17.0", "@tailwindcss/postcss": "^4.2.1", @@ -38,9 +39,11 @@ "packages": { "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.58", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-2e1hBCKsd+7m0hELwrakR1QDfZfFhz9PF2d4qb8TxQueEyApo7ydlEWRpXeKC+KdA2FRV21dMb1G6FxdeNDa2w=="], - "@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + "@ai-sdk/openai": ["@ai-sdk/openai@2.0.95", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@ai-sdk/provider-utils": "3.0.21" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-2CABPaa1UNh7dPyZUIB/Dc4AbvJioFnmryRx45sx7ezBSOdR0zxG6gbrSd/fZ0GVbptSZeLmF9omu10d/GxmJA=="], - "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + "@ai-sdk/provider": ["@ai-sdk/provider@2.0.1", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-KCUwswvsC5VsW2PWFqF8eJgSCu5Ysj7m1TxiHTVA6g7k360bk0RNQENT8KTMAYEs+8fWPD3Uu4dEmzGHc+jGng=="], + + "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.21", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-veuMwTLxsgh31Jjn0SnBABnM1f7ebHhRWcV2ZuY3hP3iJDCZ8VXBaYqcHXoOQDqUXTCas08sKQcHyWK+zl882Q=="], "@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="], @@ -1488,6 +1491,10 @@ "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], + "@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + "@aws-crypto/sha256-browser/@aws-sdk/types": ["@aws-sdk/types@3.973.3", "", { "dependencies": { "@smithy/types": "^4.13.0", "tslib": "^2.6.2" } }, "sha512-tma6D8/xHZHJEUqmr6ksZjZ0onyIUqKDQLyp50ttZJmS0IwFYzxBgp5CxFvpYAnah52V3UtgrqGA6E83gtT7NQ=="], "@aws-crypto/sha256-browser/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="], @@ -1716,6 +1723,10 @@ "@xhmikosr/downloader/file-type": ["file-type@20.5.0", "", { "dependencies": { "@tokenizer/inflate": "^0.2.6", "strtok3": "^10.2.0", "token-types": "^6.0.0", "uint8array-extras": "^1.4.0" } }, "sha512-BfHZtG/l9iMm4Ecianu7P8HRD2tBHLtjXinm4X62XBOYzi7CYA7jyqfJzOvXHqzVrVPYqBo2/GvbARMaaJkKVg=="], + "ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + "ansi-escapes/type-fest": ["type-fest@0.21.3", "", {}, "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w=="], "body-parser/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], @@ -1792,10 +1803,6 @@ "wsl-utils/is-wsl": ["is-wsl@3.1.1", "", { "dependencies": { "is-inside-container": "^1.0.0" } }, "sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw=="], - "zhipu-ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@2.0.1", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-KCUwswvsC5VsW2PWFqF8eJgSCu5Ysj7m1TxiHTVA6g7k360bk0RNQENT8KTMAYEs+8fWPD3Uu4dEmzGHc+jGng=="], - - "zhipu-ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.21", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-veuMwTLxsgh31Jjn0SnBABnM1f7ebHhRWcV2ZuY3hP3iJDCZ8VXBaYqcHXoOQDqUXTCas08sKQcHyWK+zl882Q=="], - "@aws-crypto/sha256-browser/@aws-sdk/types/@smithy/types": ["@smithy/types@4.13.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-COuLsZILbbQsdrwKQpkkpyep7lCsByxwj7m0Mg5v66/ZTyenlfBc40/QFQ5chO0YN/PNEH1Bi3fGtfXPnYNeDw=="], "@aws-crypto/sha256-browser/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="], diff --git a/docker-compose.yml b/docker-compose.yml index 889c353..cf82dd6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,6 +21,9 @@ services: ZHIPU_API_KEY: ${ZHIPU_API_KEY:-} ZHIPU_MODEL: ${ZHIPU_MODEL:-glm-4.7-flashx} AI_TEMPERATURE: ${AI_TEMPERATURE:-0.2} + OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://127.0.0.1:11434} + OLLAMA_MODEL: ${OLLAMA_MODEL:-qwen3:8b} + OLLAMA_API_KEY: ${OLLAMA_API_KEY:-ollama} SEC_USER_AGENT: ${SEC_USER_AGENT:-Fiscal Clone } WORKFLOW_TARGET_WORLD: local WORKFLOW_LOCAL_DATA_DIR: ${WORKFLOW_LOCAL_DATA_DIR:-/app/.workflow-data} diff --git a/lib/server/ai.test.ts b/lib/server/ai.test.ts index b4f4e32..1cd8a0c 100644 --- a/lib/server/ai.test.ts +++ b/lib/server/ai.test.ts @@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, mock } from 'bun:test'; import { __resetAiWarningsForTests, getAiConfig, + getExtractionAiConfig, runAiAnalysis } from './ai'; @@ -154,4 +155,65 @@ describe('ai config and runtime', () => { }) ).rejects.toThrow('AI SDK returned an empty response'); }); + + it('uses ollama defaults for extraction workload config', () => { + const config = getExtractionAiConfig({ + env: {}, + warn: () => {} + }); + + expect(config.provider).toBe('ollama'); + expect(config.baseUrl).toBe('http://127.0.0.1:11434'); + expect(config.model).toBe('qwen3:8b'); + expect(config.apiKey).toBe('ollama'); + expect(config.temperature).toBe(0); + }); + + it('uses extraction workload and returns ollama provider on success', async () => { + const createModel = mock((config: { + provider: string; + apiKey?: string; + model: string; + baseUrl: string; + temperature: number; + }) => { + expect(config.provider).toBe('ollama'); + expect(config.baseUrl).toBe('http://127.0.0.1:11434'); + expect(config.model).toBe('qwen3:8b'); + expect(config.temperature).toBe(0); + return { modelId: config.model }; + }); + const generate = mock(async () => ({ text: '{"summary":"ok","keyPoints":[],"redFlags":[],"followUpQuestions":[],"portfolioSignals":[],"confidence":0.6}' })); + + const result = await runAiAnalysis('Extract this filing', 'Return JSON', { + env: { + OLLAMA_MODEL: 'qwen3:8b' + }, + warn: () => {}, + workload: 'extraction', + createModel, + generate + }); + + expect(createModel).toHaveBeenCalledTimes(1); + expect(generate).toHaveBeenCalledTimes(1); + expect(result.provider).toBe('ollama'); + expect(result.model).toBe('qwen3:8b'); + }); + + it('falls back to local text when extraction workload generation fails', async () => { + const result = await runAiAnalysis('Extract this filing', 'Return JSON', { + env: {}, + warn: () => {}, + workload: 'extraction', + createModel: () => ({}), + generate: async () => { + throw new Error('ollama unavailable'); + } + }); + + expect(result.provider).toBe('local-fallback'); + expect(result.model).toBe('qwen3:8b'); + expect(result.text).toContain('AI SDK fallback mode is active'); + }); }); diff --git a/lib/server/ai.ts b/lib/server/ai.ts index 2a48c4d..bfab93a 100644 --- a/lib/server/ai.ts +++ b/lib/server/ai.ts @@ -1,7 +1,12 @@ +import { createOpenAI } from '@ai-sdk/openai'; import { generateText } from 'ai'; import { createZhipu } from 'zhipu-ai-provider'; +type AiWorkload = 'report' | 'extraction'; +type AiProvider = 'zhipu' | 'ollama'; + type AiConfig = { + provider: AiProvider; apiKey?: string; baseUrl: string; model: string; @@ -27,11 +32,15 @@ type AiGenerateOutput = { }; type RunAiAnalysisOptions = GetAiConfigOptions & { + workload?: AiWorkload; createModel?: (config: AiConfig) => unknown; generate?: (input: AiGenerateInput) => Promise; }; const CODING_API_BASE_URL = 'https://api.z.ai/api/coding/paas/v4'; +const OLLAMA_BASE_URL = 'http://127.0.0.1:11434'; +const OLLAMA_MODEL = 'qwen3:8b'; +const OLLAMA_API_KEY = 'ollama'; let warnedIgnoredZhipuBaseUrl = false; @@ -74,20 +83,47 @@ function fallbackResponse(prompt: string) { const clipped = prompt.split('\n').slice(0, 6).join(' ').slice(0, 260); return [ - 'AI SDK fallback mode is active (Zhipu configuration is missing).', + 'AI SDK fallback mode is active (live model configuration is missing or unavailable).', 'Thesis: Portfolio remains analyzable with local heuristics until live model access is configured.', 'Risk scan: Concentration and filing sentiment should be monitored after each sync cycle.', `Context digest: ${clipped}` ].join('\n\n'); } +function toOpenAiCompatibleBaseUrl(baseUrl: string) { + const normalized = baseUrl.endsWith('/') + ? baseUrl.slice(0, -1) + : baseUrl; + + return normalized.endsWith('/v1') + ? normalized + : `${normalized}/v1`; +} + +function asErrorMessage(error: unknown) { + if (error instanceof Error && error.message) { + return error.message; + } + + return String(error); +} + function defaultCreateModel(config: AiConfig) { - const zhipu = createZhipu({ - apiKey: config.apiKey, - baseURL: config.baseUrl + if (config.provider === 'zhipu') { + const zhipu = createZhipu({ + apiKey: config.apiKey, + baseURL: config.baseUrl + }); + + return zhipu(config.model); + } + + const openai = createOpenAI({ + apiKey: config.apiKey ?? OLLAMA_API_KEY, + baseURL: toOpenAiCompatibleBaseUrl(config.baseUrl) }); - return zhipu(config.model); + return openai.chat(config.model); } async function defaultGenerate(input: AiGenerateInput): Promise { @@ -102,10 +138,15 @@ async function defaultGenerate(input: AiGenerateInput): Promise entry.ticker === ticker) ?? null; const watchlistItem = watchlist.find((entry) => entry.ticker === ticker) ?? null; @@ -341,7 +343,7 @@ export const app = new Elysia({ prefix: '/api' }) ?? watchlistItem?.company_name ?? ticker; - const financials = filings + const financials = redactedFilings .filter((entry) => entry.metrics) .map((entry) => ({ filingDate: entry.filing_date, @@ -353,7 +355,7 @@ export const app = new Elysia({ prefix: '/api' }) debt: entry.metrics?.debt ?? null })); - const aiReports = filings + const aiReports = redactedFilings .filter((entry) => entry.analysis?.text || entry.analysis?.legacyInsights) .slice(0, 8) .map((entry) => ({ @@ -377,7 +379,7 @@ export const app = new Elysia({ prefix: '/api' }) position: holding, priceHistory, financials, - filings: filings.slice(0, 20), + filings: redactedFilings.slice(0, 20), aiReports } }); @@ -446,7 +448,7 @@ export const app = new Elysia({ prefix: '/api' }) limit: Number.isFinite(limit) ? limit : 50 }); - return Response.json({ filings }); + return Response.json({ filings: filings.map(redactInternalFilingAnalysisFields) }); }, { query: t.Object({ ticker: t.Optional(t.String()), diff --git a/lib/server/api/filing-redaction.test.ts b/lib/server/api/filing-redaction.test.ts new file mode 100644 index 0000000..1567d45 --- /dev/null +++ b/lib/server/api/filing-redaction.test.ts @@ -0,0 +1,52 @@ +import { describe, expect, it } from 'bun:test'; +import type { Filing } from '@/lib/types'; +import { redactInternalFilingAnalysisFields } from './filing-redaction'; + +function filingWithExtraction(): Filing { + return { + id: 7, + ticker: 'MSFT', + filing_type: '10-K', + filing_date: '2026-02-01', + accession_number: '0000789019-26-000001', + cik: '0000789019', + company_name: 'Microsoft Corporation', + filing_url: 'https://www.sec.gov/Archives/edgar/data/789019/000078901926000001/a10k.htm', + submission_url: null, + primary_document: 'a10k.htm', + metrics: null, + analysis: { + provider: 'zhipu', + model: 'glm-4.7-flashx', + text: 'Report text', + extraction: { + summary: 'Internal extraction summary', + keyPoints: ['a'], + redFlags: ['b'], + followUpQuestions: ['c'], + portfolioSignals: ['d'], + confidence: 0.4 + }, + extractionMeta: { + provider: 'ollama', + model: 'qwen3:8b', + source: 'primary_document', + generatedAt: '2026-02-01T00:00:00.000Z' + } + }, + created_at: '2026-02-01T00:00:00.000Z', + updated_at: '2026-02-01T00:00:00.000Z' + }; +} + +describe('filing response redaction', () => { + it('removes internal extraction fields while preserving public analysis fields', () => { + const redacted = redactInternalFilingAnalysisFields(filingWithExtraction()); + + expect(redacted.analysis?.provider).toBe('zhipu'); + expect(redacted.analysis?.model).toBe('glm-4.7-flashx'); + expect(redacted.analysis?.text).toBe('Report text'); + expect(redacted.analysis?.extraction).toBeUndefined(); + expect(redacted.analysis?.extractionMeta).toBeUndefined(); + }); +}); diff --git a/lib/server/api/filing-redaction.ts b/lib/server/api/filing-redaction.ts new file mode 100644 index 0000000..0a69c66 --- /dev/null +++ b/lib/server/api/filing-redaction.ts @@ -0,0 +1,15 @@ +import type { Filing } from '@/lib/types'; + +export function redactInternalFilingAnalysisFields(filing: Filing): Filing { + if (!filing.analysis) { + return filing; + } + + const { extraction: _extraction, extractionMeta: _extractionMeta, ...analysis } = filing.analysis; + const hasPublicFields = Object.keys(analysis).length > 0; + + return { + ...filing, + analysis: hasPublicFields ? analysis : null + }; +} diff --git a/lib/server/db/schema.ts b/lib/server/db/schema.ts index f6a3cc6..cfafb77 100644 --- a/lib/server/db/schema.ts +++ b/lib/server/db/schema.ts @@ -20,6 +20,20 @@ type FilingAnalysis = { model?: string; text?: string; legacyInsights?: string; + extraction?: { + summary: string; + keyPoints: string[]; + redFlags: string[]; + followUpQuestions: string[]; + portfolioSignals: string[]; + confidence: number; + }; + extractionMeta?: { + provider: string; + model: string; + source: 'primary_document' | 'metadata_fallback'; + generatedAt: string; + }; }; const authDateColumn = { diff --git a/lib/server/sec.test.ts b/lib/server/sec.test.ts new file mode 100644 index 0000000..6fbc1e3 --- /dev/null +++ b/lib/server/sec.test.ts @@ -0,0 +1,84 @@ +import { describe, expect, it, mock } from 'bun:test'; +import { + fetchPrimaryFilingText, + normalizeSecDocumentText, + resolvePrimaryFilingUrl, + trimSecDocumentTextForPrompt +} from './sec'; + +describe('sec filing text helpers', () => { + it('normalizes html filing content into plain text', () => { + const html = ` + + + + + + +

Quarterly Report

+

Revenue & margin improved.

+
See 'Risk Factors' section.
+ + + `; + + const normalized = normalizeSecDocumentText(html); + + expect(normalized).toContain('Quarterly Report'); + expect(normalized).toContain('Revenue & margin improved.'); + expect(normalized).toContain('See \'Risk Factors\' section.'); + expect(normalized).not.toContain('