Implement dual-model filing pipeline with Ollama extraction

This commit is contained in:
2026-02-28 16:31:25 -05:00
parent 0615534f4b
commit a09001501e
16 changed files with 872 additions and 51 deletions

View File

@@ -19,6 +19,12 @@ ZHIPU_API_KEY=
ZHIPU_MODEL=glm-4.7-flashx ZHIPU_MODEL=glm-4.7-flashx
AI_TEMPERATURE=0.2 AI_TEMPERATURE=0.2
# Local extraction model (Ollama, OpenAI-compatible API)
# For host Ollama from Docker, use http://host.docker.internal:11434
OLLAMA_BASE_URL=http://127.0.0.1:11434
OLLAMA_MODEL=qwen3:8b
OLLAMA_API_KEY=ollama
# SEC API etiquette # SEC API etiquette
SEC_USER_AGENT=Fiscal Clone <support@fiscal.local> SEC_USER_AGENT=Fiscal Clone <support@fiscal.local>

View File

@@ -14,7 +14,9 @@ Turbopack-first rebuild of a fiscal.ai-style terminal with Vercel AI SDK integra
- Eden Treaty for type-safe frontend API calls - Eden Treaty for type-safe frontend API calls
- Workflow DevKit Local World for background task execution - Workflow DevKit Local World for background task execution
- SQLite-backed domain storage (watchlist, holdings, filings, tasks, insights) - SQLite-backed domain storage (watchlist, holdings, filings, tasks, insights)
- Vercel AI SDK (`ai`) + Zhipu community provider (`zhipu-ai-provider`) for analysis tasks (hardcoded to `https://api.z.ai/api/coding/paas/v4`) - Vercel AI SDK (`ai`) with dual-model routing:
- Ollama (`@ai-sdk/openai`) for lightweight filing extraction/parsing
- Zhipu (`zhipu-ai-provider`) for heavyweight narrative reports (`https://api.z.ai/api/coding/paas/v4`)
## Run locally ## Run locally
@@ -45,7 +47,9 @@ docker compose up --build -d
``` ```
For local Docker, host port mapping comes from `docker-compose.override.yml` (default `http://localhost:3000` via `APP_PORT`). For local Docker, host port mapping comes from `docker-compose.override.yml` (default `http://localhost:3000` via `APP_PORT`).
The app calls Zhipu directly via AI SDK and always targets the Coding API endpoint (`https://api.z.ai/api/coding/paas/v4`), so no extra AI gateway container is required. The app calls Zhipu directly via AI SDK for heavy reports and calls Ollama for lightweight filing extraction.
When running in Docker and Ollama runs on the host, set `OLLAMA_BASE_URL=http://host.docker.internal:11434`.
Zhipu always targets the Coding API endpoint (`https://api.z.ai/api/coding/paas/v4`).
On container startup, the app applies Drizzle migrations automatically before launching Next.js. On container startup, the app applies Drizzle migrations automatically before launching Next.js.
The app stores SQLite data in Docker volume `fiscal_sqlite_data` (mounted to `/app/data`) and workflow local data in `fiscal_workflow_data` (mounted to `/app/.workflow-data`). The app stores SQLite data in Docker volume `fiscal_sqlite_data` (mounted to `/app/data`) and workflow local data in `fiscal_workflow_data` (mounted to `/app/.workflow-data`).
@@ -90,6 +94,10 @@ ZHIPU_API_KEY=
ZHIPU_MODEL=glm-4.7-flashx ZHIPU_MODEL=glm-4.7-flashx
# optional generation tuning # optional generation tuning
AI_TEMPERATURE=0.2 AI_TEMPERATURE=0.2
OLLAMA_BASE_URL=http://127.0.0.1:11434
OLLAMA_MODEL=qwen3:8b
OLLAMA_API_KEY=ollama
SEC_USER_AGENT=Fiscal Clone <support@fiscal.local> SEC_USER_AGENT=Fiscal Clone <support@fiscal.local>
WORKFLOW_TARGET_WORLD=local WORKFLOW_TARGET_WORLD=local
@@ -98,6 +106,7 @@ WORKFLOW_LOCAL_QUEUE_CONCURRENCY=100
``` ```
If `ZHIPU_API_KEY` is unset, the app uses local fallback analysis so task workflows still run. If `ZHIPU_API_KEY` is unset, the app uses local fallback analysis so task workflows still run.
If Ollama is unavailable, filing extraction falls back to deterministic metadata-based extraction and still proceeds to heavy report generation.
`ZHIPU_BASE_URL` is deprecated and ignored; runtime always uses `https://api.z.ai/api/coding/paas/v4`. `ZHIPU_BASE_URL` is deprecated and ignored; runtime always uses `https://api.z.ai/api/coding/paas/v4`.
## API surface ## API surface

View File

@@ -5,6 +5,7 @@
"": { "": {
"name": "fiscal-frontend", "name": "fiscal-frontend",
"dependencies": { "dependencies": {
"@ai-sdk/openai": "^2.0.62",
"@elysiajs/eden": "^1.4.8", "@elysiajs/eden": "^1.4.8",
"@libsql/client": "^0.17.0", "@libsql/client": "^0.17.0",
"@tailwindcss/postcss": "^4.2.1", "@tailwindcss/postcss": "^4.2.1",
@@ -38,9 +39,11 @@
"packages": { "packages": {
"@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.58", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-2e1hBCKsd+7m0hELwrakR1QDfZfFhz9PF2d4qb8TxQueEyApo7ydlEWRpXeKC+KdA2FRV21dMb1G6FxdeNDa2w=="], "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.58", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-2e1hBCKsd+7m0hELwrakR1QDfZfFhz9PF2d4qb8TxQueEyApo7ydlEWRpXeKC+KdA2FRV21dMb1G6FxdeNDa2w=="],
"@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], "@ai-sdk/openai": ["@ai-sdk/openai@2.0.95", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@ai-sdk/provider-utils": "3.0.21" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-2CABPaa1UNh7dPyZUIB/Dc4AbvJioFnmryRx45sx7ezBSOdR0zxG6gbrSd/fZ0GVbptSZeLmF9omu10d/GxmJA=="],
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], "@ai-sdk/provider": ["@ai-sdk/provider@2.0.1", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-KCUwswvsC5VsW2PWFqF8eJgSCu5Ysj7m1TxiHTVA6g7k360bk0RNQENT8KTMAYEs+8fWPD3Uu4dEmzGHc+jGng=="],
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.21", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-veuMwTLxsgh31Jjn0SnBABnM1f7ebHhRWcV2ZuY3hP3iJDCZ8VXBaYqcHXoOQDqUXTCas08sKQcHyWK+zl882Q=="],
"@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="], "@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="],
@@ -1488,6 +1491,10 @@
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
"@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
"@aws-crypto/sha256-browser/@aws-sdk/types": ["@aws-sdk/types@3.973.3", "", { "dependencies": { "@smithy/types": "^4.13.0", "tslib": "^2.6.2" } }, "sha512-tma6D8/xHZHJEUqmr6ksZjZ0onyIUqKDQLyp50ttZJmS0IwFYzxBgp5CxFvpYAnah52V3UtgrqGA6E83gtT7NQ=="], "@aws-crypto/sha256-browser/@aws-sdk/types": ["@aws-sdk/types@3.973.3", "", { "dependencies": { "@smithy/types": "^4.13.0", "tslib": "^2.6.2" } }, "sha512-tma6D8/xHZHJEUqmr6ksZjZ0onyIUqKDQLyp50ttZJmS0IwFYzxBgp5CxFvpYAnah52V3UtgrqGA6E83gtT7NQ=="],
"@aws-crypto/sha256-browser/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="], "@aws-crypto/sha256-browser/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="],
@@ -1716,6 +1723,10 @@
"@xhmikosr/downloader/file-type": ["file-type@20.5.0", "", { "dependencies": { "@tokenizer/inflate": "^0.2.6", "strtok3": "^10.2.0", "token-types": "^6.0.0", "uint8array-extras": "^1.4.0" } }, "sha512-BfHZtG/l9iMm4Ecianu7P8HRD2tBHLtjXinm4X62XBOYzi7CYA7jyqfJzOvXHqzVrVPYqBo2/GvbARMaaJkKVg=="], "@xhmikosr/downloader/file-type": ["file-type@20.5.0", "", { "dependencies": { "@tokenizer/inflate": "^0.2.6", "strtok3": "^10.2.0", "token-types": "^6.0.0", "uint8array-extras": "^1.4.0" } }, "sha512-BfHZtG/l9iMm4Ecianu7P8HRD2tBHLtjXinm4X62XBOYzi7CYA7jyqfJzOvXHqzVrVPYqBo2/GvbARMaaJkKVg=="],
"ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
"ansi-escapes/type-fest": ["type-fest@0.21.3", "", {}, "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w=="], "ansi-escapes/type-fest": ["type-fest@0.21.3", "", {}, "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w=="],
"body-parser/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], "body-parser/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="],
@@ -1792,10 +1803,6 @@
"wsl-utils/is-wsl": ["is-wsl@3.1.1", "", { "dependencies": { "is-inside-container": "^1.0.0" } }, "sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw=="], "wsl-utils/is-wsl": ["is-wsl@3.1.1", "", { "dependencies": { "is-inside-container": "^1.0.0" } }, "sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw=="],
"zhipu-ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@2.0.1", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-KCUwswvsC5VsW2PWFqF8eJgSCu5Ysj7m1TxiHTVA6g7k360bk0RNQENT8KTMAYEs+8fWPD3Uu4dEmzGHc+jGng=="],
"zhipu-ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.21", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-veuMwTLxsgh31Jjn0SnBABnM1f7ebHhRWcV2ZuY3hP3iJDCZ8VXBaYqcHXoOQDqUXTCas08sKQcHyWK+zl882Q=="],
"@aws-crypto/sha256-browser/@aws-sdk/types/@smithy/types": ["@smithy/types@4.13.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-COuLsZILbbQsdrwKQpkkpyep7lCsByxwj7m0Mg5v66/ZTyenlfBc40/QFQ5chO0YN/PNEH1Bi3fGtfXPnYNeDw=="], "@aws-crypto/sha256-browser/@aws-sdk/types/@smithy/types": ["@smithy/types@4.13.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-COuLsZILbbQsdrwKQpkkpyep7lCsByxwj7m0Mg5v66/ZTyenlfBc40/QFQ5chO0YN/PNEH1Bi3fGtfXPnYNeDw=="],
"@aws-crypto/sha256-browser/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="], "@aws-crypto/sha256-browser/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="],

View File

@@ -21,6 +21,9 @@ services:
ZHIPU_API_KEY: ${ZHIPU_API_KEY:-} ZHIPU_API_KEY: ${ZHIPU_API_KEY:-}
ZHIPU_MODEL: ${ZHIPU_MODEL:-glm-4.7-flashx} ZHIPU_MODEL: ${ZHIPU_MODEL:-glm-4.7-flashx}
AI_TEMPERATURE: ${AI_TEMPERATURE:-0.2} AI_TEMPERATURE: ${AI_TEMPERATURE:-0.2}
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://127.0.0.1:11434}
OLLAMA_MODEL: ${OLLAMA_MODEL:-qwen3:8b}
OLLAMA_API_KEY: ${OLLAMA_API_KEY:-ollama}
SEC_USER_AGENT: ${SEC_USER_AGENT:-Fiscal Clone <support@fiscal.local>} SEC_USER_AGENT: ${SEC_USER_AGENT:-Fiscal Clone <support@fiscal.local>}
WORKFLOW_TARGET_WORLD: local WORKFLOW_TARGET_WORLD: local
WORKFLOW_LOCAL_DATA_DIR: ${WORKFLOW_LOCAL_DATA_DIR:-/app/.workflow-data} WORKFLOW_LOCAL_DATA_DIR: ${WORKFLOW_LOCAL_DATA_DIR:-/app/.workflow-data}

View File

@@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, mock } from 'bun:test';
import { import {
__resetAiWarningsForTests, __resetAiWarningsForTests,
getAiConfig, getAiConfig,
getExtractionAiConfig,
runAiAnalysis runAiAnalysis
} from './ai'; } from './ai';
@@ -154,4 +155,65 @@ describe('ai config and runtime', () => {
}) })
).rejects.toThrow('AI SDK returned an empty response'); ).rejects.toThrow('AI SDK returned an empty response');
}); });
// With an empty env, the extraction config must resolve entirely to the built-in Ollama defaults.
it('uses ollama defaults for extraction workload config', () => {
const config = getExtractionAiConfig({
env: {},
warn: () => {}
});
expect(config.provider).toBe('ollama');
expect(config.baseUrl).toBe('http://127.0.0.1:11434');
expect(config.model).toBe('qwen3:8b');
expect(config.apiKey).toBe('ollama');
// Extraction is pinned to temperature 0 regardless of other settings.
expect(config.temperature).toBe(0);
});
// Happy path for workload 'extraction': the injected createModel receives the Ollama config
// and the result is attributed to the 'ollama' provider.
it('uses extraction workload and returns ollama provider on success', async () => {
// Stub model factory that also asserts on the config it is handed.
const createModel = mock((config: {
provider: string;
apiKey?: string;
model: string;
baseUrl: string;
temperature: number;
}) => {
expect(config.provider).toBe('ollama');
expect(config.baseUrl).toBe('http://127.0.0.1:11434');
expect(config.model).toBe('qwen3:8b');
expect(config.temperature).toBe(0);
return { modelId: config.model };
});
// Stub generator returning a non-empty JSON payload so no fallback is triggered.
const generate = mock(async () => ({ text: '{"summary":"ok","keyPoints":[],"redFlags":[],"followUpQuestions":[],"portfolioSignals":[],"confidence":0.6}' }));
const result = await runAiAnalysis('Extract this filing', 'Return JSON', {
env: {
OLLAMA_MODEL: 'qwen3:8b'
},
warn: () => {},
workload: 'extraction',
createModel,
generate
});
expect(createModel).toHaveBeenCalledTimes(1);
expect(generate).toHaveBeenCalledTimes(1);
expect(result.provider).toBe('ollama');
expect(result.model).toBe('qwen3:8b');
});
// A throwing generator under workload 'extraction' must resolve to the local fallback
// payload instead of rejecting.
it('falls back to local text when extraction workload generation fails', async () => {
const result = await runAiAnalysis('Extract this filing', 'Return JSON', {
env: {},
warn: () => {},
workload: 'extraction',
createModel: () => ({}),
generate: async () => {
throw new Error('ollama unavailable');
}
});
expect(result.provider).toBe('local-fallback');
// The fallback result still reports the configured (default) extraction model name.
expect(result.model).toBe('qwen3:8b');
expect(result.text).toContain('AI SDK fallback mode is active');
});
}); });

View File

@@ -1,7 +1,12 @@
import { createOpenAI } from '@ai-sdk/openai';
import { generateText } from 'ai'; import { generateText } from 'ai';
import { createZhipu } from 'zhipu-ai-provider'; import { createZhipu } from 'zhipu-ai-provider';
/** Kind of AI task being run; selects between the report and extraction configurations. */
type AiWorkload = 'report' | 'extraction';
/** Identifier of the model backend a configuration targets. */
type AiProvider = 'zhipu' | 'ollama';
type AiConfig = { type AiConfig = {
provider: AiProvider;
apiKey?: string; apiKey?: string;
baseUrl: string; baseUrl: string;
model: string; model: string;
@@ -27,11 +32,15 @@ type AiGenerateOutput = {
}; };
type RunAiAnalysisOptions = GetAiConfigOptions & { type RunAiAnalysisOptions = GetAiConfigOptions & {
workload?: AiWorkload;
createModel?: (config: AiConfig) => unknown; createModel?: (config: AiConfig) => unknown;
generate?: (input: AiGenerateInput) => Promise<AiGenerateOutput>; generate?: (input: AiGenerateInput) => Promise<AiGenerateOutput>;
}; };
const CODING_API_BASE_URL = 'https://api.z.ai/api/coding/paas/v4'; const CODING_API_BASE_URL = 'https://api.z.ai/api/coding/paas/v4';
// Defaults for the Ollama extraction backend, used when the matching OLLAMA_* env values are absent.
const OLLAMA_BASE_URL = 'http://127.0.0.1:11434';
const OLLAMA_MODEL = 'qwen3:8b';
const OLLAMA_API_KEY = 'ollama';
let warnedIgnoredZhipuBaseUrl = false; let warnedIgnoredZhipuBaseUrl = false;
@@ -74,20 +83,47 @@ function fallbackResponse(prompt: string) {
const clipped = prompt.split('\n').slice(0, 6).join(' ').slice(0, 260); const clipped = prompt.split('\n').slice(0, 6).join(' ').slice(0, 260);
return [ return [
'AI SDK fallback mode is active (Zhipu configuration is missing).', 'AI SDK fallback mode is active (live model configuration is missing or unavailable).',
'Thesis: Portfolio remains analyzable with local heuristics until live model access is configured.', 'Thesis: Portfolio remains analyzable with local heuristics until live model access is configured.',
'Risk scan: Concentration and filing sentiment should be monitored after each sync cycle.', 'Risk scan: Concentration and filing sentiment should be monitored after each sync cycle.',
`Context digest: ${clipped}` `Context digest: ${clipped}`
].join('\n\n'); ].join('\n\n');
} }
/**
 * Returns `baseUrl` in the form expected by an OpenAI-compatible client: no trailing
 * slash and ending in `/v1`.
 *
 * Surrounding whitespace and any number of trailing slashes are removed first, so
 * `http://host`, `http://host/`, `http://host//`, `http://host/v1` and `http://host/v1/`
 * all normalize to `http://host/v1`.
 *
 * @param baseUrl Base URL as configured (with or without the `/v1` suffix).
 * @returns The URL guaranteed to end in `/v1`.
 */
function toOpenAiCompatibleBaseUrl(baseUrl: string) {
  const trimmed = baseUrl.trim();

  // Strip every trailing slash, not just one: otherwise "…/v1//" would fail the
  // suffix check below and come back as "…/v1//v1".
  let end = trimmed.length;
  while (end > 0 && trimmed.charCodeAt(end - 1) === 0x2f) {
    end -= 1;
  }
  const normalized = trimmed.slice(0, end);

  return normalized.endsWith('/v1')
    ? normalized
    : `${normalized}/v1`;
}
/**
 * Produces a printable message for any thrown value.
 *
 * An `Error` with a non-empty `message` yields that message; everything else
 * (including an `Error` whose message is empty) is passed through `String()`.
 */
function asErrorMessage(error: unknown) {
  return error instanceof Error && error.message
    ? error.message
    : String(error);
}
function defaultCreateModel(config: AiConfig) { function defaultCreateModel(config: AiConfig) {
const zhipu = createZhipu({ if (config.provider === 'zhipu') {
apiKey: config.apiKey, const zhipu = createZhipu({
baseURL: config.baseUrl apiKey: config.apiKey,
baseURL: config.baseUrl
});
return zhipu(config.model);
}
const openai = createOpenAI({
apiKey: config.apiKey ?? OLLAMA_API_KEY,
baseURL: toOpenAiCompatibleBaseUrl(config.baseUrl)
}); });
return zhipu(config.model); return openai.chat(config.model);
} }
async function defaultGenerate(input: AiGenerateInput): Promise<AiGenerateOutput> { async function defaultGenerate(input: AiGenerateInput): Promise<AiGenerateOutput> {
@@ -102,10 +138,15 @@ async function defaultGenerate(input: AiGenerateInput): Promise<AiGenerateOutput
} }
export function getAiConfig(options?: GetAiConfigOptions) { export function getAiConfig(options?: GetAiConfigOptions) {
return getReportAiConfig(options);
}
export function getReportAiConfig(options?: GetAiConfigOptions) {
const env = options?.env ?? process.env; const env = options?.env ?? process.env;
warnIgnoredZhipuBaseUrl(env, options?.warn ?? console.warn); warnIgnoredZhipuBaseUrl(env, options?.warn ?? console.warn);
return { return {
provider: 'zhipu',
apiKey: envValue('ZHIPU_API_KEY', env), apiKey: envValue('ZHIPU_API_KEY', env),
baseUrl: CODING_API_BASE_URL, baseUrl: CODING_API_BASE_URL,
model: envValue('ZHIPU_MODEL', env) ?? 'glm-4.7-flashx', model: envValue('ZHIPU_MODEL', env) ?? 'glm-4.7-flashx',
@@ -113,15 +154,30 @@ export function getAiConfig(options?: GetAiConfigOptions) {
} satisfies AiConfig; } satisfies AiConfig;
} }
/**
 * Resolves the configuration for the extraction workload (Ollama provider).
 *
 * Each setting is read through `envValue` from `options.env` (or `process.env` when no
 * env is supplied) and falls back to the module-level `OLLAMA_*` default when the lookup
 * yields nothing. Temperature is always 0.
 *
 * @param options Optional overrides; only `env` is consulted here.
 * @returns A config object that satisfies `AiConfig` with `provider: 'ollama'`.
 */
export function getExtractionAiConfig(options?: GetAiConfigOptions) {
  const source = options?.env ?? process.env;

  const apiKey = envValue('OLLAMA_API_KEY', source) ?? OLLAMA_API_KEY;
  const baseUrl = envValue('OLLAMA_BASE_URL', source) ?? OLLAMA_BASE_URL;
  const model = envValue('OLLAMA_MODEL', source) ?? OLLAMA_MODEL;

  return {
    provider: 'ollama',
    apiKey,
    baseUrl,
    model,
    temperature: 0
  } satisfies AiConfig;
}
export function isAiConfigured(options?: GetAiConfigOptions) { export function isAiConfigured(options?: GetAiConfigOptions) {
const config = getAiConfig(options); const config = getReportAiConfig(options);
return Boolean(config.apiKey); return Boolean(config.apiKey);
} }
export async function runAiAnalysis(prompt: string, systemPrompt?: string, options?: RunAiAnalysisOptions) { export async function runAiAnalysis(prompt: string, systemPrompt?: string, options?: RunAiAnalysisOptions) {
const config = getAiConfig(options); const workload = options?.workload ?? 'report';
const config = workload === 'extraction'
? getExtractionAiConfig(options)
: getReportAiConfig(options);
if (!config.apiKey) { if (workload === 'report' && !config.apiKey) {
return { return {
provider: 'local-fallback', provider: 'local-fallback',
model: config.model, model: config.model,
@@ -131,25 +187,49 @@ export async function runAiAnalysis(prompt: string, systemPrompt?: string, optio
const createModel = options?.createModel ?? defaultCreateModel; const createModel = options?.createModel ?? defaultCreateModel;
const generate = options?.generate ?? defaultGenerate; const generate = options?.generate ?? defaultGenerate;
const model = createModel(config); const warn = options?.warn ?? console.warn;
const result = await generate({ try {
model, const model = createModel(config);
system: systemPrompt,
prompt,
temperature: config.temperature
});
const text = result.text.trim(); const result = await generate({
if (!text) { model,
throw new Error('AI SDK returned an empty response'); system: systemPrompt,
prompt,
temperature: config.temperature
});
const text = result.text.trim();
if (!text) {
if (workload === 'extraction') {
return {
provider: 'local-fallback',
model: config.model,
text: fallbackResponse(prompt)
};
}
throw new Error('AI SDK returned an empty response');
}
return {
provider: config.provider,
model: config.model,
text
};
} catch (error) {
if (workload === 'extraction') {
warn(`[AI SDK] Extraction fallback activated: ${asErrorMessage(error)}`);
return {
provider: 'local-fallback',
model: config.model,
text: fallbackResponse(prompt)
};
}
throw error;
} }
return {
provider: 'zhipu',
model: config.model,
text
};
} }
export function __resetAiWarningsForTests() { export function __resetAiWarningsForTests() {

View File

@@ -4,6 +4,7 @@ import { auth } from '@/lib/auth';
import { requireAuthenticatedSession } from '@/lib/server/auth-session'; import { requireAuthenticatedSession } from '@/lib/server/auth-session';
import { asErrorMessage, jsonError } from '@/lib/server/http'; import { asErrorMessage, jsonError } from '@/lib/server/http';
import { buildPortfolioSummary } from '@/lib/server/portfolio'; import { buildPortfolioSummary } from '@/lib/server/portfolio';
import { redactInternalFilingAnalysisFields } from '@/lib/server/api/filing-redaction';
import { getFilingByAccession, listFilingsRecords } from '@/lib/server/repos/filings'; import { getFilingByAccession, listFilingsRecords } from '@/lib/server/repos/filings';
import { import {
deleteHoldingByIdRecord, deleteHoldingByIdRecord,
@@ -332,8 +333,9 @@ export const app = new Elysia({ prefix: '/api' })
getQuote(ticker), getQuote(ticker),
getPriceHistory(ticker) getPriceHistory(ticker)
]); ]);
const redactedFilings = filings.map(redactInternalFilingAnalysisFields);
const latestFiling = filings[0] ?? null; const latestFiling = redactedFilings[0] ?? null;
const holding = holdings.find((entry) => entry.ticker === ticker) ?? null; const holding = holdings.find((entry) => entry.ticker === ticker) ?? null;
const watchlistItem = watchlist.find((entry) => entry.ticker === ticker) ?? null; const watchlistItem = watchlist.find((entry) => entry.ticker === ticker) ?? null;
@@ -341,7 +343,7 @@ export const app = new Elysia({ prefix: '/api' })
?? watchlistItem?.company_name ?? watchlistItem?.company_name
?? ticker; ?? ticker;
const financials = filings const financials = redactedFilings
.filter((entry) => entry.metrics) .filter((entry) => entry.metrics)
.map((entry) => ({ .map((entry) => ({
filingDate: entry.filing_date, filingDate: entry.filing_date,
@@ -353,7 +355,7 @@ export const app = new Elysia({ prefix: '/api' })
debt: entry.metrics?.debt ?? null debt: entry.metrics?.debt ?? null
})); }));
const aiReports = filings const aiReports = redactedFilings
.filter((entry) => entry.analysis?.text || entry.analysis?.legacyInsights) .filter((entry) => entry.analysis?.text || entry.analysis?.legacyInsights)
.slice(0, 8) .slice(0, 8)
.map((entry) => ({ .map((entry) => ({
@@ -377,7 +379,7 @@ export const app = new Elysia({ prefix: '/api' })
position: holding, position: holding,
priceHistory, priceHistory,
financials, financials,
filings: filings.slice(0, 20), filings: redactedFilings.slice(0, 20),
aiReports aiReports
} }
}); });
@@ -446,7 +448,7 @@ export const app = new Elysia({ prefix: '/api' })
limit: Number.isFinite(limit) ? limit : 50 limit: Number.isFinite(limit) ? limit : 50
}); });
return Response.json({ filings }); return Response.json({ filings: filings.map(redactInternalFilingAnalysisFields) });
}, { }, {
query: t.Object({ query: t.Object({
ticker: t.Optional(t.String()), ticker: t.Optional(t.String()),

View File

@@ -0,0 +1,52 @@
import { describe, expect, it } from 'bun:test';
import type { Filing } from '@/lib/types';
import { redactInternalFilingAnalysisFields } from './filing-redaction';
/**
 * Test fixture: a filing whose `analysis` carries both the public report fields
 * (`provider`, `model`, `text`) and the extraction fields (`extraction`, `extractionMeta`)
 * that the redaction helper is expected to remove.
 */
function filingWithExtraction(): Filing {
return {
id: 7,
ticker: 'MSFT',
filing_type: '10-K',
filing_date: '2026-02-01',
accession_number: '0000789019-26-000001',
cik: '0000789019',
company_name: 'Microsoft Corporation',
filing_url: 'https://www.sec.gov/Archives/edgar/data/789019/000078901926000001/a10k.htm',
submission_url: null,
primary_document: 'a10k.htm',
metrics: null,
analysis: {
// Public report fields that must survive redaction.
provider: 'zhipu',
model: 'glm-4.7-flashx',
text: 'Report text',
// Extraction payload that must be removed by redaction.
extraction: {
summary: 'Internal extraction summary',
keyPoints: ['a'],
redFlags: ['b'],
followUpQuestions: ['c'],
portfolioSignals: ['d'],
confidence: 0.4
},
// Extraction provenance that must be removed by redaction.
extractionMeta: {
provider: 'ollama',
model: 'qwen3:8b',
source: 'primary_document',
generatedAt: '2026-02-01T00:00:00.000Z'
}
},
created_at: '2026-02-01T00:00:00.000Z',
updated_at: '2026-02-01T00:00:00.000Z'
};
}
// Verifies that redaction strips `extraction` / `extractionMeta` and leaves the rest of `analysis` intact.
describe('filing response redaction', () => {
it('removes internal extraction fields while preserving public analysis fields', () => {
const redacted = redactInternalFilingAnalysisFields(filingWithExtraction());
// Public fields are preserved unchanged.
expect(redacted.analysis?.provider).toBe('zhipu');
expect(redacted.analysis?.model).toBe('glm-4.7-flashx');
expect(redacted.analysis?.text).toBe('Report text');
// Extraction fields are gone.
expect(redacted.analysis?.extraction).toBeUndefined();
expect(redacted.analysis?.extractionMeta).toBeUndefined();
});
});

View File

@@ -0,0 +1,15 @@
import type { Filing } from '@/lib/types';
/**
 * Returns a copy of `filing` whose `analysis` no longer carries the `extraction` and
 * `extractionMeta` fields.
 *
 * - A filing without `analysis` is returned as-is (same object reference).
 * - If nothing remains in `analysis` after removing those two fields, `analysis` becomes `null`.
 * - The input filing is never mutated.
 */
export function redactInternalFilingAnalysisFields(filing: Filing): Filing {
  const original = filing.analysis;

  if (original) {
    // Peel off the two extraction fields; everything else is kept.
    const {
      extraction: _droppedExtraction,
      extractionMeta: _droppedExtractionMeta,
      ...publicAnalysis
    } = original;

    const analysis = Object.keys(publicAnalysis).length > 0 ? publicAnalysis : null;
    return { ...filing, analysis };
  }

  return filing;
}

View File

@@ -20,6 +20,20 @@ type FilingAnalysis = {
model?: string; model?: string;
text?: string; text?: string;
legacyInsights?: string; legacyInsights?: string;
extraction?: {
summary: string;
keyPoints: string[];
redFlags: string[];
followUpQuestions: string[];
portfolioSignals: string[];
confidence: number;
};
extractionMeta?: {
provider: string;
model: string;
source: 'primary_document' | 'metadata_fallback';
generatedAt: string;
};
}; };
const authDateColumn = { const authDateColumn = {

84
lib/server/sec.test.ts Normal file
View File

@@ -0,0 +1,84 @@
import { describe, expect, it, mock } from 'bun:test';
import {
fetchPrimaryFilingText,
normalizeSecDocumentText,
resolvePrimaryFilingUrl,
trimSecDocumentTextForPrompt
} from './sec';
// Unit tests for the SEC filing text helpers: HTML normalization, prompt-budget trimming,
// primary-document URL resolution, and the fetch wrapper (exercised with a stubbed fetch).
describe('sec filing text helpers', () => {
// Tags, <script>/<style> bodies and entities must be reduced to readable plain text.
it('normalizes html filing content into plain text', () => {
const html = `
<html>
<head>
<style>.x { color: red; }</style>
<script>console.log("ignore")</script>
</head>
<body>
<h1>Quarterly&nbsp;Report</h1>
<p>Revenue &amp; margin improved.</p>
<div>See &#39;Risk Factors&#39; section.</div>
</body>
</html>
`;
const normalized = normalizeSecDocumentText(html);
expect(normalized).toContain('Quarterly Report');
expect(normalized).toContain('Revenue & margin improved.');
expect(normalized).toContain('See \'Risk Factors\' section.');
expect(normalized).not.toContain('<script>');
expect(normalized).not.toContain('console.log');
});
// Text longer than the budget is cut to at most the budget and flagged as truncated.
it('trims filing text to prompt budget boundaries', () => {
const text = `A`.repeat(4_500);
const result = trimSecDocumentTextForPrompt(text, 2_000);
expect(result.truncated).toBe(true);
expect(result.text.length).toBeLessThanOrEqual(2_000);
});
// A stored filing URL wins over reconstruction from CIK/accession/document.
it('prefers explicit filing url when available', () => {
const url = resolvePrimaryFilingUrl({
filingUrl: 'https://www.sec.gov/Archives/edgar/data/123/x.htm',
cik: '123',
accessionNumber: '0000-00-00',
primaryDocument: 'x.htm'
});
expect(url).toBe('https://www.sec.gov/Archives/edgar/data/123/x.htm');
});
// Without a stored URL, the EDGAR path is rebuilt: CIK loses leading zeros, accession loses dashes.
it('reconstructs primary filing url when filing url is absent', () => {
const url = resolvePrimaryFilingUrl({
filingUrl: null,
cik: '0000320193',
accessionNumber: '0000320193-24-000001',
primaryDocument: 'a10q.htm'
});
expect(url).toBe('https://www.sec.gov/Archives/edgar/data/320193/000032019324000001/a10q.htm');
});
// End-to-end through a mocked fetch: one request, normalized text, clipped to maxChars.
it('fetches, normalizes, and clips primary filing text', async () => {
const longHtml = `<html><body><p>${'Alpha '.repeat(600)}</p></body></html>`;
const fetchImpl = mock(async () => new Response(longHtml, { status: 200 })) as unknown as typeof fetch;
const result = await fetchPrimaryFilingText({
filingUrl: null,
cik: '0000320193',
accessionNumber: '0000320193-24-000001',
primaryDocument: 'a10q.htm'
}, {
fetchImpl,
maxChars: 1_000
});
expect(fetchImpl).toHaveBeenCalledTimes(1);
expect(result).not.toBeNull();
expect(result?.source).toBe('primary_document');
expect(result?.truncated).toBe(true);
expect(result?.text.length).toBeLessThanOrEqual(1_000);
});
});

View File

@@ -39,8 +39,28 @@ type SecFiling = {
primaryDocument: string | null; primaryDocument: string | null;
}; };
/**
 * Identifying fields needed to locate a filing's primary document.
 * `filingUrl` is used directly when present; otherwise the URL is rebuilt from
 * `cik`, `accessionNumber` and `primaryDocument`.
 */
type FilingDocumentInput = {
filingUrl: string | null;
cik: string;
accessionNumber: string;
primaryDocument: string | null;
};
/** Optional knobs for fetching filing text. */
type FetchPrimaryFilingTextOptions = {
// Replacement for the global `fetch` (the tests inject a mock here).
fetchImpl?: typeof fetch;
// Character budget for the returned text; FILING_TEXT_MAX_CHARS is used when omitted.
maxChars?: number;
};
/** Plain-text content obtained from a filing's primary document. */
export type FilingDocumentText = {
source: 'primary_document';
// URL the document was requested from.
url: string;
text: string;
// Whether the text was clipped to the character budget.
truncated: boolean;
};
const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K']; const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12; const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12;
// Default character budget for filing text when the caller supplies no maxChars.
const FILING_TEXT_MAX_CHARS = 24_000;
let tickerCache = new Map<string, TickerDirectoryRecord>(); let tickerCache = new Map<string, TickerDirectoryRecord>();
let tickerCacheLoadedAt = 0; let tickerCacheLoadedAt = 0;
@@ -53,6 +73,147 @@ function todayIso() {
return new Date().toISOString().slice(0, 10); return new Date().toISOString().slice(0, 10);
} }
/**
 * Decodes the HTML entities this module understands into literal characters.
 *
 * Supported: the named entities `&nbsp;`, `&amp;`, `&lt;`, `&gt;`, `&quot;`
 * (case-insensitive) and numeric references in decimal (`&#39;`) or hexadecimal
 * (`&#x27;`) form. Any other `&…;` sequence is left untouched.
 *
 * Decoding happens in a single pass so already-decoded output is never decoded again:
 * `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 *
 * Numeric references that cannot be represented safely (out of Unicode range or a
 * surrogate code point) become a plain space. A no-break space, whether written as
 * `&nbsp;`, `&#160;` or `&#xA0;`, also becomes a plain space.
 *
 * @param value Text that may contain HTML entities.
 * @returns The text with supported entities replaced.
 */
function decodeHtmlEntities(value: string) {
  const namedEntities: Record<string, string> = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"'
  };

  const decodeCodePoint = (code: number) => {
    if (!Number.isFinite(code) || code < 0 || code > 0x10ffff) {
      return ' ';
    }

    // Surrogate code points would produce a lone, ill-formed UTF-16 unit.
    if (code >= 0xd800 && code <= 0xdfff) {
      return ' ';
    }

    // Keep every spelling of no-break space consistent with `&nbsp;`.
    if (code === 0xa0) {
      return ' ';
    }

    return String.fromCodePoint(code);
  };

  return value.replace(
    /&(?:(nbsp|amp|lt|gt|quot)|#x([0-9a-f]+)|#([0-9]+));/gi,
    (match: string, named?: string, hex?: string, decimal?: string) => {
      if (named !== undefined) {
        return namedEntities[named.toLowerCase()] ?? match;
      }

      if (hex !== undefined) {
        return decodeCodePoint(Number.parseInt(hex, 16));
      }

      if (decimal !== undefined) {
        return decodeCodePoint(Number.parseInt(decimal, 10));
      }

      return match;
    }
  );
}
/**
 * Converts raw filing markup into compact plain text.
 *
 * Steps, in order:
 * 1. Strip markup: `\r` becomes `\n`; `<script>`, `<style>`, `<noscript>` elements and
 *    comments are blanked; block-level tags become newlines; all other tags become spaces.
 * 2. Decode HTML entities (after tag stripping, so decoded `<`/`>` are not treated as tags).
 * 3. Tidy whitespace: trim spaces/tabs around newlines, collapse space/tab runs,
 *    limit consecutive newlines to two, and trim the ends.
 *
 * @param raw Raw document content.
 * @returns Normalized plain text (possibly empty).
 */
export function normalizeSecDocumentText(raw: string) {
  type ReplaceRule = [pattern: RegExp, replacement: string];

  const applyRules = (input: string, rules: ReplaceRule[]) =>
    rules.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), input);

  const markupRules: ReplaceRule[] = [
    [/\r/g, '\n'],
    [/<script[\s\S]*?<\/script>/gi, ' '],
    [/<style[\s\S]*?<\/style>/gi, ' '],
    [/<noscript[\s\S]*?<\/noscript>/gi, ' '],
    [/<!--[\s\S]*?-->/g, ' '],
    [/<\/?(p|div|section|article|li|tr|td|th|h[1-6]|br|hr)[^>]*>/gi, '\n'],
    [/<[^>]+>/g, ' ']
  ];

  const whitespaceRules: ReplaceRule[] = [
    [/[ \t]+\n/g, '\n'],
    [/\n[ \t]+/g, '\n'],
    [/[ \t]{2,}/g, ' '],
    [/\n{3,}/g, '\n\n']
  ];

  const withoutMarkup = applyRules(raw, markupRules);
  const decoded = decodeHtmlEntities(withoutMarkup);

  return applyRules(decoded, whitespaceRules).trim();
}
/**
 * Clips filing text to a character budget, preferring to cut at whitespace.
 *
 * The budget is truncated to an integer and never drops below 1,000. When the
 * last newline or space inside the budget lies beyond 70% of it, the text is
 * cut there; otherwise it is cut hard at the budget. Trailing whitespace is
 * removed from a clipped result.
 *
 * @param text Normalized filing text.
 * @param maxChars Requested character budget.
 * @returns The (possibly clipped) text and whether clipping happened.
 */
export function trimSecDocumentTextForPrompt(text: string, maxChars = FILING_TEXT_MAX_CHARS) {
  const limit = Math.max(Math.trunc(maxChars), 1_000);
  if (text.length <= limit) {
    return { text, truncated: false };
  }

  const head = text.slice(0, limit);
  const cutAt = Math.max(head.lastIndexOf('\n'), head.lastIndexOf(' '));
  const kept = cutAt > limit * 0.7 ? head.slice(0, cutAt) : head;

  return { text: kept.trimEnd(), truncated: true };
}
/**
 * Removes every dash from an accession number, e.g.
 * `0000320193-26-000001` becomes `000032019326000001`.
 */
function compactAccessionNumber(value: string) {
  return value.split('-').join('');
}
/**
 * Produces the CIK path segment used in archive URLs: digits only, with
 * leading zeros removed.
 *
 * Leading zeros are stripped with string operations rather than a `Number`
 * round-trip, so long digit strings are never rounded or rendered in exponent
 * notation.
 *
 * @param value CIK in any notation (zero-padded, prefixed, ...).
 * @returns The unpadded digit string, `'0'` for an all-zero input, or `null`
 *   when the input contains no digits.
 */
function normalizeCikForPath(value: string) {
  const digits = value.replace(/\D/g, '');
  if (!digits) {
    return null;
  }

  const unpadded = digits.replace(/^0+/, '');
  return unpadded || '0';
}
/**
 * Determines the URL of a filing's primary document.
 *
 * A non-blank `filingUrl` wins and is returned trimmed. Otherwise the URL is
 * assembled from the CIK, the dash-less accession number and the primary
 * document name.
 *
 * @param input Filing identifiers.
 * @returns The document URL, or `null` when there is neither a direct URL nor
 *   enough information to build one.
 */
export function resolvePrimaryFilingUrl(input: FilingDocumentInput) {
  const explicitUrl = input.filingUrl?.trim();
  if (explicitUrl) {
    return explicitUrl;
  }

  const { primaryDocument } = input;
  if (!primaryDocument) {
    return null;
  }

  const cikSegment = normalizeCikForPath(input.cik);
  const accessionSegment = compactAccessionNumber(input.accessionNumber);
  if (!(cikSegment && accessionSegment)) {
    return null;
  }

  return `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/${primaryDocument}`;
}
/**
 * Downloads a filing's primary document and returns it as prompt-ready text.
 *
 * @param input Filing identifiers used to resolve the document URL.
 * @param options Optional fetch implementation and character budget.
 * @returns The normalized, clipped text with its source URL, or `null` when no
 *   URL can be resolved or the document yields no text.
 * @throws Error when the HTTP response status is not OK.
 */
export async function fetchPrimaryFilingText(
  input: FilingDocumentInput,
  options?: FetchPrimaryFilingTextOptions
): Promise<FilingDocumentText | null> {
  const url = resolvePrimaryFilingUrl(input);
  if (!url) {
    return null;
  }

  const request = options?.fetchImpl ?? fetch;
  const charBudget = options?.maxChars ?? FILING_TEXT_MAX_CHARS;

  const response = await request(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/html, text/plain;q=0.9, */*;q=0.8'
    },
    cache: 'no-store'
  });
  if (!response.ok) {
    throw new Error(`SEC filing request failed (${response.status})`);
  }

  const normalized = normalizeSecDocumentText(await response.text());
  if (!normalized) {
    return null;
  }

  const { text, truncated } = trimSecDocumentTextForPrompt(normalized, charBudget);
  return text ? { source: 'primary_document', url, text, truncated } : null;
}
function pseudoMetric(seed: string, min: number, max: number) { function pseudoMetric(seed: string, min: number, max: number) {
let hash = 0; let hash = 0;
for (const char of seed) { for (const char of seed) {

View File

@@ -0,0 +1,71 @@
import { describe, expect, it } from 'bun:test';
import type { Filing } from '@/lib/types';
import { __taskProcessorInternals } from './task-processors';
/** Builds a fresh, fully populated 10-Q filing fixture (no analysis attached). */
function sampleFiling(): Filing {
  const timestamp = '2026-01-30T00:00:00.000Z';
  const metrics = {
    revenue: 120_000_000_000,
    netIncome: 25_000_000_000,
    totalAssets: 410_000_000_000,
    cash: 70_000_000_000,
    debt: 98_000_000_000
  };

  return {
    id: 1,
    ticker: 'AAPL',
    filing_type: '10-Q',
    filing_date: '2026-01-30',
    accession_number: '0000320193-26-000001',
    cik: '0000320193',
    company_name: 'Apple Inc.',
    filing_url: 'https://www.sec.gov/Archives/edgar/data/320193/000032019326000001/a10q.htm',
    submission_url: 'https://data.sec.gov/submissions/CIK0000320193.json',
    primary_document: 'a10q.htm',
    metrics,
    analysis: null,
    created_at: timestamp,
    updated_at: timestamp
  };
}
// Unit tests for the extraction helpers exposed through __taskProcessorInternals.
describe('task processor extraction helpers', () => {
// Happy path: a payload with exactly the six expected keys parses and keeps its values.
it('parses strict extraction payloads', () => {
const raw = JSON.stringify({
summary: 'Revenue growth remained resilient despite FX pressure.',
keyPoints: ['Revenue up year-over-year'],
redFlags: ['Debt service burden is rising'],
followUpQuestions: ['Is margin guidance sustainable?'],
portfolioSignals: ['Monitor leverage trend'],
confidence: 0.72
});
const parsed = __taskProcessorInternals.parseExtractionPayload(raw);
expect(parsed).not.toBeNull();
expect(parsed?.summary).toContain('Revenue growth');
expect(parsed?.confidence).toBe(0.72);
});
// Strictness: a single unexpected key makes the whole payload invalid.
it('rejects extraction payloads with extra keys', () => {
const raw = JSON.stringify({
summary: 'ok',
keyPoints: [],
redFlags: [],
followUpQuestions: [],
portfolioSignals: [],
confidence: 0.2,
extra: 'not-allowed'
});
const parsed = __taskProcessorInternals.parseExtractionPayload(raw);
expect(parsed).toBeNull();
});
// Fallback: built purely from filing metadata, with non-empty lists and a fixed 0.2 confidence.
it('builds deterministic extraction fallback from filing metadata', () => {
const fallback = __taskProcessorInternals.deterministicExtractionFallback(sampleFiling());
expect(fallback.summary).toContain('Deterministic extraction fallback');
expect(fallback.keyPoints.length).toBeGreaterThan(0);
expect(fallback.redFlags.length).toBeGreaterThan(0);
expect(fallback.confidence).toBe(0.2);
});
});

View File

@@ -1,4 +1,10 @@
import type { Filing, Holding, Task } from '@/lib/types'; import type {
Filing,
FilingExtraction,
FilingExtractionMeta,
Holding,
Task
} from '@/lib/types';
import { runAiAnalysis } from '@/lib/server/ai'; import { runAiAnalysis } from '@/lib/server/ai';
import { buildPortfolioSummary } from '@/lib/server/portfolio'; import { buildPortfolioSummary } from '@/lib/server/portfolio';
import { getQuote } from '@/lib/server/prices'; import { getQuote } from '@/lib/server/prices';
@@ -13,7 +19,23 @@ import {
listUserHoldings listUserHoldings
} from '@/lib/server/repos/holdings'; } from '@/lib/server/repos/holdings';
import { createPortfolioInsight } from '@/lib/server/repos/insights'; import { createPortfolioInsight } from '@/lib/server/repos/insights';
import { fetchFilingMetrics, fetchRecentFilings } from '@/lib/server/sec'; import {
fetchFilingMetrics,
fetchPrimaryFilingText,
fetchRecentFilings
} from '@/lib/server/sec';
// Exact key set an extraction payload must contain; payloads with missing or extra keys are rejected.
const EXTRACTION_REQUIRED_KEYS = [
'summary',
'keyPoints',
'redFlags',
'followUpQuestions',
'portfolioSignals',
'confidence'
] as const;
// Maximum number of entries kept per extraction list.
const EXTRACTION_MAX_ITEMS = 6;
// Maximum length, in characters, of a single list entry.
const EXTRACTION_ITEM_MAX_LENGTH = 280;
// Maximum length, in characters, of the extraction summary.
const EXTRACTION_SUMMARY_MAX_LENGTH = 900;
function toTaskResult(value: unknown): Record<string, unknown> { function toTaskResult(value: unknown): Record<string, unknown> {
if (!value || typeof value !== 'object' || Array.isArray(value)) { if (!value || typeof value !== 'object' || Array.isArray(value)) {
@@ -42,6 +64,173 @@ function parseLimit(raw: unknown, fallback: number, min: number, max: number) {
return Math.min(Math.max(intValue, min), max); return Math.min(Math.max(intValue, min), max);
} }
/**
 * Normalizes a single text field of a model-produced payload.
 *
 * @param value Candidate value of unknown type.
 * @param maxLength Maximum number of characters to keep.
 * @returns The text with whitespace runs collapsed to single spaces, trimmed
 *   and cut to `maxLength`; `null` for non-strings and blank strings.
 */
function sanitizeExtractionText(value: unknown, maxLength: number) {
  if (typeof value === 'string') {
    const singleSpaced = value.trim().replace(/\s+/g, ' ');
    return singleSpaced ? singleSpaced.slice(0, maxLength) : null;
  }

  return null;
}
/**
 * Normalizes a list field of a model-produced payload.
 *
 * Entries that are not usable text are skipped rather than failing the list,
 * and collection stops once `EXTRACTION_MAX_ITEMS` entries have been kept.
 *
 * @param value Candidate value of unknown type.
 * @returns The cleaned entries (possibly empty), or `null` when `value` is not an array.
 */
function sanitizeExtractionList(value: unknown) {
  if (!Array.isArray(value)) {
    return null;
  }

  const kept: string[] = [];
  for (let index = 0; index < value.length && kept.length < EXTRACTION_MAX_ITEMS; index += 1) {
    const text = sanitizeExtractionText(value[index], EXTRACTION_ITEM_MAX_LENGTH);
    if (text) {
      kept.push(text);
    }
  }

  return kept;
}
/**
 * Parses and validates the JSON payload returned by the extraction model.
 *
 * The JSON is taken from the first Markdown code fence when one is present,
 * otherwise from the outermost `{ ... }` span of the raw text. The payload is
 * accepted only when it is a plain object containing exactly the keys in
 * `EXTRACTION_REQUIRED_KEYS`, a non-blank summary, four arrays, and a numeric
 * confidence.
 *
 * Confidence must be a finite number or a non-blank numeric string. Values such
 * as `null`, `""`, `true` or `[]` are rejected instead of being coerced to 0 or
 * 1 by `Number()`.
 *
 * @param raw Raw model output.
 * @returns The sanitized extraction with confidence clamped to [0, 1], or
 *   `null` when the output is not a valid payload.
 */
function parseExtractionPayload(raw: string): FilingExtraction | null {
  const fencedJson = raw.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1];
  const candidate = fencedJson ?? (() => {
    const start = raw.indexOf('{');
    const end = raw.lastIndexOf('}');
    return start >= 0 && end > start ? raw.slice(start, end + 1) : null;
  })();

  if (!candidate) {
    return null;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(candidate);
  } catch {
    return null;
  }

  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return null;
  }

  const payload = parsed as Record<string, unknown>;
  const keys = Object.keys(payload);

  // Same key count plus every required key present implies there are no extras.
  if (
    keys.length !== EXTRACTION_REQUIRED_KEYS.length ||
    !EXTRACTION_REQUIRED_KEYS.every((key) => keys.includes(key))
  ) {
    return null;
  }

  const summary = sanitizeExtractionText(payload.summary, EXTRACTION_SUMMARY_MAX_LENGTH);
  const keyPoints = sanitizeExtractionList(payload.keyPoints);
  const redFlags = sanitizeExtractionList(payload.redFlags);
  const followUpQuestions = sanitizeExtractionList(payload.followUpQuestions);
  const portfolioSignals = sanitizeExtractionList(payload.portfolioSignals);

  // Only genuine numbers and non-blank numeric strings count as a confidence.
  const rawConfidence = payload.confidence;
  let confidence = Number.NaN;
  if (typeof rawConfidence === 'number') {
    confidence = rawConfidence;
  } else if (typeof rawConfidence === 'string' && rawConfidence.trim() !== '') {
    confidence = Number(rawConfidence);
  }

  if (!summary || !keyPoints || !redFlags || !followUpQuestions || !portfolioSignals || !Number.isFinite(confidence)) {
    return null;
  }

  return {
    summary,
    keyPoints,
    redFlags,
    followUpQuestions,
    portfolioSignals,
    confidence: Math.min(Math.max(confidence, 0), 1)
  };
}
/**
 * Formats one metric as `"<label>: <value>"`.
 *
 * @param label Human-readable metric name.
 * @param value Metric value; may be absent.
 * @returns The value rounded to an integer with en-US grouping, or
 *   `"<label>: not reported"` when the value is missing or not finite.
 */
function metricSnapshotLine(label: string, value: number | null | undefined) {
  const isReported = typeof value === 'number' && Number.isFinite(value);

  return isReported
    ? `${label}: ${Math.round(value).toLocaleString('en-US')}`
    : `${label}: not reported`;
}
/**
 * Builds an extraction from filing metadata alone, with no model involved.
 *
 * The result has the same shape as a parsed model extraction and carries a
 * fixed confidence of 0.2.
 *
 * @param filing Filing whose metadata and metrics seed the fallback.
 * @returns A metadata-derived extraction.
 */
function deterministicExtractionFallback(filing: Filing): FilingExtraction {
  const { metrics } = filing;

  const summary = `${filing.company_name} ${filing.filing_type} filed on ${filing.filing_date}. Deterministic extraction fallback used due unavailable or invalid local parsing output.`;

  const documentNote = filing.primary_document
    ? 'Primary document is indexed and available for review.'
    : 'Primary document reference is unavailable in current filing metadata.';

  const keyPoints = [
    `${filing.filing_type} filing recorded for ${filing.ticker}.`,
    metricSnapshotLine('Revenue', metrics?.revenue),
    metricSnapshotLine('Net income', metrics?.netIncome),
    metricSnapshotLine('Total assets', metrics?.totalAssets)
  ];

  const redFlags = [
    metricSnapshotLine('Cash', metrics?.cash),
    metricSnapshotLine('Debt', metrics?.debt),
    documentNote
  ];

  const followUpQuestions = [
    'What changed versus the prior filing in guidance, margins, or liquidity?',
    'Are any material risks under-emphasized relative to historical filings?',
    'Should portfolio exposure be adjusted before the next reporting cycle?'
  ];

  const portfolioSignals = [
    'Validate trend direction using at least two prior filings.',
    'Cross-check leverage and liquidity metrics against position sizing rules.',
    'Track language shifts around guidance or demand assumptions.'
  ];

  return { summary, keyPoints, redFlags, followUpQuestions, portfolioSignals, confidence: 0.2 };
}
/**
 * Assembles the prompt for the extraction model.
 *
 * Sections are separated by blank lines: task statement, filing identity, the
 * required JSON shape, size rules, and finally the filing text.
 *
 * @param filing Filing being analysed.
 * @param filingText Normalized filing text to extract from.
 * @returns The complete prompt string.
 */
function extractionPrompt(filing: Filing, filingText: string) {
  const identity = [
    `Company: ${filing.company_name} (${filing.ticker})`,
    `Form: ${filing.filing_type}`,
    `Filed: ${filing.filing_date}`
  ];

  const schema =
    '{"summary":"string","keyPoints":["string"],"redFlags":["string"],"followUpQuestions":["string"],"portfolioSignals":["string"],"confidence":0}';

  const rules = `Rules: keyPoints/redFlags/followUpQuestions/portfolioSignals arrays max ${EXTRACTION_MAX_ITEMS} items; each item <= ${EXTRACTION_ITEM_MAX_LENGTH} chars; summary <= ${EXTRACTION_SUMMARY_MAX_LENGTH} chars; confidence between 0 and 1.`;

  const sections = [
    'Extract structured signals from the SEC filing text.',
    ...identity,
    'Return ONLY valid JSON with exactly these keys and no extra keys:',
    schema,
    rules,
    'Filing text follows:',
    filingText
  ];

  return sections.join('\n\n');
}
/**
 * Assembles the prompt for the narrative report model.
 *
 * The prompt carries the filing identity, its metrics as JSON (an empty object
 * when absent), and the structured extraction labelled with its source.
 *
 * @param filing Filing being analysed.
 * @param extraction Structured extraction used as context.
 * @param extractionMeta Provenance of the extraction; only `source` is used here.
 * @returns The complete prompt string, one instruction per line.
 */
function reportPrompt(
  filing: Filing,
  extraction: FilingExtraction,
  extractionMeta: FilingExtractionMeta
) {
  const metricsJson = JSON.stringify(filing.metrics ?? {});
  const extractionJson = JSON.stringify(extraction);

  const lines = [
    'You are a fiscal research assistant focused on regulatory signals.',
    `Analyze this SEC filing from ${filing.company_name} (${filing.ticker}).`,
    `Form: ${filing.filing_type}`,
    `Filed: ${filing.filing_date}`,
    `Metrics: ${metricsJson}`,
    `Structured extraction context (${extractionMeta.source}): ${extractionJson}`,
    'Return concise sections: Thesis, Red Flags, Follow-up Questions, Portfolio Impact.'
  ];

  return lines.join('\n');
}
function filingLinks(filing: { function filingLinks(filing: {
filingUrl: string | null; filingUrl: string | null;
submissionUrl: string | null; submissionUrl: string | null;
@@ -134,27 +323,65 @@ async function processAnalyzeFiling(task: Task) {
throw new Error(`Filing ${accessionNumber} not found`); throw new Error(`Filing ${accessionNumber} not found`);
} }
const prompt = [ const defaultExtraction = deterministicExtractionFallback(filing);
'You are a fiscal research assistant focused on regulatory signals.', let extraction = defaultExtraction;
`Analyze this SEC filing from ${filing.company_name} (${filing.ticker}).`, let extractionMeta: FilingExtractionMeta = {
`Form: ${filing.filing_type}`, provider: 'deterministic-fallback',
`Filed: ${filing.filing_date}`, model: 'metadata-fallback',
`Metrics: ${JSON.stringify(filing.metrics ?? {})}`, source: 'metadata_fallback',
'Return concise sections: Thesis, Red Flags, Follow-up Questions, Portfolio Impact.' generatedAt: new Date().toISOString()
].join('\n'); };
const analysis = await runAiAnalysis(prompt, 'Use concise institutional analyst language.'); try {
const filingDocument = await fetchPrimaryFilingText({
filingUrl: filing.filing_url,
cik: filing.cik,
accessionNumber: filing.accession_number,
primaryDocument: filing.primary_document ?? null
});
if (filingDocument?.text) {
const extractionResult = await runAiAnalysis(
extractionPrompt(filing, filingDocument.text),
'Return strict JSON only.',
{ workload: 'extraction' }
);
const parsed = parseExtractionPayload(extractionResult.text);
if (parsed) {
extraction = parsed;
extractionMeta = {
provider: extractionResult.provider === 'local-fallback' ? 'deterministic-fallback' : 'ollama',
model: extractionResult.model,
source: filingDocument.source,
generatedAt: new Date().toISOString()
};
}
}
} catch {
extraction = defaultExtraction;
}
const analysis = await runAiAnalysis(
reportPrompt(filing, extraction, extractionMeta),
'Use concise institutional analyst language.',
{ workload: 'report' }
);
await saveFilingAnalysis(accessionNumber, { await saveFilingAnalysis(accessionNumber, {
provider: analysis.provider, provider: analysis.provider,
model: analysis.model, model: analysis.model,
text: analysis.text text: analysis.text,
extraction,
extractionMeta
}); });
return { return {
accessionNumber, accessionNumber,
provider: analysis.provider, provider: analysis.provider,
model: analysis.model model: analysis.model,
extractionProvider: extractionMeta.provider,
extractionModel: extractionMeta.model
}; };
} }
@@ -186,7 +413,11 @@ async function processPortfolioInsights(task: Task) {
'Respond with: 1) health score (0-100), 2) top 3 risks, 3) top 3 opportunities, 4) next actions in 7 days.' 'Respond with: 1) health score (0-100), 2) top 3 risks, 3) top 3 opportunities, 4) next actions in 7 days.'
].join('\n'); ].join('\n');
const analysis = await runAiAnalysis(prompt, 'Act as a risk-aware buy-side analyst.'); const analysis = await runAiAnalysis(
prompt,
'Act as a risk-aware buy-side analyst.',
{ workload: 'report' }
);
await createPortfolioInsight({ await createPortfolioInsight({
userId, userId,
@@ -202,6 +433,11 @@ async function processPortfolioInsights(task: Task) {
}; };
} }
// Exposes otherwise module-private helpers so unit tests can call them directly.
export const __taskProcessorInternals = {
parseExtractionPayload,
deterministicExtractionFallback
};
export async function runTaskProcessor(task: Task) { export async function runTaskProcessor(task: Task) {
switch (task.task_type) { switch (task.task_type) {
case 'sync_filings': case 'sync_filings':

View File

@@ -37,6 +37,22 @@ export type PortfolioSummary = {
avg_return_pct: string; avg_return_pct: string;
}; };
/** Structured signals extracted from a filing and stored alongside its analysis. */
export type FilingExtraction = {
/** Short narrative summary of the filing. */
summary: string;
/** Notable facts from the filing. */
keyPoints: string[];
/** Items flagged as potential concerns. */
redFlags: string[];
/** Questions suggested for further research. */
followUpQuestions: string[];
/** Suggested portfolio-level considerations. */
portfolioSignals: string[];
/** Confidence score; the task processor clamps parsed values to the range 0..1. */
confidence: number;
};
/** Provenance of a `FilingExtraction`. */
export type FilingExtractionMeta = {
/** Name of the provider that produced the extraction (e.g. a model provider or a fallback label). */
provider: string;
/** Identifier of the model, or of the fallback, that produced the extraction. */
model: string;
/** Whether the extraction came from the primary document text or from filing metadata only. */
source: 'primary_document' | 'metadata_fallback';
/** Timestamp string recording when the extraction was generated. */
generatedAt: string;
};
export type Filing = { export type Filing = {
id: number; id: number;
ticker: string; ticker: string;
@@ -60,6 +76,8 @@ export type Filing = {
model?: string; model?: string;
text?: string; text?: string;
legacyInsights?: string; legacyInsights?: string;
extraction?: FilingExtraction;
extractionMeta?: FilingExtractionMeta;
} | null; } | null;
created_at: string; created_at: string;
updated_at: string; updated_at: string;

View File

@@ -13,6 +13,7 @@
}, },
"dependencies": { "dependencies": {
"@elysiajs/eden": "^1.4.8", "@elysiajs/eden": "^1.4.8",
"@ai-sdk/openai": "^2.0.62",
"@libsql/client": "^0.17.0", "@libsql/client": "^0.17.0",
"@tailwindcss/postcss": "^4.2.1", "@tailwindcss/postcss": "^4.2.1",
"ai": "^6.0.104", "ai": "^6.0.104",