From da2ce23baba2c2e5a5c7eee0e2c70382bb2d3b2d Mon Sep 17 00:00:00 2001 From: francy51 Date: Mon, 2 Mar 2026 22:27:39 -0500 Subject: [PATCH] refactor: make AI runtime z.ai-only and default to glm-5 --- .env.example | 8 +- README.md | 15 +- bun.lock | 19 +-- docker-compose.yml | 5 +- lib/server/ai.test.ts | 211 ++++++++++------------- lib/server/ai.ts | 216 +++--------------------- lib/server/api/filing-redaction.test.ts | 4 +- lib/server/task-processors.ts | 57 +++---- package.json | 1 - 9 files changed, 152 insertions(+), 384 deletions(-) diff --git a/.env.example b/.env.example index 788f01a..2ec2508 100644 --- a/.env.example +++ b/.env.example @@ -16,15 +16,9 @@ BETTER_AUTH_TRUSTED_ORIGINS=https://fiscal.b11studio.xyz # Legacy OPENCLAW_* variables are removed and no longer read by the app. # Coding endpoint is hardcoded in runtime: https://api.z.ai/api/coding/paas/v4 ZHIPU_API_KEY= -ZHIPU_MODEL=glm-4.7-flashx +ZHIPU_MODEL=glm-5 AI_TEMPERATURE=0.2 -# Local extraction model (Ollama, OpenAI-compatible API) -# For host Ollama from Docker, use http://host.docker.internal:11434 -OLLAMA_BASE_URL=http://127.0.0.1:11434 -OLLAMA_MODEL=qwen3:8b -OLLAMA_API_KEY=ollama - # SEC API etiquette SEC_USER_AGENT=Fiscal Clone diff --git a/README.md b/README.md index 87daf5a..692c0c6 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,7 @@ Turbopack-first rebuild of a fiscal.ai-style terminal with Vercel AI SDK integra - Eden Treaty for type-safe frontend API calls - Workflow DevKit Postgres World for background task execution durability - SQLite-backed app domain storage (watchlist, holdings, filings, task projection, insights) -- Vercel AI SDK (`ai`) with dual-model routing: - - Ollama (`@ai-sdk/openai`) for lightweight filing extraction/parsing - - Zhipu (`zhipu-ai-provider`) for heavyweight narrative reports (`https://api.z.ai/api/coding/paas/v4`) +- Vercel AI SDK (`ai`) with Zhipu (`zhipu-ai-provider`) via Coding API (`https://api.z.ai/api/coding/paas/v4`) ## Run locally @@ -47,8 +45,7 @@ docker compose up --build -d ``` For local Docker, host port mapping comes from `docker-compose.override.yml` (default `http://localhost:3000` via `APP_PORT`). -The app calls Zhipu directly via AI SDK for heavy reports and calls Ollama for lightweight filing extraction. -When running in Docker and Ollama runs on the host, set `OLLAMA_BASE_URL=http://host.docker.internal:11434`. +The app calls Zhipu directly via AI SDK for extraction and report generation. Zhipu always targets the Coding API endpoint (`https://api.z.ai/api/coding/paas/v4`). On container startup, the app applies Drizzle migrations automatically before launching Next.js. The app stores SQLite data in Docker volume `fiscal_sqlite_data` (mounted to `/app/data`) and workflow world data in Postgres volume `workflow_postgres_data`. @@ -100,13 +97,10 @@ BETTER_AUTH_BASE_URL=https://fiscal.b11studio.xyz BETTER_AUTH_TRUSTED_ORIGINS=https://fiscal.b11studio.xyz ZHIPU_API_KEY= -ZHIPU_MODEL=glm-4.7-flashx +ZHIPU_MODEL=glm-5 # optional generation tuning AI_TEMPERATURE=0.2 -OLLAMA_BASE_URL=http://127.0.0.1:11434 -OLLAMA_MODEL=qwen3:8b -OLLAMA_API_KEY=ollama SEC_USER_AGENT=Fiscal Clone WORKFLOW_TARGET_WORLD=@workflow/world-postgres @@ -119,8 +113,7 @@ WORKFLOW_LOCAL_DATA_DIR=.workflow-data WORKFLOW_LOCAL_QUEUE_CONCURRENCY=100 ``` -If `ZHIPU_API_KEY` is unset, the app uses local fallback analysis so task workflows still run. -If Ollama is unavailable, filing extraction falls back to deterministic metadata-based extraction and still proceeds to heavy report generation. +`ZHIPU_API_KEY` is required for AI workloads (extraction and report generation). Missing or invalid credentials fail AI tasks. `ZHIPU_BASE_URL` is deprecated and ignored; runtime always uses `https://api.z.ai/api/coding/paas/v4`. ## API surface diff --git a/bun.lock b/bun.lock index 3fe25ab..1f12c7b 100644 --- a/bun.lock +++ b/bun.lock @@ -5,7 +5,6 @@ "": { "name": "fiscal-frontend", "dependencies": { - "@ai-sdk/openai": "^2.0.62", "@elysiajs/eden": "^1.4.8", "@libsql/client": "^0.17.0", "@tailwindcss/postcss": "^4.2.1", @@ -42,11 +41,9 @@ "packages": { "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.58", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-2e1hBCKsd+7m0hELwrakR1QDfZfFhz9PF2d4qb8TxQueEyApo7ydlEWRpXeKC+KdA2FRV21dMb1G6FxdeNDa2w=="], - "@ai-sdk/openai": ["@ai-sdk/openai@2.0.95", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@ai-sdk/provider-utils": "3.0.21" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-2CABPaa1UNh7dPyZUIB/Dc4AbvJioFnmryRx45sx7ezBSOdR0zxG6gbrSd/fZ0GVbptSZeLmF9omu10d/GxmJA=="], + "@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], - "@ai-sdk/provider": ["@ai-sdk/provider@2.0.1", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-KCUwswvsC5VsW2PWFqF8eJgSCu5Ysj7m1TxiHTVA6g7k360bk0RNQENT8KTMAYEs+8fWPD3Uu4dEmzGHc+jGng=="], - - "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.21", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-veuMwTLxsgh31Jjn0SnBABnM1f7ebHhRWcV2ZuY3hP3iJDCZ8VXBaYqcHXoOQDqUXTCas08sKQcHyWK+zl882Q=="], + "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], "@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="], @@ -1528,10 +1525,6 @@ "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], - "@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], - - "@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], - "@ai-sdk/gateway/@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="], "@aws-crypto/sha256-browser/@aws-sdk/types": ["@aws-sdk/types@3.973.3", "", { "dependencies": { "@smithy/types": "^4.13.0", "tslib": "^2.6.2" } }, "sha512-tma6D8/xHZHJEUqmr6ksZjZ0onyIUqKDQLyp50ttZJmS0IwFYzxBgp5CxFvpYAnah52V3UtgrqGA6E83gtT7NQ=="], @@ -1786,10 +1779,6 @@ "@xhmikosr/downloader/file-type": ["file-type@20.5.0", "", { "dependencies": { "@tokenizer/inflate": "^0.2.6", "strtok3": "^10.2.0", "token-types": "^6.0.0", "uint8array-extras": "^1.4.0" } }, "sha512-BfHZtG/l9iMm4Ecianu7P8HRD2tBHLtjXinm4X62XBOYzi7CYA7jyqfJzOvXHqzVrVPYqBo2/GvbARMaaJkKVg=="], - "ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], - - "ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], - "ansi-escapes/type-fest": ["type-fest@0.21.3", "", {}, "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w=="], "body-parser/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], @@ -1872,6 +1861,10 @@ "wsl-utils/is-wsl": ["is-wsl@3.1.1", "", { "dependencies": { "is-inside-container": "^1.0.0" } }, "sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw=="], + "zhipu-ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@2.0.1", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-KCUwswvsC5VsW2PWFqF8eJgSCu5Ysj7m1TxiHTVA6g7k360bk0RNQENT8KTMAYEs+8fWPD3Uu4dEmzGHc+jGng=="], + + "zhipu-ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.21", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-veuMwTLxsgh31Jjn0SnBABnM1f7ebHhRWcV2ZuY3hP3iJDCZ8VXBaYqcHXoOQDqUXTCas08sKQcHyWK+zl882Q=="], + "@aws-crypto/sha256-browser/@aws-sdk/types/@smithy/types": ["@smithy/types@4.13.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-COuLsZILbbQsdrwKQpkkpyep7lCsByxwj7m0Mg5v66/ZTyenlfBc40/QFQ5chO0YN/PNEH1Bi3fGtfXPnYNeDw=="], "@aws-crypto/sha256-browser/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="], diff --git a/docker-compose.yml b/docker-compose.yml index fac1193..f658fc3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -37,11 +37,8 @@ services: BETTER_AUTH_ADMIN_USER_IDS: ${BETTER_AUTH_ADMIN_USER_IDS:-} BETTER_AUTH_TRUSTED_ORIGINS: ${BETTER_AUTH_TRUSTED_ORIGINS:-https://fiscal.b11studio.xyz} ZHIPU_API_KEY: ${ZHIPU_API_KEY:-} - ZHIPU_MODEL: ${ZHIPU_MODEL:-glm-4.7-flashx} + ZHIPU_MODEL: ${ZHIPU_MODEL:-glm-5} AI_TEMPERATURE: ${AI_TEMPERATURE:-0.2} - OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://127.0.0.1:11434} - OLLAMA_MODEL: ${OLLAMA_MODEL:-qwen3:8b} - OLLAMA_API_KEY: ${OLLAMA_API_KEY:-ollama} SEC_USER_AGENT: ${SEC_USER_AGENT:-Fiscal Clone } WORKFLOW_TARGET_WORLD: ${WORKFLOW_TARGET_WORLD:-@workflow/world-postgres} WORKFLOW_POSTGRES_URL: ${WORKFLOW_POSTGRES_URL:-postgres://workflow:workflow@workflow-postgres:5432/workflow} diff --git a/lib/server/ai.test.ts b/lib/server/ai.test.ts index e611d9d..af64789 100644 --- a/lib/server/ai.test.ts +++ b/lib/server/ai.test.ts @@ -21,9 +21,10 @@ describe('ai config and runtime', () => { warn: () => {} }); + expect(config.provider).toBe('zhipu'); expect(config.apiKey).toBe('key'); expect(config.baseUrl).toBe(CODING_API_BASE_URL); - expect(config.model).toBe('glm-4.7-flashx'); + expect(config.model).toBe('glm-5'); expect(config.temperature).toBe(0.2); }); @@ -39,7 +40,7 @@ describe('ai config and runtime', () => { expect(config.baseUrl).toBe(CODING_API_BASE_URL); }); - it('clamps temperature into [0, 2]', () => { + it('clamps report temperature into [0, 2]', () => { const negative = getAiConfig({ env: { ZHIPU_API_KEY: 'key', @@ -68,23 +69,50 @@ describe('ai config and runtime', () => { expect(invalid.temperature).toBe(0.2); }); - it('returns fallback output when ZHIPU_API_KEY is missing', async () => { - const generate = mock(async () => ({ text: 'should-not-be-used' })); + it('uses extraction workload with zhipu config and zero temperature', async () => { + const createModel = mock((config: { + provider: string; + apiKey?: string; + model: string; + baseUrl: string; + temperature: number; + }) => { + expect(config.provider).toBe('zhipu'); + expect(config.apiKey).toBe('new-key'); + expect(config.baseUrl).toBe(CODING_API_BASE_URL); + expect(config.model).toBe('glm-5'); + expect(config.temperature).toBe(0); + return { modelId: config.model }; + }); + const generate = mock(async (input: { + model: unknown; + system?: string; + prompt: string; + temperature: number; + maxRetries?: number; + }) => { + expect(input.system).toBe('Return strict JSON only.'); + expect(input.prompt).toBe('Extract this filing'); + expect(input.temperature).toBe(0); + expect(input.maxRetries).toBe(0); + return { text: '{"summary":"ok"}' }; + }); - const result = await runAiAnalysis( - 'Prompt line one\nPrompt line two', - 'System prompt', - { - env: {}, - warn: () => {}, - generate - } - ); + const result = await runAiAnalysis('Extract this filing', 'Return strict JSON only.', { + env: { + ZHIPU_API_KEY: 'new-key' + }, + warn: () => {}, + workload: 'extraction', + createModel, + generate + }); - expect(result.provider).toBe('local-fallback'); - expect(result.model).toBe('glm-4.7-flashx'); - expect(result.text).toContain('AI SDK fallback mode is active'); - expect(generate).not.toHaveBeenCalled(); + expect(result.provider).toBe('zhipu'); + expect(result.model).toBe('glm-5'); + expect(result.text).toBe('{"summary":"ok"}'); + expect(createModel).toHaveBeenCalledTimes(1); + expect(generate).toHaveBeenCalledTimes(1); }); it('warns once when ZHIPU_BASE_URL is set because coding endpoint is hardcoded', () => { @@ -103,11 +131,13 @@ describe('ai config and runtime', () => { it('uses configured ZHIPU values and injected generator when API key exists', async () => { const createModel = mock((config: { + provider: string; apiKey?: string; model: string; baseUrl: string; temperature: number; }) => { + expect(config.provider).toBe('zhipu'); expect(config.apiKey).toBe('new-key'); expect(config.baseUrl).toBe(CODING_API_BASE_URL); expect(config.model).toBe('glm-4-plus'); @@ -147,6 +177,29 @@ describe('ai config and runtime', () => { expect(result.text).toBe('Generated insight'); }); + it('throws when report workload runs without ZHIPU_API_KEY', async () => { + await expect( + runAiAnalysis('Analyze this filing', undefined, { + env: {}, + warn: () => {}, + createModel: () => ({}), + generate: async () => ({ text: 'should-not-be-used' }) + }) + ).rejects.toThrow('ZHIPU_API_KEY is required for AI workloads'); + }); + + it('throws when extraction workload runs without ZHIPU_API_KEY', async () => { + await expect( + runAiAnalysis('Extract this filing', 'Return strict JSON only.', { + env: {}, + warn: () => {}, + workload: 'extraction', + createModel: () => ({}), + generate: async () => ({ text: 'should-not-be-used' }) + }) + ).rejects.toThrow('ZHIPU_API_KEY is required for AI workloads'); + }); + it('throws when AI generation returns an empty response', async () => { await expect( runAiAnalysis('Analyze this filing', undefined, { @@ -158,112 +211,7 @@ describe('ai config and runtime', () => { ).rejects.toThrow('AI SDK returned an empty response'); }); - it('uses ollama defaults for extraction workload config', () => { - const config = getExtractionAiConfig({ - env: {}, - warn: () => {} - }); - - expect(config.provider).toBe('ollama'); - expect(config.baseUrl).toBe('http://127.0.0.1:11434'); - expect(config.model).toBe('qwen3:8b'); - expect(config.apiKey).toBe('ollama'); - expect(config.temperature).toBe(0); - }); - - it('uses extraction workload and returns ollama provider on success', async () => { - const createModel = mock((config: { - provider: string; - apiKey?: string; - model: string; - baseUrl: string; - temperature: number; - }) => { - expect(config.provider).toBe('ollama'); - expect(config.baseUrl).toBe('http://127.0.0.1:11434'); - expect(config.model).toBe('qwen3:8b'); - expect(config.temperature).toBe(0); - return { modelId: config.model }; - }); - const generate = mock(async () => ({ text: '{"summary":"ok","keyPoints":[],"redFlags":[],"followUpQuestions":[],"portfolioSignals":[],"confidence":0.6}' })); - - const result = await runAiAnalysis('Extract this filing', 'Return JSON', { - env: { - OLLAMA_MODEL: 'qwen3:8b' - }, - warn: () => {}, - workload: 'extraction', - createModel, - generate - }); - - expect(createModel).toHaveBeenCalledTimes(1); - expect(generate).toHaveBeenCalledTimes(1); - expect(result.provider).toBe('ollama'); - expect(result.model).toBe('qwen3:8b'); - }); - - it('falls back to local text when extraction workload generation fails', async () => { - const result = await runAiAnalysis('Extract this filing', 'Return JSON', { - env: {}, - warn: () => {}, - workload: 'extraction', - createModel: () => ({}), - generate: async () => { - throw new Error('ollama unavailable'); - } - }); - - expect(result.provider).toBe('local-fallback'); - expect(result.model).toBe('qwen3:8b'); - expect(result.text).toContain('AI SDK fallback mode is active'); - }); - - it('falls back to local text when report workload fails with insufficient balance', async () => { - const warn = mock((_message: string) => {}); - - const result = await runAiAnalysis('Analyze this filing', 'Use concise style', { - env: { - ZHIPU_API_KEY: 'new-key' - }, - warn, - createModel: () => ({}), - generate: async () => { - throw new Error('AI_RetryError: Failed after 3 attempts. Last error: Insufficient balance or no resource package. Please recharge.'); - } - }); - - expect(result.provider).toBe('local-fallback'); - expect(result.model).toBe('glm-4.7-flashx'); - expect(result.text).toContain('AI SDK fallback mode is active'); - expect(warn).toHaveBeenCalledTimes(1); - }); - - it('falls back to local text when report workload cause contains insufficient balance', async () => { - const warn = mock((_message: string) => {}); - - const result = await runAiAnalysis('Analyze this filing', 'Use concise style', { - env: { - ZHIPU_API_KEY: 'new-key' - }, - warn, - createModel: () => ({}), - generate: async () => { - const retryError = new Error('AI_RetryError: Failed after 3 attempts.'); - (retryError as Error & { cause?: unknown }).cause = new Error( - 'Last error: Insufficient balance or no resource package. Please recharge.' - ); - throw retryError; - } - }); - - expect(result.provider).toBe('local-fallback'); - expect(result.model).toBe('glm-4.7-flashx'); - expect(result.text).toContain('AI SDK fallback mode is active'); - expect(warn).toHaveBeenCalledTimes(1); - }); - - it('keeps throwing unknown report workload errors', async () => { + it('keeps throwing unknown provider errors', async () => { await expect( runAiAnalysis('Analyze this filing', 'Use concise style', { env: { @@ -277,4 +225,21 @@ describe('ai config and runtime', () => { }) ).rejects.toThrow('unexpected schema mismatch'); }); + + it('returns extraction config with same zhipu model and zero temperature', () => { + const config = getExtractionAiConfig({ + env: { + ZHIPU_API_KEY: 'new-key', + ZHIPU_MODEL: 'glm-4-plus', + AI_TEMPERATURE: '0.9' + }, + warn: () => {} + }); + + expect(config.provider).toBe('zhipu'); + expect(config.apiKey).toBe('new-key'); + expect(config.baseUrl).toBe(CODING_API_BASE_URL); + expect(config.model).toBe('glm-4-plus'); + expect(config.temperature).toBe(0); + }); }); diff --git a/lib/server/ai.ts b/lib/server/ai.ts index 737de1e..e67dc99 100644 --- a/lib/server/ai.ts +++ b/lib/server/ai.ts @@ -1,9 +1,8 @@ -import { createOpenAI } from '@ai-sdk/openai'; import { generateText } from 'ai'; import { createZhipu } from 'zhipu-ai-provider'; type AiWorkload = 'report' | 'extraction'; -type AiProvider = 'zhipu' | 'ollama'; +type AiProvider = 'zhipu'; type AiConfig = { provider: AiProvider; @@ -39,9 +38,6 @@ type RunAiAnalysisOptions = GetAiConfigOptions & { }; const CODING_API_BASE_URL = 'https://api.z.ai/api/coding/paas/v4'; -const OLLAMA_BASE_URL = 'http://127.0.0.1:11434'; -const OLLAMA_MODEL = 'qwen3:8b'; -const OLLAMA_API_KEY = 'ollama'; let warnedIgnoredZhipuBaseUrl = false; @@ -80,128 +76,13 @@ function warnIgnoredZhipuBaseUrl(env: EnvSource, warn: (message: string) => void ); } -function fallbackResponse(prompt: string) { - const clipped = prompt.split('\n').slice(0, 6).join(' ').slice(0, 260); - - return [ - 'AI SDK fallback mode is active (live model configuration is missing or unavailable).', - 'Thesis: Portfolio remains analyzable with local heuristics until live model access is configured.', - 'Risk scan: Concentration and filing sentiment should be monitored after each sync cycle.', - `Context digest: ${clipped}` - ].join('\n\n'); -} - -function toOpenAiCompatibleBaseUrl(baseUrl: string) { - const normalized = baseUrl.endsWith('/') - ? baseUrl.slice(0, -1) - : baseUrl; - - return normalized.endsWith('/v1') - ? normalized - : `${normalized}/v1`; -} - -function asErrorMessage(error: unknown) { - if (error instanceof Error && error.message) { - return error.message; - } - - return String(error); -} - -function errorSearchText(error: unknown) { - const chunks: string[] = []; - const seen = new Set(); - - const visit = (value: unknown) => { - if (value === null || value === undefined) { - return; - } - - if (typeof value === 'string') { - const normalized = value.trim(); - if (normalized.length > 0) { - chunks.push(normalized); - } - - return; - } - - if (typeof value !== 'object') { - chunks.push(String(value)); - return; - } - - if (seen.has(value)) { - return; - } - seen.add(value); - - if (value instanceof Error) { - if (value.message) { - chunks.push(value.message); - } - - const withCause = value as Error & { cause?: unknown }; - if (withCause.cause !== undefined) { - visit(withCause.cause); - } - return; - } - - const record = value as Record; - visit(record.message); - visit(record.error); - visit(record.reason); - visit(record.detail); - visit(record.details); - visit(record.cause); - }; - - visit(error); - return chunks.join('\n'); -} - -const REPORT_FALLBACK_ERROR_PATTERNS: RegExp[] = [ - /insufficient balance/i, - /no resource package/i, - /insufficient quota/i, - /quota exceeded/i, - /insufficient credit/i, - /invalid api key/i, - /authentication/i, - /unauthorized/i, - /forbidden/i, - /payment required/i, - /recharge/i, - /unable to connect/i, - /network/i, - /timeout/i, - /timed out/i, - /econnrefused/i -]; - -function shouldFallbackReportError(error: unknown) { - const searchText = errorSearchText(error) || asErrorMessage(error); - return REPORT_FALLBACK_ERROR_PATTERNS.some((pattern) => pattern.test(searchText)); -} - function defaultCreateModel(config: AiConfig) { - if (config.provider === 'zhipu') { - const zhipu = createZhipu({ - apiKey: config.apiKey, - baseURL: config.baseUrl - }); - - return zhipu(config.model); - } - - const openai = createOpenAI({ - apiKey: config.apiKey ?? OLLAMA_API_KEY, - baseURL: toOpenAiCompatibleBaseUrl(config.baseUrl) + const zhipu = createZhipu({ + apiKey: config.apiKey, + baseURL: config.baseUrl }); - return openai.chat(config.model); + return zhipu(config.model); } async function defaultGenerate(input: AiGenerateInput): Promise { @@ -228,21 +109,16 @@ export function getReportAiConfig(options?: GetAiConfigOptions) { provider: 'zhipu', apiKey: envValue('ZHIPU_API_KEY', env), baseUrl: CODING_API_BASE_URL, - model: envValue('ZHIPU_MODEL', env) ?? 'glm-4.7-flashx', + model: envValue('ZHIPU_MODEL', env) ?? 'glm-5', temperature: parseTemperature(envValue('AI_TEMPERATURE', env)) } satisfies AiConfig; } export function getExtractionAiConfig(options?: GetAiConfigOptions) { - const env = options?.env ?? process.env; - return { - provider: 'ollama', - apiKey: envValue('OLLAMA_API_KEY', env) ?? OLLAMA_API_KEY, - baseUrl: envValue('OLLAMA_BASE_URL', env) ?? OLLAMA_BASE_URL, - model: envValue('OLLAMA_MODEL', env) ?? OLLAMA_MODEL, + ...getReportAiConfig(options), temperature: 0 - } satisfies AiConfig; + }; } export function isAiConfigured(options?: GetAiConfigOptions) { @@ -256,70 +132,32 @@ export async function runAiAnalysis(prompt: string, systemPrompt?: string, optio ? getExtractionAiConfig(options) : getReportAiConfig(options); - if (workload === 'report' && !config.apiKey) { - return { - provider: 'local-fallback', - model: config.model, - text: fallbackResponse(prompt) - }; + if (!config.apiKey) { + throw new Error('ZHIPU_API_KEY is required for AI workloads'); } const createModel = options?.createModel ?? defaultCreateModel; const generate = options?.generate ?? defaultGenerate; - const warn = options?.warn ?? console.warn; + const model = createModel(config); - try { - const model = createModel(config); + const result = await generate({ + model, + system: systemPrompt, + prompt, + temperature: config.temperature, + maxRetries: 0 + }); - const result = await generate({ - model, - system: systemPrompt, - prompt, - temperature: config.temperature, - maxRetries: 0 - }); - - const text = result.text.trim(); - if (!text) { - if (workload === 'extraction') { - return { - provider: 'local-fallback', - model: config.model, - text: fallbackResponse(prompt) - }; - } - - throw new Error('AI SDK returned an empty response'); - } - - return { - provider: config.provider, - model: config.model, - text - }; - } catch (error) { - if (workload === 'report' && shouldFallbackReportError(error)) { - warn(`[AI SDK] Report fallback activated: ${asErrorMessage(error)}`); - - return { - provider: 'local-fallback', - model: config.model, - text: fallbackResponse(prompt) - }; - } - - if (workload === 'extraction') { - warn(`[AI SDK] Extraction fallback activated: ${asErrorMessage(error)}`); - - return { - provider: 'local-fallback', - model: config.model, - text: fallbackResponse(prompt) - }; - } - - throw error; + const text = result.text.trim(); + if (!text) { + throw new Error('AI SDK returned an empty response'); } + + return { + provider: config.provider, + model: config.model, + text + }; } export function __resetAiWarningsForTests() { diff --git a/lib/server/api/filing-redaction.test.ts b/lib/server/api/filing-redaction.test.ts index 6df9356..38466cf 100644 --- a/lib/server/api/filing-redaction.test.ts +++ b/lib/server/api/filing-redaction.test.ts @@ -32,8 +32,8 @@ function filingWithExtraction(): Filing { confidence: 0.4 }, extractionMeta: { - provider: 'ollama', - model: 'qwen3:8b', + provider: 'zhipu', + model: 'glm-4.7-flashx', source: 'primary_document', generatedAt: '2026-02-01T00:00:00.000Z' } diff --git a/lib/server/task-processors.ts b/lib/server/task-processors.ts index 621c478..b410474 100644 --- a/lib/server/task-processors.ts +++ b/lib/server/task-processors.ts @@ -689,50 +689,39 @@ async function processAnalyzeFiling(task: Task) { source: 'metadata_fallback', generatedAt: new Date().toISOString() }; + let filingDocument: Awaited> | null = null; try { await setProjectionStage(task, 'analyze.fetch_document', 'Fetching primary filing document'); - const filingDocument = await fetchPrimaryFilingText({ + filingDocument = await fetchPrimaryFilingText({ filingUrl: filing.filing_url, cik: filing.cik, accessionNumber: filing.accession_number, primaryDocument: filing.primary_document ?? null }); - - if (filingDocument?.text) { - await setProjectionStage(task, 'analyze.extract', 'Generating extraction context from filing text'); - const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text); - extraction = ruleBasedExtraction; - extractionMeta = { - provider: 'deterministic-fallback', - model: 'filing-rule-based', - source: filingDocument.source, - generatedAt: new Date().toISOString() - }; - - const extractionResult = await runAiAnalysis( - extractionPrompt(filing, filingDocument.text), - 'Return strict JSON only.', - { workload: 'extraction' } - ); - - const parsed = parseExtractionPayload(extractionResult.text); - if (parsed) { - extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction); - extractionMeta = { - provider: extractionResult.provider === 'local-fallback' ? 'deterministic-fallback' : 'ollama', - model: extractionResult.model, - source: filingDocument.source, - generatedAt: new Date().toISOString() - }; - } - } } catch { - extraction = defaultExtraction; + filingDocument = null; + } + + if (filingDocument?.text) { + await setProjectionStage(task, 'analyze.extract', 'Generating extraction context from filing text'); + const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text); + const extractionResult = await runAiAnalysis( + extractionPrompt(filing, filingDocument.text), + 'Return strict JSON only.', + { workload: 'extraction' } + ); + + const parsed = parseExtractionPayload(extractionResult.text); + if (!parsed) { + throw new Error('Extraction output invalid JSON schema'); + } + + extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction); extractionMeta = { - provider: 'deterministic-fallback', - model: 'metadata-fallback', - source: 'metadata_fallback', + provider: 'zhipu', + model: extractionResult.model, + source: filingDocument.source, generatedAt: new Date().toISOString() }; } diff --git a/package.json b/package.json index 70f84fe..9c4e16c 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,6 @@ "test:e2e:workflow": "RUN_TASK_WORKFLOW_E2E=1 bun test lib/server/api/task-workflow-hybrid.e2e.test.ts" }, "dependencies": { - "@ai-sdk/openai": "^2.0.62", "@elysiajs/eden": "^1.4.8", "@libsql/client": "^0.17.0", "@tailwindcss/postcss": "^4.2.1",