refactor: make AI runtime z.ai-only, default to glm-5, and remove the local fallback (missing ZHIPU_API_KEY now throws)

2026-03-02 22:27:39 -05:00
parent 812c4803f2
commit da2ce23bab
9 changed files with 152 additions and 384 deletions
--- a/lib/server/ai.test.ts
+++ b/lib/server/ai.test.ts
@@ -21,9 +21,10 @@ describe('ai config and runtime', () => {
      warn: () => {}
    });

+    expect(config.provider).toBe('zhipu');
    expect(config.apiKey).toBe('key');
    expect(config.baseUrl).toBe(CODING_API_BASE_URL);
-    expect(config.model).toBe('glm-4.7-flashx');
+    expect(config.model).toBe('glm-5');
    expect(config.temperature).toBe(0.2);
  });

@@ -39,7 +40,7 @@ describe('ai config and runtime', () => {
    expect(config.baseUrl).toBe(CODING_API_BASE_URL);
  });

-  it('clamps temperature into [0, 2]', () => {
+  it('clamps report temperature into [0, 2]', () => {
    const negative = getAiConfig({
      env: {
        ZHIPU_API_KEY: 'key',
@@ -68,23 +69,50 @@ describe('ai config and runtime', () => {
    expect(invalid.temperature).toBe(0.2);
  });

-  it('returns fallback output when ZHIPU_API_KEY is missing', async () => {
-    const generate = mock(async () => ({ text: 'should-not-be-used' }));
+  it('uses extraction workload with zhipu config and zero temperature', async () => {
+    const createModel = mock((config: {
+      provider: string;
+      apiKey?: string;
+      model: string;
+      baseUrl: string;
+      temperature: number;
+    }) => {
+      expect(config.provider).toBe('zhipu');
+      expect(config.apiKey).toBe('new-key');
+      expect(config.baseUrl).toBe(CODING_API_BASE_URL);
+      expect(config.model).toBe('glm-5');
+      expect(config.temperature).toBe(0);
+      return { modelId: config.model };
+    });
+    const generate = mock(async (input: {
+      model: unknown;
+      system?: string;
+      prompt: string;
+      temperature: number;
+      maxRetries?: number;
+    }) => {
+      expect(input.system).toBe('Return strict JSON only.');
+      expect(input.prompt).toBe('Extract this filing');
+      expect(input.temperature).toBe(0);
+      expect(input.maxRetries).toBe(0);
+      return { text: '{"summary":"ok"}' };
+    });

-    const result = await runAiAnalysis(
-      'Prompt line one\nPrompt line two',
-      'System prompt',
-      {
-        env: {},
-        warn: () => {},
-        generate
-      }
-    );
+    const result = await runAiAnalysis('Extract this filing', 'Return strict JSON only.', {
+      env: {
+        ZHIPU_API_KEY: 'new-key'
+      },
+      warn: () => {},
+      workload: 'extraction',
+      createModel,
+      generate
+    });

-    expect(result.provider).toBe('local-fallback');
-    expect(result.model).toBe('glm-4.7-flashx');
-    expect(result.text).toContain('AI SDK fallback mode is active');
-    expect(generate).not.toHaveBeenCalled();
+    expect(result.provider).toBe('zhipu');
+    expect(result.model).toBe('glm-5');
+    expect(result.text).toBe('{"summary":"ok"}');
+    expect(createModel).toHaveBeenCalledTimes(1);
+    expect(generate).toHaveBeenCalledTimes(1);
  });

  it('warns once when ZHIPU_BASE_URL is set because coding endpoint is hardcoded', () => {
@@ -103,11 +131,13 @@ describe('ai config and runtime', () => {

  it('uses configured ZHIPU values and injected generator when API key exists', async () => {
    const createModel = mock((config: {
+      provider: string;
      apiKey?: string;
      model: string;
      baseUrl: string;
      temperature: number;
    }) => {
+      expect(config.provider).toBe('zhipu');
      expect(config.apiKey).toBe('new-key');
      expect(config.baseUrl).toBe(CODING_API_BASE_URL);
      expect(config.model).toBe('glm-4-plus');
@@ -147,6 +177,29 @@ describe('ai config and runtime', () => {
    expect(result.text).toBe('Generated insight');
  });

+  it('throws when report workload runs without ZHIPU_API_KEY', async () => {
+    await expect(
+      runAiAnalysis('Analyze this filing', undefined, {
+        env: {},
+        warn: () => {},
+        createModel: () => ({}),
+        generate: async () => ({ text: 'should-not-be-used' })
+      })
+    ).rejects.toThrow('ZHIPU_API_KEY is required for AI workloads');
+  });
+
+  it('throws when extraction workload runs without ZHIPU_API_KEY', async () => {
+    await expect(
+      runAiAnalysis('Extract this filing', 'Return strict JSON only.', {
+        env: {},
+        warn: () => {},
+        workload: 'extraction',
+        createModel: () => ({}),
+        generate: async () => ({ text: 'should-not-be-used' })
+      })
+    ).rejects.toThrow('ZHIPU_API_KEY is required for AI workloads');
+  });
+
  it('throws when AI generation returns an empty response', async () => {
    await expect(
      runAiAnalysis('Analyze this filing', undefined, {
@@ -158,112 +211,7 @@ describe('ai config and runtime', () => {
    ).rejects.toThrow('AI SDK returned an empty response');
  });

-  it('uses ollama defaults for extraction workload config', () => {
-    const config = getExtractionAiConfig({
-      env: {},
-      warn: () => {}
-    });
-
-    expect(config.provider).toBe('ollama');
-    expect(config.baseUrl).toBe('http://127.0.0.1:11434');
-    expect(config.model).toBe('qwen3:8b');
-    expect(config.apiKey).toBe('ollama');
-    expect(config.temperature).toBe(0);
-  });
-
-  it('uses extraction workload and returns ollama provider on success', async () => {
-    const createModel = mock((config: {
-      provider: string;
-      apiKey?: string;
-      model: string;
-      baseUrl: string;
-      temperature: number;
-    }) => {
-      expect(config.provider).toBe('ollama');
-      expect(config.baseUrl).toBe('http://127.0.0.1:11434');
-      expect(config.model).toBe('qwen3:8b');
-      expect(config.temperature).toBe(0);
-      return { modelId: config.model };
-    });
-    const generate = mock(async () => ({ text: '{"summary":"ok","keyPoints":[],"redFlags":[],"followUpQuestions":[],"portfolioSignals":[],"confidence":0.6}' }));
-
-    const result = await runAiAnalysis('Extract this filing', 'Return JSON', {
-      env: {
-        OLLAMA_MODEL: 'qwen3:8b'
-      },
-      warn: () => {},
-      workload: 'extraction',
-      createModel,
-      generate
-    });
-
-    expect(createModel).toHaveBeenCalledTimes(1);
-    expect(generate).toHaveBeenCalledTimes(1);
-    expect(result.provider).toBe('ollama');
-    expect(result.model).toBe('qwen3:8b');
-  });
-
-  it('falls back to local text when extraction workload generation fails', async () => {
-    const result = await runAiAnalysis('Extract this filing', 'Return JSON', {
-      env: {},
-      warn: () => {},
-      workload: 'extraction',
-      createModel: () => ({}),
-      generate: async () => {
-        throw new Error('ollama unavailable');
-      }
-    });
-
-    expect(result.provider).toBe('local-fallback');
-    expect(result.model).toBe('qwen3:8b');
-    expect(result.text).toContain('AI SDK fallback mode is active');
-  });
-
-  it('falls back to local text when report workload fails with insufficient balance', async () => {
-    const warn = mock((_message: string) => {});
-
-    const result = await runAiAnalysis('Analyze this filing', 'Use concise style', {
-      env: {
-        ZHIPU_API_KEY: 'new-key'
-      },
-      warn,
-      createModel: () => ({}),
-      generate: async () => {
-        throw new Error('AI_RetryError: Failed after 3 attempts. Last error: Insufficient balance or no resource package. Please recharge.');
-      }
-    });
-
-    expect(result.provider).toBe('local-fallback');
-    expect(result.model).toBe('glm-4.7-flashx');
-    expect(result.text).toContain('AI SDK fallback mode is active');
-    expect(warn).toHaveBeenCalledTimes(1);
-  });
-
-  it('falls back to local text when report workload cause contains insufficient balance', async () => {
-    const warn = mock((_message: string) => {});
-
-    const result = await runAiAnalysis('Analyze this filing', 'Use concise style', {
-      env: {
-        ZHIPU_API_KEY: 'new-key'
-      },
-      warn,
-      createModel: () => ({}),
-      generate: async () => {
-        const retryError = new Error('AI_RetryError: Failed after 3 attempts.');
-        (retryError as Error & { cause?: unknown }).cause = new Error(
-          'Last error: Insufficient balance or no resource package. Please recharge.'
-        );
-        throw retryError;
-      }
-    });
-
-    expect(result.provider).toBe('local-fallback');
-    expect(result.model).toBe('glm-4.7-flashx');
-    expect(result.text).toContain('AI SDK fallback mode is active');
-    expect(warn).toHaveBeenCalledTimes(1);
-  });
-
-  it('keeps throwing unknown report workload errors', async () => {
+  it('keeps throwing unknown provider errors', async () => {
    await expect(
      runAiAnalysis('Analyze this filing', 'Use concise style', {
        env: {
@@ -277,4 +225,21 @@ describe('ai config and runtime', () => {
      })
    ).rejects.toThrow('unexpected schema mismatch');
  });
+
+  it('returns extraction config with same zhipu model and zero temperature', () => {
+    const config = getExtractionAiConfig({
+      env: {
+        ZHIPU_API_KEY: 'new-key',
+        ZHIPU_MODEL: 'glm-4-plus',
+        AI_TEMPERATURE: '0.9'
+      },
+      warn: () => {}
+    });
+
+    expect(config.provider).toBe('zhipu');
+    expect(config.apiKey).toBe('new-key');
+    expect(config.baseUrl).toBe(CODING_API_BASE_URL);
+    expect(config.model).toBe('glm-4-plus');
+    expect(config.temperature).toBe(0);
+  });
 });