refactor: make AI runtime z.ai-only and default to glm-5

2026-03-02 22:27:39 -05:00
parent 812c4803f2
commit da2ce23bab
9 changed files with 152 additions and 384 deletions
--- a/lib/server/ai.test.ts
+++ b/lib/server/ai.test.ts
@@ -21,9 +21,10 @@ describe('ai config and runtime', () => {
      warn: () => {}
    });

+    expect(config.provider).toBe('zhipu');
    expect(config.apiKey).toBe('key');
    expect(config.baseUrl).toBe(CODING_API_BASE_URL);
-    expect(config.model).toBe('glm-4.7-flashx');
+    expect(config.model).toBe('glm-5');
    expect(config.temperature).toBe(0.2);
  });

@@ -39,7 +40,7 @@ describe('ai config and runtime', () => {
    expect(config.baseUrl).toBe(CODING_API_BASE_URL);
  });

-  it('clamps temperature into [0, 2]', () => {
+  it('clamps report temperature into [0, 2]', () => {
    const negative = getAiConfig({
      env: {
        ZHIPU_API_KEY: 'key',
@@ -68,23 +69,50 @@ describe('ai config and runtime', () => {
    expect(invalid.temperature).toBe(0.2);
  });

-  it('returns fallback output when ZHIPU_API_KEY is missing', async () => {
-    const generate = mock(async () => ({ text: 'should-not-be-used' }));
+  it('uses extraction workload with zhipu config and zero temperature', async () => {
+    const createModel = mock((config: {
+      provider: string;
+      apiKey?: string;
+      model: string;
+      baseUrl: string;
+      temperature: number;
+    }) => {
+      expect(config.provider).toBe('zhipu');
+      expect(config.apiKey).toBe('new-key');
+      expect(config.baseUrl).toBe(CODING_API_BASE_URL);
+      expect(config.model).toBe('glm-5');
+      expect(config.temperature).toBe(0);
+      return { modelId: config.model };
+    });
+    const generate = mock(async (input: {
+      model: unknown;
+      system?: string;
+      prompt: string;
+      temperature: number;
+      maxRetries?: number;
+    }) => {
+      expect(input.system).toBe('Return strict JSON only.');
+      expect(input.prompt).toBe('Extract this filing');
+      expect(input.temperature).toBe(0);
+      expect(input.maxRetries).toBe(0);
+      return { text: '{"summary":"ok"}' };
+    });

-    const result = await runAiAnalysis(
-      'Prompt line one\nPrompt line two',
-      'System prompt',
-      {
-        env: {},
-        warn: () => {},
-        generate
-      }
-    );
+    const result = await runAiAnalysis('Extract this filing', 'Return strict JSON only.', {
+      env: {
+        ZHIPU_API_KEY: 'new-key'
+      },
+      warn: () => {},
+      workload: 'extraction',
+      createModel,
+      generate
+    });

-    expect(result.provider).toBe('local-fallback');
-    expect(result.model).toBe('glm-4.7-flashx');
-    expect(result.text).toContain('AI SDK fallback mode is active');
-    expect(generate).not.toHaveBeenCalled();
+    expect(result.provider).toBe('zhipu');
+    expect(result.model).toBe('glm-5');
+    expect(result.text).toBe('{"summary":"ok"}');
+    expect(createModel).toHaveBeenCalledTimes(1);
+    expect(generate).toHaveBeenCalledTimes(1);
  });

  it('warns once when ZHIPU_BASE_URL is set because coding endpoint is hardcoded', () => {
@@ -103,11 +131,13 @@ describe('ai config and runtime', () => {

  it('uses configured ZHIPU values and injected generator when API key exists', async () => {
    const createModel = mock((config: {
+      provider: string;
      apiKey?: string;
      model: string;
      baseUrl: string;
      temperature: number;
    }) => {
+      expect(config.provider).toBe('zhipu');
      expect(config.apiKey).toBe('new-key');
      expect(config.baseUrl).toBe(CODING_API_BASE_URL);
      expect(config.model).toBe('glm-4-plus');
@@ -147,6 +177,29 @@ describe('ai config and runtime', () => {
    expect(result.text).toBe('Generated insight');
  });

+  it('throws when report workload runs without ZHIPU_API_KEY', async () => {
+    await expect(
+      runAiAnalysis('Analyze this filing', undefined, {
+        env: {},
+        warn: () => {},
+        createModel: () => ({}),
+        generate: async () => ({ text: 'should-not-be-used' })
+      })
+    ).rejects.toThrow('ZHIPU_API_KEY is required for AI workloads');
+  });
+
+  it('throws when extraction workload runs without ZHIPU_API_KEY', async () => {
+    await expect(
+      runAiAnalysis('Extract this filing', 'Return strict JSON only.', {
+        env: {},
+        warn: () => {},
+        workload: 'extraction',
+        createModel: () => ({}),
+        generate: async () => ({ text: 'should-not-be-used' })
+      })
+    ).rejects.toThrow('ZHIPU_API_KEY is required for AI workloads');
+  });
+
  it('throws when AI generation returns an empty response', async () => {
    await expect(
      runAiAnalysis('Analyze this filing', undefined, {
@@ -158,112 +211,7 @@ describe('ai config and runtime', () => {
    ).rejects.toThrow('AI SDK returned an empty response');
  });

-  it('uses ollama defaults for extraction workload config', () => {
-    const config = getExtractionAiConfig({
-      env: {},
-      warn: () => {}
-    });
-
-    expect(config.provider).toBe('ollama');
-    expect(config.baseUrl).toBe('http://127.0.0.1:11434');
-    expect(config.model).toBe('qwen3:8b');
-    expect(config.apiKey).toBe('ollama');
-    expect(config.temperature).toBe(0);
-  });
-
-  it('uses extraction workload and returns ollama provider on success', async () => {
-    const createModel = mock((config: {
-      provider: string;
-      apiKey?: string;
-      model: string;
-      baseUrl: string;
-      temperature: number;
-    }) => {
-      expect(config.provider).toBe('ollama');
-      expect(config.baseUrl).toBe('http://127.0.0.1:11434');
-      expect(config.model).toBe('qwen3:8b');
-      expect(config.temperature).toBe(0);
-      return { modelId: config.model };
-    });
-    const generate = mock(async () => ({ text: '{"summary":"ok","keyPoints":[],"redFlags":[],"followUpQuestions":[],"portfolioSignals":[],"confidence":0.6}' }));
-
-    const result = await runAiAnalysis('Extract this filing', 'Return JSON', {
-      env: {
-        OLLAMA_MODEL: 'qwen3:8b'
-      },
-      warn: () => {},
-      workload: 'extraction',
-      createModel,
-      generate
-    });
-
-    expect(createModel).toHaveBeenCalledTimes(1);
-    expect(generate).toHaveBeenCalledTimes(1);
-    expect(result.provider).toBe('ollama');
-    expect(result.model).toBe('qwen3:8b');
-  });
-
-  it('falls back to local text when extraction workload generation fails', async () => {
-    const result = await runAiAnalysis('Extract this filing', 'Return JSON', {
-      env: {},
-      warn: () => {},
-      workload: 'extraction',
-      createModel: () => ({}),
-      generate: async () => {
-        throw new Error('ollama unavailable');
-      }
-    });
-
-    expect(result.provider).toBe('local-fallback');
-    expect(result.model).toBe('qwen3:8b');
-    expect(result.text).toContain('AI SDK fallback mode is active');
-  });
-
-  it('falls back to local text when report workload fails with insufficient balance', async () => {
-    const warn = mock((_message: string) => {});
-
-    const result = await runAiAnalysis('Analyze this filing', 'Use concise style', {
-      env: {
-        ZHIPU_API_KEY: 'new-key'
-      },
-      warn,
-      createModel: () => ({}),
-      generate: async () => {
-        throw new Error('AI_RetryError: Failed after 3 attempts. Last error: Insufficient balance or no resource package. Please recharge.');
-      }
-    });
-
-    expect(result.provider).toBe('local-fallback');
-    expect(result.model).toBe('glm-4.7-flashx');
-    expect(result.text).toContain('AI SDK fallback mode is active');
-    expect(warn).toHaveBeenCalledTimes(1);
-  });
-
-  it('falls back to local text when report workload cause contains insufficient balance', async () => {
-    const warn = mock((_message: string) => {});
-
-    const result = await runAiAnalysis('Analyze this filing', 'Use concise style', {
-      env: {
-        ZHIPU_API_KEY: 'new-key'
-      },
-      warn,
-      createModel: () => ({}),
-      generate: async () => {
-        const retryError = new Error('AI_RetryError: Failed after 3 attempts.');
-        (retryError as Error & { cause?: unknown }).cause = new Error(
-          'Last error: Insufficient balance or no resource package. Please recharge.'
-        );
-        throw retryError;
-      }
-    });
-
-    expect(result.provider).toBe('local-fallback');
-    expect(result.model).toBe('glm-4.7-flashx');
-    expect(result.text).toContain('AI SDK fallback mode is active');
-    expect(warn).toHaveBeenCalledTimes(1);
-  });
-
-  it('keeps throwing unknown report workload errors', async () => {
+  it('keeps throwing unknown provider errors', async () => {
    await expect(
      runAiAnalysis('Analyze this filing', 'Use concise style', {
        env: {
@@ -277,4 +225,21 @@ describe('ai config and runtime', () => {
      })
    ).rejects.toThrow('unexpected schema mismatch');
  });
+
+  it('returns extraction config with same zhipu model and zero temperature', () => {
+    const config = getExtractionAiConfig({
+      env: {
+        ZHIPU_API_KEY: 'new-key',
+        ZHIPU_MODEL: 'glm-4-plus',
+        AI_TEMPERATURE: '0.9'
+      },
+      warn: () => {}
+    });
+
+    expect(config.provider).toBe('zhipu');
+    expect(config.apiKey).toBe('new-key');
+    expect(config.baseUrl).toBe(CODING_API_BASE_URL);
+    expect(config.model).toBe('glm-4-plus');
+    expect(config.temperature).toBe(0);
+  });
 });
--- a/lib/server/ai.ts
+++ b/lib/server/ai.ts
@@ -1,9 +1,8 @@
-import { createOpenAI } from '@ai-sdk/openai';
 import { generateText } from 'ai';
 import { createZhipu } from 'zhipu-ai-provider';

 type AiWorkload = 'report' | 'extraction';
-type AiProvider = 'zhipu' | 'ollama';
+type AiProvider = 'zhipu';

 type AiConfig = {
  provider: AiProvider;
@@ -39,9 +38,6 @@ type RunAiAnalysisOptions = GetAiConfigOptions & {
 };

 const CODING_API_BASE_URL = 'https://api.z.ai/api/coding/paas/v4';
-const OLLAMA_BASE_URL = 'http://127.0.0.1:11434';
-const OLLAMA_MODEL = 'qwen3:8b';
-const OLLAMA_API_KEY = 'ollama';

 let warnedIgnoredZhipuBaseUrl = false;

@@ -80,128 +76,13 @@ function warnIgnoredZhipuBaseUrl(env: EnvSource, warn: (message: string) => void
  );
 }

-function fallbackResponse(prompt: string) {
-  const clipped = prompt.split('\n').slice(0, 6).join(' ').slice(0, 260);
-
-  return [
-    'AI SDK fallback mode is active (live model configuration is missing or unavailable).',
-    'Thesis: Portfolio remains analyzable with local heuristics until live model access is configured.',
-    'Risk scan: Concentration and filing sentiment should be monitored after each sync cycle.',
-    `Context digest: ${clipped}`
-  ].join('\n\n');
-}
-
-function toOpenAiCompatibleBaseUrl(baseUrl: string) {
-  const normalized = baseUrl.endsWith('/')
-    ? baseUrl.slice(0, -1)
-    : baseUrl;
-
-  return normalized.endsWith('/v1')
-    ? normalized
-    : `${normalized}/v1`;
-}
-
-function asErrorMessage(error: unknown) {
-  if (error instanceof Error && error.message) {
-    return error.message;
-  }
-
-  return String(error);
-}
-
-function errorSearchText(error: unknown) {
-  const chunks: string[] = [];
-  const seen = new Set<unknown>();
-
-  const visit = (value: unknown) => {
-    if (value === null || value === undefined) {
-      return;
-    }
-
-    if (typeof value === 'string') {
-      const normalized = value.trim();
-      if (normalized.length > 0) {
-        chunks.push(normalized);
-      }
-
-      return;
-    }
-
-    if (typeof value !== 'object') {
-      chunks.push(String(value));
-      return;
-    }
-
-    if (seen.has(value)) {
-      return;
-    }
-    seen.add(value);
-
-    if (value instanceof Error) {
-      if (value.message) {
-        chunks.push(value.message);
-      }
-
-      const withCause = value as Error & { cause?: unknown };
-      if (withCause.cause !== undefined) {
-        visit(withCause.cause);
-      }
-      return;
-    }
-
-    const record = value as Record<string, unknown>;
-    visit(record.message);
-    visit(record.error);
-    visit(record.reason);
-    visit(record.detail);
-    visit(record.details);
-    visit(record.cause);
-  };
-
-  visit(error);
-  return chunks.join('\n');
-}
-
-const REPORT_FALLBACK_ERROR_PATTERNS: RegExp[] = [
-  /insufficient balance/i,
-  /no resource package/i,
-  /insufficient quota/i,
-  /quota exceeded/i,
-  /insufficient credit/i,
-  /invalid api key/i,
-  /authentication/i,
-  /unauthorized/i,
-  /forbidden/i,
-  /payment required/i,
-  /recharge/i,
-  /unable to connect/i,
-  /network/i,
-  /timeout/i,
-  /timed out/i,
-  /econnrefused/i
-];
-
-function shouldFallbackReportError(error: unknown) {
-  const searchText = errorSearchText(error) || asErrorMessage(error);
-  return REPORT_FALLBACK_ERROR_PATTERNS.some((pattern) => pattern.test(searchText));
-}
-
 function defaultCreateModel(config: AiConfig) {
-  if (config.provider === 'zhipu') {
-    const zhipu = createZhipu({
-      apiKey: config.apiKey,
-      baseURL: config.baseUrl
-    });
-
-    return zhipu(config.model);
-  }
-
-  const openai = createOpenAI({
-    apiKey: config.apiKey ?? OLLAMA_API_KEY,
-    baseURL: toOpenAiCompatibleBaseUrl(config.baseUrl)
+  const zhipu = createZhipu({
+    apiKey: config.apiKey,
+    baseURL: config.baseUrl
  });

-  return openai.chat(config.model);
+  return zhipu(config.model);
 }

 async function defaultGenerate(input: AiGenerateInput): Promise<AiGenerateOutput> {
@@ -228,21 +109,16 @@ export function getReportAiConfig(options?: GetAiConfigOptions) {
    provider: 'zhipu',
    apiKey: envValue('ZHIPU_API_KEY', env),
    baseUrl: CODING_API_BASE_URL,
-    model: envValue('ZHIPU_MODEL', env) ?? 'glm-4.7-flashx',
+    model: envValue('ZHIPU_MODEL', env) ?? 'glm-5',
    temperature: parseTemperature(envValue('AI_TEMPERATURE', env))
  } satisfies AiConfig;
 }

 export function getExtractionAiConfig(options?: GetAiConfigOptions) {
-  const env = options?.env ?? process.env;
-
  return {
-    provider: 'ollama',
-    apiKey: envValue('OLLAMA_API_KEY', env) ?? OLLAMA_API_KEY,
-    baseUrl: envValue('OLLAMA_BASE_URL', env) ?? OLLAMA_BASE_URL,
-    model: envValue('OLLAMA_MODEL', env) ?? OLLAMA_MODEL,
+    ...getReportAiConfig(options),
    temperature: 0
-  } satisfies AiConfig;
+  };
 }

 export function isAiConfigured(options?: GetAiConfigOptions) {
@@ -256,70 +132,32 @@ export async function runAiAnalysis(prompt: string, systemPrompt?: string, optio
    ? getExtractionAiConfig(options)
    : getReportAiConfig(options);

-  if (workload === 'report' && !config.apiKey) {
-    return {
-      provider: 'local-fallback',
-      model: config.model,
-      text: fallbackResponse(prompt)
-    };
+  if (!config.apiKey) {
+    throw new Error('ZHIPU_API_KEY is required for AI workloads');
  }

  const createModel = options?.createModel ?? defaultCreateModel;
  const generate = options?.generate ?? defaultGenerate;
-  const warn = options?.warn ?? console.warn;
+  const model = createModel(config);

-  try {
-    const model = createModel(config);
+  const result = await generate({
+    model,
+    system: systemPrompt,
+    prompt,
+    temperature: config.temperature,
+    maxRetries: 0
+  });

-    const result = await generate({
-      model,
-      system: systemPrompt,
-      prompt,
-      temperature: config.temperature,
-      maxRetries: 0
-    });
-
-    const text = result.text.trim();
-    if (!text) {
-      if (workload === 'extraction') {
-        return {
-          provider: 'local-fallback',
-          model: config.model,
-          text: fallbackResponse(prompt)
-        };
-      }
-
-      throw new Error('AI SDK returned an empty response');
-    }
-
-    return {
-      provider: config.provider,
-      model: config.model,
-      text
-    };
-  } catch (error) {
-    if (workload === 'report' && shouldFallbackReportError(error)) {
-      warn(`[AI SDK] Report fallback activated: ${asErrorMessage(error)}`);
-
-      return {
-        provider: 'local-fallback',
-        model: config.model,
-        text: fallbackResponse(prompt)
-      };
-    }
-
-    if (workload === 'extraction') {
-      warn(`[AI SDK] Extraction fallback activated: ${asErrorMessage(error)}`);
-
-      return {
-        provider: 'local-fallback',
-        model: config.model,
-        text: fallbackResponse(prompt)
-      };
-    }
-
-    throw error;
+  const text = result.text.trim();
+  if (!text) {
+    throw new Error('AI SDK returned an empty response');
  }
+
+  return {
+    provider: config.provider,
+    model: config.model,
+    text
+  };
 }

 export function __resetAiWarningsForTests() {
--- a/lib/server/api/filing-redaction.test.ts
+++ b/lib/server/api/filing-redaction.test.ts
@@ -32,8 +32,8 @@ function filingWithExtraction(): Filing {
        confidence: 0.4
      },
      extractionMeta: {
-        provider: 'ollama',
-        model: 'qwen3:8b',
+        provider: 'zhipu',
+        model: 'glm-4.7-flashx',
        source: 'primary_document',
        generatedAt: '2026-02-01T00:00:00.000Z'
      }
--- a/lib/server/task-processors.ts
+++ b/lib/server/task-processors.ts
@@ -689,50 +689,39 @@ async function processAnalyzeFiling(task: Task) {
    source: 'metadata_fallback',
    generatedAt: new Date().toISOString()
  };
+  let filingDocument: Awaited<ReturnType<typeof fetchPrimaryFilingText>> | null = null;

  try {
    await setProjectionStage(task, 'analyze.fetch_document', 'Fetching primary filing document');
-    const filingDocument = await fetchPrimaryFilingText({
+    filingDocument = await fetchPrimaryFilingText({
      filingUrl: filing.filing_url,
      cik: filing.cik,
      accessionNumber: filing.accession_number,
      primaryDocument: filing.primary_document ?? null
    });
-
-    if (filingDocument?.text) {
-      await setProjectionStage(task, 'analyze.extract', 'Generating extraction context from filing text');
-      const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text);
-      extraction = ruleBasedExtraction;
-      extractionMeta = {
-        provider: 'deterministic-fallback',
-        model: 'filing-rule-based',
-        source: filingDocument.source,
-        generatedAt: new Date().toISOString()
-      };
-
-      const extractionResult = await runAiAnalysis(
-        extractionPrompt(filing, filingDocument.text),
-        'Return strict JSON only.',
-        { workload: 'extraction' }
-      );
-
-      const parsed = parseExtractionPayload(extractionResult.text);
-      if (parsed) {
-        extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction);
-        extractionMeta = {
-          provider: extractionResult.provider === 'local-fallback' ? 'deterministic-fallback' : 'ollama',
-          model: extractionResult.model,
-          source: filingDocument.source,
-          generatedAt: new Date().toISOString()
-        };
-      }
-    }
  } catch {
-    extraction = defaultExtraction;
+    filingDocument = null;
+  }
+
+  if (filingDocument?.text) {
+    await setProjectionStage(task, 'analyze.extract', 'Generating extraction context from filing text');
+    const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text);
+    const extractionResult = await runAiAnalysis(
+      extractionPrompt(filing, filingDocument.text),
+      'Return strict JSON only.',
+      { workload: 'extraction' }
+    );
+
+    const parsed = parseExtractionPayload(extractionResult.text);
+    if (!parsed) {
+      throw new Error('Extraction output invalid JSON schema');
+    }
+
+    extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction);
    extractionMeta = {
-      provider: 'deterministic-fallback',
-      model: 'metadata-fallback',
-      source: 'metadata_fallback',
+      provider: 'zhipu',
+      model: extractionResult.model,
+      source: filingDocument.source,
      generatedAt: new Date().toISOString()
    };
  }