refactor: make AI runtime z.ai-only and default to glm-5

This commit is contained in:
2026-03-02 22:27:39 -05:00
parent 812c4803f2
commit da2ce23bab
9 changed files with 152 additions and 384 deletions

View File

@@ -21,9 +21,10 @@ describe('ai config and runtime', () => {
warn: () => {}
});
expect(config.provider).toBe('zhipu');
expect(config.apiKey).toBe('key');
expect(config.baseUrl).toBe(CODING_API_BASE_URL);
expect(config.model).toBe('glm-4.7-flashx');
expect(config.model).toBe('glm-5');
expect(config.temperature).toBe(0.2);
});
@@ -39,7 +40,7 @@ describe('ai config and runtime', () => {
expect(config.baseUrl).toBe(CODING_API_BASE_URL);
});
it('clamps temperature into [0, 2]', () => {
it('clamps report temperature into [0, 2]', () => {
const negative = getAiConfig({
env: {
ZHIPU_API_KEY: 'key',
@@ -68,23 +69,50 @@ describe('ai config and runtime', () => {
expect(invalid.temperature).toBe(0.2);
});
it('returns fallback output when ZHIPU_API_KEY is missing', async () => {
const generate = mock(async () => ({ text: 'should-not-be-used' }));
it('uses extraction workload with zhipu config and zero temperature', async () => {
const createModel = mock((config: {
provider: string;
apiKey?: string;
model: string;
baseUrl: string;
temperature: number;
}) => {
expect(config.provider).toBe('zhipu');
expect(config.apiKey).toBe('new-key');
expect(config.baseUrl).toBe(CODING_API_BASE_URL);
expect(config.model).toBe('glm-5');
expect(config.temperature).toBe(0);
return { modelId: config.model };
});
const generate = mock(async (input: {
model: unknown;
system?: string;
prompt: string;
temperature: number;
maxRetries?: number;
}) => {
expect(input.system).toBe('Return strict JSON only.');
expect(input.prompt).toBe('Extract this filing');
expect(input.temperature).toBe(0);
expect(input.maxRetries).toBe(0);
return { text: '{"summary":"ok"}' };
});
const result = await runAiAnalysis(
'Prompt line one\nPrompt line two',
'System prompt',
{
env: {},
warn: () => {},
generate
}
);
const result = await runAiAnalysis('Extract this filing', 'Return strict JSON only.', {
env: {
ZHIPU_API_KEY: 'new-key'
},
warn: () => {},
workload: 'extraction',
createModel,
generate
});
expect(result.provider).toBe('local-fallback');
expect(result.model).toBe('glm-4.7-flashx');
expect(result.text).toContain('AI SDK fallback mode is active');
expect(generate).not.toHaveBeenCalled();
expect(result.provider).toBe('zhipu');
expect(result.model).toBe('glm-5');
expect(result.text).toBe('{"summary":"ok"}');
expect(createModel).toHaveBeenCalledTimes(1);
expect(generate).toHaveBeenCalledTimes(1);
});
it('warns once when ZHIPU_BASE_URL is set because coding endpoint is hardcoded', () => {
@@ -103,11 +131,13 @@ describe('ai config and runtime', () => {
it('uses configured ZHIPU values and injected generator when API key exists', async () => {
const createModel = mock((config: {
provider: string;
apiKey?: string;
model: string;
baseUrl: string;
temperature: number;
}) => {
expect(config.provider).toBe('zhipu');
expect(config.apiKey).toBe('new-key');
expect(config.baseUrl).toBe(CODING_API_BASE_URL);
expect(config.model).toBe('glm-4-plus');
@@ -147,6 +177,29 @@ describe('ai config and runtime', () => {
expect(result.text).toBe('Generated insight');
});
it('throws when report workload runs without ZHIPU_API_KEY', async () => {
  // No workload is passed and the env is empty, so the call is expected to reject
  // with the missing-key error instead of using the injected generator.
  const attempt = runAiAnalysis('Analyze this filing', undefined, {
    env: {},
    warn: () => {},
    createModel: () => ({}),
    generate: async () => ({ text: 'should-not-be-used' })
  });

  await expect(attempt).rejects.toThrow('ZHIPU_API_KEY is required for AI workloads');
});
it('throws when extraction workload runs without ZHIPU_API_KEY', async () => {
  // Same missing-key expectation as the report workload, exercised with
  // `workload: 'extraction'` and an explicit system prompt.
  const attempt = runAiAnalysis('Extract this filing', 'Return strict JSON only.', {
    env: {},
    warn: () => {},
    workload: 'extraction',
    createModel: () => ({}),
    generate: async () => ({ text: 'should-not-be-used' })
  });

  await expect(attempt).rejects.toThrow('ZHIPU_API_KEY is required for AI workloads');
});
it('throws when AI generation returns an empty response', async () => {
await expect(
runAiAnalysis('Analyze this filing', undefined, {
@@ -158,112 +211,7 @@ describe('ai config and runtime', () => {
).rejects.toThrow('AI SDK returned an empty response');
});
it('uses ollama defaults for extraction workload config', () => {
  // With an empty env every field must come from the built-in Ollama defaults.
  const { provider, baseUrl, model, apiKey, temperature } = getExtractionAiConfig({
    env: {},
    warn: () => {}
  });

  expect(provider).toBe('ollama');
  expect(baseUrl).toBe('http://127.0.0.1:11434');
  expect(model).toBe('qwen3:8b');
  expect(apiKey).toBe('ollama');
  expect(temperature).toBe(0);
});
it('uses extraction workload and returns ollama provider on success', async () => {
  type ModelConfig = {
    provider: string;
    apiKey?: string;
    model: string;
    baseUrl: string;
    temperature: number;
  };

  // The model factory doubles as an assertion point for the config handed to it.
  const modelFactory = mock((received: ModelConfig) => {
    expect(received.provider).toBe('ollama');
    expect(received.baseUrl).toBe('http://127.0.0.1:11434');
    expect(received.model).toBe('qwen3:8b');
    expect(received.temperature).toBe(0);
    return { modelId: received.model };
  });

  const extractionJson =
    '{"summary":"ok","keyPoints":[],"redFlags":[],"followUpQuestions":[],"portfolioSignals":[],"confidence":0.6}';
  const textGenerator = mock(async () => ({ text: extractionJson }));

  const outcome = await runAiAnalysis('Extract this filing', 'Return JSON', {
    env: {
      OLLAMA_MODEL: 'qwen3:8b'
    },
    warn: () => {},
    workload: 'extraction',
    createModel: modelFactory,
    generate: textGenerator
  });

  expect(modelFactory).toHaveBeenCalledTimes(1);
  expect(textGenerator).toHaveBeenCalledTimes(1);
  expect(outcome.provider).toBe('ollama');
  expect(outcome.model).toBe('qwen3:8b');
});
it('falls back to local text when extraction workload generation fails', async () => {
  // The generator always rejects; the extraction workload is expected to
  // resolve with the local fallback payload rather than propagate the error.
  const failingGenerate = async () => {
    throw new Error('ollama unavailable');
  };

  const outcome = await runAiAnalysis('Extract this filing', 'Return JSON', {
    env: {},
    warn: () => {},
    workload: 'extraction',
    createModel: () => ({}),
    generate: failingGenerate
  });

  expect(outcome.provider).toBe('local-fallback');
  expect(outcome.model).toBe('qwen3:8b');
  expect(outcome.text).toContain('AI SDK fallback mode is active');
});
it('falls back to local text when report workload fails with insufficient balance', async () => {
  const warnSpy = mock((_message: string) => {});

  // The billing failure text sits directly in the thrown error's message.
  const failingGenerate = async () => {
    throw new Error('AI_RetryError: Failed after 3 attempts. Last error: Insufficient balance or no resource package. Please recharge.');
  };

  const outcome = await runAiAnalysis('Analyze this filing', 'Use concise style', {
    env: {
      ZHIPU_API_KEY: 'new-key'
    },
    warn: warnSpy,
    createModel: () => ({}),
    generate: failingGenerate
  });

  expect(outcome.provider).toBe('local-fallback');
  expect(outcome.model).toBe('glm-4.7-flashx');
  expect(outcome.text).toContain('AI SDK fallback mode is active');
  expect(warnSpy).toHaveBeenCalledTimes(1);
});
it('falls back to local text when report workload cause contains insufficient balance', async () => {
  const warnSpy = mock((_message: string) => {});

  // Here the billing text is only reachable through the error's `cause`,
  // not through the top-level message.
  const failingGenerate = async () => {
    const retryError: Error & { cause?: unknown } = new Error('AI_RetryError: Failed after 3 attempts.');
    retryError.cause = new Error(
      'Last error: Insufficient balance or no resource package. Please recharge.'
    );
    throw retryError;
  };

  const outcome = await runAiAnalysis('Analyze this filing', 'Use concise style', {
    env: {
      ZHIPU_API_KEY: 'new-key'
    },
    warn: warnSpy,
    createModel: () => ({}),
    generate: failingGenerate
  });

  expect(outcome.provider).toBe('local-fallback');
  expect(outcome.model).toBe('glm-4.7-flashx');
  expect(outcome.text).toContain('AI SDK fallback mode is active');
  expect(warnSpy).toHaveBeenCalledTimes(1);
});
it('keeps throwing unknown report workload errors', async () => {
it('keeps throwing unknown provider errors', async () => {
await expect(
runAiAnalysis('Analyze this filing', 'Use concise style', {
env: {
@@ -277,4 +225,21 @@ describe('ai config and runtime', () => {
})
).rejects.toThrow('unexpected schema mismatch');
});
it('returns extraction config with same zhipu model and zero temperature', () => {
  // AI_TEMPERATURE is set to a non-zero value on purpose: the extraction
  // config is still expected to report a temperature of 0.
  const extractionEnv = {
    ZHIPU_API_KEY: 'new-key',
    ZHIPU_MODEL: 'glm-4-plus',
    AI_TEMPERATURE: '0.9'
  };

  const { provider, apiKey, baseUrl, model, temperature } = getExtractionAiConfig({
    env: extractionEnv,
    warn: () => {}
  });

  expect(provider).toBe('zhipu');
  expect(apiKey).toBe('new-key');
  expect(baseUrl).toBe(CODING_API_BASE_URL);
  expect(model).toBe('glm-4-plus');
  expect(temperature).toBe(0);
});
});

View File

@@ -1,9 +1,8 @@
import { createOpenAI } from '@ai-sdk/openai';
import { generateText } from 'ai';
import { createZhipu } from 'zhipu-ai-provider';
type AiWorkload = 'report' | 'extraction';
type AiProvider = 'zhipu' | 'ollama';
type AiProvider = 'zhipu';
type AiConfig = {
provider: AiProvider;
@@ -39,9 +38,6 @@ type RunAiAnalysisOptions = GetAiConfigOptions & {
};
// Base URL of the z.ai coding endpoint; getReportAiConfig assigns it as `baseUrl`.
const CODING_API_BASE_URL = 'https://api.z.ai/api/coding/paas/v4';
// Ollama fallbacks read by getExtractionAiConfig when the matching OLLAMA_* environment
// variable is unset.
const OLLAMA_BASE_URL = 'http://127.0.0.1:11434';
const OLLAMA_MODEL = 'qwen3:8b';
// Also used by defaultCreateModel as the API key when the config carries none.
const OLLAMA_API_KEY = 'ollama';
// NOTE(review): presumably flipped by warnIgnoredZhipuBaseUrl so the ZHIPU_BASE_URL
// warning is only emitted once — confirm against that function's body.
let warnedIgnoredZhipuBaseUrl = false;
@@ -80,128 +76,13 @@ function warnIgnoredZhipuBaseUrl(env: EnvSource, warn: (message: string) => void
);
}
/**
 * Builds the canned multi-paragraph text returned in place of live model output.
 *
 * @param prompt - Original prompt; its first six lines are joined with spaces and
 *   truncated to 260 characters to form the trailing "Context digest" paragraph.
 * @returns Four paragraphs separated by blank lines.
 */
function fallbackResponse(prompt: string) {
  const leadingLines = prompt.split('\n').slice(0, 6);
  const digest = leadingLines.join(' ').slice(0, 260);

  const paragraphs = [
    'AI SDK fallback mode is active (live model configuration is missing or unavailable).',
    'Thesis: Portfolio remains analyzable with local heuristics until live model access is configured.',
    'Risk scan: Concentration and filing sentiment should be monitored after each sync cycle.',
    'Context digest: ' + digest
  ];

  return paragraphs.join('\n\n');
}
/**
 * Normalizes a base URL so that it ends with a single `/v1` path segment,
 * which is the form passed to `createOpenAI` as `baseURL`.
 *
 * @param baseUrl - Base URL with or without trailing slashes and with or without `/v1`.
 * @returns The URL without trailing slashes, with `/v1` appended when it is not already the suffix.
 */
function toOpenAiCompatibleBaseUrl(baseUrl: string) {
  // Strip every trailing slash, not just one: otherwise `host/v1//` would keep a
  // slash, fail the suffix check, and come out as `host/v1//v1`.
  const normalized = baseUrl.replace(/\/+$/, '');

  return normalized.endsWith('/v1')
    ? normalized
    : `${normalized}/v1`;
}
/**
 * Produces a printable message for an arbitrary thrown value.
 *
 * @param error - Any thrown value.
 * @returns `error.message` for an `Error` with a non-empty message; otherwise `String(error)`.
 */
function asErrorMessage(error: unknown) {
  const hasUsableMessage = error instanceof Error && Boolean(error.message);
  return hasUsableMessage ? (error as Error).message : String(error);
}
/**
 * Flattens an arbitrary thrown value into newline-separated text for pattern matching.
 *
 * Traversal is depth-first. Strings are trimmed and kept when non-empty, other
 * non-object values are stringified, `Error` instances contribute their message
 * and then their `cause`, and plain objects contribute their `message`, `error`,
 * `reason`, `detail`, `details` and `cause` properties in that order. Each object
 * is visited at most once, so cyclic structures terminate.
 *
 * @param error - Any thrown value.
 * @returns The collected fragments joined with `\n` (empty string when nothing was collected).
 */
function errorSearchText(error: unknown) {
  const fragments: string[] = [];
  const visited = new Set<unknown>();
  const nestedKeys = ['message', 'error', 'reason', 'detail', 'details', 'cause'];

  function collect(node: unknown): void {
    if (node === null || node === undefined) {
      return;
    }

    switch (typeof node) {
      case 'string': {
        const trimmed = node.trim();
        if (trimmed.length > 0) {
          fragments.push(trimmed);
        }
        return;
      }
      case 'object':
        break;
      default:
        // Numbers, booleans, bigints, symbols and functions are kept in stringified form.
        fragments.push(String(node));
        return;
    }

    // Guard against cycles and repeated references.
    if (visited.has(node)) {
      return;
    }
    visited.add(node);

    if (node instanceof Error) {
      if (node.message) {
        fragments.push(node.message);
      }
      const withCause = node as Error & { cause?: unknown };
      if (withCause.cause !== undefined) {
        collect(withCause.cause);
      }
      return;
    }

    const fields = node as Record<string, unknown>;
    for (const key of nestedKeys) {
      collect(fields[key]);
    }
  }

  collect(error);

  return fragments.join('\n');
}
// Case-insensitive patterns that shouldFallbackReportError tests against the text
// collected from a thrown error; a match on any single pattern is enough.
// The entries cover billing/quota wording, authentication/authorization wording,
// and connectivity/timeout wording.
// NOTE(review): some entries (e.g. /network/i, /authentication/i) are broad substrings
// and may match unrelated messages — confirm that is intended.
const REPORT_FALLBACK_ERROR_PATTERNS: RegExp[] = [
/insufficient balance/i,
/no resource package/i,
/insufficient quota/i,
/quota exceeded/i,
/insufficient credit/i,
/invalid api key/i,
/authentication/i,
/unauthorized/i,
/forbidden/i,
/payment required/i,
/recharge/i,
/unable to connect/i,
/network/i,
/timeout/i,
/timed out/i,
/econnrefused/i
];
/**
 * Decides whether a report-workload failure matches one of the known fallback patterns.
 *
 * @param error - The value thrown during generation.
 * @returns `true` when any entry of `REPORT_FALLBACK_ERROR_PATTERNS` matches the error text.
 */
function shouldFallbackReportError(error: unknown) {
  // Prefer the flattened text (message plus nested causes); use the plain
  // message only when nothing was collected.
  const collected = errorSearchText(error);
  const haystack = collected ? collected : asErrorMessage(error);

  for (const pattern of REPORT_FALLBACK_ERROR_PATTERNS) {
    if (pattern.test(haystack)) {
      return true;
    }
  }
  return false;
}
function defaultCreateModel(config: AiConfig) {
if (config.provider === 'zhipu') {
const zhipu = createZhipu({
apiKey: config.apiKey,
baseURL: config.baseUrl
});
return zhipu(config.model);
}
const openai = createOpenAI({
apiKey: config.apiKey ?? OLLAMA_API_KEY,
baseURL: toOpenAiCompatibleBaseUrl(config.baseUrl)
const zhipu = createZhipu({
apiKey: config.apiKey,
baseURL: config.baseUrl
});
return openai.chat(config.model);
return zhipu(config.model);
}
async function defaultGenerate(input: AiGenerateInput): Promise<AiGenerateOutput> {
@@ -228,21 +109,16 @@ export function getReportAiConfig(options?: GetAiConfigOptions) {
provider: 'zhipu',
apiKey: envValue('ZHIPU_API_KEY', env),
baseUrl: CODING_API_BASE_URL,
model: envValue('ZHIPU_MODEL', env) ?? 'glm-4.7-flashx',
model: envValue('ZHIPU_MODEL', env) ?? 'glm-5',
temperature: parseTemperature(envValue('AI_TEMPERATURE', env))
} satisfies AiConfig;
}
export function getExtractionAiConfig(options?: GetAiConfigOptions) {
const env = options?.env ?? process.env;
return {
provider: 'ollama',
apiKey: envValue('OLLAMA_API_KEY', env) ?? OLLAMA_API_KEY,
baseUrl: envValue('OLLAMA_BASE_URL', env) ?? OLLAMA_BASE_URL,
model: envValue('OLLAMA_MODEL', env) ?? OLLAMA_MODEL,
...getReportAiConfig(options),
temperature: 0
} satisfies AiConfig;
};
}
export function isAiConfigured(options?: GetAiConfigOptions) {
@@ -256,70 +132,32 @@ export async function runAiAnalysis(prompt: string, systemPrompt?: string, optio
? getExtractionAiConfig(options)
: getReportAiConfig(options);
if (workload === 'report' && !config.apiKey) {
return {
provider: 'local-fallback',
model: config.model,
text: fallbackResponse(prompt)
};
if (!config.apiKey) {
throw new Error('ZHIPU_API_KEY is required for AI workloads');
}
const createModel = options?.createModel ?? defaultCreateModel;
const generate = options?.generate ?? defaultGenerate;
const warn = options?.warn ?? console.warn;
const model = createModel(config);
try {
const model = createModel(config);
const result = await generate({
model,
system: systemPrompt,
prompt,
temperature: config.temperature,
maxRetries: 0
});
const result = await generate({
model,
system: systemPrompt,
prompt,
temperature: config.temperature,
maxRetries: 0
});
const text = result.text.trim();
if (!text) {
if (workload === 'extraction') {
return {
provider: 'local-fallback',
model: config.model,
text: fallbackResponse(prompt)
};
}
throw new Error('AI SDK returned an empty response');
}
return {
provider: config.provider,
model: config.model,
text
};
} catch (error) {
if (workload === 'report' && shouldFallbackReportError(error)) {
warn(`[AI SDK] Report fallback activated: ${asErrorMessage(error)}`);
return {
provider: 'local-fallback',
model: config.model,
text: fallbackResponse(prompt)
};
}
if (workload === 'extraction') {
warn(`[AI SDK] Extraction fallback activated: ${asErrorMessage(error)}`);
return {
provider: 'local-fallback',
model: config.model,
text: fallbackResponse(prompt)
};
}
throw error;
const text = result.text.trim();
if (!text) {
throw new Error('AI SDK returned an empty response');
}
return {
provider: config.provider,
model: config.model,
text
};
}
export function __resetAiWarningsForTests() {

View File

@@ -32,8 +32,8 @@ function filingWithExtraction(): Filing {
confidence: 0.4
},
extractionMeta: {
provider: 'ollama',
model: 'qwen3:8b',
provider: 'zhipu',
model: 'glm-4.7-flashx',
source: 'primary_document',
generatedAt: '2026-02-01T00:00:00.000Z'
}

View File

@@ -689,50 +689,39 @@ async function processAnalyzeFiling(task: Task) {
source: 'metadata_fallback',
generatedAt: new Date().toISOString()
};
let filingDocument: Awaited<ReturnType<typeof fetchPrimaryFilingText>> | null = null;
try {
await setProjectionStage(task, 'analyze.fetch_document', 'Fetching primary filing document');
const filingDocument = await fetchPrimaryFilingText({
filingDocument = await fetchPrimaryFilingText({
filingUrl: filing.filing_url,
cik: filing.cik,
accessionNumber: filing.accession_number,
primaryDocument: filing.primary_document ?? null
});
if (filingDocument?.text) {
await setProjectionStage(task, 'analyze.extract', 'Generating extraction context from filing text');
const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text);
extraction = ruleBasedExtraction;
extractionMeta = {
provider: 'deterministic-fallback',
model: 'filing-rule-based',
source: filingDocument.source,
generatedAt: new Date().toISOString()
};
const extractionResult = await runAiAnalysis(
extractionPrompt(filing, filingDocument.text),
'Return strict JSON only.',
{ workload: 'extraction' }
);
const parsed = parseExtractionPayload(extractionResult.text);
if (parsed) {
extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction);
extractionMeta = {
provider: extractionResult.provider === 'local-fallback' ? 'deterministic-fallback' : 'ollama',
model: extractionResult.model,
source: filingDocument.source,
generatedAt: new Date().toISOString()
};
}
}
} catch {
extraction = defaultExtraction;
filingDocument = null;
}
if (filingDocument?.text) {
await setProjectionStage(task, 'analyze.extract', 'Generating extraction context from filing text');
const ruleBasedExtraction = buildRuleBasedExtraction(filing, filingDocument.text);
const extractionResult = await runAiAnalysis(
extractionPrompt(filing, filingDocument.text),
'Return strict JSON only.',
{ workload: 'extraction' }
);
const parsed = parseExtractionPayload(extractionResult.text);
if (!parsed) {
throw new Error('Extraction output invalid JSON schema');
}
extraction = mergeExtractionWithFallback(parsed, ruleBasedExtraction);
extractionMeta = {
provider: 'deterministic-fallback',
model: 'metadata-fallback',
source: 'metadata_fallback',
provider: 'zhipu',
model: extractionResult.model,
source: filingDocument.source,
generatedAt: new Date().toISOString()
};
}