Add search and RAG workspace flows
This commit is contained in:
51
lib/api.ts
51
lib/api.ts
@@ -14,6 +14,9 @@ import type {
|
||||
PortfolioSummary,
|
||||
ResearchJournalEntry,
|
||||
ResearchJournalEntryType,
|
||||
SearchAnswerResponse,
|
||||
SearchResult,
|
||||
SearchSource,
|
||||
Task,
|
||||
TaskStatus,
|
||||
TaskTimeline,
|
||||
@@ -295,6 +298,54 @@ export async function listFilings(query?: { ticker?: string; limit?: number }) {
|
||||
return await unwrapData<{ filings: Filing[] }>(result, 'Unable to fetch filings');
|
||||
}
|
||||
|
||||
/**
 * Issues a search request through `client.api.search.get` and unwraps the result list.
 *
 * Request parameters are normalized before sending:
 * - `query` is trimmed and sent as `q`;
 * - `ticker` is sent trimmed and upper-cased, and omitted when blank;
 * - `sources` is sent only when the array is non-empty;
 * - `limit` is sent only when it is a number.
 *
 * Failures are surfaced by `unwrapData` using the message
 * 'Unable to search indexed sources'.
 */
export async function searchKnowledge(input: {
  query: string;
  ticker?: string;
  sources?: SearchSource[];
  limit?: number;
}) {
  const { query, ticker, sources, limit } = input;

  const params: {
    q: string;
    ticker?: string;
    sources?: SearchSource[];
    limit?: number;
  } = { q: query.trim() };

  const cleanedTicker = ticker?.trim();
  if (cleanedTicker) {
    params.ticker = cleanedTicker.toUpperCase();
  }

  if (sources && sources.length > 0) {
    params.sources = sources;
  }

  if (typeof limit === 'number') {
    params.limit = limit;
  }

  const result = await client.api.search.get({ $query: params });

  return await unwrapData<{ results: SearchResult[] }>(result, 'Unable to search indexed sources');
}
|
||||
|
||||
/**
 * POSTs a question to `/api/search/answer` via `requestJson` and returns the
 * parsed `SearchAnswerResponse`.
 *
 * The body is normalized the same way as the search request: `query` is trimmed,
 * `ticker` is trimmed and upper-cased (omitted when blank), `sources` is included
 * only when non-empty, and `limit` only when it is a number.
 */
export async function getSearchAnswer(input: {
  query: string;
  ticker?: string;
  sources?: SearchSource[];
  limit?: number;
}) {
  const { query, ticker, sources, limit } = input;

  const body: {
    query: string;
    ticker?: string;
    sources?: SearchSource[];
    limit?: number;
  } = { query: query.trim() };

  const cleanedTicker = ticker?.trim();
  if (cleanedTicker) {
    body.ticker = cleanedTicker.toUpperCase();
  }

  if (sources && sources.length > 0) {
    body.sources = sources;
  }

  if (typeof limit === 'number') {
    body.limit = limit;
  }

  return await requestJson<SearchAnswerResponse>({
    path: '/api/search/answer',
    method: 'POST',
    body
  }, 'Unable to generate cited answer');
}
|
||||
|
||||
export async function getCompanyAnalysis(ticker: string) {
|
||||
const result = await client.api.analysis.company.get({
|
||||
$query: {
|
||||
|
||||
@@ -12,6 +12,7 @@ export const queryKeys = {
|
||||
limit: number
|
||||
) => ['financials-v3', ticker, surfaceKind, cadence, includeDimensions ? 'dims' : 'no-dims', includeFacts ? 'facts' : 'rows', factsCursor ?? '', factsLimit, cursor ?? '', limit] as const,
|
||||
filings: (ticker: string | null, limit: number) => ['filings', ticker ?? '', limit] as const,
|
||||
search: (query: string, ticker: string | null, sources: string[], limit: number) => ['search', query, ticker ?? '', sources.join(','), limit] as const,
|
||||
report: (accessionNumber: string) => ['report', accessionNumber] as const,
|
||||
watchlist: () => ['watchlist'] as const,
|
||||
researchJournal: (ticker: string) => ['research', 'journal', ticker] as const,
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
getCompanyFinancialStatements,
|
||||
getLatestPortfolioInsight,
|
||||
getPortfolioSummary,
|
||||
searchKnowledge,
|
||||
getTask,
|
||||
getTaskTimeline,
|
||||
listFilings,
|
||||
@@ -16,7 +17,8 @@ import {
|
||||
import { queryKeys } from '@/lib/query/keys';
|
||||
import type {
|
||||
FinancialCadence,
|
||||
FinancialSurfaceKind
|
||||
FinancialSurfaceKind,
|
||||
SearchSource
|
||||
} from '@/lib/types';
|
||||
|
||||
export function companyAnalysisQueryOptions(ticker: string) {
|
||||
@@ -86,6 +88,31 @@ export function filingsQueryOptions(input: { ticker?: string; limit?: number } =
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Builds the query options (key, fetcher, stale time) for a knowledge search.
 *
 * Normalization applied before building the key and calling `searchKnowledge`:
 * - the query text is trimmed;
 * - a provided ticker is trimmed and upper-cased; `null`/`undefined` becomes `null`;
 * - `sources` is de-duplicated, or defaults to documents + filings + research
 *   when missing or empty;
 * - `limit` defaults to 10.
 *
 * Results are treated as fresh for 30 seconds (`staleTime: 30_000`).
 */
export function searchQueryOptions(input: {
  query: string;
  ticker?: string | null;
  sources?: SearchSource[];
  limit?: number;
}) {
  const normalizedQuery = input.query.trim();
  const normalizedTicker = input.ticker == null
    ? null
    : input.ticker.trim().toUpperCase();

  let sources: SearchSource[];
  if (input.sources && input.sources.length > 0) {
    // Drop repeated entries while keeping first-seen order.
    sources = [...new Set(input.sources)];
  } else {
    sources = ['documents', 'filings', 'research'] as SearchSource[];
  }

  const limit = input.limit ?? 10;
  const queryKey = queryKeys.search(normalizedQuery, normalizedTicker, sources, limit);

  return queryOptions({
    queryKey,
    queryFn: () => searchKnowledge({
      query: normalizedQuery,
      ticker: normalizedTicker ?? undefined,
      sources,
      limit
    }),
    staleTime: 30_000
  });
}
|
||||
|
||||
export function aiReportQueryOptions(accessionNumber: string) {
|
||||
const normalizedAccession = accessionNumber.trim();
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { generateText } from 'ai';
|
||||
import { embedMany, generateText } from 'ai';
|
||||
import { createZhipu } from 'zhipu-ai-provider';
|
||||
|
||||
type AiWorkload = 'report' | 'extraction';
|
||||
@@ -31,13 +31,35 @@ type AiGenerateOutput = {
|
||||
text: string;
|
||||
};
|
||||
|
||||
/** Shape returned by an embed function: a list of numeric embedding vectors. */
type AiEmbedOutput = {
  embeddings: Array<number[]>;
};
|
||||
|
||||
/**
 * Options for an AI analysis run: everything in `GetAiConfigOptions`, plus an
 * optional workload selector and injectable `createModel` / `generate` hooks.
 */
type RunAiAnalysisOptions = GetAiConfigOptions & {
  /** Which workload ('report' or 'extraction') the run is for. */
  workload?: AiWorkload;
  /** Optional factory that builds the model object from the resolved config. */
  createModel?: (config: AiConfig) => unknown;
  /** Optional replacement for the text-generation call. */
  generate?: (input: AiGenerateInput) => Promise<AiGenerateOutput>;
};
|
||||
|
||||
/**
 * Resolved configuration for the embedding model.
 *
 * `model` and `dimensions` are literal types, so only the 'embedding-3' model
 * with 256-dimensional output type-checks.
 */
type EmbeddingConfig = {
  provider: AiProvider;
  /** API key; may be absent when the environment does not provide one. */
  apiKey?: string;
  baseUrl: string;
  model: 'embedding-3';
  dimensions: 256;
};
|
||||
|
||||
/**
 * Options for an embeddings run: everything in `GetAiConfigOptions`, plus
 * injectable `createModel` / `embed` hooks.
 */
type RunAiEmbeddingsOptions = GetAiConfigOptions & {
  /** Optional factory that builds the embedding model from the resolved config. */
  createModel?: (config: EmbeddingConfig) => unknown;
  /** Optional replacement for the embed call; receives the model and the texts to embed. */
  embed?: (input: { model: unknown; values: string[] }) => Promise<AiEmbedOutput>;
};
|
||||
|
||||
const CODING_API_BASE_URL = 'https://api.z.ai/api/coding/paas/v4';
|
||||
const SEARCH_EMBEDDING_MODEL = 'embedding-3';
|
||||
const SEARCH_EMBEDDING_DIMENSIONS = 256;
|
||||
|
||||
let warnedIgnoredZhipuBaseUrl = false;
|
||||
|
||||
@@ -97,6 +119,30 @@ async function defaultGenerate(input: AiGenerateInput): Promise<AiGenerateOutput
|
||||
return { text: result.text };
|
||||
}
|
||||
|
||||
/**
 * Default embedding-model factory: creates a Zhipu provider from the config's
 * API key and base URL, then returns its text-embedding model for
 * `config.model`, requesting `config.dimensions` dimensions.
 */
function defaultCreateEmbeddingModel(config: EmbeddingConfig) {
  const { apiKey, baseUrl, model, dimensions } = config;

  const provider = createZhipu({
    apiKey,
    baseURL: baseUrl
  });

  return provider.textEmbeddingModel(model, { dimensions });
}
|
||||
|
||||
/**
 * Default embed implementation: passes the values to `embedMany` with retries
 * disabled (`maxRetries: 0`) and returns the resulting embeddings.
 *
 * @param input.model  Model object to embed with; typed `unknown` here and cast
 *                     at the call site.
 * @param input.values Texts to embed.
 */
async function defaultEmbed(input: {
  model: unknown;
  values: string[];
}): Promise<AiEmbedOutput> {
  const { model, values } = input;

  const { embeddings } = await embedMany({
    model: model as never,
    values,
    maxRetries: 0
  });

  return { embeddings: embeddings as number[][] };
}
|
||||
|
||||
/** Returns whatever `getReportAiConfig` resolves for the given options. */
export function getAiConfig(options?: GetAiConfigOptions) {
  const reportConfig = getReportAiConfig(options);
  return reportConfig;
}
|
||||
@@ -121,6 +167,19 @@ export function getExtractionAiConfig(options?: GetAiConfigOptions) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Resolves the embedding configuration.
 *
 * Reads `ZHIPU_API_KEY` from `options.env` (defaulting to `process.env`); the
 * provider, base URL, model, and dimensions are fixed values. Before returning
 * it calls `warnIgnoredZhipuBaseUrl` with the environment and the warn function
 * (`options.warn`, defaulting to `console.warn`).
 */
export function getEmbeddingAiConfig(options?: GetAiConfigOptions) {
  const env = options?.env ?? process.env;
  const warn = options?.warn ?? console.warn;

  warnIgnoredZhipuBaseUrl(env, warn);

  return {
    provider: 'zhipu',
    apiKey: envValue('ZHIPU_API_KEY', env),
    baseUrl: CODING_API_BASE_URL,
    model: SEARCH_EMBEDDING_MODEL,
    dimensions: SEARCH_EMBEDDING_DIMENSIONS
  } satisfies EmbeddingConfig;
}
|
||||
|
||||
export function isAiConfigured(options?: GetAiConfigOptions) {
|
||||
const config = getReportAiConfig(options);
|
||||
return Boolean(config.apiKey);
|
||||
@@ -160,6 +219,31 @@ export async function runAiAnalysis(prompt: string, systemPrompt?: string, optio
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Embeds a list of texts and returns one numeric vector per non-blank text.
 *
 * Each value is trimmed and blank entries are dropped before embedding, so the
 * result is aligned with the filtered list, not with the original `values`
 * indices. Returns `[]` without resolving any config when nothing is left.
 *
 * @param values  Texts to embed.
 * @param options Config options plus optional `createModel` / `embed` overrides
 *                that replace the default implementations.
 * @throws Error when the resolved config has no API key.
 */
export async function runAiEmbeddings(values: string[], options?: RunAiEmbeddingsOptions) {
  const sanitizedValues: string[] = [];
  for (const raw of values) {
    const trimmed = raw.trim();
    if (trimmed.length > 0) {
      sanitizedValues.push(trimmed);
    }
  }

  if (sanitizedValues.length === 0) {
    return [];
  }

  const config = getEmbeddingAiConfig(options);
  if (!config.apiKey) {
    throw new Error('ZHIPU_API_KEY is required for AI workloads');
  }

  const buildModel = options?.createModel ?? defaultCreateEmbeddingModel;
  const runEmbed = options?.embed ?? defaultEmbed;

  const { embeddings } = await runEmbed({
    model: buildModel(config),
    values: sanitizedValues
  });

  // Coerce every component with Number() so the result is plain number[][].
  return embeddings.map((embedding) => embedding.map((component) => Number(component)));
}
|
||||
|
||||
/** Test hook: clears the module-level `warnedIgnoredZhipuBaseUrl` flag. */
export function __resetAiWarningsForTests() {
  warnedIgnoredZhipuBaseUrl = false;
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import type {
|
||||
FinancialStatementKind,
|
||||
FinancialSurfaceKind,
|
||||
ResearchJournalEntryType,
|
||||
SearchSource,
|
||||
TaskStatus
|
||||
} from '@/lib/types';
|
||||
import { auth } from '@/lib/auth';
|
||||
@@ -48,6 +49,7 @@ import {
|
||||
upsertWatchlistItemRecord
|
||||
} from '@/lib/server/repos/watchlist';
|
||||
import { getPriceHistory, getQuote } from '@/lib/server/prices';
|
||||
import { answerSearchQuery, searchKnowledgeBase } from '@/lib/server/search';
|
||||
import {
|
||||
enqueueTask,
|
||||
findInFlightTask,
|
||||
@@ -82,6 +84,7 @@ const FINANCIAL_SURFACES: FinancialSurfaceKind[] = [
|
||||
const COVERAGE_STATUSES: CoverageStatus[] = ['backlog', 'active', 'watch', 'archive'];
|
||||
const COVERAGE_PRIORITIES: CoveragePriority[] = ['low', 'medium', 'high'];
|
||||
const JOURNAL_ENTRY_TYPES: ResearchJournalEntryType[] = ['note', 'filing_note', 'status_change'];
|
||||
const SEARCH_SOURCES: SearchSource[] = ['documents', 'filings', 'research'];
|
||||
|
||||
function asRecord(value: unknown): Record<string, unknown> {
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
@@ -205,6 +208,21 @@ function asJournalEntryType(value: unknown) {
|
||||
: undefined;
|
||||
}
|
||||
|
||||
/**
 * Parses an untrusted `sources` value into a de-duplicated list of valid
 * search sources.
 *
 * Accepts either an array or a comma-separated string; anything else is treated
 * as empty. Non-string entries are ignored, remaining entries are trimmed and
 * lower-cased, and only values present in `SEARCH_SOURCES` are kept
 * (first-seen order).
 *
 * @returns The valid sources, or `undefined` when none remain.
 */
function asSearchSources(value: unknown) {
  let candidates: unknown[];
  if (Array.isArray(value)) {
    candidates = value;
  } else if (typeof value === 'string') {
    candidates = value.split(',');
  } else {
    candidates = [];
  }

  const accepted = new Set<SearchSource>();
  for (const candidate of candidates) {
    if (typeof candidate !== 'string') {
      continue;
    }

    const cleaned = candidate.trim().toLowerCase();
    if (SEARCH_SOURCES.includes(cleaned as SearchSource)) {
      accepted.add(cleaned as SearchSource);
    }
  }

  return accepted.size > 0 ? [...accepted] : undefined;
}
|
||||
|
||||
function formatLabel(value: string) {
|
||||
return value
|
||||
.split('_')
|
||||
@@ -763,6 +781,21 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
});
|
||||
|
||||
await updateWatchlistReviewByTicker(session.user.id, ticker, entry.updated_at);
|
||||
try {
|
||||
await enqueueTask({
|
||||
userId: session.user.id,
|
||||
taskType: 'index_search',
|
||||
payload: {
|
||||
ticker: entry.ticker,
|
||||
journalEntryId: entry.id,
|
||||
sourceKinds: ['research_note']
|
||||
},
|
||||
priority: 52,
|
||||
resourceKey: `index_search:research_note:${session.user.id}:${entry.id}`
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('[search-index-journal-create] failed:', error);
|
||||
}
|
||||
|
||||
return Response.json({ entry });
|
||||
} catch (error) {
|
||||
@@ -800,6 +833,21 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
}
|
||||
|
||||
await updateWatchlistReviewByTicker(session.user.id, entry.ticker, entry.updated_at);
|
||||
try {
|
||||
await enqueueTask({
|
||||
userId: session.user.id,
|
||||
taskType: 'index_search',
|
||||
payload: {
|
||||
ticker: entry.ticker,
|
||||
journalEntryId: entry.id,
|
||||
sourceKinds: ['research_note']
|
||||
},
|
||||
priority: 52,
|
||||
resourceKey: `index_search:research_note:${session.user.id}:${entry.id}`
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('[search-index-journal-update] failed:', error);
|
||||
}
|
||||
|
||||
return Response.json({ entry });
|
||||
} catch (error) {
|
||||
@@ -822,6 +870,25 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
return jsonError('Journal entry not found', 404);
|
||||
}
|
||||
|
||||
try {
|
||||
await enqueueTask({
|
||||
userId: session.user.id,
|
||||
taskType: 'index_search',
|
||||
payload: {
|
||||
deleteSourceRefs: [{
|
||||
sourceKind: 'research_note',
|
||||
sourceRef: String(numericId),
|
||||
scope: 'user',
|
||||
userId: session.user.id
|
||||
}]
|
||||
},
|
||||
priority: 52,
|
||||
resourceKey: `index_search:research_note:${session.user.id}:${numericId}:delete`
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('[search-index-journal-delete] failed:', error);
|
||||
}
|
||||
|
||||
return Response.json({ success: true });
|
||||
}, {
|
||||
params: t.Object({
|
||||
@@ -1124,6 +1191,63 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
limit: t.Optional(t.Numeric())
|
||||
})
|
||||
})
|
||||
// GET /search: searches the knowledge base on behalf of the authenticated user.
// Query params: `q` (required, at least 2 characters after trimming), optional
// `ticker`, optional `sources` (string or string array), optional numeric `limit`.
.get('/search', async ({ query }) => {
  const { session, response } = await requireAuthenticatedSession();
  if (response) {
    return response;
  }

  // Re-check the length after trimming; the schema's minLength is applied to the raw value.
  const q = typeof query.q === 'string' ? query.q.trim() : '';
  if (q.length < 2) {
    return jsonError('q is required', 400);
  }

  // `limit` is optional. Number(undefined) is NaN, so only forward a finite value
  // and otherwise leave the limit unset instead of passing NaN downstream.
  // NOTE(review): assumes `searchKnowledgeBase` treats an undefined limit as
  // "use the default", as the sibling answer route does — confirm.
  const parsedLimit = typeof query.limit === 'number' ? query.limit : Number(query.limit);

  const results = await searchKnowledgeBase({
    userId: session.user.id,
    query: q,
    ticker: asOptionalString(query.ticker),
    sources: asSearchSources(query.sources),
    limit: Number.isFinite(parsedLimit) ? parsedLimit : undefined
  });

  return Response.json({ results });
}, {
  query: t.Object({
    q: t.String({ minLength: 2 }),
    ticker: t.Optional(t.String()),
    sources: t.Optional(t.Union([t.String(), t.Array(t.String())])),
    limit: t.Optional(t.Numeric())
  })
})
|
||||
// POST /search/answer: answers a question for the authenticated user via `answerSearchQuery`.
// Body: `query` (required, at least 2 characters after trimming), optional `ticker`,
// optional `sources` (string or string array), optional numeric `limit`.
.post('/search/answer', async ({ body }) => {
  const { session, response } = await requireAuthenticatedSession();
  if (response) {
    return response;
  }

  const { query: rawQuery, ticker, sources, limit } = asRecord(body);

  // Re-check the length after trimming; the schema's minLength is applied to the raw value.
  const query = typeof rawQuery === 'string' ? rawQuery.trim() : '';
  if (query.length < 2) {
    return jsonError('query is required', 400);
  }

  const answer = await answerSearchQuery({
    userId: session.user.id,
    query,
    ticker: asOptionalString(ticker),
    sources: asSearchSources(sources),
    limit: asPositiveNumber(limit) ?? undefined
  });

  return Response.json(answer);
}, {
  body: t.Object({
    query: t.String({ minLength: 2 }),
    ticker: t.Optional(t.String()),
    sources: t.Optional(t.Union([t.String(), t.Array(t.String())])),
    limit: t.Optional(t.Numeric())
  })
})
|
||||
.post('/filings/sync', async ({ body }) => {
|
||||
const { session, response } = await requireAuthenticatedSession();
|
||||
if (response) {
|
||||
|
||||
@@ -37,6 +37,14 @@ describe('sqlite schema compatibility bootstrap', () => {
|
||||
expect(__dbInternals.hasTable(client, 'filing_taxonomy_snapshot')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'filing_taxonomy_fact')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'research_journal_entry')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'search_document')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'search_chunk')).toBe(true);
|
||||
|
||||
__dbInternals.loadSqliteExtensions(client);
|
||||
__dbInternals.ensureSearchVirtualTables(client);
|
||||
|
||||
expect(__dbInternals.hasTable(client, 'search_chunk_fts')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'search_chunk_vec')).toBe(true);
|
||||
|
||||
client.close();
|
||||
});
|
||||
|
||||
@@ -2,6 +2,7 @@ import { mkdirSync, readFileSync } from 'node:fs';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { Database } from 'bun:sqlite';
|
||||
import { drizzle } from 'drizzle-orm/bun-sqlite';
|
||||
import { load as loadSqliteVec } from 'sqlite-vec';
|
||||
import { schema } from './schema';
|
||||
|
||||
type AppDrizzleDb = ReturnType<typeof createDb>;
|
||||
@@ -50,6 +51,45 @@ function applySqlFile(client: Database, fileName: string) {
|
||||
client.exec(sql);
|
||||
}
|
||||
|
||||
let customSqliteConfigured = false;
|
||||
const vectorExtensionStatus = new WeakMap<Database, boolean>();
|
||||
|
||||
/**
 * One-time runtime setup: on macOS, when `SQLITE_CUSTOM_LIB_PATH` is set to a
 * non-blank value, points Bun's `Database` at that SQLite library.
 *
 * Guarded by the module-level `customSqliteConfigured` flag, so later calls
 * are no-ops once the first call has completed.
 */
function configureCustomSqliteRuntime() {
  if (!customSqliteConfigured) {
    const libraryPath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
    const onMac = process.platform === 'darwin';

    if (onMac && libraryPath) {
      Database.setCustomSQLite(libraryPath);
    }

    customSqliteConfigured = true;
  }
}
|
||||
|
||||
/**
 * Tries to load the sqlite-vec extension into `client` and records the outcome
 * in `vectorExtensionStatus`.
 *
 * Uses the path in `SQLITE_VEC_EXTENSION_PATH` when it is set to a non-blank
 * value, otherwise the loader from the `sqlite-vec` package. A failure is not
 * rethrown: the client is marked as not vector-capable and a warning is logged.
 */
function loadSqliteExtensions(client: Database) {
  const overridePath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();

  try {
    if (!overridePath) {
      loadSqliteVec(client);
    } else {
      client.loadExtension(overridePath);
    }

    vectorExtensionStatus.set(client, true);
  } catch (error) {
    vectorExtensionStatus.set(client, false);

    const reason = error instanceof Error ? error.message : 'Unknown sqlite extension error';
    console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Reports whether the vector extension was recorded as loaded for `client`.
 * Clients with no recorded status count as not loaded.
 */
function isVectorExtensionLoaded(client: Database) {
  const recorded = vectorExtensionStatus.get(client);
  return recorded === true;
}
|
||||
|
||||
function ensureLocalSqliteSchema(client: Database) {
|
||||
if (!hasTable(client, 'filing_statement_snapshot')) {
|
||||
applySqlFile(client, '0001_glossy_statement_snapshots.sql');
|
||||
@@ -142,10 +182,70 @@ function ensureLocalSqliteSchema(client: Database) {
|
||||
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_ticker_idx` ON `research_journal_entry` (`user_id`, `ticker`, `created_at`);');
|
||||
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_accession_idx` ON `research_journal_entry` (`user_id`, `accession_number`);');
|
||||
}
|
||||
|
||||
if (!hasTable(client, 'search_document')) {
|
||||
applySqlFile(client, '0008_search_rag.sql');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates the search side tables if they do not exist yet.
 *
 * - `search_chunk_fts`: an FTS5 virtual table. `chunk_text`, `citation_label`
 *   and `heading_path` are indexed; the remaining columns are UNINDEXED.
 * - `search_chunk_vec`: a `vec0` virtual table with a 256-float embedding when
 *   the vector extension is recorded as loaded for this client; otherwise a
 *   plain table that stores the embedding as text, plus a lookup index.
 *
 * All statements use IF NOT EXISTS, so an existing `search_chunk_vec` of either
 * kind is left as it is.
 */
function ensureSearchVirtualTables(client: Database) {
  const createFtsTable = `
    CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
      \`chunk_text\`,
      \`citation_label\`,
      \`heading_path\`,
      \`chunk_id\` UNINDEXED,
      \`document_id\` UNINDEXED,
      \`chunk_index\` UNINDEXED,
      \`scope\` UNINDEXED,
      \`user_id\` UNINDEXED,
      \`source_kind\` UNINDEXED,
      \`ticker\` UNINDEXED,
      \`accession_number\` UNINDEXED,
      \`filing_date\` UNINDEXED
    );
  `;

  // NOTE(review): the leading `+` on the last three columns presumably marks
  // sqlite-vec auxiliary columns — confirm against the sqlite-vec docs.
  const createVec0Table = `
    CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
      \`chunk_id\` integer PRIMARY KEY,
      \`embedding\` float[256],
      \`scope\` text,
      \`user_id\` text,
      \`source_kind\` text,
      \`ticker\` text,
      \`accession_number\` text,
      \`filing_date\` text,
      +\`document_id\` integer,
      +\`chunk_index\` integer,
      +\`citation_label\` text
    );
  `;

  const createFallbackVecTable = `
    CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
      \`chunk_id\` integer PRIMARY KEY NOT NULL,
      \`embedding\` text NOT NULL,
      \`scope\` text NOT NULL,
      \`user_id\` text,
      \`source_kind\` text NOT NULL,
      \`ticker\` text,
      \`accession_number\` text,
      \`filing_date\` text,
      \`document_id\` integer NOT NULL,
      \`chunk_index\` integer NOT NULL,
      \`citation_label\` text NOT NULL
    );
  `;

  client.exec(createFtsTable);

  if (!isVectorExtensionLoaded(client)) {
    // No vector extension: keep the same table name backed by a regular table.
    client.exec(createFallbackVecTable);
    client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
    return;
  }

  client.exec(createVec0Table);
}
|
||||
|
||||
export function getSqliteClient() {
|
||||
if (!globalThis.__fiscalSqliteClient) {
|
||||
configureCustomSqliteRuntime();
|
||||
const databasePath = getDatabasePath();
|
||||
|
||||
if (databasePath !== ':memory:') {
|
||||
@@ -156,7 +256,9 @@ export function getSqliteClient() {
|
||||
client.exec('PRAGMA foreign_keys = ON;');
|
||||
client.exec('PRAGMA journal_mode = WAL;');
|
||||
client.exec('PRAGMA busy_timeout = 5000;');
|
||||
loadSqliteExtensions(client);
|
||||
ensureLocalSqliteSchema(client);
|
||||
ensureSearchVirtualTables(client);
|
||||
|
||||
globalThis.__fiscalSqliteClient = client;
|
||||
}
|
||||
@@ -175,8 +277,12 @@ if (!globalThis.__fiscalDrizzleDb) {
|
||||
}
|
||||
|
||||
export const __dbInternals = {
|
||||
configureCustomSqliteRuntime,
|
||||
ensureLocalSqliteSchema,
|
||||
ensureSearchVirtualTables,
|
||||
getDatabasePath,
|
||||
hasColumn,
|
||||
hasTable
|
||||
hasTable,
|
||||
isVectorExtensionLoaded,
|
||||
loadSqliteExtensions
|
||||
};
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { sql } from 'drizzle-orm';
|
||||
import {
|
||||
index,
|
||||
integer,
|
||||
@@ -31,6 +32,9 @@ type CoverageStatus = 'backlog' | 'active' | 'watch' | 'archive';
|
||||
type CoveragePriority = 'low' | 'medium' | 'high';
|
||||
type ResearchJournalEntryType = 'note' | 'filing_note' | 'status_change';
|
||||
type FinancialCadence = 'annual' | 'quarterly' | 'ltm';
|
||||
type SearchDocumentScope = 'global' | 'user';
|
||||
type SearchDocumentSourceKind = 'filing_document' | 'filing_brief' | 'research_note';
|
||||
type SearchIndexStatus = 'pending' | 'indexed' | 'failed';
|
||||
type FinancialSurfaceKind =
|
||||
| 'income_statement'
|
||||
| 'balance_sheet'
|
||||
@@ -500,7 +504,7 @@ export const filingLink = sqliteTable('filing_link', {
|
||||
export const taskRun = sqliteTable('task_run', {
|
||||
id: text('id').primaryKey().notNull(),
|
||||
user_id: text('user_id').notNull().references(() => user.id, { onDelete: 'cascade' }),
|
||||
task_type: text('task_type').$type<'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights'>().notNull(),
|
||||
task_type: text('task_type').$type<'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights' | 'index_search'>().notNull(),
|
||||
status: text('status').$type<'queued' | 'running' | 'completed' | 'failed'>().notNull(),
|
||||
stage: text('stage').notNull(),
|
||||
stage_detail: text('stage_detail'),
|
||||
@@ -570,6 +574,55 @@ export const researchJournalEntry = sqliteTable('research_journal_entry', {
|
||||
researchJournalAccessionIndex: index('research_journal_accession_idx').on(table.user_id, table.accession_number)
|
||||
}));
|
||||
|
||||
/**
 * `search_document` table: one row per indexed source document.
 *
 * A document is identified by (`scope`, `user_id`, `source_kind`, `source_ref`),
 * which the unique index below enforces. Rows are removed when the owning user
 * is deleted (`onDelete: 'cascade'` on `user_id`).
 */
export const searchDocument = sqliteTable(
  'search_document',
  {
    id: integer('id').primaryKey({ autoIncrement: true }),
    source_kind: text('source_kind').$type<SearchDocumentSourceKind>().notNull(),
    source_ref: text('source_ref').notNull(),
    scope: text('scope').$type<SearchDocumentScope>().notNull(),
    // Nullable: not every document row carries an owning user.
    user_id: text('user_id').references(() => user.id, { onDelete: 'cascade' }),
    ticker: text('ticker'),
    accession_number: text('accession_number'),
    title: text('title'),
    content_text: text('content_text').notNull(),
    content_hash: text('content_hash').notNull(),
    metadata: text('metadata', { mode: 'json' }).$type<Record<string, unknown> | null>(),
    index_status: text('index_status').$type<SearchIndexStatus>().notNull(),
    indexed_at: text('indexed_at'),
    last_error: text('last_error'),
    created_at: text('created_at').notNull(),
    updated_at: text('updated_at').notNull()
  },
  (table) => ({
    // `ifnull(user_id, '')` maps a NULL user id to '' inside the unique key.
    // NOTE(review): presumably so rows without a user still collide on
    // (scope, source_kind, source_ref) — confirm.
    searchDocumentSourceUnique: uniqueIndex('search_document_source_uidx').on(
      table.scope,
      sql`ifnull(${table.user_id}, '')`,
      table.source_kind,
      table.source_ref
    ),
    searchDocumentScopeIndex: index('search_document_scope_idx').on(
      table.scope,
      table.source_kind,
      table.ticker,
      table.updated_at
    ),
    searchDocumentAccessionIndex: index('search_document_accession_idx').on(
      table.accession_number,
      table.source_kind
    )
  })
);
|
||||
|
||||
/**
 * `search_chunk` table: the text chunks belonging to a `search_document` row.
 *
 * Each chunk references its document (deleted with it via `onDelete: 'cascade'`)
 * and is unique per (`document_id`, `chunk_index`).
 */
export const searchChunk = sqliteTable(
  'search_chunk',
  {
    id: integer('id').primaryKey({ autoIncrement: true }),
    document_id: integer('document_id')
      .notNull()
      .references(() => searchDocument.id, { onDelete: 'cascade' }),
    chunk_index: integer('chunk_index').notNull(),
    chunk_text: text('chunk_text').notNull(),
    char_count: integer('char_count').notNull(),
    start_offset: integer('start_offset').notNull(),
    end_offset: integer('end_offset').notNull(),
    heading_path: text('heading_path'),
    citation_label: text('citation_label').notNull(),
    created_at: text('created_at').notNull()
  },
  (table) => ({
    searchChunkUnique: uniqueIndex('search_chunk_document_chunk_uidx').on(
      table.document_id,
      table.chunk_index
    ),
    searchChunkDocumentIndex: index('search_chunk_document_idx').on(table.document_id)
  })
);
|
||||
|
||||
export const authSchema = {
|
||||
user,
|
||||
session,
|
||||
@@ -595,7 +648,9 @@ export const appSchema = {
|
||||
taskRun,
|
||||
taskStageEvent,
|
||||
portfolioInsight,
|
||||
researchJournalEntry
|
||||
researchJournalEntry,
|
||||
searchDocument,
|
||||
searchChunk
|
||||
};
|
||||
|
||||
export const schema = {
|
||||
|
||||
@@ -62,6 +62,28 @@ export async function listResearchJournalEntries(userId: string, ticker: string,
|
||||
return rows.map(toResearchJournalEntry);
|
||||
}
|
||||
|
||||
/**
 * Lists a user's research journal entries across all tickers, ordered by
 * `updated_at` descending and then `id` descending.
 *
 * @param userId Owner of the entries.
 * @param limit  Maximum rows to return; truncated to an integer and clamped to
 *               1..500. Defaults to 250. A NaN limit falls back to the default,
 *               because NaN would otherwise pass through `Math.trunc`/`Math.max`/
 *               `Math.min` unchanged and reach the query.
 * @returns The rows mapped through `toResearchJournalEntry`.
 */
export async function listResearchJournalEntriesForUser(userId: string, limit = 250) {
  const requested = Number.isNaN(limit) ? 250 : Math.trunc(limit);
  const safeLimit = Math.min(Math.max(requested, 1), 500);

  const rows = await db
    .select()
    .from(researchJournalEntry)
    .where(eq(researchJournalEntry.user_id, userId))
    .orderBy(desc(researchJournalEntry.updated_at), desc(researchJournalEntry.id))
    .limit(safeLimit);

  return rows.map(toResearchJournalEntry);
}
|
||||
|
||||
/**
 * Fetches a single research journal entry by id, restricted to the given user.
 *
 * @returns The entry mapped through `toResearchJournalEntry`, or `null` when no
 *          row matches both the user id and the entry id.
 */
export async function getResearchJournalEntryRecord(userId: string, id: number) {
  const ownedEntry = and(
    eq(researchJournalEntry.user_id, userId),
    eq(researchJournalEntry.id, id)
  );

  const matches = await db
    .select()
    .from(researchJournalEntry)
    .where(ownedEntry)
    .limit(1);

  const first = matches[0];
  if (!first) {
    return null;
  }

  return toResearchJournalEntry(first);
}
|
||||
|
||||
export async function createResearchJournalEntryRecord(input: {
|
||||
userId: string;
|
||||
ticker: string;
|
||||
|
||||
217
lib/server/search.test.ts
Normal file
217
lib/server/search.test.ts
Normal file
@@ -0,0 +1,217 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { Database } from 'bun:sqlite';
|
||||
import { __dbInternals } from '@/lib/server/db';
|
||||
import { __searchInternals } from '@/lib/server/search';
|
||||
|
||||
/** Reads `drizzle/<fileName>` relative to the current working directory and executes it on `client`. */
function applyMigration(client: Database, fileName: string) {
  const migrationPath = join(process.cwd(), 'drizzle', fileName);
  client.exec(readFileSync(migrationPath, 'utf8'));
}
|
||||
|
||||
/**
 * Builds an in-memory SQLite database for the tests: enables foreign keys,
 * applies the first four migrations in order, then runs the db internals that
 * load extensions and create the local schema and search tables.
 */
function createClient() {
  const client = new Database(':memory:');
  client.exec('PRAGMA foreign_keys = ON;');

  const baseMigrations = [
    '0000_cold_silver_centurion.sql',
    '0001_glossy_statement_snapshots.sql',
    '0002_workflow_task_projection_metadata.sql',
    '0003_task_stage_event_timeline.sql'
  ];
  for (const fileName of baseMigrations) {
    applyMigration(client, fileName);
  }

  // Extensions are loaded first so the search-table setup can see whether the
  // vector extension is available for this client.
  __dbInternals.loadSqliteExtensions(client);
  __dbInternals.ensureLocalSqliteSchema(client);
  __dbInternals.ensureSearchVirtualTables(client);

  return client;
}
|
||||
|
||||
/**
 * Inserts a minimal `user` row for tests: `id` is used as both id and name,
 * the email is `<id>@example.com`, and the remaining columns get fixed values.
 */
function insertUser(client: Database, id: string) {
  const statement = client.query(`
    INSERT INTO user (id, name, email, emailVerified, createdAt, updatedAt)
    VALUES (?, ?, ?, 1, 0, 0)
  `);
  const email = `${id}@example.com`;

  statement.run(id, id, email);
}
|
||||
|
||||
/**
 * Builds a 256-element test embedding whose first two components are `first`
 * and `second` (default 0); every other component is 0.
 */
function vector(first: number, second = 0) {
  const padding = new Array(254).fill(0);
  return [first, second, ...padding];
}
|
||||
|
||||
describe('search internals', () => {
|
||||
it('chunks research notes as a single chunk under the small-note threshold', () => {
|
||||
const chunks = __searchInternals.chunkDocument({
|
||||
sourceKind: 'research_note',
|
||||
sourceRef: '1',
|
||||
scope: 'user',
|
||||
userId: 'user-1',
|
||||
ticker: 'AMD',
|
||||
accessionNumber: null,
|
||||
filingDate: null,
|
||||
title: 'AMD note',
|
||||
contentText: 'A compact note about margins and channel inventory.',
|
||||
metadata: {}
|
||||
});
|
||||
|
||||
expect(chunks).toHaveLength(1);
|
||||
expect(chunks[0]?.chunkText).toContain('channel inventory');
|
||||
});
|
||||
|
||||
it('formats insufficient evidence when the answer cites nothing valid', () => {
|
||||
const finalized = __searchInternals.finalizeAnswer('This has no valid citations.', [{
|
||||
chunkId: 1,
|
||||
documentId: 1,
|
||||
source: 'filings',
|
||||
sourceKind: 'filing_brief',
|
||||
sourceRef: '0001',
|
||||
title: 'Brief',
|
||||
ticker: 'AMD',
|
||||
accessionNumber: '0001',
|
||||
filingDate: '2026-01-01',
|
||||
citationLabel: 'AMD · 0001 [1]',
|
||||
headingPath: null,
|
||||
chunkText: 'Revenue grew.',
|
||||
snippet: 'Revenue grew.',
|
||||
score: 0.2,
|
||||
vectorRank: 1,
|
||||
lexicalRank: 1,
|
||||
href: '/filings?ticker=AMD'
|
||||
}]);
|
||||
|
||||
expect(finalized.answer).toBe('Insufficient evidence to answer from the indexed sources.');
|
||||
expect(finalized.citations).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('persists vec/fts rows, skips unchanged content, and deletes synced rows together', () => {
|
||||
const client = createClient();
|
||||
const document = {
|
||||
sourceKind: 'filing_brief' as const,
|
||||
sourceRef: '0000320193-26-000001',
|
||||
scope: 'global' as const,
|
||||
userId: null,
|
||||
ticker: 'AAPL',
|
||||
accessionNumber: '0000320193-26-000001',
|
||||
filingDate: '2026-01-30',
|
||||
title: 'AAPL filing brief',
|
||||
contentText: 'Revenue remained resilient across products and services. Services margin expanded.',
|
||||
metadata: {
|
||||
filingDate: '2026-01-30',
|
||||
hasAnalysis: true
|
||||
}
|
||||
};
|
||||
|
||||
const chunks = __searchInternals.chunkDocument(document);
|
||||
const firstPersist = __searchInternals.persistDocumentIndex(
|
||||
client,
|
||||
document,
|
||||
chunks,
|
||||
chunks.map((_chunk, index) => vector(1 - (index * 0.1)))
|
||||
);
|
||||
|
||||
expect(firstPersist.indexed).toBe(true);
|
||||
expect(client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).toEqual({ count: 1 });
|
||||
expect((client.query('SELECT count(*) AS count FROM search_chunk').get() as { count: number }).count).toBe(chunks.length);
|
||||
expect((client.query('SELECT count(*) AS count FROM search_chunk_fts').get() as { count: number }).count).toBe(chunks.length);
|
||||
expect((client.query('SELECT count(*) AS count FROM search_chunk_vec').get() as { count: number }).count).toBe(chunks.length);
|
||||
|
||||
const secondPersist = __searchInternals.persistDocumentIndex(
|
||||
client,
|
||||
document,
|
||||
chunks,
|
||||
chunks.map((_chunk, index) => vector(1 - (index * 0.1)))
|
||||
);
|
||||
|
||||
expect(secondPersist.skipped).toBe(true);
|
||||
expect((client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).count).toBe(1);
|
||||
|
||||
const deleted = __searchInternals.deleteSourceRefs(client, [{
|
||||
sourceKind: 'filing_brief',
|
||||
sourceRef: document.sourceRef,
|
||||
scope: 'global'
|
||||
}]);
|
||||
|
||||
expect(deleted).toBe(1);
|
||||
expect((client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).count).toBe(0);
|
||||
expect((client.query('SELECT count(*) AS count FROM search_chunk').get() as { count: number }).count).toBe(0);
|
||||
expect((client.query('SELECT count(*) AS count FROM search_chunk_fts').get() as { count: number }).count).toBe(0);
|
||||
expect((client.query('SELECT count(*) AS count FROM search_chunk_vec').get() as { count: number }).count).toBe(0);
|
||||
|
||||
client.close();
|
||||
});
|
||||
|
||||
it('keeps user-scoped research notes isolated in lexical and vector search', () => {
  const db = createClient();
  insertUser(db, 'user-1');
  insertUser(db, 'user-2');

  // Two private notes on the same ticker, one per user.
  const noteForUserOne = {
    sourceKind: 'research_note' as const,
    sourceRef: '101',
    scope: 'user' as const,
    userId: 'user-1',
    ticker: 'AMD',
    accessionNumber: null,
    filingDate: null,
    title: 'Durable thesis',
    contentText: 'Durable pricing power thesis with channel checks.',
    metadata: {}
  };
  const noteForUserTwo = {
    ...noteForUserOne,
    sourceRef: '102',
    userId: 'user-2',
    contentText: 'Different private note for another user.'
  };

  const chunksForUserOne = __searchInternals.chunkDocument(noteForUserOne);
  const chunksForUserTwo = __searchInternals.chunkDocument(noteForUserTwo);

  // Each note is persisted with a different embedding vector.
  __searchInternals.persistDocumentIndex(db, noteForUserOne, chunksForUserOne, [vector(1, 0)]);
  __searchInternals.persistDocumentIndex(db, noteForUserTwo, chunksForUserTwo, [vector(0, 1)]);

  // Every search below uses the same filter and differs only in the requesting user.
  const noteFilterFor = (userId: string) => ({
    limit: 5,
    sourceKind: 'research_note' as const,
    scope: 'user' as const,
    userId,
    ticker: 'AMD'
  });

  const ftsQuery = __searchInternals.toFtsQuery('durable thesis');
  expect(ftsQuery).not.toBeNull();

  // Lexical search: the query terms only occur in user-1's note, so user-2 gets nothing.
  const lexicalMatches = __searchInternals.lexicalSearch(db, {
    ftsQuery: ftsQuery!,
    ...noteFilterFor('user-1')
  });
  const hiddenLexicalMatches = __searchInternals.lexicalSearch(db, {
    ftsQuery: ftsQuery!,
    ...noteFilterFor('user-2')
  });

  expect(lexicalMatches).toHaveLength(1);
  expect(hiddenLexicalMatches).toHaveLength(0);

  // Vector search: both users query with the same embedding.
  const vectorMatches = __searchInternals.vectorSearch(db, {
    embedding: vector(1, 0),
    ...noteFilterFor('user-1')
  });
  const hiddenVectorMatches = __searchInternals.vectorSearch(db, {
    embedding: vector(1, 0),
    ...noteFilterFor('user-2')
  });

  // Each user gets exactly one hit, and the two hits must be different chunks.
  expect(vectorMatches).toHaveLength(1);
  expect(hiddenVectorMatches).toHaveLength(1);
  expect(vectorMatches[0]?.chunk_id).not.toBe(hiddenVectorMatches[0]?.chunk_id);

  db.close();
});
|
||||
});
|
||||
1315
lib/server/search.ts
Normal file
1315
lib/server/search.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -9,6 +9,7 @@ import type {
|
||||
import { runAiAnalysis } from '@/lib/server/ai';
|
||||
import { buildPortfolioSummary } from '@/lib/server/portfolio';
|
||||
import { getQuote } from '@/lib/server/prices';
|
||||
import { indexSearchDocuments } from '@/lib/server/search';
|
||||
import {
|
||||
getFilingByAccession,
|
||||
listFilingsRecords,
|
||||
@@ -34,6 +35,7 @@ import {
|
||||
fetchPrimaryFilingText,
|
||||
fetchRecentFilings
|
||||
} from '@/lib/server/sec';
|
||||
import { enqueueTask } from '@/lib/server/tasks';
|
||||
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
|
||||
|
||||
const EXTRACTION_REQUIRED_KEYS = [
|
||||
@@ -167,6 +169,17 @@ function parseOptionalText(raw: unknown) {
|
||||
return normalized.length > 0 ? normalized : null;
|
||||
}
|
||||
|
||||
/**
 * Normalizes an untyped value into a list of trimmed, non-empty strings.
 *
 * Anything that is not an array yields an empty list. Inside an array,
 * non-string entries and strings that are empty after trimming are dropped;
 * the order of the surviving entries is preserved.
 */
function parseOptionalStringArray(raw: unknown) {
  const cleaned: string[] = [];

  if (Array.isArray(raw)) {
    for (const entry of raw) {
      if (typeof entry !== 'string') {
        continue;
      }

      const trimmed = entry.trim();
      if (trimmed.length > 0) {
        cleaned.push(trimmed);
      }
    }
  }

  return cleaned;
}
|
||||
|
||||
function parseTags(raw: unknown) {
|
||||
if (!Array.isArray(raw)) {
|
||||
return [];
|
||||
@@ -562,6 +575,8 @@ async function processSyncFilings(task: Task) {
|
||||
.filter((entry): entry is string => Boolean(entry))
|
||||
.join(' | ');
|
||||
|
||||
let searchTaskId: string | null = null;
|
||||
|
||||
await setProjectionStage(
|
||||
task,
|
||||
'sync.fetch_filings',
|
||||
@@ -667,6 +682,22 @@ async function processSyncFilings(task: Task) {
|
||||
await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
|
||||
}
|
||||
|
||||
try {
|
||||
const searchTask = await enqueueTask({
|
||||
userId: task.user_id,
|
||||
taskType: 'index_search',
|
||||
payload: {
|
||||
ticker,
|
||||
sourceKinds: ['filing_document', 'filing_brief']
|
||||
},
|
||||
priority: 55,
|
||||
resourceKey: `index_search:ticker:${ticker}`
|
||||
});
|
||||
searchTaskId = searchTask.id;
|
||||
} catch (error) {
|
||||
console.error(`[search-index-sync] failed for ${ticker}:`, error);
|
||||
}
|
||||
|
||||
return {
|
||||
ticker,
|
||||
category,
|
||||
@@ -675,7 +706,8 @@ async function processSyncFilings(task: Task) {
|
||||
inserted: saveResult.inserted,
|
||||
updated: saveResult.updated,
|
||||
taxonomySnapshotsHydrated,
|
||||
taxonomySnapshotsFailed
|
||||
taxonomySnapshotsFailed,
|
||||
searchTaskId
|
||||
};
|
||||
}
|
||||
|
||||
@@ -782,12 +814,108 @@ async function processAnalyzeFiling(task: Task) {
|
||||
extractionMeta
|
||||
});
|
||||
|
||||
let searchTaskId: string | null = null;
|
||||
try {
|
||||
const searchTask = await enqueueTask({
|
||||
userId: task.user_id,
|
||||
taskType: 'index_search',
|
||||
payload: {
|
||||
accessionNumber,
|
||||
sourceKinds: ['filing_brief']
|
||||
},
|
||||
priority: 58,
|
||||
resourceKey: `index_search:filing_brief:${accessionNumber}`
|
||||
});
|
||||
searchTaskId = searchTask.id;
|
||||
} catch (error) {
|
||||
console.error(`[search-index-analyze] failed for ${accessionNumber}:`, error);
|
||||
}
|
||||
|
||||
return {
|
||||
accessionNumber,
|
||||
provider: analysis.provider,
|
||||
model: analysis.model,
|
||||
extractionProvider: extractionMeta.provider,
|
||||
extractionModel: extractionMeta.model
|
||||
extractionModel: extractionMeta.model,
|
||||
searchTaskId
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Task processor for `index_search` tasks.
 *
 * Reads the optional `ticker`, `accessionNumber`, `journalEntryId`,
 * `sourceKinds` and `deleteSourceRefs` fields from the task payload, validates
 * them, and passes them to `indexSearchDocuments` together with the task's
 * user id. Stage callbacks from the indexer are mirrored onto the task via
 * `setProjectionStage`.
 *
 * Payload values are untyped, so anything malformed is dropped instead of
 * being forwarded:
 * - `journalEntryId` must be a number or a numeric string that resolves to a
 *   positive integer, otherwise it becomes `null`;
 * - `sourceKinds` entries must be one of the supported source kinds;
 * - `deleteSourceRefs` entries must carry a supported `sourceKind`, a string
 *   `sourceRef`, and a `scope` of exactly `'user'` or `'global'`.
 *
 * @param task The queued task whose payload describes what to index.
 * @returns The echoed `ticker`, `accessionNumber` and validated
 *   `journalEntryId`, merged with whatever `indexSearchDocuments` returns.
 */
async function processIndexSearch(task: Task) {
  type IndexableSourceKind = 'filing_document' | 'filing_brief' | 'research_note';
  type DeleteSourceRef = {
    sourceKind: IndexableSourceKind;
    sourceRef: string;
    scope: 'user' | 'global';
    userId?: unknown;
  };

  const isIndexableSourceKind = (value: unknown): value is IndexableSourceKind => {
    return value === 'filing_document'
      || value === 'filing_brief'
      || value === 'research_note';
  };

  await setProjectionStage(task, 'search.collect_sources', 'Collecting source records for search indexing');

  const ticker = parseOptionalText(task.payload.ticker);
  const accessionNumber = parseOptionalText(task.payload.accessionNumber);

  // Only numbers and numeric strings are coerced. Passing arbitrary values
  // through Number() would turn `true` into 1 and `[5]` into 5, silently
  // pointing the task at an unrelated journal entry.
  const rawJournalEntryId: unknown = task.payload.journalEntryId;
  const journalEntryId = typeof rawJournalEntryId === 'number'
    ? rawJournalEntryId
    : typeof rawJournalEntryId === 'string'
      ? Number(rawJournalEntryId)
      : null;
  const validatedJournalEntryId = journalEntryId !== null
    && Number.isInteger(journalEntryId)
    && journalEntryId > 0
    ? journalEntryId
    : null;

  // Deletions are destructive, so both the source kind and the scope are
  // checked against their allowed values; an unrecognised scope is rejected
  // rather than being treated as 'global'.
  const rawDeleteSourceRefs: unknown = task.payload.deleteSourceRefs;
  const deleteSourceRefs = Array.isArray(rawDeleteSourceRefs)
    ? rawDeleteSourceRefs.filter((entry: unknown): entry is DeleteSourceRef => {
      if (!entry || typeof entry !== 'object') {
        return false;
      }

      const candidate = entry as { sourceKind?: unknown; sourceRef?: unknown; scope?: unknown };
      return isIndexableSourceKind(candidate.sourceKind)
        && typeof candidate.sourceRef === 'string'
        && (candidate.scope === 'user' || candidate.scope === 'global');
    })
    : [];

  const sourceKinds = parseOptionalStringArray(task.payload.sourceKinds)
    .filter(isIndexableSourceKind);

  const result = await indexSearchDocuments({
    userId: task.user_id,
    ticker,
    accessionNumber,
    journalEntryId: validatedJournalEntryId,
    // An empty list is sent as undefined so the indexer applies its own default.
    sourceKinds: sourceKinds.length > 0 ? sourceKinds : undefined,
    deleteSourceRefs: deleteSourceRefs.map((entry) => ({
      sourceKind: entry.sourceKind,
      sourceRef: entry.sourceRef,
      scope: entry.scope,
      userId: typeof entry.userId === 'string' ? entry.userId : null
    })),
    onStage: async (stage, detail) => {
      // Translate the indexer's stage names into task projection stages.
      switch (stage) {
        case 'collect':
          await setProjectionStage(task, 'search.collect_sources', detail);
          break;
        case 'fetch':
          await setProjectionStage(task, 'search.fetch_documents', detail);
          break;
        case 'chunk':
          await setProjectionStage(task, 'search.chunk', detail);
          break;
        case 'embed':
          await setProjectionStage(task, 'search.embed', detail);
          break;
        case 'persist':
          await setProjectionStage(task, 'search.persist', detail);
          break;
      }
    }
  });

  return {
    ticker,
    accessionNumber,
    journalEntryId: validatedJournalEntryId,
    ...result
  };
}
|
||||
|
||||
@@ -858,6 +986,8 @@ export async function runTaskProcessor(task: Task) {
|
||||
return toTaskResult(await processAnalyzeFiling(task));
|
||||
case 'portfolio_insights':
|
||||
return toTaskResult(await processPortfolioInsights(task));
|
||||
case 'index_search':
|
||||
return toTaskResult(await processIndexSearch(task));
|
||||
default:
|
||||
throw new Error(`Unsupported task type: ${task.task_type}`);
|
||||
}
|
||||
|
||||
46
lib/types.ts
46
lib/types.ts
@@ -101,7 +101,12 @@ export type Filing = {
|
||||
};
|
||||
|
||||
export type TaskStatus = 'queued' | 'running' | 'completed' | 'failed';
|
||||
export type TaskType = 'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights';
|
||||
export type TaskType =
|
||||
| 'sync_filings'
|
||||
| 'refresh_prices'
|
||||
| 'analyze_filing'
|
||||
| 'portfolio_insights'
|
||||
| 'index_search';
|
||||
export type TaskStage =
|
||||
| 'queued'
|
||||
| 'running'
|
||||
@@ -125,6 +130,11 @@ export type TaskStage =
|
||||
| 'analyze.extract'
|
||||
| 'analyze.generate_report'
|
||||
| 'analyze.persist_report'
|
||||
| 'search.collect_sources'
|
||||
| 'search.fetch_documents'
|
||||
| 'search.chunk'
|
||||
| 'search.embed'
|
||||
| 'search.persist'
|
||||
| 'insights.load_holdings'
|
||||
| 'insights.generate'
|
||||
| 'insights.persist';
|
||||
@@ -188,6 +198,40 @@ export type ResearchJournalEntry = {
|
||||
updated_at: string;
|
||||
};
|
||||
|
||||
/** Source groups a caller can restrict a search to (the `sources` request parameter). */
export type SearchSource =
  | 'documents'
  | 'filings'
  | 'research';

/**
 * A single search hit: one matched chunk together with the identifying
 * fields of the document it belongs to.
 */
export type SearchResult = {
  // Identity of the matched chunk and its parent document.
  chunkId: number;
  documentId: number;

  // Where the hit came from.
  source: SearchSource;
  sourceKind:
    | 'filing_document'
    | 'filing_brief'
    | 'research_note';
  sourceRef: string;

  // Document-level descriptors; each may be null.
  title: string | null;
  ticker: string | null;
  accessionNumber: string | null;
  filingDate: string | null;

  // Display and content fields.
  citationLabel: string;
  headingPath: string | null;
  chunkText: string;
  snippet: string;

  // Ranking information. NOTE(review): a null rank presumably means the hit
  // was not returned by that retrieval path; confirm against lib/server/search.ts.
  score: number;
  vectorRank: number | null;
  lexicalRank: number | null;

  // Link target for the hit.
  href: string;
};

/** One citation attached to a generated answer, pointing at a chunk by id. */
export type SearchCitation = {
  index: number;
  label: string;
  chunkId: number;
  href: string;
};

/** Response body of the cited-answer endpoint (`POST /api/search/answer`). */
export type SearchAnswerResponse = {
  answer: string;
  citations: Array<SearchCitation>;
  results: Array<SearchResult>;
};
|
||||
|
||||
export type CompanyFinancialPoint = {
|
||||
filingDate: string;
|
||||
filingType: Filing['filing_type'];
|
||||
|
||||
Reference in New Issue
Block a user