Add search and RAG workspace flows

This commit is contained in:
2026-03-07 20:34:00 -05:00
parent db01f207a5
commit e20aba998b
35 changed files with 3417 additions and 372 deletions

View File

@@ -14,6 +14,9 @@ import type {
PortfolioSummary,
ResearchJournalEntry,
ResearchJournalEntryType,
SearchAnswerResponse,
SearchResult,
SearchSource,
Task,
TaskStatus,
TaskTimeline,
@@ -295,6 +298,54 @@ export async function listFilings(query?: { ticker?: string; limit?: number }) {
return await unwrapData<{ filings: Filing[] }>(result, 'Unable to fetch filings');
}
/**
 * Runs a hybrid search over the indexed sources via GET /api/search.
 * Optional filters (ticker, sources, limit) are only attached to the query
 * string when the caller actually supplied them.
 */
export async function searchKnowledge(input: {
  query: string;
  ticker?: string;
  sources?: SearchSource[];
  limit?: number;
}) {
  const normalizedTicker = input.ticker?.trim() ?? '';
  const selectedSources = input.sources ?? [];
  const result = await client.api.search.get({
    $query: {
      q: input.query.trim(),
      ...(normalizedTicker ? { ticker: normalizedTicker.toUpperCase() } : {}),
      ...(selectedSources.length > 0 ? { sources: selectedSources } : {}),
      ...(typeof input.limit === 'number' ? { limit: input.limit } : {})
    }
  });
  return await unwrapData<{ results: SearchResult[] }>(result, 'Unable to search indexed sources');
}
/**
 * Requests a cited RAG answer from POST /api/search/answer.
 * Optional filters are omitted from the request body when not supplied.
 */
export async function getSearchAnswer(input: {
  query: string;
  ticker?: string;
  sources?: SearchSource[];
  limit?: number;
}) {
  const normalizedTicker = input.ticker?.trim() ?? '';
  const selectedSources = input.sources ?? [];
  const body = {
    query: input.query.trim(),
    ...(normalizedTicker ? { ticker: normalizedTicker.toUpperCase() } : {}),
    ...(selectedSources.length > 0 ? { sources: selectedSources } : {}),
    ...(typeof input.limit === 'number' ? { limit: input.limit } : {})
  };
  return await requestJson<SearchAnswerResponse>({
    path: '/api/search/answer',
    method: 'POST',
    body
  }, 'Unable to generate cited answer');
}
export async function getCompanyAnalysis(ticker: string) {
const result = await client.api.analysis.company.get({
$query: {

View File

@@ -12,6 +12,7 @@ export const queryKeys = {
limit: number
) => ['financials-v3', ticker, surfaceKind, cadence, includeDimensions ? 'dims' : 'no-dims', includeFacts ? 'facts' : 'rows', factsCursor ?? '', factsLimit, cursor ?? '', limit] as const,
filings: (ticker: string | null, limit: number) => ['filings', ticker ?? '', limit] as const,
search: (query: string, ticker: string | null, sources: string[], limit: number) => ['search', query, ticker ?? '', sources.join(','), limit] as const,
report: (accessionNumber: string) => ['report', accessionNumber] as const,
watchlist: () => ['watchlist'] as const,
researchJournal: (ticker: string) => ['research', 'journal', ticker] as const,

View File

@@ -5,6 +5,7 @@ import {
getCompanyFinancialStatements,
getLatestPortfolioInsight,
getPortfolioSummary,
searchKnowledge,
getTask,
getTaskTimeline,
listFilings,
@@ -16,7 +17,8 @@ import {
import { queryKeys } from '@/lib/query/keys';
import type {
FinancialCadence,
FinancialSurfaceKind
FinancialSurfaceKind,
SearchSource
} from '@/lib/types';
export function companyAnalysisQueryOptions(ticker: string) {
@@ -86,6 +88,31 @@ export function filingsQueryOptions(input: { ticker?: string; limit?: number } =
});
}
/**
 * Builds react-query options for the search surface. Inputs are normalized
 * (trimmed query, upper-cased ticker, de-duplicated sources) before being
 * folded into the cache key so equivalent requests share one cache entry.
 */
export function searchQueryOptions(input: {
  query: string;
  ticker?: string | null;
  sources?: SearchSource[];
  limit?: number;
}) {
  const normalizedQuery = input.query.trim();
  const normalizedTicker = input.ticker?.trim().toUpperCase() ?? null;
  // When no explicit source filter is given, search every source kind.
  const fallbackSources: SearchSource[] = ['documents', 'filings', 'research'];
  const sources = input.sources && input.sources.length > 0
    ? [...new Set(input.sources)]
    : fallbackSources;
  const limit = input.limit ?? 10;
  return queryOptions({
    queryKey: queryKeys.search(normalizedQuery, normalizedTicker, sources, limit),
    queryFn: () =>
      searchKnowledge({
        query: normalizedQuery,
        ticker: normalizedTicker ?? undefined,
        sources,
        limit
      }),
    // Results are cached briefly; re-searching the same terms within 30s is free.
    staleTime: 30_000
  });
}
export function aiReportQueryOptions(accessionNumber: string) {
const normalizedAccession = accessionNumber.trim();

View File

@@ -1,4 +1,4 @@
import { generateText } from 'ai';
import { embedMany, generateText } from 'ai';
import { createZhipu } from 'zhipu-ai-provider';
type AiWorkload = 'report' | 'extraction';
@@ -31,13 +31,35 @@ type AiGenerateOutput = {
text: string;
};
type AiEmbedOutput = {
embeddings: number[][];
};
type RunAiAnalysisOptions = GetAiConfigOptions & {
workload?: AiWorkload;
createModel?: (config: AiConfig) => unknown;
generate?: (input: AiGenerateInput) => Promise<AiGenerateOutput>;
};
type EmbeddingConfig = {
provider: AiProvider;
apiKey?: string;
baseUrl: string;
model: 'embedding-3';
dimensions: 256;
};
type RunAiEmbeddingsOptions = GetAiConfigOptions & {
createModel?: (config: EmbeddingConfig) => unknown;
embed?: (input: {
model: unknown;
values: string[];
}) => Promise<AiEmbedOutput>;
};
const CODING_API_BASE_URL = 'https://api.z.ai/api/coding/paas/v4';
const SEARCH_EMBEDDING_MODEL = 'embedding-3';
const SEARCH_EMBEDDING_DIMENSIONS = 256;
let warnedIgnoredZhipuBaseUrl = false;
@@ -97,6 +119,30 @@ async function defaultGenerate(input: AiGenerateInput): Promise<AiGenerateOutput
return { text: result.text };
}
// Default embedding-model factory: a Zhipu provider pinned to the configured
// base URL, returning a text-embedding model at the configured dimensionality.
function defaultCreateEmbeddingModel(config: EmbeddingConfig) {
  const provider = createZhipu({
    apiKey: config.apiKey,
    baseURL: config.baseUrl
  });
  return provider.textEmbeddingModel(config.model, { dimensions: config.dimensions });
}
// Default embedder: delegates to the AI SDK's embedMany with retries disabled
// so failures surface immediately to the task that requested indexing.
async function defaultEmbed(input: {
  model: unknown;
  values: string[];
}): Promise<AiEmbedOutput> {
  const { embeddings } = await embedMany({
    model: input.model as never,
    values: input.values,
    maxRetries: 0
  });
  return { embeddings: embeddings as number[][] };
}
/** Backwards-compatible alias: the default AI config is the report workload config. */
export function getAiConfig(options?: GetAiConfigOptions) {
  return getReportAiConfig(options);
}
@@ -121,6 +167,19 @@ export function getExtractionAiConfig(options?: GetAiConfigOptions) {
};
}
/**
 * Resolves the (fixed) embedding configuration: Zhipu's embedding-3 model at
 * 256 dimensions against the coding API base URL. Only the API key comes from
 * the environment; a custom ZHIPU base URL is ignored here (with a one-time warning).
 */
export function getEmbeddingAiConfig(options?: GetAiConfigOptions) {
  const env = options?.env ?? process.env;
  const warn = options?.warn ?? console.warn;
  warnIgnoredZhipuBaseUrl(env, warn);
  return {
    provider: 'zhipu',
    apiKey: envValue('ZHIPU_API_KEY', env),
    baseUrl: CODING_API_BASE_URL,
    model: SEARCH_EMBEDDING_MODEL,
    dimensions: SEARCH_EMBEDDING_DIMENSIONS
  } satisfies EmbeddingConfig;
}
export function isAiConfigured(options?: GetAiConfigOptions) {
const config = getReportAiConfig(options);
return Boolean(config.apiKey);
@@ -160,6 +219,31 @@ export async function runAiAnalysis(prompt: string, systemPrompt?: string, optio
};
}
/**
 * Embeds a batch of texts. Blank entries are dropped before calling the
 * provider; an all-blank batch short-circuits to [] without needing an API key.
 * @throws Error when embeddings are requested but ZHIPU_API_KEY is unset.
 * @returns one numeric vector per surviving input, in input order.
 */
export async function runAiEmbeddings(values: string[], options?: RunAiEmbeddingsOptions) {
  const sanitizedValues: string[] = [];
  for (const value of values) {
    const trimmed = value.trim();
    if (trimmed.length > 0) {
      sanitizedValues.push(trimmed);
    }
  }
  if (sanitizedValues.length === 0) {
    return [];
  }
  const config = getEmbeddingAiConfig(options);
  if (!config.apiKey) {
    throw new Error('ZHIPU_API_KEY is required for AI workloads');
  }
  // Test seams: callers may inject a model factory and/or embed function.
  const buildModel = options?.createModel ?? defaultCreateEmbeddingModel;
  const runEmbed = options?.embed ?? defaultEmbed;
  const { embeddings } = await runEmbed({
    model: buildModel(config),
    values: sanitizedValues
  });
  // Coerce every component through Number() so provider-specific numeric
  // representations normalize to plain numbers.
  return embeddings.map((embedding) => embedding.map((component) => Number(component)));
}
/** Test-only hook: clears the one-shot "ignored Zhipu base URL" warning flag. */
export function __resetAiWarningsForTests() {
  warnedIgnoredZhipuBaseUrl = false;
}

View File

@@ -8,6 +8,7 @@ import type {
FinancialStatementKind,
FinancialSurfaceKind,
ResearchJournalEntryType,
SearchSource,
TaskStatus
} from '@/lib/types';
import { auth } from '@/lib/auth';
@@ -48,6 +49,7 @@ import {
upsertWatchlistItemRecord
} from '@/lib/server/repos/watchlist';
import { getPriceHistory, getQuote } from '@/lib/server/prices';
import { answerSearchQuery, searchKnowledgeBase } from '@/lib/server/search';
import {
enqueueTask,
findInFlightTask,
@@ -82,6 +84,7 @@ const FINANCIAL_SURFACES: FinancialSurfaceKind[] = [
const COVERAGE_STATUSES: CoverageStatus[] = ['backlog', 'active', 'watch', 'archive'];
const COVERAGE_PRIORITIES: CoveragePriority[] = ['low', 'medium', 'high'];
const JOURNAL_ENTRY_TYPES: ResearchJournalEntryType[] = ['note', 'filing_note', 'status_change'];
const SEARCH_SOURCES: SearchSource[] = ['documents', 'filings', 'research'];
function asRecord(value: unknown): Record<string, unknown> {
if (!value || typeof value !== 'object' || Array.isArray(value)) {
@@ -205,6 +208,21 @@ function asJournalEntryType(value: unknown) {
: undefined;
}
/**
 * Parses a sources filter from a query/body value. Accepts either an array of
 * strings or a single comma-separated string; entries are trimmed, lower-cased,
 * validated against SEARCH_SOURCES and de-duplicated (first occurrence wins).
 * Returns undefined when nothing valid remains, so callers fall back to "all".
 */
function asSearchSources(value: unknown) {
  let raw: unknown[];
  if (Array.isArray(value)) {
    raw = value;
  } else if (typeof value === 'string') {
    raw = value.split(',');
  } else {
    raw = [];
  }
  const seen = new Set<SearchSource>();
  for (const entry of raw) {
    if (typeof entry !== 'string') {
      continue;
    }
    const candidate = entry.trim().toLowerCase();
    if (SEARCH_SOURCES.includes(candidate as SearchSource)) {
      seen.add(candidate as SearchSource);
    }
  }
  return seen.size > 0 ? [...seen] : undefined;
}
function formatLabel(value: string) {
return value
.split('_')
@@ -763,6 +781,21 @@ export const app = new Elysia({ prefix: '/api' })
});
await updateWatchlistReviewByTicker(session.user.id, ticker, entry.updated_at);
try {
await enqueueTask({
userId: session.user.id,
taskType: 'index_search',
payload: {
ticker: entry.ticker,
journalEntryId: entry.id,
sourceKinds: ['research_note']
},
priority: 52,
resourceKey: `index_search:research_note:${session.user.id}:${entry.id}`
});
} catch (error) {
console.error('[search-index-journal-create] failed:', error);
}
return Response.json({ entry });
} catch (error) {
@@ -800,6 +833,21 @@ export const app = new Elysia({ prefix: '/api' })
}
await updateWatchlistReviewByTicker(session.user.id, entry.ticker, entry.updated_at);
try {
await enqueueTask({
userId: session.user.id,
taskType: 'index_search',
payload: {
ticker: entry.ticker,
journalEntryId: entry.id,
sourceKinds: ['research_note']
},
priority: 52,
resourceKey: `index_search:research_note:${session.user.id}:${entry.id}`
});
} catch (error) {
console.error('[search-index-journal-update] failed:', error);
}
return Response.json({ entry });
} catch (error) {
@@ -822,6 +870,25 @@ export const app = new Elysia({ prefix: '/api' })
return jsonError('Journal entry not found', 404);
}
try {
await enqueueTask({
userId: session.user.id,
taskType: 'index_search',
payload: {
deleteSourceRefs: [{
sourceKind: 'research_note',
sourceRef: String(numericId),
scope: 'user',
userId: session.user.id
}]
},
priority: 52,
resourceKey: `index_search:research_note:${session.user.id}:${numericId}:delete`
});
} catch (error) {
console.error('[search-index-journal-delete] failed:', error);
}
return Response.json({ success: true });
}, {
params: t.Object({
@@ -1124,6 +1191,63 @@ export const app = new Elysia({ prefix: '/api' })
limit: t.Optional(t.Numeric())
})
})
// GET /api/search — hybrid (lexical + vector) search over the caller's
// visible documents, filings and research notes.
.get('/search', async ({ query }) => {
  const { session, response } = await requireAuthenticatedSession();
  if (response) {
    return response;
  }
  const q = typeof query.q === 'string' ? query.q.trim() : '';
  if (q.length < 2) {
    return jsonError('q is required', 400);
  }
  // BUG FIX: previously `Number(query.limit)` was passed when limit was not a
  // number, producing NaN for an omitted limit. Normalize through
  // asPositiveNumber, matching POST /search/answer.
  const results = await searchKnowledgeBase({
    userId: session.user.id,
    query: q,
    ticker: asOptionalString(query.ticker),
    sources: asSearchSources(query.sources),
    limit: asPositiveNumber(query.limit) ?? undefined
  });
  return Response.json({ results });
}, {
  query: t.Object({
    q: t.String({ minLength: 2 }),
    ticker: t.Optional(t.String()),
    sources: t.Optional(t.Union([t.String(), t.Array(t.String())])),
    limit: t.Optional(t.Numeric())
  })
})
// POST /api/search/answer — retrieval-augmented, citation-backed answer over
// the caller's indexed sources. Requires a signed-in user and a query >= 2 chars.
.post('/search/answer', async ({ body }) => {
  const { session, response } = await requireAuthenticatedSession();
  if (response) {
    return response;
  }
  const payload = asRecord(body);
  const normalizedQuery = typeof payload.query === 'string' ? payload.query.trim() : '';
  if (normalizedQuery.length < 2) {
    return jsonError('query is required', 400);
  }
  const request = {
    userId: session.user.id,
    query: normalizedQuery,
    ticker: asOptionalString(payload.ticker),
    sources: asSearchSources(payload.sources),
    limit: asPositiveNumber(payload.limit) ?? undefined
  };
  const answer = await answerSearchQuery(request);
  return Response.json(answer);
}, {
  body: t.Object({
    query: t.String({ minLength: 2 }),
    ticker: t.Optional(t.String()),
    sources: t.Optional(t.Union([t.String(), t.Array(t.String())])),
    limit: t.Optional(t.Numeric())
  })
})
.post('/filings/sync', async ({ body }) => {
const { session, response } = await requireAuthenticatedSession();
if (response) {

View File

@@ -37,6 +37,14 @@ describe('sqlite schema compatibility bootstrap', () => {
expect(__dbInternals.hasTable(client, 'filing_taxonomy_snapshot')).toBe(true);
expect(__dbInternals.hasTable(client, 'filing_taxonomy_fact')).toBe(true);
expect(__dbInternals.hasTable(client, 'research_journal_entry')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_document')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_chunk')).toBe(true);
__dbInternals.loadSqliteExtensions(client);
__dbInternals.ensureSearchVirtualTables(client);
expect(__dbInternals.hasTable(client, 'search_chunk_fts')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_chunk_vec')).toBe(true);
client.close();
});

View File

@@ -2,6 +2,7 @@ import { mkdirSync, readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { Database } from 'bun:sqlite';
import { drizzle } from 'drizzle-orm/bun-sqlite';
import { load as loadSqliteVec } from 'sqlite-vec';
import { schema } from './schema';
type AppDrizzleDb = ReturnType<typeof createDb>;
@@ -50,6 +51,45 @@ function applySqlFile(client: Database, fileName: string) {
client.exec(sql);
}
let customSqliteConfigured = false;
const vectorExtensionStatus = new WeakMap<Database, boolean>();
// One-shot process-level setup: on macOS, points Bun at a custom libsqlite3
// when SQLITE_CUSTOM_LIB_PATH is set — presumably so loadable extensions
// (sqlite-vec) work there; confirm against Bun's sqlite docs before relying on it.
// Must run before any Database is constructed; the flag makes repeat calls no-ops.
function configureCustomSqliteRuntime() {
  if (customSqliteConfigured) {
    return;
  }
  const customSqlitePath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
  if (process.platform === 'darwin' && customSqlitePath) {
    Database.setCustomSQLite(customSqlitePath);
  }
  customSqliteConfigured = true;
}
/**
 * Loads the sqlite-vec extension into a connection, preferring an explicit
 * SQLITE_VEC_EXTENSION_PATH override and falling back to the bundled loader.
 * Success/failure is recorded per-connection in vectorExtensionStatus so
 * ensureSearchVirtualTables can degrade to a table-backed vector store.
 * Failure is intentionally non-fatal — search still works lexically.
 */
function loadSqliteExtensions(client: Database) {
  try {
    const customVectorExtensionPath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();
    if (customVectorExtensionPath) {
      client.loadExtension(customVectorExtensionPath);
    } else {
      loadSqliteVec(client);
    }
    vectorExtensionStatus.set(client, true);
  } catch (error) {
    // Remember the failure so vector-table creation picks the fallback path.
    vectorExtensionStatus.set(client, false);
    const reason = error instanceof Error ? error.message : 'Unknown sqlite extension error';
    console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
  }
}
// Whether sqlite-vec loaded successfully for this connection; defaults to
// false for connections that never went through loadSqliteExtensions.
function isVectorExtensionLoaded(client: Database) {
  return vectorExtensionStatus.get(client) ?? false;
}
function ensureLocalSqliteSchema(client: Database) {
if (!hasTable(client, 'filing_statement_snapshot')) {
applySqlFile(client, '0001_glossy_statement_snapshots.sql');
@@ -142,10 +182,70 @@ function ensureLocalSqliteSchema(client: Database) {
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_ticker_idx` ON `research_journal_entry` (`user_id`, `ticker`, `created_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_accession_idx` ON `research_journal_entry` (`user_id`, `accession_number`);');
}
if (!hasTable(client, 'search_document')) {
applySqlFile(client, '0008_search_rag.sql');
}
}
/**
 * Creates the two search indexes that cannot live in the drizzle schema:
 * an FTS5 lexical index over chunk text and a vector table (vec0 when
 * sqlite-vec is loaded, a plain table otherwise). Idempotent via IF NOT EXISTS.
 */
function ensureSearchVirtualTables(client: Database) {
  // Lexical index. UNINDEXED columns are stored with each row (for filtering
  // and joining back to search_chunk) but excluded from the full-text index.
  client.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
      \`chunk_text\`,
      \`citation_label\`,
      \`heading_path\`,
      \`chunk_id\` UNINDEXED,
      \`document_id\` UNINDEXED,
      \`chunk_index\` UNINDEXED,
      \`scope\` UNINDEXED,
      \`user_id\` UNINDEXED,
      \`source_kind\` UNINDEXED,
      \`ticker\` UNINDEXED,
      \`accession_number\` UNINDEXED,
      \`filing_date\` UNINDEXED
    );
  `);
  if (isVectorExtensionLoaded(client)) {
    // sqlite-vec path: vec0 table with a 256-dim float embedding. The plain
    // typed columns (scope, user_id, ...) are vec0 metadata columns usable in
    // KNN WHERE filters; the "+"-prefixed columns are vec0 *auxiliary*
    // columns — stored and selectable but not filterable. The "+" is
    // sqlite-vec syntax, not a typo.
    client.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
        \`chunk_id\` integer PRIMARY KEY,
        \`embedding\` float[256],
        \`scope\` text,
        \`user_id\` text,
        \`source_kind\` text,
        \`ticker\` text,
        \`accession_number\` text,
        \`filing_date\` text,
        +\`document_id\` integer,
        +\`chunk_index\` integer,
        +\`citation_label\` text
      );
    `);
    return;
  }
  // Fallback when sqlite-vec is unavailable: an ordinary table with the same
  // shape. `embedding` is text here — presumably a serialized vector read by
  // lib/server/search.ts; confirm against the writer before changing.
  client.exec(`
    CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
      \`chunk_id\` integer PRIMARY KEY NOT NULL,
      \`embedding\` text NOT NULL,
      \`scope\` text NOT NULL,
      \`user_id\` text,
      \`source_kind\` text NOT NULL,
      \`ticker\` text,
      \`accession_number\` text,
      \`filing_date\` text,
      \`document_id\` integer NOT NULL,
      \`chunk_index\` integer NOT NULL,
      \`citation_label\` text NOT NULL
    );
  `);
  // Mirrors the metadata filters vec0 would provide for scoped lookups.
  client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
}
export function getSqliteClient() {
if (!globalThis.__fiscalSqliteClient) {
configureCustomSqliteRuntime();
const databasePath = getDatabasePath();
if (databasePath !== ':memory:') {
@@ -156,7 +256,9 @@ export function getSqliteClient() {
client.exec('PRAGMA foreign_keys = ON;');
client.exec('PRAGMA journal_mode = WAL;');
client.exec('PRAGMA busy_timeout = 5000;');
loadSqliteExtensions(client);
ensureLocalSqliteSchema(client);
ensureSearchVirtualTables(client);
globalThis.__fiscalSqliteClient = client;
}
@@ -175,8 +277,12 @@ if (!globalThis.__fiscalDrizzleDb) {
}
export const __dbInternals = {
configureCustomSqliteRuntime,
ensureLocalSqliteSchema,
ensureSearchVirtualTables,
getDatabasePath,
hasColumn,
hasTable
hasTable,
isVectorExtensionLoaded,
loadSqliteExtensions
};

View File

@@ -1,3 +1,4 @@
import { sql } from 'drizzle-orm';
import {
index,
integer,
@@ -31,6 +32,9 @@ type CoverageStatus = 'backlog' | 'active' | 'watch' | 'archive';
type CoveragePriority = 'low' | 'medium' | 'high';
type ResearchJournalEntryType = 'note' | 'filing_note' | 'status_change';
type FinancialCadence = 'annual' | 'quarterly' | 'ltm';
type SearchDocumentScope = 'global' | 'user';
type SearchDocumentSourceKind = 'filing_document' | 'filing_brief' | 'research_note';
type SearchIndexStatus = 'pending' | 'indexed' | 'failed';
type FinancialSurfaceKind =
| 'income_statement'
| 'balance_sheet'
@@ -500,7 +504,7 @@ export const filingLink = sqliteTable('filing_link', {
export const taskRun = sqliteTable('task_run', {
id: text('id').primaryKey().notNull(),
user_id: text('user_id').notNull().references(() => user.id, { onDelete: 'cascade' }),
task_type: text('task_type').$type<'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights'>().notNull(),
task_type: text('task_type').$type<'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights' | 'index_search'>().notNull(),
status: text('status').$type<'queued' | 'running' | 'completed' | 'failed'>().notNull(),
stage: text('stage').notNull(),
stage_detail: text('stage_detail'),
@@ -570,6 +574,55 @@ export const researchJournalEntry = sqliteTable('research_journal_entry', {
researchJournalAccessionIndex: index('research_journal_accession_idx').on(table.user_id, table.accession_number)
}));
export const searchDocument = sqliteTable('search_document', {
id: integer('id').primaryKey({ autoIncrement: true }),
source_kind: text('source_kind').$type<SearchDocumentSourceKind>().notNull(),
source_ref: text('source_ref').notNull(),
scope: text('scope').$type<SearchDocumentScope>().notNull(),
user_id: text('user_id').references(() => user.id, { onDelete: 'cascade' }),
ticker: text('ticker'),
accession_number: text('accession_number'),
title: text('title'),
content_text: text('content_text').notNull(),
content_hash: text('content_hash').notNull(),
metadata: text('metadata', { mode: 'json' }).$type<Record<string, unknown> | null>(),
index_status: text('index_status').$type<SearchIndexStatus>().notNull(),
indexed_at: text('indexed_at'),
last_error: text('last_error'),
created_at: text('created_at').notNull(),
updated_at: text('updated_at').notNull()
}, (table) => ({
searchDocumentSourceUnique: uniqueIndex('search_document_source_uidx').on(
table.scope,
sql`ifnull(${table.user_id}, '')`,
table.source_kind,
table.source_ref
),
searchDocumentScopeIndex: index('search_document_scope_idx').on(
table.scope,
table.source_kind,
table.ticker,
table.updated_at
),
searchDocumentAccessionIndex: index('search_document_accession_idx').on(table.accession_number, table.source_kind)
}));
export const searchChunk = sqliteTable('search_chunk', {
id: integer('id').primaryKey({ autoIncrement: true }),
document_id: integer('document_id').notNull().references(() => searchDocument.id, { onDelete: 'cascade' }),
chunk_index: integer('chunk_index').notNull(),
chunk_text: text('chunk_text').notNull(),
char_count: integer('char_count').notNull(),
start_offset: integer('start_offset').notNull(),
end_offset: integer('end_offset').notNull(),
heading_path: text('heading_path'),
citation_label: text('citation_label').notNull(),
created_at: text('created_at').notNull()
}, (table) => ({
searchChunkUnique: uniqueIndex('search_chunk_document_chunk_uidx').on(table.document_id, table.chunk_index),
searchChunkDocumentIndex: index('search_chunk_document_idx').on(table.document_id)
}));
export const authSchema = {
user,
session,
@@ -595,7 +648,9 @@ export const appSchema = {
taskRun,
taskStageEvent,
portfolioInsight,
researchJournalEntry
researchJournalEntry,
searchDocument,
searchChunk
};
export const schema = {

View File

@@ -62,6 +62,28 @@ export async function listResearchJournalEntries(userId: string, ticker: string,
return rows.map(toResearchJournalEntry);
}
/**
 * Lists a user's research journal entries across all tickers, most recently
 * updated first (id breaks ties for identical timestamps).
 * @param limit requested page size; clamped to [1, 500]. Non-finite values
 *        (e.g. NaN from upstream Number() parsing) fall back to the default 250.
 */
export async function listResearchJournalEntriesForUser(userId: string, limit = 250) {
  // BUG FIX: Math.min/Math.max propagate NaN, so a NaN limit previously
  // reached .limit() unchanged. Guard with Number.isFinite first.
  const requested = Number.isFinite(limit) ? Math.trunc(limit) : 250;
  const safeLimit = Math.min(Math.max(requested, 1), 500);
  const rows = await db
    .select()
    .from(researchJournalEntry)
    .where(eq(researchJournalEntry.user_id, userId))
    .orderBy(desc(researchJournalEntry.updated_at), desc(researchJournalEntry.id))
    .limit(safeLimit);
  return rows.map(toResearchJournalEntry);
}
/**
 * Fetches a single journal entry owned by the given user.
 * @returns the mapped entry, or null when no such row exists for that user.
 */
export async function getResearchJournalEntryRecord(userId: string, id: number) {
  const rows = await db
    .select()
    .from(researchJournalEntry)
    .where(and(eq(researchJournalEntry.user_id, userId), eq(researchJournalEntry.id, id)))
    .limit(1);
  const row = rows.at(0);
  return row ? toResearchJournalEntry(row) : null;
}
export async function createResearchJournalEntryRecord(input: {
userId: string;
ticker: string;

217
lib/server/search.test.ts Normal file
View File

@@ -0,0 +1,217 @@
import { describe, expect, it } from 'bun:test';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { Database } from 'bun:sqlite';
import { __dbInternals } from '@/lib/server/db';
import { __searchInternals } from '@/lib/server/search';
// Applies one raw SQL migration file from the repo's drizzle/ directory to the
// test database.
function applyMigration(client: Database, fileName: string) {
  const sql = readFileSync(join(process.cwd(), 'drizzle', fileName), 'utf8');
  client.exec(sql);
}
// Builds an in-memory database mirroring getSqliteClient's setup: base
// migrations, sqlite extensions, compat-bootstrap schema, then the search
// virtual tables — so __searchInternals can run against a realistic schema.
function createClient() {
  const client = new Database(':memory:');
  client.exec('PRAGMA foreign_keys = ON;');
  applyMigration(client, '0000_cold_silver_centurion.sql');
  applyMigration(client, '0001_glossy_statement_snapshots.sql');
  applyMigration(client, '0002_workflow_task_projection_metadata.sql');
  applyMigration(client, '0003_task_stage_event_timeline.sql');
  __dbInternals.loadSqliteExtensions(client);
  __dbInternals.ensureLocalSqliteSchema(client);
  __dbInternals.ensureSearchVirtualTables(client);
  return client;
}
// Inserts a minimal user row so user-scoped documents satisfy the FK on
// search_document.user_id (timestamps are dummy zeros; email derived from id).
function insertUser(client: Database, id: string) {
  client.query(`
    INSERT INTO user (id, name, email, emailVerified, createdAt, updatedAt)
    VALUES (?, ?, ?, 1, 0, 0)
  `).run(id, id, `${id}@example.com`);
}
// Builds a 256-dim test embedding that is zero everywhere except the first two
// components, letting tests create trivially distinguishable vectors.
function vector(first: number, second = 0) {
  const values = Array.from({ length: 256 }, () => 0);
  values[0] = first;
  values[1] = second;
  return values;
}
// Unit tests for the search pipeline internals: document chunking, answer
// finalization, FTS/vec persistence lifecycle, and per-user scope isolation.
describe('search internals', () => {
  // Chunking: a note below the small-note threshold must stay in one chunk.
  it('chunks research notes as a single chunk under the small-note threshold', () => {
    const chunks = __searchInternals.chunkDocument({
      sourceKind: 'research_note',
      sourceRef: '1',
      scope: 'user',
      userId: 'user-1',
      ticker: 'AMD',
      accessionNumber: null,
      filingDate: null,
      title: 'AMD note',
      contentText: 'A compact note about margins and channel inventory.',
      metadata: {}
    });
    expect(chunks).toHaveLength(1);
    expect(chunks[0]?.chunkText).toContain('channel inventory');
  });
  // Finalization: an answer with no valid citation markers collapses to the
  // insufficient-evidence sentinel and drops every candidate citation.
  it('formats insufficient evidence when the answer cites nothing valid', () => {
    const finalized = __searchInternals.finalizeAnswer('This has no valid citations.', [{
      chunkId: 1,
      documentId: 1,
      source: 'filings',
      sourceKind: 'filing_brief',
      sourceRef: '0001',
      title: 'Brief',
      ticker: 'AMD',
      accessionNumber: '0001',
      filingDate: '2026-01-01',
      citationLabel: 'AMD · 0001 [1]',
      headingPath: null,
      chunkText: 'Revenue grew.',
      snippet: 'Revenue grew.',
      score: 0.2,
      vectorRank: 1,
      lexicalRank: 1,
      href: '/filings?ticker=AMD'
    }]);
    expect(finalized.answer).toBe('Insufficient evidence to answer from the indexed sources.');
    expect(finalized.citations).toHaveLength(0);
  });
  // Persistence lifecycle: first index writes document + chunk + fts + vec rows,
  // re-indexing identical content is a no-op, and deletion removes all four.
  it('persists vec/fts rows, skips unchanged content, and deletes synced rows together', () => {
    const client = createClient();
    const document = {
      sourceKind: 'filing_brief' as const,
      sourceRef: '0000320193-26-000001',
      scope: 'global' as const,
      userId: null,
      ticker: 'AAPL',
      accessionNumber: '0000320193-26-000001',
      filingDate: '2026-01-30',
      title: 'AAPL filing brief',
      contentText: 'Revenue remained resilient across products and services. Services margin expanded.',
      metadata: {
        filingDate: '2026-01-30',
        hasAnalysis: true
      }
    };
    const chunks = __searchInternals.chunkDocument(document);
    const firstPersist = __searchInternals.persistDocumentIndex(
      client,
      document,
      chunks,
      // One slightly-decaying synthetic embedding per chunk.
      chunks.map((_chunk, index) => vector(1 - (index * 0.1)))
    );
    expect(firstPersist.indexed).toBe(true);
    expect(client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).toEqual({ count: 1 });
    expect((client.query('SELECT count(*) AS count FROM search_chunk').get() as { count: number }).count).toBe(chunks.length);
    expect((client.query('SELECT count(*) AS count FROM search_chunk_fts').get() as { count: number }).count).toBe(chunks.length);
    expect((client.query('SELECT count(*) AS count FROM search_chunk_vec').get() as { count: number }).count).toBe(chunks.length);
    // Same content hash on the second pass: persist must report a skip.
    const secondPersist = __searchInternals.persistDocumentIndex(
      client,
      document,
      chunks,
      chunks.map((_chunk, index) => vector(1 - (index * 0.1)))
    );
    expect(secondPersist.skipped).toBe(true);
    expect((client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).count).toBe(1);
    const deleted = __searchInternals.deleteSourceRefs(client, [{
      sourceKind: 'filing_brief',
      sourceRef: document.sourceRef,
      scope: 'global'
    }]);
    expect(deleted).toBe(1);
    expect((client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).count).toBe(0);
    expect((client.query('SELECT count(*) AS count FROM search_chunk').get() as { count: number }).count).toBe(0);
    expect((client.query('SELECT count(*) AS count FROM search_chunk_fts').get() as { count: number }).count).toBe(0);
    expect((client.query('SELECT count(*) AS count FROM search_chunk_vec').get() as { count: number }).count).toBe(0);
    client.close();
  });
  // Scope isolation: user-scoped notes must never leak into another user's
  // lexical or vector results.
  it('keeps user-scoped research notes isolated in lexical and vector search', () => {
    const client = createClient();
    insertUser(client, 'user-1');
    insertUser(client, 'user-2');
    const userOneDoc = {
      sourceKind: 'research_note' as const,
      sourceRef: '101',
      scope: 'user' as const,
      userId: 'user-1',
      ticker: 'AMD',
      accessionNumber: null,
      filingDate: null,
      title: 'Durable thesis',
      contentText: 'Durable pricing power thesis with channel checks.',
      metadata: {}
    };
    const userTwoDoc = {
      ...userOneDoc,
      sourceRef: '102',
      userId: 'user-2',
      contentText: 'Different private note for another user.'
    };
    const userOneChunks = __searchInternals.chunkDocument(userOneDoc);
    const userTwoChunks = __searchInternals.chunkDocument(userTwoDoc);
    // Orthogonal embeddings so vector search can tell the two notes apart.
    __searchInternals.persistDocumentIndex(client, userOneDoc, userOneChunks, [vector(1, 0)]);
    __searchInternals.persistDocumentIndex(client, userTwoDoc, userTwoChunks, [vector(0, 1)]);
    const ftsQuery = __searchInternals.toFtsQuery('durable thesis');
    expect(ftsQuery).not.toBeNull();
    const lexicalMatches = __searchInternals.lexicalSearch(client, {
      ftsQuery: ftsQuery!,
      limit: 5,
      sourceKind: 'research_note',
      scope: 'user',
      userId: 'user-1',
      ticker: 'AMD'
    });
    // Same query scoped to user-2: the phrase only exists in user-1's note.
    const hiddenLexicalMatches = __searchInternals.lexicalSearch(client, {
      ftsQuery: ftsQuery!,
      limit: 5,
      sourceKind: 'research_note',
      scope: 'user',
      userId: 'user-2',
      ticker: 'AMD'
    });
    expect(lexicalMatches).toHaveLength(1);
    expect(hiddenLexicalMatches).toHaveLength(0);
    const vectorMatches = __searchInternals.vectorSearch(client, {
      embedding: vector(1, 0),
      limit: 5,
      sourceKind: 'research_note',
      scope: 'user',
      userId: 'user-1',
      ticker: 'AMD'
    });
    const hiddenVectorMatches = __searchInternals.vectorSearch(client, {
      embedding: vector(1, 0),
      limit: 5,
      sourceKind: 'research_note',
      scope: 'user',
      userId: 'user-2',
      ticker: 'AMD'
    });
    // Each user sees exactly their own chunk (KNN always returns the nearest
    // in-scope row), and the two result sets never share a chunk id.
    expect(vectorMatches).toHaveLength(1);
    expect(hiddenVectorMatches).toHaveLength(1);
    expect(vectorMatches[0]?.chunk_id).not.toBe(hiddenVectorMatches[0]?.chunk_id);
    client.close();
  });
});

1315
lib/server/search.ts Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,7 @@ import type {
import { runAiAnalysis } from '@/lib/server/ai';
import { buildPortfolioSummary } from '@/lib/server/portfolio';
import { getQuote } from '@/lib/server/prices';
import { indexSearchDocuments } from '@/lib/server/search';
import {
getFilingByAccession,
listFilingsRecords,
@@ -34,6 +35,7 @@ import {
fetchPrimaryFilingText,
fetchRecentFilings
} from '@/lib/server/sec';
import { enqueueTask } from '@/lib/server/tasks';
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
const EXTRACTION_REQUIRED_KEYS = [
@@ -167,6 +169,17 @@ function parseOptionalText(raw: unknown) {
return normalized.length > 0 ? normalized : null;
}
/**
 * Coerces an untrusted payload value into a string array: non-arrays become [],
 * non-string elements are dropped, strings are trimmed, blanks removed.
 */
function parseOptionalStringArray(raw: unknown) {
  if (!Array.isArray(raw)) {
    return [];
  }
  const entries: string[] = [];
  for (const entry of raw) {
    if (typeof entry !== 'string') {
      continue;
    }
    const trimmed = entry.trim();
    if (trimmed.length > 0) {
      entries.push(trimmed);
    }
  }
  return entries;
}
function parseTags(raw: unknown) {
if (!Array.isArray(raw)) {
return [];
@@ -562,6 +575,8 @@ async function processSyncFilings(task: Task) {
.filter((entry): entry is string => Boolean(entry))
.join(' | ');
let searchTaskId: string | null = null;
await setProjectionStage(
task,
'sync.fetch_filings',
@@ -667,6 +682,22 @@ async function processSyncFilings(task: Task) {
await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
}
try {
const searchTask = await enqueueTask({
userId: task.user_id,
taskType: 'index_search',
payload: {
ticker,
sourceKinds: ['filing_document', 'filing_brief']
},
priority: 55,
resourceKey: `index_search:ticker:${ticker}`
});
searchTaskId = searchTask.id;
} catch (error) {
console.error(`[search-index-sync] failed for ${ticker}:`, error);
}
return {
ticker,
category,
@@ -675,7 +706,8 @@ async function processSyncFilings(task: Task) {
inserted: saveResult.inserted,
updated: saveResult.updated,
taxonomySnapshotsHydrated,
taxonomySnapshotsFailed
taxonomySnapshotsFailed,
searchTaskId
};
}
@@ -782,12 +814,108 @@ async function processAnalyzeFiling(task: Task) {
extractionMeta
});
let searchTaskId: string | null = null;
try {
const searchTask = await enqueueTask({
userId: task.user_id,
taskType: 'index_search',
payload: {
accessionNumber,
sourceKinds: ['filing_brief']
},
priority: 58,
resourceKey: `index_search:filing_brief:${accessionNumber}`
});
searchTaskId = searchTask.id;
} catch (error) {
console.error(`[search-index-analyze] failed for ${accessionNumber}:`, error);
}
return {
accessionNumber,
provider: analysis.provider,
model: analysis.model,
extractionProvider: extractionMeta.provider,
extractionModel: extractionMeta.model
extractionModel: extractionMeta.model,
searchTaskId
};
}
/**
 * Task processor for `index_search`: validates the task payload's scoping
 * filters and deletion requests, then delegates chunking/embedding/persistence
 * to indexSearchDocuments, mirroring its pipeline stages into the task's
 * stage projection for UI progress.
 */
async function processIndexSearch(task: Task) {
  await setProjectionStage(task, 'search.collect_sources', 'Collecting source records for search indexing');
  // Optional scoping filters pulled from the (untyped) task payload.
  const ticker = parseOptionalText(task.payload.ticker);
  const accessionNumber = parseOptionalText(task.payload.accessionNumber);
  // Coerce to number here; validity (positive integer) is checked below.
  const journalEntryId = task.payload.journalEntryId === undefined
    ? null
    : Number(task.payload.journalEntryId);
  // Deletion requests: keep only entries carrying all three required string
  // fields; anything else in the array is silently dropped.
  const deleteSourceRefs = Array.isArray(task.payload.deleteSourceRefs)
    ? task.payload.deleteSourceRefs
      .filter((entry): entry is {
        sourceKind: string;
        sourceRef: string;
        scope: string;
        userId?: string | null;
      } => {
        return Boolean(
          entry
          && typeof entry === 'object'
          && typeof (entry as { sourceKind?: unknown }).sourceKind === 'string'
          && typeof (entry as { sourceRef?: unknown }).sourceRef === 'string'
          && typeof (entry as { scope?: unknown }).scope === 'string'
        );
      })
    : [];
  // Restrict sourceKinds to the three indexable kinds; unknown values are dropped.
  const sourceKinds = parseOptionalStringArray(task.payload.sourceKinds)
    .filter((sourceKind): sourceKind is 'filing_document' | 'filing_brief' | 'research_note' => {
      return sourceKind === 'filing_document'
        || sourceKind === 'filing_brief'
        || sourceKind === 'research_note';
    });
  // Only positive integer ids are honored (Number() above may yield NaN/floats).
  const validatedJournalEntryId = typeof journalEntryId === 'number'
    && Number.isInteger(journalEntryId)
    && journalEntryId > 0
    ? journalEntryId
    : null;
  const result = await indexSearchDocuments({
    userId: task.user_id,
    ticker,
    accessionNumber,
    journalEntryId: validatedJournalEntryId,
    // undefined means "all kinds" downstream; [] would mean "none".
    sourceKinds: sourceKinds.length > 0 ? sourceKinds : undefined,
    deleteSourceRefs: deleteSourceRefs.map((entry) => ({
      sourceKind: entry.sourceKind as 'filing_document' | 'filing_brief' | 'research_note',
      sourceRef: entry.sourceRef,
      // Any scope other than 'user' is treated as 'global'.
      scope: entry.scope === 'user' ? 'user' : 'global',
      userId: typeof entry.userId === 'string' ? entry.userId : null
    })),
    // Map pipeline stages onto the task's stage-projection vocabulary.
    onStage: async (stage, detail) => {
      switch (stage) {
        case 'collect':
          await setProjectionStage(task, 'search.collect_sources', detail);
          break;
        case 'fetch':
          await setProjectionStage(task, 'search.fetch_documents', detail);
          break;
        case 'chunk':
          await setProjectionStage(task, 'search.chunk', detail);
          break;
        case 'embed':
          await setProjectionStage(task, 'search.embed', detail);
          break;
        case 'persist':
          await setProjectionStage(task, 'search.persist', detail);
          break;
      }
    }
  });
  // Echo the (validated) scoping filters alongside the indexing result.
  return {
    ticker,
    accessionNumber,
    journalEntryId: validatedJournalEntryId,
    ...result
  };
}
@@ -858,6 +986,8 @@ export async function runTaskProcessor(task: Task) {
return toTaskResult(await processAnalyzeFiling(task));
case 'portfolio_insights':
return toTaskResult(await processPortfolioInsights(task));
case 'index_search':
return toTaskResult(await processIndexSearch(task));
default:
throw new Error(`Unsupported task type: ${task.task_type}`);
}

View File

@@ -101,7 +101,12 @@ export type Filing = {
};
export type TaskStatus = 'queued' | 'running' | 'completed' | 'failed';
export type TaskType = 'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights';
export type TaskType =
| 'sync_filings'
| 'refresh_prices'
| 'analyze_filing'
| 'portfolio_insights'
| 'index_search';
export type TaskStage =
| 'queued'
| 'running'
@@ -125,6 +130,11 @@ export type TaskStage =
| 'analyze.extract'
| 'analyze.generate_report'
| 'analyze.persist_report'
| 'search.collect_sources'
| 'search.fetch_documents'
| 'search.chunk'
| 'search.embed'
| 'search.persist'
| 'insights.load_holdings'
| 'insights.generate'
| 'insights.persist';
@@ -188,6 +198,40 @@ export type ResearchJournalEntry = {
updated_at: string;
};
export type SearchSource = 'documents' | 'filings' | 'research';
export type SearchResult = {
chunkId: number;
documentId: number;
source: SearchSource;
sourceKind: 'filing_document' | 'filing_brief' | 'research_note';
sourceRef: string;
title: string | null;
ticker: string | null;
accessionNumber: string | null;
filingDate: string | null;
citationLabel: string;
headingPath: string | null;
chunkText: string;
snippet: string;
score: number;
vectorRank: number | null;
lexicalRank: number | null;
href: string;
};
export type SearchCitation = {
index: number;
label: string;
chunkId: number;
href: string;
};
export type SearchAnswerResponse = {
answer: string;
citations: SearchCitation[];
results: SearchResult[];
};
export type CompanyFinancialPoint = {
filingDate: string;
filingType: Filing['filing_type'];