import { describe, expect, it } from 'bun:test';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { Database } from 'bun:sqlite';
import { __dbInternals } from '@/lib/server/db';
import { __searchInternals } from '@/lib/server/search';

/** Migration files executed, in this order, against every fresh test database. */
const MIGRATION_FILES = [
  '0000_cold_silver_centurion.sql',
  '0001_glossy_statement_snapshots.sql',
  '0002_workflow_task_projection_metadata.sql',
  '0003_task_stage_event_timeline.sql'
] as const;

/**
 * Length of the embeddings produced by `vector`.
 * NOTE(review): presumably this has to match the dimension of the
 * `search_chunk_vec` table - confirm against the search module.
 */
const EMBEDDING_DIMENSIONS = 256;

/** Tables whose row counts the persistence test inspects. */
type SearchTable = 'search_document' | 'search_chunk' | 'search_chunk_fts' | 'search_chunk_vec';

/**
 * Reads one SQL file from the `drizzle` directory under the current working
 * directory and executes it on the given connection.
 *
 * @param client - Connection the migration is executed on.
 * @param fileName - File name inside the `drizzle` directory.
 */
function applyMigration(client: Database, fileName: string): void {
  const sql = readFileSync(join(process.cwd(), 'drizzle', fileName), 'utf8');
  client.exec(sql);
}

/**
 * Builds an in-memory SQLite database with foreign keys enabled, all
 * migrations applied, and the `__dbInternals` setup steps run
 * (extensions, local schema, search virtual tables).
 *
 * @returns An open connection; the caller is responsible for closing it.
 */
function createClient(): Database {
  const client = new Database(':memory:');

  try {
    client.exec('PRAGMA foreign_keys = ON;');

    for (const fileName of MIGRATION_FILES) {
      applyMigration(client, fileName);
    }

    __dbInternals.loadSqliteExtensions(client);
    __dbInternals.ensureLocalSqliteSchema(client);
    __dbInternals.ensureSearchVirtualTables(client);

    return client;
  } catch (error) {
    // Do not leave a half-initialised connection open when setup fails.
    client.close();
    throw error;
  }
}

/**
 * Runs `run` with a fresh database and closes the connection afterwards,
 * including when an expectation inside `run` throws.
 *
 * @param run - Synchronous test body that receives the open connection.
 */
function withClient(run: (client: Database) => void): void {
  const client = createClient();

  try {
    run(client);
  } finally {
    client.close();
  }
}

/**
 * Returns `count(*)` for one of the search tables.
 *
 * @param client - Connection to query.
 * @param table - Table name; limited to a literal union because it is
 *   interpolated into the SQL text.
 */
function countRows(client: Database, table: SearchTable): number {
  const row = client.query(`SELECT count(*) AS count FROM ${table}`).get() as { count: number };
  return row.count;
}

/**
 * Inserts a row into `user` whose id and name are both `id` and whose email
 * is `<id>@example.com`; the remaining columns receive fixed values.
 *
 * @param client - Connection to insert into.
 * @param id - Value used for the id, the name and the email local part.
 */
function insertUser(client: Database, id: string): void {
  client.query(`
    INSERT INTO user (id, name, email, emailVerified, createdAt, updatedAt)
    VALUES (?, ?, ?, 1, 0, 0)
  `).run(id, id, `${id}@example.com`);
}

/**
 * Builds an embedding of `EMBEDDING_DIMENSIONS` zeros with the first two
 * components overridden.
 *
 * @param first - Value stored at index 0.
 * @param second - Value stored at index 1 (defaults to 0).
 * @returns A new array on every call.
 */
function vector(first: number, second = 0): number[] {
  const values = new Array<number>(EMBEDDING_DIMENSIONS).fill(0);
  values[0] = first;
  values[1] = second;
  return values;
}

describe('search internals', () => {
  it('chunks research notes as a single chunk under the small-note threshold', () => {
    const chunks = __searchInternals.chunkDocument({
      sourceKind: 'research_note',
      sourceRef: '1',
      scope: 'user',
      userId: 'user-1',
      ticker: 'AMD',
      accessionNumber: null,
      filingDate: null,
      title: 'AMD note',
      contentText: 'A compact note about margins and channel inventory.',
      metadata: {}
    });

    expect(chunks).toHaveLength(1);
    expect(chunks[0]?.chunkText).toContain('channel inventory');
  });

  it('formats insufficient evidence when the answer cites nothing valid', () => {
    const finalized = __searchInternals.finalizeAnswer('This has no valid citations.', [{
      chunkId: 1,
      documentId: 1,
      source: 'filings',
      sourceKind: 'filing_brief',
      sourceRef: '0001',
      title: 'Brief',
      ticker: 'AMD',
      accessionNumber: '0001',
      filingDate: '2026-01-01',
      // U+00B7 MIDDLE DOT, written as an escape so the separator survives
      // re-encoding of this file (it had previously been garbled).
      citationLabel: 'AMD \u00b7 0001 [1]',
      headingPath: null,
      chunkText: 'Revenue grew.',
      snippet: 'Revenue grew.',
      score: 0.2,
      vectorRank: 1,
      lexicalRank: 1,
      href: '/filings?ticker=AMD'
    }]);

    expect(finalized.answer).toBe('Insufficient evidence to answer from the indexed sources.');
    expect(finalized.citations).toHaveLength(0);
  });

  it('persists vec/fts rows, skips unchanged content, and deletes synced rows together', () => {
    withClient((client) => {
      const document = {
        sourceKind: 'filing_brief' as const,
        sourceRef: '0000320193-26-000001',
        scope: 'global' as const,
        userId: null,
        ticker: 'AAPL',
        accessionNumber: '0000320193-26-000001',
        filingDate: '2026-01-30',
        title: 'AAPL filing brief',
        contentText: 'Revenue remained resilient across products and services. Services margin expanded.',
        metadata: {
          filingDate: '2026-01-30',
          hasAnalysis: true
        }
      };

      const chunks = __searchInternals.chunkDocument(document);
      // Built fresh for each persist call so both calls receive their own arrays.
      const buildEmbeddings = () => chunks.map((_chunk, index) => vector(1 - (index * 0.1)));

      // First persist: one document row and one row per chunk in each chunk table.
      const firstPersist = __searchInternals.persistDocumentIndex(client, document, chunks, buildEmbeddings());
      expect(firstPersist.indexed).toBe(true);
      expect(countRows(client, 'search_document')).toBe(1);
      expect(countRows(client, 'search_chunk')).toBe(chunks.length);
      expect(countRows(client, 'search_chunk_fts')).toBe(chunks.length);
      expect(countRows(client, 'search_chunk_vec')).toBe(chunks.length);

      // Second persist with identical input is reported as skipped and adds no document row.
      const secondPersist = __searchInternals.persistDocumentIndex(client, document, chunks, buildEmbeddings());
      expect(secondPersist.skipped).toBe(true);
      expect(countRows(client, 'search_document')).toBe(1);

      // Deleting by source ref empties all four tables.
      const deleted = __searchInternals.deleteSourceRefs(client, [{
        sourceKind: 'filing_brief',
        sourceRef: document.sourceRef,
        scope: 'global'
      }]);
      expect(deleted).toBe(1);
      expect(countRows(client, 'search_document')).toBe(0);
      expect(countRows(client, 'search_chunk')).toBe(0);
      expect(countRows(client, 'search_chunk_fts')).toBe(0);
      expect(countRows(client, 'search_chunk_vec')).toBe(0);
    });
  });

  it('keeps user-scoped research notes isolated in lexical and vector search', () => {
    withClient((client) => {
      insertUser(client, 'user-1');
      insertUser(client, 'user-2');

      const userOneDoc = {
        sourceKind: 'research_note' as const,
        sourceRef: '101',
        scope: 'user' as const,
        userId: 'user-1',
        ticker: 'AMD',
        accessionNumber: null,
        filingDate: null,
        title: 'Durable thesis',
        contentText: 'Durable pricing power thesis with channel checks.',
        metadata: {}
      };
      const userTwoDoc = {
        ...userOneDoc,
        sourceRef: '102',
        userId: 'user-2',
        contentText: 'Different private note for another user.'
      };

      const userOneChunks = __searchInternals.chunkDocument(userOneDoc);
      const userTwoChunks = __searchInternals.chunkDocument(userTwoDoc);

      __searchInternals.persistDocumentIndex(client, userOneDoc, userOneChunks, [vector(1, 0)]);
      __searchInternals.persistDocumentIndex(client, userTwoDoc, userTwoChunks, [vector(0, 1)]);

      const ftsQuery = __searchInternals.toFtsQuery('durable thesis');
      expect(ftsQuery).not.toBeNull();
      if (ftsQuery == null) {
        // Narrows the type for the calls below without a non-null assertion.
        throw new Error('toFtsQuery returned no query for "durable thesis"');
      }

      const userOneFilter = {
        limit: 5,
        sourceKind: 'research_note' as const,
        scope: 'user' as const,
        userId: 'user-1',
        ticker: 'AMD'
      };
      const userTwoFilter = { ...userOneFilter, userId: 'user-2' };

      // Lexical: the query text only occurs in user-1's note, so user-2 sees nothing.
      const lexicalMatches = __searchInternals.lexicalSearch(client, { ftsQuery, ...userOneFilter });
      const otherUserLexicalMatches = __searchInternals.lexicalSearch(client, { ftsQuery, ...userTwoFilter });

      expect(lexicalMatches).toHaveLength(1);
      expect(otherUserLexicalMatches).toHaveLength(0);

      // Vector: each user gets exactly one match for the same embedding, and
      // the matched chunks differ, i.e. user-2 is never handed user-1's chunk.
      const vectorMatches = __searchInternals.vectorSearch(client, { embedding: vector(1, 0), ...userOneFilter });
      const otherUserVectorMatches = __searchInternals.vectorSearch(client, { embedding: vector(1, 0), ...userTwoFilter });

      expect(vectorMatches).toHaveLength(1);
      expect(otherUserVectorMatches).toHaveLength(1);
      expect(vectorMatches[0]?.chunk_id).not.toBe(otherUserVectorMatches[0]?.chunk_id);
    });
  });
});