Files
Neon-Desk/lib/server/search.test.ts

218 lines
7.4 KiB
TypeScript

import { describe, expect, it } from 'bun:test';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { Database } from 'bun:sqlite';
import { __dbInternals } from '@/lib/server/db';
import { __searchInternals } from '@/lib/server/search';
/**
 * Loads one SQL migration file from the project's `drizzle` directory and
 * executes it against the given SQLite client.
 */
function applyMigration(client: Database, fileName: string) {
  const migrationPath = join(process.cwd(), 'drizzle', fileName);
  client.exec(readFileSync(migrationPath, 'utf8'));
}
/**
 * Builds a fresh in-memory SQLite database that mirrors the real server DB:
 * foreign keys on, all drizzle migrations applied in order, then the app's
 * SQLite extensions, local schema, and search virtual tables installed.
 */
function createClient() {
  const client = new Database(':memory:');
  client.exec('PRAGMA foreign_keys = ON;');
  const migrations = [
    '0000_cold_silver_centurion.sql',
    '0001_glossy_statement_snapshots.sql',
    '0002_workflow_task_projection_metadata.sql',
    '0003_task_stage_event_timeline.sql'
  ];
  for (const migration of migrations) {
    applyMigration(client, migration);
  }
  __dbInternals.loadSqliteExtensions(client);
  __dbInternals.ensureLocalSqliteSchema(client);
  __dbInternals.ensureSearchVirtualTables(client);
  return client;
}
/**
 * Inserts a minimal user row (name mirrors the id, email derived from it) so
 * that user-scoped documents can satisfy the user foreign key.
 */
function insertUser(client: Database, id: string) {
  const email = `${id}@example.com`;
  const statement = client.query(`
INSERT INTO user (id, name, email, emailVerified, createdAt, updatedAt)
VALUES (?, ?, ?, 1, 0, 0)
`);
  statement.run(id, id, email);
}
/**
 * Builds a deterministic test embedding: `size` components (default 256,
 * matching the original hard-coded width), all zero except the first two.
 * Distinct (first, second) pairs give distinguishable vectors for ranking
 * assertions without any randomness.
 *
 * @param first value placed at component 0
 * @param second value placed at component 1 (defaults to 0)
 * @param size embedding dimensionality (defaults to 256; assumed >= 2)
 */
function vector(first: number, second = 0, size = 256) {
  const values = new Array<number>(size).fill(0);
  values[0] = first;
  values[1] = second;
  return values;
}
describe('search internals', () => {
  it('chunks research notes as a single chunk under the small-note threshold', () => {
    // A short note must not be split: exactly one chunk, retaining the text.
    const chunks = __searchInternals.chunkDocument({
      sourceKind: 'research_note',
      sourceRef: '1',
      scope: 'user',
      userId: 'user-1',
      ticker: 'AMD',
      accessionNumber: null,
      filingDate: null,
      title: 'AMD note',
      contentText: 'A compact note about margins and channel inventory.',
      metadata: {}
    });
    expect(chunks).toHaveLength(1);
    expect(chunks[0]?.chunkText).toContain('channel inventory');
  });
  it('formats insufficient evidence when the answer cites nothing valid', () => {
    // The answer text carries no valid citation, so finalizeAnswer must fall
    // back to the canonical message and drop the candidate citation list.
    const finalized = __searchInternals.finalizeAnswer('This has no valid citations.', [{
      chunkId: 1,
      documentId: 1,
      source: 'filings',
      sourceKind: 'filing_brief',
      sourceRef: '0001',
      title: 'Brief',
      ticker: 'AMD',
      accessionNumber: '0001',
      filingDate: '2026-01-01',
      citationLabel: 'AMD · 0001 [1]',
      headingPath: null,
      chunkText: 'Revenue grew.',
      snippet: 'Revenue grew.',
      score: 0.2,
      vectorRank: 1,
      lexicalRank: 1,
      href: '/filings?ticker=AMD'
    }]);
    expect(finalized.answer).toBe('Insufficient evidence to answer from the indexed sources.');
    expect(finalized.citations).toHaveLength(0);
  });
  it('persists vec/fts rows, skips unchanged content, and deletes synced rows together', () => {
    const client = createClient();
    const document = {
      sourceKind: 'filing_brief' as const,
      sourceRef: '0000320193-26-000001',
      scope: 'global' as const,
      userId: null,
      ticker: 'AAPL',
      accessionNumber: '0000320193-26-000001',
      filingDate: '2026-01-30',
      title: 'AAPL filing brief',
      contentText: 'Revenue remained resilient across products and services. Services margin expanded.',
      metadata: {
        filingDate: '2026-01-30',
        hasAnalysis: true
      }
    };
    const chunks = __searchInternals.chunkDocument(document);
    // First persist: one document row, and every chunk must land in the base
    // chunk table plus both the FTS and vector virtual tables.
    const firstPersist = __searchInternals.persistDocumentIndex(
      client,
      document,
      chunks,
      chunks.map((_chunk, index) => vector(1 - (index * 0.1)))
    );
    expect(firstPersist.indexed).toBe(true);
    expect((client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).count).toBe(1);
    expect((client.query('SELECT count(*) AS count FROM search_chunk').get() as { count: number }).count).toBe(chunks.length);
    expect((client.query('SELECT count(*) AS count FROM search_chunk_fts').get() as { count: number }).count).toBe(chunks.length);
    expect((client.query('SELECT count(*) AS count FROM search_chunk_vec').get() as { count: number }).count).toBe(chunks.length);
    // Re-persisting identical content is reported as skipped and must not
    // create a second document row.
    const secondPersist = __searchInternals.persistDocumentIndex(
      client,
      document,
      chunks,
      chunks.map((_chunk, index) => vector(1 - (index * 0.1)))
    );
    expect(secondPersist.skipped).toBe(true);
    expect((client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).count).toBe(1);
    // Deleting by source ref must clear the document, its chunks, and both
    // virtual-table projections together.
    const deleted = __searchInternals.deleteSourceRefs(client, [{
      sourceKind: 'filing_brief',
      sourceRef: document.sourceRef,
      scope: 'global'
    }]);
    expect(deleted).toBe(1);
    expect((client.query('SELECT count(*) AS count FROM search_document').get() as { count: number }).count).toBe(0);
    expect((client.query('SELECT count(*) AS count FROM search_chunk').get() as { count: number }).count).toBe(0);
    expect((client.query('SELECT count(*) AS count FROM search_chunk_fts').get() as { count: number }).count).toBe(0);
    expect((client.query('SELECT count(*) AS count FROM search_chunk_vec').get() as { count: number }).count).toBe(0);
    client.close();
  });
  it('keeps user-scoped research notes isolated in lexical and vector search', () => {
    const client = createClient();
    insertUser(client, 'user-1');
    insertUser(client, 'user-2');
    const userOneDoc = {
      sourceKind: 'research_note' as const,
      sourceRef: '101',
      scope: 'user' as const,
      userId: 'user-1',
      ticker: 'AMD',
      accessionNumber: null,
      filingDate: null,
      title: 'Durable thesis',
      contentText: 'Durable pricing power thesis with channel checks.',
      metadata: {}
    };
    const userTwoDoc = {
      ...userOneDoc,
      sourceRef: '102',
      userId: 'user-2',
      contentText: 'Different private note for another user.'
    };
    const userOneChunks = __searchInternals.chunkDocument(userOneDoc);
    const userTwoChunks = __searchInternals.chunkDocument(userTwoDoc);
    __searchInternals.persistDocumentIndex(client, userOneDoc, userOneChunks, [vector(1, 0)]);
    __searchInternals.persistDocumentIndex(client, userTwoDoc, userTwoChunks, [vector(0, 1)]);
    const ftsQuery = __searchInternals.toFtsQuery('durable thesis');
    expect(ftsQuery).not.toBeNull();
    // Lexical: the query text only exists in user-1's note, so user-1 sees
    // one hit and user-2 sees none.
    const lexicalMatches = __searchInternals.lexicalSearch(client, {
      ftsQuery: ftsQuery!,
      limit: 5,
      sourceKind: 'research_note',
      scope: 'user',
      userId: 'user-1',
      ticker: 'AMD'
    });
    const hiddenLexicalMatches = __searchInternals.lexicalSearch(client, {
      ftsQuery: ftsQuery!,
      limit: 5,
      sourceKind: 'research_note',
      scope: 'user',
      userId: 'user-2',
      ticker: 'AMD'
    });
    expect(lexicalMatches).toHaveLength(1);
    expect(hiddenLexicalMatches).toHaveLength(0);
    // Vector: the same embedding queried as each user returns exactly one
    // hit apiece — each user's own note, never the other user's chunk.
    const vectorMatches = __searchInternals.vectorSearch(client, {
      embedding: vector(1, 0),
      limit: 5,
      sourceKind: 'research_note',
      scope: 'user',
      userId: 'user-1',
      ticker: 'AMD'
    });
    const hiddenVectorMatches = __searchInternals.vectorSearch(client, {
      embedding: vector(1, 0),
      limit: 5,
      sourceKind: 'research_note',
      scope: 'user',
      userId: 'user-2',
      ticker: 'AMD'
    });
    expect(vectorMatches).toHaveLength(1);
    expect(hiddenVectorMatches).toHaveLength(1);
    expect(vectorMatches[0]?.chunk_id).not.toBe(hiddenVectorMatches[0]?.chunk_id);
    client.close();
  });
});