// Source listing metadata (file viewer export residue): 218 lines, 7.4 KiB, TypeScript.
import { describe, expect, it } from 'bun:test';
|
|
import { readFileSync } from 'node:fs';
|
|
import { join } from 'node:path';
|
|
import { Database } from 'bun:sqlite';
|
|
import { __dbInternals } from '@/lib/server/db';
|
|
import { __searchInternals } from '@/lib/server/search';
|
|
|
|
/**
 * Executes one SQL migration file against the given SQLite connection.
 *
 * The file is looked up in the `drizzle` directory under the current working
 * directory and read synchronously as UTF-8 before being handed to `exec`.
 *
 * @param client - Open database connection the script is executed on.
 * @param fileName - Name of the migration file inside `drizzle/`.
 */
function applyMigration(client: Database, fileName: string) {
  const migrationPath = join(process.cwd(), 'drizzle', fileName);
  client.exec(readFileSync(migrationPath, 'utf8'));
}
|
|
|
|
/**
 * Creates a fresh in-memory SQLite database prepared for the search tests.
 *
 * Foreign-key enforcement is switched on first, then the migrations listed
 * below are applied in order, and finally the `__dbInternals` setup hooks
 * (`loadSqliteExtensions`, `ensureLocalSqliteSchema`,
 * `ensureSearchVirtualTables`) are invoked in that sequence.
 *
 * @returns The open connection; the caller is responsible for closing it.
 */
function createClient() {
  const db = new Database(':memory:');
  db.exec('PRAGMA foreign_keys = ON;');

  // Order matters: each migration builds on the schema left by the previous one.
  const migrationFiles = [
    '0000_cold_silver_centurion.sql',
    '0001_glossy_statement_snapshots.sql',
    '0002_workflow_task_projection_metadata.sql',
    '0003_task_stage_event_timeline.sql'
  ];
  for (const migrationFile of migrationFiles) {
    applyMigration(db, migrationFile);
  }

  __dbInternals.loadSqliteExtensions(db);
  __dbInternals.ensureLocalSqliteSchema(db);
  __dbInternals.ensureSearchVirtualTables(db);

  return db;
}
|
|
|
|
/**
 * Inserts a minimal row into the `user` table for tests that need an owner.
 *
 * The id doubles as the display name, the email is `<id>@example.com`,
 * `emailVerified` is set to 1 and both timestamps are set to 0.
 *
 * @param client - Open database connection to insert into.
 * @param id - Primary key of the user; also used for the name and email.
 */
function insertUser(client: Database, id: string) {
  const insertStatement = client.query(`
    INSERT INTO user (id, name, email, emailVerified, createdAt, updatedAt)
    VALUES (?, ?, ?, 1, 0, 0)
  `);
  const email = `${id}@example.com`;

  insertStatement.run(id, id, email);
}
|
|
|
|
/**
 * Builds a 256-element test embedding that is zero everywhere except its
 * first two components.
 *
 * The array is explicitly typed as `number[]`; a bare `new Array(256)` is
 * `any[]`, which would silently disable type checking wherever the result is
 * passed on as an embedding.
 *
 * NOTE(review): 256 presumably matches the dimension of the vector table
 * created by the search schema — confirm against the schema definition.
 *
 * @param first - Value stored at index 0.
 * @param second - Value stored at index 1; defaults to 0.
 * @returns A new array of length 256 on every call.
 */
function vector(first: number, second = 0): number[] {
  const values = new Array<number>(256).fill(0);
  values[0] = first;
  values[1] = second;
  return values;
}
|
|
|
|
/**
 * Tests for the helpers exposed through `__searchInternals`: chunking,
 * answer finalization, index persistence/deletion, and user-scoped lexical
 * and vector search.
 */
describe('search internals', () => {
  /** Tables whose row counts the persistence test inspects. */
  type SearchTable =
    | 'search_document'
    | 'search_chunk'
    | 'search_chunk_fts'
    | 'search_chunk_vec';

  /** Returns `count(*)` for one of the search tables. */
  function countRows(client: Database, table: SearchTable): number {
    const row = client.query(`SELECT count(*) AS count FROM ${table}`).get() as { count: number };
    return row.count;
  }

  /**
   * Runs `run` against a fresh test database and always closes the
   * connection afterwards, even when an assertion inside `run` throws.
   */
  function withClient(run: (client: Database) => void): void {
    const client = createClient();
    try {
      run(client);
    } finally {
      client.close();
    }
  }

  it('chunks research notes as a single chunk under the small-note threshold', () => {
    const chunks = __searchInternals.chunkDocument({
      sourceKind: 'research_note',
      sourceRef: '1',
      scope: 'user',
      userId: 'user-1',
      ticker: 'AMD',
      accessionNumber: null,
      filingDate: null,
      title: 'AMD note',
      contentText: 'A compact note about margins and channel inventory.',
      metadata: {}
    });

    expect(chunks).toHaveLength(1);
    expect(chunks[0]?.chunkText).toContain('channel inventory');
  });

  it('formats insufficient evidence when the answer cites nothing valid', () => {
    // The answer text references none of the supplied results, so the
    // finalizer is expected to fall back to the fixed message with no citations.
    const finalized = __searchInternals.finalizeAnswer('This has no valid citations.', [{
      chunkId: 1,
      documentId: 1,
      source: 'filings',
      sourceKind: 'filing_brief',
      sourceRef: '0001',
      title: 'Brief',
      ticker: 'AMD',
      accessionNumber: '0001',
      filingDate: '2026-01-01',
      citationLabel: 'AMD · 0001 [1]',
      headingPath: null,
      chunkText: 'Revenue grew.',
      snippet: 'Revenue grew.',
      score: 0.2,
      vectorRank: 1,
      lexicalRank: 1,
      href: '/filings?ticker=AMD'
    }]);

    expect(finalized.answer).toBe('Insufficient evidence to answer from the indexed sources.');
    expect(finalized.citations).toHaveLength(0);
  });

  it('persists vec/fts rows, skips unchanged content, and deletes synced rows together', () => {
    withClient((client) => {
      const document = {
        sourceKind: 'filing_brief' as const,
        sourceRef: '0000320193-26-000001',
        scope: 'global' as const,
        userId: null,
        ticker: 'AAPL',
        accessionNumber: '0000320193-26-000001',
        filingDate: '2026-01-30',
        title: 'AAPL filing brief',
        contentText: 'Revenue remained resilient across products and services. Services margin expanded.',
        metadata: {
          filingDate: '2026-01-30',
          hasAnalysis: true
        }
      };

      const chunks = __searchInternals.chunkDocument(document);
      // Fresh arrays per call so the two persist calls never share embedding instances.
      const buildEmbeddings = () => chunks.map((_chunk, index) => vector(1 - (index * 0.1)));

      const firstPersist = __searchInternals.persistDocumentIndex(
        client,
        document,
        chunks,
        buildEmbeddings()
      );

      expect(firstPersist.indexed).toBe(true);
      expect(countRows(client, 'search_document')).toBe(1);
      expect(countRows(client, 'search_chunk')).toBe(chunks.length);
      expect(countRows(client, 'search_chunk_fts')).toBe(chunks.length);
      expect(countRows(client, 'search_chunk_vec')).toBe(chunks.length);

      // Persisting identical content a second time must be reported as skipped
      // and must not add a second document row.
      const secondPersist = __searchInternals.persistDocumentIndex(
        client,
        document,
        chunks,
        buildEmbeddings()
      );

      expect(secondPersist.skipped).toBe(true);
      expect(countRows(client, 'search_document')).toBe(1);

      const deleted = __searchInternals.deleteSourceRefs(client, [{
        sourceKind: 'filing_brief',
        sourceRef: document.sourceRef,
        scope: 'global'
      }]);

      // Deleting the source ref must clear the document and every derived row.
      expect(deleted).toBe(1);
      expect(countRows(client, 'search_document')).toBe(0);
      expect(countRows(client, 'search_chunk')).toBe(0);
      expect(countRows(client, 'search_chunk_fts')).toBe(0);
      expect(countRows(client, 'search_chunk_vec')).toBe(0);
    });
  });

  it('keeps user-scoped research notes isolated in lexical and vector search', () => {
    withClient((client) => {
      insertUser(client, 'user-1');
      insertUser(client, 'user-2');

      const userOneDoc = {
        sourceKind: 'research_note' as const,
        sourceRef: '101',
        scope: 'user' as const,
        userId: 'user-1',
        ticker: 'AMD',
        accessionNumber: null,
        filingDate: null,
        title: 'Durable thesis',
        contentText: 'Durable pricing power thesis with channel checks.',
        metadata: {}
      };
      const userTwoDoc = {
        ...userOneDoc,
        sourceRef: '102',
        userId: 'user-2',
        contentText: 'Different private note for another user.'
      };

      const userOneChunks = __searchInternals.chunkDocument(userOneDoc);
      const userTwoChunks = __searchInternals.chunkDocument(userTwoDoc);

      __searchInternals.persistDocumentIndex(client, userOneDoc, userOneChunks, [vector(1, 0)]);
      __searchInternals.persistDocumentIndex(client, userTwoDoc, userTwoChunks, [vector(0, 1)]);

      const ftsQuery = __searchInternals.toFtsQuery('durable thesis');
      expect(ftsQuery).not.toBeNull();
      // Runtime guard instead of `ftsQuery!`: narrows the type for the calls below.
      if (ftsQuery == null) {
        throw new Error('toFtsQuery returned no query for "durable thesis"');
      }

      // Filters shared by every search below; only the user id varies.
      const noteFilter = {
        limit: 5,
        sourceKind: 'research_note' as const,
        scope: 'user' as const,
        ticker: 'AMD'
      };

      const lexicalMatches = __searchInternals.lexicalSearch(client, {
        ...noteFilter,
        ftsQuery,
        userId: 'user-1'
      });
      const hiddenLexicalMatches = __searchInternals.lexicalSearch(client, {
        ...noteFilter,
        ftsQuery,
        userId: 'user-2'
      });

      // Only user-1's note contains the query terms; user-2 must see nothing.
      expect(lexicalMatches).toHaveLength(1);
      expect(hiddenLexicalMatches).toHaveLength(0);

      const vectorMatches = __searchInternals.vectorSearch(client, {
        ...noteFilter,
        embedding: vector(1, 0),
        userId: 'user-1'
      });
      const hiddenVectorMatches = __searchInternals.vectorSearch(client, {
        ...noteFilter,
        embedding: vector(1, 0),
        userId: 'user-2'
      });

      // Each user gets exactly one match, and the matched chunks must differ:
      // the same query embedding never surfaces the other user's chunk.
      expect(vectorMatches).toHaveLength(1);
      expect(hiddenVectorMatches).toHaveLength(1);
      expect(vectorMatches[0]?.chunk_id).not.toBe(hiddenVectorMatches[0]?.chunk_id);
    });
  });
});
|