Add search and RAG workspace flows
This commit is contained in:
@@ -2,6 +2,7 @@ import { mkdirSync, readFileSync } from 'node:fs';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { Database } from 'bun:sqlite';
|
||||
import { drizzle } from 'drizzle-orm/bun-sqlite';
|
||||
import { load as loadSqliteVec } from 'sqlite-vec';
|
||||
import { schema } from './schema';
|
||||
|
||||
type AppDrizzleDb = ReturnType<typeof createDb>;
|
||||
@@ -50,6 +51,45 @@ function applySqlFile(client: Database, fileName: string) {
|
||||
client.exec(sql);
|
||||
}
|
||||
|
||||
// Set to true by configureCustomSqliteRuntime() after its first run so that later calls return immediately.
let customSqliteConfigured = false;
|
||||
// Per-connection record of whether the vector extension loaded (true) or failed to load (false).
// Written by loadSqliteExtensions() and read by isVectorExtensionLoaded().
const vectorExtensionStatus = new WeakMap<Database, boolean>();
|
||||
|
||||
/**
 * Optionally points `Database` at a custom SQLite library, at most once.
 *
 * The override is applied only when `process.platform` is `'darwin'` and the
 * `SQLITE_CUSTOM_LIB_PATH` environment variable holds a non-blank value.
 * The "configured" flag is raised after the first call regardless of whether
 * an override was applied, so every later call is a no-op.
 */
function configureCustomSqliteRuntime() {
  if (!customSqliteConfigured) {
    const libraryPath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
    const isDarwin = process.platform === 'darwin';

    if (isDarwin && libraryPath) {
      Database.setCustomSQLite(libraryPath);
    }

    // Only reached when the override (if any) did not throw.
    customSqliteConfigured = true;
  }
}
|
||||
|
||||
/**
 * Attempts to load the sqlite-vec extension into `client` and records the outcome.
 *
 * When `SQLITE_VEC_EXTENSION_PATH` is set to a non-blank value, that path is
 * loaded through `client.loadExtension`; otherwise the loader exported by the
 * `sqlite-vec` package is used. Success or failure is stored in
 * `vectorExtensionStatus` for this client. A failure is not rethrown: it is
 * logged with `console.warn` and the function returns normally.
 *
 * @param client - The SQLite connection to load the extension into.
 */
function loadSqliteExtensions(client: Database) {
  try {
    const extensionPath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();

    if (!extensionPath) {
      // No explicit path configured: use the loader from the sqlite-vec package.
      loadSqliteVec(client);
    } else {
      client.loadExtension(extensionPath);
    }

    vectorExtensionStatus.set(client, true);
  } catch (error) {
    vectorExtensionStatus.set(client, false);

    let reason = 'Unknown sqlite extension error';
    if (error instanceof Error) {
      reason = error.message;
    }

    console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Reports whether the vector extension was recorded as loaded for `client`.
 *
 * @param client - The SQLite connection to look up.
 * @returns The status stored in `vectorExtensionStatus`, or `false` when no
 *   status has been recorded for this client.
 */
function isVectorExtensionLoaded(client: Database) {
  const recordedStatus = vectorExtensionStatus.get(client);

  if (recordedStatus == null) {
    return false;
  }

  return recordedStatus;
}
|
||||
|
||||
function ensureLocalSqliteSchema(client: Database) {
|
||||
if (!hasTable(client, 'filing_statement_snapshot')) {
|
||||
applySqlFile(client, '0001_glossy_statement_snapshots.sql');
|
||||
@@ -142,10 +182,70 @@ function ensureLocalSqliteSchema(client: Database) {
|
||||
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_ticker_idx` ON `research_journal_entry` (`user_id`, `ticker`, `created_at`);');
|
||||
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_accession_idx` ON `research_journal_entry` (`user_id`, `accession_number`);');
|
||||
}
|
||||
|
||||
if (!hasTable(client, 'search_document')) {
|
||||
applySqlFile(client, '0008_search_rag.sql');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates the search index tables on `client` if they do not already exist.
 *
 * Always creates the FTS5 virtual table `search_chunk_fts`. For vector storage
 * it then creates `search_chunk_vec` in one of two shapes:
 *
 * - a `vec0` virtual table with a `float[256]` embedding column, when
 *   `isVectorExtensionLoaded(client)` is true;
 * - otherwise an ordinary table with the embedding stored in a `text` column,
 *   plus the `search_chunk_vec_lookup_idx` index.
 *
 * NOTE(review): both shapes share one table name and use `IF NOT EXISTS`, so a
 * database first initialised in one mode appears to keep that shape when later
 * opened in the other mode — confirm this is intended.
 *
 * @param client - The SQLite connection to create the tables on.
 */
function ensureSearchVirtualTables(client: Database) {
  const createFtsTableSql = `
    CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
      \`chunk_text\`,
      \`citation_label\`,
      \`heading_path\`,
      \`chunk_id\` UNINDEXED,
      \`document_id\` UNINDEXED,
      \`chunk_index\` UNINDEXED,
      \`scope\` UNINDEXED,
      \`user_id\` UNINDEXED,
      \`source_kind\` UNINDEXED,
      \`ticker\` UNINDEXED,
      \`accession_number\` UNINDEXED,
      \`filing_date\` UNINDEXED
    );
  `;

  // The leading `+` on the last three columns is part of the DDL, not a diff marker
  // (presumably sqlite-vec auxiliary-column syntax — confirm against the sqlite-vec docs).
  const createVec0TableSql = `
      CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
        \`chunk_id\` integer PRIMARY KEY,
        \`embedding\` float[256],
        \`scope\` text,
        \`user_id\` text,
        \`source_kind\` text,
        \`ticker\` text,
        \`accession_number\` text,
        \`filing_date\` text,
        +\`document_id\` integer,
        +\`chunk_index\` integer,
        +\`citation_label\` text
      );
    `;

  const createFallbackTableSql = `
    CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
      \`chunk_id\` integer PRIMARY KEY NOT NULL,
      \`embedding\` text NOT NULL,
      \`scope\` text NOT NULL,
      \`user_id\` text,
      \`source_kind\` text NOT NULL,
      \`ticker\` text,
      \`accession_number\` text,
      \`filing_date\` text,
      \`document_id\` integer NOT NULL,
      \`chunk_index\` integer NOT NULL,
      \`citation_label\` text NOT NULL
    );
  `;

  const createFallbackIndexSql =
    'CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);';

  client.exec(createFtsTableSql);

  if (isVectorExtensionLoaded(client)) {
    client.exec(createVec0TableSql);
  } else {
    // Table-backed fallback used when the vector extension is not loaded.
    client.exec(createFallbackTableSql);
    client.exec(createFallbackIndexSql);
  }
}
|
||||
|
||||
export function getSqliteClient() {
|
||||
if (!globalThis.__fiscalSqliteClient) {
|
||||
configureCustomSqliteRuntime();
|
||||
const databasePath = getDatabasePath();
|
||||
|
||||
if (databasePath !== ':memory:') {
|
||||
@@ -156,7 +256,9 @@ export function getSqliteClient() {
|
||||
client.exec('PRAGMA foreign_keys = ON;');
|
||||
client.exec('PRAGMA journal_mode = WAL;');
|
||||
client.exec('PRAGMA busy_timeout = 5000;');
|
||||
loadSqliteExtensions(client);
|
||||
ensureLocalSqliteSchema(client);
|
||||
ensureSearchVirtualTables(client);
|
||||
|
||||
globalThis.__fiscalSqliteClient = client;
|
||||
}
|
||||
@@ -175,8 +277,12 @@ if (!globalThis.__fiscalDrizzleDb) {
|
||||
}
|
||||
|
||||
/**
 * Bundle of otherwise module-private helpers exported under a single name.
 *
 * NOTE(review): presumably exposed so tests can reach these helpers — confirm
 * against callers. Each key appears exactly once: the stale `hasTable` entry
 * without a trailing comma (the removed side of the diff) is dropped so the
 * object literal is valid.
 */
export const __dbInternals = {
  configureCustomSqliteRuntime,
  ensureLocalSqliteSchema,
  ensureSearchVirtualTables,
  getDatabasePath,
  hasColumn,
  hasTable,
  isVectorExtensionLoaded,
  loadSqliteExtensions
};
|
||||
|
||||
Reference in New Issue
Block a user