Add search and RAG workspace flows

This commit is contained in:
2026-03-07 20:34:00 -05:00
parent db01f207a5
commit e20aba998b
35 changed files with 3417 additions and 372 deletions

View File

@@ -37,6 +37,14 @@ describe('sqlite schema compatibility bootstrap', () => {
expect(__dbInternals.hasTable(client, 'filing_taxonomy_snapshot')).toBe(true);
expect(__dbInternals.hasTable(client, 'filing_taxonomy_fact')).toBe(true);
expect(__dbInternals.hasTable(client, 'research_journal_entry')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_document')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_chunk')).toBe(true);
__dbInternals.loadSqliteExtensions(client);
__dbInternals.ensureSearchVirtualTables(client);
expect(__dbInternals.hasTable(client, 'search_chunk_fts')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_chunk_vec')).toBe(true);
client.close();
});

View File

@@ -2,6 +2,7 @@ import { mkdirSync, readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { Database } from 'bun:sqlite';
import { drizzle } from 'drizzle-orm/bun-sqlite';
import { load as loadSqliteVec } from 'sqlite-vec';
import { schema } from './schema';
type AppDrizzleDb = ReturnType<typeof createDb>;
@@ -50,6 +51,45 @@ function applySqlFile(client: Database, fileName: string) {
client.exec(sql);
}
// Flipped to true at the end of configureCustomSqliteRuntime() so later calls return immediately.
let customSqliteConfigured = false;
// Per-connection record of whether loadSqliteExtensions() succeeded in loading sqlite-vec;
// read back through isVectorExtensionLoaded().
const vectorExtensionStatus = new WeakMap<Database, boolean>();
/**
 * Performs the one-time SQLite runtime selection for this process.
 *
 * On macOS (`process.platform === 'darwin'`), a non-empty, trimmed
 * `SQLITE_CUSTOM_LIB_PATH` environment variable is handed to
 * `Database.setCustomSQLite`. On every other platform, or when the variable is
 * unset or blank, nothing is swapped. The module-level guard flag is set after
 * the first successful pass, so later calls do nothing.
 */
function configureCustomSqliteRuntime() {
  if (!customSqliteConfigured) {
    const libraryOverride = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
    const runningOnMacOs = process.platform === 'darwin';

    if (runningOnMacOs && libraryOverride) {
      Database.setCustomSQLite(libraryOverride);
    }

    // Only reached when setCustomSQLite did not throw, so a failed attempt can be retried.
    customSqliteConfigured = true;
  }
}
/**
 * Tries to load the sqlite-vec extension into `client` and records the outcome.
 *
 * When the trimmed `SQLITE_VEC_EXTENSION_PATH` environment variable is
 * non-empty, that path is loaded with `client.loadExtension`; otherwise the
 * loader exported by the `sqlite-vec` package is used. This function never
 * throws on a load failure: the failure is recorded in `vectorExtensionStatus`
 * and a warning is logged instead.
 *
 * @param client - The SQLite connection to load the extension into.
 */
function loadSqliteExtensions(client: Database) {
  try {
    const overridePath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();

    if (!overridePath) {
      loadSqliteVec(client);
    } else {
      client.loadExtension(overridePath);
    }

    vectorExtensionStatus.set(client, true);
  } catch (failure) {
    vectorExtensionStatus.set(client, false);

    let reason = 'Unknown sqlite extension error';
    if (failure instanceof Error) {
      reason = failure.message;
    }

    console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
  }
}
/**
 * Reports whether sqlite-vec was recorded as successfully loaded for `client`.
 *
 * @param client - The SQLite connection to look up.
 * @returns `true` only when `vectorExtensionStatus` holds `true` for this
 *   connection; `false` when the load failed or was never attempted.
 */
function isVectorExtensionLoaded(client: Database) {
  return vectorExtensionStatus.get(client) === true;
}
function ensureLocalSqliteSchema(client: Database) {
if (!hasTable(client, 'filing_statement_snapshot')) {
applySqlFile(client, '0001_glossy_statement_snapshots.sql');
@@ -142,10 +182,70 @@ function ensureLocalSqliteSchema(client: Database) {
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_ticker_idx` ON `research_journal_entry` (`user_id`, `ticker`, `created_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_accession_idx` ON `research_journal_entry` (`user_id`, `accession_number`);');
}
if (!hasTable(client, 'search_document')) {
applySqlFile(client, '0008_search_rag.sql');
}
}
/**
 * Creates the search storage that this module builds at runtime:
 *
 * - `search_chunk_fts`: an FTS5 virtual table (always created).
 * - `search_chunk_vec`: a sqlite-vec `vec0` virtual table when
 *   `isVectorExtensionLoaded(client)` is true, otherwise a plain table with the
 *   same column names plus a lookup index.
 *
 * All statements use `IF NOT EXISTS`, so calling this repeatedly is safe.
 *
 * Storage-mode mismatch: if `search_chunk_vec` was created as a `vec0` virtual
 * table on an earlier run and sqlite-vec is not available now, the fallback
 * `CREATE TABLE IF NOT EXISTS` is a no-op and `CREATE INDEX` on the virtual
 * table would throw, aborting database start-up. That case is detected, logged
 * and skipped so the rest of the database stays usable.
 *
 * @param client - The SQLite connection to create the tables on. Call
 *   `loadSqliteExtensions(client)` first, otherwise the fallback table is used.
 */
function ensureSearchVirtualTables(client: Database) {
  client.exec(`
CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
\`chunk_text\`,
\`citation_label\`,
\`heading_path\`,
\`chunk_id\` UNINDEXED,
\`document_id\` UNINDEXED,
\`chunk_index\` UNINDEXED,
\`scope\` UNINDEXED,
\`user_id\` UNINDEXED,
\`source_kind\` UNINDEXED,
\`ticker\` UNINDEXED,
\`accession_number\` UNINDEXED,
\`filing_date\` UNINDEXED
);
`);

  if (isVectorExtensionLoaded(client)) {
    // The leading "+" on the last three columns is sqlite-vec's auxiliary-column
    // marker, not a typo; it must stay in the DDL.
    // NOTE(review): if a fallback plain table named search_chunk_vec already exists,
    // this statement is a no-op and the plain table stays in place — confirm callers cope.
    client.exec(`
CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
\`chunk_id\` integer PRIMARY KEY,
\`embedding\` float[256],
\`scope\` text,
\`user_id\` text,
\`source_kind\` text,
\`ticker\` text,
\`accession_number\` text,
\`filing_date\` text,
+\`document_id\` integer,
+\`chunk_index\` integer,
+\`citation_label\` text
);
`);
    return;
  }

  // sqlite_master keeps the defining statement; virtual tables start with "CREATE VIRTUAL TABLE".
  const existingVecTable = client
    .query("SELECT `sql` FROM `sqlite_master` WHERE `type` = 'table' AND `name` = ?")
    .get('search_chunk_vec') as { sql: string | null } | null | undefined;
  const existingIsVirtual = /^\s*CREATE\s+VIRTUAL\s+TABLE\b/i.test(existingVecTable?.sql ?? '');

  if (existingIsVirtual) {
    // SQLite refuses to index virtual tables, so the fallback DDL below cannot apply here.
    console.warn('[sqlite] search_chunk_vec already exists as a virtual table but sqlite-vec is unavailable; skipping fallback table and index creation');
    return;
  }

  client.exec(`
CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
\`chunk_id\` integer PRIMARY KEY NOT NULL,
\`embedding\` text NOT NULL,
\`scope\` text NOT NULL,
\`user_id\` text,
\`source_kind\` text NOT NULL,
\`ticker\` text,
\`accession_number\` text,
\`filing_date\` text,
\`document_id\` integer NOT NULL,
\`chunk_index\` integer NOT NULL,
\`citation_label\` text NOT NULL
);
`);
  client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
}
export function getSqliteClient() {
if (!globalThis.__fiscalSqliteClient) {
configureCustomSqliteRuntime();
const databasePath = getDatabasePath();
if (databasePath !== ':memory:') {
@@ -156,7 +256,9 @@ export function getSqliteClient() {
client.exec('PRAGMA foreign_keys = ON;');
client.exec('PRAGMA journal_mode = WAL;');
client.exec('PRAGMA busy_timeout = 5000;');
loadSqliteExtensions(client);
ensureLocalSqliteSchema(client);
ensureSearchVirtualTables(client);
globalThis.__fiscalSqliteClient = client;
}
@@ -175,8 +277,12 @@ if (!globalThis.__fiscalDrizzleDb) {
}
/**
 * Module-private helpers exposed under one object so tests can exercise them
 * directly (for example `__dbInternals.hasTable(client, name)`).
 * Not intended as a public API.
 */
export const __dbInternals = {
  configureCustomSqliteRuntime,
  ensureLocalSqliteSchema,
  ensureSearchVirtualTables,
  getDatabasePath,
  hasColumn,
  hasTable,
  isVectorExtensionLoaded,
  loadSqliteExtensions
};

View File

@@ -1,3 +1,4 @@
import { sql } from 'drizzle-orm';
import {
index,
integer,
@@ -31,6 +32,9 @@ type CoverageStatus = 'backlog' | 'active' | 'watch' | 'archive';
type CoveragePriority = 'low' | 'medium' | 'high';
type ResearchJournalEntryType = 'note' | 'filing_note' | 'status_change';
type FinancialCadence = 'annual' | 'quarterly' | 'ltm';
// Allowed values of the `scope` column on `search_document`.
type SearchDocumentScope = 'global' | 'user';
// Allowed values of the `source_kind` column on `search_document`.
type SearchDocumentSourceKind = 'filing_document' | 'filing_brief' | 'research_note';
// Allowed values of the `index_status` column on `search_document`.
type SearchIndexStatus = 'pending' | 'indexed' | 'failed';
type FinancialSurfaceKind =
| 'income_statement'
| 'balance_sheet'
@@ -500,7 +504,7 @@ export const filingLink = sqliteTable('filing_link', {
export const taskRun = sqliteTable('task_run', {
id: text('id').primaryKey().notNull(),
user_id: text('user_id').notNull().references(() => user.id, { onDelete: 'cascade' }),
task_type: text('task_type').$type<'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights'>().notNull(),
task_type: text('task_type').$type<'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights' | 'index_search'>().notNull(),
status: text('status').$type<'queued' | 'running' | 'completed' | 'failed'>().notNull(),
stage: text('stage').notNull(),
stage_detail: text('stage_detail'),
@@ -570,6 +574,55 @@ export const researchJournalEntry = sqliteTable('research_journal_entry', {
researchJournalAccessionIndex: index('research_journal_accession_idx').on(table.user_id, table.accession_number)
}));
/**
 * Drizzle definition of the `search_document` table: one row per source
 * document, holding its full text (`content_text`), a content hash, and its
 * indexing state (`index_status`, `indexed_at`, `last_error`).
 *
 * `user_id` is nullable and, when set, references `user.id` with cascading
 * delete. `ticker`, `accession_number`, `title`, and `metadata` are optional.
 */
export const searchDocument = sqliteTable('search_document', {
id: integer('id').primaryKey({ autoIncrement: true }),
source_kind: text('source_kind').$type<SearchDocumentSourceKind>().notNull(),
source_ref: text('source_ref').notNull(),
scope: text('scope').$type<SearchDocumentScope>().notNull(),
user_id: text('user_id').references(() => user.id, { onDelete: 'cascade' }),
ticker: text('ticker'),
accession_number: text('accession_number'),
title: text('title'),
content_text: text('content_text').notNull(),
content_hash: text('content_hash').notNull(),
// Stored as JSON-encoded text (Drizzle `mode: 'json'`).
metadata: text('metadata', { mode: 'json' }).$type<Record<string, unknown> | null>(),
index_status: text('index_status').$type<SearchIndexStatus>().notNull(),
indexed_at: text('indexed_at'),
last_error: text('last_error'),
// Timestamps are stored as text; the exact format is decided by the writers (not visible here).
created_at: text('created_at').notNull(),
updated_at: text('updated_at').notNull()
}, (table) => ({
// One row per (scope, user, source kind, source ref). SQLite unique indexes treat
// NULLs as distinct, so a NULL user_id is coalesced to '' to keep user-less rows
// (presumably the 'global' scope — confirm) deduplicated as well.
searchDocumentSourceUnique: uniqueIndex('search_document_source_uidx').on(
table.scope,
sql`ifnull(${table.user_id}, '')`,
table.source_kind,
table.source_ref
),
searchDocumentScopeIndex: index('search_document_scope_idx').on(
table.scope,
table.source_kind,
table.ticker,
table.updated_at
),
searchDocumentAccessionIndex: index('search_document_accession_idx').on(table.accession_number, table.source_kind)
}));
/**
 * Drizzle definition of the `search_chunk` table: the pieces a
 * `search_document` row is split into. Each chunk references its parent
 * through `document_id` (cascading delete) and is numbered by `chunk_index`.
 */
export const searchChunk = sqliteTable('search_chunk', {
id: integer('id').primaryKey({ autoIncrement: true }),
document_id: integer('document_id').notNull().references(() => searchDocument.id, { onDelete: 'cascade' }),
chunk_index: integer('chunk_index').notNull(),
chunk_text: text('chunk_text').notNull(),
char_count: integer('char_count').notNull(),
// NOTE(review): presumably offsets into the parent document's content_text — confirm with the chunker.
start_offset: integer('start_offset').notNull(),
end_offset: integer('end_offset').notNull(),
heading_path: text('heading_path'),
citation_label: text('citation_label').notNull(),
created_at: text('created_at').notNull()
}, (table) => ({
// A given chunk_index may appear only once per document.
searchChunkUnique: uniqueIndex('search_chunk_document_chunk_uidx').on(table.document_id, table.chunk_index),
// NOTE(review): document_id is also the leading column of the unique index above, so this
// index may be redundant — check the migration SQL before removing it.
searchChunkDocumentIndex: index('search_chunk_document_idx').on(table.document_id)
}));
export const authSchema = {
user,
session,
@@ -595,7 +648,9 @@ export const appSchema = {
taskRun,
taskStageEvent,
portfolioInsight,
researchJournalEntry
researchJournalEntry,
searchDocument,
searchChunk
};
export const schema = {