Add search and RAG workspace flows
This commit is contained in:
@@ -37,6 +37,14 @@ describe('sqlite schema compatibility bootstrap', () => {
|
||||
expect(__dbInternals.hasTable(client, 'filing_taxonomy_snapshot')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'filing_taxonomy_fact')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'research_journal_entry')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'search_document')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'search_chunk')).toBe(true);
|
||||
|
||||
__dbInternals.loadSqliteExtensions(client);
|
||||
__dbInternals.ensureSearchVirtualTables(client);
|
||||
|
||||
expect(__dbInternals.hasTable(client, 'search_chunk_fts')).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, 'search_chunk_vec')).toBe(true);
|
||||
|
||||
client.close();
|
||||
});
|
||||
|
||||
@@ -2,6 +2,7 @@ import { mkdirSync, readFileSync } from 'node:fs';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { Database } from 'bun:sqlite';
|
||||
import { drizzle } from 'drizzle-orm/bun-sqlite';
|
||||
import { load as loadSqliteVec } from 'sqlite-vec';
|
||||
import { schema } from './schema';
|
||||
|
||||
type AppDrizzleDb = ReturnType<typeof createDb>;
|
||||
@@ -50,6 +51,45 @@ function applySqlFile(client: Database, fileName: string) {
|
||||
client.exec(sql);
|
||||
}
|
||||
|
||||
// One-shot guard: configureCustomSqliteRuntime() returns early when this is true and sets it to true after its first full run.
let customSqliteConfigured = false;
|
||||
// Per-connection flag recording whether the sqlite-vec extension loaded; written by loadSqliteExtensions() and read by isVectorExtensionLoaded().
const vectorExtensionStatus = new WeakMap<Database, boolean>();
|
||||
|
||||
/**
 * Optionally points Bun's SQLite binding at a custom SQLite library.
 *
 * Runs its body at most once per process, guarded by `customSqliteConfigured`.
 * The override is applied only when the platform is `darwin` and the
 * `SQLITE_CUSTOM_LIB_PATH` environment variable is non-empty after trimming.
 * NOTE(review): presumably macOS-only because the system SQLite there cannot
 * load extensions — confirm against the Bun documentation.
 */
function configureCustomSqliteRuntime() {
  if (!customSqliteConfigured) {
    const libraryPath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
    const onMacOs = process.platform === 'darwin';

    if (onMacOs && libraryPath) {
      Database.setCustomSQLite(libraryPath);
    }

    // Mark as done even when no override was applied, so later calls are no-ops.
    customSqliteConfigured = true;
  }
}
|
||||
|
||||
/**
 * Tries to load the sqlite-vec extension into `client` and records the outcome.
 *
 * When `SQLITE_VEC_EXTENSION_PATH` is set (non-empty after trimming), that path
 * is loaded through `client.loadExtension`; otherwise the `sqlite-vec` package
 * loader is used. Failures are never rethrown: the connection is marked as
 * lacking the extension and a warning is logged.
 *
 * @param client - The open database connection to load the extension into.
 */
function loadSqliteExtensions(client: Database) {
  try {
    const overridePath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();

    if (!overridePath) {
      loadSqliteVec(client);
    } else {
      client.loadExtension(overridePath);
    }

    vectorExtensionStatus.set(client, true);
  } catch (error) {
    vectorExtensionStatus.set(client, false);

    let reason = 'Unknown sqlite extension error';
    if (error instanceof Error) {
      reason = error.message;
    }

    console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Reports whether sqlite-vec was recorded as loaded for `client`.
 *
 * @param client - The database connection to look up.
 * @returns `true` only if `vectorExtensionStatus` holds `true` for this
 *   connection; `false` when it holds `false` or has no entry.
 */
function isVectorExtensionLoaded(client: Database) {
  return vectorExtensionStatus.get(client) === true;
}
|
||||
|
||||
function ensureLocalSqliteSchema(client: Database) {
|
||||
if (!hasTable(client, 'filing_statement_snapshot')) {
|
||||
applySqlFile(client, '0001_glossy_statement_snapshots.sql');
|
||||
@@ -142,10 +182,70 @@ function ensureLocalSqliteSchema(client: Database) {
|
||||
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_ticker_idx` ON `research_journal_entry` (`user_id`, `ticker`, `created_at`);');
|
||||
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_accession_idx` ON `research_journal_entry` (`user_id`, `accession_number`);');
|
||||
}
|
||||
|
||||
if (!hasTable(client, 'search_document')) {
|
||||
applySqlFile(client, '0008_search_rag.sql');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates the search index tables that live outside the SQL migration files.
 *
 * Always ensures the FTS5 table `search_chunk_fts`. For vector storage it
 * creates `search_chunk_vec` either as a sqlite-vec `vec0` virtual table (when
 * the extension is recorded as loaded for `client`) or as a plain table plus a
 * lookup index, with the embedding held in a text column.
 *
 * NOTE(review): every statement uses IF NOT EXISTS, so a `search_chunk_vec`
 * left by an earlier run in the other mode is kept as-is. If a vec0 table
 * already exists and the extension is not loaded, the CREATE INDEX below will
 * presumably fail — confirm how mode switches between runs should be handled.
 *
 * @param client - The open database connection to create the tables on.
 */
function ensureSearchVirtualTables(client: Database) {
  // Columns marked UNINDEXED are stored in the FTS5 table but not full-text indexed.
  const createFtsTableSql = `
    CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
      \`chunk_text\`,
      \`citation_label\`,
      \`heading_path\`,
      \`chunk_id\` UNINDEXED,
      \`document_id\` UNINDEXED,
      \`chunk_index\` UNINDEXED,
      \`scope\` UNINDEXED,
      \`user_id\` UNINDEXED,
      \`source_kind\` UNINDEXED,
      \`ticker\` UNINDEXED,
      \`accession_number\` UNINDEXED,
      \`filing_date\` UNINDEXED
    );
  `;
  client.exec(createFtsTableSql);

  if (!isVectorExtensionLoaded(client)) {
    // Fallback: ordinary table with the same column names.
    const createFallbackTableSql = `
      CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
        \`chunk_id\` integer PRIMARY KEY NOT NULL,
        \`embedding\` text NOT NULL,
        \`scope\` text NOT NULL,
        \`user_id\` text,
        \`source_kind\` text NOT NULL,
        \`ticker\` text,
        \`accession_number\` text,
        \`filing_date\` text,
        \`document_id\` integer NOT NULL,
        \`chunk_index\` integer NOT NULL,
        \`citation_label\` text NOT NULL
      );
    `;
    client.exec(createFallbackTableSql);
    client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
    return;
  }

  // In vec0 declarations a leading `+` marks an auxiliary column (see the sqlite-vec docs).
  const createVectorTableSql = `
    CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
      \`chunk_id\` integer PRIMARY KEY,
      \`embedding\` float[256],
      \`scope\` text,
      \`user_id\` text,
      \`source_kind\` text,
      \`ticker\` text,
      \`accession_number\` text,
      \`filing_date\` text,
      +\`document_id\` integer,
      +\`chunk_index\` integer,
      +\`citation_label\` text
    );
  `;
  client.exec(createVectorTableSql);
}
|
||||
|
||||
export function getSqliteClient() {
|
||||
if (!globalThis.__fiscalSqliteClient) {
|
||||
configureCustomSqliteRuntime();
|
||||
const databasePath = getDatabasePath();
|
||||
|
||||
if (databasePath !== ':memory:') {
|
||||
@@ -156,7 +256,9 @@ export function getSqliteClient() {
|
||||
client.exec('PRAGMA foreign_keys = ON;');
|
||||
client.exec('PRAGMA journal_mode = WAL;');
|
||||
client.exec('PRAGMA busy_timeout = 5000;');
|
||||
loadSqliteExtensions(client);
|
||||
ensureLocalSqliteSchema(client);
|
||||
ensureSearchVirtualTables(client);
|
||||
|
||||
globalThis.__fiscalSqliteClient = client;
|
||||
}
|
||||
@@ -175,8 +277,12 @@ if (!globalThis.__fiscalDrizzleDb) {
|
||||
}
|
||||
|
||||
/**
 * Internal bootstrap helpers exported so tests can drive individual steps
 * (for example `hasTable`, `loadSqliteExtensions` and
 * `ensureSearchVirtualTables`) against their own database connection.
 */
export const __dbInternals = {
  configureCustomSqliteRuntime,
  ensureLocalSqliteSchema,
  ensureSearchVirtualTables,
  getDatabasePath,
  hasColumn,
  // A stale pre-change `hasTable` entry without a trailing comma preceded this
  // line, making the literal invalid and duplicating the key; only one remains.
  hasTable,
  isVectorExtensionLoaded,
  loadSqliteExtensions
};
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { sql } from 'drizzle-orm';
|
||||
import {
|
||||
index,
|
||||
integer,
|
||||
@@ -31,6 +32,9 @@ type CoverageStatus = 'backlog' | 'active' | 'watch' | 'archive';
|
||||
type CoveragePriority = 'low' | 'medium' | 'high';
|
||||
type ResearchJournalEntryType = 'note' | 'filing_note' | 'status_change';
|
||||
type FinancialCadence = 'annual' | 'quarterly' | 'ltm';
|
||||
// Allowed values for the `scope` column of `search_document`.
type SearchDocumentScope = 'global' | 'user';
|
||||
// Allowed values for the `source_kind` column of `search_document`.
type SearchDocumentSourceKind = 'filing_document' | 'filing_brief' | 'research_note';
|
||||
// Allowed values for the `index_status` column of `search_document`.
type SearchIndexStatus = 'pending' | 'indexed' | 'failed';
|
||||
type FinancialSurfaceKind =
|
||||
| 'income_statement'
|
||||
| 'balance_sheet'
|
||||
@@ -500,7 +504,7 @@ export const filingLink = sqliteTable('filing_link', {
|
||||
export const taskRun = sqliteTable('task_run', {
|
||||
id: text('id').primaryKey().notNull(),
|
||||
user_id: text('user_id').notNull().references(() => user.id, { onDelete: 'cascade' }),
|
||||
task_type: text('task_type').$type<'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights'>().notNull(),
|
||||
task_type: text('task_type').$type<'sync_filings' | 'refresh_prices' | 'analyze_filing' | 'portfolio_insights' | 'index_search'>().notNull(),
|
||||
status: text('status').$type<'queued' | 'running' | 'completed' | 'failed'>().notNull(),
|
||||
stage: text('stage').notNull(),
|
||||
stage_detail: text('stage_detail'),
|
||||
@@ -570,6 +574,55 @@ export const researchJournalEntry = sqliteTable('research_journal_entry', {
|
||||
researchJournalAccessionIndex: index('research_journal_accession_idx').on(table.user_id, table.accession_number)
|
||||
}));
|
||||
|
||||
/**
 * Drizzle definition of the `search_document` table: one row per source
 * document, holding its full text, a content hash, and its indexing status.
 */
export const searchDocument = sqliteTable(
  'search_document',
  {
    id: integer('id').primaryKey({ autoIncrement: true }),
    source_kind: text('source_kind').$type<SearchDocumentSourceKind>().notNull(),
    source_ref: text('source_ref').notNull(),
    scope: text('scope').$type<SearchDocumentScope>().notNull(),
    // Nullable; when set, the row is deleted together with its user.
    user_id: text('user_id').references(() => user.id, { onDelete: 'cascade' }),
    ticker: text('ticker'),
    accession_number: text('accession_number'),
    title: text('title'),
    content_text: text('content_text').notNull(),
    content_hash: text('content_hash').notNull(),
    metadata: text('metadata', { mode: 'json' }).$type<Record<string, unknown> | null>(),
    index_status: text('index_status').$type<SearchIndexStatus>().notNull(),
    indexed_at: text('indexed_at'),
    last_error: text('last_error'),
    created_at: text('created_at').notNull(),
    updated_at: text('updated_at').notNull()
  },
  (columns) => ({
    // `ifnull(user_id, '')` maps a NULL user_id to '' inside the index key, so
    // rows without a user still collide on (scope, source_kind, source_ref);
    // SQLite otherwise treats NULLs as distinct in UNIQUE indexes.
    searchDocumentSourceUnique: uniqueIndex('search_document_source_uidx').on(
      columns.scope,
      sql`ifnull(${columns.user_id}, '')`,
      columns.source_kind,
      columns.source_ref
    ),
    searchDocumentScopeIndex: index('search_document_scope_idx').on(
      columns.scope,
      columns.source_kind,
      columns.ticker,
      columns.updated_at
    ),
    searchDocumentAccessionIndex: index('search_document_accession_idx').on(
      columns.accession_number,
      columns.source_kind
    )
  })
);
|
||||
|
||||
/**
 * Drizzle definition of the `search_chunk` table: chunks of a
 * `search_document` row, each with its text, character offsets, and a
 * citation label.
 */
export const searchChunk = sqliteTable(
  'search_chunk',
  {
    id: integer('id').primaryKey({ autoIncrement: true }),
    // Chunks are deleted together with their parent document.
    document_id: integer('document_id')
      .notNull()
      .references(() => searchDocument.id, { onDelete: 'cascade' }),
    chunk_index: integer('chunk_index').notNull(),
    chunk_text: text('chunk_text').notNull(),
    char_count: integer('char_count').notNull(),
    start_offset: integer('start_offset').notNull(),
    end_offset: integer('end_offset').notNull(),
    heading_path: text('heading_path'),
    citation_label: text('citation_label').notNull(),
    created_at: text('created_at').notNull()
  },
  (columns) => ({
    // Each (document_id, chunk_index) pair may appear only once.
    searchChunkUnique: uniqueIndex('search_chunk_document_chunk_uidx').on(
      columns.document_id,
      columns.chunk_index
    ),
    searchChunkDocumentIndex: index('search_chunk_document_idx').on(columns.document_id)
  })
);
|
||||
|
||||
export const authSchema = {
|
||||
user,
|
||||
session,
|
||||
@@ -595,7 +648,9 @@ export const appSchema = {
|
||||
taskRun,
|
||||
taskStageEvent,
|
||||
portfolioInsight,
|
||||
researchJournalEntry
|
||||
researchJournalEntry,
|
||||
searchDocument,
|
||||
searchChunk
|
||||
};
|
||||
|
||||
export const schema = {
|
||||
|
||||
Reference in New Issue
Block a user