Files
Neon-Desk/lib/server/db/index.ts

606 lines
20 KiB
TypeScript

import { mkdirSync, readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { Database } from 'bun:sqlite';
import { drizzle } from 'drizzle-orm/bun-sqlite';
import { load as loadSqliteVec } from 'sqlite-vec';
import {
ensureFinancialIngestionSchemaHealthy,
resolveFinancialSchemaRepairMode
} from './financial-ingestion-schema';
import { schema } from './schema';
type AppDrizzleDb = ReturnType<typeof createDb>;
declare global {
// eslint-disable-next-line no-var
var __fiscalSqliteClient: Database | undefined;
// eslint-disable-next-line no-var
var __fiscalDrizzleDb: AppDrizzleDb | undefined;
}
function getDatabasePath() {
const raw = process.env.DATABASE_URL?.trim() || 'file:data/fiscal.sqlite';
let databasePath = raw.startsWith('file:') ? raw.slice(5) : raw;
if (databasePath.startsWith('///')) {
databasePath = databasePath.slice(2);
}
if (!databasePath) {
throw new Error('DATABASE_URL must point to a SQLite file path.');
}
return databasePath;
}
function hasTable(client: Database, tableName: string) {
const row = client
.query('SELECT name FROM sqlite_master WHERE type = ? AND name = ? LIMIT 1')
.get('table', tableName) as { name: string } | null;
return row !== null;
}
function hasColumn(client: Database, tableName: string, columnName: string) {
if (!hasTable(client, tableName)) {
return false;
}
const rows = client.query(`PRAGMA table_info(${tableName})`).all() as Array<{ name: string }>;
return rows.some((row) => row.name === columnName);
}
function applySqlFile(client: Database, fileName: string) {
const sql = readFileSync(join(process.cwd(), 'drizzle', fileName), 'utf8');
client.exec(sql);
}
function applyBaseSchemaCompat(client: Database) {
const sql = readFileSync(join(process.cwd(), 'drizzle', '0000_cold_silver_centurion.sql'), 'utf8')
.replaceAll('CREATE TABLE `', 'CREATE TABLE IF NOT EXISTS `')
.replaceAll('CREATE UNIQUE INDEX `', 'CREATE UNIQUE INDEX IF NOT EXISTS `')
.replaceAll('CREATE INDEX `', 'CREATE INDEX IF NOT EXISTS `');
client.exec(sql);
}
let customSqliteConfigured = false;
const vectorExtensionStatus = new WeakMap<Database, boolean>();
function configureCustomSqliteRuntime() {
if (customSqliteConfigured) {
return;
}
const customSqlitePath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
if (process.platform === 'darwin' && customSqlitePath) {
Database.setCustomSQLite(customSqlitePath);
}
customSqliteConfigured = true;
}
function loadSqliteExtensions(client: Database) {
try {
const customVectorExtensionPath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();
if (customVectorExtensionPath) {
client.loadExtension(customVectorExtensionPath);
} else {
loadSqliteVec(client);
}
vectorExtensionStatus.set(client, true);
} catch (error) {
vectorExtensionStatus.set(client, false);
const reason = error instanceof Error ? error.message : 'Unknown sqlite extension error';
console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
}
}
function isVectorExtensionLoaded(client: Database) {
return vectorExtensionStatus.get(client) ?? false;
}
function ensureResearchWorkspaceSchema(client: Database) {
if (!hasTable(client, 'research_artifact')) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`research_artifact\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
\`user_id\` text NOT NULL,
\`organization_id\` text,
\`ticker\` text NOT NULL,
\`accession_number\` text,
\`kind\` text NOT NULL,
\`source\` text NOT NULL DEFAULT 'user',
\`subtype\` text,
\`title\` text,
\`summary\` text,
\`body_markdown\` text,
\`search_text\` text,
\`visibility_scope\` text NOT NULL DEFAULT 'private',
\`tags\` text,
\`metadata\` text,
\`file_name\` text,
\`mime_type\` text,
\`file_size_bytes\` integer,
\`storage_path\` text,
\`created_at\` text NOT NULL,
\`updated_at\` text NOT NULL,
FOREIGN KEY (\`user_id\`) REFERENCES \`user\`(\`id\`) ON UPDATE no action ON DELETE cascade,
FOREIGN KEY (\`organization_id\`) REFERENCES \`organization\`(\`id\`) ON UPDATE no action ON DELETE set null
);
`);
}
if (!hasTable(client, 'research_memo')) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`research_memo\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
\`user_id\` text NOT NULL,
\`organization_id\` text,
\`ticker\` text NOT NULL,
\`rating\` text,
\`conviction\` text,
\`time_horizon_months\` integer,
\`packet_title\` text,
\`packet_subtitle\` text,
\`thesis_markdown\` text NOT NULL DEFAULT '',
\`variant_view_markdown\` text NOT NULL DEFAULT '',
\`catalysts_markdown\` text NOT NULL DEFAULT '',
\`risks_markdown\` text NOT NULL DEFAULT '',
\`disconfirming_evidence_markdown\` text NOT NULL DEFAULT '',
\`next_actions_markdown\` text NOT NULL DEFAULT '',
\`created_at\` text NOT NULL,
\`updated_at\` text NOT NULL,
FOREIGN KEY (\`user_id\`) REFERENCES \`user\`(\`id\`) ON UPDATE no action ON DELETE cascade,
FOREIGN KEY (\`organization_id\`) REFERENCES \`organization\`(\`id\`) ON UPDATE no action ON DELETE set null
);
`);
}
if (!hasTable(client, 'research_memo_evidence')) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`research_memo_evidence\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
\`memo_id\` integer NOT NULL,
\`artifact_id\` integer NOT NULL,
\`section\` text NOT NULL,
\`annotation\` text,
\`sort_order\` integer NOT NULL DEFAULT 0,
\`created_at\` text NOT NULL,
FOREIGN KEY (\`memo_id\`) REFERENCES \`research_memo\`(\`id\`) ON UPDATE no action ON DELETE cascade,
FOREIGN KEY (\`artifact_id\`) REFERENCES \`research_artifact\`(\`id\`) ON UPDATE no action ON DELETE cascade
);
`);
}
client.exec('CREATE INDEX IF NOT EXISTS `research_artifact_ticker_idx` ON `research_artifact` (`user_id`, `ticker`, `updated_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_artifact_kind_idx` ON `research_artifact` (`user_id`, `kind`, `updated_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_artifact_accession_idx` ON `research_artifact` (`user_id`, `accession_number`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_artifact_source_idx` ON `research_artifact` (`user_id`, `source`, `updated_at`);');
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `research_memo_ticker_uidx` ON `research_memo` (`user_id`, `ticker`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_memo_updated_idx` ON `research_memo` (`user_id`, `updated_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_memo_evidence_memo_idx` ON `research_memo_evidence` (`memo_id`, `section`, `sort_order`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_memo_evidence_artifact_idx` ON `research_memo_evidence` (`artifact_id`);');
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `research_memo_evidence_unique_uidx` ON `research_memo_evidence` (`memo_id`, `artifact_id`, `section`);');
client.exec(`
CREATE VIRTUAL TABLE IF NOT EXISTS \`research_artifact_fts\` USING fts5(
artifact_id UNINDEXED,
user_id UNINDEXED,
ticker UNINDEXED,
title,
summary,
body_markdown,
search_text,
tags_text
);
`);
client.exec(`
INSERT INTO \`research_artifact\` (
\`user_id\`,
\`organization_id\`,
\`ticker\`,
\`accession_number\`,
\`kind\`,
\`source\`,
\`subtype\`,
\`title\`,
\`summary\`,
\`body_markdown\`,
\`search_text\`,
\`visibility_scope\`,
\`tags\`,
\`metadata\`,
\`created_at\`,
\`updated_at\`
)
SELECT
r.\`user_id\`,
NULL,
r.\`ticker\`,
r.\`accession_number\`,
CASE
WHEN r.\`entry_type\` = 'status_change' THEN 'status_change'
ELSE 'note'
END,
CASE
WHEN r.\`entry_type\` = 'status_change' THEN 'system'
ELSE 'user'
END,
r.\`entry_type\`,
r.\`title\`,
CASE
WHEN r.\`body_markdown\` IS NULL OR TRIM(r.\`body_markdown\`) = '' THEN NULL
ELSE SUBSTR(r.\`body_markdown\`, 1, 280)
END,
r.\`body_markdown\`,
r.\`body_markdown\`,
'private',
NULL,
r.\`metadata\`,
r.\`created_at\`,
r.\`updated_at\`
FROM \`research_journal_entry\` r
WHERE EXISTS (SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'research_journal_entry')
AND NOT EXISTS (
SELECT 1
FROM \`research_artifact\` a
WHERE a.\`user_id\` = r.\`user_id\`
AND a.\`ticker\` = r.\`ticker\`
AND IFNULL(a.\`accession_number\`, '') = IFNULL(r.\`accession_number\`, '')
AND a.\`kind\` = CASE
WHEN r.\`entry_type\` = 'status_change' THEN 'status_change'
ELSE 'note'
END
AND IFNULL(a.\`title\`, '') = IFNULL(r.\`title\`, '')
AND a.\`created_at\` = r.\`created_at\`
);
`);
client.exec(`
INSERT INTO \`research_artifact\` (
\`user_id\`,
\`organization_id\`,
\`ticker\`,
\`accession_number\`,
\`kind\`,
\`source\`,
\`subtype\`,
\`title\`,
\`summary\`,
\`body_markdown\`,
\`search_text\`,
\`visibility_scope\`,
\`tags\`,
\`metadata\`,
\`created_at\`,
\`updated_at\`
)
SELECT
w.\`user_id\`,
NULL,
f.\`ticker\`,
f.\`accession_number\`,
'ai_report',
'system',
'filing_analysis',
f.\`filing_type\` || ' AI memo',
COALESCE(json_extract(f.\`analysis\`, '$.text'), json_extract(f.\`analysis\`, '$.legacyInsights')),
'Stored AI memo for ' || f.\`company_name\` || ' (' || f.\`ticker\` || ').' || CHAR(10) ||
'Accession: ' || f.\`accession_number\` || CHAR(10) || CHAR(10) ||
COALESCE(json_extract(f.\`analysis\`, '$.text'), json_extract(f.\`analysis\`, '$.legacyInsights')),
COALESCE(json_extract(f.\`analysis\`, '$.text'), json_extract(f.\`analysis\`, '$.legacyInsights')),
'private',
NULL,
json_object(
'provider', json_extract(f.\`analysis\`, '$.provider'),
'model', json_extract(f.\`analysis\`, '$.model'),
'filingType', f.\`filing_type\`,
'filingDate', f.\`filing_date\`
),
f.\`created_at\`,
f.\`updated_at\`
FROM \`filing\` f
JOIN \`watchlist_item\` w
ON w.\`ticker\` = f.\`ticker\`
WHERE f.\`analysis\` IS NOT NULL
AND TRIM(COALESCE(json_extract(f.\`analysis\`, '$.text'), json_extract(f.\`analysis\`, '$.legacyInsights'), '')) <> ''
AND NOT EXISTS (
SELECT 1
FROM \`research_artifact\` a
WHERE a.\`user_id\` = w.\`user_id\`
AND a.\`ticker\` = f.\`ticker\`
AND a.\`accession_number\` = f.\`accession_number\`
AND a.\`kind\` = 'ai_report'
);
`);
client.exec('DELETE FROM `research_artifact_fts`;');
client.exec(`
INSERT INTO \`research_artifact_fts\` (
\`artifact_id\`,
\`user_id\`,
\`ticker\`,
\`title\`,
\`summary\`,
\`body_markdown\`,
\`search_text\`,
\`tags_text\`
)
SELECT
\`id\`,
\`user_id\`,
\`ticker\`,
COALESCE(\`title\`, ''),
COALESCE(\`summary\`, ''),
COALESCE(\`body_markdown\`, ''),
COALESCE(\`search_text\`, ''),
CASE
WHEN \`tags\` IS NULL OR TRIM(\`tags\`) = '' THEN ''
ELSE REPLACE(REPLACE(REPLACE(\`tags\`, '[', ''), ']', ''), '\"', '')
END
FROM \`research_artifact\`;
`);
}
function ensureLocalSqliteSchema(client: Database) {
const missingBaseSchema = [
'filing',
'watchlist_item',
'holding',
'task_run',
'portfolio_insight'
].some((tableName) => !hasTable(client, tableName));
if (missingBaseSchema) {
applyBaseSchemaCompat(client);
}
if (!hasTable(client, 'user')) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`user\` (
\`id\` text PRIMARY KEY NOT NULL,
\`name\` text NOT NULL,
\`email\` text NOT NULL,
\`emailVerified\` integer NOT NULL DEFAULT 0,
\`image\` text,
\`createdAt\` integer NOT NULL,
\`updatedAt\` integer NOT NULL,
\`role\` text,
\`banned\` integer DEFAULT 0,
\`banReason\` text,
\`banExpires\` integer
);
`);
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `user_email_uidx` ON `user` (`email`);');
}
if (!hasTable(client, 'organization')) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`organization\` (
\`id\` text PRIMARY KEY NOT NULL,
\`name\` text NOT NULL,
\`slug\` text NOT NULL,
\`logo\` text,
\`createdAt\` integer NOT NULL,
\`metadata\` text
);
`);
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `organization_slug_uidx` ON `organization` (`slug`);');
}
if (!hasTable(client, 'filing_statement_snapshot')) {
applySqlFile(client, '0001_glossy_statement_snapshots.sql');
}
if (hasTable(client, 'task_run')) {
const missingTaskColumns: Array<{ name: string; sql: string }> = [
{ name: 'stage', sql: "ALTER TABLE `task_run` ADD `stage` text NOT NULL DEFAULT 'queued';" },
{ name: 'stage_detail', sql: 'ALTER TABLE `task_run` ADD `stage_detail` text;' },
{ name: 'stage_context', sql: 'ALTER TABLE `task_run` ADD `stage_context` text;' },
{ name: 'resource_key', sql: 'ALTER TABLE `task_run` ADD `resource_key` text;' },
{ name: 'notification_read_at', sql: 'ALTER TABLE `task_run` ADD `notification_read_at` text;' },
{ name: 'notification_silenced_at', sql: 'ALTER TABLE `task_run` ADD `notification_silenced_at` text;' }
];
for (const column of missingTaskColumns) {
if (!hasColumn(client, 'task_run', column.name)) {
client.exec(column.sql);
}
}
}
if (!hasTable(client, 'task_stage_event')) {
applySqlFile(client, '0003_task_stage_event_timeline.sql');
}
if (hasTable(client, 'task_stage_event') && !hasColumn(client, 'task_stage_event', 'stage_context')) {
client.exec('ALTER TABLE `task_stage_event` ADD `stage_context` text;');
}
client.exec('CREATE INDEX IF NOT EXISTS `task_user_updated_idx` ON `task_run` (`user_id`, `updated_at`);');
if (hasTable(client, 'watchlist_item')) {
const missingWatchlistColumns: Array<{ name: string; sql: string }> = [
{ name: 'category', sql: 'ALTER TABLE `watchlist_item` ADD `category` text;' },
{ name: 'tags', sql: 'ALTER TABLE `watchlist_item` ADD `tags` text;' },
{ name: 'status', sql: "ALTER TABLE `watchlist_item` ADD `status` text NOT NULL DEFAULT 'backlog';" },
{ name: 'priority', sql: "ALTER TABLE `watchlist_item` ADD `priority` text NOT NULL DEFAULT 'medium';" },
{ name: 'updated_at', sql: "ALTER TABLE `watchlist_item` ADD `updated_at` text NOT NULL DEFAULT '';" },
{ name: 'last_reviewed_at', sql: 'ALTER TABLE `watchlist_item` ADD `last_reviewed_at` text;' }
];
for (const column of missingWatchlistColumns) {
if (!hasColumn(client, 'watchlist_item', column.name)) {
client.exec(column.sql);
}
}
client.exec(`
UPDATE \`watchlist_item\`
SET
\`status\` = CASE
WHEN \`status\` IS NULL OR TRIM(\`status\`) = '' THEN 'backlog'
ELSE \`status\`
END,
\`priority\` = CASE
WHEN \`priority\` IS NULL OR TRIM(\`priority\`) = '' THEN 'medium'
ELSE \`priority\`
END,
\`updated_at\` = CASE
WHEN \`updated_at\` IS NULL OR TRIM(\`updated_at\`) = '' THEN COALESCE(NULLIF(\`created_at\`, ''), CURRENT_TIMESTAMP)
ELSE \`updated_at\`
END;
`);
client.exec('CREATE INDEX IF NOT EXISTS `watchlist_user_updated_idx` ON `watchlist_item` (`user_id`, `updated_at`);');
}
if (hasTable(client, 'holding') && !hasColumn(client, 'holding', 'company_name')) {
client.exec('ALTER TABLE `holding` ADD `company_name` text;');
}
if (!hasTable(client, 'filing_taxonomy_snapshot')) {
applySqlFile(client, '0005_financial_taxonomy_v3.sql');
}
if (!hasTable(client, 'company_financial_bundle')) {
applySqlFile(client, '0007_company_financial_bundles.sql');
}
if (!hasTable(client, 'company_overview_cache')) {
applySqlFile(client, '0012_company_overview_cache.sql');
}
if (!hasTable(client, 'research_journal_entry')) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`research_journal_entry\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
\`user_id\` text NOT NULL,
\`ticker\` text NOT NULL,
\`accession_number\` text,
\`entry_type\` text NOT NULL,
\`title\` text,
\`body_markdown\` text NOT NULL,
\`metadata\` text,
\`created_at\` text NOT NULL,
\`updated_at\` text NOT NULL,
FOREIGN KEY (\`user_id\`) REFERENCES \`user\`(\`id\`) ON UPDATE no action ON DELETE cascade
);
`);
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_ticker_idx` ON `research_journal_entry` (`user_id`, `ticker`, `created_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_accession_idx` ON `research_journal_entry` (`user_id`, `accession_number`);');
}
if (!hasTable(client, 'search_document')) {
applySqlFile(client, '0008_search_rag.sql');
}
ensureResearchWorkspaceSchema(client);
}
function ensureSearchVirtualTables(client: Database) {
client.exec(`
CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
\`chunk_text\`,
\`citation_label\`,
\`heading_path\`,
\`chunk_id\` UNINDEXED,
\`document_id\` UNINDEXED,
\`chunk_index\` UNINDEXED,
\`scope\` UNINDEXED,
\`user_id\` UNINDEXED,
\`source_kind\` UNINDEXED,
\`ticker\` UNINDEXED,
\`accession_number\` UNINDEXED,
\`filing_date\` UNINDEXED
);
`);
if (isVectorExtensionLoaded(client)) {
client.exec(`
CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
\`chunk_id\` integer PRIMARY KEY,
\`embedding\` float[256],
\`scope\` text,
\`user_id\` text,
\`source_kind\` text,
\`ticker\` text,
\`accession_number\` text,
\`filing_date\` text,
+\`document_id\` integer,
+\`chunk_index\` integer,
+\`citation_label\` text
);
`);
return;
}
client.exec(`
CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
\`chunk_id\` integer PRIMARY KEY NOT NULL,
\`embedding\` text NOT NULL,
\`scope\` text NOT NULL,
\`user_id\` text,
\`source_kind\` text NOT NULL,
\`ticker\` text,
\`accession_number\` text,
\`filing_date\` text,
\`document_id\` integer NOT NULL,
\`chunk_index\` integer NOT NULL,
\`citation_label\` text NOT NULL
);
`);
client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
}
export function getSqliteClient() {
if (!globalThis.__fiscalSqliteClient) {
configureCustomSqliteRuntime();
const databasePath = getDatabasePath();
if (databasePath !== ':memory:') {
mkdirSync(dirname(databasePath), { recursive: true });
}
const client = new Database(databasePath, { create: true });
client.exec('PRAGMA foreign_keys = ON;');
client.exec('PRAGMA journal_mode = WAL;');
client.exec('PRAGMA busy_timeout = 5000;');
loadSqliteExtensions(client);
ensureLocalSqliteSchema(client);
ensureFinancialIngestionSchemaHealthy(client, {
mode: resolveFinancialSchemaRepairMode(process.env.FINANCIAL_SCHEMA_REPAIR_MODE)
});
ensureSearchVirtualTables(client);
globalThis.__fiscalSqliteClient = client;
}
return globalThis.__fiscalSqliteClient;
}
function createDb() {
return drizzle(getSqliteClient(), { schema });
}
export const db = globalThis.__fiscalDrizzleDb ?? createDb();
if (!globalThis.__fiscalDrizzleDb) {
globalThis.__fiscalDrizzleDb = db;
}
export const __dbInternals = {
configureCustomSqliteRuntime,
ensureLocalSqliteSchema,
ensureSearchVirtualTables,
getDatabasePath,
hasColumn,
hasTable,
isVectorExtensionLoaded,
loadSqliteExtensions
};