Files
Neon-Desk/lib/server/db/index.ts
francy51 7a42d73a48 Fix filing taxonomy schema mismatch by adding explicit column verification
The filing_taxonomy_snapshot table was missing parser_engine and related columns
on databases created before the taxonomy surface sidecar migration. This caused
filing sync workflows to fail with 'table has no column named parser_engine'.

Changes:
- Add TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS constant for required columns
- Add verifyCriticalSchema() to fail fast at startup if schema is incompatible
- Reorder ensureTaxonomySnapshotCompat to check table existence before column ops
- Add explicit column verification after ALTER TABLE attempts
- Add regression tests for missing column detection

Fixes #2
2026-03-15 13:15:01 -04:00

205 lines
5.7 KiB
TypeScript

import { mkdirSync } from 'node:fs';
import { dirname } from 'node:path';
import { Database } from 'bun:sqlite';
import { drizzle } from 'drizzle-orm/bun-sqlite';
import { load as loadSqliteVec } from 'sqlite-vec';
import {
ensureFinancialIngestionSchemaHealthy,
resolveFinancialSchemaRepairMode
} from './financial-ingestion-schema';
import { schema } from './schema';
import {
ensureLocalSqliteSchema,
hasColumn,
hasTable,
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS
} from './sqlite-schema-compat';
// Drizzle database type, derived from createDb() so it always matches the
// schema object that factory passes to drizzle().
type AppDrizzleDb = ReturnType<typeof createDb>;
// Ambient declarations for the two globalThis slots this module uses to cache
// the raw SQLite client and the Drizzle wrapper.
declare global {
// Cached bun:sqlite client; undefined until getSqliteClient() first succeeds.
// eslint-disable-next-line no-var
var __fiscalSqliteClient: Database | undefined;
// Cached Drizzle instance; undefined until this module has been evaluated once.
// eslint-disable-next-line no-var
var __fiscalDrizzleDb: AppDrizzleDb | undefined;
}
/**
 * Resolves the SQLite file path from the `DATABASE_URL` environment variable.
 *
 * - A missing or blank value falls back to `file:data/fiscal.sqlite`.
 * - A leading `file:` scheme is stripped.
 * - If the remainder starts with `///`, two of the slashes are dropped so that
 *   `file:///abs/path` yields `/abs/path`.
 *
 * @returns The path (or special name such as `:memory:`) to hand to SQLite.
 * @throws Error when nothing is left after stripping the scheme (e.g. `file:`).
 */
function getDatabasePath() {
  const fileScheme = 'file:';
  const fallbackUrl = 'file:data/fiscal.sqlite';

  const configured = process.env.DATABASE_URL?.trim();
  // An empty string after trimming is treated the same as "not set".
  const url = configured ? configured : fallbackUrl;

  const withoutScheme = url.startsWith(fileScheme) ? url.slice(fileScheme.length) : url;
  const resolved = withoutScheme.startsWith('///') ? withoutScheme.slice(2) : withoutScheme;

  if (resolved.length === 0) {
    throw new Error('DATABASE_URL must point to a SQLite file path.');
  }

  return resolved;
}
// Set to true once configureCustomSqliteRuntime() has run to completion, so
// later calls return immediately.
let customSqliteConfigured = false;
// Per-client record of whether the sqlite-vec extension loaded (true) or
// failed to load (false). Written by loadSqliteExtensions(), read by
// isVectorExtensionLoaded().
const vectorExtensionStatus = new WeakMap<Database, boolean>();
/**
 * One-time setup of the SQLite library used by `bun:sqlite`.
 *
 * When running on macOS (`process.platform === 'darwin'`) and
 * `SQLITE_CUSTOM_LIB_PATH` is set to a non-blank value, that path is passed to
 * `Database.setCustomSQLite`. On every other platform, or when the variable is
 * unset, nothing is changed. Subsequent calls are no-ops.
 */
function configureCustomSqliteRuntime() {
  if (!customSqliteConfigured) {
    const libraryPath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
    const onMacOs = process.platform === 'darwin';

    if (onMacOs && libraryPath) {
      Database.setCustomSQLite(libraryPath);
    }

    // Only reached when setCustomSQLite did not throw, so a failed attempt
    // leaves the flag unset.
    customSqliteConfigured = true;
  }
}
/**
 * Best-effort load of the sqlite-vec extension into `client`.
 *
 * If `SQLITE_VEC_EXTENSION_PATH` is set to a non-blank value, the extension is
 * loaded from that path via `client.loadExtension`; otherwise the bundled
 * loader from the `sqlite-vec` package is used. The outcome is recorded in
 * `vectorExtensionStatus`. Failures are never propagated: they are logged with
 * `console.warn` and the client is marked as not having the extension.
 *
 * @param client The open SQLite connection to load the extension into.
 */
function loadSqliteExtensions(client: Database) {
  const overridePath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();

  try {
    if (!overridePath) {
      loadSqliteVec(client);
    } else {
      client.loadExtension(overridePath);
    }
    vectorExtensionStatus.set(client, true);
  } catch (failure) {
    vectorExtensionStatus.set(client, false);

    let reason = 'Unknown sqlite extension error';
    if (failure instanceof Error) {
      reason = failure.message;
    }
    console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
  }
}
/**
 * Reports whether sqlite-vec was successfully loaded for `client`.
 *
 * @param client The SQLite connection to look up.
 * @returns `true` only if `loadSqliteExtensions` recorded a success for this
 *   client; `false` if it recorded a failure or was never called for it.
 */
function isVectorExtensionLoaded(client: Database) {
  const recorded = vectorExtensionStatus.get(client);
  return recorded === true;
}
/**
 * Creates the search tables if they do not exist yet.
 *
 * Always creates the `search_chunk_fts` FTS5 virtual table. For
 * `search_chunk_vec` it picks one of two shapes:
 * - sqlite-vec loaded for this client: a `vec0` virtual table with a
 *   `float[256]` embedding column. The `+`-prefixed columns follow sqlite-vec's
 *   auxiliary-column syntax.
 * - otherwise: an ordinary table storing the embedding as `text`, plus the
 *   `search_chunk_vec_lookup_idx` index.
 *
 * NOTE(review): every statement uses IF NOT EXISTS, so a `search_chunk_vec`
 * created earlier under the other mode is left as-is — confirm that switching
 * modes on an existing database is handled elsewhere.
 *
 * @param client The SQLite connection to run the DDL on.
 */
function ensureSearchVirtualTables(client: Database) {
  const fullTextDdl = `
CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
\`chunk_text\`,
\`citation_label\`,
\`heading_path\`,
\`chunk_id\` UNINDEXED,
\`document_id\` UNINDEXED,
\`chunk_index\` UNINDEXED,
\`scope\` UNINDEXED,
\`user_id\` UNINDEXED,
\`source_kind\` UNINDEXED,
\`ticker\` UNINDEXED,
\`accession_number\` UNINDEXED,
\`filing_date\` UNINDEXED
);
`;
  client.exec(fullTextDdl);

  if (!isVectorExtensionLoaded(client)) {
    // Fallback: plain table with a lookup index instead of a vec0 virtual table.
    const fallbackTableDdl = `
CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
\`chunk_id\` integer PRIMARY KEY NOT NULL,
\`embedding\` text NOT NULL,
\`scope\` text NOT NULL,
\`user_id\` text,
\`source_kind\` text NOT NULL,
\`ticker\` text,
\`accession_number\` text,
\`filing_date\` text,
\`document_id\` integer NOT NULL,
\`chunk_index\` integer NOT NULL,
\`citation_label\` text NOT NULL
);
`;
    client.exec(fallbackTableDdl);
    client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
    return;
  }

  const vectorTableDdl = `
CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
\`chunk_id\` integer PRIMARY KEY,
\`embedding\` float[256],
\`scope\` text,
\`user_id\` text,
\`source_kind\` text,
\`ticker\` text,
\`accession_number\` text,
\`filing_date\` text,
+\`document_id\` integer,
+\`chunk_index\` integer,
+\`citation_label\` text
);
`;
  client.exec(vectorTableDdl);
}
/**
 * Fails fast when `filing_taxonomy_snapshot` exists but lacks required columns.
 *
 * Does nothing if the table is absent. Otherwise every name in
 * `TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS` is checked with `hasColumn`, and all
 * missing names are reported together in a single error.
 *
 * @param client The SQLite connection whose schema is inspected.
 * @throws Error listing the missing columns when at least one is absent.
 */
function verifyCriticalSchema(client: Database) {
  const table = 'filing_taxonomy_snapshot';

  if (hasTable(client, table)) {
    const absent: string[] = Array.from(TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS).filter(
      (columnName) => !hasColumn(client, 'filing_taxonomy_snapshot', columnName)
    );

    if (absent.length !== 0) {
      const message = [
        '[db] CRITICAL: Database schema is incompatible.',
        `filing_taxonomy_snapshot is missing columns: ${absent.join(', ')}.`,
        'Delete the database file and restart to rebuild schema.'
      ].join(' ');
      throw new Error(message);
    }
  }
}
/**
 * Returns the shared SQLite client, opening and bootstrapping it on first use.
 *
 * The client is cached on `globalThis.__fiscalSqliteClient`; later calls return
 * the cached instance without repeating any setup. First-time setup, in order:
 * 1. apply the optional custom SQLite runtime,
 * 2. create the parent directory for on-disk databases (skipped for `:memory:`),
 * 3. open the database and set the foreign-key, WAL and busy-timeout pragmas,
 * 4. try to load sqlite-vec (failure is tolerated),
 * 5. run the schema compatibility / verification / health helpers,
 * 6. create the search tables.
 *
 * If any step after opening the database throws, the connection is closed
 * before the error is rethrown. Nothing is cached in that case, so without the
 * close each retry would open another handle and leak the previous one.
 *
 * @returns The bootstrapped `bun:sqlite` client.
 * @throws Whatever a bootstrap step throws, e.g. the schema-incompatibility
 *   error from `verifyCriticalSchema`.
 */
export function getSqliteClient() {
  const cached = globalThis.__fiscalSqliteClient;
  if (cached) {
    return cached;
  }

  configureCustomSqliteRuntime();

  const databasePath = getDatabasePath();
  if (databasePath !== ':memory:') {
    mkdirSync(dirname(databasePath), { recursive: true });
  }

  const client = new Database(databasePath, { create: true });
  try {
    client.exec('PRAGMA foreign_keys = ON;');
    client.exec('PRAGMA journal_mode = WAL;');
    client.exec('PRAGMA busy_timeout = 5000;');
    loadSqliteExtensions(client);
    ensureLocalSqliteSchema(client);
    verifyCriticalSchema(client);
    ensureFinancialIngestionSchemaHealthy(client, {
      mode: resolveFinancialSchemaRepairMode(process.env.FINANCIAL_SCHEMA_REPAIR_MODE)
    });
    ensureSearchVirtualTables(client);
  } catch (error) {
    // Release the handle; the original bootstrap error is what callers need,
    // so a failure to close must not mask it.
    try {
      client.close();
    } catch {
      // Ignored on purpose: the bootstrap error below takes precedence.
    }
    throw error;
  }

  // Publish only after every bootstrap step succeeded.
  globalThis.__fiscalSqliteClient = client;
  return client;
}
/**
 * Builds a Drizzle instance over the shared SQLite client, bound to the
 * project's `schema` object.
 */
function createDb() {
  const client = getSqliteClient();
  return drizzle(client, { schema });
}
// Module-level Drizzle handle: reuse the instance cached on globalThis when
// present, otherwise build one (which opens the SQLite client as a side effect).
export const db = globalThis.__fiscalDrizzleDb ?? createDb();
// Cache the instance on globalThis so a later evaluation of this module picks
// it up — presumably to survive dev-server module reloads; confirm.
if (!globalThis.__fiscalDrizzleDb) {
globalThis.__fiscalDrizzleDb = db;
}
// Internal helpers re-exported under a single object. The double-underscore
// name suggests this is not public API — presumably it exists so tests can call
// the helpers directly; confirm against the callers.
export const __dbInternals = {
configureCustomSqliteRuntime,
ensureLocalSqliteSchema,
ensureSearchVirtualTables,
getDatabasePath,
hasColumn,
hasTable,
isVectorExtensionLoaded,
loadSqliteExtensions,
verifyCriticalSchema
};