The filing_taxonomy_snapshot table was missing parser_engine and related columns on databases created before the taxonomy surface sidecar migration. This caused filing sync workflows to fail with 'table has no column named parser_engine'.

Changes:
- Add TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS constant for required columns
- Add verifyCriticalSchema() to fail fast at startup if schema is incompatible
- Reorder ensureTaxonomySnapshotCompat to check table existence before column ops
- Add explicit column verification after ALTER TABLE attempts
- Add regression tests for missing column detection

Fixes #2
205 lines
5.7 KiB
TypeScript
205 lines
5.7 KiB
TypeScript
import { mkdirSync } from 'node:fs';
|
|
import { dirname } from 'node:path';
|
|
import { Database } from 'bun:sqlite';
|
|
import { drizzle } from 'drizzle-orm/bun-sqlite';
|
|
import { load as loadSqliteVec } from 'sqlite-vec';
|
|
import {
|
|
ensureFinancialIngestionSchemaHealthy,
|
|
resolveFinancialSchemaRepairMode
|
|
} from './financial-ingestion-schema';
|
|
import { schema } from './schema';
|
|
import {
|
|
ensureLocalSqliteSchema,
|
|
hasColumn,
|
|
hasTable,
|
|
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS
|
|
} from './sqlite-schema-compat';
|
|
|
|
// Type of the Drizzle instance produced by createDb(); derived from the
// function so it stays in sync with whatever drizzle() returns for `schema`.
type AppDrizzleDb = ReturnType<typeof createDb>;
|
|
|
|
// Process-wide slots on globalThis used to cache the SQLite client and the
// Drizzle wrapper so they are created at most once per process.
declare global {
  // Raw bun:sqlite handle; populated by getSqliteClient() on first use.
  // eslint-disable-next-line no-var
  var __fiscalSqliteClient: Database | undefined;
  // Drizzle wrapper around the client; populated when this module is evaluated.
  // eslint-disable-next-line no-var
  var __fiscalDrizzleDb: AppDrizzleDb | undefined;
}
|
|
|
|
/**
 * Resolves the SQLite file path from the DATABASE_URL environment variable.
 *
 * - Falls back to `file:data/fiscal.sqlite` when the variable is unset or blank.
 * - Strips a leading `file:` scheme.
 * - Collapses a `file:///abs/path` style prefix to `/abs/path`.
 *
 * @returns The path (or `:memory:`-style name) to hand to the SQLite driver.
 * @throws Error when the value resolves to an empty path (e.g. `file:`).
 */
function getDatabasePath(): string {
  const raw = process.env.DATABASE_URL?.trim() || 'file:data/fiscal.sqlite';
  let databasePath = raw.startsWith('file:') ? raw.slice(5) : raw;

  // `file:///abs/path` leaves `///abs/path` after the scheme is removed;
  // drop two of the three slashes so an absolute path remains.
  if (databasePath.startsWith('///')) {
    databasePath = databasePath.slice(2);
  }

  if (!databasePath) {
    throw new Error('DATABASE_URL must point to a SQLite file path.');
  }

  return databasePath;
}
|
|
|
|
// Set once configureCustomSqliteRuntime() has run, so later calls are no-ops.
let customSqliteConfigured = false;
// Per-client record of whether the vector extension loaded successfully.
// Written by loadSqliteExtensions(), read by isVectorExtensionLoaded().
const vectorExtensionStatus = new WeakMap<Database, boolean>();
|
|
|
|
/**
 * Points bun:sqlite at a custom SQLite library, at most once per process.
 *
 * The override is applied only on macOS (`process.platform === 'darwin'`) and
 * only when SQLITE_CUSTOM_LIB_PATH is set to a non-blank value. The
 * "configured" flag is set either way, so subsequent calls return immediately.
 */
function configureCustomSqliteRuntime() {
  if (customSqliteConfigured) {
    return;
  }

  const customSqlitePath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
  if (process.platform === 'darwin' && customSqlitePath) {
    Database.setCustomSQLite(customSqlitePath);
  }

  customSqliteConfigured = true;
}
|
|
|
|
/**
 * Attempts to load the sqlite-vec extension into `client` and records the
 * outcome in `vectorExtensionStatus`.
 *
 * When SQLITE_VEC_EXTENSION_PATH is set, that path is loaded directly;
 * otherwise the bundled `sqlite-vec` loader is used. Failure is deliberately
 * non-fatal: the status is recorded as `false` and a warning is logged.
 *
 * @param client The SQLite connection to load the extension into.
 */
function loadSqliteExtensions(client: Database) {
  try {
    const customVectorExtensionPath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();

    if (customVectorExtensionPath) {
      client.loadExtension(customVectorExtensionPath);
    } else {
      loadSqliteVec(client);
    }

    vectorExtensionStatus.set(client, true);
  } catch (error) {
    vectorExtensionStatus.set(client, false);

    const reason = error instanceof Error ? error.message : 'Unknown sqlite extension error';
    console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
  }
}
|
|
|
|
/**
 * Reports whether the vector extension was loaded successfully for `client`.
 *
 * @param client The SQLite connection to query.
 * @returns `true` only if loadSqliteExtensions() recorded success for this
 *   client; `false` if it failed or was never attempted.
 */
function isVectorExtensionLoaded(client: Database) {
  return vectorExtensionStatus.get(client) ?? false;
}
|
|
|
|
/**
 * Creates the search storage tables if they do not exist yet.
 *
 * - `search_chunk_fts` is always created as an FTS5 virtual table.
 * - `search_chunk_vec` is created as a `vec0` virtual table when the vector
 *   extension is loaded for `client`; otherwise it is created as an ordinary
 *   table (embedding stored as text) together with a lookup index.
 *
 * @param client The SQLite connection to create the tables on.
 */
function ensureSearchVirtualTables(client: Database) {
  client.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
      \`chunk_text\`,
      \`citation_label\`,
      \`heading_path\`,
      \`chunk_id\` UNINDEXED,
      \`document_id\` UNINDEXED,
      \`chunk_index\` UNINDEXED,
      \`scope\` UNINDEXED,
      \`user_id\` UNINDEXED,
      \`source_kind\` UNINDEXED,
      \`ticker\` UNINDEXED,
      \`accession_number\` UNINDEXED,
      \`filing_date\` UNINDEXED
    );
  `);

  if (isVectorExtensionLoaded(client)) {
    // NOTE(review): the leading `+` on the last three columns is kept exactly
    // as written; it matches sqlite-vec's auxiliary-column syntax — confirm.
    client.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
        \`chunk_id\` integer PRIMARY KEY,
        \`embedding\` float[256],
        \`scope\` text,
        \`user_id\` text,
        \`source_kind\` text,
        \`ticker\` text,
        \`accession_number\` text,
        \`filing_date\` text,
        +\`document_id\` integer,
        +\`chunk_index\` integer,
        +\`citation_label\` text
      );
    `);
    return;
  }

  // Fallback when the vector extension is unavailable: a plain table with the
  // same column names, plus an index on the filter columns.
  client.exec(`
    CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
      \`chunk_id\` integer PRIMARY KEY NOT NULL,
      \`embedding\` text NOT NULL,
      \`scope\` text NOT NULL,
      \`user_id\` text,
      \`source_kind\` text NOT NULL,
      \`ticker\` text,
      \`accession_number\` text,
      \`filing_date\` text,
      \`document_id\` integer NOT NULL,
      \`chunk_index\` integer NOT NULL,
      \`citation_label\` text NOT NULL
    );
  `);
  client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
}
|
|
|
|
/**
 * Fails fast when `filing_taxonomy_snapshot` exists but lacks any column
 * listed in TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS.
 *
 * Returns without checking anything when the table does not exist.
 *
 * @param client The SQLite connection to inspect.
 * @throws Error naming every missing column when the table is incomplete.
 */
function verifyCriticalSchema(client: Database) {
  const tableName = 'filing_taxonomy_snapshot';

  if (!hasTable(client, tableName)) {
    return;
  }

  // Collect all missing columns first so the error reports them in one go.
  const missingColumns: string[] = [];
  for (const columnName of TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS) {
    if (!hasColumn(client, tableName, columnName)) {
      missingColumns.push(columnName);
    }
  }

  if (missingColumns.length > 0) {
    throw new Error(
      `[db] CRITICAL: Database schema is incompatible. ` +
        `filing_taxonomy_snapshot is missing columns: ${missingColumns.join(', ')}. ` +
        `Delete the database file and restart to rebuild schema.`
    );
  }
}
|
|
|
|
/**
 * Returns the process-wide SQLite client, creating and initialising it on
 * first use and caching it on `globalThis.__fiscalSqliteClient`.
 *
 * First-use initialisation, in order: configure the custom SQLite runtime,
 * resolve the database path, create the parent directory (skipped for
 * `:memory:`), open the database, set pragmas, load extensions, then run the
 * schema compatibility, verification, health and search-table steps.
 *
 * If any step after opening the database throws, the handle is closed before
 * the error is rethrown, and nothing is cached — so a later call starts from
 * scratch instead of leaking an open connection.
 *
 * @returns The shared, fully initialised SQLite client.
 * @throws Whatever the failing initialisation step threw.
 */
export function getSqliteClient() {
  if (!globalThis.__fiscalSqliteClient) {
    configureCustomSqliteRuntime();
    const databasePath = getDatabasePath();

    if (databasePath !== ':memory:') {
      mkdirSync(dirname(databasePath), { recursive: true });
    }

    const client = new Database(databasePath, { create: true });

    try {
      client.exec('PRAGMA foreign_keys = ON;');
      client.exec('PRAGMA journal_mode = WAL;');
      client.exec('PRAGMA busy_timeout = 5000;');
      loadSqliteExtensions(client);
      ensureLocalSqliteSchema(client);
      verifyCriticalSchema(client);
      ensureFinancialIngestionSchemaHealthy(client, {
        mode: resolveFinancialSchemaRepairMode(process.env.FINANCIAL_SCHEMA_REPAIR_MODE)
      });
      ensureSearchVirtualTables(client);
    } catch (error) {
      // Release the half-initialised handle; the original error is what matters,
      // so a failure while closing is ignored.
      try {
        client.close();
      } catch {
        // ignore close failure
      }
      throw error;
    }

    globalThis.__fiscalSqliteClient = client;
  }

  return globalThis.__fiscalSqliteClient;
}
|
|
|
|
/**
 * Builds a Drizzle instance over the shared SQLite client, bound to the
 * application `schema`.
 *
 * The return type is left inferred because `AppDrizzleDb` is derived from it.
 */
function createDb() {
  return drizzle(getSqliteClient(), { schema });
}
|
|
|
|
// Reuse the Drizzle instance already cached on globalThis when there is one;
// otherwise create it now (which also opens and initialises the SQLite client).
export const db = globalThis.__fiscalDrizzleDb ?? createDb();

// Cache the instance so a later evaluation of this module picks it up
// instead of creating a second one.
if (!globalThis.__fiscalDrizzleDb) {
  globalThis.__fiscalDrizzleDb = db;
}
|
|
|
|
// Bundle of otherwise module-private helpers, exported under a single name.
// NOTE(review): presumably exposed for tests — confirm against callers.
export const __dbInternals = {
  configureCustomSqliteRuntime,
  ensureLocalSqliteSchema,
  ensureSearchVirtualTables,
  getDatabasePath,
  hasColumn,
  hasTable,
  isVectorExtensionLoaded,
  loadSqliteExtensions,
  verifyCriticalSchema
};
|