- add 3Y/5Y/10Y financial history filtering and reorganize normalization details UI - add new fiscal taxonomy surface/income bridge/KPI packs and update Rust taxonomy loading - auto-detect Homebrew SQLite for native `sqlite-vec` in local dev/e2e with docs and env guidance
221 lines
6.1 KiB
TypeScript
221 lines
6.1 KiB
TypeScript
import { mkdirSync } from "node:fs";
|
|
import { dirname } from "node:path";
|
|
import { Database } from "bun:sqlite";
|
|
import { drizzle } from "drizzle-orm/bun-sqlite";
|
|
import { load as loadSqliteVec } from "sqlite-vec";
|
|
import {
|
|
ensureFinancialIngestionSchemaHealthy,
|
|
resolveFinancialSchemaRepairMode,
|
|
} from "./financial-ingestion-schema";
|
|
import { schema } from "./schema";
|
|
import {
|
|
ensureLocalSqliteSchema,
|
|
hasColumn,
|
|
hasTable,
|
|
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS,
|
|
} from "./sqlite-schema-compat";
|
|
|
|
// Type of the Drizzle handle, derived from `createDb` so it always matches
// whatever `drizzle(...)` returns for the schema passed there.
type AppDrizzleDb = ReturnType<typeof createDb>;
|
|
|
|
// Ambient declarations for the two handles this module caches on `globalThis`.
// `getSqliteClient` and the `db` export read these before creating anything,
// so an already-populated global is reused instead of opening a new connection
// (presumably so module re-evaluation, e.g. dev hot reload, shares one handle —
// TODO confirm against the runtime setup).
declare global {
  /** Cached raw `bun:sqlite` connection; `undefined` until first use. */
  // eslint-disable-next-line no-var
  var __fiscalSqliteClient: Database | undefined;

  /** Cached Drizzle wrapper around the connection; `undefined` until first use. */
  // eslint-disable-next-line no-var
  var __fiscalDrizzleDb: AppDrizzleDb | undefined;
}
|
|
|
|
/**
 * Resolves the SQLite file path from the `DATABASE_URL` environment variable.
 *
 * - A missing or blank `DATABASE_URL` falls back to `file:data/fiscal.sqlite`.
 * - A leading `file:` scheme is stripped.
 * - A `file:///abs/path` URL is reduced to `/abs/path`.
 * - Any other value (for example `:memory:`) is returned as given, trimmed.
 *
 * @returns The path to hand to the SQLite driver.
 * @throws Error when nothing is left after stripping the scheme (e.g. `file:`).
 */
function getDatabasePath(): string {
  // `||` rather than `??` is deliberate: a blank (whitespace-only) value trims
  // to "" and must also fall back to the default.
  const configuredUrl =
    process.env.DATABASE_URL?.trim() || "file:data/fiscal.sqlite";

  let databasePath = configuredUrl.startsWith("file:")
    ? configuredUrl.slice("file:".length)
    : configuredUrl;

  // `file:///abs/path` leaves `///abs/path` after the scheme is removed; drop
  // two of the three slashes so a single leading `/` remains.
  if (databasePath.startsWith("///")) {
    databasePath = databasePath.slice(2);
  }

  if (!databasePath) {
    throw new Error("DATABASE_URL must point to a SQLite file path.");
  }

  return databasePath;
}
|
|
|
|
/**
 * Flipped to `true` once `configureCustomSqliteRuntime` has run to completion,
 * turning every later call into a no-op.
 */
let customSqliteConfigured = false;

/**
 * Per-connection record of whether the sqlite-vec extension loaded
 * (`true`) or failed to load (`false`). Connections that were never passed to
 * `loadSqliteExtensions` have no entry.
 */
const vectorExtensionStatus = new WeakMap<Database, boolean>();
|
|
|
|
/**
 * Points `bun:sqlite` at a custom SQLite library, at most once.
 *
 * The override is applied only when running on macOS
 * (`process.platform === "darwin"`) and `SQLITE_CUSTOM_LIB_PATH` is set to a
 * non-blank value; in every other case the function just records that it ran.
 *
 * The "already configured" flag is set only after `setCustomSQLite` returns,
 * so if that call throws, a later invocation will attempt it again.
 */
function configureCustomSqliteRuntime(): void {
  if (customSqliteConfigured) {
    return;
  }

  const customSqlitePath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
  if (process.platform === "darwin" && customSqlitePath) {
    Database.setCustomSQLite(customSqlitePath);
  }

  customSqliteConfigured = true;
}
|
|
|
|
/**
 * Tries to load the sqlite-vec extension into `client` and records the outcome
 * in `vectorExtensionStatus`.
 *
 * When `SQLITE_VEC_EXTENSION_PATH` is set to a non-blank value, that file is
 * loaded via `client.loadExtension`; otherwise the `sqlite-vec` package's own
 * loader is used. A failure is never rethrown: the connection is marked as
 * lacking the extension and a warning is logged, so callers can fall back to
 * table-backed vector storage.
 *
 * @param client Open SQLite connection to load the extension into.
 */
function loadSqliteExtensions(client: Database): void {
  const customVectorExtensionPath =
    process.env.SQLITE_VEC_EXTENSION_PATH?.trim();
  const customSqlitePath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();

  try {
    if (customVectorExtensionPath) {
      client.loadExtension(customVectorExtensionPath);
    } else {
      loadSqliteVec(client);
    }

    vectorExtensionStatus.set(client, true);
  } catch (error) {
    vectorExtensionStatus.set(client, false);

    const reason =
      error instanceof Error ? error.message : "Unknown sqlite extension error";

    if (customSqlitePath || customVectorExtensionPath) {
      // Both values come from `?.trim()`, so a variable that is set but blank
      // is "" rather than undefined. `||` (not `??`) makes the placeholder
      // text appear in that case too instead of printing an empty value.
      console.warn(
        `[sqlite] sqlite-vec native extension load failed (SQLITE_CUSTOM_LIB_PATH=${customSqlitePath || "unset"}, SQLITE_VEC_EXTENSION_PATH=${customVectorExtensionPath || "package default"}). Falling back to table-backed vector storage: ${reason}`,
      );
      return;
    }

    console.warn(
      `[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`,
    );
  }
}
|
|
|
|
/**
 * Reports whether sqlite-vec was successfully loaded into `client`.
 *
 * @param client Connection to look up in `vectorExtensionStatus`.
 * @returns `true` only if a successful load was recorded; `false` when the
 *   load failed or the connection has no recorded status at all.
 */
function isVectorExtensionLoaded(client: Database): boolean {
  return vectorExtensionStatus.get(client) ?? false;
}
|
|
|
|
/**
 * Creates the search storage objects if they do not exist yet.
 *
 * 1. `search_chunk_fts` — an FTS5 virtual table; only `chunk_text`,
 *    `citation_label` and `heading_path` are indexed, the remaining columns
 *    are declared `UNINDEXED`.
 * 2. `search_chunk_vec` — a `vec0` virtual table with a 256-element float
 *    embedding when sqlite-vec is loaded for this connection; otherwise a
 *    plain table with the same column names (embedding stored as `text`) plus
 *    a lookup index.
 *
 * Every statement uses `IF NOT EXISTS`, so an existing `search_chunk_vec` of
 * either flavour is left as it is.
 *
 * @param client Open SQLite connection to create the objects on.
 */
function ensureSearchVirtualTables(client: Database): void {
  client.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_fts\` USING fts5(
      \`chunk_text\`,
      \`citation_label\`,
      \`heading_path\`,
      \`chunk_id\` UNINDEXED,
      \`document_id\` UNINDEXED,
      \`chunk_index\` UNINDEXED,
      \`scope\` UNINDEXED,
      \`user_id\` UNINDEXED,
      \`source_kind\` UNINDEXED,
      \`ticker\` UNINDEXED,
      \`accession_number\` UNINDEXED,
      \`filing_date\` UNINDEXED
    );
  `);

  if (isVectorExtensionLoaded(client)) {
    // Native path: the `+`-prefixed columns use sqlite-vec's auxiliary-column
    // syntax (see the vec0 documentation).
    client.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS search_chunk_vec USING vec0(
        chunk_id integer PRIMARY KEY,
        embedding float[256],
        scope text,
        user_id text,
        source_kind text,
        ticker text,
        accession_number text,
        filing_date text,
        +document_id integer,
        +chunk_index integer,
        +citation_label text
      );
    `);
    return;
  }

  // Fallback path: ordinary table under the same name, with the embedding
  // kept in a text column.
  client.exec(`
    CREATE TABLE IF NOT EXISTS \`search_chunk_vec\` (
      \`chunk_id\` integer PRIMARY KEY NOT NULL,
      \`embedding\` text NOT NULL,
      \`scope\` text NOT NULL,
      \`user_id\` text,
      \`source_kind\` text NOT NULL,
      \`ticker\` text,
      \`accession_number\` text,
      \`filing_date\` text,
      \`document_id\` integer NOT NULL,
      \`chunk_index\` integer NOT NULL,
      \`citation_label\` text NOT NULL
    );
  `);
  client.exec(
    "CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);",
  );
}
|
|
|
|
/**
 * Fails fast when an existing `filing_taxonomy_snapshot` table lacks any of
 * the columns listed in `TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS`.
 *
 * If the table does not exist at all the check is skipped.
 *
 * @param client Open SQLite connection to inspect.
 * @throws Error naming every missing column when at least one is absent.
 */
function verifyCriticalSchema(client: Database): void {
  const snapshotTable = "filing_taxonomy_snapshot";

  if (!hasTable(client, snapshotTable)) {
    return;
  }

  // Collect all gaps first so the error reports them together rather than
  // one per restart.
  const missingColumns: string[] = [];
  for (const columnName of TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS) {
    if (!hasColumn(client, snapshotTable, columnName)) {
      missingColumns.push(columnName);
    }
  }

  if (missingColumns.length > 0) {
    throw new Error(
      `[db] CRITICAL: Database schema is incompatible. ` +
        `filing_taxonomy_snapshot is missing columns: ${missingColumns.join(", ")}. ` +
        `Delete the database file and restart to rebuild schema.`,
    );
  }
}
|
|
|
|
/**
 * Returns the shared SQLite connection, opening and initializing it on first
 * use.
 *
 * First-use initialization, in order: apply the custom SQLite runtime
 * override, resolve the database path, create its parent directory (skipped
 * for `:memory:`), open the database, set the `foreign_keys`, `journal_mode`
 * (WAL) and `busy_timeout` pragmas, try to load sqlite-vec, then run the
 * local schema, critical-schema, financial-ingestion-schema and search-table
 * steps. The connection is cached on `globalThis.__fiscalSqliteClient` only
 * after every step has succeeded.
 *
 * @returns The cached or newly initialized connection.
 * @throws Whatever any initialization step throws; in that case the freshly
 *   opened connection is closed before the error propagates.
 */
export function getSqliteClient(): Database {
  const cachedClient = globalThis.__fiscalSqliteClient;
  if (cachedClient) {
    return cachedClient;
  }

  configureCustomSqliteRuntime();
  const databasePath = getDatabasePath();

  if (databasePath !== ":memory:") {
    mkdirSync(dirname(databasePath), { recursive: true });
  }

  const client = new Database(databasePath, { create: true });

  try {
    client.exec("PRAGMA foreign_keys = ON;");
    client.exec("PRAGMA journal_mode = WAL;");
    client.exec("PRAGMA busy_timeout = 5000;");
    loadSqliteExtensions(client);
    ensureLocalSqliteSchema(client);
    verifyCriticalSchema(client);
    ensureFinancialIngestionSchemaHealthy(client, {
      mode: resolveFinancialSchemaRepairMode(
        process.env.FINANCIAL_SCHEMA_REPAIR_MODE,
      ),
    });
    ensureSearchVirtualTables(client);
  } catch (error) {
    // Nothing has cached this handle yet, so without closing it here every
    // failed attempt would leave another open connection behind.
    try {
      client.close();
    } catch {
      // Best effort only: the initialization error is the one worth surfacing.
    }
    throw error;
  }

  globalThis.__fiscalSqliteClient = client;
  return client;
}
|
|
|
|
/**
 * Wraps the shared SQLite connection in a Drizzle instance bound to `schema`.
 *
 * The return type is intentionally left inferred: `AppDrizzleDb` is defined as
 * `ReturnType<typeof createDb>` and relies on that inference.
 */
function createDb() {
  return drizzle(getSqliteClient(), { schema });
}
|
|
|
|
/**
 * Application-wide Drizzle handle. Reuses the instance already stored on
 * `globalThis.__fiscalDrizzleDb` when there is one; otherwise creates it, which
 * opens and initializes the SQLite connection as a side effect of importing
 * this module.
 */
export const db = globalThis.__fiscalDrizzleDb ?? createDb();

// Publish a freshly created handle so later evaluations pick it up above.
if (!globalThis.__fiscalDrizzleDb) {
  globalThis.__fiscalDrizzleDb = db;
}
|
|
|
|
/**
 * Bundle of this module's otherwise-private helpers (plus the schema-compat
 * helpers it imports), exported under a single double-underscore name.
 * NOTE(review): presumably intended for tests and tooling rather than
 * application code — confirm against the callers.
 */
export const __dbInternals = {
  configureCustomSqliteRuntime,
  ensureLocalSqliteSchema,
  ensureSearchVirtualTables,
  getDatabasePath,
  hasColumn,
  hasTable,
  isVectorExtensionLoaded,
  loadSqliteExtensions,
  verifyCriticalSchema,
};
|