Fix filing taxonomy schema mismatch by adding explicit column verification

The filing_taxonomy_snapshot table was missing parser_engine and related columns
on databases created before the taxonomy surface sidecar migration. This caused
filing sync workflows to fail with 'table has no column named parser_engine'.

Changes:
- Add TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS constant for required columns
- Add verifyCriticalSchema() to fail fast at startup if schema is incompatible
- Reorder ensureTaxonomySnapshotCompat to check table existence before column ops
- Add explicit column verification after ALTER TABLE attempts
- Add regression tests for missing column detection

Fixes #2
This commit is contained in:
2026-03-15 13:15:01 -04:00
parent 529437c760
commit 7a42d73a48
3 changed files with 88 additions and 5 deletions

View File

@@ -199,4 +199,35 @@ describe('sqlite schema compatibility bootstrap', () => {
client.close();
});
it('throws on missing parser_engine column when verifyCriticalSchema is called', () => {
  const db = new Database(':memory:');
  db.exec('PRAGMA foreign_keys = ON;');

  // Apply only these two migrations and deliberately skip the compat bootstrap;
  // the assertions below confirm the table exists without parser_engine.
  for (const migrationFile of ['0000_cold_silver_centurion.sql', '0005_financial_taxonomy_v3.sql']) {
    applyMigration(db, migrationFile);
  }

  const snapshotTable = 'filing_taxonomy_snapshot';
  expect(__dbInternals.hasTable(db, snapshotTable)).toBe(true);
  expect(__dbInternals.hasColumn(db, snapshotTable, 'parser_engine')).toBe(false);

  // The verification must reject this schema and name the missing column.
  const runVerification = () => __dbInternals.verifyCriticalSchema(db);
  expect(runVerification).toThrow(/filing_taxonomy_snapshot is missing columns: parser_engine/);

  db.close();
});
it('verifyCriticalSchema passes when all required columns exist', () => {
  const db = new Database(':memory:');
  db.exec('PRAGMA foreign_keys = ON;');

  for (const migrationFile of ['0000_cold_silver_centurion.sql', '0005_financial_taxonomy_v3.sql']) {
    applyMigration(db, migrationFile);
  }

  // Run the compat bootstrap first; verification is then expected to succeed.
  __dbInternals.ensureLocalSqliteSchema(db);

  const runVerification = () => __dbInternals.verifyCriticalSchema(db);
  expect(runVerification).not.toThrow();

  db.close();
});
});

View File

@@ -11,7 +11,8 @@ import { schema } from './schema';
import {
ensureLocalSqliteSchema,
hasColumn,
hasTable
hasTable,
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS
} from './sqlite-schema-compat';
type AppDrizzleDb = ReturnType<typeof createDb>;
@@ -132,6 +133,27 @@ function ensureSearchVirtualTables(client: Database) {
client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
}
/**
 * Startup guard for the `filing_taxonomy_snapshot` table.
 *
 * Does nothing when the table does not exist. When it does exist, every name in
 * TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS must be present as a column.
 *
 * @param client - SQLite client whose schema is inspected.
 * @throws Error listing every missing column (comma-separated) when at least
 *   one required column is absent.
 */
function verifyCriticalSchema(client: Database) {
  const snapshotTable = 'filing_taxonomy_snapshot';

  if (hasTable(client, snapshotTable)) {
    // Collect every absent column so the error reports them all at once.
    const absent = TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS.filter(
      (requiredColumn: string) => !hasColumn(client, snapshotTable, requiredColumn)
    );

    if (absent.length !== 0) {
      const message =
        `[db] CRITICAL: Database schema is incompatible. ` +
        `filing_taxonomy_snapshot is missing columns: ${absent.join(', ')}. ` +
        `Delete the database file and restart to rebuild schema.`;
      throw new Error(message);
    }
  }
}
export function getSqliteClient() {
if (!globalThis.__fiscalSqliteClient) {
configureCustomSqliteRuntime();
@@ -147,6 +169,7 @@ export function getSqliteClient() {
client.exec('PRAGMA busy_timeout = 5000;');
loadSqliteExtensions(client);
ensureLocalSqliteSchema(client);
verifyCriticalSchema(client);
ensureFinancialIngestionSchemaHealthy(client, {
mode: resolveFinancialSchemaRepairMode(process.env.FINANCIAL_SCHEMA_REPAIR_MODE)
});
@@ -176,5 +199,6 @@ export const __dbInternals = {
hasColumn,
hasTable,
isVectorExtensionLoaded,
loadSqliteExtensions
loadSqliteExtensions,
verifyCriticalSchema
};

View File

@@ -296,7 +296,23 @@ function ensureResearchWorkspaceSchema(client: Database) {
`);
}
/**
 * Column names that must exist on the `filing_taxonomy_snapshot` table.
 *
 * Used to verify the table after the compat ALTER TABLE attempts and by the
 * startup schema check; a missing entry causes those checks to throw.
 * Declared `as const`, so the list is a readonly tuple of string literals.
 */
const TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS = [
'parser_engine',
'parser_version',
'taxonomy_regime',
'fiscal_pack',
'faithful_rows',
'surface_rows',
'detail_rows',
'kpi_rows',
'normalization_summary'
] as const;
function ensureTaxonomySnapshotCompat(client: Database) {
if (!hasTable(client, 'filing_taxonomy_snapshot')) {
return;
}
ensureColumns(client, 'filing_taxonomy_snapshot', [
{ name: 'parser_engine', sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_engine` text NOT NULL DEFAULT 'fiscal-xbrl';" },
{ name: 'parser_version', sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_version` text NOT NULL DEFAULT 'unknown';" },
@@ -309,8 +325,13 @@ function ensureTaxonomySnapshotCompat(client: Database) {
{ name: 'normalization_summary', sql: 'ALTER TABLE `filing_taxonomy_snapshot` ADD `normalization_summary` text;' }
]);
if (!hasTable(client, 'filing_taxonomy_snapshot')) {
return;
for (const columnName of TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS) {
if (!hasColumn(client, 'filing_taxonomy_snapshot', columnName)) {
throw new Error(
`Schema compat failed: filing_taxonomy_snapshot missing required column '${columnName}'. ` +
`Delete the database file and restart to rebuild schema.`
);
}
}
client.exec(`
@@ -451,6 +472,10 @@ export function ensureLocalSqliteSchema(client: Database) {
client.exec('CREATE INDEX IF NOT EXISTS `task_user_updated_idx` ON `task_run` (`user_id`, `updated_at`);');
client.exec(`CREATE UNIQUE INDEX IF NOT EXISTS task_active_resource_uidx
ON task_run (user_id, task_type, resource_key)
WHERE resource_key IS NOT NULL AND status IN ('queued', 'running');`);
ensureColumns(client, 'watchlist_item', [
{ name: 'category', sql: 'ALTER TABLE `watchlist_item` ADD `category` text;' },
{ name: 'tags', sql: 'ALTER TABLE `watchlist_item` ADD `tags` text;' },
@@ -529,5 +554,8 @@ export const __sqliteSchemaCompatInternals = {
applyBaseSchemaCompat,
applySqlFile,
hasColumn,
hasTable
hasTable,
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS
};
export { TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS };