// Source viewer metadata: 181 lines, 7.6 KiB, TypeScript
import { describe, expect, it } from 'bun:test';
|
|
import { Database } from 'bun:sqlite';
|
|
import {
|
|
ensureFinancialIngestionSchemaHealthy,
|
|
inspectFinancialIngestionSchema,
|
|
withFinancialIngestionSchemaRetry
|
|
} from './financial-ingestion-schema';
|
|
|
|
/**
 * Creates the `company_financial_bundle` table on the given connection.
 *
 * Only the table itself is created; no indexes are added here, so each test
 * can decide which indexes exist before exercising the schema checks.
 *
 * @param client - Open SQLite connection that receives the DDL statement.
 */
function createBundleTable(client: Database): void {
  client.exec(`
    CREATE TABLE \`company_financial_bundle\` (
      \`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
      \`ticker\` text NOT NULL,
      \`surface_kind\` text NOT NULL,
      \`cadence\` text NOT NULL,
      \`bundle_version\` integer NOT NULL,
      \`source_snapshot_ids\` text NOT NULL,
      \`source_signature\` text NOT NULL,
      \`payload\` text NOT NULL,
      \`created_at\` text NOT NULL,
      \`updated_at\` text NOT NULL
    );
  `);
}
|
|
|
|
/**
 * Creates the `filing_taxonomy_snapshot` table on the given connection.
 *
 * Only the table itself is created; no indexes are added here, so each test
 * can decide which indexes exist before exercising the schema checks.
 *
 * @param client - Open SQLite connection that receives the DDL statement.
 */
function createSnapshotTable(client: Database): void {
  client.exec(`
    CREATE TABLE \`filing_taxonomy_snapshot\` (
      \`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
      \`filing_id\` integer NOT NULL,
      \`ticker\` text NOT NULL,
      \`filing_date\` text NOT NULL,
      \`filing_type\` text NOT NULL,
      \`parse_status\` text NOT NULL,
      \`updated_at\` text NOT NULL
    );
  `);
}
|
|
|
|
/**
 * Creates the `filing_taxonomy_metric_validation` table on the given connection.
 *
 * Only the table itself is created; no indexes are added here, so each test
 * can decide which indexes exist before exercising the schema checks.
 *
 * @param client - Open SQLite connection that receives the DDL statement.
 */
function createMetricValidationTable(client: Database): void {
  client.exec(`
    CREATE TABLE \`filing_taxonomy_metric_validation\` (
      \`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
      \`snapshot_id\` integer NOT NULL,
      \`metric_key\` text NOT NULL,
      \`status\` text NOT NULL,
      \`updated_at\` text NOT NULL
    );
  `);
}
|
|
|
|
/**
 * Creates the full set of indexes that the tests in this file use as the
 * "healthy" baseline for the three ingestion tables.
 *
 * Statements run one at a time, in the listed order, so a failure surfaces on
 * the exact index that could not be created. The target tables must already
 * exist on `client`.
 *
 * @param client - Open SQLite connection that receives the index DDL.
 */
function createHealthyIndexes(client: Database): void {
  const indexStatements = [
    // company_financial_bundle
    'CREATE UNIQUE INDEX `company_financial_bundle_uidx` ON `company_financial_bundle` (`ticker`,`surface_kind`,`cadence`);',
    'CREATE INDEX `company_financial_bundle_ticker_idx` ON `company_financial_bundle` (`ticker`,`updated_at`);',
    // filing_taxonomy_snapshot
    'CREATE UNIQUE INDEX `filing_taxonomy_snapshot_filing_uidx` ON `filing_taxonomy_snapshot` (`filing_id`);',
    'CREATE INDEX `filing_taxonomy_snapshot_ticker_date_idx` ON `filing_taxonomy_snapshot` (`ticker`,`filing_date`);',
    'CREATE INDEX `filing_taxonomy_snapshot_status_idx` ON `filing_taxonomy_snapshot` (`parse_status`);',
    // filing_taxonomy_metric_validation
    'CREATE UNIQUE INDEX `filing_taxonomy_metric_validation_uidx` ON `filing_taxonomy_metric_validation` (`snapshot_id`,`metric_key`);',
    'CREATE INDEX `filing_taxonomy_metric_validation_snapshot_idx` ON `filing_taxonomy_metric_validation` (`snapshot_id`);',
    'CREATE INDEX `filing_taxonomy_metric_validation_status_idx` ON `filing_taxonomy_metric_validation` (`snapshot_id`,`status`);'
  ] as const;

  for (const statement of indexStatements) {
    client.exec(statement);
  }
}
|
|
|
|
/**
 * Tests for the financial ingestion schema inspection / repair helpers.
 *
 * Every test builds its own in-memory SQLite database, optionally drops
 * indexes and inserts duplicate rows to simulate drift, and always closes the
 * connection in a `finally` block.
 */
describe('financial ingestion schema repair', () => {
  /** Builds all three tables plus the complete index set on `client`. */
  const seedHealthySchema = (client: Database): void => {
    createBundleTable(client);
    createSnapshotTable(client);
    createMetricValidationTable(client);
    createHealthyIndexes(client);
  };

  it('reports a healthy schema when all critical indexes exist', () => {
    const client = new Database(':memory:');
    try {
      seedHealthySchema(client);

      const report = inspectFinancialIngestionSchema(client);

      expect(report.ok).toBe(true);
      expect(report.missingIndexes).toEqual([]);
      expect(report.duplicateGroups).toBe(0);
    } finally {
      client.close();
    }
  });

  it('repairs missing company financial bundle indexes and dedupes rows', () => {
    const client = new Database(':memory:');
    try {
      seedHealthySchema(client);

      // Simulate drift: both bundle indexes are gone, which lets two rows with
      // the same (ticker, surface_kind, cadence) key be inserted.
      client.exec('DROP INDEX `company_financial_bundle_uidx`;');
      client.exec('DROP INDEX `company_financial_bundle_ticker_idx`;');
      client.exec(`
        INSERT INTO \`company_financial_bundle\` (
          \`ticker\`, \`surface_kind\`, \`cadence\`, \`bundle_version\`, \`source_snapshot_ids\`, \`source_signature\`, \`payload\`, \`created_at\`, \`updated_at\`
        ) VALUES
          ('MSFT', 'income_statement', 'annual', 14, '[]', 'old', '{}', '2026-03-12T10:00:00.000Z', '2026-03-12T10:00:00.000Z'),
          ('MSFT', 'income_statement', 'annual', 14, '[]', 'new', '{}', '2026-03-12T11:00:00.000Z', '2026-03-12T11:00:00.000Z');
      `);

      const result = ensureFinancialIngestionSchemaHealthy(client, { mode: 'auto' });
      const rows = client.query('SELECT `source_signature` FROM `company_financial_bundle`').all() as Array<{ source_signature: string }>;
      const indexes = client.query('PRAGMA index_list(`company_financial_bundle`)').all() as Array<{ name: string }>;

      expect(result.ok).toBe(true);
      expect(result.mode).toBe('repaired');
      // Only the row with the later `updated_at` is expected to remain.
      expect(rows).toEqual([{ source_signature: 'new' }]);
      expect(indexes.some((row) => row.name === 'company_financial_bundle_uidx')).toBe(true);
      expect(indexes.some((row) => row.name === 'company_financial_bundle_ticker_idx')).toBe(true);
    } finally {
      client.close();
    }
  });

  it('dedupes filing taxonomy snapshots and clears bundle cache before recreating indexes', () => {
    const client = new Database(':memory:');
    try {
      seedHealthySchema(client);

      // Without the unique index on `filing_id`, two snapshots for filing 10
      // can coexist.
      client.exec('DROP INDEX `filing_taxonomy_snapshot_filing_uidx`;');
      client.exec(`
        INSERT INTO \`filing_taxonomy_snapshot\` (
          \`filing_id\`, \`ticker\`, \`filing_date\`, \`filing_type\`, \`parse_status\`, \`updated_at\`
        ) VALUES
          (10, 'MSFT', '2026-03-12', '10-K', 'ready', '2026-03-12T10:00:00.000Z'),
          (10, 'MSFT', '2026-03-12', '10-K', 'ready', '2026-03-12T11:00:00.000Z');
      `);
      // A bundle row whose `source_snapshot_ids` reference both snapshot rows.
      client.exec(`
        INSERT INTO \`company_financial_bundle\` (
          \`ticker\`, \`surface_kind\`, \`cadence\`, \`bundle_version\`, \`source_snapshot_ids\`, \`source_signature\`, \`payload\`, \`created_at\`, \`updated_at\`
        ) VALUES
          ('MSFT', 'income_statement', 'annual', 14, '[1,2]', 'cached', '{}', '2026-03-12T11:00:00.000Z', '2026-03-12T11:00:00.000Z');
      `);

      const result = ensureFinancialIngestionSchemaHealthy(client, { mode: 'auto' });
      const snapshotCount = client.query('SELECT COUNT(*) AS count FROM `filing_taxonomy_snapshot`').get() as { count: number };
      const bundleCount = client.query('SELECT COUNT(*) AS count FROM `company_financial_bundle`').get() as { count: number };

      expect(result.ok).toBe(true);
      expect(result.repair?.snapshotDuplicateRowsDeleted).toBe(1);
      expect(result.repair?.bundleCacheCleared).toBe(true);
      expect(snapshotCount.count).toBe(1);
      expect(bundleCount.count).toBe(0);
    } finally {
      client.close();
    }
  });

  it('retries once after repairing the schema for ON CONFLICT drift errors', async () => {
    const client = new Database(':memory:');
    try {
      seedHealthySchema(client);
      client.exec('DROP INDEX `company_financial_bundle_uidx`;');
      client.exec('DROP INDEX `company_financial_bundle_ticker_idx`;');

      let calls = 0;
      const result = await withFinancialIngestionSchemaRetry({
        client,
        context: 'test-retry',
        operation: async () => {
          calls += 1;
          // Fail only the first attempt, with the ON CONFLICT error text the
          // test title refers to.
          if (calls === 1) {
            throw new Error('ON CONFLICT clause does not match any PRIMARY KEY or UNIQUE constraint');
          }

          return 'ok';
        }
      });

      expect(result).toBe('ok');
      expect(calls).toBe(2);

      const indexes = client.query('PRAGMA index_list(`company_financial_bundle`)').all() as Array<{ name: string }>;
      expect(indexes.some((row) => row.name === 'company_financial_bundle_uidx')).toBe(true);
    } finally {
      client.close();
    }
  });
});
|