import { and, desc, eq, gte, lte } from 'drizzle-orm';
import { db } from '@/lib/server/db';
import { filingTaxonomySnapshot } from '@/lib/server/db/schema';

/** Command-line options accepted by this script. `null` means "no filter". */
type ScriptOptions = {
  ticker: string | null;
  from: string | null;
  to: string | null;
  sampleLimit: number;
};

/** The subset of `filingTaxonomySnapshot` columns selected by `loadRows`. */
type SnapshotRow = {
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: '10-K' | '10-Q';
  parse_status: string;
  parse_error: string | null;
  parser_engine: string;
  parser_version: string;
  fiscal_pack: string | null;
  normalization_summary: {
    warnings?: string[];
  } | null;
  updated_at: string;
};

/**
 * Parses `--key=value` style arguments into a {@link ScriptOptions} object.
 *
 * - `--help` / `-h` prints usage and exits the process with code 0.
 * - `--ticker=` is trimmed and upper-cased; `--from=` / `--to=` are trimmed.
 *   An empty value resets the option to `null`.
 * - `--sample-limit=` is applied only when it parses to a positive integer;
 *   otherwise the current value (default 5) is kept.
 * - Arguments matching none of the above are ignored.
 *
 * @param argv Arguments after the script path (e.g. `process.argv.slice(2)`).
 * @returns The resolved options.
 */
function parseOptions(argv: string[]): ScriptOptions {
  const options: ScriptOptions = {
    ticker: null,
    from: null,
    to: null,
    sampleLimit: 5
  };

  for (const arg of argv) {
    if (arg === '--help' || arg === '-h') {
      console.log('Report taxonomy snapshot health from the local database.');
      console.log('');
      console.log('Usage:');
      console.log(' bun run scripts/report-taxonomy-health.ts [--ticker=SYMBOL] [--from=YYYY-MM-DD] [--to=YYYY-MM-DD] [--sample-limit=N]');
      process.exit(0);
    }

    if (arg.startsWith('--ticker=')) {
      const value = arg.slice('--ticker='.length).trim().toUpperCase();
      options.ticker = value.length > 0 ? value : null;
      continue;
    }

    if (arg.startsWith('--from=')) {
      const value = arg.slice('--from='.length).trim();
      options.from = value.length > 0 ? value : null;
      continue;
    }

    if (arg.startsWith('--to=')) {
      const value = arg.slice('--to='.length).trim();
      options.to = value.length > 0 ? value : null;
      continue;
    }

    if (arg.startsWith('--sample-limit=')) {
      const parsed = Number.parseInt(arg.slice('--sample-limit='.length), 10);
      // NaN and non-positive values are rejected so the limit is always >= 1.
      if (Number.isFinite(parsed) && parsed > 0) {
        options.sampleLimit = parsed;
      }
    }
  }

  return options;
}

/**
 * Adds one to the counter stored under `key`, starting from zero when absent.
 *
 * @param map Counter map, mutated in place.
 * @param key Bucket to increment.
 */
function incrementCount(map: Map<string, number>, key: string): void {
  map.set(key, (map.get(key) ?? 0) + 1);
}

/**
 * Prints a labelled histogram, one `key=count` line per entry, ordered by
 * count descending and then by key (`localeCompare`) for ties.
 *
 * @param label Heading printed after the script prefix.
 * @param counts Counter map to print; an empty map prints `(none)`.
 */
function printCountMap(label: string, counts: Map<string, number>): void {
  console.log(`[report-taxonomy-health] ${label}`);
  if (counts.size === 0) {
    console.log(' (none)');
    return;
  }

  const ordered = [...counts.entries()].sort(
    (left, right) => right[1] - left[1] || left[0].localeCompare(right[0])
  );
  for (const [key, count] of ordered) {
    console.log(` ${key}=${count}`);
  }
}

/**
 * Prints one summary line per snapshot row under a labelled heading.
 *
 * @param label Heading printed after the script prefix.
 * @param rows Rows to print; an empty list prints `(none)`.
 */
function printSamples(label: string, rows: SnapshotRow[]): void {
  console.log(`[report-taxonomy-health] ${label}`);
  if (rows.length === 0) {
    console.log(' (none)');
    return;
  }

  for (const row of rows) {
    const warnings = row.normalization_summary?.warnings ?? [];
    console.log(
      ` ${row.ticker} ${row.filing_type} ${row.filing_date} filing_id=${row.filing_id} status=${row.parse_status} parser=${row.parser_engine} pack=${row.fiscal_pack ?? 'null'} warnings=${warnings.join(',') || '-'} error=${row.parse_error ?? '-'}`
    );
  }
}

/**
 * Loads taxonomy snapshot rows, newest `updated_at` first, applying whichever
 * of the ticker / from / to filters are set on `options`.
 *
 * @param options Parsed script options; `null` filters are skipped.
 * @returns The matching rows.
 */
async function loadRows(options: ScriptOptions): Promise<SnapshotRow[]> {
  // Element type is inferred from the pushed drizzle filter expressions.
  const conditions = [];

  if (options.ticker) {
    conditions.push(eq(filingTaxonomySnapshot.ticker, options.ticker));
  }

  if (options.from) {
    conditions.push(gte(filingTaxonomySnapshot.filing_date, options.from));
  }

  if (options.to) {
    conditions.push(lte(filingTaxonomySnapshot.filing_date, options.to));
  }

  const whereClause = conditions.length > 0 ? and(...conditions) : undefined;

  const baseQuery = db
    .select({
      filing_id: filingTaxonomySnapshot.filing_id,
      ticker: filingTaxonomySnapshot.ticker,
      filing_date: filingTaxonomySnapshot.filing_date,
      filing_type: filingTaxonomySnapshot.filing_type,
      parse_status: filingTaxonomySnapshot.parse_status,
      parse_error: filingTaxonomySnapshot.parse_error,
      parser_engine: filingTaxonomySnapshot.parser_engine,
      parser_version: filingTaxonomySnapshot.parser_version,
      fiscal_pack: filingTaxonomySnapshot.fiscal_pack,
      normalization_summary: filingTaxonomySnapshot.normalization_summary,
      updated_at: filingTaxonomySnapshot.updated_at
    })
    .from(filingTaxonomySnapshot)
    .orderBy(desc(filingTaxonomySnapshot.updated_at));

  if (whereClause) {
    return await baseQuery.where(whereClause);
  }

  return await baseQuery;
}

/**
 * Script entry point: parses CLI options, loads the matching snapshots and
 * prints aggregate counts followed by sample failed / warning rows.
 */
async function main(): Promise<void> {
  const options = parseOptions(process.argv.slice(2));
  const rows = await loadRows(options);

  const statusCounts = new Map<string, number>();
  const parserCounts = new Map<string, number>();
  const packCounts = new Map<string, number>();
  const warningCounts = new Map<string, number>();
  const parserVersionCounts = new Map<string, number>();

  const failedRows: SnapshotRow[] = [];
  const warningRows: SnapshotRow[] = [];
  let legacyCount = 0;
  let deferredCount = 0;
  let fallbackCount = 0;

  // Single pass over the rows: histograms, per-row flags and samples.
  for (const row of rows) {
    const warnings = row.normalization_summary?.warnings ?? [];

    incrementCount(statusCounts, row.parse_status);
    incrementCount(parserCounts, row.parser_engine);
    incrementCount(parserVersionCounts, row.parser_version);
    incrementCount(packCounts, row.fiscal_pack ?? 'null');
    for (const warning of warnings) {
      incrementCount(warningCounts, warning);
    }

    // Samples keep the first `sampleLimit` matches in query order.
    if (row.parse_status === 'failed' && failedRows.length < options.sampleLimit) {
      failedRows.push(row);
    }
    if (warnings.length > 0 && warningRows.length < options.sampleLimit) {
      warningRows.push(row);
    }

    // These count rows, not warning occurrences, hence `includes`.
    if (row.parser_engine === 'legacy-ts') {
      legacyCount += 1;
    }
    if (warnings.includes('surface_rows_deferred_to_typescript')) {
      deferredCount += 1;
    }
    if (warnings.includes('ts_compact_surface_fallback_used')) {
      fallbackCount += 1;
    }
  }

  console.log(`[report-taxonomy-health] snapshots=${rows.length}`);
  if (options.ticker) {
    console.log(`[report-taxonomy-health] ticker=${options.ticker}`);
  }
  if (options.from || options.to) {
    console.log(`[report-taxonomy-health] range=${options.from ?? 'min'}..${options.to ?? 'max'}`);
  }
  console.log(`[report-taxonomy-health] legacy_ts=${legacyCount}`);
  console.log(`[report-taxonomy-health] deferred_to_typescript=${deferredCount}`);
  console.log(`[report-taxonomy-health] ts_compact_surface_fallback=${fallbackCount}`);

  printCountMap('parse_status', statusCounts);
  printCountMap('parser_engine', parserCounts);
  printCountMap('parser_version', parserVersionCounts);
  printCountMap('fiscal_pack', packCounts);
  printCountMap('warnings', warningCounts);
  printSamples('failed_samples', failedRows);
  printSamples('warning_samples', warningRows);
}

// Report any failure and signal it through the exit code without forcing an
// immediate exit.
void main().catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error(`[report-taxonomy-health] fatal: ${message}`);
  process.exitCode = 1;
});