refactor(taxonomy): remove legacy parser and add rollout checks
This commit is contained in:
200
scripts/report-taxonomy-health.ts
Normal file
200
scripts/report-taxonomy-health.ts
Normal file
@@ -0,0 +1,200 @@
|
||||
import { and, desc, eq, gte, lte } from 'drizzle-orm';
|
||||
|
||||
import { db } from '@/lib/server/db';
|
||||
import { filingTaxonomySnapshot } from '@/lib/server/db/schema';
|
||||
|
||||
/**
 * Command-line options for the taxonomy health report, as produced by
 * `parseOptions`. A `null` filter means the flag was absent or empty.
 */
type ScriptOptions = {
  /** Upper-cased value of `--ticker=`; matched for equality against the snapshot ticker. */
  ticker: string | null;
  /** Value of `--from=`; inclusive lower bound applied to `filing_date`. */
  from: string | null;
  /** Value of `--to=`; inclusive upper bound applied to `filing_date`. */
  to: string | null;
  /** Maximum number of rows printed in each sample section. */
  sampleLimit: number;
};
|
||||
|
||||
/**
 * Shape of one taxonomy snapshot row as selected by `loadRows` and consumed
 * by the counting and printing helpers in this script.
 */
type SnapshotRow = {
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: '10-K' | '10-Q';
  /** Parse outcome; the report treats the value `'failed'` as a failure sample. */
  parse_status: string;
  /** Error text for the parse, or `null` when none was recorded. */
  parse_error: string | null;
  /** Parser identifier; the report counts `'legacy-ts'` separately. */
  parser_engine: string;
  parser_version: string;
  /** Fiscal pack name; reported as the literal `null` when absent. */
  fiscal_pack: string | null;
  /** Normalization metadata; only the optional `warnings` list is read here. */
  normalization_summary: {
    warnings?: string[];
  } | null;
  /** Rows are ordered by this column, newest first, when loaded. */
  updated_at: string;
};
|
||||
|
||||
/**
 * Parses the script's command-line arguments into a `ScriptOptions` object.
 *
 * Recognized flags: `--ticker=`, `--from=`, `--to=`, `--sample-limit=`, and
 * `--help` / `-h` (prints usage and exits the process with status 0).
 * Empty filter values are treated as "not supplied" (`null`).
 *
 * `--sample-limit` must be a plain positive integer; anything else (for
 * example `3abc`, `2.9`, `0`) leaves the default of 5 in place and emits a
 * warning on stderr. Unrecognized arguments are also reported on stderr so a
 * mistyped flag does not silently produce an unfiltered report.
 *
 * @param argv Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns The parsed options, with defaults for anything not supplied.
 */
function parseOptions(argv: string[]): ScriptOptions {
  const options: ScriptOptions = {
    ticker: null,
    from: null,
    to: null,
    sampleLimit: 5
  };

  for (const arg of argv) {
    if (arg === '--help' || arg === '-h') {
      console.log('Report taxonomy snapshot health from the local database.');
      console.log('');
      console.log('Usage:');
      console.log(' bun run scripts/report-taxonomy-health.ts [--ticker=SYMBOL] [--from=YYYY-MM-DD] [--to=YYYY-MM-DD] [--sample-limit=N]');
      process.exit(0);
    }

    if (arg.startsWith('--ticker=')) {
      const value = arg.slice('--ticker='.length).trim().toUpperCase();
      options.ticker = value.length > 0 ? value : null;
      continue;
    }

    if (arg.startsWith('--from=')) {
      const value = arg.slice('--from='.length).trim();
      options.from = value.length > 0 ? value : null;
      continue;
    }

    if (arg.startsWith('--to=')) {
      const value = arg.slice('--to='.length).trim();
      options.to = value.length > 0 ? value : null;
      continue;
    }

    if (arg.startsWith('--sample-limit=')) {
      const raw = arg.slice('--sample-limit='.length).trim();
      // Require digits only: parseInt would accept a numeric prefix of garbage.
      const parsed = /^\d+$/.test(raw) ? Number(raw) : Number.NaN;
      if (Number.isSafeInteger(parsed) && parsed > 0) {
        options.sampleLimit = parsed;
      } else {
        console.warn(`[report-taxonomy-health] ignoring invalid --sample-limit value: ${raw}`);
      }
      continue;
    }

    console.warn(`[report-taxonomy-health] ignoring unrecognized argument: ${arg}`);
  }

  return options;
}
|
||||
|
||||
/**
 * Adds one to the tally stored under `key`, starting from zero when the key
 * has not been seen yet. Mutates `map` in place.
 */
function incrementCount(map: Map<string, number>, key: string): void {
  const current = map.get(key) ?? 0;
  map.set(key, current + 1);
}
|
||||
|
||||
/**
 * Prints a labelled tally to stdout, one `key=count` line per entry.
 *
 * Entries are listed by descending count; ties are broken by key using
 * `localeCompare`. An empty map prints a `(none)` placeholder instead.
 *
 * @param label Section name printed after the script prefix.
 * @param counts Tally to print; it is not modified.
 */
function printCountMap(label: string, counts: Map<string, number>): void {
  console.log(`[report-taxonomy-health] ${label}`);

  if (counts.size === 0) {
    console.log(' (none)');
    return;
  }

  // Sort a copy of the entries so the caller's map order is left untouched.
  const ordered = [...counts.entries()].sort(
    (left, right) => right[1] - left[1] || left[0].localeCompare(right[0])
  );

  for (const [key, count] of ordered) {
    console.log(` ${key}=${count}`);
  }
}
|
||||
|
||||
/**
 * Prints a labelled list of sample snapshot rows to stdout, one line per row.
 *
 * Each line shows ticker, filing type, filing date, filing id, parse status,
 * parser engine, fiscal pack, the comma-joined warnings, and the parse error.
 * Missing warnings and a missing error are shown as `-`; a missing fiscal
 * pack is shown as `null`. An empty list prints a `(none)` placeholder.
 *
 * @param label Section name printed after the script prefix.
 * @param rows Rows to print, in the order given.
 */
function printSamples(label: string, rows: SnapshotRow[]): void {
  console.log(`[report-taxonomy-health] ${label}`);

  if (rows.length === 0) {
    console.log(' (none)');
    return;
  }

  for (const row of rows) {
    const warnings = row.normalization_summary?.warnings ?? [];
    console.log(
      ` ${row.ticker} ${row.filing_type} ${row.filing_date} filing_id=${row.filing_id} status=${row.parse_status} parser=${row.parser_engine} pack=${row.fiscal_pack ?? 'null'} warnings=${warnings.join(',') || '-'} error=${row.parse_error ?? '-'}`
    );
  }
}
|
||||
|
||||
/**
 * Loads taxonomy snapshot rows from the database, newest `updated_at` first.
 *
 * Filters are combined with AND and applied only when the corresponding
 * option is set: ticker equality, `filing_date >= from`, `filing_date <= to`.
 * With no filters set, every snapshot row is returned.
 *
 * @param options Parsed script options supplying the optional filters.
 * @returns The matching rows, restricted to the columns this report uses.
 */
async function loadRows(options: ScriptOptions): Promise<SnapshotRow[]> {
  const conditions = [];

  if (options.ticker) {
    conditions.push(eq(filingTaxonomySnapshot.ticker, options.ticker));
  }

  // NOTE(review): the bounds are passed through as given, so the comparison
  // is only meaningful if they use the same format as `filing_date` — confirm.
  if (options.from) {
    conditions.push(gte(filingTaxonomySnapshot.filing_date, options.from));
  }

  if (options.to) {
    conditions.push(lte(filingTaxonomySnapshot.filing_date, options.to));
  }

  const whereClause = conditions.length > 0 ? and(...conditions) : undefined;

  const baseQuery = db
    .select({
      filing_id: filingTaxonomySnapshot.filing_id,
      ticker: filingTaxonomySnapshot.ticker,
      filing_date: filingTaxonomySnapshot.filing_date,
      filing_type: filingTaxonomySnapshot.filing_type,
      parse_status: filingTaxonomySnapshot.parse_status,
      parse_error: filingTaxonomySnapshot.parse_error,
      parser_engine: filingTaxonomySnapshot.parser_engine,
      parser_version: filingTaxonomySnapshot.parser_version,
      fiscal_pack: filingTaxonomySnapshot.fiscal_pack,
      normalization_summary: filingTaxonomySnapshot.normalization_summary,
      updated_at: filingTaxonomySnapshot.updated_at
    })
    .from(filingTaxonomySnapshot)
    .orderBy(desc(filingTaxonomySnapshot.updated_at));

  if (whereClause) {
    return await baseQuery.where(whereClause);
  }

  return await baseQuery;
}
|
||||
|
||||
/**
 * Script entry point: parses CLI options, loads the matching snapshot rows,
 * and prints the health report to stdout.
 *
 * The report contains, in order: the total snapshot count, the active
 * ticker / date-range filters (when set), three rollout counters
 * (`legacy-ts` parser rows, rows warning `surface_rows_deferred_to_typescript`,
 * rows warning `ts_compact_surface_fallback_used`), tallies by parse status,
 * parser engine, parser version, fiscal pack and warning, and finally up to
 * `sampleLimit` failed rows and up to `sampleLimit` rows carrying warnings.
 */
async function main(): Promise<void> {
  const options = parseOptions(process.argv.slice(2));
  const rows = await loadRows(options);

  // Single place for the "no summary / no warnings" fallback used below.
  const warningsOf = (row: SnapshotRow): string[] => row.normalization_summary?.warnings ?? [];

  const statusCounts = new Map<string, number>();
  const parserCounts = new Map<string, number>();
  const packCounts = new Map<string, number>();
  const warningCounts = new Map<string, number>();
  const parserVersionCounts = new Map<string, number>();

  for (const row of rows) {
    incrementCount(statusCounts, row.parse_status);
    incrementCount(parserCounts, row.parser_engine);
    incrementCount(parserVersionCounts, row.parser_version);
    incrementCount(packCounts, row.fiscal_pack ?? 'null');

    for (const warning of warningsOf(row)) {
      incrementCount(warningCounts, warning);
    }
  }

  // Samples keep the load order (newest first) and are capped per section.
  const failedRows = rows
    .filter((row) => row.parse_status === 'failed')
    .slice(0, options.sampleLimit);
  const warningRows = rows
    .filter((row) => warningsOf(row).length > 0)
    .slice(0, options.sampleLimit);

  // Rollout counters are per row, so a row is counted once per counter.
  const legacyCount = rows.filter((row) => row.parser_engine === 'legacy-ts').length;
  const deferredCount = rows.filter((row) => warningsOf(row).includes('surface_rows_deferred_to_typescript')).length;
  const fallbackCount = rows.filter((row) => warningsOf(row).includes('ts_compact_surface_fallback_used')).length;

  console.log(`[report-taxonomy-health] snapshots=${rows.length}`);
  if (options.ticker) {
    console.log(`[report-taxonomy-health] ticker=${options.ticker}`);
  }
  if (options.from || options.to) {
    console.log(`[report-taxonomy-health] range=${options.from ?? 'min'}..${options.to ?? 'max'}`);
  }
  console.log(`[report-taxonomy-health] legacy_ts=${legacyCount}`);
  console.log(`[report-taxonomy-health] deferred_to_typescript=${deferredCount}`);
  console.log(`[report-taxonomy-health] ts_compact_surface_fallback=${fallbackCount}`);

  printCountMap('parse_status', statusCounts);
  printCountMap('parser_engine', parserCounts);
  printCountMap('parser_version', parserVersionCounts);
  printCountMap('fiscal_pack', packCounts);
  printCountMap('warnings', warningCounts);
  printSamples('failed_samples', failedRows);
  printSamples('warning_samples', warningRows);
}
|
||||
|
||||
// Run the report. On any failure, print a one-line fatal message to stderr
// and set a non-zero exit code instead of letting the rejection go unhandled.
void main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error(`[report-taxonomy-health] fatal: ${message}`);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user