import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
import { listFilingsRecords, updateFilingMetricsById } from '@/lib/server/repos/filings';
import {
  getFilingTaxonomySnapshotByFilingId,
  normalizeFilingTaxonomySnapshotPayload,
  upsertFilingTaxonomySnapshot
} from '@/lib/server/repos/filing-taxonomy';

/** Command-line options accepted by the backfill script. */
type ScriptOptions = {
  /** When false (the default) the script runs as a dry-run and writes nothing. */
  apply: boolean;
  /** Upper-cased ticker to restrict the run to, or null for no ticker filter. */
  ticker: string | null;
  /** '10y' keeps only filings dated within the last ten years; 'all' keeps everything. */
  window: '10y' | 'all';
  /** Maximum number of filings to process, or null for no cap. */
  limit: number | null;
  /** When true, snapshots are re-hydrated even if the stored one is considered fresh. */
  refresh: boolean;
};

/** Counters reported at the end of a run. */
type ScriptSummary = {
  scanned: number;
  wouldWrite: number;
  written: number;
  skippedFresh: number;
  failed: number;
};

/** Camel-cased projection of a filing record, limited to the fields this script uses. */
type FilingRow = {
  id: number;
  ticker: string;
  cik: string;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  filingUrl: string | null;
  primaryDocument: string | null;
  updatedAt: string;
};

// Pause (ms) after every filing that was not skipped as fresh.
// NOTE(review): presumably throttles upstream requests made during hydration — confirm.
const REQUEST_DELAY_MS = 120;

/**
 * Parses CLI arguments into {@link ScriptOptions}.
 *
 * Unrecognised arguments are ignored. `--help` / `-h` prints usage and exits
 * the process with code 0.
 *
 * @param argv Arguments after the script path (e.g. `process.argv.slice(2)`).
 * @returns The parsed options, with defaults for anything not supplied.
 */
function parseOptions(argv: string[]): ScriptOptions {
  const options: ScriptOptions = {
    apply: false,
    ticker: null,
    window: '10y',
    limit: null,
    refresh: false
  };

  for (const arg of argv) {
    if (arg === '--apply') {
      options.apply = true;
      continue;
    }

    if (arg === '--refresh') {
      options.refresh = true;
      continue;
    }

    if (arg.startsWith('--ticker=')) {
      const value = arg.slice('--ticker='.length).trim().toUpperCase();
      // An empty value (`--ticker=`) means "no ticker filter".
      options.ticker = value.length > 0 ? value : null;
      continue;
    }

    if (arg.startsWith('--window=')) {
      const value = arg.slice('--window='.length).trim().toLowerCase();
      // Anything other than 'all' falls back to the default ten-year window.
      options.window = value === 'all' ? 'all' : '10y';
      continue;
    }

    if (arg.startsWith('--limit=')) {
      const parsed = Number.parseInt(arg.slice('--limit='.length), 10);
      // Non-numeric, zero, or negative limits are ignored (no cap is applied).
      if (Number.isFinite(parsed) && parsed > 0) {
        options.limit = parsed;
      }
      continue;
    }

    if (arg === '--help' || arg === '-h') {
      console.log('Backfill filing taxonomy snapshots (Financial Statements V3).');
      console.log('');
      console.log('Usage:');
      console.log(' bun run scripts/backfill-taxonomy-snapshots.ts [--apply] [--ticker=SYMBOL] [--window=10y|all] [--limit=N] [--refresh]');
      process.exit(0);
    }
  }

  return options;
}

/**
 * @returns The UTC calendar date exactly ten years before now, as `YYYY-MM-DD`.
 */
function tenYearsAgoIso(): string {
  const date = new Date();
  date.setUTCFullYear(date.getUTCFullYear() - 10);
  return date.toISOString().slice(0, 10);
}

/**
 * Loads the filings to process.
 *
 * Issues a single `listFilingsRecords` call (with `limit: 250` and the
 * optional ticker), keeps only 10-K / 10-Q filings, applies the date window,
 * and truncates to `options.limit` when one is set.
 *
 * NOTE(review): `listFilingsRecords` is called without any offset/cursor
 * argument, so only the rows returned by that one call are ever considered.
 * If the repo supports paging, it should be wired in here.
 *
 * @param options Parsed script options.
 * @returns Filing rows in the order returned by the repository.
 */
async function loadFilings(options: ScriptOptions): Promise<FilingRow[]> {
  const page = await listFilingsRecords({
    ticker: options.ticker ?? undefined,
    limit: 250
  });

  // Computed once rather than per filing; null means "no date cutoff".
  const cutoff = options.window === 'all' ? null : tenYearsAgoIso();

  const rows: FilingRow[] = page
    .filter((filing): filing is typeof filing & { filing_type: '10-K' | '10-Q' } => {
      return filing.filing_type === '10-K' || filing.filing_type === '10-Q';
    })
    // Assumes filing_date is an ISO `YYYY-MM-DD` string so that string
    // comparison matches date order — TODO confirm against the repo schema.
    .filter((filing) => cutoff === null || filing.filing_date >= cutoff)
    .map((filing) => ({
      id: filing.id,
      ticker: filing.ticker,
      cik: filing.cik,
      accessionNumber: filing.accession_number,
      filingDate: filing.filing_date,
      filingType: filing.filing_type,
      filingUrl: filing.filing_url,
      primaryDocument: filing.primary_document ?? null,
      updatedAt: filing.updated_at
    }));

  if (options.limit !== null && options.limit > 0) {
    return rows.slice(0, options.limit);
  }

  return rows;
}

/**
 * Hydrates a taxonomy snapshot for every selected filing and, in apply mode,
 * persists the snapshot and the filing's derived metrics.
 *
 * A filing is skipped as "fresh" when a stored snapshot exists whose
 * `updated_at` is not older than the filing's `updatedAt`, unless
 * `options.refresh` is set. Errors raised while hydrating or writing a single
 * filing are logged and counted in `failed`; the run continues with the next
 * filing. Errors from loading filings or from the freshness lookup are not
 * caught here and propagate to the caller.
 *
 * @param options Parsed script options.
 * @returns Counters describing what was scanned, skipped, written and failed.
 */
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
  const rows = await loadFilings(options);
  const summary: ScriptSummary = {
    scanned: 0,
    wouldWrite: 0,
    written: 0,
    skippedFresh: 0,
    failed: 0
  };

  console.log(`[backfill-taxonomy-snapshots] mode=${options.apply ? 'apply' : 'dry-run'} window=${options.window} filings=${rows.length}`);
  if (options.ticker) {
    console.log(`[backfill-taxonomy-snapshots] ticker=${options.ticker}`);
  }

  for (const [index, row] of rows.entries()) {
    summary.scanned += 1;
    console.log(`[backfill-taxonomy-snapshots] [${index + 1}/${rows.length}] ${row.ticker} ${row.filingType} ${row.filingDate} ${row.accessionNumber}`);

    const existing = await getFilingTaxonomySnapshotByFilingId(row.id);
    const isFresh = existing && Date.parse(existing.updated_at) >= Date.parse(row.updatedAt);

    if (isFresh && !options.refresh) {
      summary.skippedFresh += 1;
      continue;
    }

    try {
      const snapshot = await hydrateFilingTaxonomySnapshot({
        filingId: row.id,
        ticker: row.ticker,
        cik: row.cik,
        accessionNumber: row.accessionNumber,
        filingDate: row.filingDate,
        filingType: row.filingType,
        filingUrl: row.filingUrl,
        primaryDocument: row.primaryDocument
      });

      // Counted in both modes: a successful hydration is something that
      // would be (dry-run) or is about to be (apply) written.
      summary.wouldWrite += 1;

      if (options.apply) {
        // Normalized fields override the raw snapshot's fields of the same name.
        const normalizedSnapshot = {
          ...snapshot,
          ...normalizeFilingTaxonomySnapshotPayload(snapshot)
        };

        await upsertFilingTaxonomySnapshot(normalizedSnapshot);
        await updateFilingMetricsById(row.id, normalizedSnapshot.derived_metrics);
        summary.written += 1;
      }
    } catch (error) {
      summary.failed += 1;
      const reason = error instanceof Error ? error.message : 'unknown error';
      console.error(`[backfill-taxonomy-snapshots] failed for ${row.accessionNumber}: ${reason}`);
    }

    // Runs after both successful and failed attempts, but not after fresh skips.
    await Bun.sleep(REQUEST_DELAY_MS);
  }

  return summary;
}

/**
 * Script entry point: parses arguments, runs the backfill, and prints the
 * summary. Any error that escapes the run is logged as fatal and sets the
 * process exit code to 1.
 */
async function main(): Promise<void> {
  const options = parseOptions(process.argv.slice(2));
  const startedAt = Date.now();

  try {
    const summary = await runBackfill(options);
    const durationSec = ((Date.now() - startedAt) / 1000).toFixed(1);

    console.log('[backfill-taxonomy-snapshots] summary');
    console.log(` scanned=${summary.scanned}`);
    console.log(` wouldWrite=${summary.wouldWrite}`);
    console.log(` written=${summary.written}`);
    console.log(` skippedFresh=${summary.skippedFresh}`);
    console.log(` failed=${summary.failed}`);
    console.log(` durationSec=${durationSec}`);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(`[backfill-taxonomy-snapshots] fatal: ${message}`);
    process.exitCode = 1;
  }
}

void main();