import { asc, and, eq, gt, inArray } from 'drizzle-orm'; import { db } from '@/lib/server/db'; import { filing } from '@/lib/server/db/schema'; import { fetchFilingMetricsForFilings } from '@/lib/server/sec'; const FINANCIAL_FORMS = ['10-K', '10-Q'] as const; const PAGE_SIZE = 500; const REQUEST_DELAY_MS = 220; type FinancialForm = (typeof FINANCIAL_FORMS)[number]; type FilingMetrics = { revenue: number | null; netIncome: number | null; totalAssets: number | null; cash: number | null; debt: number | null; }; type FilingRow = { id: number; ticker: string; cik: string; accessionNumber: string; filingDate: string; filingType: FinancialForm; metrics: FilingMetrics | null; }; type ScriptOptions = { apply: boolean; ticker: string | null; limitCiks: number | null; allowNullOverwrite: boolean; }; type ScriptSummary = { scannedRows: number; scannedCiks: number; changedRows: number; unchangedRows: number; skippedRows: number; skippedCiks: number; }; function parseOptions(argv: string[]): ScriptOptions { const options: ScriptOptions = { apply: false, ticker: null, limitCiks: null, allowNullOverwrite: false }; for (const arg of argv) { if (arg === '--apply') { options.apply = true; continue; } if (arg === '--allow-null-overwrite') { options.allowNullOverwrite = true; continue; } if (arg.startsWith('--ticker=')) { const raw = arg.slice('--ticker='.length).trim(); options.ticker = raw ? raw.toUpperCase() : null; continue; } if (arg.startsWith('--limit-ciks=')) { const raw = Number(arg.slice('--limit-ciks='.length)); if (Number.isFinite(raw) && raw > 0) { options.limitCiks = Math.trunc(raw); } continue; } if (arg === '--help' || arg === '-h') { printUsage(); process.exit(0); } } return options; } function printUsage() { console.log( [ 'One-time backfill of filing metrics using per-filing SEC mapping.', '', 'Usage:', ' bun run scripts/backfill-filing-metrics.ts [--apply] [--ticker=SYMBOL] [--limit-ciks=N] [--allow-null-overwrite]', '', 'Flags:', ' --apply Persist updates. Without this flag, script runs as a dry run.', ' --ticker=SYMBOL Restrict processing to one ticker (e.g. AAPL).', ' --limit-ciks=N Process only first N CIK groups (useful for staged rollouts).', ' --allow-null-overwrite Allow replacing non-null metrics with all-null API results.', ' --help, -h Show help.' ].join('\n') ); } function toMetrics(value: FilingMetrics | null | undefined): FilingMetrics | null { if (!value) { return null; } return { revenue: value.revenue ?? null, netIncome: value.netIncome ?? null, totalAssets: value.totalAssets ?? null, cash: value.cash ?? null, debt: value.debt ?? null }; } function hasAnyMetricValue(metrics: FilingMetrics | null | undefined) { const normalized = toMetrics(metrics); if (!normalized) { return false; } return Object.values(normalized).some((value) => value !== null && Number.isFinite(value)); } function isAllNull(metrics: FilingMetrics | null | undefined) { const normalized = toMetrics(metrics); if (!normalized) { return true; } return Object.values(normalized).every((value) => value === null); } function metricsEqual(left: FilingMetrics | null | undefined, right: FilingMetrics | null | undefined) { const a = toMetrics(left); const b = toMetrics(right); if (!a && !b) { return true; } if (!a || !b) { return false; } return a.revenue === b.revenue && a.netIncome === b.netIncome && a.totalAssets === b.totalAssets && a.cash === b.cash && a.debt === b.debt; } async function loadFinancialRows(tickerFilter: string | null): Promise { const rows: FilingRow[] = []; let cursor = 0; for (;;) { const conditions = [ inArray(filing.filing_type, [...FINANCIAL_FORMS]), gt(filing.id, cursor) ]; if (tickerFilter) { conditions.push(eq(filing.ticker, tickerFilter)); } const page = await db .select({ id: filing.id, ticker: filing.ticker, cik: filing.cik, accessionNumber: filing.accession_number, filingDate: filing.filing_date, filingType: filing.filing_type, metrics: filing.metrics }) .from(filing) .where(and(...conditions)) .orderBy(asc(filing.id)) .limit(PAGE_SIZE); if (page.length === 0) { break; } for (const row of page) { if (row.filingType !== '10-K' && row.filingType !== '10-Q') { continue; } rows.push({ ...row, filingType: row.filingType, metrics: toMetrics(row.metrics) }); } cursor = page[page.length - 1]?.id ?? cursor; } return rows; } function groupByCik(rows: FilingRow[]) { const grouped = new Map(); for (const row of rows) { const group = grouped.get(row.cik); if (group) { group.push(row); continue; } grouped.set(row.cik, [row]); } return grouped; } async function runBackfill(options: ScriptOptions): Promise { const startedAt = new Date(); const rows = await loadFinancialRows(options.ticker); const grouped = groupByCik(rows); const entries = [...grouped.entries()]; const limitedEntries = options.limitCiks ? entries.slice(0, options.limitCiks) : entries; const summary: ScriptSummary = { scannedRows: 0, scannedCiks: 0, changedRows: 0, unchangedRows: 0, skippedRows: 0, skippedCiks: 0 }; console.log(`[backfill-filing-metrics] mode=${options.apply ? 'apply' : 'dry-run'} rows=${rows.length} ciks=${limitedEntries.length}`); if (options.ticker) { console.log(`[backfill-filing-metrics] ticker filter=${options.ticker}`); } for (const [index, [cik, filingsForCik]] of limitedEntries.entries()) { summary.scannedCiks += 1; summary.scannedRows += filingsForCik.length; const ticker = filingsForCik[0]?.ticker ?? 'UNKNOWN'; console.log(`[backfill-filing-metrics] [${index + 1}/${limitedEntries.length}] CIK=${cik} ticker=${ticker} filings=${filingsForCik.length}`); const metricsMap = await fetchFilingMetricsForFilings( cik, ticker, filingsForCik.map((entry) => ({ accessionNumber: entry.accessionNumber, filingDate: entry.filingDate, filingType: entry.filingType })) ); const fetchedMetrics = filingsForCik.map((entry) => metricsMap.get(entry.accessionNumber) ?? null); const apiReturnedAllNull = fetchedMetrics.every((entry) => isAllNull(entry)); const cikHasExistingValues = filingsForCik.some((entry) => hasAnyMetricValue(entry.metrics)); if (apiReturnedAllNull && cikHasExistingValues && !options.allowNullOverwrite) { summary.skippedCiks += 1; summary.skippedRows += filingsForCik.length; console.warn(`[backfill-filing-metrics] skipped CIK=${cik}: API returned all-null metrics; pass --allow-null-overwrite to force replacement`); await Bun.sleep(REQUEST_DELAY_MS); continue; } const pendingUpdates: Array<{ id: number; metrics: FilingMetrics | null }> = []; for (const entry of filingsForCik) { const nextMetrics = toMetrics(metricsMap.get(entry.accessionNumber) ?? null); if (metricsEqual(entry.metrics, nextMetrics)) { summary.unchangedRows += 1; continue; } summary.changedRows += 1; pendingUpdates.push({ id: entry.id, metrics: nextMetrics }); } if (!options.apply || pendingUpdates.length === 0) { console.log(`[backfill-filing-metrics] changes=${pendingUpdates.length} (not persisted)`); await Bun.sleep(REQUEST_DELAY_MS); continue; } for (const update of pendingUpdates) { await db .update(filing) .set({ metrics: update.metrics, updated_at: new Date().toISOString() }) .where(eq(filing.id, update.id)); } console.log(`[backfill-filing-metrics] updated rows=${pendingUpdates.length}`); await Bun.sleep(REQUEST_DELAY_MS); } const elapsedMs = Date.now() - startedAt.getTime(); console.log( [ '[backfill-filing-metrics] complete', ` mode=${options.apply ? 'apply' : 'dry-run'}`, ` scanned_ciks=${summary.scannedCiks}`, ` scanned_rows=${summary.scannedRows}`, ` changed_rows=${summary.changedRows}`, ` unchanged_rows=${summary.unchangedRows}`, ` skipped_ciks=${summary.skippedCiks}`, ` skipped_rows=${summary.skippedRows}`, ` elapsed_ms=${elapsedMs}` ].join('\n') ); return summary; } async function main() { const options = parseOptions(Bun.argv.slice(2)); await runBackfill(options); } main().catch((error) => { const message = error instanceof Error ? error.stack ?? error.message : String(error); console.error('[backfill-filing-metrics] failed\n' + message); process.exitCode = 1; });