import { and, asc, eq, gt, gte, inArray } from 'drizzle-orm';
import type { Filing } from '@/lib/types';
import { db } from '@/lib/server/db';
import { filing } from '@/lib/server/db/schema';
import {
  getFilingStatementSnapshotByFilingId,
  upsertFilingStatementSnapshot
} from '@/lib/server/repos/filing-statements';
import { hydrateFilingStatementSnapshot } from '@/lib/server/sec';

/** Filing form types that this script backfills snapshots for. */
const FINANCIAL_FORMS = ['10-K', '10-Q'] as const;

/** Maximum number of filing rows fetched per database query. */
const PAGE_SIZE = 200;

/**
 * Pause (milliseconds) after every hydration attempt.
 * NOTE(review): presumably throttles upstream requests made by the hydrator — confirm.
 */
const REQUEST_DELAY_MS = 120;

/** Prefix shared by every log line this script emits. */
const LOG_PREFIX = '[backfill-filing-statements]';

type FinancialForm = (typeof FINANCIAL_FORMS)[number];

/** Projection of a `filing` row carrying only the columns the backfill needs. */
type FilingRow = {
  id: number;
  ticker: string;
  cik: string;
  accessionNumber: string;
  filingDate: string;
  filingType: FinancialForm;
  filingUrl: string | null;
  primaryDocument: string | null;
  metrics: Filing['metrics'];
  updatedAt: string;
};

/** Parsed command-line options. */
type ScriptOptions = {
  /** Persist snapshots when true; otherwise run as a dry run. */
  apply: boolean;
  /** Upper-cased ticker to restrict to, or null for all tickers. */
  ticker: string | null;
  /** History window: the last ten years, or everything. */
  window: '10y' | 'all';
  /** Maximum number of filings to load, or null for no cap. */
  limit: number | null;
  /** Rehydrate even when the stored snapshot looks fresh. */
  refresh: boolean;
};

/** Counters reported at the end of a run. */
type ScriptSummary = {
  scanned: number;
  wouldWrite: number;
  written: number;
  skippedFresh: number;
  failed: number;
};

/** Narrows an arbitrary form string to one of the supported financial forms. */
function isFinancialForm(value: string): value is FinancialForm {
  return (FINANCIAL_FORMS as readonly string[]).includes(value);
}

/**
 * Returns the UTC calendar date exactly ten years before now as `YYYY-MM-DD`.
 */
function tenYearsAgoIso(): string {
  const date = new Date();
  date.setUTCFullYear(date.getUTCFullYear() - 10);
  // toISOString() is always UTC; the first ten characters are the date part.
  return date.toISOString().slice(0, 10);
}

/**
 * Parses command-line arguments into {@link ScriptOptions}.
 *
 * Invalid values and unrecognized arguments never abort the run: the default
 * (or previously parsed) value is kept and a warning is written to stderr so
 * that a typo cannot silently change the scope of the backfill.
 * `--help` / `-h` prints usage and exits the process with status 0.
 *
 * @param argv Arguments after the script path (e.g. `process.argv.slice(2)`).
 * @returns The resolved options.
 */
function parseOptions(argv: string[]): ScriptOptions {
  const options: ScriptOptions = {
    apply: false,
    ticker: null,
    window: '10y',
    limit: null,
    refresh: false
  };

  for (const arg of argv) {
    if (arg === '--help' || arg === '-h') {
      printUsage();
      process.exit(0);
    }

    if (arg === '--apply') {
      options.apply = true;
      continue;
    }

    if (arg === '--refresh') {
      options.refresh = true;
      continue;
    }

    if (arg.startsWith('--ticker=')) {
      const raw = arg.slice('--ticker='.length).trim();
      // An empty value clears the ticker filter.
      options.ticker = raw ? raw.toUpperCase() : null;
      continue;
    }

    if (arg.startsWith('--window=')) {
      const raw = arg.slice('--window='.length).trim();
      if (raw === '10y' || raw === 'all') {
        options.window = raw;
      } else {
        console.warn(`${LOG_PREFIX} ignoring invalid --window value "${raw}"; using ${options.window}`);
      }
      continue;
    }

    if (arg.startsWith('--limit=')) {
      const raw = arg.slice('--limit='.length);
      // Truncate BEFORE validating so a fractional value below 1 (e.g. 0.5)
      // is rejected instead of being stored as 0, which would mean "no limit".
      const parsed = Math.trunc(Number(raw));
      if (Number.isFinite(parsed) && parsed >= 1) {
        options.limit = parsed;
      } else {
        console.warn(`${LOG_PREFIX} ignoring invalid --limit value "${raw}"`);
      }
      continue;
    }

    console.warn(`${LOG_PREFIX} ignoring unrecognized argument "${arg}"`);
  }

  return options;
}

/** Prints command-line usage to stdout. */
function printUsage(): void {
  console.log(
    [
      'Backfill filing statement snapshots (Financial Statements V2).',
      '',
      'Usage:',
      ' bun run scripts/backfill-filing-statements.ts [--apply] [--ticker=SYMBOL] [--window=10y|all] [--limit=N] [--refresh]',
      '',
      'Flags:',
      ' --apply Persist snapshots. Without this flag, script runs in dry-run mode.',
      ' --ticker=SYMBOL Restrict to a single ticker (e.g. AAPL).',
      ' --window=... History window. Defaults to 10y.',
      ' --limit=N Stop after N filings are scanned.',
      ' --refresh Rehydrate even when snapshot appears fresh.',
      ' --help, -h Show help.'
    ].join('\n')
  );
}

/**
 * Loads the filings to process, in ascending `id` order.
 *
 * Rows are fetched in pages of at most {@link PAGE_SIZE} using the last seen
 * `id` as a cursor. Only 10-K / 10-Q filings are selected, optionally
 * restricted to one ticker and/or to filings dated within the last ten years.
 *
 * @param options Parsed script options (`ticker`, `window` and `limit` are used).
 * @returns Matching filings, capped at `options.limit` when it is a positive number.
 */
async function loadFilings(options: ScriptOptions): Promise<FilingRow[]> {
  const rows: FilingRow[] = [];
  // A null or non-positive limit means "no cap".
  const maxRows = options.limit !== null && options.limit > 0 ? options.limit : null;

  // These filters do not change between pages, so build them once.
  const baseConditions = [inArray(filing.filing_type, [...FINANCIAL_FORMS])];
  if (options.ticker) {
    baseConditions.push(eq(filing.ticker, options.ticker));
  }
  if (options.window === '10y') {
    baseConditions.push(gte(filing.filing_date, tenYearsAgoIso()));
  }

  let cursor = 0;

  for (;;) {
    // Never request more rows than are still needed to satisfy the limit.
    // rows.length < maxRows always holds here because we return as soon as
    // the cap is reached, so the requested size is at least 1.
    const pageSize = maxRows === null ? PAGE_SIZE : Math.min(PAGE_SIZE, maxRows - rows.length);

    const page = await db
      .select({
        id: filing.id,
        ticker: filing.ticker,
        cik: filing.cik,
        accessionNumber: filing.accession_number,
        filingDate: filing.filing_date,
        filingType: filing.filing_type,
        filingUrl: filing.filing_url,
        primaryDocument: filing.primary_document,
        metrics: filing.metrics,
        updatedAt: filing.updated_at
      })
      .from(filing)
      .where(and(...baseConditions, gt(filing.id, cursor)))
      .orderBy(asc(filing.id))
      .limit(pageSize);

    if (page.length === 0) {
      break;
    }

    for (const row of page) {
      // The query already filters by form; this check narrows the type and
      // guards against any unexpected value coming back.
      if (!isFinancialForm(row.filingType)) {
        continue;
      }

      rows.push({
        id: row.id,
        ticker: row.ticker,
        cik: row.cik,
        accessionNumber: row.accessionNumber,
        filingDate: row.filingDate,
        filingType: row.filingType,
        filingUrl: row.filingUrl,
        primaryDocument: row.primaryDocument ?? null,
        metrics: row.metrics,
        updatedAt: row.updatedAt
      });

      if (maxRows !== null && rows.length >= maxRows) {
        return rows;
      }
    }

    // Advance past the last row of this page (page is non-empty here).
    cursor = page[page.length - 1]?.id ?? cursor;
  }

  return rows;
}

/**
 * Hydrates (and, in apply mode, persists) a statement snapshot for every
 * selected filing, one at a time.
 *
 * A filing is skipped as "fresh" when a stored snapshot exists whose
 * `updated_at` is not older than the filing's `updatedAt`, unless
 * `options.refresh` is set. Failures while hydrating or upserting a single
 * filing are logged and counted, and the run continues. A failure while
 * loading filings or looking up an existing snapshot is not caught here and
 * propagates to the caller.
 *
 * @param options Parsed script options.
 * @returns Counters describing what was scanned, skipped, written and failed.
 */
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
  const rows = await loadFilings(options);
  const summary: ScriptSummary = {
    scanned: 0,
    wouldWrite: 0,
    written: 0,
    skippedFresh: 0,
    failed: 0
  };

  console.log(
    `${LOG_PREFIX} mode=${options.apply ? 'apply' : 'dry-run'} window=${options.window} filings=${rows.length}`
  );
  if (options.ticker) {
    console.log(`${LOG_PREFIX} ticker=${options.ticker}`);
  }

  for (const [index, row] of rows.entries()) {
    summary.scanned += 1;
    console.log(
      `${LOG_PREFIX} [${index + 1}/${rows.length}] ${row.ticker} ${row.filingType} ${row.filingDate} ${row.accessionNumber}`
    );

    const existing = await getFilingStatementSnapshotByFilingId(row.id);
    // If either timestamp fails to parse, Date.parse yields NaN, the
    // comparison is false, and the snapshot is treated as stale.
    const isFresh = existing && Date.parse(existing.updated_at) >= Date.parse(row.updatedAt);

    if (isFresh && !options.refresh) {
      // Fresh snapshots are skipped without hydrating and without the delay.
      summary.skippedFresh += 1;
      continue;
    }

    try {
      const snapshot = await hydrateFilingStatementSnapshot({
        filingId: row.id,
        ticker: row.ticker,
        cik: row.cik,
        accessionNumber: row.accessionNumber,
        filingDate: row.filingDate,
        filingType: row.filingType,
        filingUrl: row.filingUrl,
        primaryDocument: row.primaryDocument,
        metrics: row.metrics
      });

      // Counted in both modes: in a dry run this is what WOULD be written.
      summary.wouldWrite += 1;

      if (options.apply) {
        await upsertFilingStatementSnapshot(snapshot);
        summary.written += 1;
      }
    } catch (error) {
      summary.failed += 1;
      const reason = error instanceof Error ? error.message : 'unknown error';
      console.error(`${LOG_PREFIX} failed for ${row.accessionNumber}: ${reason}`);
    }

    // Pause after every hydration attempt, successful or not.
    await Bun.sleep(REQUEST_DELAY_MS);
  }

  return summary;
}

/**
 * Script entry point: parses arguments, runs the backfill and prints a summary.
 * On an uncaught failure it logs the error and sets the process exit code to 1.
 */
async function main(): Promise<void> {
  const options = parseOptions(process.argv.slice(2));
  const startedAt = Date.now();

  try {
    const summary = await runBackfill(options);
    const durationSec = ((Date.now() - startedAt) / 1000).toFixed(1);

    console.log(`${LOG_PREFIX} summary`);
    console.log(` scanned=${summary.scanned}`);
    console.log(` wouldWrite=${summary.wouldWrite}`);
    console.log(` written=${summary.written}`);
    console.log(` skippedFresh=${summary.skippedFresh}`);
    console.log(` failed=${summary.failed}`);
    console.log(` durationSeconds=${durationSec}`);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown backfill failure';
    console.error(`${LOG_PREFIX} fatal: ${message}`);
    process.exitCode = 1;
  }
}

// main() handles its own errors, so the returned promise is intentionally not awaited.
void main();