From 36c4ed2ee24ea3dcd8193c64c73d164c59f338ee Mon Sep 17 00:00:00 2001 From: francy51 Date: Mon, 2 Mar 2026 09:34:55 -0500 Subject: [PATCH] chore(financials-v2): add statement backfill script and docs --- README.md | 1 + package.json | 1 + scripts/backfill-filing-statements.ts | 272 ++++++++++++++++++++++++++ 3 files changed, 274 insertions(+) create mode 100644 scripts/backfill-filing-statements.ts diff --git a/README.md b/README.md index c8d22d5..b2a4bc3 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,7 @@ All endpoints below are defined in Elysia at `lib/server/api/app.ts` and exposed - `POST /api/portfolio/refresh-prices` - `POST /api/portfolio/insights/generate` - `GET /api/portfolio/insights/latest` +- `GET /api/financials/company` - `GET /api/filings` - `POST /api/filings/sync` - `POST /api/filings/:accessionNumber/analyze` diff --git a/package.json b/package.json index f6a5eef..8a3fe6b 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "start": "bun --bun next start", "lint": "bun --bun tsc --noEmit", "backfill:filing-metrics": "bun run scripts/backfill-filing-metrics.ts", + "backfill:filing-statements": "bun run scripts/backfill-filing-statements.ts", "db:generate": "bun x drizzle-kit generate", "db:migrate": "bun x drizzle-kit migrate" }, diff --git a/scripts/backfill-filing-statements.ts b/scripts/backfill-filing-statements.ts new file mode 100644 index 0000000..d182dce --- /dev/null +++ b/scripts/backfill-filing-statements.ts @@ -0,0 +1,272 @@ +import { and, asc, eq, gt, gte, inArray } from 'drizzle-orm'; +import type { Filing } from '@/lib/types'; +import { db } from '@/lib/server/db'; +import { filing } from '@/lib/server/db/schema'; +import { + getFilingStatementSnapshotByFilingId, + upsertFilingStatementSnapshot +} from '@/lib/server/repos/filing-statements'; +import { hydrateFilingStatementSnapshot } from '@/lib/server/sec'; + +const FINANCIAL_FORMS = ['10-K', '10-Q'] as const; +const PAGE_SIZE = 200; +const REQUEST_DELAY_MS = 120; 
/** Union of the form types this script processes, derived from FINANCIAL_FORMS. */
type FinancialForm = (typeof FINANCIAL_FORMS)[number];

/** Projection of a `filing` table row carrying the fields this script reads. */
type FilingRow = {
  id: number;
  ticker: string;
  cik: string;
  accessionNumber: string;
  filingDate: string;
  filingType: FinancialForm;
  filingUrl: string | null;
  primaryDocument: string | null;
  metrics: Filing['metrics'];
  updatedAt: string;
};

/** Command-line options accepted by the script (see printUsage). */
type ScriptOptions = {
  /** Persist snapshots when true; otherwise the run is a dry run. */
  apply: boolean;
  /** Upper-cased ticker filter, or null for all tickers. */
  ticker: string | null;
  /** '10y' restricts to filings dated within the last ten years; 'all' disables the filter. */
  window: '10y' | 'all';
  /** Maximum number of filings to load, or null for no cap. */
  limit: number | null;
  /** Rehydrate even when the stored snapshot looks fresh. */
  refresh: boolean;
};

/** Counters reported at the end of a run. */
type ScriptSummary = {
  scanned: number;
  wouldWrite: number;
  written: number;
  skippedFresh: number;
  failed: number;
};

/**
 * Returns the UTC calendar date exactly ten years before now,
 * formatted as `YYYY-MM-DD`.
 */
function tenYearsAgoIso(): string {
  const date = new Date();
  date.setUTCFullYear(date.getUTCFullYear() - 10);
  return date.toISOString().slice(0, 10);
}

/**
 * Parses command-line arguments into ScriptOptions.
 *
 * Unknown flags and invalid values never abort the run; they are reported on
 * stderr and the corresponding default is kept. `--help` / `-h` prints the
 * usage text and exits the process with status 0.
 *
 * @param argv Arguments after the script path (e.g. `process.argv.slice(2)`).
 * @returns The resolved options, with defaults for anything not supplied.
 */
function parseOptions(argv: string[]): ScriptOptions {
  const options: ScriptOptions = {
    apply: false,
    ticker: null,
    window: '10y',
    limit: null,
    refresh: false
  };

  for (const arg of argv) {
    if (arg === '--apply') {
      options.apply = true;
      continue;
    }

    if (arg === '--refresh') {
      options.refresh = true;
      continue;
    }

    if (arg.startsWith('--ticker=')) {
      const raw = arg.slice('--ticker='.length).trim();
      options.ticker = raw ? raw.toUpperCase() : null;
      continue;
    }

    if (arg.startsWith('--window=')) {
      const raw = arg.slice('--window='.length).trim();
      if (raw === '10y' || raw === 'all') {
        options.window = raw;
      } else {
        console.warn(`[backfill-filing-statements] ignoring invalid ${arg}; expected 10y or all`);
      }
      continue;
    }

    if (arg.startsWith('--limit=')) {
      // Truncate BEFORE validating: otherwise a value such as 0.5 passes the
      // "> 0" check, becomes 0, and is later treated as "no limit".
      const parsed = Math.trunc(Number(arg.slice('--limit='.length)));
      if (Number.isFinite(parsed) && parsed > 0) {
        options.limit = parsed;
      } else {
        console.warn(`[backfill-filing-statements] ignoring invalid ${arg}; expected a positive integer`);
      }
      continue;
    }

    if (arg === '--help' || arg === '-h') {
      printUsage();
      process.exit(0);
    }

    console.warn(`[backfill-filing-statements] ignoring unrecognized argument ${arg}`);
  }

  return options;
}

/** Prints the usage/help text to stdout. */
function printUsage(): void {
  console.log(
    [
      'Backfill filing statement snapshots (Financial Statements V2).',
      '',
      'Usage:',
      ' bun run scripts/backfill-filing-statements.ts [--apply] [--ticker=SYMBOL] [--window=10y|all] [--limit=N] [--refresh]',
      '',
      'Flags:',
      ' --apply Persist snapshots. Without this flag, script runs in dry-run mode.',
      ' --ticker=SYMBOL Restrict to a single ticker (e.g. AAPL).',
      ' --window=... History window. Defaults to 10y.',
      ' --limit=N Stop after N filings are scanned.',
      ' --refresh Rehydrate even when snapshot appears fresh.',
      ' --help, -h Show help.'
    ].join('\n')
  );
}

/**
 * Loads the filings to process, paging through the `filing` table in
 * ascending id order (keyset pagination on `id`, PAGE_SIZE rows per query).
 *
 * Rows are restricted to FINANCIAL_FORMS, optionally to one ticker, and —
 * when `options.window` is '10y' — to filing dates on or after the date
 * returned by tenYearsAgoIso().
 *
 * @param options Parsed script options; `ticker`, `window` and `limit` are read.
 * @returns All matching rows, capped at `options.limit` when it is set.
 */
async function loadFilings(options: ScriptOptions): Promise<FilingRow[]> {
  const rows: FilingRow[] = [];

  // Filters that do not change between pages are built once.
  const filters = [inArray(filing.filing_type, [...FINANCIAL_FORMS])];

  if (options.ticker) {
    filters.push(eq(filing.ticker, options.ticker));
  }

  if (options.window === '10y') {
    filters.push(gte(filing.filing_date, tenYearsAgoIso()));
  }

  let cursor = 0;

  for (;;) {
    const page = await db
      .select({
        id: filing.id,
        ticker: filing.ticker,
        cik: filing.cik,
        accessionNumber: filing.accession_number,
        filingDate: filing.filing_date,
        filingType: filing.filing_type,
        filingUrl: filing.filing_url,
        primaryDocument: filing.primary_document,
        metrics: filing.metrics,
        updatedAt: filing.updated_at
      })
      .from(filing)
      .where(and(...filters, gt(filing.id, cursor)))
      .orderBy(asc(filing.id))
      .limit(PAGE_SIZE);

    if (page.length === 0) {
      break;
    }

    for (const row of page) {
      // Narrows filingType to FinancialForm for the type checker; the query
      // already filters on the same values.
      if (row.filingType !== '10-K' && row.filingType !== '10-Q') {
        continue;
      }

      rows.push({
        id: row.id,
        ticker: row.ticker,
        cik: row.cik,
        accessionNumber: row.accessionNumber,
        filingDate: row.filingDate,
        filingType: row.filingType,
        filingUrl: row.filingUrl,
        primaryDocument: row.primaryDocument ?? null,
        metrics: row.metrics,
        updatedAt: row.updatedAt
      });

      if (options.limit !== null && rows.length >= options.limit) {
        return rows;
      }
    }

    // A short page means no rows remained beyond it at query time, so the
    // follow-up query would come back empty.
    if (page.length < PAGE_SIZE) {
      break;
    }

    cursor = page[page.length - 1]?.id ?? cursor;
  }

  return rows;
}

/**
 * Processes every filing returned by loadFilings.
 *
 * For each filing the existing snapshot is looked up; when it is at least as
 * recent as the filing row (and `--refresh` is not set) the filing is skipped.
 * Otherwise the snapshot is rebuilt via hydrateFilingStatementSnapshot and,
 * in apply mode only, persisted via upsertFilingStatementSnapshot.
 * Failures while hydrating or writing a single filing are logged and counted;
 * they do not stop the run.
 *
 * @param options Parsed script options.
 * @returns Counters describing what was scanned, skipped, written or failed.
 */
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
  const rows = await loadFilings(options);
  const summary: ScriptSummary = {
    scanned: 0,
    wouldWrite: 0,
    written: 0,
    skippedFresh: 0,
    failed: 0
  };

  console.log(`[backfill-filing-statements] mode=${options.apply ? 'apply' : 'dry-run'} window=${options.window} filings=${rows.length}`);
  if (options.ticker) {
    console.log(`[backfill-filing-statements] ticker=${options.ticker}`);
  }

  for (const [index, row] of rows.entries()) {
    summary.scanned += 1;
    console.log(`[backfill-filing-statements] [${index + 1}/${rows.length}] ${row.ticker} ${row.filingType} ${row.filingDate} ${row.accessionNumber}`);

    const existing = await getFilingStatementSnapshotByFilingId(row.id);
    // If either timestamp fails to parse, the comparison is false and the
    // filing is rehydrated rather than skipped.
    const isFresh = existing && Date.parse(existing.updated_at) >= Date.parse(row.updatedAt);

    if (isFresh && !options.refresh) {
      summary.skippedFresh += 1;
      continue;
    }

    try {
      const snapshot = await hydrateFilingStatementSnapshot({
        filingId: row.id,
        ticker: row.ticker,
        cik: row.cik,
        accessionNumber: row.accessionNumber,
        filingDate: row.filingDate,
        filingType: row.filingType,
        filingUrl: row.filingUrl,
        primaryDocument: row.primaryDocument,
        metrics: row.metrics
      });

      // Counted in both modes: hydration succeeded, so a write is possible.
      summary.wouldWrite += 1;

      if (options.apply) {
        await upsertFilingStatementSnapshot(snapshot);
        summary.written += 1;
      }
    } catch (error) {
      summary.failed += 1;
      const reason = error instanceof Error ? error.message : 'unknown error';
      console.error(`[backfill-filing-statements] failed for ${row.accessionNumber}: ${reason}`);
    }

    // Pause between hydration attempts (presumably to throttle upstream
    // requests — confirm against hydrateFilingStatementSnapshot).
    await Bun.sleep(REQUEST_DELAY_MS);
  }

  return summary;
}

/**
 * Script entry point: parses arguments, runs the backfill and prints the
 * summary. Any error escaping runBackfill is logged and turns the process
 * exit code into 1.
 */
async function main(): Promise<void> {
  const options = parseOptions(process.argv.slice(2));
  const startedAt = Date.now();

  try {
    const summary = await runBackfill(options);
    const durationSec = ((Date.now() - startedAt) / 1000).toFixed(1);

    console.log('[backfill-filing-statements] summary');
    console.log(` scanned=${summary.scanned}`);
    console.log(` wouldWrite=${summary.wouldWrite}`);
    console.log(` written=${summary.written}`);
    console.log(` skippedFresh=${summary.skippedFresh}`);
    console.log(` failed=${summary.failed}`);
    console.log(` durationSeconds=${durationSec}`);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown backfill failure';
    console.error(`[backfill-filing-statements] fatal: ${message}`);
    process.exitCode = 1;
  }
}

void main();