273 lines
7.2 KiB
TypeScript
273 lines
7.2 KiB
TypeScript
import { and, asc, eq, gt, gte, inArray } from 'drizzle-orm';
|
|
import type { Filing } from '@/lib/types';
|
|
import { db } from '@/lib/server/db';
|
|
import { filing } from '@/lib/server/db/schema';
|
|
import {
|
|
getFilingStatementSnapshotByFilingId,
|
|
upsertFilingStatementSnapshot
|
|
} from '@/lib/server/repos/filing-statements';
|
|
import { hydrateFilingStatementSnapshot } from '@/lib/server/sec';
|
|
|
|
/** Filing form types included in the backfill query. */
const FINANCIAL_FORMS = ['10-K', '10-Q'] as const;

/** Maximum number of filing rows fetched per database page. */
const PAGE_SIZE = 200;

/** Pause, in milliseconds, inserted after each hydration attempt. */
const REQUEST_DELAY_MS = 120;
|
|
|
|
/** Union of the literal form names listed in `FINANCIAL_FORMS`. */
type FinancialForm = (typeof FINANCIAL_FORMS)[number];
|
|
|
|
/**
 * One filing selected for backfill, as projected from the `filing` table
 * by `loadFilings`.
 */
type FilingRow = {
  /** Primary key of the filing; also used as the pagination cursor. */
  id: number;
  ticker: string;
  cik: string;
  accessionNumber: string;
  /** Filing date as stored; compared as a string against a `YYYY-MM-DD` threshold. */
  filingDate: string;
  /** Narrowed to the forms listed in `FINANCIAL_FORMS`. */
  filingType: FinancialForm;
  filingUrl: string | null;
  primaryDocument: string | null;
  metrics: Filing['metrics'];
  /** Last-modified timestamp; parsed with `Date.parse` for the freshness check. */
  updatedAt: string;
};
|
|
|
|
/** Command-line options understood by the backfill script. */
type ScriptOptions = {
  /** When true, snapshots are persisted; otherwise the run is a dry run. */
  apply: boolean;
  /** Upper-cased ticker to restrict the run to, or null for all tickers. */
  ticker: string | null;
  /** History window: the last ten years, or every filing. */
  window: '10y' | 'all';
  /** Maximum number of filings to load, or null for no cap. */
  limit: number | null;
  /** When true, snapshots are rehydrated even if they look fresh. */
  refresh: boolean;
};
|
|
|
|
/** Counters accumulated over one backfill run. */
type ScriptSummary = {
  /** Filings visited by the main loop. */
  scanned: number;
  /** Filings successfully hydrated (counted in both dry-run and apply mode). */
  wouldWrite: number;
  /** Snapshots actually persisted (apply mode only). */
  written: number;
  /** Filings skipped because their stored snapshot was at least as new as the filing. */
  skippedFresh: number;
  /** Filings whose hydration or persistence threw. */
  failed: number;
};
|
|
|
|
/**
 * Returns the calendar date exactly ten years before the current instant,
 * evaluated in UTC and formatted as `YYYY-MM-DD`.
 */
function tenYearsAgoIso(): string {
  const now = new Date();
  const shifted = new Date(now.getTime());
  shifted.setUTCFullYear(now.getUTCFullYear() - 10);

  // An ISO-8601 timestamp starts with the 10-character date portion.
  const isoTimestamp = shifted.toISOString();
  return isoTimestamp.substring(0, 10);
}
|
|
|
|
/**
 * Parses command-line arguments into {@link ScriptOptions}.
 *
 * Recognised flags: `--apply`, `--refresh`, `--ticker=SYMBOL`,
 * `--window=10y|all`, `--limit=N`, and `--help` / `-h`. Unrecognised
 * arguments and invalid flag values are ignored, leaving the default in place.
 * `--help` prints the usage text and exits the process with status 0.
 *
 * @param argv Arguments after the script path (e.g. `process.argv.slice(2)`).
 * @returns The options, starting from defaults: dry-run, all tickers,
 *          10-year window, no limit, no forced refresh.
 */
function parseOptions(argv: string[]): ScriptOptions {
  const options: ScriptOptions = {
    apply: false,
    ticker: null,
    window: '10y',
    limit: null,
    refresh: false
  };

  for (const arg of argv) {
    if (arg === '--apply') {
      options.apply = true;
      continue;
    }

    if (arg === '--refresh') {
      options.refresh = true;
      continue;
    }

    if (arg.startsWith('--ticker=')) {
      const raw = arg.slice('--ticker='.length).trim();
      // An empty value clears the filter rather than matching an empty ticker.
      options.ticker = raw ? raw.toUpperCase() : null;
      continue;
    }

    if (arg.startsWith('--window=')) {
      const raw = arg.slice('--window='.length).trim();
      if (raw === '10y' || raw === 'all') {
        options.window = raw;
      }
      continue;
    }

    if (arg.startsWith('--limit=')) {
      // Truncate BEFORE validating: a fractional value below 1 (e.g. 0.5)
      // would otherwise pass the `> 0` check and then become a limit of 0,
      // which the loader treats as "no limit".
      const parsed = Math.trunc(Number(arg.slice('--limit='.length)));
      if (Number.isFinite(parsed) && parsed > 0) {
        options.limit = parsed;
      }
      continue;
    }

    if (arg === '--help' || arg === '-h') {
      printUsage();
      process.exit(0);
    }
  }

  return options;
}
|
|
|
|
/** Writes the script's usage text to stdout as a single multi-line message. */
function printUsage() {
  const synopsis = [
    'Backfill filing statement snapshots (Financial Statements V2).',
    '',
    'Usage:',
    ' bun run scripts/backfill-filing-statements.ts [--apply] [--ticker=SYMBOL] [--window=10y|all] [--limit=N] [--refresh]',
    ''
  ];

  const flagHelp = [
    'Flags:',
    ' --apply Persist snapshots. Without this flag, script runs in dry-run mode.',
    ' --ticker=SYMBOL Restrict to a single ticker (e.g. AAPL).',
    ' --window=... History window. Defaults to 10y.',
    ' --limit=N Stop after N filings are scanned.',
    ' --refresh Rehydrate even when snapshot appears fresh.',
    ' --help, -h Show help.'
  ];

  const usageText = synopsis.concat(flagHelp).join('\n');
  console.log(usageText);
}
|
|
|
|
/**
 * Loads the filings the backfill should process.
 *
 * Rows are read in ascending `id` order, `PAGE_SIZE` at a time, using the
 * last seen id as a cursor. Only forms listed in `FINANCIAL_FORMS` are
 * selected; the optional ticker filter and the 10-year window are applied in
 * the query. Loading stops early once `options.limit` rows are collected.
 *
 * @param options Parsed script options (`ticker`, `window`, `limit` are read).
 * @returns The matching filings in ascending id order.
 */
async function loadFilings(options: ScriptOptions): Promise<FilingRow[]> {
  const collected: FilingRow[] = [];
  const earliestFilingDate = tenYearsAgoIso();
  const maxRows = options.limit;
  let lastSeenId = 0;

  while (true) {
    // Filters are rebuilt for each page because the id cursor moves.
    const filters = [
      inArray(filing.filing_type, [...FINANCIAL_FORMS]),
      gt(filing.id, lastSeenId)
    ];

    if (options.ticker) {
      filters.push(eq(filing.ticker, options.ticker));
    }

    if (options.window === '10y') {
      filters.push(gte(filing.filing_date, earliestFilingDate));
    }

    const batch = await db
      .select({
        id: filing.id,
        ticker: filing.ticker,
        cik: filing.cik,
        accessionNumber: filing.accession_number,
        filingDate: filing.filing_date,
        filingType: filing.filing_type,
        filingUrl: filing.filing_url,
        primaryDocument: filing.primary_document,
        metrics: filing.metrics,
        updatedAt: filing.updated_at
      })
      .from(filing)
      .where(and(...filters))
      .orderBy(asc(filing.id))
      .limit(PAGE_SIZE);

    // An empty page means the cursor has moved past the last matching row.
    if (batch.length === 0) {
      break;
    }

    for (const record of batch) {
      const { filingType } = record;

      // Re-check the form at runtime so the value narrows to FinancialForm.
      if (filingType === '10-K' || filingType === '10-Q') {
        collected.push({
          id: record.id,
          ticker: record.ticker,
          cik: record.cik,
          accessionNumber: record.accessionNumber,
          filingDate: record.filingDate,
          filingType,
          filingUrl: record.filingUrl,
          primaryDocument: record.primaryDocument ?? null,
          metrics: record.metrics,
          updatedAt: record.updatedAt
        });

        if (maxRows && collected.length >= maxRows) {
          return collected;
        }
      }
    }

    // Advance the cursor to the highest id seen on this page.
    const tail = batch[batch.length - 1];
    lastSeenId = tail?.id ?? lastSeenId;
  }

  return collected;
}
|
|
|
|
/**
 * Runs the backfill over every filing returned by `loadFilings`.
 *
 * For each filing the stored snapshot is looked up. If it is at least as new
 * as the filing (and `--refresh` was not given) the filing is skipped.
 * Otherwise the snapshot is hydrated and, in apply mode, upserted. Errors
 * thrown while hydrating or upserting are logged and counted without
 * aborting the run. A pause of `REQUEST_DELAY_MS` follows every hydration
 * attempt, successful or not.
 *
 * @param options Parsed script options.
 * @returns Counters describing what the run did.
 */
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
  const tag = '[backfill-filing-statements]';
  const filings = await loadFilings(options);
  const total = filings.length;
  const modeLabel = options.apply ? 'apply' : 'dry-run';

  const summary: ScriptSummary = {
    scanned: 0,
    wouldWrite: 0,
    written: 0,
    skippedFresh: 0,
    failed: 0
  };

  console.log(`${tag} mode=${modeLabel} window=${options.window} filings=${total}`);
  if (options.ticker) {
    console.log(`${tag} ticker=${options.ticker}`);
  }

  let position = 0;
  for (const entry of filings) {
    position += 1;
    summary.scanned += 1;
    console.log(`${tag} [${position}/${total}] ${entry.ticker} ${entry.filingType} ${entry.filingDate} ${entry.accessionNumber}`);

    // A snapshot counts as fresh when it was updated at or after the filing row.
    const stored = await getFilingStatementSnapshotByFilingId(entry.id);
    const upToDate = stored
      ? Date.parse(stored.updated_at) >= Date.parse(entry.updatedAt)
      : false;

    if (upToDate && !options.refresh) {
      summary.skippedFresh += 1;
      continue;
    }

    try {
      const snapshot = await hydrateFilingStatementSnapshot({
        filingId: entry.id,
        ticker: entry.ticker,
        cik: entry.cik,
        accessionNumber: entry.accessionNumber,
        filingDate: entry.filingDate,
        filingType: entry.filingType,
        filingUrl: entry.filingUrl,
        primaryDocument: entry.primaryDocument,
        metrics: entry.metrics
      });

      // Counted in both modes: this is what a dry run reports it would persist.
      summary.wouldWrite += 1;

      if (options.apply) {
        await upsertFilingStatementSnapshot(snapshot);
        summary.written += 1;
      }
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'unknown error';
      summary.failed += 1;
      console.error(`${tag} failed for ${entry.accessionNumber}: ${reason}`);
    }

    // Presumably throttles upstream requests made during hydration — confirm.
    await Bun.sleep(REQUEST_DELAY_MS);
  }

  return summary;
}
|
|
|
|
/**
 * Script entry point: parses CLI arguments, runs the backfill, and prints the
 * summary counters plus elapsed time. If the backfill throws, the error is
 * logged and the process exit code is set to 1.
 */
async function main() {
  const cliOptions = parseOptions(process.argv.slice(2));
  const startMs = Date.now();

  try {
    const totals = await runBackfill(cliOptions);
    const elapsed = ((Date.now() - startMs) / 1000).toFixed(1);

    // Counters are printed in a fixed order, one per line.
    const counters = ['scanned', 'wouldWrite', 'written', 'skippedFresh', 'failed'] as const;

    console.log('[backfill-filing-statements] summary');
    for (const counter of counters) {
      console.log(` ${counter}=${totals[counter]}`);
    }
    console.log(` durationSeconds=${elapsed}`);
  } catch (error) {
    let message = 'Unknown backfill failure';
    if (error instanceof Error) {
      message = error.message;
    }

    console.error(`[backfill-filing-statements] fatal: ${message}`);
    process.exitCode = 1;
  }
}
|
|
|
|
// Start the script. `void` marks the returned promise as intentionally not awaited;
// main() reports backfill failures itself by setting process.exitCode.
void main();
|