Files
Neon-Desk/scripts/backfill-filing-metrics.ts

340 lines
9.0 KiB
TypeScript

import { asc, and, eq, gt, inArray } from 'drizzle-orm';
import { db } from '@/lib/server/db';
import { filing } from '@/lib/server/db/schema';
import { fetchFilingMetricsForFilings } from '@/lib/server/sec';
const FINANCIAL_FORMS = ['10-K', '10-Q'] as const;
const PAGE_SIZE = 500;
const REQUEST_DELAY_MS = 220;
type FinancialForm = (typeof FINANCIAL_FORMS)[number];
type FilingMetrics = {
revenue: number | null;
netIncome: number | null;
totalAssets: number | null;
cash: number | null;
debt: number | null;
};
type FilingRow = {
id: number;
ticker: string;
cik: string;
accessionNumber: string;
filingDate: string;
filingType: FinancialForm;
metrics: FilingMetrics | null;
};
type ScriptOptions = {
apply: boolean;
ticker: string | null;
limitCiks: number | null;
allowNullOverwrite: boolean;
};
type ScriptSummary = {
scannedRows: number;
scannedCiks: number;
changedRows: number;
unchangedRows: number;
skippedRows: number;
skippedCiks: number;
};
function parseOptions(argv: string[]): ScriptOptions {
const options: ScriptOptions = {
apply: false,
ticker: null,
limitCiks: null,
allowNullOverwrite: false
};
for (const arg of argv) {
if (arg === '--apply') {
options.apply = true;
continue;
}
if (arg === '--allow-null-overwrite') {
options.allowNullOverwrite = true;
continue;
}
if (arg.startsWith('--ticker=')) {
const raw = arg.slice('--ticker='.length).trim();
options.ticker = raw ? raw.toUpperCase() : null;
continue;
}
if (arg.startsWith('--limit-ciks=')) {
const raw = Number(arg.slice('--limit-ciks='.length));
if (Number.isFinite(raw) && raw > 0) {
options.limitCiks = Math.trunc(raw);
}
continue;
}
if (arg === '--help' || arg === '-h') {
printUsage();
process.exit(0);
}
}
return options;
}
function printUsage() {
console.log(
[
'One-time backfill of filing metrics using per-filing SEC mapping.',
'',
'Usage:',
' bun run scripts/backfill-filing-metrics.ts [--apply] [--ticker=SYMBOL] [--limit-ciks=N] [--allow-null-overwrite]',
'',
'Flags:',
' --apply Persist updates. Without this flag, script runs as a dry run.',
' --ticker=SYMBOL Restrict processing to one ticker (e.g. AAPL).',
' --limit-ciks=N Process only first N CIK groups (useful for staged rollouts).',
' --allow-null-overwrite Allow replacing non-null metrics with all-null API results.',
' --help, -h Show help.'
].join('\n')
);
}
function toMetrics(value: FilingMetrics | null | undefined): FilingMetrics | null {
if (!value) {
return null;
}
return {
revenue: value.revenue ?? null,
netIncome: value.netIncome ?? null,
totalAssets: value.totalAssets ?? null,
cash: value.cash ?? null,
debt: value.debt ?? null
};
}
function hasAnyMetricValue(metrics: FilingMetrics | null | undefined) {
const normalized = toMetrics(metrics);
if (!normalized) {
return false;
}
return Object.values(normalized).some((value) => value !== null && Number.isFinite(value));
}
function isAllNull(metrics: FilingMetrics | null | undefined) {
const normalized = toMetrics(metrics);
if (!normalized) {
return true;
}
return Object.values(normalized).every((value) => value === null);
}
function metricsEqual(left: FilingMetrics | null | undefined, right: FilingMetrics | null | undefined) {
const a = toMetrics(left);
const b = toMetrics(right);
if (!a && !b) {
return true;
}
if (!a || !b) {
return false;
}
return a.revenue === b.revenue
&& a.netIncome === b.netIncome
&& a.totalAssets === b.totalAssets
&& a.cash === b.cash
&& a.debt === b.debt;
}
async function loadFinancialRows(tickerFilter: string | null): Promise<FilingRow[]> {
const rows: FilingRow[] = [];
let cursor = 0;
for (;;) {
const conditions = [
inArray(filing.filing_type, [...FINANCIAL_FORMS]),
gt(filing.id, cursor)
];
if (tickerFilter) {
conditions.push(eq(filing.ticker, tickerFilter));
}
const page = await db
.select({
id: filing.id,
ticker: filing.ticker,
cik: filing.cik,
accessionNumber: filing.accession_number,
filingDate: filing.filing_date,
filingType: filing.filing_type,
metrics: filing.metrics
})
.from(filing)
.where(and(...conditions))
.orderBy(asc(filing.id))
.limit(PAGE_SIZE);
if (page.length === 0) {
break;
}
for (const row of page) {
if (row.filingType !== '10-K' && row.filingType !== '10-Q') {
continue;
}
rows.push({
...row,
filingType: row.filingType,
metrics: toMetrics(row.metrics)
});
}
cursor = page[page.length - 1]?.id ?? cursor;
}
return rows;
}
function groupByCik(rows: FilingRow[]) {
const grouped = new Map<string, FilingRow[]>();
for (const row of rows) {
const group = grouped.get(row.cik);
if (group) {
group.push(row);
continue;
}
grouped.set(row.cik, [row]);
}
return grouped;
}
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
const startedAt = new Date();
const rows = await loadFinancialRows(options.ticker);
const grouped = groupByCik(rows);
const entries = [...grouped.entries()];
const limitedEntries = options.limitCiks
? entries.slice(0, options.limitCiks)
: entries;
const summary: ScriptSummary = {
scannedRows: 0,
scannedCiks: 0,
changedRows: 0,
unchangedRows: 0,
skippedRows: 0,
skippedCiks: 0
};
console.log(`[backfill-filing-metrics] mode=${options.apply ? 'apply' : 'dry-run'} rows=${rows.length} ciks=${limitedEntries.length}`);
if (options.ticker) {
console.log(`[backfill-filing-metrics] ticker filter=${options.ticker}`);
}
for (const [index, [cik, filingsForCik]] of limitedEntries.entries()) {
summary.scannedCiks += 1;
summary.scannedRows += filingsForCik.length;
const ticker = filingsForCik[0]?.ticker ?? 'UNKNOWN';
console.log(`[backfill-filing-metrics] [${index + 1}/${limitedEntries.length}] CIK=${cik} ticker=${ticker} filings=${filingsForCik.length}`);
const metricsMap = await fetchFilingMetricsForFilings(
cik,
ticker,
filingsForCik.map((entry) => ({
accessionNumber: entry.accessionNumber,
filingDate: entry.filingDate,
filingType: entry.filingType
}))
);
const fetchedMetrics = filingsForCik.map((entry) => metricsMap.get(entry.accessionNumber) ?? null);
const apiReturnedAllNull = fetchedMetrics.every((entry) => isAllNull(entry));
const cikHasExistingValues = filingsForCik.some((entry) => hasAnyMetricValue(entry.metrics));
if (apiReturnedAllNull && cikHasExistingValues && !options.allowNullOverwrite) {
summary.skippedCiks += 1;
summary.skippedRows += filingsForCik.length;
console.warn(`[backfill-filing-metrics] skipped CIK=${cik}: API returned all-null metrics; pass --allow-null-overwrite to force replacement`);
await Bun.sleep(REQUEST_DELAY_MS);
continue;
}
const pendingUpdates: Array<{ id: number; metrics: FilingMetrics | null }> = [];
for (const entry of filingsForCik) {
const nextMetrics = toMetrics(metricsMap.get(entry.accessionNumber) ?? null);
if (metricsEqual(entry.metrics, nextMetrics)) {
summary.unchangedRows += 1;
continue;
}
summary.changedRows += 1;
pendingUpdates.push({
id: entry.id,
metrics: nextMetrics
});
}
if (!options.apply || pendingUpdates.length === 0) {
console.log(`[backfill-filing-metrics] changes=${pendingUpdates.length} (not persisted)`);
await Bun.sleep(REQUEST_DELAY_MS);
continue;
}
for (const update of pendingUpdates) {
await db
.update(filing)
.set({
metrics: update.metrics,
updated_at: new Date().toISOString()
})
.where(eq(filing.id, update.id));
}
console.log(`[backfill-filing-metrics] updated rows=${pendingUpdates.length}`);
await Bun.sleep(REQUEST_DELAY_MS);
}
const elapsedMs = Date.now() - startedAt.getTime();
console.log(
[
'[backfill-filing-metrics] complete',
` mode=${options.apply ? 'apply' : 'dry-run'}`,
` scanned_ciks=${summary.scannedCiks}`,
` scanned_rows=${summary.scannedRows}`,
` changed_rows=${summary.changedRows}`,
` unchanged_rows=${summary.unchangedRows}`,
` skipped_ciks=${summary.skippedCiks}`,
` skipped_rows=${summary.skippedRows}`,
` elapsed_ms=${elapsedMs}`
].join('\n')
);
return summary;
}
async function main() {
const options = parseOptions(Bun.argv.slice(2));
await runBackfill(options);
}
main().catch((error) => {
const message = error instanceof Error ? error.stack ?? error.message : String(error);
console.error('[backfill-filing-metrics] failed\n' + message);
process.exitCode = 1;
});