Add one-time filing metrics backfill script
This commit is contained in:
339
scripts/backfill-filing-metrics.ts
Normal file
339
scripts/backfill-filing-metrics.ts
Normal file
@@ -0,0 +1,339 @@
|
||||
import { asc, and, eq, gt, inArray } from 'drizzle-orm';
|
||||
import { db } from '@/lib/server/db';
|
||||
import { filing } from '@/lib/server/db/schema';
|
||||
import { fetchFilingMetricsForFilings } from '@/lib/server/sec';
|
||||
|
||||
const FINANCIAL_FORMS = ['10-K', '10-Q'] as const;
|
||||
const PAGE_SIZE = 500;
|
||||
const REQUEST_DELAY_MS = 220;
|
||||
|
||||
type FinancialForm = (typeof FINANCIAL_FORMS)[number];
|
||||
|
||||
type FilingMetrics = {
|
||||
revenue: number | null;
|
||||
netIncome: number | null;
|
||||
totalAssets: number | null;
|
||||
cash: number | null;
|
||||
debt: number | null;
|
||||
};
|
||||
|
||||
type FilingRow = {
|
||||
id: number;
|
||||
ticker: string;
|
||||
cik: string;
|
||||
accessionNumber: string;
|
||||
filingDate: string;
|
||||
filingType: FinancialForm;
|
||||
metrics: FilingMetrics | null;
|
||||
};
|
||||
|
||||
type ScriptOptions = {
|
||||
apply: boolean;
|
||||
ticker: string | null;
|
||||
limitCiks: number | null;
|
||||
allowNullOverwrite: boolean;
|
||||
};
|
||||
|
||||
type ScriptSummary = {
|
||||
scannedRows: number;
|
||||
scannedCiks: number;
|
||||
changedRows: number;
|
||||
unchangedRows: number;
|
||||
skippedRows: number;
|
||||
skippedCiks: number;
|
||||
};
|
||||
|
||||
function parseOptions(argv: string[]): ScriptOptions {
|
||||
const options: ScriptOptions = {
|
||||
apply: false,
|
||||
ticker: null,
|
||||
limitCiks: null,
|
||||
allowNullOverwrite: false
|
||||
};
|
||||
|
||||
for (const arg of argv) {
|
||||
if (arg === '--apply') {
|
||||
options.apply = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg === '--allow-null-overwrite') {
|
||||
options.allowNullOverwrite = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg.startsWith('--ticker=')) {
|
||||
const raw = arg.slice('--ticker='.length).trim();
|
||||
options.ticker = raw ? raw.toUpperCase() : null;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg.startsWith('--limit-ciks=')) {
|
||||
const raw = Number(arg.slice('--limit-ciks='.length));
|
||||
if (Number.isFinite(raw) && raw > 0) {
|
||||
options.limitCiks = Math.trunc(raw);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg === '--help' || arg === '-h') {
|
||||
printUsage();
|
||||
process.exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
function printUsage() {
|
||||
console.log(
|
||||
[
|
||||
'One-time backfill of filing metrics using per-filing SEC mapping.',
|
||||
'',
|
||||
'Usage:',
|
||||
' bun run scripts/backfill-filing-metrics.ts [--apply] [--ticker=SYMBOL] [--limit-ciks=N] [--allow-null-overwrite]',
|
||||
'',
|
||||
'Flags:',
|
||||
' --apply Persist updates. Without this flag, script runs as a dry run.',
|
||||
' --ticker=SYMBOL Restrict processing to one ticker (e.g. AAPL).',
|
||||
' --limit-ciks=N Process only first N CIK groups (useful for staged rollouts).',
|
||||
' --allow-null-overwrite Allow replacing non-null metrics with all-null API results.',
|
||||
' --help, -h Show help.'
|
||||
].join('\n')
|
||||
);
|
||||
}
|
||||
|
||||
function toMetrics(value: FilingMetrics | null | undefined): FilingMetrics | null {
|
||||
if (!value) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
revenue: value.revenue ?? null,
|
||||
netIncome: value.netIncome ?? null,
|
||||
totalAssets: value.totalAssets ?? null,
|
||||
cash: value.cash ?? null,
|
||||
debt: value.debt ?? null
|
||||
};
|
||||
}
|
||||
|
||||
function hasAnyMetricValue(metrics: FilingMetrics | null | undefined) {
|
||||
const normalized = toMetrics(metrics);
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return Object.values(normalized).some((value) => value !== null && Number.isFinite(value));
|
||||
}
|
||||
|
||||
function isAllNull(metrics: FilingMetrics | null | undefined) {
|
||||
const normalized = toMetrics(metrics);
|
||||
if (!normalized) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return Object.values(normalized).every((value) => value === null);
|
||||
}
|
||||
|
||||
function metricsEqual(left: FilingMetrics | null | undefined, right: FilingMetrics | null | undefined) {
|
||||
const a = toMetrics(left);
|
||||
const b = toMetrics(right);
|
||||
|
||||
if (!a && !b) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!a || !b) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return a.revenue === b.revenue
|
||||
&& a.netIncome === b.netIncome
|
||||
&& a.totalAssets === b.totalAssets
|
||||
&& a.cash === b.cash
|
||||
&& a.debt === b.debt;
|
||||
}
|
||||
|
||||
async function loadFinancialRows(tickerFilter: string | null): Promise<FilingRow[]> {
|
||||
const rows: FilingRow[] = [];
|
||||
let cursor = 0;
|
||||
|
||||
for (;;) {
|
||||
const conditions = [
|
||||
inArray(filing.filing_type, [...FINANCIAL_FORMS]),
|
||||
gt(filing.id, cursor)
|
||||
];
|
||||
|
||||
if (tickerFilter) {
|
||||
conditions.push(eq(filing.ticker, tickerFilter));
|
||||
}
|
||||
|
||||
const page = await db
|
||||
.select({
|
||||
id: filing.id,
|
||||
ticker: filing.ticker,
|
||||
cik: filing.cik,
|
||||
accessionNumber: filing.accession_number,
|
||||
filingDate: filing.filing_date,
|
||||
filingType: filing.filing_type,
|
||||
metrics: filing.metrics
|
||||
})
|
||||
.from(filing)
|
||||
.where(and(...conditions))
|
||||
.orderBy(asc(filing.id))
|
||||
.limit(PAGE_SIZE);
|
||||
|
||||
if (page.length === 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
for (const row of page) {
|
||||
if (row.filingType !== '10-K' && row.filingType !== '10-Q') {
|
||||
continue;
|
||||
}
|
||||
|
||||
rows.push({
|
||||
...row,
|
||||
filingType: row.filingType,
|
||||
metrics: toMetrics(row.metrics)
|
||||
});
|
||||
}
|
||||
|
||||
cursor = page[page.length - 1]?.id ?? cursor;
|
||||
}
|
||||
|
||||
return rows;
|
||||
}
|
||||
|
||||
function groupByCik(rows: FilingRow[]) {
|
||||
const grouped = new Map<string, FilingRow[]>();
|
||||
|
||||
for (const row of rows) {
|
||||
const group = grouped.get(row.cik);
|
||||
if (group) {
|
||||
group.push(row);
|
||||
continue;
|
||||
}
|
||||
|
||||
grouped.set(row.cik, [row]);
|
||||
}
|
||||
|
||||
return grouped;
|
||||
}
|
||||
|
||||
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
|
||||
const startedAt = new Date();
|
||||
const rows = await loadFinancialRows(options.ticker);
|
||||
const grouped = groupByCik(rows);
|
||||
const entries = [...grouped.entries()];
|
||||
const limitedEntries = options.limitCiks
|
||||
? entries.slice(0, options.limitCiks)
|
||||
: entries;
|
||||
|
||||
const summary: ScriptSummary = {
|
||||
scannedRows: 0,
|
||||
scannedCiks: 0,
|
||||
changedRows: 0,
|
||||
unchangedRows: 0,
|
||||
skippedRows: 0,
|
||||
skippedCiks: 0
|
||||
};
|
||||
|
||||
console.log(`[backfill-filing-metrics] mode=${options.apply ? 'apply' : 'dry-run'} rows=${rows.length} ciks=${limitedEntries.length}`);
|
||||
if (options.ticker) {
|
||||
console.log(`[backfill-filing-metrics] ticker filter=${options.ticker}`);
|
||||
}
|
||||
|
||||
for (const [index, [cik, filingsForCik]] of limitedEntries.entries()) {
|
||||
summary.scannedCiks += 1;
|
||||
summary.scannedRows += filingsForCik.length;
|
||||
|
||||
const ticker = filingsForCik[0]?.ticker ?? 'UNKNOWN';
|
||||
console.log(`[backfill-filing-metrics] [${index + 1}/${limitedEntries.length}] CIK=${cik} ticker=${ticker} filings=${filingsForCik.length}`);
|
||||
|
||||
const metricsMap = await fetchFilingMetricsForFilings(
|
||||
cik,
|
||||
ticker,
|
||||
filingsForCik.map((entry) => ({
|
||||
accessionNumber: entry.accessionNumber,
|
||||
filingDate: entry.filingDate,
|
||||
filingType: entry.filingType
|
||||
}))
|
||||
);
|
||||
|
||||
const fetchedMetrics = filingsForCik.map((entry) => metricsMap.get(entry.accessionNumber) ?? null);
|
||||
const apiReturnedAllNull = fetchedMetrics.every((entry) => isAllNull(entry));
|
||||
const cikHasExistingValues = filingsForCik.some((entry) => hasAnyMetricValue(entry.metrics));
|
||||
|
||||
if (apiReturnedAllNull && cikHasExistingValues && !options.allowNullOverwrite) {
|
||||
summary.skippedCiks += 1;
|
||||
summary.skippedRows += filingsForCik.length;
|
||||
console.warn(`[backfill-filing-metrics] skipped CIK=${cik}: API returned all-null metrics; pass --allow-null-overwrite to force replacement`);
|
||||
await Bun.sleep(REQUEST_DELAY_MS);
|
||||
continue;
|
||||
}
|
||||
|
||||
const pendingUpdates: Array<{ id: number; metrics: FilingMetrics | null }> = [];
|
||||
|
||||
for (const entry of filingsForCik) {
|
||||
const nextMetrics = toMetrics(metricsMap.get(entry.accessionNumber) ?? null);
|
||||
if (metricsEqual(entry.metrics, nextMetrics)) {
|
||||
summary.unchangedRows += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
summary.changedRows += 1;
|
||||
pendingUpdates.push({
|
||||
id: entry.id,
|
||||
metrics: nextMetrics
|
||||
});
|
||||
}
|
||||
|
||||
if (!options.apply || pendingUpdates.length === 0) {
|
||||
console.log(`[backfill-filing-metrics] changes=${pendingUpdates.length} (not persisted)`);
|
||||
await Bun.sleep(REQUEST_DELAY_MS);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const update of pendingUpdates) {
|
||||
await db
|
||||
.update(filing)
|
||||
.set({
|
||||
metrics: update.metrics,
|
||||
updated_at: new Date().toISOString()
|
||||
})
|
||||
.where(eq(filing.id, update.id));
|
||||
}
|
||||
|
||||
console.log(`[backfill-filing-metrics] updated rows=${pendingUpdates.length}`);
|
||||
await Bun.sleep(REQUEST_DELAY_MS);
|
||||
}
|
||||
|
||||
const elapsedMs = Date.now() - startedAt.getTime();
|
||||
console.log(
|
||||
[
|
||||
'[backfill-filing-metrics] complete',
|
||||
` mode=${options.apply ? 'apply' : 'dry-run'}`,
|
||||
` scanned_ciks=${summary.scannedCiks}`,
|
||||
` scanned_rows=${summary.scannedRows}`,
|
||||
` changed_rows=${summary.changedRows}`,
|
||||
` unchanged_rows=${summary.unchangedRows}`,
|
||||
` skipped_ciks=${summary.skippedCiks}`,
|
||||
` skipped_rows=${summary.skippedRows}`,
|
||||
` elapsed_ms=${elapsedMs}`
|
||||
].join('\n')
|
||||
);
|
||||
|
||||
return summary;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const options = parseOptions(Bun.argv.slice(2));
|
||||
await runBackfill(options);
|
||||
}
|
||||
|
||||
main().catch((error) => {
|
||||
const message = error instanceof Error ? error.stack ?? error.message : String(error);
|
||||
console.error('[backfill-filing-metrics] failed\n' + message);
|
||||
process.exitCode = 1;
|
||||
});
|
||||
Reference in New Issue
Block a user