// TypeScript source · 340 lines · 9.0 KiB
import { asc, and, eq, gt, inArray } from 'drizzle-orm';
|
|
import { db } from '@/lib/server/db';
|
|
import { filing } from '@/lib/server/db/schema';
|
|
import { fetchFilingMetricsForFilings } from '@/lib/server/sec';
|
|
|
|
const FINANCIAL_FORMS = ['10-K', '10-Q'] as const;
|
|
const PAGE_SIZE = 500;
|
|
const REQUEST_DELAY_MS = 220;
|
|
|
|
type FinancialForm = (typeof FINANCIAL_FORMS)[number];
|
|
|
|
type FilingMetrics = {
|
|
revenue: number | null;
|
|
netIncome: number | null;
|
|
totalAssets: number | null;
|
|
cash: number | null;
|
|
debt: number | null;
|
|
};
|
|
|
|
type FilingRow = {
|
|
id: number;
|
|
ticker: string;
|
|
cik: string;
|
|
accessionNumber: string;
|
|
filingDate: string;
|
|
filingType: FinancialForm;
|
|
metrics: FilingMetrics | null;
|
|
};
|
|
|
|
type ScriptOptions = {
|
|
apply: boolean;
|
|
ticker: string | null;
|
|
limitCiks: number | null;
|
|
allowNullOverwrite: boolean;
|
|
};
|
|
|
|
type ScriptSummary = {
|
|
scannedRows: number;
|
|
scannedCiks: number;
|
|
changedRows: number;
|
|
unchangedRows: number;
|
|
skippedRows: number;
|
|
skippedCiks: number;
|
|
};
|
|
|
|
function parseOptions(argv: string[]): ScriptOptions {
|
|
const options: ScriptOptions = {
|
|
apply: false,
|
|
ticker: null,
|
|
limitCiks: null,
|
|
allowNullOverwrite: false
|
|
};
|
|
|
|
for (const arg of argv) {
|
|
if (arg === '--apply') {
|
|
options.apply = true;
|
|
continue;
|
|
}
|
|
|
|
if (arg === '--allow-null-overwrite') {
|
|
options.allowNullOverwrite = true;
|
|
continue;
|
|
}
|
|
|
|
if (arg.startsWith('--ticker=')) {
|
|
const raw = arg.slice('--ticker='.length).trim();
|
|
options.ticker = raw ? raw.toUpperCase() : null;
|
|
continue;
|
|
}
|
|
|
|
if (arg.startsWith('--limit-ciks=')) {
|
|
const raw = Number(arg.slice('--limit-ciks='.length));
|
|
if (Number.isFinite(raw) && raw > 0) {
|
|
options.limitCiks = Math.trunc(raw);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (arg === '--help' || arg === '-h') {
|
|
printUsage();
|
|
process.exit(0);
|
|
}
|
|
}
|
|
|
|
return options;
|
|
}
|
|
|
|
function printUsage() {
|
|
console.log(
|
|
[
|
|
'One-time backfill of filing metrics using per-filing SEC mapping.',
|
|
'',
|
|
'Usage:',
|
|
' bun run scripts/backfill-filing-metrics.ts [--apply] [--ticker=SYMBOL] [--limit-ciks=N] [--allow-null-overwrite]',
|
|
'',
|
|
'Flags:',
|
|
' --apply Persist updates. Without this flag, script runs as a dry run.',
|
|
' --ticker=SYMBOL Restrict processing to one ticker (e.g. AAPL).',
|
|
' --limit-ciks=N Process only first N CIK groups (useful for staged rollouts).',
|
|
' --allow-null-overwrite Allow replacing non-null metrics with all-null API results.',
|
|
' --help, -h Show help.'
|
|
].join('\n')
|
|
);
|
|
}
|
|
|
|
function toMetrics(value: FilingMetrics | null | undefined): FilingMetrics | null {
|
|
if (!value) {
|
|
return null;
|
|
}
|
|
|
|
return {
|
|
revenue: value.revenue ?? null,
|
|
netIncome: value.netIncome ?? null,
|
|
totalAssets: value.totalAssets ?? null,
|
|
cash: value.cash ?? null,
|
|
debt: value.debt ?? null
|
|
};
|
|
}
|
|
|
|
function hasAnyMetricValue(metrics: FilingMetrics | null | undefined) {
|
|
const normalized = toMetrics(metrics);
|
|
if (!normalized) {
|
|
return false;
|
|
}
|
|
|
|
return Object.values(normalized).some((value) => value !== null && Number.isFinite(value));
|
|
}
|
|
|
|
function isAllNull(metrics: FilingMetrics | null | undefined) {
|
|
const normalized = toMetrics(metrics);
|
|
if (!normalized) {
|
|
return true;
|
|
}
|
|
|
|
return Object.values(normalized).every((value) => value === null);
|
|
}
|
|
|
|
function metricsEqual(left: FilingMetrics | null | undefined, right: FilingMetrics | null | undefined) {
|
|
const a = toMetrics(left);
|
|
const b = toMetrics(right);
|
|
|
|
if (!a && !b) {
|
|
return true;
|
|
}
|
|
|
|
if (!a || !b) {
|
|
return false;
|
|
}
|
|
|
|
return a.revenue === b.revenue
|
|
&& a.netIncome === b.netIncome
|
|
&& a.totalAssets === b.totalAssets
|
|
&& a.cash === b.cash
|
|
&& a.debt === b.debt;
|
|
}
|
|
|
|
async function loadFinancialRows(tickerFilter: string | null): Promise<FilingRow[]> {
|
|
const rows: FilingRow[] = [];
|
|
let cursor = 0;
|
|
|
|
for (;;) {
|
|
const conditions = [
|
|
inArray(filing.filing_type, [...FINANCIAL_FORMS]),
|
|
gt(filing.id, cursor)
|
|
];
|
|
|
|
if (tickerFilter) {
|
|
conditions.push(eq(filing.ticker, tickerFilter));
|
|
}
|
|
|
|
const page = await db
|
|
.select({
|
|
id: filing.id,
|
|
ticker: filing.ticker,
|
|
cik: filing.cik,
|
|
accessionNumber: filing.accession_number,
|
|
filingDate: filing.filing_date,
|
|
filingType: filing.filing_type,
|
|
metrics: filing.metrics
|
|
})
|
|
.from(filing)
|
|
.where(and(...conditions))
|
|
.orderBy(asc(filing.id))
|
|
.limit(PAGE_SIZE);
|
|
|
|
if (page.length === 0) {
|
|
break;
|
|
}
|
|
|
|
for (const row of page) {
|
|
if (row.filingType !== '10-K' && row.filingType !== '10-Q') {
|
|
continue;
|
|
}
|
|
|
|
rows.push({
|
|
...row,
|
|
filingType: row.filingType,
|
|
metrics: toMetrics(row.metrics)
|
|
});
|
|
}
|
|
|
|
cursor = page[page.length - 1]?.id ?? cursor;
|
|
}
|
|
|
|
return rows;
|
|
}
|
|
|
|
function groupByCik(rows: FilingRow[]) {
|
|
const grouped = new Map<string, FilingRow[]>();
|
|
|
|
for (const row of rows) {
|
|
const group = grouped.get(row.cik);
|
|
if (group) {
|
|
group.push(row);
|
|
continue;
|
|
}
|
|
|
|
grouped.set(row.cik, [row]);
|
|
}
|
|
|
|
return grouped;
|
|
}
|
|
|
|
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
|
|
const startedAt = new Date();
|
|
const rows = await loadFinancialRows(options.ticker);
|
|
const grouped = groupByCik(rows);
|
|
const entries = [...grouped.entries()];
|
|
const limitedEntries = options.limitCiks
|
|
? entries.slice(0, options.limitCiks)
|
|
: entries;
|
|
|
|
const summary: ScriptSummary = {
|
|
scannedRows: 0,
|
|
scannedCiks: 0,
|
|
changedRows: 0,
|
|
unchangedRows: 0,
|
|
skippedRows: 0,
|
|
skippedCiks: 0
|
|
};
|
|
|
|
console.log(`[backfill-filing-metrics] mode=${options.apply ? 'apply' : 'dry-run'} rows=${rows.length} ciks=${limitedEntries.length}`);
|
|
if (options.ticker) {
|
|
console.log(`[backfill-filing-metrics] ticker filter=${options.ticker}`);
|
|
}
|
|
|
|
for (const [index, [cik, filingsForCik]] of limitedEntries.entries()) {
|
|
summary.scannedCiks += 1;
|
|
summary.scannedRows += filingsForCik.length;
|
|
|
|
const ticker = filingsForCik[0]?.ticker ?? 'UNKNOWN';
|
|
console.log(`[backfill-filing-metrics] [${index + 1}/${limitedEntries.length}] CIK=${cik} ticker=${ticker} filings=${filingsForCik.length}`);
|
|
|
|
const metricsMap = await fetchFilingMetricsForFilings(
|
|
cik,
|
|
ticker,
|
|
filingsForCik.map((entry) => ({
|
|
accessionNumber: entry.accessionNumber,
|
|
filingDate: entry.filingDate,
|
|
filingType: entry.filingType
|
|
}))
|
|
);
|
|
|
|
const fetchedMetrics = filingsForCik.map((entry) => metricsMap.get(entry.accessionNumber) ?? null);
|
|
const apiReturnedAllNull = fetchedMetrics.every((entry) => isAllNull(entry));
|
|
const cikHasExistingValues = filingsForCik.some((entry) => hasAnyMetricValue(entry.metrics));
|
|
|
|
if (apiReturnedAllNull && cikHasExistingValues && !options.allowNullOverwrite) {
|
|
summary.skippedCiks += 1;
|
|
summary.skippedRows += filingsForCik.length;
|
|
console.warn(`[backfill-filing-metrics] skipped CIK=${cik}: API returned all-null metrics; pass --allow-null-overwrite to force replacement`);
|
|
await Bun.sleep(REQUEST_DELAY_MS);
|
|
continue;
|
|
}
|
|
|
|
const pendingUpdates: Array<{ id: number; metrics: FilingMetrics | null }> = [];
|
|
|
|
for (const entry of filingsForCik) {
|
|
const nextMetrics = toMetrics(metricsMap.get(entry.accessionNumber) ?? null);
|
|
if (metricsEqual(entry.metrics, nextMetrics)) {
|
|
summary.unchangedRows += 1;
|
|
continue;
|
|
}
|
|
|
|
summary.changedRows += 1;
|
|
pendingUpdates.push({
|
|
id: entry.id,
|
|
metrics: nextMetrics
|
|
});
|
|
}
|
|
|
|
if (!options.apply || pendingUpdates.length === 0) {
|
|
console.log(`[backfill-filing-metrics] changes=${pendingUpdates.length} (not persisted)`);
|
|
await Bun.sleep(REQUEST_DELAY_MS);
|
|
continue;
|
|
}
|
|
|
|
for (const update of pendingUpdates) {
|
|
await db
|
|
.update(filing)
|
|
.set({
|
|
metrics: update.metrics,
|
|
updated_at: new Date().toISOString()
|
|
})
|
|
.where(eq(filing.id, update.id));
|
|
}
|
|
|
|
console.log(`[backfill-filing-metrics] updated rows=${pendingUpdates.length}`);
|
|
await Bun.sleep(REQUEST_DELAY_MS);
|
|
}
|
|
|
|
const elapsedMs = Date.now() - startedAt.getTime();
|
|
console.log(
|
|
[
|
|
'[backfill-filing-metrics] complete',
|
|
` mode=${options.apply ? 'apply' : 'dry-run'}`,
|
|
` scanned_ciks=${summary.scannedCiks}`,
|
|
` scanned_rows=${summary.scannedRows}`,
|
|
` changed_rows=${summary.changedRows}`,
|
|
` unchanged_rows=${summary.unchangedRows}`,
|
|
` skipped_ciks=${summary.skippedCiks}`,
|
|
` skipped_rows=${summary.skippedRows}`,
|
|
` elapsed_ms=${elapsedMs}`
|
|
].join('\n')
|
|
);
|
|
|
|
return summary;
|
|
}
|
|
|
|
async function main() {
|
|
const options = parseOptions(Bun.argv.slice(2));
|
|
await runBackfill(options);
|
|
}
|
|
|
|
main().catch((error) => {
|
|
const message = error instanceof Error ? error.stack ?? error.message : String(error);
|
|
console.error('[backfill-filing-metrics] failed\n' + message);
|
|
process.exitCode = 1;
|
|
});
|