Files
Neon-Desk/scripts/backfill-taxonomy-snapshots.ts

275 lines
7.7 KiB
TypeScript

import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
import { listFilingsRecords, updateFilingMetricsById } from '@/lib/server/repos/filings';
import {
getFilingTaxonomySnapshotByFilingId,
normalizeFilingTaxonomySnapshotPayload,
upsertFilingTaxonomySnapshot
} from '@/lib/server/repos/filing-taxonomy';
import { getIssuerOverlay } from '@/lib/server/repos/issuer-overlays';
/**
 * Command-line options for this script, as produced by `parseOptions`.
 */
type ScriptOptions = {
// True when `--apply` was passed; otherwise the run is logged as `dry-run` and nothing is upserted.
apply: boolean;
// Upper-cased symbol from `--ticker=`; null when the flag is absent or its value is empty.
ticker: string | null;
// '10y' keeps only filings whose date is within the last ten years; 'all' disables that date filter.
window: '10y' | 'all';
// Positive cap on the number of filings loaded (`--limit=`); null means no cap.
limit: number | null;
// True when `--refresh` was passed: filings are re-hydrated even if their stored snapshot is fresh.
refresh: boolean;
};
/**
 * Counters accumulated by `runBackfill` and printed by `main`.
 */
type ScriptSummary = {
// Filings visited by the main loop (incremented once per row, before any skip decision).
scanned: number;
// Filings whose snapshot was hydrated successfully; counted in both dry-run and apply mode.
wouldWrite: number;
// Filings whose snapshot and metrics were actually persisted (apply mode only).
written: number;
// Filings skipped because the stored snapshot was considered fresh and `--refresh` was not set.
skippedFresh: number;
// Filings for which hydration or persistence threw.
failed: number;
};
/**
 * The issuer overlay data `runBackfill` needs for one ticker, as built by
 * `loadActiveOverlayState`.
 */
type ActiveOverlayState = {
// Type of `active_revision.definition_json` on whatever `getIssuerOverlay` resolves to, or null.
// Each conditional layer falls back to `null` when the resolved type does not have the expected
// shape, so this alias follows the repo's type instead of re-declaring it here.
definition: Awaited<ReturnType<typeof getIssuerOverlay>> extends infer T
? T extends { active_revision: infer R | null }
? R extends { definition_json: infer D }
? D | null
: null
: null
: null;
// The overlay's `active_revision_id`, or null when there is no overlay / no active revision id.
revisionId: number | null;
};
/**
 * camelCase projection of a filing record, built in `loadFilings` from the
 * snake_case rows returned by `listFilingsRecords`.
 */
type FilingRow = {
id: number;
ticker: string;
cik: string;
accessionNumber: string;
// Compared lexicographically against a `YYYY-MM-DD` cutoff in `loadFilings`.
filingDate: string;
// `loadFilings` drops every record whose filing type is not one of these two.
filingType: '10-K' | '10-Q';
filingUrl: string | null;
// `loadFilings` maps a missing `primary_document` to null.
primaryDocument: string | null;
// Parsed with `Date.parse` in `runBackfill` to decide whether a stored snapshot is still fresh.
updatedAt: string;
};
// Milliseconds `runBackfill` sleeps after each filing it attempts to hydrate; filings skipped as
// fresh do not incur the delay. Presumably this throttles upstream requests — TODO confirm.
const REQUEST_DELAY_MS = 120;
/**
 * Resolves the active issuer overlay for a ticker, memoised per normalised ticker.
 *
 * The cache stores the in-flight promise rather than the resolved value, so
 * repeated calls for the same ticker share a single `getIssuerOverlay` lookup.
 *
 * @param ticker Ticker symbol; trimmed and upper-cased before use as the cache key.
 * @param cache  Caller-owned map from normalised ticker to the pending/resolved state.
 * @returns The overlay definition and revision id, each `null` when not available.
 */
async function loadActiveOverlayState(
  ticker: string,
  cache: Map<string, Promise<ActiveOverlayState>>,
) {
  const cacheKey = ticker.trim().toUpperCase();

  // Kept as an async thunk so that any failure surfaces as a rejected promise.
  const fetchState = async (): Promise<ActiveOverlayState> => {
    const overlay = await getIssuerOverlay(cacheKey);
    const definition = overlay?.active_revision?.definition_json ?? null;
    const revisionId = overlay?.active_revision_id ?? null;
    return { definition, revisionId };
  };

  let inFlight = cache.get(cacheKey);
  if (inFlight === undefined) {
    inFlight = fetchState();
    cache.set(cacheKey, inFlight);
  }
  return await inFlight;
}
/**
 * Parses the script's command-line arguments.
 *
 * Recognised flags: `--apply`, `--refresh`, `--ticker=SYMBOL`, `--window=10y|all`,
 * `--limit=N`, and `--help` / `-h` (prints usage and exits the process with code 0).
 * Anything else is ignored. Later occurrences of a flag override earlier ones, except
 * that an invalid `--limit=` value leaves the previously parsed limit untouched.
 *
 * @param argv Arguments after the script path (e.g. `process.argv.slice(2)`).
 * @returns The parsed options; defaults are dry-run, no ticker, '10y' window, no limit, no refresh.
 */
function parseOptions(argv: string[]): ScriptOptions {
  let apply = false;
  let refresh = false;
  let ticker: string | null = null;
  let windowMode: ScriptOptions['window'] = '10y';
  let limit: number | null = null;

  for (const arg of argv) {
    // Split `--flag=value` at the first '='; bare flags have no value part.
    const separatorAt = arg.indexOf('=');
    const hasValue = separatorAt !== -1;
    const flag = hasValue ? arg.slice(0, separatorAt) : arg;
    const rawValue = hasValue ? arg.slice(separatorAt + 1) : '';

    switch (flag) {
      case '--apply':
        // Boolean flags only match when written without a value.
        if (!hasValue) {
          apply = true;
        }
        break;
      case '--refresh':
        if (!hasValue) {
          refresh = true;
        }
        break;
      case '--ticker':
        if (hasValue) {
          const symbol = rawValue.trim().toUpperCase();
          ticker = symbol.length > 0 ? symbol : null;
        }
        break;
      case '--window':
        if (hasValue) {
          // Any value other than "all" falls back to the ten-year window.
          windowMode = rawValue.trim().toLowerCase() === 'all' ? 'all' : '10y';
        }
        break;
      case '--limit':
        if (hasValue) {
          const count = Number.parseInt(rawValue, 10);
          if (Number.isFinite(count) && count > 0) {
            limit = count;
          }
        }
        break;
      case '--help':
      case '-h':
        if (!hasValue) {
          const usageLines = [
            'Backfill filing taxonomy snapshots (Financial Statements V3).',
            '',
            'Usage:',
            ' bun run scripts/backfill-taxonomy-snapshots.ts [--apply] [--ticker=SYMBOL] [--window=10y|all] [--limit=N] [--refresh]'
          ];
          for (const line of usageLines) {
            console.log(line);
          }
          process.exit(0);
        }
        break;
      default:
        break;
    }
  }

  return { apply, ticker, window: windowMode, limit, refresh };
}
/**
 * Returns the UTC calendar date exactly ten years before now as `YYYY-MM-DD`.
 *
 * A month/day that does not exist in the target year (Feb 29) rolls forward
 * to the next valid day, following the normal `Date` overflow rules.
 */
function tenYearsAgoIso() {
  const now = new Date();
  const shifted = new Date(
    Date.UTC(now.getUTCFullYear() - 10, now.getUTCMonth(), now.getUTCDate())
  );
  return shifted.toISOString().substring(0, 10);
}
/**
 * Loads the filings to process, filtered and capped according to `options`.
 *
 * Only `10-K` and `10-Q` filings are kept. Unless `options.window` is `'all'`,
 * filings dated before the ten-year cutoff are dropped (string comparison on
 * `YYYY-MM-DD` dates). Collection stops early once `options.limit` rows have
 * been gathered.
 *
 * NOTE(review): the query passed to `listFilingsRecords` carries no offset or
 * cursor, so every loop iteration appears to request the same first page of up
 * to 250 records; the local `cursor` then slices off the rows already taken and
 * the loop ends on the second iteration. If `listFilingsRecords` supports real
 * pagination, the offset should be forwarded here — confirm against the repo API.
 *
 * @param options Parsed script options (`ticker`, `window` and `limit` are used).
 * @returns The matching filings as `FilingRow` objects, in the order returned by the repo.
 */
async function loadFilings(options: ScriptOptions): Promise<FilingRow[]> {
  const rows: FilingRow[] = [];
  // Computed once per call instead of once per filing; null disables the date filter.
  const cutoffDate = options.window === 'all' ? null : tenYearsAgoIso();
  let cursor = 0;

  while (true) {
    const page = await listFilingsRecords({
      ticker: options.ticker ?? undefined,
      limit: 250
    });

    const normalizedPage = page
      .filter((filing): filing is typeof filing & { filing_type: '10-K' | '10-Q' } => {
        return filing.filing_type === '10-K' || filing.filing_type === '10-Q';
      })
      .filter((filing) => cutoffDate === null || filing.filing_date >= cutoffDate)
      // Skip the rows that earlier iterations already consumed.
      .slice(cursor);

    if (normalizedPage.length === 0) {
      break;
    }

    for (const filing of normalizedPage) {
      rows.push({
        id: filing.id,
        ticker: filing.ticker,
        cik: filing.cik,
        accessionNumber: filing.accession_number,
        filingDate: filing.filing_date,
        filingType: filing.filing_type,
        filingUrl: filing.filing_url,
        primaryDocument: filing.primary_document ?? null,
        updatedAt: filing.updated_at
      });

      // A null (or zero) limit means "no cap".
      if (options.limit && rows.length >= options.limit) {
        return rows;
      }
    }

    cursor += normalizedPage.length;
  }

  return rows;
}
/**
 * Walks every selected filing and (re)builds its taxonomy snapshot.
 *
 * For each filing the active issuer overlay and the stored snapshot are looked
 * up. A stored snapshot counts as fresh when it is at least as recent as the
 * filing row and was built against the currently active overlay revision; fresh
 * filings are skipped unless `options.refresh` is set. Otherwise the snapshot is
 * hydrated and, in apply mode only, upserted together with the filing's derived
 * metrics. Hydration/persistence errors are logged and counted per filing
 * without aborting the run; errors from the lookups before that propagate.
 *
 * @param options Parsed script options.
 * @returns Counters describing what was scanned, skipped, written and failed.
 */
async function runBackfill(options: ScriptOptions): Promise<ScriptSummary> {
  const filings = await loadFilings(options);
  const overlaysByTicker = new Map<string, Promise<ActiveOverlayState>>();
  const totals: ScriptSummary = {
    scanned: 0,
    wouldWrite: 0,
    written: 0,
    skippedFresh: 0,
    failed: 0
  };

  const modeLabel = options.apply ? 'apply' : 'dry-run';
  console.log(`[backfill-taxonomy-snapshots] mode=${modeLabel} window=${options.window} filings=${filings.length}`);
  if (options.ticker) {
    console.log(`[backfill-taxonomy-snapshots] ticker=${options.ticker}`);
  }

  let position = 0;
  for (const filing of filings) {
    position += 1;
    totals.scanned += 1;
    console.log(`[backfill-taxonomy-snapshots] [${position}/${filings.length}] ${filing.ticker} ${filing.filingType} ${filing.filingDate} ${filing.accessionNumber}`);

    const overlay = await loadActiveOverlayState(filing.ticker, overlaysByTicker);
    const stored = await getFilingTaxonomySnapshotByFilingId(filing.id);

    // Fresh = snapshot exists, is not older than the filing row, and matches the active overlay revision.
    let upToDate = false;
    if (stored) {
      const notOlderThanFiling = Date.parse(stored.updated_at) >= Date.parse(filing.updatedAt);
      const sameOverlayRevision = (stored.issuer_overlay_revision_id ?? null) === overlay.revisionId;
      upToDate = notOlderThanFiling && sameOverlayRevision;
    }

    if (upToDate && !options.refresh) {
      // Skipped filings do not trigger the request delay below.
      totals.skippedFresh += 1;
      continue;
    }

    try {
      const {
        id: filingId,
        ticker,
        cik,
        accessionNumber,
        filingDate,
        filingType,
        filingUrl,
        primaryDocument
      } = filing;
      const snapshot = await hydrateFilingTaxonomySnapshot({
        filingId,
        ticker,
        cik,
        accessionNumber,
        filingDate,
        filingType,
        filingUrl,
        primaryDocument,
        issuerOverlay: overlay.definition
      });
      totals.wouldWrite += 1;

      if (options.apply) {
        // Fields from the normalised payload are spread last, so they win over the raw snapshot
        // and over the revision id set here if the payload also carries one.
        const payload = {
          ...snapshot,
          issuer_overlay_revision_id: overlay.revisionId,
          ...normalizeFilingTaxonomySnapshotPayload(snapshot)
        };
        await upsertFilingTaxonomySnapshot(payload);
        await updateFilingMetricsById(filing.id, payload.derived_metrics);
        totals.written += 1;
      }
    } catch (error) {
      totals.failed += 1;
      const reason = error instanceof Error ? error.message : 'unknown error';
      console.error(`[backfill-taxonomy-snapshots] failed for ${filing.accessionNumber}: ${reason}`);
    }

    await Bun.sleep(REQUEST_DELAY_MS);
  }

  return totals;
}
/**
 * Script driver: parses the CLI arguments, runs the backfill and prints the
 * summary counters plus the elapsed time. Any error escaping the backfill is
 * logged as fatal and turns the process exit code into 1.
 */
async function main() {
  const options = parseOptions(process.argv.slice(2));
  const startedAt = Date.now();

  try {
    const { scanned, wouldWrite, written, skippedFresh, failed } = await runBackfill(options);
    const elapsedMs = Date.now() - startedAt;
    const durationSec = (elapsedMs / 1000).toFixed(1);

    console.log('[backfill-taxonomy-snapshots] summary');
    console.log(` scanned=${scanned}`);
    console.log(` wouldWrite=${wouldWrite}`);
    console.log(` written=${written}`);
    console.log(` skippedFresh=${skippedFresh}`);
    console.log(` failed=${failed}`);
    console.log(` durationSec=${durationSec}`);
  } catch (error) {
    let message: string;
    if (error instanceof Error) {
      message = error.message;
    } else {
      message = String(error);
    }
    console.error(`[backfill-taxonomy-snapshots] fatal: ${message}`);
    // Signal failure without cutting the process short.
    process.exitCode = 1;
  }
}
// Script entry point. `void` marks the returned promise as intentionally not awaited;
// `main` catches backfill failures itself and reports them through `process.exitCode`.
void main();