import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import type {
  IssuerOverlayDefinition,
  IssuerOverlayDiagnostics,
  IssuerOverlayStats,
} from "@/lib/server/db/schema";
import type {
  FilingTaxonomyConceptRecord,
  FilingTaxonomySnapshotRecord,
} from "@/lib/server/repos/filing-taxonomy";
import {
  listFilingTaxonomyConceptsBySnapshotIds,
  listFilingTaxonomySnapshotsByTicker,
} from "@/lib/server/repos/filing-taxonomy";
import {
  ensureIssuerOverlayRow,
  getIssuerOverlay,
  markIssuerOverlayBuildState,
  normalizeIssuerOverlayDefinition,
  publishIssuerOverlayRevision,
} from "@/lib/server/repos/issuer-overlays";

/** Shape of a `*.surface.json` file as consumed by this module. */
type SurfaceFile = {
  surfaces?: Array<{
    surface_key: string;
    statement: string;
    allowed_source_concepts?: string[];
    allowed_authoritative_concepts?: string[];
  }>;
};

/** One surface of the merged catalog, with its concepts de-duplicated. */
type SurfaceCatalogEntry = {
  surface_key: string;
  statement:
    | "income"
    | "balance"
    | "cash_flow"
    | "disclosure"
    | "equity"
    | "comprehensive_income";
  sourceConcepts: Set<string>;
  authoritativeConcepts: Set<string>;
};

/** Catalog keyed by `surface_key`. */
type SurfaceCatalog = Map<string, SurfaceCatalogEntry>;

/** Lookup tables from a normalized local name to the surface keys that list it. */
type SurfaceIndexes = {
  authoritative: Map<string, Set<string>>;
  localNames: Map<string, Set<string>>;
};

type AcceptedCandidate = {
  qname: string;
  surface_key: string;
  statement: SurfaceCatalogEntry["statement"];
  reason: "authoritative_match" | "local_name_match";
  source_snapshot_ids: number[];
};

type RejectedCandidate = {
  qname: string;
  reason: string;
  source_snapshot_ids: number[];
};

/** Result of evaluating one qname group: exactly one side is non-null. */
type CandidateSelection =
  | { accepted: AcceptedCandidate; rejected: null }
  | { accepted: null; rejected: RejectedCandidate };

/** Statement values accepted from surface files; anything else is skipped. */
const SURFACE_STATEMENTS: ReadonlySet<string> = new Set([
  "income",
  "balance",
  "cash_flow",
  "disclosure",
  "equity",
  "comprehensive_income",
]);

/** Type guard: is `value` one of the statement kinds the catalog supports? */
function isSurfaceStatement(
  value: string,
): value is SurfaceCatalogEntry["statement"] {
  return SURFACE_STATEMENTS.has(value);
}

/** Trims and upper-cases a ticker symbol. */
function normalizeTicker(ticker: string) {
  return ticker.trim().toUpperCase();
}

/**
 * Reduces a concept identifier to its lower-cased local name.
 *
 * Takes the text after the last `:` when one is present, otherwise the text
 * after the last `#`, otherwise the whole value.
 *
 * @returns The normalized local name, or `null` when the input is nullish,
 *   blank, or leaves nothing after stripping the prefix.
 */
function normalizeLocalName(value: string | null | undefined) {
  const normalized = value?.trim() ?? "";
  if (!normalized) {
    return null;
  }

  let localName = normalized;
  if (normalized.includes(":")) {
    localName = normalized.split(":").pop() ?? normalized;
  } else if (normalized.includes("#")) {
    localName = normalized.split("#").pop() ?? normalized;
  }

  return localName.trim().toLowerCase() || null;
}

/** Resolves a file name inside `<cwd>/rust/taxonomy/fiscal/v1`. */
function taxonomyFilePath(fileName: string) {
  return join(process.cwd(), "rust", "taxonomy", "fiscal", "v1", fileName);
}

/**
 * Reads and parses a surface file from the taxonomy directory.
 *
 * @returns The parsed file, or `null` when the file does not exist.
 * @throws When the file exists but cannot be read or is not valid JSON.
 */
function loadOptionalSurfaceFile(fileName: string) {
  const path = taxonomyFilePath(fileName);
  if (!existsSync(path)) {
    return null;
  }

  // The parsed JSON is trusted to match SurfaceFile; no schema validation is done.
  return JSON.parse(readFileSync(path, "utf8")) as SurfaceFile;
}

/**
 * Merges the surfaces of `file` into `catalog` (mutated in place).
 *
 * Surfaces with an unsupported statement are skipped. When a surface key is
 * already present, its concept sets are extended and the existing statement
 * is kept.
 */
function applySurfaceFile(catalog: SurfaceCatalog, file: SurfaceFile | null) {
  for (const surface of file?.surfaces ?? []) {
    if (!isSurfaceStatement(surface.statement)) {
      continue;
    }

    const existing: SurfaceCatalogEntry = catalog.get(surface.surface_key) ?? {
      surface_key: surface.surface_key,
      statement: surface.statement,
      sourceConcepts: new Set<string>(),
      authoritativeConcepts: new Set<string>(),
    };

    for (const concept of surface.allowed_source_concepts ?? []) {
      existing.sourceConcepts.add(concept);
    }
    for (const concept of surface.allowed_authoritative_concepts ?? []) {
      existing.authoritativeConcepts.add(concept);
    }

    catalog.set(surface.surface_key, existing);
  }
}

/**
 * Builds the surface catalog for a pack.
 *
 * Loads `<pack>.surface.json` and `<pack>.disclosure.surface.json`, then, for
 * any pack other than `core`, layers the two `core` files on top. A null or
 * blank pack is treated as `core`. Missing files are ignored.
 */
function loadSurfaceCatalog(pack: string | null) {
  const normalizedPack = pack?.trim() || "core";
  const catalog: SurfaceCatalog = new Map<string, SurfaceCatalogEntry>();

  applySurfaceFile(
    catalog,
    loadOptionalSurfaceFile(`${normalizedPack}.surface.json`),
  );
  applySurfaceFile(
    catalog,
    loadOptionalSurfaceFile(`${normalizedPack}.disclosure.surface.json`),
  );

  if (normalizedPack !== "core") {
    applySurfaceFile(catalog, loadOptionalSurfaceFile("core.surface.json"));
    applySurfaceFile(
      catalog,
      loadOptionalSurfaceFile("core.disclosure.surface.json"),
    );
  }

  return catalog;
}

/** Adds `surfaceKey` to the set stored under `localName`, creating it if needed. */
function addToIndex(
  index: Map<string, Set<string>>,
  localName: string,
  surfaceKey: string,
) {
  const surfaces = index.get(localName) ?? new Set<string>();
  surfaces.add(surfaceKey);
  index.set(localName, surfaces);
}

/**
 * Builds reverse lookups from normalized local name to surface keys.
 *
 * `authoritative` covers only authoritative concepts; `localNames` covers
 * both source and authoritative concepts.
 */
function buildSurfaceIndexes(catalog: SurfaceCatalog): SurfaceIndexes {
  const authoritative = new Map<string, Set<string>>();
  const localNames = new Map<string, Set<string>>();

  for (const entry of catalog.values()) {
    for (const concept of entry.authoritativeConcepts) {
      const localName = normalizeLocalName(concept);
      if (localName) {
        addToIndex(authoritative, localName, entry.surface_key);
      }
    }

    for (const concept of [
      ...entry.sourceConcepts,
      ...entry.authoritativeConcepts,
    ]) {
      const localName = normalizeLocalName(concept);
      if (localName) {
        addToIndex(localNames, localName, entry.surface_key);
      }
    }
  }

  return { authoritative, localNames };
}

/**
 * Picks the fiscal pack that occurs most often across `snapshots`.
 *
 * Ties are broken by `localeCompare` on the pack name. When no snapshot has a
 * non-blank pack, falls back to the first snapshot's raw `fiscal_pack`, then
 * to `"core"`.
 */
function selectWorkingPack(snapshots: FilingTaxonomySnapshotRecord[]) {
  const counts = new Map<string, number>();

  for (const snapshot of snapshots) {
    const pack = snapshot.fiscal_pack?.trim();
    if (!pack) {
      continue;
    }
    counts.set(pack, (counts.get(pack) ?? 0) + 1);
  }

  const ranked = [...counts.entries()].sort(
    (left, right) => right[1] - left[1] || left[0].localeCompare(right[0]),
  );

  return ranked[0]?.[0] ?? snapshots[0]?.fiscal_pack ?? "core";
}

/** Returns the distinct snapshot ids of `records`, sorted ascending. */
function sourceSnapshotIds(records: FilingTaxonomyConceptRecord[]) {
  return [...new Set(records.map((record) => record.snapshot_id))].sort(
    (left, right) => left - right,
  );
}

/**
 * Resolves a single surface key for a group of normalized names.
 *
 * A match exists only when the group has exactly one distinct name and
 * exactly one surface indexed under that name has the given statement.
 */
function findUniqueSurfaceKey(
  names: string[],
  index: Map<string, Set<string>>,
  catalog: SurfaceCatalog,
  statementKind: FilingTaxonomyConceptRecord["statement_kind"],
): string | null {
  const [onlyName] = names;
  if (names.length !== 1 || onlyName === undefined) {
    return null;
  }

  const matching = [...(index.get(onlyName) ?? new Set<string>())].filter(
    (surfaceKey) => catalog.get(surfaceKey)?.statement === statementKind,
  );

  return matching.length === 1 ? (matching[0] ?? null) : null;
}

/** Collects the distinct non-null normalized local names of `values`. */
function distinctLocalNames(values: Array<string | null | undefined>) {
  return [
    ...new Set(
      values
        .map((value) => normalizeLocalName(value))
        .filter((value): value is string => value !== null),
    ),
  ];
}

/**
 * Decides whether one qname (given all of its concept records) can be mapped
 * onto a catalog surface.
 *
 * Rejection reasons, in evaluation order: `inconsistent_statement_kind`,
 * `insufficient_recurrence` (fewer than two distinct snapshots),
 * `no_unique_surface_match`, `unknown_surface_key`,
 * `promotion_threshold_not_met`.
 *
 * @param records Concept records sharing a qname.
 * @param sampleSnapshots Sampled snapshots, used to find the first 10-K.
 * @param catalog Surface catalog to match against.
 * @param catalogIndexes Pre-built indexes for `catalog`; built on demand when
 *   omitted, so callers evaluating many groups should pass them in.
 */
function chooseCandidate(
  records: FilingTaxonomyConceptRecord[],
  sampleSnapshots: FilingTaxonomySnapshotRecord[],
  catalog: SurfaceCatalog,
  catalogIndexes: SurfaceIndexes = buildSurfaceIndexes(catalog),
): CandidateSelection {
  const qname = records[0]?.qname ?? "unknown";
  const reject = (
    reason: string,
    snapshotIds: number[],
  ): CandidateSelection => ({
    accepted: null,
    rejected: { qname, reason, source_snapshot_ids: snapshotIds },
  });

  const statementKinds = [
    ...new Set(
      records
        .map((record) => record.statement_kind)
        .filter(
          (value): value is NonNullable<typeof value> => value !== null,
        ),
    ),
  ];
  if (statementKinds.length !== 1) {
    return reject("inconsistent_statement_kind", sourceSnapshotIds(records));
  }

  const distinctSnapshotIds = sourceSnapshotIds(records);
  if (distinctSnapshotIds.length < 2) {
    return reject("insufficient_recurrence", distinctSnapshotIds);
  }

  const statementKind = statementKinds[0];

  // Prefer a match through the authoritative concept; fall back to the
  // record's own local name only when that yields nothing.
  let reason: AcceptedCandidate["reason"] = "authoritative_match";
  let matchedSurfaceKey = findUniqueSurfaceKey(
    distinctLocalNames(
      records.map((record) => record.authoritative_concept_key),
    ),
    catalogIndexes.authoritative,
    catalog,
    statementKind,
  );

  if (!matchedSurfaceKey) {
    reason = "local_name_match";
    matchedSurfaceKey = findUniqueSurfaceKey(
      distinctLocalNames(records.map((record) => record.local_name)),
      catalogIndexes.localNames,
      catalog,
      statementKind,
    );
  }

  if (!matchedSurfaceKey) {
    return reject("no_unique_surface_match", distinctSnapshotIds);
  }

  const surface = catalog.get(matchedSurfaceKey);
  if (!surface) {
    return reject("unknown_surface_key", distinctSnapshotIds);
  }

  // NOTE(review): this branch is currently unreachable. Candidates with fewer
  // than two snapshots were already rejected above as
  // "insufficient_recurrence", so the 10-K exemption never applies. Kept as-is
  // because the intended promotion rule is not evident here; confirm which of
  // the two checks should win.
  // Presumably sampleSnapshots is ordered newest-first, making this the latest
  // 10-K; verify against the repository query.
  const latestTenKSnapshotId = sampleSnapshots.find(
    (snapshot) => snapshot.filing_type === "10-K",
  )?.id;
  const seenInLatestTenK =
    latestTenKSnapshotId !== undefined &&
    distinctSnapshotIds.includes(latestTenKSnapshotId);
  if (distinctSnapshotIds.length < 2 && !seenInLatestTenK) {
    return reject("promotion_threshold_not_met", distinctSnapshotIds);
  }

  return {
    accepted: {
      qname,
      surface_key: surface.surface_key,
      statement: surface.statement,
      reason,
      source_snapshot_ids: distinctSnapshotIds,
    },
    rejected: null,
  };
}

/**
 * Produces the next overlay definition by adding accepted candidates to the
 * current definition's mappings.
 *
 * Mappings are keyed by `statement:surface_key`. Each accepted candidate's
 * qname is added to that mapping's `allowed_source_concepts`. The current
 * definition's pack wins over `input.pack` when present. The result is passed
 * through `normalizeIssuerOverlayDefinition`.
 */
function mergeOverlayDefinition(input: {
  ticker: string;
  pack: string | null;
  current: IssuerOverlayDefinition | null;
  accepted: AcceptedCandidate[];
}) {
  const mappings = new Map<
    string,
    {
      surface_key: string;
      statement: IssuerOverlayDefinition["mappings"][number]["statement"];
      allowed_source_concepts: Set<string>;
      allowed_authoritative_concepts: Set<string>;
    }
  >();

  for (const mapping of input.current?.mappings ?? []) {
    const key = `${mapping.statement}:${mapping.surface_key}`;
    mappings.set(key, {
      surface_key: mapping.surface_key,
      statement: mapping.statement,
      allowed_source_concepts: new Set<string>(
        mapping.allowed_source_concepts,
      ),
      allowed_authoritative_concepts: new Set<string>(
        mapping.allowed_authoritative_concepts,
      ),
    });
  }

  for (const candidate of input.accepted) {
    const key = `${candidate.statement}:${candidate.surface_key}`;
    const existing = mappings.get(key) ?? {
      surface_key: candidate.surface_key,
      statement: candidate.statement,
      allowed_source_concepts: new Set<string>(),
      allowed_authoritative_concepts: new Set<string>(),
    };
    existing.allowed_source_concepts.add(candidate.qname);
    mappings.set(key, existing);
  }

  return normalizeIssuerOverlayDefinition({
    version: "fiscal-v1",
    ticker: input.ticker,
    pack: input.current?.pack ?? input.pack,
    mappings: [...mappings.values()].map((mapping) => ({
      surface_key: mapping.surface_key,
      statement: mapping.statement,
      allowed_source_concepts: [...mapping.allowed_source_concepts],
      allowed_authoritative_concepts: [
        ...mapping.allowed_authoritative_concepts,
      ],
    })),
  });
}

/**
 * Reports whether an overlay build should be queued for `ticker`.
 *
 * Returns `false` for a blank ticker. Returns `true` when there is no overlay,
 * no ready snapshot among the five fetched, or the overlay is in `error`. An
 * `empty` overlay is queued only if it has never been built. Otherwise queues
 * when the overlay's active revision differs from the revision recorded on
 * the first ready snapshot.
 */
export async function shouldQueueTickerAutomation(
  ticker: string,
): Promise<boolean> {
  const normalizedTicker = normalizeTicker(ticker);
  if (!normalizedTicker) {
    return false;
  }

  const [overlay, snapshotResult] = await Promise.all([
    getIssuerOverlay(normalizedTicker),
    listFilingTaxonomySnapshotsByTicker({
      ticker: normalizedTicker,
      window: "all",
      filingTypes: ["10-K", "10-Q"],
      limit: 5,
    }),
  ]);

  const latestReadySnapshot =
    snapshotResult.snapshots.find(
      (snapshot) => snapshot.parse_status === "ready",
    ) ?? null;

  if (!overlay || !latestReadySnapshot) {
    return true;
  }
  if (overlay.status === "error") {
    return true;
  }
  if (overlay.status === "empty") {
    return overlay.last_built_at === null;
  }

  return (
    overlay.active_revision_id !==
    latestReadySnapshot.issuer_overlay_revision_id
  );
}

/**
 * Rebuilds the issuer overlay for `ticker` from its recent filings.
 *
 * Samples up to 12 ready 10-K/10-Q snapshots filed within the last three
 * years (out of the 24 fetched), groups residual extension concepts by qname,
 * and maps each group onto a catalog surface via `chooseCandidate`. When the
 * merged definition has mappings it is published and the overlay is marked
 * `active`; otherwise only the build state is recorded.
 *
 * @returns Whether a revision was published, the active revision id, and the
 *   ids of the sampled snapshots.
 */
export async function generateIssuerOverlayForTicker(ticker: string) {
  const normalizedTicker = normalizeTicker(ticker);
  const currentOverlay = await getIssuerOverlay(normalizedTicker);

  const threeYearsAgo = new Date();
  threeYearsAgo.setUTCFullYear(threeYearsAgo.getUTCFullYear() - 3);

  const snapshotResult = await listFilingTaxonomySnapshotsByTicker({
    ticker: normalizedTicker,
    window: "all",
    filingTypes: ["10-K", "10-Q"],
    limit: 24,
  });

  // An unparseable filing_date yields NaN, which fails the comparison and so
  // excludes the snapshot.
  const sampleSnapshots = snapshotResult.snapshots
    .filter(
      (snapshot) =>
        snapshot.parse_status === "ready" &&
        Date.parse(snapshot.filing_date) >= threeYearsAgo.getTime(),
    )
    .slice(0, 12);

  await ensureIssuerOverlayRow(normalizedTicker);

  if (sampleSnapshots.length === 0) {
    const stats: IssuerOverlayStats = {
      pack: null,
      sampledSnapshotCount: 0,
      sampledSnapshotIds: [],
      acceptedMappingCount: 0,
      rejectedMappingCount: 0,
      publishedRevisionNumber: null,
    };
    await markIssuerOverlayBuildState({
      ticker: normalizedTicker,
      status: currentOverlay?.active_revision_id ? "active" : "empty",
      stats,
      activeRevisionId: currentOverlay?.active_revision_id ?? null,
    });
    return {
      published: false,
      activeRevisionId: currentOverlay?.active_revision_id ?? null,
      sampledSnapshotIds: [],
    };
  }

  const pack = selectWorkingPack(sampleSnapshots);
  const catalog = loadSurfaceCatalog(pack);
  // Built once here; the indexes depend only on the catalog, not on the group.
  const catalogIndexes = buildSurfaceIndexes(catalog);

  const conceptRows = (
    await listFilingTaxonomyConceptsBySnapshotIds(
      sampleSnapshots.map((snapshot) => snapshot.id),
    )
  ).filter(
    (record) =>
      record.is_extension &&
      record.residual_flag &&
      !record.is_abstract &&
      record.statement_kind !== null,
  );

  const grouped = new Map<string, FilingTaxonomyConceptRecord[]>();
  for (const record of conceptRows) {
    const existing = grouped.get(record.qname) ?? [];
    existing.push(record);
    grouped.set(record.qname, existing);
  }

  const acceptedMappings: AcceptedCandidate[] = [];
  const rejectedMappings: RejectedCandidate[] = [];
  for (const records of grouped.values()) {
    const selection = chooseCandidate(
      records,
      sampleSnapshots,
      catalog,
      catalogIndexes,
    );
    if (selection.accepted) {
      acceptedMappings.push(selection.accepted);
    } else if (selection.rejected) {
      rejectedMappings.push(selection.rejected);
    }
  }

  const nextDefinition =
    acceptedMappings.length > 0 ||
    currentOverlay?.active_revision?.definition_json
      ? mergeOverlayDefinition({
          ticker: normalizedTicker,
          pack,
          current: currentOverlay?.active_revision?.definition_json ?? null,
          accepted: acceptedMappings,
        })
      : null;

  const diagnostics: IssuerOverlayDiagnostics = {
    pack,
    sampledSnapshotIds: sampleSnapshots.map((snapshot) => snapshot.id),
    acceptedMappings,
    rejectedMappings,
  };
  const stats: IssuerOverlayStats = {
    pack,
    sampledSnapshotCount: sampleSnapshots.length,
    sampledSnapshotIds: sampleSnapshots.map((snapshot) => snapshot.id),
    acceptedMappingCount: acceptedMappings.length,
    rejectedMappingCount: rejectedMappings.length,
    publishedRevisionNumber:
      currentOverlay?.active_revision?.revision_number ?? null,
  };

  if (!nextDefinition || nextDefinition.mappings.length === 0) {
    const overlay = await markIssuerOverlayBuildState({
      ticker: normalizedTicker,
      status: currentOverlay?.active_revision_id ? "active" : "empty",
      stats,
      activeRevisionId: currentOverlay?.active_revision_id ?? null,
    });
    return {
      published: false,
      activeRevisionId: overlay?.active_revision_id ?? null,
      sampledSnapshotIds: diagnostics.sampledSnapshotIds,
    };
  }

  const published = await publishIssuerOverlayRevision({
    ticker: normalizedTicker,
    definition: nextDefinition,
    diagnostics,
    stats,
  });

  // The stats handed to publish still carry the previous revision number; the
  // build-state record below gets the number of the revision just returned.
  stats.publishedRevisionNumber = published.revision.revision_number;
  await markIssuerOverlayBuildState({
    ticker: normalizedTicker,
    status: "active",
    stats,
    activeRevisionId: published.revision.id,
  });

  return {
    published: published.published,
    activeRevisionId: published.revision.id,
    sampledSnapshotIds: diagnostics.sampledSnapshotIds,
  };
}

/**
 * Marks the overlay for `ticker` as failed, storing the error message.
 *
 * The ticker is normalized so the state is recorded under the same key the
 * other functions in this module use.
 *
 * @param error The thrown value; non-`Error` values are stringified.
 */
export async function recordIssuerOverlayBuildFailure(
  ticker: string,
  error: unknown,
): Promise<void> {
  await markIssuerOverlayBuildState({
    ticker: normalizeTicker(ticker),
    status: "error",
    lastError: error instanceof Error ? error.message : String(error),
  });
}