// Neon-Desk/lib/server/issuer-overlays.ts
// (source listing: 576 lines, 16 KiB, TypeScript)

import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import type {
IssuerOverlayDefinition,
IssuerOverlayDiagnostics,
IssuerOverlayStats,
} from "@/lib/server/db/schema";
import type {
FilingTaxonomyConceptRecord,
FilingTaxonomySnapshotRecord,
} from "@/lib/server/repos/filing-taxonomy";
import {
listFilingTaxonomyConceptsBySnapshotIds,
listFilingTaxonomySnapshotsByTicker,
} from "@/lib/server/repos/filing-taxonomy";
import {
ensureIssuerOverlayRow,
getIssuerOverlay,
markIssuerOverlayBuildState,
normalizeIssuerOverlayDefinition,
publishIssuerOverlayRevision,
} from "@/lib/server/repos/issuer-overlays";
/**
 * Shape assumed for a parsed `*.surface.json` taxonomy file.
 * `loadOptionalSurfaceFile` casts the parsed JSON to this type without any
 * runtime validation, so the fields are only as reliable as the file on disk.
 */
type SurfaceFile = {
surfaces?: Array<{
surface_key: string;
// Free-form string here; `applySurfaceFile` skips entries whose value is not one of the catalog's statement kinds.
statement: string;
allowed_source_concepts?: string[];
allowed_authoritative_concepts?: string[];
}>;
};
/**
 * One surface as held in the in-memory catalog: concept sets accumulated
 * across every surface file that mentions the same `surface_key`.
 */
type SurfaceCatalogEntry = {
surface_key: string;
// The statement kinds the catalog accepts; surface-file entries with any other value are skipped on load.
statement:
| "income"
| "balance"
| "cash_flow"
| "disclosure"
| "equity"
| "comprehensive_income";
// Collected from `allowed_source_concepts` in the surface files.
sourceConcepts: Set<string>;
// Collected from `allowed_authoritative_concepts` in the surface files.
authoritativeConcepts: Set<string>;
};
/**
 * An extension concept that `chooseCandidate` matched to exactly one catalog surface.
 */
type AcceptedCandidate = {
// Taken from the first record of the concept's group.
qname: string;
surface_key: string;
statement: SurfaceCatalogEntry["statement"];
// Which lookup produced the match: the records' authoritative concept key, or (as the fallback) their local name.
reason: "authoritative_match" | "local_name_match";
// Distinct snapshot ids the concept was seen in, sorted ascending.
source_snapshot_ids: number[];
};
/**
 * An extension concept that `chooseCandidate` declined to map, together with the reason it gave.
 */
type RejectedCandidate = {
qname: string;
// Reason code set by `chooseCandidate`, e.g. "inconsistent_statement_kind", "insufficient_recurrence", "no_unique_surface_match".
reason: string;
// Distinct snapshot ids the concept was seen in, sorted ascending.
source_snapshot_ids: number[];
};
/** Canonical ticker form used throughout this module: surrounding whitespace removed, then upper-cased. */
function normalizeTicker(ticker: string) {
  const trimmed = ticker.trim();
  return trimmed.toUpperCase();
}
/**
 * Reduces a concept identifier to its lower-cased local name.
 *
 * Accepts prefixed names (`us-gaap:Revenues`), fragment-style keys
 * (`http://fasb.org/us-gaap/2024#Revenues`) and bare names. The local name is
 * whatever follows the LAST `:` or `#`, whichever occurs later in the string.
 * Looking at both separators together matters for URI keys: they always
 * contain the scheme colon, so testing `:` first would leave the namespace
 * path and fragment marker in the result.
 *
 * @param value Raw concept key or name; `null`/`undefined` are tolerated.
 * @returns The trimmed, lower-cased local name, or `null` when nothing remains.
 */
function normalizeLocalName(value: string | null | undefined) {
  const trimmed = value?.trim() ?? "";
  if (!trimmed) {
    return null;
  }

  const separatorIndex = Math.max(
    trimmed.lastIndexOf(":"),
    trimmed.lastIndexOf("#"),
  );
  const localName =
    separatorIndex >= 0 ? trimmed.slice(separatorIndex + 1) : trimmed;

  // A trailing separator ("aapl:") leaves nothing usable, which maps to null.
  return localName.trim().toLowerCase() || null;
}
/** Builds the path of a file inside `rust/taxonomy/fiscal/v1` under the current working directory. */
function taxonomyFilePath(fileName: string) {
  const segments = ["rust", "taxonomy", "fiscal", "v1", fileName];
  return join(process.cwd(), ...segments);
}
/**
 * Reads and parses one taxonomy surface file.
 *
 * @returns The parsed content cast to `SurfaceFile` (no shape validation), or
 *   `null` when the file does not exist. Malformed JSON still throws.
 */
function loadOptionalSurfaceFile(fileName: string) {
  const path = taxonomyFilePath(fileName);
  return existsSync(path)
    ? (JSON.parse(readFileSync(path, "utf8")) as SurfaceFile)
    : null;
}
/** Type guard: true when a raw statement string is one of the kinds the catalog supports. */
function isCatalogStatement(
  statement: string,
): statement is SurfaceCatalogEntry["statement"] {
  switch (statement) {
    case "income":
    case "balance":
    case "cash_flow":
    case "disclosure":
    case "equity":
    case "comprehensive_income":
      return true;
    default:
      return false;
  }
}

/**
 * Folds the surfaces of one parsed surface file into `catalog` (mutated in place).
 *
 * - A `null` file, or one without `surfaces`, is a no-op.
 * - Surfaces with an unsupported statement are skipped.
 * - A surface key already in the catalog keeps its existing statement; only
 *   its concept sets grow. A new key gets a fresh entry.
 */
function applySurfaceFile(
  catalog: Map<string, SurfaceCatalogEntry>,
  file: SurfaceFile | null,
) {
  const surfaces = file?.surfaces ?? [];

  for (const surface of surfaces) {
    const { surface_key: surfaceKey, statement } = surface;
    if (!isCatalogStatement(statement)) {
      continue;
    }

    const known = catalog.get(surfaceKey);
    const entry: SurfaceCatalogEntry =
      known !== undefined
        ? known
        : {
            surface_key: surfaceKey,
            statement,
            sourceConcepts: new Set<string>(),
            authoritativeConcepts: new Set<string>(),
          };

    for (const sourceConcept of surface.allowed_source_concepts ?? []) {
      entry.sourceConcepts.add(sourceConcept);
    }
    for (const authoritativeConcept of surface.allowed_authoritative_concepts ?? []) {
      entry.authoritativeConcepts.add(authoritativeConcept);
    }

    catalog.set(surfaceKey, entry);
  }
}
/**
 * Builds the surface catalog for a fiscal pack.
 *
 * A blank or `null` pack means "core". For any other pack, the pack's own
 * files are applied first and the core files second; within each pack the
 * main surface file precedes the disclosure one. Missing files contribute nothing.
 */
function loadSurfaceCatalog(pack: string | null) {
  const primaryPack = pack?.trim() || "core";
  const packsInOrder =
    primaryPack === "core" ? [primaryPack] : [primaryPack, "core"];

  const catalog = new Map<string, SurfaceCatalogEntry>();
  for (const packName of packsInOrder) {
    applySurfaceFile(
      catalog,
      loadOptionalSurfaceFile(`${packName}.surface.json`),
    );
    applySurfaceFile(
      catalog,
      loadOptionalSurfaceFile(`${packName}.disclosure.surface.json`),
    );
  }
  return catalog;
}
/**
 * Derives two reverse lookups from the catalog, both keyed by normalized local name:
 *
 * - `authoritative`: surface keys whose authoritative concepts carry that name.
 * - `localNames`: surface keys whose source OR authoritative concepts carry that name.
 *
 * Concepts whose name normalizes to nothing are left out.
 */
function buildSurfaceIndexes(catalog: Map<string, SurfaceCatalogEntry>) {
  const authoritative = new Map<string, Set<string>>();
  const localNames = new Map<string, Set<string>>();

  // Adds `surfaceKey` to the bucket for the concept's local name, creating the bucket on first use.
  const register = (
    index: Map<string, Set<string>>,
    concept: string,
    surfaceKey: string,
  ) => {
    const localName = normalizeLocalName(concept);
    if (!localName) {
      return;
    }
    const bucket = index.get(localName);
    if (bucket) {
      bucket.add(surfaceKey);
    } else {
      index.set(localName, new Set<string>([surfaceKey]));
    }
  };

  catalog.forEach((entry) => {
    entry.authoritativeConcepts.forEach((concept) => {
      register(authoritative, concept, entry.surface_key);
    });
    entry.sourceConcepts.forEach((concept) => {
      register(localNames, concept, entry.surface_key);
    });
    entry.authoritativeConcepts.forEach((concept) => {
      register(localNames, concept, entry.surface_key);
    });
  });

  return { authoritative, localNames };
}
/**
 * Picks the fiscal pack to work with for a set of snapshots.
 *
 * The most frequent non-blank `fiscal_pack` (trimmed) wins; ties go to the
 * pack that sorts first by `localeCompare`. When no snapshot carries a
 * non-blank pack the result is "core".
 *
 * There is deliberately no fallback to the first snapshot's raw `fiscal_pack`:
 * that value is only reachable when every pack is blank, so it could only
 * return an empty or whitespace-only pack name.
 */
function selectWorkingPack(snapshots: FilingTaxonomySnapshotRecord[]) {
  const counts = new Map<string, number>();
  for (const snapshot of snapshots) {
    const pack = snapshot.fiscal_pack?.trim();
    if (!pack) {
      continue;
    }
    counts.set(pack, (counts.get(pack) ?? 0) + 1);
  }

  const ranked = [...counts.entries()].sort(
    (left, right) => right[1] - left[1] || left[0].localeCompare(right[0]),
  );
  return ranked[0]?.[0] ?? "core";
}
/** Distinct `snapshot_id` values of the given records, sorted ascending. */
function sourceSnapshotIds(records: FilingTaxonomyConceptRecord[]) {
  const seen = new Set<number>();
  for (const record of records) {
    seen.add(record.snapshot_id);
  }
  return Array.from(seen).sort((a, b) => a - b);
}
/**
 * Decides whether one extension concept (all records sharing a qname) can be
 * mapped onto a catalog surface.
 *
 * Rules, applied in order; the first failure produces a rejection:
 *  1. All records must agree on a single non-null `statement_kind`
 *     ("inconsistent_statement_kind").
 *  2. The concept must occur in at least two distinct snapshots
 *     ("insufficient_recurrence").
 *  3. Exactly one catalog surface of that statement kind must match
 *     ("no_unique_surface_match"). The records' authoritative concept key is
 *     tried first, and only when the records agree on one normalized name.
 *     The fallback is the records' local name, looked up against source and
 *     authoritative concepts alike.
 *  4. The matched key must resolve in the catalog ("unknown_surface_key").
 *
 * NOTE(review): this function used to end with a "promotion_threshold_not_met"
 * check that would have let a single-snapshot concept through when it appeared
 * in the latest 10-K. Rule 2 already rejects anything seen in fewer than two
 * snapshots, so that check could never run and has been removed. If the 10-K
 * exception is actually wanted, rule 2 is the guard to relax.
 *
 * @param records Records for one qname; the qname is read from the first one.
 * @param sampleSnapshots Currently unused; retained so existing call sites keep working.
 * @param catalog Surface catalog to match against.
 * @returns Exactly one of `accepted` / `rejected` is non-null.
 */
function chooseCandidate(
  records: FilingTaxonomyConceptRecord[],
  sampleSnapshots: FilingTaxonomySnapshotRecord[],
  catalog: Map<string, SurfaceCatalogEntry>,
) {
  // Marks the parameter as intentionally unused (see NOTE above).
  void sampleSnapshots;

  const qname = records[0]?.qname ?? "unknown";
  const distinctSnapshotIds = sourceSnapshotIds(records);

  // Every rejection carries the same qname and snapshot ids; only the reason varies.
  const reject = (reason: string) => ({
    accepted: null,
    rejected: {
      qname,
      reason,
      source_snapshot_ids: distinctSnapshotIds,
    } satisfies RejectedCandidate,
  });

  const statementKinds = [
    ...new Set(
      records
        .map((record) => record.statement_kind)
        .filter((value): value is NonNullable<typeof value> => value !== null),
    ),
  ];
  if (statementKinds.length !== 1) {
    return reject("inconsistent_statement_kind");
  }
  if (distinctSnapshotIds.length < 2) {
    return reject("insufficient_recurrence");
  }
  const statementKind = statementKinds[0];

  // Distinct normalized local names among the given raw values.
  const distinctLocalNames = (values: Array<string | null | undefined>) => [
    ...new Set(
      values
        .map((value) => normalizeLocalName(value))
        .filter((value): value is string => value !== null),
    ),
  ];

  // The single surface of the right statement kind indexed under the single name, else null.
  const uniqueSurfaceFor = (
    index: Map<string, Set<string>>,
    names: string[],
  ) => {
    const onlyName = names.length === 1 ? names[0] : undefined;
    if (onlyName === undefined) {
      return null;
    }
    const matches = [...(index.get(onlyName) ?? [])].filter(
      (surfaceKey) => catalog.get(surfaceKey)?.statement === statementKind,
    );
    return matches.length === 1 ? (matches[0] ?? null) : null;
  };

  const catalogIndexes = buildSurfaceIndexes(catalog);

  let reason: AcceptedCandidate["reason"] = "authoritative_match";
  let matchedSurfaceKey = uniqueSurfaceFor(
    catalogIndexes.authoritative,
    distinctLocalNames(records.map((record) => record.authoritative_concept_key)),
  );
  if (!matchedSurfaceKey) {
    // Fallback: the concept's own local name against the broader index.
    reason = "local_name_match";
    matchedSurfaceKey = uniqueSurfaceFor(
      catalogIndexes.localNames,
      distinctLocalNames(records.map((record) => record.local_name)),
    );
  }
  if (!matchedSurfaceKey) {
    return reject("no_unique_surface_match");
  }

  const surface = catalog.get(matchedSurfaceKey);
  if (!surface) {
    return reject("unknown_surface_key");
  }

  return {
    accepted: {
      qname,
      surface_key: surface.surface_key,
      statement: surface.statement,
      reason,
      source_snapshot_ids: distinctSnapshotIds,
    } satisfies AcceptedCandidate,
    rejected: null,
  };
}
/**
 * Produces the next overlay definition by layering newly accepted candidates
 * on top of the current definition's mappings.
 *
 * Mappings are keyed by `statement:surface_key`. Current mappings seed the
 * result; each accepted candidate adds its qname to the source concepts of
 * its mapping, creating the mapping if needed. The current definition's pack
 * takes precedence over `input.pack`. The result is passed through
 * `normalizeIssuerOverlayDefinition`.
 */
function mergeOverlayDefinition(input: {
  ticker: string;
  pack: string | null;
  current: IssuerOverlayDefinition | null;
  accepted: AcceptedCandidate[];
}) {
  type MappingDraft = {
    surface_key: string;
    statement: IssuerOverlayDefinition["mappings"][number]["statement"];
    allowed_source_concepts: Set<string>;
    allowed_authoritative_concepts: Set<string>;
  };

  const { ticker, pack, current, accepted } = input;
  const drafts = new Map<string, MappingDraft>();
  const keyOf = (statement: string, surfaceKey: string) =>
    `${statement}:${surfaceKey}`;

  // Seed from the current definition; a repeated key is overwritten by the later mapping.
  (current?.mappings ?? []).forEach((mapping) => {
    drafts.set(keyOf(mapping.statement, mapping.surface_key), {
      surface_key: mapping.surface_key,
      statement: mapping.statement,
      allowed_source_concepts: new Set(mapping.allowed_source_concepts),
      allowed_authoritative_concepts: new Set(
        mapping.allowed_authoritative_concepts,
      ),
    });
  });

  accepted.forEach((candidate) => {
    const key = keyOf(candidate.statement, candidate.surface_key);
    let draft = drafts.get(key);
    if (!draft) {
      draft = {
        surface_key: candidate.surface_key,
        statement: candidate.statement,
        allowed_source_concepts: new Set<string>(),
        allowed_authoritative_concepts: new Set<string>(),
      };
      drafts.set(key, draft);
    }
    draft.allowed_source_concepts.add(candidate.qname);
  });

  const mappings = Array.from(drafts.values(), (draft) => ({
    surface_key: draft.surface_key,
    statement: draft.statement,
    allowed_source_concepts: [...draft.allowed_source_concepts],
    allowed_authoritative_concepts: [...draft.allowed_authoritative_concepts],
  }));

  return normalizeIssuerOverlayDefinition({
    version: "fiscal-v1",
    ticker,
    pack: current?.pack ?? pack,
    mappings,
  });
}
/**
 * Tells whether overlay automation should be queued for a ticker.
 *
 * - Blank ticker: never.
 * - No overlay row, or no parse-ready snapshot among the 5 fetched 10-K/10-Q snapshots: yes.
 * - Overlay in "error": yes.
 * - Overlay "empty": only if it has never been built (`last_built_at` is null).
 * - Otherwise: yes when the overlay's active revision differs from the revision
 *   recorded on the first parse-ready snapshot.
 */
export async function shouldQueueTickerAutomation(ticker: string) {
  const normalizedTicker = normalizeTicker(ticker);
  if (!normalizedTicker) {
    return false;
  }

  const [overlay, { snapshots }] = await Promise.all([
    getIssuerOverlay(normalizedTicker),
    listFilingTaxonomySnapshotsByTicker({
      ticker: normalizedTicker,
      window: "all",
      filingTypes: ["10-K", "10-Q"],
      limit: 5,
    }),
  ]);

  const latestReadySnapshot = snapshots.find(
    (snapshot) => snapshot.parse_status === "ready",
  );
  if (!overlay || !latestReadySnapshot) {
    return true;
  }

  switch (overlay.status) {
    case "error":
      return true;
    case "empty":
      return overlay.last_built_at === null;
    default:
      return (
        overlay.active_revision_id !==
        latestReadySnapshot.issuer_overlay_revision_id
      );
  }
}
/**
 * Rebuilds the issuer overlay for one ticker from its recent filing taxonomy snapshots.
 *
 * Steps, in order:
 *  1. Load the current overlay and up to 24 10-K/10-Q snapshots; keep the first
 *     12 that are parse-ready and filed within the last three years.
 *  2. Ensure an overlay row exists for the ticker.
 *  3. With no usable snapshots, record zeroed stats and return without publishing.
 *  4. Otherwise pick the working pack, load its surface catalog, and run every
 *     non-abstract residual extension concept (grouped by qname) through
 *     `chooseCandidate`.
 *  5. Merge accepted candidates into the active definition. If the merged
 *     definition has mappings, publish a revision and mark the overlay active;
 *     if not, only record the build state.
 *
 * @returns Whether a revision was published, the active revision id afterwards,
 *   and the ids of the sampled snapshots.
 */
export async function generateIssuerOverlayForTicker(ticker: string) {
  const normalizedTicker = normalizeTicker(ticker);
  const currentOverlay = await getIssuerOverlay(normalizedTicker);

  const cutoff = new Date();
  cutoff.setUTCFullYear(cutoff.getUTCFullYear() - 3);
  const cutoffMs = cutoff.getTime();

  const { snapshots } = await listFilingTaxonomySnapshotsByTicker({
    ticker: normalizedTicker,
    window: "all",
    filingTypes: ["10-K", "10-Q"],
    limit: 24,
  });
  const sampleSnapshots = snapshots
    .filter((snapshot) => {
      if (snapshot.parse_status !== "ready") {
        return false;
      }
      // An unparseable date yields NaN, which fails the comparison and drops the snapshot.
      return Date.parse(snapshot.filing_date) >= cutoffMs;
    })
    .slice(0, 12);

  await ensureIssuerOverlayRow(normalizedTicker);

  // What to report when this run publishes nothing: keep whatever is active today.
  const currentRevisionId = currentOverlay?.active_revision_id ?? null;
  const idleStatus = currentOverlay?.active_revision_id ? "active" : "empty";

  if (sampleSnapshots.length === 0) {
    const emptyStats: IssuerOverlayStats = {
      pack: null,
      sampledSnapshotCount: 0,
      sampledSnapshotIds: [],
      acceptedMappingCount: 0,
      rejectedMappingCount: 0,
      publishedRevisionNumber: null,
    };
    await markIssuerOverlayBuildState({
      ticker: normalizedTicker,
      status: idleStatus,
      stats: emptyStats,
      activeRevisionId: currentRevisionId,
    });
    return {
      published: false,
      activeRevisionId: currentRevisionId,
      sampledSnapshotIds: [],
    };
  }

  const pack = selectWorkingPack(sampleSnapshots);
  const catalog = loadSurfaceCatalog(pack);
  const sampledSnapshotIds = sampleSnapshots.map((snapshot) => snapshot.id);

  const allConcepts = await listFilingTaxonomyConceptsBySnapshotIds([
    ...sampledSnapshotIds,
  ]);
  const extensionRecords = allConcepts.filter(
    (record) =>
      record.is_extension &&
      record.residual_flag &&
      !record.is_abstract &&
      record.statement_kind !== null,
  );

  const recordsByQname = new Map<string, FilingTaxonomyConceptRecord[]>();
  for (const record of extensionRecords) {
    const bucket = recordsByQname.get(record.qname);
    if (bucket) {
      bucket.push(record);
    } else {
      recordsByQname.set(record.qname, [record]);
    }
  }

  const acceptedMappings: AcceptedCandidate[] = [];
  const rejectedMappings: RejectedCandidate[] = [];
  for (const records of recordsByQname.values()) {
    const { accepted, rejected } = chooseCandidate(
      records,
      sampleSnapshots,
      catalog,
    );
    if (accepted) {
      acceptedMappings.push(accepted);
    } else if (rejected) {
      rejectedMappings.push(rejected);
    }
  }

  const currentDefinition =
    currentOverlay?.active_revision?.definition_json ?? null;
  const nextDefinition =
    acceptedMappings.length > 0 || currentDefinition
      ? mergeOverlayDefinition({
          ticker: normalizedTicker,
          pack,
          current: currentDefinition,
          accepted: acceptedMappings,
        })
      : null;

  const diagnostics: IssuerOverlayDiagnostics = {
    pack,
    sampledSnapshotIds,
    acceptedMappings,
    rejectedMappings,
  };
  const stats: IssuerOverlayStats = {
    pack,
    sampledSnapshotCount: sampleSnapshots.length,
    sampledSnapshotIds: [...sampledSnapshotIds],
    acceptedMappingCount: acceptedMappings.length,
    rejectedMappingCount: rejectedMappings.length,
    // Starts as the currently active revision; replaced below once a new one is published.
    publishedRevisionNumber:
      currentOverlay?.active_revision?.revision_number ?? null,
  };

  if (!nextDefinition || nextDefinition.mappings.length === 0) {
    const overlay = await markIssuerOverlayBuildState({
      ticker: normalizedTicker,
      status: idleStatus,
      stats,
      activeRevisionId: currentRevisionId,
    });
    return {
      published: false,
      activeRevisionId: overlay?.active_revision_id ?? null,
      sampledSnapshotIds: diagnostics.sampledSnapshotIds,
    };
  }

  const publication = await publishIssuerOverlayRevision({
    ticker: normalizedTicker,
    definition: nextDefinition,
    diagnostics,
    stats,
  });
  stats.publishedRevisionNumber = publication.revision.revision_number;

  await markIssuerOverlayBuildState({
    ticker: normalizedTicker,
    status: "active",
    stats,
    activeRevisionId: publication.revision.id,
  });

  return {
    published: publication.published,
    activeRevisionId: publication.revision.id,
    sampledSnapshotIds: diagnostics.sampledSnapshotIds,
  };
}
/**
 * Marks a ticker's overlay build as failed.
 *
 * The ticker is normalized first, as in the other entry points of this
 * module, so the failure is recorded under the same key the build itself
 * uses rather than under whatever casing or padding the caller passed in.
 *
 * @param ticker Ticker whose build failed; any casing, surrounding whitespace allowed.
 * @param error The thrown value. An `Error` contributes its `message`;
 *   anything else is converted with `String()`.
 */
export async function recordIssuerOverlayBuildFailure(
  ticker: string,
  error: unknown,
) {
  const lastError = error instanceof Error ? error.message : String(error);
  await markIssuerOverlayBuildState({
    ticker: normalizeTicker(ticker),
    status: "error",
    lastError,
  });
}