Automate issuer overlay creation from ticker searches
This commit is contained in:
575
lib/server/issuer-overlays.ts
Normal file
575
lib/server/issuer-overlays.ts
Normal file
@@ -0,0 +1,575 @@
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import type {
|
||||
IssuerOverlayDefinition,
|
||||
IssuerOverlayDiagnostics,
|
||||
IssuerOverlayStats,
|
||||
} from "@/lib/server/db/schema";
|
||||
import type {
|
||||
FilingTaxonomyConceptRecord,
|
||||
FilingTaxonomySnapshotRecord,
|
||||
} from "@/lib/server/repos/filing-taxonomy";
|
||||
import {
|
||||
listFilingTaxonomyConceptsBySnapshotIds,
|
||||
listFilingTaxonomySnapshotsByTicker,
|
||||
} from "@/lib/server/repos/filing-taxonomy";
|
||||
import {
|
||||
ensureIssuerOverlayRow,
|
||||
getIssuerOverlay,
|
||||
markIssuerOverlayBuildState,
|
||||
normalizeIssuerOverlayDefinition,
|
||||
publishIssuerOverlayRevision,
|
||||
} from "@/lib/server/repos/issuer-overlays";
|
||||
|
||||
type SurfaceFile = {
|
||||
surfaces?: Array<{
|
||||
surface_key: string;
|
||||
statement: string;
|
||||
allowed_source_concepts?: string[];
|
||||
allowed_authoritative_concepts?: string[];
|
||||
}>;
|
||||
};
|
||||
|
||||
type SurfaceCatalogEntry = {
|
||||
surface_key: string;
|
||||
statement:
|
||||
| "income"
|
||||
| "balance"
|
||||
| "cash_flow"
|
||||
| "disclosure"
|
||||
| "equity"
|
||||
| "comprehensive_income";
|
||||
sourceConcepts: Set<string>;
|
||||
authoritativeConcepts: Set<string>;
|
||||
};
|
||||
|
||||
type AcceptedCandidate = {
|
||||
qname: string;
|
||||
surface_key: string;
|
||||
statement: SurfaceCatalogEntry["statement"];
|
||||
reason: "authoritative_match" | "local_name_match";
|
||||
source_snapshot_ids: number[];
|
||||
};
|
||||
|
||||
type RejectedCandidate = {
|
||||
qname: string;
|
||||
reason: string;
|
||||
source_snapshot_ids: number[];
|
||||
};
|
||||
|
||||
function normalizeTicker(ticker: string) {
|
||||
return ticker.trim().toUpperCase();
|
||||
}
|
||||
|
||||
function normalizeLocalName(value: string | null | undefined) {
|
||||
const normalized = value?.trim() ?? "";
|
||||
if (!normalized) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const localName = normalized.includes(":")
|
||||
? normalized.split(":").pop() ?? normalized
|
||||
: normalized.includes("#")
|
||||
? normalized.split("#").pop() ?? normalized
|
||||
: normalized;
|
||||
return localName.trim().toLowerCase() || null;
|
||||
}
|
||||
|
||||
function taxonomyFilePath(fileName: string) {
|
||||
return join(process.cwd(), "rust", "taxonomy", "fiscal", "v1", fileName);
|
||||
}
|
||||
|
||||
function loadOptionalSurfaceFile(fileName: string) {
|
||||
const path = taxonomyFilePath(fileName);
|
||||
if (!existsSync(path)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return JSON.parse(readFileSync(path, "utf8")) as SurfaceFile;
|
||||
}
|
||||
|
||||
function applySurfaceFile(
|
||||
catalog: Map<string, SurfaceCatalogEntry>,
|
||||
file: SurfaceFile | null,
|
||||
) {
|
||||
for (const surface of file?.surfaces ?? []) {
|
||||
if (
|
||||
surface.statement !== "income" &&
|
||||
surface.statement !== "balance" &&
|
||||
surface.statement !== "cash_flow" &&
|
||||
surface.statement !== "disclosure" &&
|
||||
surface.statement !== "equity" &&
|
||||
surface.statement !== "comprehensive_income"
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const existing = catalog.get(surface.surface_key) ?? {
|
||||
surface_key: surface.surface_key,
|
||||
statement: surface.statement,
|
||||
sourceConcepts: new Set<string>(),
|
||||
authoritativeConcepts: new Set<string>(),
|
||||
};
|
||||
for (const concept of surface.allowed_source_concepts ?? []) {
|
||||
existing.sourceConcepts.add(concept);
|
||||
}
|
||||
for (const concept of surface.allowed_authoritative_concepts ?? []) {
|
||||
existing.authoritativeConcepts.add(concept);
|
||||
}
|
||||
catalog.set(surface.surface_key, existing);
|
||||
}
|
||||
}
|
||||
|
||||
function loadSurfaceCatalog(pack: string | null) {
|
||||
const normalizedPack = pack?.trim() || "core";
|
||||
const catalog = new Map<string, SurfaceCatalogEntry>();
|
||||
|
||||
applySurfaceFile(catalog, loadOptionalSurfaceFile(`${normalizedPack}.surface.json`));
|
||||
applySurfaceFile(
|
||||
catalog,
|
||||
loadOptionalSurfaceFile(`${normalizedPack}.disclosure.surface.json`),
|
||||
);
|
||||
|
||||
if (normalizedPack !== "core") {
|
||||
applySurfaceFile(catalog, loadOptionalSurfaceFile("core.surface.json"));
|
||||
applySurfaceFile(
|
||||
catalog,
|
||||
loadOptionalSurfaceFile("core.disclosure.surface.json"),
|
||||
);
|
||||
}
|
||||
|
||||
return catalog;
|
||||
}
|
||||
|
||||
function buildSurfaceIndexes(catalog: Map<string, SurfaceCatalogEntry>) {
|
||||
const authoritative = new Map<string, Set<string>>();
|
||||
const localNames = new Map<string, Set<string>>();
|
||||
|
||||
for (const entry of catalog.values()) {
|
||||
for (const concept of entry.authoritativeConcepts) {
|
||||
const localName = normalizeLocalName(concept);
|
||||
if (!localName) {
|
||||
continue;
|
||||
}
|
||||
const surfaces = authoritative.get(localName) ?? new Set<string>();
|
||||
surfaces.add(entry.surface_key);
|
||||
authoritative.set(localName, surfaces);
|
||||
}
|
||||
|
||||
for (const concept of [...entry.sourceConcepts, ...entry.authoritativeConcepts]) {
|
||||
const localName = normalizeLocalName(concept);
|
||||
if (!localName) {
|
||||
continue;
|
||||
}
|
||||
const surfaces = localNames.get(localName) ?? new Set<string>();
|
||||
surfaces.add(entry.surface_key);
|
||||
localNames.set(localName, surfaces);
|
||||
}
|
||||
}
|
||||
|
||||
return { authoritative, localNames };
|
||||
}
|
||||
|
||||
function selectWorkingPack(snapshots: FilingTaxonomySnapshotRecord[]) {
|
||||
const counts = new Map<string, number>();
|
||||
for (const snapshot of snapshots) {
|
||||
const pack = snapshot.fiscal_pack?.trim();
|
||||
if (!pack) {
|
||||
continue;
|
||||
}
|
||||
counts.set(pack, (counts.get(pack) ?? 0) + 1);
|
||||
}
|
||||
|
||||
return (
|
||||
[...counts.entries()].sort(
|
||||
(left, right) => right[1] - left[1] || left[0].localeCompare(right[0]),
|
||||
)[0]?.[0] ??
|
||||
snapshots[0]?.fiscal_pack ??
|
||||
"core"
|
||||
);
|
||||
}
|
||||
|
||||
function sourceSnapshotIds(records: FilingTaxonomyConceptRecord[]) {
|
||||
return [...new Set(records.map((record) => record.snapshot_id))].sort(
|
||||
(left, right) => left - right,
|
||||
);
|
||||
}
|
||||
|
||||
function chooseCandidate(
|
||||
records: FilingTaxonomyConceptRecord[],
|
||||
sampleSnapshots: FilingTaxonomySnapshotRecord[],
|
||||
catalog: Map<string, SurfaceCatalogEntry>,
|
||||
) {
|
||||
const statementKinds = [
|
||||
...new Set(
|
||||
records
|
||||
.map((record) => record.statement_kind)
|
||||
.filter((value): value is NonNullable<typeof value> => value !== null),
|
||||
),
|
||||
];
|
||||
if (statementKinds.length !== 1) {
|
||||
return {
|
||||
accepted: null,
|
||||
rejected: {
|
||||
qname: records[0]?.qname ?? "unknown",
|
||||
reason: "inconsistent_statement_kind",
|
||||
source_snapshot_ids: sourceSnapshotIds(records),
|
||||
} satisfies RejectedCandidate,
|
||||
};
|
||||
}
|
||||
|
||||
const distinctSnapshotIds = sourceSnapshotIds(records);
|
||||
if (distinctSnapshotIds.length < 2) {
|
||||
return {
|
||||
accepted: null,
|
||||
rejected: {
|
||||
qname: records[0]?.qname ?? "unknown",
|
||||
reason: "insufficient_recurrence",
|
||||
source_snapshot_ids: distinctSnapshotIds,
|
||||
} satisfies RejectedCandidate,
|
||||
};
|
||||
}
|
||||
|
||||
const catalogIndexes = buildSurfaceIndexes(catalog);
|
||||
const statementKind = statementKinds[0];
|
||||
const authoritativeNames = [
|
||||
...new Set(
|
||||
records
|
||||
.map((record) => normalizeLocalName(record.authoritative_concept_key))
|
||||
.filter((value): value is string => value !== null),
|
||||
),
|
||||
];
|
||||
|
||||
let matchedSurfaceKey: string | null = null;
|
||||
let reason: AcceptedCandidate["reason"] | null = null;
|
||||
|
||||
if (authoritativeNames.length === 1) {
|
||||
const candidates = [
|
||||
...(catalogIndexes.authoritative.get(authoritativeNames[0]) ?? new Set()),
|
||||
];
|
||||
const matchingCandidates = candidates.filter((surfaceKey) => {
|
||||
return catalog.get(surfaceKey)?.statement === statementKind;
|
||||
});
|
||||
if (matchingCandidates.length === 1) {
|
||||
matchedSurfaceKey = matchingCandidates[0] ?? null;
|
||||
reason = "authoritative_match";
|
||||
}
|
||||
}
|
||||
|
||||
if (!matchedSurfaceKey) {
|
||||
const localNames = [
|
||||
...new Set(
|
||||
records
|
||||
.map((record) => normalizeLocalName(record.local_name))
|
||||
.filter((value): value is string => value !== null),
|
||||
),
|
||||
];
|
||||
|
||||
if (localNames.length === 1) {
|
||||
const candidates = [
|
||||
...(catalogIndexes.localNames.get(localNames[0]) ?? new Set()),
|
||||
];
|
||||
const matchingCandidates = candidates.filter((surfaceKey) => {
|
||||
return catalog.get(surfaceKey)?.statement === statementKind;
|
||||
});
|
||||
if (matchingCandidates.length === 1) {
|
||||
matchedSurfaceKey = matchingCandidates[0] ?? null;
|
||||
reason = "local_name_match";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!matchedSurfaceKey || !reason) {
|
||||
return {
|
||||
accepted: null,
|
||||
rejected: {
|
||||
qname: records[0]?.qname ?? "unknown",
|
||||
reason: "no_unique_surface_match",
|
||||
source_snapshot_ids: distinctSnapshotIds,
|
||||
} satisfies RejectedCandidate,
|
||||
};
|
||||
}
|
||||
|
||||
const surface = catalog.get(matchedSurfaceKey);
|
||||
if (!surface) {
|
||||
return {
|
||||
accepted: null,
|
||||
rejected: {
|
||||
qname: records[0]?.qname ?? "unknown",
|
||||
reason: "unknown_surface_key",
|
||||
source_snapshot_ids: distinctSnapshotIds,
|
||||
} satisfies RejectedCandidate,
|
||||
};
|
||||
}
|
||||
|
||||
const latestTenKSnapshotId = sampleSnapshots.find(
|
||||
(snapshot) => snapshot.filing_type === "10-K",
|
||||
)?.id;
|
||||
if (
|
||||
distinctSnapshotIds.length < 2 &&
|
||||
!(
|
||||
latestTenKSnapshotId !== undefined &&
|
||||
distinctSnapshotIds.includes(latestTenKSnapshotId)
|
||||
)
|
||||
) {
|
||||
return {
|
||||
accepted: null,
|
||||
rejected: {
|
||||
qname: records[0]?.qname ?? "unknown",
|
||||
reason: "promotion_threshold_not_met",
|
||||
source_snapshot_ids: distinctSnapshotIds,
|
||||
} satisfies RejectedCandidate,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
accepted: {
|
||||
qname: records[0]?.qname ?? "unknown",
|
||||
surface_key: surface.surface_key,
|
||||
statement: surface.statement,
|
||||
reason,
|
||||
source_snapshot_ids: distinctSnapshotIds,
|
||||
} satisfies AcceptedCandidate,
|
||||
rejected: null,
|
||||
};
|
||||
}
|
||||
|
||||
function mergeOverlayDefinition(input: {
|
||||
ticker: string;
|
||||
pack: string | null;
|
||||
current: IssuerOverlayDefinition | null;
|
||||
accepted: AcceptedCandidate[];
|
||||
}) {
|
||||
const mappings = new Map<
|
||||
string,
|
||||
{
|
||||
surface_key: string;
|
||||
statement: IssuerOverlayDefinition["mappings"][number]["statement"];
|
||||
allowed_source_concepts: Set<string>;
|
||||
allowed_authoritative_concepts: Set<string>;
|
||||
}
|
||||
>();
|
||||
|
||||
for (const mapping of input.current?.mappings ?? []) {
|
||||
const key = `${mapping.statement}:${mapping.surface_key}`;
|
||||
mappings.set(key, {
|
||||
surface_key: mapping.surface_key,
|
||||
statement: mapping.statement,
|
||||
allowed_source_concepts: new Set(mapping.allowed_source_concepts),
|
||||
allowed_authoritative_concepts: new Set(
|
||||
mapping.allowed_authoritative_concepts,
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
for (const candidate of input.accepted) {
|
||||
const key = `${candidate.statement}:${candidate.surface_key}`;
|
||||
const existing = mappings.get(key) ?? {
|
||||
surface_key: candidate.surface_key,
|
||||
statement: candidate.statement,
|
||||
allowed_source_concepts: new Set<string>(),
|
||||
allowed_authoritative_concepts: new Set<string>(),
|
||||
};
|
||||
existing.allowed_source_concepts.add(candidate.qname);
|
||||
mappings.set(key, existing);
|
||||
}
|
||||
|
||||
return normalizeIssuerOverlayDefinition({
|
||||
version: "fiscal-v1",
|
||||
ticker: input.ticker,
|
||||
pack: input.current?.pack ?? input.pack,
|
||||
mappings: [...mappings.values()].map((mapping) => ({
|
||||
surface_key: mapping.surface_key,
|
||||
statement: mapping.statement,
|
||||
allowed_source_concepts: [...mapping.allowed_source_concepts],
|
||||
allowed_authoritative_concepts: [...mapping.allowed_authoritative_concepts],
|
||||
})),
|
||||
});
|
||||
}
|
||||
|
||||
export async function shouldQueueTickerAutomation(ticker: string) {
|
||||
const normalizedTicker = normalizeTicker(ticker);
|
||||
if (!normalizedTicker) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const [overlay, snapshotResult] = await Promise.all([
|
||||
getIssuerOverlay(normalizedTicker),
|
||||
listFilingTaxonomySnapshotsByTicker({
|
||||
ticker: normalizedTicker,
|
||||
window: "all",
|
||||
filingTypes: ["10-K", "10-Q"],
|
||||
limit: 5,
|
||||
}),
|
||||
]);
|
||||
|
||||
const latestReadySnapshot =
|
||||
snapshotResult.snapshots.find((snapshot) => snapshot.parse_status === "ready") ??
|
||||
null;
|
||||
|
||||
if (!overlay || !latestReadySnapshot) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (overlay.status === "error") {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (overlay.status === "empty") {
|
||||
return overlay.last_built_at === null;
|
||||
}
|
||||
|
||||
return (
|
||||
overlay.active_revision_id !== latestReadySnapshot.issuer_overlay_revision_id
|
||||
);
|
||||
}
|
||||
|
||||
export async function generateIssuerOverlayForTicker(ticker: string) {
|
||||
const normalizedTicker = normalizeTicker(ticker);
|
||||
const currentOverlay = await getIssuerOverlay(normalizedTicker);
|
||||
const threeYearsAgo = new Date();
|
||||
threeYearsAgo.setUTCFullYear(threeYearsAgo.getUTCFullYear() - 3);
|
||||
|
||||
const snapshotResult = await listFilingTaxonomySnapshotsByTicker({
|
||||
ticker: normalizedTicker,
|
||||
window: "all",
|
||||
filingTypes: ["10-K", "10-Q"],
|
||||
limit: 24,
|
||||
});
|
||||
const sampleSnapshots = snapshotResult.snapshots
|
||||
.filter(
|
||||
(snapshot) =>
|
||||
snapshot.parse_status === "ready" &&
|
||||
Date.parse(snapshot.filing_date) >= threeYearsAgo.getTime(),
|
||||
)
|
||||
.slice(0, 12);
|
||||
|
||||
await ensureIssuerOverlayRow(normalizedTicker);
|
||||
|
||||
if (sampleSnapshots.length === 0) {
|
||||
const stats: IssuerOverlayStats = {
|
||||
pack: null,
|
||||
sampledSnapshotCount: 0,
|
||||
sampledSnapshotIds: [],
|
||||
acceptedMappingCount: 0,
|
||||
rejectedMappingCount: 0,
|
||||
publishedRevisionNumber: null,
|
||||
};
|
||||
await markIssuerOverlayBuildState({
|
||||
ticker: normalizedTicker,
|
||||
status: currentOverlay?.active_revision_id ? "active" : "empty",
|
||||
stats,
|
||||
activeRevisionId: currentOverlay?.active_revision_id ?? null,
|
||||
});
|
||||
return {
|
||||
published: false,
|
||||
activeRevisionId: currentOverlay?.active_revision_id ?? null,
|
||||
sampledSnapshotIds: [],
|
||||
};
|
||||
}
|
||||
|
||||
const pack = selectWorkingPack(sampleSnapshots);
|
||||
const catalog = loadSurfaceCatalog(pack);
|
||||
const conceptRows = (
|
||||
await listFilingTaxonomyConceptsBySnapshotIds(
|
||||
sampleSnapshots.map((snapshot) => snapshot.id),
|
||||
)
|
||||
).filter((record) => {
|
||||
return (
|
||||
record.is_extension &&
|
||||
record.residual_flag &&
|
||||
!record.is_abstract &&
|
||||
record.statement_kind !== null
|
||||
);
|
||||
});
|
||||
|
||||
const grouped = new Map<string, FilingTaxonomyConceptRecord[]>();
|
||||
for (const record of conceptRows) {
|
||||
const existing = grouped.get(record.qname) ?? [];
|
||||
existing.push(record);
|
||||
grouped.set(record.qname, existing);
|
||||
}
|
||||
|
||||
const acceptedMappings: AcceptedCandidate[] = [];
|
||||
const rejectedMappings: RejectedCandidate[] = [];
|
||||
|
||||
for (const records of grouped.values()) {
|
||||
const selection = chooseCandidate(records, sampleSnapshots, catalog);
|
||||
if (selection.accepted) {
|
||||
acceptedMappings.push(selection.accepted);
|
||||
} else if (selection.rejected) {
|
||||
rejectedMappings.push(selection.rejected);
|
||||
}
|
||||
}
|
||||
|
||||
const nextDefinition =
|
||||
acceptedMappings.length > 0 || currentOverlay?.active_revision?.definition_json
|
||||
? mergeOverlayDefinition({
|
||||
ticker: normalizedTicker,
|
||||
pack,
|
||||
current: currentOverlay?.active_revision?.definition_json ?? null,
|
||||
accepted: acceptedMappings,
|
||||
})
|
||||
: null;
|
||||
|
||||
const diagnostics: IssuerOverlayDiagnostics = {
|
||||
pack,
|
||||
sampledSnapshotIds: sampleSnapshots.map((snapshot) => snapshot.id),
|
||||
acceptedMappings,
|
||||
rejectedMappings,
|
||||
};
|
||||
const stats: IssuerOverlayStats = {
|
||||
pack,
|
||||
sampledSnapshotCount: sampleSnapshots.length,
|
||||
sampledSnapshotIds: sampleSnapshots.map((snapshot) => snapshot.id),
|
||||
acceptedMappingCount: acceptedMappings.length,
|
||||
rejectedMappingCount: rejectedMappings.length,
|
||||
publishedRevisionNumber:
|
||||
currentOverlay?.active_revision?.revision_number ?? null,
|
||||
};
|
||||
|
||||
if (!nextDefinition || nextDefinition.mappings.length === 0) {
|
||||
const overlay = await markIssuerOverlayBuildState({
|
||||
ticker: normalizedTicker,
|
||||
status: currentOverlay?.active_revision_id ? "active" : "empty",
|
||||
stats,
|
||||
activeRevisionId: currentOverlay?.active_revision_id ?? null,
|
||||
});
|
||||
return {
|
||||
published: false,
|
||||
activeRevisionId: overlay?.active_revision_id ?? null,
|
||||
sampledSnapshotIds: diagnostics.sampledSnapshotIds,
|
||||
};
|
||||
}
|
||||
|
||||
const published = await publishIssuerOverlayRevision({
|
||||
ticker: normalizedTicker,
|
||||
definition: nextDefinition,
|
||||
diagnostics,
|
||||
stats,
|
||||
});
|
||||
stats.publishedRevisionNumber = published.revision.revision_number;
|
||||
await markIssuerOverlayBuildState({
|
||||
ticker: normalizedTicker,
|
||||
status: "active",
|
||||
stats,
|
||||
activeRevisionId: published.revision.id,
|
||||
});
|
||||
|
||||
return {
|
||||
published: published.published,
|
||||
activeRevisionId: published.revision.id,
|
||||
sampledSnapshotIds: diagnostics.sampledSnapshotIds,
|
||||
};
|
||||
}
|
||||
|
||||
export async function recordIssuerOverlayBuildFailure(
|
||||
ticker: string,
|
||||
error: unknown,
|
||||
) {
|
||||
await markIssuerOverlayBuildState({
|
||||
ticker,
|
||||
status: "error",
|
||||
lastError: error instanceof Error ? error.message : String(error),
|
||||
});
|
||||
}
|
||||
Reference in New Issue
Block a user