576 lines
16 KiB
TypeScript
576 lines
16 KiB
TypeScript
import { existsSync, readFileSync } from "node:fs";
|
|
import { join } from "node:path";
|
|
import type {
|
|
IssuerOverlayDefinition,
|
|
IssuerOverlayDiagnostics,
|
|
IssuerOverlayStats,
|
|
} from "@/lib/server/db/schema";
|
|
import type {
|
|
FilingTaxonomyConceptRecord,
|
|
FilingTaxonomySnapshotRecord,
|
|
} from "@/lib/server/repos/filing-taxonomy";
|
|
import {
|
|
listFilingTaxonomyConceptsBySnapshotIds,
|
|
listFilingTaxonomySnapshotsByTicker,
|
|
} from "@/lib/server/repos/filing-taxonomy";
|
|
import {
|
|
ensureIssuerOverlayRow,
|
|
getIssuerOverlay,
|
|
markIssuerOverlayBuildState,
|
|
normalizeIssuerOverlayDefinition,
|
|
publishIssuerOverlayRevision,
|
|
} from "@/lib/server/repos/issuer-overlays";
|
|
|
|
/**
 * Shape this module reads from a parsed taxonomy surface JSON file
 * (see `loadOptionalSurfaceFile`, which casts the parsed JSON to this type).
 */
type SurfaceFile = {
  /** Surface declarations; when absent the file contributes nothing. */
  surfaces?: {
    /** Key under which the surface is stored in the catalog map. */
    surface_key: string;
    /** Raw statement name; only the known statement kinds are accepted by `applySurfaceFile`. */
    statement: string;
    /** Concepts collected into the catalog entry's `sourceConcepts`. */
    allowed_source_concepts?: string[];
    /** Concepts collected into the catalog entry's `authoritativeConcepts`. */
    allowed_authoritative_concepts?: string[];
  }[];
};
|
|
|
|
/**
 * One catalog entry per surface key, accumulating the concepts declared for
 * that surface across every surface file that was applied.
 */
type SurfaceCatalogEntry = {
  surface_key: string;
  /** Statement the surface belongs to; restricted to the kinds listed here. */
  statement:
    | "income"
    | "balance"
    | "cash_flow"
    | "disclosure"
    | "equity"
    | "comprehensive_income";
  /** Union of `allowed_source_concepts` seen for this surface. */
  sourceConcepts: Set<string>;
  /** Union of `allowed_authoritative_concepts` seen for this surface. */
  authoritativeConcepts: Set<string>;
};
|
|
|
|
/** An extension concept that `chooseCandidate` matched to exactly one catalog surface. */
type AcceptedCandidate = {
  /** Qualified name of the concept being mapped. */
  qname: string;
  /** Key of the catalog surface the concept was matched to. */
  surface_key: string;
  /** Statement of the matched surface. */
  statement: SurfaceCatalogEntry["statement"];
  /** Which lookup produced the match. */
  reason: "authoritative_match" | "local_name_match";
  /** Distinct snapshot ids the concept was observed in, ascending. */
  source_snapshot_ids: number[];
};
|
|
|
|
/** An extension concept that `chooseCandidate` declined to map, with the reason code. */
type RejectedCandidate = {
  /** Qualified name of the concept that was considered. */
  qname: string;
  /** Machine-readable rejection code (e.g. "insufficient_recurrence"). */
  reason: string;
  /** Distinct snapshot ids the concept was observed in, ascending. */
  source_snapshot_ids: number[];
};
|
|
|
|
/** Canonical ticker form: surrounding whitespace stripped, then upper-cased. */
function normalizeTicker(ticker: string) {
  const trimmed = ticker.trim();
  return trimmed.toUpperCase();
}
|
|
|
|
/**
 * Reduces a concept identifier to its lower-cased local name.
 *
 * The local name is whatever follows the LAST ":" or "#" in the trimmed
 * input, so prefixed names ("us-gaap:Revenues"), fragment names
 * ("acme#Revenues") and URI-style keys ("http://…/us-gaap/2023#Revenues")
 * all normalize to "revenues". Previously a colon anywhere in the value took
 * precedence, so a URI containing "://" kept its whole path and fragment.
 *
 * @param value Concept identifier; may be nullish.
 * @returns The normalized local name, or `null` when the input is nullish,
 *   blank, or has nothing after its last separator.
 */
function normalizeLocalName(value: string | null | undefined) {
  const trimmed = value?.trim() ?? "";
  if (!trimmed) {
    return null;
  }

  // lastIndexOf yields -1 when a separator is absent, so `cut + 1` is 0 and
  // the whole string is kept when there is no prefix at all.
  const cut = Math.max(trimmed.lastIndexOf(":"), trimmed.lastIndexOf("#"));
  const localName = trimmed.slice(cut + 1).trim().toLowerCase();
  return localName || null;
}
|
|
|
|
/**
 * Resolves a taxonomy file name against `<cwd>/rust/taxonomy/fiscal/v1`.
 *
 * @param fileName File name inside the taxonomy directory.
 * @returns The joined path, relative to the process's current working directory.
 */
function taxonomyFilePath(fileName: string) {
  const taxonomyDir = join(process.cwd(), "rust", "taxonomy", "fiscal", "v1");
  return join(taxonomyDir, fileName);
}
|
|
|
|
/**
 * Reads and parses a taxonomy surface file if it exists on disk.
 *
 * @param fileName File name inside the taxonomy directory.
 * @returns The parsed JSON cast to `SurfaceFile` (no runtime validation is
 *   performed here), or `null` when the file does not exist.
 * @throws Whatever `readFileSync` / `JSON.parse` throw for an unreadable or
 *   malformed file.
 */
function loadOptionalSurfaceFile(fileName: string) {
  const filePath = taxonomyFilePath(fileName);
  const raw = existsSync(filePath) ? readFileSync(filePath, "utf8") : null;
  return raw === null ? null : (JSON.parse(raw) as SurfaceFile);
}
|
|
|
|
/**
 * Merges the surfaces declared in `file` into `catalog`, in place.
 *
 * Surfaces whose `statement` is not one of the known statement kinds are
 * skipped. When a surface key is already present, its existing entry (and
 * therefore its existing `statement`) is kept and only the concept sets grow.
 *
 * @param catalog Catalog keyed by surface key; mutated.
 * @param file Parsed surface file, or `null` (treated as no surfaces).
 */
function applySurfaceFile(
  catalog: Map<string, SurfaceCatalogEntry>,
  file: SurfaceFile | null,
) {
  const isCatalogStatement = (
    value: string,
  ): value is SurfaceCatalogEntry["statement"] =>
    value === "income" ||
    value === "balance" ||
    value === "cash_flow" ||
    value === "disclosure" ||
    value === "equity" ||
    value === "comprehensive_income";

  for (const surface of file?.surfaces ?? []) {
    const { surface_key: surfaceKey, statement } = surface;
    if (!isCatalogStatement(statement)) {
      continue;
    }

    const entry = catalog.get(surfaceKey) ?? {
      surface_key: surfaceKey,
      statement,
      sourceConcepts: new Set<string>(),
      authoritativeConcepts: new Set<string>(),
    };

    for (const sourceConcept of surface.allowed_source_concepts ?? []) {
      entry.sourceConcepts.add(sourceConcept);
    }
    for (const authoritativeConcept of surface.allowed_authoritative_concepts ?? []) {
      entry.authoritativeConcepts.add(authoritativeConcept);
    }

    // Stored only after both concept lists were consumed, so a surface whose
    // lists cannot be iterated never leaves a half-built entry behind.
    catalog.set(surfaceKey, entry);
  }
}
|
|
|
|
/**
 * Builds the surface catalog for a pack.
 *
 * The pack's own `<pack>.surface.json` and `<pack>.disclosure.surface.json`
 * are applied first; for any pack other than "core" the two core files are
 * applied afterwards. Missing files are skipped.
 *
 * @param pack Pack name; `null` or blank falls back to "core".
 * @returns Catalog keyed by surface key.
 */
function loadSurfaceCatalog(pack: string | null) {
  const packName = pack?.trim() || "core";

  const fileNames = [
    `${packName}.surface.json`,
    `${packName}.disclosure.surface.json`,
  ];
  if (packName !== "core") {
    fileNames.push("core.surface.json", "core.disclosure.surface.json");
  }

  const catalog = new Map<string, SurfaceCatalogEntry>();
  for (const fileName of fileNames) {
    applySurfaceFile(catalog, loadOptionalSurfaceFile(fileName));
  }
  return catalog;
}
|
|
|
|
/**
 * Builds reverse lookups from normalized concept local name to surface keys.
 *
 * @param catalog Catalog keyed by surface key.
 * @returns `authoritative`: local names of authoritative concepts → surface
 *   keys declaring them. `localNames`: the same lookup over source and
 *   authoritative concepts combined.
 */
function buildSurfaceIndexes(catalog: Map<string, SurfaceCatalogEntry>) {
  const authoritative = new Map<string, Set<string>>();
  const localNames = new Map<string, Set<string>>();

  // Files `surfaceKey` under the concept's normalized local name; concepts
  // that normalize to nothing are ignored.
  const register = (
    index: Map<string, Set<string>>,
    concept: string,
    surfaceKey: string,
  ) => {
    const key = normalizeLocalName(concept);
    if (!key) {
      return;
    }
    const bucket = index.get(key);
    if (bucket) {
      bucket.add(surfaceKey);
    } else {
      index.set(key, new Set<string>([surfaceKey]));
    }
  };

  for (const { surface_key, sourceConcepts, authoritativeConcepts } of catalog.values()) {
    for (const concept of authoritativeConcepts) {
      register(authoritative, concept, surface_key);
    }
    for (const concept of sourceConcepts) {
      register(localNames, concept, surface_key);
    }
    for (const concept of authoritativeConcepts) {
      register(localNames, concept, surface_key);
    }
  }

  return { authoritative, localNames };
}
|
|
|
|
/**
 * Picks the pack to work with: the most frequent non-blank `fiscal_pack`
 * among the snapshots (trimmed), ties broken by `localeCompare` order.
 *
 * Falls back to "core" when no snapshot carries a usable pack. The previous
 * fallback to `snapshots[0]?.fiscal_pack` could only be reached when every
 * pack was null or blank, so it could hand back an empty or whitespace-only
 * string instead of "core".
 *
 * @param snapshots Snapshots to tally.
 * @returns A non-blank pack name.
 */
function selectWorkingPack(snapshots: FilingTaxonomySnapshotRecord[]) {
  const counts = new Map<string, number>();
  for (const snapshot of snapshots) {
    const pack = snapshot.fiscal_pack?.trim();
    if (pack) {
      counts.set(pack, (counts.get(pack) ?? 0) + 1);
    }
  }

  const ranked = [...counts.entries()].sort(
    (left, right) => right[1] - left[1] || left[0].localeCompare(right[0]),
  );
  return ranked[0]?.[0] ?? "core";
}
|
|
|
|
/**
 * Collects the distinct `snapshot_id` values of the given records.
 *
 * @returns A new array of unique ids in ascending numeric order.
 */
function sourceSnapshotIds(records: FilingTaxonomyConceptRecord[]) {
  const ids = records.map((record) => record.snapshot_id);
  const unique = Array.from(new Set(ids));
  unique.sort((a, b) => a - b);
  return unique;
}
|
|
|
|
/**
 * Lookup indexes memoised per catalog instance, so they are built once per
 * catalog rather than once per concept group. Assumes a catalog is not
 * mutated after its first use here (in this file it is built once and then
 * only read).
 */
const surfaceIndexCache = new WeakMap<
  Map<string, SurfaceCatalogEntry>,
  ReturnType<typeof buildSurfaceIndexes>
>();

/**
 * Decides whether one concept (all `records` share a qname) can be mapped to
 * a catalog surface.
 *
 * A concept is accepted only when:
 *  1. its records agree on exactly one non-null `statement_kind`;
 *  2. it occurs in at least two distinct snapshots;
 *  3. exactly one catalog surface of that statement matches, first via the
 *     records' single authoritative concept name, otherwise via their single
 *     local name.
 *
 * @param records Concept rows for a single qname.
 * @param sampleSnapshots Snapshots the rows were sampled from. Currently
 *   unused; kept so the call signature stays unchanged.
 * @param catalog Surface catalog to match against.
 * @returns `{ accepted, rejected }` with exactly one of the two non-null.
 */
function chooseCandidate(
  records: FilingTaxonomyConceptRecord[],
  sampleSnapshots: FilingTaxonomySnapshotRecord[],
  catalog: Map<string, SurfaceCatalogEntry>,
) {
  const qname = records[0]?.qname ?? "unknown";
  const distinctSnapshotIds = sourceSnapshotIds(records);

  const reject = (reason: string) => ({
    accepted: null,
    rejected: {
      qname,
      reason,
      source_snapshot_ids: distinctSnapshotIds,
    } satisfies RejectedCandidate,
  });

  const statementKinds = [
    ...new Set(
      records
        .map((record) => record.statement_kind)
        .filter((value): value is NonNullable<typeof value> => value !== null),
    ),
  ];
  if (statementKinds.length !== 1) {
    return reject("inconsistent_statement_kind");
  }
  if (distinctSnapshotIds.length < 2) {
    return reject("insufficient_recurrence");
  }
  const [statementKind] = statementKinds;

  let indexes = surfaceIndexCache.get(catalog);
  if (!indexes) {
    indexes = buildSurfaceIndexes(catalog);
    surfaceIndexCache.set(catalog, indexes);
  }

  // Distinct normalized local names produced by `pick` across all records.
  const distinctNames = (
    pick: (record: FilingTaxonomyConceptRecord) => string | null | undefined,
  ) => [
    ...new Set(
      records
        .map((record) => normalizeLocalName(pick(record)))
        .filter((value): value is string => value !== null),
    ),
  ];

  // The single surface of the right statement filed under the records' single
  // name, or null when the name or the surface is missing or ambiguous.
  const uniqueSurfaceKey = (
    index: Map<string, Set<string>>,
    names: string[],
  ): string | null => {
    const [onlyName] = names;
    if (names.length !== 1 || onlyName === undefined) {
      return null;
    }
    const matches = [...(index.get(onlyName) ?? [])].filter(
      (surfaceKey) => catalog.get(surfaceKey)?.statement === statementKind,
    );
    return matches.length === 1 ? (matches[0] ?? null) : null;
  };

  let reason: AcceptedCandidate["reason"] = "authoritative_match";
  let matchedSurfaceKey = uniqueSurfaceKey(
    indexes.authoritative,
    distinctNames((record) => record.authoritative_concept_key),
  );
  if (!matchedSurfaceKey) {
    reason = "local_name_match";
    matchedSurfaceKey = uniqueSurfaceKey(
      indexes.localNames,
      distinctNames((record) => record.local_name),
    );
  }
  if (!matchedSurfaceKey) {
    return reject("no_unique_surface_match");
  }

  const surface = catalog.get(matchedSurfaceKey);
  if (!surface) {
    return reject("unknown_surface_key");
  }

  // NOTE(review): a "promotion_threshold_not_met" check used to sit here. It
  // exempted concepts seen in the latest 10-K snapshot, but it only fired for
  // fewer than two distinct snapshots, which is already rejected above as
  // "insufficient_recurrence", so it could never run. If single-snapshot 10-K
  // promotion is wanted, the recurrence check above must be relaxed.

  return {
    accepted: {
      qname,
      surface_key: surface.surface_key,
      statement: surface.statement,
      reason,
      source_snapshot_ids: distinctSnapshotIds,
    } satisfies AcceptedCandidate,
    rejected: null,
  };
}
|
|
|
|
/**
 * Produces the next overlay definition by layering newly accepted candidates
 * on top of the current definition's mappings.
 *
 * Mappings are keyed by `<statement>:<surface_key>`. Each accepted candidate
 * adds its qname to the matching mapping's `allowed_source_concepts`,
 * creating the mapping when absent. The current definition's `pack` wins
 * over `input.pack` when it is set.
 *
 * @returns The result of `normalizeIssuerOverlayDefinition` on the merged
 *   definition.
 */
function mergeOverlayDefinition(input: {
  ticker: string;
  pack: string | null;
  current: IssuerOverlayDefinition | null;
  accepted: AcceptedCandidate[];
}) {
  type MappingDraft = {
    surface_key: string;
    statement: IssuerOverlayDefinition["mappings"][number]["statement"];
    allowed_source_concepts: Set<string>;
    allowed_authoritative_concepts: Set<string>;
  };

  const { ticker, pack, current, accepted } = input;
  const drafts = new Map<string, MappingDraft>();

  for (const existing of current?.mappings ?? []) {
    drafts.set(`${existing.statement}:${existing.surface_key}`, {
      surface_key: existing.surface_key,
      statement: existing.statement,
      allowed_source_concepts: new Set(existing.allowed_source_concepts),
      allowed_authoritative_concepts: new Set(
        existing.allowed_authoritative_concepts,
      ),
    });
  }

  for (const { qname, surface_key, statement } of accepted) {
    const key = `${statement}:${surface_key}`;
    let draft = drafts.get(key);
    if (!draft) {
      draft = {
        surface_key,
        statement,
        allowed_source_concepts: new Set<string>(),
        allowed_authoritative_concepts: new Set<string>(),
      };
      drafts.set(key, draft);
    }
    draft.allowed_source_concepts.add(qname);
  }

  return normalizeIssuerOverlayDefinition({
    version: "fiscal-v1",
    ticker,
    pack: current?.pack ?? pack,
    mappings: Array.from(drafts.values(), (draft) => ({
      surface_key: draft.surface_key,
      statement: draft.statement,
      allowed_source_concepts: Array.from(draft.allowed_source_concepts),
      allowed_authoritative_concepts: Array.from(
        draft.allowed_authoritative_concepts,
      ),
    })),
  });
}
|
|
|
|
/**
 * Decides whether overlay automation should be queued for a ticker.
 *
 * Rules, in order:
 *  - blank ticker → `false`;
 *  - no overlay row, or no "ready" snapshot among the five fetched → `true`;
 *  - overlay in "error" → `true`;
 *  - overlay "empty" → `true` only if it has never been built;
 *  - otherwise → `true` when the overlay's active revision differs from the
 *    revision recorded on the first ready snapshot returned.
 *
 * @param ticker Ticker symbol; normalized before use.
 */
export async function shouldQueueTickerAutomation(ticker: string) {
  const symbol = normalizeTicker(ticker);
  if (symbol === "") {
    return false;
  }

  const [overlay, { snapshots }] = await Promise.all([
    getIssuerOverlay(symbol),
    listFilingTaxonomySnapshotsByTicker({
      ticker: symbol,
      window: "all",
      filingTypes: ["10-K", "10-Q"],
      limit: 5,
    }),
  ]);

  // NOTE(review): picks the first "ready" snapshot in the order returned by
  // the repo — presumably newest first; confirm against the repo's ordering.
  const readySnapshot = snapshots.find(
    (candidate) => candidate.parse_status === "ready",
  );
  if (!overlay || !readySnapshot) {
    return true;
  }

  switch (overlay.status) {
    case "error":
      return true;
    case "empty":
      return overlay.last_built_at === null;
    default:
      return (
        overlay.active_revision_id !== readySnapshot.issuer_overlay_revision_id
      );
  }
}
|
|
|
|
/**
 * Rebuilds the issuer overlay for a ticker from its recent filing snapshots.
 *
 * Steps:
 *  1. Load the current overlay and up to 24 10-K/10-Q snapshots (concurrently;
 *     the two reads are independent).
 *  2. Keep at most 12 "ready" snapshots filed within the last three years.
 *  3. Ensure an overlay row exists.
 *  4. With no usable snapshots, record the build state and return.
 *  5. Otherwise pick the working pack, load its catalog, group the extension /
 *     residual / non-abstract concepts by qname and run `chooseCandidate` on
 *     each group.
 *  6. Merge accepted candidates into the current definition; publish a
 *     revision when the merged definition has mappings, otherwise just record
 *     the build state.
 *
 * @param ticker Ticker symbol; normalized before use.
 * @returns Whether a revision was published, the active revision id
 *   afterwards, and the ids of the snapshots that were sampled.
 */
export async function generateIssuerOverlayForTicker(ticker: string) {
  const normalizedTicker = normalizeTicker(ticker);

  const [currentOverlay, snapshotResult] = await Promise.all([
    getIssuerOverlay(normalizedTicker),
    listFilingTaxonomySnapshotsByTicker({
      ticker: normalizedTicker,
      window: "all",
      filingTypes: ["10-K", "10-Q"],
      limit: 24,
    }),
  ]);

  const threeYearsAgo = new Date();
  threeYearsAgo.setUTCFullYear(threeYearsAgo.getUTCFullYear() - 3);
  const cutoffMs = threeYearsAgo.getTime();

  // An unparseable filing_date yields NaN, which fails the comparison and
  // therefore drops the snapshot.
  const sampleSnapshots = snapshotResult.snapshots
    .filter(
      (snapshot) =>
        snapshot.parse_status === "ready" &&
        Date.parse(snapshot.filing_date) >= cutoffMs,
    )
    .slice(0, 12);
  const sampledSnapshotIds = sampleSnapshots.map((snapshot) => snapshot.id);

  await ensureIssuerOverlayRow(normalizedTicker);

  const currentRevisionId = currentOverlay?.active_revision_id ?? null;
  // Status to record when this run publishes nothing.
  const idleStatus = currentOverlay?.active_revision_id ? "active" : "empty";

  if (sampleSnapshots.length === 0) {
    // NOTE(review): publishedRevisionNumber is null here even when an active
    // revision exists, unlike the no-new-mappings path below — confirm intent.
    const stats: IssuerOverlayStats = {
      pack: null,
      sampledSnapshotCount: 0,
      sampledSnapshotIds: [],
      acceptedMappingCount: 0,
      rejectedMappingCount: 0,
      publishedRevisionNumber: null,
    };
    await markIssuerOverlayBuildState({
      ticker: normalizedTicker,
      status: idleStatus,
      stats,
      activeRevisionId: currentRevisionId,
    });
    return {
      published: false,
      activeRevisionId: currentRevisionId,
      sampledSnapshotIds: [],
    };
  }

  const pack = selectWorkingPack(sampleSnapshots);
  const catalog = loadSurfaceCatalog(pack);

  const conceptRows = (
    await listFilingTaxonomyConceptsBySnapshotIds(sampledSnapshotIds)
  ).filter((record) => {
    return (
      record.is_extension &&
      record.residual_flag &&
      !record.is_abstract &&
      record.statement_kind !== null
    );
  });

  const grouped = new Map<string, FilingTaxonomyConceptRecord[]>();
  for (const record of conceptRows) {
    const bucket = grouped.get(record.qname);
    if (bucket) {
      bucket.push(record);
    } else {
      grouped.set(record.qname, [record]);
    }
  }

  const acceptedMappings: AcceptedCandidate[] = [];
  const rejectedMappings: RejectedCandidate[] = [];
  for (const records of grouped.values()) {
    const selection = chooseCandidate(records, sampleSnapshots, catalog);
    if (selection.accepted) {
      acceptedMappings.push(selection.accepted);
    } else if (selection.rejected) {
      rejectedMappings.push(selection.rejected);
    }
  }

  const currentDefinition =
    currentOverlay?.active_revision?.definition_json ?? null;
  const nextDefinition =
    acceptedMappings.length > 0 || currentOverlay?.active_revision?.definition_json
      ? mergeOverlayDefinition({
          ticker: normalizedTicker,
          pack,
          current: currentDefinition,
          accepted: acceptedMappings,
        })
      : null;

  // Each consumer gets its own copy of the id list, as before.
  const diagnostics: IssuerOverlayDiagnostics = {
    pack,
    sampledSnapshotIds: [...sampledSnapshotIds],
    acceptedMappings,
    rejectedMappings,
  };
  const stats: IssuerOverlayStats = {
    pack,
    sampledSnapshotCount: sampleSnapshots.length,
    sampledSnapshotIds: [...sampledSnapshotIds],
    acceptedMappingCount: acceptedMappings.length,
    rejectedMappingCount: rejectedMappings.length,
    publishedRevisionNumber:
      currentOverlay?.active_revision?.revision_number ?? null,
  };

  if (!nextDefinition || nextDefinition.mappings.length === 0) {
    const overlay = await markIssuerOverlayBuildState({
      ticker: normalizedTicker,
      status: idleStatus,
      stats,
      activeRevisionId: currentRevisionId,
    });
    return {
      published: false,
      activeRevisionId: overlay?.active_revision_id ?? null,
      sampledSnapshotIds: diagnostics.sampledSnapshotIds,
    };
  }

  const published = await publishIssuerOverlayRevision({
    ticker: normalizedTicker,
    definition: nextDefinition,
    diagnostics,
    stats,
  });

  // The revision number is only known after publishing, so the stats object
  // is updated before the final build state is recorded.
  stats.publishedRevisionNumber = published.revision.revision_number;
  await markIssuerOverlayBuildState({
    ticker: normalizedTicker,
    status: "active",
    stats,
    activeRevisionId: published.revision.id,
  });

  return {
    published: published.published,
    activeRevisionId: published.revision.id,
    sampledSnapshotIds: diagnostics.sampledSnapshotIds,
  };
}
|
|
|
|
/**
 * Marks the ticker's overlay build as failed and stores the error text.
 *
 * The ticker is normalized the same way as in the other exported entry
 * points of this module, so a differently-cased or padded symbol targets the
 * same overlay row that `generateIssuerOverlayForTicker` works on.
 *
 * @param ticker Ticker symbol; normalized before use.
 * @param error The thrown value; `Error` instances contribute their
 *   `message`, anything else is converted with `String()`.
 */
export async function recordIssuerOverlayBuildFailure(
  ticker: string,
  error: unknown,
) {
  const lastError = error instanceof Error ? error.message : String(error);
  await markIssuerOverlayBuildState({
    ticker: normalizeTicker(ticker),
    status: "error",
    lastError,
  });
}
|