Automate issuer overlay creation from ticker searches

This commit is contained in:
2026-03-19 20:44:58 -04:00
parent 17de3dd72d
commit 391d6d34ce
79 changed files with 4746 additions and 695 deletions

View File

@@ -32,6 +32,14 @@ import {
import { createPortfolioInsight } from "@/lib/server/repos/insights";
import { updateTaskStage } from "@/lib/server/repos/tasks";
import { fetchPrimaryFilingText, fetchRecentFilings } from "@/lib/server/sec";
import {
generateIssuerOverlayForTicker,
recordIssuerOverlayBuildFailure,
} from "@/lib/server/issuer-overlays";
import {
getActiveIssuerOverlayDefinition,
getIssuerOverlay,
} from "@/lib/server/repos/issuer-overlays";
import { enqueueTask } from "@/lib/server/tasks";
import { hydrateFilingTaxonomySnapshot } from "@/lib/server/taxonomy/engine";
import { validateMetricsWithPdfLlm } from "@/lib/server/taxonomy/pdf-validation";
@@ -143,6 +151,12 @@ export type TaskExecutionOutcome = {
completionContext?: TaskStageContext | null;
};
/**
 * Summary returned by a single taxonomy hydration pass over a list of
 * candidate filings.
 */
type TaxonomyHydrationRunResult = {
/** Number of filings whose snapshot was hydrated and persisted successfully. */
hydrated: number;
/** Number of filings whose hydration threw and were recorded as failed. */
failed: number;
/**
 * IDs of every filing for which a refresh was attempted during the pass,
 * whether the attempt ended up hydrated or failed.
 */
touchedFilingIds: Set<number>;
};
function buildTaskOutcome(
result: unknown,
completionDetail: string,
@@ -155,6 +169,259 @@ function buildTaskOutcome(
};
}
/**
 * Decides whether a filing's taxonomy snapshot has to be (re)built.
 *
 * A refresh is required when any of the following holds:
 *  - no snapshot exists yet;
 *  - the snapshot's `updated_at` parses to an earlier instant than the
 *    filing's `updated_at`;
 *  - the overlay revision stored on the snapshot (a missing value is treated
 *    as `null`) differs from `overlayRevisionId`.
 *
 * If either timestamp cannot be parsed, `Date.parse` yields `NaN`, the
 * staleness comparison is false, and the decision falls through to the
 * overlay revision comparison.
 *
 * @param input.existingSnapshot - Stored snapshot for the filing, if any.
 * @param input.filingUpdatedAt - The filing's last-update timestamp string.
 * @param input.overlayRevisionId - Overlay revision the caller wants applied,
 *   or `null` for none.
 * @returns `true` when the snapshot should be rebuilt.
 */
function shouldRefreshTaxonomySnapshot(input: {
  existingSnapshot: Awaited<
    ReturnType<typeof getFilingTaxonomySnapshotByFilingId>
  > | null;
  filingUpdatedAt: string;
  overlayRevisionId: number | null;
}) {
  const {
    existingSnapshot: snapshot,
    filingUpdatedAt,
    overlayRevisionId,
  } = input;

  // Nothing stored yet: always build.
  if (!snapshot) {
    return true;
  }

  // The filing changed after the snapshot was written.
  const snapshotTimestamp = Date.parse(snapshot.updated_at);
  const filingTimestamp = Date.parse(filingUpdatedAt);
  if (snapshotTimestamp < filingTimestamp) {
    return true;
  }

  // Up to date by time; rebuild only if it was produced under a different
  // overlay revision than the one requested.
  const snapshotRevisionId = snapshot.issuer_overlay_revision_id ?? null;
  return snapshotRevisionId !== overlayRevisionId;
}
/**
 * Builds the placeholder snapshot that is persisted when taxonomy hydration
 * for a filing throws.
 *
 * Every row collection is empty, `parse_status` is `"failed"`, `parse_error`
 * carries the error message (or a generic fallback for non-`Error` values),
 * and `derived_metrics` falls back to the metrics already stored on the
 * filing. The return type is tied to `upsertFilingTaxonomySnapshot`'s
 * parameter so the literal is checked exactly as it would be at the call site.
 *
 * @param filing - The filing whose hydration failed.
 * @param error - The value caught from the failed hydration attempt.
 * @param overlayRevisionId - Overlay revision the attempt was made under.
 */
function buildFailedTaxonomySnapshotPayload(
  filing: Filing & { filing_type: "10-K" | "10-Q" },
  error: unknown,
  overlayRevisionId: number | null,
): Parameters<typeof upsertFilingTaxonomySnapshot>[0] {
  const now = new Date().toISOString();

  return {
    filing_id: filing.id,
    ticker: filing.ticker,
    filing_date: filing.filing_date,
    filing_type: filing.filing_type,
    parse_status: "failed",
    parse_error:
      error instanceof Error ? error.message : "Taxonomy hydration failed",
    source: "legacy_html_fallback",
    parser_engine: "fiscal-xbrl",
    parser_version: "unknown",
    taxonomy_regime: "unknown",
    fiscal_pack: "core",
    periods: [],
    faithful_rows: {
      income: [],
      balance: [],
      cash_flow: [],
      equity: [],
      comprehensive_income: [],
      disclosure: [],
    },
    statement_rows: {
      income: [],
      balance: [],
      cash_flow: [],
      equity: [],
      comprehensive_income: [],
      disclosure: [],
    },
    surface_rows: {
      income: [],
      balance: [],
      cash_flow: [],
      equity: [],
      comprehensive_income: [],
      disclosure: [],
    },
    detail_rows: {
      income: {},
      balance: {},
      cash_flow: {},
      equity: {},
      comprehensive_income: {},
      disclosure: {},
    },
    kpi_rows: [],
    computed_definitions: [],
    contexts: [],
    derived_metrics: filing.metrics ?? null,
    validation_result: {
      status: "error",
      checks: [],
      validatedAt: now,
    },
    normalization_summary: {
      surfaceRowCount: 0,
      detailRowCount: 0,
      kpiRowCount: 0,
      unmappedRowCount: 0,
      materialUnmappedRowCount: 0,
      residualPrimaryCount: 0,
      residualDisclosureCount: 0,
      unsupportedConceptCount: 0,
      issuerOverlayMatchCount: 0,
      warnings: [],
    },
    issuer_overlay_revision_id: overlayRevisionId,
    facts_count: 0,
    concepts_count: 0,
    dimensions_count: 0,
    assets: [],
    concepts: [],
    facts: [],
    metric_validations: [],
  };
}

/**
 * Runs one taxonomy hydration pass over `input.candidates`.
 *
 * For each candidate the stored snapshot is loaded and
 * `shouldRefreshTaxonomySnapshot` decides whether work is needed; filings
 * that are already current are skipped without delay. For the others the
 * function hydrates the snapshot, validates metrics via PDF + LLM (a
 * validation failure is recorded on the snapshot rather than aborting the
 * filing), persists the snapshot and filing metrics, and invalidates the
 * ticker's financial bundles. If any step of that sequence throws, a
 * "failed" placeholder snapshot is persisted instead.
 *
 * Errors thrown while persisting the failure placeholder (or while deleting
 * bundles afterwards) are not caught here and propagate to the caller.
 *
 * @param input.task - Task whose projection stage is updated as work proceeds.
 * @param input.ticker - Ticker used for overlay lookup and progress subjects.
 * @param input.filingsCount - Reported as the `fetched` progress counter.
 * @param input.saveResult - Reported as `inserted` / `updated` counters.
 * @param input.candidates - 10-K / 10-Q filings to consider.
 * @param input.overlayRevisionId - Overlay revision to stamp on snapshots;
 *   `null` means no overlay is applied.
 * @returns Counts of hydrated and failed filings plus the IDs of every filing
 *   for which a refresh was attempted.
 */
async function hydrateTaxonomySnapshotsForCandidates(input: {
  task: Task;
  ticker: string;
  filingsCount: number;
  saveResult: { inserted: number; updated: number };
  candidates: Array<Filing & { filing_type: "10-K" | "10-Q" }>;
  overlayRevisionId: number | null;
}) {
  // The overlay definition is only fetched when a revision is in play.
  // NOTE(review): the lookup is by ticker, not by revision id — presumably
  // the active definition corresponds to `overlayRevisionId`; confirm.
  const overlayDefinition =
    input.overlayRevisionId === null
      ? null
      : await getActiveIssuerOverlayDefinition(input.ticker);

  let hydrated = 0;
  let failed = 0;
  const touchedFilingIds = new Set<number>();

  // `entries()` yields a defined element with its index, avoiding the
  // possibly-undefined `candidates[index]` access of an index-based loop.
  for (const [index, filing] of input.candidates.entries()) {
    const existingSnapshot = await getFilingTaxonomySnapshotByFilingId(
      filing.id,
    );

    if (
      !shouldRefreshTaxonomySnapshot({
        existingSnapshot,
        filingUpdatedAt: filing.updated_at,
        overlayRevisionId: input.overlayRevisionId,
      })
    ) {
      continue;
    }

    // Recorded before the attempt so failed filings count as touched too.
    touchedFilingIds.add(filing.id);

    // Evaluated lazily: `hydrated` / `failed` reflect their values at the
    // moment each stage update is emitted.
    const stageContext = (stage: TaskStage) =>
      buildProgressContext({
        current: index + 1,
        total: input.candidates.length,
        unit: "filings",
        counters: {
          fetched: input.filingsCount,
          inserted: input.saveResult.inserted,
          updated: input.saveResult.updated,
          hydrated,
          failed,
        },
        subject: {
          ticker: input.ticker,
          accessionNumber: filing.accession_number,
          label: stage,
        },
      });

    try {
      await setProjectionStage(
        input.task,
        "sync.extract_taxonomy",
        `Extracting XBRL taxonomy for ${filing.accession_number}`,
        stageContext("sync.extract_taxonomy"),
      );

      const snapshot = await hydrateFilingTaxonomySnapshot({
        filingId: filing.id,
        ticker: filing.ticker,
        cik: filing.cik,
        accessionNumber: filing.accession_number,
        filingDate: filing.filing_date,
        filingType: filing.filing_type,
        filingUrl: filing.filing_url,
        primaryDocument: filing.primary_document ?? null,
        issuerOverlay: overlayDefinition,
      });

      // Seeded from the snapshot so the variable has the right type; the
      // value is always replaced by one of the two branches below.
      let pdfValidation = {
        validation_result: snapshot.validation_result,
        metric_validations: snapshot.metric_validations,
      };

      try {
        pdfValidation = await validateMetricsWithPdfLlm({
          metrics: snapshot.derived_metrics,
          assets: snapshot.assets,
        });
      } catch (error) {
        // PDF validation is best-effort: record the failure on the snapshot
        // and keep going with the hydrated data.
        const message =
          error instanceof Error
            ? error.message
            : "PDF metric validation failed";
        pdfValidation = {
          validation_result: {
            status: "error",
            checks: [],
            validatedAt: new Date().toISOString(),
          },
          metric_validations: snapshot.metric_validations.map((check) => ({
            ...check,
            error: check.error ?? message,
          })),
        };
      }

      // NOTE(review): the normalized payload is spread last, so any key it
      // returns overrides the fields set above (including the validation
      // fields and `issuer_overlay_revision_id`) — confirm it does not
      // return those keys.
      const normalizedSnapshot = {
        ...snapshot,
        validation_result: pdfValidation.validation_result,
        metric_validations: pdfValidation.metric_validations,
        issuer_overlay_revision_id: input.overlayRevisionId,
        ...normalizeFilingTaxonomySnapshotPayload(snapshot),
      };

      // These stage updates are emitted back-to-back after extraction and
      // PDF validation have already run; their order is kept as-is.
      await setProjectionStage(
        input.task,
        "sync.normalize_taxonomy",
        `Materializing statements for ${filing.accession_number}`,
        stageContext("sync.normalize_taxonomy"),
      );
      await setProjectionStage(
        input.task,
        "sync.derive_metrics",
        `Deriving taxonomy metrics for ${filing.accession_number}`,
        stageContext("sync.derive_metrics"),
      );
      await setProjectionStage(
        input.task,
        "sync.validate_pdf_metrics",
        `Validating metrics via PDF + LLM for ${filing.accession_number}`,
        stageContext("sync.validate_pdf_metrics"),
      );
      await setProjectionStage(
        input.task,
        "sync.persist_taxonomy",
        `Persisting taxonomy snapshot for ${filing.accession_number}`,
        stageContext("sync.persist_taxonomy"),
      );

      await upsertFilingTaxonomySnapshot(normalizedSnapshot);
      await updateFilingMetricsById(
        filing.id,
        normalizedSnapshot.derived_metrics,
      );
      await deleteCompanyFinancialBundlesForTicker(filing.ticker);
      hydrated += 1;
    } catch (error) {
      // Persist a "failed" placeholder so the filing has a snapshot row
      // carrying the parse error, then invalidate bundles for the ticker.
      await upsertFilingTaxonomySnapshot(
        buildFailedTaxonomySnapshotPayload(
          filing,
          error,
          input.overlayRevisionId,
        ),
      );
      await deleteCompanyFinancialBundlesForTicker(filing.ticker);
      failed += 1;
    }

    // Pause after every processed filing (skipped filings `continue` above
    // and do not reach this delay).
    await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
  }

  return {
    hydrated,
    failed,
    touchedFilingIds,
  } satisfies TaxonomyHydrationRunResult;
}
async function setProjectionStage(
task: Task,
stage: TaskStage,
@@ -756,200 +1023,77 @@ async function processSyncFilings(task: Task) {
subject: tickerSubject,
}),
);
for (let index = 0; index < hydrateCandidates.length; index += 1) {
const filing = hydrateCandidates[index];
const existingSnapshot = await getFilingTaxonomySnapshotByFilingId(
filing.id,
);
const shouldRefresh =
!existingSnapshot ||
Date.parse(existingSnapshot.updated_at) < Date.parse(filing.updated_at);
const currentOverlay = await getIssuerOverlay(ticker);
const firstPass = await hydrateTaxonomySnapshotsForCandidates({
task,
ticker,
filingsCount: filings.length,
saveResult,
candidates: hydrateCandidates,
overlayRevisionId: currentOverlay?.active_revision_id ?? null,
});
taxonomySnapshotsHydrated += firstPass.hydrated;
taxonomySnapshotsFailed += firstPass.failed;
if (!shouldRefresh) {
continue;
}
const stageContext = (stage: TaskStage) =>
buildProgressContext({
current: index + 1,
total: hydrateCandidates.length,
unit: "filings",
let overlayPublished = false;
let activeOverlayRevisionId = currentOverlay?.active_revision_id ?? null;
try {
await setProjectionStage(
task,
"sync.persist_taxonomy",
`Building issuer overlay for ${ticker}`,
{
counters: {
fetched: filings.length,
inserted: saveResult.inserted,
updated: saveResult.updated,
sampledFilings: Math.min(hydrateCandidates.length, 12),
hydrated: taxonomySnapshotsHydrated,
failed: taxonomySnapshotsFailed,
},
subject: {
ticker,
accessionNumber: filing.accession_number,
label: stage,
},
});
subject: tickerSubject,
},
);
const overlayResult = await generateIssuerOverlayForTicker(ticker);
overlayPublished = overlayResult.published;
activeOverlayRevisionId = overlayResult.activeRevisionId;
} catch (error) {
await recordIssuerOverlayBuildFailure(ticker, error);
console.error(`[issuer-overlay] failed for ${ticker}:`, error);
}
try {
await setProjectionStage(
task,
"sync.extract_taxonomy",
`Extracting XBRL taxonomy for ${filing.accession_number}`,
stageContext("sync.extract_taxonomy"),
);
const snapshot = await hydrateFilingTaxonomySnapshot({
filingId: filing.id,
ticker: filing.ticker,
cik: filing.cik,
accessionNumber: filing.accession_number,
filingDate: filing.filing_date,
filingType: filing.filing_type,
filingUrl: filing.filing_url,
primaryDocument: filing.primary_document ?? null,
});
let pdfValidation = {
validation_result: snapshot.validation_result,
metric_validations: snapshot.metric_validations,
};
if (
overlayPublished &&
activeOverlayRevisionId !== null &&
activeOverlayRevisionId !== currentOverlay?.active_revision_id
) {
const prioritizedCandidates = [
...hydrateCandidates.filter((filing) =>
firstPass.touchedFilingIds.has(filing.id),
),
...hydrateCandidates.filter(
(filing) => !firstPass.touchedFilingIds.has(filing.id),
),
];
const uniqueCandidates = prioritizedCandidates.filter(
(filing, index, rows) => {
return (
rows.findIndex((candidate) => candidate.id === filing.id) === index
);
},
);
const rehydrateCandidates = uniqueCandidates.slice(
0,
Math.max(firstPass.touchedFilingIds.size, 8),
);
try {
pdfValidation = await validateMetricsWithPdfLlm({
metrics: snapshot.derived_metrics,
assets: snapshot.assets,
});
} catch (error) {
const message =
error instanceof Error
? error.message
: "PDF metric validation failed";
pdfValidation = {
validation_result: {
status: "error",
checks: [],
validatedAt: new Date().toISOString(),
},
metric_validations: snapshot.metric_validations.map((check) => ({
...check,
error: check.error ?? message,
})),
};
}
const normalizedSnapshot = {
...snapshot,
validation_result: pdfValidation.validation_result,
metric_validations: pdfValidation.metric_validations,
...normalizeFilingTaxonomySnapshotPayload(snapshot),
};
await setProjectionStage(
task,
"sync.normalize_taxonomy",
`Materializing statements for ${filing.accession_number}`,
stageContext("sync.normalize_taxonomy"),
);
await setProjectionStage(
task,
"sync.derive_metrics",
`Deriving taxonomy metrics for ${filing.accession_number}`,
stageContext("sync.derive_metrics"),
);
await setProjectionStage(
task,
"sync.validate_pdf_metrics",
`Validating metrics via PDF + LLM for ${filing.accession_number}`,
stageContext("sync.validate_pdf_metrics"),
);
await setProjectionStage(
task,
"sync.persist_taxonomy",
`Persisting taxonomy snapshot for ${filing.accession_number}`,
stageContext("sync.persist_taxonomy"),
);
await upsertFilingTaxonomySnapshot(normalizedSnapshot);
await updateFilingMetricsById(
filing.id,
normalizedSnapshot.derived_metrics,
);
await deleteCompanyFinancialBundlesForTicker(filing.ticker);
taxonomySnapshotsHydrated += 1;
} catch (error) {
const now = new Date().toISOString();
await upsertFilingTaxonomySnapshot({
filing_id: filing.id,
ticker: filing.ticker,
filing_date: filing.filing_date,
filing_type: filing.filing_type,
parse_status: "failed",
parse_error:
error instanceof Error ? error.message : "Taxonomy hydration failed",
source: "legacy_html_fallback",
parser_engine: "fiscal-xbrl",
parser_version: "unknown",
taxonomy_regime: "unknown",
fiscal_pack: "core",
periods: [],
faithful_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
disclosure: [],
},
statement_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
disclosure: [],
},
surface_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
disclosure: [],
},
detail_rows: {
income: {},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {},
disclosure: {},
},
kpi_rows: [],
computed_definitions: [],
contexts: [],
derived_metrics: filing.metrics ?? null,
validation_result: {
status: "error",
checks: [],
validatedAt: now,
},
normalization_summary: {
surfaceRowCount: 0,
detailRowCount: 0,
kpiRowCount: 0,
unmappedRowCount: 0,
materialUnmappedRowCount: 0,
warnings: [],
},
facts_count: 0,
concepts_count: 0,
dimensions_count: 0,
assets: [],
concepts: [],
facts: [],
metric_validations: [],
});
await deleteCompanyFinancialBundlesForTicker(filing.ticker);
taxonomySnapshotsFailed += 1;
}
await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
const secondPass = await hydrateTaxonomySnapshotsForCandidates({
task,
ticker,
filingsCount: filings.length,
saveResult,
candidates: rehydrateCandidates,
overlayRevisionId: activeOverlayRevisionId,
});
taxonomySnapshotsHydrated += secondPass.hydrated;
taxonomySnapshotsFailed += secondPass.failed;
}
try {
@@ -977,6 +1121,8 @@ async function processSyncFilings(task: Task) {
updated: saveResult.updated,
taxonomySnapshotsHydrated,
taxonomySnapshotsFailed,
overlayPublished,
activeOverlayRevisionId,
searchTaskId,
};