Automate issuer overlay creation from ticker searches
This commit is contained in:
@@ -32,6 +32,14 @@ import {
|
||||
import { createPortfolioInsight } from "@/lib/server/repos/insights";
|
||||
import { updateTaskStage } from "@/lib/server/repos/tasks";
|
||||
import { fetchPrimaryFilingText, fetchRecentFilings } from "@/lib/server/sec";
|
||||
import {
|
||||
generateIssuerOverlayForTicker,
|
||||
recordIssuerOverlayBuildFailure,
|
||||
} from "@/lib/server/issuer-overlays";
|
||||
import {
|
||||
getActiveIssuerOverlayDefinition,
|
||||
getIssuerOverlay,
|
||||
} from "@/lib/server/repos/issuer-overlays";
|
||||
import { enqueueTask } from "@/lib/server/tasks";
|
||||
import { hydrateFilingTaxonomySnapshot } from "@/lib/server/taxonomy/engine";
|
||||
import { validateMetricsWithPdfLlm } from "@/lib/server/taxonomy/pdf-validation";
|
||||
@@ -143,6 +151,12 @@ export type TaskExecutionOutcome = {
|
||||
completionContext?: TaskStageContext | null;
|
||||
};
|
||||
|
||||
/**
 * Aggregate outcome of one taxonomy hydration pass over a list of candidate
 * filings.
 */
type TaxonomyHydrationRunResult = {
  /** Filings whose snapshot was hydrated and persisted during the pass. */
  hydrated: number;
  /** Filings whose hydration threw, so a failure snapshot was stored instead. */
  failed: number;
  /** Ids of every filing the pass attempted to refresh, successful or not. */
  touchedFilingIds: Set<number>;
};
|
||||
|
||||
function buildTaskOutcome(
|
||||
result: unknown,
|
||||
completionDetail: string,
|
||||
@@ -155,6 +169,259 @@ function buildTaskOutcome(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Decides whether a filing's stored taxonomy snapshot has to be rebuilt.
 *
 * A refresh is required when any of the following holds:
 *  - no snapshot exists yet,
 *  - the snapshot was last updated before the filing was,
 *  - the snapshot was built against a different issuer overlay revision than
 *    the one requested (a missing revision on the snapshot counts as `null`).
 *
 * Timestamps are compared via `Date.parse`. An unparseable timestamp yields
 * `NaN`, for which `<` is always false, so in that case the decision falls
 * through to the overlay revision comparison alone.
 *
 * @param input.existingSnapshot Stored snapshot for the filing, or `null`.
 * @param input.filingUpdatedAt Last-update timestamp of the filing row.
 * @param input.overlayRevisionId Overlay revision the snapshot should carry.
 * @returns `true` when the snapshot should be re-hydrated.
 */
function shouldRefreshTaxonomySnapshot(input: {
  existingSnapshot: Awaited<
    ReturnType<typeof getFilingTaxonomySnapshotByFilingId>
  > | null;
  filingUpdatedAt: string;
  overlayRevisionId: number | null;
}): boolean {
  const { existingSnapshot, filingUpdatedAt, overlayRevisionId } = input;

  if (!existingSnapshot) {
    return true;
  }

  const snapshotUpdatedAtMs = Date.parse(existingSnapshot.updated_at);
  const filingUpdatedAtMs = Date.parse(filingUpdatedAt);
  if (snapshotUpdatedAtMs < filingUpdatedAtMs) {
    return true;
  }

  // Normalize `undefined` to `null` so "no overlay" compares equal on both sides.
  const snapshotOverlayRevisionId =
    existingSnapshot.issuer_overlay_revision_id ?? null;
  return snapshotOverlayRevisionId !== overlayRevisionId;
}
|
||||
|
||||
/**
 * Builds the placeholder snapshot that is persisted when hydrating a filing
 * fails: every row collection is empty, the parse status is "failed", and the
 * filing's previously stored metrics (if any) are carried over.
 */
function buildFailedTaxonomySnapshotPayload(
  filing: Filing & { filing_type: "10-K" | "10-Q" },
  error: unknown,
  overlayRevisionId: number | null,
): Parameters<typeof upsertFilingTaxonomySnapshot>[0] {
  const now = new Date().toISOString();

  return {
    filing_id: filing.id,
    ticker: filing.ticker,
    filing_date: filing.filing_date,
    filing_type: filing.filing_type,
    parse_status: "failed",
    parse_error:
      error instanceof Error ? error.message : "Taxonomy hydration failed",
    source: "legacy_html_fallback",
    parser_engine: "fiscal-xbrl",
    parser_version: "unknown",
    taxonomy_regime: "unknown",
    fiscal_pack: "core",
    periods: [],
    faithful_rows: {
      income: [],
      balance: [],
      cash_flow: [],
      equity: [],
      comprehensive_income: [],
      disclosure: [],
    },
    statement_rows: {
      income: [],
      balance: [],
      cash_flow: [],
      equity: [],
      comprehensive_income: [],
      disclosure: [],
    },
    surface_rows: {
      income: [],
      balance: [],
      cash_flow: [],
      equity: [],
      comprehensive_income: [],
      disclosure: [],
    },
    detail_rows: {
      income: {},
      balance: {},
      cash_flow: {},
      equity: {},
      comprehensive_income: {},
      disclosure: {},
    },
    kpi_rows: [],
    computed_definitions: [],
    contexts: [],
    derived_metrics: filing.metrics ?? null,
    validation_result: {
      status: "error",
      checks: [],
      validatedAt: now,
    },
    normalization_summary: {
      surfaceRowCount: 0,
      detailRowCount: 0,
      kpiRowCount: 0,
      unmappedRowCount: 0,
      materialUnmappedRowCount: 0,
      residualPrimaryCount: 0,
      residualDisclosureCount: 0,
      unsupportedConceptCount: 0,
      issuerOverlayMatchCount: 0,
      warnings: [],
    },
    issuer_overlay_revision_id: overlayRevisionId,
    facts_count: 0,
    concepts_count: 0,
    dimensions_count: 0,
    assets: [],
    concepts: [],
    facts: [],
    metric_validations: [],
  };
}

/**
 * Hydrates (or re-hydrates) taxonomy snapshots for the given candidate filings.
 *
 * Each candidate is processed sequentially. A candidate is skipped when
 * `shouldRefreshTaxonomySnapshot` reports that its stored snapshot is current
 * for `overlayRevisionId`. Otherwise the filing is hydrated, its metrics are
 * validated via PDF + LLM, and the result is persisted. If any step inside the
 * hydration attempt throws, a "failed" placeholder snapshot is stored instead
 * and the run continues with the next candidate.
 *
 * @param input.task Task whose projection stage is updated as work progresses.
 * @param input.ticker Ticker used for the overlay lookup and progress subject.
 * @param input.filingsCount Reported as the `fetched` progress counter only.
 * @param input.saveResult Reported as `inserted` / `updated` progress counters only.
 * @param input.candidates Filings to consider, in processing order.
 * @param input.overlayRevisionId Overlay revision stamped on stored snapshots;
 *   when `null`, no overlay definition is loaded and hydration runs without one.
 * @returns Counts of hydrated and failed filings plus the ids of every filing
 *   that was attempted.
 * @throws Rejections from the overlay lookup, the existing-snapshot lookup, or
 *   the failure-path persistence are not caught here and abort the run.
 */
async function hydrateTaxonomySnapshotsForCandidates(input: {
  task: Task;
  ticker: string;
  filingsCount: number;
  saveResult: { inserted: number; updated: number };
  candidates: Array<Filing & { filing_type: "10-K" | "10-Q" }>;
  overlayRevisionId: number | null;
}): Promise<TaxonomyHydrationRunResult> {
  // NOTE(review): the definition is looked up by ticker, not by revision id;
  // presumably the active definition matches overlayRevisionId — confirm.
  const overlayDefinition =
    input.overlayRevisionId === null
      ? null
      : await getActiveIssuerOverlayDefinition(input.ticker);
  let hydrated = 0;
  let failed = 0;
  const touchedFilingIds = new Set<number>();

  // entries() yields a defined filing for every index, so no unchecked
  // array read is needed.
  for (const [index, filing] of input.candidates.entries()) {
    const existingSnapshot = await getFilingTaxonomySnapshotByFilingId(
      filing.id,
    );
    const needsRefresh = shouldRefreshTaxonomySnapshot({
      existingSnapshot,
      filingUpdatedAt: filing.updated_at,
      overlayRevisionId: input.overlayRevisionId,
    });
    if (!needsRefresh) {
      // Skipped filings are neither counted nor throttled.
      continue;
    }

    // Recorded before the attempt so failures are included as well.
    touchedFilingIds.add(filing.id);

    // Reads `hydrated` / `failed` at call time, i.e. the totals before this filing.
    const stageContext = (stage: TaskStage) =>
      buildProgressContext({
        current: index + 1,
        total: input.candidates.length,
        unit: "filings",
        counters: {
          fetched: input.filingsCount,
          inserted: input.saveResult.inserted,
          updated: input.saveResult.updated,
          hydrated,
          failed,
        },
        subject: {
          ticker: input.ticker,
          accessionNumber: filing.accession_number,
          label: stage,
        },
      });

    try {
      await setProjectionStage(
        input.task,
        "sync.extract_taxonomy",
        `Extracting XBRL taxonomy for ${filing.accession_number}`,
        stageContext("sync.extract_taxonomy"),
      );
      const snapshot = await hydrateFilingTaxonomySnapshot({
        filingId: filing.id,
        ticker: filing.ticker,
        cik: filing.cik,
        accessionNumber: filing.accession_number,
        filingDate: filing.filing_date,
        filingType: filing.filing_type,
        filingUrl: filing.filing_url,
        primaryDocument: filing.primary_document ?? null,
        issuerOverlay: overlayDefinition,
      });

      // Start from the validation data produced by hydration; it is replaced
      // by the PDF + LLM result, or by an error marker if that validation throws.
      let pdfValidation = {
        validation_result: snapshot.validation_result,
        metric_validations: snapshot.metric_validations,
      };

      try {
        pdfValidation = await validateMetricsWithPdfLlm({
          metrics: snapshot.derived_metrics,
          assets: snapshot.assets,
        });
      } catch (error) {
        // A validation failure does not fail the filing: the snapshot is still
        // persisted, with every metric check annotated with the error message.
        const message =
          error instanceof Error
            ? error.message
            : "PDF metric validation failed";
        pdfValidation = {
          validation_result: {
            status: "error",
            checks: [],
            validatedAt: new Date().toISOString(),
          },
          metric_validations: snapshot.metric_validations.map((check) => ({
            ...check,
            error: check.error ?? message,
          })),
        };
      }

      // NOTE(review): the normalized payload is spread last, so any
      // validation_result / metric_validations / issuer_overlay_revision_id
      // keys it returns would override the values set just above — confirm
      // that normalizeFilingTaxonomySnapshotPayload does not return them.
      const normalizedSnapshot = {
        ...snapshot,
        validation_result: pdfValidation.validation_result,
        metric_validations: pdfValidation.metric_validations,
        issuer_overlay_revision_id: input.overlayRevisionId,
        ...normalizeFilingTaxonomySnapshotPayload(snapshot),
      };

      // NOTE(review): these stage updates are emitted after hydration and PDF
      // validation have already returned, so they reach the task back-to-back
      // rather than while the corresponding work runs — confirm this is intended.
      await setProjectionStage(
        input.task,
        "sync.normalize_taxonomy",
        `Materializing statements for ${filing.accession_number}`,
        stageContext("sync.normalize_taxonomy"),
      );
      await setProjectionStage(
        input.task,
        "sync.derive_metrics",
        `Deriving taxonomy metrics for ${filing.accession_number}`,
        stageContext("sync.derive_metrics"),
      );
      await setProjectionStage(
        input.task,
        "sync.validate_pdf_metrics",
        `Validating metrics via PDF + LLM for ${filing.accession_number}`,
        stageContext("sync.validate_pdf_metrics"),
      );
      await setProjectionStage(
        input.task,
        "sync.persist_taxonomy",
        `Persisting taxonomy snapshot for ${filing.accession_number}`,
        stageContext("sync.persist_taxonomy"),
      );

      await upsertFilingTaxonomySnapshot(normalizedSnapshot);
      await updateFilingMetricsById(
        filing.id,
        normalizedSnapshot.derived_metrics,
      );
      await deleteCompanyFinancialBundlesForTicker(filing.ticker);
      hydrated += 1;
    } catch (error) {
      // Store a "failed" placeholder so the failure reason is recorded on the
      // snapshot row, then drop the ticker's financial bundles as on success.
      await upsertFilingTaxonomySnapshot(
        buildFailedTaxonomySnapshotPayload(
          filing,
          error,
          input.overlayRevisionId,
        ),
      );
      await deleteCompanyFinancialBundlesForTicker(filing.ticker);
      failed += 1;
    }

    // Pause after every attempted filing (including the last one).
    await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
  }

  return {
    hydrated,
    failed,
    touchedFilingIds,
  };
}
|
||||
|
||||
async function setProjectionStage(
|
||||
task: Task,
|
||||
stage: TaskStage,
|
||||
@@ -756,200 +1023,77 @@ async function processSyncFilings(task: Task) {
|
||||
subject: tickerSubject,
|
||||
}),
|
||||
);
|
||||
for (let index = 0; index < hydrateCandidates.length; index += 1) {
|
||||
const filing = hydrateCandidates[index];
|
||||
const existingSnapshot = await getFilingTaxonomySnapshotByFilingId(
|
||||
filing.id,
|
||||
);
|
||||
const shouldRefresh =
|
||||
!existingSnapshot ||
|
||||
Date.parse(existingSnapshot.updated_at) < Date.parse(filing.updated_at);
|
||||
const currentOverlay = await getIssuerOverlay(ticker);
|
||||
const firstPass = await hydrateTaxonomySnapshotsForCandidates({
|
||||
task,
|
||||
ticker,
|
||||
filingsCount: filings.length,
|
||||
saveResult,
|
||||
candidates: hydrateCandidates,
|
||||
overlayRevisionId: currentOverlay?.active_revision_id ?? null,
|
||||
});
|
||||
taxonomySnapshotsHydrated += firstPass.hydrated;
|
||||
taxonomySnapshotsFailed += firstPass.failed;
|
||||
|
||||
if (!shouldRefresh) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const stageContext = (stage: TaskStage) =>
|
||||
buildProgressContext({
|
||||
current: index + 1,
|
||||
total: hydrateCandidates.length,
|
||||
unit: "filings",
|
||||
let overlayPublished = false;
|
||||
let activeOverlayRevisionId = currentOverlay?.active_revision_id ?? null;
|
||||
try {
|
||||
await setProjectionStage(
|
||||
task,
|
||||
"sync.persist_taxonomy",
|
||||
`Building issuer overlay for ${ticker}`,
|
||||
{
|
||||
counters: {
|
||||
fetched: filings.length,
|
||||
inserted: saveResult.inserted,
|
||||
updated: saveResult.updated,
|
||||
sampledFilings: Math.min(hydrateCandidates.length, 12),
|
||||
hydrated: taxonomySnapshotsHydrated,
|
||||
failed: taxonomySnapshotsFailed,
|
||||
},
|
||||
subject: {
|
||||
ticker,
|
||||
accessionNumber: filing.accession_number,
|
||||
label: stage,
|
||||
},
|
||||
});
|
||||
subject: tickerSubject,
|
||||
},
|
||||
);
|
||||
const overlayResult = await generateIssuerOverlayForTicker(ticker);
|
||||
overlayPublished = overlayResult.published;
|
||||
activeOverlayRevisionId = overlayResult.activeRevisionId;
|
||||
} catch (error) {
|
||||
await recordIssuerOverlayBuildFailure(ticker, error);
|
||||
console.error(`[issuer-overlay] failed for ${ticker}:`, error);
|
||||
}
|
||||
|
||||
try {
|
||||
await setProjectionStage(
|
||||
task,
|
||||
"sync.extract_taxonomy",
|
||||
`Extracting XBRL taxonomy for ${filing.accession_number}`,
|
||||
stageContext("sync.extract_taxonomy"),
|
||||
);
|
||||
const snapshot = await hydrateFilingTaxonomySnapshot({
|
||||
filingId: filing.id,
|
||||
ticker: filing.ticker,
|
||||
cik: filing.cik,
|
||||
accessionNumber: filing.accession_number,
|
||||
filingDate: filing.filing_date,
|
||||
filingType: filing.filing_type,
|
||||
filingUrl: filing.filing_url,
|
||||
primaryDocument: filing.primary_document ?? null,
|
||||
});
|
||||
let pdfValidation = {
|
||||
validation_result: snapshot.validation_result,
|
||||
metric_validations: snapshot.metric_validations,
|
||||
};
|
||||
if (
|
||||
overlayPublished &&
|
||||
activeOverlayRevisionId !== null &&
|
||||
activeOverlayRevisionId !== currentOverlay?.active_revision_id
|
||||
) {
|
||||
const prioritizedCandidates = [
|
||||
...hydrateCandidates.filter((filing) =>
|
||||
firstPass.touchedFilingIds.has(filing.id),
|
||||
),
|
||||
...hydrateCandidates.filter(
|
||||
(filing) => !firstPass.touchedFilingIds.has(filing.id),
|
||||
),
|
||||
];
|
||||
const uniqueCandidates = prioritizedCandidates.filter(
|
||||
(filing, index, rows) => {
|
||||
return (
|
||||
rows.findIndex((candidate) => candidate.id === filing.id) === index
|
||||
);
|
||||
},
|
||||
);
|
||||
const rehydrateCandidates = uniqueCandidates.slice(
|
||||
0,
|
||||
Math.max(firstPass.touchedFilingIds.size, 8),
|
||||
);
|
||||
|
||||
try {
|
||||
pdfValidation = await validateMetricsWithPdfLlm({
|
||||
metrics: snapshot.derived_metrics,
|
||||
assets: snapshot.assets,
|
||||
});
|
||||
} catch (error) {
|
||||
const message =
|
||||
error instanceof Error
|
||||
? error.message
|
||||
: "PDF metric validation failed";
|
||||
pdfValidation = {
|
||||
validation_result: {
|
||||
status: "error",
|
||||
checks: [],
|
||||
validatedAt: new Date().toISOString(),
|
||||
},
|
||||
metric_validations: snapshot.metric_validations.map((check) => ({
|
||||
...check,
|
||||
error: check.error ?? message,
|
||||
})),
|
||||
};
|
||||
}
|
||||
|
||||
const normalizedSnapshot = {
|
||||
...snapshot,
|
||||
validation_result: pdfValidation.validation_result,
|
||||
metric_validations: pdfValidation.metric_validations,
|
||||
...normalizeFilingTaxonomySnapshotPayload(snapshot),
|
||||
};
|
||||
|
||||
await setProjectionStage(
|
||||
task,
|
||||
"sync.normalize_taxonomy",
|
||||
`Materializing statements for ${filing.accession_number}`,
|
||||
stageContext("sync.normalize_taxonomy"),
|
||||
);
|
||||
await setProjectionStage(
|
||||
task,
|
||||
"sync.derive_metrics",
|
||||
`Deriving taxonomy metrics for ${filing.accession_number}`,
|
||||
stageContext("sync.derive_metrics"),
|
||||
);
|
||||
await setProjectionStage(
|
||||
task,
|
||||
"sync.validate_pdf_metrics",
|
||||
`Validating metrics via PDF + LLM for ${filing.accession_number}`,
|
||||
stageContext("sync.validate_pdf_metrics"),
|
||||
);
|
||||
await setProjectionStage(
|
||||
task,
|
||||
"sync.persist_taxonomy",
|
||||
`Persisting taxonomy snapshot for ${filing.accession_number}`,
|
||||
stageContext("sync.persist_taxonomy"),
|
||||
);
|
||||
|
||||
await upsertFilingTaxonomySnapshot(normalizedSnapshot);
|
||||
await updateFilingMetricsById(
|
||||
filing.id,
|
||||
normalizedSnapshot.derived_metrics,
|
||||
);
|
||||
await deleteCompanyFinancialBundlesForTicker(filing.ticker);
|
||||
taxonomySnapshotsHydrated += 1;
|
||||
} catch (error) {
|
||||
const now = new Date().toISOString();
|
||||
await upsertFilingTaxonomySnapshot({
|
||||
filing_id: filing.id,
|
||||
ticker: filing.ticker,
|
||||
filing_date: filing.filing_date,
|
||||
filing_type: filing.filing_type,
|
||||
parse_status: "failed",
|
||||
parse_error:
|
||||
error instanceof Error ? error.message : "Taxonomy hydration failed",
|
||||
source: "legacy_html_fallback",
|
||||
parser_engine: "fiscal-xbrl",
|
||||
parser_version: "unknown",
|
||||
taxonomy_regime: "unknown",
|
||||
fiscal_pack: "core",
|
||||
periods: [],
|
||||
faithful_rows: {
|
||||
income: [],
|
||||
balance: [],
|
||||
cash_flow: [],
|
||||
equity: [],
|
||||
comprehensive_income: [],
|
||||
disclosure: [],
|
||||
},
|
||||
statement_rows: {
|
||||
income: [],
|
||||
balance: [],
|
||||
cash_flow: [],
|
||||
equity: [],
|
||||
comprehensive_income: [],
|
||||
disclosure: [],
|
||||
},
|
||||
surface_rows: {
|
||||
income: [],
|
||||
balance: [],
|
||||
cash_flow: [],
|
||||
equity: [],
|
||||
comprehensive_income: [],
|
||||
disclosure: [],
|
||||
},
|
||||
detail_rows: {
|
||||
income: {},
|
||||
balance: {},
|
||||
cash_flow: {},
|
||||
equity: {},
|
||||
comprehensive_income: {},
|
||||
disclosure: {},
|
||||
},
|
||||
kpi_rows: [],
|
||||
computed_definitions: [],
|
||||
contexts: [],
|
||||
derived_metrics: filing.metrics ?? null,
|
||||
validation_result: {
|
||||
status: "error",
|
||||
checks: [],
|
||||
validatedAt: now,
|
||||
},
|
||||
normalization_summary: {
|
||||
surfaceRowCount: 0,
|
||||
detailRowCount: 0,
|
||||
kpiRowCount: 0,
|
||||
unmappedRowCount: 0,
|
||||
materialUnmappedRowCount: 0,
|
||||
warnings: [],
|
||||
},
|
||||
facts_count: 0,
|
||||
concepts_count: 0,
|
||||
dimensions_count: 0,
|
||||
assets: [],
|
||||
concepts: [],
|
||||
facts: [],
|
||||
metric_validations: [],
|
||||
});
|
||||
await deleteCompanyFinancialBundlesForTicker(filing.ticker);
|
||||
taxonomySnapshotsFailed += 1;
|
||||
}
|
||||
|
||||
await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
|
||||
const secondPass = await hydrateTaxonomySnapshotsForCandidates({
|
||||
task,
|
||||
ticker,
|
||||
filingsCount: filings.length,
|
||||
saveResult,
|
||||
candidates: rehydrateCandidates,
|
||||
overlayRevisionId: activeOverlayRevisionId,
|
||||
});
|
||||
taxonomySnapshotsHydrated += secondPass.hydrated;
|
||||
taxonomySnapshotsFailed += secondPass.failed;
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -977,6 +1121,8 @@ async function processSyncFilings(task: Task) {
|
||||
updated: saveResult.updated,
|
||||
taxonomySnapshotsHydrated,
|
||||
taxonomySnapshotsFailed,
|
||||
overlayPublished,
|
||||
activeOverlayRevisionId,
|
||||
searchTaskId,
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user