Automate issuer overlay creation from ticker searches

This commit is contained in:
2026-03-19 20:44:58 -04:00
parent 17de3dd72d
commit 391d6d34ce
79 changed files with 4746 additions and 695 deletions

View File

@@ -1,7 +1,7 @@
import type { Database } from 'bun:sqlite';
export type FinancialSchemaRepairMode = 'auto' | 'check-only' | 'off';
export type FinancialIngestionHealthMode = 'healthy' | 'repaired' | 'drifted' | 'failed';
/**
 * Repair behaviour a caller can request for the financial ingestion schema.
 * Accepted by `repairFinancialIngestionSchema` through `options.mode` and echoed
 * back as `requestedMode` on the repair and ensure result types.
 */
type FinancialSchemaRepairMode = 'auto' | 'check-only' | 'off';
/** Classification reported in the `mode` field of `FinancialIngestionSchemaEnsureResult`. */
type FinancialIngestionHealthMode = 'healthy' | 'repaired' | 'drifted' | 'failed';
type CriticalIndexDefinition = {
name: string;
@@ -16,7 +16,7 @@ type DuplicateRule = {
partitionColumns: string[];
};
export type FinancialIngestionIndexStatus = {
type FinancialIngestionIndexStatus = {
name: string;
table: string;
expectedColumns: string[];
@@ -26,13 +26,13 @@ export type FinancialIngestionIndexStatus = {
healthy: boolean;
};
export type FinancialIngestionDuplicateStatus = {
/**
 * Duplicate-row summary for one table. `FinancialIngestionSchemaReport.duplicates`
 * carries a list of these.
 */
type FinancialIngestionDuplicateStatus = {
/** Table the counts refer to (presumably the SQLite table name — confirm against the producer). */
table: string;
/** NOTE(review): presumably the number of key groups that contain more than one row — confirm. */
duplicateGroups: number;
/** NOTE(review): presumably the number of rows involved in those groups — confirm whether the kept row is counted. */
duplicateRows: number;
};
export type FinancialIngestionSchemaReport = {
type FinancialIngestionSchemaReport = {
ok: boolean;
checkedAt: string;
indexes: FinancialIngestionIndexStatus[];
@@ -42,7 +42,7 @@ export type FinancialIngestionSchemaReport = {
duplicates: FinancialIngestionDuplicateStatus[];
};
export type FinancialIngestionSchemaRepairResult = {
type FinancialIngestionSchemaRepairResult = {
attempted: boolean;
requestedMode: FinancialSchemaRepairMode;
missingIndexesBefore: string[];
@@ -56,7 +56,7 @@ export type FinancialIngestionSchemaRepairResult = {
reportAfter: FinancialIngestionSchemaReport;
};
export type FinancialIngestionSchemaEnsureResult = {
type FinancialIngestionSchemaEnsureResult = {
ok: boolean;
mode: FinancialIngestionHealthMode;
requestedMode: FinancialSchemaRepairMode;
@@ -317,7 +317,7 @@ function createOrRecreateIndex(client: Database, definition: CriticalIndexDefini
client.exec(definition.createSql);
}
export function repairFinancialIngestionSchema(
function repairFinancialIngestionSchema(
client: Database,
options: {
mode?: FinancialSchemaRepairMode;
@@ -496,7 +496,7 @@ export function ensureFinancialIngestionSchemaHealthy(
}
}
export function isMissingOnConflictConstraintError(error: unknown) {
/**
 * Tells whether `error` is an `Error` whose message mentions that an
 * ON CONFLICT clause has no matching primary key or unique constraint.
 *
 * The message is lower-cased before the substring search, so the match is
 * case-insensitive. Anything that is not an `Error` instance yields `false`.
 *
 * @param error Value caught from a failed statement; may be of any type.
 * @returns `true` only for `Error` instances carrying the expected message text.
 */
function isMissingOnConflictConstraintError(error: unknown) {
  if (!(error instanceof Error)) {
    return false;
  }

  const normalizedMessage = error.message.toLowerCase();
  return normalizedMessage.includes('on conflict clause does not match any primary key or unique constraint');
}
@@ -529,7 +529,7 @@ export async function withFinancialIngestionSchemaRetry<T>(input: {
}
}
export const __financialIngestionSchemaInternals = {
const __financialIngestionSchemaInternals = {
CRITICAL_INDEX_DEFINITIONS,
UNIQUE_DUPLICATE_RULES,
clearBundleCache,

View File

@@ -218,6 +218,17 @@ describe("sqlite schema compatibility bootstrap", () => {
expect(__dbInternals.hasTable(client, "research_memo")).toBe(true);
expect(__dbInternals.hasTable(client, "research_memo_evidence")).toBe(true);
expect(__dbInternals.hasTable(client, "company_overview_cache")).toBe(true);
expect(__dbInternals.hasTable(client, "issuer_overlay")).toBe(true);
expect(__dbInternals.hasTable(client, "issuer_overlay_revision")).toBe(
true,
);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"issuer_overlay_revision_id",
),
).toBe(true);
__dbInternals.loadSqliteExtensions(client);
__dbInternals.ensureSearchVirtualTables(client);

View File

@@ -33,6 +33,7 @@ type TaxonomyMetricValidationStatus =
| "matched"
| "mismatch"
| "error";
/** Values allowed in the `issuer_overlay.status` column; the column defaults to "empty". */
type IssuerOverlayStatus = "empty" | "active" | "error";
type CoverageStatus = "backlog" | "active" | "watch" | "archive";
type CoveragePriority = "low" | "medium" | "high";
type ResearchJournalEntryType = "note" | "filing_note" | "status_change";
@@ -65,6 +66,8 @@ type FinancialSurfaceKind =
| "income_statement"
| "balance_sheet"
| "cash_flow_statement"
| "equity_statement"
| "disclosures"
| "ratios"
| "segments_kpis"
| "adjusted"
@@ -103,6 +106,44 @@ type FinancialStatementKind =
| "equity"
| "comprehensive_income";
/**
 * Shape of the JSON document typed onto the `definition_json` column of
 * `issuer_overlay_revision`.
 */
export type IssuerOverlayDefinition = {
/** Format tag; "fiscal-v1" is the only value the type admits. */
version: "fiscal-v1";
/** Ticker the definition is recorded for. */
ticker: string;
/** NOTE(review): presumably the taxonomy pack the overlay applies to, null when none was resolved — confirm. */
pack: string | null;
/** Per-surface concept allow-lists; one entry per mapping. */
mappings: Array<{
/** NOTE(review): presumably identifies the surface row this mapping targets — confirm. */
surface_key: string;
/** Financial statement the mapping belongs to. */
statement: FinancialStatementKind;
/** NOTE(review): presumably concept names accepted as sources for the surface — confirm format (qname vs local name). */
allowed_source_concepts: string[];
/** NOTE(review): presumably concept names accepted as authoritative for the surface — confirm format. */
allowed_authoritative_concepts: string[];
}>;
};
/**
 * Shape of the JSON document typed onto the `stats_json` column of
 * `issuer_overlay` (the column itself may also hold null).
 */
export type IssuerOverlayStats = {
/** NOTE(review): presumably the taxonomy pack used for the build, null when none — confirm. */
pack: string | null;
/** NOTE(review): presumably equals `sampledSnapshotIds.length` — confirm with the producer. */
sampledSnapshotCount: number;
/** Numeric ids of the sampled snapshots (presumably `filing_taxonomy_snapshot` ids — confirm). */
sampledSnapshotIds: number[];
/** Count of mappings that were accepted. */
acceptedMappingCount: number;
/** Count of mappings that were rejected. */
rejectedMappingCount: number;
/** NOTE(review): presumably the `revision_number` that was published, null when nothing was published — confirm. */
publishedRevisionNumber: number | null;
};
/**
 * Shape of the JSON document typed onto the `diagnostics_json` column of
 * `issuer_overlay_revision` (the column itself may also hold null).
 */
export type IssuerOverlayDiagnostics = {
/** NOTE(review): presumably the taxonomy pack used for the build, null when none — confirm. */
pack: string | null;
/** Numeric ids of the sampled snapshots (presumably `filing_taxonomy_snapshot` ids — confirm). */
sampledSnapshotIds: number[];
/** Mappings that were accepted, each tagged with one of two match reasons. */
acceptedMappings: Array<{
/** NOTE(review): presumably the concept's qualified name — confirm. */
qname: string;
surface_key: string;
statement: FinancialStatementKind;
/** Why the mapping was accepted; restricted to these two literals. */
reason: "authoritative_match" | "local_name_match";
/** Numeric ids of the snapshots associated with this mapping. */
source_snapshot_ids: number[];
}>;
/** Mappings that were rejected; `reason` here is an unconstrained string. */
rejectedMappings: Array<{
qname: string;
reason: string;
source_snapshot_ids: number[];
}>;
};
type FilingStatementPeriod = {
id: string;
filingId: number;
@@ -208,7 +249,7 @@ type TaxonomySurfaceSnapshotRow = {
formulaKey: string | null;
hasDimensions: boolean;
resolvedSourceRowKeys: Record<string, string | null>;
statement?: "income" | "balance" | "cash_flow";
statement?: "income" | "balance" | "cash_flow" | "equity" | "disclosure";
detailCount?: number;
};
@@ -270,6 +311,10 @@ type TaxonomyNormalizationSummary = {
kpiRowCount: number;
unmappedRowCount: number;
materialUnmappedRowCount: number;
residualPrimaryCount: number;
residualDisclosureCount: number;
unsupportedConceptCount: number;
issuerOverlayMatchCount: number;
warnings: string[];
};
@@ -640,6 +685,7 @@ export const filingTaxonomySnapshot = sqliteTable(
normalization_summary: text("normalization_summary", {
mode: "json",
}).$type<TaxonomyNormalizationSummary | null>(),
issuer_overlay_revision_id: integer("issuer_overlay_revision_id"),
facts_count: integer("facts_count").notNull().default(0),
concepts_count: integer("concepts_count").notNull().default(0),
dimensions_count: integer("dimensions_count").notNull().default(0),
@@ -659,6 +705,65 @@ export const filingTaxonomySnapshot = sqliteTable(
}),
);
/**
 * Drizzle model for the `issuer_overlay_revision` table.
 *
 * Each row stores one overlay definition for a ticker together with its hash,
 * optional diagnostics, and the snapshot ids recorded with it.
 *
 * Constraints declared here:
 * - `(ticker, revision_number)` is unique.
 * - `(ticker, definition_hash)` is unique.
 * - `(ticker, created_at)` has a non-unique index.
 *
 * `definition_json` and `source_snapshot_ids` are marked NOT NULL so the model
 * agrees with the table DDL in `ensureIssuerOverlaySchema`
 * (`definition_json text NOT NULL`, `source_snapshot_ids text NOT NULL DEFAULT '[]'`)
 * and with their non-nullable `$type<…>()` annotations.
 */
export const issuerOverlayRevision = sqliteTable(
  "issuer_overlay_revision",
  {
    id: integer("id").primaryKey({ autoIncrement: true }),
    ticker: text("ticker").notNull(),
    revision_number: integer("revision_number").notNull(),
    definition_hash: text("definition_hash").notNull(),
    definition_json: text("definition_json", {
      mode: "json",
    })
      .$type<IssuerOverlayDefinition>()
      .notNull(),
    // Diagnostics are optional; the DDL leaves this column nullable.
    diagnostics_json: text("diagnostics_json", {
      mode: "json",
    }).$type<IssuerOverlayDiagnostics | null>(),
    // Defaulting to an empty list mirrors the DDL default of '[]' and keeps the
    // field optional on insert even though the column is NOT NULL.
    source_snapshot_ids: text("source_snapshot_ids", {
      mode: "json",
    })
      .$type<number[]>()
      .notNull()
      .default([]),
    created_at: text("created_at").notNull(),
  },
  (table) => ({
    issuerOverlayRevisionTickerRevisionUnique: uniqueIndex(
      "issuer_overlay_revision_ticker_revision_uidx",
    ).on(table.ticker, table.revision_number),
    issuerOverlayRevisionTickerHashUnique: uniqueIndex(
      "issuer_overlay_revision_ticker_hash_uidx",
    ).on(table.ticker, table.definition_hash),
    issuerOverlayRevisionTickerCreatedIndex: index(
      "issuer_overlay_revision_ticker_created_idx",
    ).on(table.ticker, table.created_at),
  }),
);
/**
 * Drizzle model for the `issuer_overlay` table: one row per ticker, keyed by
 * `ticker`, carrying a status, an optional pointer to a revision row, and
 * build bookkeeping fields.
 */
export const issuerOverlay = sqliteTable(
"issuer_overlay",
{
// Primary key: at most one overlay row per ticker.
ticker: text("ticker").primaryKey(),
// Constrained to IssuerOverlayStatus at the type level; rows start as "empty".
status: text("status")
.$type<IssuerOverlayStatus>()
.notNull()
.default("empty"),
// Nullable foreign key to issuer_overlay_revision.id; deleting the revision
// nulls this column rather than deleting the overlay row.
active_revision_id: integer("active_revision_id").references(
() => issuerOverlayRevision.id,
{ onDelete: "set null" },
),
// Nullable text columns; NOTE(review): timestamp/error formats are not visible here — confirm with the writer.
last_built_at: text("last_built_at"),
last_error: text("last_error"),
// JSON-mode text column typed as IssuerOverlayStats or null.
stats_json: text("stats_json", {
mode: "json",
}).$type<IssuerOverlayStats | null>(),
created_at: text("created_at").notNull(),
updated_at: text("updated_at").notNull(),
},
(table) => ({
// Non-unique composite index over (status, updated_at).
issuerOverlayStatusIndex: index("issuer_overlay_status_idx").on(
table.status,
table.updated_at,
),
}),
);
export const filingTaxonomyContext = sqliteTable(
"filing_taxonomy_context",
{
@@ -1315,6 +1420,8 @@ export const appSchema = {
filing,
filingStatementSnapshot,
filingTaxonomySnapshot,
issuerOverlay,
issuerOverlayRevision,
filingTaxonomyAsset,
filingTaxonomyConcept,
filingTaxonomyFact,

View File

@@ -35,12 +35,12 @@ export function hasColumn(
return rows.some((row) => row.name === columnName);
}
export function applySqlFile(client: Database, fileName: string) {
/**
 * Reads a SQL file from the `drizzle` directory under the current working
 * directory and executes its contents on the given connection.
 *
 * @param client SQLite connection the script is executed on.
 * @param fileName File name inside `<cwd>/drizzle`.
 */
function applySqlFile(client: Database, fileName: string) {
  const scriptPath = join(process.cwd(), "drizzle", fileName);
  client.exec(readFileSync(scriptPath, "utf8"));
}
export function applyBaseSchemaCompat(client: Database) {
function applyBaseSchemaCompat(client: Database) {
const sql = readFileSync(
join(process.cwd(), "drizzle", "0000_cold_silver_centurion.sql"),
"utf8",
@@ -340,6 +340,7 @@ const TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS = [
"kpi_rows",
"computed_definitions",
"normalization_summary",
"issuer_overlay_revision_id",
] as const;
function ensureTaxonomySnapshotCompat(client: Database) {
@@ -388,6 +389,10 @@ function ensureTaxonomySnapshotCompat(client: Database) {
name: "normalization_summary",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `normalization_summary` text;",
},
{
name: "issuer_overlay_revision_id",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `issuer_overlay_revision_id` integer REFERENCES `issuer_overlay_revision`(`id`) ON UPDATE no action ON DELETE set null;",
},
]);
for (const columnName of TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS) {
@@ -527,6 +532,52 @@ function ensureTaxonomyCompat(client: Database) {
ensureTaxonomyFactCompat(client);
}
/**
 * Makes sure the issuer overlay tables and their indexes exist.
 *
 * `issuer_overlay_revision` is handled before `issuer_overlay`, whose
 * `active_revision_id` column declares a foreign key to it. Table creation is
 * skipped when `hasTable` already reports the table; the index statements all
 * use `IF NOT EXISTS` and are executed on every call.
 *
 * @param client SQLite connection the DDL is executed on.
 */
function ensureIssuerOverlaySchema(client: Database) {
  const createRevisionTableSql = `
CREATE TABLE IF NOT EXISTS \`issuer_overlay_revision\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
\`ticker\` text NOT NULL,
\`revision_number\` integer NOT NULL,
\`definition_hash\` text NOT NULL,
\`definition_json\` text NOT NULL,
\`diagnostics_json\` text,
\`source_snapshot_ids\` text NOT NULL DEFAULT '[]',
\`created_at\` text NOT NULL
);
`;
  const createOverlayTableSql = `
CREATE TABLE IF NOT EXISTS \`issuer_overlay\` (
\`ticker\` text PRIMARY KEY NOT NULL,
\`status\` text NOT NULL DEFAULT 'empty',
\`active_revision_id\` integer,
\`last_built_at\` text,
\`last_error\` text,
\`stats_json\` text,
\`created_at\` text NOT NULL,
\`updated_at\` text NOT NULL,
FOREIGN KEY (\`active_revision_id\`) REFERENCES \`issuer_overlay_revision\`(\`id\`) ON UPDATE no action ON DELETE set null
);
`;
  // Executed in this order: the two unique indexes, then the two lookup indexes.
  const indexStatements = [
    "CREATE UNIQUE INDEX IF NOT EXISTS `issuer_overlay_revision_ticker_revision_uidx` ON `issuer_overlay_revision` (`ticker`, `revision_number`);",
    "CREATE UNIQUE INDEX IF NOT EXISTS `issuer_overlay_revision_ticker_hash_uidx` ON `issuer_overlay_revision` (`ticker`, `definition_hash`);",
    "CREATE INDEX IF NOT EXISTS `issuer_overlay_revision_ticker_created_idx` ON `issuer_overlay_revision` (`ticker`, `created_at`);",
    "CREATE INDEX IF NOT EXISTS `issuer_overlay_status_idx` ON `issuer_overlay` (`status`, `updated_at`);",
  ];

  const revisionTableMissing = !hasTable(client, "issuer_overlay_revision");
  if (revisionTableMissing) {
    client.exec(createRevisionTableSql);
  }

  const overlayTableMissing = !hasTable(client, "issuer_overlay");
  if (overlayTableMissing) {
    client.exec(createOverlayTableSql);
  }

  for (const statement of indexStatements) {
    client.exec(statement);
  }
}
export function ensureLocalSqliteSchema(client: Database) {
const missingBaseSchema = [
"filing",
@@ -684,6 +735,7 @@ WHERE resource_key IS NOT NULL AND status IN ('queued', 'running');`);
if (!hasTable(client, "filing_taxonomy_snapshot")) {
applySqlFile(client, "0005_financial_taxonomy_v3.sql");
}
ensureIssuerOverlaySchema(client);
ensureTaxonomyCompat(client);
if (!hasTable(client, "company_financial_bundle")) {
@@ -725,7 +777,7 @@ WHERE resource_key IS NOT NULL AND status IN ('queued', 'running');`);
ensureResearchWorkspaceSchema(client);
}
export const __sqliteSchemaCompatInternals = {
const __sqliteSchemaCompatInternals = {
applyBaseSchemaCompat,
applySqlFile,
hasColumn,