Automate issuer overlay creation from ticker searches
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import type { Database } from 'bun:sqlite';
|
||||
|
||||
export type FinancialSchemaRepairMode = 'auto' | 'check-only' | 'off';
|
||||
export type FinancialIngestionHealthMode = 'healthy' | 'repaired' | 'drifted' | 'failed';
|
||||
/** Repair mode a caller can request: 'auto', 'check-only', or 'off'. */
type FinancialSchemaRepairMode = 'auto' | 'check-only' | 'off';
|
||||
/** Health mode values carried by the `mode` field of the schema ensure result. */
type FinancialIngestionHealthMode = 'healthy' | 'repaired' | 'drifted' | 'failed';
|
||||
|
||||
type CriticalIndexDefinition = {
|
||||
name: string;
|
||||
@@ -16,7 +16,7 @@ type DuplicateRule = {
|
||||
partitionColumns: string[];
|
||||
};
|
||||
|
||||
export type FinancialIngestionIndexStatus = {
|
||||
type FinancialIngestionIndexStatus = {
|
||||
name: string;
|
||||
table: string;
|
||||
expectedColumns: string[];
|
||||
@@ -26,13 +26,13 @@ export type FinancialIngestionIndexStatus = {
|
||||
healthy: boolean;
|
||||
};
|
||||
|
||||
export type FinancialIngestionDuplicateStatus = {
|
||||
/**
 * Duplicate tally for a single table, as listed in the `duplicates` array of
 * the schema report.
 */
type FinancialIngestionDuplicateStatus = {
  /** Name of the table the counts refer to. */
  table: string;
  /** Number of duplicate groups found (presumably distinct duplicated keys — confirm). */
  duplicateGroups: number;
  /** Number of duplicate rows found (presumably rows beyond the first per group — confirm). */
  duplicateRows: number;
};
|
||||
|
||||
export type FinancialIngestionSchemaReport = {
|
||||
type FinancialIngestionSchemaReport = {
|
||||
ok: boolean;
|
||||
checkedAt: string;
|
||||
indexes: FinancialIngestionIndexStatus[];
|
||||
@@ -42,7 +42,7 @@ export type FinancialIngestionSchemaReport = {
|
||||
duplicates: FinancialIngestionDuplicateStatus[];
|
||||
};
|
||||
|
||||
export type FinancialIngestionSchemaRepairResult = {
|
||||
type FinancialIngestionSchemaRepairResult = {
|
||||
attempted: boolean;
|
||||
requestedMode: FinancialSchemaRepairMode;
|
||||
missingIndexesBefore: string[];
|
||||
@@ -56,7 +56,7 @@ export type FinancialIngestionSchemaRepairResult = {
|
||||
reportAfter: FinancialIngestionSchemaReport;
|
||||
};
|
||||
|
||||
export type FinancialIngestionSchemaEnsureResult = {
|
||||
type FinancialIngestionSchemaEnsureResult = {
|
||||
ok: boolean;
|
||||
mode: FinancialIngestionHealthMode;
|
||||
requestedMode: FinancialSchemaRepairMode;
|
||||
@@ -317,7 +317,7 @@ function createOrRecreateIndex(client: Database, definition: CriticalIndexDefini
|
||||
client.exec(definition.createSql);
|
||||
}
|
||||
|
||||
export function repairFinancialIngestionSchema(
|
||||
function repairFinancialIngestionSchema(
|
||||
client: Database,
|
||||
options: {
|
||||
mode?: FinancialSchemaRepairMode;
|
||||
@@ -496,7 +496,7 @@ export function ensureFinancialIngestionSchemaHealthy(
|
||||
}
|
||||
}
|
||||
|
||||
export function isMissingOnConflictConstraintError(error: unknown) {
|
||||
/**
 * Reports whether `error` carries the "ON CONFLICT clause does not match any
 * PRIMARY KEY or UNIQUE constraint" message.
 *
 * @param error - Any caught value; anything that is not an `Error` instance yields `false`.
 * @returns `true` when the error message contains that phrase, compared case-insensitively.
 */
function isMissingOnConflictConstraintError(error: unknown) {
  if (!(error instanceof Error)) {
    return false;
  }

  // Lower-case once so the match does not depend on how the driver cases the message.
  const normalizedMessage = error.message.toLowerCase();
  return normalizedMessage.includes(
    'on conflict clause does not match any primary key or unique constraint',
  );
}
|
||||
@@ -529,7 +529,7 @@ export async function withFinancialIngestionSchemaRetry<T>(input: {
|
||||
}
|
||||
}
|
||||
|
||||
export const __financialIngestionSchemaInternals = {
|
||||
const __financialIngestionSchemaInternals = {
|
||||
CRITICAL_INDEX_DEFINITIONS,
|
||||
UNIQUE_DUPLICATE_RULES,
|
||||
clearBundleCache,
|
||||
|
||||
@@ -218,6 +218,17 @@ describe("sqlite schema compatibility bootstrap", () => {
|
||||
expect(__dbInternals.hasTable(client, "research_memo")).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, "research_memo_evidence")).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, "company_overview_cache")).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, "issuer_overlay")).toBe(true);
|
||||
expect(__dbInternals.hasTable(client, "issuer_overlay_revision")).toBe(
|
||||
true,
|
||||
);
|
||||
expect(
|
||||
__dbInternals.hasColumn(
|
||||
client,
|
||||
"filing_taxonomy_snapshot",
|
||||
"issuer_overlay_revision_id",
|
||||
),
|
||||
).toBe(true);
|
||||
|
||||
__dbInternals.loadSqliteExtensions(client);
|
||||
__dbInternals.ensureSearchVirtualTables(client);
|
||||
|
||||
@@ -33,6 +33,7 @@ type TaxonomyMetricValidationStatus =
|
||||
| "matched"
|
||||
| "mismatch"
|
||||
| "error";
|
||||
/** Status values for an issuer overlay row; the `issuer_overlay.status` column is typed with this union. */
type IssuerOverlayStatus = "empty" | "active" | "error";
|
||||
/** Allowed coverage status values. */
type CoverageStatus = "backlog" | "active" | "watch" | "archive";
|
||||
/** Allowed coverage priority levels. */
type CoveragePriority = "low" | "medium" | "high";
|
||||
/** Allowed research journal entry types. */
type ResearchJournalEntryType = "note" | "filing_note" | "status_change";
|
||||
@@ -65,6 +66,8 @@ type FinancialSurfaceKind =
|
||||
| "income_statement"
|
||||
| "balance_sheet"
|
||||
| "cash_flow_statement"
|
||||
| "equity_statement"
|
||||
| "disclosures"
|
||||
| "ratios"
|
||||
| "segments_kpis"
|
||||
| "adjusted"
|
||||
@@ -103,6 +106,44 @@ type FinancialStatementKind =
|
||||
| "equity"
|
||||
| "comprehensive_income";
|
||||
|
||||
/**
 * Shape of the JSON document stored in `issuer_overlay_revision.definition_json`.
 */
export type IssuerOverlayDefinition = {
  /** Format tag; only "fiscal-v1" is accepted by the type. */
  version: "fiscal-v1";
  /** Ticker the overlay belongs to. */
  ticker: string;
  /** Pack identifier, or `null` when none applies. */
  pack: string | null;
  /** One entry per surface key, listing the concepts allowed to feed it. */
  mappings: Array<{
    surface_key: string;
    statement: FinancialStatementKind;
    allowed_source_concepts: string[];
    allowed_authoritative_concepts: string[];
  }>;
};
|
||||
|
||||
/**
 * Shape of the JSON document stored in `issuer_overlay.stats_json`.
 */
export type IssuerOverlayStats = {
  /** Pack identifier, or `null` when none applies. */
  pack: string | null;
  /** Number of snapshots sampled. */
  sampledSnapshotCount: number;
  /** Ids of the sampled snapshots. */
  sampledSnapshotIds: number[];
  /** Count of mappings that were accepted. */
  acceptedMappingCount: number;
  /** Count of mappings that were rejected. */
  rejectedMappingCount: number;
  /** Revision number that was published; presumably `null` when nothing was published — confirm. */
  publishedRevisionNumber: number | null;
};
|
||||
|
||||
/**
 * Shape of the JSON document stored in `issuer_overlay_revision.diagnostics_json`.
 */
export type IssuerOverlayDiagnostics = {
  /** Pack identifier, or `null` when none applies. */
  pack: string | null;
  /** Ids of the sampled snapshots. */
  sampledSnapshotIds: number[];
  /** Mappings that were accepted, with the match reason and the snapshots they came from. */
  acceptedMappings: Array<{
    qname: string;
    surface_key: string;
    statement: FinancialStatementKind;
    reason: "authoritative_match" | "local_name_match";
    source_snapshot_ids: number[];
  }>;
  /** Mappings that were rejected, with a free-form reason and the snapshots they came from. */
  rejectedMappings: Array<{
    qname: string;
    reason: string;
    source_snapshot_ids: number[];
  }>;
};
|
||||
|
||||
type FilingStatementPeriod = {
|
||||
id: string;
|
||||
filingId: number;
|
||||
@@ -208,7 +249,7 @@ type TaxonomySurfaceSnapshotRow = {
|
||||
formulaKey: string | null;
|
||||
hasDimensions: boolean;
|
||||
resolvedSourceRowKeys: Record<string, string | null>;
|
||||
statement?: "income" | "balance" | "cash_flow";
|
||||
statement?: "income" | "balance" | "cash_flow" | "equity" | "disclosure";
|
||||
detailCount?: number;
|
||||
};
|
||||
|
||||
@@ -270,6 +311,10 @@ type TaxonomyNormalizationSummary = {
|
||||
kpiRowCount: number;
|
||||
unmappedRowCount: number;
|
||||
materialUnmappedRowCount: number;
|
||||
residualPrimaryCount: number;
|
||||
residualDisclosureCount: number;
|
||||
unsupportedConceptCount: number;
|
||||
issuerOverlayMatchCount: number;
|
||||
warnings: string[];
|
||||
};
|
||||
|
||||
@@ -640,6 +685,7 @@ export const filingTaxonomySnapshot = sqliteTable(
|
||||
normalization_summary: text("normalization_summary", {
|
||||
mode: "json",
|
||||
}).$type<TaxonomyNormalizationSummary | null>(),
|
||||
issuer_overlay_revision_id: integer("issuer_overlay_revision_id"),
|
||||
facts_count: integer("facts_count").notNull().default(0),
|
||||
concepts_count: integer("concepts_count").notNull().default(0),
|
||||
dimensions_count: integer("dimensions_count").notNull().default(0),
|
||||
@@ -659,6 +705,65 @@ export const filingTaxonomySnapshot = sqliteTable(
|
||||
}),
|
||||
);
|
||||
|
||||
/**
 * Drizzle table definition for `issuer_overlay_revision`: one row per overlay
 * revision of a ticker.
 *
 * Uniqueness is enforced on (ticker, revision_number) and on
 * (ticker, definition_hash); a further index covers (ticker, created_at).
 *
 * NOTE(review): the compatibility bootstrap DDL for this table declares
 * `definition_json` and `source_snapshot_ids` as NOT NULL (the latter with
 * DEFAULT '[]'), while both columns are nullable here — confirm which is
 * intended and align the two.
 */
export const issuerOverlayRevision = sqliteTable(
  "issuer_overlay_revision",
  {
    id: integer("id").primaryKey({ autoIncrement: true }),
    ticker: text("ticker").notNull(),
    revision_number: integer("revision_number").notNull(),
    definition_hash: text("definition_hash").notNull(),
    // JSON-mode text columns; `$type` only narrows the TypeScript type.
    definition_json: text("definition_json", {
      mode: "json",
    }).$type<IssuerOverlayDefinition>(),
    diagnostics_json: text("diagnostics_json", {
      mode: "json",
    }).$type<IssuerOverlayDiagnostics | null>(),
    source_snapshot_ids: text("source_snapshot_ids", {
      mode: "json",
    }).$type<number[]>(),
    created_at: text("created_at").notNull(),
  },
  (table) => ({
    issuerOverlayRevisionTickerRevisionUnique: uniqueIndex(
      "issuer_overlay_revision_ticker_revision_uidx",
    ).on(table.ticker, table.revision_number),
    issuerOverlayRevisionTickerHashUnique: uniqueIndex(
      "issuer_overlay_revision_ticker_hash_uidx",
    ).on(table.ticker, table.definition_hash),
    issuerOverlayRevisionTickerCreatedIndex: index(
      "issuer_overlay_revision_ticker_created_idx",
    ).on(table.ticker, table.created_at),
  }),
);
|
||||
|
||||
/**
 * Drizzle table definition for `issuer_overlay`: one row per ticker (the
 * ticker is the primary key), pointing at a revision through
 * `active_revision_id`.
 */
export const issuerOverlay = sqliteTable(
  "issuer_overlay",
  {
    ticker: text("ticker").primaryKey(),
    status: text("status")
      .$type<IssuerOverlayStatus>()
      .notNull()
      .default("empty"),
    // Set to NULL when the referenced revision row is deleted.
    active_revision_id: integer("active_revision_id").references(
      () => issuerOverlayRevision.id,
      { onDelete: "set null" },
    ),
    last_built_at: text("last_built_at"),
    last_error: text("last_error"),
    stats_json: text("stats_json", {
      mode: "json",
    }).$type<IssuerOverlayStats | null>(),
    created_at: text("created_at").notNull(),
    updated_at: text("updated_at").notNull(),
  },
  (table) => ({
    issuerOverlayStatusIndex: index("issuer_overlay_status_idx").on(
      table.status,
      table.updated_at,
    ),
  }),
);
|
||||
|
||||
export const filingTaxonomyContext = sqliteTable(
|
||||
"filing_taxonomy_context",
|
||||
{
|
||||
@@ -1315,6 +1420,8 @@ export const appSchema = {
|
||||
filing,
|
||||
filingStatementSnapshot,
|
||||
filingTaxonomySnapshot,
|
||||
issuerOverlay,
|
||||
issuerOverlayRevision,
|
||||
filingTaxonomyAsset,
|
||||
filingTaxonomyConcept,
|
||||
filingTaxonomyFact,
|
||||
|
||||
@@ -35,12 +35,12 @@ export function hasColumn(
|
||||
return rows.some((row) => row.name === columnName);
|
||||
}
|
||||
|
||||
export function applySqlFile(client: Database, fileName: string) {
|
||||
/**
 * Reads a SQL file from the `drizzle` directory under the current working
 * directory and executes its full contents on `client`.
 *
 * @param client - Database handle the SQL is executed against.
 * @param fileName - File name inside the `drizzle` directory.
 */
function applySqlFile(client: Database, fileName: string) {
  // Resolved against process.cwd(), so the process must run from the project root.
  const sqlPath = join(process.cwd(), "drizzle", fileName);
  client.exec(readFileSync(sqlPath, "utf8"));
}
|
||||
|
||||
export function applyBaseSchemaCompat(client: Database) {
|
||||
function applyBaseSchemaCompat(client: Database) {
|
||||
const sql = readFileSync(
|
||||
join(process.cwd(), "drizzle", "0000_cold_silver_centurion.sql"),
|
||||
"utf8",
|
||||
@@ -340,6 +340,7 @@ const TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS = [
|
||||
"kpi_rows",
|
||||
"computed_definitions",
|
||||
"normalization_summary",
|
||||
"issuer_overlay_revision_id",
|
||||
] as const;
|
||||
|
||||
function ensureTaxonomySnapshotCompat(client: Database) {
|
||||
@@ -388,6 +389,10 @@ function ensureTaxonomySnapshotCompat(client: Database) {
|
||||
name: "normalization_summary",
|
||||
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `normalization_summary` text;",
|
||||
},
|
||||
{
|
||||
name: "issuer_overlay_revision_id",
|
||||
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `issuer_overlay_revision_id` integer REFERENCES `issuer_overlay_revision`(`id`) ON UPDATE no action ON DELETE set null;",
|
||||
},
|
||||
]);
|
||||
|
||||
for (const columnName of TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS) {
|
||||
@@ -527,6 +532,52 @@ function ensureTaxonomyCompat(client: Database) {
|
||||
ensureTaxonomyFactCompat(client);
|
||||
}
|
||||
|
||||
/**
 * Creates the `issuer_overlay_revision` and `issuer_overlay` tables when they
 * are missing, then creates their indexes.
 *
 * Every statement uses `IF NOT EXISTS`, so running this against a database
 * that already has the tables and indexes changes nothing.
 *
 * @param client - Database handle the DDL is executed against.
 */
function ensureIssuerOverlaySchema(client: Database) {
  // Revision table first: `issuer_overlay.active_revision_id` references it.
  if (!hasTable(client, "issuer_overlay_revision")) {
    client.exec(`
      CREATE TABLE IF NOT EXISTS \`issuer_overlay_revision\` (
        \`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
        \`ticker\` text NOT NULL,
        \`revision_number\` integer NOT NULL,
        \`definition_hash\` text NOT NULL,
        \`definition_json\` text NOT NULL,
        \`diagnostics_json\` text,
        \`source_snapshot_ids\` text NOT NULL DEFAULT '[]',
        \`created_at\` text NOT NULL
      );
    `);
  }

  if (!hasTable(client, "issuer_overlay")) {
    client.exec(`
      CREATE TABLE IF NOT EXISTS \`issuer_overlay\` (
        \`ticker\` text PRIMARY KEY NOT NULL,
        \`status\` text NOT NULL DEFAULT 'empty',
        \`active_revision_id\` integer,
        \`last_built_at\` text,
        \`last_error\` text,
        \`stats_json\` text,
        \`created_at\` text NOT NULL,
        \`updated_at\` text NOT NULL,
        FOREIGN KEY (\`active_revision_id\`) REFERENCES \`issuer_overlay_revision\`(\`id\`) ON UPDATE no action ON DELETE set null
      );
    `);
  }

  // Indexes are created unconditionally so a table that exists without them is still covered.
  const indexStatements = [
    "CREATE UNIQUE INDEX IF NOT EXISTS `issuer_overlay_revision_ticker_revision_uidx` ON `issuer_overlay_revision` (`ticker`, `revision_number`);",
    "CREATE UNIQUE INDEX IF NOT EXISTS `issuer_overlay_revision_ticker_hash_uidx` ON `issuer_overlay_revision` (`ticker`, `definition_hash`);",
    "CREATE INDEX IF NOT EXISTS `issuer_overlay_revision_ticker_created_idx` ON `issuer_overlay_revision` (`ticker`, `created_at`);",
    "CREATE INDEX IF NOT EXISTS `issuer_overlay_status_idx` ON `issuer_overlay` (`status`, `updated_at`);",
  ];

  for (const statement of indexStatements) {
    client.exec(statement);
  }
}
|
||||
|
||||
export function ensureLocalSqliteSchema(client: Database) {
|
||||
const missingBaseSchema = [
|
||||
"filing",
|
||||
@@ -684,6 +735,7 @@ WHERE resource_key IS NOT NULL AND status IN ('queued', 'running');`);
|
||||
if (!hasTable(client, "filing_taxonomy_snapshot")) {
|
||||
applySqlFile(client, "0005_financial_taxonomy_v3.sql");
|
||||
}
|
||||
ensureIssuerOverlaySchema(client);
|
||||
ensureTaxonomyCompat(client);
|
||||
|
||||
if (!hasTable(client, "company_financial_bundle")) {
|
||||
@@ -725,7 +777,7 @@ WHERE resource_key IS NOT NULL AND status IN ('queued', 'running');`);
|
||||
ensureResearchWorkspaceSchema(client);
|
||||
}
|
||||
|
||||
export const __sqliteSchemaCompatInternals = {
|
||||
const __sqliteSchemaCompatInternals = {
|
||||
applyBaseSchemaCompat,
|
||||
applySqlFile,
|
||||
hasColumn,
|
||||
|
||||
Reference in New Issue
Block a user