import { and, desc, eq, gte, inArray, lt, sql } from "drizzle-orm";
import type { ComputedDefinition } from "@/lib/generated";
import type {
  DetailFinancialRow,
  Filing,
  FinancialStatementKind,
  MetricValidationResult,
  NormalizationSummary,
  StructuredKpiRow,
  SurfaceDetailMap,
  SurfaceFinancialRow,
  TaxonomyDimensionMember,
  TaxonomyFactRow,
  TaxonomyStatementRow,
} from "@/lib/types";
import { db, getSqliteClient } from "@/lib/server/db";
import { withFinancialIngestionSchemaRetry } from "@/lib/server/db/financial-ingestion-schema";
import {
  filingTaxonomyAsset,
  filingTaxonomyConcept,
  filingTaxonomyContext,
  filingTaxonomyFact,
  filingTaxonomyMetricValidation,
  filingTaxonomySnapshot,
} from "@/lib/server/db/schema";

/** Values the snapshot `parse_status` field may hold. */
export type FilingTaxonomyParseStatus = "ready" | "partial" | "failed";

/** Values the snapshot `source` field may hold. */
export type FilingTaxonomySource =
  | "xbrl_instance"
  | "xbrl_instance_with_linkbase"
  | "legacy_html_fallback";

/** Values the asset `asset_type` field may hold. */
export type FilingTaxonomyAssetType =
  | "instance"
  | "schema"
  | "presentation"
  | "label"
  | "calculation"
  | "definition"
  | "pdf"
  | "other";

/** One reporting period entry stored in a snapshot's `periods` array. */
export type FilingTaxonomyPeriod = {
  id: string;
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  periodStart: string | null;
  periodEnd: string | null;
  filingType: "10-K" | "10-Q";
  periodLabel: string;
};

/**
 * Application-facing shape of a taxonomy snapshot, as produced by
 * `toSnapshotRecord`. The row collections are keyed by statement kind and hold
 * the outputs of the `normalize*Rows` helpers in this module.
 */
export type FilingTaxonomySnapshotRecord = {
  id: number;
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: "10-K" | "10-Q";
  parse_status: FilingTaxonomyParseStatus;
  parse_error: string | null;
  source: FilingTaxonomySource;
  parser_engine: string;
  parser_version: string;
  taxonomy_regime: "us-gaap" | "ifrs-full" | "unknown";
  fiscal_pack: string | null;
  periods: FilingTaxonomyPeriod[];
  faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
  detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
  kpi_rows: StructuredKpiRow[];
  computed_definitions: ComputedDefinition[];
  derived_metrics: Filing["metrics"];
  validation_result: MetricValidationResult | null;
  normalization_summary: NormalizationSummary | null;
  issuer_overlay_revision_id: number | null;
  facts_count: number;
  concepts_count: number;
  dimensions_count: number;
  created_at: string;
  updated_at: string;
};

/** Application-facing shape of a stored context row (see `toContextRecord`). */
type FilingTaxonomyContextRecord = {
  id: number;
  snapshot_id: number;
  context_id: string;
  entity_identifier: string | null;
  entity_scheme: string | null;
  period_start: string | null;
  period_end: string | null;
  period_instant: string | null;
  segment_json: Record<string, unknown> | null;
  scenario_json: Record<string, unknown> | null;
  created_at: string;
};

/** Application-facing shape of a stored asset row (see `toAssetRecord`). */
type FilingTaxonomyAssetRecord = {
  id: number;
  snapshot_id: number;
  asset_type: FilingTaxonomyAssetType;
  name: string;
  url: string;
  size_bytes: number | null;
  score: number | null;
  is_selected: boolean;
  created_at: string;
};

/** Application-facing shape of a stored concept row (see `toConceptRecord`). */
export type FilingTaxonomyConceptRecord = {
  id: number;
  snapshot_id: number;
  concept_key: string;
  qname: string;
  namespace_uri: string;
  local_name: string;
  label: string | null;
  is_extension: boolean;
  balance: string | null;
  period_type: string | null;
  data_type: string | null;
  statement_kind: FinancialStatementKind | null;
  role_uri: string | null;
  authoritative_concept_key: string | null;
  mapping_method: string | null;
  surface_key: string | null;
  detail_parent_surface_key: string | null;
  kpi_key: string | null;
  residual_flag: boolean;
  presentation_order: number | null;
  presentation_depth: number | null;
  parent_concept_key: string | null;
  is_abstract: boolean;
  created_at: string;
};

/** Application-facing shape of a stored fact row (see `toFactRecord`). */
type FilingTaxonomyFactRecord = {
  id: number;
  snapshot_id: number;
  concept_key: string;
  qname: string;
  namespace_uri: string;
  local_name: string;
  data_type: string | null;
  statement_kind: FinancialStatementKind | null;
  role_uri: string | null;
  authoritative_concept_key: string | null;
  mapping_method: string | null;
  surface_key: string | null;
  detail_parent_surface_key: string | null;
  kpi_key: string | null;
  residual_flag: boolean;
  context_id: string;
  unit: string | null;
  decimals: string | null;
  precision: string | null;
  nil: boolean;
  value_num: number;
  period_start: string | null;
  period_end: string | null;
  period_instant: string | null;
  dimensions: TaxonomyDimensionMember[];
  is_dimensionless: boolean;
  source_file: string | null;
  created_at: string;
};

/**
 * Application-facing shape of a stored metric validation row
 * (see `toMetricValidationRecord`).
 */
type FilingTaxonomyMetricValidationRecord = {
  id: number;
  snapshot_id: number;
  metric_key: keyof NonNullable<Filing["metrics"]>;
  taxonomy_value: number | null;
  llm_value: number | null;
  absolute_diff: number | null;
  relative_diff: number | null;
  status: "not_run" | "matched" | "mismatch" | "error";
  evidence_pages: number[];
  pdf_url: string | null;
  provider: string | null;
  model: string | null;
  error: string | null;
  created_at: string;
  updated_at: string;
};

/**
 * A child record without the columns the upsert fills in itself
 * (`id`, `snapshot_id` and the timestamps).
 */
type TaxonomyInsertPayload<TRecord> = Omit<
  TRecord,
  "id" | "snapshot_id" | "created_at" | "updated_at"
>;

/**
 * Input accepted by `upsertFilingTaxonomySnapshot`: every snapshot field except
 * the generated id and timestamps (with `issuer_overlay_revision_id` optional),
 * plus the child rows to store alongside the snapshot.
 */
type UpsertFilingTaxonomySnapshotInput = Omit<
  FilingTaxonomySnapshotRecord,
  "id" | "issuer_overlay_revision_id" | "created_at" | "updated_at"
> & {
  issuer_overlay_revision_id?: number | null;
  contexts: Array<TaxonomyInsertPayload<FilingTaxonomyContextRecord>>;
  assets: Array<TaxonomyInsertPayload<FilingTaxonomyAssetRecord>>;
  concepts: Array<TaxonomyInsertPayload<FilingTaxonomyConceptRecord>>;
  facts: Array<TaxonomyInsertPayload<FilingTaxonomyFactRecord>>;
  metric_validations: Array<
    TaxonomyInsertPayload<FilingTaxonomyMetricValidationRecord>
  >;
};

/** Statement kinds iterated by the row normalizers; matches `asStatementKind`. */
const FINANCIAL_STATEMENT_KINDS = [
  "income",
  "balance",
  "cash_flow",
  "disclosure",
  "equity",
  "comprehensive_income",
] as const satisfies FinancialStatementKind[];

/** Faithful/statement rows grouped by statement kind. */
type StatementRowMap = Record<FinancialStatementKind, TaxonomyStatementRow[]>;
/** Surface rows grouped by statement kind. */
type SurfaceRowMap = Record<FinancialStatementKind, SurfaceFinancialRow[]>;
/** Detail rows grouped by statement kind, then by parent surface key. */
type DetailRowMap = Record<FinancialStatementKind, SurfaceDetailMap>;

/** Returns the UTC calendar date ten years before now as `YYYY-MM-DD`. */
function tenYearsAgoIso() {
  const date = new Date();
  date.setUTCFullYear(date.getUTCFullYear() - 10);
  return date.toISOString().slice(0, 10);
}

/**
 * Coerces a value to a finite number.
 *
 * @param value - A number, a numeric string, or anything else.
 * @returns The finite number, or `null` for non-finite numbers, unparsable
 *   strings, blank strings and every other type.
 */
function asNumber(value: unknown): number | null {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : null;
  }
  if (typeof value === "string") {
    // Number("") and Number("   ") evaluate to 0; a blank string carries no
    // value, so report it as missing rather than inventing a zero.
    if (value.trim() === "") {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}

/** Serialises a finite number to text; `null` and non-finite values become `null`. */
function asNumericText(value: number | null): string | null {
  if (value === null || !Number.isFinite(value)) {
    return null;
  }
  return String(value);
}

/** Returns the value as a plain key/value object, or `null` for arrays, `null` and primitives. */
function asObject(value: unknown): Record<string, unknown> | null {
  return value !== null && typeof value === "object" && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : null;
}

/** Returns the value when it is a string, otherwise `null`. */
function asString(value: unknown): string | null {
  return typeof value === "string" ? value : null;
}

/** Returns the value when it is a string; `null` and every other type become `null`. */
function asNullableString(value: unknown): string | null {
  return typeof value === "string" ? value : null;
}

/**
 * Returns booleans unchanged and coerces everything else by truthiness, so any
 * non-empty string (including "false") yields `true`.
 */
function asBoolean(value: unknown): boolean {
  return typeof value === "boolean" ? value : Boolean(value);
}

/** Returns the value when it is one of the known statement kinds, otherwise `null`. */
function asStatementKind(value: unknown): FinancialStatementKind | null {
  return value === "income" ||
    value === "balance" ||
    value === "cash_flow" ||
    value === "disclosure" ||
    value === "equity" ||
    value === "comprehensive_income"
    ? value
    : null;
}

/** Maps every entry of an object through `asNumber`; non-objects yield `{}`. */
function normalizeNumberMap(value: unknown) {
  const object = asObject(value);
  if (!object) {
    return {};
  }
  return Object.fromEntries(
    Object.entries(object).map(([key, entry]) => [key, asNumber(entry)]),
  );
}

/** Maps every entry of an object through `asNullableString`; non-objects yield `{}`. */
function normalizeNullableStringMap(value: unknown) {
  const object = asObject(value);
  if (!object) {
    return {};
  }
  return Object.fromEntries(
    Object.entries(object).map(([key, entry]) => [
      key,
      asNullableString(entry),
    ]),
  );
}

/** Keeps only the string entries of an array; non-arrays yield `[]`. */
function normalizeStringArray(value: unknown) {
  return Array.isArray(value) ?
    // Remainder of normalizeStringArray, whose header sits on the preceding source line.
    value.filter((entry): entry is string => typeof entry === "string") : []; }

/**
 * Converts each array entry with `asNumber` and drops the ones that come back
 * `null`. Non-array input yields an empty array.
 */
function normalizeNumberArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value
    .map((entry) => asNumber(entry))
    .filter((entry): entry is number => entry !== null);
}

/**
 * Validates a stored `periods` payload.
 *
 * Each entry may use camelCase or snake_case keys. Entries missing an id,
 * filing id, accession number, filing date, a "10-K"/"10-Q" filing type or a
 * period label are discarded. Non-array input yields an empty array.
 */
function normalizePeriods(value: unknown): FilingTaxonomyPeriod[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value
    .map((entry) => {
      const row = asObject(entry);
      if (!row) {
        return null;
      }
      const id = asString(row.id);
      const filingId = asNumber(row.filingId ?? row.filing_id);
      const accessionNumber = asString(
        row.accessionNumber ?? row.accession_number,
      );
      const filingDate = asString(row.filingDate ?? row.filing_date);
      // Either key spelling may carry the filing type; "10-K" is checked first.
      const filingType =
        row.filingType === "10-K" || row.filing_type === "10-K"
          ? "10-K"
          : row.filingType === "10-Q" || row.filing_type === "10-Q"
            ? "10-Q"
            : null;
      const periodLabel = asString(row.periodLabel ?? row.period_label);
      if (
        !id ||
        filingId === null ||
        !accessionNumber ||
        !filingDate ||
        !filingType ||
        !periodLabel
      ) {
        return null;
      }
      return {
        id,
        filingId,
        accessionNumber,
        filingDate,
        periodStart: asNullableString(row.periodStart ?? row.period_start),
        periodEnd: asNullableString(row.periodEnd ?? row.period_end),
        filingType,
        periodLabel,
      } satisfies FilingTaxonomyPeriod;
    })
    .filter((entry): entry is FilingTaxonomyPeriod => entry !== null);
}

/**
 * Validates statement rows grouped by statement kind.
 *
 * @param value - Untrusted payload; expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is (same reference) when `value` is not an object.
 * @returns A map with every statement kind present; rows lacking a key, label,
 *   concept key, qname, namespace URI or local name are dropped.
 */
function normalizeStatementRows(
  value: unknown,
  fallbackRows: StatementRowMap = emptyStatementRows(),
): StatementRowMap {
  const object = asObject(value);
  if (!object) {
    return fallbackRows;
  }
  const normalized = emptyStatementRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const rows = Array.isArray(object[statement]) ? object[statement] : [];
    normalized[statement] = rows
      .map((entry) => {
        const row = asObject(entry);
        if (!row) {
          return null;
        }
        // The row key falls back to the concept key when no explicit key is stored.
        const key =
          asString(row.key) ?? asString(row.conceptKey ?? row.concept_key);
        const label = asString(row.label);
        const conceptKey = asString(row.conceptKey ?? row.concept_key);
        const qname = asString(row.qname);
        const namespaceUri = asString(row.namespaceUri ?? row.namespace_uri);
        const localName = asString(row.localName ?? row.local_name);
        if (
          !key ||
          !label ||
          !conceptKey ||
          !qname ||
          !namespaceUri ||
          !localName
        ) {
          return null;
        }
        return {
          key,
          label,
          conceptKey,
          qname,
          namespaceUri,
          localName,
          isExtension: asBoolean(row.isExtension ?? row.is_extension),
          // A row without a valid statement inherits the group it was found in.
          statement: asStatementKind(row.statement) ?? statement,
          roleUri: asNullableString(row.roleUri ?? row.role_uri),
          order: asNumber(row.order) ?? Number.MAX_SAFE_INTEGER,
          depth: asNumber(row.depth) ?? 0,
          parentKey: asNullableString(row.parentKey ?? row.parent_key),
          values: normalizeNumberMap(row.values),
          units: normalizeNullableStringMap(row.units),
          hasDimensions: asBoolean(row.hasDimensions ?? row.has_dimensions),
          sourceFactIds: normalizeNumberArray(
            row.sourceFactIds ?? row.source_fact_ids,
          ),
        };
      })
      .filter((entry): entry is TaxonomyStatementRow => entry !== null);
  }
  return normalized;
}

/**
 * Validates surface rows grouped by statement kind.
 *
 * @param value - Untrusted payload; expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is when `value` is not an object.
 * @returns A map with every statement kind present; rows lacking a key, label,
 *   category or unit are dropped. Optional fields are only set when the stored
 *   value passes its check.
 */
function normalizeSurfaceRows(
  value: unknown,
  fallbackRows: SurfaceRowMap = emptySurfaceRows(),
): SurfaceRowMap {
  const object = asObject(value);
  if (!object) {
    return fallbackRows;
  }
  const normalized = emptySurfaceRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const rows = Array.isArray(object[statement]) ? object[statement] : [];
    normalized[statement] = rows
      .map((entry) => {
        const row = asObject(entry);
        if (!row) {
          return null;
        }
        const key = asString(row.key);
        const label = asString(row.label);
        const category = asString(row.category);
        const unit = asString(row.unit);
        if (!key || !label || !category || !unit) {
          return null;
        }
        const normalizedStatement = asStatementKind(row.statement);
        const resolutionMethod =
          row.resolutionMethod ?? row.resolution_method;
        const confidence = row.confidence;
        // category and unit are only checked to be non-empty strings before the casts below.
        const normalizedRow: SurfaceFinancialRow = {
          key,
          label,
          category: category as SurfaceFinancialRow["category"],
          order: asNumber(row.order) ?? Number.MAX_SAFE_INTEGER,
          unit: unit as SurfaceFinancialRow["unit"],
          values: normalizeNumberMap(row.values),
          sourceConcepts: normalizeStringArray(
            row.sourceConcepts ?? row.source_concepts,
          ),
          sourceRowKeys: normalizeStringArray(
            row.sourceRowKeys ?? row.source_row_keys,
          ),
          sourceFactIds: normalizeNumberArray(
            row.sourceFactIds ?? row.source_fact_ids,
          ),
          formulaKey: asNullableString(row.formulaKey ?? row.formula_key),
          hasDimensions: asBoolean(row.hasDimensions ?? row.has_dimensions),
          resolvedSourceRowKeys: normalizeNullableStringMap(
            row.resolvedSourceRowKeys ?? row.resolved_source_row_keys,
          ),
        };
        const templateSection = asString(
          row.templateSection ?? row.template_section,
        );
        if (templateSection) {
          normalizedRow.templateSection =
            templateSection as SurfaceFinancialRow["templateSection"];
        }
        // "comprehensive_income" is not among the statements copied onto a surface row.
        if (
          normalizedStatement === "income" ||
          normalizedStatement === "balance" ||
          normalizedStatement === "cash_flow" ||
          normalizedStatement === "equity" ||
          normalizedStatement === "disclosure"
        ) {
          normalizedRow.statement = normalizedStatement;
        }
        const detailCount = asNumber(row.detailCount ?? row.detail_count);
        if (detailCount !== null) {
          normalizedRow.detailCount = detailCount;
        }
        if (
          resolutionMethod === "direct" ||
          resolutionMethod === "surface_bridge" ||
          resolutionMethod === "formula_derived" ||
          resolutionMethod === "not_meaningful"
        ) {
          normalizedRow.resolutionMethod = resolutionMethod;
        }
        if (
          confidence === "high" ||
          confidence === "medium" ||
          confidence === "low"
        ) {
          normalizedRow.confidence = confidence;
        }
        const warningCodes = normalizeStringArray(
          row.warningCodes ?? row.warning_codes,
        );
        if (warningCodes.length > 0) {
          normalizedRow.warningCodes = warningCodes;
        }
        return normalizedRow;
      })
      .filter((entry): entry is SurfaceFinancialRow => entry !== null);
  }
  return normalized;
}

/**
 * Validates detail rows grouped by statement kind and then by surface key.
 *
 * @param value - Untrusted payload; expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is when `value` is not an object.
 * @returns A map with every statement kind present. Every surface key found in
 *   the payload is kept (with an empty list when its value is not an array);
 *   rows lacking a key, label, concept key, qname, namespace URI or local name
 *   are dropped.
 */
function normalizeDetailRows(
  value: unknown,
  fallbackRows: DetailRowMap = emptyDetailRows(),
): DetailRowMap {
  const object = asObject(value);
  if (!object) {
    return fallbackRows;
  }
  const normalized = emptyDetailRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const groups = asObject(object[statement]) ?? {};
    normalized[statement] = Object.fromEntries(
      Object.entries(groups).map(([surfaceKey, rows]) => {
        const normalizedRows = Array.isArray(rows)
          ? rows
              .map((entry) => {
                const row = asObject(entry);
                if (!row) {
                  return null;
                }
                const key =
                  asString(row.key) ??
                  asString(row.conceptKey ?? row.concept_key);
                const label = asString(row.label);
                const conceptKey = asString(
                  row.conceptKey ?? row.concept_key);
                const qname = asString(row.qname);
                const namespaceUri = asString(
                  row.namespaceUri ?? row.namespace_uri,
                );
                const localName = asString(row.localName ?? row.local_name);
                if (
                  !key ||
                  !label ||
                  !conceptKey ||
                  !qname ||
                  !namespaceUri ||
                  !localName
                ) {
                  return null;
                }
                return {
                  key,
                  // Defaults to the group key the row was stored under.
                  parentSurfaceKey:
                    asString(row.parentSurfaceKey ?? row.parent_surface_key) ??
                    surfaceKey,
                  label,
                  conceptKey,
                  qname,
                  namespaceUri,
                  localName,
                  unit: asNullableString(row.unit),
                  values: normalizeNumberMap(row.values),
                  sourceFactIds: normalizeNumberArray(
                    row.sourceFactIds ?? row.source_fact_ids,
                  ),
                  isExtension: asBoolean(row.isExtension ?? row.is_extension),
                  dimensionsSummary: normalizeStringArray(
                    row.dimensionsSummary ?? row.dimensions_summary,
                  ),
                  residualFlag: asBoolean(
                    row.residualFlag ?? row.residual_flag,
                  ),
                };
              })
              .filter((entry): entry is DetailFinancialRow => entry !== null)
          : [];
        return [surfaceKey, normalizedRows];
      }),
    );
  }
  return normalized;
}

/**
 * Validates a stored KPI row list.
 *
 * Rows lacking a key, label, category or unit, or whose provenance type is not
 * "taxonomy" or "structured_note", are dropped. Non-array input yields an
 * empty array.
 */
function normalizeKpiRows(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value
    .map((entry) => {
      const row = asObject(entry);
      if (!row) {
        return null;
      }
      const key = asString(row.key);
      const label = asString(row.label);
      const category = asString(row.category);
      const unit = asString(row.unit);
      const provenanceType = row.provenanceType ?? row.provenance_type;
      if (
        !key ||
        !label ||
        !category ||
        !unit ||
        (provenanceType !== "taxonomy" && provenanceType !== "structured_note")
      ) {
        return null;
      }
      return {
        key,
        label,
        category: category as StructuredKpiRow["category"],
        unit: unit as StructuredKpiRow["unit"],
        order: asNumber(row.order) ?? Number.MAX_SAFE_INTEGER,
        segment: asNullableString(row.segment),
        axis: asNullableString(row.axis),
        member: asNullableString(row.member),
        values: normalizeNumberMap(row.values),
        sourceConcepts: normalizeStringArray(
          row.sourceConcepts ?? row.source_concepts,
        ),
        sourceFactIds: normalizeNumberArray(
          row.sourceFactIds ?? row.source_fact_ids,
        ),
        provenanceType,
        hasDimensions: asBoolean(row.hasDimensions ??
        // Remainder of normalizeKpiRows, whose body starts on the preceding source line.
        row.has_dimensions),
      } satisfies StructuredKpiRow;
    })
    .filter((entry): entry is StructuredKpiRow => entry !== null);
}

/**
 * Validates a stored list of computed metric definitions.
 *
 * A definition is kept only when it has a key, label, category, unit and a
 * computation whose `type` is one of "ratio", "yoy_growth", "cagr",
 * "per_share" or "simple" with that type's required fields present.
 * Non-array input yields an empty array.
 */
function normalizeComputedDefinitions(value: unknown): ComputedDefinition[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value
    .map((entry) => {
      const row = asObject(entry);
      if (!row) {
        return null;
      }
      const key = asString(row.key);
      const label = asString(row.label);
      const category = asString(row.category);
      const unit = asString(row.unit);
      const computation = asObject(row.computation);
      const computationType = asString(computation?.type);
      if (
        !key ||
        !label ||
        !category ||
        !unit ||
        !computation ||
        !computationType
      ) {
        return null;
      }
      // Rebuild the computation from scratch so only the known fields of each
      // variant survive; unknown types and incomplete variants become null.
      const normalizedComputation = (() => {
        if (computationType === "ratio") {
          const numerator = asString(computation.numerator);
          const denominator = asString(computation.denominator);
          return numerator && denominator
            ? ({ type: "ratio", numerator, denominator } as const)
            : null;
        }
        if (computationType === "yoy_growth") {
          const source = asString(computation.source);
          return source ? ({ type: "yoy_growth", source } as const) : null;
        }
        if (computationType === "cagr") {
          const source = asString(computation.source);
          const years = asNumber(computation.years);
          return source && years !== null
            ? ({ type: "cagr", source, years } as const)
            : null;
        }
        if (computationType === "per_share") {
          const source = asString(computation.source);
          const shares_key = asString(
            computation.shares_key ?? computation.sharesKey,
          );
          return source && shares_key
            ? ({ type: "per_share", source, shares_key } as const)
            : null;
        }
        if (computationType === "simple") {
          const formula = asString(computation.formula);
          return formula ? ({ type: "simple", formula } as const) : null;
        }
        return null;
      })();
      if (!normalizedComputation) {
        return null;
      }
      const normalizedDefinition: ComputedDefinition = {
        key,
        label,
        category,
        order: asNumber(row.order) ?? Number.MAX_SAFE_INTEGER,
        unit: unit as ComputedDefinition["unit"],
        computation: normalizedComputation,
        // Entries are only checked to be strings before the cast.
        supported_cadences: normalizeStringArray(
          row.supported_cadences ?? row.supportedCadences,
        ) as ComputedDefinition["supported_cadences"],
        requires_external_data: normalizeStringArray(
          row.requires_external_data ?? row.requiresExternalData,
        ),
      };
      return normalizedDefinition;
    })
    .filter((entry): entry is ComputedDefinition => entry !== null);
}

/**
 * Validates a stored normalization summary.
 *
 * Accepts camelCase or snake_case keys; each count defaults to 0 when missing
 * or not numeric. Returns `null` when the input is not an object.
 */
function normalizeNormalizationSummary(value: unknown) {
  const row = asObject(value);
  if (!row) {
    return null;
  }
  return {
    surfaceRowCount:
      asNumber(row.surfaceRowCount ?? row.surface_row_count) ?? 0,
    detailRowCount: asNumber(row.detailRowCount ?? row.detail_row_count) ?? 0,
    kpiRowCount: asNumber(row.kpiRowCount ?? row.kpi_row_count) ?? 0,
    unmappedRowCount:
      asNumber(row.unmappedRowCount ?? row.unmapped_row_count) ?? 0,
    materialUnmappedRowCount:
      asNumber(
        row.materialUnmappedRowCount ?? row.material_unmapped_row_count,
      ) ?? 0,
    residualPrimaryCount:
      asNumber(row.residualPrimaryCount ?? row.residual_primary_count) ?? 0,
    residualDisclosureCount:
      asNumber(row.residualDisclosureCount ?? row.residual_disclosure_count) ??
      0,
    unsupportedConceptCount:
      asNumber(row.unsupportedConceptCount ?? row.unsupported_concept_count) ??
      0,
    issuerOverlayMatchCount:
      asNumber(row.issuerOverlayMatchCount ?? row.issuer_overlay_match_count) ??
      0,
    warnings: normalizeStringArray(row.warnings),
  } satisfies NormalizationSummary;
}

/**
 * Validates every JSON-shaped part of a snapshot in one call.
 *
 * @param input - Untrusted values for the snapshot's structured fields.
 * @returns The validated fields. When `statement_rows` is not an object, the
 *   validated `faithful_rows` are used for it instead.
 */
export function normalizeFilingTaxonomySnapshotPayload(input: {
  periods: unknown;
  faithful_rows: unknown;
  statement_rows: unknown;
  surface_rows: unknown;
  detail_rows: unknown;
  kpi_rows: unknown;
  computed_definitions: unknown;
  normalization_summary: unknown;
}) {
  const faithfulRows = normalizeStatementRows(input.faithful_rows);
  // faithfulRows is passed as the fallback, so both fields can share one object.
  const statementRows = normalizeStatementRows(
    input.statement_rows,
    faithfulRows,
  );
  return {
    periods: normalizePeriods(input.periods),
    faithful_rows: faithfulRows,
    statement_rows: statementRows,
    surface_rows: normalizeSurfaceRows(input.surface_rows),
    detail_rows: normalizeDetailRows(input.detail_rows),
    kpi_rows: normalizeKpiRows(input.kpi_rows),
    computed_definitions: normalizeComputedDefinitions(
      input.computed_definitions,
    ),
    normalization_summary: normalizeNormalizationSummary(
      input.normalization_summary,
    ),
  };
}

/** Returns a fresh statement-row map with an empty list for every statement kind. */
function emptyStatementRows(): StatementRowMap {
  return {
    income: [],
    balance: [],
    cash_flow: [],
    disclosure: [],
    equity: [],
    comprehensive_income: [],
  };
}

/** Returns a fresh surface-row map with an empty list for every statement kind. */
function emptySurfaceRows(): SurfaceRowMap {
  return {
    income: [],
    balance: [],
    cash_flow: [],
    disclosure: [],
    equity: [],
    comprehensive_income: [],
  };
}

/** Returns a fresh detail-row map with an empty group object for every statement kind. */
function emptyDetailRows(): DetailRowMap {
  return {
    income: {},
    balance: {},
    cash_flow: {},
    disclosure: {},
    equity: {},
    comprehensive_income: {},
  };
}

/**
 * Converts a stored snapshot row into a `FilingTaxonomySnapshotRecord`,
 * re-validating its JSON-shaped columns through
 * `normalizeFilingTaxonomySnapshotPayload`.
 */
function toSnapshotRecord(
  row: typeof filingTaxonomySnapshot.$inferSelect,
): FilingTaxonomySnapshotRecord {
  const normalized = normalizeFilingTaxonomySnapshotPayload({
    periods: row.periods,
    faithful_rows: row.faithful_rows,
    statement_rows: row.statement_rows,
    surface_rows: row.surface_rows,
    detail_rows: row.detail_rows,
    kpi_rows: row.kpi_rows,
    computed_definitions: row.computed_definitions,
    normalization_summary: row.normalization_summary,
  });
  return {
    id: row.id,
    filing_id: row.filing_id,
    ticker: row.ticker,
    filing_date: row.filing_date,
    filing_type: row.filing_type,
    parse_status:
      // Remainder of toSnapshotRecord, whose body starts on the preceding source line.
      row.parse_status,
    parse_error: row.parse_error,
    source: row.source,
    parser_engine: row.parser_engine,
    parser_version: row.parser_version,
    taxonomy_regime: row.taxonomy_regime,
    fiscal_pack: row.fiscal_pack,
    periods: normalized.periods,
    faithful_rows: normalized.faithful_rows,
    statement_rows: normalized.statement_rows,
    surface_rows: normalized.surface_rows,
    detail_rows: normalized.detail_rows,
    kpi_rows: normalized.kpi_rows,
    computed_definitions: normalized.computed_definitions,
    derived_metrics: row.derived_metrics ?? null,
    validation_result: row.validation_result ?? null,
    normalization_summary: normalized.normalization_summary,
    issuer_overlay_revision_id: row.issuer_overlay_revision_id ?? null,
    facts_count: row.facts_count,
    concepts_count: row.concepts_count,
    dimensions_count: row.dimensions_count,
    created_at: row.created_at,
    updated_at: row.updated_at,
  };
}

/**
 * Converts a stored context row into a `FilingTaxonomyContextRecord`;
 * missing segment/scenario JSON becomes `null`.
 */
function toContextRecord(
  row: typeof filingTaxonomyContext.$inferSelect,
): FilingTaxonomyContextRecord {
  return {
    id: row.id,
    snapshot_id: row.snapshot_id,
    context_id: row.context_id,
    entity_identifier: row.entity_identifier,
    entity_scheme: row.entity_scheme,
    period_start: row.period_start,
    period_end: row.period_end,
    period_instant: row.period_instant,
    segment_json: row.segment_json ?? null,
    scenario_json: row.scenario_json ?? null,
    created_at: row.created_at,
  };
}

/**
 * Converts a stored asset row into a `FilingTaxonomyAssetRecord`; the stored
 * score is passed through `asNumber`.
 */
function toAssetRecord(
  row: typeof filingTaxonomyAsset.$inferSelect,
): FilingTaxonomyAssetRecord {
  return {
    id: row.id,
    snapshot_id: row.snapshot_id,
    asset_type: row.asset_type,
    name: row.name,
    url: row.url,
    size_bytes: row.size_bytes,
    score: asNumber(row.score),
    is_selected: row.is_selected,
    created_at: row.created_at,
  };
}

/**
 * Converts a stored concept row into a `FilingTaxonomyConceptRecord`; the
 * stored presentation order is passed through `asNumber`.
 */
function toConceptRecord(
  row: typeof filingTaxonomyConcept.$inferSelect,
): FilingTaxonomyConceptRecord {
  return {
    id: row.id,
    snapshot_id: row.snapshot_id,
    concept_key: row.concept_key,
    qname: row.qname,
    namespace_uri: row.namespace_uri,
    local_name: row.local_name,
    label: row.label,
    is_extension: row.is_extension,
    balance: row.balance,
    period_type: row.period_type,
    data_type: row.data_type,
    statement_kind: row.statement_kind ?? null,
    role_uri: row.role_uri,
    authoritative_concept_key: row.authoritative_concept_key,
    mapping_method: row.mapping_method,
    surface_key: row.surface_key,
    detail_parent_surface_key: row.detail_parent_surface_key,
    kpi_key: row.kpi_key,
    residual_flag: row.residual_flag,
    presentation_order: asNumber(row.presentation_order),
    presentation_depth: row.presentation_depth,
    parent_concept_key: row.parent_concept_key,
    is_abstract: row.is_abstract,
    created_at: row.created_at,
  };
}

/**
 * Converts a stored fact row into a `FilingTaxonomyFactRecord`.
 *
 * @throws Error when the stored `value_num` cannot be read as a finite number.
 */
function toFactRecord(
  row: typeof filingTaxonomyFact.$inferSelect,
): FilingTaxonomyFactRecord {
  const value = asNumber(row.value_num);
  // value_num is required on the record, so an unreadable value is an error
  // rather than a null.
  if (value === null) {
    throw new Error(`Invalid value_num for taxonomy fact row ${row.id}`);
  }
  return {
    id: row.id,
    snapshot_id: row.snapshot_id,
    concept_key: row.concept_key,
    qname: row.qname,
    namespace_uri: row.namespace_uri,
    local_name: row.local_name,
    data_type: row.data_type,
    statement_kind: row.statement_kind ?? null,
    role_uri: row.role_uri,
    authoritative_concept_key: row.authoritative_concept_key,
    mapping_method: row.mapping_method,
    surface_key: row.surface_key,
    detail_parent_surface_key: row.detail_parent_surface_key,
    kpi_key: row.kpi_key,
    residual_flag: row.residual_flag,
    context_id: row.context_id,
    unit: row.unit,
    decimals: row.decimals,
    precision: row.precision,
    nil: row.nil,
    value_num: value,
    period_start: row.period_start,
    period_end: row.period_end,
    period_instant: row.period_instant,
    dimensions: row.dimensions,
    is_dimensionless: row.is_dimensionless,
    source_file: row.source_file,
    created_at: row.created_at,
  };
}

/**
 * Converts a stored metric validation row into a
 * `FilingTaxonomyMetricValidationRecord`. The four numeric columns are passed
 * through `asNumber`, and missing evidence pages become an empty array.
 */
function toMetricValidationRecord(
  row: typeof filingTaxonomyMetricValidation.$inferSelect,
): FilingTaxonomyMetricValidationRecord {
  return {
    id: row.id,
    snapshot_id: row.snapshot_id,
    metric_key: row.metric_key,
    taxonomy_value: asNumber(row.taxonomy_value),
    llm_value: asNumber(row.llm_value),
    absolute_diff: asNumber(row.absolute_diff),
    relative_diff: asNumber(row.relative_diff),
    status: row.status,
    evidence_pages: row.evidence_pages ?? [],
    pdf_url: row.pdf_url,
    provider: row.provider,
    model: row.model,
    error: row.error,
    created_at: row.created_at,
    updated_at: row.updated_at,
  };
}

/**
 * Loads the snapshot stored for a filing.
 *
 * @param filingId - Value matched against the snapshot's `filing_id` column.
 * @returns The snapshot record, or `null` when no row matches.
 */
export async function getFilingTaxonomySnapshotByFilingId(filingId: number) {
  const [row] = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.filing_id, filingId))
    .limit(1);
  return row ?
toSnapshotRecord(row) : null; } async function listFilingTaxonomyAssets(snapshotId: number) { const rows = await db .select() .from(filingTaxonomyAsset) .where(eq(filingTaxonomyAsset.snapshot_id, snapshotId)) .orderBy(desc(filingTaxonomyAsset.id)); return rows.map(toAssetRecord); } async function listFilingTaxonomyContexts(snapshotId: number) { const rows = await db .select() .from(filingTaxonomyContext) .where(eq(filingTaxonomyContext.snapshot_id, snapshotId)) .orderBy(desc(filingTaxonomyContext.id)); return rows.map(toContextRecord); } async function listFilingTaxonomyConcepts(snapshotId: number) { const rows = await db .select() .from(filingTaxonomyConcept) .where(eq(filingTaxonomyConcept.snapshot_id, snapshotId)) .orderBy(desc(filingTaxonomyConcept.id)); return rows.map(toConceptRecord); } async function listFilingTaxonomyFacts(snapshotId: number) { const rows = await db .select() .from(filingTaxonomyFact) .where(eq(filingTaxonomyFact.snapshot_id, snapshotId)) .orderBy(desc(filingTaxonomyFact.id)); return rows.map(toFactRecord); } async function listFilingTaxonomyMetricValidations(snapshotId: number) { const rows = await db .select() .from(filingTaxonomyMetricValidation) .where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId)) .orderBy(desc(filingTaxonomyMetricValidation.id)); return rows.map(toMetricValidationRecord); } export async function upsertFilingTaxonomySnapshot( input: UpsertFilingTaxonomySnapshotInput, ) { const now = new Date().toISOString(); const normalized = normalizeFilingTaxonomySnapshotPayload(input); return db.transaction(async (tx) => { const [saved] = await tx .insert(filingTaxonomySnapshot) .values({ filing_id: input.filing_id, ticker: input.ticker, filing_date: input.filing_date, filing_type: input.filing_type, parse_status: input.parse_status, parse_error: input.parse_error, source: input.source, parser_engine: input.parser_engine, parser_version: input.parser_version, taxonomy_regime: input.taxonomy_regime, fiscal_pack: 
input.fiscal_pack, periods: normalized.periods, faithful_rows: normalized.faithful_rows, statement_rows: normalized.statement_rows, surface_rows: normalized.surface_rows, detail_rows: normalized.detail_rows, kpi_rows: normalized.kpi_rows, computed_definitions: normalized.computed_definitions, derived_metrics: input.derived_metrics, validation_result: input.validation_result, normalization_summary: normalized.normalization_summary, issuer_overlay_revision_id: input.issuer_overlay_revision_id ?? null, facts_count: input.facts_count, concepts_count: input.concepts_count, dimensions_count: input.dimensions_count, created_at: now, updated_at: now, }) .onConflictDoUpdate({ target: filingTaxonomySnapshot.filing_id, set: { ticker: input.ticker, filing_date: input.filing_date, filing_type: input.filing_type, parse_status: input.parse_status, parse_error: input.parse_error, source: input.source, parser_engine: input.parser_engine, parser_version: input.parser_version, taxonomy_regime: input.taxonomy_regime, fiscal_pack: input.fiscal_pack, periods: normalized.periods, faithful_rows: normalized.faithful_rows, statement_rows: normalized.statement_rows, surface_rows: normalized.surface_rows, detail_rows: normalized.detail_rows, kpi_rows: normalized.kpi_rows, computed_definitions: normalized.computed_definitions, derived_metrics: input.derived_metrics, validation_result: input.validation_result, normalization_summary: normalized.normalization_summary, issuer_overlay_revision_id: input.issuer_overlay_revision_id ?? 
null, facts_count: input.facts_count, concepts_count: input.concepts_count, dimensions_count: input.dimensions_count, updated_at: now, }, }) .returning(); const snapshotId = saved.id; try { await tx .delete(filingTaxonomyAsset) .where(eq(filingTaxonomyAsset.snapshot_id, snapshotId)); await tx .delete(filingTaxonomyContext) .where(eq(filingTaxonomyContext.snapshot_id, snapshotId)); await tx .delete(filingTaxonomyConcept) .where(eq(filingTaxonomyConcept.snapshot_id, snapshotId)); await tx .delete(filingTaxonomyFact) .where(eq(filingTaxonomyFact.snapshot_id, snapshotId)); await tx .delete(filingTaxonomyMetricValidation) .where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId)); } catch (error) { throw new Error( `Failed to delete child records for snapshot ${snapshotId}: ${error}`, ); } if (input.contexts.length > 0) { try { await tx.insert(filingTaxonomyContext).values( input.contexts.map((context) => ({ snapshot_id: snapshotId, context_id: context.context_id, entity_identifier: context.entity_identifier, entity_scheme: context.entity_scheme, period_start: context.period_start, period_end: context.period_end, period_instant: context.period_instant, segment_json: context.segment_json, scenario_json: context.scenario_json, created_at: now, })), ); } catch (error) { throw new Error( `Failed to insert ${input.contexts.length} contexts for snapshot ${snapshotId}: ${error}`, ); } } if (input.assets.length > 0) { try { await tx.insert(filingTaxonomyAsset).values( input.assets.map((asset) => ({ snapshot_id: snapshotId, asset_type: asset.asset_type, name: asset.name, url: asset.url, size_bytes: asset.size_bytes, score: asNumericText(asset.score), is_selected: asset.is_selected, created_at: now, })), ); } catch (error) { throw new Error( `Failed to insert ${input.assets.length} assets for snapshot ${snapshotId}: ${error}`, ); } } if (input.concepts.length > 0) { try { await tx.insert(filingTaxonomyConcept).values( input.concepts.map((concept) => ({ snapshot_id: 
snapshotId, concept_key: concept.concept_key, qname: concept.qname, namespace_uri: concept.namespace_uri, local_name: concept.local_name, label: concept.label, is_extension: concept.is_extension, balance: concept.balance, period_type: concept.period_type, data_type: concept.data_type, statement_kind: concept.statement_kind, role_uri: concept.role_uri, authoritative_concept_key: concept.authoritative_concept_key, mapping_method: concept.mapping_method, surface_key: concept.surface_key, detail_parent_surface_key: concept.detail_parent_surface_key, kpi_key: concept.kpi_key, residual_flag: concept.residual_flag, presentation_order: asNumericText(concept.presentation_order), presentation_depth: concept.presentation_depth, parent_concept_key: concept.parent_concept_key, is_abstract: concept.is_abstract, created_at: now, })), ); } catch (error) { throw new Error( `Failed to insert ${input.concepts.length} concepts for snapshot ${snapshotId}: ${error}`, ); } } if (input.facts.length > 0) { try { await tx.insert(filingTaxonomyFact).values( input.facts.map((fact) => ({ snapshot_id: snapshotId, concept_key: fact.concept_key, qname: fact.qname, namespace_uri: fact.namespace_uri, local_name: fact.local_name, data_type: fact.data_type, statement_kind: fact.statement_kind, role_uri: fact.role_uri, authoritative_concept_key: fact.authoritative_concept_key, mapping_method: fact.mapping_method, surface_key: fact.surface_key, detail_parent_surface_key: fact.detail_parent_surface_key, kpi_key: fact.kpi_key, residual_flag: fact.residual_flag, context_id: fact.context_id, unit: fact.unit, decimals: fact.decimals, precision: fact.precision, nil: fact.nil, value_num: String(fact.value_num), period_start: fact.period_start, period_end: fact.period_end, period_instant: fact.period_instant, dimensions: fact.dimensions, is_dimensionless: fact.is_dimensionless, source_file: fact.source_file, created_at: now, })), ); } catch (error) { throw new Error( `Failed to insert ${input.facts.length} 
facts for snapshot ${snapshotId}: ${error}`, ); } } if (input.metric_validations.length > 0) { try { await tx.insert(filingTaxonomyMetricValidation).values( input.metric_validations.map((check) => ({ snapshot_id: snapshotId, metric_key: check.metric_key, taxonomy_value: asNumericText(check.taxonomy_value), llm_value: asNumericText(check.llm_value), absolute_diff: asNumericText(check.absolute_diff), relative_diff: asNumericText(check.relative_diff), status: check.status, evidence_pages: check.evidence_pages, pdf_url: check.pdf_url, provider: check.provider, model: check.model, error: check.error, created_at: now, updated_at: now, })), ); } catch (error) { throw new Error( `Failed to insert ${input.metric_validations.length} metric validations for snapshot ${snapshotId}: ${error}`, ); } } return toSnapshotRecord(saved); }); }

/**
 * Lists stored taxonomy snapshots for a ticker, one page at a time, ordered by
 * `filing_date` descending and then snapshot `id` descending.
 *
 * @param input.ticker - Ticker symbol; trimmed and upper-cased before matching.
 * @param input.window - `"10y"` keeps only snapshots whose `filing_date` is at
 *   or after `tenYearsAgoIso()`; `"all"` applies no date bound.
 * @param input.filingTypes - Optional filing-type filter; ignored when empty.
 * @param input.limit - Requested page size. Truncated and clamped to 1..120;
 *   defaults to 40 when omitted or `NaN`.
 * @param input.cursor - `nextCursor` from a previous page (a snapshot id as a
 *   decimal string). Values that do not parse to a positive integer are ignored.
 * @returns The page of snapshots (mapped through `toSnapshotRecord`) and the
 *   cursor for the following page, or `null` when there are no more rows.
 */
export async function listFilingTaxonomySnapshotsByTicker(input: {
  ticker: string;
  window: "10y" | "all";
  filingTypes?: Array<"10-K" | "10-Q">;
  limit?: number;
  cursor?: string | null;
}) {
  const requestedLimit = input.limit ?? 40;
  // Math.min/Math.max propagate NaN, which would otherwise reach `.limit(NaN)`.
  const safeLimit = Number.isNaN(requestedLimit)
    ? 40
    : Math.min(Math.max(Math.trunc(requestedLimit), 1), 120);
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;

  const constraints = [
    eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase()),
  ];

  if (input.window === "10y") {
    constraints.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }

  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    // The result is ordered by (filing_date, id), so the cursor must be applied
    // to the same pair. Filtering on `id` alone skips or repeats rows whenever
    // ids are not monotonic with filing dates (e.g. an older filing ingested
    // after a newer one).
    const [anchor] = await db
      .select({ filing_date: filingTaxonomySnapshot.filing_date })
      .from(filingTaxonomySnapshot)
      .where(eq(filingTaxonomySnapshot.id, cursorId))
      .limit(1);

    constraints.push(
      anchor
        ? sql`(${filingTaxonomySnapshot.filing_date} < ${anchor.filing_date} or (${filingTaxonomySnapshot.filing_date} = ${anchor.filing_date} and ${filingTaxonomySnapshot.id} < ${cursorId}))`
        : // Cursor row no longer exists: fall back to the plain id bound.
          lt(filingTaxonomySnapshot.id, cursorId),
    );
  }

  if (input.filingTypes && input.filingTypes.length > 0) {
    constraints.push(
      inArray(filingTaxonomySnapshot.filing_type, input.filingTypes),
    );
  }

  // Fetch one extra row so we can tell whether another page exists.
  const rows = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(and(...constraints))
    .orderBy(
      desc(filingTaxonomySnapshot.filing_date),
      desc(filingTaxonomySnapshot.id),
    )
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const usedRows = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore
    ? String(usedRows[usedRows.length - 1]?.id ?? "")
    : null;

  return {
    snapshots: usedRows.map(toSnapshotRecord),
    nextCursor,
  };
}

/**
 * Counts a ticker's snapshots per parse status.
 *
 * @param ticker - Ticker symbol; trimmed and upper-cased before matching.
 * @returns An object with `ready`, `partial` and `failed` counts. Statuses with
 *   no rows stay at 0.
 */
export async function countFilingTaxonomySnapshotStatuses(ticker: string) {
  const rows = await db
    .select({
      status: filingTaxonomySnapshot.parse_status,
      count: sql<number>`count(*)`,
    })
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.ticker, ticker.trim().toUpperCase()))
    .groupBy(filingTaxonomySnapshot.parse_status);

  return rows.reduce<Record<FilingTaxonomyParseStatus, number>>(
    (acc, row) => {
      // Number() guards against drivers that hand the aggregate back as text.
      acc[row.status] = Number(row.count);
      return acc;
    },
    {
      ready: 0,
      partial: 0,
      failed: 0,
    },
  );
}

/**
 * Lists taxonomy facts for a ticker, one page at a time, ordered by fact `id`
 * descending.
 *
 * @param input.ticker - Ticker symbol; trimmed and upper-cased before matching.
 * @param input.window - `"10y"` keeps only facts whose snapshot `filing_date`
 *   is at or after `tenYearsAgoIso()`; `"all"` applies no date bound.
 * @param input.statement - Optional statement-kind filter.
 * @param input.filingTypes - Optional filing-type filter; ignored when empty.
 * @param input.cursor - `nextCursor` from a previous page (a fact id as a
 *   decimal string). Values that do not parse to a positive integer are ignored.
 * @param input.limit - Requested page size. Truncated and clamped to 1..10000;
 *   defaults to 500 when omitted or `NaN`.
 * @returns The page of facts and the cursor for the following page, or `null`
 *   when there are no more rows.
 * @throws Error when `asNumber` returns `null` for a stored `value_num`.
 */
export async function listTaxonomyFactsByTicker(input: {
  ticker: string;
  window: "10y" | "all";
  statement?: FinancialStatementKind;
  filingTypes?: Array<"10-K" | "10-Q">;
  cursor?: string | null;
  limit?: number;
}) {
  const requestedLimit = input.limit ?? 500;
  // Math.min/Math.max propagate NaN, which would otherwise reach `.limit(NaN)`.
  const safeLimit = Number.isNaN(requestedLimit)
    ? 500
    : Math.min(Math.max(Math.trunc(requestedLimit), 1), 10000);
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;

  const conditions = [
    eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase()),
  ];

  if (input.window === "10y") {
    conditions.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }

  if (input.statement) {
    conditions.push(eq(filingTaxonomyFact.statement_kind, input.statement));
  }

  if (input.filingTypes && input.filingTypes.length > 0) {
    conditions.push(
      inArray(filingTaxonomySnapshot.filing_type, input.filingTypes),
    );
  }

  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    conditions.push(lt(filingTaxonomyFact.id, cursorId));
  }

  // Fetch one extra row so we can tell whether another page exists.
  const rows = await db
    .select({
      id: filingTaxonomyFact.id,
      snapshot_id: filingTaxonomyFact.snapshot_id,
      filing_id: filingTaxonomySnapshot.filing_id,
      filing_date: filingTaxonomySnapshot.filing_date,
      statement_kind: filingTaxonomyFact.statement_kind,
      role_uri: filingTaxonomyFact.role_uri,
      concept_key: filingTaxonomyFact.concept_key,
      qname: filingTaxonomyFact.qname,
      namespace_uri: filingTaxonomyFact.namespace_uri,
      local_name: filingTaxonomyFact.local_name,
      value_num: filingTaxonomyFact.value_num,
      context_id: filingTaxonomyFact.context_id,
      unit: filingTaxonomyFact.unit,
      decimals: filingTaxonomyFact.decimals,
      period_start: filingTaxonomyFact.period_start,
      period_end: filingTaxonomyFact.period_end,
      period_instant: filingTaxonomyFact.period_instant,
      dimensions: filingTaxonomyFact.dimensions,
      is_dimensionless: filingTaxonomyFact.is_dimensionless,
      source_file: filingTaxonomyFact.source_file,
    })
    .from(filingTaxonomyFact)
    .innerJoin(
      filingTaxonomySnapshot,
      eq(filingTaxonomyFact.snapshot_id, filingTaxonomySnapshot.id),
    )
    .where(and(...conditions))
    .orderBy(desc(filingTaxonomyFact.id))
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const used = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore ? String(used[used.length - 1]?.id ?? "") : null;

  const facts: TaxonomyFactRow[] = used.map((row) => {
    const value = asNumber(row.value_num);
    if (value === null) {
      throw new Error(`Invalid value_num in taxonomy fact ${row.id}`);
    }

    return {
      id: row.id,
      snapshotId: row.snapshot_id,
      filingId: row.filing_id,
      filingDate: row.filing_date,
      statement: row.statement_kind,
      roleUri: row.role_uri,
      conceptKey: row.concept_key,
      qname: row.qname,
      namespaceUri: row.namespace_uri,
      localName: row.local_name,
      value,
      contextId: row.context_id,
      unit: row.unit,
      decimals: row.decimals,
      periodStart: row.period_start,
      periodEnd: row.period_end,
      periodInstant: row.period_instant,
      dimensions: row.dimensions,
      isDimensionless: row.is_dimensionless,
      sourceFile: row.source_file,
    };
  });

  return {
    facts,
    nextCursor,
  };
}

/**
 * Loads the taxonomy assets attached to the given snapshots, ordered by asset
 * `id` descending and mapped through `toAssetRecord`.
 *
 * @param snapshotIds - Snapshot ids to load assets for.
 * @returns The matching asset records; an empty array (without querying) when
 *   `snapshotIds` is empty.
 */
async function listTaxonomyAssetsBySnapshotIds(snapshotIds: number[]) {
  if (snapshotIds.length === 0) {
    return [];
  }

  const rows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(inArray(filingTaxonomyAsset.snapshot_id, snapshotIds))
    .orderBy(desc(filingTaxonomyAsset.id));

  return rows.map(toAssetRecord);
}

/**
 * Loads the taxonomy concepts attached to the given snapshots, ordered by
 * concept `id` descending and mapped through `toConceptRecord`.
 *
 * @param snapshotIds - Snapshot ids to load concepts for.
 * @returns The matching concept records; an empty array (without querying)
 *   when `snapshotIds` is empty.
 */
export async function listFilingTaxonomyConceptsBySnapshotIds(
  snapshotIds: number[],
) {
  if (snapshotIds.length === 0) {
    return [];
  }

  const rows = await db
    .select()
    .from(filingTaxonomyConcept)
    .where(inArray(filingTaxonomyConcept.snapshot_id, snapshotIds))
    .orderBy(desc(filingTaxonomyConcept.id));

  return rows.map(toConceptRecord);
}

/** Module-private helpers exposed under a single named export. */
export const __filingTaxonomyInternals = {
  normalizeFilingTaxonomySnapshotPayload,
  toSnapshotRecord,
};