Files
Neon-Desk/lib/server/repos/filing-taxonomy.ts
francy51 14a7773504 Add consolidated disclosure statement type
Create unified disclosure statement to organize footnote disclosures
separate from primary financial statements. Disclosures are now grouped
by type (tax, debt, securities, derivatives, leases, intangibles, ma,
revenue, cash_flow) in a dedicated statement type for cleaner UI
presentation.
2026-03-16 18:54:23 -04:00

1600 lines
48 KiB
TypeScript

import { and, desc, eq, gte, inArray, lt, sql } from "drizzle-orm";
import type { ComputedDefinition } from "@/lib/generated";
import type {
DetailFinancialRow,
Filing,
FinancialStatementKind,
MetricValidationResult,
NormalizationSummary,
StructuredKpiRow,
SurfaceDetailMap,
SurfaceFinancialRow,
TaxonomyDimensionMember,
TaxonomyFactRow,
TaxonomyStatementRow,
} from "@/lib/types";
import { db, getSqliteClient } from "@/lib/server/db";
import { withFinancialIngestionSchemaRetry } from "@/lib/server/db/financial-ingestion-schema";
import {
filingTaxonomyAsset,
filingTaxonomyConcept,
filingTaxonomyContext,
filingTaxonomyFact,
filingTaxonomyMetricValidation,
filingTaxonomySnapshot,
} from "@/lib/server/db/schema";
/** Parse outcome recorded in a snapshot's `parse_status` field. */
export type FilingTaxonomyParseStatus = "ready" | "partial" | "failed";
/** Allowed values for a snapshot's `source` field. */
export type FilingTaxonomySource =
| "xbrl_instance"
| "xbrl_instance_with_linkbase"
| "legacy_html_fallback";
/** Allowed values for the `asset_type` field of a taxonomy asset. */
export type FilingTaxonomyAssetType =
| "instance"
| "schema"
| "presentation"
| "label"
| "calculation"
| "definition"
| "pdf"
| "other";
/**
 * One entry of a snapshot's `periods` list.
 *
 * `normalizePeriods` requires every field except `periodStart` and
 * `periodEnd` to be present; those two may be null.
 */
export type FilingTaxonomyPeriod = {
id: string;
filingId: number;
accessionNumber: string;
filingDate: string;
periodStart: string | null;
periodEnd: string | null;
filingType: "10-K" | "10-Q";
periodLabel: string;
};
/**
 * A taxonomy snapshot in the shape this module hands back to callers
 * (the return type of `toSnapshotRecord`).
 */
export type FilingTaxonomySnapshotRecord = {
id: number;
filing_id: number;
ticker: string;
filing_date: string;
filing_type: "10-K" | "10-Q";
parse_status: FilingTaxonomyParseStatus;
parse_error: string | null;
source: FilingTaxonomySource;
parser_engine: string;
parser_version: string;
taxonomy_regime: "us-gaap" | "ifrs-full" | "unknown";
fiscal_pack: string | null;
periods: FilingTaxonomyPeriod[];
// The four row payloads below are keyed by statement kind.
faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
kpi_rows: StructuredKpiRow[];
computed_definitions: ComputedDefinition[];
derived_metrics: Filing["metrics"];
validation_result: MetricValidationResult | null;
normalization_summary: NormalizationSummary | null;
// Stored counters; this module passes them through unchanged.
facts_count: number;
concepts_count: number;
dimensions_count: number;
created_at: string;
updated_at: string;
};
/**
 * A stored taxonomy context belonging to one snapshot (the return type of
 * `toContextRecord`). `segment_json` and `scenario_json` are null when the
 * stored row has no value for them.
 */
export type FilingTaxonomyContextRecord = {
id: number;
snapshot_id: number;
context_id: string;
entity_identifier: string | null;
entity_scheme: string | null;
period_start: string | null;
period_end: string | null;
period_instant: string | null;
segment_json: Record<string, unknown> | null;
scenario_json: Record<string, unknown> | null;
created_at: string;
};
/**
 * A stored taxonomy asset belonging to one snapshot (the return type of
 * `toAssetRecord`). `score` is exposed as a number here; the write path
 * persists it as numeric text via `asNumericText`.
 */
export type FilingTaxonomyAssetRecord = {
id: number;
snapshot_id: number;
asset_type: FilingTaxonomyAssetType;
name: string;
url: string;
size_bytes: number | null;
score: number | null;
is_selected: boolean;
created_at: string;
};
/**
 * A stored taxonomy concept belonging to one snapshot (the return type of
 * `toConceptRecord`).
 */
export type FilingTaxonomyConceptRecord = {
id: number;
snapshot_id: number;
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
label: string | null;
is_extension: boolean;
balance: string | null;
period_type: string | null;
data_type: string | null;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
// Mapping fields; the same set appears on FilingTaxonomyFactRecord.
authoritative_concept_key: string | null;
mapping_method: string | null;
surface_key: string | null;
detail_parent_surface_key: string | null;
kpi_key: string | null;
residual_flag: boolean;
// Presentation placement; `presentation_order` is converted with asNumber on read.
presentation_order: number | null;
presentation_depth: number | null;
parent_concept_key: string | null;
is_abstract: boolean;
created_at: string;
};
/**
 * A stored taxonomy fact belonging to one snapshot (the return type of
 * `toFactRecord`).
 */
export type FilingTaxonomyFactRecord = {
id: number;
snapshot_id: number;
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
data_type: string | null;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
// Mapping fields; the same set appears on FilingTaxonomyConceptRecord.
authoritative_concept_key: string | null;
mapping_method: string | null;
surface_key: string | null;
detail_parent_surface_key: string | null;
kpi_key: string | null;
residual_flag: boolean;
context_id: string;
unit: string | null;
decimals: string | null;
precision: string | null;
nil: boolean;
// Always a finite number: toFactRecord throws when the stored value cannot be read as one.
value_num: number;
period_start: string | null;
period_end: string | null;
period_instant: string | null;
dimensions: TaxonomyDimensionMember[];
is_dimensionless: boolean;
source_file: string | null;
created_at: string;
};
/**
 * A stored per-metric validation row belonging to one snapshot (the return
 * type of `toMetricValidationRecord`). `metric_key` is restricted to the keys
 * of `Filing["metrics"]`.
 */
export type FilingTaxonomyMetricValidationRecord = {
id: number;
snapshot_id: number;
metric_key: keyof NonNullable<Filing["metrics"]>;
// The four numeric fields are converted with asNumber on read.
taxonomy_value: number | null;
llm_value: number | null;
absolute_diff: number | null;
relative_diff: number | null;
status: "not_run" | "matched" | "mismatch" | "error";
// Read back as an empty array when the stored value is missing.
evidence_pages: number[];
pdf_url: string | null;
provider: string | null;
model: string | null;
error: string | null;
created_at: string;
updated_at: string;
};
/**
 * Input accepted by `upsertFilingTaxonomySnapshot`.
 *
 * The leading fields mirror FilingTaxonomySnapshotRecord without `id` and the
 * timestamps. The trailing arrays (`contexts`, `assets`, `concepts`, `facts`,
 * `metric_validations`) are the child rows for the snapshot; their element
 * shapes mirror the corresponding *Record types without `id`, `snapshot_id`
 * and the timestamps.
 */
export type UpsertFilingTaxonomySnapshotInput = {
filing_id: number;
ticker: string;
filing_date: string;
filing_type: "10-K" | "10-Q";
parse_status: FilingTaxonomyParseStatus;
parse_error: string | null;
source: FilingTaxonomySource;
parser_engine: string;
parser_version: string;
taxonomy_regime: "us-gaap" | "ifrs-full" | "unknown";
fiscal_pack: string | null;
periods: FilingTaxonomyPeriod[];
faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
kpi_rows: StructuredKpiRow[];
computed_definitions: ComputedDefinition[];
derived_metrics: Filing["metrics"];
validation_result: MetricValidationResult | null;
normalization_summary: NormalizationSummary | null;
facts_count: number;
concepts_count: number;
dimensions_count: number;
// Child rows: contexts.
contexts: Array<{
context_id: string;
entity_identifier: string | null;
entity_scheme: string | null;
period_start: string | null;
period_end: string | null;
period_instant: string | null;
segment_json: Record<string, unknown> | null;
scenario_json: Record<string, unknown> | null;
}>;
// Child rows: assets.
assets: Array<{
asset_type: FilingTaxonomyAssetType;
name: string;
url: string;
size_bytes: number | null;
score: number | null;
is_selected: boolean;
}>;
// Child rows: concepts.
concepts: Array<{
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
label: string | null;
is_extension: boolean;
balance: string | null;
period_type: string | null;
data_type: string | null;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
authoritative_concept_key: string | null;
mapping_method: string | null;
surface_key: string | null;
detail_parent_surface_key: string | null;
kpi_key: string | null;
residual_flag: boolean;
presentation_order: number | null;
presentation_depth: number | null;
parent_concept_key: string | null;
is_abstract: boolean;
}>;
// Child rows: facts.
facts: Array<{
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
data_type: string | null;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
authoritative_concept_key: string | null;
mapping_method: string | null;
surface_key: string | null;
detail_parent_surface_key: string | null;
kpi_key: string | null;
residual_flag: boolean;
context_id: string;
unit: string | null;
decimals: string | null;
precision: string | null;
nil: boolean;
value_num: number;
period_start: string | null;
period_end: string | null;
period_instant: string | null;
dimensions: TaxonomyDimensionMember[];
is_dimensionless: boolean;
source_file: string | null;
}>;
// Child rows: per-metric validation results.
metric_validations: Array<{
metric_key: keyof NonNullable<Filing["metrics"]>;
taxonomy_value: number | null;
llm_value: number | null;
absolute_diff: number | null;
relative_diff: number | null;
status: "not_run" | "matched" | "mismatch" | "error";
evidence_pages: number[];
pdf_url: string | null;
provider: string | null;
model: string | null;
error: string | null;
}>;
};
/**
 * Statement kinds the normalizers in this module iterate over.
 *
 * Must list every key of the per-statement maps built by
 * `emptyStatementRows`, `emptySurfaceRows` and `emptyDetailRows`. A kind that
 * is missing here is never copied out of an incoming payload, so its rows are
 * silently replaced by the empty default. "disclosure" was previously missing,
 * which dropped all disclosure rows during normalization.
 */
const FINANCIAL_STATEMENT_KINDS = [
  "income",
  "balance",
  "cash_flow",
  "disclosure",
  "equity",
  "comprehensive_income",
] as const satisfies FinancialStatementKind[];
// Per-statement-kind containers used by the normalizers and the empty*Rows factories.
type StatementRowMap = Record<FinancialStatementKind, TaxonomyStatementRow[]>;
type SurfaceRowMap = Record<FinancialStatementKind, SurfaceFinancialRow[]>;
type DetailRowMap = Record<FinancialStatementKind, SurfaceDetailMap>;
/**
 * Returns the UTC calendar date ten years before today as `YYYY-MM-DD`.
 *
 * A day that does not exist in the target year (Feb 29) rolls forward to the
 * next valid date, as with `Date.prototype.setUTCFullYear`.
 */
function tenYearsAgoIso() {
  const today = new Date();
  const shifted = new Date(
    Date.UTC(
      today.getUTCFullYear() - 10,
      today.getUTCMonth(),
      today.getUTCDate(),
    ),
  );
  return shifted.toISOString().slice(0, 10);
}
/**
 * Coerces an unknown value to a finite number.
 *
 * @param value - A number, a numeric string, or anything else.
 * @returns The finite number, or `null` when the value is not a number or
 *   string, is NaN/Infinity, or does not parse to a finite number.
 *
 * Empty and whitespace-only strings return `null`. `Number("")` evaluates to
 * 0, so without this guard a blank value would be read as a real zero.
 */
function asNumber(value: unknown): number | null {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : null;
  }
  if (typeof value === "string") {
    // Blank text carries no numeric information; do not let it coerce to 0.
    if (value.trim() === "") {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}
/**
 * Renders a finite number as text.
 *
 * @returns `String(value)` for finite numbers; `null` for `null`, NaN and
 *   ±Infinity.
 */
function asNumericText(value: number | null) {
  return value !== null && Number.isFinite(value) ? String(value) : null;
}
/**
 * Narrows an unknown value to a string-keyed object.
 *
 * @returns The same reference typed as `Record<string, unknown>`, or `null`
 *   for `null`, arrays and every non-object value.
 */
function asObject(value: unknown) {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
/** Returns `value` when it is a string (including the empty string), otherwise `null`. */
function asString(value: unknown) {
  if (typeof value === "string") {
    return value;
  }
  return null;
}
/**
 * Returns `value` when it is a string, otherwise `null`.
 *
 * `null`, `undefined` and every non-string value all map to `null`.
 */
function asNullableString(value: unknown) {
  if (typeof value === "string") {
    return value;
  }
  return null;
}
/**
 * Coerces an unknown value to a boolean.
 *
 * Booleans pass through unchanged; anything else is converted by JavaScript
 * truthiness, so non-empty strings such as "false" yield `true`.
 */
function asBoolean(value: unknown) {
  if (typeof value === "boolean") {
    return value;
  }
  return Boolean(value);
}
/**
 * Narrows an unknown value to a statement kind.
 *
 * Accepts every key used by the per-statement maps in this module, including
 * "disclosure". That kind was previously rejected, so a row tagged
 * "disclosure" lost its own statement value.
 *
 * @returns The value itself when it is a recognised kind, otherwise `null`.
 */
function asStatementKind(value: unknown): FinancialStatementKind | null {
  return value === "income" ||
    value === "balance" ||
    value === "cash_flow" ||
    value === "disclosure" ||
    value === "equity" ||
    value === "comprehensive_income"
    ? value
    : null;
}
/**
 * Converts an object of arbitrary values into an object of `number | null`.
 *
 * Keys are kept as-is and each value is passed through `asNumber`. A
 * non-object input (including arrays and `null`) yields an empty object.
 */
function normalizeNumberMap(value: unknown) {
  const source = asObject(value);
  if (source === null) {
    return {};
  }
  const pairs = Object.entries(source).map(
    ([name, raw]): [string, number | null] => [name, asNumber(raw)],
  );
  return Object.fromEntries(pairs);
}
/**
 * Converts an object of arbitrary values into an object of `string | null`.
 *
 * Keys are kept as-is and each value is passed through `asNullableString`. A
 * non-object input (including arrays and `null`) yields an empty object.
 */
function normalizeNullableStringMap(value: unknown) {
  const source = asObject(value);
  if (source === null) {
    return {};
  }
  const pairs = Object.entries(source).map(
    ([name, raw]): [string, string | null] => [name, asNullableString(raw)],
  );
  return Object.fromEntries(pairs);
}
/**
 * Keeps only the string elements of an array, in their original order.
 *
 * @returns A new array of strings; empty when `value` is not an array.
 */
function normalizeStringArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter(
    (candidate): candidate is string => typeof candidate === "string",
  );
}
/**
 * Converts each element of an array with `asNumber` and drops the elements
 * that do not convert, keeping the original order.
 *
 * @returns A new array of numbers; empty when `value` is not an array.
 */
function normalizeNumberArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }
  const converted = value.map((candidate) => asNumber(candidate));
  return converted.filter(
    (candidate): candidate is number => candidate !== null,
  );
}
/**
 * Normalizes a stored `periods` payload into well-formed period entries.
 *
 * Each field is read from its camelCase key first, then from the snake_case
 * key. An entry is dropped when it is not an object or when `id`, `filingId`,
 * `accessionNumber`, `filingDate`, `filingType` ("10-K" or "10-Q") or
 * `periodLabel` is missing or empty. `periodStart` and `periodEnd` may be null.
 *
 * @returns The valid entries in input order; empty when `value` is not an array.
 */
function normalizePeriods(value: unknown): FilingTaxonomyPeriod[] {
  if (!Array.isArray(value)) {
    return [];
  }

  const toPeriod = (candidate: unknown) => {
    const raw = asObject(candidate);
    if (raw === null) {
      return null;
    }

    const id = asString(raw.id);
    const filingId = asNumber(raw.filingId ?? raw.filing_id);
    const accessionNumber = asString(
      raw.accessionNumber ?? raw.accession_number,
    );
    const filingDate = asString(raw.filingDate ?? raw.filing_date);
    const periodLabel = asString(raw.periodLabel ?? raw.period_label);

    // "10-K" wins when the two spellings of the key disagree.
    let filingType: "10-K" | "10-Q" | null = null;
    if (raw.filingType === "10-K" || raw.filing_type === "10-K") {
      filingType = "10-K";
    } else if (raw.filingType === "10-Q" || raw.filing_type === "10-Q") {
      filingType = "10-Q";
    }

    if (!id || !accessionNumber || !filingDate || !periodLabel) {
      return null;
    }
    if (filingId === null || !filingType) {
      return null;
    }

    return {
      id,
      filingId,
      accessionNumber,
      filingDate,
      periodStart: asNullableString(raw.periodStart ?? raw.period_start),
      periodEnd: asNullableString(raw.periodEnd ?? raw.period_end),
      filingType,
      periodLabel,
    } satisfies FilingTaxonomyPeriod;
  };

  return value
    .map((candidate) => toPeriod(candidate))
    .filter((period): period is FilingTaxonomyPeriod => period !== null);
}
/**
 * Normalizes a per-statement map of taxonomy statement rows.
 *
 * For every kind in FINANCIAL_STATEMENT_KINDS the matching array is read from
 * `value`; each field is read from its camelCase key first, then from the
 * snake_case key. A row is dropped when it is not an object or when `key`,
 * `label`, `conceptKey`, `qname`, `namespaceUri` or `localName` is missing or
 * empty. `key` falls back to the concept key.
 *
 * @param value - Raw payload, expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is (same reference) when `value` is not an
 *   object.
 * @returns A fresh map, or `fallbackRows`.
 */
function normalizeStatementRows(
  value: unknown,
  fallbackRows: StatementRowMap = emptyStatementRows(),
): StatementRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  const result = emptyStatementRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const candidates = source[statement];
    const entries: unknown[] = Array.isArray(candidates) ? candidates : [];

    result[statement] = entries
      .map((candidate) => {
        const raw = asObject(candidate);
        if (raw === null) {
          return null;
        }

        const conceptKey = asString(raw.conceptKey ?? raw.concept_key);
        const key = asString(raw.key) ?? conceptKey;
        const label = asString(raw.label);
        const qname = asString(raw.qname);
        const namespaceUri = asString(raw.namespaceUri ?? raw.namespace_uri);
        const localName = asString(raw.localName ?? raw.local_name);

        if (!key || !label || !conceptKey) {
          return null;
        }
        if (!qname || !namespaceUri || !localName) {
          return null;
        }

        return {
          key,
          label,
          conceptKey,
          qname,
          namespaceUri,
          localName,
          isExtension: asBoolean(raw.isExtension ?? raw.is_extension),
          // A row without a recognised statement inherits the bucket it sits in.
          statement: asStatementKind(raw.statement) ?? statement,
          roleUri: asNullableString(raw.roleUri ?? raw.role_uri),
          order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
          depth: asNumber(raw.depth) ?? 0,
          parentKey: asNullableString(raw.parentKey ?? raw.parent_key),
          values: normalizeNumberMap(raw.values),
          units: normalizeNullableStringMap(raw.units),
          hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
          sourceFactIds: normalizeNumberArray(
            raw.sourceFactIds ?? raw.source_fact_ids,
          ),
        };
      })
      .filter((row): row is TaxonomyStatementRow => row !== null);
  }

  return result;
}
/**
 * Normalizes a per-statement map of surface rows.
 *
 * For every kind in FINANCIAL_STATEMENT_KINDS the matching array is read from
 * `value`; each field is read from its camelCase key first, then from the
 * snake_case key. A row is dropped when it is not an object or when `key`,
 * `label`, `category` or `unit` is missing or empty. Optional fields
 * (`templateSection`, `statement`, `detailCount`, `resolutionMethod`,
 * `confidence`, `warningCodes`) are set only when the raw row carries an
 * accepted value.
 *
 * @param value - Raw payload, expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is (same reference) when `value` is not an
 *   object.
 * @returns A fresh map, or `fallbackRows`.
 */
function normalizeSurfaceRows(
  value: unknown,
  fallbackRows: SurfaceRowMap = emptySurfaceRows(),
): SurfaceRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  const toSurfaceRow = (candidate: unknown): SurfaceFinancialRow | null => {
    const raw = asObject(candidate);
    if (raw === null) {
      return null;
    }

    const key = asString(raw.key);
    const label = asString(raw.label);
    const category = asString(raw.category);
    const unit = asString(raw.unit);
    if (!(key && label && category && unit)) {
      return null;
    }

    const surfaceRow: SurfaceFinancialRow = {
      key,
      label,
      category: category as SurfaceFinancialRow["category"],
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      unit: unit as SurfaceFinancialRow["unit"],
      values: normalizeNumberMap(raw.values),
      sourceConcepts: normalizeStringArray(
        raw.sourceConcepts ?? raw.source_concepts,
      ),
      sourceRowKeys: normalizeStringArray(
        raw.sourceRowKeys ?? raw.source_row_keys,
      ),
      sourceFactIds: normalizeNumberArray(
        raw.sourceFactIds ?? raw.source_fact_ids,
      ),
      formulaKey: asNullableString(raw.formulaKey ?? raw.formula_key),
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
      resolvedSourceRowKeys: normalizeNullableStringMap(
        raw.resolvedSourceRowKeys ?? raw.resolved_source_row_keys,
      ),
    };

    // Optional fields are appended in a fixed order so the resulting key
    // order stays stable.
    const templateSection = asString(
      raw.templateSection ?? raw.template_section,
    );
    if (templateSection) {
      surfaceRow.templateSection =
        templateSection as SurfaceFinancialRow["templateSection"];
    }

    const statementKind = asStatementKind(raw.statement);
    if (
      statementKind === "income" ||
      statementKind === "balance" ||
      statementKind === "cash_flow"
    ) {
      surfaceRow.statement = statementKind;
    }

    const detailCount = asNumber(raw.detailCount ?? raw.detail_count);
    if (detailCount !== null) {
      surfaceRow.detailCount = detailCount;
    }

    const resolutionMethod = raw.resolutionMethod ?? raw.resolution_method;
    if (
      resolutionMethod === "direct" ||
      resolutionMethod === "surface_bridge" ||
      resolutionMethod === "formula_derived" ||
      resolutionMethod === "not_meaningful"
    ) {
      surfaceRow.resolutionMethod = resolutionMethod;
    }

    const confidence = raw.confidence;
    if (
      confidence === "high" ||
      confidence === "medium" ||
      confidence === "low"
    ) {
      surfaceRow.confidence = confidence;
    }

    const warningCodes = normalizeStringArray(
      raw.warningCodes ?? raw.warning_codes,
    );
    if (warningCodes.length > 0) {
      surfaceRow.warningCodes = warningCodes;
    }

    return surfaceRow;
  };

  const result = emptySurfaceRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const candidates = source[statement];
    const entries: unknown[] = Array.isArray(candidates) ? candidates : [];
    result[statement] = entries
      .map((candidate) => toSurfaceRow(candidate))
      .filter((row): row is SurfaceFinancialRow => row !== null);
  }
  return result;
}
/**
 * Normalizes a per-statement map of detail rows grouped by surface key.
 *
 * For every kind in FINANCIAL_STATEMENT_KINDS the matching object is read from
 * `value` and each of its entries (surface key to array of rows) is
 * normalized. Each field is read from its camelCase key first, then from the
 * snake_case key. A row is dropped when it is not an object or when `key`,
 * `label`, `conceptKey`, `qname`, `namespaceUri` or `localName` is missing or
 * empty. A surface key whose value is not an array maps to an empty array.
 *
 * @param value - Raw payload, expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is (same reference) when `value` is not an
 *   object.
 * @returns A fresh map, or `fallbackRows`.
 */
function normalizeDetailRows(
  value: unknown,
  fallbackRows: DetailRowMap = emptyDetailRows(),
): DetailRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  const toDetailRow = (candidate: unknown, surfaceKey: string) => {
    const raw = asObject(candidate);
    if (raw === null) {
      return null;
    }

    const conceptKey = asString(raw.conceptKey ?? raw.concept_key);
    const key = asString(raw.key) ?? conceptKey;
    const label = asString(raw.label);
    const qname = asString(raw.qname);
    const namespaceUri = asString(raw.namespaceUri ?? raw.namespace_uri);
    const localName = asString(raw.localName ?? raw.local_name);

    if (!key || !label || !conceptKey) {
      return null;
    }
    if (!qname || !namespaceUri || !localName) {
      return null;
    }

    return {
      key,
      // A row without its own parent key belongs to the group it was found in.
      parentSurfaceKey:
        asString(raw.parentSurfaceKey ?? raw.parent_surface_key) ?? surfaceKey,
      label,
      conceptKey,
      qname,
      namespaceUri,
      localName,
      unit: asNullableString(raw.unit),
      values: normalizeNumberMap(raw.values),
      sourceFactIds: normalizeNumberArray(
        raw.sourceFactIds ?? raw.source_fact_ids,
      ),
      isExtension: asBoolean(raw.isExtension ?? raw.is_extension),
      dimensionsSummary: normalizeStringArray(
        raw.dimensionsSummary ?? raw.dimensions_summary,
      ),
      residualFlag: asBoolean(raw.residualFlag ?? raw.residual_flag),
    };
  };

  const result = emptyDetailRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const groups = asObject(source[statement]) ?? {};
    result[statement] = Object.fromEntries(
      Object.entries(groups).map(([surfaceKey, candidates]) => {
        const detailRows = Array.isArray(candidates)
          ? candidates
              .map((candidate) => toDetailRow(candidate, surfaceKey))
              .filter((row): row is DetailFinancialRow => row !== null)
          : [];
        return [surfaceKey, detailRows];
      }),
    );
  }
  return result;
}
/**
 * Normalizes a stored `kpi_rows` payload.
 *
 * Each field is read from its camelCase key first, then from the snake_case
 * key. A row is dropped when it is not an object, when `key`, `label`,
 * `category` or `unit` is missing or empty, or when its provenance type is
 * neither "taxonomy" nor "structured_note".
 *
 * @returns The valid rows in input order; empty when `value` is not an array.
 */
function normalizeKpiRows(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  const toKpiRow = (candidate: unknown) => {
    const raw = asObject(candidate);
    if (raw === null) {
      return null;
    }

    const key = asString(raw.key);
    const label = asString(raw.label);
    const category = asString(raw.category);
    const unit = asString(raw.unit);
    if (!key || !label || !category || !unit) {
      return null;
    }

    const provenanceType = raw.provenanceType ?? raw.provenance_type;
    if (provenanceType !== "taxonomy" && provenanceType !== "structured_note") {
      return null;
    }

    return {
      key,
      label,
      category: category as StructuredKpiRow["category"],
      unit: unit as StructuredKpiRow["unit"],
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      segment: asNullableString(raw.segment),
      axis: asNullableString(raw.axis),
      member: asNullableString(raw.member),
      values: normalizeNumberMap(raw.values),
      sourceConcepts: normalizeStringArray(
        raw.sourceConcepts ?? raw.source_concepts,
      ),
      sourceFactIds: normalizeNumberArray(
        raw.sourceFactIds ?? raw.source_fact_ids,
      ),
      provenanceType,
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
    } satisfies StructuredKpiRow;
  };

  return value
    .map((candidate) => toKpiRow(candidate))
    .filter((row): row is StructuredKpiRow => row !== null);
}
/**
 * Normalizes a stored `computed_definitions` payload.
 *
 * A definition is dropped when it is not an object, when `key`, `label`,
 * `category`, `unit` or `computation.type` is missing or empty, or when the
 * computation is of an unknown type or lacks a field its type requires:
 * - "ratio": `numerator`, `denominator`
 * - "yoy_growth": `source`
 * - "cagr": `source`, numeric `years`
 * - "per_share": `source`, `shares_key` (or `sharesKey`)
 * - "simple": `formula`
 *
 * @returns The valid definitions in input order; empty when `value` is not an
 *   array.
 */
function normalizeComputedDefinitions(value: unknown): ComputedDefinition[] {
  if (!Array.isArray(value)) {
    return [];
  }

  // Builds the typed computation for one definition, or null when it is
  // malformed.
  const parseComputation = (kind: string, spec: Record<string, unknown>) => {
    switch (kind) {
      case "ratio": {
        const numerator = asString(spec.numerator);
        const denominator = asString(spec.denominator);
        if (!numerator || !denominator) {
          return null;
        }
        return { type: "ratio", numerator, denominator } as const;
      }
      case "yoy_growth": {
        const source = asString(spec.source);
        if (!source) {
          return null;
        }
        return { type: "yoy_growth", source } as const;
      }
      case "cagr": {
        const source = asString(spec.source);
        const years = asNumber(spec.years);
        if (!source || years === null) {
          return null;
        }
        return { type: "cagr", source, years } as const;
      }
      case "per_share": {
        const source = asString(spec.source);
        const shares_key = asString(spec.shares_key ?? spec.sharesKey);
        if (!source || !shares_key) {
          return null;
        }
        return { type: "per_share", source, shares_key } as const;
      }
      case "simple": {
        const formula = asString(spec.formula);
        if (!formula) {
          return null;
        }
        return { type: "simple", formula } as const;
      }
      default:
        return null;
    }
  };

  const toDefinition = (candidate: unknown): ComputedDefinition | null => {
    const raw = asObject(candidate);
    if (raw === null) {
      return null;
    }

    const key = asString(raw.key);
    const label = asString(raw.label);
    const category = asString(raw.category);
    const unit = asString(raw.unit);
    const computation = asObject(raw.computation);
    const computationType = asString(computation?.type);
    if (!key || !label || !category || !unit) {
      return null;
    }
    if (!computation || !computationType) {
      return null;
    }

    const normalizedComputation = parseComputation(
      computationType,
      computation,
    );
    if (!normalizedComputation) {
      return null;
    }

    const definition: ComputedDefinition = {
      key,
      label,
      category,
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      unit: unit as ComputedDefinition["unit"],
      computation: normalizedComputation,
      supported_cadences: normalizeStringArray(
        raw.supported_cadences ?? raw.supportedCadences,
      ) as ComputedDefinition["supported_cadences"],
      requires_external_data: normalizeStringArray(
        raw.requires_external_data ?? raw.requiresExternalData,
      ),
    };
    return definition;
  };

  return value
    .map((candidate) => toDefinition(candidate))
    .filter((entry): entry is ComputedDefinition => entry !== null);
}
/**
 * Normalizes a stored `normalization_summary` payload.
 *
 * Each counter is read from its camelCase key first, then from the snake_case
 * key, and defaults to 0 when absent or non-numeric. `warnings` keeps only
 * string entries.
 *
 * @returns The summary, or `null` when `value` is not an object.
 */
function normalizeNormalizationSummary(value: unknown) {
  const raw = asObject(value);
  if (raw === null) {
    return null;
  }

  const readCount = (camel: unknown, snake: unknown) =>
    asNumber(camel ?? snake) ?? 0;

  return {
    surfaceRowCount: readCount(raw.surfaceRowCount, raw.surface_row_count),
    detailRowCount: readCount(raw.detailRowCount, raw.detail_row_count),
    kpiRowCount: readCount(raw.kpiRowCount, raw.kpi_row_count),
    unmappedRowCount: readCount(raw.unmappedRowCount, raw.unmapped_row_count),
    materialUnmappedRowCount: readCount(
      raw.materialUnmappedRowCount,
      raw.material_unmapped_row_count,
    ),
    warnings: normalizeStringArray(raw.warnings),
  } satisfies NormalizationSummary;
}
/**
 * Normalizes the JSON-shaped parts of a taxonomy snapshot.
 *
 * Each field is run through its dedicated normalizer. When `statement_rows`
 * is not an object, the normalized `faithful_rows` map is used in its place
 * (the same object reference, not a copy).
 *
 * @param input - Raw payload fields, typically read from storage or supplied
 *   by a caller of the upsert.
 * @returns The normalized fields under the same keys.
 */
export function normalizeFilingTaxonomySnapshotPayload(input: {
  periods: unknown;
  faithful_rows: unknown;
  statement_rows: unknown;
  surface_rows: unknown;
  detail_rows: unknown;
  kpi_rows: unknown;
  computed_definitions: unknown;
  normalization_summary: unknown;
}) {
  const faithful_rows = normalizeStatementRows(input.faithful_rows);
  // Faithful rows double as the fallback for missing statement rows.
  const statement_rows = normalizeStatementRows(
    input.statement_rows,
    faithful_rows,
  );
  const periods = normalizePeriods(input.periods);
  const surface_rows = normalizeSurfaceRows(input.surface_rows);
  const detail_rows = normalizeDetailRows(input.detail_rows);
  const kpi_rows = normalizeKpiRows(input.kpi_rows);
  const computed_definitions = normalizeComputedDefinitions(
    input.computed_definitions,
  );
  const normalization_summary = normalizeNormalizationSummary(
    input.normalization_summary,
  );

  return {
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    computed_definitions,
    normalization_summary,
  };
}
/** Builds a fresh statement-row map with an empty array for every statement kind. */
function emptyStatementRows(): StatementRowMap {
  const rows: StatementRowMap = {
    income: [],
    balance: [],
    cash_flow: [],
    disclosure: [],
    equity: [],
    comprehensive_income: [],
  };
  return rows;
}
/** Builds a fresh surface-row map with an empty array for every statement kind. */
function emptySurfaceRows(): SurfaceRowMap {
  const rows: SurfaceRowMap = {
    income: [],
    balance: [],
    cash_flow: [],
    disclosure: [],
    equity: [],
    comprehensive_income: [],
  };
  return rows;
}
/** Builds a fresh detail-row map with an empty object for every statement kind. */
function emptyDetailRows(): DetailRowMap {
  const rows: DetailRowMap = {
    income: {},
    balance: {},
    cash_flow: {},
    disclosure: {},
    equity: {},
    comprehensive_income: {},
  };
  return rows;
}
/**
 * Converts a stored snapshot row into a FilingTaxonomySnapshotRecord.
 *
 * The JSON-shaped columns are run through
 * `normalizeFilingTaxonomySnapshotPayload`; `derived_metrics` and
 * `validation_result` default to `null` when absent; all other columns are
 * copied unchanged.
 */
function toSnapshotRecord(
  row: typeof filingTaxonomySnapshot.$inferSelect,
): FilingTaxonomySnapshotRecord {
  const {
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    computed_definitions,
    normalization_summary,
  } = normalizeFilingTaxonomySnapshotPayload({
    periods: row.periods,
    faithful_rows: row.faithful_rows,
    statement_rows: row.statement_rows,
    surface_rows: row.surface_rows,
    detail_rows: row.detail_rows,
    kpi_rows: row.kpi_rows,
    computed_definitions: row.computed_definitions,
    normalization_summary: row.normalization_summary,
  });

  const {
    id,
    filing_id,
    ticker,
    filing_date,
    filing_type,
    parse_status,
    parse_error,
    source,
    parser_engine,
    parser_version,
    taxonomy_regime,
    fiscal_pack,
    facts_count,
    concepts_count,
    dimensions_count,
    created_at,
    updated_at,
  } = row;

  return {
    id,
    filing_id,
    ticker,
    filing_date,
    filing_type,
    parse_status,
    parse_error,
    source,
    parser_engine,
    parser_version,
    taxonomy_regime,
    fiscal_pack,
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    computed_definitions,
    derived_metrics: row.derived_metrics ?? null,
    validation_result: row.validation_result ?? null,
    normalization_summary,
    facts_count,
    concepts_count,
    dimensions_count,
    created_at,
    updated_at,
  };
}
/**
 * Converts a stored context row into a FilingTaxonomyContextRecord.
 *
 * `segment_json` and `scenario_json` default to `null` when absent; all other
 * columns are copied unchanged.
 */
function toContextRecord(
  row: typeof filingTaxonomyContext.$inferSelect,
): FilingTaxonomyContextRecord {
  const {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    segment_json,
    scenario_json,
    created_at,
  } = row;

  return {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    segment_json: segment_json ?? null,
    scenario_json: scenario_json ?? null,
    created_at,
  };
}
/**
 * Converts a stored asset row into a FilingTaxonomyAssetRecord.
 *
 * `score` is converted with `asNumber`; all other columns are copied
 * unchanged.
 */
function toAssetRecord(
  row: typeof filingTaxonomyAsset.$inferSelect,
): FilingTaxonomyAssetRecord {
  const {
    id,
    snapshot_id,
    asset_type,
    name,
    url,
    size_bytes,
    score,
    is_selected,
    created_at,
  } = row;

  return {
    id,
    snapshot_id,
    asset_type,
    name,
    url,
    size_bytes,
    score: asNumber(score),
    is_selected,
    created_at,
  };
}
/**
 * Converts a stored concept row into a FilingTaxonomyConceptRecord.
 *
 * `statement_kind` defaults to `null` when absent and `presentation_order` is
 * converted with `asNumber`; all other columns are copied unchanged.
 */
function toConceptRecord(
  row: typeof filingTaxonomyConcept.$inferSelect,
): FilingTaxonomyConceptRecord {
  const {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    label,
    is_extension,
    balance,
    period_type,
    data_type,
    statement_kind,
    role_uri,
    authoritative_concept_key,
    mapping_method,
    surface_key,
    detail_parent_surface_key,
    kpi_key,
    residual_flag,
    presentation_order,
    presentation_depth,
    parent_concept_key,
    is_abstract,
    created_at,
  } = row;

  return {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    label,
    is_extension,
    balance,
    period_type,
    data_type,
    statement_kind: statement_kind ?? null,
    role_uri,
    authoritative_concept_key,
    mapping_method,
    surface_key,
    detail_parent_surface_key,
    kpi_key,
    residual_flag,
    presentation_order: asNumber(presentation_order),
    presentation_depth,
    parent_concept_key,
    is_abstract,
    created_at,
  };
}
/**
 * Converts a stored fact row into a FilingTaxonomyFactRecord.
 *
 * `value_num` is converted with `asNumber` and `statement_kind` defaults to
 * `null` when absent; all other columns are copied unchanged.
 *
 * @throws Error when `value_num` cannot be converted to a finite number.
 */
function toFactRecord(
  row: typeof filingTaxonomyFact.$inferSelect,
): FilingTaxonomyFactRecord {
  const numericValue = asNumber(row.value_num);
  if (numericValue === null) {
    throw new Error(`Invalid value_num for taxonomy fact row ${row.id}`);
  }

  const {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    data_type,
    statement_kind,
    role_uri,
    authoritative_concept_key,
    mapping_method,
    surface_key,
    detail_parent_surface_key,
    kpi_key,
    residual_flag,
    context_id,
    unit,
    decimals,
    precision,
    nil,
    period_start,
    period_end,
    period_instant,
    dimensions,
    is_dimensionless,
    source_file,
    created_at,
  } = row;

  return {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    data_type,
    statement_kind: statement_kind ?? null,
    role_uri,
    authoritative_concept_key,
    mapping_method,
    surface_key,
    detail_parent_surface_key,
    kpi_key,
    residual_flag,
    context_id,
    unit,
    decimals,
    precision,
    nil,
    value_num: numericValue,
    period_start,
    period_end,
    period_instant,
    dimensions,
    is_dimensionless,
    source_file,
    created_at,
  };
}
/**
 * Converts a stored metric-validation row into a
 * FilingTaxonomyMetricValidationRecord.
 *
 * The four numeric columns are converted with `asNumber` and `evidence_pages`
 * defaults to an empty array when absent; all other columns are copied
 * unchanged.
 */
function toMetricValidationRecord(
  row: typeof filingTaxonomyMetricValidation.$inferSelect,
): FilingTaxonomyMetricValidationRecord {
  const {
    id,
    snapshot_id,
    metric_key,
    taxonomy_value,
    llm_value,
    absolute_diff,
    relative_diff,
    status,
    evidence_pages,
    pdf_url,
    provider,
    model,
    error,
    created_at,
    updated_at,
  } = row;

  return {
    id,
    snapshot_id,
    metric_key,
    taxonomy_value: asNumber(taxonomy_value),
    llm_value: asNumber(llm_value),
    absolute_diff: asNumber(absolute_diff),
    relative_diff: asNumber(relative_diff),
    status,
    evidence_pages: evidence_pages ?? [],
    pdf_url,
    provider,
    model,
    error,
    created_at,
    updated_at,
  };
}
/**
 * Loads the taxonomy snapshot stored for a filing.
 *
 * @param filingId - Value matched against the snapshot's `filing_id` column.
 * @returns The normalized snapshot, or `null` when no row matches.
 */
export async function getFilingTaxonomySnapshotByFilingId(filingId: number) {
  const matches = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.filing_id, filingId))
    .limit(1);

  const first = matches[0];
  if (!first) {
    return null;
  }
  return toSnapshotRecord(first);
}
/**
 * Lists the assets stored for a snapshot, highest row id first.
 *
 * @param snapshotId - Value matched against the asset's `snapshot_id` column.
 */
export async function listFilingTaxonomyAssets(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyAsset.snapshot_id, snapshotId);
  const highestIdFirst = desc(filingTaxonomyAsset.id);
  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(belongsToSnapshot)
    .orderBy(highestIdFirst);
  return assetRows.map((assetRow) => toAssetRecord(assetRow));
}
/**
 * Lists the contexts stored for a snapshot, highest row id first.
 *
 * @param snapshotId - Value matched against the context's `snapshot_id` column.
 */
export async function listFilingTaxonomyContexts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyContext.snapshot_id, snapshotId);
  const highestIdFirst = desc(filingTaxonomyContext.id);
  const contextRows = await db
    .select()
    .from(filingTaxonomyContext)
    .where(belongsToSnapshot)
    .orderBy(highestIdFirst);
  return contextRows.map((contextRow) => toContextRecord(contextRow));
}
/**
 * Lists the concepts stored for a snapshot, highest row id first.
 *
 * @param snapshotId - Value matched against the concept's `snapshot_id` column.
 */
export async function listFilingTaxonomyConcepts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyConcept.snapshot_id, snapshotId);
  const highestIdFirst = desc(filingTaxonomyConcept.id);
  const conceptRows = await db
    .select()
    .from(filingTaxonomyConcept)
    .where(belongsToSnapshot)
    .orderBy(highestIdFirst);
  return conceptRows.map((conceptRow) => toConceptRecord(conceptRow));
}
/**
 * Lists the facts stored for a snapshot, highest row id first.
 *
 * @param snapshotId - Value matched against the fact's `snapshot_id` column.
 * @throws Error when a stored fact has a `value_num` that `toFactRecord`
 *   cannot convert to a finite number.
 */
export async function listFilingTaxonomyFacts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyFact.snapshot_id, snapshotId);
  const highestIdFirst = desc(filingTaxonomyFact.id);
  const factRows = await db
    .select()
    .from(filingTaxonomyFact)
    .where(belongsToSnapshot)
    .orderBy(highestIdFirst);
  return factRows.map((factRow) => toFactRecord(factRow));
}
/**
 * Lists the metric validations stored for a snapshot, highest row id first.
 *
 * @param snapshotId - Value matched against the validation's `snapshot_id`
 *   column.
 */
export async function listFilingTaxonomyMetricValidations(snapshotId: number) {
  const belongsToSnapshot = eq(
    filingTaxonomyMetricValidation.snapshot_id,
    snapshotId,
  );
  const highestIdFirst = desc(filingTaxonomyMetricValidation.id);
  const validationRows = await db
    .select()
    .from(filingTaxonomyMetricValidation)
    .where(belongsToSnapshot)
    .orderBy(highestIdFirst);
  return validationRows.map((validationRow) =>
    toMetricValidationRecord(validationRow),
  );
}
export async function upsertFilingTaxonomySnapshot(
input: UpsertFilingTaxonomySnapshotInput,
) {
const now = new Date().toISOString();
const normalized = normalizeFilingTaxonomySnapshotPayload(input);
return db.transaction(async (tx) => {
const [saved] = await tx
.insert(filingTaxonomySnapshot)
.values({
filing_id: input.filing_id,
ticker: input.ticker,
filing_date: input.filing_date,
filing_type: input.filing_type,
parse_status: input.parse_status,
parse_error: input.parse_error,
source: input.source,
parser_engine: input.parser_engine,
parser_version: input.parser_version,
taxonomy_regime: input.taxonomy_regime,
fiscal_pack: input.fiscal_pack,
periods: normalized.periods,
faithful_rows: normalized.faithful_rows,
statement_rows: normalized.statement_rows,
surface_rows: normalized.surface_rows,
detail_rows: normalized.detail_rows,
kpi_rows: normalized.kpi_rows,
computed_definitions: normalized.computed_definitions,
derived_metrics: input.derived_metrics,
validation_result: input.validation_result,
normalization_summary: normalized.normalization_summary,
facts_count: input.facts_count,
concepts_count: input.concepts_count,
dimensions_count: input.dimensions_count,
created_at: now,
updated_at: now,
})
.onConflictDoUpdate({
target: filingTaxonomySnapshot.filing_id,
set: {
ticker: input.ticker,
filing_date: input.filing_date,
filing_type: input.filing_type,
parse_status: input.parse_status,
parse_error: input.parse_error,
source: input.source,
parser_engine: input.parser_engine,
parser_version: input.parser_version,
taxonomy_regime: input.taxonomy_regime,
fiscal_pack: input.fiscal_pack,
periods: normalized.periods,
faithful_rows: normalized.faithful_rows,
statement_rows: normalized.statement_rows,
surface_rows: normalized.surface_rows,
detail_rows: normalized.detail_rows,
kpi_rows: normalized.kpi_rows,
computed_definitions: normalized.computed_definitions,
derived_metrics: input.derived_metrics,
validation_result: input.validation_result,
normalization_summary: normalized.normalization_summary,
facts_count: input.facts_count,
concepts_count: input.concepts_count,
dimensions_count: input.dimensions_count,
updated_at: now,
},
})
.returning();
const snapshotId = saved.id;
try {
await tx
.delete(filingTaxonomyAsset)
.where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
await tx
.delete(filingTaxonomyContext)
.where(eq(filingTaxonomyContext.snapshot_id, snapshotId));
await tx
.delete(filingTaxonomyConcept)
.where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
await tx
.delete(filingTaxonomyFact)
.where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
await tx
.delete(filingTaxonomyMetricValidation)
.where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));
} catch (error) {
throw new Error(
`Failed to delete child records for snapshot ${snapshotId}: ${error}`,
);
}
if (input.contexts.length > 0) {
try {
await tx.insert(filingTaxonomyContext).values(
input.contexts.map((context) => ({
snapshot_id: snapshotId,
context_id: context.context_id,
entity_identifier: context.entity_identifier,
entity_scheme: context.entity_scheme,
period_start: context.period_start,
period_end: context.period_end,
period_instant: context.period_instant,
segment_json: context.segment_json,
scenario_json: context.scenario_json,
created_at: now,
})),
);
} catch (error) {
throw new Error(
`Failed to insert ${input.contexts.length} contexts for snapshot ${snapshotId}: ${error}`,
);
}
}
if (input.assets.length > 0) {
try {
await tx.insert(filingTaxonomyAsset).values(
input.assets.map((asset) => ({
snapshot_id: snapshotId,
asset_type: asset.asset_type,
name: asset.name,
url: asset.url,
size_bytes: asset.size_bytes,
score: asNumericText(asset.score),
is_selected: asset.is_selected,
created_at: now,
})),
);
} catch (error) {
throw new Error(
`Failed to insert ${input.assets.length} assets for snapshot ${snapshotId}: ${error}`,
);
}
}
if (input.concepts.length > 0) {
try {
await tx.insert(filingTaxonomyConcept).values(
input.concepts.map((concept) => ({
snapshot_id: snapshotId,
concept_key: concept.concept_key,
qname: concept.qname,
namespace_uri: concept.namespace_uri,
local_name: concept.local_name,
label: concept.label,
is_extension: concept.is_extension,
balance: concept.balance,
period_type: concept.period_type,
data_type: concept.data_type,
statement_kind: concept.statement_kind,
role_uri: concept.role_uri,
authoritative_concept_key: concept.authoritative_concept_key,
mapping_method: concept.mapping_method,
surface_key: concept.surface_key,
detail_parent_surface_key: concept.detail_parent_surface_key,
kpi_key: concept.kpi_key,
residual_flag: concept.residual_flag,
presentation_order: asNumericText(concept.presentation_order),
presentation_depth: concept.presentation_depth,
parent_concept_key: concept.parent_concept_key,
is_abstract: concept.is_abstract,
created_at: now,
})),
);
} catch (error) {
throw new Error(
`Failed to insert ${input.concepts.length} concepts for snapshot ${snapshotId}: ${error}`,
);
}
}
if (input.facts.length > 0) {
try {
await tx.insert(filingTaxonomyFact).values(
input.facts.map((fact) => ({
snapshot_id: snapshotId,
concept_key: fact.concept_key,
qname: fact.qname,
namespace_uri: fact.namespace_uri,
local_name: fact.local_name,
data_type: fact.data_type,
statement_kind: fact.statement_kind,
role_uri: fact.role_uri,
authoritative_concept_key: fact.authoritative_concept_key,
mapping_method: fact.mapping_method,
surface_key: fact.surface_key,
detail_parent_surface_key: fact.detail_parent_surface_key,
kpi_key: fact.kpi_key,
residual_flag: fact.residual_flag,
context_id: fact.context_id,
unit: fact.unit,
decimals: fact.decimals,
precision: fact.precision,
nil: fact.nil,
value_num: String(fact.value_num),
period_start: fact.period_start,
period_end: fact.period_end,
period_instant: fact.period_instant,
dimensions: fact.dimensions,
is_dimensionless: fact.is_dimensionless,
source_file: fact.source_file,
created_at: now,
})),
);
} catch (error) {
throw new Error(
`Failed to insert ${input.facts.length} facts for snapshot ${snapshotId}: ${error}`,
);
}
}
if (input.metric_validations.length > 0) {
try {
await tx.insert(filingTaxonomyMetricValidation).values(
input.metric_validations.map((check) => ({
snapshot_id: snapshotId,
metric_key: check.metric_key,
taxonomy_value: asNumericText(check.taxonomy_value),
llm_value: asNumericText(check.llm_value),
absolute_diff: asNumericText(check.absolute_diff),
relative_diff: asNumericText(check.relative_diff),
status: check.status,
evidence_pages: check.evidence_pages,
pdf_url: check.pdf_url,
provider: check.provider,
model: check.model,
error: check.error,
created_at: now,
updated_at: now,
})),
);
} catch (error) {
throw new Error(
`Failed to insert ${input.metric_validations.length} metric validations for snapshot ${snapshotId}: ${error}`,
);
}
}
return toSnapshotRecord(saved);
});
}
/**
 * Lists taxonomy snapshots for a ticker one page at a time, ordered by
 * `filing_date` descending and then `id` descending.
 *
 * The cursor is the `id` of the last snapshot on the previous page. Because
 * the page order is `(filing_date, id)` rather than `id` alone, the cursor
 * row's `filing_date` is looked up and both columns are used to resume, so
 * rows are neither skipped nor repeated when ids are not monotonic with
 * filing dates.
 *
 * @param input.ticker Ticker symbol; trimmed and upper-cased before matching.
 * @param input.window `"10y"` keeps only filings dated on or after
 *   `tenYearsAgoIso()`; `"all"` applies no date filter.
 * @param input.filingTypes Optional filing-type filter; omitted or empty
 *   means no filter.
 * @param input.limit Page size; truncated and clamped to 1–120. Defaults to
 *   40 when omitted or `NaN`.
 * @param input.cursor `nextCursor` value from the previous page, if any.
 *   Non-numeric or non-positive cursors are ignored.
 * @returns The page of snapshot records and `nextCursor` (`null` when there
 *   are no further rows).
 */
export async function listFilingTaxonomySnapshotsByTicker(input: {
  ticker: string;
  window: "10y" | "all";
  filingTypes?: Array<"10-K" | "10-Q">;
  limit?: number;
  cursor?: string | null;
}) {
  const requestedLimit = input.limit ?? 40;
  // A NaN limit would otherwise survive trunc/max/min and reach `.limit(NaN)`.
  const safeLimit = Math.min(
    Math.max(
      Math.trunc(Number.isNaN(requestedLimit) ? 40 : requestedLimit),
      1,
    ),
    120,
  );
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;
  const constraints = [
    eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase()),
  ];
  if (input.window === "10y") {
    constraints.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }
  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    // Resolve the cursor row's position in the (filing_date, id) ordering.
    const [cursorRow] = await db
      .select({ filing_date: filingTaxonomySnapshot.filing_date })
      .from(filingTaxonomySnapshot)
      .where(eq(filingTaxonomySnapshot.id, cursorId))
      .limit(1);
    if (cursorRow) {
      // Keyset predicate matching the ORDER BY: strictly older filing date,
      // or same date with a smaller id.
      constraints.push(
        sql`(${filingTaxonomySnapshot.filing_date} < ${cursorRow.filing_date} or (${filingTaxonomySnapshot.filing_date} = ${cursorRow.filing_date} and ${filingTaxonomySnapshot.id} < ${cursorId}))`,
      );
    } else {
      // Cursor row no longer exists; fall back to the id-only filter.
      constraints.push(lt(filingTaxonomySnapshot.id, cursorId));
    }
  }
  if (input.filingTypes && input.filingTypes.length > 0) {
    constraints.push(
      inArray(filingTaxonomySnapshot.filing_type, input.filingTypes),
    );
  }
  // Fetch one extra row so we can tell whether another page exists.
  const rows = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(and(...constraints))
    .orderBy(
      desc(filingTaxonomySnapshot.filing_date),
      desc(filingTaxonomySnapshot.id),
    )
    .limit(safeLimit + 1);
  const hasMore = rows.length > safeLimit;
  const usedRows = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore
    ? String(usedRows[usedRows.length - 1]?.id ?? "")
    : null;
  return {
    snapshots: usedRows.map(toSnapshotRecord),
    nextCursor,
  };
}
/**
 * Tallies a ticker's taxonomy snapshots by parse status.
 *
 * @param ticker Ticker symbol; trimmed and upper-cased before matching.
 * @returns A count for each of `ready`, `partial` and `failed`; statuses with
 *   no snapshots stay at 0.
 */
export async function countFilingTaxonomySnapshotStatuses(ticker: string) {
  const normalizedTicker = ticker.trim().toUpperCase();
  const grouped = await db
    .select({
      status: filingTaxonomySnapshot.parse_status,
      count: sql<string>`count(*)`,
    })
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.ticker, normalizedTicker))
    .groupBy(filingTaxonomySnapshot.parse_status);

  const totals: Record<FilingTaxonomyParseStatus, number> = {
    ready: 0,
    partial: 0,
    failed: 0,
  };
  for (const { status, count } of grouped) {
    // The aggregate is typed as a string here, so convert it explicitly.
    totals[status] = Number(count);
  }
  return totals;
}
/**
 * Lists taxonomy facts for a ticker one page at a time, ordered by fact `id`
 * descending. Facts are joined to their snapshot so the ticker, window and
 * filing-type filters apply to the owning snapshot.
 *
 * @param input.ticker Ticker symbol; trimmed and upper-cased before matching.
 * @param input.window `"10y"` keeps only snapshots whose filing date is on or
 *   after `tenYearsAgoIso()`; `"all"` applies no date filter.
 * @param input.statement Optional statement kind to match against the fact.
 * @param input.filingTypes Optional filing-type filter; omitted or empty
 *   means no filter.
 * @param input.cursor `nextCursor` value from the previous page, if any.
 *   Non-numeric or non-positive cursors are ignored.
 * @param input.limit Page size; truncated and clamped to 1–10000. Defaults to
 *   500 when omitted or `NaN`.
 * @returns The page of facts and `nextCursor` (`null` when there are no
 *   further rows).
 * @throws Error when a stored `value_num` cannot be converted by `asNumber`.
 */
export async function listTaxonomyFactsByTicker(input: {
  ticker: string;
  window: "10y" | "all";
  statement?: FinancialStatementKind;
  filingTypes?: Array<"10-K" | "10-Q">;
  cursor?: string | null;
  limit?: number;
}) {
  const requestedLimit = input.limit ?? 500;
  // A NaN limit would otherwise survive trunc/max/min and reach `.limit(NaN)`.
  const safeLimit = Math.min(
    Math.max(
      Math.trunc(Number.isNaN(requestedLimit) ? 500 : requestedLimit),
      1,
    ),
    10000,
  );
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;
  const conditions = [
    eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase()),
  ];
  if (input.window === "10y") {
    conditions.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }
  if (input.statement) {
    conditions.push(eq(filingTaxonomyFact.statement_kind, input.statement));
  }
  if (input.filingTypes && input.filingTypes.length > 0) {
    conditions.push(
      inArray(filingTaxonomySnapshot.filing_type, input.filingTypes),
    );
  }
  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    // Ordering is by fact id alone, so an id-only keyset filter is sufficient.
    conditions.push(lt(filingTaxonomyFact.id, cursorId));
  }
  // Fetch one extra row so we can tell whether another page exists.
  const rows = await db
    .select({
      id: filingTaxonomyFact.id,
      snapshot_id: filingTaxonomyFact.snapshot_id,
      filing_id: filingTaxonomySnapshot.filing_id,
      filing_date: filingTaxonomySnapshot.filing_date,
      statement_kind: filingTaxonomyFact.statement_kind,
      role_uri: filingTaxonomyFact.role_uri,
      concept_key: filingTaxonomyFact.concept_key,
      qname: filingTaxonomyFact.qname,
      namespace_uri: filingTaxonomyFact.namespace_uri,
      local_name: filingTaxonomyFact.local_name,
      value_num: filingTaxonomyFact.value_num,
      context_id: filingTaxonomyFact.context_id,
      unit: filingTaxonomyFact.unit,
      decimals: filingTaxonomyFact.decimals,
      period_start: filingTaxonomyFact.period_start,
      period_end: filingTaxonomyFact.period_end,
      period_instant: filingTaxonomyFact.period_instant,
      dimensions: filingTaxonomyFact.dimensions,
      is_dimensionless: filingTaxonomyFact.is_dimensionless,
      source_file: filingTaxonomyFact.source_file,
    })
    .from(filingTaxonomyFact)
    .innerJoin(
      filingTaxonomySnapshot,
      eq(filingTaxonomyFact.snapshot_id, filingTaxonomySnapshot.id),
    )
    .where(and(...conditions))
    .orderBy(desc(filingTaxonomyFact.id))
    .limit(safeLimit + 1);
  const hasMore = rows.length > safeLimit;
  const used = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore ? String(used[used.length - 1]?.id ?? "") : null;
  const facts: TaxonomyFactRow[] = used.map((row) => {
    const value = asNumber(row.value_num);
    if (value === null) {
      // A fact without a usable numeric value is treated as corrupt data.
      throw new Error(`Invalid value_num in taxonomy fact ${row.id}`);
    }
    return {
      id: row.id,
      snapshotId: row.snapshot_id,
      filingId: row.filing_id,
      filingDate: row.filing_date,
      statement: row.statement_kind,
      roleUri: row.role_uri,
      conceptKey: row.concept_key,
      qname: row.qname,
      namespaceUri: row.namespace_uri,
      localName: row.local_name,
      value,
      contextId: row.context_id,
      unit: row.unit,
      decimals: row.decimals,
      periodStart: row.period_start,
      periodEnd: row.period_end,
      periodInstant: row.period_instant,
      dimensions: row.dimensions,
      isDimensionless: row.is_dimensionless,
      sourceFile: row.source_file,
    };
  });
  return {
    facts,
    nextCursor,
  };
}
/**
 * Fetches the taxonomy asset rows belonging to any of the given snapshots.
 *
 * Returns an empty array without querying when no ids are supplied.
 * Otherwise rows are ordered by asset `id` descending and mapped through
 * `toAssetRecord`.
 *
 * @param snapshotIds Snapshot ids whose assets should be returned.
 */
export async function listTaxonomyAssetsBySnapshotIds(snapshotIds: number[]) {
  const hasIds = snapshotIds.length > 0;
  if (!hasIds) {
    return [];
  }

  const belongsToRequestedSnapshot = inArray(
    filingTaxonomyAsset.snapshot_id,
    snapshotIds,
  );
  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(belongsToRequestedSnapshot)
    .orderBy(desc(filingTaxonomyAsset.id));

  return assetRows.map(toAssetRecord);
}
/**
 * Exposes otherwise module-private helpers (`normalizeFilingTaxonomySnapshotPayload`
 * and `toSnapshotRecord`) under a single export.
 * NOTE(review): presumably intended for unit tests only — confirm before
 * depending on this from application code.
 */
export const __filingTaxonomyInternals = {
normalizeFilingTaxonomySnapshotPayload,
toSnapshotRecord,
};