1636 lines
49 KiB
TypeScript
1636 lines
49 KiB
TypeScript
import { and, desc, eq, gte, inArray, lt, sql } from "drizzle-orm";
|
|
import type { ComputedDefinition } from "@/lib/generated";
|
|
import type {
|
|
DetailFinancialRow,
|
|
Filing,
|
|
FinancialStatementKind,
|
|
MetricValidationResult,
|
|
NormalizationSummary,
|
|
StructuredKpiRow,
|
|
SurfaceDetailMap,
|
|
SurfaceFinancialRow,
|
|
TaxonomyDimensionMember,
|
|
TaxonomyFactRow,
|
|
TaxonomyStatementRow,
|
|
} from "@/lib/types";
|
|
import { db, getSqliteClient } from "@/lib/server/db";
|
|
import { withFinancialIngestionSchemaRetry } from "@/lib/server/db/financial-ingestion-schema";
|
|
import {
|
|
filingTaxonomyAsset,
|
|
filingTaxonomyConcept,
|
|
filingTaxonomyContext,
|
|
filingTaxonomyFact,
|
|
filingTaxonomyMetricValidation,
|
|
filingTaxonomySnapshot,
|
|
} from "@/lib/server/db/schema";
|
|
|
|
/** Allowed values for a snapshot's `parse_status` field. */
export type FilingTaxonomyParseStatus =
  | "ready"
  | "partial"
  | "failed";
|
|
/** Allowed values for a snapshot's `source` field. */
export type FilingTaxonomySource =
  | "xbrl_instance"
  | "xbrl_instance_with_linkbase"
  | "legacy_html_fallback";
|
|
/** Allowed values for a taxonomy asset's `asset_type` field. */
export type FilingTaxonomyAssetType =
  | "instance"
  | "schema"
  | "presentation"
  | "label"
  | "calculation"
  | "definition"
  | "pdf"
  | "other";
|
|
|
|
/**
 * One entry of a snapshot's `periods` list.
 *
 * Keys are camelCase; `normalizePeriods` also accepts the snake_case spelling
 * of each key when reading stored data.
 */
export type FilingTaxonomyPeriod = {
  id: string;
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  /** `null` when no period start was recorded. */
  periodStart: string | null;
  /** `null` when no period end was recorded. */
  periodEnd: string | null;
  filingType: "10-K" | "10-Q";
  periodLabel: string;
};
|
|
|
|
/**
 * Taxonomy snapshot as handed to callers; this is the return shape of
 * `toSnapshotRecord`, i.e. a selected snapshot row whose structured payload
 * fields have been passed through the normalizers in this module.
 */
export type FilingTaxonomySnapshotRecord = {
  // Identity and filing metadata.
  id: number;
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: "10-K" | "10-Q";

  // Parse outcome and parser provenance.
  parse_status: FilingTaxonomyParseStatus;
  parse_error: string | null;
  source: FilingTaxonomySource;
  parser_engine: string;
  parser_version: string;
  taxonomy_regime: "us-gaap" | "ifrs-full" | "unknown";
  fiscal_pack: string | null;

  // Structured payloads.
  periods: FilingTaxonomyPeriod[];
  faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
  detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
  kpi_rows: StructuredKpiRow[];
  computed_definitions: ComputedDefinition[];
  derived_metrics: Filing["metrics"];
  validation_result: MetricValidationResult | null;
  normalization_summary: NormalizationSummary | null;
  issuer_overlay_revision_id: number | null;

  // Stored counts.
  facts_count: number;
  concepts_count: number;
  dimensions_count: number;

  // Row timestamps.
  created_at: string;
  updated_at: string;
};
|
|
|
|
/**
 * Context row belonging to a snapshot; this is the return shape of
 * `toContextRecord`.
 */
type FilingTaxonomyContextRecord = {
  id: number;
  snapshot_id: number;
  context_id: string;
  entity_identifier: string | null;
  entity_scheme: string | null;
  period_start: string | null;
  period_end: string | null;
  period_instant: string | null;
  /** `null` when the row carries no segment payload. */
  segment_json: Record<string, unknown> | null;
  /** `null` when the row carries no scenario payload. */
  scenario_json: Record<string, unknown> | null;
  created_at: string;
};
|
|
|
|
/**
 * Asset row belonging to a snapshot; this is the return shape of
 * `toAssetRecord`.
 */
type FilingTaxonomyAssetRecord = {
  id: number;
  snapshot_id: number;
  asset_type: FilingTaxonomyAssetType;
  name: string;
  url: string;
  size_bytes: number | null;
  /** Numeric score, or `null` when the stored value is not a finite number. */
  score: number | null;
  is_selected: boolean;
  created_at: string;
};
|
|
|
|
/**
 * Concept row belonging to a snapshot; this is the return shape of
 * `toConceptRecord`.
 */
export type FilingTaxonomyConceptRecord = {
  id: number;
  snapshot_id: number;

  // Concept identity.
  concept_key: string;
  qname: string;
  namespace_uri: string;
  local_name: string;
  label: string | null;
  is_extension: boolean;

  // Concept attributes.
  balance: string | null;
  period_type: string | null;
  data_type: string | null;

  // Statement placement and mapping results.
  statement_kind: FinancialStatementKind | null;
  role_uri: string | null;
  authoritative_concept_key: string | null;
  mapping_method: string | null;
  surface_key: string | null;
  detail_parent_surface_key: string | null;
  kpi_key: string | null;
  residual_flag: boolean;

  // Presentation hierarchy.
  presentation_order: number | null;
  presentation_depth: number | null;
  parent_concept_key: string | null;
  is_abstract: boolean;

  created_at: string;
};
|
|
|
|
/**
 * Fact row belonging to a snapshot; this is the return shape of
 * `toFactRecord`, which guarantees `value_num` is a finite number.
 */
type FilingTaxonomyFactRecord = {
  id: number;
  snapshot_id: number;

  // Concept identity.
  concept_key: string;
  qname: string;
  namespace_uri: string;
  local_name: string;
  data_type: string | null;

  // Statement placement and mapping results.
  statement_kind: FinancialStatementKind | null;
  role_uri: string | null;
  authoritative_concept_key: string | null;
  mapping_method: string | null;
  surface_key: string | null;
  detail_parent_surface_key: string | null;
  kpi_key: string | null;
  residual_flag: boolean;

  // Reported value and its qualifiers.
  context_id: string;
  unit: string | null;
  decimals: string | null;
  precision: string | null;
  nil: boolean;
  value_num: number;

  // Period of the fact.
  period_start: string | null;
  period_end: string | null;
  period_instant: string | null;

  // Dimensional qualifiers.
  dimensions: TaxonomyDimensionMember[];
  is_dimensionless: boolean;

  source_file: string | null;
  created_at: string;
};
|
|
|
|
/**
 * Per-metric validation row belonging to a snapshot; this is the return shape
 * of `toMetricValidationRecord`.
 */
type FilingTaxonomyMetricValidationRecord = {
  id: number;
  snapshot_id: number;
  /** Name of one of the keys of `Filing["metrics"]`. */
  metric_key: keyof NonNullable<Filing["metrics"]>;

  // Compared values and their differences.
  taxonomy_value: number | null;
  llm_value: number | null;
  absolute_diff: number | null;
  relative_diff: number | null;
  status: "not_run" | "matched" | "mismatch" | "error";

  // Evidence and provenance.
  evidence_pages: number[];
  pdf_url: string | null;
  provider: string | null;
  model: string | null;
  error: string | null;

  created_at: string;
  updated_at: string;
};
|
|
|
|
/**
 * Payload describing a taxonomy snapshot together with its child rows
 * (contexts, assets, concepts, facts and metric validations).
 * NOTE(review): presumably consumed by the snapshot upsert routine, which is
 * not visible in this part of the file - confirm against the caller.
 *
 * The shape is derived from the record types instead of being spelled out a
 * second time, so a field added to or changed on a record type cannot drift
 * from the input type:
 *
 * - top level: every `FilingTaxonomySnapshotRecord` field except `id`,
 *   `created_at` and `updated_at`; `issuer_overlay_revision_id` is optional
 *   here;
 * - child rows: the matching record type without `id`, `snapshot_id` and
 *   `created_at` (and `updated_at` for metric validations).
 */
type UpsertFilingTaxonomySnapshotInput = Omit<
  FilingTaxonomySnapshotRecord,
  "id" | "created_at" | "updated_at" | "issuer_overlay_revision_id"
> & {
  issuer_overlay_revision_id?: number | null;
  contexts: Array<
    Omit<FilingTaxonomyContextRecord, "id" | "snapshot_id" | "created_at">
  >;
  assets: Array<
    Omit<FilingTaxonomyAssetRecord, "id" | "snapshot_id" | "created_at">
  >;
  concepts: Array<
    Omit<FilingTaxonomyConceptRecord, "id" | "snapshot_id" | "created_at">
  >;
  facts: Array<
    Omit<FilingTaxonomyFactRecord, "id" | "snapshot_id" | "created_at">
  >;
  metric_validations: Array<
    Omit<
      FilingTaxonomyMetricValidationRecord,
      "id" | "snapshot_id" | "created_at" | "updated_at"
    >
  >;
};
|
|
|
|
/**
 * Statement kinds in the order the row normalizers iterate over them.
 * Kept as a const tuple so each element retains its literal type.
 */
const FINANCIAL_STATEMENT_KINDS = [
  "income", "balance", "cash_flow",
  "disclosure", "equity", "comprehensive_income",
] as const satisfies FinancialStatementKind[];
|
|
|
|
/** Taxonomy statement rows grouped by statement kind. */
type StatementRowMap = Record<FinancialStatementKind, TaxonomyStatementRow[]>;

/** Surface rows grouped by statement kind. */
type SurfaceRowMap = Record<FinancialStatementKind, SurfaceFinancialRow[]>;

/** Detail-row maps grouped by statement kind. */
type DetailRowMap = Record<FinancialStatementKind, SurfaceDetailMap>;
|
|
|
|
/**
 * Returns the UTC calendar date ten years before the current moment,
 * formatted as `YYYY-MM-DD`.
 *
 * The year is shifted with `Date#setUTCFullYear`, so a date that does not
 * exist in the target year (29 February) rolls forward as that method does.
 */
function tenYearsAgoIso() {
  const cutoff = new Date();
  const targetYear = cutoff.getUTCFullYear() - 10;
  cutoff.setUTCFullYear(targetYear);

  // Keep only the date part of the ISO timestamp.
  const isoTimestamp = cutoff.toISOString();
  return isoTimestamp.substring(0, 10);
}
|
|
|
|
/**
 * Coerces an untrusted value to a finite number.
 *
 * - numbers are returned as-is when finite, otherwise `null`;
 * - strings are parsed with `Number()`; blank strings (empty or whitespace
 *   only) yield `null` rather than the `0` that `Number("")` would produce,
 *   so a missing value is never mistaken for zero;
 * - every other type yields `null`.
 *
 * @param value - Value of unknown type, e.g. read from stored JSON or a
 *   text-encoded numeric column.
 * @returns The finite number, or `null` when none can be derived.
 */
function asNumber(value: unknown) {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : null;
  }

  if (typeof value === "string") {
    // `Number("")` and `Number("   ")` are 0; treat blank text as "no value".
    if (value.trim() === "") {
      return null;
    }

    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
|
|
|
|
/**
 * Renders a finite number as its decimal string form.
 *
 * @param value - Number to render, or `null`.
 * @returns `String(value)` for finite numbers; `null` for `null`, `NaN` and
 *   the infinities.
 */
function asNumericText(value: number | null) {
  const isUsable = value !== null && Number.isFinite(value);
  return isUsable ? String(value) : null;
}
|
|
|
|
/**
 * Narrows an unknown value to a string-keyed object.
 *
 * @param value - Value of unknown type.
 * @returns The same reference typed as `Record<string, unknown>` when it is a
 *   non-null, non-array object; otherwise `null`.
 */
function asObject(value: unknown) {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }

  return value as Record<string, unknown>;
}
|
|
|
|
/**
 * Returns the value when it is a string (including the empty string),
 * otherwise `null`. No coercion is performed.
 */
function asString(value: unknown) {
  if (typeof value !== "string") {
    return null;
  }

  return value;
}
|
|
|
|
/**
 * Returns the value when it is a string, otherwise `null`.
 *
 * `null`, `undefined` and every non-string value all collapse to `null`; the
 * previous nested conditional had two branches that both produced `null`, so
 * it is reduced to the single check it always amounted to.
 *
 * @param value - Value of unknown type.
 * @returns The string itself, or `null`.
 */
function asNullableString(value: unknown) {
  return typeof value === "string" ? value : null;
}
|
|
|
|
/**
 * Converts an unknown value to a boolean.
 *
 * Booleans pass through unchanged; anything else is converted by JavaScript
 * truthiness, so any non-empty string - including `"false"` - becomes `true`.
 */
function asBoolean(value: unknown) {
  if (typeof value === "boolean") {
    return value;
  }

  return Boolean(value);
}
|
|
|
|
/**
 * Returns the value when it is exactly one of the six statement-kind
 * literals, otherwise `null`. Comparison is strict, so casing matters.
 */
function asStatementKind(value: unknown): FinancialStatementKind | null {
  if (value === "income" || value === "balance" || value === "cash_flow") {
    return value;
  }

  if (
    value === "disclosure" ||
    value === "equity" ||
    value === "comprehensive_income"
  ) {
    return value;
  }

  return null;
}
|
|
|
|
/**
 * Normalizes an unknown value into a map of finite numbers.
 *
 * @param value - Expected to be a plain object; anything else yields `{}`.
 * @returns A new object with the same keys, each value passed through
 *   `asNumber` (so entries that are not numeric become `null`).
 */
function normalizeNumberMap(value: unknown) {
  const source = asObject(value);
  if (source === null) {
    return {};
  }

  const pairs = Object.entries(source).map(
    ([name, raw]): [string, number | null] => [name, asNumber(raw)],
  );
  return Object.fromEntries(pairs);
}
|
|
|
|
/**
 * Normalizes an unknown value into a map of strings.
 *
 * @param value - Expected to be a plain object; anything else yields `{}`.
 * @returns A new object with the same keys, each value passed through
 *   `asNullableString` (so non-string entries become `null`).
 */
function normalizeNullableStringMap(value: unknown) {
  const source = asObject(value);
  if (source === null) {
    return {};
  }

  const pairs = Object.entries(source).map(
    ([name, raw]): [string, string | null] => [name, asNullableString(raw)],
  );
  return Object.fromEntries(pairs);
}
|
|
|
|
/**
 * Keeps only the string elements of an array.
 *
 * @param value - Expected to be an array; anything else yields `[]`.
 * @returns A new array containing the string elements in their original
 *   order; non-string elements are dropped, not converted.
 */
function normalizeStringArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  return value.filter((item): item is string => typeof item === "string");
}
|
|
|
|
/**
 * Keeps the elements of an array that `asNumber` can turn into a finite
 * number.
 *
 * @param value - Expected to be an array; anything else yields `[]`.
 * @returns A new array of the converted numbers in their original order;
 *   elements for which `asNumber` returns `null` are dropped.
 */
function normalizeNumberArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  const numbers: number[] = [];
  for (const item of value) {
    const parsed = asNumber(item);
    if (parsed !== null) {
      numbers.push(parsed);
    }
  }

  return numbers;
}
|
|
|
|
/**
 * Normalizes an unknown value into a list of `FilingTaxonomyPeriod` entries.
 *
 * Each element may use camelCase or snake_case keys. An element is dropped
 * when it is not an object or when any of `id`, `filingId`,
 * `accessionNumber`, `filingDate`, `filingType` or `periodLabel` is missing,
 * empty, or of the wrong type. `periodStart` / `periodEnd` are optional and
 * become `null` when absent.
 *
 * @param value - Expected to be an array; anything else yields `[]`.
 * @returns The valid periods, in input order.
 */
function normalizePeriods(value: unknown): FilingTaxonomyPeriod[] {
  if (!Array.isArray(value)) {
    return [];
  }

  const periods: FilingTaxonomyPeriod[] = [];
  for (const candidate of value) {
    const raw = asObject(candidate);
    if (raw === null) {
      continue;
    }

    const id = asString(raw.id);
    const filingId = asNumber(raw.filingId ?? raw.filing_id);
    const accessionNumber = asString(
      raw.accessionNumber ?? raw.accession_number,
    );
    const filingDate = asString(raw.filingDate ?? raw.filing_date);
    const periodLabel = asString(raw.periodLabel ?? raw.period_label);

    // Either spelling of the key may carry the filing type; a "10-K" in
    // either one takes precedence over a "10-Q".
    const isAnnual = raw.filingType === "10-K" || raw.filing_type === "10-K";
    const isQuarterly =
      raw.filingType === "10-Q" || raw.filing_type === "10-Q";
    const filingType = isAnnual ? "10-K" : isQuarterly ? "10-Q" : null;

    if (
      !id ||
      filingId === null ||
      !accessionNumber ||
      !filingDate ||
      !filingType ||
      !periodLabel
    ) {
      continue;
    }

    periods.push({
      id,
      filingId,
      accessionNumber,
      filingDate,
      periodStart: asNullableString(raw.periodStart ?? raw.period_start),
      periodEnd: asNullableString(raw.periodEnd ?? raw.period_end),
      filingType,
      periodLabel,
    });
  }

  return periods;
}
|
|
|
|
/**
 * Normalizes an unknown value into taxonomy statement rows grouped by
 * statement kind.
 *
 * Row keys may be camelCase or snake_case. A row is dropped when it is not an
 * object or when `key`, `label`, `conceptKey`, `qname`, `namespaceUri` or
 * `localName` is missing or empty (`key` falls back to the concept key).
 *
 * @param value - Expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is (same reference) when `value` is not
 *   an object. Defaults to a fresh empty map.
 * @returns A new map with one array per statement kind; kinds whose entry is
 *   not an array are left empty.
 */
function normalizeStatementRows(
  value: unknown,
  fallbackRows: StatementRowMap = emptyStatementRows(),
): StatementRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  // Converts one raw entry into a row, or null when it is unusable.
  const toRow = (candidate: unknown, statement: FinancialStatementKind) => {
    const raw = asObject(candidate);
    if (raw === null) {
      return null;
    }

    const conceptKey = asString(raw.conceptKey ?? raw.concept_key);
    const key = asString(raw.key) ?? conceptKey;
    const label = asString(raw.label);
    const qname = asString(raw.qname);
    const namespaceUri = asString(raw.namespaceUri ?? raw.namespace_uri);
    const localName = asString(raw.localName ?? raw.local_name);
    if (
      !(key && label && conceptKey && qname && namespaceUri && localName)
    ) {
      return null;
    }

    return {
      key,
      label,
      conceptKey,
      qname,
      namespaceUri,
      localName,
      isExtension: asBoolean(raw.isExtension ?? raw.is_extension),
      // Prefer the row's own statement; fall back to the group it sits in.
      statement: asStatementKind(raw.statement) ?? statement,
      roleUri: asNullableString(raw.roleUri ?? raw.role_uri),
      // Rows without a usable order get the largest safe integer.
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      depth: asNumber(raw.depth) ?? 0,
      parentKey: asNullableString(raw.parentKey ?? raw.parent_key),
      values: normalizeNumberMap(raw.values),
      units: normalizeNullableStringMap(raw.units),
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
      sourceFactIds: normalizeNumberArray(
        raw.sourceFactIds ?? raw.source_fact_ids,
      ),
    };
  };

  const result = emptyStatementRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const candidates = source[statement];
    if (!Array.isArray(candidates)) {
      continue;
    }

    result[statement] = candidates
      .map((candidate) => toRow(candidate, statement))
      .filter((row): row is TaxonomyStatementRow => row !== null);
  }

  return result;
}
|
|
|
|
/**
 * Normalizes an unknown value into surface rows grouped by statement kind.
 *
 * Row keys may be camelCase or snake_case. A row is dropped when it is not an
 * object or when `key`, `label`, `category` or `unit` is missing or empty.
 * Optional fields (`templateSection`, `statement`, `detailCount`,
 * `resolutionMethod`, `confidence`, `warningCodes`) are only set on the
 * result when the input carries an accepted value for them.
 *
 * @param value - Expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is (same reference) when `value` is not
 *   an object. Defaults to a fresh empty map.
 * @returns A new map with one array per statement kind; kinds whose entry is
 *   not an array are left empty.
 */
function normalizeSurfaceRows(
  value: unknown,
  fallbackRows: SurfaceRowMap = emptySurfaceRows(),
): SurfaceRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  const result = emptySurfaceRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const candidates = source[statement];
    if (!Array.isArray(candidates)) {
      continue;
    }

    const surfaceRows: SurfaceFinancialRow[] = [];
    for (const candidate of candidates) {
      const raw = asObject(candidate);
      if (raw === null) {
        continue;
      }

      const key = asString(raw.key);
      const label = asString(raw.label);
      const category = asString(raw.category);
      const unit = asString(raw.unit);
      if (!(key && label && category && unit)) {
        continue;
      }

      // Required fields first; optional fields are attached below.
      const surfaceRow: SurfaceFinancialRow = {
        key,
        label,
        category: category as SurfaceFinancialRow["category"],
        order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
        unit: unit as SurfaceFinancialRow["unit"],
        values: normalizeNumberMap(raw.values),
        sourceConcepts: normalizeStringArray(
          raw.sourceConcepts ?? raw.source_concepts,
        ),
        sourceRowKeys: normalizeStringArray(
          raw.sourceRowKeys ?? raw.source_row_keys,
        ),
        sourceFactIds: normalizeNumberArray(
          raw.sourceFactIds ?? raw.source_fact_ids,
        ),
        formulaKey: asNullableString(raw.formulaKey ?? raw.formula_key),
        hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
        resolvedSourceRowKeys: normalizeNullableStringMap(
          raw.resolvedSourceRowKeys ?? raw.resolved_source_row_keys,
        ),
      };

      const templateSection = asString(
        raw.templateSection ?? raw.template_section,
      );
      if (templateSection) {
        surfaceRow.templateSection =
          templateSection as SurfaceFinancialRow["templateSection"];
      }

      // "comprehensive_income" is deliberately not copied onto the row.
      const rowStatement = asStatementKind(raw.statement);
      if (
        rowStatement === "income" ||
        rowStatement === "balance" ||
        rowStatement === "cash_flow" ||
        rowStatement === "equity" ||
        rowStatement === "disclosure"
      ) {
        surfaceRow.statement = rowStatement;
      }

      const detailCount = asNumber(raw.detailCount ?? raw.detail_count);
      if (detailCount !== null) {
        surfaceRow.detailCount = detailCount;
      }

      const resolutionMethod = raw.resolutionMethod ?? raw.resolution_method;
      if (
        resolutionMethod === "direct" ||
        resolutionMethod === "surface_bridge" ||
        resolutionMethod === "formula_derived" ||
        resolutionMethod === "not_meaningful"
      ) {
        surfaceRow.resolutionMethod = resolutionMethod;
      }

      const confidence = raw.confidence;
      if (
        confidence === "high" ||
        confidence === "medium" ||
        confidence === "low"
      ) {
        surfaceRow.confidence = confidence;
      }

      const warningCodes = normalizeStringArray(
        raw.warningCodes ?? raw.warning_codes,
      );
      if (warningCodes.length > 0) {
        surfaceRow.warningCodes = warningCodes;
      }

      surfaceRows.push(surfaceRow);
    }

    result[statement] = surfaceRows;
  }

  return result;
}
|
|
|
|
/**
 * Normalizes an unknown value into detail rows grouped first by statement
 * kind and then by surface key.
 *
 * Row keys may be camelCase or snake_case. A row is dropped when it is not an
 * object or when `key`, `label`, `conceptKey`, `qname`, `namespaceUri` or
 * `localName` is missing or empty (`key` falls back to the concept key).
 * Every surface key present in the input is kept, with an empty list when its
 * value is not an array.
 *
 * @param value - Expected to be an object keyed by statement kind.
 * @param fallbackRows - Returned as-is (same reference) when `value` is not
 *   an object. Defaults to a fresh empty map.
 * @returns A new map with one group object per statement kind.
 */
function normalizeDetailRows(
  value: unknown,
  fallbackRows: DetailRowMap = emptyDetailRows(),
): DetailRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  // Converts one raw entry into a detail row, or null when it is unusable.
  const toDetailRow = (candidate: unknown, surfaceKey: string) => {
    const raw = asObject(candidate);
    if (raw === null) {
      return null;
    }

    const conceptKey = asString(raw.conceptKey ?? raw.concept_key);
    const key = asString(raw.key) ?? conceptKey;
    const label = asString(raw.label);
    const qname = asString(raw.qname);
    const namespaceUri = asString(raw.namespaceUri ?? raw.namespace_uri);
    const localName = asString(raw.localName ?? raw.local_name);
    if (
      !(key && label && conceptKey && qname && namespaceUri && localName)
    ) {
      return null;
    }

    return {
      key,
      // Rows without their own parent key inherit the group they sit in.
      parentSurfaceKey:
        asString(raw.parentSurfaceKey ?? raw.parent_surface_key) ??
        surfaceKey,
      label,
      conceptKey,
      qname,
      namespaceUri,
      localName,
      unit: asNullableString(raw.unit),
      values: normalizeNumberMap(raw.values),
      sourceFactIds: normalizeNumberArray(
        raw.sourceFactIds ?? raw.source_fact_ids,
      ),
      isExtension: asBoolean(raw.isExtension ?? raw.is_extension),
      dimensionsSummary: normalizeStringArray(
        raw.dimensionsSummary ?? raw.dimensions_summary,
      ),
      residualFlag: asBoolean(raw.residualFlag ?? raw.residual_flag),
    };
  };

  const result = emptyDetailRows();
  for (const statement of FINANCIAL_STATEMENT_KINDS) {
    const groups = asObject(source[statement]) ?? {};
    result[statement] = Object.fromEntries(
      Object.entries(groups).map(([surfaceKey, candidates]) => {
        const detailRows = Array.isArray(candidates)
          ? candidates
              .map((candidate) => toDetailRow(candidate, surfaceKey))
              .filter((row): row is DetailFinancialRow => row !== null)
          : [];

        return [surfaceKey, detailRows];
      }),
    );
  }

  return result;
}
|
|
|
|
/**
 * Normalizes an unknown value into a list of structured KPI rows.
 *
 * Row keys may be camelCase or snake_case. A row is dropped when it is not an
 * object, when `key`, `label`, `category` or `unit` is missing or empty, or
 * when its provenance type is neither `"taxonomy"` nor `"structured_note"`.
 *
 * @param value - Expected to be an array; anything else yields `[]`.
 * @returns The valid KPI rows, in input order.
 */
function normalizeKpiRows(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  const kpiRows: StructuredKpiRow[] = [];
  for (const candidate of value) {
    const raw = asObject(candidate);
    if (raw === null) {
      continue;
    }

    const key = asString(raw.key);
    const label = asString(raw.label);
    const category = asString(raw.category);
    const unit = asString(raw.unit);
    if (!(key && label && category && unit)) {
      continue;
    }

    const provenanceType = raw.provenanceType ?? raw.provenance_type;
    if (provenanceType !== "taxonomy" && provenanceType !== "structured_note") {
      continue;
    }

    kpiRows.push({
      key,
      label,
      category: category as StructuredKpiRow["category"],
      unit: unit as StructuredKpiRow["unit"],
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      segment: asNullableString(raw.segment),
      axis: asNullableString(raw.axis),
      member: asNullableString(raw.member),
      values: normalizeNumberMap(raw.values),
      sourceConcepts: normalizeStringArray(
        raw.sourceConcepts ?? raw.source_concepts,
      ),
      sourceFactIds: normalizeNumberArray(
        raw.sourceFactIds ?? raw.source_fact_ids,
      ),
      provenanceType,
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
    } satisfies StructuredKpiRow);
  }

  return kpiRows;
}
|
|
|
|
/**
 * Normalizes an unknown value into a list of computed-metric definitions.
 *
 * An entry is dropped when it is not an object, when `key`, `label`,
 * `category` or `unit` is missing or empty, or when its `computation` is not
 * one of the recognised shapes:
 *
 * - `ratio` - needs `numerator` and `denominator`;
 * - `yoy_growth` - needs `source`;
 * - `cagr` - needs `source` and a numeric `years`;
 * - `per_share` - needs `source` and `shares_key` (or `sharesKey`);
 * - `simple` - needs `formula`.
 *
 * @param value - Expected to be an array; anything else yields `[]`.
 * @returns The valid definitions, in input order.
 */
function normalizeComputedDefinitions(value: unknown): ComputedDefinition[] {
  if (!Array.isArray(value)) {
    return [];
  }

  // Validates the computation payload for the given type tag; returns null
  // for unknown tags or when a required operand is missing or empty.
  const readComputation = (spec: Record<string, unknown>, type: string) => {
    switch (type) {
      case "ratio": {
        const numerator = asString(spec.numerator);
        const denominator = asString(spec.denominator);
        if (!numerator || !denominator) {
          return null;
        }
        return { type: "ratio", numerator, denominator } as const;
      }
      case "yoy_growth": {
        const source = asString(spec.source);
        if (!source) {
          return null;
        }
        return { type: "yoy_growth", source } as const;
      }
      case "cagr": {
        const source = asString(spec.source);
        const years = asNumber(spec.years);
        if (!source || years === null) {
          return null;
        }
        return { type: "cagr", source, years } as const;
      }
      case "per_share": {
        const source = asString(spec.source);
        const shares_key = asString(spec.shares_key ?? spec.sharesKey);
        if (!source || !shares_key) {
          return null;
        }
        return { type: "per_share", source, shares_key } as const;
      }
      case "simple": {
        const formula = asString(spec.formula);
        if (!formula) {
          return null;
        }
        return { type: "simple", formula } as const;
      }
      default:
        return null;
    }
  };

  const definitions: ComputedDefinition[] = [];
  for (const candidate of value) {
    const raw = asObject(candidate);
    if (raw === null) {
      continue;
    }

    const key = asString(raw.key);
    const label = asString(raw.label);
    const category = asString(raw.category);
    const unit = asString(raw.unit);
    const spec = asObject(raw.computation);
    const specType = asString(spec?.type);
    if (!(key && label && category && unit && spec && specType)) {
      continue;
    }

    const computation = readComputation(spec, specType);
    if (computation === null) {
      continue;
    }

    definitions.push({
      key,
      label,
      category,
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      unit: unit as ComputedDefinition["unit"],
      computation,
      supported_cadences: normalizeStringArray(
        raw.supported_cadences ?? raw.supportedCadences,
      ) as ComputedDefinition["supported_cadences"],
      requires_external_data: normalizeStringArray(
        raw.requires_external_data ?? raw.requiresExternalData,
      ),
    });
  }

  return definitions;
}
|
|
|
|
/**
 * Normalizes an unknown value into a `NormalizationSummary`.
 *
 * Each counter is read from its camelCase key, falling back to the
 * snake_case key, and defaults to `0` when neither holds a usable number.
 *
 * @param value - Expected to be an object.
 * @returns The summary, or `null` when `value` is not an object.
 */
function normalizeNormalizationSummary(value: unknown) {
  const raw = asObject(value);
  if (raw === null) {
    return null;
  }

  // Reads one counter under either key spelling, defaulting to zero.
  const countOf = (camelKey: string, snakeKey: string) =>
    asNumber(raw[camelKey] ?? raw[snakeKey]) ?? 0;

  return {
    surfaceRowCount: countOf("surfaceRowCount", "surface_row_count"),
    detailRowCount: countOf("detailRowCount", "detail_row_count"),
    kpiRowCount: countOf("kpiRowCount", "kpi_row_count"),
    unmappedRowCount: countOf("unmappedRowCount", "unmapped_row_count"),
    materialUnmappedRowCount: countOf(
      "materialUnmappedRowCount",
      "material_unmapped_row_count",
    ),
    residualPrimaryCount: countOf(
      "residualPrimaryCount",
      "residual_primary_count",
    ),
    residualDisclosureCount: countOf(
      "residualDisclosureCount",
      "residual_disclosure_count",
    ),
    unsupportedConceptCount: countOf(
      "unsupportedConceptCount",
      "unsupported_concept_count",
    ),
    issuerOverlayMatchCount: countOf(
      "issuerOverlayMatchCount",
      "issuer_overlay_match_count",
    ),
    warnings: normalizeStringArray(raw.warnings),
  } satisfies NormalizationSummary;
}
|
|
|
|
/**
 * Normalizes every structured payload field of a taxonomy snapshot.
 *
 * Each field is validated independently by its dedicated normalizer. When
 * `statement_rows` is not an object, the normalized `faithful_rows` are used
 * in its place - the two result properties then refer to the same object.
 *
 * @param input - Raw (untyped) payload fields.
 * @returns The same fields in their normalized, typed form.
 */
export function normalizeFilingTaxonomySnapshotPayload(input: {
  periods: unknown;
  faithful_rows: unknown;
  statement_rows: unknown;
  surface_rows: unknown;
  detail_rows: unknown;
  kpi_rows: unknown;
  computed_definitions: unknown;
  normalization_summary: unknown;
}) {
  const faithfulRows = normalizeStatementRows(input.faithful_rows);
  const statementRows = normalizeStatementRows(
    input.statement_rows,
    faithfulRows,
  );
  const periods = normalizePeriods(input.periods);
  const surfaceRows = normalizeSurfaceRows(input.surface_rows);
  const detailRows = normalizeDetailRows(input.detail_rows);
  const kpiRows = normalizeKpiRows(input.kpi_rows);
  const computedDefinitions = normalizeComputedDefinitions(
    input.computed_definitions,
  );
  const normalizationSummary = normalizeNormalizationSummary(
    input.normalization_summary,
  );

  return {
    periods,
    faithful_rows: faithfulRows,
    statement_rows: statementRows,
    surface_rows: surfaceRows,
    detail_rows: detailRows,
    kpi_rows: kpiRows,
    computed_definitions: computedDefinitions,
    normalization_summary: normalizationSummary,
  };
}
|
|
|
|
/**
 * Creates a statement-row map with an empty list for every statement kind.
 * A new object and new arrays are created on each call.
 */
function emptyStatementRows(): StatementRowMap {
  const rows: StatementRowMap = {
    income: [],
    balance: [],
    cash_flow: [],
    disclosure: [],
    equity: [],
    comprehensive_income: [],
  };

  return rows;
}
|
|
|
|
/**
 * Creates a surface-row map with an empty list for every statement kind.
 * A new object and new arrays are created on each call.
 */
function emptySurfaceRows(): SurfaceRowMap {
  const rows: SurfaceRowMap = {
    income: [],
    balance: [],
    cash_flow: [],
    disclosure: [],
    equity: [],
    comprehensive_income: [],
  };

  return rows;
}
|
|
|
|
/**
 * Creates a detail-row map with an empty group object for every statement
 * kind. A new object and new group objects are created on each call.
 */
function emptyDetailRows(): DetailRowMap {
  const rows: DetailRowMap = {
    income: {},
    balance: {},
    cash_flow: {},
    disclosure: {},
    equity: {},
    comprehensive_income: {},
  };

  return rows;
}
|
|
|
|
/**
 * Converts a row selected from `filingTaxonomySnapshot` into a
 * `FilingTaxonomySnapshotRecord`.
 *
 * The eight structured payload fields are run through
 * `normalizeFilingTaxonomySnapshotPayload`; scalar columns are copied, and
 * `derived_metrics`, `validation_result` and `issuer_overlay_revision_id`
 * default to `null` when absent.
 */
function toSnapshotRecord(
  row: typeof filingTaxonomySnapshot.$inferSelect,
): FilingTaxonomySnapshotRecord {
  const {
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    computed_definitions,
    normalization_summary,
  } = normalizeFilingTaxonomySnapshotPayload({
    periods: row.periods,
    faithful_rows: row.faithful_rows,
    statement_rows: row.statement_rows,
    surface_rows: row.surface_rows,
    detail_rows: row.detail_rows,
    kpi_rows: row.kpi_rows,
    computed_definitions: row.computed_definitions,
    normalization_summary: row.normalization_summary,
  });

  return {
    id: row.id,
    filing_id: row.filing_id,
    ticker: row.ticker,
    filing_date: row.filing_date,
    filing_type: row.filing_type,
    parse_status: row.parse_status,
    parse_error: row.parse_error,
    source: row.source,
    parser_engine: row.parser_engine,
    parser_version: row.parser_version,
    taxonomy_regime: row.taxonomy_regime,
    fiscal_pack: row.fiscal_pack,
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    computed_definitions,
    derived_metrics: row.derived_metrics ?? null,
    validation_result: row.validation_result ?? null,
    normalization_summary,
    issuer_overlay_revision_id: row.issuer_overlay_revision_id ?? null,
    facts_count: row.facts_count,
    concepts_count: row.concepts_count,
    dimensions_count: row.dimensions_count,
    created_at: row.created_at,
    updated_at: row.updated_at,
  };
}
|
|
|
|
/**
 * Converts a row selected from `filingTaxonomyContext` into a
 * `FilingTaxonomyContextRecord`.
 *
 * Columns are copied one-to-one; `segment_json` and `scenario_json` default
 * to `null` when absent.
 */
function toContextRecord(
  row: typeof filingTaxonomyContext.$inferSelect,
): FilingTaxonomyContextRecord {
  const {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    created_at,
  } = row;

  return {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    segment_json: row.segment_json ?? null,
    scenario_json: row.scenario_json ?? null,
    created_at,
  };
}
|
|
|
|
/**
 * Converts a row selected from `filingTaxonomyAsset` into a
 * `FilingTaxonomyAssetRecord`.
 *
 * Columns are copied one-to-one except `score`, which is passed through
 * `asNumber` and becomes `null` when it is not a finite number.
 */
function toAssetRecord(
  row: typeof filingTaxonomyAsset.$inferSelect,
): FilingTaxonomyAssetRecord {
  const { id, snapshot_id, asset_type, name, url, size_bytes } = row;
  const score = asNumber(row.score);

  return {
    id,
    snapshot_id,
    asset_type,
    name,
    url,
    size_bytes,
    score,
    is_selected: row.is_selected,
    created_at: row.created_at,
  };
}
|
|
|
|
/**
 * Converts a row selected from `filingTaxonomyConcept` into a
 * `FilingTaxonomyConceptRecord`.
 *
 * Columns are copied one-to-one except that `statement_kind` defaults to
 * `null` when absent and `presentation_order` is passed through `asNumber`.
 */
function toConceptRecord(
  row: typeof filingTaxonomyConcept.$inferSelect,
): FilingTaxonomyConceptRecord {
  const statementKind = row.statement_kind ?? null;
  const presentationOrder = asNumber(row.presentation_order);

  return {
    id: row.id,
    snapshot_id: row.snapshot_id,
    concept_key: row.concept_key,
    qname: row.qname,
    namespace_uri: row.namespace_uri,
    local_name: row.local_name,
    label: row.label,
    is_extension: row.is_extension,
    balance: row.balance,
    period_type: row.period_type,
    data_type: row.data_type,
    statement_kind: statementKind,
    role_uri: row.role_uri,
    authoritative_concept_key: row.authoritative_concept_key,
    mapping_method: row.mapping_method,
    surface_key: row.surface_key,
    detail_parent_surface_key: row.detail_parent_surface_key,
    kpi_key: row.kpi_key,
    residual_flag: row.residual_flag,
    presentation_order: presentationOrder,
    presentation_depth: row.presentation_depth,
    parent_concept_key: row.parent_concept_key,
    is_abstract: row.is_abstract,
    created_at: row.created_at,
  };
}
|
|
|
|
/**
 * Converts a row selected from `filingTaxonomyFact` into a
 * `FilingTaxonomyFactRecord`.
 *
 * Columns are copied one-to-one except that `statement_kind` defaults to
 * `null` when absent and `value_num` is converted with `asNumber`.
 *
 * @throws Error when `value_num` cannot be converted to a finite number.
 */
function toFactRecord(
  row: typeof filingTaxonomyFact.$inferSelect,
): FilingTaxonomyFactRecord {
  const numericValue = asNumber(row.value_num);
  if (numericValue === null) {
    throw new Error(`Invalid value_num for taxonomy fact row ${row.id}`);
  }

  const statementKind = row.statement_kind ?? null;

  return {
    id: row.id,
    snapshot_id: row.snapshot_id,
    concept_key: row.concept_key,
    qname: row.qname,
    namespace_uri: row.namespace_uri,
    local_name: row.local_name,
    data_type: row.data_type,
    statement_kind: statementKind,
    role_uri: row.role_uri,
    authoritative_concept_key: row.authoritative_concept_key,
    mapping_method: row.mapping_method,
    surface_key: row.surface_key,
    detail_parent_surface_key: row.detail_parent_surface_key,
    kpi_key: row.kpi_key,
    residual_flag: row.residual_flag,
    context_id: row.context_id,
    unit: row.unit,
    decimals: row.decimals,
    precision: row.precision,
    nil: row.nil,
    value_num: numericValue,
    period_start: row.period_start,
    period_end: row.period_end,
    period_instant: row.period_instant,
    dimensions: row.dimensions,
    is_dimensionless: row.is_dimensionless,
    source_file: row.source_file,
    created_at: row.created_at,
  };
}
|
|
|
|
/**
 * Converts a persisted metric-validation row into a
 * `FilingTaxonomyMetricValidationRecord`.
 *
 * The four numeric columns (`taxonomy_value`, `llm_value`, `absolute_diff`,
 * `relative_diff`) are passed through `asNumber`, and a nullish
 * `evidence_pages` becomes an empty array. All other columns are copied as-is.
 *
 * @param row - A selected row of the `filingTaxonomyMetricValidation` table.
 * @returns The record representation of the row.
 */
function toMetricValidationRecord(
  row: typeof filingTaxonomyMetricValidation.$inferSelect,
): FilingTaxonomyMetricValidationRecord {
  const taxonomyValue = asNumber(row.taxonomy_value);
  const llmValue = asNumber(row.llm_value);
  const absoluteDiff = asNumber(row.absolute_diff);
  const relativeDiff = asNumber(row.relative_diff);

  const record: FilingTaxonomyMetricValidationRecord = {
    id: row.id,
    snapshot_id: row.snapshot_id,
    metric_key: row.metric_key,
    taxonomy_value: taxonomyValue,
    llm_value: llmValue,
    absolute_diff: absoluteDiff,
    relative_diff: relativeDiff,
    status: row.status,
    evidence_pages: row.evidence_pages ?? [],
    pdf_url: row.pdf_url,
    provider: row.provider,
    model: row.model,
    error: row.error,
    created_at: row.created_at,
    updated_at: row.updated_at,
  };

  return record;
}
|
|
|
|
/**
 * Looks up the taxonomy snapshot stored for a filing.
 *
 * @param filingId - Value matched against `filingTaxonomySnapshot.filing_id`.
 * @returns The snapshot converted with `toSnapshotRecord`, or `null` when no
 *   row matches.
 */
export async function getFilingTaxonomySnapshotByFilingId(filingId: number) {
  const matches = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.filing_id, filingId))
    .limit(1);

  const [snapshotRow] = matches;
  if (!snapshotRow) {
    return null;
  }

  return toSnapshotRecord(snapshotRow);
}
|
|
|
|
/**
 * Loads every asset row attached to a snapshot, highest id first.
 *
 * @param snapshotId - Value matched against `filingTaxonomyAsset.snapshot_id`.
 * @returns The rows converted with `toAssetRecord`.
 */
async function listFilingTaxonomyAssets(snapshotId: number) {
  const inSnapshot = eq(filingTaxonomyAsset.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyAsset.id);

  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(inSnapshot)
    .orderBy(byIdDescending);

  return assetRows.map(toAssetRecord);
}
|
|
|
|
/**
 * Loads every context row attached to a snapshot, highest id first.
 *
 * @param snapshotId - Value matched against
 *   `filingTaxonomyContext.snapshot_id`.
 * @returns The rows converted with `toContextRecord`.
 */
async function listFilingTaxonomyContexts(snapshotId: number) {
  const inSnapshot = eq(filingTaxonomyContext.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyContext.id);

  const contextRows = await db
    .select()
    .from(filingTaxonomyContext)
    .where(inSnapshot)
    .orderBy(byIdDescending);

  return contextRows.map(toContextRecord);
}
|
|
|
|
/**
 * Loads every concept row attached to a snapshot, highest id first.
 *
 * @param snapshotId - Value matched against
 *   `filingTaxonomyConcept.snapshot_id`.
 * @returns The rows converted with `toConceptRecord`.
 */
async function listFilingTaxonomyConcepts(snapshotId: number) {
  const inSnapshot = eq(filingTaxonomyConcept.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyConcept.id);

  const conceptRows = await db
    .select()
    .from(filingTaxonomyConcept)
    .where(inSnapshot)
    .orderBy(byIdDescending);

  return conceptRows.map(toConceptRecord);
}
|
|
|
|
/**
 * Loads every fact row attached to a snapshot, highest id first.
 *
 * @param snapshotId - Value matched against `filingTaxonomyFact.snapshot_id`.
 * @returns The rows converted with `toFactRecord`.
 * @throws Error propagated from `toFactRecord` when a row's `value_num`
 *   cannot be converted.
 */
async function listFilingTaxonomyFacts(snapshotId: number) {
  const inSnapshot = eq(filingTaxonomyFact.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyFact.id);

  const factRows = await db
    .select()
    .from(filingTaxonomyFact)
    .where(inSnapshot)
    .orderBy(byIdDescending);

  return factRows.map(toFactRecord);
}
|
|
|
|
/**
 * Loads every metric-validation row attached to a snapshot, highest id first.
 *
 * @param snapshotId - Value matched against
 *   `filingTaxonomyMetricValidation.snapshot_id`.
 * @returns The rows converted with `toMetricValidationRecord`.
 */
async function listFilingTaxonomyMetricValidations(snapshotId: number) {
  const inSnapshot = eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyMetricValidation.id);

  const validationRows = await db
    .select()
    .from(filingTaxonomyMetricValidation)
    .where(inSnapshot)
    .orderBy(byIdDescending);

  return validationRows.map(toMetricValidationRecord);
}
|
|
|
|
/**
 * Inserts or updates the taxonomy snapshot for a filing and replaces all of
 * its child rows, inside a single transaction.
 *
 * Steps, in order:
 * 1. Upsert the snapshot row, using `filing_id` as the conflict target. On
 *    conflict every column except `filing_id` and `created_at` is overwritten.
 * 2. Delete the snapshot's existing assets, contexts, concepts, facts and
 *    metric validations.
 * 3. Re-insert contexts, assets, concepts, facts and metric validations from
 *    `input` (each group is skipped when its array is empty).
 *
 * @param input - Snapshot payload plus child collections. Row collections are
 *   first passed through `normalizeFilingTaxonomySnapshotPayload`.
 * @returns The saved snapshot converted with `toSnapshotRecord`.
 * @throws Error with a step-specific message when the child delete or any
 *   child insert fails; the underlying error is interpolated into the message.
 */
export async function upsertFilingTaxonomySnapshot(
  input: UpsertFilingTaxonomySnapshotInput,
) {
  const timestamp = new Date().toISOString();
  const normalized = normalizeFilingTaxonomySnapshotPayload(input);

  // Columns written identically by the insert path and the conflict-update
  // path. `filing_id` and `created_at` are insert-only and are added below.
  const sharedColumns = {
    ticker: input.ticker,
    filing_date: input.filing_date,
    filing_type: input.filing_type,
    parse_status: input.parse_status,
    parse_error: input.parse_error,
    source: input.source,
    parser_engine: input.parser_engine,
    parser_version: input.parser_version,
    taxonomy_regime: input.taxonomy_regime,
    fiscal_pack: input.fiscal_pack,
    periods: normalized.periods,
    faithful_rows: normalized.faithful_rows,
    statement_rows: normalized.statement_rows,
    surface_rows: normalized.surface_rows,
    detail_rows: normalized.detail_rows,
    kpi_rows: normalized.kpi_rows,
    computed_definitions: normalized.computed_definitions,
    derived_metrics: input.derived_metrics,
    validation_result: input.validation_result,
    normalization_summary: normalized.normalization_summary,
    issuer_overlay_revision_id: input.issuer_overlay_revision_id ?? null,
    facts_count: input.facts_count,
    concepts_count: input.concepts_count,
    dimensions_count: input.dimensions_count,
    updated_at: timestamp,
  };

  return db.transaction(async (tx) => {
    // Runs one step and rethrows any failure with a step-specific message.
    const runStep = async (
      step: () => PromiseLike<unknown>,
      describeFailure: (error: unknown) => string,
    ) => {
      try {
        await step();
      } catch (error) {
        throw new Error(describeFailure(error));
      }
    };

    const [saved] = await tx
      .insert(filingTaxonomySnapshot)
      .values({
        filing_id: input.filing_id,
        ...sharedColumns,
        created_at: timestamp,
      })
      .onConflictDoUpdate({
        target: filingTaxonomySnapshot.filing_id,
        set: sharedColumns,
      })
      .returning();

    const snapshotId = saved.id;

    // Child rows are fully replaced on every upsert: clear first, then refill.
    await runStep(
      async () => {
        await tx
          .delete(filingTaxonomyAsset)
          .where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
        await tx
          .delete(filingTaxonomyContext)
          .where(eq(filingTaxonomyContext.snapshot_id, snapshotId));
        await tx
          .delete(filingTaxonomyConcept)
          .where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
        await tx
          .delete(filingTaxonomyFact)
          .where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
        await tx
          .delete(filingTaxonomyMetricValidation)
          .where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));
      },
      (error) =>
        `Failed to delete child records for snapshot ${snapshotId}: ${error}`,
    );

    if (input.contexts.length > 0) {
      await runStep(
        () =>
          tx.insert(filingTaxonomyContext).values(
            input.contexts.map((context) => ({
              snapshot_id: snapshotId,
              context_id: context.context_id,
              entity_identifier: context.entity_identifier,
              entity_scheme: context.entity_scheme,
              period_start: context.period_start,
              period_end: context.period_end,
              period_instant: context.period_instant,
              segment_json: context.segment_json,
              scenario_json: context.scenario_json,
              created_at: timestamp,
            })),
          ),
        (error) =>
          `Failed to insert ${input.contexts.length} contexts for snapshot ${snapshotId}: ${error}`,
      );
    }

    if (input.assets.length > 0) {
      await runStep(
        () =>
          tx.insert(filingTaxonomyAsset).values(
            input.assets.map((asset) => ({
              snapshot_id: snapshotId,
              asset_type: asset.asset_type,
              name: asset.name,
              url: asset.url,
              size_bytes: asset.size_bytes,
              score: asNumericText(asset.score),
              is_selected: asset.is_selected,
              created_at: timestamp,
            })),
          ),
        (error) =>
          `Failed to insert ${input.assets.length} assets for snapshot ${snapshotId}: ${error}`,
      );
    }

    if (input.concepts.length > 0) {
      await runStep(
        () =>
          tx.insert(filingTaxonomyConcept).values(
            input.concepts.map((concept) => ({
              snapshot_id: snapshotId,
              concept_key: concept.concept_key,
              qname: concept.qname,
              namespace_uri: concept.namespace_uri,
              local_name: concept.local_name,
              label: concept.label,
              is_extension: concept.is_extension,
              balance: concept.balance,
              period_type: concept.period_type,
              data_type: concept.data_type,
              statement_kind: concept.statement_kind,
              role_uri: concept.role_uri,
              authoritative_concept_key: concept.authoritative_concept_key,
              mapping_method: concept.mapping_method,
              surface_key: concept.surface_key,
              detail_parent_surface_key: concept.detail_parent_surface_key,
              kpi_key: concept.kpi_key,
              residual_flag: concept.residual_flag,
              presentation_order: asNumericText(concept.presentation_order),
              presentation_depth: concept.presentation_depth,
              parent_concept_key: concept.parent_concept_key,
              is_abstract: concept.is_abstract,
              created_at: timestamp,
            })),
          ),
        (error) =>
          `Failed to insert ${input.concepts.length} concepts for snapshot ${snapshotId}: ${error}`,
      );
    }

    if (input.facts.length > 0) {
      await runStep(
        () =>
          tx.insert(filingTaxonomyFact).values(
            input.facts.map((fact) => ({
              snapshot_id: snapshotId,
              concept_key: fact.concept_key,
              qname: fact.qname,
              namespace_uri: fact.namespace_uri,
              local_name: fact.local_name,
              data_type: fact.data_type,
              statement_kind: fact.statement_kind,
              role_uri: fact.role_uri,
              authoritative_concept_key: fact.authoritative_concept_key,
              mapping_method: fact.mapping_method,
              surface_key: fact.surface_key,
              detail_parent_surface_key: fact.detail_parent_surface_key,
              kpi_key: fact.kpi_key,
              residual_flag: fact.residual_flag,
              context_id: fact.context_id,
              unit: fact.unit,
              decimals: fact.decimals,
              precision: fact.precision,
              nil: fact.nil,
              // The numeric value is written in its string form.
              value_num: String(fact.value_num),
              period_start: fact.period_start,
              period_end: fact.period_end,
              period_instant: fact.period_instant,
              dimensions: fact.dimensions,
              is_dimensionless: fact.is_dimensionless,
              source_file: fact.source_file,
              created_at: timestamp,
            })),
          ),
        (error) =>
          `Failed to insert ${input.facts.length} facts for snapshot ${snapshotId}: ${error}`,
      );
    }

    if (input.metric_validations.length > 0) {
      await runStep(
        () =>
          tx.insert(filingTaxonomyMetricValidation).values(
            input.metric_validations.map((check) => ({
              snapshot_id: snapshotId,
              metric_key: check.metric_key,
              taxonomy_value: asNumericText(check.taxonomy_value),
              llm_value: asNumericText(check.llm_value),
              absolute_diff: asNumericText(check.absolute_diff),
              relative_diff: asNumericText(check.relative_diff),
              status: check.status,
              evidence_pages: check.evidence_pages,
              pdf_url: check.pdf_url,
              provider: check.provider,
              model: check.model,
              error: check.error,
              created_at: timestamp,
              updated_at: timestamp,
            })),
          ),
        (error) =>
          `Failed to insert ${input.metric_validations.length} metric validations for snapshot ${snapshotId}: ${error}`,
      );
    }

    return toSnapshotRecord(saved);
  });
}
|
|
|
|
/**
 * Lists taxonomy snapshots for a ticker one page at a time, ordered by
 * `filing_date` descending and then `id` descending.
 *
 * @param input.ticker - Ticker symbol; trimmed and upper-cased before matching.
 * @param input.window - `"10y"` keeps only snapshots whose `filing_date` is on
 *   or after the value returned by `tenYearsAgoIso()`; `"all"` applies no date
 *   filter.
 * @param input.filingTypes - Optional filing-type filter; ignored when empty.
 * @param input.limit - Page size. Truncated to an integer and clamped to
 *   1..120; defaults to 40 when omitted or NaN.
 * @param input.cursor - `nextCursor` from the previous page (the id of the
 *   last snapshot returned), or null/omitted for the first page. Values that
 *   do not parse to a positive integer are ignored.
 * @returns The page of snapshots (converted with `toSnapshotRecord`) and
 *   `nextCursor`, which is `null` when there are no further rows.
 */
export async function listFilingTaxonomySnapshotsByTicker(input: {
  ticker: string;
  window: "10y" | "all";
  filingTypes?: Array<"10-K" | "10-Q">;
  limit?: number;
  cursor?: string | null;
}) {
  // A NaN limit would otherwise flow through Math.min/Math.max into `.limit()`.
  const requestedLimit = Math.trunc(input.limit ?? 40);
  const safeLimit = Number.isNaN(requestedLimit)
    ? 40
    : Math.min(Math.max(requestedLimit, 1), 120);
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;
  const constraints = [
    eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase()),
  ];

  if (input.window === "10y") {
    constraints.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }

  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    // Rows are ordered by (filing_date, id), so the page boundary has to be
    // expressed on the same pair. Filtering on `id` alone drops rows whose id
    // is higher than the cursor but whose filing date is older.
    const [cursorRow] = await db
      .select({ filing_date: filingTaxonomySnapshot.filing_date })
      .from(filingTaxonomySnapshot)
      .where(eq(filingTaxonomySnapshot.id, cursorId))
      .limit(1);

    constraints.push(
      cursorRow
        ? sql`(${filingTaxonomySnapshot.filing_date} < ${cursorRow.filing_date} or (${filingTaxonomySnapshot.filing_date} = ${cursorRow.filing_date} and ${filingTaxonomySnapshot.id} < ${cursorId}))`
        : // The cursor row no longer exists: fall back to the id-only boundary.
          lt(filingTaxonomySnapshot.id, cursorId),
    );
  }

  if (input.filingTypes && input.filingTypes.length > 0) {
    constraints.push(
      inArray(filingTaxonomySnapshot.filing_type, input.filingTypes),
    );
  }

  // Fetch one extra row so the presence of a further page can be detected.
  const rows = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(and(...constraints))
    .orderBy(
      desc(filingTaxonomySnapshot.filing_date),
      desc(filingTaxonomySnapshot.id),
    )
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const usedRows = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore
    ? String(usedRows[usedRows.length - 1]?.id ?? "")
    : null;

  return {
    snapshots: usedRows.map(toSnapshotRecord),
    nextCursor,
  };
}
|
|
|
|
/**
 * Counts a ticker's taxonomy snapshots per parse status.
 *
 * @param ticker - Ticker symbol; trimmed and upper-cased before matching.
 * @returns A map with an entry for each of `ready`, `partial` and `failed`;
 *   statuses with no snapshots are reported as 0.
 */
export async function countFilingTaxonomySnapshotStatuses(ticker: string) {
  const normalizedTicker = ticker.trim().toUpperCase();

  const groupedCounts = await db
    .select({
      status: filingTaxonomySnapshot.parse_status,
      count: sql<string>`count(*)`,
    })
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.ticker, normalizedTicker))
    .groupBy(filingTaxonomySnapshot.parse_status);

  // Start every status at zero so absent groups still appear in the result.
  const totals: Record<FilingTaxonomyParseStatus, number> = {
    ready: 0,
    partial: 0,
    failed: 0,
  };

  for (const group of groupedCounts) {
    totals[group.status] = Number(group.count);
  }

  return totals;
}
|
|
|
|
/**
 * Lists taxonomy facts for a ticker one page at a time, ordered by fact `id`
 * descending, joining each fact to its snapshot for filing metadata.
 *
 * @param input.ticker - Ticker symbol; trimmed and upper-cased before matching
 *   against the snapshot's ticker.
 * @param input.window - `"10y"` keeps only facts whose snapshot `filing_date`
 *   is on or after the value returned by `tenYearsAgoIso()`; `"all"` applies
 *   no date filter.
 * @param input.statement - Optional filter on the fact's `statement_kind`.
 * @param input.filingTypes - Optional filing-type filter; ignored when empty.
 * @param input.cursor - `nextCursor` from the previous page (the id of the
 *   last fact returned), or null/omitted for the first page. Values that do
 *   not parse to a positive integer are ignored.
 * @param input.limit - Page size. Truncated to an integer and clamped to
 *   1..10000; defaults to 500 when omitted or NaN.
 * @returns The page of facts as `TaxonomyFactRow` objects and `nextCursor`,
 *   which is `null` when there are no further rows.
 * @throws Error when `asNumber` returns `null` for a returned fact's
 *   `value_num`.
 */
export async function listTaxonomyFactsByTicker(input: {
  ticker: string;
  window: "10y" | "all";
  statement?: FinancialStatementKind;
  filingTypes?: Array<"10-K" | "10-Q">;
  cursor?: string | null;
  limit?: number;
}) {
  // A NaN limit would otherwise flow through Math.min/Math.max into `.limit()`.
  const requestedLimit = Math.trunc(input.limit ?? 500);
  const safeLimit = Number.isNaN(requestedLimit)
    ? 500
    : Math.min(Math.max(requestedLimit, 1), 10000);
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;
  const conditions = [
    eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase()),
  ];

  if (input.window === "10y") {
    conditions.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }

  if (input.statement) {
    conditions.push(eq(filingTaxonomyFact.statement_kind, input.statement));
  }

  if (input.filingTypes && input.filingTypes.length > 0) {
    conditions.push(
      inArray(filingTaxonomySnapshot.filing_type, input.filingTypes),
    );
  }

  // Rows are ordered by fact id alone, so an id boundary is a complete cursor.
  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    conditions.push(lt(filingTaxonomyFact.id, cursorId));
  }

  // Fetch one extra row so the presence of a further page can be detected.
  const rows = await db
    .select({
      id: filingTaxonomyFact.id,
      snapshot_id: filingTaxonomyFact.snapshot_id,
      filing_id: filingTaxonomySnapshot.filing_id,
      filing_date: filingTaxonomySnapshot.filing_date,
      statement_kind: filingTaxonomyFact.statement_kind,
      role_uri: filingTaxonomyFact.role_uri,
      concept_key: filingTaxonomyFact.concept_key,
      qname: filingTaxonomyFact.qname,
      namespace_uri: filingTaxonomyFact.namespace_uri,
      local_name: filingTaxonomyFact.local_name,
      value_num: filingTaxonomyFact.value_num,
      context_id: filingTaxonomyFact.context_id,
      unit: filingTaxonomyFact.unit,
      decimals: filingTaxonomyFact.decimals,
      period_start: filingTaxonomyFact.period_start,
      period_end: filingTaxonomyFact.period_end,
      period_instant: filingTaxonomyFact.period_instant,
      dimensions: filingTaxonomyFact.dimensions,
      is_dimensionless: filingTaxonomyFact.is_dimensionless,
      source_file: filingTaxonomyFact.source_file,
    })
    .from(filingTaxonomyFact)
    .innerJoin(
      filingTaxonomySnapshot,
      eq(filingTaxonomyFact.snapshot_id, filingTaxonomySnapshot.id),
    )
    .where(and(...conditions))
    .orderBy(desc(filingTaxonomyFact.id))
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const used = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore ? String(used[used.length - 1]?.id ?? "") : null;

  const facts: TaxonomyFactRow[] = used.map((row) => {
    const value = asNumber(row.value_num);
    if (value === null) {
      throw new Error(`Invalid value_num in taxonomy fact ${row.id}`);
    }

    return {
      id: row.id,
      snapshotId: row.snapshot_id,
      filingId: row.filing_id,
      filingDate: row.filing_date,
      statement: row.statement_kind,
      roleUri: row.role_uri,
      conceptKey: row.concept_key,
      qname: row.qname,
      namespaceUri: row.namespace_uri,
      localName: row.local_name,
      value,
      contextId: row.context_id,
      unit: row.unit,
      decimals: row.decimals,
      periodStart: row.period_start,
      periodEnd: row.period_end,
      periodInstant: row.period_instant,
      dimensions: row.dimensions,
      isDimensionless: row.is_dimensionless,
      sourceFile: row.source_file,
    };
  });

  return {
    facts,
    nextCursor,
  };
}
|
|
|
|
/**
 * Loads the asset rows belonging to any of the given snapshots, highest id
 * first.
 *
 * @param snapshotIds - Snapshot ids to match; an empty list short-circuits to
 *   an empty result without querying.
 * @returns The rows converted with `toAssetRecord`.
 */
async function listTaxonomyAssetsBySnapshotIds(snapshotIds: number[]) {
  const nothingRequested = snapshotIds.length === 0;
  if (nothingRequested) {
    return [];
  }

  const inRequestedSnapshots = inArray(
    filingTaxonomyAsset.snapshot_id,
    snapshotIds,
  );

  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(inRequestedSnapshots)
    .orderBy(desc(filingTaxonomyAsset.id));

  return assetRows.map(toAssetRecord);
}
|
|
|
|
/**
 * Loads the concept rows belonging to any of the given snapshots, highest id
 * first.
 *
 * @param snapshotIds - Snapshot ids to match; an empty list short-circuits to
 *   an empty result without querying.
 * @returns The rows converted with `toConceptRecord`.
 */
export async function listFilingTaxonomyConceptsBySnapshotIds(
  snapshotIds: number[],
) {
  const nothingRequested = snapshotIds.length === 0;
  if (nothingRequested) {
    return [];
  }

  const inRequestedSnapshots = inArray(
    filingTaxonomyConcept.snapshot_id,
    snapshotIds,
  );

  const conceptRows = await db
    .select()
    .from(filingTaxonomyConcept)
    .where(inRequestedSnapshots)
    .orderBy(desc(filingTaxonomyConcept.id));

  return conceptRows.map(toConceptRecord);
}
|
|
|
|
/**
 * Exposes otherwise module-private helpers under a single export.
 * NOTE(review): presumably intended for tests only — confirm against callers.
 */
export const __filingTaxonomyInternals = {
  normalizeFilingTaxonomySnapshotPayload:
    normalizeFilingTaxonomySnapshotPayload,
  toSnapshotRecord: toSnapshotRecord,
};
|