- Wrap snapshot updates in transactions with error context for each child table
- Add sidecar retry with exponential backoff (3 attempts, 2s base, 10s max, 30% jitter)
- Add HTTP timeout (30s per request) and SEC rate limiting (10 req/s) in Rust
- Add XBRL validation with status reporting (checks root element, tag balance)
1345 lines
43 KiB
TypeScript
1345 lines
43 KiB
TypeScript
import { and, desc, eq, gte, inArray, lt, sql } from 'drizzle-orm';
|
|
import type {
|
|
DetailFinancialRow,
|
|
Filing,
|
|
FinancialStatementKind,
|
|
MetricValidationResult,
|
|
NormalizationSummary,
|
|
StructuredKpiRow,
|
|
SurfaceDetailMap,
|
|
SurfaceFinancialRow,
|
|
TaxonomyDimensionMember,
|
|
TaxonomyFactRow,
|
|
TaxonomyStatementRow
|
|
} from '@/lib/types';
|
|
import { db, getSqliteClient } from '@/lib/server/db';
|
|
import { withFinancialIngestionSchemaRetry } from '@/lib/server/db/financial-ingestion-schema';
|
|
import {
|
|
filingTaxonomyAsset,
|
|
filingTaxonomyConcept,
|
|
filingTaxonomyContext,
|
|
filingTaxonomyFact,
|
|
filingTaxonomyMetricValidation,
|
|
filingTaxonomySnapshot
|
|
} from '@/lib/server/db/schema';
|
|
|
|
/** Allowed values of a snapshot's `parse_status` field. */
export type FilingTaxonomyParseStatus =
  | 'ready'
  | 'partial'
  | 'failed';
|
|
/** Allowed values of a snapshot's `source` field. */
export type FilingTaxonomySource =
  | 'xbrl_instance'
  | 'xbrl_instance_with_linkbase'
  | 'legacy_html_fallback';
|
|
/** Allowed values of an asset's `asset_type` field. */
export type FilingTaxonomyAssetType =
  | 'instance' | 'schema'
  | 'presentation' | 'label' | 'calculation' | 'definition'
  | 'pdf'
  | 'other';
|
|
|
|
/** A period entry stored in a snapshot's `periods` list (camelCase shape). */
export type FilingTaxonomyPeriod = {
  id: string;
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  // Period bounds are nullable.
  periodStart: string | null;
  periodEnd: string | null;
  filingType: '10-K' | '10-Q';
  periodLabel: string;
};
|
|
|
|
/** Snapshot row as returned to callers, with JSON payload columns already normalized. */
export type FilingTaxonomySnapshotRecord = {
  id: number;
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: '10-K' | '10-Q';

  // Parse outcome and parser provenance.
  parse_status: FilingTaxonomyParseStatus;
  parse_error: string | null;
  source: FilingTaxonomySource;
  parser_engine: string;
  parser_version: string;
  taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown';
  fiscal_pack: string | null;

  // Structured payloads, keyed by statement kind where applicable.
  periods: FilingTaxonomyPeriod[];
  faithful_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  surface_rows: Record<FinancialStatementKind, SurfaceFinancialRow[]>;
  detail_rows: Record<FinancialStatementKind, SurfaceDetailMap>;
  kpi_rows: StructuredKpiRow[];
  derived_metrics: Filing['metrics'];
  validation_result: MetricValidationResult | null;
  normalization_summary: NormalizationSummary | null;

  // Counters stored alongside the snapshot.
  facts_count: number;
  concepts_count: number;
  dimensions_count: number;

  created_at: string;
  updated_at: string;
};
|
|
|
|
/** Context row belonging to a snapshot (`snapshot_id`). */
export type FilingTaxonomyContextRecord = {
  id: number;
  snapshot_id: number;
  context_id: string;
  entity_identifier: string | null;
  entity_scheme: string | null;
  // Either a start/end pair or an instant may be present; all three are nullable.
  period_start: string | null;
  period_end: string | null;
  period_instant: string | null;
  segment_json: Record<string, unknown> | null;
  scenario_json: Record<string, unknown> | null;
  created_at: string;
};
|
|
|
|
/** Asset row belonging to a snapshot (`snapshot_id`). */
export type FilingTaxonomyAssetRecord = {
  id: number;
  snapshot_id: number;
  asset_type: FilingTaxonomyAssetType;
  name: string;
  url: string;
  size_bytes: number | null;
  score: number | null;
  is_selected: boolean;
  created_at: string;
};
|
|
|
|
/** Concept row belonging to a snapshot (`snapshot_id`). */
export type FilingTaxonomyConceptRecord = {
  id: number;
  snapshot_id: number;

  // Concept identity.
  concept_key: string;
  qname: string;
  namespace_uri: string;
  local_name: string;
  label: string | null;
  is_extension: boolean;

  // Declared concept attributes.
  balance: string | null;
  period_type: string | null;
  data_type: string | null;

  // Statement placement and mapping metadata.
  statement_kind: FinancialStatementKind | null;
  role_uri: string | null;
  authoritative_concept_key: string | null;
  mapping_method: string | null;
  surface_key: string | null;
  detail_parent_surface_key: string | null;
  kpi_key: string | null;
  residual_flag: boolean;

  // Presentation hierarchy.
  presentation_order: number | null;
  presentation_depth: number | null;
  parent_concept_key: string | null;
  is_abstract: boolean;

  created_at: string;
};
|
|
|
|
/** Fact row belonging to a snapshot (`snapshot_id`); `value_num` is always a number here. */
export type FilingTaxonomyFactRecord = {
  id: number;
  snapshot_id: number;

  // Concept identity.
  concept_key: string;
  qname: string;
  namespace_uri: string;
  local_name: string;
  data_type: string | null;

  // Statement placement and mapping metadata.
  statement_kind: FinancialStatementKind | null;
  role_uri: string | null;
  authoritative_concept_key: string | null;
  mapping_method: string | null;
  surface_key: string | null;
  detail_parent_surface_key: string | null;
  kpi_key: string | null;
  residual_flag: boolean;

  // Fact value and its reporting context.
  context_id: string;
  unit: string | null;
  decimals: string | null;
  precision: string | null;
  nil: boolean;
  value_num: number;
  period_start: string | null;
  period_end: string | null;
  period_instant: string | null;
  dimensions: TaxonomyDimensionMember[];
  is_dimensionless: boolean;
  source_file: string | null;

  created_at: string;
};
|
|
|
|
/** Metric validation row belonging to a snapshot (`snapshot_id`). */
export type FilingTaxonomyMetricValidationRecord = {
  id: number;
  snapshot_id: number;
  metric_key: keyof NonNullable<Filing['metrics']>;

  // Compared values and their differences; each is nullable.
  taxonomy_value: number | null;
  llm_value: number | null;
  absolute_diff: number | null;
  relative_diff: number | null;
  status: 'not_run' | 'matched' | 'mismatch' | 'error';

  // Supporting evidence and provider details.
  evidence_pages: number[];
  pdf_url: string | null;
  provider: string | null;
  model: string | null;
  error: string | null;

  created_at: string;
  updated_at: string;
};
|
|
|
|
/**
 * Payload accepted when upserting a taxonomy snapshot together with its child rows.
 *
 * The shape is derived from the record types instead of repeating every field, so the
 * write-side and read-side definitions cannot drift apart:
 * - snapshot-level fields are `FilingTaxonomySnapshotRecord` without `id`, `created_at`
 *   and `updated_at`;
 * - each child collection element is the matching `*Record` type without `id`,
 *   `snapshot_id` and its timestamp column(s).
 *
 * The resulting type is structurally identical to the previous hand-written field list.
 */
export type UpsertFilingTaxonomySnapshotInput =
  Omit<FilingTaxonomySnapshotRecord, 'id' | 'created_at' | 'updated_at'> & {
    contexts: Array<Omit<FilingTaxonomyContextRecord, 'id' | 'snapshot_id' | 'created_at'>>;
    assets: Array<Omit<FilingTaxonomyAssetRecord, 'id' | 'snapshot_id' | 'created_at'>>;
    concepts: Array<Omit<FilingTaxonomyConceptRecord, 'id' | 'snapshot_id' | 'created_at'>>;
    facts: Array<Omit<FilingTaxonomyFactRecord, 'id' | 'snapshot_id' | 'created_at'>>;
    metric_validations: Array<
      Omit<FilingTaxonomyMetricValidationRecord, 'id' | 'snapshot_id' | 'created_at' | 'updated_at'>
    >;
  };
|
|
|
|
/**
 * Statement kinds iterated by the row normalizers, in a fixed order.
 * `satisfies` only validates the entries; the constant keeps its readonly tuple type.
 */
const FINANCIAL_STATEMENT_KINDS = ['income', 'balance', 'cash_flow', 'equity', 'comprehensive_income'] as const satisfies readonly FinancialStatementKind[];
|
|
|
|
/** Statement rows grouped by statement kind. */
type StatementRowMap = Record<FinancialStatementKind, TaxonomyStatementRow[]>;

/** Surface rows grouped by statement kind. */
type SurfaceRowMap = Record<FinancialStatementKind, SurfaceFinancialRow[]>;

/** Detail maps grouped by statement kind. */
type DetailRowMap = Record<FinancialStatementKind, SurfaceDetailMap>;
|
|
|
|
/**
 * Returns the UTC calendar date ten years before now as `YYYY-MM-DD`.
 * A day that does not exist in the target year (Feb 29) rolls forward, as with `setUTCFullYear`.
 */
function tenYearsAgoIso() {
  const now = new Date();
  const cutoff = new Date(Date.UTC(now.getUTCFullYear() - 10, now.getUTCMonth(), now.getUTCDate()));
  return cutoff.toISOString().substring(0, 10);
}
|
|
|
|
/**
 * Coerces a loosely typed value to a finite number.
 *
 * @param value Any value; only numbers and strings can produce a result.
 * @returns The finite number, or `null` for NaN/Infinity, unparsable or blank strings,
 *          and every other type.
 */
function asNumber(value: unknown): number | null {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : null;
  }

  if (typeof value === 'string') {
    // Number('') and Number('   ') evaluate to 0, which would turn a missing value
    // into a real zero; treat blank strings as "no number".
    if (value.trim() === '') {
      return null;
    }

    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
|
|
|
|
/** Renders a finite number as text; `null` and non-finite numbers yield `null`. */
function asNumericText(value: number | null) {
  return value !== null && Number.isFinite(value) ? String(value) : null;
}
|
|
|
|
/** Returns the value viewed as a string-keyed record, or `null` for null, arrays and non-objects. */
function asObject(value: unknown) {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return null;
  }

  return value as Record<string, unknown>;
}
|
|
|
|
/** Returns the value when it is a string (including the empty string), otherwise `null`. */
function asString(value: unknown) {
  if (typeof value !== 'string') {
    return null;
  }

  return value;
}
|
|
|
|
/**
 * Returns the value when it is a string, otherwise `null`.
 *
 * The previous nested ternary returned `null` from both of its remaining arms, so it is
 * collapsed into a single check; results are unchanged for every input.
 */
function asNullableString(value: unknown): string | null {
  return typeof value === 'string' ? value : null;
}
|
|
|
|
/**
 * Returns booleans unchanged and converts anything else by JavaScript truthiness
 * (so any non-empty string, including 'false', is `true`).
 */
function asBoolean(value: unknown) {
  if (typeof value === 'boolean') {
    return value;
  }

  return Boolean(value);
}
|
|
|
|
/** Returns the value when it is one of the five known statement kinds, otherwise `null`. */
function asStatementKind(value: unknown): FinancialStatementKind | null {
  switch (value) {
    case 'income':
    case 'balance':
    case 'cash_flow':
    case 'equity':
    case 'comprehensive_income':
      return value;
    default:
      return null;
  }
}
|
|
|
|
/**
 * Converts an object's values with `asNumber`, keeping every key.
 * Anything that `asObject` rejects yields an empty object.
 */
function normalizeNumberMap(value: unknown) {
  const source = asObject(value);

  return source === null
    ? {}
    : Object.fromEntries(Object.entries(source).map(([name, raw]) => [name, asNumber(raw)]));
}
|
|
|
|
/**
 * Converts an object's values with `asNullableString`, keeping every key.
 * Anything that `asObject` rejects yields an empty object.
 */
function normalizeNullableStringMap(value: unknown) {
  const source = asObject(value);

  return source === null
    ? {}
    : Object.fromEntries(Object.entries(source).map(([name, raw]) => [name, asNullableString(raw)]));
}
|
|
|
|
/** Keeps only the string entries of an array; non-array input yields an empty array. */
function normalizeStringArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  return value.filter((item): item is string => typeof item === 'string');
}
|
|
|
|
/**
 * Converts each entry with `asNumber` and drops the entries that come back `null`.
 * Non-array input yields an empty array.
 */
function normalizeNumberArray(value: unknown) {
  if (!Array.isArray(value)) {
    return [];
  }

  const numbers: number[] = [];
  for (const item of value) {
    const parsed = asNumber(item);
    if (parsed !== null) {
      numbers.push(parsed);
    }
  }

  return numbers;
}
|
|
|
|
/**
 * Normalizes a stored `periods` payload into `FilingTaxonomyPeriod` entries.
 *
 * Each field is read under its camelCase name first, then its snake_case name.
 * Entries that are not objects, or that lack any required field (id, filing id,
 * accession number, filing date, a '10-K'/'10-Q' filing type, period label), are dropped.
 * Non-array input yields an empty array.
 */
function normalizePeriods(value: unknown): FilingTaxonomyPeriod[] {
  if (!Array.isArray(value)) {
    return [];
  }

  const periods: FilingTaxonomyPeriod[] = [];

  for (const candidate of value) {
    const raw = asObject(candidate);
    if (raw === null) {
      continue;
    }

    const id = asString(raw.id);
    const filingId = asNumber(raw.filingId ?? raw.filing_id);
    const accessionNumber = asString(raw.accessionNumber ?? raw.accession_number);
    const filingDate = asString(raw.filingDate ?? raw.filing_date);

    // '10-K' wins when the two spellings of the field disagree.
    let filingType: FilingTaxonomyPeriod['filingType'] | null = null;
    if (raw.filingType === '10-K' || raw.filing_type === '10-K') {
      filingType = '10-K';
    } else if (raw.filingType === '10-Q' || raw.filing_type === '10-Q') {
      filingType = '10-Q';
    }

    const periodLabel = asString(raw.periodLabel ?? raw.period_label);

    if (!id || filingId === null || !accessionNumber || !filingDate || !filingType || !periodLabel) {
      continue;
    }

    periods.push({
      id,
      filingId,
      accessionNumber,
      filingDate,
      periodStart: asNullableString(raw.periodStart ?? raw.period_start),
      periodEnd: asNullableString(raw.periodEnd ?? raw.period_end),
      filingType,
      periodLabel
    });
  }

  return periods;
}
|
|
|
|
/**
 * Normalizes a stored statement-row payload into one row list per statement kind.
 *
 * @param value Raw payload; expected to be an object keyed by statement kind.
 * @param fallbackRows Returned as-is (same reference) when `value` is not an object.
 * @returns A fresh map; rows that are not objects or lack key, label, concept key,
 *          qname, namespace URI or local name are dropped.
 */
function normalizeStatementRows(
  value: unknown,
  fallbackRows: StatementRowMap = emptyStatementRows()
): StatementRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  // Parses one raw entry; `bucketKind` is used when the row carries no valid statement.
  const toRow = (entry: unknown, bucketKind: FinancialStatementKind) => {
    const raw = asObject(entry);
    if (raw === null) {
      return null;
    }

    const conceptKey = asString(raw.conceptKey ?? raw.concept_key);
    // The row key falls back to the concept key.
    const key = asString(raw.key) ?? conceptKey;
    const label = asString(raw.label);
    const qname = asString(raw.qname);
    const namespaceUri = asString(raw.namespaceUri ?? raw.namespace_uri);
    const localName = asString(raw.localName ?? raw.local_name);
    if (!key || !label || !conceptKey || !qname || !namespaceUri || !localName) {
      return null;
    }

    return {
      key,
      label,
      conceptKey,
      qname,
      namespaceUri,
      localName,
      isExtension: asBoolean(raw.isExtension ?? raw.is_extension),
      statement: asStatementKind(raw.statement) ?? bucketKind,
      roleUri: asNullableString(raw.roleUri ?? raw.role_uri),
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      depth: asNumber(raw.depth) ?? 0,
      parentKey: asNullableString(raw.parentKey ?? raw.parent_key),
      values: normalizeNumberMap(raw.values),
      units: normalizeNullableStringMap(raw.units),
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
      sourceFactIds: normalizeNumberArray(raw.sourceFactIds ?? raw.source_fact_ids)
    };
  };

  const result = emptyStatementRows();
  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    const candidates = source[kind];
    result[kind] = (Array.isArray(candidates) ? candidates : [])
      .map((entry) => toRow(entry, kind))
      .filter((row): row is TaxonomyStatementRow => row !== null);
  }

  return result;
}
|
|
|
|
/**
 * Normalizes a stored surface-row payload into one row list per statement kind.
 *
 * @param value Raw payload; expected to be an object keyed by statement kind.
 * @param fallbackRows Returned as-is (same reference) when `value` is not an object.
 * @returns A fresh map; rows that are not objects or lack key, label, category or unit
 *          are dropped. Optional fields are set only when the raw value is recognised.
 */
function normalizeSurfaceRows(
  value: unknown,
  fallbackRows: SurfaceRowMap = emptySurfaceRows()
): SurfaceRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  const result = emptySurfaceRows();

  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    const candidates: unknown = source[kind];
    if (!Array.isArray(candidates)) {
      // Nothing usable for this kind; its bucket stays empty.
      continue;
    }

    const bucket: SurfaceFinancialRow[] = [];

    for (const candidate of candidates) {
      const raw = asObject(candidate);
      if (raw === null) {
        continue;
      }

      const key = asString(raw.key);
      const label = asString(raw.label);
      const category = asString(raw.category);
      const unit = asString(raw.unit);
      if (!key || !label || !category || !unit) {
        continue;
      }

      const surfaceRow: SurfaceFinancialRow = {
        key,
        label,
        // category and unit are only checked to be non-empty strings, not validated further.
        category: category as SurfaceFinancialRow['category'],
        order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
        unit: unit as SurfaceFinancialRow['unit'],
        values: normalizeNumberMap(raw.values),
        sourceConcepts: normalizeStringArray(raw.sourceConcepts ?? raw.source_concepts),
        sourceRowKeys: normalizeStringArray(raw.sourceRowKeys ?? raw.source_row_keys),
        sourceFactIds: normalizeNumberArray(raw.sourceFactIds ?? raw.source_fact_ids),
        formulaKey: asNullableString(raw.formulaKey ?? raw.formula_key),
        hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions),
        resolvedSourceRowKeys: normalizeNullableStringMap(raw.resolvedSourceRowKeys ?? raw.resolved_source_row_keys)
      };

      const templateSection = asString(raw.templateSection ?? raw.template_section);
      if (templateSection) {
        surfaceRow.templateSection = templateSection as SurfaceFinancialRow['templateSection'];
      }

      // Only these three statement kinds are copied onto a surface row.
      const rowStatement = asStatementKind(raw.statement);
      if (rowStatement === 'income' || rowStatement === 'balance' || rowStatement === 'cash_flow') {
        surfaceRow.statement = rowStatement;
      }

      const detailCount = asNumber(raw.detailCount ?? raw.detail_count);
      if (detailCount !== null) {
        surfaceRow.detailCount = detailCount;
      }

      const resolutionMethod = raw.resolutionMethod ?? raw.resolution_method;
      if (
        resolutionMethod === 'direct'
        || resolutionMethod === 'surface_bridge'
        || resolutionMethod === 'formula_derived'
        || resolutionMethod === 'not_meaningful'
      ) {
        surfaceRow.resolutionMethod = resolutionMethod;
      }

      const confidence = raw.confidence;
      if (confidence === 'high' || confidence === 'medium' || confidence === 'low') {
        surfaceRow.confidence = confidence;
      }

      const warningCodes = normalizeStringArray(raw.warningCodes ?? raw.warning_codes);
      if (warningCodes.length > 0) {
        surfaceRow.warningCodes = warningCodes;
      }

      bucket.push(surfaceRow);
    }

    result[kind] = bucket;
  }

  return result;
}
|
|
|
|
/**
 * Normalizes a stored detail-row payload into, per statement kind, a map from surface key
 * to detail rows.
 *
 * @param value Raw payload; expected to be an object keyed by statement kind.
 * @param fallbackRows Returned as-is (same reference) when `value` is not an object.
 * @returns A fresh map; every surface key is kept (with an empty list when its value is
 *          not an array), and rows missing a required identity field are dropped.
 */
function normalizeDetailRows(
  value: unknown,
  fallbackRows: DetailRowMap = emptyDetailRows()
): DetailRowMap {
  const source = asObject(value);
  if (source === null) {
    return fallbackRows;
  }

  // Parses one raw entry; `groupKey` is used when the row names no parent surface key.
  const toDetailRow = (entry: unknown, groupKey: string) => {
    const raw = asObject(entry);
    if (raw === null) {
      return null;
    }

    const conceptKey = asString(raw.conceptKey ?? raw.concept_key);
    // The row key falls back to the concept key.
    const key = asString(raw.key) ?? conceptKey;
    const label = asString(raw.label);
    const qname = asString(raw.qname);
    const namespaceUri = asString(raw.namespaceUri ?? raw.namespace_uri);
    const localName = asString(raw.localName ?? raw.local_name);
    if (!key || !label || !conceptKey || !qname || !namespaceUri || !localName) {
      return null;
    }

    return {
      key,
      parentSurfaceKey: asString(raw.parentSurfaceKey ?? raw.parent_surface_key) ?? groupKey,
      label,
      conceptKey,
      qname,
      namespaceUri,
      localName,
      unit: asNullableString(raw.unit),
      values: normalizeNumberMap(raw.values),
      sourceFactIds: normalizeNumberArray(raw.sourceFactIds ?? raw.source_fact_ids),
      isExtension: asBoolean(raw.isExtension ?? raw.is_extension),
      dimensionsSummary: normalizeStringArray(raw.dimensionsSummary ?? raw.dimensions_summary),
      residualFlag: asBoolean(raw.residualFlag ?? raw.residual_flag)
    };
  };

  const result = emptyDetailRows();
  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    const groups = asObject(source[kind]) ?? {};
    result[kind] = Object.fromEntries(
      Object.entries(groups).map(([surfaceKey, rows]) => [
        surfaceKey,
        Array.isArray(rows)
          ? rows
            .map((entry) => toDetailRow(entry, surfaceKey))
            .filter((row): row is DetailFinancialRow => row !== null)
          : []
      ])
    );
  }

  return result;
}
|
|
|
|
/**
 * Normalizes a stored KPI payload into `StructuredKpiRow` entries.
 *
 * Entries that are not objects, lack key/label/category/unit, or whose provenance type is
 * neither 'taxonomy' nor 'structured_note' are dropped. Non-array input yields an empty array.
 */
function normalizeKpiRows(value: unknown): StructuredKpiRow[] {
  if (!Array.isArray(value)) {
    return [];
  }

  const kpis: StructuredKpiRow[] = [];

  for (const candidate of value) {
    const raw = asObject(candidate);
    if (raw === null) {
      continue;
    }

    const key = asString(raw.key);
    const label = asString(raw.label);
    const category = asString(raw.category);
    const unit = asString(raw.unit);
    const provenanceType = raw.provenanceType ?? raw.provenance_type;
    if (!key || !label || !category || !unit || (provenanceType !== 'taxonomy' && provenanceType !== 'structured_note')) {
      continue;
    }

    kpis.push({
      key,
      label,
      // category and unit are only checked to be non-empty strings, not validated further.
      category: category as StructuredKpiRow['category'],
      unit: unit as StructuredKpiRow['unit'],
      order: asNumber(raw.order) ?? Number.MAX_SAFE_INTEGER,
      segment: asNullableString(raw.segment),
      axis: asNullableString(raw.axis),
      member: asNullableString(raw.member),
      values: normalizeNumberMap(raw.values),
      sourceConcepts: normalizeStringArray(raw.sourceConcepts ?? raw.source_concepts),
      sourceFactIds: normalizeNumberArray(raw.sourceFactIds ?? raw.source_fact_ids),
      provenanceType,
      hasDimensions: asBoolean(raw.hasDimensions ?? raw.has_dimensions)
    });
  }

  return kpis;
}
|
|
|
|
/**
 * Normalizes a stored normalization summary.
 * Returns `null` when the input is not an object; counters that are missing or not
 * numeric become 0.
 */
function normalizeNormalizationSummary(value: unknown) {
  const raw = asObject(value);
  if (raw === null) {
    return null;
  }

  // Reads a counter under its camelCase name first, then its snake_case name.
  const readCount = (camelKey: string, snakeKey: string) => asNumber(raw[camelKey] ?? raw[snakeKey]) ?? 0;

  return {
    surfaceRowCount: readCount('surfaceRowCount', 'surface_row_count'),
    detailRowCount: readCount('detailRowCount', 'detail_row_count'),
    kpiRowCount: readCount('kpiRowCount', 'kpi_row_count'),
    unmappedRowCount: readCount('unmappedRowCount', 'unmapped_row_count'),
    materialUnmappedRowCount: readCount('materialUnmappedRowCount', 'material_unmapped_row_count'),
    warnings: normalizeStringArray(raw.warnings)
  } satisfies NormalizationSummary;
}
|
|
|
|
/**
 * Normalizes the loosely typed JSON payload fields of a taxonomy snapshot.
 *
 * @param input Raw payload fields; every field is treated as untrusted `unknown`.
 * @returns The same seven fields in normalized form. When `statement_rows` is not an
 *          object, the normalized `faithful_rows` map is returned for it as well
 *          (same reference).
 */
export function normalizeFilingTaxonomySnapshotPayload(input: {
  periods: unknown;
  faithful_rows: unknown;
  statement_rows: unknown;
  surface_rows: unknown;
  detail_rows: unknown;
  kpi_rows: unknown;
  normalization_summary: unknown;
}) {
  // Faithful rows come first because they are the fallback for statement rows.
  const faithful_rows = normalizeStatementRows(input.faithful_rows);
  const statement_rows = normalizeStatementRows(input.statement_rows, faithful_rows);

  const periods = normalizePeriods(input.periods);
  const surface_rows = normalizeSurfaceRows(input.surface_rows);
  const detail_rows = normalizeDetailRows(input.detail_rows);
  const kpi_rows = normalizeKpiRows(input.kpi_rows);
  const normalization_summary = normalizeNormalizationSummary(input.normalization_summary);

  return {
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    normalization_summary
  };
}
|
|
|
|
/** Builds a new statement-row map with an empty list for every statement kind. */
function emptyStatementRows(): StatementRowMap {
  const rows: StatementRowMap = {
    income: [],
    balance: [],
    cash_flow: [],
    equity: [],
    comprehensive_income: []
  };

  return rows;
}
|
|
|
|
/** Builds a new surface-row map with an empty list for every statement kind. */
function emptySurfaceRows(): SurfaceRowMap {
  const rows: SurfaceRowMap = {
    income: [],
    balance: [],
    cash_flow: [],
    equity: [],
    comprehensive_income: []
  };

  return rows;
}
|
|
|
|
/** Builds a new detail-row map with an empty object for every statement kind. */
function emptyDetailRows(): DetailRowMap {
  const rows: DetailRowMap = {
    income: {},
    balance: {},
    cash_flow: {},
    equity: {},
    comprehensive_income: {}
  };

  return rows;
}
|
|
|
|
/**
 * Maps a selected snapshot row to a `FilingTaxonomySnapshotRecord`.
 * The JSON payload columns go through `normalizeFilingTaxonomySnapshotPayload`; missing
 * `derived_metrics` / `validation_result` become `null`; other columns are copied as-is.
 */
function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): FilingTaxonomySnapshotRecord {
  // The normalizer only reads its seven payload fields, so the row can be passed directly.
  const {
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    normalization_summary
  } = normalizeFilingTaxonomySnapshotPayload(row);

  const {
    id, filing_id, ticker, filing_date, filing_type,
    parse_status, parse_error, source, parser_engine, parser_version,
    taxonomy_regime, fiscal_pack,
    facts_count, concepts_count, dimensions_count,
    created_at, updated_at
  } = row;

  return {
    id,
    filing_id,
    ticker,
    filing_date,
    filing_type,
    parse_status,
    parse_error,
    source,
    parser_engine,
    parser_version,
    taxonomy_regime,
    fiscal_pack,
    periods,
    faithful_rows,
    statement_rows,
    surface_rows,
    detail_rows,
    kpi_rows,
    derived_metrics: row.derived_metrics ?? null,
    validation_result: row.validation_result ?? null,
    normalization_summary,
    facts_count,
    concepts_count,
    dimensions_count,
    created_at,
    updated_at
  };
}
|
|
|
|
/**
 * Maps a selected context row to a `FilingTaxonomyContextRecord`.
 * Missing `segment_json` / `scenario_json` become `null`; other columns are copied as-is.
 */
function toContextRecord(row: typeof filingTaxonomyContext.$inferSelect): FilingTaxonomyContextRecord {
  const {
    id, snapshot_id, context_id,
    entity_identifier, entity_scheme,
    period_start, period_end, period_instant,
    created_at
  } = row;

  return {
    id,
    snapshot_id,
    context_id,
    entity_identifier,
    entity_scheme,
    period_start,
    period_end,
    period_instant,
    segment_json: row.segment_json ?? null,
    scenario_json: row.scenario_json ?? null,
    created_at
  };
}
|
|
|
|
/**
 * Maps a selected asset row to a `FilingTaxonomyAssetRecord`.
 * `score` is converted with `asNumber`; other columns are copied as-is.
 */
function toAssetRecord(row: typeof filingTaxonomyAsset.$inferSelect): FilingTaxonomyAssetRecord {
  const { id, snapshot_id, asset_type, name, url, size_bytes, is_selected, created_at } = row;

  return {
    id,
    snapshot_id,
    asset_type,
    name,
    url,
    size_bytes,
    score: asNumber(row.score),
    is_selected,
    created_at
  };
}
|
|
|
|
/**
 * Maps a selected concept row to a `FilingTaxonomyConceptRecord`.
 * A missing `statement_kind` becomes `null` and `presentation_order` is converted with
 * `asNumber`; other columns are copied as-is.
 */
function toConceptRecord(row: typeof filingTaxonomyConcept.$inferSelect): FilingTaxonomyConceptRecord {
  const {
    id, snapshot_id, concept_key, qname, namespace_uri, local_name, label, is_extension,
    balance, period_type, data_type,
    role_uri, authoritative_concept_key, mapping_method,
    surface_key, detail_parent_surface_key, kpi_key, residual_flag,
    presentation_depth, parent_concept_key, is_abstract, created_at
  } = row;

  return {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    label,
    is_extension,
    balance,
    period_type,
    data_type,
    statement_kind: row.statement_kind ?? null,
    role_uri,
    authoritative_concept_key,
    mapping_method,
    surface_key,
    detail_parent_surface_key,
    kpi_key,
    residual_flag,
    presentation_order: asNumber(row.presentation_order),
    presentation_depth,
    parent_concept_key,
    is_abstract,
    created_at
  };
}
|
|
|
|
/**
 * Maps a selected fact row to a `FilingTaxonomyFactRecord`.
 *
 * @throws Error when `value_num` cannot be converted to a finite number by `asNumber`.
 */
function toFactRecord(row: typeof filingTaxonomyFact.$inferSelect): FilingTaxonomyFactRecord {
  const value_num = asNumber(row.value_num);
  if (value_num === null) {
    throw new Error(`Invalid value_num for taxonomy fact row ${row.id}`);
  }

  const {
    id, snapshot_id, concept_key, qname, namespace_uri, local_name, data_type,
    role_uri, authoritative_concept_key, mapping_method,
    surface_key, detail_parent_surface_key, kpi_key, residual_flag,
    context_id, unit, decimals, precision, nil,
    period_start, period_end, period_instant,
    dimensions, is_dimensionless, source_file, created_at
  } = row;

  return {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    data_type,
    statement_kind: row.statement_kind ?? null,
    role_uri,
    authoritative_concept_key,
    mapping_method,
    surface_key,
    detail_parent_surface_key,
    kpi_key,
    residual_flag,
    context_id,
    unit,
    decimals,
    precision,
    nil,
    value_num,
    period_start,
    period_end,
    period_instant,
    dimensions,
    is_dimensionless,
    source_file,
    created_at
  };
}
|
|
|
|
/**
 * Maps a selected metric validation row to a `FilingTaxonomyMetricValidationRecord`.
 * The four value/diff columns are converted with `asNumber`, and missing
 * `evidence_pages` becomes an empty array; other columns are copied as-is.
 */
function toMetricValidationRecord(row: typeof filingTaxonomyMetricValidation.$inferSelect): FilingTaxonomyMetricValidationRecord {
  const { id, snapshot_id, metric_key, status, pdf_url, provider, model, error, created_at, updated_at } = row;

  return {
    id,
    snapshot_id,
    metric_key,
    taxonomy_value: asNumber(row.taxonomy_value),
    llm_value: asNumber(row.llm_value),
    absolute_diff: asNumber(row.absolute_diff),
    relative_diff: asNumber(row.relative_diff),
    status,
    evidence_pages: row.evidence_pages ?? [],
    pdf_url,
    provider,
    model,
    error,
    created_at,
    updated_at
  };
}
|
|
|
|
/**
 * Loads the taxonomy snapshot stored for a filing.
 *
 * @param filingId Value matched against the snapshot's `filing_id` column.
 * @returns The normalized snapshot record, or `null` when no row matches.
 */
export async function getFilingTaxonomySnapshotByFilingId(filingId: number) {
  const matches = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.filing_id, filingId))
    .limit(1);

  const first = matches[0];
  if (!first) {
    return null;
  }

  return toSnapshotRecord(first);
}
|
|
|
|
/**
 * Lists the assets of a snapshot, highest id first.
 *
 * @param snapshotId Value matched against the asset's `snapshot_id` column.
 */
export async function listFilingTaxonomyAssets(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyAsset.snapshot_id, snapshotId);
  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyAsset.id));

  return assetRows.map((assetRow) => toAssetRecord(assetRow));
}
|
|
|
|
/**
 * Lists the contexts of a snapshot, highest id first.
 *
 * @param snapshotId Value matched against the context's `snapshot_id` column.
 */
export async function listFilingTaxonomyContexts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyContext.snapshot_id, snapshotId);
  const contextRows = await db
    .select()
    .from(filingTaxonomyContext)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyContext.id));

  return contextRows.map((contextRow) => toContextRecord(contextRow));
}
|
|
|
|
/**
 * Lists the concepts of a snapshot, highest id first.
 *
 * @param snapshotId Value matched against the concept's `snapshot_id` column.
 */
export async function listFilingTaxonomyConcepts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyConcept.snapshot_id, snapshotId);
  const conceptRows = await db
    .select()
    .from(filingTaxonomyConcept)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyConcept.id));

  return conceptRows.map((conceptRow) => toConceptRecord(conceptRow));
}
|
|
|
|
/**
 * Lists the facts of a snapshot, highest id first.
 *
 * @param snapshotId Value matched against the fact's `snapshot_id` column.
 * @throws Error when any row's `value_num` is rejected by `toFactRecord`.
 */
export async function listFilingTaxonomyFacts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyFact.snapshot_id, snapshotId);
  const factRows = await db
    .select()
    .from(filingTaxonomyFact)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyFact.id));

  return factRows.map((factRow) => toFactRecord(factRow));
}
|
|
|
|
/**
 * Lists the metric validations of a snapshot, highest id first.
 *
 * @param snapshotId Value matched against the validation's `snapshot_id` column.
 */
export async function listFilingTaxonomyMetricValidations(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId);
  const validationRows = await db
    .select()
    .from(filingTaxonomyMetricValidation)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyMetricValidation.id));

  return validationRows.map((validationRow) => toMetricValidationRecord(validationRow));
}
|
|
|
|
export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySnapshotInput) {
|
|
const now = new Date().toISOString();
|
|
const normalized = normalizeFilingTaxonomySnapshotPayload(input);
|
|
|
|
return db.transaction(async (tx) => {
|
|
const [saved] = await tx
|
|
.insert(filingTaxonomySnapshot)
|
|
.values({
|
|
filing_id: input.filing_id,
|
|
ticker: input.ticker,
|
|
filing_date: input.filing_date,
|
|
filing_type: input.filing_type,
|
|
parse_status: input.parse_status,
|
|
parse_error: input.parse_error,
|
|
source: input.source,
|
|
parser_engine: input.parser_engine,
|
|
parser_version: input.parser_version,
|
|
taxonomy_regime: input.taxonomy_regime,
|
|
fiscal_pack: input.fiscal_pack,
|
|
periods: normalized.periods,
|
|
faithful_rows: normalized.faithful_rows,
|
|
statement_rows: normalized.statement_rows,
|
|
surface_rows: normalized.surface_rows,
|
|
detail_rows: normalized.detail_rows,
|
|
kpi_rows: normalized.kpi_rows,
|
|
derived_metrics: input.derived_metrics,
|
|
validation_result: input.validation_result,
|
|
normalization_summary: normalized.normalization_summary,
|
|
facts_count: input.facts_count,
|
|
concepts_count: input.concepts_count,
|
|
dimensions_count: input.dimensions_count,
|
|
created_at: now,
|
|
updated_at: now
|
|
})
|
|
.onConflictDoUpdate({
|
|
target: filingTaxonomySnapshot.filing_id,
|
|
set: {
|
|
ticker: input.ticker,
|
|
filing_date: input.filing_date,
|
|
filing_type: input.filing_type,
|
|
parse_status: input.parse_status,
|
|
parse_error: input.parse_error,
|
|
source: input.source,
|
|
parser_engine: input.parser_engine,
|
|
parser_version: input.parser_version,
|
|
taxonomy_regime: input.taxonomy_regime,
|
|
fiscal_pack: input.fiscal_pack,
|
|
periods: normalized.periods,
|
|
faithful_rows: normalized.faithful_rows,
|
|
statement_rows: normalized.statement_rows,
|
|
surface_rows: normalized.surface_rows,
|
|
detail_rows: normalized.detail_rows,
|
|
kpi_rows: normalized.kpi_rows,
|
|
derived_metrics: input.derived_metrics,
|
|
validation_result: input.validation_result,
|
|
normalization_summary: normalized.normalization_summary,
|
|
facts_count: input.facts_count,
|
|
concepts_count: input.concepts_count,
|
|
dimensions_count: input.dimensions_count,
|
|
updated_at: now
|
|
}
|
|
})
|
|
.returning();
|
|
|
|
const snapshotId = saved.id;
|
|
|
|
try {
|
|
await tx.delete(filingTaxonomyAsset).where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
|
|
await tx.delete(filingTaxonomyContext).where(eq(filingTaxonomyContext.snapshot_id, snapshotId));
|
|
await tx.delete(filingTaxonomyConcept).where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
|
|
await tx.delete(filingTaxonomyFact).where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
|
|
await tx.delete(filingTaxonomyMetricValidation).where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));
|
|
} catch (error) {
|
|
throw new Error(`Failed to delete child records for snapshot ${snapshotId}: ${error}`);
|
|
}
|
|
|
|
if (input.contexts.length > 0) {
|
|
try {
|
|
await tx.insert(filingTaxonomyContext).values(input.contexts.map((context) => ({
|
|
snapshot_id: snapshotId,
|
|
context_id: context.context_id,
|
|
entity_identifier: context.entity_identifier,
|
|
entity_scheme: context.entity_scheme,
|
|
period_start: context.period_start,
|
|
period_end: context.period_end,
|
|
period_instant: context.period_instant,
|
|
segment_json: context.segment_json,
|
|
scenario_json: context.scenario_json,
|
|
created_at: now
|
|
})));
|
|
} catch (error) {
|
|
throw new Error(`Failed to insert ${input.contexts.length} contexts for snapshot ${snapshotId}: ${error}`);
|
|
}
|
|
}
|
|
|
|
if (input.assets.length > 0) {
|
|
try {
|
|
await tx.insert(filingTaxonomyAsset).values(input.assets.map((asset) => ({
|
|
snapshot_id: snapshotId,
|
|
asset_type: asset.asset_type,
|
|
name: asset.name,
|
|
url: asset.url,
|
|
size_bytes: asset.size_bytes,
|
|
score: asNumericText(asset.score),
|
|
is_selected: asset.is_selected,
|
|
created_at: now
|
|
})));
|
|
} catch (error) {
|
|
throw new Error(`Failed to insert ${input.assets.length} assets for snapshot ${snapshotId}: ${error}`);
|
|
}
|
|
}
|
|
|
|
if (input.concepts.length > 0) {
|
|
try {
|
|
await tx.insert(filingTaxonomyConcept).values(input.concepts.map((concept) => ({
|
|
snapshot_id: snapshotId,
|
|
concept_key: concept.concept_key,
|
|
qname: concept.qname,
|
|
namespace_uri: concept.namespace_uri,
|
|
local_name: concept.local_name,
|
|
label: concept.label,
|
|
is_extension: concept.is_extension,
|
|
balance: concept.balance,
|
|
period_type: concept.period_type,
|
|
data_type: concept.data_type,
|
|
statement_kind: concept.statement_kind,
|
|
role_uri: concept.role_uri,
|
|
authoritative_concept_key: concept.authoritative_concept_key,
|
|
mapping_method: concept.mapping_method,
|
|
surface_key: concept.surface_key,
|
|
detail_parent_surface_key: concept.detail_parent_surface_key,
|
|
kpi_key: concept.kpi_key,
|
|
residual_flag: concept.residual_flag,
|
|
presentation_order: asNumericText(concept.presentation_order),
|
|
presentation_depth: concept.presentation_depth,
|
|
parent_concept_key: concept.parent_concept_key,
|
|
is_abstract: concept.is_abstract,
|
|
created_at: now
|
|
})));
|
|
} catch (error) {
|
|
throw new Error(`Failed to insert ${input.concepts.length} concepts for snapshot ${snapshotId}: ${error}`);
|
|
}
|
|
}
|
|
|
|
if (input.facts.length > 0) {
|
|
try {
|
|
await tx.insert(filingTaxonomyFact).values(input.facts.map((fact) => ({
|
|
snapshot_id: snapshotId,
|
|
concept_key: fact.concept_key,
|
|
qname: fact.qname,
|
|
namespace_uri: fact.namespace_uri,
|
|
local_name: fact.local_name,
|
|
data_type: fact.data_type,
|
|
statement_kind: fact.statement_kind,
|
|
role_uri: fact.role_uri,
|
|
authoritative_concept_key: fact.authoritative_concept_key,
|
|
mapping_method: fact.mapping_method,
|
|
surface_key: fact.surface_key,
|
|
detail_parent_surface_key: fact.detail_parent_surface_key,
|
|
kpi_key: fact.kpi_key,
|
|
residual_flag: fact.residual_flag,
|
|
context_id: fact.context_id,
|
|
unit: fact.unit,
|
|
decimals: fact.decimals,
|
|
precision: fact.precision,
|
|
nil: fact.nil,
|
|
value_num: String(fact.value_num),
|
|
period_start: fact.period_start,
|
|
period_end: fact.period_end,
|
|
period_instant: fact.period_instant,
|
|
dimensions: fact.dimensions,
|
|
is_dimensionless: fact.is_dimensionless,
|
|
source_file: fact.source_file,
|
|
created_at: now
|
|
})));
|
|
} catch (error) {
|
|
throw new Error(`Failed to insert ${input.facts.length} facts for snapshot ${snapshotId}: ${error}`);
|
|
}
|
|
}
|
|
|
|
if (input.metric_validations.length > 0) {
|
|
try {
|
|
await tx.insert(filingTaxonomyMetricValidation).values(input.metric_validations.map((check) => ({
|
|
snapshot_id: snapshotId,
|
|
metric_key: check.metric_key,
|
|
taxonomy_value: asNumericText(check.taxonomy_value),
|
|
llm_value: asNumericText(check.llm_value),
|
|
absolute_diff: asNumericText(check.absolute_diff),
|
|
relative_diff: asNumericText(check.relative_diff),
|
|
status: check.status,
|
|
evidence_pages: check.evidence_pages,
|
|
pdf_url: check.pdf_url,
|
|
provider: check.provider,
|
|
model: check.model,
|
|
error: check.error,
|
|
created_at: now,
|
|
updated_at: now
|
|
})));
|
|
} catch (error) {
|
|
throw new Error(`Failed to insert ${input.metric_validations.length} metric validations for snapshot ${snapshotId}: ${error}`);
|
|
}
|
|
}
|
|
|
|
return toSnapshotRecord(saved);
|
|
});
|
|
}
|
|
|
|
/**
 * Lists taxonomy snapshots for a ticker with cursor-based pagination.
 *
 * Rows are ordered by `filing_date` descending, then `id` descending. The
 * cursor is the `id` of the last row of the previous page.
 *
 * @param input.ticker Ticker symbol; trimmed and upper-cased before matching.
 * @param input.window `'10y'` restricts to filings dated on or after
 *   `tenYearsAgoIso()`; `'all'` applies no date filter.
 * @param input.filingTypes Optional filing-type filter; ignored when empty.
 * @param input.limit Page size; defaults to 40 and is clamped to 1..120.
 * @param input.cursor `nextCursor` from a previous call, or null/omitted for
 *   the first page.
 * @returns The page of snapshot records and `nextCursor` (null when there are
 *   no further rows).
 */
export async function listFilingTaxonomySnapshotsByTicker(input: {
  ticker: string;
  window: '10y' | 'all';
  filingTypes?: Array<'10-K' | '10-Q'>;
  limit?: number;
  cursor?: string | null;
}) {
  const safeLimit = Math.min(Math.max(Math.trunc(input.limit ?? 40), 1), 120);
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;
  const constraints = [eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase())];

  if (input.window === '10y') {
    constraints.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }

  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    // The page order is (filing_date DESC, id DESC), so the cursor predicate
    // must compare the same pair. Filtering on `id < cursorId` alone repeats
    // or skips rows whenever id order differs from filing_date order.
    const [cursorRow] = await db
      .select({ filing_date: filingTaxonomySnapshot.filing_date })
      .from(filingTaxonomySnapshot)
      .where(eq(filingTaxonomySnapshot.id, cursorId))
      .limit(1);

    if (cursorRow) {
      // Keyset predicate: strictly after the cursor row in the sort order.
      constraints.push(
        sql`(${filingTaxonomySnapshot.filing_date} < ${cursorRow.filing_date} or (${filingTaxonomySnapshot.filing_date} = ${cursorRow.filing_date} and ${filingTaxonomySnapshot.id} < ${cursorId}))`
      );
    } else {
      // Cursor row no longer exists: fall back to the id-only filter rather
      // than returning an empty page.
      constraints.push(lt(filingTaxonomySnapshot.id, cursorId));
    }
  }

  if (input.filingTypes && input.filingTypes.length > 0) {
    constraints.push(inArray(filingTaxonomySnapshot.filing_type, input.filingTypes));
  }

  // Fetch one extra row to learn whether another page exists.
  const rows = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(and(...constraints))
    .orderBy(desc(filingTaxonomySnapshot.filing_date), desc(filingTaxonomySnapshot.id))
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const usedRows = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore
    ? String(usedRows[usedRows.length - 1]?.id ?? '')
    : null;

  return {
    snapshots: usedRows.map(toSnapshotRecord),
    nextCursor
  };
}
|
|
|
|
/**
 * Counts a ticker's taxonomy snapshots grouped by parse status.
 *
 * @param ticker Ticker symbol; trimmed and upper-cased before matching.
 * @returns A map with an entry for `ready`, `partial` and `failed`; statuses
 *   with no matching rows stay at 0.
 */
export async function countFilingTaxonomySnapshotStatuses(ticker: string) {
  const grouped = await db
    .select({
      status: filingTaxonomySnapshot.parse_status,
      count: sql<string>`count(*)`
    })
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.ticker, ticker.trim().toUpperCase()))
    .groupBy(filingTaxonomySnapshot.parse_status);

  // Start every status at zero so absent groups are still reported.
  const totals: Record<FilingTaxonomyParseStatus, number> = {
    ready: 0,
    partial: 0,
    failed: 0
  };

  for (const group of grouped) {
    // The count column is typed as text here, so coerce it to a number.
    totals[group.status] = Number(group.count);
  }

  return totals;
}
|
|
|
|
/**
 * Lists taxonomy facts for a ticker, highest fact id first, with cursor
 * pagination keyed on the fact id.
 *
 * @param input.ticker Ticker symbol; trimmed and upper-cased before matching.
 * @param input.window `'10y'` restricts to snapshots whose filing date is on
 *   or after `tenYearsAgoIso()`; `'all'` applies no date filter.
 * @param input.statement Optional statement-kind filter on the fact.
 * @param input.filingTypes Optional filing-type filter; ignored when empty.
 * @param input.cursor `nextCursor` from a previous call, or null/omitted.
 * @param input.limit Page size; defaults to 500 and is clamped to 1..10000.
 * @returns The page of facts and `nextCursor` (null when no rows remain).
 * @throws Error when `asNumber` returns null for a row's `value_num`.
 */
export async function listTaxonomyFactsByTicker(input: {
  ticker: string;
  window: '10y' | 'all';
  statement?: FinancialStatementKind;
  filingTypes?: Array<'10-K' | '10-Q'>;
  cursor?: string | null;
  limit?: number;
}) {
  const requestedLimit = Math.trunc(input.limit ?? 500);
  const pageSize = Math.min(Math.max(requestedLimit, 1), 10000);
  const beforeFactId = input.cursor ? Number.parseInt(input.cursor, 10) : null;
  const filters = [eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase())];

  if (input.window === '10y') {
    filters.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }

  if (input.statement) {
    filters.push(eq(filingTaxonomyFact.statement_kind, input.statement));
  }

  if (input.filingTypes && input.filingTypes.length > 0) {
    filters.push(inArray(filingTaxonomySnapshot.filing_type, input.filingTypes));
  }

  if (beforeFactId && Number.isFinite(beforeFactId) && beforeFactId > 0) {
    filters.push(lt(filingTaxonomyFact.id, beforeFactId));
  }

  // Fact columns plus the filing id/date taken from the owning snapshot.
  const selection = {
    id: filingTaxonomyFact.id,
    snapshot_id: filingTaxonomyFact.snapshot_id,
    filing_id: filingTaxonomySnapshot.filing_id,
    filing_date: filingTaxonomySnapshot.filing_date,
    statement_kind: filingTaxonomyFact.statement_kind,
    role_uri: filingTaxonomyFact.role_uri,
    concept_key: filingTaxonomyFact.concept_key,
    qname: filingTaxonomyFact.qname,
    namespace_uri: filingTaxonomyFact.namespace_uri,
    local_name: filingTaxonomyFact.local_name,
    value_num: filingTaxonomyFact.value_num,
    context_id: filingTaxonomyFact.context_id,
    unit: filingTaxonomyFact.unit,
    decimals: filingTaxonomyFact.decimals,
    period_start: filingTaxonomyFact.period_start,
    period_end: filingTaxonomyFact.period_end,
    period_instant: filingTaxonomyFact.period_instant,
    dimensions: filingTaxonomyFact.dimensions,
    is_dimensionless: filingTaxonomyFact.is_dimensionless,
    source_file: filingTaxonomyFact.source_file
  };

  // Fetch one extra row to learn whether another page exists.
  const fetched = await db
    .select(selection)
    .from(filingTaxonomyFact)
    .innerJoin(filingTaxonomySnapshot, eq(filingTaxonomyFact.snapshot_id, filingTaxonomySnapshot.id))
    .where(and(...filters))
    .orderBy(desc(filingTaxonomyFact.id))
    .limit(pageSize + 1);

  const hasMore = fetched.length > pageSize;
  const page = fetched.slice(0, pageSize);

  let nextCursor: string | null = null;
  if (hasMore) {
    const lastRow = page[page.length - 1];
    nextCursor = String(lastRow?.id ?? '');
  }

  const facts: TaxonomyFactRow[] = [];
  for (const row of page) {
    const value = asNumber(row.value_num);
    if (value === null) {
      // A single unparseable value aborts the whole listing.
      throw new Error(`Invalid value_num in taxonomy fact ${row.id}`);
    }

    facts.push({
      id: row.id,
      snapshotId: row.snapshot_id,
      filingId: row.filing_id,
      filingDate: row.filing_date,
      statement: row.statement_kind,
      roleUri: row.role_uri,
      conceptKey: row.concept_key,
      qname: row.qname,
      namespaceUri: row.namespace_uri,
      localName: row.local_name,
      value,
      contextId: row.context_id,
      unit: row.unit,
      decimals: row.decimals,
      periodStart: row.period_start,
      periodEnd: row.period_end,
      periodInstant: row.period_instant,
      dimensions: row.dimensions,
      isDimensionless: row.is_dimensionless,
      sourceFile: row.source_file
    });
  }

  return { facts, nextCursor };
}
|
|
|
|
/**
 * Loads the taxonomy assets belonging to any of the given snapshots, ordered
 * by asset id descending.
 *
 * @param snapshotIds Snapshot ids to match; an empty list returns `[]`
 *   without querying the database.
 * @returns The matching rows mapped through `toAssetRecord`.
 */
export async function listTaxonomyAssetsBySnapshotIds(snapshotIds: number[]) {
  if (!snapshotIds.length) {
    return [];
  }

  const belongsToSnapshots = inArray(filingTaxonomyAsset.snapshot_id, snapshotIds);
  const byIdDescending = desc(filingTaxonomyAsset.id);

  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(belongsToSnapshots)
    .orderBy(byIdDescending);

  return assetRows.map(toAssetRecord);
}
|
|
|
|
/**
 * Bundles two module-level helpers under a single export.
 * NOTE(review): presumably exposed so tests can reach these helpers — confirm.
 */
export const __filingTaxonomyInternals = { normalizeFilingTaxonomySnapshotPayload, toSnapshotRecord };
|