Files
Neon-Desk/lib/server/taxonomy/materialize.ts
2026-03-06 14:40:43 -05:00

375 lines
12 KiB
TypeScript

import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy';
import { classifyStatementRole } from '@/lib/server/taxonomy/linkbase-parser';
import { conceptStatementFallback } from '@/lib/server/taxonomy/xbrl-parser';
/**
 * Removes every hyphen from an accession number,
 * e.g. `0000320193-24-000123` becomes `000032019324000123`.
 */
function compactAccessionNumber(value: string) {
  return value.split('-').join('');
}
/**
 * Reports whether a namespace URI contains `us-gaap` (case-insensitive).
 * Any URI containing that token qualifies, which includes the
 * `fasb.org/us-gaap` namespaces.
 */
function isUsGaapNamespace(namespaceUri: string) {
  const usGaapToken = /us-gaap/i;
  return usGaapToken.test(namespaceUri);
}
/**
 * Splits a concept key of the form `<namespaceUri>#<localName>` at its LAST `#`.
 *
 * When the key contains no `#`, the whole key is treated as the local name and
 * the namespace is reported as `urn:unknown`.
 */
function splitConceptKey(conceptKey: string) {
  const hashAt = conceptKey.lastIndexOf('#');
  return hashAt >= 0
    ? {
        namespaceUri: conceptKey.slice(0, hashAt),
        localName: conceptKey.slice(hashAt + 1)
      }
    : {
        namespaceUri: 'urn:unknown',
        localName: conceptKey
      };
}
/**
 * Derives a human-readable label from a concept's local name:
 * inserts a space at lower/digit-to-upper boundaries (`NetIncome` -> `Net Income`),
 * separates a run of capitals from a following capitalised word
 * (`EPSDiluted` -> `EPS Diluted`), turns underscores into spaces and trims the result.
 */
function localNameToLabel(localName: string) {
  const camelSplit = localName.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
  const acronymSplit = camelSplit.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  const withoutUnderscores = acronymSplit.replace(/_/g, ' ');
  return withoutUnderscores.trim();
}
/**
 * Builds a record with one entry per financial statement kind.
 *
 * `factory` is invoked once per kind, in the order income, balance, cash_flow,
 * equity, comprehensive_income, so each entry receives its own value.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const income = factory();
  const balance = factory();
  const cashFlow = factory();
  const equity = factory();
  const comprehensiveIncome = factory();
  return {
    income,
    balance,
    cash_flow: cashFlow,
    equity,
    comprehensive_income: comprehensiveIncome
  };
}
/**
 * Produces a string key identifying a fact's reporting period from its
 * start, end and instant values; a missing part contributes an empty string.
 */
function periodSignature(fact: TaxonomyFact) {
  const { periodStart, periodEnd, periodInstant } = fact;
  const parts = [
    `start:${periodStart ?? ''}`,
    `end:${periodEnd ?? ''}`,
    `instant:${periodInstant ?? ''}`
  ];
  return parts.join('|');
}
/**
 * Picks the date that represents a fact's period: its period end when set,
 * otherwise its instant, otherwise the supplied fallback date.
 */
function periodDate(fact: TaxonomyFact, fallbackDate: string) {
  if (fact.periodEnd != null) {
    return fact.periodEnd;
  }
  if (fact.periodInstant != null) {
    return fact.periodInstant;
  }
  return fallbackDate;
}
/**
 * Converts a date string to epoch milliseconds via `Date.parse`.
 * Returns `NaN` for `null`, for an empty string, and for anything
 * `Date.parse` cannot interpret.
 */
function parseEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
/**
 * Returns a sorted copy of `periods` (the input array is not mutated).
 *
 * Periods are ordered ascending by the epoch of `periodEnd`, falling back to
 * `filingDate` when `periodEnd` is nullish. If either date is unparseable, or
 * both dates are equal, the periods are ordered by `id` instead.
 */
function sortPeriods(periods: FilingTaxonomyPeriod[]) {
  const comparePeriods = (first: FilingTaxonomyPeriod, second: FilingTaxonomyPeriod) => {
    const firstEpoch = parseEpoch(first.periodEnd ?? first.filingDate);
    const secondEpoch = parseEpoch(second.periodEnd ?? second.filingDate);
    const comparable = Number.isFinite(firstEpoch) && Number.isFinite(secondEpoch);
    if (!comparable || firstEpoch === secondEpoch) {
      return first.id.localeCompare(second.id);
    }
    return firstEpoch - secondEpoch;
  };
  return periods.slice().sort(comparePeriods);
}
/**
 * Chooses the single fact that should represent a group of competing facts.
 *
 * Preference order:
 * 1. dimensionless facts before dimensioned ones;
 * 2. the later period (end date, else instant) when both dates parse and differ;
 * 3. the larger absolute value.
 *
 * @returns the preferred fact, or `null` when `facts` is empty.
 */
function pickPreferredFact<T extends TaxonomyFact>(facts: T[]) {
  if (facts.length === 0) {
    return null;
  }

  const ranked = facts.slice().sort((a, b) => {
    const aDimensionless = Number(Boolean(a.isDimensionless));
    const bDimensionless = Number(Boolean(b.isDimensionless));
    if (aDimensionless !== bDimensionless) {
      return bDimensionless - aDimensionless;
    }

    const aEpoch = parseEpoch(a.periodEnd ?? a.periodInstant);
    const bEpoch = parseEpoch(b.periodEnd ?? b.periodInstant);
    const datesDecide = Number.isFinite(aEpoch) && Number.isFinite(bEpoch) && aEpoch !== bEpoch;
    if (datesDecide) {
      return bEpoch - aEpoch;
    }

    return Math.abs(b.value) - Math.abs(a.value);
  });

  return ranked[0] ?? null;
}
/**
 * Materializes the parsed taxonomy data of one filing into reporting periods,
 * per-statement rows, concept records and flat fact rows.
 *
 * Processing steps:
 * 1. One period is derived per distinct (start, end, instant) signature found
 *    in `input.facts`.
 * 2. Every fact is assigned a statement kind: from the presentation nodes of
 *    its concept when there are any, otherwise from `conceptStatementFallback`.
 * 3. For each statement, rows are built in presentation order; within a row
 *    one fact per period is selected with `pickPreferredFact`. Concepts that
 *    end up with no value for any period produce no row.
 * 4. A concept record is emitted for every concept that produced a row and for
 *    every remaining concept that has at least one fact.
 *
 * @param input - Filing identity (`filingId`, `accessionNumber`, `filingDate`,
 *   `filingType`), the parsed `facts`, the `presentation` nodes and a
 *   concept-key-to-label map.
 * @returns `periods` (sorted by `sortPeriods`), `statement_rows` keyed by
 *   statement kind, `concepts`, flat `facts` rows, and `dimensionsCount`
 *   (the summed length of every fact's `dimensions`).
 */
export function materializeTaxonomyStatements(input: {
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  facts: TaxonomyFact[];
  presentation: TaxonomyPresentationConcept[];
  labelByConcept: Map<string, string>;
}) {
  // ---- 1. Periods: one per distinct period signature, first occurrence wins ----
  const periodBySignature = new Map<string, FilingTaxonomyPeriod>();
  const compactAccession = compactAccessionNumber(input.accessionNumber);

  for (const fact of input.facts) {
    const signature = periodSignature(fact);
    if (periodBySignature.has(signature)) {
      continue;
    }

    const date = periodDate(fact, input.filingDate);
    periodBySignature.set(signature, {
      // The trailing counter keeps ids unique when two signatures share a date.
      id: `${date}-${compactAccession}-${periodBySignature.size + 1}`,
      filingId: input.filingId,
      accessionNumber: input.accessionNumber,
      filingDate: input.filingDate,
      periodStart: fact.periodStart,
      periodEnd: fact.periodEnd ?? fact.periodInstant ?? input.filingDate,
      filingType: input.filingType,
      periodLabel: fact.periodInstant && !fact.periodStart
        ? 'Instant'
        : fact.periodStart && fact.periodEnd
          ? `${fact.periodStart} to ${fact.periodEnd}`
          : 'Filing Period'
    });
  }

  const periods = sortPeriods([...periodBySignature.values()]);

  // ---- 2. Presentation index ----
  // Each node's role is classified exactly once here and reused everywhere
  // below, instead of re-classifying inside per-statement / per-concept loops.
  // NOTE(review): this assumes classifyStatementRole is a pure function of the
  // role URI - confirm against linkbase-parser.
  const classifiedPresentation = input.presentation.map((node) => ({
    node,
    statementKind: classifyStatementRole(node.roleUri)
  }));

  const presentationByConcept = new Map<string, typeof classifiedPresentation>();
  for (const entry of classifiedPresentation) {
    const existing = presentationByConcept.get(entry.node.conceptKey);
    if (existing) {
      existing.push(entry);
    } else {
      presentationByConcept.set(entry.node.conceptKey, [entry]);
    }
  }

  // ---- 3. Assign each fact a statement kind and role ----
  const enrichedFacts = input.facts.map((fact, index) => {
    const entries = presentationByConcept.get(fact.conceptKey) ?? [];
    // Prefer the first node whose role maps to a statement. Taking entries[0]
    // unconditionally would drop the fact from every statement whenever the
    // concept's first node sits under an unclassified role, even though a
    // later node places the concept in a real statement.
    const bestEntry = entries.find((entry) => entry.statementKind) ?? entries[0] ?? null;
    const statementKind = bestEntry
      ? bestEntry.statementKind
      : conceptStatementFallback(fact.localName);

    return {
      ...fact,
      // 1-based position of the fact in input.facts.
      __sourceFactId: index + 1,
      statement_kind: statementKind,
      role_uri: bestEntry?.node.roleUri ?? null
    };
  });

  const rowsByStatement = createStatementRecord<TaxonomyStatementRow[]>(() => []);
  const conceptByKey = new Map<string, TaxonomyConcept>();
  const groupedByStatement = createStatementRecord<Map<string, typeof enrichedFacts>>(() => new Map());

  // Facts without a statement kind are left out of the statement rows, but
  // they still appear in the returned `facts` and `concepts`.
  for (const fact of enrichedFacts) {
    if (!fact.statement_kind) {
      continue;
    }

    const group = groupedByStatement[fact.statement_kind].get(fact.conceptKey);
    if (group) {
      group.push(fact);
    } else {
      groupedByStatement[fact.statement_kind].set(fact.conceptKey, [fact]);
    }
  }

  // ---- 4. Build the rows of each statement ----
  for (const statement of Object.keys(rowsByStatement) as FinancialStatementKind[]) {
    const rows = rowsByStatement[statement];
    const factsByConcept = groupedByStatement[statement];

    // Candidate concepts: everything presented under this statement plus
    // everything that has facts assigned to it.
    const conceptKeys = new Set<string>();
    for (const entry of classifiedPresentation) {
      if (entry.statementKind === statement) {
        conceptKeys.add(entry.node.conceptKey);
      }
    }
    for (const conceptKey of factsByConcept.keys()) {
      conceptKeys.add(conceptKey);
    }

    const orderedConcepts = [...conceptKeys]
      .map((conceptKey) => {
        const presentationNodes = (presentationByConcept.get(conceptKey) ?? [])
          .filter((entry) => entry.statementKind === statement)
          .map((entry) => entry.node);
        const hasPresentation = presentationNodes.length > 0;

        return {
          conceptKey,
          hasPresentation,
          // MAX_SAFE_INTEGER is only a sort sentinel that pushes unpresented
          // concepts to the end; it is never written into a row.
          presentationOrder: hasPresentation
            ? Math.min(...presentationNodes.map((node) => node.order))
            : Number.MAX_SAFE_INTEGER,
          presentationDepth: hasPresentation
            ? Math.min(...presentationNodes.map((node) => node.depth))
            : 0,
          // Role and parent come from the first matching node, which is not
          // necessarily the node that supplied the minimum order/depth.
          roleUri: presentationNodes[0]?.roleUri ?? null,
          parentConceptKey: presentationNodes[0]?.parentConceptKey ?? null
        };
      })
      .sort((left, right) => {
        if (left.presentationOrder !== right.presentationOrder) {
          return left.presentationOrder - right.presentationOrder;
        }

        return left.conceptKey.localeCompare(right.conceptKey);
      });

    for (const orderedConcept of orderedConcepts) {
      const facts = factsByConcept.get(orderedConcept.conceptKey) ?? [];
      const { namespaceUri, localName } = splitConceptKey(orderedConcept.conceptKey);
      const qname = facts[0]?.qname ?? `unknown:${localName}`;
      const label = input.labelByConcept.get(orderedConcept.conceptKey) ?? localNameToLabel(localName);

      // Bucket this concept's facts by period so one value is chosen per period.
      const factGroups = new Map<string, typeof facts>();
      for (const fact of facts) {
        const signature = periodSignature(fact);
        const group = factGroups.get(signature);
        if (group) {
          group.push(fact);
        } else {
          factGroups.set(signature, [fact]);
        }
      }

      const values: Record<string, number | null> = {};
      const units: Record<string, string | null> = {};
      const sourceFactIds: number[] = [];
      let hasDimensions = false;

      for (const [signature, group] of factGroups.entries()) {
        const periodId = periodBySignature.get(signature)?.id;
        if (!periodId) {
          continue;
        }

        const preferred = pickPreferredFact(group);
        if (!preferred) {
          continue;
        }

        values[periodId] = preferred.value;
        units[periodId] = preferred.unit;
        sourceFactIds.push(preferred.__sourceFactId);

        if (group.some((entry) => !entry.isDimensionless)) {
          hasDimensions = true;
        }
      }

      // A concept with no value in any period produces no row.
      if (Object.keys(values).length === 0) {
        continue;
      }

      // Concepts absent from the presentation get a sequential order placed
      // after every row emitted so far. Checking the MAX_SAFE_INTEGER sentinel
      // with Number.isFinite alone cannot detect them, because that sentinel
      // is itself finite.
      const lastOrder = rows[rows.length - 1]?.order ?? 0;
      const order = orderedConcept.hasPresentation && Number.isFinite(orderedConcept.presentationOrder)
        ? orderedConcept.presentationOrder
        : Math.max(rows.length, lastOrder) + 1;

      const row: TaxonomyStatementRow = {
        key: orderedConcept.conceptKey,
        label,
        conceptKey: orderedConcept.conceptKey,
        qname,
        namespaceUri,
        localName,
        isExtension: !isUsGaapNamespace(namespaceUri),
        statement,
        roleUri: orderedConcept.roleUri,
        order,
        depth: orderedConcept.presentationDepth,
        parentKey: orderedConcept.parentConceptKey,
        values,
        units,
        hasDimensions,
        sourceFactIds
      };

      rows.push(row);

      // The first statement that yields a row for a concept defines its record.
      if (!conceptByKey.has(orderedConcept.conceptKey)) {
        conceptByKey.set(orderedConcept.conceptKey, {
          concept_key: orderedConcept.conceptKey,
          qname,
          namespace_uri: namespaceUri,
          local_name: localName,
          label,
          is_extension: !isUsGaapNamespace(namespaceUri),
          statement_kind: statement,
          role_uri: orderedConcept.roleUri,
          presentation_order: row.order,
          presentation_depth: row.depth,
          parent_concept_key: row.parentKey,
          is_abstract: /abstract/i.test(localName)
        });
      }
    }
  }

  // ---- 5. Concept records for facts that produced no statement row ----
  for (const fact of enrichedFacts) {
    if (conceptByKey.has(fact.conceptKey)) {
      continue;
    }

    conceptByKey.set(fact.conceptKey, {
      concept_key: fact.conceptKey,
      qname: fact.qname,
      namespace_uri: fact.namespaceUri,
      local_name: fact.localName,
      label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName),
      is_extension: !isUsGaapNamespace(fact.namespaceUri),
      statement_kind: fact.statement_kind,
      role_uri: fact.role_uri,
      presentation_order: null,
      presentation_depth: null,
      parent_concept_key: null,
      is_abstract: /abstract/i.test(fact.localName)
    });
  }

  const concepts = [...conceptByKey.values()];

  // ---- 6. Flat fact rows, in the same order as input.facts ----
  const factRows = enrichedFacts.map((fact) => ({
    concept_key: fact.conceptKey,
    qname: fact.qname,
    namespace_uri: fact.namespaceUri,
    local_name: fact.localName,
    statement_kind: fact.statement_kind,
    role_uri: fact.role_uri,
    context_id: fact.contextId,
    unit: fact.unit,
    decimals: fact.decimals,
    value_num: fact.value,
    period_start: fact.periodStart,
    period_end: fact.periodEnd,
    period_instant: fact.periodInstant,
    dimensions: fact.dimensions,
    is_dimensionless: fact.isDimensionless,
    source_file: fact.sourceFile
  }));

  const dimensionsCount = enrichedFacts.reduce((total, fact) => total + fact.dimensions.length, 0);

  return {
    periods,
    statement_rows: rowsByStatement,
    concepts,
    facts: factRows,
    dimensionsCount
  };
}