401 lines
12 KiB
TypeScript
401 lines
12 KiB
TypeScript
import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
|
|
import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
|
import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy';
|
|
import { classifyStatementRole, conceptStatementFallback } from '@/lib/server/taxonomy/classifiers';
|
|
|
|
/**
 * Strips every hyphen from an accession number.
 *
 * @param value - Accession number, possibly containing `-` separators.
 * @returns The same characters with all `-` removed.
 */
function compactAccessionNumber(value: string) {
  const segments = value.split('-');
  return segments.join('');
}
|
|
|
|
/**
 * Reports whether a namespace URI mentions `us-gaap` (case-insensitive).
 *
 * Any URI matching `fasb.org/us-gaap` necessarily contains `us-gaap`, so a
 * single substring pattern covers both spellings.
 *
 * @param namespaceUri - Namespace URI of a concept.
 * @returns `true` when the URI contains `us-gaap` in any letter case.
 */
function isUsGaapNamespace(namespaceUri: string) {
  const usGaapPattern = /us-gaap/i;
  return usGaapPattern.test(namespaceUri);
}
|
|
|
|
/**
 * Splits a concept key of the form `<namespaceUri>#<localName>` at its last `#`.
 *
 * @param conceptKey - Concept key to split.
 * @returns The namespace URI and local name. When the key has no `#`, the
 *          namespace is `'urn:unknown'` and the whole key is the local name.
 */
function splitConceptKey(conceptKey: string) {
  const hashAt = conceptKey.lastIndexOf('#');

  return hashAt >= 0
    ? {
        namespaceUri: conceptKey.slice(0, hashAt),
        localName: conceptKey.slice(hashAt + 1)
      }
    : {
        namespaceUri: 'urn:unknown',
        localName: conceptKey
      };
}
|
|
|
|
/**
 * Derives a human-readable label from a concept's local name.
 *
 * @param localName - Local name such as `NetIncomeLoss`.
 * @returns The name with spaces inserted at word boundaries, underscores
 *          turned into spaces, and surrounding whitespace trimmed.
 */
function localNameToLabel(localName: string) {
  // lower-case letter or digit followed by a capital: "incomeLoss" -> "income Loss"
  const camelSplit = localName.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
  // run of capitals followed by a capitalised word: "EBITDAMargin" -> "EBITDA Margin"
  const acronymSplit = camelSplit.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  const withoutUnderscores = acronymSplit.replace(/_/g, ' ');
  return withoutUnderscores.trim();
}
|
|
|
|
/**
 * Creates a record with one freshly built value per financial statement kind.
 *
 * The factory is invoked once per key, in the order income, balance,
 * cash_flow, equity, comprehensive_income, so each key receives its own value.
 *
 * @param factory - Produces the value stored under each statement key.
 * @returns A record keyed by every statement kind.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const income = factory();
  const balance = factory();
  const cashFlow = factory();
  const equity = factory();
  const comprehensiveIncome = factory();

  return {
    income,
    balance,
    cash_flow: cashFlow,
    equity,
    comprehensive_income: comprehensiveIncome
  };
}
|
|
|
|
/**
 * Builds a string key identifying a fact's reporting period.
 *
 * @param fact - Fact whose `periodStart`, `periodEnd` and `periodInstant` are read.
 * @returns `start:<start>|end:<end>|instant:<instant>`, with an empty string
 *          substituted for any part that is null or undefined.
 */
function periodSignature(fact: TaxonomyFact) {
  const { periodStart, periodEnd, periodInstant } = fact;

  return [
    `start:${periodStart ?? ''}`,
    `end:${periodEnd ?? ''}`,
    `instant:${periodInstant ?? ''}`
  ].join('|');
}
|
|
|
|
/**
 * Picks the date that represents a fact's period.
 *
 * @param fact - Fact whose `periodEnd` and `periodInstant` are consulted.
 * @param fallbackDate - Date used when the fact has neither value.
 * @returns `periodEnd` when set, otherwise `periodInstant` when set,
 *          otherwise `fallbackDate`. Only null/undefined count as unset.
 */
function periodDate(fact: TaxonomyFact, fallbackDate: string) {
  if (fact.periodEnd != null) {
    return fact.periodEnd;
  }

  if (fact.periodInstant != null) {
    return fact.periodInstant;
  }

  return fallbackDate;
}
|
|
|
|
/**
 * Converts a date string into epoch milliseconds.
 *
 * @param value - Date string, or null.
 * @returns The `Date.parse` result, or `NaN` when `value` is null or empty.
 */
function parseEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
|
|
|
|
/**
 * Returns a sorted copy of the given periods; the input array is not mutated.
 *
 * Periods are ordered ascending by the parsed `periodEnd` (or `filingDate`
 * when `periodEnd` is null/undefined). When either date fails to parse, or
 * both dates are equal, the periods are ordered by `id` via `localeCompare`.
 *
 * @param periods - Periods to order.
 * @returns A new array containing the same periods in sorted order.
 */
function sortPeriods(periods: FilingTaxonomyPeriod[]) {
  const epochOf = (period: FilingTaxonomyPeriod) => parseEpoch(period.periodEnd ?? period.filingDate);

  const sorted = periods.slice();
  sorted.sort((a, b) => {
    const aEpoch = epochOf(a);
    const bEpoch = epochOf(b);
    const bothParsed = Number.isFinite(aEpoch) && Number.isFinite(bEpoch);

    if (!bothParsed || aEpoch === bEpoch) {
      // Dates cannot separate the two periods; fall back to the id.
      return a.id.localeCompare(b.id);
    }

    return aEpoch - bEpoch;
  });

  return sorted;
}
|
|
|
|
/**
 * Selects the single fact that should represent a group of facts.
 *
 * Ranking, highest priority first:
 *  1. facts with a truthy `isDimensionless` before the others;
 *  2. later `periodEnd` (or `periodInstant`) first, when both dates parse
 *     and differ;
 *  3. larger absolute `value` first.
 *
 * @param facts - Candidate facts; the array is not mutated.
 * @returns The top-ranked fact, or `null` when `facts` is empty.
 */
function pickPreferredFact<T extends TaxonomyFact>(facts: T[]) {
  if (facts.length === 0) {
    return null;
  }

  const ranked = facts.slice();
  ranked.sort((a, b) => {
    const dimensionlessDelta = Number(Boolean(b.isDimensionless)) - Number(Boolean(a.isDimensionless));
    if (dimensionlessDelta !== 0) {
      return dimensionlessDelta;
    }

    const aEpoch = parseEpoch(a.periodEnd ?? a.periodInstant);
    const bEpoch = parseEpoch(b.periodEnd ?? b.periodInstant);
    const datesComparable = Number.isFinite(aEpoch) && Number.isFinite(bEpoch);
    if (datesComparable && aEpoch !== bEpoch) {
      return bEpoch - aEpoch;
    }

    return Math.abs(b.value) - Math.abs(a.value);
  });

  return ranked[0] ?? null;
}
|
|
|
|
/**
 * Turns the raw facts and presentation tree of one filing into periods,
 * per-statement rows, concept records and flat fact rows.
 *
 * Steps, in order:
 *  1. One period is created per distinct period signature found in `facts`.
 *  2. Each fact is tagged with a statement kind: the classified role of the
 *     first presentation node for its concept, or `conceptStatementFallback`
 *     of its local name when the concept has no presentation node.
 *  3. For every statement kind, concepts are ordered by their lowest
 *     presentation order within that statement (ties broken by concept key)
 *     and one row is emitted per concept that has at least one value.
 *  4. A concept record is emitted for every concept that produced a row, and
 *     for every remaining concept that has a fact.
 *
 * @param input.filingId - Identifier copied onto every period.
 * @param input.accessionNumber - Accession number; its hyphen-free form is part of each period id.
 * @param input.filingDate - Used as the period date when a fact has neither `periodEnd` nor `periodInstant`.
 * @param input.filingType - Copied onto every period.
 * @param input.facts - Facts to materialize.
 * @param input.presentation - Presentation nodes supplying role, order, depth and parent per concept.
 * @param input.labelByConcept - Preferred labels by concept key; missing entries fall back to a label derived from the local name.
 * @returns `periods` (sorted), `statement_rows` keyed by statement kind,
 *          `concepts`, `facts` (one flat row per input fact) and
 *          `dimensionsCount` (sum of `dimensions.length` over all facts).
 */
export function materializeTaxonomyStatements(input: {
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  facts: TaxonomyFact[];
  presentation: TaxonomyPresentationConcept[];
  labelByConcept: Map<string, string>;
}) {
  // --- 1. Periods: one per distinct period signature, in first-seen order. ---
  const periodBySignature = new Map<string, FilingTaxonomyPeriod>();
  const compactAccession = compactAccessionNumber(input.accessionNumber);

  for (const fact of input.facts) {
    const signature = periodSignature(fact);
    if (periodBySignature.has(signature)) {
      continue;
    }

    const date = periodDate(fact, input.filingDate);
    // The running count keeps ids unique when two signatures share a date.
    const id = `${date}-${compactAccession}-${periodBySignature.size + 1}`;

    periodBySignature.set(signature, {
      id,
      filingId: input.filingId,
      accessionNumber: input.accessionNumber,
      filingDate: input.filingDate,
      periodStart: fact.periodStart,
      periodEnd: date,
      filingType: input.filingType,
      periodLabel: fact.periodInstant && !fact.periodStart
        ? 'Instant'
        : fact.periodStart && fact.periodEnd
          ? `${fact.periodStart} to ${fact.periodEnd}`
          : 'Filing Period'
    });
  }

  const periods = sortPeriods([...periodBySignature.values()]);
  const periodIdBySignature = new Map<string, string>(
    [...periodBySignature.entries()].map(([signature, period]) => [signature, period.id])
  );

  // --- 2. Index the presentation tree in a single pass. ---
  // presentationByConcept: every node of a concept, regardless of role.
  // presentationByStatement: nodes of a concept whose role classifies to that
  // statement. Building this once avoids re-filtering and re-classifying the
  // whole presentation list for every concept of every statement.
  const presentationByConcept = new Map<string, TaxonomyPresentationConcept[]>();
  const presentationByStatement = createStatementRecord<Map<string, TaxonomyPresentationConcept[]>>(
    () => new Map()
  );

  for (const node of input.presentation) {
    const nodesForConcept = presentationByConcept.get(node.conceptKey);
    if (nodesForConcept) {
      nodesForConcept.push(node);
    } else {
      presentationByConcept.set(node.conceptKey, [node]);
    }

    const kind = classifyStatementRole(node.roleUri);
    const statementIndex = kind ? presentationByStatement[kind] : undefined;
    if (!statementIndex) {
      // Role does not belong to any of the materialized statements.
      continue;
    }

    const nodesForStatement = statementIndex.get(node.conceptKey);
    if (nodesForStatement) {
      nodesForStatement.push(node);
    } else {
      statementIndex.set(node.conceptKey, [node]);
    }
  }

  // --- 3. Tag every fact with its statement kind and a 1-based source id. ---
  const enrichedFacts = input.facts.map((fact, index) => {
    const nodes = presentationByConcept.get(fact.conceptKey) ?? [];
    const bestNode = nodes[0] ?? null;
    const statementKind = bestNode
      ? classifyStatementRole(bestNode.roleUri)
      : conceptStatementFallback(fact.localName);

    return {
      ...fact,
      __sourceFactId: index + 1,
      statement_kind: statementKind,
      role_uri: bestNode?.roleUri ?? null
    };
  });

  const rowsByStatement = createStatementRecord<TaxonomyStatementRow[]>(() => []);
  const conceptByKey = new Map<string, TaxonomyConcept>();
  const groupedByStatement = createStatementRecord<Map<string, typeof enrichedFacts>>(() => new Map());

  // Facts without a statement kind are left out of the rows but still appear
  // in the returned `facts` and `concepts`.
  for (const fact of enrichedFacts) {
    if (!fact.statement_kind) {
      continue;
    }

    const group = groupedByStatement[fact.statement_kind].get(fact.conceptKey);
    if (group) {
      group.push(fact);
    } else {
      groupedByStatement[fact.statement_kind].set(fact.conceptKey, [fact]);
    }
  }

  // --- 4. Build the rows of each statement. ---
  for (const statement of Object.keys(rowsByStatement) as FinancialStatementKind[]) {
    const statementPresentation = presentationByStatement[statement];
    const statementFacts = groupedByStatement[statement];

    // Presentation concepts first (first-seen order), then fact-only concepts.
    const conceptKeys = new Set<string>([...statementPresentation.keys(), ...statementFacts.keys()]);

    const orderedConcepts = [...conceptKeys]
      .map((conceptKey) => {
        const presentationNodes = statementPresentation.get(conceptKey) ?? [];
        const hasPresentation = presentationNodes.length > 0;

        return {
          conceptKey,
          // Infinity (not MAX_SAFE_INTEGER) marks "no presentation order": it
          // still sorts last, and it is non-finite, so the row gets a
          // sequential order below instead of a huge sentinel value.
          presentationOrder: hasPresentation
            ? Math.min(...presentationNodes.map((node) => node.order))
            : Number.POSITIVE_INFINITY,
          presentationDepth: hasPresentation
            ? Math.min(...presentationNodes.map((node) => node.depth))
            : 0,
          roleUri: presentationNodes[0]?.roleUri ?? null,
          parentConceptKey: presentationNodes[0]?.parentConceptKey ?? null
        };
      })
      .sort((left, right) => {
        // The inequality guard also keeps Infinity - Infinity (NaN) out of the result.
        if (left.presentationOrder !== right.presentationOrder) {
          return left.presentationOrder - right.presentationOrder;
        }

        return left.conceptKey.localeCompare(right.conceptKey);
      });

    for (const orderedConcept of orderedConcepts) {
      const facts = statementFacts.get(orderedConcept.conceptKey) ?? [];
      const { namespaceUri, localName } = splitConceptKey(orderedConcept.conceptKey);
      const qname = facts[0]?.qname ?? `unknown:${localName}`;
      const label = input.labelByConcept.get(orderedConcept.conceptKey) ?? localNameToLabel(localName);
      const values: Record<string, number | null> = {};
      const units: Record<string, string | null> = {};

      // Group the concept's facts by period so each period yields one value.
      const factGroups = new Map<string, typeof facts>();
      for (const fact of facts) {
        const signature = periodSignature(fact);
        const group = factGroups.get(signature);
        if (group) {
          group.push(fact);
        } else {
          factGroups.set(signature, [fact]);
        }
      }

      const sourceFactIds: number[] = [];
      let hasDimensions = false;
      for (const [signature, group] of factGroups.entries()) {
        const periodId = periodIdBySignature.get(signature);
        if (!periodId) {
          continue;
        }

        const preferred = pickPreferredFact(group);
        if (!preferred) {
          continue;
        }

        values[periodId] = preferred.value;
        units[periodId] = preferred.unit;
        sourceFactIds.push(preferred.__sourceFactId);

        if (group.some((entry) => !entry.isDimensionless)) {
          hasDimensions = true;
        }
      }

      // Concepts that only appear in the presentation tree produce no row.
      if (Object.keys(values).length === 0) {
        continue;
      }

      const isExtension = !isUsGaapNamespace(namespaceUri);

      const row: TaxonomyStatementRow = {
        key: orderedConcept.conceptKey,
        label,
        conceptKey: orderedConcept.conceptKey,
        qname,
        namespaceUri,
        localName,
        isExtension,
        statement,
        roleUri: orderedConcept.roleUri,
        order: Number.isFinite(orderedConcept.presentationOrder)
          ? orderedConcept.presentationOrder
          : rowsByStatement[statement].length + 1,
        depth: orderedConcept.presentationDepth,
        parentKey: orderedConcept.parentConceptKey,
        values,
        units,
        hasDimensions,
        sourceFactIds
      };

      rowsByStatement[statement].push(row);

      // The first statement to emit a row for a concept defines its record.
      if (!conceptByKey.has(orderedConcept.conceptKey)) {
        conceptByKey.set(orderedConcept.conceptKey, {
          concept_key: orderedConcept.conceptKey,
          qname,
          namespace_uri: namespaceUri,
          local_name: localName,
          label,
          is_extension: isExtension,
          balance: null,
          period_type: null,
          data_type: null,
          statement_kind: statement,
          role_uri: orderedConcept.roleUri,
          authoritative_concept_key: null,
          mapping_method: null,
          surface_key: null,
          detail_parent_surface_key: null,
          kpi_key: null,
          residual_flag: false,
          presentation_order: row.order,
          presentation_depth: row.depth,
          parent_concept_key: row.parentKey,
          is_abstract: /abstract/i.test(localName)
        });
      }
    }
  }

  // --- 5. Concept records for facts whose concept produced no row. ---
  for (const fact of enrichedFacts) {
    if (conceptByKey.has(fact.conceptKey)) {
      continue;
    }

    conceptByKey.set(fact.conceptKey, {
      concept_key: fact.conceptKey,
      qname: fact.qname,
      namespace_uri: fact.namespaceUri,
      local_name: fact.localName,
      label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName),
      is_extension: !isUsGaapNamespace(fact.namespaceUri),
      balance: null,
      period_type: null,
      data_type: fact.dataType,
      statement_kind: fact.statement_kind,
      role_uri: fact.role_uri,
      authoritative_concept_key: null,
      mapping_method: null,
      surface_key: null,
      detail_parent_surface_key: null,
      kpi_key: null,
      residual_flag: false,
      presentation_order: null,
      presentation_depth: null,
      parent_concept_key: null,
      is_abstract: /abstract/i.test(fact.localName)
    });
  }

  const concepts = [...conceptByKey.values()];

  // --- 6. Flat fact rows: one per input fact, in input order. ---
  const factRows = enrichedFacts.map((fact) => ({
    concept_key: fact.conceptKey,
    qname: fact.qname,
    namespace_uri: fact.namespaceUri,
    local_name: fact.localName,
    data_type: fact.dataType,
    statement_kind: fact.statement_kind,
    role_uri: fact.role_uri,
    authoritative_concept_key: null,
    mapping_method: null,
    surface_key: null,
    detail_parent_surface_key: null,
    kpi_key: null,
    residual_flag: false,
    context_id: fact.contextId,
    unit: fact.unit,
    decimals: fact.decimals,
    precision: fact.precision,
    nil: fact.nil,
    value_num: fact.value,
    period_start: fact.periodStart,
    period_end: fact.periodEnd,
    period_instant: fact.periodInstant,
    dimensions: fact.dimensions,
    is_dimensionless: fact.isDimensionless,
    source_file: fact.sourceFile,
  }));

  const dimensionsCount = enrichedFacts.reduce((total, fact) => {
    return total + fact.dimensions.length;
  }, 0);

  return {
    periods,
    statement_rows: rowsByStatement,
    concepts,
    facts: factRows,
    dimensionsCount
  };
}
|