import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy';
import { classifyStatementRole } from '@/lib/server/taxonomy/linkbase-parser';
import { conceptStatementFallback } from '@/lib/server/taxonomy/xbrl-parser';

/** Removes every dash from an accession number. */
function compactAccessionNumber(value: string): string {
  return value.replace(/-/g, '');
}

/**
 * Returns true when the namespace URI contains "us-gaap" (case-insensitive).
 * This also covers URIs of the form "fasb.org/us-gaap", so one pattern suffices.
 */
function isUsGaapNamespace(namespaceUri: string): boolean {
  return /us-gaap/i.test(namespaceUri);
}

/**
 * Splits a concept key at its last '#' into namespace URI and local name.
 * Keys without a '#' are returned whole as the local name under the
 * placeholder namespace 'urn:unknown'.
 */
function splitConceptKey(conceptKey: string): { namespaceUri: string; localName: string } {
  const index = conceptKey.lastIndexOf('#');
  if (index < 0) {
    return { namespaceUri: 'urn:unknown', localName: conceptKey };
  }

  return {
    namespaceUri: conceptKey.slice(0, index),
    localName: conceptKey.slice(index + 1)
  };
}

/**
 * Derives a display label from a local name by inserting spaces at case
 * boundaries and replacing underscores, e.g. "NetIncomeLoss" -> "Net Income Loss".
 */
function localNameToLabel(localName: string): string {
  return localName
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .replace(/_/g, ' ')
    .trim();
}

/**
 * Builds a record with one entry per statement kind. The factory is invoked
 * once per key so that every statement receives its own instance.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  return {
    income: factory(),
    balance: factory(),
    cash_flow: factory(),
    equity: factory(),
    comprehensive_income: factory()
  };
}

/** Builds a string key from a fact's start/end/instant values; missing values become ''. */
function periodSignature(fact: TaxonomyFact): string {
  const start = fact.periodStart ?? '';
  const end = fact.periodEnd ?? '';
  const instant = fact.periodInstant ?? '';
  return `start:${start}|end:${end}|instant:${instant}`;
}

/** Returns the fact's period end, else its instant, else the supplied fallback date. */
function periodDate(fact: TaxonomyFact, fallbackDate: string): string {
  return fact.periodEnd ?? fact.periodInstant ?? fallbackDate;
}

/** Parses a date string with `Date.parse`; empty or missing input yields NaN. */
function parseEpoch(value: string | null | undefined): number {
  if (!value) {
    return Number.NaN;
  }

  return Date.parse(value);
}

/**
 * Returns a sorted copy of the periods, ascending by period end (or filing
 * date when the period end is missing). When either date is unparseable or the
 * dates are equal, ordering falls back to comparing ids.
 */
function sortPeriods(periods: FilingTaxonomyPeriod[]): FilingTaxonomyPeriod[] {
  return [...periods].sort((left, right) => {
    const leftDate = parseEpoch(left.periodEnd ?? left.filingDate);
    const rightDate = parseEpoch(right.periodEnd ?? right.filingDate);

    if (Number.isFinite(leftDate) && Number.isFinite(rightDate) && leftDate !== rightDate) {
      return leftDate - rightDate;
    }

    return left.id.localeCompare(right.id);
  });
}

/**
 * Picks a single fact out of a group, or null for an empty group.
 *
 * Preference order: dimensionless facts first, then the most recent period
 * end/instant, then the largest absolute value. The input array is not mutated.
 */
function pickPreferredFact<T extends TaxonomyFact>(facts: T[]): T | null {
  if (facts.length === 0) {
    return null;
  }

  const ordered = [...facts].sort((left, right) => {
    const leftScore = left.isDimensionless ? 1 : 0;
    const rightScore = right.isDimensionless ? 1 : 0;
    if (leftScore !== rightScore) {
      return rightScore - leftScore;
    }

    const leftDate = parseEpoch(left.periodEnd ?? left.periodInstant);
    const rightDate = parseEpoch(right.periodEnd ?? right.periodInstant);
    if (Number.isFinite(leftDate) && Number.isFinite(rightDate) && leftDate !== rightDate) {
      return rightDate - leftDate;
    }

    return Math.abs(right.value) - Math.abs(left.value);
  });

  return ordered[0] ?? null;
}

/**
 * Turns the parsed facts and presentation nodes of one filing into periods,
 * per-statement rows, concept records and flat fact rows.
 *
 * @param input.filingId - Copied onto every generated period.
 * @param input.accessionNumber - Copied onto periods; its dash-free form is part of each period id.
 * @param input.filingDate - Fallback date for facts that carry neither a period end nor an instant.
 * @param input.filingType - Copied onto every generated period.
 * @param input.facts - Facts to materialize; a fact's 1-based position becomes its source fact id.
 * @param input.presentation - Presentation nodes used to classify facts and to order rows.
 * @param input.labelByConcept - Labels keyed by concept key; missing labels are derived from the local name.
 * @returns `periods` (sorted), `statement_rows` keyed by statement kind, `concepts`,
 *          `facts` (one flat row per input fact) and `dimensionsCount` (sum of
 *          `dimensions.length` over all facts).
 */
export function materializeTaxonomyStatements(input: {
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  facts: TaxonomyFact[];
  presentation: TaxonomyPresentationConcept[];
  labelByConcept: Map<string, string>;
}) {
  // One period per distinct (start, end, instant) signature, in first-seen order.
  const periodBySignature = new Map<string, FilingTaxonomyPeriod>();
  const compactAccession = compactAccessionNumber(input.accessionNumber);

  for (const fact of input.facts) {
    const signature = periodSignature(fact);
    if (periodBySignature.has(signature)) {
      continue;
    }

    const date = periodDate(fact, input.filingDate);
    // The trailing counter keeps ids distinct when several signatures share a date.
    const id = `${date}-${compactAccession}-${periodBySignature.size + 1}`;

    periodBySignature.set(signature, {
      id,
      filingId: input.filingId,
      accessionNumber: input.accessionNumber,
      filingDate: input.filingDate,
      periodStart: fact.periodStart,
      periodEnd: fact.periodEnd ?? fact.periodInstant ?? input.filingDate,
      filingType: input.filingType,
      periodLabel: fact.periodInstant && !fact.periodStart
        ? 'Instant'
        : fact.periodStart && fact.periodEnd
          ? `${fact.periodStart} to ${fact.periodEnd}`
          : 'Filing Period'
    });
  }

  const periods = sortPeriods([...periodBySignature.values()]);
  const periodIdBySignature = new Map<string, string>(
    [...periodBySignature.entries()].map(([signature, period]): [string, string] => [signature, period.id])
  );

  // All presentation nodes per concept, in input order.
  const presentationByConcept = new Map<string, TaxonomyPresentationConcept[]>();
  for (const node of input.presentation) {
    const existing = presentationByConcept.get(node.conceptKey);
    if (existing) {
      existing.push(node);
    } else {
      presentationByConcept.set(node.conceptKey, [node]);
    }
  }

  // Attach a statement kind and role to each fact: the concept's first
  // presentation node decides when there is one, otherwise the local-name fallback.
  const enrichedFacts = input.facts.map((fact, index) => {
    const nodes = presentationByConcept.get(fact.conceptKey) ?? [];
    const bestNode = nodes[0] ?? null;
    const statementKind = bestNode
      ? classifyStatementRole(bestNode.roleUri)
      : conceptStatementFallback(fact.localName);

    return {
      ...fact,
      __sourceFactId: index + 1,
      statement_kind: statementKind,
      role_uri: bestNode?.roleUri ?? null
    };
  });

  const rowsByStatement = createStatementRecord<TaxonomyStatementRow[]>(() => []);
  const conceptByKey = new Map<string, TaxonomyConcept>();
  const groupedByStatement = createStatementRecord<Map<string, typeof enrichedFacts>>(() => new Map());

  // Facts without a statement kind are left out of the statement rows.
  for (const fact of enrichedFacts) {
    if (!fact.statement_kind) {
      continue;
    }

    const group = groupedByStatement[fact.statement_kind].get(fact.conceptKey);
    if (group) {
      group.push(fact);
    } else {
      groupedByStatement[fact.statement_kind].set(fact.conceptKey, [fact]);
    }
  }

  for (const statement of Object.keys(rowsByStatement) as FinancialStatementKind[]) {
    // Index this statement's presentation nodes by concept in a single pass so
    // the per-concept lookup below does not rescan the whole presentation list.
    const statementNodesByConcept = new Map<string, TaxonomyPresentationConcept[]>();
    for (const node of input.presentation) {
      if (classifyStatementRole(node.roleUri) !== statement) {
        continue;
      }

      const bucket = statementNodesByConcept.get(node.conceptKey);
      if (bucket) {
        bucket.push(node);
      } else {
        statementNodesByConcept.set(node.conceptKey, [node]);
      }
    }

    // Candidate concepts: those presented under this statement plus those with facts in it.
    const conceptKeys = new Set<string>(statementNodesByConcept.keys());
    for (const conceptKey of groupedByStatement[statement].keys()) {
      conceptKeys.add(conceptKey);
    }

    const orderedConcepts = [...conceptKeys]
      .map((conceptKey) => {
        const presentationNodes = statementNodesByConcept.get(conceptKey) ?? [];
        // Concepts without a presentation node sort after all presented ones.
        const presentationOrder = presentationNodes.length > 0
          ? Math.min(...presentationNodes.map((node) => node.order))
          : Number.MAX_SAFE_INTEGER;
        const presentationDepth = presentationNodes.length > 0
          ? Math.min(...presentationNodes.map((node) => node.depth))
          : 0;
        const roleUri = presentationNodes[0]?.roleUri ?? null;
        const parentConceptKey = presentationNodes[0]?.parentConceptKey ?? null;

        return { conceptKey, presentationOrder, presentationDepth, roleUri, parentConceptKey };
      })
      .sort((left, right) => {
        if (left.presentationOrder !== right.presentationOrder) {
          return left.presentationOrder - right.presentationOrder;
        }

        return left.conceptKey.localeCompare(right.conceptKey);
      });

    for (const orderedConcept of orderedConcepts) {
      const facts = groupedByStatement[statement].get(orderedConcept.conceptKey) ?? [];
      const { namespaceUri, localName } = splitConceptKey(orderedConcept.conceptKey);
      const qname = facts[0]?.qname ?? `unknown:${localName}`;
      const label = input.labelByConcept.get(orderedConcept.conceptKey) ?? localNameToLabel(localName);
      const values: TaxonomyStatementRow['values'] = {};
      const units: TaxonomyStatementRow['units'] = {};

      // Bucket the concept's facts by period so each period yields one value.
      const factGroups = new Map<string, typeof facts>();
      for (const fact of facts) {
        const signature = periodSignature(fact);
        const group = factGroups.get(signature);
        if (group) {
          group.push(fact);
        } else {
          factGroups.set(signature, [fact]);
        }
      }

      const sourceFactIds: number[] = [];
      let hasDimensions = false;

      for (const [signature, group] of factGroups.entries()) {
        const periodId = periodIdBySignature.get(signature);
        if (!periodId) {
          continue;
        }

        const preferred = pickPreferredFact(group);
        if (!preferred) {
          continue;
        }

        values[periodId] = preferred.value;
        units[periodId] = preferred.unit;
        sourceFactIds.push(preferred.__sourceFactId);

        // Flag the row when any fact in the period carries dimensions,
        // even if the preferred fact itself is dimensionless.
        if (group.some((entry) => !entry.isDimensionless)) {
          hasDimensions = true;
        }
      }

      // Concepts that produced no value for any period get neither a row nor a
      // presentation-backed concept record.
      if (Object.keys(values).length === 0) {
        continue;
      }

      const row: TaxonomyStatementRow = {
        key: orderedConcept.conceptKey,
        label,
        conceptKey: orderedConcept.conceptKey,
        qname,
        namespaceUri,
        localName,
        isExtension: !isUsGaapNamespace(namespaceUri),
        statement,
        roleUri: orderedConcept.roleUri,
        // NOTE(review): the sentinel above is Number.MAX_SAFE_INTEGER, which is
        // finite, so this guard always takes the first branch and the positional
        // fallback is never used. Confirm the intended behavior before changing it.
        order: Number.isFinite(orderedConcept.presentationOrder)
          ? orderedConcept.presentationOrder
          : rowsByStatement[statement].length + 1,
        depth: orderedConcept.presentationDepth,
        parentKey: orderedConcept.parentConceptKey,
        values,
        units,
        hasDimensions,
        sourceFactIds
      };

      rowsByStatement[statement].push(row);

      // The first statement that emits a row for a concept defines its concept record.
      if (!conceptByKey.has(orderedConcept.conceptKey)) {
        conceptByKey.set(orderedConcept.conceptKey, {
          concept_key: orderedConcept.conceptKey,
          qname,
          namespace_uri: namespaceUri,
          local_name: localName,
          label,
          is_extension: !isUsGaapNamespace(namespaceUri),
          statement_kind: statement,
          role_uri: orderedConcept.roleUri,
          presentation_order: row.order,
          presentation_depth: row.depth,
          parent_concept_key: row.parentKey,
          is_abstract: /abstract/i.test(localName)
        });
      }
    }
  }

  // Concepts that appear only in facts (no statement row) still get a record,
  // without presentation metadata.
  for (const fact of enrichedFacts) {
    if (conceptByKey.has(fact.conceptKey)) {
      continue;
    }

    conceptByKey.set(fact.conceptKey, {
      concept_key: fact.conceptKey,
      qname: fact.qname,
      namespace_uri: fact.namespaceUri,
      local_name: fact.localName,
      label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName),
      is_extension: !isUsGaapNamespace(fact.namespaceUri),
      statement_kind: fact.statement_kind,
      role_uri: fact.role_uri,
      presentation_order: null,
      presentation_depth: null,
      parent_concept_key: null,
      is_abstract: /abstract/i.test(fact.localName)
    });
  }

  const concepts = [...conceptByKey.values()];

  const factRows = enrichedFacts.map((fact) => ({
    concept_key: fact.conceptKey,
    qname: fact.qname,
    namespace_uri: fact.namespaceUri,
    local_name: fact.localName,
    statement_kind: fact.statement_kind,
    role_uri: fact.role_uri,
    context_id: fact.contextId,
    unit: fact.unit,
    decimals: fact.decimals,
    value_num: fact.value,
    period_start: fact.periodStart,
    period_end: fact.periodEnd,
    period_instant: fact.periodInstant,
    dimensions: fact.dimensions,
    is_dimensionless: fact.isDimensionless,
    source_file: fact.sourceFile,
  }));

  const dimensionsCount = enrichedFacts.reduce((total, fact) => {
    return total + fact.dimensions.length;
  }, 0);

  return {
    periods,
    statement_rows: rowsByStatement,
    concepts,
    facts: factRows,
    dimensionsCount
  };
}