import type {
  DerivedFinancialRow,
  DimensionBreakdownRow,
  FinancialStatementKind,
  FinancialStatementPeriod,
  FinancialUnit,
  StandardizedFinancialRow,
  TaxonomyFactRow,
  TaxonomyStatementRow
} from '@/lib/types';
import {
  STANDARD_FINANCIAL_TEMPLATES,
  type StandardTemplateRowDefinition,
  type TemplateFormula
} from '@/lib/server/financials/standard-template';

/**
 * Statement kinds that have a standard template.
 *
 * NOTE(review): derived from the keys of STANDARD_FINANCIAL_TEMPLATES rather than
 * spelled out by hand — confirm it matches the intended set of statement kinds.
 */
type TemplatedStatementKind = Extract<FinancialStatementKind, keyof typeof STANDARD_FINANCIAL_TEMPLATES>;

/** Trims and lower-cases an identifier so local names can be compared case-insensitively. */
function normalizeToken(value: string): string {
  return value.trim().toLowerCase();
}

/** Splits a label into lower-case alphanumeric tokens; every other character acts as a separator. */
function tokenizeLabel(value: string): string[] {
  return value
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, ' ')
    .trim()
    .split(/\s+/)
    .filter((token) => token.length > 0);
}

/** Reads the value stored for `periodId`, treating a missing (or undefined) entry as `null`. */
function valueOrNull(values: Record<string, number | null>, periodId: string): number | null {
  // `?? null` also normalizes an explicitly-undefined entry, so downstream
  // `=== null` guards never let `undefined` reach arithmetic and produce NaN.
  return values[periodId] ?? null;
}

/**
 * Adds up `values`.
 *
 * @returns `null` when any operand is `null` and `treatNullAsZero` is false;
 *          otherwise the sum with `null` operands counted as 0 (0 for an empty list).
 */
function sumValues(values: Array<number | null>, treatNullAsZero = false): number | null {
  if (!treatNullAsZero && values.some((value) => value === null)) {
    return null;
  }

  return values.reduce<number>((sum, value) => sum + (value ?? 0), 0);
}

/** Returns `left - right`, or `null` when either operand is `null`. */
function subtractValues(left: number | null, right: number | null): number | null {
  if (left === null || right === null) {
    return null;
  }

  return left - right;
}

/** Returns `left / right`, or `null` when either operand is `null` or the divisor is 0. */
function divideValues(left: number | null, right: number | null): number | null {
  if (left === null || right === null || right === 0) {
    return null;
  }

  return left / right;
}

/** How a source row or fact was matched to a template definition, strongest first. */
type CandidateMatchKind = 'exact_local_name' | 'secondary_local_name' | 'label_phrase';

/** A statement row that matched a template definition, with the data used to rank it. */
type StatementRowCandidate = {
  row: TaxonomyStatementRow;
  matchKind: CandidateMatchKind;
  aliasRank: number;
  unit: FinancialUnit;
  labelTokenCount: number;
  matchedPhraseTokenCount: number;
};

/** A fact that matched a template definition by local name (facts are never matched by label). */
type FactCandidate = {
  fact: TaxonomyFactRow;
  matchKind: Exclude<CandidateMatchKind, 'label_phrase'>;
  aliasRank: number;
  unit: FinancialUnit;
};

/** A row or fact candidate that carries a value for one specific period. */
type ResolvedCandidate =
  | {
    sourceType: 'row';
    matchKind: CandidateMatchKind;
    aliasRank: number;
    unit: FinancialUnit;
    labelTokenCount: number;
    matchedPhraseTokenCount: number;
    row: TaxonomyStatementRow;
  }
  | {
    sourceType: 'fact';
    matchKind: Exclude<CandidateMatchKind, 'label_phrase'>;
    aliasRank: number;
    unit: FinancialUnit;
    fact: TaxonomyFactRow;
  };

type DerivedRole = 'expense' | 'addback';

/** Per-row bookkeeping that is used while applying formulas but is not part of the output row. */
type InternalRowMetadata = {
  derivedRoleByPeriod: Record<string, DerivedRole | null>;
};

/**
 * Picks the candidates that supply a template row's value for one period.
 *
 * Row candidates qualify when they hold a non-null value for the period; fact
 * candidates qualify when `factMatchesPeriod` accepts them. Candidates are ranked
 * with `compareResolvedCandidates`.
 *
 * @returns Every distinct concept (best-ranked entry per concept key) when the
 *          definition's policy is `aggregate_multiple_components`; otherwise a
 *          list holding only the best-ranked candidate, or an empty list.
 */
function resolvedCandidatesForPeriod(input: {
  definition: StandardTemplateRowDefinition;
  candidates: StatementRowCandidate[];
  factCandidates: FactCandidate[];
  period: FinancialStatementPeriod;
}): ResolvedCandidate[] {
  const periodId = input.period.id;

  const rowCandidates = input.candidates
    .filter((candidate) => periodId in candidate.row.values && candidate.row.values[periodId] !== null)
    .map((candidate) => ({ sourceType: 'row' as const, ...candidate }));
  const factCandidates = input.factCandidates
    .filter((candidate) => factMatchesPeriod(candidate.fact, input.period))
    .map((candidate) => ({ sourceType: 'fact' as const, ...candidate }));

  const rankedCandidates: ResolvedCandidate[] = [...rowCandidates, ...factCandidates]
    .sort((left, right) => compareResolvedCandidates(left, right, input.definition));

  if (input.definition.selectionPolicy !== 'aggregate_multiple_components') {
    const bestCandidate = rankedCandidates[0];
    return bestCandidate ? [bestCandidate] : [];
  }

  // Aggregation must count each concept once, so only the best-ranked entry per concept key is kept.
  const dedupedCandidates: ResolvedCandidate[] = [];
  const seenConcepts = new Set<string>();

  for (const candidate of rankedCandidates) {
    const conceptKey = candidate.sourceType === 'row'
      ? candidate.row.key
      : candidate.fact.conceptKey;

    if (seenConcepts.has(conceptKey)) {
      continue;
    }

    seenConcepts.add(conceptKey);
    dedupedCandidates.push(candidate);
  }

  return dedupedCandidates;
}

/** Label phrases that disqualify a statement row from matching any template definition. */
const GLOBAL_EXCLUDE_LABEL_PHRASES = [
  'pro forma',
  'reconciliation',
  'acquiree',
  'business combination',
  'assets acquired',
  'liabilities assumed'
] as const;

/**
 * Maps a raw unit string onto a `FinancialUnit` by substring checks, returning
 * `fallback` when the string is empty/null or matches none of the checks.
 */
function inferUnit(rawUnit: string | null, fallback: FinancialUnit): FinancialUnit {
  const normalized = (rawUnit ?? '').toLowerCase();
  if (!normalized) {
    return fallback;
  }

  if (normalized.includes('usd') || normalized.includes('iso4217')) {
    return 'currency';
  }

  if (normalized.includes('shares')) {
    return 'shares';
  }

  if (normalized.includes('pure') || normalized.includes('percent')) {
    return fallback === 'percent' ? 'percent' : 'ratio';
  }

  return fallback;
}

/** Infers a row's unit from the first entry of `row.units`. */
function rowUnit(row: TaxonomyStatementRow, fallback: FinancialUnit): FinancialUnit {
  return inferUnit(Object.values(row.units)[0] ?? null, fallback);
}

/** Units are compatible when equal; `percent` and `ratio` are treated as interchangeable. */
function isUnitCompatible(expected: FinancialUnit, actual: FinancialUnit): boolean {
  if (expected === actual) {
    return true;
  }

  const isProportion = (unit: FinancialUnit) => unit === 'percent' || unit === 'ratio';
  return isProportion(expected) && isProportion(actual);
}

/** Tokenizes a phrase the same way labels are tokenized. */
function phraseTokens(phrase: string): string[] {
  return tokenizeLabel(phrase);
}

/** True when the tokens of `phrase` appear in `labelTokens` as one contiguous run. */
function labelContainsPhrase(labelTokens: string[], phrase: string): boolean {
  const target = phraseTokens(phrase);
  if (target.length === 0 || target.length > labelTokens.length) {
    return false;
  }

  for (let index = 0; index <= labelTokens.length - target.length; index += 1) {
    if (target.every((token, offset) => labelTokens[index + offset] === token)) {
      return true;
    }
  }

  return false;
}

/** Sort weight for a match kind; lower is better. */
function matchRank(matchKind: CandidateMatchKind): number {
  switch (matchKind) {
    case 'exact_local_name':
      return 0;
    case 'secondary_local_name':
      return 1;
    case 'label_phrase':
      return 2;
  }
}

/**
 * Position of `localName` within `aliases` (case-insensitive), or
 * `Number.MAX_SAFE_INTEGER` when it is not listed.
 */
function aliasRank(localName: string, aliases: readonly string[] | undefined): number {
  const normalizedLocalName = normalizeToken(localName);
  const matchIndex = (aliases ?? []).findIndex((alias) => normalizeToken(alias) === normalizedLocalName);
  return matchIndex === -1 ? Number.MAX_SAFE_INTEGER : matchIndex;
}

/**
 * Applies a definition's sign transform: `'invert'` negates the value, any other
 * truthy transform takes the absolute value, and no transform (or a `null`
 * value) passes the input through unchanged.
 */
function applySignTransform(
  value: number | null,
  transform: StandardTemplateRowDefinition['signTransform']
): number | null {
  if (value === null || !transform) {
    return value;
  }

  if (transform === 'invert') {
    // Negating 0 yields -0, which number formatters can render as "-0".
    return value === 0 ? 0 : -value;
  }

  return Math.abs(value);
}

/**
 * Decides whether a statement row can feed a template definition.
 *
 * Rejects rows for `formula_only` definitions, rows whose local name or label is
 * excluded, and rows whose unit is incompatible. Otherwise matches by exact local
 * name, then secondary local name, then by the longest allowed label phrase
 * (label matches are refused for rows that have dimensions).
 *
 * @returns The ranked candidate, or `null` when the row does not match.
 */
function classifyStatementRowCandidate(
  row: TaxonomyStatementRow,
  definition: StandardTemplateRowDefinition
) {
  if (definition.selectionPolicy === 'formula_only') {
    return null;
  }

  const rowLocalName = normalizeToken(row.localName);
  const listsLocalName = (names: readonly string[] | undefined) => {
    return (names ?? []).some((localName) => normalizeToken(localName) === rowLocalName);
  };

  if (listsLocalName(definition.matchers.excludeLocalNames)) {
    return null;
  }

  const labelTokens = tokenizeLabel(row.label);
  const excludedLabelPhrases = [
    ...GLOBAL_EXCLUDE_LABEL_PHRASES,
    ...(definition.matchers.excludeLabelPhrases ?? [])
  ];
  if (excludedLabelPhrases.some((phrase) => labelContainsPhrase(labelTokens, phrase))) {
    return null;
  }

  const unit = rowUnit(row, definition.unit);
  if (!isUnitCompatible(definition.unit, unit)) {
    return null;
  }

  if (listsLocalName(definition.matchers.exactLocalNames)) {
    return {
      row,
      matchKind: 'exact_local_name',
      aliasRank: aliasRank(row.localName, definition.matchers.exactLocalNames),
      unit,
      labelTokenCount: labelTokens.length,
      matchedPhraseTokenCount: 0
    } satisfies StatementRowCandidate;
  }

  if (listsLocalName(definition.matchers.secondaryLocalNames)) {
    return {
      row,
      matchKind: 'secondary_local_name',
      aliasRank: aliasRank(row.localName, definition.matchers.secondaryLocalNames),
      unit,
      labelTokenCount: labelTokens.length,
      matchedPhraseTokenCount: 0
    } satisfies StatementRowCandidate;
  }

  // Prefer the phrase with the most tokens among those present in the label.
  const matchedPhrase = (definition.matchers.allowedLabelPhrases ?? [])
    .map((phrase) => ({ phrase, tokenCount: phraseTokens(phrase).length }))
    .filter(({ phrase }) => labelContainsPhrase(labelTokens, phrase))
    .sort((left, right) => right.tokenCount - left.tokenCount)[0];

  if (!matchedPhrase) {
    return null;
  }

  if (row.hasDimensions) {
    return null;
  }

  return {
    row,
    matchKind: 'label_phrase',
    aliasRank: Number.MAX_SAFE_INTEGER,
    unit,
    labelTokenCount: labelTokens.length,
    matchedPhraseTokenCount: matchedPhrase.tokenCount
  } satisfies StatementRowCandidate;
}

/**
 * Decides whether a dimensionless fact can feed a template definition, matching
 * by exact and then secondary local name after exclusion and unit checks.
 *
 * NOTE(review): unlike `classifyStatementRowCandidate`, this does not reject
 * `formula_only` definitions — confirm whether that asymmetry is intended.
 *
 * @returns The ranked candidate, or `null` when the fact does not match.
 */
function classifyFactCandidate(
  fact: TaxonomyFactRow,
  definition: StandardTemplateRowDefinition
) {
  if (!fact.isDimensionless) {
    return null;
  }

  const localName = normalizeToken(fact.localName);
  const listsLocalName = (names: readonly string[] | undefined) => {
    return (names ?? []).some((entry) => normalizeToken(entry) === localName);
  };

  if (listsLocalName(definition.matchers.excludeLocalNames)) {
    return null;
  }

  const unit = inferUnit(fact.unit ?? null, definition.unit);
  if (!isUnitCompatible(definition.unit, unit)) {
    return null;
  }

  if (listsLocalName(definition.matchers.exactLocalNames)) {
    return {
      fact,
      matchKind: 'exact_local_name',
      aliasRank: aliasRank(fact.localName, definition.matchers.exactLocalNames),
      unit
    } satisfies FactCandidate;
  }

  if (listsLocalName(definition.matchers.secondaryLocalNames)) {
    return {
      fact,
      matchKind: 'secondary_local_name',
      aliasRank: aliasRank(fact.localName, definition.matchers.secondaryLocalNames),
      unit
    } satisfies FactCandidate;
  }

  return null;
}

/**
 * Tests whether a fact belongs to a statement period.
 *
 * When the period has a `periodStart`, both the fact's start and end must equal
 * the period's. Otherwise the fact's `periodInstant` (falling back to its
 * `periodEnd`) must equal the period's `periodEnd`.
 */
export function factMatchesPeriod(fact: TaxonomyFactRow, period: FinancialStatementPeriod): boolean {
  if (period.periodStart) {
    return fact.periodStart === period.periodStart && fact.periodEnd === period.periodEnd;
  }

  return (fact.periodInstant ?? fact.periodEnd) === period.periodEnd;
}

/**
 * Orders statement-row candidates, best first. Tie-breakers in order: match kind,
 * alias rank, rows without dimensions, non-extension rows (only under the
 * `prefer_primary_statement_concept` policy), lower `row.order`, longer matched
 * phrase, shorter label, then label text.
 */
function compareStatementRowCandidates(
  left: StatementRowCandidate,
  right: StatementRowCandidate,
  definition: StandardTemplateRowDefinition
): number {
  const matchDelta = matchRank(left.matchKind) - matchRank(right.matchKind);
  if (matchDelta !== 0) {
    return matchDelta;
  }

  if (left.aliasRank !== right.aliasRank) {
    return left.aliasRank - right.aliasRank;
  }

  if (left.row.hasDimensions !== right.row.hasDimensions) {
    return left.row.hasDimensions ? 1 : -1;
  }

  if (
    definition.selectionPolicy === 'prefer_primary_statement_concept'
    && left.row.isExtension !== right.row.isExtension
  ) {
    return left.row.isExtension ? 1 : -1;
  }

  if (left.row.order !== right.row.order) {
    return left.row.order - right.row.order;
  }

  if (left.matchedPhraseTokenCount !== right.matchedPhraseTokenCount) {
    return right.matchedPhraseTokenCount - left.matchedPhraseTokenCount;
  }

  if (left.labelTokenCount !== right.labelTokenCount) {
    return left.labelTokenCount - right.labelTokenCount;
  }

  return left.row.label.localeCompare(right.row.label);
}

/** Orders fact candidates, best first: match kind, alias rank, then qname. */
function compareFactCandidates(left: FactCandidate, right: FactCandidate): number {
  const matchDelta = matchRank(left.matchKind) - matchRank(right.matchKind);
  if (matchDelta !== 0) {
    return matchDelta;
  }

  if (left.aliasRank !== right.aliasRank) {
    return left.aliasRank - right.aliasRank;
  }

  return left.fact.qname.localeCompare(right.fact.qname);
}

/**
 * Orders mixed row/fact candidates, best first. After match kind and alias rank,
 * same-source pairs defer to their dedicated comparator; for a row-vs-fact pair
 * the row wins unless it has dimensions.
 */
function compareResolvedCandidates(
  left: ResolvedCandidate,
  right: ResolvedCandidate,
  definition: StandardTemplateRowDefinition
): number {
  const matchDelta = matchRank(left.matchKind) - matchRank(right.matchKind);
  if (matchDelta !== 0) {
    return matchDelta;
  }

  if (left.aliasRank !== right.aliasRank) {
    return left.aliasRank - right.aliasRank;
  }

  if (left.sourceType === 'row' && right.sourceType === 'row') {
    return compareStatementRowCandidates(left, right, definition);
  }

  if (left.sourceType === 'fact' && right.sourceType === 'fact') {
    return compareFactCandidates(left, right);
  }

  if (left.sourceType === 'row' && right.sourceType === 'fact') {
    return left.row.hasDimensions ? 1 : -1;
  }

  if (left.sourceType === 'fact' && right.sourceType === 'row') {
    return right.row.hasDimensions ? -1 : 1;
  }

  return 0;
}

/** Reads a resolved candidate's value for a period and applies the definition's sign transform. */
function resolvedCandidateValue(
  candidate: ResolvedCandidate,
  periodId: string,
  transform: StandardTemplateRowDefinition['signTransform']
): number | null {
  const rawValue = candidate.sourceType === 'row'
    ? (candidate.row.values[periodId] ?? null)
    : (candidate.fact.value ?? null);

  return applySignTransform(rawValue, transform);
}

/**
 * Builds one standardized row for a template definition from its matched
 * statement rows and facts.
 *
 * For each period the resolved candidates supply the value (summed under the
 * `aggregate_multiple_components` policy, otherwise taken from the single best
 * candidate) and contribute to the row's provenance sets.
 *
 * @returns The row, the keys of statement rows it consumed, and internal
 *          metadata (the derived depreciation role per period).
 */
function buildTemplateRow(
  definition: StandardTemplateRowDefinition,
  candidates: StatementRowCandidate[],
  factCandidates: FactCandidate[],
  periods: FinancialStatementPeriod[]
) {
  const sourceConcepts = new Set<string>();
  const sourceRowKeys = new Set<string>();
  const sourceFactIds = new Set<number>();
  const matchedRowKeys = new Set<string>();
  const values: Record<string, number | null> = Object.fromEntries(
    periods.map((period) => [period.id, null])
  );
  const resolvedSourceRowKeys: Record<string, string | null> = Object.fromEntries(
    periods.map((period) => [period.id, null])
  );
  const metadata: InternalRowMetadata = {
    derivedRoleByPeriod: Object.fromEntries(periods.map((period) => [period.id, null]))
  };

  const isAggregate = definition.selectionPolicy === 'aggregate_multiple_components';
  const costOfSalesDepreciationName = normalizeToken('CostOfGoodsAndServicesSoldDepreciationAndAmortization');

  let unit = definition.unit;
  let hasDimensions = false;

  for (const period of periods) {
    const resolvedCandidates = resolvedCandidatesForPeriod({
      definition,
      candidates,
      factCandidates,
      period
    });

    if (resolvedCandidates.length === 0) {
      continue;
    }

    if (definition.key === 'depreciation_and_amortization') {
      // The role is 'expense' only when the cost-of-sales D&A concept is among the
      // sources; the operating income fallback reads this to decide whether to subtract it.
      const usesCostOfSalesConcept = resolvedCandidates.some((candidate) => {
        const localName = candidate.sourceType === 'row'
          ? candidate.row.localName
          : candidate.fact.localName;
        return normalizeToken(localName) === costOfSalesDepreciationName;
      });
      metadata.derivedRoleByPeriod[period.id] = usesCostOfSalesConcept ? 'expense' : 'addback';
    }

    const primaryCandidate = resolvedCandidates[0]!;

    values[period.id] = isAggregate
      ? sumValues(resolvedCandidates.map((candidate) => {
        return resolvedCandidateValue(candidate, period.id, definition.signTransform);
      }))
      : resolvedCandidateValue(primaryCandidate, period.id, definition.signTransform);

    // A single source row key is only meaningful when exactly one candidate supplied the value.
    if (resolvedCandidates.length === 1) {
      resolvedSourceRowKeys[period.id] = primaryCandidate.sourceType === 'row'
        ? primaryCandidate.row.key
        : (primaryCandidate.fact.conceptKey ?? null);
    } else {
      resolvedSourceRowKeys[period.id] = null;
    }

    for (const resolvedCandidate of resolvedCandidates) {
      unit = resolvedCandidate.unit;

      if (resolvedCandidate.sourceType === 'row') {
        hasDimensions = hasDimensions || resolvedCandidate.row.hasDimensions;
        matchedRowKeys.add(resolvedCandidate.row.key);
        sourceConcepts.add(resolvedCandidate.row.qname);
        sourceRowKeys.add(resolvedCandidate.row.key);
        for (const factId of resolvedCandidate.row.sourceFactIds) {
          sourceFactIds.add(factId);
        }
        continue;
      }

      sourceConcepts.add(resolvedCandidate.fact.qname);
      sourceRowKeys.add(resolvedCandidate.fact.conceptKey);
      sourceFactIds.add(resolvedCandidate.fact.id);
    }
  }

  return {
    row: {
      key: definition.key,
      label: definition.label,
      category: definition.category,
      templateSection: definition.category,
      order: definition.order,
      unit,
      values,
      sourceConcepts: [...sourceConcepts].sort((left, right) => left.localeCompare(right)),
      sourceRowKeys: [...sourceRowKeys].sort((left, right) => left.localeCompare(right)),
      sourceFactIds: [...sourceFactIds].sort((left, right) => left - right),
      formulaKey: null,
      hasDimensions,
      resolvedSourceRowKeys
    } satisfies StandardizedFinancialRow,
    matchedRowKeys,
    metadata
  };
}

/** Reads the value of the row stored under `key` for one period; `null` when the row or value is absent. */
function rowValueForPeriod(
  rowsByKey: Map<string, StandardizedFinancialRow>,
  key: string,
  periodId: string
): number | null {
  return valueOrNull(rowsByKey.get(key)?.values ?? {}, periodId);
}

/** Evaluates a template formula (`sum`, `subtract` or `divide`) against the current row values for one period. */
function computeFormulaValue(
  formula: TemplateFormula,
  rowsByKey: Map<string, StandardizedFinancialRow>,
  periodId: string
): number | null {
  switch (formula.kind) {
    case 'sum':
      return sumValues(
        formula.sourceKeys.map((key) => rowValueForPeriod(rowsByKey, key, periodId)),
        formula.treatNullAsZero ?? false
      );
    case 'subtract':
      return subtractValues(
        rowValueForPeriod(rowsByKey, formula.left, periodId),
        rowValueForPeriod(rowsByKey, formula.right, periodId)
      );
    case 'divide':
      return divideValues(
        rowValueForPeriod(rowsByKey, formula.numerator, periodId),
        rowValueForPeriod(rowsByKey, formula.denominator, periodId)
      );
  }
}

/**
 * Derives operating income for one period when no source supplies it.
 *
 * When depreciation's derived role is `'expense'` and gross profit, SG&A and
 * D&A are all present: gross profit − SG&A − R&D (0 when absent) − D&A.
 * Otherwise: pretax income + interest expense − interest income − other
 * non-operating income (absent adjustments count as 0), or `null` when pretax
 * income is absent.
 */
function computeOperatingIncomeFallbackValue(
  rowsByKey: Map<string, StandardizedFinancialRow>,
  rowMetadataByKey: Map<string, InternalRowMetadata>,
  periodId: string
): number | null {
  const grossProfit = rowValueForPeriod(rowsByKey, 'gross_profit', periodId);
  const sellingGeneralAndAdministrative = rowValueForPeriod(rowsByKey, 'selling_general_and_administrative', periodId);
  const researchAndDevelopment = rowValueForPeriod(rowsByKey, 'research_and_development', periodId) ?? 0;
  const depreciationAndAmortization = rowValueForPeriod(rowsByKey, 'depreciation_and_amortization', periodId);
  const depreciationRole = rowMetadataByKey.get('depreciation_and_amortization')?.derivedRoleByPeriod[periodId] ?? null;

  if (
    depreciationRole === 'expense'
    && grossProfit !== null
    && sellingGeneralAndAdministrative !== null
    && depreciationAndAmortization !== null
  ) {
    return grossProfit - sellingGeneralAndAdministrative - researchAndDevelopment - depreciationAndAmortization;
  }

  const pretaxIncome = rowValueForPeriod(rowsByKey, 'pretax_income', periodId);
  if (pretaxIncome === null) {
    return null;
  }

  const interestExpense = rowValueForPeriod(rowsByKey, 'interest_expense', periodId) ?? 0;
  const interestIncome = rowValueForPeriod(rowsByKey, 'interest_income', periodId) ?? 0;
  const otherNonOperatingIncome = rowValueForPeriod(rowsByKey, 'other_non_operating_income', periodId) ?? 0;

  return pretaxIncome + interestExpense - interestIncome - otherNonOperatingIncome;
}

/**
 * Computes a definition's fallback value for one period: the dedicated operating
 * income derivation for `operating_income`, the definition's `fallbackFormula`
 * otherwise, or `null` when it has none.
 */
function computeFallbackValueForDefinition(
  definition: StandardTemplateRowDefinition,
  rowsByKey: Map<string, StandardizedFinancialRow>,
  rowMetadataByKey: Map<string, InternalRowMetadata>,
  periodId: string
): number | null {
  if (definition.key === 'operating_income') {
    return computeOperatingIncomeFallbackValue(rowsByKey, rowMetadataByKey, periodId);
  }

  if (!definition.fallbackFormula) {
    return null;
  }

  return computeFormulaValue(definition.fallbackFormula, rowsByKey, periodId);
}

/**
 * Fills template rows from their fallback formulas, mutating the rows in
 * `rowsByKey` in place.
 *
 * Formulas may depend on other formula rows, so the definitions are re-evaluated
 * in passes (at most `definitions.length`) until a pass leaves every value
 * unchanged. Rows that are not `formula_only` keep any value they already have.
 */
function applyFormulas(
  rowsByKey: Map<string, StandardizedFinancialRow>,
  rowMetadataByKey: Map<string, InternalRowMetadata>,
  definitions: StandardTemplateRowDefinition[],
  periods: FinancialStatementPeriod[]
): void {
  for (let pass = 0; pass < definitions.length; pass += 1) {
    let changed = false;

    for (const definition of definitions) {
      if (!definition.fallbackFormula && definition.key !== 'operating_income') {
        continue;
      }

      const target = rowsByKey.get(definition.key);
      if (!target) {
        continue;
      }

      let usedFormula = target.formulaKey !== null;

      for (const period of periods) {
        if (definition.selectionPolicy !== 'formula_only' && target.values[period.id] !== null) {
          continue;
        }

        const computed = computeFallbackValueForDefinition(definition, rowsByKey, rowMetadataByKey, period.id);
        if (computed === null) {
          continue;
        }

        const nextValue = applySignTransform(computed, definition.signTransform);

        // formula_only rows are recomputed on every pass; only a value that actually
        // moved justifies another pass, otherwise the loop could never stop early.
        if (!Object.is(target.values[period.id], nextValue)) {
          changed = true;
        }

        target.values[period.id] = nextValue;
        target.resolvedSourceRowKeys[period.id] = null;
        usedFormula = true;
      }

      if (usedFormula) {
        target.formulaKey = definition.key;
      }
    }

    if (!changed) {
      break;
    }
  }
}

/**
 * Maps a statement's taxonomy rows and facts onto the standard template for that
 * statement kind.
 *
 * Each template definition is resolved from matching rows/facts, fallback
 * formulas are then applied, and statement rows no template row consumed are
 * appended as `other:<key>` rows (ordered after template rows via `10_000 +
 * row.order`).
 *
 * @returns Template rows (excluding definitions with `includeInOutput === false`)
 *          plus unmatched rows, sorted by `order` and then `label`.
 */
export function buildStandardizedRows(input: {
  rows: TaxonomyStatementRow[];
  statement: TemplatedStatementKind;
  periods: FinancialStatementPeriod[];
  facts: TaxonomyFactRow[];
}) {
  const definitions = STANDARD_FINANCIAL_TEMPLATES[input.statement];
  const rowsByKey = new Map<string, StandardizedFinancialRow>();
  const rowMetadataByKey = new Map<string, InternalRowMetadata>();
  const matchedRowKeys = new Set<string>();

  for (const definition of definitions) {
    const candidates = input.rows
      .map((row) => classifyStatementRowCandidate(row, definition))
      .filter((candidate): candidate is StatementRowCandidate => candidate !== null);
    const factCandidates = input.facts
      .map((fact) => classifyFactCandidate(fact, definition))
      .filter((candidate): candidate is FactCandidate => candidate !== null);

    const templateRow = buildTemplateRow(definition, candidates, factCandidates, input.periods);

    for (const rowKey of templateRow.matchedRowKeys) {
      matchedRowKeys.add(rowKey);
    }

    // Rows without values are still registered when a formula may fill them in later.
    const hasAnyValue = Object.values(templateRow.row.values).some((value) => value !== null);
    if (hasAnyValue || definition.fallbackFormula || definition.key === 'operating_income') {
      rowsByKey.set(definition.key, templateRow.row);
      rowMetadataByKey.set(definition.key, templateRow.metadata);
    }
  }

  applyFormulas(rowsByKey, rowMetadataByKey, definitions, input.periods);

  const templateRows = definitions
    .filter((definition) => definition.includeInOutput !== false)
    .map((definition) => rowsByKey.get(definition.key))
    .filter((row): row is StandardizedFinancialRow => row !== undefined);

  const coveredTemplateSourceRowKeys = new Set(templateRows.flatMap((row) => row.sourceRowKeys));

  const unmatchedRows = input.rows
    .filter((row) => !matchedRowKeys.has(row.key))
    // Dimensional rows whose concept already feeds a template row are not repeated as "other".
    .filter((row) => !(row.hasDimensions && coveredTemplateSourceRowKeys.has(row.key)))
    .map((row) => ({
      key: `other:${row.key}`,
      label: row.label,
      category: 'other',
      templateSection: 'other',
      order: 10_000 + row.order,
      unit: inferUnit(Object.values(row.units)[0] ?? null, 'currency'),
      values: { ...row.values },
      sourceConcepts: [row.qname],
      sourceRowKeys: [row.key],
      sourceFactIds: [...row.sourceFactIds],
      formulaKey: null,
      hasDimensions: row.hasDimensions,
      resolvedSourceRowKeys: Object.fromEntries(
        input.periods.map((period) => [period.id, period.id in row.values ? row.key : null])
      )
    } satisfies StandardizedFinancialRow));

  return [...templateRows, ...unmatchedRows].sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }

    return left.label.localeCompare(right.label);
  });
}

/**
 * Groups dimensional facts into breakdown rows keyed by row key.
 *
 * A fact is used when it has at least one dimension and matches the period
 * registered for its filing. Each of its dimensions yields one entry under the
 * fact's concept key and one more under every standardized row that lists that
 * concept key among its `sourceRowKeys`.
 *
 * @returns The breakdown keyed by row key, or `null` when nothing was collected.
 */
export function buildDimensionBreakdown(
  facts: TaxonomyFactRow[],
  periods: FinancialStatementPeriod[],
  faithfulRows: TaxonomyStatementRow[],
  standardizedRows: StandardizedFinancialRow[]
) {
  // When several periods share a filing id, the last one in `periods` wins.
  const periodByFilingId = new Map<FinancialStatementPeriod['filingId'], FinancialStatementPeriod>();
  for (const period of periods) {
    periodByFilingId.set(period.filingId, period);
  }

  const faithfulRowByKey = new Map<string, TaxonomyStatementRow>(
    faithfulRows.map((row) => [row.key, row])
  );

  const standardizedRowsBySource = new Map<string, StandardizedFinancialRow[]>();
  for (const row of standardizedRows) {
    for (const sourceRowKey of row.sourceRowKeys) {
      const existing = standardizedRowsBySource.get(sourceRowKey);
      if (existing) {
        existing.push(row);
      } else {
        standardizedRowsBySource.set(sourceRowKey, [row]);
      }
    }
  }

  const map = new Map<string, DimensionBreakdownRow[]>();
  const pushRow = (key: string, row: DimensionBreakdownRow) => {
    const existing = map.get(key);
    if (existing) {
      existing.push(row);
    } else {
      map.set(key, [row]);
    }
  };

  for (const fact of facts) {
    if (fact.dimensions.length === 0) {
      continue;
    }

    const period = periodByFilingId.get(fact.filingId) ?? null;
    if (!period || !factMatchesPeriod(fact, period)) {
      continue;
    }

    const faithfulRow = faithfulRowByKey.get(fact.conceptKey) ?? null;
    const standardizedMatches = standardizedRowsBySource.get(fact.conceptKey) ?? [];

    for (const dimension of fact.dimensions) {
      const faithfulDimensionRow: DimensionBreakdownRow = {
        rowKey: fact.conceptKey,
        concept: fact.qname,
        sourceRowKey: fact.conceptKey,
        sourceLabel: faithfulRow?.label ?? null,
        periodId: period.id,
        axis: dimension.axis,
        member: dimension.member,
        value: fact.value,
        unit: fact.unit,
        provenanceType: 'taxonomy'
      };

      pushRow(fact.conceptKey, faithfulDimensionRow);

      for (const standardizedRow of standardizedMatches) {
        pushRow(standardizedRow.key, {
          ...faithfulDimensionRow,
          rowKey: standardizedRow.key
        });
      }
    }
  }

  return map.size > 0 ? Object.fromEntries(map.entries()) : null;
}

/** Copies rows so that their value maps and provenance arrays can be mutated independently of the input. */
export function cloneStandardizedRows(rows: StandardizedFinancialRow[]) {
  return rows.map((row) => ({
    ...row,
    values: { ...row.values },
    sourceConcepts: [...row.sourceConcepts],
    sourceRowKeys: [...row.sourceRowKeys],
    sourceFactIds: [...row.sourceFactIds],
    resolvedSourceRowKeys: { ...row.resolvedSourceRowKeys }
  }));
}

/**
 * Derives last-twelve-month rows from quarterly rows.
 *
 * An LTM period with id `ltm:<quarterId>` is anchored on that quarter and spans
 * it plus the three quarters before it (ordered by `periodEnd`, falling back to
 * `filingDate`); LTM periods with fewer than three preceding quarters are left
 * unset. For `'balance'` the anchor quarter's value is used; otherwise the four
 * quarterly values are summed, yielding `null` if any is missing.
 *
 * NOTE(review): the sum is applied to every non-balance row regardless of unit,
 * including percent/ratio rows — confirm callers recompute those afterwards.
 *
 * @returns Cloned rows whose `values` and `resolvedSourceRowKeys` are keyed by LTM period id.
 */
export function buildLtmStandardizedRows(
  quarterlyRows: StandardizedFinancialRow[],
  quarterlyPeriods: FinancialStatementPeriod[],
  ltmPeriods: FinancialStatementPeriod[],
  statement: TemplatedStatementKind
) {
  const sortedQuarterlyPeriods = [...quarterlyPeriods].sort((left, right) => {
    return Date.parse(left.periodEnd ?? left.filingDate) - Date.parse(right.periodEnd ?? right.filingDate);
  });

  // The trailing window depends only on the periods, so resolve it once instead of per row.
  const ltmWindows: Array<{ ltmPeriodId: string; anchorPeriodId: string; quarterIds: string[] }> = [];
  for (const ltmPeriod of ltmPeriods) {
    const anchorIndex = sortedQuarterlyPeriods.findIndex((period) => `ltm:${period.id}` === ltmPeriod.id);
    if (anchorIndex < 3) {
      continue;
    }

    const quarterIds = sortedQuarterlyPeriods
      .slice(anchorIndex - 3, anchorIndex + 1)
      .map((period) => period.id);
    ltmWindows.push({
      ltmPeriodId: ltmPeriod.id,
      anchorPeriodId: quarterIds[quarterIds.length - 1] ?? '',
      quarterIds
    });
  }

  // First occurrence wins, matching a linear search for the first row with a given key.
  const sourceByKey = new Map<string, StandardizedFinancialRow>();
  for (const row of quarterlyRows) {
    if (!sourceByKey.has(row.key)) {
      sourceByKey.set(row.key, row);
    }
  }

  const result = cloneStandardizedRows(quarterlyRows).map((row) => ({
    ...row,
    values: {} as Record<string, number | null>,
    resolvedSourceRowKeys: {} as Record<string, string | null>
  }));

  for (const row of result) {
    const source = sourceByKey.get(row.key);
    if (!source) {
      continue;
    }

    for (const { ltmPeriodId, anchorPeriodId, quarterIds } of ltmWindows) {
      const sourceValues = quarterIds.map((periodId) => source.values[periodId] ?? null);

      row.values[ltmPeriodId] = statement === 'balance'
        ? (sourceValues[sourceValues.length - 1] ?? null)
        : sumValues(sourceValues);

      // Formula-derived rows have no single source row to point at.
      row.resolvedSourceRowKeys[ltmPeriodId] = source.formulaKey
        ? null
        : (source.resolvedSourceRowKeys[anchorPeriodId] ?? null);
    }
  }

  return result;
}