Remove legacy TypeScript financial surface mapping, make Rust JSON single source of truth
- Delete standard-template.ts, surface.ts, materialize.ts (dead code)
- Delete financial-taxonomy.test.ts (relied on removed code)
- Add missing income statement surfaces to core.surface.json
- Add cost_of_revenue mapping to core.income-bridge.json
- Refactor standardize.ts to remove template dependency
- Simplify financial-taxonomy.ts to use only DB snapshots
- Add architecture documentation
This commit is contained in:
@@ -1,400 +0,0 @@
|
||||
import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
|
||||
import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
||||
import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy';
|
||||
import { classifyStatementRole, conceptStatementFallback } from '@/lib/server/taxonomy/classifiers';
|
||||
|
||||
/**
 * Removes every hyphen from an accession number string.
 *
 * @param value - Accession number, possibly hyphen-delimited.
 * @returns The same characters with all `-` removed.
 */
function compactAccessionNumber(value: string): string {
  return value.replace(/-/g, '');
}
|
||||
|
||||
/** Case-insensitive marker that identifies a us-gaap namespace URI. */
const US_GAAP_NAMESPACE_PATTERN = /us-gaap/i;

/**
 * Reports whether a namespace URI refers to the us-gaap taxonomy.
 *
 * Any URI containing `us-gaap` (case-insensitive) matches. The earlier
 * `fasb.org/us-gaap` check was a strict subset of this one, so a single
 * pattern yields the same result for every input.
 *
 * @param namespaceUri - Namespace URI of a concept.
 * @returns `true` when the URI contains `us-gaap`.
 */
function isUsGaapNamespace(namespaceUri: string): boolean {
  return US_GAAP_NAMESPACE_PATTERN.test(namespaceUri);
}
|
||||
|
||||
/**
 * Splits a concept key of the form `<namespaceUri>#<localName>` at its last `#`.
 *
 * @param conceptKey - Concept key to split.
 * @returns The namespace URI and local name. When the key contains no `#`,
 *   the namespace is `urn:unknown` and the whole key is the local name.
 */
function splitConceptKey(conceptKey: string): { namespaceUri: string; localName: string } {
  const separatorIndex = conceptKey.lastIndexOf('#');

  if (separatorIndex < 0) {
    return {
      namespaceUri: 'urn:unknown',
      localName: conceptKey
    };
  }

  return {
    namespaceUri: conceptKey.slice(0, separatorIndex),
    localName: conceptKey.slice(separatorIndex + 1)
  };
}
|
||||
|
||||
/**
 * Derives a human-readable label from a concept local name.
 *
 * Inserts a space at lower/digit-to-upper boundaries (`NetIncome` -> `Net Income`),
 * separates a run of capitals from a following capitalised word
 * (`EPSDiluted` -> `EPS Diluted`), turns underscores into spaces, and trims.
 *
 * @param localName - Concept local name.
 * @returns The spaced label.
 */
function localNameToLabel(localName: string): string {
  return localName
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .replace(/_/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Builds a record with one entry per statement kind, calling `factory` once
 * for each key so that every statement gets its own independent value.
 *
 * @param factory - Produces the initial value for a statement kind.
 * @returns A record keyed by `income`, `balance`, `cash_flow`, `equity`,
 *   and `comprehensive_income`.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  return {
    income: factory(),
    balance: factory(),
    cash_flow: factory(),
    equity: factory(),
    comprehensive_income: factory()
  };
}
|
||||
|
||||
/**
 * Produces a string key identifying the reporting period of a fact.
 *
 * Missing (`null`/`undefined`) start, end, or instant values are encoded as
 * empty strings, so facts with the same three period fields share a signature.
 *
 * @param fact - Fact whose period fields are read.
 * @returns `start:<start>|end:<end>|instant:<instant>`.
 */
function periodSignature(fact: TaxonomyFact): string {
  const start = fact.periodStart ?? '';
  const end = fact.periodEnd ?? '';
  const instant = fact.periodInstant ?? '';

  return `start:${start}|end:${end}|instant:${instant}`;
}
|
||||
|
||||
/**
 * Picks the date that represents a fact's period.
 *
 * @param fact - Fact whose period fields are read.
 * @param fallbackDate - Used when the fact has neither a period end nor an instant.
 * @returns The period end if present, otherwise the instant, otherwise `fallbackDate`.
 */
function periodDate(fact: TaxonomyFact, fallbackDate: string): string {
  return fact.periodEnd ?? fact.periodInstant ?? fallbackDate;
}
|
||||
|
||||
/**
 * Parses a date string into epoch milliseconds.
 *
 * @param value - Date string, or `null`.
 * @returns The `Date.parse` result, or `NaN` when `value` is `null`, empty,
 *   or not parsable. Callers test the result with `Number.isFinite`.
 */
function parseEpoch(value: string | null): number {
  if (!value) {
    return Number.NaN;
  }

  return Date.parse(value);
}
|
||||
|
||||
/**
 * Returns a new array of periods in ascending chronological order.
 *
 * The sort date of a period is its `periodEnd`, falling back to `filingDate`.
 * Ordering rules, applied in sequence:
 *   1. periods with a parsable date come before periods without one;
 *   2. parsable dates sort ascending;
 *   3. remaining ties sort by `id`.
 *
 * Rule 1 makes the comparator a consistent total order. Previously a pair with
 * exactly one unparsable date fell through to the id comparison, which can
 * contradict the date ordering of other pairs and leaves the resulting order
 * implementation-defined.
 *
 * @param periods - Periods to order; the input array is not mutated.
 * @returns A sorted copy of `periods`.
 */
function sortPeriods(periods: FilingTaxonomyPeriod[]): FilingTaxonomyPeriod[] {
  return [...periods].sort((left, right) => {
    const leftDate = parseEpoch(left.periodEnd ?? left.filingDate);
    const rightDate = parseEpoch(right.periodEnd ?? right.filingDate);
    const leftValid = Number.isFinite(leftDate);
    const rightValid = Number.isFinite(rightDate);

    if (leftValid !== rightValid) {
      // Exactly one side has a usable date: it sorts first.
      return leftValid ? -1 : 1;
    }

    if (leftValid && leftDate !== rightDate) {
      return leftDate - rightDate;
    }

    return left.id.localeCompare(right.id);
  });
}
|
||||
|
||||
/**
 * Chooses the single fact that should represent a group of competing facts.
 *
 * Preference order:
 *   1. dimensionless facts over dimensioned ones;
 *   2. the later date (`periodEnd`, else `periodInstant`) when both dates parse
 *      and differ;
 *   3. the larger absolute `value`.
 *
 * @param facts - Candidate facts; the input array is not mutated.
 * @returns The preferred fact, or `null` when `facts` is empty.
 */
function pickPreferredFact<T extends TaxonomyFact>(facts: T[]): T | null {
  if (facts.length === 0) {
    return null;
  }

  const ranked = [...facts].sort((left, right) => {
    const leftScore = left.isDimensionless ? 1 : 0;
    const rightScore = right.isDimensionless ? 1 : 0;
    if (leftScore !== rightScore) {
      return rightScore - leftScore;
    }

    const leftDate = parseEpoch(left.periodEnd ?? left.periodInstant);
    const rightDate = parseEpoch(right.periodEnd ?? right.periodInstant);
    if (Number.isFinite(leftDate) && Number.isFinite(rightDate) && leftDate !== rightDate) {
      return rightDate - leftDate;
    }

    return Math.abs(right.value) - Math.abs(left.value);
  });

  return ranked[0] ?? null;
}
|
||||
|
||||
/**
 * Materializes one filing's facts and presentation nodes into periods,
 * per-statement rows, concept records, and flat fact rows.
 *
 * Steps:
 *   1. Derive one period per distinct period signature found in `facts`.
 *   2. Tag each fact with a statement kind: the role of the first presentation
 *      node for its concept, or `conceptStatementFallback(localName)` when the
 *      concept has no presentation node.
 *   3. For every statement kind, build one row per concept that has at least
 *      one value, ordered by presentation order and then concept key.
 *   4. Emit a concept record for every row concept, then for every remaining
 *      fact concept, plus one flat row per input fact.
 *
 * Concepts that have facts but no presentation node in a statement are sorted
 * after all presented concepts and receive a sequential `order`
 * (current row count + 1).
 *
 * NOTE(review): concept records created from statement rows carry
 * `data_type: null`, while those created from leftover facts carry the fact's
 * `dataType`. Left unchanged here; confirm whether that asymmetry is intended.
 *
 * @param input.filingId - Identifier copied onto every period.
 * @param input.accessionNumber - Copied onto periods; its hyphen-free form is part of each period id.
 * @param input.filingDate - Fallback date for facts without a period end or instant.
 * @param input.filingType - Copied onto every period.
 * @param input.facts - Facts to materialize.
 * @param input.presentation - Presentation nodes supplying role, order, depth, and parent.
 * @param input.labelByConcept - Labels by concept key; missing labels are derived from the local name.
 * @returns `periods`, `statement_rows` (by statement kind), `concepts`, `facts`,
 *   and `dimensionsCount` (sum of `dimensions.length` over all facts).
 */
export function materializeTaxonomyStatements(input: {
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  facts: TaxonomyFact[];
  presentation: TaxonomyPresentationConcept[];
  labelByConcept: Map<string, string>;
}) {
  // --- 1. Periods: one per distinct period signature, in first-seen order. ---
  const periodBySignature = new Map<string, FilingTaxonomyPeriod>();
  const compactAccession = compactAccessionNumber(input.accessionNumber);

  for (const fact of input.facts) {
    const signature = periodSignature(fact);
    if (periodBySignature.has(signature)) {
      continue;
    }

    const date = periodDate(fact, input.filingDate);
    const id = `${date}-${compactAccession}-${periodBySignature.size + 1}`;

    periodBySignature.set(signature, {
      id,
      filingId: input.filingId,
      accessionNumber: input.accessionNumber,
      filingDate: input.filingDate,
      periodStart: fact.periodStart,
      periodEnd: fact.periodEnd ?? fact.periodInstant ?? input.filingDate,
      filingType: input.filingType,
      periodLabel: fact.periodInstant && !fact.periodStart
        ? 'Instant'
        : fact.periodStart && fact.periodEnd
          ? `${fact.periodStart} to ${fact.periodEnd}`
          : 'Filing Period'
    });
  }

  const periods = sortPeriods([...periodBySignature.values()]);
  const periodIdBySignature = new Map<string, string>(
    [...periodBySignature.entries()].map(([signature, period]) => [signature, period.id])
  );

  // --- 2. Index the presentation tree once. ---
  // firstNodeByConcept: first node seen for a concept regardless of role; it
  //   decides which statement a fact belongs to.
  // presentationByStatement: nodes grouped by classified statement and concept,
  //   so row building does not re-scan and re-classify the whole tree.
  const firstNodeByConcept = new Map<string, TaxonomyPresentationConcept>();
  const presentationByStatement = createStatementRecord<Map<string, TaxonomyPresentationConcept[]>>(
    () => new Map()
  );

  for (const node of input.presentation) {
    if (!firstNodeByConcept.has(node.conceptKey)) {
      firstNodeByConcept.set(node.conceptKey, node);
    }

    const kind = classifyStatementRole(node.roleUri);
    if (!kind) {
      continue;
    }

    const nodesByConcept = presentationByStatement[kind];
    if (!nodesByConcept) {
      // Roles classified as something other than a known statement are ignored.
      continue;
    }

    const existing = nodesByConcept.get(node.conceptKey);
    if (existing) {
      existing.push(node);
    } else {
      nodesByConcept.set(node.conceptKey, [node]);
    }
  }

  // --- 3. Tag each fact with its statement kind and a 1-based source id. ---
  const enrichedFacts = input.facts.map((fact, index) => {
    const bestNode = firstNodeByConcept.get(fact.conceptKey) ?? null;
    const statementKind = bestNode
      ? classifyStatementRole(bestNode.roleUri)
      : conceptStatementFallback(fact.localName);

    return {
      ...fact,
      __sourceFactId: index + 1,
      statement_kind: statementKind,
      role_uri: bestNode?.roleUri ?? null
    };
  });

  const rowsByStatement = createStatementRecord<TaxonomyStatementRow[]>(() => []);
  const conceptByKey = new Map<string, TaxonomyConcept>();
  const groupedByStatement = createStatementRecord<Map<string, typeof enrichedFacts>>(() => new Map());

  for (const fact of enrichedFacts) {
    if (!fact.statement_kind) {
      continue;
    }

    const factsByConcept = groupedByStatement[fact.statement_kind];
    const group = factsByConcept.get(fact.conceptKey);
    if (group) {
      group.push(fact);
    } else {
      factsByConcept.set(fact.conceptKey, [fact]);
    }
  }

  // --- 4. Build rows statement by statement. ---
  for (const statement of Object.keys(rowsByStatement) as FinancialStatementKind[]) {
    const presentedConcepts = presentationByStatement[statement];
    const factsByConcept = groupedByStatement[statement];
    const conceptKeys = new Set<string>([...presentedConcepts.keys(), ...factsByConcept.keys()]);

    const orderedConcepts = [...conceptKeys]
      .map((conceptKey) => {
        const presentationNodes = presentedConcepts.get(conceptKey) ?? [];
        const hasPresentation = presentationNodes.length > 0;

        return {
          conceptKey,
          // Infinity marks "no presentation position": it sorts last and is
          // detected by the Number.isFinite check when the row is built.
          presentationOrder: hasPresentation
            ? Math.min(...presentationNodes.map((node) => node.order))
            : Number.POSITIVE_INFINITY,
          presentationDepth: hasPresentation
            ? Math.min(...presentationNodes.map((node) => node.depth))
            : 0,
          roleUri: presentationNodes[0]?.roleUri ?? null,
          parentConceptKey: presentationNodes[0]?.parentConceptKey ?? null
        };
      })
      .sort((left, right) => {
        if (left.presentationOrder !== right.presentationOrder) {
          return left.presentationOrder - right.presentationOrder;
        }

        return left.conceptKey.localeCompare(right.conceptKey);
      });

    for (const orderedConcept of orderedConcepts) {
      const facts = factsByConcept.get(orderedConcept.conceptKey) ?? [];
      const { namespaceUri, localName } = splitConceptKey(orderedConcept.conceptKey);
      const qname = facts[0]?.qname ?? `unknown:${localName}`;
      const label = input.labelByConcept.get(orderedConcept.conceptKey) ?? localNameToLabel(localName);
      const values: Record<string, number | null> = {};
      const units: Record<string, string | null> = {};

      // Several facts may report the same concept for the same period
      // (e.g. dimensioned breakdowns); group them so one can be chosen.
      const factGroups = new Map<string, typeof facts>();
      for (const fact of facts) {
        const signature = periodSignature(fact);
        const group = factGroups.get(signature);
        if (group) {
          group.push(fact);
        } else {
          factGroups.set(signature, [fact]);
        }
      }

      const sourceFactIds: number[] = [];
      let hasDimensions = false;

      for (const [signature, group] of factGroups.entries()) {
        const periodId = periodIdBySignature.get(signature);
        if (!periodId) {
          continue;
        }

        const preferred = pickPreferredFact(group);
        if (!preferred) {
          continue;
        }

        values[periodId] = preferred.value;
        units[periodId] = preferred.unit;
        sourceFactIds.push(preferred.__sourceFactId);

        if (group.some((entry) => !entry.isDimensionless)) {
          hasDimensions = true;
        }
      }

      // Concepts that appear only in the presentation tree produce no row.
      if (Object.keys(values).length === 0) {
        continue;
      }

      const isExtension = !isUsGaapNamespace(namespaceUri);

      const row: TaxonomyStatementRow = {
        key: orderedConcept.conceptKey,
        label,
        conceptKey: orderedConcept.conceptKey,
        qname,
        namespaceUri,
        localName,
        isExtension,
        statement,
        roleUri: orderedConcept.roleUri,
        order: Number.isFinite(orderedConcept.presentationOrder)
          ? orderedConcept.presentationOrder
          : rowsByStatement[statement].length + 1,
        depth: orderedConcept.presentationDepth,
        parentKey: orderedConcept.parentConceptKey,
        values,
        units,
        hasDimensions,
        sourceFactIds
      };

      rowsByStatement[statement].push(row);

      // First statement to emit a row for a concept owns its concept record.
      if (!conceptByKey.has(orderedConcept.conceptKey)) {
        conceptByKey.set(orderedConcept.conceptKey, {
          concept_key: orderedConcept.conceptKey,
          qname,
          namespace_uri: namespaceUri,
          local_name: localName,
          label,
          is_extension: isExtension,
          balance: null,
          period_type: null,
          data_type: null,
          statement_kind: statement,
          role_uri: orderedConcept.roleUri,
          authoritative_concept_key: null,
          mapping_method: null,
          surface_key: null,
          detail_parent_surface_key: null,
          kpi_key: null,
          residual_flag: false,
          presentation_order: row.order,
          presentation_depth: row.depth,
          parent_concept_key: row.parentKey,
          is_abstract: /abstract/i.test(localName)
        });
      }
    }
  }

  // --- 5. Concept records for facts that did not end up in any row. ---
  for (const fact of enrichedFacts) {
    if (conceptByKey.has(fact.conceptKey)) {
      continue;
    }

    conceptByKey.set(fact.conceptKey, {
      concept_key: fact.conceptKey,
      qname: fact.qname,
      namespace_uri: fact.namespaceUri,
      local_name: fact.localName,
      label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName),
      is_extension: !isUsGaapNamespace(fact.namespaceUri),
      balance: null,
      period_type: null,
      data_type: fact.dataType,
      statement_kind: fact.statement_kind,
      role_uri: fact.role_uri,
      authoritative_concept_key: null,
      mapping_method: null,
      surface_key: null,
      detail_parent_surface_key: null,
      kpi_key: null,
      residual_flag: false,
      presentation_order: null,
      presentation_depth: null,
      parent_concept_key: null,
      is_abstract: /abstract/i.test(fact.localName)
    });
  }

  const concepts = [...conceptByKey.values()];

  // --- 6. Flat fact rows, one per input fact, in input order. ---
  const factRows = enrichedFacts.map((fact) => ({
    concept_key: fact.conceptKey,
    qname: fact.qname,
    namespace_uri: fact.namespaceUri,
    local_name: fact.localName,
    data_type: fact.dataType,
    statement_kind: fact.statement_kind,
    role_uri: fact.role_uri,
    authoritative_concept_key: null,
    mapping_method: null,
    surface_key: null,
    detail_parent_surface_key: null,
    kpi_key: null,
    residual_flag: false,
    context_id: fact.contextId,
    unit: fact.unit,
    decimals: fact.decimals,
    precision: fact.precision,
    nil: fact.nil,
    value_num: fact.value,
    period_start: fact.periodStart,
    period_end: fact.periodEnd,
    period_instant: fact.periodInstant,
    dimensions: fact.dimensions,
    is_dimensionless: fact.isDimensionless,
    source_file: fact.sourceFile,
  }));

  const dimensionsCount = enrichedFacts.reduce((total, fact) => {
    return total + fact.dimensions.length;
  }, 0);

  return {
    periods,
    statement_rows: rowsByStatement,
    concepts,
    facts: factRows,
    dimensionsCount
  };
}
|
||||
Reference in New Issue
Block a user