// Listing metadata from the code viewer: 265 lines, 7.3 KiB, TypeScript.
import type { FinancialStatementKind } from '@/lib/types';
import type { TaxonomyContext, TaxonomyFact, TaxonomyNamespaceMap, TaxonomyUnit } from '@/lib/server/taxonomy/types';
/**
 * Decodes XML entity and character references found in `value`.
 *
 * Recognised references:
 * - the predefined entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`
 *   (matched case-insensitively);
 * - `&nbsp;` and `&#160;`, both mapped to a plain ASCII space;
 * - numeric references in hexadecimal (`&#x41;`) or decimal (`&#65;`) form.
 *
 * Every reference is resolved in a single pass, so text produced by one
 * replacement is never scanned again: `&amp;lt;` yields the literal text
 * `&lt;`, not `<`.
 *
 * @param value Raw text that may contain entity references.
 * @returns The text with every recognised reference replaced. A numeric
 *   reference that does not name a valid Unicode code point becomes a single
 *   space. Unrecognised references are left untouched.
 */
function decodeXmlEntities(value: string): string {
  // NOTE(review): the listing this was recovered from had its entity names
  // stripped by HTML decoding; `&nbsp;` / `&#160;` -> ' ' is the presumed
  // original of the whitespace alternation. Confirm against version control.
  const namedEntities: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    nbsp: ' '
  };

  // Converts a parsed code point to text, falling back to a space when the
  // value is not finite or String.fromCodePoint rejects it (RangeError).
  const fromCodePoint = (codePoint: number): string => {
    if (!Number.isFinite(codePoint)) {
      return ' ';
    }

    try {
      return String.fromCodePoint(codePoint);
    } catch {
      return ' ';
    }
  };

  return value.replace(
    /&(?:(amp|lt|gt|quot|apos|nbsp)|#x([0-9a-f]+)|#([0-9]+));/gi,
    (entity: string, name: string | undefined, hex: string | undefined, decimal: string | undefined) => {
      if (name !== undefined) {
        return namedEntities[name.toLowerCase()] ?? entity;
      }

      if (hex !== undefined) {
        return fromCodePoint(Number.parseInt(hex, 16));
      }

      // `&#160;` is normalised to a regular space rather than U+00A0.
      if (decimal === '160') {
        return ' ';
      }

      return fromCodePoint(Number.parseInt(decimal ?? '', 10));
    }
  );
}
/**
 * Parses display-formatted numeric text into a number.
 *
 * Markup tags, commas, dollar signs and whitespace are removed, and the
 * Unicode minus sign (U+2212) is treated as `-`. A value wrapped in
 * parentheses, e.g. `(1,234)` or `$(1,234)`, is returned as a negative number.
 *
 * The cleaned text must be a complete decimal number (optional sign, digits,
 * optional fraction, optional exponent). Text that merely starts with digits,
 * such as `10-K` or `2023-12-31`, is rejected instead of being truncated to
 * its numeric prefix.
 *
 * @param value Raw text of a fact or table cell.
 * @returns The parsed finite number, or `null` when the text is empty, is a
 *   dash placeholder (`--`, `---`, ...), or is not a well-formed number.
 */
function parseNumber(value: string): number | null {
  const stripped = value
    .replace(/<[^>]+>/g, ' ')
    .replace(/[,$\s]/g, '')
    .replace(/\u2212/g, '-');

  // Empty cells and runs of dashes are "no value" placeholders.
  if (!stripped || /^--+$/.test(stripped)) {
    return null;
  }

  // Detect accounting-style negatives only after markup and currency symbols
  // are gone, so "$(1,234)" and "<b>(1,234)</b>" are recognised too.
  const negative = stripped.startsWith('(') && stripped.endsWith(')');
  const normalized = stripped.replace(/[()]/g, '');

  // Number.parseFloat would accept any numeric prefix ("10-K" -> 10), so the
  // whole string is validated before conversion.
  if (!/^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(normalized)) {
    return null;
  }

  const parsed = Number(normalized);
  if (!Number.isFinite(parsed)) {
    return null;
  }

  return negative ? -Math.abs(parsed) : parsed;
}
/**
 * Collects `xmlns:prefix="uri"` declarations from the start of an XBRL
 * document.
 *
 * Declarations are read from the first tag whose text contains "xbrl"
 * (case-insensitive). When no such tag exists, the first 1200 characters of
 * the document are scanned instead.
 *
 * @param raw Full document text.
 * @returns A map from namespace prefix to namespace URI. Later declarations of
 *   the same prefix overwrite earlier ones. The default namespace (`xmlns=`)
 *   is not included.
 */
function parseNamespaceMapFromDocument(raw: string): TaxonomyNamespaceMap {
  const map: TaxonomyNamespaceMap = {};
  const rootStart = raw.match(/<[^>]*xbrl[^>]*>/i)?.[0] ?? raw.slice(0, 1200);
  const declarationPattern = /xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g;

  for (const declaration of rootStart.matchAll(declarationPattern)) {
    const prefix = (declaration[1] ?? '').trim();
    const uri = (declaration[2] ?? '').trim();

    // A URI made only of whitespace trims to an empty string; skip it.
    if (prefix && uri) {
      map[prefix] = uri;
    }
  }

  return map;
}
/**
 * Extracts every `<context id="...">` element from an XBRL document.
 *
 * For each context the first `startDate`, `endDate` and `instant` element
 * text is captured (trimmed, `null` when absent), together with all
 * `explicitMember` dimensions. Element names are matched with or without a
 * namespace prefix, case-insensitively.
 *
 * @param raw Full document text.
 * @returns Contexts keyed by their `id`. If an id occurs more than once the
 *   last occurrence wins. Contexts whose id is blank are skipped.
 */
function parseContexts(raw: string): Record<string, TaxonomyContext> {
  const contexts: Record<string, TaxonomyContext> = {};

  // Patterns are loop-invariant. matchAll works on an internal copy of a
  // global regex, so sharing them across iterations is safe.
  const contextPattern = /<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?context>/gi;
  const startDatePattern = /<(?:[a-z0-9_\-]+:)?startDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?startDate>/i;
  const endDatePattern = /<(?:[a-z0-9_\-]+:)?endDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?endDate>/i;
  const instantPattern = /<(?:[a-z0-9_\-]+:)?instant>([^<]+)<\/(?:[a-z0-9_\-]+:)?instant>/i;
  const dimPattern = /<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9_\-]+:)?explicitMember>/gi;

  // Returns the trimmed first capture group of `pattern` in `text`, or null.
  const firstCapture = (text: string, pattern: RegExp): string | null =>
    text.match(pattern)?.[1]?.trim() ?? null;

  for (const match of raw.matchAll(contextPattern)) {
    const contextId = (match[1] ?? '').trim();
    if (!contextId) {
      continue;
    }

    const block = match[2] ?? '';
    const dimensions: Array<{ axis: string; member: string }> = [];

    for (const dimMatch of block.matchAll(dimPattern)) {
      const axis = decodeXmlEntities((dimMatch[1] ?? '').trim());
      const member = decodeXmlEntities((dimMatch[2] ?? '').trim());

      // Keep only complete axis/member pairs.
      if (axis && member) {
        dimensions.push({ axis, member });
      }
    }

    contexts[contextId] = {
      id: contextId,
      periodStart: firstCapture(block, startDatePattern),
      periodEnd: firstCapture(block, endDatePattern),
      periodInstant: firstCapture(block, instantPattern),
      dimensions
    };
  }

  return contexts;
}
/**
 * Extracts every `<unit id="...">` element from an XBRL document and reduces
 * it to a single measure string.
 *
 * - One `<measure>`: the measure text itself (e.g. `iso4217:USD`).
 * - A ratio with `<unitNumerator>` and `<unitDenominator>`: `numerator/denominator`
 *   (e.g. `iso4217:USD/xbrli:shares`).
 * - Several measures on the same side: joined with `*`, since XBRL 2.1 defines
 *   sibling measures as a product. Previously every measure was joined with
 *   `/`, which rendered a product as a ratio.
 * - No measures: `null`.
 *
 * @param raw Full document text.
 * @returns Units keyed by their `id`. If an id occurs more than once the last
 *   occurrence wins. Units whose id is blank are skipped.
 */
function parseUnits(raw: string): Record<string, TaxonomyUnit> {
  const units: Record<string, TaxonomyUnit> = {};
  const unitPattern = /<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unit>/gi;
  const measurePattern = /<(?:[a-z0-9_\-]+:)?measure>([^<]+)<\/(?:[a-z0-9_\-]+:)?measure>/gi;
  const numeratorPattern = /<(?:[a-z0-9_\-]+:)?unitNumerator\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unitNumerator>/i;
  const denominatorPattern = /<(?:[a-z0-9_\-]+:)?unitDenominator\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unitDenominator>/i;

  // Returns the decoded, non-empty measure texts found in `fragment`.
  const readMeasures = (fragment: string): string[] =>
    [...fragment.matchAll(measurePattern)]
      .map((entry) => decodeXmlEntities((entry[1] ?? '').trim()))
      .filter(Boolean);

  for (const match of raw.matchAll(unitPattern)) {
    const id = (match[1] ?? '').trim();
    if (!id) {
      continue;
    }

    const block = match[2] ?? '';
    const numerator = readMeasures(block.match(numeratorPattern)?.[1] ?? '');
    const denominator = readMeasures(block.match(denominatorPattern)?.[1] ?? '');

    let measure: string | null = null;
    if (numerator.length > 0 && denominator.length > 0) {
      measure = `${numerator.join('*')}/${denominator.join('*')}`;
    } else {
      // Simple unit, or an incomplete ratio: treat all measures as one product.
      const measures = readMeasures(block);
      measure = measures.length > 0 ? measures.join('*') : null;
    }

    units[id] = { id, measure };
  }

  return units;
}
/**
 * Guesses which financial statement a concept belongs to from keywords in its
 * local name.
 *
 * The name is lower-cased and tested against the keyword rules below in
 * order; the first rule that matches decides the result. Order matters: a
 * name containing both "liabilit" and "equity" is classified as `equity`
 * because that rule comes first.
 *
 * @param localName Concept local name, e.g. `NetCashProvidedByOperatingActivities`.
 * @returns The matched statement kind, or `null` when no keyword matches.
 */
function classifyStatementKind(localName: string): FinancialStatementKind | null {
  const normalized = localName.toLowerCase();

  const rules: ReadonlyArray<readonly [RegExp, FinancialStatementKind]> = [
    [/cash|operatingactivities|investingactivities|financingactivities/, 'cash_flow'],
    [/equity|retainedearnings|additionalpaidincapital/, 'equity'],
    [/comprehensiveincome/, 'comprehensive_income'],
    [/asset|liabilit|debt/, 'balance'],
    [/revenue|income|profit|expense|costof/, 'income']
  ];

  for (const [pattern, kind] of rules) {
    if (pattern.test(normalized)) {
      return kind;
    }
  }

  return null;
}
/**
 * Reports whether `prefix` is one of the prefixes this module treats as XBRL
 * infrastructure rather than reportable concepts: `xbrli`, `xlink`, `link`,
 * `xbrldi` or `xbrldt`. The comparison is case-insensitive.
 *
 * @param prefix Namespace prefix taken from an element name.
 * @returns `true` for an infrastructure prefix, `false` otherwise.
 */
function isXbrlInfrastructurePrefix(prefix: string): boolean {
  switch (prefix.toLowerCase()) {
    case 'xbrli':
    case 'xlink':
    case 'link':
    case 'xbrldi':
    case 'xbrldt':
      return true;
    default:
      return false;
  }
}
/**
 * Builds the concept key for a namespaced element by joining the namespace
 * URI and the local name with `#`.
 *
 * @param namespaceUri Namespace URI of the concept.
 * @param localName Local (unprefixed) element name.
 * @returns `"<namespaceUri>#<localName>"`.
 */
function localNameToKey(namespaceUri: string, localName: string): string {
  return `${namespaceUri}#${localName}`;
}
/**
 * Parses an XBRL instance document into its namespaces, contexts, units and
 * numeric facts.
 *
 * A fact is any prefixed element that carries a `contextRef` attribute, has a
 * matching closing tag, does not use an XBRL infrastructure prefix, and whose
 * text parses as a number. Non-numeric facts are skipped.
 *
 * Each fact is enriched with the period and dimensions of the context it
 * references (all `null` / empty when the context id is unknown) and with the
 * measure of its unit (falling back to the raw `unitRef` when the unit is
 * unknown or has no measure).
 *
 * @param raw Full document text.
 * @param sourceFile Value copied verbatim onto every fact's `sourceFile`.
 * @returns The namespace map, contexts, units and extracted facts.
 */
export function parseXbrlInstance(
  raw: string,
  sourceFile: string | null
): {
  namespaces: TaxonomyNamespaceMap;
  contexts: Record<string, TaxonomyContext>;
  units: Record<string, TaxonomyUnit>;
  facts: TaxonomyFact[];
} {
  const namespaces = parseNamespaceMapFromDocument(raw);
  const contexts = parseContexts(raw);
  const units = parseUnits(raw);
  const facts: TaxonomyFact[] = [];

  // Keys come from the document, so look up own properties only. A plain
  // `record[key]` with a key such as "constructor" would return a member of
  // Object.prototype and later crash on `context.dimensions.length`.
  const ownEntry = <T>(record: Record<string, T>, key: string): T | undefined =>
    Object.prototype.hasOwnProperty.call(record, key) ? record[key] : undefined;

  const factPattern = /<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'][^>]*)>([\s\S]*?)<\/\1:\2>/g;
  const contextRefPattern = /\bcontextRef=["']([^"']+)["']/i;
  const unitRefPattern = /\bunitRef=["']([^"']+)["']/i;
  const decimalsPattern = /\bdecimals=["']([^"']+)["']/i;

  for (const match of raw.matchAll(factPattern)) {
    const prefix = (match[1] ?? '').trim();
    const localName = (match[2] ?? '').trim();
    const attrs = match[3] ?? '';

    if (!prefix || !localName || isXbrlInfrastructurePrefix(prefix)) {
      continue;
    }

    const contextId = attrs.match(contextRefPattern)?.[1]?.trim() ?? '';
    if (!contextId) {
      continue;
    }

    // Decode only after the cheap filters above have passed.
    const value = parseNumber(decodeXmlEntities((match[4] ?? '').trim()));
    if (value === null) {
      continue;
    }

    const unitRef = attrs.match(unitRefPattern)?.[1]?.trim() ?? null;
    const decimals = attrs.match(decimalsPattern)?.[1]?.trim() ?? null;

    const namespaceUri = ownEntry(namespaces, prefix) ?? `urn:unknown:${prefix}`;
    const context = ownEntry(contexts, contextId);
    const dimensions = context?.dimensions ?? [];

    // Prefer the unit's resolved measure; otherwise keep the raw reference.
    const unitMeasure = unitRef ? ownEntry(units, unitRef)?.measure : null;

    facts.push({
      conceptKey: localNameToKey(namespaceUri, localName),
      qname: `${prefix}:${localName}`,
      namespaceUri,
      localName,
      contextId,
      unit: unitMeasure || unitRef,
      decimals,
      value,
      periodStart: context?.periodStart ?? null,
      periodEnd: context?.periodEnd ?? null,
      periodInstant: context?.periodInstant ?? null,
      dimensions,
      isDimensionless: dimensions.length === 0,
      sourceFile,
    });
  }

  return {
    namespaces,
    contexts,
    units,
    facts
  };
}
/**
 * Public entry point for the keyword-based statement classification.
 * Delegates directly to `classifyStatementKind`.
 *
 * @param localName Concept local name.
 * @returns The statement kind guessed from the name, or `null` when no
 *   keyword rule matches.
 */
export function conceptStatementFallback(localName: string): FinancialStatementKind | null {
  return classifyStatementKind(localName);
}