import type { FinancialStatementKind } from '@/lib/types';
import type {
  TaxonomyContext,
  TaxonomyFact,
  TaxonomyNamespaceMap,
  TaxonomyUnit,
} from '@/lib/server/taxonomy/types';

/** Replacement text for the named entities this parser understands, keyed by lowercase name. */
const NAMED_XML_ENTITIES: Readonly<Record<string, string>> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  nbsp: ' ',
};

/** Matches one named, decimal (`#NNN`) or hexadecimal (`#xHHH`) character reference. */
const XML_ENTITY_PATTERN = /&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi;

/** Whole-string numeric literal: optional sign, digits with optional fraction, optional exponent. */
const STRICT_NUMBER_PATTERN = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i;

/** Ordered classification rules; the first pattern that matches the lowercased local name wins. */
const STATEMENT_KIND_RULES: ReadonlyArray<readonly [RegExp, FinancialStatementKind]> = [
  [/cash|operatingactivities|investingactivities|financingactivities/, 'cash_flow'],
  [/equity|retainedearnings|additionalpaidincapital/, 'equity'],
  [/comprehensiveincome/, 'comprehensive_income'],
  [/asset|liabilit|debt/, 'balance'],
  [/revenue|income|profit|expense|costof/, 'income'],
];

/** Lowercased prefixes whose elements are never reported as facts. */
const XBRL_INFRASTRUCTURE_PREFIXES: ReadonlySet<string> = new Set([
  'xbrli',
  'xlink',
  'link',
  'xbrldi',
  'xbrldt',
]);

/**
 * Reads `key` from `record` only when it is an own property.
 *
 * Keys come straight from the parsed document, so a plain index access could
 * resolve to an inherited member such as `toString` or `constructor`.
 */
function lookupOwn<T>(record: Readonly<Record<string, T>>, key: string): T | undefined {
  return Object.prototype.hasOwnProperty.call(record, key) ? record[key] : undefined;
}

/**
 * Converts the digits of a numeric character reference into its character.
 *
 * @returns the decoded character, or a single space when the value cannot be
 * parsed, is not a valid code point, or is U+00A0 (non-breaking space).
 */
function decodeCodePoint(digits: string, radix: 10 | 16): string {
  const codePoint = Number.parseInt(digits, radix);
  if (!Number.isFinite(codePoint) || codePoint === 0xa0) {
    return ' ';
  }
  try {
    return String.fromCodePoint(codePoint);
  } catch {
    // String.fromCodePoint throws RangeError for values outside the Unicode range.
    return ' ';
  }
}

/**
 * Decodes XML character references in `value`.
 *
 * Handles the five predefined XML entities, the non-breaking-space entity
 * (decoded to a plain space) and decimal/hexadecimal numeric references.
 * Decoding happens in a single pass, so text that was escaped twice is only
 * unescaped once. Unknown named entities are left untouched.
 */
function decodeXmlEntities(value: string): string {
  return value.replace(XML_ENTITY_PATTERN, (entity: string, name: string) => {
    if (name.startsWith('#')) {
      const isHex = name.charAt(1).toLowerCase() === 'x';
      return isHex ? decodeCodePoint(name.slice(2), 16) : decodeCodePoint(name.slice(1), 10);
    }
    return lookupOwn(NAMED_XML_ENTITIES, name.toLowerCase()) ?? entity;
  });
}

/**
 * Parses a fact body into a finite number.
 *
 * Markup, whitespace, commas, dollar signs and parentheses are stripped, and
 * U+2212 is treated as a minus sign. A value wrapped in parentheses is
 * negative. The remaining text must be a complete numeric literal: values that
 * merely start with digits (for example `2023-12-31`) are rejected rather than
 * truncated to their numeric prefix.
 *
 * @returns the parsed number, or `null` for blank input, dash-only
 * placeholders, and anything that is not a finite number.
 */
function parseNumber(value: string): number | null {
  const trimmed = value.trim();
  if (!trimmed || /^--+$/.test(trimmed)) {
    return null;
  }

  const negative = trimmed.startsWith('(') && trimmed.endsWith(')');
  const normalized = trimmed
    .replace(/<[^>]+>/g, ' ')
    .replace(/[,$\s]/g, '')
    .replace(/[()]/g, '')
    .replace(/\u2212/g, '-');

  // Number.parseFloat accepts numeric prefixes, so validate the whole string first.
  if (!STRICT_NUMBER_PATTERN.test(normalized)) {
    return null;
  }

  const parsed = Number.parseFloat(normalized);
  if (!Number.isFinite(parsed)) {
    return null;
  }
  return negative ? -Math.abs(parsed) : parsed;
}

/**
 * Collects `xmlns:prefix="uri"` declarations into a prefix-to-URI map.
 *
 * Declarations are read from the first tag whose text contains `xbrl`; when no
 * such tag exists, the first 1200 characters of the document are scanned.
 */
function parseNamespaceMapFromDocument(raw: string): TaxonomyNamespaceMap {
  const map: TaxonomyNamespaceMap = {};
  const rootStart = raw.match(/<[^>]*xbrl[^>]*>/i)?.[0] ?? raw.slice(0, 1200);

  for (const match of rootStart.matchAll(/xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g)) {
    const prefix = (match[1] ?? '').trim();
    const uri = (match[2] ?? '').trim();
    if (prefix && uri) {
      map[prefix] = uri;
    }
  }

  return map;
}

/**
 * Extracts every `context` element, keyed by its `id` attribute.
 *
 * Each entry carries the start/end/instant dates found in the element (or
 * `null` when absent) and one `{ axis, member }` pair per `explicitMember`.
 */
function parseContexts(raw: string): Record<string, TaxonomyContext> {
  const contexts: Record<string, TaxonomyContext> = {};
  const contextPattern =
    /<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?context>/gi;
  const dimPattern =
    /<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9_\-]+:)?explicitMember>/gi;

  for (const match of raw.matchAll(contextPattern)) {
    const contextId = (match[1] ?? '').trim();
    const block = match[2] ?? '';
    if (!contextId) {
      continue;
    }

    const periodStart =
      block.match(/<(?:[a-z0-9_\-]+:)?startDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?startDate>/i)?.[1]?.trim() ?? null;
    const periodEnd =
      block.match(/<(?:[a-z0-9_\-]+:)?endDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?endDate>/i)?.[1]?.trim() ?? null;
    const periodInstant =
      block.match(/<(?:[a-z0-9_\-]+:)?instant>([^<]+)<\/(?:[a-z0-9_\-]+:)?instant>/i)?.[1]?.trim() ?? null;

    const dimensions: Array<{ axis: string; member: string }> = [];
    for (const dimMatch of block.matchAll(dimPattern)) {
      const axis = decodeXmlEntities((dimMatch[1] ?? '').trim());
      const member = decodeXmlEntities((dimMatch[2] ?? '').trim());
      if (axis && member) {
        dimensions.push({ axis, member });
      }
    }

    contexts[contextId] = { id: contextId, periodStart, periodEnd, periodInstant, dimensions };
  }

  return contexts;
}

/**
 * Extracts every `unit` element, keyed by its `id` attribute.
 *
 * The `measure` of a unit is its single measure, several measures joined with
 * `/` in document order, or `null` when the unit declares none.
 */
function parseUnits(raw: string): Record<string, TaxonomyUnit> {
  const units: Record<string, TaxonomyUnit> = {};
  const unitPattern =
    /<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unit>/gi;
  const measurePattern = /<(?:[a-z0-9_\-]+:)?measure>([^<]+)<\/(?:[a-z0-9_\-]+:)?measure>/gi;

  for (const match of raw.matchAll(unitPattern)) {
    const id = (match[1] ?? '').trim();
    const block = match[2] ?? '';
    if (!id) {
      continue;
    }

    const measures = [...block.matchAll(measurePattern)]
      .map((entry) => decodeXmlEntities((entry[1] ?? '').trim()))
      .filter(Boolean);

    units[id] = { id, measure: measures.length > 0 ? measures.join('/') : null };
  }

  return units;
}

/**
 * Guesses the financial statement a concept belongs to from keywords in its
 * local name, or returns `null` when no keyword matches.
 */
function classifyStatementKind(localName: string): FinancialStatementKind | null {
  const normalized = localName.toLowerCase();
  for (const [pattern, kind] of STATEMENT_KIND_RULES) {
    if (pattern.test(normalized)) {
      return kind;
    }
  }
  return null;
}

/** Reports whether `prefix` is one of the structural prefixes that are skipped during fact extraction. */
function isXbrlInfrastructurePrefix(prefix: string): boolean {
  return XBRL_INFRASTRUCTURE_PREFIXES.has(prefix.toLowerCase());
}

/** Builds the concept key `<namespaceUri>#<localName>`. */
function localNameToKey(namespaceUri: string, localName: string): string {
  return `${namespaceUri}#${localName}`;
}

/**
 * Extracts namespaces, contexts, units and numeric facts from the text of an
 * XBRL instance document using regular expressions.
 *
 * Only elements that carry a `contextRef` attribute and whose body parses as a
 * number become facts; elements with an infrastructure prefix are skipped.
 * A fact whose context is not declared in the document is still returned, with
 * `null` periods and no dimensions.
 *
 * @param raw - full text of the instance document
 * @param sourceFile - value copied verbatim onto every fact's `sourceFile`
 */
export function parseXbrlInstance(
  raw: string,
  sourceFile: string | null
): {
  namespaces: TaxonomyNamespaceMap;
  contexts: Record<string, TaxonomyContext>;
  units: Record<string, TaxonomyUnit>;
  facts: TaxonomyFact[];
} {
  const namespaces = parseNamespaceMapFromDocument(raw);
  const contexts = parseContexts(raw);
  const units = parseUnits(raw);
  const facts: TaxonomyFact[] = [];

  // The (?<!\/) lookbehind rejects self-closing elements (e.g. nil facts). Without it
  // such a tag is treated as an opening tag and swallows everything up to the next
  // closing tag of the same name, merging unrelated facts into one bogus value.
  const factPattern =
    /<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'][^>]*)(?<!\/)>([\s\S]*?)<\/\1:\2>/g;

  for (const match of raw.matchAll(factPattern)) {
    const prefix = (match[1] ?? '').trim();
    const localName = (match[2] ?? '').trim();
    const attrs = match[3] ?? '';
    const body = decodeXmlEntities((match[4] ?? '').trim());

    if (!prefix || !localName || isXbrlInfrastructurePrefix(prefix)) {
      continue;
    }

    const contextId = attrs.match(/\bcontextRef=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
    if (!contextId) {
      continue;
    }

    const value = parseNumber(body);
    if (value === null) {
      continue;
    }

    const unitRef = attrs.match(/\bunitRef=["']([^"']+)["']/i)?.[1]?.trim() ?? null;
    const decimals = attrs.match(/\bdecimals=["']([^"']+)["']/i)?.[1]?.trim() ?? null;
    const namespaceUri = lookupOwn<string>(namespaces, prefix) ?? `urn:unknown:${prefix}`;
    const context = lookupOwn<TaxonomyContext>(contexts, contextId);
    // Prefer the unit's resolved measure; fall back to the raw unitRef when it is unknown.
    const unitMeasure = unitRef ? lookupOwn<TaxonomyUnit>(units, unitRef)?.measure : null;
    const dimensions = context?.dimensions ?? [];

    facts.push({
      conceptKey: localNameToKey(namespaceUri, localName),
      qname: `${prefix}:${localName}`,
      namespaceUri,
      localName,
      contextId,
      unit: unitMeasure || unitRef,
      decimals,
      value,
      periodStart: context?.periodStart ?? null,
      periodEnd: context?.periodEnd ?? null,
      periodInstant: context?.periodInstant ?? null,
      dimensions,
      isDimensionless: dimensions.length === 0,
      sourceFile,
    });
  }

  return { namespaces, contexts, units, facts };
}

/** Public wrapper around the keyword-based statement classification of a concept's local name. */
export function conceptStatementFallback(localName: string): FinancialStatementKind | null {
  return classifyStatementKind(localName);
}