311 lines
8.9 KiB
TypeScript
311 lines
8.9 KiB
TypeScript
import type { FinancialStatementKind } from '@/lib/types';
|
|
import type { TaxonomyNamespaceMap, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
|
|
|
/**
 * Decodes XML character/entity references in `value` and trims the result.
 *
 * Handled references (names are matched case-insensitively):
 * - the five predefined XML entities: amp, lt, gt, quot, apos;
 * - nbsp and the numeric reference for U+00A0, both mapped to a plain space;
 * - decimal (`#NNN`) and hexadecimal (`#xHHH`) numeric character references.
 *
 * Decoding happens in a single pass, so text that was escaped twice is only
 * unescaped once (an escaped ampersand followed by `lt;` stays an escaped
 * less-than sign instead of collapsing to `<`).
 *
 * Unknown entity names and numeric references outside the Unicode range are
 * left untouched rather than dropped.
 *
 * @param value Raw text taken from an XML document.
 * @returns The decoded text with leading/trailing whitespace removed.
 */
function decodeXmlEntities(value: string) {
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' ']
  ]);

  return value
    .replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (reference: string, body: string) => {
      if (body.startsWith('#')) {
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);

        // Non-breaking space is normalised to a regular space, like the named form.
        if (codePoint === 0xa0) {
          return ' ';
        }

        // String.fromCodePoint throws on out-of-range input; keep such references verbatim.
        if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
          return reference;
        }

        return String.fromCodePoint(codePoint);
      }

      return namedEntities.get(body.toLowerCase()) ?? reference;
    })
    .trim();
}
|
|
|
|
/**
 * Collects `xmlns:prefix="uri"` declarations into a prefix -> URI map.
 *
 * Only the first tag whose text contains "linkbase" is scanned; when no such
 * tag exists, the first 1200 characters of `raw` are scanned instead.
 * Declarations whose prefix or URI is blank after trimming are ignored, and a
 * later declaration of the same prefix overrides an earlier one.
 *
 * @param raw Linkbase document text.
 * @returns Map of namespace prefix to namespace URI.
 */
function parseNamespaceMap(raw: string): TaxonomyNamespaceMap {
  const linkbaseTag = /<[^>]*linkbase[^>]*>/i.exec(raw);
  const searchArea = linkbaseTag?.[0] ?? raw.slice(0, 1200);

  const declarationPattern = /xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g;
  const namespaces: TaxonomyNamespaceMap = {};

  let declaration = declarationPattern.exec(searchArea);
  while (declaration !== null) {
    const nsPrefix = (declaration[1] ?? '').trim();
    const nsUri = (declaration[2] ?? '').trim();

    if (nsPrefix && nsUri) {
      namespaces[nsPrefix] = nsUri;
    }

    declaration = declarationPattern.exec(searchArea);
  }

  return namespaces;
}
|
|
|
|
/**
 * Derives a `prefix:localName` QName from a locator href.
 *
 * The text after the first `#` is used (or the whole href when there is no
 * `#`); it is trimmed and a leading `loc_` marker (any case, one or more
 * underscores) is stripped. If the remainder already contains a colon it is
 * returned unchanged; otherwise it is split at its first underscore.
 *
 * @param href Value of an `xlink:href` attribute.
 * @returns The QName, or `null` when nothing usable remains or the remainder
 *   has neither a colon nor an underscore.
 */
function qnameFromHref(href: string) {
  const hashAt = href.indexOf('#');
  const tail = hashAt === -1 ? href : href.slice(hashAt + 1);
  const candidate = tail.trim().replace(/^loc_+/i, '');

  if (candidate.length === 0) {
    return null;
  }

  if (candidate.indexOf(':') !== -1) {
    return candidate;
  }

  const splitAt = candidate.indexOf('_');
  return splitAt === -1 ? null : `${candidate.slice(0, splitAt)}:${candidate.slice(splitAt + 1)}`;
}
|
|
|
|
/**
 * Resolves a `prefix:localName` QName into a concept descriptor.
 *
 * The text before the first colon is the prefix; everything after it
 * (including any further colons) is the local name. The prefix is looked up
 * in `namespaces`; prefixes that are not declared there get the placeholder
 * URI `urn:unknown:<prefix>`.
 *
 * @param qname QName to resolve.
 * @param namespaces Prefix -> namespace URI map.
 * @returns `{ qname, namespaceUri, localName, conceptKey }` where `conceptKey`
 *   is `<namespaceUri>#<localName>`, or `null` when the prefix or the local
 *   name is empty.
 */
function conceptFromQName(qname: string, namespaces: TaxonomyNamespaceMap) {
  const [prefix, ...rest] = qname.split(':');
  const localName = rest.join(':');
  if (!prefix || !localName) {
    return null;
  }

  // Only honour prefixes declared on the map itself. A plain index lookup would
  // also find inherited Object.prototype members (e.g. "constructor",
  // "toString") and use a function as the namespace URI.
  const declaredUri = Object.prototype.hasOwnProperty.call(namespaces, prefix) ? namespaces[prefix] : undefined;
  const namespaceUri = declaredUri ?? `urn:unknown:${prefix}`;

  return {
    qname,
    namespaceUri,
    localName,
    conceptKey: `${namespaceUri}#${localName}`
  };
}
|
|
|
|
/**
 * Ranks a label role URI; a higher number means a more preferred label.
 *
 * Matching is case-insensitive and based on the URI suffix:
 * `/label` -> 4, `/terseLabel` -> 3, `/verboseLabel` -> 2, any other
 * non-empty role -> 1, missing or empty role -> 0.
 *
 * @param role Role URI of a label resource, or `null` when absent.
 * @returns The priority rank.
 */
function labelPriority(role: string | null) {
  const roleUri = role == null ? '' : role.toLowerCase();
  if (roleUri === '') {
    return 0;
  }

  const rankedSuffixes: ReadonlyArray<readonly [string, number]> = [
    ['/label', 4],
    ['/terselabel', 3],
    ['/verboselabel', 2]
  ];

  for (const [suffix, rank] of rankedSuffixes) {
    if (roleUri.endsWith(suffix)) {
      return rank;
    }
  }

  return 1;
}
|
|
|
|
/**
 * Ordered classification rules: the first pattern that matches the
 * lower-cased role URI decides the statement kind, so earlier entries take
 * precedence over later ones.
 */
const STATEMENT_ROLE_RULES: ReadonlyArray<readonly [RegExp, FinancialStatementKind]> = [
  [/cash\s*flow|statementsof?cashflows|netcash/, 'cash_flow'],
  [/shareholders?|stockholders?|equity|retainedearnings/, 'equity'],
  [/comprehensive\s*income/, 'comprehensive_income'],
  [/balance\s*sheet|financial\s*position|assets?andliabilities/, 'balance'],
  [/operations|income\s*statement|statementsofincome|profit/, 'income']
];

/**
 * Guesses which financial statement a role URI refers to, using keyword
 * patterns applied to the lower-cased URI.
 *
 * @param roleUri Role URI of a presentation link.
 * @returns The matching statement kind, or `null` when no pattern matches.
 */
export function classifyStatementRole(roleUri: string): FinancialStatementKind | null {
  const haystack = roleUri.toLowerCase();

  for (const [pattern, kind] of STATEMENT_ROLE_RULES) {
    if (pattern.test(haystack)) {
      return kind;
    }
  }

  return null;
}
|
|
|
|
/**
 * Extracts one preferred label per concept from a label linkbase document.
 *
 * For every `labelLink` element the function:
 * 1. maps each `loc` element's `xlink:label` to a concept key derived from its
 *    `xlink:href`;
 * 2. collects the `label` resources, grouped by their `xlink:label`;
 * 3. follows each `labelArc` from a locator to ALL resources sharing the
 *    target label and keeps, per concept, the text whose role ranks highest
 *    according to `labelPriority` (on ties the first one encountered wins).
 *
 * Several label resources may share one `xlink:label` (an arc then points at
 * all of them), so resources are kept as a list per label instead of letting
 * later ones overwrite earlier ones.
 *
 * Parsing is regex-based, not a full XML parse: elements or attributes that do
 * not match the expected shapes are silently skipped.
 *
 * @param raw Label linkbase document text.
 * @returns Map of concept key (`<namespaceUri>#<localName>`) to label text
 *   (entity-decoded, whitespace-collapsed).
 */
export function parseLabelLinkbase(raw: string): Map<string, string> {
  const namespaces = parseNamespaceMap(raw);
  const preferredLabelByConcept = new Map<string, { text: string; priority: number }>();

  const linkPattern = /<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?labelLink>/gi;
  for (const linkMatch of raw.matchAll(linkPattern)) {
    const block = linkMatch[1] ?? '';
    const locByLabel = new Map<string, string>();
    const resourcesByLabel = new Map<string, Array<{ text: string; role: string | null }>>();

    // Locators: xlink:label -> concept key.
    for (const locMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi)) {
      const attrs = locMatch[1] ?? '';
      const label = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const href = attrs.match(/\bxlink:href=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!label || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      if (!qname) {
        continue;
      }

      const concept = conceptFromQName(qname, namespaces);
      if (!concept) {
        continue;
      }

      locByLabel.set(label, concept.conceptKey);
    }

    // Label resources: xlink:label -> every resource declared under that label.
    for (const resourceMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?label\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?label>/gi)) {
      const attrs = resourceMatch[1] ?? '';
      const body = decodeXmlEntities(resourceMatch[2] ?? '').replace(/\s+/g, ' ').trim();
      if (!body) {
        continue;
      }

      const resourceLabel = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const role = attrs.match(/\bxlink:role=["']([^"']+)["']/i)?.[1]?.trim() ?? null;
      if (!resourceLabel) {
        continue;
      }

      const resource = { text: body, role };
      const siblings = resourcesByLabel.get(resourceLabel);
      if (siblings) {
        siblings.push(resource);
      } else {
        resourcesByLabel.set(resourceLabel, [resource]);
      }
    }

    // Arcs: connect a locator to its label resources and keep the best-ranked text.
    for (const arcMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)\/?>/gi)) {
      const attrs = arcMatch[1] ?? '';
      const from = attrs.match(/\bxlink:from=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const to = attrs.match(/\bxlink:to=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!from || !to) {
        continue;
      }

      const conceptKey = locByLabel.get(from);
      const resources = resourcesByLabel.get(to);
      if (!conceptKey || !resources) {
        continue;
      }

      for (const resource of resources) {
        const priority = labelPriority(resource.role);
        const current = preferredLabelByConcept.get(conceptKey);
        // Strictly greater: on equal priority the earlier label is kept.
        if (!current || priority > current.priority) {
          preferredLabelByConcept.set(conceptKey, {
            text: resource.text,
            priority
          });
        }
      }
    }
  }

  const labels = new Map<string, string>();
  for (const [conceptKey, preferred] of preferredLabelByConcept) {
    labels.set(conceptKey, preferred.text);
  }

  return labels;
}
|
|
|
|
/**
 * Flattens the presentation trees of a presentation linkbase into rows.
 *
 * For every `presentationLink` element with an `xlink:role`:
 * 1. each `loc` element's `xlink:label` is mapped to its concept (a concept is
 *    flagged abstract when its local name contains "abstract", any case);
 * 2. each `presentationArc` between two known locators adds a parent -> child
 *    edge; a missing or non-numeric `order` falls back to the child's 1-based
 *    position among the edges recorded so far for that parent;
 * 3. every label that is never an arc target is treated as a root and walked
 *    depth-first, children sorted by `order`, emitting one row per visit.
 *
 * Row `order` is the 1-based root index plus `(childIndex + 1) / 1000` added
 * at each level below the root.
 * NOTE(review): because the same 1/1000 step is used at every depth, `order`
 * values of nodes at different depths can coincide; rows are emitted in
 * traversal order, so confirm whether consumers rely on `order` being unique.
 *
 * The walk skips any child that is already on the current ancestor path, so a
 * malformed linkbase with a directed cycle (including an arc from a label to
 * itself) terminates instead of recursing until the stack overflows. A link
 * whose labels all have an incoming arc has no root and yields no rows.
 *
 * Parsing is regex-based, not a full XML parse: elements or attributes that do
 * not match the expected shapes are silently skipped.
 *
 * @param raw Presentation linkbase document text.
 * @returns Rows for all presentation links, in document and traversal order.
 */
export function parsePresentationLinkbase(raw: string): TaxonomyPresentationConcept[] {
  const namespaces = parseNamespaceMap(raw);
  const rows: TaxonomyPresentationConcept[] = [];

  const linkPattern = /<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?presentationLink>/gi;
  for (const linkMatch of raw.matchAll(linkPattern)) {
    const linkAttrs = linkMatch[1] ?? '';
    const block = linkMatch[2] ?? '';
    const roleUri = linkAttrs.match(/\bxlink:role=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
    if (!roleUri) {
      continue;
    }

    // Locators: xlink:label -> concept details.
    const locByLabel = new Map<string, { conceptKey: string; qname: string; isAbstract: boolean }>();
    for (const locMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi)) {
      const attrs = locMatch[1] ?? '';
      const label = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const href = attrs.match(/\bxlink:href=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!label || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      if (!qname) {
        continue;
      }

      const concept = conceptFromQName(qname, namespaces);
      if (!concept) {
        continue;
      }

      locByLabel.set(label, {
        conceptKey: concept.conceptKey,
        qname: concept.qname,
        isAbstract: /abstract/i.test(concept.localName)
      });
    }

    // Arcs: parent label -> ordered children, plus bookkeeping to find roots.
    const childrenByLabel = new Map<string, Array<{ label: string; order: number }>>();
    const incoming = new Set<string>();
    const allReferenced = new Set<string>();

    for (const arcMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)\/?>/gi)) {
      const attrs = arcMatch[1] ?? '';
      const from = attrs.match(/\bxlink:from=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const to = attrs.match(/\bxlink:to=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const orderRaw = attrs.match(/\border=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const order = Number.parseFloat(orderRaw);

      if (!from || !to || !locByLabel.has(from) || !locByLabel.has(to)) {
        continue;
      }

      const group = childrenByLabel.get(from) ?? [];
      group.push({ label: to, order: Number.isFinite(order) ? order : group.length + 1 });
      childrenByLabel.set(from, group);

      incoming.add(to);
      allReferenced.add(from);
      allReferenced.add(to);
    }

    const roots = [...allReferenced].filter((label) => !incoming.has(label));
    // (parent, label, depth) triples already emitted; suppresses duplicate arcs.
    const visited = new Set<string>();
    // Labels on the path from the current root to the node being visited.
    const ancestors = new Set<string>();

    const dfs = (label: string, depth: number, parentLabel: string | null, baseOrder: number): void => {
      const node = locByLabel.get(label);
      if (!node) {
        return;
      }

      // Revisiting a label that is already on the current path means the arcs
      // form a directed cycle; stop here so the recursion always terminates.
      if (ancestors.has(label)) {
        return;
      }

      const pathKey = `${parentLabel ?? 'root'}::${label}::${depth}`;
      if (visited.has(pathKey)) {
        return;
      }
      visited.add(pathKey);

      const parentConceptKey = parentLabel ? (locByLabel.get(parentLabel)?.conceptKey ?? null) : null;
      rows.push({
        conceptKey: node.conceptKey,
        qname: node.qname,
        roleUri,
        order: baseOrder,
        depth,
        parentConceptKey,
        isAbstract: node.isAbstract
      });

      // Sort a copy so the recorded arc order is left untouched.
      const children = [...(childrenByLabel.get(label) ?? [])].sort((left, right) => left.order - right.order);

      ancestors.add(label);
      for (let i = 0; i < children.length; i += 1) {
        const child = children[i];
        if (!child) {
          continue;
        }

        dfs(child.label, depth + 1, label, baseOrder + (i + 1) / 1000);
      }
      ancestors.delete(label);
    };

    for (let i = 0; i < roots.length; i += 1) {
      const root = roots[i];
      if (!root) {
        continue;
      }

      dfs(root, 0, null, i + 1);
    }
  }

  return rows;
}
|