Run playwright UI tests
This commit is contained in:
310
lib/server/taxonomy/linkbase-parser.ts
Normal file
310
lib/server/taxonomy/linkbase-parser.ts
Normal file
@@ -0,0 +1,310 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import type { TaxonomyNamespaceMap, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/** Named entity references recognised by `decodeXmlEntities`, keyed by lower-cased name. */
const XML_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  // Non-breaking spaces are normalised to a plain space so later whitespace collapsing treats them alike.
  ['nbsp', ' ']
]);

/**
 * Decodes XML entity and character references in `value` and trims the result.
 *
 * Handles the five predefined XML entities plus `&nbsp;` (names matched
 * case-insensitively) and decimal / hexadecimal character references such as
 * `&#39;` or `&#xA0;`. Unknown names and out-of-range code points are left
 * exactly as written.
 *
 * Decoding happens in a single pass, so already-escaped text is not decoded
 * twice: `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param value Raw text taken from an XML document.
 * @returns The decoded text with leading and trailing whitespace removed.
 */
function decodeXmlEntities(value: string) {
  return value
    .replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi, (reference: string, body: string) => {
      if (!body.startsWith('#')) {
        return XML_NAMED_ENTITIES.get(body.toLowerCase()) ?? reference;
      }

      const isHex = body.charAt(1).toLowerCase() === 'x';
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff || isSurrogate) {
        // Not a valid character: keep the reference verbatim rather than throwing.
        return reference;
      }

      // Same normalisation as the named `&nbsp;` entity.
      return codePoint === 0xa0 ? ' ' : String.fromCodePoint(codePoint);
    })
    .trim();
}
|
||||
|
||||
/**
 * Collects prefixed namespace declarations (`xmlns:prefix="uri"`) into a
 * prefix → URI map.
 *
 * Only one slice of the document is scanned: the first tag whose text contains
 * "linkbase" (case-insensitive), or the first 1200 characters of `raw` when no
 * such tag exists. Declarations whose prefix or URI is empty after trimming are
 * ignored; a prefix declared more than once keeps the last URI seen.
 *
 * @param raw Linkbase document text.
 * @returns Map of namespace prefix to namespace URI.
 */
function parseNamespaceMap(raw: string): TaxonomyNamespaceMap {
  const openingTag = /<[^>]*linkbase[^>]*>/i.exec(raw);
  const haystack = openingTag ? openingTag[0] : raw.slice(0, 1200);

  const declaration = /xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g;
  const namespaces: TaxonomyNamespaceMap = {};

  let found = declaration.exec(haystack);
  while (found !== null) {
    const prefix = (found[1] ?? '').trim();
    const uri = (found[2] ?? '').trim();
    if (prefix && uri) {
      namespaces[prefix] = uri;
    }
    found = declaration.exec(haystack);
  }

  return namespaces;
}
|
||||
|
||||
/**
 * Derives a `prefix:localName` string from a locator href.
 *
 * The part after the first `#` is used (the whole href when there is no `#`).
 * It is trimmed and a leading `loc_` marker (any case, one or more
 * underscores) is removed. If the remainder already contains a colon it is
 * returned unchanged; otherwise it is split at its first underscore.
 *
 * @param href Value of an `xlink:href` attribute.
 * @returns The derived name, or `null` when the fragment is empty or contains
 *          neither a colon nor an underscore.
 */
function qnameFromHref(href: string) {
  const hashAt = href.indexOf('#');
  const fragment = hashAt >= 0 ? href.slice(hashAt + 1) : href;
  if (fragment === '') {
    return null;
  }

  const identifier = fragment.trim().replace(/^loc_+/i, '');
  if (identifier === '') {
    return null;
  }

  if (identifier.includes(':')) {
    return identifier;
  }

  const separatorAt = identifier.indexOf('_');
  return separatorAt === -1
    ? null
    : `${identifier.slice(0, separatorAt)}:${identifier.slice(separatorAt + 1)}`;
}
|
||||
|
||||
/**
 * Resolves a `prefix:localName` string into a concept descriptor.
 *
 * The name is split at its first colon; everything after it (including any
 * further colons) is the local name. A prefix missing from `namespaces` is
 * given the placeholder URI `urn:unknown:<prefix>`.
 *
 * @param qname Prefixed name to resolve.
 * @param namespaces Prefix → namespace URI map.
 * @returns `{ qname, namespaceUri, localName, conceptKey }` where `conceptKey`
 *          is `<namespaceUri>#<localName>`, or `null` when either side of the
 *          colon is empty.
 */
function conceptFromQName(qname: string, namespaces: TaxonomyNamespaceMap) {
  const colonAt = qname.indexOf(':');
  const prefix = colonAt === -1 ? qname : qname.slice(0, colonAt);
  const localName = colonAt === -1 ? '' : qname.slice(colonAt + 1);

  if (prefix === '' || localName === '') {
    return null;
  }

  const namespaceUri = namespaces[prefix] ?? `urn:unknown:${prefix}`;
  const conceptKey = `${namespaceUri}#${localName}`;

  return { qname, namespaceUri, localName, conceptKey };
}
|
||||
|
||||
/**
 * Ranks a label role so the most suitable label for a concept can be chosen.
 *
 * The role is compared case-insensitively by suffix:
 * `/label` → 4, `/terseLabel` → 3, `/verboseLabel` → 2, any other non-empty
 * role → 1, missing or empty role → 0.
 *
 * @param role Value of the label's `xlink:role`, or `null` when absent.
 * @returns The rank; higher is preferred.
 */
function labelPriority(role: string | null) {
  const lowered = (role ?? '').toLowerCase();
  if (lowered === '') {
    return 0;
  }

  const rankedSuffixes = [
    ['/label', 4],
    ['/terselabel', 3],
    ['/verboselabel', 2]
  ] as const;

  for (const [suffix, rank] of rankedSuffixes) {
    if (lowered.endsWith(suffix)) {
      return rank;
    }
  }

  return 1;
}
|
||||
|
||||
/**
 * Guesses which financial statement a presentation role URI belongs to.
 *
 * The lower-cased URI is tested against keyword patterns in a fixed order:
 * cash flow, equity, comprehensive income, balance sheet, income statement.
 * The first pattern that matches decides the result, so a URI containing
 * keywords from several groups is classified by the earliest group.
 *
 * @param roleUri Role URI of an extended link.
 * @returns The matching statement kind, or `null` when no pattern matches.
 */
export function classifyStatementRole(roleUri: string): FinancialStatementKind | null {
  const haystack = roleUri.toLowerCase();

  // Order matters: earlier rules take precedence over later ones.
  const rules: ReadonlyArray<readonly [RegExp, FinancialStatementKind]> = [
    [/cash\s*flow|statementsof?cashflows|netcash/, 'cash_flow'],
    [/shareholders?|stockholders?|equity|retainedearnings/, 'equity'],
    [/comprehensive\s*income/, 'comprehensive_income'],
    [/balance\s*sheet|financial\s*position|assets?andliabilities/, 'balance'],
    [/operations|income\s*statement|statementsofincome|profit/, 'income']
  ];

  const hit = rules.find(([pattern]) => pattern.test(haystack));
  return hit ? hit[1] : null;
}
|
||||
|
||||
/**
 * Extracts the preferred human-readable label for each concept in a label
 * linkbase document.
 *
 * For every `labelLink` block the function reads:
 * - `loc` elements, mapping an `xlink:label` to a concept key derived from the
 *   `xlink:href` fragment;
 * - `label` resources, mapping an `xlink:label` to entity-decoded,
 *   whitespace-collapsed text and a rank computed from `xlink:role`;
 * - `labelArc` elements, connecting a locator label (`xlink:from`) to a
 *   resource label (`xlink:to`).
 *
 * Several locators or resources may share one `xlink:label`; an arc then
 * applies to every pairing, so all of them are kept instead of only the last
 * one seen. For each concept the highest-ranked text wins; on equal rank the
 * first one encountered is kept.
 *
 * @param raw Label linkbase document text.
 * @returns Map of concept key (`<namespaceUri>#<localName>`) to label text.
 */
export function parseLabelLinkbase(raw: string): Map<string, string> {
  const namespaces = parseNamespaceMap(raw);
  const preferredLabelByConcept = new Map<string, { text: string; priority: number }>();

  // Attribute extractors, hoisted so they are not rebuilt for every element.
  const labelAttr = /\bxlink:label=["']([^"']+)["']/i;
  const hrefAttr = /\bxlink:href=["']([^"']+)["']/i;
  const roleAttr = /\bxlink:role=["']([^"']+)["']/i;
  const fromAttr = /\bxlink:from=["']([^"']+)["']/i;
  const toAttr = /\bxlink:to=["']([^"']+)["']/i;
  const readAttr = (attrs: string, pattern: RegExp): string => attrs.match(pattern)?.[1]?.trim() ?? '';

  const linkPattern = /<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?labelLink>/gi;
  const locPattern = /<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi;
  const resourcePattern = /<(?:[a-z0-9_\-]+:)?label\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?label>/gi;
  const arcPattern = /<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)\/?>/gi;

  for (const linkMatch of raw.matchAll(linkPattern)) {
    const block = linkMatch[1] ?? '';
    const conceptKeysByLabel = new Map<string, string[]>();
    const resourcesByLabel = new Map<string, Array<{ text: string; priority: number }>>();

    for (const locMatch of block.matchAll(locPattern)) {
      const attrs = locMatch[1] ?? '';
      const label = readAttr(attrs, labelAttr);
      const href = readAttr(attrs, hrefAttr);
      if (!label || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      if (!qname) {
        continue;
      }

      const concept = conceptFromQName(qname, namespaces);
      if (!concept) {
        continue;
      }

      const conceptKeys = conceptKeysByLabel.get(label) ?? [];
      if (!conceptKeys.includes(concept.conceptKey)) {
        conceptKeys.push(concept.conceptKey);
      }
      conceptKeysByLabel.set(label, conceptKeys);
    }

    for (const resourceMatch of block.matchAll(resourcePattern)) {
      const attrs = resourceMatch[1] ?? '';
      const body = decodeXmlEntities(resourceMatch[2] ?? '').replace(/\s+/g, ' ').trim();
      if (!body) {
        continue;
      }

      const resourceLabel = readAttr(attrs, labelAttr);
      if (!resourceLabel) {
        continue;
      }

      // A missing role is ranked lowest, exactly like an explicit null role.
      const role = readAttr(attrs, roleAttr) || null;
      const resources = resourcesByLabel.get(resourceLabel) ?? [];
      resources.push({ text: body, priority: labelPriority(role) });
      resourcesByLabel.set(resourceLabel, resources);
    }

    for (const arcMatch of block.matchAll(arcPattern)) {
      const attrs = arcMatch[1] ?? '';
      const from = readAttr(attrs, fromAttr);
      const to = readAttr(attrs, toAttr);
      if (!from || !to) {
        continue;
      }

      const conceptKeys = conceptKeysByLabel.get(from);
      const resources = resourcesByLabel.get(to);
      if (!conceptKeys || !resources) {
        continue;
      }

      // One arc links every locator labelled `from` to every resource labelled `to`.
      for (const conceptKey of conceptKeys) {
        for (const resource of resources) {
          const current = preferredLabelByConcept.get(conceptKey);
          if (!current || resource.priority > current.priority) {
            preferredLabelByConcept.set(conceptKey, resource);
          }
        }
      }
    }
  }

  const labels = new Map<string, string>();
  for (const [conceptKey, { text }] of preferredLabelByConcept) {
    labels.set(conceptKey, text);
  }
  return labels;
}
|
||||
|
||||
/**
 * Flattens the presentation hierarchy of a presentation linkbase document into
 * rows, one per concept occurrence, in depth-first order.
 *
 * For every `presentationLink` block that carries an `xlink:role`:
 * - `loc` elements map an `xlink:label` to a concept (a concept is flagged
 *   abstract when its local name contains "abstract", case-insensitively);
 * - `presentationArc` elements define parent → child edges with an `order`
 *   (falling back to the child's 1-based position among the arcs already seen
 *   for that parent when `order` is missing or not numeric); arcs that
 *   reference an unknown label are skipped;
 * - labels that are never the target of an arc are treated as roots and
 *   walked depth-first, children sorted by ascending `order`.
 *
 * Directed cycles (including self-referencing arcs) are cut: a label already
 * on the current ancestor path is not descended into again, so malformed
 * input cannot cause unbounded recursion.
 *
 * @param raw Presentation linkbase document text.
 * @returns Rows in traversal order, across all role blocks in document order.
 */
export function parsePresentationLinkbase(raw: string): TaxonomyPresentationConcept[] {
  const namespaces = parseNamespaceMap(raw);
  const rows: TaxonomyPresentationConcept[] = [];

  // Attribute extractors, hoisted so they are not rebuilt for every element.
  const roleAttr = /\bxlink:role=["']([^"']+)["']/i;
  const labelAttr = /\bxlink:label=["']([^"']+)["']/i;
  const hrefAttr = /\bxlink:href=["']([^"']+)["']/i;
  const fromAttr = /\bxlink:from=["']([^"']+)["']/i;
  const toAttr = /\bxlink:to=["']([^"']+)["']/i;
  const orderAttr = /\border=["']([^"']+)["']/i;
  const readAttr = (attrs: string, pattern: RegExp): string => attrs.match(pattern)?.[1]?.trim() ?? '';

  const linkPattern = /<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?presentationLink>/gi;
  const locPattern = /<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi;
  const arcPattern = /<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)\/?>/gi;

  for (const linkMatch of raw.matchAll(linkPattern)) {
    const linkAttrs = linkMatch[1] ?? '';
    const block = linkMatch[2] ?? '';
    const roleUri = readAttr(linkAttrs, roleAttr);
    if (!roleUri) {
      continue;
    }

    const locByLabel = new Map<string, { conceptKey: string; qname: string; isAbstract: boolean }>();
    for (const locMatch of block.matchAll(locPattern)) {
      const attrs = locMatch[1] ?? '';
      const label = readAttr(attrs, labelAttr);
      const href = readAttr(attrs, hrefAttr);
      if (!label || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      if (!qname) {
        continue;
      }

      const concept = conceptFromQName(qname, namespaces);
      if (!concept) {
        continue;
      }

      locByLabel.set(label, {
        conceptKey: concept.conceptKey,
        qname: concept.qname,
        isAbstract: /abstract/i.test(concept.localName)
      });
    }

    const childrenByLabel = new Map<string, Array<{ label: string; order: number }>>();
    const incoming = new Set<string>();
    const allReferenced = new Set<string>();

    for (const arcMatch of block.matchAll(arcPattern)) {
      const attrs = arcMatch[1] ?? '';
      const from = readAttr(attrs, fromAttr);
      const to = readAttr(attrs, toAttr);
      const order = Number.parseFloat(readAttr(attrs, orderAttr));

      if (!from || !to || !locByLabel.has(from) || !locByLabel.has(to)) {
        continue;
      }

      const group = childrenByLabel.get(from) ?? [];
      group.push({ label: to, order: Number.isFinite(order) ? order : group.length + 1 });
      childrenByLabel.set(from, group);

      incoming.add(to);
      allReferenced.add(from);
      allReferenced.add(to);
    }

    // Roots are referenced labels that no arc points to, in first-seen order.
    const roots = [...allReferenced].filter((label) => !incoming.has(label));
    // Suppresses duplicate rows for the same (parent, label, depth), e.g. from repeated arcs.
    const visited = new Set<string>();
    // Labels on the path from the current root to the node being expanded; used to cut cycles.
    const ancestors = new Set<string>();

    const walk = (label: string, depth: number, parentLabel: string | null, baseOrder: number): void => {
      const node = locByLabel.get(label);
      if (!node) {
        return;
      }

      // `visited` keys include the depth, so they never repeat along a cycle;
      // the ancestor path is what actually stops A -> B -> A recursion.
      if (ancestors.has(label)) {
        return;
      }

      const pathKey = `${parentLabel ?? 'root'}::${label}::${depth}`;
      if (visited.has(pathKey)) {
        return;
      }
      visited.add(pathKey);

      const parentConceptKey = parentLabel ? (locByLabel.get(parentLabel)?.conceptKey ?? null) : null;
      rows.push({
        conceptKey: node.conceptKey,
        qname: node.qname,
        roleUri,
        order: baseOrder,
        depth,
        parentConceptKey,
        isAbstract: node.isAbstract
      });

      const children = [...(childrenByLabel.get(label) ?? [])].sort((left, right) => left.order - right.order);

      ancestors.add(label);
      for (const [index, child] of children.entries()) {
        // NOTE(review): the same 1/1000 step is added at every depth, so `order`
        // values can coincide between levels (a first grandchild equals the
        // second child). Row array order is the reliable sequence; the formula
        // is kept unchanged in case stored values depend on it - confirm with consumers.
        walk(child.label, depth + 1, label, baseOrder + (index + 1) / 1000);
      }
      ancestors.delete(label);
    };

    roots.forEach((root, index) => walk(root, 0, null, index + 1));
  }

  return rows;
}
|
||||
Reference in New Issue
Block a user