Run playwright UI tests
This commit is contained in:
73
lib/server/taxonomy/asset-discovery.test.ts
Normal file
73
lib/server/taxonomy/asset-discovery.test.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
|
||||
|
||||
describe('taxonomy asset discovery', () => {
  // Wraps a response factory in a fetch stand-in that ignores its arguments.
  const stubFetch = (respond: () => Response) => (async () => respond()) as unknown as typeof fetch;

  it('classifies assets and selects ranked instance/pdf candidates', async () => {
    // Directory listing with one instance, two linkbases and four PDFs (one of them an exhibit).
    const directoryListing = {
      directory: {
        item: [
          { name: 'abc_htm.xml', size: '900000' },
          { name: 'abc_pre.xml', size: '250000' },
          { name: 'abc_lab.xml', size: '120000' },
          { name: '10k_financial_statements.pdf', size: '400000' },
          { name: 'annual_report.pdf', size: '300000' },
          { name: 'quarter_statement.pdf', size: '200000' },
          { name: 'exhibit99.pdf', size: '500000' }
        ]
      }
    };
    const fetchImpl = stubFetch(() => new Response(JSON.stringify(directoryListing), {
      status: 200,
      headers: {
        'content-type': 'application/json'
      }
    }));

    const { directoryUrl, assets } = await discoverFilingAssets({
      cik: '0000123456',
      accessionNumber: '0000123456-26-000001',
      filingUrl: 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/abc.htm',
      primaryDocument: 'abc.htm',
      fetchImpl
    });

    expect(directoryUrl).toBe('https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/');

    const chosenInstance = assets.find((asset) => asset.asset_type === 'instance' && asset.is_selected);
    expect(chosenInstance?.name).toBe('abc_htm.xml');

    const chosenPdfNames = assets
      .filter((asset) => asset.asset_type === 'pdf' && asset.is_selected)
      .map((asset) => asset.name);
    expect(chosenPdfNames.length).toBe(3);
    expect(chosenPdfNames).toContain('10k_financial_statements.pdf');
    expect(chosenPdfNames).toContain('annual_report.pdf');
    expect(chosenPdfNames).toContain('quarter_statement.pdf');
    expect(chosenPdfNames).not.toContain('exhibit99.pdf');
  });

  it('falls back to filing url when SEC directory assets are unavailable', async () => {
    // A 404 for index.json forces the single-asset fallback built from the filing URL.
    const fetchImpl = stubFetch(() => new Response('not found', { status: 404 }));
    const filingUrl = 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/abc.xml';

    const { assets } = await discoverFilingAssets({
      cik: '0000123456',
      accessionNumber: '0000123456-26-000001',
      filingUrl,
      primaryDocument: 'abc.xml',
      fetchImpl
    });

    expect(assets.length).toBe(1);
    expect(assets[0]).toEqual({
      asset_type: 'instance',
      name: 'abc.xml',
      url: 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/abc.xml',
      size_bytes: null,
      score: 6,
      is_selected: true
    });
  });
});
|
||||
283
lib/server/taxonomy/asset-discovery.ts
Normal file
283
lib/server/taxonomy/asset-discovery.ts
Normal file
@@ -0,0 +1,283 @@
|
||||
import type { TaxonomyAsset } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/** Arguments accepted by `discoverFilingAssets`. */
type FilingAssetDiscoveryInput = {
  /** Filer CIK; only its digits are used when a directory URL has to be built. */
  cik: string;
  /** Accession number; dashes are removed when a directory URL has to be built. */
  accessionNumber: string;
  /** URL of the filing document, or null when unknown. */
  filingUrl: string | null;
  /** File name of the primary document, or null when unknown. */
  primaryDocument: string | null;
  /** Optional fetch replacement; the global `fetch` is used when omitted. */
  fetchImpl?: typeof fetch;
};
|
||||
|
||||
/** One entry of the directory listing; every field is optional because the payload is not validated. */
type FilingDirectoryItem = {
  name?: string;
  type?: string;
  size?: string | number;
};

/** Shape expected from the filing directory's `index.json` response. */
type FilingDirectoryJson = {
  directory?: {
    item?: Array<FilingDirectoryItem>;
  };
};
|
||||
|
||||
/**
 * Returns the User-Agent value for SEC requests: the `SEC_USER_AGENT`
 * environment variable when it is set to a non-empty string, otherwise a
 * built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;
  if (configured) {
    return configured;
  }

  return 'Fiscal Clone <support@fiscal.local>';
}
|
||||
|
||||
/** Removes every dash from an accession number (e.g. `0000123456-26-000001` -> `000012345626000001`). */
function compactAccessionNumber(value: string) {
  const segments = value.split('-');
  return segments.join('');
}
|
||||
|
||||
/**
 * Reduces a CIK to the form used in the directory path: non-digit characters
 * are dropped and the remainder is round-tripped through `Number`, which
 * removes leading zeros.
 *
 * @returns The normalized digits, or null when the input has no digits or
 *   does not convert to a finite number.
 */
function normalizeCikForPath(value: string) {
  const digitsOnly = value.replace(/\D/g, '');
  const asNumber = digitsOnly === '' ? Number.NaN : Number(digitsOnly);

  return Number.isFinite(asNumber) ? String(asNumber) : null;
}
|
||||
|
||||
/**
 * Determines the directory URL (with trailing slash) that holds a filing's files.
 *
 * The trimmed `filingUrl` is preferred: everything up to and including its
 * last slash is returned, provided that slash lies beyond the length of
 * `https://`. Otherwise the URL is built from the normalized CIK and the
 * dash-free accession number.
 *
 * @returns The directory URL, or null when neither source yields one.
 */
function resolveFilingDirectoryUrl(input: {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
}) {
  const trimmedUrl = input.filingUrl?.trim();
  if (trimmedUrl) {
    const cutAt = trimmedUrl.lastIndexOf('/');
    // A slash inside the scheme prefix means the URL has no path segment to strip.
    if (cutAt > 'https://'.length) {
      return trimmedUrl.slice(0, cutAt + 1);
    }
  }

  const cikSegment = normalizeCikForPath(input.cik);
  const accessionSegment = compactAccessionNumber(input.accessionNumber);

  return cikSegment && accessionSegment
    ? `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/`
    : null;
}
|
||||
|
||||
/**
 * Classifies a filing asset by file name (case-insensitive).
 *
 * `.pdf` -> pdf, `.xsd` -> schema. `.xml` files are checked against the
 * linkbase rules in order (presentation, label, calculation, definition);
 * an `.xml` file matching none of them is an instance. Anything else is `other`.
 */
function classifyAssetType(name: string): TaxonomyAsset['asset_type'] {
  const fileName = name.toLowerCase();

  if (fileName.endsWith('.pdf')) {
    return 'pdf';
  }
  if (fileName.endsWith('.xsd')) {
    return 'schema';
  }
  if (!fileName.endsWith('.xml')) {
    return 'other';
  }

  // Each rule: conventional file suffix, keyword anywhere in the name, resulting type.
  const linkbaseRules: Array<[RegExp, RegExp, TaxonomyAsset['asset_type']]> = [
    [/(_|-)pre\.xml$/, /presentation/, 'presentation'],
    [/(_|-)lab\.xml$/, /label/, 'label'],
    [/(_|-)cal\.xml$/, /calculation/, 'calculation'],
    [/(_|-)def\.xml$/, /definition/, 'definition']
  ];

  for (const [suffixPattern, keywordPattern, assetType] of linkbaseRules) {
    if (suffixPattern.test(fileName) || keywordPattern.test(fileName)) {
      return assetType;
    }
  }

  return 'instance';
}
|
||||
|
||||
/**
 * Ranks a PDF by how likely its name suggests financial statements.
 *
 * +8 for a financial keyword in the name, -2 for an exhibit-style name,
 * +1 when the size is known and exceeds 100,000 bytes.
 */
function scorePdf(name: string, sizeBytes: number | null) {
  const fileName = name.toLowerCase();

  const keywordBonus = /financial|statement|annual|quarter|10k|10q/.test(fileName) ? 8 : 0;
  const exhibitPenalty = /exhibit|ex-\d+/.test(fileName) ? -2 : 0;
  const sizeBonus = sizeBytes && sizeBytes > 100_000 ? 1 : 0;

  return keywordBonus + exhibitPenalty + sizeBonus;
}
|
||||
|
||||
/**
 * Ranks an XML file by how likely it is to be the filing's XBRL instance.
 *
 * Starts at 1, then:
 * - +4 when the name ends in `_htm.xml` or `_ins.xml`;
 * - +5 when the name contains the primary document's base name (extension removed);
 * - -3 when the name carries a linkbase marker (`cal`, `def`, `lab`, `pre`).
 *
 * The linkbase marker must be a whole token delimited by `_`, `-`, `.` or the
 * start/end of the name. A bare substring check would also penalize names
 * that merely contain those letters (for example `calpine-20231231_htm.xml`).
 *
 * @param name File name of the candidate.
 * @param primaryDocument File name of the filing's primary document, if known.
 * @returns The candidate's score; higher is better.
 */
function scoreInstance(name: string, primaryDocument: string | null) {
  const lower = name.toLowerCase();
  let score = 1;

  if (/_(htm|ins)\.xml$/.test(lower)) {
    score += 4;
  }

  const basePrimary = (primaryDocument ?? '').replace(/\.[a-z0-9]+$/i, '').toLowerCase();
  if (basePrimary && lower.includes(basePrimary)) {
    score += 5;
  }

  if (/(^|[_\-.])(cal|def|lab|pre)(?=[_\-.]|$)/.test(lower)) {
    score -= 3;
  }

  return score;
}
|
||||
|
||||
/**
 * Converts a directory-listing size value into a number of bytes.
 *
 * Accepts finite numbers and numeric strings. Blank strings are treated as
 * "unknown" (null): `Number('')` is 0, which would otherwise report a
 * missing size as zero bytes.
 *
 * @param raw Untrusted value taken from the listing JSON.
 * @returns The parsed size, or null when it is missing or not a finite number.
 */
function parseSize(raw: unknown) {
  if (typeof raw === 'number') {
    return Number.isFinite(raw) ? raw : null;
  }

  if (typeof raw === 'string') {
    const text = raw.trim();
    if (text === '') {
      return null;
    }

    const parsed = Number(text);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
|
||||
|
||||
/**
 * Requests `url` with the SEC User-Agent and a JSON Accept header, bypassing
 * the HTTP cache, and returns the parsed body.
 *
 * The body is cast to `T` without validation.
 *
 * @throws Error with the HTTP status when the response is not ok.
 */
async function fetchJson<T>(url: string, fetchImpl: typeof fetch): Promise<T> {
  const headers = {
    'User-Agent': envUserAgent(),
    Accept: 'application/json'
  };
  const response = await fetchImpl(url, { headers, cache: 'no-store' });

  if (response.ok) {
    return await response.json() as T;
  }

  throw new Error(`SEC request failed (${response.status})`);
}
|
||||
|
||||
/**
 * Lists the files of a filing directory, classifies them, and marks which
 * ones should be used downstream.
 *
 * Steps:
 * 1. Resolve the directory URL from `filingUrl`, or from CIK + accession number.
 * 2. Fetch `index.json` from that directory. Any failure is tolerated and
 *    treated as an empty listing.
 * 3. Classify every named entry and parse its size.
 * 4. If nothing was listed and a `filingUrl` exists, fall back to a single
 *    asset pointing at the filing URL.
 * 5. Score instance and PDF candidates; select the top instance and up to
 *    three top PDFs. Schema and linkbase assets are always selected; `other`
 *    assets never are.
 *
 * @param input Filing identifiers plus an optional fetch replacement.
 * @returns The directory URL (null when it cannot be determined) and the
 *   classified assets with `score` / `is_selected` populated.
 */
export async function discoverFilingAssets(input: FilingAssetDiscoveryInput): Promise<{
  directoryUrl: string | null;
  assets: TaxonomyAsset[];
}> {
  const maxSelectedPdfs = 3;
  const fetchImpl = input.fetchImpl ?? fetch;

  const directoryUrl = resolveFilingDirectoryUrl({
    filingUrl: input.filingUrl,
    cik: input.cik,
    accessionNumber: input.accessionNumber
  });
  if (!directoryUrl) {
    return { directoryUrl: null, assets: [] };
  }

  // Best effort: a missing or unreadable listing falls through to the filing-URL fallback.
  let payload: FilingDirectoryJson | null = null;
  try {
    payload = await fetchJson<FilingDirectoryJson>(`${directoryUrl}index.json`, fetchImpl);
  } catch {
    payload = null;
  }

  // The payload is unvalidated JSON: guard its shape so malformed data cannot throw here.
  const rawItems = payload?.directory?.item;
  const items = Array.isArray(rawItems) ? rawItems : [];

  const discovered: TaxonomyAsset[] = [];
  for (const item of items) {
    const name = typeof item?.name === 'string' ? item.name.trim() : '';
    if (!name) {
      continue;
    }

    discovered.push({
      asset_type: classifyAssetType(name),
      name,
      url: `${directoryUrl}${name.replace(/^\/+/, '')}`,
      size_bytes: parseSize(item.size),
      score: null,
      is_selected: false
    });
  }

  if (discovered.length === 0 && input.filingUrl) {
    // `||` rather than `??`: an empty primary document or a URL ending in '/' yields '',
    // which must not become the asset name.
    const fallbackName = input.primaryDocument || input.filingUrl.split('/').pop() || 'primary_document';
    discovered.push({
      asset_type: fallbackName.toLowerCase().endsWith('.xml') ? 'instance' : 'other',
      name: fallbackName,
      url: input.filingUrl,
      size_bytes: null,
      score: null,
      is_selected: true
    });
  }

  // Score each instance/PDF candidate exactly once.
  const scoreByAsset = new Map<TaxonomyAsset, number>();
  for (const asset of discovered) {
    if (asset.asset_type === 'instance') {
      scoreByAsset.set(asset, scoreInstance(asset.name, input.primaryDocument));
    } else if (asset.asset_type === 'pdf') {
      scoreByAsset.set(asset, scorePdf(asset.name, asset.size_bytes));
    }
  }

  // Highest score first; the sort is stable, so ties keep listing order.
  const rankedByType = (assetType: TaxonomyAsset['asset_type']) => discovered
    .filter((asset) => asset.asset_type === assetType)
    .sort((left, right) => (scoreByAsset.get(right) ?? 0) - (scoreByAsset.get(left) ?? 0));

  const selectedInstanceUrl = rankedByType('instance')[0]?.url ?? null;
  const selectedPdfUrls = new Set(
    rankedByType('pdf').slice(0, maxSelectedPdfs).map((asset) => asset.url)
  );
  const alwaysSelected = new Set<TaxonomyAsset['asset_type']>([
    'presentation',
    'label',
    'calculation',
    'definition',
    'schema'
  ]);

  const assets = discovered.map((asset): TaxonomyAsset => {
    if (asset.asset_type === 'instance') {
      return {
        ...asset,
        score: scoreByAsset.get(asset) ?? null,
        is_selected: asset.url === selectedInstanceUrl
      };
    }

    if (asset.asset_type === 'pdf') {
      return {
        ...asset,
        score: scoreByAsset.get(asset) ?? null,
        is_selected: selectedPdfUrls.has(asset.url)
      };
    }

    return {
      ...asset,
      score: null,
      is_selected: alwaysSelected.has(asset.asset_type)
    };
  });

  return { directoryUrl, assets };
}
|
||||
185
lib/server/taxonomy/engine.ts
Normal file
185
lib/server/taxonomy/engine.ts
Normal file
@@ -0,0 +1,185 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
|
||||
import { parseLabelLinkbase, parsePresentationLinkbase } from '@/lib/server/taxonomy/linkbase-parser';
|
||||
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
|
||||
import { materializeTaxonomyStatements } from '@/lib/server/taxonomy/materialize';
|
||||
import { validateMetricsWithPdfLlm } from '@/lib/server/taxonomy/pdf-validation';
|
||||
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
|
||||
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';
|
||||
|
||||
/**
 * Builds a record with one entry per statement kind, calling `factory` once
 * for each kind so every entry gets its own value.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  // Invocation order: income, balance, cash_flow, equity, comprehensive_income.
  const income = factory();
  const balance = factory();
  const cashFlow = factory();
  const equity = factory();
  const comprehensiveIncome = factory();

  return {
    income,
    balance,
    cash_flow: cashFlow,
    equity,
    comprehensive_income: comprehensiveIncome
  };
}
|
||||
|
||||
/**
 * Returns the User-Agent value for SEC requests: `SEC_USER_AGENT` from the
 * environment when non-empty, otherwise the built-in default.
 */
function envUserAgent() {
  const fromEnvironment = process.env.SEC_USER_AGENT;
  return fromEnvironment ? fromEnvironment : 'Fiscal Clone <support@fiscal.local>';
}
|
||||
|
||||
/**
 * Requests `url` with the SEC User-Agent and an XML/text-oriented Accept
 * header, bypassing the HTTP cache, and returns the body as text.
 *
 * @throws Error with the HTTP status when the response is not ok.
 */
async function fetchText(url: string, fetchImpl: typeof fetch) {
  const headers = {
    'User-Agent': envUserAgent(),
    Accept: 'text/xml, text/plain, text/html;q=0.8, */*;q=0.5'
  };
  const response = await fetchImpl(url, { headers, cache: 'no-store' });

  if (response.ok) {
    return await response.text();
  }

  throw new Error(`SEC request failed (${response.status})`);
}
|
||||
|
||||
/**
 * Builds a taxonomy snapshot for one filing.
 *
 * Flow:
 * 1. Discover the filing's assets.
 * 2. Pick the selected instance asset (or the first instance asset). Without
 *    one, or when fetching it fails, a `failed` result is returned that still
 *    carries the discovered assets.
 * 3. Parse the instance, then fetch the selected presentation and label
 *    linkbases one at a time. A linkbase failure is recorded as the parse
 *    error (first failure wins) but does not abort the run.
 * 4. Materialize statements, derive metrics and run the PDF validation.
 *
 * `parse_status` is `ready` when both facts and statement rows exist,
 * `partial` when only facts exist, and `failed` otherwise.
 *
 * @param input Filing identifiers and metadata.
 * @param options Optional fetch replacement; the global `fetch` is the default.
 */
export async function hydrateFilingTaxonomySnapshot(
  input: TaxonomyHydrationInput,
  options?: {
    fetchImpl?: typeof fetch;
  }
): Promise<TaxonomyHydrationResult> {
  const fetchImpl = options?.fetchImpl ?? fetch;

  const discovered = await discoverFilingAssets({
    cik: input.cik,
    accessionNumber: input.accessionNumber,
    filingUrl: input.filingUrl,
    primaryDocument: input.primaryDocument,
    fetchImpl
  });

  const ticker = input.ticker.trim().toUpperCase();

  // Returned whenever no instance document can be obtained.
  const emptyResult: TaxonomyHydrationResult = {
    filing_id: input.filingId,
    ticker,
    filing_date: input.filingDate,
    filing_type: input.filingType,
    parse_status: 'failed',
    parse_error: 'No XBRL instance found',
    source: 'legacy_html_fallback',
    periods: [],
    statement_rows: createStatementRecord(() => []),
    derived_metrics: null,
    validation_result: {
      status: 'not_run',
      checks: [],
      validatedAt: null
    },
    facts_count: 0,
    concepts_count: 0,
    dimensions_count: 0,
    assets: discovered.assets,
    concepts: [],
    facts: [],
    metric_validations: []
  };

  // Prefer the instance flagged by discovery; otherwise take the first instance listed.
  const instanceAssets = discovered.assets.filter((asset) => asset.asset_type === 'instance');
  const selectedInstance = instanceAssets.find((asset) => asset.is_selected) ?? instanceAssets[0] ?? null;
  if (!selectedInstance) {
    return emptyResult;
  }

  let instanceText = '';
  try {
    instanceText = await fetchText(selectedInstance.url, fetchImpl);
  } catch (error) {
    return {
      ...emptyResult,
      parse_error: error instanceof Error ? error.message : 'Unable to fetch instance file'
    };
  }

  let parseError: string | null = null;
  let source: TaxonomyHydrationResult['source'] = 'xbrl_instance';

  const parsedInstance = parseXbrlInstance(instanceText, selectedInstance.name);

  const labelByConcept = new Map<string, string>();
  const presentation: ReturnType<typeof parsePresentationLinkbase> = [];

  const linkbaseAssets = discovered.assets.filter(
    (asset) => asset.is_selected && (asset.asset_type === 'presentation' || asset.asset_type === 'label')
  );

  for (const asset of linkbaseAssets) {
    try {
      const content = await fetchText(asset.url, fetchImpl);

      if (asset.asset_type === 'presentation') {
        const concepts = parsePresentationLinkbase(content);
        if (concepts.length > 0) {
          source = 'xbrl_instance_with_linkbase';
        }
        presentation.push(...concepts);
      } else {
        // Labels from earlier linkbases take precedence over later ones.
        for (const [conceptKey, label] of parseLabelLinkbase(content).entries()) {
          if (!labelByConcept.has(conceptKey)) {
            labelByConcept.set(conceptKey, label);
          }
        }
      }
    } catch (error) {
      // Keep only the first linkbase failure.
      if (parseError === null) {
        parseError = error instanceof Error ? error.message : 'Failed to parse taxonomy linkbase';
      }
    }
  }

  const materialized = materializeTaxonomyStatements({
    filingId: input.filingId,
    accessionNumber: input.accessionNumber,
    filingDate: input.filingDate,
    filingType: input.filingType,
    facts: parsedInstance.facts,
    presentation,
    labelByConcept
  });

  const derivedMetrics = deriveTaxonomyMetrics(parsedInstance.facts);
  const llmValidation = await validateMetricsWithPdfLlm({
    metrics: derivedMetrics,
    assets: discovered.assets,
    fetchImpl
  });

  const hasFacts = materialized.facts.length > 0;
  const hasRows = Object.values(materialized.statement_rows).some((rows) => rows.length > 0);

  let parseStatus: TaxonomyHydrationResult['parse_status'];
  if (!hasFacts) {
    parseStatus = 'failed';
  } else if (hasRows) {
    parseStatus = 'ready';
  } else {
    parseStatus = 'partial';
  }

  return {
    filing_id: input.filingId,
    ticker,
    filing_date: input.filingDate,
    filing_type: input.filingType,
    parse_status: parseStatus,
    parse_error: parseStatus === 'failed' ? (parseError ?? 'No XBRL facts extracted') : parseError,
    source,
    periods: materialized.periods,
    statement_rows: materialized.statement_rows,
    derived_metrics: derivedMetrics,
    validation_result: llmValidation.validation_result,
    facts_count: materialized.facts.length,
    concepts_count: materialized.concepts.length,
    dimensions_count: materialized.dimensionsCount,
    assets: discovered.assets,
    concepts: materialized.concepts,
    facts: materialized.facts,
    metric_validations: llmValidation.metric_validations
  };
}
|
||||
63
lib/server/taxonomy/linkbase-parser.test.ts
Normal file
63
lib/server/taxonomy/linkbase-parser.test.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import {
|
||||
classifyStatementRole,
|
||||
parseLabelLinkbase,
|
||||
parsePresentationLinkbase
|
||||
} from '@/lib/server/taxonomy/linkbase-parser';
|
||||
|
||||
// Label linkbase with one concept (us-gaap:Revenues) carrying a terse and a standard label.
const SAMPLE_LABEL_LINKBASE = `
<link:linkbase xmlns:link="http://www.xbrl.org/2003/linkbase"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:us-gaap="http://fasb.org/us-gaap/2024">
<link:labelLink xlink:type="extended">
<link:loc xlink:type="locator" xlink:label="loc_rev" xlink:href="test.xsd#us-gaap_Revenues" />
<link:label xlink:type="resource" xlink:label="lab_terse" xlink:role="http://www.xbrl.org/2003/role/terseLabel">Rev.</link:label>
<link:label xlink:type="resource" xlink:label="lab_label" xlink:role="http://www.xbrl.org/2003/role/label">Revenues</link:label>
<link:labelArc xlink:type="arc" xlink:from="loc_rev" xlink:to="lab_terse" />
<link:labelArc xlink:type="arc" xlink:from="loc_rev" xlink:to="lab_label" />
</link:labelLink>
</link:linkbase>
`;

// Presentation linkbase: one root with two ordered children under a StatementOfOperations role.
const SAMPLE_PRESENTATION_LINKBASE = `
<link:linkbase xmlns:link="http://www.xbrl.org/2003/linkbase"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:us-gaap="http://fasb.org/us-gaap/2024">
<link:presentationLink xlink:type="extended" xlink:role="http://www.xbrl.org/2003/role/StatementOfOperations">
<link:loc xlink:type="locator" xlink:label="root" xlink:href="test.xsd#us-gaap_StatementLineItems" />
<link:loc xlink:type="locator" xlink:label="rev" xlink:href="test.xsd#us-gaap_Revenues" />
<link:loc xlink:type="locator" xlink:label="cogs" xlink:href="test.xsd#us-gaap_CostOfGoodsSold" />
<link:presentationArc xlink:type="arc" xlink:from="root" xlink:to="rev" order="1" />
<link:presentationArc xlink:type="arc" xlink:from="root" xlink:to="cogs" order="2" />
</link:presentationLink>
</link:linkbase>
`;

describe('linkbase parser', () => {
  it('builds preferred labels from label linkbase', () => {
    const labelByConcept = parseLabelLinkbase(SAMPLE_LABEL_LINKBASE);

    // The standard label outranks the terse one.
    expect(labelByConcept.get('http://fasb.org/us-gaap/2024#Revenues')).toBe('Revenues');
  });

  it('builds role trees with depth/order/parent metadata', () => {
    const concepts = parsePresentationLinkbase(SAMPLE_PRESENTATION_LINKBASE);
    const byQName = (qname: string) => concepts.find((row) => row.qname === qname);

    expect(concepts.length).toBe(3);

    const root = byQName('us-gaap:StatementLineItems');
    const revenue = byQName('us-gaap:Revenues');
    const cogs = byQName('us-gaap:CostOfGoodsSold');

    expect(root?.depth).toBe(0);
    expect(root?.parentConceptKey).toBeNull();
    expect(revenue?.depth).toBe(1);
    expect(cogs?.depth).toBe(1);
    expect(revenue?.parentConceptKey).toBe(root?.conceptKey ?? null);
    expect(revenue?.order).toBeLessThan(cogs?.order ?? Number.POSITIVE_INFINITY);
  });

  it('classifies statement roles into canonical statement kinds', () => {
    const roleBase = 'http://www.xbrl.org/2003/role/';

    expect(classifyStatementRole(`${roleBase}StatementOfOperations`)).toBe('income');
    expect(classifyStatementRole(`${roleBase}StatementOfFinancialPosition`)).toBe('balance');
    expect(classifyStatementRole(`${roleBase}StatementOfCashFlows`)).toBe('cash_flow');
  });
});
|
||||
310
lib/server/taxonomy/linkbase-parser.ts
Normal file
310
lib/server/taxonomy/linkbase-parser.ts
Normal file
@@ -0,0 +1,310 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import type { TaxonomyNamespaceMap, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/**
 * Decodes XML entity references in `value` and trims the result.
 *
 * Handles the predefined entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
 * `&apos;`), `&nbsp;`, and decimal / hexadecimal character references.
 * Non-breaking spaces (`&nbsp;`, `&#160;`, `&#xA0;`) become a regular space.
 * Unknown or out-of-range references are left untouched.
 *
 * Decoding happens in a single pass so already-escaped text is not decoded
 * twice (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param value Raw text taken from an XML document.
 * @returns The decoded, trimmed text.
 */
function decodeXmlEntities(value: string) {
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' ']
  ]);

  return value
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (reference: string, body: string) => {
      const key = body.toLowerCase();

      if (!key.startsWith('#')) {
        return namedEntities.get(key) ?? reference;
      }

      const codePoint = key.startsWith('#x')
        ? Number.parseInt(key.slice(2), 16)
        : Number.parseInt(key.slice(1), 10);

      // String.fromCodePoint throws outside the Unicode range, so keep such references verbatim.
      if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
        return reference;
      }

      return codePoint === 160 ? ' ' : String.fromCodePoint(codePoint);
    })
    .trim();
}
|
||||
|
||||
/**
 * Collects `xmlns:prefix="uri"` declarations into a prefix -> URI map.
 *
 * Declarations are read from the first tag whose text contains `linkbase`;
 * when no such tag exists, the first 1200 characters of the document are
 * scanned instead. Later declarations of a prefix overwrite earlier ones.
 */
function parseNamespaceMap(raw: string): TaxonomyNamespaceMap {
  const rootTag = raw.match(/<[^>]*linkbase[^>]*>/i);
  const declarationSource = rootTag ? rootTag[0] : raw.slice(0, 1200);

  const namespaces: TaxonomyNamespaceMap = {};
  const declaration = /xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g;

  let found: RegExpExecArray | null = declaration.exec(declarationSource);
  while (found !== null) {
    const prefix = (found[1] ?? '').trim();
    const uri = (found[2] ?? '').trim();
    if (prefix && uri) {
      namespaces[prefix] = uri;
    }

    found = declaration.exec(declarationSource);
  }

  return namespaces;
}
|
||||
|
||||
/**
 * Derives a `prefix:localName` QName from a locator href.
 *
 * The part after the first `#` is used (or the whole href when there is no
 * `#`), with a leading `loc_` marker removed. A value that already contains
 * `:` is returned as is; otherwise the first `_` is taken as the
 * prefix/local-name separator.
 *
 * @returns The QName, or null when the fragment is empty or has no separator.
 */
function qnameFromHref(href: string) {
  const hashAt = href.indexOf('#');
  const fragment = hashAt >= 0 ? href.slice(hashAt + 1) : href;
  if (!fragment) {
    return null;
  }

  const identifier = fragment.trim().replace(/^loc_+/i, '');
  if (!identifier) {
    return null;
  }
  if (identifier.includes(':')) {
    return identifier;
  }

  const separatorAt = identifier.indexOf('_');
  if (separatorAt < 0) {
    return null;
  }

  return `${identifier.slice(0, separatorAt)}:${identifier.slice(separatorAt + 1)}`;
}
|
||||
|
||||
/**
 * Expands a QName into its concept identity using the namespace map.
 *
 * The text before the first `:` is the prefix; everything after it is the
 * local name. A prefix missing from `namespaces` maps to `urn:unknown:<prefix>`.
 *
 * @returns The QName, namespace URI, local name and `namespaceUri#localName`
 *   concept key, or null when prefix or local name is empty.
 */
function conceptFromQName(qname: string, namespaces: TaxonomyNamespaceMap) {
  const colonAt = qname.indexOf(':');
  const prefix = colonAt < 0 ? qname : qname.slice(0, colonAt);
  const localName = colonAt < 0 ? '' : qname.slice(colonAt + 1);

  if (!prefix || !localName) {
    return null;
  }

  const namespaceUri = namespaces[prefix] ?? `urn:unknown:${prefix}`;
  const conceptKey = `${namespaceUri}#${localName}`;

  return { qname, namespaceUri, localName, conceptKey };
}
|
||||
|
||||
/**
 * Ranks a label role (case-insensitive); a higher value is preferred.
 *
 * Standard label 4, terse label 3, verbose label 2, any other role 1,
 * missing or empty role 0.
 */
function labelPriority(role: string | null) {
  const normalized = (role ?? '').toLowerCase();
  if (normalized === '') {
    return 0;
  }

  const rankedSuffixes: Array<[string, number]> = [
    ['/label', 4],
    ['/terselabel', 3],
    ['/verboselabel', 2]
  ];
  for (const [suffix, rank] of rankedSuffixes) {
    if (normalized.endsWith(suffix)) {
      return rank;
    }
  }

  return 1;
}
|
||||
|
||||
/**
 * Maps a presentation role URI to a statement kind by keyword matching on
 * the lower-cased URI.
 *
 * Rules are evaluated in order and the first match wins: cash flow, equity,
 * comprehensive income, balance sheet, income statement.
 *
 * @returns The statement kind, or null when no rule matches.
 */
export function classifyStatementRole(roleUri: string): FinancialStatementKind | null {
  const normalized = roleUri.toLowerCase();

  // Order matters: earlier rules shadow later ones.
  const rules: Array<[RegExp, FinancialStatementKind]> = [
    [/cash\s*flow|statementsof?cashflows|netcash/, 'cash_flow'],
    [/shareholders?|stockholders?|equity|retainedearnings/, 'equity'],
    [/comprehensive\s*income/, 'comprehensive_income'],
    [/balance\s*sheet|financial\s*position|assets?andliabilities/, 'balance'],
    [/operations|income\s*statement|statementsofincome|profit/, 'income']
  ];

  for (const [pattern, kind] of rules) {
    if (pattern.test(normalized)) {
      return kind;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Extracts the preferred label of each concept from a label linkbase.
 *
 * Each `labelLink` block is processed on its own: locators map an
 * `xlink:label` to a concept key, label resources map an `xlink:label` to
 * text and role, and `labelArc` elements join the two. For each concept the
 * label whose role ranks highest wins; on equal rank the first one seen is kept.
 *
 * @param raw Label linkbase XML.
 * @returns Map from concept key (`namespaceUri#localName`) to label text.
 */
export function parseLabelLinkbase(raw: string): Map<string, string> {
  const namespaces = parseNamespaceMap(raw);
  const bestByConcept = new Map<string, { text: string; priority: number }>();

  const labelLinkPattern = /<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?labelLink>/gi;
  const locatorPattern = /<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi;
  const resourcePattern = /<(?:[a-z0-9_\-]+:)?label\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?label>/gi;
  const arcPattern = /<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)\/?>/gi;

  for (const linkMatch of raw.matchAll(labelLinkPattern)) {
    const block = linkMatch[1] ?? '';
    const conceptKeyByLocator = new Map<string, string>();
    const resourceByLabel = new Map<string, { text: string; role: string | null }>();

    // Locators: xlink:label -> concept key.
    for (const locatorMatch of block.matchAll(locatorPattern)) {
      const attrs = locatorMatch[1] ?? '';
      const label = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const href = attrs.match(/\bxlink:href=["']([^"']+)["']/i)?.[1]?.trim() ?? '';

      const qname = label && href ? qnameFromHref(href) : null;
      const concept = qname ? conceptFromQName(qname, namespaces) : null;
      if (concept) {
        conceptKeyByLocator.set(label, concept.conceptKey);
      }
    }

    // Label resources: xlink:label -> whitespace-collapsed text and role.
    for (const resourceMatch of block.matchAll(resourcePattern)) {
      const attrs = resourceMatch[1] ?? '';
      const text = decodeXmlEntities(resourceMatch[2] ?? '').replace(/\s+/g, ' ').trim();
      if (!text) {
        continue;
      }

      const resourceLabel = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const role = attrs.match(/\bxlink:role=["']([^"']+)["']/i)?.[1]?.trim() ?? null;
      if (resourceLabel) {
        resourceByLabel.set(resourceLabel, { text, role });
      }
    }

    // Arcs: connect a locator to a resource and keep the best-ranked label.
    for (const arcMatch of block.matchAll(arcPattern)) {
      const attrs = arcMatch[1] ?? '';
      const from = attrs.match(/\bxlink:from=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const to = attrs.match(/\bxlink:to=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!from || !to) {
        continue;
      }

      const conceptKey = conceptKeyByLocator.get(from);
      const resource = resourceByLabel.get(to);
      if (!conceptKey || !resource) {
        continue;
      }

      const priority = labelPriority(resource.role);
      const incumbent = bestByConcept.get(conceptKey);
      if (!incumbent || priority > incumbent.priority) {
        bestByConcept.set(conceptKey, { text: resource.text, priority });
      }
    }
  }

  const labels = new Map<string, string>();
  for (const [conceptKey, best] of bestByConcept) {
    labels.set(conceptKey, best.text);
  }

  return labels;
}
|
||||
|
||||
/**
 * Flattens the presentation trees of a presentation linkbase into rows.
 *
 * For each `presentationLink` with an `xlink:role`:
 * - locators map an `xlink:label` to a concept;
 * - `presentationArc` elements define parent -> child edges with an order
 *   (falling back to the arc's position among its siblings);
 * - every label that is referenced but never a child is a root, and each
 *   root is walked depth-first with children sorted by order.
 *
 * A row's `order` is its parent's order plus `(childIndex + 1) / 1000`;
 * roots use their 1-based position.
 *
 * Malformed input is tolerated: an arc pointing back to a label already on
 * the current path is skipped, so a cyclic linkbase cannot cause unbounded
 * recursion. Repeated arcs between the same parent and child at the same
 * depth produce a single row.
 *
 * @param raw Presentation linkbase XML.
 * @returns One row per visited concept, in traversal order.
 */
export function parsePresentationLinkbase(raw: string): TaxonomyPresentationConcept[] {
  const namespaces = parseNamespaceMap(raw);
  const rows: TaxonomyPresentationConcept[] = [];

  const linkPattern = /<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?presentationLink>/gi;
  for (const linkMatch of raw.matchAll(linkPattern)) {
    const linkAttrs = linkMatch[1] ?? '';
    const block = linkMatch[2] ?? '';
    const roleUri = linkAttrs.match(/\bxlink:role=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
    if (!roleUri) {
      continue;
    }

    const locByLabel = new Map<string, { conceptKey: string; qname: string; isAbstract: boolean }>();
    for (const locMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi)) {
      const attrs = locMatch[1] ?? '';
      const label = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const href = attrs.match(/\bxlink:href=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!label || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      if (!qname) {
        continue;
      }

      const concept = conceptFromQName(qname, namespaces);
      if (!concept) {
        continue;
      }

      locByLabel.set(label, {
        conceptKey: concept.conceptKey,
        qname: concept.qname,
        // Heuristic: a concept is flagged abstract when its local name contains "abstract".
        isAbstract: /abstract/i.test(concept.localName)
      });
    }

    const childrenByLabel = new Map<string, Array<{ label: string; order: number }>>();
    const incoming = new Set<string>();
    const allReferenced = new Set<string>();

    for (const arcMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)\/?>/gi)) {
      const attrs = arcMatch[1] ?? '';
      const from = attrs.match(/\bxlink:from=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const to = attrs.match(/\bxlink:to=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const orderRaw = attrs.match(/\border=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const order = Number.parseFloat(orderRaw);

      // Arcs referencing unknown locators are ignored.
      if (!from || !to || !locByLabel.has(from) || !locByLabel.has(to)) {
        continue;
      }

      const group = childrenByLabel.get(from) ?? [];
      group.push({ label: to, order: Number.isFinite(order) ? order : group.length + 1 });
      childrenByLabel.set(from, group);

      incoming.add(to);
      allReferenced.add(from);
      allReferenced.add(to);
    }

    const roots = [...allReferenced].filter((label) => !incoming.has(label));
    // Deduplicates repeated arcs (same parent, child and depth).
    const visited = new Set<string>();
    // Labels on the current root-to-node path; re-entering one means the arcs form a cycle.
    const activePath = new Set<string>();

    const visit = (label: string, depth: number, parentLabel: string | null, baseOrder: number): void => {
      const node = locByLabel.get(label);
      if (!node || activePath.has(label)) {
        return;
      }

      const pathKey = `${parentLabel ?? 'root'}::${label}::${depth}`;
      if (visited.has(pathKey)) {
        return;
      }
      visited.add(pathKey);

      const parentConceptKey = parentLabel ? (locByLabel.get(parentLabel)?.conceptKey ?? null) : null;
      rows.push({
        conceptKey: node.conceptKey,
        qname: node.qname,
        roleUri,
        order: baseOrder,
        depth,
        parentConceptKey,
        isAbstract: node.isAbstract
      });

      const children = [...(childrenByLabel.get(label) ?? [])].sort((left, right) => left.order - right.order);

      activePath.add(label);
      children.forEach((child, index) => {
        visit(child.label, depth + 1, label, baseOrder + (index + 1) / 1000);
      });
      activePath.delete(label);
    };

    roots.forEach((root, index) => {
      visit(root, 0, null, index + 1);
    });
  }

  return rows;
}
|
||||
374
lib/server/taxonomy/materialize.ts
Normal file
374
lib/server/taxonomy/materialize.ts
Normal file
@@ -0,0 +1,374 @@
|
||||
import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
|
||||
import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
||||
import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy';
|
||||
import { classifyStatementRole } from '@/lib/server/taxonomy/linkbase-parser';
|
||||
import { conceptStatementFallback } from '@/lib/server/taxonomy/xbrl-parser';
|
||||
|
||||
/** Returns the accession number with every hyphen removed. */
function compactAccessionNumber(value: string) {
  return value.split('-').join('');
}
|
||||
|
||||
/**
 * Reports whether a namespace URI looks like a US-GAAP namespace.
 * Matching is case-insensitive and succeeds when either pattern is found anywhere in the URI.
 */
function isUsGaapNamespace(namespaceUri: string) {
  const usGaapPatterns = [/fasb\.org\/us-gaap/i, /us-gaap/i];
  return usGaapPatterns.some((pattern) => pattern.test(namespaceUri));
}
|
||||
|
||||
/**
 * Splits a concept key of the form `<namespaceUri>#<localName>` at its last `#`.
 * Keys without a `#` are returned whole as the local name under the `urn:unknown` namespace.
 */
function splitConceptKey(conceptKey: string) {
  const separatorAt = conceptKey.lastIndexOf('#');

  return separatorAt >= 0
    ? {
        namespaceUri: conceptKey.slice(0, separatorAt),
        localName: conceptKey.slice(separatorAt + 1)
      }
    : {
        namespaceUri: 'urn:unknown',
        localName: conceptKey
      };
}
|
||||
|
||||
/**
 * Derives a human-readable label from a concept local name by inserting spaces at
 * camel-case boundaries, replacing underscores with spaces and trimming the result.
 */
function localNameToLabel(localName: string) {
  // lower-case letter or digit followed by an upper-case letter: "netIncome" -> "net Income"
  const lowerToUpperSplit = localName.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
  // run of capitals followed by a capitalised word: "EBITDAMargin" -> "EBITDA Margin"
  const acronymSplit = lowerToUpperSplit.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  const withoutUnderscores = acronymSplit.replace(/_/g, ' ');

  return withoutUnderscores.trim();
}
|
||||
|
||||
/**
 * Builds a record with one entry per financial statement kind.
 * The factory is invoked once per kind so each entry receives its own value.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const income = factory();
  const balance = factory();
  const cashFlow = factory();
  const equity = factory();
  const comprehensiveIncome = factory();

  return {
    income,
    balance,
    cash_flow: cashFlow,
    equity,
    comprehensive_income: comprehensiveIncome
  };
}
|
||||
|
||||
/**
 * Builds a string key from a fact's period start, end and instant.
 * Missing components contribute an empty string.
 */
function periodSignature(fact: TaxonomyFact) {
  const { periodStart, periodEnd, periodInstant } = fact;

  return `start:${periodStart ?? ''}|end:${periodEnd ?? ''}|instant:${periodInstant ?? ''}`;
}
|
||||
|
||||
/**
 * Picks the date that represents a fact's period: its end date, else its instant,
 * else the supplied fallback.
 */
function periodDate(fact: TaxonomyFact, fallbackDate: string) {
  if (fact.periodEnd != null) {
    return fact.periodEnd;
  }

  if (fact.periodInstant != null) {
    return fact.periodInstant;
  }

  return fallbackDate;
}
|
||||
|
||||
/**
 * Converts a date string to epoch milliseconds via `Date.parse`.
 * Null or empty input yields NaN, as does any string `Date.parse` cannot read.
 */
function parseEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
|
||||
|
||||
/**
 * Returns a sorted copy of the periods (the input array is not mutated).
 * Periods are ordered by ascending period end (falling back to the filing date);
 * when either date is unparseable or both are equal, the period id decides.
 */
function sortPeriods(periods: FilingTaxonomyPeriod[]) {
  const byDateThenId = (earlier: FilingTaxonomyPeriod, later: FilingTaxonomyPeriod) => {
    const earlierEpoch = parseEpoch(earlier.periodEnd ?? earlier.filingDate);
    const laterEpoch = parseEpoch(later.periodEnd ?? later.filingDate);
    const datesComparable = Number.isFinite(earlierEpoch) && Number.isFinite(laterEpoch);

    return datesComparable && earlierEpoch !== laterEpoch
      ? earlierEpoch - laterEpoch
      : earlier.id.localeCompare(later.id);
  };

  const sorted = periods.slice();
  sorted.sort(byDateThenId);
  return sorted;
}
|
||||
|
||||
/**
 * Chooses one representative fact from a group, or null when the group is empty.
 * Preference order: dimensionless facts first, then the later period end/instant
 * (only when both dates parse and differ), then the larger absolute value.
 */
function pickPreferredFact<T extends TaxonomyFact>(facts: T[]) {
  const byPreference = (first: T, second: T) => {
    const firstDimensionless = first.isDimensionless ? 1 : 0;
    const secondDimensionless = second.isDimensionless ? 1 : 0;
    if (firstDimensionless !== secondDimensionless) {
      return secondDimensionless - firstDimensionless;
    }

    const firstEpoch = parseEpoch(first.periodEnd ?? first.periodInstant);
    const secondEpoch = parseEpoch(second.periodEnd ?? second.periodInstant);
    const datesComparable = Number.isFinite(firstEpoch) && Number.isFinite(secondEpoch);
    if (datesComparable && firstEpoch !== secondEpoch) {
      return secondEpoch - firstEpoch;
    }

    return Math.abs(second.value) - Math.abs(first.value);
  };

  // Sorting a copy keeps the caller's array untouched; an empty input yields no first element.
  const ranked = facts.slice().sort(byPreference);
  return ranked[0] ?? null;
}
|
||||
|
||||
/**
 * Projects parsed taxonomy facts and presentation nodes into the structures stored
 * for a filing: distinct reporting periods, statement rows grouped by statement kind,
 * a concept catalogue, flat fact rows and a total dimension count.
 *
 * @param input Filing identity (`filingId`, `accessionNumber`, `filingDate`, `filingType`),
 *   the parsed `facts`, the `presentation` nodes and a concept-key -> label map.
 * @returns `{ periods, statement_rows, concepts, facts, dimensionsCount }`.
 */
export function materializeTaxonomyStatements(input: {
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  facts: TaxonomyFact[];
  presentation: TaxonomyPresentationConcept[];
  labelByConcept: Map<string, string>;
}) {
  const periodBySignature = new Map<string, FilingTaxonomyPeriod>();
  const compactAccession = compactAccessionNumber(input.accessionNumber);

  // One period per distinct (start, end, instant) signature, numbered in first-seen order.
  for (const fact of input.facts) {
    const signature = periodSignature(fact);
    if (periodBySignature.has(signature)) {
      continue;
    }

    const date = periodDate(fact, input.filingDate);
    const id = `${date}-${compactAccession}-${periodBySignature.size + 1}`;

    periodBySignature.set(signature, {
      id,
      filingId: input.filingId,
      accessionNumber: input.accessionNumber,
      filingDate: input.filingDate,
      periodStart: fact.periodStart,
      periodEnd: fact.periodEnd ?? fact.periodInstant ?? input.filingDate,
      filingType: input.filingType,
      periodLabel: fact.periodInstant && !fact.periodStart
        ? 'Instant'
        : fact.periodStart && fact.periodEnd
          ? `${fact.periodStart} to ${fact.periodEnd}`
          : 'Filing Period'
    });
  }

  const periods = sortPeriods([...periodBySignature.values()]);
  const periodIdBySignature = new Map<string, string>(
    [...periodBySignature.entries()].map(([signature, period]) => [signature, period.id])
  );

  // Index presentation nodes once: by concept (any role) and by statement kind + concept.
  // The per-statement index replaces a full rescan of `input.presentation` per concept.
  const presentationByConcept = new Map<string, TaxonomyPresentationConcept[]>();
  const presentationByStatement = createStatementRecord<Map<string, TaxonomyPresentationConcept[]>>(
    () => new Map()
  );
  for (const node of input.presentation) {
    const existing = presentationByConcept.get(node.conceptKey);
    if (existing) {
      existing.push(node);
    } else {
      presentationByConcept.set(node.conceptKey, [node]);
    }

    const nodeStatement = classifyStatementRole(node.roleUri);
    const statementIndex = nodeStatement ? presentationByStatement[nodeStatement] : undefined;
    if (statementIndex) {
      const statementNodes = statementIndex.get(node.conceptKey);
      if (statementNodes) {
        statementNodes.push(node);
      } else {
        statementIndex.set(node.conceptKey, [node]);
      }
    }
  }

  // Attach a statement kind to every fact: from the first presentation node of its
  // concept when one exists, otherwise from the concept-name fallback.
  const enrichedFacts = input.facts.map((fact, index) => {
    const nodes = presentationByConcept.get(fact.conceptKey) ?? [];
    const bestNode = nodes[0] ?? null;
    const statementKind = bestNode
      ? classifyStatementRole(bestNode.roleUri)
      : conceptStatementFallback(fact.localName);

    return {
      ...fact,
      __sourceFactId: index + 1,
      statement_kind: statementKind,
      role_uri: bestNode?.roleUri ?? null
    };
  });

  const rowsByStatement = createStatementRecord<TaxonomyStatementRow[]>(() => []);
  const conceptByKey = new Map<string, TaxonomyConcept>();
  const groupedByStatement = createStatementRecord<Map<string, typeof enrichedFacts>>(() => new Map());

  for (const fact of enrichedFacts) {
    if (!fact.statement_kind) {
      continue;
    }

    const group = groupedByStatement[fact.statement_kind].get(fact.conceptKey);
    if (group) {
      group.push(fact);
    } else {
      groupedByStatement[fact.statement_kind].set(fact.conceptKey, [fact]);
    }
  }

  for (const statement of Object.keys(rowsByStatement) as FinancialStatementKind[]) {
    const statementPresentation = presentationByStatement[statement];
    const statementFacts = groupedByStatement[statement];
    const statementRows = rowsByStatement[statement];

    // Concepts come from the statement's presentation tree plus any fact assigned to it.
    const conceptKeys = new Set<string>([...statementPresentation.keys(), ...statementFacts.keys()]);

    const orderedConcepts = [...conceptKeys]
      .map((conceptKey) => {
        const presentationNodes = statementPresentation.get(conceptKey) ?? [];
        const hasPresentation = presentationNodes.length > 0;

        return {
          conceptKey,
          hasPresentation,
          // Sort-only sentinel: concepts without presentation go after all presented ones.
          presentationOrder: hasPresentation
            ? Math.min(...presentationNodes.map((node) => node.order))
            : Number.MAX_SAFE_INTEGER,
          presentationDepth: hasPresentation
            ? Math.min(...presentationNodes.map((node) => node.depth))
            : 0,
          roleUri: presentationNodes[0]?.roleUri ?? null,
          parentConceptKey: presentationNodes[0]?.parentConceptKey ?? null
        };
      })
      .sort((left, right) => {
        if (left.presentationOrder !== right.presentationOrder) {
          return left.presentationOrder - right.presentationOrder;
        }

        return left.conceptKey.localeCompare(right.conceptKey);
      });

    for (const orderedConcept of orderedConcepts) {
      const facts = statementFacts.get(orderedConcept.conceptKey) ?? [];
      const { namespaceUri, localName } = splitConceptKey(orderedConcept.conceptKey);
      const qname = facts[0]?.qname ?? `unknown:${localName}`;
      const label = input.labelByConcept.get(orderedConcept.conceptKey) ?? localNameToLabel(localName);
      const values: Record<string, number | null> = {};
      const units: Record<string, string | null> = {};

      const factGroups = new Map<string, typeof facts>();
      for (const fact of facts) {
        const signature = periodSignature(fact);
        const group = factGroups.get(signature);
        if (group) {
          group.push(fact);
        } else {
          factGroups.set(signature, [fact]);
        }
      }

      const sourceFactIds: number[] = [];
      let hasDimensions = false;
      for (const [signature, group] of factGroups.entries()) {
        const periodId = periodIdBySignature.get(signature);
        if (!periodId) {
          continue;
        }

        const preferred = pickPreferredFact(group);
        if (!preferred) {
          continue;
        }

        values[periodId] = preferred.value;
        units[periodId] = preferred.unit;
        sourceFactIds.push(preferred.__sourceFactId);

        if (group.some((entry) => !entry.isDimensionless)) {
          hasDimensions = true;
        }
      }

      // Concepts that appear only in the presentation tree (no values) produce no row.
      if (Object.keys(values).length === 0) {
        continue;
      }

      // The sort sentinel is finite, so an isFinite() test alone cannot detect a
      // missing presentation order; use the explicit flag. Fallback rows continue
      // the sequence after the last emitted row so the order stays monotonic.
      const previousOrder = statementRows[statementRows.length - 1]?.order ?? 0;
      const usePresentationOrder = orderedConcept.hasPresentation
        && Number.isFinite(orderedConcept.presentationOrder);
      const order = usePresentationOrder
        ? orderedConcept.presentationOrder
        : Math.max(statementRows.length, Math.floor(previousOrder)) + 1;

      const row: TaxonomyStatementRow = {
        key: orderedConcept.conceptKey,
        label,
        conceptKey: orderedConcept.conceptKey,
        qname,
        namespaceUri,
        localName,
        isExtension: !isUsGaapNamespace(namespaceUri),
        statement,
        roleUri: orderedConcept.roleUri,
        order,
        depth: orderedConcept.presentationDepth,
        parentKey: orderedConcept.parentConceptKey,
        values,
        units,
        hasDimensions,
        sourceFactIds
      };

      statementRows.push(row);

      // The first statement that yields a row for a concept defines its catalogue entry.
      if (!conceptByKey.has(orderedConcept.conceptKey)) {
        conceptByKey.set(orderedConcept.conceptKey, {
          concept_key: orderedConcept.conceptKey,
          qname,
          namespace_uri: namespaceUri,
          local_name: localName,
          label,
          is_extension: !isUsGaapNamespace(namespaceUri),
          statement_kind: statement,
          role_uri: orderedConcept.roleUri,
          presentation_order: row.order,
          presentation_depth: row.depth,
          parent_concept_key: row.parentKey,
          is_abstract: /abstract/i.test(localName)
        });
      }
    }
  }

  // Catalogue every remaining concept that has facts but produced no statement row.
  for (const fact of enrichedFacts) {
    if (conceptByKey.has(fact.conceptKey)) {
      continue;
    }

    conceptByKey.set(fact.conceptKey, {
      concept_key: fact.conceptKey,
      qname: fact.qname,
      namespace_uri: fact.namespaceUri,
      local_name: fact.localName,
      label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName),
      is_extension: !isUsGaapNamespace(fact.namespaceUri),
      statement_kind: fact.statement_kind,
      role_uri: fact.role_uri,
      presentation_order: null,
      presentation_depth: null,
      parent_concept_key: null,
      is_abstract: /abstract/i.test(fact.localName)
    });
  }

  const concepts = [...conceptByKey.values()];
  const factRows = enrichedFacts.map((fact) => ({
    concept_key: fact.conceptKey,
    qname: fact.qname,
    namespace_uri: fact.namespaceUri,
    local_name: fact.localName,
    statement_kind: fact.statement_kind,
    role_uri: fact.role_uri,
    context_id: fact.contextId,
    unit: fact.unit,
    decimals: fact.decimals,
    value_num: fact.value,
    period_start: fact.periodStart,
    period_end: fact.periodEnd,
    period_instant: fact.periodInstant,
    dimensions: fact.dimensions,
    is_dimensionless: fact.isDimensionless,
    source_file: fact.sourceFile
  }));

  const dimensionsCount = enrichedFacts.reduce((total, fact) => total + fact.dimensions.length, 0);

  return {
    periods,
    statement_rows: rowsByStatement,
    concepts,
    facts: factRows,
    dimensionsCount
  };
}
|
||||
55
lib/server/taxonomy/metrics.test.ts
Normal file
55
lib/server/taxonomy/metrics.test.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import type { TaxonomyFact } from '@/lib/server/taxonomy/types';
|
||||
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
|
||||
|
||||
/**
 * Test fixture: builds a dimensionless us-gaap USD fact for calendar year 2025.
 * Any field can be replaced through `overrides`.
 */
function fact(localName: string, value: number, overrides?: Partial<TaxonomyFact>): TaxonomyFact {
  const namespaceUri = 'http://fasb.org/us-gaap/2024';
  const defaults: TaxonomyFact = {
    conceptKey: `${namespaceUri}#${localName}`,
    qname: `us-gaap:${localName}`,
    namespaceUri,
    localName,
    contextId: 'c1',
    unit: 'iso4217:USD',
    decimals: '-6',
    value,
    periodStart: '2025-01-01',
    periodEnd: '2025-12-31',
    periodInstant: null,
    dimensions: [],
    isDimensionless: true,
    sourceFile: 'abc_htm.xml'
  };

  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe('taxonomy metric derivation', () => {
  it('applies concept priority for canonical metrics and debt component fallback', () => {
    // Both revenue concepts are present; no direct debt concept is reported.
    const reportedFacts = [
      fact('SalesRevenueNet', 500),
      fact('Revenues', 450),
      fact('NetIncomeLoss', 40),
      fact('Assets', 1000),
      fact('CashAndCashEquivalentsAtCarryingValue', 80),
      fact('DebtCurrent', 15),
      fact('LongTermDebtNoncurrent', 35)
    ];
    const expectedMetrics = {
      revenue: 450,
      netIncome: 40,
      totalAssets: 1000,
      cash: 80,
      debt: 50
    };

    expect(deriveTaxonomyMetrics(reportedFacts)).toEqual(expectedMetrics);
  });

  it('uses direct debt concept before computed debt fallback when available', () => {
    const reportedFacts = [
      fact('DebtCurrent', 15),
      fact('LongTermDebtNoncurrent', 35),
      fact('LongTermDebtAndCapitalLeaseObligations', 90)
    ];

    const { debt } = deriveTaxonomyMetrics(reportedFacts);

    expect(debt).toBe(90);
  });
});
|
||||
106
lib/server/taxonomy/metrics.ts
Normal file
106
lib/server/taxonomy/metrics.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import type { Filing } from '@/lib/types';
|
||||
import type { TaxonomyFact } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/**
 * Candidate concept local names for each derived metric, listed in priority order:
 * the first name that has at least one matching fact is the one used.
 */
const METRIC_LOCAL_NAME_PRIORITY = {
  revenue: ['Revenues', 'SalesRevenueNet', 'RevenueFromContractWithCustomerExcludingAssessedTax', 'TotalRevenuesAndOtherIncome'],
  netIncome: ['NetIncomeLoss', 'ProfitLoss'],
  totalAssets: ['Assets'],
  cash: ['CashAndCashEquivalentsAtCarryingValue', 'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'],
  // Single concepts that already represent total debt.
  debtDirect: ['DebtAndFinanceLeaseLiabilities', 'Debt', 'LongTermDebtAndCapitalLeaseObligations'],
  // Components summed only when no direct debt concept is present.
  debtCurrent: ['DebtCurrent', 'ShortTermBorrowings', 'LongTermDebtCurrent'],
  debtNonCurrent: ['LongTermDebtNoncurrent', 'LongTermDebt', 'DebtNoncurrent']
} as const;
|
||||
|
||||
/**
 * Converts a date string to epoch milliseconds via `Date.parse`.
 * Null or empty input yields NaN, as does any string `Date.parse` cannot read.
 */
function normalizeDateToEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
|
||||
|
||||
/** Compares two concept local names case-insensitively (via `toLowerCase`). */
function sameLocalName(left: string, right: string) {
  const normalizedLeft = left.toLowerCase();
  const normalizedRight = right.toLowerCase();

  return normalizedLeft === normalizedRight;
}
|
||||
|
||||
/**
 * Chooses one representative fact from a list, or null when the list is empty.
 * Preference order: dimensionless facts first, then the later period end/instant
 * (only when both dates parse and differ), then the larger absolute value.
 */
function pickPreferredFact(facts: TaxonomyFact[]) {
  const byPreference = (first: TaxonomyFact, second: TaxonomyFact) => {
    const firstDimensionless = first.isDimensionless ? 1 : 0;
    const secondDimensionless = second.isDimensionless ? 1 : 0;
    if (firstDimensionless !== secondDimensionless) {
      return secondDimensionless - firstDimensionless;
    }

    const firstEpoch = normalizeDateToEpoch(first.periodEnd ?? first.periodInstant);
    const secondEpoch = normalizeDateToEpoch(second.periodEnd ?? second.periodInstant);
    const datesComparable = Number.isFinite(firstEpoch) && Number.isFinite(secondEpoch);
    if (datesComparable && firstEpoch !== secondEpoch) {
      return secondEpoch - firstEpoch;
    }

    return Math.abs(second.value) - Math.abs(first.value);
  };

  // Sort a copy so the caller's array keeps its order.
  const ranked = facts.slice().sort(byPreference);
  return ranked[0] ?? null;
}
|
||||
|
||||
/**
 * Walks `localNames` in priority order and returns the preferred fact for the first
 * name that has any matching fact; returns null when no name matches.
 */
function pickBestFact(facts: TaxonomyFact[], localNames: readonly string[]) {
  for (const candidateName of localNames) {
    const candidates = facts.filter((entry) => sameLocalName(entry.localName, candidateName));

    if (candidates.length > 0) {
      return pickPreferredFact(candidates);
    }
  }

  return null;
}
|
||||
|
||||
/** Adds two values, or returns null when either one is null. */
function sumIfBoth(left: number | null, right: number | null) {
  return left !== null && right !== null ? left + right : null;
}
|
||||
|
||||
/**
 * Derives the headline filing metrics from taxonomy facts using the concept
 * priorities in `METRIC_LOCAL_NAME_PRIORITY`. A metric is null when none of its
 * candidate concepts has a fact.
 *
 * Debt uses a direct total-debt concept when one exists; otherwise it is the sum of
 * the current and non-current components, and null unless both are present.
 */
export function deriveTaxonomyMetrics(facts: TaxonomyFact[]): NonNullable<Filing['metrics']> {
  const valueFor = (localNames: readonly string[]): number | null =>
    pickBestFact(facts, localNames)?.value ?? null;

  let debt = valueFor(METRIC_LOCAL_NAME_PRIORITY.debtDirect);
  if (debt === null) {
    debt = sumIfBoth(
      valueFor(METRIC_LOCAL_NAME_PRIORITY.debtCurrent),
      valueFor(METRIC_LOCAL_NAME_PRIORITY.debtNonCurrent)
    );
  }

  return {
    revenue: valueFor(METRIC_LOCAL_NAME_PRIORITY.revenue),
    netIncome: valueFor(METRIC_LOCAL_NAME_PRIORITY.netIncome),
    totalAssets: valueFor(METRIC_LOCAL_NAME_PRIORITY.totalAssets),
    cash: valueFor(METRIC_LOCAL_NAME_PRIORITY.cash),
    debt
  };
}
|
||||
49
lib/server/taxonomy/pdf-validation.test.ts
Normal file
49
lib/server/taxonomy/pdf-validation.test.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { __pdfValidationInternals } from '@/lib/server/taxonomy/pdf-validation';
|
||||
|
||||
describe('pdf metric validation internals', () => {
  const { parseValidationPayload, diffStatus } = __pdfValidationInternals;

  it('parses fenced json payloads and rejects invalid payloads', () => {
    const fencedPayload = [
      '```json',
      '{"revenue":{"value":1000,"pages":[3]},"cash":{"value":200,"pages":["4"]}}',
      '```'
    ].join('\n');

    const parsed = parseValidationPayload(fencedPayload);

    expect(parsed).not.toBeNull();
    expect(parsed?.revenue?.value).toBe(1000);
    expect(parsed?.cash?.pages).toEqual(['4']);
    expect(parseValidationPayload('not-json')).toBeNull();
  });

  it('compares taxonomy vs llm values with fixed tolerance rules', () => {
    // Each case: [taxonomy value, llm value, expected comparison result].
    const cases: Array<[number | null, number | null, ReturnType<typeof diffStatus>]> = [
      [1000, 1004, { status: 'matched', absoluteDiff: 4, relativeDiff: 0.004 }],
      [1000, 1007, { status: 'mismatch', absoluteDiff: 7, relativeDiff: 0.007 }],
      [0.5, 1.2, { status: 'matched', absoluteDiff: 0.7, relativeDiff: 0.7 }],
      [null, 1, { status: 'mismatch', absoluteDiff: null, relativeDiff: null }],
      [null, null, { status: 'not_run', absoluteDiff: null, relativeDiff: null }]
    ];

    for (const [taxonomyValue, llmValue, expected] of cases) {
      expect(diffStatus(taxonomyValue, llmValue)).toEqual(expected);
    }
  });
});
|
||||
336
lib/server/taxonomy/pdf-validation.ts
Normal file
336
lib/server/taxonomy/pdf-validation.ts
Normal file
@@ -0,0 +1,336 @@
|
||||
import { execFile } from 'node:child_process';
|
||||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { promisify } from 'node:util';
|
||||
import type { Filing, MetricValidationResult } from '@/lib/types';
|
||||
import { runAiAnalysis } from '@/lib/server/ai';
|
||||
import type { TaxonomyAsset, TaxonomyMetricValidationCheck } from '@/lib/server/taxonomy/types';
|
||||
|
||||
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);

/** Metric keys validated against the PDF, in the order their checks are produced. */
const METRIC_KEYS: (keyof NonNullable<Filing['metrics']>)[] = ['revenue', 'netIncome', 'totalAssets', 'cash', 'debt'];
|
||||
|
||||
/**
 * Locates the JSON portion of a model response.
 * Prefers the contents of the first Markdown code fence (optionally tagged `json`);
 * otherwise takes the text from the first `{` to the last `}`. Returns null when
 * neither is found.
 */
function extractJsonCandidate(raw: string) {
  const fencedJson = raw.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1];
  if (fencedJson !== undefined) {
    return fencedJson;
  }

  const start = raw.indexOf('{');
  const end = raw.lastIndexOf('}');
  return start >= 0 && end > start ? raw.slice(start, end + 1) : null;
}

/**
 * Parses the model response into a per-metric payload.
 *
 * @returns The parsed object, or null when no JSON candidate is found, the candidate
 *   is not valid JSON, or the JSON is not a plain object. Arrays and primitives are
 *   rejected so they are reported as an invalid payload rather than being read as an
 *   empty metrics record.
 */
function parseValidationPayload(raw: string) {
  const candidate = extractJsonCandidate(raw);
  if (!candidate) {
    return null;
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(candidate);
  } catch {
    return null;
  }

  if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
    return null;
  }

  // Only the top-level shape is verified; per-entry fields are coerced by the callers.
  return decoded as Record<string, {
    value?: number | string | null;
    pages?: Array<number | string>;
  }>;
}
|
||||
|
||||
/**
 * Coerces a model-supplied value to a finite number.
 * Numbers pass through when finite. Strings have commas and whitespace removed before
 * conversion. Everything else, including empty strings, yields null.
 */
function asNumber(value: unknown) {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : null;
  }

  if (typeof value === 'string') {
    const compact = value.replace(/[,\s]/g, '');
    // Number('') is 0, which would turn a blank answer into a reported value of zero.
    if (compact === '') {
      return null;
    }

    const parsed = Number(compact);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
|
||||
|
||||
/**
 * Normalises a model-supplied page list to positive integers.
 * Non-array input yields an empty list. Numeric entries and numeric strings are
 * truncated toward zero; anything non-finite or not greater than zero is dropped.
 */
function asPageNumbers(raw: unknown): number[] {
  if (!Array.isArray(raw)) {
    return [];
  }

  const pages: number[] = [];
  for (const entry of raw) {
    let numeric = Number.NaN;
    if (typeof entry === 'number') {
      numeric = entry;
    } else if (typeof entry === 'string') {
      numeric = Number(entry);
    }

    if (!Number.isFinite(numeric)) {
      continue;
    }

    const page = Math.trunc(numeric);
    if (page > 0) {
      pages.push(page);
    }
  }

  return pages;
}
|
||||
|
||||
/**
 * Compares a taxonomy-derived value with the value the model extracted.
 *
 * - both null: `not_run`
 * - exactly one null: `mismatch` (no diffs reported)
 * - otherwise `matched` when the absolute difference is within
 *   max(1, 0.5% of |taxonomyValue|), else `mismatch`.
 *
 * The relative difference is the absolute difference divided by max(|taxonomyValue|, 1).
 */
function diffStatus(taxonomyValue: number | null, llmValue: number | null) {
  if (taxonomyValue === null || llmValue === null) {
    const nothingToCompare = taxonomyValue === null && llmValue === null;

    return {
      status: nothingToCompare ? ('not_run' as const) : ('mismatch' as const),
      absoluteDiff: null,
      relativeDiff: null
    };
  }

  const magnitude = Math.abs(taxonomyValue);
  const absoluteDiff = Math.abs(taxonomyValue - llmValue);
  const withinTolerance = absoluteDiff <= Math.max(1, magnitude * 0.005);

  return {
    status: withinTolerance ? ('matched' as const) : ('mismatch' as const),
    absoluteDiff,
    relativeDiff: absoluteDiff / Math.max(magnitude, 1)
  };
}
|
||||
|
||||
/**
 * Downloads a PDF and extracts its text by running the `pdftotext` executable on a
 * temporary copy, which is removed afterwards whether or not extraction succeeds.
 *
 * @returns The trimmed extracted text, or null when the PDF yields no text.
 * @throws When the HTTP request fails, when neither the content type nor the URL
 *   suffix indicates a PDF, or when `pdftotext` cannot be executed.
 */
async function extractPdfText(url: string, fetchImpl: typeof fetch) {
  const response = await fetchImpl(url, {
    headers: { Accept: 'application/pdf, */*;q=0.8' },
    cache: 'no-store'
  });
  if (!response.ok) {
    throw new Error(`PDF request failed (${response.status})`);
  }

  const contentType = response.headers.get('content-type') ?? '';
  const looksLikePdf = /pdf/i.test(contentType) || /\.pdf$/i.test(url);
  if (!looksLikePdf) {
    throw new Error(`Asset is not a PDF (${contentType || 'unknown content-type'})`);
  }

  const pdfBytes = new Uint8Array(await response.arrayBuffer());
  const workDir = await mkdtemp(join(tmpdir(), 'fiscal-pdf-'));
  const pdfPath = join(workDir, 'source.pdf');

  try {
    await writeFile(pdfPath, pdfBytes);

    // A trailing '-' makes pdftotext write the text to stdout instead of a file.
    const pdftotextArgs = ['-layout', '-enc', 'UTF-8', pdfPath, '-'];
    const { stdout } = await execFileAsync('pdftotext', pdftotextArgs, {
      maxBuffer: 16 * 1024 * 1024
    });

    const extracted = stdout.trim();
    return extracted.length > 0 ? extracted : null;
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Builds the extraction prompt sent to the model: instructions, the taxonomy baseline
 * metrics as JSON, and at most the first 80,000 characters of the PDF text, joined by
 * blank lines.
 */
function validationPrompt(metrics: Filing['metrics'], pdfText: string) {
  const baselineJson = JSON.stringify(metrics ?? {});
  const sections: string[] = [];

  sections.push('Extract numeric financial metrics from the provided financial statement PDF text.');
  sections.push(`Taxonomy baseline metrics: ${baselineJson}`);
  sections.push('Return ONLY JSON with keys revenue, netIncome, totalAssets, cash, debt.');
  sections.push('Each key must map to: {"value": number|null, "pages": [number]}.');
  sections.push('Use null when a metric is not found.');
  sections.push('PDF text follows:');
  sections.push(pdfText.slice(0, 80_000));

  return sections.join('\n\n');
}
|
||||
|
||||
/** Trims a provider/model name and returns null when it is missing or blank. */
function providerModelOrNull(value: string | undefined | null) {
  if (value == null) {
    return null;
  }

  const trimmed = value.trim();
  return trimmed === '' ? null : trimmed;
}
|
||||
|
||||
/** Converts snake_case validation checks into the camelCase shape used in `validation_result.checks`. */
function toValidationResultChecks(checks: TaxonomyMetricValidationCheck[]) {
  return checks.map((check) => ({
    metricKey: check.metric_key,
    taxonomyValue: check.taxonomy_value,
    llmValue: check.llm_value,
    absoluteDiff: check.absolute_diff,
    relativeDiff: check.relative_diff,
    status: check.status,
    evidencePages: check.evidence_pages,
    pdfUrl: check.pdf_url,
    provider: check.provider,
    model: check.model,
    error: check.error
  }));
}

/**
 * Cross-checks taxonomy-derived metrics against values a model extracts from the
 * filing's selected PDF asset.
 *
 * Outcomes:
 * - no selected PDF, or the PDF yields no text: `not_run` with no checks;
 * - PDF download/extraction throws: every metric is reported as `error`;
 * - the model call throws or returns no usable JSON: every metric is reported as `error`;
 * - otherwise each metric is compared with `diffStatus`. The overall status is
 *   `error` if any check errored, else `mismatch` if any check mismatched, else
 *   `matched` if at least one check matched, else `not_run` (nothing was comparable).
 *
 * @param input `metrics` to validate (null is treated as all-null metrics), the
 *   discovered `assets`, and an optional `fetchImpl` (defaults to the global `fetch`).
 * @returns The aggregate `validation_result` and the per-metric `metric_validations`.
 */
export async function validateMetricsWithPdfLlm(input: {
  metrics: Filing['metrics'];
  assets: TaxonomyAsset[];
  fetchImpl?: typeof fetch;
}): Promise<{
  validation_result: MetricValidationResult | null;
  metric_validations: TaxonomyMetricValidationCheck[];
}> {
  const taxonomyMetrics = input.metrics ?? {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };

  const selectedPdf = input.assets.find((asset) => asset.asset_type === 'pdf' && asset.is_selected);
  if (!selectedPdf) {
    return {
      validation_result: {
        status: 'not_run',
        checks: [],
        validatedAt: null
      },
      metric_validations: []
    };
  }

  const fetchImpl = input.fetchImpl ?? fetch;
  let pdfText: string | null = null;
  try {
    pdfText = await extractPdfText(selectedPdf.url, fetchImpl);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'PDF extraction failed';

    const checks: TaxonomyMetricValidationCheck[] = METRIC_KEYS.map((metricKey) => ({
      metric_key: metricKey,
      // `?? null` keeps this branch consistent with the comparison branch below.
      taxonomy_value: taxonomyMetrics[metricKey] ?? null,
      llm_value: null,
      absolute_diff: null,
      relative_diff: null,
      status: 'error',
      evidence_pages: [],
      pdf_url: selectedPdf.url,
      provider: null,
      model: null,
      error: message
    }));

    return {
      validation_result: {
        status: 'error',
        checks: toValidationResultChecks(checks),
        validatedAt: new Date().toISOString()
      },
      metric_validations: checks
    };
  }

  if (!pdfText) {
    return {
      validation_result: {
        status: 'not_run',
        checks: [],
        validatedAt: new Date().toISOString()
      },
      metric_validations: []
    };
  }

  let parsed: Record<string, { value?: number | string | null; pages?: Array<number | string> }> | null = null;
  let provider: string | null = null;
  let model: string | null = null;
  let modelError: string | null = null;

  try {
    const aiResult = await runAiAnalysis(validationPrompt(taxonomyMetrics, pdfText), undefined, {
      workload: 'extraction'
    });

    provider = providerModelOrNull(aiResult.provider);
    model = providerModelOrNull(aiResult.model);
    parsed = parseValidationPayload(aiResult.text);
    if (!parsed) {
      modelError = 'LLM response did not contain valid JSON payload';
    }
  } catch (error) {
    modelError = error instanceof Error ? error.message : 'LLM validation failed';
  }

  const validations: TaxonomyMetricValidationCheck[] = METRIC_KEYS.map((metricKey) => {
    const taxonomyValue = taxonomyMetrics[metricKey] ?? null;

    if (!parsed) {
      return {
        metric_key: metricKey,
        taxonomy_value: taxonomyValue,
        llm_value: null,
        absolute_diff: null,
        relative_diff: null,
        status: modelError ? 'error' : 'not_run',
        evidence_pages: [],
        pdf_url: selectedPdf.url,
        provider,
        model,
        error: modelError
      };
    }

    const entry = parsed[metricKey as string] ?? {};
    const llmValue = asNumber(entry.value);
    const pages = asPageNumbers(entry.pages);
    const diff = diffStatus(taxonomyValue, llmValue);

    return {
      metric_key: metricKey,
      taxonomy_value: taxonomyValue,
      llm_value: llmValue,
      absolute_diff: diff.absoluteDiff,
      relative_diff: diff.relativeDiff,
      status: diff.status,
      evidence_pages: pages,
      pdf_url: selectedPdf.url,
      provider,
      model,
      error: null
    };
  });

  const hasError = validations.some((entry) => entry.status === 'error');
  const hasMismatch = validations.some((entry) => entry.status === 'mismatch');
  const hasMatch = validations.some((entry) => entry.status === 'matched');
  // Only claim 'matched' when at least one metric was actually compared and agreed.
  const overallStatus = hasError
    ? 'error'
    : hasMismatch
      ? 'mismatch'
      : hasMatch
        ? 'matched'
        : 'not_run';

  return {
    validation_result: {
      status: overallStatus,
      checks: toValidationResultChecks(validations),
      validatedAt: new Date().toISOString()
    },
    metric_validations: validations
  };
}
|
||||
|
||||
/** Internal helpers exported under one object so tests can call them directly. */
export const __pdfValidationInternals = {
  parseValidationPayload: parseValidationPayload,
  diffStatus: diffStatus
};
|
||||
136
lib/server/taxonomy/types.ts
Normal file
136
lib/server/taxonomy/types.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyStatementRow } from '@/lib/types';
|
||||
import type {
|
||||
FilingTaxonomyAssetType,
|
||||
FilingTaxonomyParseStatus,
|
||||
FilingTaxonomyPeriod,
|
||||
FilingTaxonomySource
|
||||
} from '@/lib/server/repos/filing-taxonomy';
|
||||
|
||||
/** A file discovered for a filing, with its classification and selection state. */
export type TaxonomyAsset = {
  /** Classification of the file (see `FilingTaxonomyAssetType`). */
  asset_type: FilingTaxonomyAssetType;
  name: string;
  url: string;
  /** Null when the size is not known. */
  size_bytes: number | null;
  /** Ranking score; presumably higher is preferred (confirm against asset discovery). */
  score: number | null;
  is_selected: boolean;
};
|
||||
|
||||
/** String-to-string map of namespaces; presumably prefix -> namespace URI (confirm against the XBRL parser). */
export type TaxonomyNamespaceMap = Record<string, string>;
|
||||
|
||||
/** A reporting context: an id, its period (start/end or instant) and its dimensions. */
export type TaxonomyContext = {
  id: string;
  periodStart: string | null;
  periodEnd: string | null;
  periodInstant: string | null;
  /** Axis/member pairs attached to the context. */
  dimensions: { axis: string; member: string }[];
};
|
||||
|
||||
/** A unit declaration: its id and its measure, which may be absent. */
export type TaxonomyUnit = {
  id: string;
  measure: string | null;
};
|
||||
|
||||
/** A single numeric fact together with its concept identity, context and period. */
export type TaxonomyFact = {
  /** Concept identity; consumers split it at the last `#` into namespace URI and local name. */
  conceptKey: string;
  qname: string;
  namespaceUri: string;
  localName: string;
  contextId: string;
  unit: string | null;
  decimals: string | null;
  value: number;
  periodStart: string | null;
  periodEnd: string | null;
  periodInstant: string | null;
  /** Axis/member pairs qualifying the fact. */
  dimensions: { axis: string; member: string }[];
  isDimensionless: boolean;
  sourceFile: string | null;
};
|
||||
|
||||
/**
 * Position of a concept within a presentation hierarchy.
 * NOTE(review): the producer of this type is not visible here; field meanings
 * below are read off the names and should be confirmed against it.
 */
export type TaxonomyPresentationConcept = {
  /** Concept identifier (presumably `<namespaceUri>#<localName>` — confirm). */
  conceptKey: string;
  /** Qualified name of the concept. */
  qname: string;
  /** Role URI the concept is presented under. */
  roleUri: string;
  /** Ordering value within the presentation. */
  order: number;
  /** Nesting depth within the presentation. */
  depth: number;
  /** Key of the parent concept, or `null` when there is none. */
  parentConceptKey: string | null;
  /** Whether the concept is abstract. */
  isAbstract: boolean;
};
|
||||
|
||||
/**
 * A taxonomy concept record with its naming, optional statement
 * classification, and optional presentation placement.
 */
export type TaxonomyConcept = {
  /** Concept identifier. */
  concept_key: string;
  /** Qualified name of the concept. */
  qname: string;
  /** Namespace URI of the concept. */
  namespace_uri: string;
  /** Element name without its prefix. */
  local_name: string;
  /** Human-readable label, or `null` when none is available. */
  label: string | null;
  /** Whether the concept is flagged as an extension. */
  is_extension: boolean;
  /** Financial statement the concept is assigned to, or `null` when unassigned. */
  statement_kind: FinancialStatementKind | null;
  /** Role URI, or `null` when unknown. */
  role_uri: string | null;
  /** Presentation ordering value, or `null` when unknown. */
  presentation_order: number | null;
  /** Presentation nesting depth, or `null` when unknown. */
  presentation_depth: number | null;
  /** Key of the parent concept, or `null` when there is none. */
  parent_concept_key: string | null;
  /** Whether the concept is abstract. */
  is_abstract: boolean;
};
|
||||
|
||||
/**
 * Outcome of comparing one filing metric's taxonomy-derived value against an
 * LLM-derived value.
 */
export type TaxonomyMetricValidationCheck = {
  /** Which metric of `Filing['metrics']` this check is about. */
  metric_key: keyof NonNullable<Filing['metrics']>;
  /** Value obtained from the taxonomy, or `null` when unavailable. */
  taxonomy_value: number | null;
  /** Value obtained from the LLM, or `null` when unavailable. */
  llm_value: number | null;
  /** Absolute difference between the two values, or `null` when not computed. */
  absolute_diff: number | null;
  /** Relative difference between the two values, or `null` when not computed. */
  relative_diff: number | null;
  /** Result of the comparison. */
  status: 'not_run' | 'matched' | 'mismatch' | 'error';
  /** Page numbers cited as evidence (empty when none). */
  evidence_pages: number[];
  /** URL of the PDF consulted, or `null` when none. */
  pdf_url: string | null;
  /** Provider name, or `null` when not recorded. */
  provider: string | null;
  /** Model name, or `null` when not recorded. */
  model: string | null;
  /** Error message, or `null` when there was none. */
  error: string | null;
};
|
||||
|
||||
/** Identifying details of the filing whose taxonomy data should be hydrated. */
export type TaxonomyHydrationInput = {
  /** Numeric id of the filing record. */
  filingId: number;
  /** Ticker symbol of the filer. */
  ticker: string;
  /** CIK of the filer. */
  cik: string;
  /** Accession number of the filing. */
  accessionNumber: string;
  /** Filing date as a string. */
  filingDate: string;
  /** Form type; only annual (10-K) and quarterly (10-Q) reports are accepted. */
  filingType: '10-K' | '10-Q';
  /** URL of the filing, or `null` when unknown. */
  filingUrl: string | null;
  /** Name of the filing's primary document, or `null` when unknown. */
  primaryDocument: string | null;
};
|
||||
|
||||
/**
 * Everything produced by hydrating one filing: parse status, statement rows,
 * derived metrics, validation output, and the raw assets/concepts/facts.
 */
export type TaxonomyHydrationResult = {
  /** Numeric id of the filing record. */
  filing_id: number;
  /** Ticker symbol of the filer. */
  ticker: string;
  /** Filing date as a string. */
  filing_date: string;
  /** Form type of the filing. */
  filing_type: '10-K' | '10-Q';
  /** Outcome of parsing. */
  parse_status: FilingTaxonomyParseStatus;
  /** Parse error message, or `null` when there was none. */
  parse_error: string | null;
  /** Where the taxonomy data came from. */
  source: FilingTaxonomySource;
  /** Reporting periods found. */
  periods: FilingTaxonomyPeriod[];
  /** Statement rows grouped by financial statement kind. */
  statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  /** Metrics derived for the filing. */
  derived_metrics: Filing['metrics'];
  /** Aggregate validation result, or `null` when none was produced. */
  validation_result: MetricValidationResult | null;
  /** Number of facts. */
  facts_count: number;
  /** Number of concepts. */
  concepts_count: number;
  /** Number of dimensions. */
  dimensions_count: number;
  /** Files associated with the filing. */
  assets: TaxonomyAsset[];
  /** Concept records. */
  concepts: TaxonomyConcept[];
  /** Fact records in snake_case form. */
  facts: Array<{
    concept_key: string;
    qname: string;
    namespace_uri: string;
    local_name: string;
    statement_kind: FinancialStatementKind | null;
    role_uri: string | null;
    context_id: string;
    unit: string | null;
    decimals: string | null;
    value_num: number;
    period_start: string | null;
    period_end: string | null;
    period_instant: string | null;
    dimensions: Array<{ axis: string; member: string }>;
    is_dimensionless: boolean;
    source_file: string | null;
  }>;
  /** Per-metric validation checks. */
  metric_validations: TaxonomyMetricValidationCheck[];
};
|
||||
60
lib/server/taxonomy/xbrl-parser.test.ts
Normal file
60
lib/server/taxonomy/xbrl-parser.test.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';
|
||||
|
||||
// Minimal XBRL instance fixture: one duration context (c1), one instant
// context with an explicit dimension (c2), one USD unit, two numeric facts,
// and one text fact that the parser is expected to leave out.
const SAMPLE_XBRL = `
<xbrli:xbrl xmlns:xbrli="http://www.xbrl.org/2003/instance"
  xmlns:xbrldi="http://xbrl.org/2006/xbrldi"
  xmlns:us-gaap="http://fasb.org/us-gaap/2024"
  xmlns:dei="http://xbrl.sec.gov/dei/2024">
  <xbrli:context id="c1">
    <xbrli:period>
      <xbrli:startDate>2025-01-01</xbrli:startDate>
      <xbrli:endDate>2025-12-31</xbrli:endDate>
    </xbrli:period>
  </xbrli:context>
  <xbrli:context id="c2">
    <xbrli:entity>
      <xbrli:segment>
        <xbrldi:explicitMember dimension="us-gaap:StatementBusinessSegmentsAxis">us-gaap:ConsolidatedGroupMember</xbrldi:explicitMember>
      </xbrli:segment>
    </xbrli:entity>
    <xbrli:period>
      <xbrli:instant>2025-12-31</xbrli:instant>
    </xbrli:period>
  </xbrli:context>
  <xbrli:unit id="u1">
    <xbrli:measure>iso4217:USD</xbrli:measure>
  </xbrli:unit>
  <us-gaap:Revenues contextRef="c1" unitRef="u1" decimals="-6">1,234</us-gaap:Revenues>
  <us-gaap:Assets contextRef="c2" unitRef="u1" decimals="-6">5,678</us-gaap:Assets>
  <dei:EntityRegistrantName contextRef="c1">Acme Corp</dei:EntityRegistrantName>
</xbrli:xbrl>
`;
|
||||
|
||||
// Exercises parseXbrlInstance end to end against the SAMPLE_XBRL fixture.
describe('xbrl instance parser', () => {
  it('parses contexts, units, numeric facts, dimensions, and concept keys', () => {
    const parsed = parseXbrlInstance(SAMPLE_XBRL, 'abc_htm.xml');

    // Contexts and units.
    expect(parsed.contexts.c1?.periodStart).toBe('2025-01-01');
    expect(parsed.contexts.c1?.periodEnd).toBe('2025-12-31');
    expect(parsed.contexts.c2?.periodInstant).toBe('2025-12-31');
    expect(parsed.contexts.c2?.dimensions.length).toBe(1);
    expect(parsed.units.u1?.measure).toBe('iso4217:USD');

    // Only the two numeric facts survive; the text fact is dropped.
    expect(parsed.facts.length).toBe(2);
    const revenueFact = parsed.facts.find((fact) => fact.localName === 'Revenues');
    const assetsFact = parsed.facts.find((fact) => fact.localName === 'Assets');

    expect(revenueFact?.conceptKey).toBe('http://fasb.org/us-gaap/2024#Revenues');
    expect(revenueFact?.isDimensionless).toBe(true);
    expect(revenueFact?.value).toBe(1234);
    expect(revenueFact?.sourceFile).toBe('abc_htm.xml');

    expect(assetsFact?.conceptKey).toBe('http://fasb.org/us-gaap/2024#Assets');
    expect(assetsFact?.isDimensionless).toBe(false);
    expect(assetsFact?.dimensions[0]).toEqual({
      axis: 'us-gaap:StatementBusinessSegmentsAxis',
      member: 'us-gaap:ConsolidatedGroupMember'
    });
  });
});
|
||||
264
lib/server/taxonomy/xbrl-parser.ts
Normal file
264
lib/server/taxonomy/xbrl-parser.ts
Normal file
@@ -0,0 +1,264 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import type { TaxonomyContext, TaxonomyFact, TaxonomyNamespaceMap, TaxonomyUnit } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/**
 * Decodes XML entity and character references in `value`.
 *
 * Handles the five predefined XML entities (`&amp;`, `&lt;`, `&gt;`,
 * `&quot;`, `&apos;`), `&nbsp;`, and decimal/hex character references.
 * Entity names are matched case-insensitively. Unknown named entities are
 * left untouched; a numeric reference that is not a valid code point becomes
 * a single space, as does a reference to the no-break space (U+00A0).
 *
 * Decoding happens in one pass so that already-decoded output is never
 * decoded again (`&amp;lt;` yields `&lt;`, not `<`).
 *
 * @param value Text that may contain entity references.
 * @returns The text with recognised references replaced.
 */
function decodeXmlEntities(value: string): string {
  // A Map (not an object literal) so names such as "constructor" cannot hit
  // inherited properties.
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' ']
  ]);

  const fromCodePoint = (codePoint: number): string => {
    if (!Number.isFinite(codePoint) || codePoint === 0xa0) {
      return ' ';
    }

    try {
      return String.fromCodePoint(codePoint);
    } catch {
      // String.fromCodePoint throws RangeError for out-of-range values.
      return ' ';
    }
  };

  return value.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (match: string, body: string) => {
    if (body.startsWith('#')) {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const digits = body.slice(isHex ? 2 : 1);
      return fromCodePoint(Number.parseInt(digits, isHex ? 16 : 10));
    }

    return namedEntities.get(body.toLowerCase()) ?? match;
  });
}
|
||||
|
||||
/**
 * Parses the text of a fact into a number.
 *
 * Accepts thousands separators, a leading `$`, embedded markup, the Unicode
 * minus sign (U+2212), and accounting-style negatives wrapped in parentheses.
 * Returns `null` for empty input, dash-only placeholders (`--`), and any text
 * that is not a complete number once cleaned up. The whole string must be
 * numeric: values such as `10-K` or `2025-12-31` are rejected rather than
 * being read as their leading digits.
 *
 * @param value Raw fact text.
 * @returns The numeric value, or `null` when the text is not a number.
 */
function parseNumber(value: string): number | null {
  const trimmed = value.trim();
  if (!trimmed || /^--+$/.test(trimmed)) {
    return null;
  }

  // "(1,234)" is the accounting notation for -1234.
  const negative = trimmed.startsWith('(') && trimmed.endsWith(')');
  const normalized = trimmed
    .replace(/<[^>]+>/g, ' ')
    .replace(/[,$\s]/g, '')
    .replace(/[()]/g, '')
    .replace(/\u2212/g, '-');

  // Number.parseFloat stops at the first invalid character, so validate the
  // full string first to keep text facts from turning into numbers.
  if (!/^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(normalized)) {
    return null;
  }

  const parsed = Number.parseFloat(normalized);
  if (!Number.isFinite(parsed)) {
    return null;
  }

  return negative ? -Math.abs(parsed) : parsed;
}
|
||||
|
||||
/**
 * Collects `xmlns:prefix="uri"` declarations into a prefix → URI map.
 *
 * Declarations are read from the first tag whose text contains "xbrl"
 * (case-insensitive); when no such tag exists, the first 1200 characters of
 * the document are scanned instead. Default (`xmlns=`) declarations are not
 * captured. A prefix declared twice keeps the later URI.
 *
 * @param raw Full text of the document.
 * @returns Map of namespace prefix to namespace URI (empty when none found).
 */
function parseNamespaceMapFromDocument(raw: string): TaxonomyNamespaceMap {
  const map: TaxonomyNamespaceMap = {};
  const rootStart = raw.match(/<[^>]*xbrl[^>]*>/i)?.[0] ?? raw.slice(0, 1200);

  for (const match of rootStart.matchAll(/xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g)) {
    const prefix = (match[1] ?? '').trim();
    const uri = (match[2] ?? '').trim();

    if (prefix && uri) {
      map[prefix] = uri;
    }
  }

  return map;
}
|
||||
|
||||
/**
 * Extracts every `context` element (with or without a namespace prefix) from
 * the document, keyed by its `id` attribute.
 *
 * For each context the start/end/instant dates are read as trimmed text
 * (`null` when the element is missing) and every `explicitMember` is recorded
 * as an axis/member pair after entity decoding. Contexts with a blank id are
 * skipped; a repeated id keeps the last occurrence.
 *
 * @param raw Full text of the document.
 * @returns Map of context id to parsed context.
 */
function parseContexts(raw: string): Record<string, TaxonomyContext> {
  const contexts: Record<string, TaxonomyContext> = {};
  const contextPattern = /<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?context>/gi;

  for (const match of raw.matchAll(contextPattern)) {
    const contextId = (match[1] ?? '').trim();
    if (!contextId) {
      continue;
    }

    const block = match[2] ?? '';

    const periodStart = block.match(/<(?:[a-z0-9_\-]+:)?startDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?startDate>/i)?.[1]?.trim() ?? null;
    const periodEnd = block.match(/<(?:[a-z0-9_\-]+:)?endDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?endDate>/i)?.[1]?.trim() ?? null;
    const periodInstant = block.match(/<(?:[a-z0-9_\-]+:)?instant>([^<]+)<\/(?:[a-z0-9_\-]+:)?instant>/i)?.[1]?.trim() ?? null;

    const dimensions: Array<{ axis: string; member: string }> = [];
    const dimPattern = /<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9_\-]+:)?explicitMember>/gi;

    for (const dimMatch of block.matchAll(dimPattern)) {
      const axis = decodeXmlEntities((dimMatch[1] ?? '').trim());
      const member = decodeXmlEntities((dimMatch[2] ?? '').trim());

      // Keep only complete axis/member pairs.
      if (axis && member) {
        dimensions.push({ axis, member });
      }
    }

    contexts[contextId] = {
      id: contextId,
      periodStart,
      periodEnd,
      periodInstant,
      dimensions
    };
  }

  return contexts;
}
|
||||
|
||||
/**
 * Extracts every `unit` element (with or without a namespace prefix) from
 * the document, keyed by its `id` attribute.
 *
 * All `measure` elements inside a unit are entity-decoded; a single measure
 * is used as-is, several are joined with `/` in document order, and a unit
 * without any measure gets `null`. Units with a blank id are skipped.
 *
 * @param raw Full text of the document.
 * @returns Map of unit id to parsed unit.
 */
function parseUnits(raw: string): Record<string, TaxonomyUnit> {
  const units: Record<string, TaxonomyUnit> = {};
  const unitPattern = /<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unit>/gi;

  for (const match of raw.matchAll(unitPattern)) {
    const id = (match[1] ?? '').trim();
    if (!id) {
      continue;
    }

    const block = match[2] ?? '';
    const measures = [...block.matchAll(/<(?:[a-z0-9_\-]+:)?measure>([^<]+)<\/(?:[a-z0-9_\-]+:)?measure>/gi)]
      .map((entry) => decodeXmlEntities((entry[1] ?? '').trim()))
      .filter(Boolean);

    // join('/') of a one-element list is that element, so one branch covers
    // both the single-measure and the multi-measure case.
    const measure = measures.length > 0 ? measures.join('/') : null;

    units[id] = { id, measure };
  }

  return units;
}
|
||||
|
||||
/**
 * Guesses which financial statement a concept belongs to from keywords in
 * its local name (case-insensitive).
 *
 * Rules are tried in order and the first match wins, so a name that contains
 * keywords from several rules is assigned by the earliest one (for example
 * anything containing "cash" is treated as cash flow).
 *
 * @param localName Concept element name without prefix.
 * @returns The matched statement kind, or `null` when no keyword matches.
 */
function classifyStatementKind(localName: string): FinancialStatementKind | null {
  const normalized = localName.toLowerCase();

  // Order matters: earlier rules take precedence over later ones.
  const rules: Array<[RegExp, FinancialStatementKind]> = [
    [/cash|operatingactivities|investingactivities|financingactivities/, 'cash_flow'],
    [/equity|retainedearnings|additionalpaidincapital/, 'equity'],
    [/comprehensiveincome/, 'comprehensive_income'],
    [/asset|liabilit|debt/, 'balance'],
    [/revenue|income|profit|expense|costof/, 'income']
  ];

  for (const [pattern, kind] of rules) {
    if (pattern.test(normalized)) {
      return kind;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Reports whether `prefix` is one of the structural prefixes (`xbrli`,
 * `xlink`, `link`, `xbrldi`, `xbrldt`) rather than a reporting taxonomy.
 * The comparison ignores case.
 *
 * @param prefix Namespace prefix taken from an element name.
 * @returns `true` for a structural prefix, otherwise `false`.
 */
function isXbrlInfrastructurePrefix(prefix: string): boolean {
  const infrastructurePrefixes = ['xbrli', 'xlink', 'link', 'xbrldi', 'xbrldt'];
  return infrastructurePrefixes.includes(prefix.toLowerCase());
}
|
||||
|
||||
/**
 * Builds a concept key of the form `<namespaceUri>#<localName>`.
 *
 * @param namespaceUri Namespace URI of the concept.
 * @param localName Element name without prefix.
 * @returns The combined key.
 */
function localNameToKey(namespaceUri: string, localName: string): string {
  return `${namespaceUri}#${localName}`;
}
|
||||
|
||||
/**
 * Parses an XBRL instance document into namespaces, contexts, units, and
 * numeric facts using regular expressions (no XML parser involved).
 *
 * A fact is any prefixed element carrying a `contextRef` attribute whose
 * prefix is not a structural one and whose text parses as a number; all
 * other elements are ignored. Period and dimension data are copied from the
 * referenced context when it exists. A prefix without a namespace
 * declaration is given the placeholder URI `urn:unknown:<prefix>`.
 *
 * @param raw Full text of the instance document.
 * @param sourceFile Name recorded on every fact as its origin, or `null`.
 * @returns The namespace map, contexts and units keyed by id, and the facts
 *   in document order.
 */
export function parseXbrlInstance(
  raw: string,
  sourceFile: string | null
): {
  namespaces: TaxonomyNamespaceMap;
  contexts: Record<string, TaxonomyContext>;
  units: Record<string, TaxonomyUnit>;
  facts: TaxonomyFact[];
} {
  const namespaces = parseNamespaceMapFromDocument(raw);
  const contexts = parseContexts(raw);
  const units = parseUnits(raw);
  const facts: TaxonomyFact[] = [];

  // Groups: 1 = prefix, 2 = local name, 3 = attribute text, 4 = element body.
  const factPattern = /<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'][^>]*)>([\s\S]*?)<\/\1:\2>/g;

  for (const match of raw.matchAll(factPattern)) {
    const prefix = (match[1] ?? '').trim();
    const localName = (match[2] ?? '').trim();
    if (!prefix || !localName || isXbrlInfrastructurePrefix(prefix)) {
      continue;
    }

    const attrs = match[3] ?? '';
    const contextId = attrs.match(/\bcontextRef=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
    if (!contextId) {
      continue;
    }

    // Non-numeric facts (names, dates, text blocks) are dropped here.
    const value = parseNumber(decodeXmlEntities((match[4] ?? '').trim()));
    if (value === null) {
      continue;
    }

    const unitRef = attrs.match(/\bunitRef=["']([^"']+)["']/i)?.[1]?.trim() ?? null;
    const decimals = attrs.match(/\bdecimals=["']([^"']+)["']/i)?.[1]?.trim() ?? null;

    // Prefer the unit's resolved measure; fall back to the raw reference when
    // the unit is unknown or declares no measure.
    const resolvedMeasure = unitRef ? units[unitRef]?.measure : null;
    const unit = resolvedMeasure ? resolvedMeasure : unitRef;

    const namespaceUri = namespaces[prefix] ?? `urn:unknown:${prefix}`;
    const context = contexts[contextId];

    facts.push({
      conceptKey: localNameToKey(namespaceUri, localName),
      qname: `${prefix}:${localName}`,
      namespaceUri,
      localName,
      contextId,
      unit,
      decimals,
      value,
      periodStart: context?.periodStart ?? null,
      periodEnd: context?.periodEnd ?? null,
      periodInstant: context?.periodInstant ?? null,
      dimensions: context?.dimensions ?? [],
      isDimensionless: (context?.dimensions.length ?? 0) === 0,
      sourceFile
    });
  }

  return {
    namespaces,
    contexts,
    units,
    facts
  };
}
|
||||
|
||||
/**
 * Public entry point to the keyword-based statement classification.
 *
 * @param localName Concept element name without prefix.
 * @returns The statement kind guessed from the name, or `null` when no
 *   keyword matches.
 */
export function conceptStatementFallback(localName: string): FinancialStatementKind | null {
  return classifyStatementKind(localName);
}
|
||||
Reference in New Issue
Block a user