import type { FinancialStatementKind } from '@/lib/types';

import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
import { parseLabelLinkbase, parsePresentationLinkbase } from '@/lib/server/taxonomy/linkbase-parser';
import { materializeTaxonomyStatements } from '@/lib/server/taxonomy/materialize';
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
import { validateMetricsWithPdfLlm } from '@/lib/server/taxonomy/pdf-validation';
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';
|
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
|
|
return {
|
|
income: factory(),
|
|
balance: factory(),
|
|
cash_flow: factory(),
|
|
equity: factory(),
|
|
comprehensive_income: factory()
|
|
};
|
|
}
|
|
|
|
function envUserAgent() {
|
|
return process.env.SEC_USER_AGENT || 'Fiscal Clone <support@fiscal.local>';
|
|
}
|
|
|
|
async function fetchText(url: string, fetchImpl: typeof fetch) {
|
|
const response = await fetchImpl(url, {
|
|
headers: {
|
|
'User-Agent': envUserAgent(),
|
|
Accept: 'text/xml, text/plain, text/html;q=0.8, */*;q=0.5'
|
|
},
|
|
cache: 'no-store'
|
|
});
|
|
|
|
if (!response.ok) {
|
|
throw new Error(`SEC request failed (${response.status})`);
|
|
}
|
|
|
|
return await response.text();
|
|
}
|
|
|
|
export async function hydrateFilingTaxonomySnapshot(
|
|
input: TaxonomyHydrationInput,
|
|
options?: {
|
|
fetchImpl?: typeof fetch;
|
|
}
|
|
): Promise<TaxonomyHydrationResult> {
|
|
const fetchImpl = options?.fetchImpl ?? fetch;
|
|
|
|
const discovered = await discoverFilingAssets({
|
|
cik: input.cik,
|
|
accessionNumber: input.accessionNumber,
|
|
filingUrl: input.filingUrl,
|
|
primaryDocument: input.primaryDocument,
|
|
fetchImpl
|
|
});
|
|
|
|
const emptyResult: TaxonomyHydrationResult = {
|
|
filing_id: input.filingId,
|
|
ticker: input.ticker.trim().toUpperCase(),
|
|
filing_date: input.filingDate,
|
|
filing_type: input.filingType,
|
|
parse_status: 'failed',
|
|
parse_error: 'No XBRL instance found',
|
|
source: 'legacy_html_fallback',
|
|
periods: [],
|
|
statement_rows: createStatementRecord(() => []),
|
|
derived_metrics: null,
|
|
validation_result: {
|
|
status: 'not_run',
|
|
checks: [],
|
|
validatedAt: null
|
|
},
|
|
facts_count: 0,
|
|
concepts_count: 0,
|
|
dimensions_count: 0,
|
|
assets: discovered.assets,
|
|
concepts: [],
|
|
facts: [],
|
|
metric_validations: []
|
|
};
|
|
|
|
const selectedInstance = discovered.assets.find((asset) => asset.asset_type === 'instance' && asset.is_selected)
|
|
?? discovered.assets.find((asset) => asset.asset_type === 'instance')
|
|
?? null;
|
|
|
|
if (!selectedInstance) {
|
|
return emptyResult;
|
|
}
|
|
|
|
let parseError: string | null = null;
|
|
let source: TaxonomyHydrationResult['source'] = 'xbrl_instance';
|
|
|
|
let instanceText = '';
|
|
try {
|
|
instanceText = await fetchText(selectedInstance.url, fetchImpl);
|
|
} catch (error) {
|
|
parseError = error instanceof Error ? error.message : 'Unable to fetch instance file';
|
|
return {
|
|
...emptyResult,
|
|
parse_error: parseError
|
|
};
|
|
}
|
|
|
|
const parsedInstance = parseXbrlInstance(instanceText, selectedInstance.name);
|
|
|
|
const labelByConcept = new Map<string, string>();
|
|
const presentation: ReturnType<typeof parsePresentationLinkbase> = [];
|
|
|
|
for (const asset of discovered.assets) {
|
|
if (!asset.is_selected) {
|
|
continue;
|
|
}
|
|
|
|
if (asset.asset_type !== 'presentation' && asset.asset_type !== 'label') {
|
|
continue;
|
|
}
|
|
|
|
try {
|
|
const content = await fetchText(asset.url, fetchImpl);
|
|
if (asset.asset_type === 'presentation') {
|
|
const parsed = parsePresentationLinkbase(content);
|
|
if (parsed.length > 0) {
|
|
source = 'xbrl_instance_with_linkbase';
|
|
}
|
|
|
|
presentation.push(...parsed);
|
|
} else if (asset.asset_type === 'label') {
|
|
const parsed = parseLabelLinkbase(content);
|
|
for (const [conceptKey, label] of parsed.entries()) {
|
|
if (!labelByConcept.has(conceptKey)) {
|
|
labelByConcept.set(conceptKey, label);
|
|
}
|
|
}
|
|
}
|
|
} catch (error) {
|
|
parseError = parseError ?? (error instanceof Error ? error.message : 'Failed to parse taxonomy linkbase');
|
|
}
|
|
}
|
|
|
|
const materialized = materializeTaxonomyStatements({
|
|
filingId: input.filingId,
|
|
accessionNumber: input.accessionNumber,
|
|
filingDate: input.filingDate,
|
|
filingType: input.filingType,
|
|
facts: parsedInstance.facts,
|
|
presentation,
|
|
labelByConcept
|
|
});
|
|
|
|
const derivedMetrics = deriveTaxonomyMetrics(parsedInstance.facts);
|
|
const llmValidation = await validateMetricsWithPdfLlm({
|
|
metrics: derivedMetrics,
|
|
assets: discovered.assets,
|
|
fetchImpl
|
|
});
|
|
|
|
const hasRows = (Object.values(materialized.statement_rows).reduce((total, rows) => total + rows.length, 0)) > 0;
|
|
const hasFacts = materialized.facts.length > 0;
|
|
|
|
const parseStatus: TaxonomyHydrationResult['parse_status'] = hasRows && hasFacts
|
|
? 'ready'
|
|
: hasFacts
|
|
? 'partial'
|
|
: 'failed';
|
|
|
|
return {
|
|
filing_id: input.filingId,
|
|
ticker: input.ticker.trim().toUpperCase(),
|
|
filing_date: input.filingDate,
|
|
filing_type: input.filingType,
|
|
parse_status: parseStatus,
|
|
parse_error: parseStatus === 'failed' ? (parseError ?? 'No XBRL facts extracted') : parseError,
|
|
source,
|
|
periods: materialized.periods,
|
|
statement_rows: materialized.statement_rows,
|
|
derived_metrics: derivedMetrics,
|
|
validation_result: llmValidation.validation_result,
|
|
facts_count: materialized.facts.length,
|
|
concepts_count: materialized.concepts.length,
|
|
dimensions_count: materialized.dimensionsCount,
|
|
assets: discovered.assets,
|
|
concepts: materialized.concepts,
|
|
facts: materialized.facts,
|
|
metric_validations: llmValidation.metric_validations
|
|
};
|
|
}
|