import type { FinancialStatementKind } from '@/lib/types';
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
import { parseLabelLinkbase, parsePresentationLinkbase } from '@/lib/server/taxonomy/linkbase-parser';
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
import { materializeTaxonomyStatements } from '@/lib/server/taxonomy/materialize';
import { validateMetricsWithPdfLlm } from '@/lib/server/taxonomy/pdf-validation';
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';

/**
 * Builds a record keyed by every financial statement kind, calling `factory`
 * once per key so that each entry receives its own value (no shared reference).
 *
 * @param factory Producer invoked once for each statement kind.
 * @returns A record with `income`, `balance`, `cash_flow`, `equity` and
 *   `comprehensive_income` entries.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  return {
    income: factory(),
    balance: factory(),
    cash_flow: factory(),
    equity: factory(),
    comprehensive_income: factory()
  };
}

/**
 * Resolves the User-Agent header value used for SEC requests.
 *
 * `||` (rather than `??`) is deliberate: an empty `SEC_USER_AGENT` also falls
 * back to the default.
 *
 * NOTE(review): the default literal ends with a trailing space and looks
 * truncated (possibly a stripped `<contact>` suffix) — confirm the intended
 * value before relying on it.
 */
function envUserAgent(): string {
  return process.env.SEC_USER_AGENT || 'Fiscal Clone ';
}

/**
 * Fetches `url` as text using the supplied fetch implementation.
 *
 * @param url Absolute URL of the document to download.
 * @param fetchImpl Fetch implementation to use (injectable for tests).
 * @returns The response body as a string.
 * @throws Error when the response status is not OK; the message carries the
 *   HTTP status code.
 */
async function fetchText(url: string, fetchImpl: typeof fetch): Promise<string> {
  const response = await fetchImpl(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/xml, text/plain, text/html;q=0.8, */*;q=0.5'
    },
    cache: 'no-store'
  });

  if (!response.ok) {
    throw new Error(`SEC request failed (${response.status})`);
  }

  return await response.text();
}

/**
 * Builds a taxonomy snapshot for a single filing.
 *
 * Steps, in order:
 * 1. Discover the filing's assets via `discoverFilingAssets`.
 * 2. Pick an instance asset (preferring one flagged `is_selected`) and fetch it.
 *    If none exists, or the fetch fails, a `failed` result is returned.
 * 3. Fetch every selected presentation/label linkbase asset, one at a time.
 *    A failure here is recorded in `parse_error` but does not abort the run.
 * 4. Materialize statements, derive metrics and run `validateMetricsWithPdfLlm`.
 *
 * @param input Filing identifiers and metadata to hydrate.
 * @param options Optional overrides; `fetchImpl` replaces the global `fetch`.
 * @returns The hydration result. `parse_status` is `ready` when both statement
 *   rows and facts were produced, `partial` when only facts were produced, and
 *   `failed` otherwise.
 */
export async function hydrateFilingTaxonomySnapshot(
  input: TaxonomyHydrationInput,
  options?: {
    fetchImpl?: typeof fetch;
  }
): Promise<TaxonomyHydrationResult> {
  const fetchImpl = options?.fetchImpl ?? fetch;
  const ticker = input.ticker.trim().toUpperCase();

  const discovered = await discoverFilingAssets({
    cik: input.cik,
    accessionNumber: input.accessionNumber,
    filingUrl: input.filingUrl,
    primaryDocument: input.primaryDocument,
    fetchImpl
  });

  // Baseline "nothing could be parsed" result, reused by every early exit.
  const emptyResult: TaxonomyHydrationResult = {
    filing_id: input.filingId,
    ticker,
    filing_date: input.filingDate,
    filing_type: input.filingType,
    parse_status: 'failed',
    parse_error: 'No XBRL instance found',
    source: 'legacy_html_fallback',
    periods: [],
    statement_rows: createStatementRecord(() => []),
    derived_metrics: null,
    validation_result: {
      status: 'not_run',
      checks: [],
      validatedAt: null
    },
    facts_count: 0,
    concepts_count: 0,
    dimensions_count: 0,
    assets: discovered.assets,
    concepts: [],
    facts: [],
    metric_validations: []
  };

  // Prefer the instance flagged as selected; otherwise take the first instance.
  const selectedInstance =
    discovered.assets.find((asset) => asset.asset_type === 'instance' && asset.is_selected) ??
    discovered.assets.find((asset) => asset.asset_type === 'instance') ??
    null;

  if (!selectedInstance) {
    return emptyResult;
  }

  let parseError: string | null = null;
  let source: TaxonomyHydrationResult['source'] = 'xbrl_instance';

  let instanceText = '';
  try {
    instanceText = await fetchText(selectedInstance.url, fetchImpl);
  } catch (error) {
    // Without the instance document there is nothing to parse.
    parseError = error instanceof Error ? error.message : 'Unable to fetch instance file';
    return {
      ...emptyResult,
      parse_error: parseError
    };
  }

  const parsedInstance = parseXbrlInstance(instanceText, selectedInstance.name);

  // Assumes label linkbases map string concept keys to string labels — TODO confirm.
  const labelByConcept = new Map<string, string>();
  const presentation: ReturnType<typeof parsePresentationLinkbase> = [];

  // Linkbases are fetched sequentially so that merge order follows asset order.
  for (const asset of discovered.assets) {
    if (!asset.is_selected) {
      continue;
    }

    if (asset.asset_type !== 'presentation' && asset.asset_type !== 'label') {
      continue;
    }

    try {
      const content = await fetchText(asset.url, fetchImpl);

      if (asset.asset_type === 'presentation') {
        const parsed = parsePresentationLinkbase(content);
        if (parsed.length > 0) {
          source = 'xbrl_instance_with_linkbase';
        }
        presentation.push(...parsed);
      } else {
        // Only 'label' assets reach this branch; the first label seen per concept wins.
        const parsed = parseLabelLinkbase(content);
        for (const [conceptKey, label] of parsed.entries()) {
          if (!labelByConcept.has(conceptKey)) {
            labelByConcept.set(conceptKey, label);
          }
        }
      }
    } catch (error) {
      // Best effort: keep only the first linkbase error and carry on.
      parseError =
        parseError ?? (error instanceof Error ? error.message : 'Failed to parse taxonomy linkbase');
    }
  }

  const materialized = materializeTaxonomyStatements({
    filingId: input.filingId,
    accessionNumber: input.accessionNumber,
    filingDate: input.filingDate,
    filingType: input.filingType,
    facts: parsedInstance.facts,
    presentation,
    labelByConcept
  });

  const derivedMetrics = deriveTaxonomyMetrics(parsedInstance.facts);
  const llmValidation = await validateMetricsWithPdfLlm({
    metrics: derivedMetrics,
    assets: discovered.assets,
    fetchImpl
  });

  const hasRows = Object.values(materialized.statement_rows).some((rows) => rows.length > 0);
  const hasFacts = materialized.facts.length > 0;

  // Rows without facts still count as a failure; facts alone are "partial".
  const parseStatus: TaxonomyHydrationResult['parse_status'] =
    hasRows && hasFacts ? 'ready' : hasFacts ? 'partial' : 'failed';

  return {
    filing_id: input.filingId,
    ticker,
    filing_date: input.filingDate,
    filing_type: input.filingType,
    parse_status: parseStatus,
    // A failed parse always carries a message; otherwise surface any linkbase error.
    parse_error: parseStatus === 'failed' ? (parseError ?? 'No XBRL facts extracted') : parseError,
    source,
    periods: materialized.periods,
    statement_rows: materialized.statement_rows,
    derived_metrics: derivedMetrics,
    validation_result: llmValidation.validation_result,
    facts_count: materialized.facts.length,
    concepts_count: materialized.concepts.length,
    dimensions_count: materialized.dimensionsCount,
    assets: discovered.assets,
    concepts: materialized.concepts,
    facts: materialized.facts,
    metric_validations: llmValidation.metric_validations
  };
}