/**
 * SEC EDGAR client helpers: ticker lookup, recent filings, company-facts
 * metrics, primary-document text extraction and per-filing statement
 * hydration (FilingSummary reports + inline XBRL dimensions).
 *
 * NOTE(review): generic type arguments and the opening-tag portions of
 * several regex literals were reconstructed from usage after being lost to
 * tag stripping — confirm against version control.
 */
import type { Filing, FinancialStatementKind } from '@/lib/types';
import type {
  DimensionStatementBundle,
  DimensionStatementSnapshotRow,
  FilingFaithfulStatementSnapshotRow,
  FilingStatementBundle,
  FilingStatementSnapshotPeriod,
  StandardizedStatementBundle,
  StandardizedStatementSnapshotRow
} from '@/lib/server/repos/filing-statements';

type FilingType = Filing['filing_type'];
type FilingMetrics = NonNullable<Filing['metrics']>;

type TickerDirectoryRecord = {
  cik_str: number;
  ticker: string;
  title: string;
};

type RecentFilingsPayload = {
  filings?: {
    recent?: {
      accessionNumber?: string[];
      filingDate?: string[];
      form?: string[];
      primaryDocument?: string[];
    };
  };
  cik?: string;
  name?: string;
};

type CompanyFactsPayload = {
  facts?: {
    'us-gaap'?: Record<string, { units?: Record<string, CompanyFactPoint[]> }>;
  };
};

type CompanyFactPoint = {
  val?: number;
  end?: string;
  filed?: string;
  accn?: string;
  form?: string;
  fy?: number;
  fp?: string;
  frame?: string;
};

type SecFiling = {
  ticker: string;
  cik: string;
  companyName: string;
  filingType: FilingType;
  filingDate: string;
  accessionNumber: string;
  filingUrl: string | null;
  submissionUrl: string | null;
  primaryDocument: string | null;
};

type FilingDocumentInput = {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
  primaryDocument: string | null;
};

type FetchPrimaryFilingTextOptions = {
  fetchImpl?: typeof fetch;
  maxChars?: number;
};

export type FilingDocumentText = {
  source: 'primary_document';
  url: string;
  text: string;
  truncated: boolean;
};

type FilingMetricsLookupInput = {
  accessionNumber: string;
  filingDate: string;
  filingType: FilingType;
};

const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12;
const FILING_TEXT_MAX_CHARS = 24_000;

/** us-gaap tags tried in order for each headline metric; first hit wins. */
const METRIC_TAGS = {
  revenue: [
    'Revenues',
    'SalesRevenueNet',
    'RevenueFromContractWithCustomerExcludingAssessedTax',
    'TotalRevenuesAndOtherIncome'
  ],
  netIncome: ['NetIncomeLoss', 'ProfitLoss'],
  totalAssets: ['Assets'],
  cash: [
    'CashAndCashEquivalentsAtCarryingValue',
    'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'
  ],
  debt: [
    'LongTermDebtAndCapitalLeaseObligations',
    'LongTermDebtNoncurrent',
    'LongTermDebt',
    'DebtAndFinanceLeaseLiabilities'
  ]
} as const;

/** Named entities handled by decodeHtmlEntities (keys are lower-case). */
const NAMED_ENTITY_REPLACEMENTS: Record<string, string> = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"'
};

let tickerCache = new Map<string, TickerDirectoryRecord>();
let tickerCacheLoadedAt = 0;

/**
 * User-Agent sent with every SEC request; overridable via SEC_USER_AGENT.
 * NOTE(review): the default looks like it lost a `<contact>` suffix — confirm.
 */
function envUserAgent() {
  return process.env.SEC_USER_AGENT || 'Fiscal Clone ';
}

/** Current UTC date as YYYY-MM-DD. */
function todayIso() {
  return new Date().toISOString().slice(0, 10);
}

/**
 * Decodes the small set of HTML entities that show up in filings: numeric
 * (decimal / hex) references plus nbsp, amp, lt, gt and quot.
 *
 * Decoding happens in a single pass so already-escaped text such as
 * `&amp;lt;` becomes `&lt;` rather than being decoded twice into `<`.
 * Code point 160 (no-break space) is mapped to a plain space so later
 * whitespace collapsing sees it. Out-of-range code points become a space.
 */
function decodeHtmlEntities(value: string) {
  const decodeCodePoint = (code: number) => {
    if (!Number.isFinite(code) || code < 0 || code > 0x10ffff) {
      return ' ';
    }
    if (code === 0xa0) {
      return ' ';
    }
    try {
      return String.fromCodePoint(code);
    } catch {
      return ' ';
    }
  };

  return value.replace(
    /&(?:#x([0-9a-f]+)|#([0-9]+)|(nbsp|amp|lt|gt|quot));/gi,
    (match: string, hex: string | undefined, decimal: string | undefined, named: string | undefined) => {
      if (hex !== undefined) {
        return decodeCodePoint(Number.parseInt(hex, 16));
      }
      if (decimal !== undefined) {
        return decodeCodePoint(Number.parseInt(decimal, 10));
      }
      return NAMED_ENTITY_REPLACEMENTS[(named ?? '').toLowerCase()] ?? match;
    }
  );
}

/**
 * Converts raw filing HTML into plain text: drops script/style/noscript
 * blocks and comments, turns block-level tags into newlines, strips the
 * remaining tags, decodes entities and collapses whitespace.
 */
export function normalizeSecDocumentText(raw: string) {
  return decodeHtmlEntities(
    raw
      .replace(/\r/g, '\n')
      .replace(/<script[\s\S]*?<\/script>/gi, ' ')
      .replace(/<style[\s\S]*?<\/style>/gi, ' ')
      .replace(/<noscript[\s\S]*?<\/noscript>/gi, ' ')
      .replace(/<!--[\s\S]*?-->/g, ' ')
      .replace(/<\/?(p|div|section|article|li|tr|td|th|h[1-6]|br|hr)[^>]*>/gi, '\n')
      .replace(/<[^>]+>/g, ' ')
  )
    .replace(/[ \t]+\n/g, '\n')
    .replace(/\n[ \t]+/g, '\n')
    .replace(/[ \t]{2,}/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}

/**
 * Clips text to at most `maxChars` characters (never fewer than 1,000),
 * preferring to cut at the last newline/space when that boundary lies in
 * the final 30% of the slice.
 */
export function trimSecDocumentTextForPrompt(text: string, maxChars = FILING_TEXT_MAX_CHARS) {
  const safeMax = Math.max(Math.trunc(maxChars), 1_000);
  if (text.length <= safeMax) {
    return { text, truncated: false };
  }

  const slice = text.slice(0, safeMax);
  const newlineBoundary = slice.lastIndexOf('\n');
  const wordBoundary = slice.lastIndexOf(' ');
  const boundary = Math.max(newlineBoundary, wordBoundary);
  const clipped = (boundary > safeMax * 0.7 ? slice.slice(0, boundary) : slice).trimEnd();

  return { text: clipped, truncated: true };
}

/** Removes dashes from an accession number (archive path form). */
function compactAccessionNumber(value: string) {
  return value.replace(/-/g, '');
}

/** Digits-only accession key used for comparisons. */
function normalizeAccessionKey(value: string | undefined | null) {
  return (value ?? '').replace(/\D/g, '');
}

/** Upper-cases a form name and strips a trailing amendment suffix (`/A`). */
function normalizeForm(value: string | undefined | null) {
  const normalized = (value ?? '').trim().toUpperCase();
  if (!normalized) {
    return '';
  }
  return normalized.endsWith('/A') ? normalized.slice(0, -2) : normalized;
}

/** Date.parse wrapper that yields NaN for missing input. */
function parseDate(value: string | undefined | null) {
  if (!value) {
    return Number.NaN;
  }
  return Date.parse(value);
}

/** CIK without leading zeros for archive paths, or null when it has no digits. */
function normalizeCikForPath(value: string) {
  const digits = value.replace(/\D/g, '');
  if (!digits) {
    return null;
  }
  const numeric = Number(digits);
  if (!Number.isFinite(numeric)) {
    return null;
  }
  return String(numeric);
}

/**
 * Returns the URL of a filing's primary document: the explicit `filingUrl`
 * when present, otherwise one built from CIK, accession number and
 * document name. Returns null when neither can be determined.
 */
export function resolvePrimaryFilingUrl(input: FilingDocumentInput) {
  const directUrl = input.filingUrl?.trim();
  if (directUrl) {
    return directUrl;
  }
  if (!input.primaryDocument) {
    return null;
  }

  const cikPath = normalizeCikForPath(input.cik);
  const accessionPath = compactAccessionNumber(input.accessionNumber);
  if (!cikPath || !accessionPath) {
    return null;
  }

  return `https://www.sec.gov/Archives/edgar/data/${cikPath}/${accessionPath}/${input.primaryDocument}`;
}

/**
 * Downloads the primary filing document and returns normalized, clipped text.
 *
 * @returns null when no URL can be resolved or the document has no text.
 * @throws Error when the HTTP response is not ok.
 */
export async function fetchPrimaryFilingText(
  input: FilingDocumentInput,
  options?: FetchPrimaryFilingTextOptions
): Promise<FilingDocumentText | null> {
  const url = resolvePrimaryFilingUrl(input);
  if (!url) {
    return null;
  }

  const doFetch = options?.fetchImpl ?? fetch;
  const response = await doFetch(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/html, text/plain;q=0.9, */*;q=0.8'
    },
    cache: 'no-store'
  });
  if (!response.ok) {
    throw new Error(`SEC filing request failed (${response.status})`);
  }

  const raw = await response.text();
  const normalized = normalizeSecDocumentText(raw);
  if (!normalized) {
    return null;
  }

  const clipped = trimSecDocumentTextForPrompt(normalized, options?.maxChars ?? FILING_TEXT_MAX_CHARS);
  if (!clipped.text) {
    return null;
  }

  return {
    source: 'primary_document',
    url,
    text: clipped.text,
    truncated: clipped.truncated
  };
}

/** GETs a JSON resource from the SEC; the payload is trusted to match T. */
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'application/json'
    },
    cache: 'no-store'
  });
  if (!response.ok) {
    throw new Error(`SEC request failed (${response.status})`);
  }
  return await response.json() as T;
}

/** Reloads the ticker directory when it is empty or older than the TTL. */
async function ensureTickerCache() {
  const isFresh = Date.now() - tickerCacheLoadedAt < TICKER_CACHE_TTL_MS;
  if (isFresh && tickerCache.size > 0) {
    return;
  }

  const payload = await fetchJson<Record<string, TickerDirectoryRecord>>(
    'https://www.sec.gov/files/company_tickers.json'
  );
  const next = new Map<string, TickerDirectoryRecord>();
  for (const record of Object.values(payload)) {
    next.set(record.ticker.toUpperCase(), record);
  }

  tickerCache = next;
  tickerCacheLoadedAt = Date.now();
}

/** Looks a ticker up in the SEC directory; throws when it is unknown. */
async function resolveTicker(ticker: string) {
  await ensureTickerCache();

  const normalized = ticker.trim().toUpperCase();
  const record = tickerCache.get(normalized);
  if (!record) {
    throw new Error(`Ticker ${normalized} not found in SEC directory`);
  }

  return {
    ticker: normalized,
    cik: String(record.cik_str),
    companyName: record.title
  };
}

/**
 * Latest value for a tag with no specific filing in mind.
 * NOTE(review): passing '10-Q' makes this prefer 10-Q facts over a more
 * recent 10-K fact — confirm that bias is intended.
 */
function pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null {
  return pickFactForFiling(payload, tag, {
    accessionNumber: '',
    filingDate: '',
    filingType: '10-Q'
  });
}

/**
 * Collects the finite-valued fact points for a us-gaap tag, preferring
 * USD-denominated units (excluding per-share) over any other unit.
 */
function collectFactSeries(payload: CompanyFactsPayload, tag: string): CompanyFactPoint[] {
  const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units;
  if (!unitCollections) {
    return [];
  }

  const usdSeries: CompanyFactPoint[] = [];
  const fallbackSeries: CompanyFactPoint[] = [];
  for (const [unit, series] of Object.entries(unitCollections)) {
    if (!Array.isArray(series) || series.length === 0) {
      continue;
    }
    if (unit === 'USD' || /^USD(?!\/shares)/i.test(unit)) {
      usdSeries.push(...series);
      continue;
    }
    fallbackSeries.push(...series);
  }

  const points = usdSeries.length > 0 ? usdSeries : fallbackSeries;
  return points.filter((point) => typeof point.val === 'number' && Number.isFinite(point.val));
}

/** Point with the newest `filed` (or `end`) date; undated points sort last. */
function pickMostRecentFact(points: CompanyFactPoint[]) {
  return [...points].sort((a, b) => {
    const aDate = parseDate(a.filed ?? a.end);
    const bDate = parseDate(b.filed ?? b.end);
    if (Number.isFinite(aDate) && Number.isFinite(bDate)) {
      return bDate - aDate;
    }
    if (Number.isFinite(bDate)) {
      return 1;
    }
    if (Number.isFinite(aDate)) {
      return -1;
    }
    return 0;
  })[0] ?? null;
}

/**
 * Picks the newest point dated on or before `targetDate`; when none
 * qualifies, the point closest in time. Falls back to the most recent
 * point when the target or all point dates are unparseable.
 */
function pickClosestByDate(points: CompanyFactPoint[], targetDate: number) {
  if (points.length === 0) {
    return null;
  }
  if (!Number.isFinite(targetDate)) {
    return pickMostRecentFact(points);
  }

  const dated = points
    .map((point) => ({ point, date: parseDate(point.filed ?? point.end) }))
    .filter((entry) => Number.isFinite(entry.date));
  if (dated.length === 0) {
    return pickMostRecentFact(points);
  }

  const beforeTarget = dated.filter((entry) => entry.date <= targetDate);
  if (beforeTarget.length > 0) {
    return beforeTarget.sort((a, b) => b.date - a.date)[0]?.point ?? null;
  }

  return dated.sort((a, b) => {
    const distance = Math.abs(a.date - targetDate) - Math.abs(b.date - targetDate);
    if (distance !== 0) {
      return distance;
    }
    return b.date - a.date;
  })[0]?.point ?? null;
}

/**
 * Resolves a tag's value for one filing: exact accession match first, then
 * the closest-dated point of the same form, then the closest-dated point
 * of any form.
 */
function pickFactForFiling(
  payload: CompanyFactsPayload,
  tag: string,
  filing: FilingMetricsLookupInput
): number | null {
  const points = collectFactSeries(payload, tag);
  if (points.length === 0) {
    return null;
  }

  const accessionKey = normalizeAccessionKey(filing.accessionNumber);
  if (accessionKey) {
    const byAccession = points.filter((point) => normalizeAccessionKey(point.accn) === accessionKey);
    if (byAccession.length > 0) {
      const matched = pickMostRecentFact(byAccession);
      if (typeof matched?.val === 'number' && Number.isFinite(matched.val)) {
        return matched.val;
      }
    }
  }

  const filingForm = normalizeForm(filing.filingType);
  const byForm = filingForm
    ? points.filter((point) => normalizeForm(point.form) === filingForm)
    : points;
  const targetDate = parseDate(filing.filingDate);

  const bestByForm = pickClosestByDate(byForm, targetDate);
  if (typeof bestByForm?.val === 'number' && Number.isFinite(bestByForm.val)) {
    return bestByForm.val;
  }

  const bestAny = pickClosestByDate(points, targetDate);
  return typeof bestAny?.val === 'number' && Number.isFinite(bestAny.val) ? bestAny.val : null;
}

/** First non-null value across an ordered list of candidate tags. */
function pickFactByTags(
  payload: CompanyFactsPayload,
  tags: readonly string[],
  filing: FilingMetricsLookupInput
) {
  for (const tag of tags) {
    const value = pickFactForFiling(payload, tag, filing);
    if (value !== null) {
      return value;
    }
  }
  return null;
}

function emptyMetrics(): FilingMetrics {
  return {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };
}

/**
 * Lists a company's most recent 10-K / 10-Q / 8-K filings (amendments are
 * folded into their base form).
 *
 * @param limit maximum number of filings, clamped to 1..50.
 * @throws Error when the ticker is unknown or an SEC request fails.
 */
export async function fetchRecentFilings(ticker: string, limit = 20): Promise<SecFiling[]> {
  const safeLimit = Math.min(Math.max(Math.trunc(limit), 1), 50);
  const company = await resolveTicker(ticker);
  const cikPadded = company.cik.padStart(10, '0');
  const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`;
  const payload = await fetchJson<RecentFilingsPayload>(submissionUrl);

  const recent = payload.filings?.recent;
  if (!recent) {
    return [];
  }

  const forms = recent.form ?? [];
  const accessionNumbers = recent.accessionNumber ?? [];
  const filingDates = recent.filingDate ?? [];
  const primaryDocuments = recent.primaryDocument ?? [];
  const filings: SecFiling[] = [];

  for (let i = 0; i < forms.length; i += 1) {
    const normalizedForm = normalizeForm(forms[i]) as FilingType;
    if (!SUPPORTED_FORMS.includes(normalizedForm)) {
      continue;
    }

    const accessionNumber = accessionNumbers[i];
    if (!accessionNumber) {
      continue;
    }

    const compactAccession = compactAccessionNumber(accessionNumber);
    const documentName = primaryDocuments[i];
    const filingUrl = documentName
      ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccession}/${documentName}`
      : null;

    filings.push({
      ticker: company.ticker,
      cik: company.cik,
      companyName: payload.name ?? company.companyName,
      filingType: normalizedForm,
      // A missing filing date falls back to today's date.
      filingDate: filingDates[i] ?? todayIso(),
      accessionNumber,
      filingUrl,
      submissionUrl,
      primaryDocument: documentName ?? null
    });

    if (filings.length >= safeLimit) {
      break;
    }
  }

  return filings;
}

/** Fetches company facts for a CIK and returns the latest headline metrics. */
export async function fetchLatestFilingMetrics(cik: string) {
  const normalized = cik.padStart(10, '0');
  const payload = await fetchJson<CompanyFactsPayload>(
    `https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`
  );

  return {
    revenue: pickLatestFact(payload, 'Revenues'),
    netIncome: pickLatestFact(payload, 'NetIncomeLoss'),
    totalAssets: pickLatestFact(payload, 'Assets'),
    cash: pickLatestFact(payload, 'CashAndCashEquivalentsAtCarryingValue'),
    debt: pickLatestFact(payload, 'LongTermDebt')
  } satisfies FilingMetrics;
}

/**
 * Resolves headline metrics for each filing, keyed by accession number.
 * Best-effort: if the company-facts request fails, every filing gets
 * all-null metrics instead of the error propagating.
 */
export async function fetchFilingMetricsForFilings(
  cik: string,
  _ticker: string,
  filings: FilingMetricsLookupInput[]
) {
  const metricsByAccession = new Map<string, FilingMetrics>();
  if (filings.length === 0) {
    return metricsByAccession;
  }

  try {
    const normalized = cik.padStart(10, '0');
    const payload = await fetchJson<CompanyFactsPayload>(
      `https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`
    );

    for (const filing of filings) {
      metricsByAccession.set(filing.accessionNumber, {
        revenue: pickFactByTags(payload, METRIC_TAGS.revenue, filing),
        netIncome: pickFactByTags(payload, METRIC_TAGS.netIncome, filing),
        totalAssets: pickFactByTags(payload, METRIC_TAGS.totalAssets, filing),
        cash: pickFactByTags(payload, METRIC_TAGS.cash, filing),
        debt: pickFactByTags(payload, METRIC_TAGS.debt, filing)
      });
    }
    return metricsByAccession;
  } catch {
    for (const filing of filings) {
      metricsByAccession.set(filing.accessionNumber, emptyMetrics());
    }
    return metricsByAccession;
  }
}

type FilingStatementHydrationInput = {
  filingId: number;
  ticker: string;
  cik: string;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  filingUrl: string | null;
  primaryDocument: string | null;
  metrics: Filing['metrics'];
};

type FilingStatementHydrationResult = {
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: '10-K' | '10-Q';
  period_end: string | null;
  statement_bundle: FilingStatementBundle | null;
  standardized_bundle: StandardizedStatementBundle | null;
  dimension_bundle: DimensionStatementBundle | null;
  parse_status: 'ready' | 'partial' | 'failed';
  parse_error: string | null;
  source: 'sec_filing_summary' | 'xbrl_instance' | 'companyfacts_fallback';
};

type StatementReportDescriptor = {
  shortName: string;
  longName: string;
  htmlFileName: string | null;
  xmlFileName: string | null;
};

type StatementParseRow = {
  key: string;
  label: string;
  concept: string | null;
  order: number;
  depth: number;
  isSubtotal: boolean;
  value: number | null;
};

type DimensionContext = {
  endDate: string | null;
  dimensions: Array<{ axis: string; member: string }>;
};

type CanonicalRowDefinition = {
  key: string;
  label: string;
  category: string;
  conceptPatterns: RegExp[];
  labelPatterns: RegExp[];
};

const FINANCIAL_STATEMENT_KINDS: FinancialStatementKind[] = [
  'income',
  'balance',
  'cash_flow',
  'equity',
  'comprehensive_income'
];

/** Report-name patterns used to locate each statement in FilingSummary.xml. */
const STATEMENT_REPORT_PATTERNS: Record<FinancialStatementKind, RegExp[]> = {
  income: [
    /\bstatements?\s+of\s+operations?\b/i,
    /\bstatements?\s+of\s+income\b/i,
    /\bincome\s+statement/i
  ],
  balance: [
    /\bbalance\s+sheets?\b/i,
    /\bstatement\s+of\s+financial\s+position\b/i
  ],
  cash_flow: [
    /\bstatements?\s+of\s+cash\s+flows?\b/i,
    /\bcash\s+flows?\b/i
  ],
  equity: [
    /\bstatements?\s+of\s+(stockholders|shareholders)['’]?\s+equity\b/i,
    /\bchanges\s+in\s+equity\b/i
  ],
  comprehensive_income: [
    /\bstatements?\s+of\s+comprehensive\s+income\b/i,
    /\bcomprehensive\s+income\b/i
  ]
};

/** Canonical rows per statement; a filing row matches by concept or label. */
const STANDARDIZED_ROW_DEFINITIONS: Record<FinancialStatementKind, CanonicalRowDefinition[]> = {
  income: [
    {
      key: 'revenue',
      label: 'Revenue',
      category: 'core',
      conceptPatterns: [/revenue/i, /salesrevenuenet/i],
      labelPatterns: [/\brevenue\b/i, /\bsales\b/i]
    },
    {
      key: 'cost-of-revenue',
      label: 'Cost of Revenue',
      category: 'core',
      conceptPatterns: [/costofrevenue/i, /costofgoods/i],
      labelPatterns: [/\bcost of revenue\b/i, /\bcost of sales\b/i]
    },
    {
      key: 'gross-profit',
      label: 'Gross Profit',
      category: 'core',
      conceptPatterns: [/grossprofit/i],
      labelPatterns: [/\bgross profit\b/i]
    },
    {
      key: 'operating-income',
      label: 'Operating Income',
      category: 'core',
      conceptPatterns: [/operatingincome/i, /incomefromoperations/i],
      labelPatterns: [/\boperating income\b/i, /\bincome from operations\b/i]
    },
    {
      key: 'net-income',
      label: 'Net Income',
      category: 'core',
      conceptPatterns: [/netincomeloss/i, /profitloss/i],
      labelPatterns: [/\bnet income\b/i, /\bnet earnings\b/i]
    }
  ],
  balance: [
    {
      key: 'total-assets',
      label: 'Total Assets',
      category: 'core',
      conceptPatterns: [/^assets$/i],
      labelPatterns: [/\btotal assets\b/i]
    },
    {
      key: 'total-liabilities',
      label: 'Total Liabilities',
      category: 'core',
      conceptPatterns: [/liabilities/i],
      labelPatterns: [/\btotal liabilities\b/i]
    },
    {
      key: 'stockholders-equity',
      label: 'Stockholders Equity',
      category: 'core',
      conceptPatterns: [/stockholdersequity/i, /shareholdersequity/i, /equity/i],
      labelPatterns: [/\bequity\b/i]
    },
    {
      key: 'cash-and-equivalents',
      label: 'Cash and Equivalents',
      category: 'liquidity',
      conceptPatterns: [/cashandcashequivalents/i, /cashandequivalents/i],
      labelPatterns: [/\bcash\b/i, /\bcash equivalents\b/i]
    },
    {
      key: 'total-debt',
      label: 'Total Debt',
      category: 'leverage',
      conceptPatterns: [/longtermdebt/i, /debt/i, /borrowings/i],
      labelPatterns: [/\btotal debt\b/i, /\blong-term debt\b/i, /\bdebt\b/i]
    }
  ],
  cash_flow: [
    {
      key: 'net-cash-operating',
      label: 'Net Cash from Operating Activities',
      category: 'core',
      conceptPatterns: [/netcashprovidedbyusedinoperatingactivities/i, /netcashfromoperatingactivities/i],
      labelPatterns: [/\boperating activities\b/i]
    },
    {
      key: 'net-cash-investing',
      label: 'Net Cash from Investing Activities',
      category: 'core',
      conceptPatterns: [/netcashprovidedbyusedininvestingactivities/i],
      labelPatterns: [/\binvesting activities\b/i]
    },
    {
      key: 'net-cash-financing',
      label: 'Net Cash from Financing Activities',
      category: 'core',
      conceptPatterns: [/netcashprovidedbyusedinfinancingactivities/i],
      labelPatterns: [/\bfinancing activities\b/i]
    },
    {
      key: 'net-change-cash',
      label: 'Net Change in Cash',
      category: 'core',
      conceptPatterns: [/cashandcashequivalentsperiodincrease/i, /increase.*cash/i],
      labelPatterns: [/\bnet change\b/i, /\bincrease.*cash\b/i]
    }
  ],
  equity: [
    {
      key: 'equity-balance',
      label: 'Total Equity',
      category: 'core',
      conceptPatterns: [/stockholdersequity/i, /shareholdersequity/i, /equity/i],
      labelPatterns: [/\btotal equity\b/i, /\bequity\b/i]
    },
    {
      key: 'retained-earnings',
      label: 'Retained Earnings',
      category: 'core',
      conceptPatterns: [/retainedearnings/i],
      labelPatterns: [/\bretained earnings\b/i]
    }
  ],
  comprehensive_income: [
    {
      key: 'comprehensive-income',
      label: 'Comprehensive Income',
      category: 'core',
      conceptPatterns: [/comprehensiveincome/i],
      labelPatterns: [/\bcomprehensive income\b/i]
    },
    {
      key: 'other-comprehensive-income',
      label: 'Other Comprehensive Income',
      category: 'core',
      conceptPatterns: [/othercomprehensiveincome/i],
      labelPatterns: [/\bother comprehensive income\b/i]
    }
  ]
};

/** Builds a record with one freshly created value per statement kind. */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  return FINANCIAL_STATEMENT_KINDS.reduce((acc, kind) => {
    acc[kind] = factory();
    return acc;
  }, {} as Record<FinancialStatementKind, T>);
}

function statementKindLabel(kind: FinancialStatementKind) {
  switch (kind) {
    case 'income':
      return 'Income Statement';
    case 'balance':
      return 'Balance Sheet';
    case 'cash_flow':
      return 'Cash Flow Statement';
    case 'equity':
      return 'Statement of Equity';
    case 'comprehensive_income':
      return 'Comprehensive Income';
    default:
      return kind;
  }
}

/**
 * Directory URL (with trailing slash) of a filing's archive folder: the
 * parent of `filingUrl` when given, otherwise built from CIK + accession.
 */
function resolveFilingDirectoryUrl(input: {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
}) {
  const direct = input.filingUrl?.trim();
  if (direct) {
    const lastSlash = direct.lastIndexOf('/');
    // Ignore the slashes that belong to the scheme itself.
    if (lastSlash > 'https://'.length) {
      return direct.slice(0, lastSlash + 1);
    }
  }

  const cikPath = normalizeCikForPath(input.cik);
  const accessionPath = compactAccessionNumber(input.accessionNumber);
  if (!cikPath || !accessionPath) {
    return null;
  }

  return `https://www.sec.gov/Archives/edgar/data/${cikPath}/${accessionPath}/`;
}

/** Resolves a report file name against the filing directory URL. */
function toAbsoluteArchiveUrl(baseUrl: string, relativePath: string | null) {
  const normalized = (relativePath ?? '').trim();
  if (!normalized) {
    return null;
  }
  if (/^https?:\/\//i.test(normalized)) {
    return normalized;
  }
  return `${baseUrl}${normalized.replace(/^\/+/, '')}`;
}

/** GETs a text resource from the SEC; throws on a non-ok response. */
async function fetchText(url: string, fetchImpl: typeof fetch) {
  const response = await fetchImpl(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/xml, text/html, text/plain;q=0.9, */*;q=0.8'
    },
    cache: 'no-store'
  });
  if (!response.ok) {
    throw new Error(`SEC request failed (${response.status})`);
  }
  return await response.text();
}

/** Text of the first attribute-less `<tagName>…</tagName>` in a block, or ''. */
function xmlTextValue(block: string, tagName: string) {
  const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`<${escaped}>([\\s\\S]*?)<\\/${escaped}>`, 'i');
  const match = block.match(pattern);
  if (!match) {
    return '';
  }
  return decodeHtmlEntities(match[1] ?? '').trim();
}

/** Extracts every `<Report>` entry from FilingSummary.xml. */
function parseFilingSummaryReports(xml: string) {
  const reports: StatementReportDescriptor[] = [];
  // `\b` keeps the wrapper element (e.g. <Reports>) from matching.
  const reportPattern = /<Report\b[^>]*>([\s\S]*?)<\/Report>/gi;

  for (const match of xml.matchAll(reportPattern)) {
    const block = match[1] ?? '';
    reports.push({
      shortName: xmlTextValue(block, 'ShortName'),
      longName: xmlTextValue(block, 'LongName'),
      htmlFileName: xmlTextValue(block, 'HtmlFileName') || null,
      xmlFileName: xmlTextValue(block, 'XmlFileName') || null
    });
  }

  return reports;
}

/** +2 per matching name pattern, -1 for parenthetical/detail reports. */
function scoreReport(kind: FinancialStatementKind, report: StatementReportDescriptor) {
  const haystack = `${report.shortName} ${report.longName}`.trim();
  if (!haystack) {
    return 0;
  }

  let score = 0;
  for (const pattern of STATEMENT_REPORT_PATTERNS[kind]) {
    if (pattern.test(haystack)) {
      score += 2;
    }
  }
  if (/\bparenthetical\b/i.test(haystack) || /\bdetail\b/i.test(haystack)) {
    score -= 1;
  }
  return score;
}

/** Highest-scoring report for a kind (first wins ties), or null if none score > 0. */
function chooseStatementReport(kind: FinancialStatementKind, reports: StatementReportDescriptor[]) {
  let best: StatementReportDescriptor | null = null;
  let bestScore = 0;

  for (const report of reports) {
    const score = scoreReport(kind, report);
    if (score > bestScore) {
      best = report;
      bestScore = score;
    }
  }

  return bestScore > 0 ? best : null;
}

/** Strips markup from a table cell and collapses it to one line of text. */
function sanitizeCellText(raw: string) {
  return decodeHtmlEntities(
    raw
      .replace(/<br\s*\/?>/gi, '\n')
      .replace(/<[^>]+>/g, ' ')
  )
    .replace(/[ \t]+/g, ' ')
    .replace(/\n+/g, ' ')
    .trim();
}

/**
 * Finds an XBRL concept in row markup: a `defref_…` reference (underscores
 * become colons) or, failing that, a `name="…"` attribute.
 */
function extractConceptFromMarkup(markup: string) {
  const defref = markup.match(/defref[_:-]([a-z0-9_:.:-]+)/i);
  if (defref?.[1]) {
    return defref[1].replace(/_/g, ':');
  }

  const nameAttr = markup.match(/\bname=[\"']([a-z0-9_:.:-]+)[\"']/i);
  if (nameAttr?.[1]) {
    return nameAttr[1];
  }

  return null;
}

/** Indent depth from inline padding-left / margin-left, 12px per level. */
function parseIndentDepth(attrs: string) {
  const style = attrs.match(/\bstyle=[\"']([^\"']+)[\"']/i)?.[1] ?? '';

  const padding = style.match(/padding-left:\s*([0-9.]+)px/i)?.[1];
  if (padding) {
    const numeric = Number.parseFloat(padding);
    if (Number.isFinite(numeric) && numeric > 0) {
      return Math.max(0, Math.round(numeric / 12));
    }
  }

  const margin = style.match(/margin-left:\s*([0-9.]+)px/i)?.[1];
  if (margin) {
    const numeric = Number.parseFloat(margin);
    if (Number.isFinite(numeric) && numeric > 0) {
      return Math.max(0, Math.round(numeric / 12));
    }
  }

  return 0;
}

/**
 * Parses a statement cell such as `$ 1,234` or `(56)` (parentheses mean
 * negative). Returns null for blanks, `n/a`, dash runs and percentages.
 */
function parseStatementNumber(raw: string): number | null {
  const trimmed = raw.trim();
  if (!trimmed || /^n\/a$/i.test(trimmed) || /^--+$/.test(trimmed)) {
    return null;
  }
  if (/%$/.test(trimmed)) {
    return null;
  }

  const negative = trimmed.startsWith('(') && trimmed.endsWith(')');
  const cleaned = trimmed
    .replace(/[$,\s]/g, '')
    .replace(/[()]/g, '')
    .replace(/\u2212/g, '-');

  const value = Number.parseFloat(cleaned);
  if (!Number.isFinite(value)) {
    return null;
  }
  return negative ? -Math.abs(value) : value;
}

/** Lower-case, dash-separated identifier. */
function slug(value: string) {
  return value
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
}

/**
 * Parses a rendered statement report (R*.htm) into rows. Every table is
 * parsed and the one yielding the most rows wins. A row needs a label cell
 * and at least one numeric cell; the first numeric cell is the value.
 */
function parseStatementRowsFromReport(content: string): StatementParseRow[] {
  const tables = [...content.matchAll(/<table\b[^>]*>([\s\S]*?)<\/table>/gi)];
  if (tables.length === 0) {
    return [];
  }

  let bestRows: StatementParseRow[] = [];

  for (const tableMatch of tables) {
    const table = tableMatch[0] ?? '';
    const rows: StatementParseRow[] = [];
    let order = 0;

    for (const rowMatch of table.matchAll(/<tr\b[^>]*>([\s\S]*?)<\/tr>/gi)) {
      const rowMarkup = rowMatch[0] ?? '';
      const cells = [...rowMarkup.matchAll(/<t[dh]\b([^>]*)>([\s\S]*?)<\/t[dh]>/gi)];
      if (cells.length < 2) {
        continue;
      }

      const labelCell = cells[0];
      const labelAttrs = labelCell?.[1] ?? '';
      const labelRaw = labelCell?.[2] ?? '';
      const label = sanitizeCellText(labelRaw);
      // Skip section headings that carry no value of their own.
      if (!label || /^(years ended|assets|liabilities|equity)$/i.test(label)) {
        continue;
      }

      let value: number | null = null;
      for (let i = 1; i < cells.length; i += 1) {
        const text = sanitizeCellText(cells[i]?.[2] ?? '');
        const parsed = parseStatementNumber(text);
        if (parsed !== null) {
          value = parsed;
          break;
        }
      }
      if (value === null) {
        continue;
      }

      order += 1;
      const concept = extractConceptFromMarkup(rowMarkup);
      rows.push({
        key: concept ? slug(concept) : `${slug(label)}-${order}`,
        label,
        concept,
        order,
        depth: parseIndentDepth(labelAttrs),
        isSubtotal: /^total\b/i.test(label) || /\bsubtotal\b/i.test(label),
        value
      });
    }

    if (rows.length > bestRows.length) {
      bestRows = rows;
    }
  }

  return bestRows;
}

/** Wraps parsed rows into snapshot rows keyed by the single period id. */
function toSnapshotRows(periodId: string, rows: StatementParseRow[]): FilingFaithfulStatementSnapshotRow[] {
  return rows.map((row) => ({
    key: row.key,
    label: row.label,
    concept: row.concept,
    order: row.order,
    depth: row.depth,
    isSubtotal: row.isSubtotal,
    values: { [periodId]: row.value }
  }));
}

/** True when the row's concept or label matches any pattern of the definition. */
function matchStandardizedDefinition(
  row: FilingFaithfulStatementSnapshotRow,
  definition: CanonicalRowDefinition
) {
  const concept = row.concept ?? '';
  return definition.conceptPatterns.some((pattern) => pattern.test(concept))
    || definition.labelPatterns.some((pattern) => pattern.test(row.label));
}

/** Company-facts metric backing a canonical row, when one exists. */
function fallbackMetricValue(
  kind: FinancialStatementKind,
  rowKey: string,
  metrics: Filing['metrics']
) {
  if (!metrics) {
    return null;
  }
  if (kind === 'income' && rowKey === 'revenue') {
    return metrics.revenue ?? null;
  }
  if (kind === 'income' && rowKey === 'net-income') {
    return metrics.netIncome ?? null;
  }
  if (kind === 'balance' && rowKey === 'total-assets') {
    return metrics.totalAssets ?? null;
  }
  if (kind === 'balance' && rowKey === 'cash-and-equivalents') {
    return metrics.cash ?? null;
  }
  if (kind === 'balance' && rowKey === 'total-debt') {
    return metrics.debt ?? null;
  }
  return null;
}

/**
 * Maps filing rows onto the canonical definitions for a statement kind.
 * Always emits one row per definition (value may be null, or filled from
 * `metrics`), followed by every unmatched filing row under category 'other'.
 */
function toStandardizedRows(
  kind: FinancialStatementKind,
  periodId: string,
  rows: FilingFaithfulStatementSnapshotRow[],
  metrics: Filing['metrics']
): StandardizedStatementSnapshotRow[] {
  const definitions = STANDARDIZED_ROW_DEFINITIONS[kind];
  const normalizedRows = [...rows];
  const usedKeys = new Set<string>();
  const standardizedRows: StandardizedStatementSnapshotRow[] = [];

  for (const definition of definitions) {
    const matched = normalizedRows.find(
      (row) => !usedKeys.has(row.key) && matchStandardizedDefinition(row, definition)
    );
    const matchedValue = matched?.values[periodId] ?? null;
    const fallbackValue = matchedValue === null
      ? fallbackMetricValue(kind, definition.key, metrics)
      : null;

    if (matched) {
      usedKeys.add(matched.key);
    }

    standardizedRows.push({
      key: definition.key,
      label: definition.label,
      concept: matched?.concept ?? definition.key,
      category: definition.category,
      sourceConcepts: matched?.concept ? [matched.concept] : [],
      values: { [periodId]: matchedValue ?? fallbackValue }
    });
  }

  for (const row of normalizedRows) {
    if (usedKeys.has(row.key)) {
      continue;
    }
    standardizedRows.push({
      key: `other-${row.key}`,
      label: row.label,
      concept: row.concept ?? row.key,
      category: 'other',
      sourceConcepts: row.concept ? [row.concept] : [],
      values: { [periodId]: row.values[periodId] ?? null }
    });
  }

  return standardizedRows;
}

/** Collects XBRL contexts that carry at least one explicit dimension member. */
function parseContextsWithDimensions(raw: string) {
  const contexts = new Map<string, DimensionContext>();
  const contextPattern = /<(?:[a-z0-9]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9]+:)?context>/gi;

  for (const match of raw.matchAll(contextPattern)) {
    const contextId = match[1] ?? '';
    const block = match[2] ?? '';
    if (!contextId) {
      continue;
    }

    const endDate = block.match(/<(?:[a-z0-9]+:)?endDate>([^<]+)<\/(?:[a-z0-9]+:)?endDate>/i)?.[1]?.trim() ?? null;
    const dimensions: Array<{ axis: string; member: string }> = [];
    const dimPattern = /<(?:[a-z0-9]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9]+:)?explicitMember>/gi;

    for (const dimMatch of block.matchAll(dimPattern)) {
      const axis = (dimMatch[1] ?? '').trim();
      const member = (dimMatch[2] ?? '').trim();
      if (!axis || !member) {
        continue;
      }
      dimensions.push({ axis, member });
    }

    if (dimensions.length === 0) {
      continue;
    }
    contexts.set(contextId, { endDate, dimensions });
  }

  return contexts;
}

/** Keyword heuristic assigning a concept to a statement; first match wins. */
function statementKindFromConcept(concept: string): FinancialStatementKind | null {
  const normalized = concept.toLowerCase();

  if (/cash|operatingactivities|investingactivities|financingactivities/.test(normalized)) {
    return 'cash_flow';
  }
  if (/equity|retainedearnings|additionalpaidincapital/.test(normalized)) {
    return 'equity';
  }
  if (/comprehensiveincome/.test(normalized)) {
    return 'comprehensive_income';
  }
  if (/asset|liabilit|debt/.test(normalized)) {
    return 'balance';
  }
  if (/revenue|income|profit|expense|costof/.test(normalized)) {
    return 'income';
  }
  return null;
}

/**
 * Extracts dimensional facts from inline XBRL: every `ix:nonFraction`
 * whose context has explicit members yields one row per (axis, member).
 * Scans at most 8,000 facts.
 */
function parseDimensionFacts(
  raw: string,
  fallbackPeriodId: string
) {
  const contexts = parseContextsWithDimensions(raw);
  if (contexts.size === 0) {
    return createStatementRecord<DimensionStatementSnapshotRow[]>(() => []);
  }

  const rows = createStatementRecord<DimensionStatementSnapshotRow[]>(() => []);
  const ixPattern = /<ix:nonfraction\b([^>]*)>([\s\S]*?)<\/ix:nonfraction>/gi;
  let guard = 0;

  for (const match of raw.matchAll(ixPattern)) {
    guard += 1;
    if (guard > 8_000) {
      break;
    }

    const attrs = match[1] ?? '';
    const body = sanitizeCellText(match[2] ?? '');
    const contextRef = attrs.match(/\bcontextref=["']([^"']+)["']/i)?.[1] ?? '';
    const concept = attrs.match(/\bname=["']([^"']+)["']/i)?.[1] ?? '';
    const unit = attrs.match(/\bunitref=["']([^"']+)["']/i)?.[1] ?? null;
    if (!contextRef || !concept) {
      continue;
    }

    const context = contexts.get(contextRef);
    if (!context || context.dimensions.length === 0) {
      continue;
    }

    const kind = statementKindFromConcept(concept);
    if (!kind) {
      continue;
    }

    const value = parseStatementNumber(body);
    if (value === null) {
      continue;
    }

    const periodId = context.endDate ?? fallbackPeriodId;
    const rowKey = slug(concept);
    for (const dimension of context.dimensions) {
      rows[kind].push({
        rowKey,
        concept,
        periodId,
        axis: dimension.axis,
        member: dimension.member,
        value,
        unit
      });
    }
  }

  return rows;
}

/**
 * Returns copies of `rows` with `hasDimensions` set when a dimension fact
 * shares the row's key or (case-insensitively) its concept.
 */
function markHasDimensions<T extends { key: string; concept?: string | null }>(
  rows: T[],
  dimensions: DimensionStatementSnapshotRow[]
) {
  const dimensionConcepts = new Set(
    dimensions.map((item) => item.concept?.toLowerCase() ?? '').filter(Boolean)
  );
  const dimensionRowKeys = new Set(dimensions.map((item) => item.rowKey));

  return rows.map((row) => {
    const concept = row.concept?.toLowerCase() ?? '';
    const hasDimensions = dimensionRowKeys.has(row.key)
      || (concept ? dimensionConcepts.has(concept) : false);
    return { ...row, hasDimensions };
  });
}

function emptyStatementBundle(period: FilingStatementSnapshotPeriod): FilingStatementBundle {
  return {
    periods: [period],
    statements: createStatementRecord<FilingFaithfulStatementSnapshotRow[]>(() => [])
  };
}

function emptyStandardizedBundle(period: FilingStatementSnapshotPeriod): StandardizedStatementBundle {
  return {
    periods: [period],
    statements: createStatementRecord<StandardizedStatementSnapshotRow[]>(() => [])
  };
}

function emptyDimensionBundle(): DimensionStatementBundle {
  return {
    statements: createStatementRecord<DimensionStatementSnapshotRow[]>(() => [])
  };
}

/**
 * Builds the statement snapshot for one filing.
 *
 * 1. Reads FilingSummary.xml and parses the best-matching report per
 *    statement kind (a failing report does not stop the others).
 * 2. Parses inline XBRL dimension facts from the primary document.
 * 3. Derives standardized rows, falling back to `input.metrics`.
 *
 * Never throws for fetch/parse problems; they are reported through
 * `parse_status` / `parse_error`. Status is 'ready' when every statement
 * kind was parsed, 'partial' when at least one statement was parsed or a
 * standardized row has a value, and 'failed' otherwise.
 */
export async function hydrateFilingStatementSnapshot(
  input: FilingStatementHydrationInput,
  options?: {
    fetchImpl?: typeof fetch;
  }
): Promise<FilingStatementHydrationResult> {
  const periodId = `${input.filingDate}-${compactAccessionNumber(input.accessionNumber)}`;
  const period: FilingStatementSnapshotPeriod = {
    id: periodId,
    filingId: input.filingId,
    accessionNumber: input.accessionNumber,
    filingDate: input.filingDate,
    periodStart: null,
    periodEnd: input.filingDate,
    filingType: input.filingType,
    periodLabel: input.filingType === '10-Q' ? 'Quarter End' : 'Fiscal Year End'
  };

  const fetchImpl = options?.fetchImpl ?? fetch;
  const statementBundle = emptyStatementBundle(period);
  const standardizedBundle = emptyStandardizedBundle(period);
  const dimensionBundle = emptyDimensionBundle();
  let source: FilingStatementHydrationResult['source'] = 'companyfacts_fallback';
  let parseError: string | null = null;

  try {
    const filingDirectory = resolveFilingDirectoryUrl({
      filingUrl: input.filingUrl,
      cik: input.cik,
      accessionNumber: input.accessionNumber
    });

    if (filingDirectory) {
      const summaryXml = await fetchText(`${filingDirectory}FilingSummary.xml`, fetchImpl);
      const reports = parseFilingSummaryReports(summaryXml);

      for (const kind of FINANCIAL_STATEMENT_KINDS) {
        const report = chooseStatementReport(kind, reports);
        if (!report) {
          continue;
        }

        const reportUrl = toAbsoluteArchiveUrl(filingDirectory, report.htmlFileName ?? report.xmlFileName);
        if (!reportUrl) {
          continue;
        }

        try {
          const reportText = await fetchText(reportUrl, fetchImpl);
          const parsedRows = parseStatementRowsFromReport(reportText);
          if (parsedRows.length === 0) {
            continue;
          }
          source = 'sec_filing_summary';
          statementBundle.statements[kind] = toSnapshotRows(periodId, parsedRows);
        } catch {
          // Continue to other statements when one report fails.
        }
      }
    }
  } catch (error) {
    parseError = error instanceof Error ? error.message : 'Failed to parse filing summary';
  }

  try {
    const primaryUrl = resolvePrimaryFilingUrl({
      filingUrl: input.filingUrl,
      cik: input.cik,
      accessionNumber: input.accessionNumber,
      primaryDocument: input.primaryDocument
    });

    if (primaryUrl) {
      const rawDocument = await fetchText(primaryUrl, fetchImpl);
      const dimensions = parseDimensionFacts(rawDocument, periodId);
      for (const kind of FINANCIAL_STATEMENT_KINDS) {
        dimensionBundle.statements[kind] = dimensions[kind];
      }

      const hasAnyDimensions = FINANCIAL_STATEMENT_KINDS.some(
        (kind) => dimensionBundle.statements[kind].length > 0
      );
      if (hasAnyDimensions && source === 'companyfacts_fallback') {
        source = 'xbrl_instance';
      }
    }
  } catch (error) {
    // Keep the first error; the summary failure is the more relevant one.
    if (!parseError) {
      parseError = error instanceof Error ? error.message : 'Failed to parse inline XBRL dimensions';
    }
  }

  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    const faithfulRows = statementBundle.statements[kind];
    standardizedBundle.statements[kind] = toStandardizedRows(kind, periodId, faithfulRows, input.metrics);

    statementBundle.statements[kind] = markHasDimensions(
      faithfulRows,
      dimensionBundle.statements[kind]
    );
    standardizedBundle.statements[kind] = markHasDimensions(
      standardizedBundle.statements[kind],
      dimensionBundle.statements[kind]
    );
  }

  const statementCount = FINANCIAL_STATEMENT_KINDS.filter(
    (kind) => statementBundle.statements[kind].length > 0
  ).length;
  // Canonical rows are always emitted, so count only statements that hold
  // at least one real value; otherwise 'failed' could never be reached.
  const standardizedCount = FINANCIAL_STATEMENT_KINDS.filter((kind) =>
    standardizedBundle.statements[kind].some((row) =>
      Object.values(row.values).some((value) => value !== null && value !== undefined)
    )
  ).length;

  let parseStatus: FilingStatementHydrationResult['parse_status'];
  if (statementCount === FINANCIAL_STATEMENT_KINDS.length) {
    parseStatus = 'ready';
  } else if (statementCount > 0 || standardizedCount > 0) {
    parseStatus = 'partial';
  } else {
    parseStatus = 'failed';
  }

  return {
    filing_id: input.filingId,
    ticker: input.ticker.trim().toUpperCase(),
    filing_date: input.filingDate,
    filing_type: input.filingType,
    period_end: input.filingDate,
    statement_bundle: statementBundle,
    standardized_bundle: standardizedBundle,
    dimension_bundle: dimensionBundle,
    parse_status: parseStatus,
    parse_error: parseStatus === 'failed'
      ? (parseError ?? 'No financial statement tables found')
      : parseError,
    source
  };
}

/** Internal helpers exposed for unit tests. */
export const __statementInternals = {
  parseFilingSummaryReports,
  parseStatementRowsFromReport,
  parseDimensionFacts,
  statementKindLabel
};