import type { Filing, FinancialStatementKind } from "@/lib/types";
import type {
  DimensionStatementBundle,
  DimensionStatementSnapshotRow,
  FilingFaithfulStatementSnapshotRow,
  FilingStatementBundle,
  FilingStatementSnapshotPeriod,
  StandardizedStatementBundle,
  StandardizedStatementSnapshotRow,
} from "@/lib/server/repos/filing-statements";

type FilingType = Filing["filing_type"];
type FilingMetrics = NonNullable<Filing["metrics"]>;

/** One entry of the SEC `company_tickers.json` directory. */
type TickerDirectoryRecord = {
  cik_str: number;
  ticker: string;
  title: string;
};

/** Subset of the SEC submissions payload read by this module (parallel arrays under `recent`). */
type RecentFilingsPayload = {
  filings?: {
    recent?: {
      accessionNumber?: string[];
      filingDate?: string[];
      form?: string[];
      primaryDocument?: string[];
    };
  };
  cik?: string;
  name?: string;
};

/** Subset of the SEC companyfacts payload read by this module. */
type CompanyFactsPayload = {
  facts?: {
    "us-gaap"?: Record<string, { units?: Record<string, CompanyFactPoint[]> }>;
  };
};

type CompanyFactPoint = {
  val?: number;
  end?: string;
  filed?: string;
  accn?: string;
  form?: string;
  fy?: number;
  fp?: string;
  frame?: string;
};

type SecFiling = {
  ticker: string;
  cik: string;
  companyName: string;
  filingType: FilingType;
  filingDate: string;
  accessionNumber: string;
  filingUrl: string | null;
  submissionUrl: string | null;
  primaryDocument: string | null;
};

type FilingDocumentInput = {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
  primaryDocument: string | null;
};

type FetchPrimaryFilingTextOptions = {
  fetchImpl?: typeof fetch;
  maxChars?: number;
};

type FilingDocumentText = {
  source: "primary_document";
  url: string;
  text: string;
  truncated: boolean;
};

type FilingMetricsLookupInput = {
  accessionNumber: string;
  filingDate: string;
  filingType: FilingType;
};

const SUPPORTED_FORMS: FilingType[] = ["10-K", "10-Q", "8-K"];
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12;
const FILING_TEXT_MAX_CHARS = 24_000;

/** us-gaap tags tried, in priority order, for each headline metric. */
const METRIC_TAGS = {
  revenue: [
    "Revenues",
    "SalesRevenueNet",
    "RevenueFromContractWithCustomerExcludingAssessedTax",
    "TotalRevenuesAndOtherIncome",
  ],
  netIncome: ["NetIncomeLoss", "ProfitLoss"],
  totalAssets: ["Assets"],
  cash: [
    "CashAndCashEquivalentsAtCarryingValue",
    "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
  ],
  debt: [
    "LongTermDebtAndCapitalLeaseObligations",
    "LongTermDebtNoncurrent",
    "LongTermDebt",
    "DebtAndFinanceLeaseLiabilities",
  ],
} as const;

let tickerCache = new Map<string, TickerDirectoryRecord>();
let tickerCacheLoadedAt = 0;

/** User-Agent header value for SEC requests; `SEC_USER_AGENT` overrides the default. */
function envUserAgent() {
  // NOTE(review): the default looks like it lost a trailing contact suffix — confirm the intended value.
  return process.env.SEC_USER_AGENT || "Fiscal Clone ";
}

/** Current date as `YYYY-MM-DD` (UTC, taken from `toISOString`). */
function todayIso() {
  return new Date().toISOString().slice(0, 10);
}

/** Named entities decoded by {@link decodeHtmlEntities}. */
const NAMED_HTML_ENTITIES: Record<string, string> = {
  nbsp: " ",
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
};

/**
 * Decodes the handful of named entities used in SEC documents plus decimal and
 * hexadecimal numeric references.
 *
 * Decoding happens in a single pass so that already-escaped text is unescaped
 * exactly once (`&amp;lt;` becomes `&lt;`, not `<`).
 */
function decodeHtmlEntities(value: string) {
  const decodeCodePoint = (code: number) => {
    if (!Number.isFinite(code) || code < 0 || code > 0x10ffff) {
      return " ";
    }
    // Non-breaking space is normalised to a plain space, like `&nbsp;`.
    if (code === 0xa0) {
      return " ";
    }
    try {
      return String.fromCodePoint(code);
    } catch {
      return " ";
    }
  };

  return value.replace(
    /&(nbsp|amp|lt|gt|quot|#x[0-9a-f]+|#[0-9]+);/gi,
    (match, body: string) => {
      const lowered = body.toLowerCase();
      if (lowered.startsWith("#x")) {
        return decodeCodePoint(Number.parseInt(lowered.slice(2), 16));
      }
      if (lowered.startsWith("#")) {
        return decodeCodePoint(Number.parseInt(lowered.slice(1), 10));
      }
      return NAMED_HTML_ENTITIES[lowered] ?? match;
    },
  );
}

/**
 * Converts a raw SEC HTML/text document into plain text: drops script, style,
 * noscript and comment blocks, turns block-level tags into newlines, strips
 * the remaining tags, decodes entities and collapses whitespace.
 */
export function normalizeSecDocumentText(raw: string) {
  return decodeHtmlEntities(
    raw
      .replace(/\r/g, "\n")
      .replace(/<script[\s\S]*?<\/script>/gi, " ")
      .replace(/<style[\s\S]*?<\/style>/gi, " ")
      .replace(/<noscript[\s\S]*?<\/noscript>/gi, " ")
      .replace(/<!--[\s\S]*?-->/g, " ")
      .replace(
        /<\/?(p|div|section|article|li|tr|td|th|h[1-6]|br|hr)[^>]*>/gi,
        "\n",
      )
      .replace(/<[^>]+>/g, " "),
  )
    .replace(/[ \t]+\n/g, "\n")
    .replace(/\n[ \t]+/g, "\n")
    .replace(/[ \t]{2,}/g, " ")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}

/**
 * Clips `text` to at most `maxChars` characters (never fewer than 1,000),
 * preferring to cut at a newline or space when one lies in the last 30% of
 * the window.
 *
 * @returns the clipped text and whether anything was removed.
 */
export function trimSecDocumentTextForPrompt(
  text: string,
  maxChars = FILING_TEXT_MAX_CHARS,
) {
  // A NaN budget would otherwise poison every comparison below and yield "".
  const requested = Number.isNaN(maxChars) ? FILING_TEXT_MAX_CHARS : maxChars;
  const safeMax = Math.max(Math.trunc(requested), 1_000);
  if (text.length <= safeMax) {
    return { text, truncated: false };
  }

  const slice = text.slice(0, safeMax);
  const boundary = Math.max(slice.lastIndexOf("\n"), slice.lastIndexOf(" "));
  const clipped = (
    boundary > safeMax * 0.7 ? slice.slice(0, boundary) : slice
  ).trimEnd();

  return { text: clipped, truncated: true };
}

/** Removes dashes from an accession number (`0000320193-23-000106` → `000032019323000106`). */
function compactAccessionNumber(value: string) {
  return value.replace(/-/g, "");
}

/** Digits-only accession key used for comparisons; empty for nullish input. */
function normalizeAccessionKey(value: string | undefined | null) {
  return (value ?? "").replace(/\D/g, "");
}

/** Upper-cases a form name and folds amendments (`10-K/A`) into their base form. */
function normalizeForm(value: string | undefined | null) {
  const normalized = (value ?? "").trim().toUpperCase();
  if (!normalized) {
    return "";
  }
  return normalized.endsWith("/A") ? normalized.slice(0, -2) : normalized;
}

/** `Date.parse` that yields `NaN` for empty input. */
function parseDate(value: string | undefined | null) {
  if (!value) {
    return Number.NaN;
  }
  return Date.parse(value);
}

/** CIK without leading zeros, as used in archive paths; `null` when it has no digits. */
function normalizeCikForPath(value: string) {
  const digits = value.replace(/\D/g, "");
  if (!digits) {
    return null;
  }
  const numeric = Number(digits);
  if (!Number.isFinite(numeric)) {
    return null;
  }
  return String(numeric);
}

/**
 * Returns the URL of a filing's primary document: the explicit `filingUrl`
 * when present, otherwise one built from CIK, accession number and document
 * name. `null` when there is not enough information.
 */
export function resolvePrimaryFilingUrl(input: FilingDocumentInput) {
  const directUrl = input.filingUrl?.trim();
  if (directUrl) {
    return directUrl;
  }
  if (!input.primaryDocument) {
    return null;
  }

  const cikPath = normalizeCikForPath(input.cik);
  const accessionPath = compactAccessionNumber(input.accessionNumber);
  if (!cikPath || !accessionPath) {
    return null;
  }

  return `https://www.sec.gov/Archives/edgar/data/${cikPath}/${accessionPath}/${input.primaryDocument}`;
}

/**
 * Downloads a filing's primary document and returns it as normalised,
 * length-limited plain text.
 *
 * @returns `null` when no URL can be resolved or the document has no text.
 * @throws Error when the HTTP response is not OK.
 */
export async function fetchPrimaryFilingText(
  input: FilingDocumentInput,
  options?: FetchPrimaryFilingTextOptions,
): Promise<FilingDocumentText | null> {
  const url = resolvePrimaryFilingUrl(input);
  if (!url) {
    return null;
  }

  const doFetch = options?.fetchImpl ?? fetch;
  const response = await doFetch(url, {
    headers: {
      "User-Agent": envUserAgent(),
      Accept: "text/html, text/plain;q=0.9, */*;q=0.8",
    },
    cache: "no-store",
  });
  if (!response.ok) {
    throw new Error(`SEC filing request failed (${response.status})`);
  }

  const normalized = normalizeSecDocumentText(await response.text());
  if (!normalized) {
    return null;
  }

  const clipped = trimSecDocumentTextForPrompt(
    normalized,
    options?.maxChars ?? FILING_TEXT_MAX_CHARS,
  );
  if (!clipped.text) {
    return null;
  }

  return {
    source: "primary_document",
    url,
    text: clipped.text,
    truncated: clipped.truncated,
  };
}

/**
 * GETs `url` as JSON with the SEC user agent.
 * The payload is not validated; `T` is the caller's expectation.
 *
 * @throws Error when the HTTP response is not OK.
 */
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url, {
    headers: {
      "User-Agent": envUserAgent(),
      Accept: "application/json",
    },
    cache: "no-store",
  });
  if (!response.ok) {
    throw new Error(`SEC request failed (${response.status})`);
  }
  return (await response.json()) as T;
}

/** Reloads the module-level ticker directory when it is empty or older than the TTL. */
async function ensureTickerCache() {
  const isFresh = Date.now() - tickerCacheLoadedAt < TICKER_CACHE_TTL_MS;
  if (isFresh && tickerCache.size > 0) {
    return;
  }

  const payload = await fetchJson<Record<string, TickerDirectoryRecord>>(
    "https://www.sec.gov/files/company_tickers.json",
  );
  const next = new Map<string, TickerDirectoryRecord>();
  for (const record of Object.values(payload)) {
    next.set(record.ticker.toUpperCase(), record);
  }

  tickerCache = next;
  tickerCacheLoadedAt = Date.now();
}

/**
 * Looks a ticker up in the SEC directory.
 *
 * @throws Error when the ticker is not listed.
 */
async function resolveTicker(ticker: string) {
  await ensureTickerCache();

  const normalized = ticker.trim().toUpperCase();
  const record = tickerCache.get(normalized);
  if (!record) {
    throw new Error(`Ticker ${normalized} not found in SEC directory`);
  }

  return {
    ticker: normalized,
    cik: String(record.cik_str),
    companyName: record.title,
  };
}

/**
 * Picks a fact for `tag` without a specific filing in mind. Delegates to
 * {@link pickFactForFiling} with an empty accession/date and form "10-Q", so
 * 10-Q facts are preferred over facts from other forms.
 */
function pickLatestFact(
  payload: CompanyFactsPayload,
  tag: string,
): number | null {
  return pickFactForFiling(payload, tag, {
    accessionNumber: "",
    filingDate: "",
    filingType: "10-Q",
  });
}

/**
 * Collects the numeric data points for a us-gaap tag. USD-denominated units
 * (excluding per-share units) win; other units are used only when no USD
 * series exists.
 */
function collectFactSeries(
  payload: CompanyFactsPayload,
  tag: string,
): CompanyFactPoint[] {
  const unitCollections = payload.facts?.["us-gaap"]?.[tag]?.units;
  if (!unitCollections) {
    return [];
  }

  const usdSeries: CompanyFactPoint[] = [];
  const fallbackSeries: CompanyFactPoint[] = [];
  for (const [unit, series] of Object.entries(unitCollections)) {
    if (!Array.isArray(series) || series.length === 0) {
      continue;
    }
    if (unit === "USD" || /^USD(?!\/shares)/i.test(unit)) {
      usdSeries.push(...series);
      continue;
    }
    fallbackSeries.push(...series);
  }

  const points = usdSeries.length > 0 ? usdSeries : fallbackSeries;
  return points.filter(
    (point) => typeof point.val === "number" && Number.isFinite(point.val),
  );
}

/** Point with the latest `filed` (or `end`) date; undated points sort last. */
function pickMostRecentFact(points: CompanyFactPoint[]) {
  return (
    [...points].sort((a, b) => {
      const aDate = parseDate(a.filed ?? a.end);
      const bDate = parseDate(b.filed ?? b.end);
      if (Number.isFinite(aDate) && Number.isFinite(bDate)) {
        return bDate - aDate;
      }
      if (Number.isFinite(bDate)) {
        return 1;
      }
      if (Number.isFinite(aDate)) {
        return -1;
      }
      return 0;
    })[0] ?? null
  );
}

/**
 * Picks the point closest to `targetDate`: the latest point on or before the
 * target when one exists, otherwise the nearest point after it. Falls back to
 * the most recent point when the target or all point dates are unparseable.
 */
function pickClosestByDate(points: CompanyFactPoint[], targetDate: number) {
  if (points.length === 0) {
    return null;
  }
  if (!Number.isFinite(targetDate)) {
    return pickMostRecentFact(points);
  }

  const dated = points
    .map((point) => ({ point, date: parseDate(point.filed ?? point.end) }))
    .filter((entry) => Number.isFinite(entry.date));
  if (dated.length === 0) {
    return pickMostRecentFact(points);
  }

  const beforeTarget = dated.filter((entry) => entry.date <= targetDate);
  if (beforeTarget.length > 0) {
    return beforeTarget.sort((a, b) => b.date - a.date)[0]?.point ?? null;
  }

  return (
    dated.sort((a, b) => {
      const distance =
        Math.abs(a.date - targetDate) - Math.abs(b.date - targetDate);
      if (distance !== 0) {
        return distance;
      }
      return b.date - a.date;
    })[0]?.point ?? null
  );
}

/**
 * Resolves the value of `tag` for a filing. Tries, in order: facts reported
 * under the same accession number, facts from the same form type closest to
 * the filing date, then any fact closest to the filing date.
 */
function pickFactForFiling(
  payload: CompanyFactsPayload,
  tag: string,
  filing: FilingMetricsLookupInput,
): number | null {
  const points = collectFactSeries(payload, tag);
  if (points.length === 0) {
    return null;
  }

  const accessionKey = normalizeAccessionKey(filing.accessionNumber);
  if (accessionKey) {
    const byAccession = points.filter(
      (point) => normalizeAccessionKey(point.accn) === accessionKey,
    );
    if (byAccession.length > 0) {
      const matched = pickMostRecentFact(byAccession);
      if (typeof matched?.val === "number" && Number.isFinite(matched.val)) {
        return matched.val;
      }
    }
  }

  const filingForm = normalizeForm(filing.filingType);
  const byForm = filingForm
    ? points.filter((point) => normalizeForm(point.form) === filingForm)
    : points;
  const targetDate = parseDate(filing.filingDate);

  const bestByForm = pickClosestByDate(byForm, targetDate);
  if (typeof bestByForm?.val === "number" && Number.isFinite(bestByForm.val)) {
    return bestByForm.val;
  }

  const bestAny = pickClosestByDate(points, targetDate);
  return typeof bestAny?.val === "number" && Number.isFinite(bestAny.val)
    ? bestAny.val
    : null;
}

/** First non-null {@link pickFactForFiling} result across `tags`, in order. */
function pickFactByTags(
  payload: CompanyFactsPayload,
  tags: readonly string[],
  filing: FilingMetricsLookupInput,
) {
  for (const tag of tags) {
    const value = pickFactForFiling(payload, tag, filing);
    if (value !== null) {
      return value;
    }
  }
  return null;
}

/** Metrics object with every field null. */
function emptyMetrics(): FilingMetrics {
  return {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null,
  };
}

/**
 * Lists a company's most recent 10-K, 10-Q and 8-K filings (amendments are
 * folded into their base form), in the order the SEC returns them.
 *
 * @param limit maximum number of filings, clamped to 1..50.
 * @throws Error when the ticker is unknown or an SEC request fails.
 */
export async function fetchRecentFilings(
  ticker: string,
  limit = 20,
): Promise<SecFiling[]> {
  const safeLimit = Math.min(Math.max(Math.trunc(limit), 1), 50);
  const company = await resolveTicker(ticker);
  const cikPadded = company.cik.padStart(10, "0");
  const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`;
  const payload = await fetchJson<RecentFilingsPayload>(submissionUrl);

  const recent = payload.filings?.recent;
  if (!recent) {
    return [];
  }

  const forms = recent.form ?? [];
  const accessionNumbers = recent.accessionNumber ?? [];
  const filingDates = recent.filingDate ?? [];
  const primaryDocuments = recent.primaryDocument ?? [];
  const filings: SecFiling[] = [];

  for (let i = 0; i < forms.length; i += 1) {
    const normalizedForm = normalizeForm(forms[i]);
    const filingType = SUPPORTED_FORMS.find((form) => form === normalizedForm);
    if (!filingType) {
      continue;
    }

    const accessionNumber = accessionNumbers[i];
    if (!accessionNumber) {
      continue;
    }

    const compactAccession = compactAccessionNumber(accessionNumber);
    const documentName = primaryDocuments[i];
    const filingUrl = documentName
      ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccession}/${documentName}`
      : null;

    filings.push({
      ticker: company.ticker,
      cik: company.cik,
      companyName: payload.name ?? company.companyName,
      filingType,
      // A missing filing date is filled with today's date.
      filingDate: filingDates[i] ?? todayIso(),
      accessionNumber,
      filingUrl,
      submissionUrl,
      primaryDocument: documentName ?? null,
    });

    if (filings.length >= safeLimit) {
      break;
    }
  }

  return filings;
}

/** Fetches companyfacts for `cik` and picks one value per headline metric via {@link pickLatestFact}. */
async function fetchLatestFilingMetrics(cik: string) {
  const normalized = cik.padStart(10, "0");
  const payload = await fetchJson<CompanyFactsPayload>(
    `https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`,
  );

  return {
    revenue: pickLatestFact(payload, "Revenues"),
    netIncome: pickLatestFact(payload, "NetIncomeLoss"),
    totalAssets: pickLatestFact(payload, "Assets"),
    cash: pickLatestFact(payload, "CashAndCashEquivalentsAtCarryingValue"),
    debt: pickLatestFact(payload, "LongTermDebt"),
  } satisfies FilingMetrics;
}

/**
 * Resolves headline metrics for each filing from a single companyfacts
 * download, keyed by accession number.
 *
 * Best effort: when the download or lookup fails, every filing maps to
 * all-null metrics instead of the error propagating.
 */
export async function fetchFilingMetricsForFilings(
  cik: string,
  _ticker: string,
  filings: FilingMetricsLookupInput[],
) {
  const metricsByAccession = new Map<string, FilingMetrics>();
  if (filings.length === 0) {
    return metricsByAccession;
  }

  try {
    const normalized = cik.padStart(10, "0");
    const payload = await fetchJson<CompanyFactsPayload>(
      `https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`,
    );

    for (const filing of filings) {
      metricsByAccession.set(filing.accessionNumber, {
        revenue: pickFactByTags(payload, METRIC_TAGS.revenue, filing),
        netIncome: pickFactByTags(payload, METRIC_TAGS.netIncome, filing),
        totalAssets: pickFactByTags(payload, METRIC_TAGS.totalAssets, filing),
        cash: pickFactByTags(payload, METRIC_TAGS.cash, filing),
        debt: pickFactByTags(payload, METRIC_TAGS.debt, filing),
      });
    }
    return metricsByAccession;
  } catch {
    for (const filing of filings) {
      metricsByAccession.set(filing.accessionNumber, emptyMetrics());
    }
    return metricsByAccession;
  }
}

type FilingStatementHydrationInput = {
  filingId: number;
  ticker: string;
  cik: string;
  accessionNumber: string;
  filingDate: string;
  filingType: "10-K" | "10-Q";
  filingUrl: string | null;
  primaryDocument: string | null;
  metrics: Filing["metrics"];
};

type FilingStatementHydrationResult = {
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: "10-K" | "10-Q";
  period_end: string | null;
  statement_bundle: FilingStatementBundle | null;
  standardized_bundle: StandardizedStatementBundle | null;
  dimension_bundle: DimensionStatementBundle | null;
  parse_status: "ready" | "partial" | "failed";
  parse_error: string | null;
  source: "sec_filing_summary" | "xbrl_instance" | "companyfacts_fallback";
};

type StatementReportDescriptor = {
  shortName: string;
  longName: string;
  htmlFileName: string | null;
  xmlFileName: string | null;
};

type StatementParseRow = {
  key: string;
  label: string;
  concept: string | null;
  order: number;
  depth: number;
  isSubtotal: boolean;
  value: number | null;
};

type DimensionContext = {
  endDate: string | null;
  dimensions: Array<{ axis: string; member: string }>;
};

type CanonicalRowDefinition = {
  key: string;
  label: string;
  category: string;
  conceptPatterns: RegExp[];
  labelPatterns: RegExp[];
};

/** Statement kinds that are parsed from filings ("disclosure" is deliberately not listed). */
const FINANCIAL_STATEMENT_KINDS: FinancialStatementKind[] = [
  "income",
  "balance",
  "cash_flow",
  "equity",
  "comprehensive_income",
];

/** Report-name patterns used to locate each statement in FilingSummary.xml. */
const STATEMENT_REPORT_PATTERNS: Record<FinancialStatementKind, RegExp[]> = {
  income: [
    /\bstatements?\s+of\s+operations?\b/i,
    /\bstatements?\s+of\s+income\b/i,
    /\bincome\s+statement/i,
  ],
  balance: [
    /\bbalance\s+sheets?\b/i,
    /\bstatement\s+of\s+financial\s+position\b/i,
  ],
  cash_flow: [/\bstatements?\s+of\s+cash\s+flows?\b/i, /\bcash\s+flows?\b/i],
  equity: [
    /\bstatements?\s+of\s+(stockholders|shareholders)['\u2019]?\s+equity\b/i,
    /\bchanges\s+in\s+equity\b/i,
  ],
  comprehensive_income: [
    /\bstatements?\s+of\s+comprehensive\s+income\b/i,
    /\bcomprehensive\s+income\b/i,
  ],
  disclosure: [],
};

/** Canonical rows emitted for every statement kind, matched by concept or label. */
const STANDARDIZED_ROW_DEFINITIONS: Record<
  FinancialStatementKind,
  CanonicalRowDefinition[]
> = {
  income: [
    {
      key: "revenue",
      label: "Revenue",
      category: "core",
      conceptPatterns: [/revenue/i, /salesrevenuenet/i],
      labelPatterns: [/\brevenue\b/i, /\bsales\b/i],
    },
    {
      key: "cost-of-revenue",
      label: "Cost of Revenue",
      category: "core",
      conceptPatterns: [/costofrevenue/i, /costofgoods/i],
      labelPatterns: [/\bcost of revenue\b/i, /\bcost of sales\b/i],
    },
    {
      key: "gross-profit",
      label: "Gross Profit",
      category: "core",
      conceptPatterns: [/grossprofit/i],
      labelPatterns: [/\bgross profit\b/i],
    },
    {
      key: "operating-income",
      label: "Operating Income",
      category: "core",
      conceptPatterns: [/operatingincome/i, /incomefromoperations/i],
      labelPatterns: [/\boperating income\b/i, /\bincome from operations\b/i],
    },
    {
      key: "net-income",
      label: "Net Income",
      category: "core",
      conceptPatterns: [/netincomeloss/i, /profitloss/i],
      labelPatterns: [/\bnet income\b/i, /\bnet earnings\b/i],
    },
  ],
  balance: [
    {
      key: "total-assets",
      label: "Total Assets",
      category: "core",
      conceptPatterns: [/^assets$/i],
      labelPatterns: [/\btotal assets\b/i],
    },
    {
      key: "total-liabilities",
      label: "Total Liabilities",
      category: "core",
      conceptPatterns: [/liabilities/i],
      labelPatterns: [/\btotal liabilities\b/i],
    },
    {
      key: "stockholders-equity",
      label: "Stockholders Equity",
      category: "core",
      conceptPatterns: [
        /stockholdersequity/i,
        /shareholdersequity/i,
        /equity/i,
      ],
      labelPatterns: [/\bequity\b/i],
    },
    {
      key: "cash-and-equivalents",
      label: "Cash and Equivalents",
      category: "liquidity",
      conceptPatterns: [/cashandcashequivalents/i, /cashandequivalents/i],
      labelPatterns: [/\bcash\b/i, /\bcash equivalents\b/i],
    },
    {
      key: "total-debt",
      label: "Total Debt",
      category: "leverage",
      conceptPatterns: [/longtermdebt/i, /debt/i, /borrowings/i],
      labelPatterns: [/\btotal debt\b/i, /\blong-term debt\b/i, /\bdebt\b/i],
    },
  ],
  cash_flow: [
    {
      key: "net-cash-operating",
      label: "Net Cash from Operating Activities",
      category: "core",
      conceptPatterns: [
        /netcashprovidedbyusedinoperatingactivities/i,
        /netcashfromoperatingactivities/i,
      ],
      labelPatterns: [/\boperating activities\b/i],
    },
    {
      key: "net-cash-investing",
      label: "Net Cash from Investing Activities",
      category: "core",
      conceptPatterns: [/netcashprovidedbyusedininvestingactivities/i],
      labelPatterns: [/\binvesting activities\b/i],
    },
    {
      key: "net-cash-financing",
      label: "Net Cash from Financing Activities",
      category: "core",
      conceptPatterns: [/netcashprovidedbyusedinfinancingactivities/i],
      labelPatterns: [/\bfinancing activities\b/i],
    },
    {
      key: "net-change-cash",
      label: "Net Change in Cash",
      category: "core",
      conceptPatterns: [
        /cashandcashequivalentsperiodincrease/i,
        /increase.*cash/i,
      ],
      labelPatterns: [/\bnet change\b/i, /\bincrease.*cash\b/i],
    },
  ],
  equity: [
    {
      key: "equity-balance",
      label: "Total Equity",
      category: "core",
      conceptPatterns: [
        /stockholdersequity/i,
        /shareholdersequity/i,
        /equity/i,
      ],
      labelPatterns: [/\btotal equity\b/i, /\bequity\b/i],
    },
    {
      key: "retained-earnings",
      label: "Retained Earnings",
      category: "core",
      conceptPatterns: [/retainedearnings/i],
      labelPatterns: [/\bretained earnings\b/i],
    },
  ],
  comprehensive_income: [
    {
      key: "comprehensive-income",
      label: "Comprehensive Income",
      category: "core",
      conceptPatterns: [/comprehensiveincome/i],
      labelPatterns: [/\bcomprehensive income\b/i],
    },
    {
      key: "other-comprehensive-income",
      label: "Other Comprehensive Income",
      category: "core",
      conceptPatterns: [/othercomprehensiveincome/i],
      labelPatterns: [/\bother comprehensive income\b/i],
    },
  ],
  disclosure: [],
};

/** Builds a record with a fresh `factory()` value for every statement kind, including "disclosure". */
function createStatementRecord<T>(
  factory: () => T,
): Record<FinancialStatementKind, T> {
  const record = {} as Record<FinancialStatementKind, T>;
  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    record[kind] = factory();
  }
  record.disclosure = factory();
  return record;
}

/** Human-readable title for a statement kind. */
function statementKindLabel(kind: FinancialStatementKind) {
  switch (kind) {
    case "income":
      return "Income Statement";
    case "balance":
      return "Balance Sheet";
    case "cash_flow":
      return "Cash Flow Statement";
    case "equity":
      return "Statement of Equity";
    case "comprehensive_income":
      return "Comprehensive Income";
    case "disclosure":
      return "Disclosures";
    default:
      return kind;
  }
}

/**
 * Returns the archive directory URL (with trailing slash) for a filing:
 * derived from `filingUrl` when present, otherwise built from CIK and
 * accession number. `null` when neither is possible.
 */
function resolveFilingDirectoryUrl(input: {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
}) {
  const direct = input.filingUrl?.trim();
  if (direct) {
    const lastSlash = direct.lastIndexOf("/");
    // Ignore the slashes that belong to the scheme itself.
    if (lastSlash > "https://".length) {
      return direct.slice(0, lastSlash + 1);
    }
  }

  const cikPath = normalizeCikForPath(input.cik);
  const accessionPath = compactAccessionNumber(input.accessionNumber);
  if (!cikPath || !accessionPath) {
    return null;
  }

  return `https://www.sec.gov/Archives/edgar/data/${cikPath}/${accessionPath}/`;
}

/** Resolves `relativePath` against `baseUrl`; absolute URLs pass through, empty paths give `null`. */
function toAbsoluteArchiveUrl(baseUrl: string, relativePath: string | null) {
  const normalized = (relativePath ?? "").trim();
  if (!normalized) {
    return null;
  }
  if (/^https?:\/\//i.test(normalized)) {
    return normalized;
  }
  return `${baseUrl}${normalized.replace(/^\/+/, "")}`;
}

/**
 * GETs `url` as text with the SEC user agent.
 *
 * @throws Error when the HTTP response is not OK.
 */
async function fetchText(url: string, fetchImpl: typeof fetch) {
  const response = await fetchImpl(url, {
    headers: {
      "User-Agent": envUserAgent(),
      Accept: "text/xml, text/html, text/plain;q=0.9, */*;q=0.8",
    },
    cache: "no-store",
  });
  if (!response.ok) {
    throw new Error(`SEC request failed (${response.status})`);
  }
  return await response.text();
}

/** Text content of the first attribute-less `<tagName>` element in `block`, entity-decoded; "" when absent. */
function xmlTextValue(block: string, tagName: string) {
  const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(`<${escaped}>([\\s\\S]*?)<\\/${escaped}>`, "i");
  const match = block.match(pattern);
  if (!match) {
    return "";
  }
  return decodeHtmlEntities(match[1] ?? "").trim();
}

/** Extracts the `<Report>` entries of a FilingSummary.xml document. */
function parseFilingSummaryReports(xml: string) {
  const reports: StatementReportDescriptor[] = [];
  const reportPattern = /<Report\b[^>]*>([\s\S]*?)<\/Report>/gi;

  for (const match of xml.matchAll(reportPattern)) {
    const block = match[1] ?? "";
    reports.push({
      shortName: xmlTextValue(block, "ShortName"),
      longName: xmlTextValue(block, "LongName"),
      htmlFileName: xmlTextValue(block, "HtmlFileName") || null,
      xmlFileName: xmlTextValue(block, "XmlFileName") || null,
    });
  }

  return reports;
}

/** Scores how well a report's names match a statement kind (+2 per pattern, -1 for parenthetical/detail reports). */
function scoreReport(
  kind: FinancialStatementKind,
  report: StatementReportDescriptor,
) {
  const haystack = `${report.shortName} ${report.longName}`.trim();
  if (!haystack) {
    return 0;
  }

  let score = 0;
  for (const pattern of STATEMENT_REPORT_PATTERNS[kind]) {
    if (pattern.test(haystack)) {
      score += 2;
    }
  }
  if (/\bparenthetical\b/i.test(haystack) || /\bdetail\b/i.test(haystack)) {
    score -= 1;
  }
  return score;
}

/** Highest-scoring report for `kind` (first one wins ties); `null` when nothing scores above zero. */
function chooseStatementReport(
  kind: FinancialStatementKind,
  reports: StatementReportDescriptor[],
) {
  let best: StatementReportDescriptor | null = null;
  let bestScore = 0;

  for (const report of reports) {
    const score = scoreReport(kind, report);
    if (score > bestScore) {
      best = report;
      bestScore = score;
    }
  }

  return bestScore > 0 ? best : null;
}

/** Strips markup from a table cell and collapses it to a single trimmed line. */
function sanitizeCellText(raw: string) {
  return decodeHtmlEntities(
    raw.replace(/<br\s*\/?>/gi, "\n").replace(/<[^>]+>/g, " "),
  )
    .replace(/[ \t]+/g, " ")
    .replace(/\n+/g, " ")
    .trim();
}

/**
 * Finds the XBRL concept referenced by a row's markup, either from a
 * `defref_<prefix>_<Name>` token or a `name="..."` attribute.
 */
function extractConceptFromMarkup(markup: string) {
  const defref = markup.match(/defref[_:-]([a-z0-9_:.:-]+)/i);
  if (defref?.[1]) {
    // Only the first underscore separates prefix from local name.
    return defref[1].replace("_", ":");
  }

  const nameAttr = markup.match(/\bname=[\"']([a-z0-9_:.:-]+)[\"']/i);
  if (nameAttr?.[1]) {
    return nameAttr[1];
  }

  return null;
}

/** Indent depth from a cell's inline `padding-left`/`margin-left` in px, at 12px per level. */
function parseIndentDepth(attrs: string) {
  const style = attrs.match(/\bstyle=[\"']([^\"']+)[\"']/i)?.[1] ?? "";
  const offsetPatterns = [
    /padding-left:\s*([0-9.]+)px/i,
    /margin-left:\s*([0-9.]+)px/i,
  ];

  for (const pattern of offsetPatterns) {
    const raw = style.match(pattern)?.[1];
    if (!raw) {
      continue;
    }
    const numeric = Number.parseFloat(raw);
    if (Number.isFinite(numeric) && numeric > 0) {
      return Math.max(0, Math.round(numeric / 12));
    }
  }

  return 0;
}

/**
 * Parses a financial-statement cell into a number.
 *
 * Accepts currency symbols, thousands separators, a Unicode minus sign,
 * accounting-style parentheses for negatives (also after a currency symbol,
 * e.g. `$ (1,234)`), and trailing footnote markers such as `[1]`.
 *
 * @returns `null` for blanks, `n/a`, dashes, percentages and any cell that is
 *   not purely numeric (for example "12 Months Ended").
 */
function parseStatementNumber(raw: string): number | null {
  const trimmed = raw.trim();
  if (!trimmed || /^n\/a$/i.test(trimmed) || /^--+$/.test(trimmed)) {
    return null;
  }
  if (/%$/.test(trimmed)) {
    return null;
  }

  const compact = trimmed
    .replace(/\[[^\]]*\]/g, "")
    .replace(/[$,\s]/g, "")
    .replace(/\u2212/g, "-");
  // Checked after currency stripping so "$ (1,234)" is seen as "(1234)".
  const negative = compact.startsWith("(") && compact.endsWith(")");
  const cleaned = compact.replace(/[()]/g, "");
  // parseFloat alone would accept any numeric prefix ("12MonthsEnded" → 12).
  if (!/^[-+]?(?:\d+\.?\d*|\.\d+)$/.test(cleaned)) {
    return null;
  }

  const value = Number.parseFloat(cleaned);
  if (!Number.isFinite(value)) {
    return null;
  }
  return negative ? -Math.abs(value) : value;
}

/** Lower-cases `value` and joins its alphanumeric runs with single dashes. */
function slug(value: string) {
  return value
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
}

/**
 * Parses a rendered statement report into rows. Every table is parsed and the
 * one yielding the most rows wins. A row needs a label cell and at least one
 * numeric cell; the first numeric cell is the row's value.
 */
function parseStatementRowsFromReport(content: string): StatementParseRow[] {
  const tables = [...content.matchAll(/<table\b[^>]*>([\s\S]*?)<\/table>/gi)];
  if (tables.length === 0) {
    return [];
  }

  let bestRows: StatementParseRow[] = [];

  for (const tableMatch of tables) {
    const table = tableMatch[0] ?? "";
    const rows: StatementParseRow[] = [];
    let order = 0;

    for (const rowMatch of table.matchAll(/<tr\b[^>]*>([\s\S]*?)<\/tr>/gi)) {
      const rowMarkup = rowMatch[0] ?? "";
      const cells = [
        ...rowMarkup.matchAll(/<t[dh]\b([^>]*)>([\s\S]*?)<\/t[dh]>/gi),
      ];
      if (cells.length < 2) {
        continue;
      }

      const labelCell = cells[0];
      const labelAttrs = labelCell?.[1] ?? "";
      const label = sanitizeCellText(labelCell?.[2] ?? "");
      // Bare section headings carry no value of their own.
      if (!label || /^(years ended|assets|liabilities|equity)$/i.test(label)) {
        continue;
      }

      let value: number | null = null;
      for (let i = 1; i < cells.length; i += 1) {
        const parsed = parseStatementNumber(
          sanitizeCellText(cells[i]?.[2] ?? ""),
        );
        if (parsed !== null) {
          value = parsed;
          break;
        }
      }
      if (value === null) {
        continue;
      }

      order += 1;
      const concept = extractConceptFromMarkup(rowMarkup);
      rows.push({
        key: concept ? slug(concept) : `${slug(label)}-${order}`,
        label,
        concept,
        order,
        depth: parseIndentDepth(labelAttrs),
        isSubtotal: /^total\b/i.test(label) || /\bsubtotal\b/i.test(label),
        value,
      });
    }

    if (rows.length > bestRows.length) {
      bestRows = rows;
    }
  }

  return bestRows;
}

/** Converts parsed rows into snapshot rows with a single-period `values` map. */
function toSnapshotRows(
  periodId: string,
  rows: StatementParseRow[],
): FilingFaithfulStatementSnapshotRow[] {
  return rows.map((row) => ({
    key: row.key,
    label: row.label,
    concept: row.concept,
    order: row.order,
    depth: row.depth,
    isSubtotal: row.isSubtotal,
    values: {
      [periodId]: row.value,
    },
  }));
}

/** True when the row's concept or label matches any of the definition's patterns. */
function matchStandardizedDefinition(
  row: FilingFaithfulStatementSnapshotRow,
  definition: CanonicalRowDefinition,
) {
  const concept = row.concept ?? "";
  return (
    definition.conceptPatterns.some((pattern) => pattern.test(concept)) ||
    definition.labelPatterns.some((pattern) => pattern.test(row.label))
  );
}

/** Value from the filing's stored metrics for the few canonical rows that have one; otherwise `null`. */
function fallbackMetricValue(
  kind: FinancialStatementKind,
  rowKey: string,
  metrics: Filing["metrics"],
) {
  if (!metrics) {
    return null;
  }

  if (kind === "income" && rowKey === "revenue") {
    return metrics.revenue ?? null;
  }
  if (kind === "income" && rowKey === "net-income") {
    return metrics.netIncome ?? null;
  }
  if (kind === "balance" && rowKey === "total-assets") {
    return metrics.totalAssets ?? null;
  }
  if (kind === "balance" && rowKey === "cash-and-equivalents") {
    return metrics.cash ?? null;
  }
  if (kind === "balance" && rowKey === "total-debt") {
    return metrics.debt ?? null;
  }

  return null;
}

/**
 * Maps filing-faithful rows onto the canonical rows for `kind`. Every
 * canonical row is always emitted (value from the first unused matching row,
 * else from `metrics`, else null); unmatched source rows follow as
 * `other-*` rows.
 */
function toStandardizedRows(
  kind: FinancialStatementKind,
  periodId: string,
  rows: FilingFaithfulStatementSnapshotRow[],
  metrics: Filing["metrics"],
): StandardizedStatementSnapshotRow[] {
  const definitions = STANDARDIZED_ROW_DEFINITIONS[kind];
  const usedKeys = new Set<string>();
  const standardizedRows: StandardizedStatementSnapshotRow[] = [];

  for (const definition of definitions) {
    const matched = rows.find(
      (row) =>
        !usedKeys.has(row.key) && matchStandardizedDefinition(row, definition),
    );
    const matchedValue = matched?.values[periodId] ?? null;
    const fallbackValue =
      matchedValue === null
        ? fallbackMetricValue(kind, definition.key, metrics)
        : null;

    if (matched) {
      usedKeys.add(matched.key);
    }

    standardizedRows.push({
      key: definition.key,
      label: definition.label,
      concept: matched?.concept ?? definition.key,
      category: definition.category,
      sourceConcepts: matched?.concept ? [matched.concept] : [],
      values: {
        [periodId]: matchedValue ?? fallbackValue,
      },
    });
  }

  for (const row of rows) {
    if (usedKeys.has(row.key)) {
      continue;
    }
    standardizedRows.push({
      key: `other-${row.key}`,
      label: row.label,
      concept: row.concept ?? row.key,
      category: "other",
      sourceConcepts: row.concept ? [row.concept] : [],
      values: {
        [periodId]: row.values[periodId] ?? null,
      },
    });
  }

  return standardizedRows;
}

/**
 * Collects the XBRL contexts that carry explicit dimension members, keyed by
 * context id. The period end is taken from `<endDate>`, or from `<instant>`
 * for point-in-time contexts.
 */
function parseContextsWithDimensions(raw: string) {
  const contexts = new Map<string, DimensionContext>();
  const contextPattern =
    /<(?:[a-z0-9]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9]+:)?context>/gi;

  for (const match of raw.matchAll(contextPattern)) {
    const contextId = match[1] ?? "";
    const block = match[2] ?? "";
    if (!contextId) {
      continue;
    }

    const periodMatch =
      block.match(
        /<(?:[a-z0-9]+:)?endDate>([^<]+)<\/(?:[a-z0-9]+:)?endDate>/i,
      ) ??
      block.match(
        /<(?:[a-z0-9]+:)?instant>([^<]+)<\/(?:[a-z0-9]+:)?instant>/i,
      );
    const endDate = periodMatch?.[1]?.trim() ?? null;

    const dimensions: Array<{ axis: string; member: string }> = [];
    const dimPattern =
      /<(?:[a-z0-9]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9]+:)?explicitMember>/gi;
    for (const dimMatch of block.matchAll(dimPattern)) {
      const axis = (dimMatch[1] ?? "").trim();
      const member = (dimMatch[2] ?? "").trim();
      if (!axis || !member) {
        continue;
      }
      dimensions.push({ axis, member });
    }

    if (dimensions.length === 0) {
      continue;
    }

    contexts.set(contextId, { endDate, dimensions });
  }

  return contexts;
}

/** Keyword heuristic assigning a concept name to a statement kind; first matching rule wins. */
function statementKindFromConcept(
  concept: string,
): FinancialStatementKind | null {
  const normalized = concept.toLowerCase();

  if (
    /cash|operatingactivities|investingactivities|financingactivities/.test(
      normalized,
    )
  ) {
    return "cash_flow";
  }
  if (/equity|retainedearnings|additionalpaidincapital/.test(normalized)) {
    return "equity";
  }
  if (/comprehensiveincome/.test(normalized)) {
    return "comprehensive_income";
  }
  if (/asset|liabilit|debt/.test(normalized)) {
    return "balance";
  }
  if (/revenue|income|profit|expense|costof/.test(normalized)) {
    return "income";
  }

  return null;
}

/**
 * Extracts dimensional facts from an inline XBRL document: every
 * `ix:nonFraction` whose context has explicit members yields one row per
 * (axis, member) pair, grouped by statement kind.
 *
 * Values are the displayed numbers with the `sign` attribute applied; the
 * `scale` attribute is not applied. At most 8,000 facts are scanned.
 */
function parseDimensionFacts(raw: string, fallbackPeriodId: string) {
  const rows = createStatementRecord<DimensionStatementSnapshotRow[]>(() => []);
  const contexts = parseContextsWithDimensions(raw);
  if (contexts.size === 0) {
    return rows;
  }

  const ixPattern = /<ix:nonfraction\b([^>]*)>([\s\S]*?)<\/ix:nonfraction>/gi;
  let guard = 0;

  for (const match of raw.matchAll(ixPattern)) {
    guard += 1;
    if (guard > 8_000) {
      break;
    }

    const attrs = match[1] ?? "";
    const body = sanitizeCellText(match[2] ?? "");
    const contextRef =
      attrs.match(/\bcontextref=["']([^"']+)["']/i)?.[1] ?? "";
    const concept = attrs.match(/\bname=["']([^"']+)["']/i)?.[1] ?? "";
    const unit = attrs.match(/\bunitref=["']([^"']+)["']/i)?.[1] ?? null;
    const sign = attrs.match(/\bsign=["']([^"']*)["']/i)?.[1] ?? "";

    if (!contextRef || !concept) {
      continue;
    }

    const context = contexts.get(contextRef);
    if (!context || context.dimensions.length === 0) {
      continue;
    }

    const kind = statementKindFromConcept(concept);
    if (!kind) {
      continue;
    }

    const parsed = parseStatementNumber(body);
    if (parsed === null) {
      continue;
    }
    // Inline XBRL marks negatives with sign="-"; the element text is unsigned.
    const value = sign.trim() === "-" ? -Math.abs(parsed) : parsed;

    const periodId = context.endDate ?? fallbackPeriodId;
    const rowKey = slug(concept);
    for (const dimension of context.dimensions) {
      rows[kind].push({
        rowKey,
        concept,
        periodId,
        axis: dimension.axis,
        member: dimension.member,
        value,
        unit,
      });
    }
  }

  return rows;
}

/** Returns copies of `rows` with `hasDimensions` set when a dimension row shares the row's key or concept. */
function markHasDimensions<
  T extends { key: string; concept: string | null; hasDimensions?: boolean },
>(rows: T[], dimensions: DimensionStatementSnapshotRow[]) {
  const dimensionConcepts = new Set(
    dimensions.map((item) => item.concept?.toLowerCase() ?? "").filter(Boolean),
  );
  const dimensionRowKeys = new Set(dimensions.map((item) => item.rowKey));

  return rows.map((row) => {
    const concept = row.concept?.toLowerCase() ?? "";
    const hasDimensions =
      dimensionRowKeys.has(row.key) ||
      (concept ? dimensionConcepts.has(concept) : false);
    return {
      ...row,
      hasDimensions,
    };
  });
}

/** Filing-faithful bundle with one period and no rows. */
function emptyStatementBundle(
  period: FilingStatementSnapshotPeriod,
): FilingStatementBundle {
  return {
    periods: [period],
    statements: createStatementRecord<FilingFaithfulStatementSnapshotRow[]>(
      () => [],
    ),
  };
}

/** Standardized bundle with one period and no rows. */
function emptyStandardizedBundle(
  period: FilingStatementSnapshotPeriod,
): StandardizedStatementBundle {
  return {
    periods: [period],
    statements: createStatementRecord<StandardizedStatementSnapshotRow[]>(
      () => [],
    ),
  };
}

/** Dimension bundle with no rows. */
function emptyDimensionBundle(): DimensionStatementBundle {
  return {
    statements: createStatementRecord<DimensionStatementSnapshotRow[]>(
      () => [],
    ),
  };
}

/**
 * Builds the statement snapshot for one 10-K/10-Q filing.
 *
 * 1. Reads FilingSummary.xml and parses the best-matching report per
 *    statement kind into filing-faithful rows (a failing report is skipped).
 * 2. Reads the primary document and extracts dimensional inline-XBRL facts.
 * 3. Derives standardized rows, using `input.metrics` for missing values.
 *
 * Never rejects on fetch/parse failures: the first error message is reported
 * in `parse_error`. `parse_status` is "ready" when all five statements were
 * parsed, "partial" when at least one statement was parsed or at least one
 * standardized row has a value, and "failed" otherwise.
 */
export async function hydrateFilingStatementSnapshot(
  input: FilingStatementHydrationInput,
  options?: {
    fetchImpl?: typeof fetch;
  },
): Promise<FilingStatementHydrationResult> {
  const periodId = `${input.filingDate}-${compactAccessionNumber(input.accessionNumber)}`;
  const period: FilingStatementSnapshotPeriod = {
    id: periodId,
    filingId: input.filingId,
    accessionNumber: input.accessionNumber,
    filingDate: input.filingDate,
    periodStart: null,
    periodEnd: input.filingDate,
    filingType: input.filingType,
    periodLabel: input.filingType === "10-Q" ? "Quarter End" : "Fiscal Year End",
  };

  const fetchImpl = options?.fetchImpl ?? fetch;
  const statementBundle = emptyStatementBundle(period);
  const standardizedBundle = emptyStandardizedBundle(period);
  const dimensionBundle = emptyDimensionBundle();
  let source: FilingStatementHydrationResult["source"] =
    "companyfacts_fallback";
  let parseError: string | null = null;

  try {
    const filingDirectory = resolveFilingDirectoryUrl({
      filingUrl: input.filingUrl,
      cik: input.cik,
      accessionNumber: input.accessionNumber,
    });

    if (filingDirectory) {
      const summaryXml = await fetchText(
        `${filingDirectory}FilingSummary.xml`,
        fetchImpl,
      );
      const reports = parseFilingSummaryReports(summaryXml);

      for (const kind of FINANCIAL_STATEMENT_KINDS) {
        const report = chooseStatementReport(kind, reports);
        if (!report) {
          continue;
        }

        const reportUrl = toAbsoluteArchiveUrl(
          filingDirectory,
          report.htmlFileName ?? report.xmlFileName,
        );
        if (!reportUrl) {
          continue;
        }

        try {
          const reportText = await fetchText(reportUrl, fetchImpl);
          const parsedRows = parseStatementRowsFromReport(reportText);
          if (parsedRows.length === 0) {
            continue;
          }
          source = "sec_filing_summary";
          statementBundle.statements[kind] = toSnapshotRows(
            periodId,
            parsedRows,
          );
        } catch {
          // Continue to other statements when one report fails.
        }
      }
    }
  } catch (error) {
    parseError =
      error instanceof Error
        ? error.message
        : "Failed to parse filing summary";
  }

  try {
    const primaryUrl = resolvePrimaryFilingUrl({
      filingUrl: input.filingUrl,
      cik: input.cik,
      accessionNumber: input.accessionNumber,
      primaryDocument: input.primaryDocument,
    });

    if (primaryUrl) {
      const rawDocument = await fetchText(primaryUrl, fetchImpl);
      const dimensions = parseDimensionFacts(rawDocument, periodId);
      for (const kind of FINANCIAL_STATEMENT_KINDS) {
        dimensionBundle.statements[kind] = dimensions[kind];
      }

      const hasAnyDimensions = FINANCIAL_STATEMENT_KINDS.some(
        (kind) => dimensionBundle.statements[kind].length > 0,
      );
      if (hasAnyDimensions && source === "companyfacts_fallback") {
        source = "xbrl_instance";
      }
    }
  } catch (error) {
    // Keep the earlier (filing summary) error when there is one.
    if (!parseError) {
      parseError =
        error instanceof Error
          ? error.message
          : "Failed to parse inline XBRL dimensions";
    }
  }

  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    const faithfulRows = statementBundle.statements[kind];
    standardizedBundle.statements[kind] = toStandardizedRows(
      kind,
      periodId,
      faithfulRows,
      input.metrics,
    );

    statementBundle.statements[kind] = markHasDimensions(
      faithfulRows,
      dimensionBundle.statements[kind],
    );
    standardizedBundle.statements[kind] = markHasDimensions(
      standardizedBundle.statements[kind],
      dimensionBundle.statements[kind],
    );
  }

  const statementCount = FINANCIAL_STATEMENT_KINDS.filter(
    (kind) => statementBundle.statements[kind].length > 0,
  ).length;
  // Canonical rows are always emitted, so only rows that actually carry a
  // value count as standardized data; otherwise "failed" could never occur.
  const standardizedCount = FINANCIAL_STATEMENT_KINDS.filter((kind) =>
    standardizedBundle.statements[kind].some(
      (row) => (row.values[periodId] ?? null) !== null,
    ),
  ).length;
  const parseStatus: FilingStatementHydrationResult["parse_status"] =
    statementCount === FINANCIAL_STATEMENT_KINDS.length
      ? "ready"
      : statementCount > 0 || standardizedCount > 0
        ? "partial"
        : "failed";

  return {
    filing_id: input.filingId,
    ticker: input.ticker.trim().toUpperCase(),
    filing_date: input.filingDate,
    filing_type: input.filingType,
    period_end: input.filingDate,
    statement_bundle: statementBundle,
    standardized_bundle: standardizedBundle,
    dimension_bundle: dimensionBundle,
    parse_status: parseStatus,
    parse_error:
      parseStatus === "failed"
        ? (parseError ?? "No financial statement tables found")
        : parseError,
    source,
  };
}

/** Internal parsers exposed for unit tests. */
export const __statementInternals = {
  parseFilingSummaryReports,
  parseStatementRowsFromReport,
  parseDimensionFacts,
  statementKindLabel,
};