Files
Neon-Desk/lib/server/sec.ts
francy51 14a7773504 Add consolidated disclosure statement type
Create unified disclosure statement to organize footnote disclosures
separate from primary financial statements. Disclosures are now grouped
by type (tax, debt, securities, derivatives, leases, intangibles, ma,
revenue, cash_flow) in a dedicated statement type for cleaner UI
presentation.
2026-03-16 18:54:23 -04:00

1623 lines
41 KiB
TypeScript

import type { Filing, FinancialStatementKind } from "@/lib/types";
import type {
DimensionStatementBundle,
DimensionStatementSnapshotRow,
FilingFaithfulStatementSnapshotRow,
FilingStatementBundle,
FilingStatementSnapshotPeriod,
StandardizedStatementBundle,
StandardizedStatementSnapshotRow,
} from "@/lib/server/repos/filing-statements";
/** Filing form identifier, taken from the shared `Filing` model. */
type FilingType = Filing["filing_type"];
/** The metrics object carried by a `Filing`, with null/undefined excluded. */
type FilingMetrics = NonNullable<Filing["metrics"]>;
/** One entry of the ticker directory payload loaded by `ensureTickerCache`. */
type TickerDirectoryRecord = {
cik_str: number;
ticker: string;
title: string;
};
/**
 * Subset of the submissions payload read by `fetchRecentFilings`.
 * The arrays under `recent` are read by shared index: position `i` of each
 * array is treated as describing the same filing.
 */
type RecentFilingsPayload = {
filings?: {
recent?: {
accessionNumber?: string[];
filingDate?: string[];
form?: string[];
primaryDocument?: string[];
};
};
cik?: string;
name?: string;
};
/**
 * Subset of the company-facts payload read by `collectFactSeries`:
 * only the "us-gaap" taxonomy, keyed by tag and then by unit name.
 */
type CompanyFactsPayload = {
facts?: {
"us-gaap"?: Record<string, { units?: Record<string, CompanyFactPoint[]> }>;
};
};
/** A single reported fact; every field is optional in the payload. */
type CompanyFactPoint = {
// Numeric value; points without a finite number are dropped by collectFactSeries.
val?: number;
// Date strings; `filed` is preferred over `end` when ordering facts.
end?: string;
filed?: string;
// Accession number, compared digit-only against the filing being looked up.
accn?: string;
// Form type, compared after normalizeForm (amendment suffix removed).
form?: string;
// NOTE(review): fy / fp / frame are not read anywhere in the visible code.
fy?: number;
fp?: string;
frame?: string;
};
/** A filing entry as returned by `fetchRecentFilings`. */
type SecFiling = {
ticker: string;
cik: string;
companyName: string;
filingType: FilingType;
filingDate: string;
accessionNumber: string;
filingUrl: string | null;
submissionUrl: string | null;
primaryDocument: string | null;
};
/** Fields needed to locate a filing's primary document (see `resolvePrimaryFilingUrl`). */
type FilingDocumentInput = {
filingUrl: string | null;
cik: string;
accessionNumber: string;
primaryDocument: string | null;
};
/** Optional overrides for `fetchPrimaryFilingText`. */
type FetchPrimaryFilingTextOptions = {
// Replacement for the global fetch.
fetchImpl?: typeof fetch;
// Character cap passed to trimSecDocumentTextForPrompt.
maxChars?: number;
};
/** Normalized (and possibly truncated) text of a filing's primary document. */
export type FilingDocumentText = {
source: "primary_document";
url: string;
text: string;
truncated: boolean;
};
/** Identifies the filing a metric value should be matched against. */
type FilingMetricsLookupInput = {
accessionNumber: string;
filingDate: string;
filingType: FilingType;
};
// Forms kept by fetchRecentFilings; forms are compared after normalizeForm,
// so an amendment such as "10-K/A" is treated as its base form.
const SUPPORTED_FORMS: FilingType[] = ["10-K", "10-Q", "8-K"];
// Maximum age of the in-memory ticker directory before it is refetched (12 hours).
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 12;
// Default character cap applied by trimSecDocumentTextForPrompt.
const FILING_TEXT_MAX_CHARS = 24_000;
// us-gaap tags tried, in order, for each metric; pickFactByTags returns the
// value of the first tag that yields one.
const METRIC_TAGS = {
revenue: [
"Revenues",
"SalesRevenueNet",
"RevenueFromContractWithCustomerExcludingAssessedTax",
"TotalRevenuesAndOtherIncome",
],
netIncome: ["NetIncomeLoss", "ProfitLoss"],
totalAssets: ["Assets"],
cash: [
"CashAndCashEquivalentsAtCarryingValue",
"CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
],
debt: [
"LongTermDebtAndCapitalLeaseObligations",
"LongTermDebtNoncurrent",
"LongTermDebt",
"DebtAndFinanceLeaseLiabilities",
],
} as const;
// Module-level ticker directory, keyed by upper-cased ticker symbol.
// Replaced wholesale by ensureTickerCache.
let tickerCache = new Map<string, TickerDirectoryRecord>();
// Date.now() timestamp of the last successful directory load; 0 means never loaded.
let tickerCacheLoadedAt = 0;
/**
 * Returns the User-Agent header value for SEC requests: the
 * `SEC_USER_AGENT` environment variable when it is set and non-empty,
 * otherwise a built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;
  if (configured) {
    return configured;
  }
  return "Fiscal Clone <support@fiscal.local>";
}
/** Returns the current date as the first ten characters (YYYY-MM-DD) of the ISO-8601 UTC timestamp. */
function todayIso() {
  const now = new Date();
  return now.toISOString().substring(0, 10);
}
// Named character references understood by decodeHtmlEntities.
// A Map (not an object literal) so names such as "constructor" cannot
// resolve to inherited properties.
const HTML_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["nbsp", " "],
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
]);

/**
 * Decodes the HTML character references handled by this module: a small set
 * of named entities plus decimal (`&#66;`) and hexadecimal (`&#x41;`) numeric
 * references.
 *
 * Decoding happens in a single pass, so text that is already escaped is only
 * unescaped once (`&amp;lt;` becomes `&lt;`, not `<`). Non-breaking spaces in
 * any spelling (`&nbsp;`, `&#160;`, `&#xA0;`) become a regular space so later
 * whitespace collapsing sees them. Numeric references outside the Unicode
 * range become a space; unrecognised named entities are left untouched.
 *
 * @param value Text that may contain character references.
 * @returns The text with recognised references replaced.
 */
function decodeHtmlEntities(value: string) {
  const decodeCodePoint = (code: number) => {
    if (!Number.isFinite(code) || code < 0 || code > 0x10ffff) {
      return " ";
    }
    if (code === 0xa0) {
      return " ";
    }
    try {
      return String.fromCodePoint(code);
    } catch {
      return " ";
    }
  };
  return value.replace(
    /&(?:#x([0-9a-f]+)|#([0-9]+)|([a-z]+));/gi,
    (
      match: string,
      hexDigits: string | undefined,
      decimalDigits: string | undefined,
      entityName: string | undefined,
    ) => {
      if (hexDigits !== undefined) {
        return decodeCodePoint(Number.parseInt(hexDigits, 16));
      }
      if (decimalDigits !== undefined) {
        return decodeCodePoint(Number.parseInt(decimalDigits, 10));
      }
      const replacement = HTML_NAMED_ENTITIES.get(
        (entityName ?? "").toLowerCase(),
      );
      return replacement ?? match;
    },
  );
}
/**
 * Converts raw filing HTML/text into plain text.
 *
 * Steps, in order: normalise line endings, drop script/style/noscript blocks
 * and comments, turn block-level tags into newlines, strip the remaining
 * tags, decode character references, then collapse redundant whitespace.
 *
 * A CRLF pair is treated as ONE line break; mapping every `\r` to `\n`
 * separately would insert a blank line after each line of a CRLF document.
 *
 * @param raw Raw document body as fetched.
 * @returns Trimmed plain text with at most one blank line between paragraphs.
 */
export function normalizeSecDocumentText(raw: string) {
  const withoutMarkup = raw
    .replace(/\r\n?/g, "\n")
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, " ")
    .replace(/<!--[\s\S]*?-->/g, " ")
    // Block-level elements become line breaks so paragraphs stay separated.
    .replace(
      /<\/?(p|div|section|article|li|tr|td|th|h[1-6]|br|hr)[^>]*>/gi,
      "\n",
    )
    .replace(/<[^>]+>/g, " ");
  return decodeHtmlEntities(withoutMarkup)
    .replace(/[ \t]+\n/g, "\n")
    .replace(/\n[ \t]+/g, "\n")
    .replace(/[ \t]{2,}/g, " ")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
/**
 * Caps `text` at `maxChars` characters, preferring to cut at a newline or
 * space when one exists in the last 30% of the allowed window.
 *
 * The effective limit is never below 1,000 characters. A NaN `maxChars`
 * falls back to that floor; previously it propagated through
 * `Math.max` and produced an empty, "truncated" result.
 *
 * @param text Normalized document text.
 * @param maxChars Requested character cap (defaults to FILING_TEXT_MAX_CHARS).
 * @returns The possibly shortened text and whether anything was cut.
 */
export function trimSecDocumentTextForPrompt(
  text: string,
  maxChars = FILING_TEXT_MAX_CHARS,
) {
  const requested = Math.trunc(maxChars);
  const safeMax = Number.isNaN(requested) ? 1_000 : Math.max(requested, 1_000);
  if (text.length <= safeMax) {
    return { text, truncated: false };
  }
  const slice = text.slice(0, safeMax);
  const newlineBoundary = slice.lastIndexOf("\n");
  const wordBoundary = slice.lastIndexOf(" ");
  const boundary = Math.max(newlineBoundary, wordBoundary);
  // Only back up to a boundary when that keeps most of the window.
  const clipped = (
    boundary > safeMax * 0.7 ? slice.slice(0, boundary) : slice
  ).trimEnd();
  return { text: clipped, truncated: true };
}
/** Removes every hyphen from an accession number (e.g. "0000320193-24-000010" -> "000032019324000010"). */
function compactAccessionNumber(value: string) {
  return value.split("-").join("");
}
/**
 * Reduces an accession number to its digits so differently formatted values
 * compare equal. Missing input yields an empty string.
 */
function normalizeAccessionKey(value: string | undefined | null) {
  if (value == null) {
    return "";
  }
  return value.replace(/[^0-9]/g, "");
}
/**
 * Upper-cases and trims a form name and removes a trailing "/A" amendment
 * suffix. Missing or blank input yields an empty string.
 */
function normalizeForm(value: string | undefined | null) {
  if (value == null) {
    return "";
  }
  const upper = value.trim().toUpperCase();
  if (upper.endsWith("/A")) {
    return upper.slice(0, upper.length - 2);
  }
  return upper;
}
/** Parses a date string with `Date.parse`; missing or empty input yields NaN. */
function parseDate(value: string | undefined | null) {
  return value ? Date.parse(value) : Number.NaN;
}
/**
 * Produces the CIK form used in archive paths: digits only, with leading
 * zeros removed by a numeric round-trip. Returns null when the input has no
 * digits or the number is not finite.
 */
function normalizeCikForPath(value: string) {
  const digitsOnly = value.replace(/[^0-9]/g, "");
  if (digitsOnly.length === 0) {
    return null;
  }
  const asNumber = Number(digitsOnly);
  return Number.isFinite(asNumber) ? String(asNumber) : null;
}
/**
 * Determines the URL of a filing's primary document.
 *
 * An explicit, non-blank `filingUrl` is returned (trimmed) as-is. Otherwise
 * the URL is assembled from the CIK, the hyphen-free accession number and the
 * primary document name; null is returned when any of those is unavailable.
 */
export function resolvePrimaryFilingUrl(input: FilingDocumentInput) {
  const explicitUrl = input.filingUrl?.trim();
  if (explicitUrl) {
    return explicitUrl;
  }
  const documentName = input.primaryDocument;
  if (!documentName) {
    return null;
  }
  const cikSegment = normalizeCikForPath(input.cik);
  const accessionSegment = compactAccessionNumber(input.accessionNumber);
  if (cikSegment && accessionSegment) {
    return `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/${documentName}`;
  }
  return null;
}
/**
 * Downloads a filing's primary document and returns it as prompt-ready text.
 *
 * @param input Fields used to resolve the document URL.
 * @param options Optional fetch replacement and character cap.
 * @returns The normalized, possibly truncated text with its source URL, or
 *   null when no URL can be resolved or the document has no text.
 * @throws Error when the HTTP response is not OK.
 */
export async function fetchPrimaryFilingText(
  input: FilingDocumentInput,
  options?: FetchPrimaryFilingTextOptions,
): Promise<FilingDocumentText | null> {
  const documentUrl = resolvePrimaryFilingUrl(input);
  if (!documentUrl) {
    return null;
  }

  const request = options?.fetchImpl ?? fetch;
  const charLimit = options?.maxChars ?? FILING_TEXT_MAX_CHARS;

  const response = await request(documentUrl, {
    headers: {
      "User-Agent": envUserAgent(),
      Accept: "text/html, text/plain;q=0.9, */*;q=0.8",
    },
    cache: "no-store",
  });
  if (!response.ok) {
    throw new Error(`SEC filing request failed (${response.status})`);
  }

  const plainText = normalizeSecDocumentText(await response.text());
  if (!plainText) {
    return null;
  }

  const { text, truncated } = trimSecDocumentTextForPrompt(plainText, charLimit);
  if (!text) {
    return null;
  }
  return { source: "primary_document", url: documentUrl, text, truncated };
}
/**
 * GETs `url` with the SEC User-Agent header and returns the parsed JSON body,
 * cast to `T` without validation.
 *
 * @throws Error when the HTTP response is not OK.
 */
async function fetchJson<T>(url: string): Promise<T> {
  const headers = {
    "User-Agent": envUserAgent(),
    Accept: "application/json",
  };
  const response = await fetch(url, { headers, cache: "no-store" });
  if (response.ok) {
    return (await response.json()) as T;
  }
  throw new Error(`SEC request failed (${response.status})`);
}
/**
 * Makes sure the module-level ticker directory is loaded and not older than
 * the TTL. When a refresh is needed the directory is fetched and the cache is
 * replaced in one assignment, keyed by upper-cased ticker.
 */
async function ensureTickerCache() {
  const ageMs = Date.now() - tickerCacheLoadedAt;
  const cacheUsable = ageMs < TICKER_CACHE_TTL_MS && tickerCache.size > 0;
  if (cacheUsable) {
    return;
  }

  const directory = await fetchJson<Record<string, TickerDirectoryRecord>>(
    "https://www.sec.gov/files/company_tickers.json",
  );
  const entries = Object.values(directory).map(
    (record): [string, TickerDirectoryRecord] => [
      record.ticker.toUpperCase(),
      record,
    ],
  );
  tickerCache = new Map(entries);
  tickerCacheLoadedAt = Date.now();
}
/**
 * Looks up a ticker (case-insensitive, surrounding whitespace ignored) in the
 * cached ticker directory.
 *
 * @returns The normalized ticker, its CIK as a string, and the company title.
 * @throws Error when the ticker is not in the directory.
 */
async function resolveTicker(ticker: string) {
  await ensureTickerCache();
  const symbol = ticker.trim().toUpperCase();
  const entry = tickerCache.get(symbol);
  if (entry) {
    return {
      ticker: symbol,
      cik: String(entry.cik_str),
      companyName: entry.title,
    };
  }
  throw new Error(`Ticker ${symbol} not found in SEC directory`);
}
/**
 * Returns the most recently filed value for `tag`, regardless of which form
 * reported it, or null when the tag has no usable data points.
 *
 * This used to delegate to `pickFactForFiling` with a placeholder filing of
 * type "10-Q"; that restricted the search to 10-Q facts first, so a newer
 * value reported in a 10-K was ignored whenever any 10-Q fact existed.
 *
 * @param payload Company-facts payload.
 * @param tag us-gaap tag name.
 */
function pickLatestFact(
  payload: CompanyFactsPayload,
  tag: string,
): number | null {
  // collectFactSeries already keeps only points with a finite numeric value.
  const latest = pickMostRecentFact(collectFactSeries(payload, tag));
  return latest?.val ?? null;
}
/**
 * Gathers the data points reported for a us-gaap tag.
 *
 * Points from USD-denominated units (exactly "USD", or a unit starting with
 * "USD" that is not "USD/shares") are preferred; points from all other units
 * are used only when no USD points exist. Points without a finite numeric
 * value are dropped.
 *
 * @returns The selected points, or an empty array when the tag is absent.
 */
function collectFactSeries(
  payload: CompanyFactsPayload,
  tag: string,
): CompanyFactPoint[] {
  const unitsByName = payload.facts?.["us-gaap"]?.[tag]?.units;
  if (!unitsByName) {
    return [];
  }

  const isUsdUnit = (unit: string) =>
    unit === "USD" || /^USD(?!\/shares)/i.test(unit);

  const populated = Object.entries(unitsByName).filter(
    ([, series]) => Array.isArray(series) && series.length > 0,
  );
  const usdPoints = populated
    .filter(([unit]) => isUsdUnit(unit))
    .flatMap(([, series]) => series);
  const otherPoints = populated
    .filter(([unit]) => !isUsdUnit(unit))
    .flatMap(([, series]) => series);

  const chosen = usdPoints.length > 0 ? usdPoints : otherPoints;
  return chosen.filter(
    (point) => typeof point.val === "number" && Number.isFinite(point.val),
  );
}
/**
 * Returns the point with the latest date (`filed`, falling back to `end`),
 * or null for an empty list. Points whose date cannot be parsed sort after
 * all dated points. The input array is not modified.
 */
function pickMostRecentFact(points: CompanyFactPoint[]) {
  const newestFirst = (left: CompanyFactPoint, right: CompanyFactPoint) => {
    const leftTime = parseDate(left.filed ?? left.end);
    const rightTime = parseDate(right.filed ?? right.end);
    const leftDated = Number.isFinite(leftTime);
    const rightDated = Number.isFinite(rightTime);
    if (leftDated && rightDated) {
      return rightTime - leftTime;
    }
    if (rightDated) {
      return 1;
    }
    if (leftDated) {
      return -1;
    }
    return 0;
  };
  const ordered = points.slice().sort(newestFirst);
  return ordered[0] ?? null;
}
/**
 * Chooses the point whose date (`filed`, falling back to `end`) best fits
 * `targetDate`, a millisecond timestamp.
 *
 * - Empty input: null.
 * - Non-finite target, or no point with a parseable date: most recent point.
 * - Otherwise the latest point dated on or before the target; when none
 *   exists, the point nearest to the target (later date wins a tie).
 */
function pickClosestByDate(points: CompanyFactPoint[], targetDate: number) {
  if (points.length === 0) {
    return null;
  }
  if (!Number.isFinite(targetDate)) {
    return pickMostRecentFact(points);
  }

  const datedPoints = points
    .map((point) => ({ point, date: parseDate(point.filed ?? point.end) }))
    .filter((entry) => Number.isFinite(entry.date));
  if (datedPoints.length === 0) {
    return pickMostRecentFact(points);
  }

  const onOrBefore = datedPoints.filter((entry) => entry.date <= targetDate);
  if (onOrBefore.length > 0) {
    onOrBefore.sort((left, right) => right.date - left.date);
    return onOrBefore[0]?.point ?? null;
  }

  datedPoints.sort((left, right) => {
    const leftGap = Math.abs(left.date - targetDate);
    const rightGap = Math.abs(right.date - targetDate);
    const gapDifference = leftGap - rightGap;
    return gapDifference !== 0 ? gapDifference : right.date - left.date;
  });
  return datedPoints[0]?.point ?? null;
}
/**
 * Finds the value of `tag` that best corresponds to a specific filing.
 *
 * Lookup order:
 * 1. points whose accession number matches the filing (most recent of them);
 * 2. points reported on the same form type, closest to the filing date;
 * 3. any point, closest to the filing date.
 *
 * @returns The matched value, or null when the tag has no usable points.
 */
function pickFactForFiling(
  payload: CompanyFactsPayload,
  tag: string,
  filing: FilingMetricsLookupInput,
): number | null {
  const series = collectFactSeries(payload, tag);
  if (series.length === 0) {
    return null;
  }

  const usableValue = (point: CompanyFactPoint | null) =>
    typeof point?.val === "number" && Number.isFinite(point.val)
      ? point.val
      : null;

  const wantedAccession = normalizeAccessionKey(filing.accessionNumber);
  if (wantedAccession) {
    const sameAccession = series.filter(
      (point) => normalizeAccessionKey(point.accn) === wantedAccession,
    );
    if (sameAccession.length > 0) {
      const exact = usableValue(pickMostRecentFact(sameAccession));
      if (exact !== null) {
        return exact;
      }
    }
  }

  const wantedForm = normalizeForm(filing.filingType);
  const sameForm = wantedForm
    ? series.filter((point) => normalizeForm(point.form) === wantedForm)
    : series;
  const filedAt = parseDate(filing.filingDate);

  const formValue = usableValue(pickClosestByDate(sameForm, filedAt));
  if (formValue !== null) {
    return formValue;
  }
  return usableValue(pickClosestByDate(series, filedAt));
}
/**
 * Tries each tag in order and returns the first non-null value found for the
 * filing; later tags are not evaluated once a value is found.
 */
function pickFactByTags(
  payload: CompanyFactsPayload,
  tags: readonly string[],
  filing: FilingMetricsLookupInput,
) {
  let found: number | null = null;
  for (let index = 0; index < tags.length && found === null; index += 1) {
    found = pickFactForFiling(payload, tags[index] as string, filing);
  }
  return found;
}
/** Builds a fresh metrics object in which every metric is null. */
function emptyMetrics(): FilingMetrics {
  const blank: FilingMetrics = {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null,
  };
  return blank;
}
/**
 * Lists a company's most recent supported filings (10-K, 10-Q, 8-K; amended
 * forms are reported under their base form).
 *
 * @param ticker Ticker symbol; resolved through the cached ticker directory.
 * @param limit Maximum number of filings, clamped to 1..50. A NaN limit falls
 *   back to the default of 20 (previously NaN disabled the cap entirely).
 * @returns Filings in the order the submissions payload lists them; an empty
 *   array when the payload has no recent-filings section.
 * @throws Error when the ticker is unknown or a request fails.
 */
export async function fetchRecentFilings(
  ticker: string,
  limit = 20,
): Promise<SecFiling[]> {
  const requestedLimit = Math.trunc(limit);
  const safeLimit = Number.isNaN(requestedLimit)
    ? 20
    : Math.min(Math.max(requestedLimit, 1), 50);

  const company = await resolveTicker(ticker);
  const cikPadded = company.cik.padStart(10, "0");
  // Built once: used both for the request and as each filing's submissionUrl.
  const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`;
  const payload = await fetchJson<RecentFilingsPayload>(submissionUrl);

  const recent = payload.filings?.recent;
  if (!recent) {
    return [];
  }

  // The payload stores filings as parallel arrays sharing one index.
  const forms = recent.form ?? [];
  const accessionNumbers = recent.accessionNumber ?? [];
  const filingDates = recent.filingDate ?? [];
  const primaryDocuments = recent.primaryDocument ?? [];

  const filings: SecFiling[] = [];
  for (let i = 0; i < forms.length && filings.length < safeLimit; i += 1) {
    const normalizedForm = normalizeForm(forms[i]) as FilingType;
    if (!SUPPORTED_FORMS.includes(normalizedForm)) {
      continue;
    }
    const accessionNumber = accessionNumbers[i];
    if (!accessionNumber) {
      continue;
    }
    const documentName = primaryDocuments[i];
    const filingUrl = documentName
      ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccessionNumber(accessionNumber)}/${documentName}`
      : null;
    filings.push({
      ticker: company.ticker,
      cik: company.cik,
      companyName: payload.name ?? company.companyName,
      filingType: normalizedForm,
      // A missing filing date is replaced with today's date.
      filingDate: filingDates[i] ?? todayIso(),
      accessionNumber,
      filingUrl,
      submissionUrl,
      primaryDocument: documentName ?? null,
    });
  }
  return filings;
}
/**
 * Fetches a company's facts and returns the latest value of each headline
 * metric.
 *
 * Each metric tries every alternative tag in `METRIC_TAGS`, in order, the
 * same lists the per-filing lookup uses. Previously a single hard-coded tag
 * was queried per metric, so companies reporting under an alternative tag
 * (for example revenue under
 * `RevenueFromContractWithCustomerExcludingAssessedTax`) came back as null.
 *
 * @param cik Company CIK; left-padded with zeros to ten characters.
 * @throws Error when the request fails.
 */
export async function fetchLatestFilingMetrics(cik: string) {
  const normalized = cik.padStart(10, "0");
  const payload = await fetchJson<CompanyFactsPayload>(
    `https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`,
  );

  const latestOf = (tags: readonly string[]): number | null => {
    for (const tag of tags) {
      const value = pickLatestFact(payload, tag);
      if (value !== null) {
        return value;
      }
    }
    return null;
  };

  return {
    revenue: latestOf(METRIC_TAGS.revenue),
    netIncome: latestOf(METRIC_TAGS.netIncome),
    totalAssets: latestOf(METRIC_TAGS.totalAssets),
    cash: latestOf(METRIC_TAGS.cash),
    debt: latestOf(METRIC_TAGS.debt),
  } satisfies FilingMetrics;
}
/**
 * Resolves headline metrics for a batch of filings from one company-facts
 * request.
 *
 * Best effort: if the request or the lookup throws, every filing is mapped to
 * all-null metrics instead of propagating the error.
 *
 * @param cik Company CIK; left-padded with zeros to ten characters.
 * @param _ticker Unused.
 * @param filings Filings to resolve.
 * @returns Map from each filing's accession number (as given) to its metrics.
 */
export async function fetchFilingMetricsForFilings(
  cik: string,
  _ticker: string,
  filings: FilingMetricsLookupInput[],
) {
  const metricsByAccession = new Map<string, FilingMetrics>();
  if (filings.length === 0) {
    return metricsByAccession;
  }

  try {
    const paddedCik = cik.padStart(10, "0");
    const payload = await fetchJson<CompanyFactsPayload>(
      `https://data.sec.gov/api/xbrl/companyfacts/CIK${paddedCik}.json`,
    );
    const metricsFor = (filing: FilingMetricsLookupInput): FilingMetrics => ({
      revenue: pickFactByTags(payload, METRIC_TAGS.revenue, filing),
      netIncome: pickFactByTags(payload, METRIC_TAGS.netIncome, filing),
      totalAssets: pickFactByTags(payload, METRIC_TAGS.totalAssets, filing),
      cash: pickFactByTags(payload, METRIC_TAGS.cash, filing),
      debt: pickFactByTags(payload, METRIC_TAGS.debt, filing),
    });
    filings.forEach((filing) => {
      metricsByAccession.set(filing.accessionNumber, metricsFor(filing));
    });
  } catch {
    // Deliberate fallback: overwrite every entry with empty metrics.
    filings.forEach((filing) => {
      metricsByAccession.set(filing.accessionNumber, emptyMetrics());
    });
  }
  return metricsByAccession;
}
/** Input accepted by `hydrateFilingStatementSnapshot`; only 10-K and 10-Q filings are allowed by the type. */
type FilingStatementHydrationInput = {
filingId: number;
ticker: string;
cik: string;
accessionNumber: string;
filingDate: string;
filingType: "10-K" | "10-Q";
filingUrl: string | null;
primaryDocument: string | null;
// Previously resolved metrics; read by fallbackMetricValue when a row has no parsed value.
metrics: Filing["metrics"];
};
/** Result returned by `hydrateFilingStatementSnapshot`. */
type FilingStatementHydrationResult = {
filing_id: number;
ticker: string;
filing_date: string;
filing_type: "10-K" | "10-Q";
period_end: string | null;
statement_bundle: FilingStatementBundle | null;
standardized_bundle: StandardizedStatementBundle | null;
dimension_bundle: DimensionStatementBundle | null;
parse_status: "ready" | "partial" | "failed";
parse_error: string | null;
// Which data source produced the bundles.
source: "sec_filing_summary" | "xbrl_instance" | "companyfacts_fallback";
};
/** One `<Report>` entry extracted by `parseFilingSummaryReports`. */
type StatementReportDescriptor = {
shortName: string;
longName: string;
// Null when the element is missing or empty.
htmlFileName: string | null;
xmlFileName: string | null;
};
/** One table row extracted by `parseStatementRowsFromReport`. */
type StatementParseRow = {
// Slug of the concept when one was found, otherwise slug of the label plus the row order.
key: string;
label: string;
concept: string | null;
// 1-based position among the rows kept from the table.
order: number;
// Indentation level derived from the label cell's left padding/margin.
depth: number;
isSubtotal: boolean;
// First numeric cell found after the label cell.
value: number | null;
};
/** Context data collected by `parseContextsWithDimensions`. */
type DimensionContext = {
endDate: string | null;
dimensions: Array<{ axis: string; member: string }>;
};
/** A canonical row and the patterns that map filing rows onto it (see `matchStandardizedDefinition`). */
type CanonicalRowDefinition = {
key: string;
label: string;
category: string;
// Tested against the row's concept name.
conceptPatterns: RegExp[];
// Tested against the row's display label.
labelPatterns: RegExp[];
};
// Statement kinds iterated by createStatementRecord. "disclosure" is not
// listed here; createStatementRecord adds that key separately.
const FINANCIAL_STATEMENT_KINDS: FinancialStatementKind[] = [
"income",
"balance",
"cash_flow",
"equity",
"comprehensive_income",
];
/**
 * Report-title patterns per statement kind, used by `scoreReport`: every
 * pattern that matches a report's combined short/long name adds to its score.
 *
 * The equity pattern accepts both the straight (') and the typographic
 * (\u2019) apostrophe, as in "Stockholders\u2019 Equity"; the previous
 * character class listed the straight apostrophe twice and missed the
 * typographic one. The financial-position pattern accepts the plural
 * "Statements", matching the other patterns.
 *
 * "disclosure" has no patterns, so no report is ever selected for it by title.
 */
const STATEMENT_REPORT_PATTERNS: Record<FinancialStatementKind, RegExp[]> = {
  income: [
    /\bstatements?\s+of\s+operations?\b/i,
    /\bstatements?\s+of\s+income\b/i,
    /\bincome\s+statement/i,
  ],
  balance: [
    /\bbalance\s+sheets?\b/i,
    /\bstatements?\s+of\s+financial\s+position\b/i,
  ],
  cash_flow: [/\bstatements?\s+of\s+cash\s+flows?\b/i, /\bcash\s+flows?\b/i],
  equity: [
    /\bstatements?\s+of\s+(stockholders|shareholders)['\u2019]?\s+equity\b/i,
    /\bchanges\s+in\s+equity\b/i,
  ],
  comprehensive_income: [
    /\bstatements?\s+of\s+comprehensive\s+income\b/i,
    /\bcomprehensive\s+income\b/i,
  ],
  disclosure: [],
};
/**
 * Canonical rows emitted for each statement kind by `toStandardizedRows`.
 *
 * Definitions are processed in array order. For each definition the first
 * not-yet-claimed filing row whose concept matches a `conceptPatterns` entry
 * or whose label matches a `labelPatterns` entry is used, so both the order
 * of definitions and the order of filing rows affect the result.
 *
 * NOTE(review): several patterns are broad substrings (e.g. /revenue/i also
 * matches a "CostOfRevenue" concept, /liabilities/i matches any liabilities
 * concept, /equity/i and /debt/i likewise) — confirm the first-match
 * behaviour is what is intended.
 */
const STANDARDIZED_ROW_DEFINITIONS: Record<
FinancialStatementKind,
CanonicalRowDefinition[]
> = {
// Income statement rows.
income: [
{
key: "revenue",
label: "Revenue",
category: "core",
conceptPatterns: [/revenue/i, /salesrevenuenet/i],
labelPatterns: [/\brevenue\b/i, /\bsales\b/i],
},
{
key: "cost-of-revenue",
label: "Cost of Revenue",
category: "core",
conceptPatterns: [/costofrevenue/i, /costofgoods/i],
labelPatterns: [/\bcost of revenue\b/i, /\bcost of sales\b/i],
},
{
key: "gross-profit",
label: "Gross Profit",
category: "core",
conceptPatterns: [/grossprofit/i],
labelPatterns: [/\bgross profit\b/i],
},
{
key: "operating-income",
label: "Operating Income",
category: "core",
conceptPatterns: [/operatingincome/i, /incomefromoperations/i],
labelPatterns: [/\boperating income\b/i, /\bincome from operations\b/i],
},
{
key: "net-income",
label: "Net Income",
category: "core",
conceptPatterns: [/netincomeloss/i, /profitloss/i],
labelPatterns: [/\bnet income\b/i, /\bnet earnings\b/i],
},
],
// Balance sheet rows.
balance: [
{
key: "total-assets",
label: "Total Assets",
category: "core",
// Anchored: matches only when the tested concept string is exactly "assets".
conceptPatterns: [/^assets$/i],
labelPatterns: [/\btotal assets\b/i],
},
{
key: "total-liabilities",
label: "Total Liabilities",
category: "core",
conceptPatterns: [/liabilities/i],
labelPatterns: [/\btotal liabilities\b/i],
},
{
key: "stockholders-equity",
label: "Stockholders Equity",
category: "core",
conceptPatterns: [
/stockholdersequity/i,
/shareholdersequity/i,
/equity/i,
],
labelPatterns: [/\bequity\b/i],
},
{
key: "cash-and-equivalents",
label: "Cash and Equivalents",
category: "liquidity",
conceptPatterns: [/cashandcashequivalents/i, /cashandequivalents/i],
labelPatterns: [/\bcash\b/i, /\bcash equivalents\b/i],
},
{
key: "total-debt",
label: "Total Debt",
category: "leverage",
conceptPatterns: [/longtermdebt/i, /debt/i, /borrowings/i],
labelPatterns: [/\btotal debt\b/i, /\blong-term debt\b/i, /\bdebt\b/i],
},
],
// Cash flow statement rows.
cash_flow: [
{
key: "net-cash-operating",
label: "Net Cash from Operating Activities",
category: "core",
conceptPatterns: [
/netcashprovidedbyusedinoperatingactivities/i,
/netcashfromoperatingactivities/i,
],
labelPatterns: [/\boperating activities\b/i],
},
{
key: "net-cash-investing",
label: "Net Cash from Investing Activities",
category: "core",
conceptPatterns: [/netcashprovidedbyusedininvestingactivities/i],
labelPatterns: [/\binvesting activities\b/i],
},
{
key: "net-cash-financing",
label: "Net Cash from Financing Activities",
category: "core",
conceptPatterns: [/netcashprovidedbyusedinfinancingactivities/i],
labelPatterns: [/\bfinancing activities\b/i],
},
{
key: "net-change-cash",
label: "Net Change in Cash",
category: "core",
conceptPatterns: [
/cashandcashequivalentsperiodincrease/i,
/increase.*cash/i,
],
labelPatterns: [/\bnet change\b/i, /\bincrease.*cash\b/i],
},
],
// Statement of equity rows.
equity: [
{
key: "equity-balance",
label: "Total Equity",
category: "core",
conceptPatterns: [
/stockholdersequity/i,
/shareholdersequity/i,
/equity/i,
],
labelPatterns: [/\btotal equity\b/i, /\bequity\b/i],
},
{
key: "retained-earnings",
label: "Retained Earnings",
category: "core",
conceptPatterns: [/retainedearnings/i],
labelPatterns: [/\bretained earnings\b/i],
},
],
// Comprehensive income rows. The first definition's patterns also match
// "other comprehensive income" rows, which are listed second.
comprehensive_income: [
{
key: "comprehensive-income",
label: "Comprehensive Income",
category: "core",
conceptPatterns: [/comprehensiveincome/i],
labelPatterns: [/\bcomprehensive income\b/i],
},
{
key: "other-comprehensive-income",
label: "Other Comprehensive Income",
category: "core",
conceptPatterns: [/othercomprehensiveincome/i],
labelPatterns: [/\bother comprehensive income\b/i],
},
],
// Disclosures have no canonical rows; every row is emitted as "other".
disclosure: [],
};
/**
 * Builds a record with one entry per statement kind, calling `factory` once
 * per key so entries never share a value. The "disclosure" key is filled
 * last, after the kinds listed in FINANCIAL_STATEMENT_KINDS.
 */
function createStatementRecord<T>(
  factory: () => T,
): Record<FinancialStatementKind, T> {
  const result = {} as Record<FinancialStatementKind, T>;
  FINANCIAL_STATEMENT_KINDS.forEach((kind) => {
    result[kind] = factory();
  });
  result.disclosure = factory();
  return result;
}
/** Returns the display label for a statement kind; an unrecognised value is returned unchanged. */
function statementKindLabel(kind: FinancialStatementKind) {
  if (kind === "income") {
    return "Income Statement";
  }
  if (kind === "balance") {
    return "Balance Sheet";
  }
  if (kind === "cash_flow") {
    return "Cash Flow Statement";
  }
  if (kind === "equity") {
    return "Statement of Equity";
  }
  if (kind === "comprehensive_income") {
    return "Comprehensive Income";
  }
  if (kind === "disclosure") {
    return "Disclosures";
  }
  return kind;
}
/**
 * Determines the archive directory URL (with trailing slash) for a filing.
 *
 * When a filing URL is given, everything up to and including its last slash
 * is used, provided that slash lies beyond the "https://" prefix. Otherwise
 * the directory is assembled from the CIK and the hyphen-free accession
 * number; null is returned when either is unavailable.
 */
function resolveFilingDirectoryUrl(input: {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
}) {
  const documentUrl = input.filingUrl?.trim();
  if (documentUrl) {
    const slashIndex = documentUrl.lastIndexOf("/");
    if (slashIndex > "https://".length) {
      return documentUrl.slice(0, slashIndex + 1);
    }
  }

  const cikSegment = normalizeCikForPath(input.cik);
  const accessionSegment = compactAccessionNumber(input.accessionNumber);
  if (cikSegment && accessionSegment) {
    return `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/`;
  }
  return null;
}
/**
 * Resolves a file name from a filing index against the filing directory URL.
 * Blank or missing paths yield null, http(s) URLs are returned unchanged, and
 * anything else is appended to `baseUrl` after removing leading slashes.
 */
function toAbsoluteArchiveUrl(baseUrl: string, relativePath: string | null) {
  const candidate = (relativePath ?? "").trim();
  if (candidate.length === 0) {
    return null;
  }
  const alreadyAbsolute = /^https?:\/\//i.test(candidate);
  if (alreadyAbsolute) {
    return candidate;
  }
  const withoutLeadingSlashes = candidate.replace(/^\/+/, "");
  return `${baseUrl}${withoutLeadingSlashes}`;
}
/**
 * GETs `url` through the supplied fetch implementation, sending the SEC
 * User-Agent header, and returns the response body as text.
 *
 * @throws Error when the HTTP response is not OK.
 */
async function fetchText(url: string, fetchImpl: typeof fetch) {
  const headers = {
    "User-Agent": envUserAgent(),
    Accept: "text/xml, text/html, text/plain;q=0.9, */*;q=0.8",
  };
  const response = await fetchImpl(url, { headers, cache: "no-store" });
  if (response.ok) {
    return await response.text();
  }
  throw new Error(`SEC request failed (${response.status})`);
}
/**
 * Extracts the text of the first `<tagName>...</tagName>` element in `block`
 * (case-insensitive, opening tag without attributes), entity-decoded and
 * trimmed. Returns an empty string when the element is absent.
 */
function xmlTextValue(block: string, tagName: string) {
  // Escape regex metacharacters so the tag name is matched literally.
  const literalTag = tagName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const elementPattern = new RegExp(
    `<${literalTag}>([\\s\\S]*?)<\\/${literalTag}>`,
    "i",
  );
  const found = elementPattern.exec(block);
  return found ? decodeHtmlEntities(found[1] ?? "").trim() : "";
}
/**
 * Extracts one descriptor per `<Report>` element of a filing summary XML
 * document, in document order. Empty file names are reported as null.
 */
function parseFilingSummaryReports(xml: string) {
  const reportBlocks = Array.from(
    xml.matchAll(/<Report>([\s\S]*?)<\/Report>/gi),
    (match) => match[1] ?? "",
  );
  return reportBlocks.map(
    (block): StatementReportDescriptor => ({
      shortName: xmlTextValue(block, "ShortName"),
      longName: xmlTextValue(block, "LongName"),
      htmlFileName: xmlTextValue(block, "HtmlFileName") || null,
      xmlFileName: xmlTextValue(block, "XmlFileName") || null,
    }),
  );
}
/**
 * Scores how well a report's title fits a statement kind: two points per
 * matching title pattern, minus one when the title mentions "parenthetical"
 * or "detail". A report with no title text scores zero.
 */
function scoreReport(
  kind: FinancialStatementKind,
  report: StatementReportDescriptor,
) {
  const title = `${report.shortName} ${report.longName}`.trim();
  if (title.length === 0) {
    return 0;
  }
  const patternHits = STATEMENT_REPORT_PATTERNS[kind].filter((pattern) =>
    pattern.test(title),
  ).length;
  const isSupplementary =
    /\bparenthetical\b/i.test(title) || /\bdetail\b/i.test(title);
  return patternHits * 2 - (isSupplementary ? 1 : 0);
}
/**
 * Picks the report with the highest positive score for `kind`; the earliest
 * report wins a tie. Returns null when no report scores above zero.
 */
function chooseStatementReport(
  kind: FinancialStatementKind,
  reports: StatementReportDescriptor[],
) {
  type Leader = { report: StatementReportDescriptor | null; score: number };
  const start: Leader = { report: null, score: 0 };
  const leader = reports.reduce<Leader>((current, candidate) => {
    const candidateScore = scoreReport(kind, candidate);
    return candidateScore > current.score
      ? { report: candidate, score: candidateScore }
      : current;
  }, start);
  return leader.score > 0 ? leader.report : null;
}
/**
 * Reduces a table cell's markup to a single line of text: tags are removed,
 * entities decoded, and runs of spaces, tabs and line breaks collapsed.
 */
function sanitizeCellText(raw: string) {
  const withBreaksAsNewlines = raw.replace(/<br\s*\/?>/gi, "\n");
  const withoutTags = withBreaksAsNewlines.replace(/<[^>]+>/g, " ");
  const decoded = decodeHtmlEntities(withoutTags);
  return decoded.replace(/[ \t]+/g, " ").replace(/\n+/g, " ").trim();
}
/**
 * Extracts the concept name referenced by a row's markup.
 *
 * Two sources are tried in order:
 * 1. a `defref_<prefix>_<name>` style reference (separator may be `_`, `:`
 *    or `-`), returned as `<prefix>:<name>`;
 * 2. a `name="..."` attribute, returned unchanged.
 *
 * Only the first underscore of a reference is turned into a colon, and only
 * when the reference has no colon yet. Replacing every underscore corrupted
 * names that contain one (`acme_Segment_One` became `acme:Segment:One`).
 *
 * @returns The concept name, or null when neither source is present.
 */
function extractConceptFromMarkup(markup: string) {
  const defref = markup.match(/defref[_:-]([a-z0-9_:.-]+)/i);
  if (defref?.[1]) {
    const reference = defref[1];
    // A string pattern makes replace() touch the first occurrence only.
    return reference.includes(":") ? reference : reference.replace("_", ":");
  }
  const nameAttr = markup.match(/\bname=["']([a-z0-9_:.-]+)["']/i);
  if (nameAttr?.[1]) {
    return nameAttr[1];
  }
  return null;
}
/**
 * Derives an indentation level from a cell's inline style: the left padding
 * (or, failing that, the left margin) in pixels divided by 12 and rounded.
 * Returns 0 when no positive pixel value is present.
 */
function parseIndentDepth(attrs: string) {
  const inlineStyle = attrs.match(/\bstyle=[\"']([^\"']+)[\"']/i)?.[1] ?? "";
  // Checked in priority order; the first positive pixel value decides.
  const offsetPatterns = [
    /padding-left:\s*([0-9.]+)px/i,
    /margin-left:\s*([0-9.]+)px/i,
  ];
  for (const offsetPattern of offsetPatterns) {
    const rawPixels = inlineStyle.match(offsetPattern)?.[1];
    if (!rawPixels) {
      continue;
    }
    const pixels = Number.parseFloat(rawPixels);
    if (Number.isFinite(pixels) && pixels > 0) {
      return Math.max(0, Math.round(pixels / 12));
    }
  }
  return 0;
}
/**
 * Parses a statement cell such as "1,234", "$ (1,234)" or "\u22125.2" into a
 * number.
 *
 * Returns null for blank cells, "n/a", dash placeholders ("--"), percentages
 * and anything that does not start with a number once currency symbols,
 * thousands separators, whitespace and parentheses are removed.
 *
 * Parentheses denote a negative amount. They are detected after the currency
 * symbol and whitespace are stripped, so "$ (1,234)" is negative; checking
 * the raw text made such cells parse as positive.
 */
function parseStatementNumber(raw: string): number | null {
  const trimmed = raw.trim();
  if (!trimmed || /^n\/a$/i.test(trimmed) || /^--+$/.test(trimmed)) {
    return null;
  }
  if (/%$/.test(trimmed)) {
    return null;
  }
  const compact = trimmed.replace(/[$,\s]/g, "");
  const negative = compact.startsWith("(") && compact.endsWith(")");
  const numeric = compact
    .replace(/[()]/g, "")
    // U+2212 MINUS SIGN is not understood by parseFloat.
    .replace(/\u2212/g, "-");
  const value = Number.parseFloat(numeric);
  if (!Number.isFinite(value)) {
    return null;
  }
  return negative ? -Math.abs(value) : value;
}
/**
 * Lower-cases `value` and joins its alphanumeric runs with single hyphens,
 * without leading or trailing hyphens.
 */
function slug(value: string) {
  return value
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((part) => part.length > 0)
    .join("-");
}
/**
 * Extracts statement rows from a report's HTML.
 *
 * Every `<table>` is parsed and the table yielding the most rows wins (the
 * earliest one on a tie). Within a table a row is kept when it has at least
 * two cells, a label that is not a bare section heading, and a numeric value
 * in one of the cells after the label; the first such value is used.
 *
 * @returns The rows of the best table, or an empty array when nothing parses.
 */
function parseStatementRowsFromReport(content: string): StatementParseRow[] {
  let winner: StatementParseRow[] = [];

  for (const tableMatch of content.matchAll(
    /<table[^>]*>([\s\S]*?)<\/table>/gi,
  )) {
    const tableMarkup = tableMatch[0] ?? "";
    const parsedRows: StatementParseRow[] = [];

    for (const rowMatch of tableMarkup.matchAll(
      /<tr[^>]*>([\s\S]*?)<\/tr>/gi,
    )) {
      const rowMarkup = rowMatch[0] ?? "";
      const cells = Array.from(
        rowMarkup.matchAll(/<t[dh]([^>]*)>([\s\S]*?)<\/t[dh]>/gi),
      );
      if (cells.length < 2) {
        continue;
      }

      const firstCell = cells[0];
      const label = sanitizeCellText(firstCell?.[2] ?? "");
      const isHeading = /^(years ended|assets|liabilities|equity)$/i.test(
        label,
      );
      if (!label || isHeading) {
        continue;
      }

      // Scan the cells after the label for the first parseable number.
      let value: number | null = null;
      for (const cell of cells.slice(1)) {
        const candidate = parseStatementNumber(sanitizeCellText(cell[2] ?? ""));
        if (candidate !== null) {
          value = candidate;
          break;
        }
      }
      if (value === null) {
        continue;
      }

      // 1-based position among the rows kept so far.
      const order = parsedRows.length + 1;
      const concept = extractConceptFromMarkup(rowMarkup);
      parsedRows.push({
        key: concept ? slug(concept) : `${slug(label)}-${order}`,
        label,
        concept,
        order,
        depth: parseIndentDepth(firstCell?.[1] ?? ""),
        isSubtotal: /^total\b/i.test(label) || /\bsubtotal\b/i.test(label),
        value,
      });
    }

    if (parsedRows.length > winner.length) {
      winner = parsedRows;
    }
  }

  return winner;
}
/**
 * Converts parsed rows into snapshot rows, storing each row's single value
 * under `periodId` in its `values` map.
 */
function toSnapshotRows(
  periodId: string,
  rows: StatementParseRow[],
): FilingFaithfulStatementSnapshotRow[] {
  const snapshotRows: FilingFaithfulStatementSnapshotRow[] = [];
  for (const { key, label, concept, order, depth, isSubtotal, value } of rows) {
    snapshotRows.push({
      key,
      label,
      concept,
      order,
      depth,
      isSubtotal,
      values: { [periodId]: value },
    });
  }
  return snapshotRows;
}
/**
 * Reports whether a filing row belongs to a canonical definition: true when
 * any concept pattern matches the row's concept (empty string if the row has
 * none) or any label pattern matches the row's label.
 */
function matchStandardizedDefinition(
  row: FilingFaithfulStatementSnapshotRow,
  definition: CanonicalRowDefinition,
) {
  const conceptText = row.concept ?? "";
  for (const pattern of definition.conceptPatterns) {
    if (pattern.test(conceptText)) {
      return true;
    }
  }
  for (const pattern of definition.labelPatterns) {
    if (pattern.test(row.label)) {
      return true;
    }
  }
  return false;
}
/**
 * Supplies a value from the filing's stored metrics for the canonical rows
 * that have a corresponding metric (revenue, net income, total assets, cash,
 * debt). Returns null for every other row and when no metrics are available.
 */
function fallbackMetricValue(
  kind: FinancialStatementKind,
  rowKey: string,
  metrics: Filing["metrics"],
) {
  if (!metrics) {
    return null;
  }
  switch (`${kind}:${rowKey}`) {
    case "income:revenue":
      return metrics.revenue ?? null;
    case "income:net-income":
      return metrics.netIncome ?? null;
    case "balance:total-assets":
      return metrics.totalAssets ?? null;
    case "balance:cash-and-equivalents":
      return metrics.cash ?? null;
    case "balance:total-debt":
      return metrics.debt ?? null;
    default:
      return null;
  }
}
/**
 * Maps filing rows onto the canonical rows defined for a statement kind.
 *
 * One output row is produced per canonical definition, in definition order,
 * taking its value from the first unclaimed filing row that matches; when
 * that value is missing the filing's stored metrics are consulted. Filing
 * rows that no definition claimed follow, keyed "other-<key>" with category
 * "other". Claiming is tracked by row key.
 */
function toStandardizedRows(
  kind: FinancialStatementKind,
  periodId: string,
  rows: FilingFaithfulStatementSnapshotRow[],
  metrics: Filing["metrics"],
): StandardizedStatementSnapshotRow[] {
  const claimedKeys = new Set<string>();

  const canonicalRows = STANDARDIZED_ROW_DEFINITIONS[kind].map(
    (definition): StandardizedStatementSnapshotRow => {
      const source = rows.find(
        (candidate) =>
          !claimedKeys.has(candidate.key) &&
          matchStandardizedDefinition(candidate, definition),
      );
      if (source) {
        claimedKeys.add(source.key);
      }
      const reported = source?.values[periodId] ?? null;
      const value =
        reported !== null
          ? reported
          : fallbackMetricValue(kind, definition.key, metrics);
      return {
        key: definition.key,
        label: definition.label,
        concept: source?.concept ?? definition.key,
        category: definition.category,
        sourceConcepts: source?.concept ? [source.concept] : [],
        values: { [periodId]: value },
      };
    },
  );

  const unclaimedRows = rows
    .filter((row) => !claimedKeys.has(row.key))
    .map(
      (row): StandardizedStatementSnapshotRow => ({
        key: `other-${row.key}`,
        label: row.label,
        concept: row.concept ?? row.key,
        category: "other",
        sourceConcepts: row.concept ? [row.concept] : [],
        values: { [periodId]: row.values[periodId] ?? null },
      }),
    );

  return [...canonicalRows, ...unclaimedRows];
}
/**
 * Collects the contexts of a document that carry at least one explicit
 * dimension member.
 *
 * @returns Map from context id to the context's end date (null when it has
 *   no `endDate` element) and its axis/member pairs in document order.
 *   Contexts without explicit members are omitted.
 */
function parseContextsWithDimensions(raw: string) {
  const byContextId = new Map<string, DimensionContext>();

  for (const contextMatch of raw.matchAll(
    /<(?:[a-z0-9]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9]+:)?context>/gi,
  )) {
    const contextId = contextMatch[1] ?? "";
    const body = contextMatch[2] ?? "";
    if (!contextId) {
      continue;
    }

    const endDateMatch = body.match(
      /<(?:[a-z0-9]+:)?endDate>([^<]+)<\/(?:[a-z0-9]+:)?endDate>/i,
    );
    const endDate = endDateMatch?.[1]?.trim() ?? null;

    const dimensions = Array.from(
      body.matchAll(
        /<(?:[a-z0-9]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9]+:)?explicitMember>/gi,
      ),
      (memberMatch) => ({
        axis: (memberMatch[1] ?? "").trim(),
        member: (memberMatch[2] ?? "").trim(),
      }),
    ).filter((pair) => pair.axis !== "" && pair.member !== "");

    if (dimensions.length > 0) {
      byContextId.set(contextId, { endDate, dimensions });
    }
  }

  return byContextId;
}
/**
 * Guesses which statement a concept belongs to from keywords in its
 * lower-cased name. Rules are tried in a fixed order and the first match
 * wins, so for example any name containing "cash" is classified as cash
 * flow. Returns null when no rule matches.
 */
function statementKindFromConcept(
  concept: string,
): FinancialStatementKind | null {
  const lowered = concept.toLowerCase();
  const rules: Array<[RegExp, FinancialStatementKind]> = [
    [
      /cash|operatingactivities|investingactivities|financingactivities/,
      "cash_flow",
    ],
    [/equity|retainedearnings|additionalpaidincapital/, "equity"],
    [/comprehensiveincome/, "comprehensive_income"],
    [/asset|liabilit|debt/, "balance"],
    [/revenue|income|profit|expense|costof/, "income"],
  ];
  const matchedRule = rules.find(([keywords]) => keywords.test(lowered));
  return matchedRule ? matchedRule[1] : null;
}
/**
 * Extracts dimensional facts from `ix:nonfraction` elements, grouped by
 * statement kind.
 *
 * A fact is kept when its context carries explicit dimension members, its
 * concept maps to a statement kind, and its text parses as a number. One row
 * is emitted per axis/member pair of the context. At most 8,000 elements are
 * examined.
 *
 * @param raw Document markup.
 * @param fallbackPeriodId Period id used when a context has no end date.
 */
function parseDimensionFacts(raw: string, fallbackPeriodId: string) {
  const factsByKind = createStatementRecord<DimensionStatementSnapshotRow[]>(
    () => [],
  );
  const contexts = parseContextsWithDimensions(raw);
  if (contexts.size === 0) {
    return factsByKind;
  }

  let examined = 0;
  for (const factMatch of raw.matchAll(
    /<ix:nonfraction\b([^>]*)>([\s\S]*?)<\/ix:nonfraction>/gi,
  )) {
    // Hard cap on the number of elements examined.
    if (examined >= 8_000) {
      break;
    }
    examined += 1;

    const attributes = factMatch[1] ?? "";
    const text = sanitizeCellText(factMatch[2] ?? "");
    const contextRef =
      attributes.match(/\bcontextref=["']([^"']+)["']/i)?.[1] ?? "";
    const concept = attributes.match(/\bname=["']([^"']+)["']/i)?.[1] ?? "";
    const unit = attributes.match(/\bunitref=["']([^"']+)["']/i)?.[1] ?? null;
    if (!contextRef || !concept) {
      continue;
    }

    const context = contexts.get(contextRef);
    if (!context || context.dimensions.length === 0) {
      continue;
    }
    const kind = statementKindFromConcept(concept);
    if (!kind) {
      continue;
    }
    const value = parseStatementNumber(text);
    if (value === null) {
      continue;
    }

    const periodId = context.endDate ?? fallbackPeriodId;
    const rowKey = slug(concept);
    context.dimensions.forEach(({ axis, member }) => {
      factsByKind[kind].push({
        rowKey,
        concept,
        periodId,
        axis,
        member,
        value,
        unit,
      });
    });
  }

  return factsByKind;
}
/**
 * Return copies of `rows` with `hasDimensions` set according to whether any
 * dimensional fact refers to the row.
 *
 * A row counts as having dimensions when its `key` equals the `rowKey` of a
 * dimension fact, or when its non-empty `concept` equals a dimension fact's
 * concept ignoring case. The input rows are not mutated.
 *
 * @param rows - Statement rows to annotate.
 * @param dimensions - Dimensional facts for the same statement.
 * @returns A new array of shallow row copies carrying a boolean
 *   `hasDimensions`.
 */
function markHasDimensions<
  T extends { key: string; concept: string | null; hasDimensions?: boolean },
>(rows: T[], dimensions: DimensionStatementSnapshotRow[]) {
  const knownRowKeys = new Set<string>();
  const knownConcepts = new Set<string>();
  for (const fact of dimensions) {
    knownRowKeys.add(fact.rowKey);
    const loweredConcept = fact.concept?.toLowerCase() ?? "";
    // Missing or empty concepts must never match an empty row concept.
    if (loweredConcept) {
      knownConcepts.add(loweredConcept);
    }
  }

  return rows.map((row) => {
    const loweredConcept = row.concept?.toLowerCase() ?? "";
    const matchesKey = knownRowKeys.has(row.key);
    const matchesConcept =
      loweredConcept !== "" && knownConcepts.has(loweredConcept);
    return {
      ...row,
      hasDimensions: matchesKey || matchesConcept,
    };
  });
}
/**
 * Create a filing-faithful statement bundle for a single period with no rows.
 *
 * @param period - The only period the bundle will list.
 * @returns A bundle whose `periods` is `[period]` and whose `statements` is
 *   built by `createStatementRecord` with an empty-array factory.
 */
function emptyStatementBundle(
  period: FilingStatementSnapshotPeriod,
): FilingStatementBundle {
  const periods = [period];
  const statements: FilingStatementBundle["statements"] = createStatementRecord(
    () => [],
  );
  return { periods, statements };
}
/**
 * Create a standardized statement bundle for a single period with no rows.
 *
 * @param period - The only period the bundle will list.
 * @returns A bundle whose `periods` is `[period]` and whose `statements` is
 *   built by `createStatementRecord` with an empty-array factory.
 */
function emptyStandardizedBundle(
  period: FilingStatementSnapshotPeriod,
): StandardizedStatementBundle {
  const periods = [period];
  const statements: StandardizedStatementBundle["statements"] =
    createStatementRecord(() => []);
  return { periods, statements };
}
/**
 * Create a dimension bundle with no facts.
 *
 * @returns A bundle whose `statements` is built by `createStatementRecord`
 *   with an empty-array factory.
 */
function emptyDimensionBundle(): DimensionStatementBundle {
  const statements: DimensionStatementBundle["statements"] =
    createStatementRecord(() => []);
  return { statements };
}
/**
 * Build the statement snapshot (faithful, standardized and dimensional
 * bundles) for one filing.
 *
 * Steps, in order:
 *  1. Fetch `FilingSummary.xml` from the filing directory and, for each
 *     statement kind, fetch and parse the report chosen for it. A failure on
 *     one report is ignored so the remaining kinds are still attempted.
 *  2. Fetch the primary document and extract dimensional inline XBRL facts.
 *  3. Derive standardized rows from the faithful rows and `input.metrics`,
 *     then flag rows in both bundles that have dimensional facts.
 *  4. Compute the parse status: "ready" when every kind has faithful rows,
 *     "partial" when any kind has faithful or standardized rows, otherwise
 *     "failed".
 *
 * Errors from steps 1 and 2 never reject the promise; the first recorded
 * message is returned in `parse_error`.
 *
 * @param input - Filing identity, URLs and metrics to hydrate from.
 * @param options - Optional overrides; `fetchImpl` replaces the global
 *   `fetch`.
 * @returns The hydration result, including bundles, status, error and source.
 */
export async function hydrateFilingStatementSnapshot(
  input: FilingStatementHydrationInput,
  options?: {
    fetchImpl?: typeof fetch;
  },
): Promise<FilingStatementHydrationResult> {
  const periodId = `${input.filingDate}-${compactAccessionNumber(input.accessionNumber)}`;
  const isQuarterly = input.filingType === "10-Q";
  const period: FilingStatementSnapshotPeriod = {
    id: periodId,
    filingId: input.filingId,
    accessionNumber: input.accessionNumber,
    filingDate: input.filingDate,
    periodStart: null,
    // The filing date stands in for the period end.
    periodEnd: input.filingDate,
    filingType: input.filingType,
    periodLabel: isQuarterly ? "Quarter End" : "Fiscal Year End",
  };

  const fetcher = options?.fetchImpl ?? fetch;
  const faithful = emptyStatementBundle(period);
  const standardized = emptyStandardizedBundle(period);
  const dimensional = emptyDimensionBundle();

  let source: FilingStatementHydrationResult["source"] =
    "companyfacts_fallback";
  let parseError: string | null = null;

  // Step 1: statement tables listed in FilingSummary.xml.
  try {
    const directoryUrl = resolveFilingDirectoryUrl({
      filingUrl: input.filingUrl,
      cik: input.cik,
      accessionNumber: input.accessionNumber,
    });

    if (directoryUrl) {
      const summaryXml = await fetchText(
        `${directoryUrl}FilingSummary.xml`,
        fetcher,
      );
      const reports = parseFilingSummaryReports(summaryXml);

      for (const kind of FINANCIAL_STATEMENT_KINDS) {
        const report = chooseStatementReport(kind, reports);
        if (!report) {
          continue;
        }

        const reportUrl = toAbsoluteArchiveUrl(
          directoryUrl,
          report.htmlFileName ?? report.xmlFileName,
        );
        if (!reportUrl) {
          continue;
        }

        try {
          const reportText = await fetchText(reportUrl, fetcher);
          const parsedRows = parseStatementRowsFromReport(reportText);
          if (parsedRows.length > 0) {
            source = "sec_filing_summary";
            faithful.statements[kind] = toSnapshotRows(periodId, parsedRows);
          }
        } catch {
          // Continue to other statements when one report fails.
        }
      }
    }
  } catch (error) {
    parseError =
      error instanceof Error ? error.message : "Failed to parse filing summary";
  }

  // Step 2: dimensional facts from the primary inline XBRL document.
  try {
    const primaryUrl = resolvePrimaryFilingUrl({
      filingUrl: input.filingUrl,
      cik: input.cik,
      accessionNumber: input.accessionNumber,
      primaryDocument: input.primaryDocument,
    });

    if (primaryUrl) {
      const rawDocument = await fetchText(primaryUrl, fetcher);
      const facts = parseDimensionFacts(rawDocument, periodId);
      for (const kind of FINANCIAL_STATEMENT_KINDS) {
        dimensional.statements[kind] = facts[kind];
      }

      const foundDimensions = FINANCIAL_STATEMENT_KINDS.some(
        (kind) => dimensional.statements[kind].length > 0,
      );
      // Only upgrade the source when step 1 produced no statement tables.
      if (foundDimensions && source === "companyfacts_fallback") {
        source = "xbrl_instance";
      }
    }
  } catch (error) {
    // Keep the earlier (filing summary) error when there is one.
    if (!parseError) {
      parseError =
        error instanceof Error
          ? error.message
          : "Failed to parse inline XBRL dimensions";
    }
  }

  // Step 3: standardize, then flag rows that have dimensional breakdowns.
  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    const dimensionRows = dimensional.statements[kind];
    const faithfulRows = faithful.statements[kind];
    const standardizedRows = toStandardizedRows(
      kind,
      periodId,
      faithfulRows,
      input.metrics,
    );
    faithful.statements[kind] = markHasDimensions(faithfulRows, dimensionRows);
    standardized.statements[kind] = markHasDimensions(
      standardizedRows,
      dimensionRows,
    );
  }

  // Step 4: derive the parse status from how many kinds ended up with rows.
  let statementCount = 0;
  let standardizedCount = 0;
  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    if (faithful.statements[kind].length > 0) {
      statementCount += 1;
    }
    if (standardized.statements[kind].length > 0) {
      standardizedCount += 1;
    }
  }

  let parseStatus: FilingStatementHydrationResult["parse_status"];
  if (statementCount === FINANCIAL_STATEMENT_KINDS.length) {
    parseStatus = "ready";
  } else if (statementCount > 0 || standardizedCount > 0) {
    parseStatus = "partial";
  } else {
    parseStatus = "failed";
  }

  // A failed parse always carries a message, even when nothing threw.
  const reportedError =
    parseStatus === "failed"
      ? (parseError ?? "No financial statement tables found")
      : parseError;

  return {
    filing_id: input.filingId,
    ticker: input.ticker.trim().toUpperCase(),
    filing_date: input.filingDate,
    filing_type: input.filingType,
    period_end: input.filingDate,
    statement_bundle: faithful,
    standardized_bundle: standardized,
    dimension_bundle: dimensional,
    parse_status: parseStatus,
    parse_error: reportedError,
    source,
  };
}
/**
 * Statement-parsing helpers from this module, exported together under a
 * single name.
 *
 * NOTE(review): presumably exposed so tests can reach otherwise unexported
 * helpers — confirm against callers before relying on it elsewhere.
 */
export const __statementInternals = {
parseFilingSummaryReports,
parseStatementRowsFromReport,
parseDimensionFacts,
statementKindLabel,
};