// Listing header left by the code viewer: 1504 lines, 40 KiB, TypeScript.
import type { Filing, FinancialStatementKind } from '@/lib/types';
|
||
import type {
|
||
DimensionStatementBundle,
|
||
DimensionStatementSnapshotRow,
|
||
FilingFaithfulStatementSnapshotRow,
|
||
FilingStatementBundle,
|
||
FilingStatementSnapshotPeriod,
|
||
StandardizedStatementBundle,
|
||
StandardizedStatementSnapshotRow
|
||
} from '@/lib/server/repos/filing-statements';
|
||
|
||
/** Form code type, taken from the `filing_type` field of the shared `Filing` model. */
type FilingType = Filing['filing_type'];

/** `Filing['metrics']` with `null`/`undefined` excluded. */
type FilingMetrics = NonNullable<Filing['metrics']>;

/** One entry of the ticker directory JSON loaded by `ensureTickerCache`. */
type TickerDirectoryRecord = {
  cik_str: number;
  ticker: string;
  title: string;
};

/**
 * Subset of the submissions JSON read by `fetchRecentFilings`.
 * The arrays under `filings.recent` are read in parallel: index `i` of each
 * array is combined into one filing.
 */
type RecentFilingsPayload = {
  filings?: {
    recent?: {
      accessionNumber?: string[];
      filingDate?: string[];
      form?: string[];
      primaryDocument?: string[];
    };
  };
  cik?: string;
  name?: string;
};

/** Subset of the company-facts JSON: `us-gaap` tag -> unit name -> fact points. */
type CompanyFactsPayload = {
  facts?: {
    'us-gaap'?: Record<string, { units?: Record<string, CompanyFactPoint[]> }>;
  };
};

/**
 * One reported fact value. The visible part of this file reads `val`, `end`,
 * `filed`, `accn` and `form`; the remaining fields are declared but not read here.
 */
type CompanyFactPoint = {
  val?: number;
  end?: string;
  filed?: string;
  accn?: string;
  form?: string;
  fy?: number;
  fp?: string;
  frame?: string;
};

/** A filing as produced by `fetchRecentFilings`. */
type SecFiling = {
  ticker: string;
  cik: string;
  companyName: string;
  filingType: FilingType;
  filingDate: string;
  accessionNumber: string;
  filingUrl: string | null;
  submissionUrl: string | null;
  primaryDocument: string | null;
};

/** Identifiers used by `resolvePrimaryFilingUrl` to locate a filing's primary document. */
type FilingDocumentInput = {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
  primaryDocument: string | null;
};

/** Optional overrides accepted by `fetchPrimaryFilingText`. */
type FetchPrimaryFilingTextOptions = {
  /** Replacement for the global `fetch`. */
  fetchImpl?: typeof fetch;
  /** Character budget passed on to `trimSecDocumentTextForPrompt`. */
  maxChars?: number;
};

/** Result of `fetchPrimaryFilingText`. */
export type FilingDocumentText = {
  source: 'primary_document';
  url: string;
  text: string;
  truncated: boolean;
};

/** Filing identity used by `pickFactForFiling` to select matching fact points. */
type FilingMetricsLookupInput = {
  accessionNumber: string;
  filingDate: string;
  filingType: FilingType;
};
|
||
|
||
/** Form types kept by `fetchRecentFilings`; every other form is skipped. */
const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];

/** Maximum age of the in-memory ticker directory before it is reloaded (12 hours, in ms). */
const TICKER_CACHE_TTL_MS = 12 * 60 * 60 * 1000;

/** Default character budget for filing text. */
const FILING_TEXT_MAX_CHARS = 24_000;
|
||
/**
 * us-gaap tags tried, in order, for each headline metric.
 * `pickFactByTags` walks a list front to back, so earlier tags take priority.
 */
const METRIC_TAGS = {
  revenue: [
    'Revenues',
    'SalesRevenueNet',
    'RevenueFromContractWithCustomerExcludingAssessedTax',
    'TotalRevenuesAndOtherIncome'
  ],
  netIncome: ['NetIncomeLoss', 'ProfitLoss'],
  totalAssets: ['Assets'],
  cash: [
    'CashAndCashEquivalentsAtCarryingValue',
    'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'
  ],
  debt: [
    'LongTermDebtAndCapitalLeaseObligations',
    'LongTermDebtNoncurrent',
    'LongTermDebt',
    'DebtAndFinanceLeaseLiabilities'
  ]
} as const;
|
||
|
||
/** Ticker directory keyed by upper-cased ticker; replaced wholesale by `ensureTickerCache`. */
let tickerCache: Map<string, TickerDirectoryRecord> = new Map();

/** `Date.now()` at the last directory load; 0 until the first load. */
let tickerCacheLoadedAt = 0;
|
||
|
||
/**
 * Returns the `User-Agent` value sent with SEC requests: the `SEC_USER_AGENT`
 * environment variable when it is set and non-empty, otherwise a built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;
  if (configured) {
    return configured;
  }

  return 'Fiscal Clone <support@fiscal.local>';
}
|
||
|
||
/** Returns the current date as `YYYY-MM-DD`, taken from the UTC ISO timestamp. */
function todayIso() {
  const isoTimestamp = new Date().toISOString();
  return isoTimestamp.substring(0, 10);
}
|
||
|
||
/**
 * Decodes the HTML character references handled by this module.
 *
 * Supported forms: the named entities `&nbsp;`, `&amp;`, `&lt;`, `&gt;`,
 * `&quot;`, `&apos;` (case-insensitive), decimal references (`&#39;`) and
 * hexadecimal references (`&#x27;`). Unknown named entities are left untouched.
 *
 * All references are decoded in a single pass, so already-escaped text such as
 * `&amp;lt;` becomes `&lt;` and is not decoded a second time.
 *
 * @param value Text that may contain character references.
 * @returns The text with supported references replaced. No-break spaces
 *   (literal or referenced) become regular spaces; out-of-range or surrogate
 *   code points become a single space.
 */
function decodeHtmlEntities(value: string) {
  const namedEntities: Record<string, string> = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: '\''
  };

  const decodeCodePoint = (code: number) => {
    const outOfRange = !Number.isFinite(code) || code < 0 || code > 0x10ffff;
    // Lone surrogate halves are not valid characters on their own.
    const isSurrogate = code >= 0xd800 && code <= 0xdfff;
    if (outOfRange || isSurrogate) {
      return ' ';
    }

    // Treat a numeric no-break space the same way as `&nbsp;`.
    if (code === 0xa0) {
      return ' ';
    }

    try {
      return String.fromCodePoint(code);
    } catch {
      return ' ';
    }
  };

  return value
    .replace(/\u00a0/g, ' ')
    .replace(
      /&(?:#x([0-9a-f]+)|#([0-9]+)|(nbsp|amp|lt|gt|quot|apos));/gi,
      (match: string, hex: string | undefined, decimal: string | undefined, name: string | undefined) => {
        if (hex !== undefined) {
          return decodeCodePoint(Number.parseInt(hex, 16));
        }

        if (decimal !== undefined) {
          return decodeCodePoint(Number.parseInt(decimal, 10));
        }

        if (name !== undefined) {
          return namedEntities[name.toLowerCase()] ?? match;
        }

        return match;
      }
    );
}
|
||
|
||
/**
 * Converts raw filing markup into plain text.
 *
 * Steps, in order: normalize line endings, drop `<script>`, `<style>`,
 * `<noscript>` blocks and comments, turn block-level tags into newlines, strip
 * the remaining tags, decode character references, then tidy whitespace
 * (trailing/leading blanks around newlines, runs of blanks, runs of 3+ newlines).
 *
 * A CRLF pair is treated as ONE line break; mapping `\r` and `\n` separately
 * would insert a blank line after every line of a CRLF document.
 *
 * @param raw Raw document text (HTML or plain text).
 * @returns The trimmed plain-text rendering; may be an empty string.
 */
export function normalizeSecDocumentText(raw: string) {
  const withoutMarkup = raw
    .replace(/\r\n?/g, '\n')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, ' ')
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<\/?(p|div|section|article|li|tr|td|th|h[1-6]|br|hr)[^>]*>/gi, '\n')
    .replace(/<[^>]+>/g, ' ');

  // Entities are decoded only after tag stripping so that escaped markup
  // (e.g. `&lt;b&gt;`) survives as literal text.
  return decodeHtmlEntities(withoutMarkup)
    .replace(/[ \t]+\n/g, '\n')
    .replace(/\n[ \t]+/g, '\n')
    .replace(/[ \t]{2,}/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
||
|
||
/**
 * Clips text to a character budget, preferring to cut at a line or word boundary.
 *
 * @param text Text to clip.
 * @param maxChars Requested budget. Fractions are truncated and values below
 *   1,000 are raised to 1,000. `NaN` falls back to `FILING_TEXT_MAX_CHARS`
 *   (previously `NaN` produced an empty, "truncated" result).
 * @returns `{ text, truncated }`; `truncated` is `true` only when the input
 *   exceeded the budget.
 */
export function trimSecDocumentTextForPrompt(text: string, maxChars = FILING_TEXT_MAX_CHARS) {
  const requested = Number.isNaN(maxChars) ? FILING_TEXT_MAX_CHARS : Math.trunc(maxChars);
  const safeMax = Math.max(requested, 1_000);
  if (text.length <= safeMax) {
    return { text, truncated: false };
  }

  let slice = text.slice(0, safeMax);

  // Do not leave half of a surrogate pair at the end of the clipped text.
  const lastUnit = slice.charCodeAt(slice.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    slice = slice.slice(0, -1);
  }

  // Cut at the last newline/space only when that keeps more than 70% of the
  // budget; otherwise a hard cut loses less content.
  const boundary = Math.max(slice.lastIndexOf('\n'), slice.lastIndexOf(' '));
  const clipped = (boundary > safeMax * 0.7 ? slice.slice(0, boundary) : slice).trimEnd();

  return { text: clipped, truncated: true };
}
|
||
|
||
/** Removes every hyphen from an accession number; all other characters are kept. */
function compactAccessionNumber(value: string) {
  const segments = value.split('-');
  return segments.join('');
}
|
||
|
||
/**
 * Reduces an accession number to its digits so that differently formatted
 * values compare equal. Missing input yields an empty string.
 */
function normalizeAccessionKey(value: string | undefined | null) {
  if (value === undefined || value === null) {
    return '';
  }

  return value.replace(/\D/g, '');
}
|
||
|
||
/**
 * Canonicalizes a form code: trims, upper-cases and drops a trailing `/A`
 * suffix. Missing or blank input yields an empty string.
 */
function normalizeForm(value: string | undefined | null) {
  const upper = (value ?? '').trim().toUpperCase();

  if (upper === '') {
    return '';
  }

  if (upper.endsWith('/A')) {
    return upper.slice(0, -2);
  }

  return upper;
}
|
||
|
||
/**
 * Parses a date string with `Date.parse`.
 *
 * @returns Milliseconds since the epoch, or `NaN` when the value is missing,
 *   empty or unparseable.
 */
function parseDate(value: string | undefined | null) {
  return value ? Date.parse(value) : Number.NaN;
}
|
||
|
||
/**
 * Produces the CIK segment used in archive URLs: digits only, without leading
 * zeros.
 *
 * The value is handled as text rather than converted through `Number`, so long
 * digit strings are never rendered in exponent notation or rounded.
 *
 * @param value CIK in any formatting (padded, prefixed, ...).
 * @returns The unpadded digit string (`'0'` for an all-zero input), or `null`
 *   when the input contains no digits.
 */
function normalizeCikForPath(value: string) {
  const digits = value.replace(/\D/g, '');
  if (!digits) {
    return null;
  }

  // Keep the final digit so an all-zero input still yields '0'.
  return digits.replace(/^0+(?=\d)/, '');
}
|
||
|
||
/**
 * Determines the URL of a filing's primary document.
 *
 * A non-blank `filingUrl` is returned as is (trimmed). Otherwise the URL is
 * assembled from the CIK, the accession number and `primaryDocument`.
 *
 * @returns The URL, or `null` when there is no direct URL and any of the
 *   pieces needed to build one is missing.
 */
export function resolvePrimaryFilingUrl(input: FilingDocumentInput) {
  const explicitUrl = input.filingUrl?.trim();
  if (explicitUrl) {
    return explicitUrl;
  }

  const documentName = input.primaryDocument;
  if (!documentName) {
    return null;
  }

  const cikSegment = normalizeCikForPath(input.cik);
  const accessionSegment = compactAccessionNumber(input.accessionNumber);
  const canBuild = Boolean(cikSegment) && Boolean(accessionSegment);

  return canBuild
    ? `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/${documentName}`
    : null;
}
|
||
|
||
/**
 * Downloads a filing's primary document and returns it as clipped plain text.
 *
 * @param input Identifiers used to resolve the document URL.
 * @param options Optional `fetchImpl` (defaults to the global `fetch`) and
 *   `maxChars` (defaults to `FILING_TEXT_MAX_CHARS`).
 * @returns The normalized text with its source URL, or `null` when no URL can
 *   be resolved or the document contains no text after normalization.
 * @throws Error when the response status is not OK.
 */
export async function fetchPrimaryFilingText(
  input: FilingDocumentInput,
  options?: FetchPrimaryFilingTextOptions
): Promise<FilingDocumentText | null> {
  const documentUrl = resolvePrimaryFilingUrl(input);
  if (!documentUrl) {
    return null;
  }

  const send = options?.fetchImpl ?? fetch;
  const requestInit: RequestInit = {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/html, text/plain;q=0.9, */*;q=0.8'
    },
    cache: 'no-store'
  };

  const response = await send(documentUrl, requestInit);
  if (!response.ok) {
    throw new Error(`SEC filing request failed (${response.status})`);
  }

  const plainText = normalizeSecDocumentText(await response.text());
  if (!plainText) {
    return null;
  }

  const budget = options?.maxChars ?? FILING_TEXT_MAX_CHARS;
  const { text, truncated } = trimSecDocumentTextForPrompt(plainText, budget);
  if (!text) {
    return null;
  }

  return { source: 'primary_document', url: documentUrl, text, truncated };
}
|
||
|
||
/**
 * Fetches a URL with the module's User-Agent and parses the body as JSON.
 *
 * The parsed value is returned as `T` without validation.
 *
 * @throws Error when the response status is not OK.
 */
async function fetchJson<T>(url: string): Promise<T> {
  const headers = {
    'User-Agent': envUserAgent(),
    Accept: 'application/json'
  };

  const response = await fetch(url, { headers, cache: 'no-store' });
  if (response.ok) {
    return await response.json() as T;
  }

  throw new Error(`SEC request failed (${response.status})`);
}
|
||
|
||
/**
 * Loads the SEC ticker directory into `tickerCache` unless a non-empty copy
 * younger than `TICKER_CACHE_TTL_MS` is already held.
 *
 * Records without a usable string `ticker` are skipped instead of aborting the
 * whole load. If the download yields no usable records, the existing cache and
 * its timestamp are left untouched so that the next call retries.
 *
 * @throws Whatever `fetchJson` throws when the download fails.
 */
async function ensureTickerCache() {
  const age = Date.now() - tickerCacheLoadedAt;
  if (age < TICKER_CACHE_TTL_MS && tickerCache.size > 0) {
    return;
  }

  const payload = await fetchJson<Record<string, TickerDirectoryRecord>>('https://www.sec.gov/files/company_tickers.json');
  const next = new Map<string, TickerDirectoryRecord>();

  // The payload is unvalidated JSON, so guard every record before using it.
  for (const record of Object.values(payload ?? {})) {
    if (typeof record?.ticker !== 'string') {
      continue;
    }

    const symbol = record.ticker.trim().toUpperCase();
    if (symbol) {
      next.set(symbol, record);
    }
  }

  if (next.size === 0) {
    return;
  }

  tickerCache = next;
  tickerCacheLoadedAt = Date.now();
}
|
||
|
||
/**
 * Looks a ticker up in the cached SEC directory, loading the directory first
 * when needed.
 *
 * @param ticker Ticker symbol; surrounding whitespace and case are ignored.
 * @returns The normalized ticker, the CIK as a string and the company title.
 * @throws Error when the ticker is not present in the directory.
 */
async function resolveTicker(ticker: string) {
  await ensureTickerCache();

  const symbol = ticker.trim().toUpperCase();
  const entry = tickerCache.get(symbol);
  if (!entry) {
    throw new Error(`Ticker ${symbol} not found in SEC directory`);
  }

  const { cik_str: cikNumber, title } = entry;
  return {
    ticker: symbol,
    cik: String(cikNumber),
    companyName: title
  };
}
|
||
|
||
/**
 * Returns the value of the most recent fact reported for a us-gaap tag,
 * regardless of which form reported it.
 *
 * This previously delegated to `pickFactForFiling` with a hard-coded `'10-Q'`
 * form, which preferred the newest quarterly fact even when a later filing of
 * another form carried newer data.
 *
 * @returns The fact value, or `null` when the tag has no numeric facts.
 */
function pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null {
  const latest = pickMostRecentFact(collectFactSeries(payload, tag));

  return typeof latest?.val === 'number' && Number.isFinite(latest.val)
    ? latest.val
    : null;
}
|
||
|
||
/**
 * Gathers the fact points reported for one us-gaap tag.
 *
 * Points from units whose name is `USD` or starts with `USD` (but is not
 * `USD/shares`) are preferred; points from all other units are used only when
 * no such unit has data. Points without a finite numeric `val` are dropped.
 *
 * @returns The selected points in payload order; empty when the tag is absent.
 */
function collectFactSeries(payload: CompanyFactsPayload, tag: string): CompanyFactPoint[] {
  const unitsByName = payload.facts?.['us-gaap']?.[tag]?.units;
  if (!unitsByName) {
    return [];
  }

  const dollarPoints: CompanyFactPoint[] = [];
  const otherPoints: CompanyFactPoint[] = [];

  for (const unitName of Object.keys(unitsByName)) {
    const series = unitsByName[unitName];
    if (!Array.isArray(series) || series.length === 0) {
      continue;
    }

    const isDollarUnit = unitName === 'USD' || /^USD(?!\/shares)/i.test(unitName);
    const bucket = isDollarUnit ? dollarPoints : otherPoints;
    bucket.push(...series);
  }

  const chosen = dollarPoints.length > 0 ? dollarPoints : otherPoints;

  return chosen.filter((point) => typeof point.val === 'number' && Number.isFinite(point.val));
}
|
||
|
||
/**
 * Picks the most recent fact point.
 *
 * Points are ranked by their `filed` date (falling back to `end`), newest
 * first; points without a parseable date rank last. When several points share
 * the same rank, the one with the latest `end` date wins, and remaining ties
 * keep input order.
 *
 * The `end` tie-break matters when many points carry the same `filed` date;
 * without it the winner among them depended only on input order.
 * NOTE(review): presumably one filing reports current and comparative periods
 * under one `filed` date - confirm against the company-facts payload.
 *
 * @returns The selected point, or `null` for an empty list. The input array is
 *   not modified.
 */
function pickMostRecentFact(points: CompanyFactPoint[]) {
  // Unparseable dates sort below every real date.
  const rank = (stamp: number) => (Number.isFinite(stamp) ? stamp : Number.NEGATIVE_INFINITY);

  let best: CompanyFactPoint | null = null;
  let bestPrimary = Number.NEGATIVE_INFINITY;
  let bestPeriodEnd = Number.NEGATIVE_INFINITY;

  for (const point of points) {
    const primary = rank(parseDate(point.filed ?? point.end));
    const periodEnd = rank(parseDate(point.end));

    const isBetter = best === null
      || primary > bestPrimary
      || (primary === bestPrimary && periodEnd > bestPeriodEnd);

    if (isBetter) {
      best = point;
      bestPrimary = primary;
      bestPeriodEnd = periodEnd;
    }
  }

  return best;
}
|
||
|
||
/**
 * Picks the fact point that best matches a target date.
 *
 * Each point is dated by `filed` (falling back to `end`). Selection order:
 *  1. the latest point dated on or before the target;
 *  2. otherwise the point dated closest to the target.
 * Points that tie on those criteria are ordered by latest `end` date, so the
 * choice among points sharing one `filed` date no longer depends only on
 * input order.
 *
 * @param points Candidate points.
 * @param targetDate Target timestamp in ms; a non-finite value selects the
 *   most recent point via `pickMostRecentFact`.
 * @returns The selected point, or `null` when `points` is empty.
 */
function pickClosestByDate(points: CompanyFactPoint[], targetDate: number) {
  type DatedPoint = { point: CompanyFactPoint; date: number; periodEnd: number };

  if (points.length === 0) {
    return null;
  }

  if (!Number.isFinite(targetDate)) {
    return pickMostRecentFact(points);
  }

  const dated: DatedPoint[] = points
    .map((point) => ({
      point,
      date: parseDate(point.filed ?? point.end),
      periodEnd: parseDate(point.end)
    }))
    .filter((entry) => Number.isFinite(entry.date));

  if (dated.length === 0) {
    return pickMostRecentFact(points);
  }

  // Later period end first; entries without a parseable `end` go last.
  const latestPeriodFirst = (a: DatedPoint, b: DatedPoint) => {
    const aEnd = Number.isFinite(a.periodEnd) ? a.periodEnd : Number.NEGATIVE_INFINITY;
    const bEnd = Number.isFinite(b.periodEnd) ? b.periodEnd : Number.NEGATIVE_INFINITY;
    if (aEnd === bEnd) {
      return 0;
    }

    return aEnd < bEnd ? 1 : -1;
  };

  const onOrBefore = dated.filter((entry) => entry.date <= targetDate);
  if (onOrBefore.length > 0) {
    return onOrBefore.sort((a, b) => (b.date - a.date) || latestPeriodFirst(a, b))[0]?.point ?? null;
  }

  return dated.sort((a, b) => {
    const distance = Math.abs(a.date - targetDate) - Math.abs(b.date - targetDate);
    if (distance !== 0) {
      return distance;
    }

    return (b.date - a.date) || latestPeriodFirst(a, b);
  })[0]?.point ?? null;
}
|
||
|
||
/**
 * Selects the value of a us-gaap tag that best corresponds to a filing.
 *
 * Lookup order:
 *  1. points whose accession number matches the filing (most recent of them);
 *  2. points reported on the same form type, closest to the filing date;
 *  3. any point, closest to the filing date.
 *
 * @returns The fact value, or `null` when the tag has no numeric facts.
 */
function pickFactForFiling(
  payload: CompanyFactsPayload,
  tag: string,
  filing: FilingMetricsLookupInput
): number | null {
  const series = collectFactSeries(payload, tag);
  if (series.length === 0) {
    return null;
  }

  const usableValue = (point: CompanyFactPoint | null | undefined): number | null =>
    typeof point?.val === 'number' && Number.isFinite(point.val) ? point.val : null;

  // 1. Exact filing match by accession number.
  const wantedAccession = normalizeAccessionKey(filing.accessionNumber);
  if (wantedAccession) {
    const sameFiling = series.filter((point) => normalizeAccessionKey(point.accn) === wantedAccession);
    if (sameFiling.length > 0) {
      const exact = usableValue(pickMostRecentFact(sameFiling));
      if (exact !== null) {
        return exact;
      }
    }
  }

  // 2. Same form type, closest to the filing date.
  const wantedForm = normalizeForm(filing.filingType);
  const sameForm = wantedForm
    ? series.filter((point) => normalizeForm(point.form) === wantedForm)
    : series;

  const filedAt = parseDate(filing.filingDate);
  const formValue = usableValue(pickClosestByDate(sameForm, filedAt));
  if (formValue !== null) {
    return formValue;
  }

  // 3. Any form, closest to the filing date.
  return usableValue(pickClosestByDate(series, filedAt));
}
|
||
|
||
/**
 * Resolves a metric for a filing by trying several us-gaap tags.
 *
 * Two passes are made over `tags`, each in list order:
 *  1. only tags that have at least one point with the filing's accession
 *     number, so a tag actually reported in the filing wins;
 *  2. every tag, accepting the date-based fallback of `pickFactForFiling`.
 *
 * Without the first pass an earlier tag that only has unrelated (for example
 * much older) data would shadow a later tag that the filing really reports.
 *
 * @returns The first non-null value found, or `null` when no tag has data.
 */
function pickFactByTags(
  payload: CompanyFactsPayload,
  tags: readonly string[],
  filing: FilingMetricsLookupInput
) {
  const accessionKey = normalizeAccessionKey(filing.accessionNumber);
  if (accessionKey) {
    for (const tag of tags) {
      const reportedInFiling = collectFactSeries(payload, tag)
        .some((point) => normalizeAccessionKey(point.accn) === accessionKey);
      if (!reportedInFiling) {
        continue;
      }

      const value = pickFactForFiling(payload, tag, filing);
      if (value !== null) {
        return value;
      }
    }
  }

  for (const tag of tags) {
    const value = pickFactForFiling(payload, tag, filing);
    if (value !== null) {
      return value;
    }
  }

  return null;
}
|
||
|
||
/** Creates a fresh metrics object with every metric set to `null`. */
function emptyMetrics(): FilingMetrics {
  const blank: FilingMetrics = {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };

  return blank;
}
|
||
|
||
/**
 * Lists a company's recent filings of the supported form types.
 *
 * @param ticker Ticker symbol, resolved through the SEC ticker directory.
 * @param limit Maximum number of filings to return. Fractions are truncated
 *   and the value is clamped to 1..50; `NaN` falls back to the default of 20
 *   (previously `NaN` disabled the cap entirely).
 * @returns Filings in payload order; empty when the payload has no recent
 *   filings section.
 * @throws Error when the ticker is unknown or an SEC request fails.
 */
export async function fetchRecentFilings(ticker: string, limit = 20): Promise<SecFiling[]> {
  const requestedLimit = Number.isNaN(limit) ? 20 : Math.trunc(limit);
  const safeLimit = Math.min(Math.max(requestedLimit, 1), 50);

  const company = await resolveTicker(ticker);
  const cikPadded = company.cik.padStart(10, '0');
  const submissionUrl = `https://data.sec.gov/submissions/CIK${cikPadded}.json`;
  const payload = await fetchJson<RecentFilingsPayload>(submissionUrl);
  const recent = payload.filings?.recent;

  if (!recent) {
    return [];
  }

  // The payload stores one array per attribute; index `i` describes one filing.
  const forms = recent.form ?? [];
  const accessionNumbers = recent.accessionNumber ?? [];
  const filingDates = recent.filingDate ?? [];
  const primaryDocuments = recent.primaryDocument ?? [];
  const filings: SecFiling[] = [];

  for (let i = 0; i < forms.length; i += 1) {
    const normalizedForm = normalizeForm(forms[i]);
    // `find` yields a properly typed FilingType without a type assertion.
    const filingType = SUPPORTED_FORMS.find((form) => form === normalizedForm);
    if (!filingType) {
      continue;
    }

    const accessionNumber = accessionNumbers[i];
    if (!accessionNumber) {
      continue;
    }

    const compactAccession = accessionNumber.replace(/-/g, '');
    const documentName = primaryDocuments[i];
    const filingUrl = documentName
      ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccession}/${documentName}`
      : null;

    filings.push({
      ticker: company.ticker,
      cik: company.cik,
      companyName: payload.name ?? company.companyName,
      filingType,
      // NOTE(review): a missing filing date is replaced by today's date.
      filingDate: filingDates[i] ?? todayIso(),
      accessionNumber,
      filingUrl,
      submissionUrl,
      primaryDocument: documentName ?? null
    });

    if (filings.length >= safeLimit) {
      break;
    }
  }

  return filings;
}
|
||
|
||
/**
 * Fetches a company's facts and returns the latest value of each headline metric.
 *
 * Each metric first tries the tag this function has always used and then the
 * alternatives listed in `METRIC_TAGS`, matching the fallbacks used by
 * `fetchFilingMetricsForFilings`. Results therefore only change for metrics
 * that were previously `null`.
 *
 * @param cik Company CIK; left-padded with zeros to 10 characters for the URL.
 * @returns Metrics object; a metric is `null` when none of its tags has data.
 * @throws Error when the SEC request fails.
 */
export async function fetchLatestFilingMetrics(cik: string) {
  const normalized = cik.padStart(10, '0');
  const payload = await fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`);

  const latestOf = (primaryTag: string, fallbackTags: readonly string[]): number | null => {
    // The Set removes the primary tag where it also appears among the fallbacks.
    for (const tag of new Set<string>([primaryTag, ...fallbackTags])) {
      const value = pickLatestFact(payload, tag);
      if (value !== null) {
        return value;
      }
    }

    return null;
  };

  return {
    revenue: latestOf('Revenues', METRIC_TAGS.revenue),
    netIncome: latestOf('NetIncomeLoss', METRIC_TAGS.netIncome),
    totalAssets: latestOf('Assets', METRIC_TAGS.totalAssets),
    cash: latestOf('CashAndCashEquivalentsAtCarryingValue', METRIC_TAGS.cash),
    debt: latestOf('LongTermDebt', METRIC_TAGS.debt)
  } satisfies FilingMetrics;
}
|
||
|
||
/**
 * Resolves headline metrics for a batch of filings of one company.
 *
 * The company-facts payload is downloaded once and each filing is matched
 * against it. This is best-effort: if the download or the matching throws,
 * every filing receives all-`null` metrics instead of the error propagating.
 *
 * @param cik Company CIK; left-padded with zeros to 10 characters for the URL.
 * @param _ticker Unused.
 * @param filings Filings to resolve; an empty list returns an empty map
 *   without any request.
 * @returns Map keyed by each filing's `accessionNumber` as given.
 */
export async function fetchFilingMetricsForFilings(
  cik: string,
  _ticker: string,
  filings: FilingMetricsLookupInput[]
) {
  const metricsByAccession = new Map<string, FilingMetrics>();
  if (filings.length === 0) {
    return metricsByAccession;
  }

  try {
    const companyFacts = await fetchJson<CompanyFactsPayload>(
      `https://data.sec.gov/api/xbrl/companyfacts/CIK${cik.padStart(10, '0')}.json`
    );

    filings.forEach((filing) => {
      const lookup = (tags: readonly string[]) => pickFactByTags(companyFacts, tags, filing);

      metricsByAccession.set(filing.accessionNumber, {
        revenue: lookup(METRIC_TAGS.revenue),
        netIncome: lookup(METRIC_TAGS.netIncome),
        totalAssets: lookup(METRIC_TAGS.totalAssets),
        cash: lookup(METRIC_TAGS.cash),
        debt: lookup(METRIC_TAGS.debt)
      });
    });
  } catch {
    // Deliberate fallback: overwrite anything collected so far with blanks.
    filings.forEach((filing) => {
      metricsByAccession.set(filing.accessionNumber, emptyMetrics());
    });
  }

  return metricsByAccession;
}
|
||
|
||
/** Input describing one 10-K/10-Q filing whose statements are to be hydrated. */
type FilingStatementHydrationInput = {
  filingId: number;
  ticker: string;
  cik: string;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  filingUrl: string | null;
  primaryDocument: string | null;
  metrics: Filing['metrics'];
};

/** Outcome of hydrating one filing: the statement bundles plus parse status and data source. */
type FilingStatementHydrationResult = {
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: '10-K' | '10-Q';
  period_end: string | null;
  statement_bundle: FilingStatementBundle | null;
  standardized_bundle: StandardizedStatementBundle | null;
  dimension_bundle: DimensionStatementBundle | null;
  parse_status: 'ready' | 'partial' | 'failed';
  parse_error: string | null;
  source: 'sec_filing_summary' | 'xbrl_instance' | 'companyfacts_fallback';
};

/** One `<Report>` entry as extracted by `parseFilingSummaryReports`. */
type StatementReportDescriptor = {
  shortName: string;
  longName: string;
  htmlFileName: string | null;
  xmlFileName: string | null;
};

/** One table row as extracted by `parseStatementRowsFromReport`. */
type StatementParseRow = {
  key: string;
  label: string;
  concept: string | null;
  /** 1-based position among the rows kept from the table. */
  order: number;
  depth: number;
  isSubtotal: boolean;
  value: number | null;
};

/** Period end date plus the axis/member pairs of a dimensional context. */
type DimensionContext = {
  endDate: string | null;
  dimensions: Array<{ axis: string; member: string }>;
};

/**
 * Definition of one standardized statement row. A source row matches when any
 * concept pattern matches its concept or any label pattern matches its label
 * (see `matchStandardizedDefinition`).
 */
type CanonicalRowDefinition = {
  key: string;
  label: string;
  category: string;
  conceptPatterns: RegExp[];
  labelPatterns: RegExp[];
};
|
||
|
||
/** Every statement kind, in the order used when building per-kind records. */
const FINANCIAL_STATEMENT_KINDS: FinancialStatementKind[] = [
  'income',
  'balance',
  'cash_flow',
  'equity',
  'comprehensive_income'
];
|
||
|
||
/**
 * Title patterns identifying the report of each statement kind.
 * `scoreReport` adds 2 points for every pattern that matches a report's names.
 */
const STATEMENT_REPORT_PATTERNS: Record<FinancialStatementKind, RegExp[]> = {
  income: [
    /\bstatements?\s+of\s+operations?\b/i,
    /\bstatements?\s+of\s+income\b/i,
    /\bincome\s+statement/i
  ],
  balance: [
    /\bbalance\s+sheets?\b/i,
    /\bstatement\s+of\s+financial\s+position\b/i
  ],
  cash_flow: [
    /\bstatements?\s+of\s+cash\s+flows?\b/i,
    /\bcash\s+flows?\b/i
  ],
  equity: [
    /\bstatements?\s+of\s+(stockholders|shareholders)['’]?\s+equity\b/i,
    /\bchanges\s+in\s+equity\b/i
  ],
  comprehensive_income: [
    /\bstatements?\s+of\s+comprehensive\s+income\b/i,
    /\bcomprehensive\s+income\b/i
  ]
};
|
||
|
||
/**
 * Standardized rows per statement kind, with the patterns used to find the
 * matching source row (see `matchStandardizedDefinition`).
 *
 * Concepts produced by `extractConceptFromMarkup` carry a namespace prefix
 * (for example `us-gaap:Assets`), so patterns meant to match a whole concept
 * name anchor on `(?:^|:)` rather than `^`:
 *  - total assets: the earlier `/^assets$/i` could never match a prefixed concept;
 *  - total liabilities: the earlier `/liabilities/i` also matched concepts
 *    such as `LiabilitiesCurrent` and `LiabilitiesAndStockholdersEquity`.
 * The "total liabilities" label pattern additionally rejects
 * "total liabilities and ..." rows.
 */
const STANDARDIZED_ROW_DEFINITIONS: Record<FinancialStatementKind, CanonicalRowDefinition[]> = {
  income: [
    {
      key: 'revenue',
      label: 'Revenue',
      category: 'core',
      conceptPatterns: [/revenue/i, /salesrevenuenet/i],
      labelPatterns: [/\brevenue\b/i, /\bsales\b/i]
    },
    {
      key: 'cost-of-revenue',
      label: 'Cost of Revenue',
      category: 'core',
      conceptPatterns: [/costofrevenue/i, /costofgoods/i],
      labelPatterns: [/\bcost of revenue\b/i, /\bcost of sales\b/i]
    },
    {
      key: 'gross-profit',
      label: 'Gross Profit',
      category: 'core',
      conceptPatterns: [/grossprofit/i],
      labelPatterns: [/\bgross profit\b/i]
    },
    {
      key: 'operating-income',
      label: 'Operating Income',
      category: 'core',
      conceptPatterns: [/operatingincome/i, /incomefromoperations/i],
      labelPatterns: [/\boperating income\b/i, /\bincome from operations\b/i]
    },
    {
      key: 'net-income',
      label: 'Net Income',
      category: 'core',
      conceptPatterns: [/netincomeloss/i, /profitloss/i],
      labelPatterns: [/\bnet income\b/i, /\bnet earnings\b/i]
    }
  ],
  balance: [
    {
      key: 'total-assets',
      label: 'Total Assets',
      category: 'core',
      // Whole-name match, with or without a namespace prefix.
      conceptPatterns: [/(?:^|:)assets$/i],
      labelPatterns: [/\btotal assets\b/i]
    },
    {
      key: 'total-liabilities',
      label: 'Total Liabilities',
      category: 'core',
      // Whole-name match only; sub-totals and liabilities-and-equity are excluded.
      conceptPatterns: [/(?:^|:)liabilities$/i],
      labelPatterns: [/\btotal liabilities\b(?!\s+and\b)/i]
    },
    {
      key: 'stockholders-equity',
      label: 'Stockholders Equity',
      category: 'core',
      conceptPatterns: [/stockholdersequity/i, /shareholdersequity/i, /equity/i],
      labelPatterns: [/\bequity\b/i]
    },
    {
      key: 'cash-and-equivalents',
      label: 'Cash and Equivalents',
      category: 'liquidity',
      conceptPatterns: [/cashandcashequivalents/i, /cashandequivalents/i],
      labelPatterns: [/\bcash\b/i, /\bcash equivalents\b/i]
    },
    {
      key: 'total-debt',
      label: 'Total Debt',
      category: 'leverage',
      conceptPatterns: [/longtermdebt/i, /debt/i, /borrowings/i],
      labelPatterns: [/\btotal debt\b/i, /\blong-term debt\b/i, /\bdebt\b/i]
    }
  ],
  cash_flow: [
    {
      key: 'net-cash-operating',
      label: 'Net Cash from Operating Activities',
      category: 'core',
      conceptPatterns: [/netcashprovidedbyusedinoperatingactivities/i, /netcashfromoperatingactivities/i],
      labelPatterns: [/\boperating activities\b/i]
    },
    {
      key: 'net-cash-investing',
      label: 'Net Cash from Investing Activities',
      category: 'core',
      conceptPatterns: [/netcashprovidedbyusedininvestingactivities/i],
      labelPatterns: [/\binvesting activities\b/i]
    },
    {
      key: 'net-cash-financing',
      label: 'Net Cash from Financing Activities',
      category: 'core',
      conceptPatterns: [/netcashprovidedbyusedinfinancingactivities/i],
      labelPatterns: [/\bfinancing activities\b/i]
    },
    {
      key: 'net-change-cash',
      label: 'Net Change in Cash',
      category: 'core',
      conceptPatterns: [/cashandcashequivalentsperiodincrease/i, /increase.*cash/i],
      labelPatterns: [/\bnet change\b/i, /\bincrease.*cash\b/i]
    }
  ],
  equity: [
    {
      key: 'equity-balance',
      label: 'Total Equity',
      category: 'core',
      conceptPatterns: [/stockholdersequity/i, /shareholdersequity/i, /equity/i],
      labelPatterns: [/\btotal equity\b/i, /\bequity\b/i]
    },
    {
      key: 'retained-earnings',
      label: 'Retained Earnings',
      category: 'core',
      conceptPatterns: [/retainedearnings/i],
      labelPatterns: [/\bretained earnings\b/i]
    }
  ],
  comprehensive_income: [
    {
      key: 'comprehensive-income',
      label: 'Comprehensive Income',
      category: 'core',
      conceptPatterns: [/comprehensiveincome/i],
      labelPatterns: [/\bcomprehensive income\b/i]
    },
    {
      key: 'other-comprehensive-income',
      label: 'Other Comprehensive Income',
      category: 'core',
      conceptPatterns: [/othercomprehensiveincome/i],
      labelPatterns: [/\bother comprehensive income\b/i]
    }
  ]
};
|
||
|
||
/**
 * Builds a record with one entry per statement kind.
 *
 * @param factory Called once per kind, in `FINANCIAL_STATEMENT_KINDS` order,
 *   so that every kind receives its own value.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const record = {} as Record<FinancialStatementKind, T>;

  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    record[kind] = factory();
  }

  return record;
}
|
||
|
||
/**
 * Returns the display title of a statement kind. A value outside the known
 * kinds is returned unchanged.
 */
function statementKindLabel(kind: FinancialStatementKind) {
  if (kind === 'income') {
    return 'Income Statement';
  }

  if (kind === 'balance') {
    return 'Balance Sheet';
  }

  if (kind === 'cash_flow') {
    return 'Cash Flow Statement';
  }

  if (kind === 'equity') {
    return 'Statement of Equity';
  }

  if (kind === 'comprehensive_income') {
    return 'Comprehensive Income';
  }

  return kind;
}
|
||
|
||
/**
 * Determines the archive directory URL (with trailing slash) of a filing.
 *
 * When `filingUrl` is set and has a `/` beyond position `'https://'.length`,
 * everything up to and including its last `/` is used. Otherwise the directory
 * is assembled from the CIK and accession number.
 *
 * @returns The directory URL, or `null` when it cannot be derived.
 */
function resolveFilingDirectoryUrl(input: {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
}) {
  const explicitUrl = input.filingUrl?.trim();
  if (explicitUrl) {
    const cut = explicitUrl.lastIndexOf('/');
    // A slash inside the scheme separator does not denote a directory.
    if (cut > 'https://'.length) {
      return explicitUrl.substring(0, cut + 1);
    }
  }

  const cikSegment = normalizeCikForPath(input.cik);
  const accessionSegment = compactAccessionNumber(input.accessionNumber);

  return cikSegment && accessionSegment
    ? `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/`
    : null;
}
|
||
|
||
/**
 * Resolves a report file name against a filing directory URL.
 *
 * @param baseUrl Directory URL. A missing trailing slash is added, so
 *   `.../dir` and `.../dir/` give the same result (previously the file name
 *   was appended directly to `dir`).
 * @param relativePath File name or path; leading slashes are ignored. A value
 *   that already starts with `http://` or `https://` is returned as is.
 * @returns The absolute URL, or `null` when `relativePath` is missing or blank.
 */
function toAbsoluteArchiveUrl(baseUrl: string, relativePath: string | null) {
  const normalized = (relativePath ?? '').trim();
  if (!normalized) {
    return null;
  }

  if (/^https?:\/\//i.test(normalized)) {
    return normalized;
  }

  const separator = baseUrl === '' || baseUrl.endsWith('/') ? '' : '/';

  return `${baseUrl}${separator}${normalized.replace(/^\/+/, '')}`;
}
|
||
|
||
/**
 * Fetches a URL with the module's User-Agent and returns the body as text.
 *
 * @param url Address to request.
 * @param fetchImpl Fetch implementation used for the request.
 * @throws Error when the response status is not OK.
 */
async function fetchText(url: string, fetchImpl: typeof fetch) {
  const headers = {
    'User-Agent': envUserAgent(),
    Accept: 'text/xml, text/html, text/plain;q=0.9, */*;q=0.8'
  };

  const response = await fetchImpl(url, { headers, cache: 'no-store' });
  if (response.ok) {
    return await response.text();
  }

  throw new Error(`SEC request failed (${response.status})`);
}
|
||
|
||
/**
 * Extracts the text of the first `<tagName>…</tagName>` element in a block.
 *
 * The opening tag may carry attributes (`<Name attr="x">`); previously only a
 * bare `<Name>` was recognized. Matching is case-insensitive and regex based,
 * so nested elements of the same name are not supported.
 *
 * @returns The entity-decoded, trimmed text, or `''` when the element is absent.
 */
function xmlTextValue(block: string, tagName: string) {
  const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // `(?:\s[^>]*)?` admits attributes without matching longer tag names.
  const pattern = new RegExp(`<${escaped}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${escaped}\\s*>`, 'i');
  const match = block.match(pattern);
  if (!match) {
    return '';
  }

  return decodeHtmlEntities(match[1] ?? '').trim();
}
|
||
|
||
/**
 * Extracts the report descriptors from filing-summary XML.
 *
 * Every `<Report>` element is read, including ones whose opening tag carries
 * attributes; previously only a bare `<Report>` tag was recognized.
 * NOTE(review): some summaries appear to write `<Report instance="...">` -
 * confirm against real FilingSummary.xml files.
 *
 * @returns One descriptor per report in document order. Missing names become
 *   `''`, missing file names become `null`.
 */
function parseFilingSummaryReports(xml: string) {
  const reports: StatementReportDescriptor[] = [];
  // `\b` keeps longer element names such as <ReportType> from matching.
  const reportPattern = /<Report\b[^>]*>([\s\S]*?)<\/Report>/gi;

  for (const match of xml.matchAll(reportPattern)) {
    const block = match[1] ?? '';
    reports.push({
      shortName: xmlTextValue(block, 'ShortName'),
      longName: xmlTextValue(block, 'LongName'),
      htmlFileName: xmlTextValue(block, 'HtmlFileName') || null,
      xmlFileName: xmlTextValue(block, 'XmlFileName') || null
    });
  }

  return reports;
}
|
||
|
||
/**
 * Scores how well a report's names match a statement kind.
 *
 * Each matching pattern of the kind adds 2 points; 1 point is deducted when
 * the names contain the word "parenthetical" or "detail". A report with no
 * names scores 0.
 */
function scoreReport(kind: FinancialStatementKind, report: StatementReportDescriptor) {
  const haystack = `${report.shortName} ${report.longName}`.trim();
  if (haystack.length === 0) {
    return 0;
  }

  const hits = STATEMENT_REPORT_PATTERNS[kind].filter((pattern) => pattern.test(haystack)).length;
  const isSupplemental = /\bparenthetical\b/i.test(haystack) || /\bdetail\b/i.test(haystack);

  return hits * 2 - (isSupplemental ? 1 : 0);
}
|
||
|
||
/**
 * Picks the report that best matches a statement kind.
 *
 * @returns The highest-scoring report (the first one on ties), or `null` when
 *   no report scores above 0.
 */
function chooseStatementReport(kind: FinancialStatementKind, reports: StatementReportDescriptor[]) {
  const initial: { report: StatementReportDescriptor | null; score: number } = { report: null, score: 0 };

  const leader = reports.reduce((current, candidate) => {
    const score = scoreReport(kind, candidate);
    // Strictly greater keeps the earliest report among equal scores.
    return score > current.score ? { report: candidate, score } : current;
  }, initial);

  return leader.report;
}
|
||
|
||
/**
 * Reduces the markup of a table cell to single-line text.
 *
 * `<br>` tags become line breaks, other tags become spaces, character
 * references are decoded, and every run of whitespace (including the line
 * breaks) collapses to ONE space. Previously spaces were collapsed before the
 * newlines were replaced, which left double spaces (for example
 * `Total <br> assets` gave `Total  assets`) that label patterns such as
 * `/\btotal assets\b/` did not match.
 */
function sanitizeCellText(raw: string) {
  const withoutTags = raw
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<[^>]+>/g, ' ');

  return decodeHtmlEntities(withoutTags)
    .replace(/\s+/g, ' ')
    .trim();
}
|
||
|
||
/**
 * Finds the concept identifier embedded in a row's markup.
 *
 * A `defref_…` token is preferred; every underscore in it is turned into a
 * colon. Otherwise the value of a `name="…"` attribute is used unchanged.
 *
 * @returns The concept identifier, or `null` when neither form is present.
 */
function extractConceptFromMarkup(markup: string) {
  const fromDefref = /defref[_:-]([a-z0-9_:.:-]+)/i.exec(markup)?.[1];
  if (fromDefref) {
    return fromDefref.replace(/_/g, ':');
  }

  const fromNameAttr = /\bname=[\"']([a-z0-9_:.:-]+)[\"']/i.exec(markup)?.[1];

  return fromNameAttr ? fromNameAttr : null;
}
|
||
|
||
/**
 * Derives an indentation depth from a cell's inline style.
 *
 * The first positive pixel value among `padding-left` and `margin-left`
 * (checked in that order) is divided by 12 and rounded.
 *
 * @param attrs Attribute text of the cell's opening tag.
 * @returns The depth; 0 when no positive indentation is found.
 */
function parseIndentDepth(attrs: string) {
  const style = attrs.match(/\bstyle=[\"']([^\"']+)[\"']/i)?.[1] ?? '';
  const offsetPatterns = [
    /padding-left:\s*([0-9.]+)px/i,
    /margin-left:\s*([0-9.]+)px/i
  ];

  for (const pattern of offsetPatterns) {
    const rawPixels = style.match(pattern)?.[1];
    if (!rawPixels) {
      continue;
    }

    const pixels = Number.parseFloat(rawPixels);
    if (Number.isFinite(pixels) && pixels > 0) {
      return Math.max(0, Math.round(pixels / 12));
    }
  }

  return 0;
}
|
||
|
||
/**
 * Parses the text of a statement cell as a number.
 *
 * Accepted: an optional `$`, thousands separators, whitespace, a leading
 * minus (ASCII or U+2212), surrounding parentheses for negatives and bracketed
 * footnote markers such as `[1]`. After removing those, the remainder must be
 * a plain decimal number.
 *
 * Changes from the earlier lenient parsing:
 *  - parentheses are detected after the currency symbol is removed, so
 *    `$ (1,234)` is negative instead of positive;
 *  - text that merely starts with digits (for example `12 Months Ended`) is
 *    rejected instead of being read as `12`.
 *
 * @returns The number, or `null` for blanks, `n/a`, dash placeholders,
 *   percentages and anything that is not a plain number.
 */
function parseStatementNumber(raw: string): number | null {
  const trimmed = raw.trim();
  if (!trimmed || /^n\/a$/i.test(trimmed) || /^--+$/.test(trimmed)) {
    return null;
  }

  if (/%$/.test(trimmed)) {
    return null;
  }

  const compact = trimmed
    .replace(/\[[^\]]*\]/g, '')
    .replace(/[$,\s]/g, '')
    .replace(/\u2212/g, '-');

  // Accounting notation: a value wrapped in parentheses is negative.
  const parenthesized = /^\(.*\)$/.test(compact);
  const unsigned = parenthesized ? compact.slice(1, -1) : compact;

  if (!/^-?(?:\d+\.?\d*|\.\d+)$/.test(unsigned)) {
    return null;
  }

  const value = Number(unsigned);
  if (!Number.isFinite(value)) {
    return null;
  }

  return parenthesized ? -Math.abs(value) : value;
}
|
||
|
||
/**
 * Builds a lower-case identifier from arbitrary text: runs of characters
 * other than `a-z` and `0-9` become single hyphens, and no hyphen is left at
 * either end.
 */
function slug(value: string) {
  const words = value
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((part) => part.length > 0);

  return words.join('-');
}
|
||
|
||
/**
 * Extracts statement rows from report markup.
 *
 * Every `<table>` is parsed separately and the table producing the most rows
 * wins (the earliest one on ties). Within a table, a row is kept when it has
 * at least two cells, a non-empty label that is not one of the bare section
 * headings, and at least one later cell that parses as a number; the first
 * such number becomes the row value.
 *
 * @returns The rows of the winning table, or an empty list when the markup
 *   has no tables or no usable rows.
 */
function parseStatementRowsFromReport(content: string): StatementParseRow[] {
  const tableMarkups = Array.from(
    content.matchAll(/<table[^>]*>([\s\S]*?)<\/table>/gi),
    (tableMatch) => tableMatch[0] ?? ''
  );
  if (tableMarkups.length === 0) {
    return [];
  }

  // First numeric value among the cells after the label cell.
  const firstNumericValue = (cells: ReadonlyArray<ReadonlyArray<string>>): number | null => {
    for (const cell of cells.slice(1)) {
      const parsed = parseStatementNumber(sanitizeCellText(cell[2] ?? ''));
      if (parsed !== null) {
        return parsed;
      }
    }

    return null;
  };

  const parseTable = (tableMarkup: string): StatementParseRow[] => {
    const parsedRows: StatementParseRow[] = [];

    for (const rowMatch of tableMarkup.matchAll(/<tr[^>]*>([\s\S]*?)<\/tr>/gi)) {
      const rowMarkup = rowMatch[0] ?? '';
      const cells = [...rowMarkup.matchAll(/<t[dh]([^>]*)>([\s\S]*?)<\/t[dh]>/gi)];
      if (cells.length < 2) {
        continue;
      }

      const [labelCell] = cells;
      const labelAttrs = labelCell?.[1] ?? '';
      const label = sanitizeCellText(labelCell?.[2] ?? '');
      if (!label || /^(years ended|assets|liabilities|equity)$/i.test(label)) {
        continue;
      }

      const value = firstNumericValue(cells);
      if (value === null) {
        continue;
      }

      // Order counts kept rows only, starting at 1.
      const order = parsedRows.length + 1;
      const concept = extractConceptFromMarkup(rowMarkup);

      parsedRows.push({
        key: concept ? slug(concept) : `${slug(label)}-${order}`,
        label,
        concept,
        order,
        depth: parseIndentDepth(labelAttrs),
        isSubtotal: /^total\b/i.test(label) || /\bsubtotal\b/i.test(label),
        value
      });
    }

    return parsedRows;
  };

  return tableMarkups
    .map(parseTable)
    .reduce<StatementParseRow[]>((best, rows) => (rows.length > best.length ? rows : best), []);
}
|
||
|
||
/**
 * Converts parsed rows into snapshot rows, storing each row's value under the
 * given period id.
 */
function toSnapshotRows(periodId: string, rows: StatementParseRow[]): FilingFaithfulStatementSnapshotRow[] {
  const snapshotRows: FilingFaithfulStatementSnapshotRow[] = [];

  for (const row of rows) {
    const { key, label, concept, order, depth, isSubtotal, value } = row;
    snapshotRows.push({
      key,
      label,
      concept,
      order,
      depth,
      isSubtotal,
      values: { [periodId]: value }
    });
  }

  return snapshotRows;
}
|
||
|
||
/**
 * Tells whether a source row satisfies a standardized row definition: any
 * concept pattern matching the row's concept (an absent concept is tested as
 * `''`), or any label pattern matching the row's label.
 */
function matchStandardizedDefinition(
  row: FilingFaithfulStatementSnapshotRow,
  definition: CanonicalRowDefinition
) {
  const conceptText = row.concept ?? '';
  if (definition.conceptPatterns.some((pattern) => pattern.test(conceptText))) {
    return true;
  }

  return definition.labelPatterns.some((pattern) => pattern.test(row.label));
}
|
||
|
||
/**
 * Looks up a filing-level metric that can stand in for a standardized row
 * whose value could not be read from the parsed statement.
 *
 * Supported (kind, rowKey) pairs:
 * - income / `revenue` -> `metrics.revenue`
 * - income / `net-income` -> `metrics.netIncome`
 * - balance / `total-assets` -> `metrics.totalAssets`
 * - balance / `cash-and-equivalents` -> `metrics.cash`
 * - balance / `total-debt` -> `metrics.debt`
 *
 * @param kind - Statement kind the row belongs to.
 * @param rowKey - Canonical row key.
 * @param metrics - Filing metrics; may be absent.
 * @returns The metric value, or `null` when metrics are absent, the pair is
 *   not supported, or the metric itself is null/undefined.
 */
function fallbackMetricValue(
  kind: FinancialStatementKind,
  rowKey: string,
  metrics: Filing['metrics']
) {
  if (!metrics) {
    return null;
  }

  switch (kind) {
    case 'income':
      switch (rowKey) {
        case 'revenue':
          return metrics.revenue ?? null;
        case 'net-income':
          return metrics.netIncome ?? null;
        default:
          return null;
      }
    case 'balance':
      switch (rowKey) {
        case 'total-assets':
          return metrics.totalAssets ?? null;
        case 'cash-and-equivalents':
          return metrics.cash ?? null;
        case 'total-debt':
          return metrics.debt ?? null;
        default:
          return null;
      }
    default:
      return null;
  }
}
|
||
|
||
/**
 * Projects filing-faithful rows onto the canonical row set for a statement
 * kind.
 *
 * Pass 1 walks `STANDARDIZED_ROW_DEFINITIONS[kind]` in order. Each definition
 * claims the first not-yet-claimed row that matches it; a canonical row is
 * emitted for every definition whether or not a row was claimed. When the
 * claimed row has no value for `periodId` (or nothing was claimed), the value
 * falls back to `fallbackMetricValue`.
 *
 * Pass 2 appends every row whose key was never claimed as an `other-<key>` row
 * in the `'other'` category.
 *
 * Claims are tracked by row key, so rows sharing a key are claimed together.
 *
 * @param kind - Statement kind selecting the canonical definitions.
 * @param periodId - Period key used to read and write row values.
 * @param rows - Filing-faithful rows for this statement.
 * @param metrics - Filing metrics used as fallback values; may be absent.
 * @returns Canonical rows in definition order, followed by leftover rows.
 */
function toStandardizedRows(
  kind: FinancialStatementKind,
  periodId: string,
  rows: FilingFaithfulStatementSnapshotRow[],
  metrics: Filing['metrics']
): StandardizedStatementSnapshotRow[] {
  const claimedKeys = new Set<string>();
  const result: StandardizedStatementSnapshotRow[] = [];

  for (const definition of STANDARDIZED_ROW_DEFINITIONS[kind]) {
    const source = rows.find(
      (candidate) => !claimedKeys.has(candidate.key) && matchStandardizedDefinition(candidate, definition)
    );
    if (source) {
      claimedKeys.add(source.key);
    }

    const reported = source?.values[periodId] ?? null;
    const value = reported !== null
      ? reported
      : fallbackMetricValue(kind, definition.key, metrics);

    result.push({
      key: definition.key,
      label: definition.label,
      concept: source?.concept ?? definition.key,
      category: definition.category,
      sourceConcepts: source?.concept ? [source.concept] : [],
      values: { [periodId]: value }
    });
  }

  for (const leftover of rows) {
    if (!claimedKeys.has(leftover.key)) {
      result.push({
        key: `other-${leftover.key}`,
        label: leftover.label,
        concept: leftover.concept ?? leftover.key,
        category: 'other',
        sourceConcepts: leftover.concept ? [leftover.concept] : [],
        values: { [periodId]: leftover.values[periodId] ?? null }
      });
    }
  }

  return result;
}
|
||
|
||
/**
 * Extracts the XBRL contexts that carry at least one explicit dimension.
 *
 * Every `<context id="...">` element (with or without a namespace prefix) is
 * scanned for `<explicitMember dimension="...">member</explicitMember>`
 * children. Contexts with no usable explicit member are omitted.
 *
 * Only `<endDate>` is read for the period; a context without one gets
 * `endDate: null`.
 * NOTE(review): contexts that use `<instant>` also end up with a null
 * `endDate` — confirm whether that is intended.
 *
 * @param raw - Raw document text containing the context elements.
 * @returns Map from context id to its end date and axis/member pairs.
 */
function parseContextsWithDimensions(raw: string) {
  const contextPattern = /<(?:[a-z0-9]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9]+:)?context>/gi;
  const dimPattern = /<(?:[a-z0-9]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9]+:)?explicitMember>/gi;
  const endDatePattern = /<(?:[a-z0-9]+:)?endDate>([^<]+)<\/(?:[a-z0-9]+:)?endDate>/i;

  const result = new Map<string, DimensionContext>();

  for (const contextMatch of raw.matchAll(contextPattern)) {
    const id = contextMatch[1] ?? '';
    const inner = contextMatch[2] ?? '';
    if (!id) {
      continue;
    }

    // Keep only members where both the axis and the member name are non-blank.
    const dimensions: Array<{ axis: string; member: string }> = Array.from(
      inner.matchAll(dimPattern),
      (memberMatch) => ({
        axis: (memberMatch[1] ?? '').trim(),
        member: (memberMatch[2] ?? '').trim()
      })
    ).filter((entry) => entry.axis !== '' && entry.member !== '');

    if (dimensions.length > 0) {
      const endDate = inner.match(endDatePattern)?.[1]?.trim() ?? null;
      result.set(id, { endDate, dimensions });
    }
  }

  return result;
}
|
||
|
||
/**
 * Guesses which financial statement a concept name belongs to by substring
 * matching on the lower-cased name.
 *
 * Rules are evaluated in order and the first hit wins, so precedence matters:
 * any name containing "cash" is classified as cash flow before the balance or
 * income rules are consulted, and any name containing "equity" is classified
 * as equity before the balance rule.
 *
 * @param concept - Concept name, e.g. a prefixed XBRL element name.
 * @returns The matched statement kind, or `null` when no rule matches.
 */
function statementKindFromConcept(concept: string): FinancialStatementKind | null {
  const haystack = concept.toLowerCase();

  const orderedRules: Array<[RegExp, FinancialStatementKind]> = [
    [/cash|operatingactivities|investingactivities|financingactivities/, 'cash_flow'],
    [/equity|retainedearnings|additionalpaidincapital/, 'equity'],
    [/comprehensiveincome/, 'comprehensive_income'],
    [/asset|liabilit|debt/, 'balance'],
    [/revenue|income|profit|expense|costof/, 'income']
  ];

  for (const [pattern, kind] of orderedRules) {
    if (pattern.test(haystack)) {
      return kind;
    }
  }

  return null;
}
|
||
|
||
/**
 * Collects dimensional numeric facts from an inline XBRL document, grouped by
 * statement kind.
 *
 * A fact is kept only when all of the following hold:
 * - the `ix:nonFraction` tag has both `contextRef` and `name` attributes;
 * - its context is one returned by `parseContextsWithDimensions`, i.e. it has
 *   at least one explicit dimension;
 * - `statementKindFromConcept` can classify the concept;
 * - `parseStatementNumber` yields a number for the sanitized tag content.
 *
 * One output row is produced per (fact, dimension) pair. The row's `periodId`
 * is the context end date when present, otherwise `fallbackPeriodId`.
 *
 * Inline XBRL carries negativity in the `sign="-"` attribute rather than in
 * the element content, so the attribute is applied to the parsed number.
 * NOTE(review): the `scale` attribute is not applied; values stay as
 * displayed — confirm this matches the units used by the statement rows.
 *
 * Scanning stops after 8,000 `ix:nonFraction` matches (kept or not) to bound
 * the work done on very large documents.
 *
 * @param raw - Raw inline XBRL document text.
 * @param fallbackPeriodId - Period id used when a context has no end date.
 * @returns Statement record whose entries are the dimensional rows per kind.
 */
function parseDimensionFacts(
  raw: string,
  fallbackPeriodId: string
) {
  const rows = createStatementRecord<DimensionStatementSnapshotRow[]>(() => []);

  const contexts = parseContextsWithDimensions(raw);
  if (contexts.size === 0) {
    return rows;
  }

  const maxFactMatches = 8_000;
  const ixPattern = /<ix:nonfraction\b([^>]*)>([\s\S]*?)<\/ix:nonfraction>/gi;
  let guard = 0;

  for (const match of raw.matchAll(ixPattern)) {
    guard += 1;
    if (guard > maxFactMatches) {
      break;
    }

    const attrs = match[1] ?? '';
    const body = sanitizeCellText(match[2] ?? '');

    const contextRef = attrs.match(/\bcontextref=["']([^"']+)["']/i)?.[1] ?? '';
    const concept = attrs.match(/\bname=["']([^"']+)["']/i)?.[1] ?? '';
    const unit = attrs.match(/\bunitref=["']([^"']+)["']/i)?.[1] ?? null;

    if (!contextRef || !concept) {
      continue;
    }

    const context = contexts.get(contextRef);
    if (!context || context.dimensions.length === 0) {
      continue;
    }

    const kind = statementKindFromConcept(concept);
    if (!kind) {
      continue;
    }

    const parsed = parseStatementNumber(body);
    if (parsed === null) {
      continue;
    }

    // The element content is an unsigned display value; `sign="-"` marks the
    // fact as negative. Math.abs guards against double negation if the parsed
    // text already produced a negative number, and zero is left untouched to
    // avoid emitting -0.
    const isNegative = /\bsign=["']-["']/i.test(attrs);
    const value = isNegative && parsed !== 0 ? -Math.abs(parsed) : parsed;

    const periodId = context.endDate ?? fallbackPeriodId;
    const rowKey = slug(concept);

    for (const dimension of context.dimensions) {
      rows[kind].push({
        rowKey,
        concept,
        periodId,
        axis: dimension.axis,
        member: dimension.member,
        value,
        unit
      });
    }
  }

  return rows;
}
|
||
|
||
/**
 * Returns copies of `rows` with `hasDimensions` set according to whether any
 * dimensional fact refers to the row.
 *
 * A row is flagged when its `key` equals some fact's `rowKey`, or when its
 * concept equals some fact's concept ignoring case. Rows without a concept can
 * only match by key. Any `hasDimensions` already on a row is overwritten.
 *
 * @param rows - Rows to annotate; the input objects are not mutated.
 * @param dimensions - Dimensional facts for the same statement.
 * @returns New row objects in the original order.
 */
function markHasDimensions<T extends { key: string; concept: string | null; hasDimensions?: boolean }>(
  rows: T[],
  dimensions: DimensionStatementSnapshotRow[]
) {
  const factRowKeys = new Set<string>();
  const factConcepts = new Set<string>();

  for (const fact of dimensions) {
    factRowKeys.add(fact.rowKey);

    const loweredConcept = fact.concept?.toLowerCase() ?? '';
    if (loweredConcept) {
      factConcepts.add(loweredConcept);
    }
  }

  return rows.map((row) => {
    const loweredConcept = row.concept?.toLowerCase() ?? '';
    const matchedByKey = factRowKeys.has(row.key);
    const matchedByConcept = loweredConcept !== '' && factConcepts.has(loweredConcept);

    return {
      ...row,
      hasDimensions: matchedByKey || matchedByConcept
    };
  });
}
|
||
|
||
/**
 * Creates a filing-faithful statement bundle for a single period in which
 * every statement entry starts as an empty row list.
 *
 * @param period - The only period the bundle will list.
 * @returns A new bundle with `periods: [period]` and empty statements.
 */
function emptyStatementBundle(period: FilingStatementSnapshotPeriod): FilingStatementBundle {
  const bundle: FilingStatementBundle = {
    periods: [period],
    statements: createStatementRecord(() => [])
  };

  return bundle;
}
|
||
|
||
/**
 * Creates a standardized statement bundle for a single period in which every
 * statement entry starts as an empty row list.
 *
 * @param period - The only period the bundle will list.
 * @returns A new bundle with `periods: [period]` and empty statements.
 */
function emptyStandardizedBundle(period: FilingStatementSnapshotPeriod): StandardizedStatementBundle {
  const bundle: StandardizedStatementBundle = {
    periods: [period],
    statements: createStatementRecord(() => [])
  };

  return bundle;
}
|
||
|
||
/**
 * Creates a dimension bundle in which every statement entry starts as an
 * empty list of dimensional facts.
 *
 * @returns A new bundle with empty statements.
 */
function emptyDimensionBundle(): DimensionStatementBundle {
  const bundle: DimensionStatementBundle = {
    statements: createStatementRecord(() => [])
  };

  return bundle;
}
|
||
|
||
/**
 * Builds the statement snapshot for one filing by fetching and parsing its
 * filing summary reports and its primary document.
 *
 * Stages:
 * 1. Filing summary: fetch `FilingSummary.xml` from the filing directory,
 *    pick one report per statement kind, and parse its rows. A failure on a
 *    single report is ignored so the remaining statements can still load; a
 *    failure fetching or parsing the summary itself is recorded as the parse
 *    error.
 * 2. Primary document: fetch it and collect dimensional facts. A failure here
 *    is recorded only when stage 1 did not already record an error.
 * 3. Derive standardized rows for every kind (using `input.metrics` as
 *    fallback values) and flag rows that have dimensional facts.
 *
 * `source` starts as `'companyfacts_fallback'`, becomes `'sec_filing_summary'`
 * once any statement is parsed from a report, and becomes `'xbrl_instance'`
 * only when dimensional facts were found and no report was parsed.
 *
 * `parse_status` is `'ready'` when every statement kind has filing-faithful
 * rows, `'partial'` when at least one kind has filing-faithful rows or at
 * least one standardized value, and `'failed'` otherwise.
 *
 * The single period uses the filing date as its period end.
 *
 * @param input - Filing identity, URLs and metrics used for hydration.
 * @param options - Optional overrides; `fetchImpl` replaces the global `fetch`.
 * @returns The hydrated bundles with parse status, error and source. Fetch and
 *   parse failures are reported through `parse_error` rather than thrown.
 */
export async function hydrateFilingStatementSnapshot(
  input: FilingStatementHydrationInput,
  options?: {
    fetchImpl?: typeof fetch;
  }
): Promise<FilingStatementHydrationResult> {
  const periodId = `${input.filingDate}-${compactAccessionNumber(input.accessionNumber)}`;
  const period: FilingStatementSnapshotPeriod = {
    id: periodId,
    filingId: input.filingId,
    accessionNumber: input.accessionNumber,
    filingDate: input.filingDate,
    periodStart: null,
    periodEnd: input.filingDate,
    filingType: input.filingType,
    periodLabel: input.filingType === '10-Q' ? 'Quarter End' : 'Fiscal Year End'
  };

  const fetchImpl = options?.fetchImpl ?? fetch;
  const statementBundle = emptyStatementBundle(period);
  const standardizedBundle = emptyStandardizedBundle(period);
  const dimensionBundle = emptyDimensionBundle();
  let source: FilingStatementHydrationResult['source'] = 'companyfacts_fallback';
  let parseError: string | null = null;

  // Stage 1: filing-faithful rows from the filing summary reports.
  try {
    const filingDirectory = resolveFilingDirectoryUrl({
      filingUrl: input.filingUrl,
      cik: input.cik,
      accessionNumber: input.accessionNumber
    });

    if (filingDirectory) {
      const summaryXml = await fetchText(`${filingDirectory}FilingSummary.xml`, fetchImpl);
      const reports = parseFilingSummaryReports(summaryXml);

      for (const kind of FINANCIAL_STATEMENT_KINDS) {
        const report = chooseStatementReport(kind, reports);
        if (!report) {
          continue;
        }

        const reportUrl = toAbsoluteArchiveUrl(filingDirectory, report.htmlFileName ?? report.xmlFileName);
        if (!reportUrl) {
          continue;
        }

        try {
          const reportText = await fetchText(reportUrl, fetchImpl);
          const parsedRows = parseStatementRowsFromReport(reportText);
          if (parsedRows.length === 0) {
            continue;
          }

          source = 'sec_filing_summary';
          statementBundle.statements[kind] = toSnapshotRows(periodId, parsedRows);
        } catch {
          // Continue to other statements when one report fails.
        }
      }
    }
  } catch (error) {
    parseError = error instanceof Error ? error.message : 'Failed to parse filing summary';
  }

  // Stage 2: dimensional facts from the primary document.
  try {
    const primaryUrl = resolvePrimaryFilingUrl({
      filingUrl: input.filingUrl,
      cik: input.cik,
      accessionNumber: input.accessionNumber,
      primaryDocument: input.primaryDocument
    });

    if (primaryUrl) {
      const rawDocument = await fetchText(primaryUrl, fetchImpl);
      const dimensions = parseDimensionFacts(rawDocument, periodId);
      for (const kind of FINANCIAL_STATEMENT_KINDS) {
        dimensionBundle.statements[kind] = dimensions[kind];
      }

      const hasAnyDimensions = FINANCIAL_STATEMENT_KINDS.some((kind) => dimensionBundle.statements[kind].length > 0);
      if (hasAnyDimensions && source === 'companyfacts_fallback') {
        source = 'xbrl_instance';
      }
    }
  } catch (error) {
    // Keep the first recorded error; a stage 1 failure takes precedence.
    if (!parseError) {
      parseError = error instanceof Error ? error.message : 'Failed to parse inline XBRL dimensions';
    }
  }

  // Stage 3: standardized rows plus dimension flags on both bundles.
  for (const kind of FINANCIAL_STATEMENT_KINDS) {
    const faithfulRows = statementBundle.statements[kind];
    const standardizedRows = toStandardizedRows(kind, periodId, faithfulRows, input.metrics);

    statementBundle.statements[kind] = markHasDimensions(
      faithfulRows,
      dimensionBundle.statements[kind]
    );

    standardizedBundle.statements[kind] = markHasDimensions(
      standardizedRows,
      dimensionBundle.statements[kind]
    );
  }

  const statementCount = FINANCIAL_STATEMENT_KINDS.filter((kind) => statementBundle.statements[kind].length > 0).length;

  // Standardized rows are emitted for every canonical definition even when
  // nothing matched and no metric was available, so a non-empty row list does
  // not imply data. Count only kinds carrying at least one actual value;
  // otherwise placeholder rows would make the status 'partial' with nothing
  // parsed.
  const standardizedCount = FINANCIAL_STATEMENT_KINDS.filter((kind) =>
    standardizedBundle.statements[kind].some((row) => (row.values[periodId] ?? null) !== null)
  ).length;

  let parseStatus: FilingStatementHydrationResult['parse_status'];
  if (statementCount === FINANCIAL_STATEMENT_KINDS.length) {
    parseStatus = 'ready';
  } else if (statementCount > 0 || standardizedCount > 0) {
    parseStatus = 'partial';
  } else {
    parseStatus = 'failed';
  }

  return {
    filing_id: input.filingId,
    ticker: input.ticker.trim().toUpperCase(),
    filing_date: input.filingDate,
    filing_type: input.filingType,
    period_end: input.filingDate,
    statement_bundle: statementBundle,
    standardized_bundle: standardizedBundle,
    dimension_bundle: dimensionBundle,
    parse_status: parseStatus,
    parse_error: parseStatus === 'failed' ? (parseError ?? 'No financial statement tables found') : parseError,
    source,
  };
}
|
||
|
||
/**
 * Module-private parsing helpers re-exported under a single object so they
 * can be reached from outside the module without widening its public API.
 */
export const __statementInternals = {
  parseFilingSummaryReports: parseFilingSummaryReports,
  parseStatementRowsFromReport: parseStatementRowsFromReport,
  parseDimensionFacts: parseDimensionFacts,
  statementKindLabel: statementKindLabel
};
|