import { type Filings } from '../db';

/** Financial figures pulled out of a filing page; `null` marks a value that was not found. */
export interface KeyMetrics {
  revenue: number | null;
  netIncome: number | null;
  totalAssets: number | null;
  cash: number | null;
  debt: number | null;
}

/** Form types that `searchFilings` keeps; every other form is dropped. */
const TRACKED_FORMS: readonly string[] = ['10-K', '10-Q', '8-K'];

/**
 * Normalize the `filings.recent` section of a submissions payload into one
 * object per filing.
 *
 * data.sec.gov serves this section in columnar form (one array per field, e.g.
 * `{ form: [...], filingDate: [...], accessionNumber: [...] }`), which cannot
 * be filtered row by row. An input that is already an array of rows is
 * returned unchanged; anything else yields an empty list.
 */
function toFilingRows(recent: unknown): any[] {
  if (Array.isArray(recent)) return recent;
  if (recent === null || typeof recent !== 'object') return [];

  const columns = Object.entries(recent as Record<string, unknown>).filter(
    (entry): entry is [string, unknown[]] => Array.isArray(entry[1])
  );
  const length = Math.max(0, ...columns.map(([, values]) => values.length));

  return Array.from({ length }, (_, index) => {
    const row: Record<string, unknown> = {};
    for (const [field, values] of columns) {
      row[field] = values[index];
    }
    return row;
  });
}

/**
 * Thin client around the public SEC EDGAR endpoints: resolves tickers to CIKs,
 * lists recent 10-K / 10-Q / 8-K filings and stores unseen ones through a
 * caller-supplied database handle.
 */
export class SECScraper {
  private readonly baseUrl = 'https://www.sec.gov';
  private readonly userAgent = 'Fiscal Clone (contact@example.com)';

  /**
   * Search SEC filings by ticker.
   *
   * @param ticker Stock ticker; matched case-insensitively against the SEC ticker list.
   * @param count Maximum number of filings returned (applied after the form-type filter).
   * @returns Filings of type 10-K, 10-Q or 8-K, in the order the SEC API lists them.
   * @throws Error when the ticker is unknown or an SEC request fails.
   */
  async searchFilings(ticker: string, count = 20): Promise<Filings[]> {
    const cik = await this.getCIK(ticker);

    const response = await fetch(
      `https://data.sec.gov/submissions/CIK${cik.padStart(10, '0')}.json`,
      { headers: { 'User-Agent': this.userAgent } }
    );
    if (!response.ok) {
      throw new Error(`SEC API error: ${response.status}`);
    }

    // The payload is untyped JSON and `Filings` is declared in ../db, so this
    // pipeline stays as loosely typed (`any`) as it was before.
    const data: any = await response.json();
    const rows: any = toFilingRows(data?.filings?.recent);

    return rows
      .filter((f: any) => TRACKED_FORMS.includes(f.form))
      .slice(0, count)
      .map((f: any) => ({
        ticker,
        filing_type: f.form,
        filing_date: new Date(f.filingDate),
        accession_number: f.accessionNumber,
        cik: data.cik,
        company_name: data.name || ticker,
      }));
  }

  /**
   * Check for new filings and save them to the database.
   *
   * For every ticker in `watchlist`, the ten most recent tracked filings are
   * compared with the accession numbers already stored for that ticker; unseen
   * ones get their key metrics extracted and are inserted. A failure for one
   * ticker is logged and does not stop the remaining tickers.
   *
   * @param db Tagged-template SQL client (used as db`...` and as `db(values)`).
   */
  async checkNewFilings(db: any) {
    const tickers = await db`
      SELECT DISTINCT ticker FROM watchlist
    `;
    console.log(`Checking filings for ${tickers.length} tickers...`);

    for (const { ticker } of tickers) {
      try {
        // Compare against every stored accession number. Looking only at the
        // single latest row made all older filings look new on each run, which
        // cost one SEC request per filing before ON CONFLICT discarded them.
        const known = await db`
          SELECT accession_number FROM filings
          WHERE ticker = ${ticker}
        `;
        const knownAccessions = new Set(known.map((row: any) => row.accession_number));

        const filings = await this.searchFilings(ticker, 10);
        const newFilings = filings.filter(
          (f) => !knownAccessions.has(f.accession_number)
        );
        if (newFilings.length === 0) continue;

        console.log(`Found ${newFilings.length} new filings for ${ticker}`);
        for (const filing of newFilings) {
          const metrics = await this.extractKeyMetrics(filing);
          // NOTE(review): with postgres.js-style helpers the arguments after the
          // object are column names, so passing `metrics` here looks wrong.
          // The intended column is not visible in this file; call left unchanged.
          await db`
            INSERT INTO filings ${db(filing, metrics)}
            ON CONFLICT (accession_number) DO NOTHING
          `;
        }
      } catch (error) {
        console.error(`Error checking filings for ${ticker}:`, error);
      }
    }
  }

  /**
   * Get the CIK for a ticker.
   *
   * @returns The CIK as an unpadded decimal string.
   * @throws Error when the ticker list cannot be downloaded or the ticker is absent.
   */
  private async getCIK(ticker: string): Promise<string> {
    const response = await fetch('https://www.sec.gov/files/company_tickers.json', {
      // SEC asks automated clients to identify themselves on every request.
      headers: { 'User-Agent': this.userAgent },
    });
    if (!response.ok) {
      throw new Error('Failed to get company tickers');
    }

    const data: any = await response.json();
    // The published file is keyed by row index:
    //   { "0": { cik_str: 320193, ticker: "AAPL", title: "..." }, ... }
    // so the CIK is `cik_str`, not the key. A payload wrapped in `data` and
    // keyed by CIK (the shape this method used to assume) is still accepted.
    const companies: Record<string, any> = data?.data ?? data ?? {};
    const wanted = ticker.toUpperCase();

    for (const [key, company] of Object.entries(companies)) {
      if (company?.ticker === wanted) {
        return String(company.cik_str ?? key);
      }
    }
    throw new Error(`Ticker ${ticker} not found`);
  }

  /**
   * Extract key metrics from a filing.
   *
   * Downloads the filing's index page and scans it for tagged values.
   *
   * @param filing Object with `accession_number` and, ideally, `cik`.
   * @returns The metrics, or `null` when the page cannot be fetched or parsing throws.
   */
  async extractKeyMetrics(filing: any): Promise<KeyMetrics | null> {
    try {
      const filingUrl = this.filingIndexUrl(filing.accession_number, filing.cik);
      const response = await fetch(filingUrl, {
        headers: { 'User-Agent': this.userAgent },
      });
      if (!response.ok) return null;

      const html = await response.text();
      // NOTE(review): this scans the index page itself; confirm that the tagged
      // values are expected there rather than in the primary document.
      return {
        revenue: this.extractMetric(html, 'Revenues'),
        netIncome: this.extractMetric(html, 'NetIncomeLoss'),
        totalAssets: this.extractMetric(html, 'Assets'),
        cash: this.extractMetric(html, 'CashAndCashEquivalentsAtCarryingValue'),
        debt: this.extractMetric(html, 'LongTermDebt'),
      };
    } catch (error) {
      console.error('Error extracting metrics:', error);
      return null;
    }
  }

  /**
   * Extract a specific metric from XBRL-tagged markup.
   *
   * Finds the first element whose `name` attribute contains `metricName`
   * (case-insensitive) and parses its text, ignoring thousands separators.
   *
   * @returns The parsed number, or `null` when nothing matches or the text is not numeric.
   */
  private extractMetric(html: string, metricName: string): number | null {
    // Escape regex metacharacters so the name is always matched literally.
    const literalName = metricName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(
      `<[^>]*name="[^"]*${literalName}[^"]*"[^>]*>([^<]+)<`,
      'i'
    );
    const text = html.match(regex)?.[1];
    if (text === undefined) return null;

    const value = parseFloat(text.replace(/,/g, ''));
    // Non-numeric text would otherwise surface as NaN instead of "not found".
    return Number.isFinite(value) ? value : null;
  }

  /**
   * Get filing details by accession number.
   *
   * @param accessionNumber Dashed accession number, e.g. `0000320193-23-000106`.
   * @param cik Optional CIK of the filer; needed to build the EDGAR archive path.
   * @returns Object whose `filing_url` points at the filing's index page.
   */
  async getFilingDetails(accessionNumber: string, cik?: string | number) {
    return { filing_url: this.filingIndexUrl(accessionNumber, cik) };
  }

  /**
   * Build the URL of a filing's index page.
   *
   * EDGAR archives live under `/Archives/edgar/data/<cik>/<accession without
   * dashes>/`. Without a CIK the previous path (no `edgar/data/<cik>` segment)
   * is returned unchanged for callers that depend on that exact string.
   */
  private filingIndexUrl(accessionNumber: string, cik?: string | number | null): string {
    const folder = accessionNumber.replace(/-/g, '');
    const indexFile = `${accessionNumber}-index.htm`;
    // Archive paths use the CIK without zero padding.
    const filerCik = cik == null ? '' : String(cik).replace(/^0+/, '');

    if (filerCik === '') {
      return `${this.baseUrl}/Archives/${folder}/${indexFile}`;
    }
    return `${this.baseUrl}/Archives/edgar/data/${filerCik}/${folder}/${indexFile}`;
  }
}