- SEC filings extraction (10-K, 10-Q, 8-K) - Portfolio analytics with real-time prices - Watchlist management - NextAuth.js authentication - OpenClaw AI integration - PostgreSQL database with auto P&L calculations - Elysia.js backend (Bun runtime) - Next.js 14 frontend (TailwindCSS + Recharts) - Production-ready Docker configurations
163 lines
4.4 KiB
TypeScript
163 lines
4.4 KiB
TypeScript
import { type Filings } from '../db';
|
|
|
|
/**
 * Client for SEC EDGAR: resolves tickers to CIKs, lists recent 10-K / 10-Q /
 * 8-K filings, scrapes a handful of inline-XBRL metrics and stores filings
 * that are not yet in the database.
 */
export class SECScraper {
  private baseUrl = 'https://www.sec.gov';
  private userAgent = 'Fiscal Clone (contact@example.com)';

  /** Form types kept by `searchFilings`; everything else is ignored. */
  private static readonly TRACKED_FORMS: readonly string[] = ['10-K', '10-Q', '8-K'];

  /**
   * Search SEC filings by ticker.
   *
   * @param ticker - Ticker symbol (case-insensitive).
   * @param count - Maximum number of filings to return.
   * @returns Up to `count` tracked filings, in the order the SEC lists them.
   * @throws Error when the ticker is unknown or the SEC API answers with a
   *   non-2xx status.
   */
  async searchFilings(ticker: string, count = 20): Promise<Filings[]> {
    const cik = await this.getCIK(ticker);

    const response = await fetch(
      `https://data.sec.gov/submissions/CIK${cik.padStart(10, '0')}.json`,
      { headers: { 'User-Agent': this.userAgent } }
    );

    if (!response.ok) {
      throw new Error(`SEC API error: ${response.status}`);
    }

    const data = await response.json();

    // The payload is untyped JSON, so rows stay loosely typed (as in the
    // rest of this file) and remain assignable to the project's `Filings`.
    const filteredFilings: any[] = this.toFilingRows(data?.filings?.recent)
      .filter((row: any) => SECScraper.TRACKED_FORMS.includes(row.form))
      .slice(0, count)
      .map((row: any) => ({
        ticker,
        filing_type: row.form,
        filing_date: new Date(row.filingDate),
        accession_number: row.accessionNumber,
        cik: data.cik,
        company_name: data.name || ticker,
      }));

    return filteredFilings;
  }

  /**
   * Check for new filings and save to database.
   *
   * For every distinct ticker in `watchlist`, fetches the ten most recent
   * tracked filings and inserts those whose accession number is not stored
   * yet. A failure for one ticker is logged and does not stop the others.
   *
   * @param db - SQL client used both as a tagged template and as an insert
   *   helper.
   */
  async checkNewFilings(db: any) {
    const tickers = await db`
      SELECT DISTINCT ticker FROM watchlist
    `;

    console.log(`Checking filings for ${tickers.length} tickers...`);

    for (const { ticker } of tickers) {
      try {
        // Compare against every stored accession number, not only the latest
        // one; otherwise already-stored filings are re-scraped on each run.
        const stored = await db`
          SELECT accession_number FROM filings
          WHERE ticker = ${ticker}
        `;
        const known = new Set<string>(stored.map((row: any) => row.accession_number));

        const filings = await this.searchFilings(ticker, 10);
        const newFilings = filings.filter((f: any) => !known.has(f.accession_number));

        if (newFilings.length === 0) continue;

        console.log(`Found ${newFilings.length} new filings for ${ticker}`);

        for (const filing of newFilings) {
          const metrics = await this.extractKeyMetrics(filing);

          // NOTE(review): if `db` is a postgres.js-style client, the second
          // argument of `db(object, ...)` is a list of column names, not a
          // second object to merge. Confirm how `metrics` should be stored
          // (the target column is not visible from this file).
          await db`
            INSERT INTO filings ${db(filing, metrics)}
            ON CONFLICT (accession_number) DO NOTHING
          `;
        }
      } catch (error) {
        console.error(`Error checking filings for ${ticker}:`, error);
      }
    }
  }

  /**
   * Get CIK for a ticker.
   *
   * @param ticker - Ticker symbol (case-insensitive).
   * @returns The CIK as a string (not zero-padded).
   * @throws Error when the ticker list cannot be fetched or does not contain
   *   the ticker.
   */
  private async getCIK(ticker: string): Promise<string> {
    const response = await fetch(
      `https://www.sec.gov/files/company_tickers.json`,
      { headers: { 'User-Agent': this.userAgent } }
    );

    if (!response.ok) {
      throw new Error('Failed to get company tickers');
    }

    const data = await response.json();
    // The file is an object of `{ cik_str, ticker, title }` entries keyed by
    // row index; a `data` wrapper is still accepted for backward compatibility.
    const companies = data?.data ?? data ?? {};
    const wanted = ticker.toUpperCase();

    for (const [key, company] of Object.entries<any>(companies)) {
      if (company?.ticker === wanted) {
        // Prefer the entry's own CIK; the key is only a row index.
        return String(company.cik_str ?? key);
      }
    }

    throw new Error(`Ticker ${ticker} not found`);
  }

  /**
   * Extract key metrics from filing.
   *
   * @param filing - Object with `accession_number` and, ideally, `cik`.
   * @returns `{ revenue, netIncome, totalAssets, cash, debt }` (each a number
   *   or `null`), or `null` when the page cannot be fetched or parsed.
   */
  async extractKeyMetrics(filing: any): Promise<any> {
    try {
      const filingUrl = this.buildIndexUrl(filing.accession_number, filing.cik);

      const response = await fetch(filingUrl, {
        headers: { 'User-Agent': this.userAgent }
      });

      if (!response.ok) return null;

      const html = await response.text();

      // NOTE(review): inline XBRL facts usually live in the filing's primary
      // document rather than on the `-index.htm` listing page — confirm that
      // this is the page that should be scanned.
      return {
        revenue: this.extractMetric(html, 'Revenues'),
        netIncome: this.extractMetric(html, 'NetIncomeLoss'),
        totalAssets: this.extractMetric(html, 'Assets'),
        cash: this.extractMetric(html, 'CashAndCashEquivalentsAtCarryingValue'),
        debt: this.extractMetric(html, 'LongTermDebt')
      };
    } catch (error) {
      console.error('Error extracting metrics:', error);
      return null;
    }
  }

  /**
   * Extract a specific metric from XBRL data.
   *
   * Finds the first `<ix:nonFraction>` whose `name` is exactly `metricName`,
   * optionally preceded by a namespace prefix such as `us-gaap:`.
   *
   * @param html - Document markup to scan.
   * @param metricName - Concept local name, e.g. `Assets`.
   * @returns The displayed number with thousands separators removed, or
   *   `null` when there is no match or the text is not numeric.
   */
  private extractMetric(html: string, metricName: string): number | null {
    // Escape the name so it is matched literally inside the pattern.
    const escaped = metricName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Match the whole concept name: a substring match would let `Assets`
    // pick up unrelated concepts such as `OtherAssetsNoncurrent`.
    const regex = new RegExp(
      `<ix:nonFraction[^>]*\\sname="(?:[^":]*:)?${escaped}"[^>]*>([^<]+)<`,
      'i'
    );
    const match = html.match(regex);
    if (!match) return null;

    const value = parseFloat(match[1].replace(/,/g, ''));
    // Placeholder cells (dashes, blanks) parse to NaN; report them as missing.
    return Number.isFinite(value) ? value : null;
  }

  /**
   * Get filing details by accession number.
   *
   * @param accessionNumber - Dashed accession number.
   * @param cik - Optional filer CIK; when given, the URL includes the
   *   `edgar/data/{cik}` archive path.
   * @returns Object holding the filing index URL.
   */
  async getFilingDetails(accessionNumber: string, cik?: string | number) {
    return {
      filing_url: this.buildIndexUrl(accessionNumber, cik)
    };
  }

  /**
   * Normalizes `filings.recent` into an array of row objects.
   * Accepts either an array of rows or an object of parallel column arrays.
   */
  private toFilingRows(recent: any): any[] {
    if (Array.isArray(recent)) return recent;
    if (!recent || !Array.isArray(recent.accessionNumber)) return [];

    return recent.accessionNumber.map((accessionNumber: string, i: number) => ({
      accessionNumber,
      form: recent.form?.[i],
      filingDate: recent.filingDate?.[i],
    }));
  }

  /**
   * Builds the `-index.htm` URL of a filing.
   * Without a CIK the original `/Archives/{accession}/…` form is kept so
   * existing callers see the same string as before.
   */
  private buildIndexUrl(accessionNumber: string, cik?: string | number | null): string {
    const folder = accessionNumber.replace(/-/g, '');
    // Archive paths use the CIK without its zero padding.
    const cikPath = cik == null ? '' : String(cik).trim().replace(/^0+(?=\d)/, '');
    const root = cikPath
      ? `${this.baseUrl}/Archives/edgar/data/${cikPath}`
      : `${this.baseUrl}/Archives`;

    return `${root}/${folder}/${accessionNumber}-index.htm`;
  }
}
|