feat: rebuild fiscal clone architecture and harden coolify deployment
This commit is contained in:
@@ -1,162 +1,208 @@
|
||||
import { type Filings } from '../db';
|
||||
import { env } from '../config';
|
||||
import type { FilingMetrics, FilingType } from '../types';
|
||||
|
||||
export class SECScraper {
|
||||
private baseUrl = 'https://www.sec.gov';
|
||||
private userAgent = 'Fiscal Clone (contact@example.com)';
|
||||
type TickerDirectoryRecord = {
|
||||
cik_str: number;
|
||||
ticker: string;
|
||||
title: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Search SEC filings by ticker
|
||||
*/
|
||||
async searchFilings(ticker: string, count = 20): Promise<Filings[]> {
|
||||
const cik = await this.getCIK(ticker);
|
||||
type RecentFilingsPayload = {
|
||||
filings?: {
|
||||
recent?: {
|
||||
accessionNumber?: string[];
|
||||
filingDate?: string[];
|
||||
form?: string[];
|
||||
primaryDocument?: string[];
|
||||
};
|
||||
};
|
||||
cik?: string;
|
||||
name?: string;
|
||||
};
|
||||
|
||||
const response = await fetch(
|
||||
`https://data.sec.gov/submissions/CIK${cik.padStart(10, '0')}.json`,
|
||||
{
|
||||
headers: {
|
||||
'User-Agent': this.userAgent
|
||||
}
|
||||
type CompanyFactsPayload = {
|
||||
facts?: {
|
||||
'us-gaap'?: Record<string, { units?: Record<string, Array<{ val?: number; end?: string; filed?: string }>> }>;
|
||||
};
|
||||
};
|
||||
|
||||
export type SecFiling = {
|
||||
ticker: string;
|
||||
cik: string;
|
||||
companyName: string;
|
||||
filingType: FilingType;
|
||||
filingDate: string;
|
||||
accessionNumber: string;
|
||||
filingUrl: string | null;
|
||||
};
|
||||
|
||||
const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];
|
||||
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 24;
|
||||
const FACTS_CACHE_TTL_MS = 1000 * 60 * 10;
|
||||
|
||||
export class SecService {
|
||||
private tickerCache: Map<string, TickerDirectoryRecord> = new Map();
|
||||
private tickerCacheLoadedAt = 0;
|
||||
private factsCache: Map<string, { loadedAt: number; metrics: FilingMetrics }> = new Map();
|
||||
|
||||
private async fetchJson<T>(url: string): Promise<T> {
|
||||
const response = await fetch(url, {
|
||||
headers: {
|
||||
'User-Agent': env.SEC_USER_AGENT,
|
||||
Accept: 'application/json'
|
||||
}
|
||||
);
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`SEC API error: ${response.status}`);
|
||||
throw new Error(`SEC request failed (${response.status}) for ${url}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const filings = data.filings?.recent || [];
|
||||
|
||||
const filteredFilings = filings
|
||||
.filter((f: any) =>
|
||||
['10-K', '10-Q', '8-K'].includes(f.form)
|
||||
)
|
||||
.slice(0, count)
|
||||
.map((f: any) => ({
|
||||
ticker,
|
||||
filing_type: f.form,
|
||||
filing_date: new Date(f.filingDate),
|
||||
accession_number: f.accessionNumber,
|
||||
cik: data.cik,
|
||||
company_name: data.name || ticker,
|
||||
}));
|
||||
|
||||
return filteredFilings;
|
||||
return await response.json() as T;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for new filings and save to database
|
||||
*/
|
||||
async checkNewFilings(db: any) {
|
||||
const tickers = await db`
|
||||
SELECT DISTINCT ticker FROM watchlist
|
||||
`;
|
||||
private async ensureTickerCache() {
|
||||
const isFresh = Date.now() - this.tickerCacheLoadedAt < TICKER_CACHE_TTL_MS;
|
||||
|
||||
console.log(`Checking filings for ${tickers.length} tickers...`);
|
||||
|
||||
for (const { ticker } of tickers) {
|
||||
try {
|
||||
const latest = await db`
|
||||
SELECT accession_number FROM filings
|
||||
WHERE ticker = ${ticker}
|
||||
ORDER BY filing_date DESC
|
||||
LIMIT 1
|
||||
`;
|
||||
|
||||
const filings = await this.searchFilings(ticker, 10);
|
||||
const newFilings = filings.filter(
|
||||
f => !latest.some((l: any) => l.accession_number === f.accession_number)
|
||||
);
|
||||
|
||||
if (newFilings.length > 0) {
|
||||
console.log(`Found ${newFilings.length} new filings for ${ticker}`);
|
||||
|
||||
for (const filing of newFilings) {
|
||||
const metrics = await this.extractKeyMetrics(filing);
|
||||
|
||||
await db`
|
||||
INSERT INTO filings ${db(filing, metrics)}
|
||||
ON CONFLICT (accession_number) DO NOTHING
|
||||
`;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error checking filings for ${ticker}:`, error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get CIK for a ticker
|
||||
*/
|
||||
private async getCIK(ticker: string): Promise<string> {
|
||||
const response = await fetch(
|
||||
`https://www.sec.gov/files/company_tickers.json`
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to get company tickers');
|
||||
if (isFresh && this.tickerCache.size > 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const companies = data.data;
|
||||
const payload = await this.fetchJson<Record<string, TickerDirectoryRecord>>('https://www.sec.gov/files/company_tickers.json');
|
||||
const nextCache = new Map<string, TickerDirectoryRecord>();
|
||||
|
||||
for (const [cik, company] of Object.entries(companies)) {
|
||||
if (company.ticker === ticker.toUpperCase()) {
|
||||
return cik;
|
||||
}
|
||||
for (const record of Object.values(payload)) {
|
||||
nextCache.set(record.ticker.toUpperCase(), record);
|
||||
}
|
||||
|
||||
throw new Error(`Ticker ${ticker} not found`);
|
||||
this.tickerCache = nextCache;
|
||||
this.tickerCacheLoadedAt = Date.now();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract key metrics from filing
|
||||
*/
|
||||
async extractKeyMetrics(filing: any): Promise<any> {
|
||||
try {
|
||||
const filingUrl = `${this.baseUrl}/Archives/${filing.accession_number.replace(/-/g, '')}/${filing.accession_number}-index.htm`;
|
||||
async resolveTicker(ticker: string) {
|
||||
await this.ensureTickerCache();
|
||||
|
||||
const response = await fetch(filingUrl, {
|
||||
headers: { 'User-Agent': this.userAgent }
|
||||
});
|
||||
const normalizedTicker = ticker.trim().toUpperCase();
|
||||
const record = this.tickerCache.get(normalizedTicker);
|
||||
|
||||
if (!response.ok) return null;
|
||||
|
||||
const html = await response.text();
|
||||
|
||||
// Extract key financial metrics from XBRL
|
||||
const metrics = {
|
||||
revenue: this.extractMetric(html, 'Revenues'),
|
||||
netIncome: this.extractMetric(html, 'NetIncomeLoss'),
|
||||
totalAssets: this.extractMetric(html, 'Assets'),
|
||||
cash: this.extractMetric(html, 'CashAndCashEquivalentsAtCarryingValue'),
|
||||
debt: this.extractMetric(html, 'LongTermDebt')
|
||||
};
|
||||
|
||||
return metrics;
|
||||
} catch (error) {
|
||||
console.error('Error extracting metrics:', error);
|
||||
return null;
|
||||
if (!record) {
|
||||
throw new Error(`Ticker ${normalizedTicker} was not found in SEC directory`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a specific metric from XBRL data
|
||||
*/
|
||||
private extractMetric(html: string, metricName: string): number | null {
|
||||
const regex = new RegExp(`<ix:nonFraction[^>]*name="[^"]*${metricName}[^"]*"[^>]*>([^<]+)<`, 'i');
|
||||
const match = html.match(regex);
|
||||
return match ? parseFloat(match[1].replace(/,/g, '')) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get filing details by accession number
|
||||
*/
|
||||
async getFilingDetails(accessionNumber: string) {
|
||||
const filingUrl = `${this.baseUrl}/Archives/${accessionNumber.replace(/-/g, '')}/${accessionNumber}-index.htm`;
|
||||
|
||||
return {
|
||||
filing_url: filingUrl
|
||||
ticker: normalizedTicker,
|
||||
cik: String(record.cik_str),
|
||||
companyName: record.title
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Fetch the most recent filings of a supported form type for a ticker.
 *
 * Resolves the ticker through `resolveTicker`, downloads the company's
 * submissions document from data.sec.gov, and walks the parallel arrays under
 * `filings.recent` (form, accession number, filing date, primary document) in
 * the order the payload lists them. Entries whose form is not in
 * `SUPPORTED_FORMS`, or that have no accession number, are skipped.
 *
 * @param ticker - Ticker symbol, handed to `resolveTicker` as-is.
 * @param limit - Maximum number of filings to return (default 20). Zero or a
 *   negative value yields an empty array.
 * @returns Up to `limit` filings; empty when the payload has no
 *   `filings.recent` section.
 * @throws Propagates any error raised by `resolveTicker` or `fetchJson`.
 */
async fetchRecentFilings(ticker: string, limit = 20): Promise<SecFiling[]> {
  const company = await this.resolveTicker(ticker);
  const cikPadded = company.cik.padStart(10, '0');

  const payload = await this.fetchJson<RecentFilingsPayload>(`https://data.sec.gov/submissions/CIK${cikPadded}.json`);
  const recent = payload.filings?.recent;

  if (!recent) {
    return [];
  }

  const forms = recent.form ?? [];
  const accessionNumbers = recent.accessionNumber ?? [];
  const filingDates = recent.filingDate ?? [];
  const primaryDocuments = recent.primaryDocument ?? [];
  const filings: SecFiling[] = [];

  // The cap is checked before each iteration (not after a push) so that a
  // limit of zero or less never lets a filing through.
  for (let i = 0; i < forms.length && filings.length < limit; i += 1) {
    const formName = forms[i];

    // Look the raw form string up in the supported list instead of asserting
    // its type; the result is a genuine FilingType or undefined.
    const filingType = SUPPORTED_FORMS.find((supported) => supported === formName);

    if (filingType === undefined) {
      continue;
    }

    const accessionNumber = accessionNumbers[i];

    if (!accessionNumber) {
      continue;
    }

    // Archive paths use the accession number without dashes and the CIK
    // without leading zeros.
    const compactAccession = accessionNumber.replace(/-/g, '');
    const documentName = primaryDocuments[i];
    const filingUrl = documentName
      ? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccession}/${documentName}`
      : null;

    filings.push({
      ticker: company.ticker,
      cik: company.cik,
      companyName: payload.name ?? company.companyName,
      filingType,
      // Falls back to today's date (YYYY-MM-DD) when the payload has no date
      // at this index.
      filingDate: filingDates[i] ?? new Date().toISOString().slice(0, 10),
      accessionNumber,
      filingUrl
    });
  }

  return filings;
}
|
||||
|
||||
/**
 * Pick the most recent numeric value reported for a us-gaap tag.
 *
 * Units are tried in order of preference (`USD`, then `USD/shares`); the first
 * unit that has at least one numeric value wins. Within a unit, the entry with
 * the latest `filed` date is chosen (falling back to `end` when `filed` is
 * absent). Entries that share a filed date are ordered by their `end` date so
 * the latest period wins rather than whichever entry appears first in the
 * series.
 *
 * @param payload - Company-facts payload to read from.
 * @param tag - us-gaap tag name, e.g. `Revenues`.
 * @returns The selected value, or `null` when the tag is missing or has no
 *   numeric value in a preferred unit.
 */
private pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null {
  const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units;

  if (!unitCollections) {
    return null;
  }

  // Missing or unparseable dates are treated as the epoch so comparisons never
  // involve NaN.
  const toTime = (value: string | undefined): number => {
    const parsed = Date.parse(value ?? '');
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  const preferredUnits = ['USD', 'USD/shares'];

  for (const unit of preferredUnits) {
    const series = unitCollections[unit];

    if (!series?.length) {
      continue;
    }

    let best: { val: number; filedAt: number; endAt: number } | null = null;

    // Single pass for the maximum; strict comparisons keep the earliest entry
    // among exact ties.
    for (const item of series) {
      if (typeof item.val !== 'number') {
        continue;
      }

      const filedAt = toTime(item.filed ?? item.end);
      const endAt = toTime(item.end);
      const isNewer =
        best === null ||
        filedAt > best.filedAt ||
        (filedAt === best.filedAt && endAt > best.endAt);

      if (isNewer) {
        best = { val: item.val, filedAt, endAt };
      }
    }

    if (best !== null) {
      return best.val;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Load headline financial metrics for a company from the SEC company-facts
 * endpoint, with a per-CIK in-memory cache.
 *
 * The CIK is left-padded with zeros to ten characters; that padded form is
 * both the cache key and the identifier used in the request URL. A cached
 * entry younger than `FACTS_CACHE_TTL_MS` is returned without a request.
 *
 * @param cik - Company CIK as a string, with or without leading zeros.
 * @returns Revenue, net income, total assets, cash and debt as selected by
 *   `pickLatestFact`.
 * @throws Propagates any error raised by `fetchJson`.
 */
async fetchMetrics(cik: string): Promise<FilingMetrics> {
  const cacheKey = cik.padStart(10, '0');
  const entry = this.factsCache.get(cacheKey);

  if (entry && Date.now() - entry.loadedAt < FACTS_CACHE_TTL_MS) {
    return entry.metrics;
  }

  const facts = await this.fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${cacheKey}.json`);

  // Small helper so each metric line reads as "tag -> latest value".
  const latest = (tag: string) => this.pickLatestFact(facts, tag);

  const metrics: FilingMetrics = {
    revenue: latest('Revenues'),
    netIncome: latest('NetIncomeLoss'),
    totalAssets: latest('Assets'),
    cash: latest('CashAndCashEquivalentsAtCarryingValue'),
    debt: latest('LongTermDebt')
  };

  // Stamp the entry with the time it was stored, after the fetch completed.
  this.factsCache.set(cacheKey, { loadedAt: Date.now(), metrics });

  return metrics;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user