feat: rebuild fiscal clone architecture and harden coolify deployment

This commit is contained in:
2026-02-23 21:10:39 -05:00
parent cae7cbb98f
commit 04e5caf4e1
61 changed files with 3826 additions and 2923 deletions

View File

@@ -1,162 +1,208 @@
import { type Filings } from '../db';
import { env } from '../config';
import type { FilingMetrics, FilingType } from '../types';
export class SECScraper {
private baseUrl = 'https://www.sec.gov';
private userAgent = 'Fiscal Clone (contact@example.com)';
type TickerDirectoryRecord = {
cik_str: number;
ticker: string;
title: string;
};
/**
* Search SEC filings by ticker
*/
async searchFilings(ticker: string, count = 20): Promise<Filings[]> {
const cik = await this.getCIK(ticker);
type RecentFilingsPayload = {
filings?: {
recent?: {
accessionNumber?: string[];
filingDate?: string[];
form?: string[];
primaryDocument?: string[];
};
};
cik?: string;
name?: string;
};
const response = await fetch(
`https://data.sec.gov/submissions/CIK${cik.padStart(10, '0')}.json`,
{
headers: {
'User-Agent': this.userAgent
}
type CompanyFactsPayload = {
facts?: {
'us-gaap'?: Record<string, { units?: Record<string, Array<{ val?: number; end?: string; filed?: string }>> }>;
};
};
export type SecFiling = {
ticker: string;
cik: string;
companyName: string;
filingType: FilingType;
filingDate: string;
accessionNumber: string;
filingUrl: string | null;
};
const SUPPORTED_FORMS: FilingType[] = ['10-K', '10-Q', '8-K'];
const TICKER_CACHE_TTL_MS = 1000 * 60 * 60 * 24;
const FACTS_CACHE_TTL_MS = 1000 * 60 * 10;
export class SecService {
private tickerCache: Map<string, TickerDirectoryRecord> = new Map();
private tickerCacheLoadedAt = 0;
private factsCache: Map<string, { loadedAt: number; metrics: FilingMetrics }> = new Map();
private async fetchJson<T>(url: string): Promise<T> {
const response = await fetch(url, {
headers: {
'User-Agent': env.SEC_USER_AGENT,
Accept: 'application/json'
}
);
});
if (!response.ok) {
throw new Error(`SEC API error: ${response.status}`);
throw new Error(`SEC request failed (${response.status}) for ${url}`);
}
const data = await response.json();
const filings = data.filings?.recent || [];
const filteredFilings = filings
.filter((f: any) =>
['10-K', '10-Q', '8-K'].includes(f.form)
)
.slice(0, count)
.map((f: any) => ({
ticker,
filing_type: f.form,
filing_date: new Date(f.filingDate),
accession_number: f.accessionNumber,
cik: data.cik,
company_name: data.name || ticker,
}));
return filteredFilings;
return await response.json() as T;
}
/**
* Check for new filings and save to database
*/
async checkNewFilings(db: any) {
const tickers = await db`
SELECT DISTINCT ticker FROM watchlist
`;
private async ensureTickerCache() {
const isFresh = Date.now() - this.tickerCacheLoadedAt < TICKER_CACHE_TTL_MS;
console.log(`Checking filings for ${tickers.length} tickers...`);
for (const { ticker } of tickers) {
try {
const latest = await db`
SELECT accession_number FROM filings
WHERE ticker = ${ticker}
ORDER BY filing_date DESC
LIMIT 1
`;
const filings = await this.searchFilings(ticker, 10);
const newFilings = filings.filter(
f => !latest.some((l: any) => l.accession_number === f.accession_number)
);
if (newFilings.length > 0) {
console.log(`Found ${newFilings.length} new filings for ${ticker}`);
for (const filing of newFilings) {
const metrics = await this.extractKeyMetrics(filing);
await db`
INSERT INTO filings ${db(filing, metrics)}
ON CONFLICT (accession_number) DO NOTHING
`;
}
}
} catch (error) {
console.error(`Error checking filings for ${ticker}:`, error);
}
}
}
/**
* Get CIK for a ticker
*/
private async getCIK(ticker: string): Promise<string> {
const response = await fetch(
`https://www.sec.gov/files/company_tickers.json`
);
if (!response.ok) {
throw new Error('Failed to get company tickers');
if (isFresh && this.tickerCache.size > 0) {
return;
}
const data = await response.json();
const companies = data.data;
const payload = await this.fetchJson<Record<string, TickerDirectoryRecord>>('https://www.sec.gov/files/company_tickers.json');
const nextCache = new Map<string, TickerDirectoryRecord>();
for (const [cik, company] of Object.entries(companies)) {
if (company.ticker === ticker.toUpperCase()) {
return cik;
}
for (const record of Object.values(payload)) {
nextCache.set(record.ticker.toUpperCase(), record);
}
throw new Error(`Ticker ${ticker} not found`);
this.tickerCache = nextCache;
this.tickerCacheLoadedAt = Date.now();
}
/**
* Extract key metrics from filing
*/
async extractKeyMetrics(filing: any): Promise<any> {
try {
const filingUrl = `${this.baseUrl}/Archives/${filing.accession_number.replace(/-/g, '')}/${filing.accession_number}-index.htm`;
async resolveTicker(ticker: string) {
await this.ensureTickerCache();
const response = await fetch(filingUrl, {
headers: { 'User-Agent': this.userAgent }
});
const normalizedTicker = ticker.trim().toUpperCase();
const record = this.tickerCache.get(normalizedTicker);
if (!response.ok) return null;
const html = await response.text();
// Extract key financial metrics from XBRL
const metrics = {
revenue: this.extractMetric(html, 'Revenues'),
netIncome: this.extractMetric(html, 'NetIncomeLoss'),
totalAssets: this.extractMetric(html, 'Assets'),
cash: this.extractMetric(html, 'CashAndCashEquivalentsAtCarryingValue'),
debt: this.extractMetric(html, 'LongTermDebt')
};
return metrics;
} catch (error) {
console.error('Error extracting metrics:', error);
return null;
if (!record) {
throw new Error(`Ticker ${normalizedTicker} was not found in SEC directory`);
}
}
/**
* Extract a specific metric from XBRL data
*/
private extractMetric(html: string, metricName: string): number | null {
const regex = new RegExp(`<ix:nonFraction[^>]*name="[^"]*${metricName}[^"]*"[^>]*>([^<]+)<`, 'i');
const match = html.match(regex);
return match ? parseFloat(match[1].replace(/,/g, '')) : null;
}
/**
* Get filing details by accession number
*/
async getFilingDetails(accessionNumber: string) {
const filingUrl = `${this.baseUrl}/Archives/${accessionNumber.replace(/-/g, '')}/${accessionNumber}-index.htm`;
return {
filing_url: filingUrl
ticker: normalizedTicker,
cik: String(record.cik_str),
companyName: record.title
};
}
async fetchRecentFilings(ticker: string, limit = 20): Promise<SecFiling[]> {
const company = await this.resolveTicker(ticker);
const cikPadded = company.cik.padStart(10, '0');
const payload = await this.fetchJson<RecentFilingsPayload>(`https://data.sec.gov/submissions/CIK${cikPadded}.json`);
const recent = payload.filings?.recent;
if (!recent) {
return [];
}
const forms = recent.form ?? [];
const accessionNumbers = recent.accessionNumber ?? [];
const filingDates = recent.filingDate ?? [];
const primaryDocuments = recent.primaryDocument ?? [];
const filings: SecFiling[] = [];
for (let i = 0; i < forms.length; i += 1) {
const filingType = forms[i] as FilingType;
if (!SUPPORTED_FORMS.includes(filingType)) {
continue;
}
const accessionNumber = accessionNumbers[i];
if (!accessionNumber) {
continue;
}
const compactAccession = accessionNumber.replace(/-/g, '');
const documentName = primaryDocuments[i];
const filingUrl = documentName
? `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${compactAccession}/${documentName}`
: null;
filings.push({
ticker: company.ticker,
cik: company.cik,
companyName: payload.name ?? company.companyName,
filingType,
filingDate: filingDates[i] ?? new Date().toISOString().slice(0, 10),
accessionNumber,
filingUrl
});
if (filings.length >= limit) {
break;
}
}
return filings;
}
private pickLatestFact(payload: CompanyFactsPayload, tag: string): number | null {
const unitCollections = payload.facts?.['us-gaap']?.[tag]?.units;
if (!unitCollections) {
return null;
}
const preferredUnits = ['USD', 'USD/shares'];
for (const unit of preferredUnits) {
const series = unitCollections[unit];
if (!series?.length) {
continue;
}
const best = [...series]
.filter((item) => typeof item.val === 'number')
.sort((a, b) => {
const aDate = Date.parse(a.filed ?? a.end ?? '1970-01-01');
const bDate = Date.parse(b.filed ?? b.end ?? '1970-01-01');
return bDate - aDate;
})[0];
if (best?.val !== undefined) {
return best.val;
}
}
return null;
}
async fetchMetrics(cik: string): Promise<FilingMetrics> {
const normalized = cik.padStart(10, '0');
const cached = this.factsCache.get(normalized);
if (cached && Date.now() - cached.loadedAt < FACTS_CACHE_TTL_MS) {
return cached.metrics;
}
const payload = await this.fetchJson<CompanyFactsPayload>(`https://data.sec.gov/api/xbrl/companyfacts/CIK${normalized}.json`);
const metrics: FilingMetrics = {
revenue: this.pickLatestFact(payload, 'Revenues'),
netIncome: this.pickLatestFact(payload, 'NetIncomeLoss'),
totalAssets: this.pickLatestFact(payload, 'Assets'),
cash: this.pickLatestFact(payload, 'CashAndCashEquivalentsAtCarryingValue'),
debt: this.pickLatestFact(payload, 'LongTermDebt')
};
this.factsCache.set(normalized, {
loadedAt: Date.now(),
metrics
});
return metrics;
}
}