diff --git a/lib/server/api/app.ts b/lib/server/api/app.ts
index f333526..7d13ad6 100644
--- a/lib/server/api/app.ts
+++ b/lib/server/api/app.ts
@@ -75,6 +75,7 @@ import { synthesizeCompanyOverview } from '@/lib/server/company-overview-synthes
 import { getRecentDevelopments } from '@/lib/server/recent-developments';
 import { deriveValuationSnapshot, getSecCompanyProfile, toCompanyProfile } from '@/lib/server/sec-company-profile';
 import { getCompanyDescription } from '@/lib/server/sec-description';
+import { getYahooCompanyDescription } from '@/lib/server/yahoo-company-profile';
 import { answerSearchQuery, searchKnowledgeBase } from '@/lib/server/search';
 import {
   enqueueTask,
@@ -1449,10 +1450,12 @@ export const app = new Elysia({ prefix: '/api' })
         : null
     };
     const annualFiling = redactedFilings.find((entry) => entry.filing_type === '10-K') ?? null;
-    const [description, synthesizedDevelopments] = await Promise.all([
+    const [secDescription, yahooDescription, synthesizedDevelopments] = await Promise.all([
       getCompanyDescription(annualFiling),
+      getYahooCompanyDescription(ticker),
       getRecentDevelopments(ticker, { filings: redactedFilings })
     ]);
+    const description = yahooDescription ?? secDescription;
     const latestFilingSummary = latestFiling
       ? {
         accessionNumber: latestFiling.accession_number,
diff --git a/lib/server/yahoo-company-profile.test.ts b/lib/server/yahoo-company-profile.test.ts
new file mode 100644
index 0000000..98a3921
--- /dev/null
+++ b/lib/server/yahoo-company-profile.test.ts
@@ -0,0 +1,66 @@
+import { beforeEach, describe, expect, it, mock } from 'bun:test';
+import {
+  __yahooCompanyProfileInternals,
+  getYahooCompanyDescription
+} from './yahoo-company-profile';
+
+describe('yahoo company profile', () => {
+  beforeEach(() => {
+    __yahooCompanyProfileInternals.resetCaches();
+  });
+
+  it('extracts a Yahoo session cookie from response headers', () => {
+    const headers = new Headers();
+    headers.append('set-cookie', 'A3=session-token; Domain=.yahoo.com; Path=/; Secure; HttpOnly');
+
+    expect(__yahooCompanyProfileInternals.pickYahooSessionCookie(headers)).toBe('A3=session-token');
+  });
+
+  it('normalizes and clips long descriptions', () => {
+    const value = ` Microsoft builds software.\n\n\n${'x'.repeat(1800)} `;
+    const normalized = __yahooCompanyProfileInternals.normalizeDescription(value);
+
+    expect(normalized).toContain('Microsoft builds software.');
+    expect((normalized?.length ?? 0) <= 1600).toBe(true);
+  });
+
+  it('fetches longBusinessSummary through Yahoo cookie and crumb endpoints', async () => {
+    const fetchMock = mock(async (input: string | URL | Request) => {
+      const url = String(input);
+
+      if (url === 'https://fc.yahoo.com') {
+        return new Response('', {
+          status: 404,
+          headers: {
+            'set-cookie': 'A3=session-token; Domain=.yahoo.com; Path=/; Secure; HttpOnly'
+          }
+        });
+      }
+
+      if (url === 'https://query1.finance.yahoo.com/v1/test/getcrumb') {
+        return new Response('crumb-token', { status: 200 });
+      }
+
+      if (url === 'https://query1.finance.yahoo.com/v10/finance/quoteSummary/MSFT?modules=assetProfile&crumb=crumb-token') {
+        return Response.json({
+          quoteSummary: {
+            result: [
+              {
+                assetProfile: {
+                  longBusinessSummary: 'Microsoft builds cloud and software products worldwide.'
+                }
+              }
+            ]
+          }
+        });
+      }
+
+      throw new Error(`Unexpected URL: ${url}`);
+    }) as unknown as typeof fetch;
+
+    const description = await getYahooCompanyDescription('msft', { fetchImpl: fetchMock });
+
+    expect(description).toBe('Microsoft builds cloud and software products worldwide.');
+    expect(fetchMock).toHaveBeenCalledTimes(3);
+  });
+});
diff --git a/lib/server/yahoo-company-profile.ts b/lib/server/yahoo-company-profile.ts
new file mode 100644
index 0000000..944de1a
--- /dev/null
+++ b/lib/server/yahoo-company-profile.ts
@@ -0,0 +1,258 @@
+type FetchImpl = typeof fetch;
+
+/** A cached value plus the epoch-millisecond time after which it is considered stale. */
+type CacheEntry<T> = {
+  expiresAt: number;
+  value: T;
+};
+
+/** The portion of the quoteSummary response body that this module reads. */
+type YahooQuoteSummaryPayload = {
+  quoteSummary?: {
+    result?: Array<{
+      assetProfile?: {
+        longBusinessSummary?: string;
+      };
+    }>;
+  };
+};
+
+const YAHOO_COOKIE_URL = 'https://fc.yahoo.com';
+const YAHOO_CRUMB_URL = 'https://query1.finance.yahoo.com/v1/test/getcrumb';
+const YAHOO_QUOTE_SUMMARY_BASE = 'https://query1.finance.yahoo.com/v10/finance/quoteSummary';
+
+const YAHOO_SESSION_TTL_MS = 1000 * 60 * 15;
+// Upper bound per Yahoo request so a stalled upstream cannot stall the caller indefinitely.
+const YAHOO_REQUEST_TIMEOUT_MS = 5_000;
+const DESCRIPTION_CACHE_TTL_MS = 1000 * 60 * 60 * 6;
+// Failures are cached only briefly so a transient outage does not hide descriptions for hours.
+const DESCRIPTION_FAILURE_TTL_MS = 1000 * 60 * 5;
+const DESCRIPTION_MAX_CHARS = 1_600;
+
+let yahooSessionCache: CacheEntry<{ cookie: string; crumb: string }> | null = null;
+const descriptionCache = new Map<string, CacheEntry<string | null>>();
+
+function yahooUserAgent() {
+  return 'Mozilla/5.0 (compatible; FiscalClone/3.0)';
+}
+
+/** Unifies line endings, collapses space/tab runs, caps newline runs at two, and trims. */
+function normalizeWhitespace(value: string) {
+  return value
+    .replace(/\r\n?/g, '\n')
+    .replace(/[ \t]+/g, ' ')
+    .replace(/\n{3,}/g, '\n\n')
+    .trim();
+}
+
+/**
+ * Shortens `value` to at most `maxChars` characters. Prefers to end after the last
+ * ". ", "! " or "? " when that falls past 60% of the limit, then at the last space when
+ * that falls past 70% of the limit, and otherwise cuts at the limit.
+ */
+function clipAtSentenceBoundary(value: string, maxChars = DESCRIPTION_MAX_CHARS) {
+  if (value.length <= maxChars) {
+    return value;
+  }
+
+  const slice = value.slice(0, maxChars);
+  const sentenceBoundary = Math.max(
+    slice.lastIndexOf('. '),
+    slice.lastIndexOf('! '),
+    slice.lastIndexOf('? ')
+  );
+
+  if (sentenceBoundary > maxChars * 0.6) {
+    return slice.slice(0, sentenceBoundary + 1).trim();
+  }
+
+  const wordBoundary = slice.lastIndexOf(' ');
+  return (wordBoundary > maxChars * 0.7 ? slice.slice(0, wordBoundary) : slice).trim();
+}
+
+/** Returns the cleaned, clipped text, or `null` for non-string input and blank text. */
+function normalizeDescription(value: unknown) {
+  if (typeof value !== 'string') {
+    return null;
+  }
+
+  const normalized = clipAtSentenceBoundary(normalizeWhitespace(value));
+  return normalized.length > 0 ? normalized : null;
+}
+
+/** Returns the Set-Cookie values found on `headers`, one array element per cookie. */
+function readSetCookieHeader(headers: Headers) {
+  const maybeHeaders = headers as Headers & { getSetCookie?: () => string[] };
+
+  if (typeof maybeHeaders.getSetCookie === 'function') {
+    const values = maybeHeaders.getSetCookie().filter((value) => value.trim().length > 0);
+    if (values.length > 0) {
+      return values;
+    }
+  }
+
+  const combined = headers.get('set-cookie');
+  if (!combined) {
+    return [];
+  }
+
+  // Headers.get() joins several cookies with ", ". Split only where the comma is followed by
+  // a `name=` pair, which leaves the comma inside an `Expires=Wed, 21 Oct ...` date alone.
+  return combined
+    .split(/,(?=\s*[^=;,\s]+=)/)
+    .map((value) => value.trim())
+    .filter((value) => value.length > 0);
+}
+
+/** Returns the first `A3` or `A1` cookie on `headers` as `name=value`, or `null` if absent. */
+function pickYahooSessionCookie(headers: Headers) {
+  const cookies = readSetCookieHeader(headers);
+  const match = cookies
+    .map((value) => /^([^=;,\s]+)=([^;]+)/.exec(value))
+    .find((entry) => entry && (entry[1] === 'A3' || entry[1] === 'A1'));
+
+  return match ? `${match[1]}=${match[2]}` : null;
+}
+
+/**
+ * Returns a Yahoo cookie/crumb pair, reusing the cached pair until it expires.
+ *
+ * @throws Error when no session cookie is returned, the crumb request fails, or the crumb
+ * body is empty or looks like JSON.
+ */
+async function getYahooSession(fetchImpl: FetchImpl = fetch) {
+  if (yahooSessionCache && yahooSessionCache.expiresAt > Date.now()) {
+    return yahooSessionCache.value;
+  }
+
+  // The status is intentionally not checked: a reply that carries the cookie is usable even
+  // when it is not 2xx (the unit test serves it on a 404).
+  const cookieResponse = await fetchImpl(YAHOO_COOKIE_URL, {
+    headers: {
+      'User-Agent': yahooUserAgent(),
+      Accept: '*/*'
+    },
+    cache: 'no-store',
+    signal: AbortSignal.timeout(YAHOO_REQUEST_TIMEOUT_MS)
+  });
+
+  const cookie = pickYahooSessionCookie(cookieResponse.headers);
+  if (!cookie) {
+    throw new Error(
+      cookieResponse.ok
+        ? 'Yahoo session cookie unavailable'
+        : `Yahoo cookie request failed (${cookieResponse.status})`
+    );
+  }
+
+  const crumbResponse = await fetchImpl(YAHOO_CRUMB_URL, {
+    headers: {
+      'User-Agent': yahooUserAgent(),
+      Accept: 'text/plain',
+      Cookie: cookie
+    },
+    cache: 'no-store',
+    signal: AbortSignal.timeout(YAHOO_REQUEST_TIMEOUT_MS)
+  });
+
+  if (!crumbResponse.ok) {
+    throw new Error(`Yahoo crumb request failed (${crumbResponse.status})`);
+  }
+
+  const crumb = (await crumbResponse.text()).trim();
+  if (!crumb || crumb.startsWith('{')) {
+    throw new Error('Yahoo crumb unavailable');
+  }
+
+  const session = { cookie, crumb };
+  yahooSessionCache = {
+    value: session,
+    expiresAt: Date.now() + YAHOO_SESSION_TTL_MS
+  };
+
+  return session;
+}
+
+/**
+ * Looks up the Yahoo Finance `longBusinessSummary` for `ticker`.
+ *
+ * Best-effort: any network, HTTP, or parsing failure yields `null` instead of throwing.
+ * Completed lookups (including "no summary") are cached for DESCRIPTION_CACHE_TTL_MS;
+ * failures are cached for the much shorter DESCRIPTION_FAILURE_TTL_MS.
+ *
+ * @param ticker Symbol to look up; trimmed and upper-cased before use.
+ * @param options Optional overrides; `fetchImpl` replaces the global `fetch` (used by tests).
+ * @returns The normalized description, or `null` when none is available.
+ */
+export async function getYahooCompanyDescription(
+  ticker: string,
+  options?: { fetchImpl?: FetchImpl }
+) {
+  const normalizedTicker = ticker.trim().toUpperCase();
+  if (!normalizedTicker) {
+    return null;
+  }
+
+  const cached = descriptionCache.get(normalizedTicker);
+  if (cached && cached.expiresAt > Date.now()) {
+    return cached.value;
+  }
+
+  const fetchImpl = options?.fetchImpl ?? fetch;
+
+  try {
+    const session = await getYahooSession(fetchImpl);
+    // Encode the symbol so characters such as `/`, `?` or `#` cannot reshape the request URL.
+    const url = new URL(`${YAHOO_QUOTE_SUMMARY_BASE}/${encodeURIComponent(normalizedTicker)}`);
+    url.searchParams.set('modules', 'assetProfile');
+    url.searchParams.set('crumb', session.crumb);
+
+    const response = await fetchImpl(url, {
+      headers: {
+        'User-Agent': yahooUserAgent(),
+        Accept: 'application/json',
+        Cookie: session.cookie
+      },
+      cache: 'no-store',
+      signal: AbortSignal.timeout(YAHOO_REQUEST_TIMEOUT_MS)
+    });
+
+    if (!response.ok) {
+      if (response.status === 401 || response.status === 403) {
+        // The cookie/crumb pair was rejected; drop it so the next lookup opens a new session.
+        yahooSessionCache = null;
+      }
+      throw new Error(`Yahoo profile request failed (${response.status})`);
+    }
+
+    const payload = await response.json() as YahooQuoteSummaryPayload;
+    const description = normalizeDescription(
+      payload.quoteSummary?.result?.[0]?.assetProfile?.longBusinessSummary
+    );
+
+    descriptionCache.set(normalizedTicker, {
+      value: description,
+      expiresAt: Date.now() + DESCRIPTION_CACHE_TTL_MS
+    });
+
+    return description;
+  } catch {
+    descriptionCache.set(normalizedTicker, {
+      value: null,
+      expiresAt: Date.now() + DESCRIPTION_FAILURE_TTL_MS
+    });
+    return null;
+  }
+}
+
+/** Test-only handle on the module's private helpers and caches. */
+export const __yahooCompanyProfileInternals = {
+  clipAtSentenceBoundary,
+  getYahooSession,
+  normalizeDescription,
+  pickYahooSessionCookie,
+  resetCaches() {
+    yahooSessionCache = null;
+    descriptionCache.clear();
+  }
+};