diff --git a/MosaicIQ/README.md b/MosaicIQ/README.md index 3793312..d52c26c 100644 --- a/MosaicIQ/README.md +++ b/MosaicIQ/README.md @@ -6,6 +6,30 @@ This template should help get you started developing with Tauri, React and Types - [Rig Agent Harness Architecture](./docs/rig-agent-harness.md) +## Local-First News + +The news runtime lives in `src-tauri/src/news/` and stores feed state plus articles in a local SQLite database under the app data directory. Reads are local-first: the UI and `/news` terminal command render cached articles immediately, while refreshes update the local cache in the background. + +Frontend example: + +```ts +import { useNewsFeed } from './src/news'; + +const { articles, refresh, toggleSaved, markRead } = useNewsFeed({ + onlyHighlighted: true, + limit: 20, +}); +``` + +Terminal usage: + +```text +/news +/news NVDA +``` + +`/news` never fetches the network at read time. It filters articles already persisted in the local news database. + ## Recommended IDE Setup - [VS Code](https://code.visualstudio.com/) + [Tauri](https://marketplace.visualstudio.com/items?itemName=tauri-apps.tauri-vscode) + [rust-analyzer](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer) diff --git a/MosaicIQ/package.json b/MosaicIQ/package.json index 9d639d5..d8ecc9c 100644 --- a/MosaicIQ/package.json +++ b/MosaicIQ/package.json @@ -7,7 +7,8 @@ "dev": "vite", "build": "tsc && vite build", "preview": "vite preview", - "tauri": "tauri" + "tauri": "tauri", + "test": "bun test" }, "dependencies": { "@tailwindcss/vite": "^4.2.2", diff --git a/MosaicIQ/src-tauri/Cargo.lock b/MosaicIQ/src-tauri/Cargo.lock index 37d4d73..334a4b3 100644 --- a/MosaicIQ/src-tauri/Cargo.lock +++ b/MosaicIQ/src-tauri/Cargo.lock @@ -83,6 +83,16 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "async-broadcast" version = "0.7.2" @@ -271,6 +281,19 @@ dependencies = [ "system-deps", ] +[[package]] +name = "atom_syndication" +version = "0.12.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2f68d23e2cb4fd958c705b91a6b4c80ceeaf27a9e11651272a8389d5ce1a4a3" +dependencies = [ + "chrono", + "derive_builder", + "diligent-date-parser", + "never", + "quick-xml 0.37.5", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -893,14 +916,38 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + [[package]] name = "darling" version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.23.0", + "darling_macro 0.23.0", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", ] [[package]] @@ -916,13 +963,24 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn 2.0.117", +] + [[package]] name = "darling_macro" version = 
"0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "darling_core", + "darling_core 0.23.0", "quote", "syn 2.0.117", ] @@ -933,6 +991,24 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" +[[package]] +name = "deadpool" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" +dependencies = [ + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "deranged" version = "0.5.8" @@ -943,6 +1019,37 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "0.99.20" @@ -987,6 +1094,15 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "diligent-date-parser" 
+version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ede7d79366f419921e2e2f67889c12125726692a313bffb474bd5f37a581e9" +dependencies = [ + "chrono", +] + [[package]] name = "dirs" version = "6.0.0" @@ -1235,6 +1351,18 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.3.0" @@ -1825,6 +1953,15 @@ dependencies = [ "ahash 0.7.8", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -1840,6 +1977,15 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "heck" version = "0.4.1" @@ -1925,6 +2071,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = 
"hyper" version = "1.9.0" @@ -1939,6 +2091,7 @@ dependencies = [ "http", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -2466,6 +2619,17 @@ dependencies = [ "libc", ] +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -2624,23 +2788,30 @@ dependencies = [ name = "mosaiciq" version = "0.1.0" dependencies = [ + "atom_syndication", "chrono", "chrono-tz", "crabrl", "futures", + "hex", "quick-xml 0.36.2", "regex", "reqwest 0.12.28", "rig-core", + "rss", + "rusqlite", "serde", "serde_json", + "sha2", "tauri", "tauri-build", "tauri-plugin-opener", "tauri-plugin-store", + "tempfile", "thiserror 2.0.18", "tokio", "urlencoding", + "wiremock", "yfinance-rs", ] @@ -2727,6 +2898,12 @@ dependencies = [ "jni-sys 0.3.1", ] +[[package]] +name = "never" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91" + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -2764,6 +2941,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "num_enum" version = "0.7.6" @@ -3678,6 +3865,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "encoding_rs", + "memchr", +] + [[package]] name = "quick-xml" version = "0.38.4" @@ -4130,6 +4327,32 @@ 
dependencies = [ "syn 1.0.109", ] +[[package]] +name = "rss" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2107738f003660f0a91f56fd3e3bd3ab5d918b2ddaf1e1ec2136fb1c46f71bf" +dependencies = [ + "atom_syndication", + "derive_builder", + "never", + "quick-xml 0.37.5", +] + +[[package]] +name = "rusqlite" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" +dependencies = [ + "bitflags 2.11.0", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rust_decimal" version = "1.41.0" @@ -4559,7 +4782,7 @@ version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling", + "darling 0.23.0", "proc-macro2", "quote", "syn 2.0.117", @@ -6674,6 +6897,29 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64 0.22.1", + "deadpool", + "futures", + "http", + "http-body-util", + "hyper", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/MosaicIQ/src-tauri/Cargo.toml b/MosaicIQ/src-tauri/Cargo.toml index f501cca..a4cc428 100644 --- a/MosaicIQ/src-tauri/Cargo.toml +++ b/MosaicIQ/src-tauri/Cargo.toml @@ -24,7 +24,7 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" rig-core = "0.34.0" tauri-plugin-store = "2" -tokio = { version = "1", features = ["time", "sync"] } +tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread", "sync", "time"] } futures = 
"0.3" reqwest = { version = "0.12", features = ["json", "cookies", "gzip", "brotli"] } chrono = { version = "0.4", features = ["clock"] } @@ -35,6 +35,13 @@ regex = "1" thiserror = "2" urlencoding = "2" yfinance-rs = "0.7.2" +atom_syndication = "0.12" +hex = "0.4" +rss = "2" +rusqlite = { version = "0.32", features = ["bundled"] } +sha2 = "0.10" [dev-dependencies] tauri = { version = "2", features = ["test"] } +tempfile = "3" +wiremock = "0.6" diff --git a/MosaicIQ/src-tauri/news-feeds.default.json b/MosaicIQ/src-tauri/news-feeds.default.json new file mode 100644 index 0000000..9101b73 --- /dev/null +++ b/MosaicIQ/src-tauri/news-feeds.default.json @@ -0,0 +1,34 @@ +{ + "feeds": [ + { + "id": "fed-press-all", + "name": "Federal Reserve Press Releases", + "url": "https://www.federalreserve.gov/feeds/press_all.xml", + "refreshMinutes": 30 + }, + { + "id": "fed-monetary", + "name": "Federal Reserve Monetary Policy", + "url": "https://www.federalreserve.gov/feeds/press_monetary.xml", + "refreshMinutes": 15 + }, + { + "id": "sec-press", + "name": "SEC Press Releases", + "url": "https://www.sec.gov/news/pressreleases.rss", + "refreshMinutes": 15 + }, + { + "id": "sec-8k", + "name": "SEC Current 8-K Filings", + "url": "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type=8-K&count=100&output=atom", + "refreshMinutes": 15 + }, + { + "id": "sec-10q", + "name": "SEC Current 10-Q Filings", + "url": "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type=10-Q&count=100&output=atom", + "refreshMinutes": 15 + } + ] +} diff --git a/MosaicIQ/src-tauri/src/agent/panel_context.rs b/MosaicIQ/src-tauri/src/agent/panel_context.rs index a0c7dfc..0c214d5 100644 --- a/MosaicIQ/src-tauri/src/agent/panel_context.rs +++ b/MosaicIQ/src-tauri/src/agent/panel_context.rs @@ -4,10 +4,11 @@ use rig::completion::Message; use crate::agent::ChatPanelContext; use crate::error::AppError; +use crate::news::NewsArticle; use crate::terminal::{ CashFlowPanelData, CashFlowPeriod, Company, 
CompanyPricePoint, DividendEvent, DividendsPanelData, EarningsPanelData, EarningsPeriod, ErrorPanel, FinancialsPanelData, - Holding, NewsItem, PanelPayload, Portfolio, SourceStatus, StatementPeriod, StockAnalysis, + Holding, PanelPayload, Portfolio, SourceStatus, StatementPeriod, StockAnalysis, }; const MAX_TEXT_FIELD_LENGTH: usize = 600; @@ -153,19 +154,26 @@ fn compact_portfolio_panel(data: &Portfolio) -> Value { }) } -fn compact_news_panel(data: &[NewsItem], ticker: Option<&str>) -> Value { +fn compact_news_panel(data: &[NewsArticle], ticker: Option<&str>) -> Value { json!({ "ticker": ticker, "items": data .iter() .take(MAX_NEWS_ITEMS) .map(|item| json!({ + "id": item.id, + "sourceId": item.source_id, "source": truncate_text(&item.source), "headline": truncate_text(&item.headline), - "timestamp": item.timestamp, - "snippet": truncate_text(&item.snippet), + "summary": truncate_text(&item.summary), "url": item.url, - "relatedTickers": item.related_tickers, + "publishedAt": item.published_at, + "publishedTs": item.published_ts, + "sentiment": item.sentiment, + "highlightReason": item.highlight_reason, + "tickers": item.tickers, + "isRead": item.is_read, + "isSaved": item.is_saved, })) .collect::>(), }) @@ -393,10 +401,11 @@ mod tests { use super::{build_panel_context_message, compact_panel_payload, truncate_text}; use crate::agent::ChatPanelContext; + use crate::news::types::{HighlightReason, NewsArticle, NewsSentiment}; use crate::terminal::{ CashFlowPanelData, CashFlowPeriod, Company, CompanyProfile, DividendEvent, DividendsPanelData, EarningsPanelData, EarningsPeriod, ErrorPanel, FilingRef, - FinancialsPanelData, Frequency, Holding, NewsItem, PanelPayload, Portfolio, SourceStatus, + FinancialsPanelData, Frequency, Holding, PanelPayload, Portfolio, SourceStatus, StatementPeriod, StockAnalysis, }; @@ -422,7 +431,7 @@ mod tests { let items = value["items"].as_array().unwrap(); assert_eq!(items.len(), 5); - assert!(items[0]["snippet"].as_str().unwrap().len() <= 
603); + assert!(items[0]["summary"].as_str().unwrap().len() <= 603); } #[test] @@ -555,15 +564,24 @@ mod tests { } } - fn sample_news_item(index: usize) -> NewsItem { - NewsItem { + fn sample_news_item(index: usize) -> NewsArticle { + NewsArticle { id: format!("news-{index}"), + source_id: "source-id".to_string(), source: "Source".to_string(), headline: format!("Headline {index}"), - timestamp: "2026-04-06T10:00:00Z".to_string(), - snippet: "S".repeat(650), + summary: "S".repeat(650), url: Some("https://example.com/story".to_string()), - related_tickers: vec!["AAPL".to_string()], + canonical_url: Some("https://example.com/story".to_string()), + published_at: "2026-04-06T10:00:00Z".to_string(), + published_ts: 1_775_469_600, + fetched_at: "2026-04-06T10:05:00Z".to_string(), + sentiment: NewsSentiment::Bull, + sentiment_score: 0.66, + highlight_reason: Some(HighlightReason::TickerDetected), + tickers: vec!["AAPL".to_string()], + is_read: false, + is_saved: false, } } diff --git a/MosaicIQ/src-tauri/src/commands/mod.rs b/MosaicIQ/src-tauri/src/commands/mod.rs index f16b5f8..04bfda6 100644 --- a/MosaicIQ/src-tauri/src/commands/mod.rs +++ b/MosaicIQ/src-tauri/src/commands/mod.rs @@ -1,4 +1,5 @@ //! Tauri command handlers. 
+pub mod news; pub mod settings; pub mod terminal; diff --git a/MosaicIQ/src-tauri/src/commands/news.rs b/MosaicIQ/src-tauri/src/commands/news.rs new file mode 100644 index 0000000..74c650b --- /dev/null +++ b/MosaicIQ/src-tauri/src/commands/news.rs @@ -0,0 +1,49 @@ +use tauri::{AppHandle, Emitter}; + +use crate::news::{ + QueryNewsFeedRequest, QueryNewsFeedResponse, RefreshNewsFeedRequest, RefreshNewsFeedResult, + UpdateNewsArticleStateRequest, +}; +use crate::state::AppState; + +#[tauri::command] +pub async fn query_news_feed( + state: tauri::State<'_, AppState>, + request: QueryNewsFeedRequest, +) -> Result { + state + .news_service + .query_feed(request) + .await + .map_err(|error| error.to_string()) +} + +#[tauri::command] +pub async fn refresh_news_feed( + app: AppHandle, + state: tauri::State<'_, AppState>, + request: RefreshNewsFeedRequest, +) -> Result { + let result = state + .news_service + .refresh_feed(request) + .await + .map_err(|error| error.to_string())?; + + app.emit("news_feed_updated", &result) + .map_err(|error| error.to_string())?; + + Ok(result) +} + +#[tauri::command] +pub async fn update_news_article_state( + state: tauri::State<'_, AppState>, + request: UpdateNewsArticleStateRequest, +) -> Result<(), String> { + state + .news_service + .update_article_state(request) + .await + .map_err(|error| error.to_string()) +} diff --git a/MosaicIQ/src-tauri/src/lib.rs b/MosaicIQ/src-tauri/src/lib.rs index 63b728e..44417af 100644 --- a/MosaicIQ/src-tauri/src/lib.rs +++ b/MosaicIQ/src-tauri/src/lib.rs @@ -7,13 +7,14 @@ mod agent; mod commands; mod error; +mod news; mod portfolio; mod state; mod terminal; #[cfg(test)] mod test_support; -use tauri::Manager; +use tauri::{Emitter, Manager}; /// Starts the Tauri application and registers the backend command surface. 
#[cfg_attr(mobile, tauri::mobile_entry_point)] @@ -23,8 +24,13 @@ pub fn run() { .setup(|app| { let state = state::AppState::new(app.handle()) .map_err(|error| -> Box { Box::new(error) })?; + let news_service = state.news_service.clone(); + let app_handle = app.handle().clone(); app.manage(state); + news::scheduler::spawn_news_scheduler(news_service, move |result| { + let _ = app_handle.emit("news_feed_updated", &result); + }); Ok(()) }) .plugin(tauri_plugin_opener::init()) @@ -36,7 +42,10 @@ pub fn run() { commands::settings::get_agent_config_status, commands::settings::save_agent_runtime_config, commands::settings::update_remote_api_key, - commands::settings::clear_remote_api_key + commands::settings::clear_remote_api_key, + commands::news::query_news_feed, + commands::news::refresh_news_feed, + commands::news::update_news_article_state ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/MosaicIQ/src-tauri/src/news/classifier.rs b/MosaicIQ/src-tauri/src/news/classifier.rs new file mode 100644 index 0000000..1b1243b --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/classifier.rs @@ -0,0 +1,304 @@ +use std::collections::{BTreeSet, HashSet}; +use std::sync::OnceLock; + +use chrono::Utc; +use regex::Regex; +use reqwest::Url; +use sha2::{Digest, Sha256}; + +use crate::news::types::{ + ClassifiedNewsArticle, HighlightReason, NewsSentiment, ParsedNewsArticle, +}; + +const POSITIVE_KEYWORDS: [(&str, f64); 8] = [ + ("surge", 0.22), + ("beats", 0.18), + ("record", 0.16), + ("growth", 0.14), + ("upgrades", 0.18), + ("expands", 0.14), + ("strong demand", 0.2), + ("raises guidance", 0.28), +]; +const NEGATIVE_KEYWORDS: [(&str, f64); 8] = [ + ("plunge", -0.24), + ("misses", -0.18), + ("cuts guidance", -0.28), + ("layoffs", -0.16), + ("downgrade", -0.18), + ("fraud", -0.3), + ("investigation", -0.22), + ("default", -0.3), +]; + +pub fn classify_article(article: ParsedNewsArticle) -> ClassifiedNewsArticle { + let sentiment_score = 
sentiment_score(&article.headline, &article.summary); + let sentiment = sentiment_label(sentiment_score); + let tickers = extract_tickers(&article.headline, &article.summary); + let canonical_url = normalize_url(article.canonical_url.as_deref()).or(article.canonical_url); + let highlight_reason = highlight_reason( + &article.headline, + &article.summary, + sentiment_score, + !tickers.is_empty(), + article.published_ts, + &article.source_id, + ); + let fingerprint = fingerprint( + canonical_url.as_deref(), + &article.source, + &article.headline, + article.published_ts, + &article.summary, + ); + + ClassifiedNewsArticle { + fingerprint, + source_id: article.source_id, + source: article.source, + headline: article.headline, + summary: article.summary, + url: article.url, + canonical_url, + published_at: article.published_at, + published_ts: article.published_ts, + fetched_at: article.fetched_at, + sentiment, + sentiment_score, + highlight_reason, + tickers, + } +} + +pub fn sentiment_label(score: f64) -> NewsSentiment { + if score >= 0.35 { + NewsSentiment::Bull + } else if score <= -0.35 { + NewsSentiment::Bear + } else { + NewsSentiment::Neutral + } +} + +pub fn sentiment_score(headline: &str, summary: &str) -> f64 { + let haystack = format!( + "{} {}", + headline.to_ascii_lowercase(), + summary.to_ascii_lowercase() + ); + let score = POSITIVE_KEYWORDS + .iter() + .chain(NEGATIVE_KEYWORDS.iter()) + .filter(|(keyword, _)| haystack.contains(keyword)) + .map(|(_, weight)| *weight) + .sum::(); + + score.clamp(-1.0, 1.0) +} + +pub fn extract_tickers(headline: &str, summary: &str) -> Vec { + static DOLLAR_RE: OnceLock = OnceLock::new(); + static PAREN_RE: OnceLock = OnceLock::new(); + static CAPS_RE: OnceLock = OnceLock::new(); + static STOPLIST: OnceLock> = OnceLock::new(); + + let stoplist = STOPLIST.get_or_init(|| { + [ + "USA", "CEO", "ETF", "GDP", "CPI", "SEC", "FED", "USD", "EPS", "AI", "IPO", "DOJ", + "FOMC", "ECB", + ] + .into_iter() + .collect() + }); + let mut 
tickers = BTreeSet::new(); + let full_text = format!("{headline} {summary}"); + + for captures in DOLLAR_RE + .get_or_init(|| Regex::new(r"\$([A-Z]{1,5})\b").expect("dollar ticker regex")) + .captures_iter(&full_text) + { + tickers.insert(captures[1].to_string()); + } + + for captures in PAREN_RE + .get_or_init(|| Regex::new(r"\(([A-Z]{1,5})\)").expect("paren ticker regex")) + .captures_iter(&full_text) + { + tickers.insert(captures[1].to_string()); + } + + for captures in CAPS_RE + .get_or_init(|| Regex::new(r"\b([A-Z]{1,5})\b").expect("caps ticker regex")) + .captures_iter(headline) + .chain( + CAPS_RE + .get_or_init(|| Regex::new(r"\b([A-Z]{1,5})\b").expect("caps ticker regex")) + .captures_iter(summary), + ) + { + let candidate = captures[1].to_string(); + if !stoplist.contains(candidate.as_str()) { + tickers.insert(candidate); + } + } + + tickers.into_iter().collect() +} + +pub fn fingerprint( + canonical_url: Option<&str>, + source: &str, + headline: &str, + published_ts: i64, + summary: &str, +) -> String { + let payload = if let Some(url) = canonical_url.filter(|value| !value.is_empty()) { + normalize_url(Some(url)).unwrap_or_else(|| url.to_string()) + } else { + let published_day = published_ts.div_euclid(86_400); + format!( + "{}|{}|{}|{}", + source.to_ascii_lowercase(), + normalize_text(headline), + published_day, + summary + .chars() + .take(180) + .collect::() + .to_ascii_lowercase() + ) + }; + + hex::encode(Sha256::digest(payload.as_bytes())) +} + +fn highlight_reason( + headline: &str, + summary: &str, + sentiment_score: f64, + has_ticker: bool, + published_ts: i64, + source_id: &str, +) -> Option { + let haystack = format!( + "{} {}", + headline.to_ascii_lowercase(), + summary.to_ascii_lowercase() + ); + + if ["breaking", "alert", "just in", "urgent"] + .iter() + .any(|keyword| haystack.contains(keyword)) + { + return Some(HighlightReason::BreakingKeyword); + } + + if [ + "federal reserve", + "interest rate", + "monetary policy", + "inflation", + 
"cpi", + "gdp", + "jobs report", + "8-k", + "10-q", + ] + .iter() + .any(|keyword| haystack.contains(keyword)) + { + return Some(HighlightReason::MacroEvent); + } + + if sentiment_score.abs() >= 0.7 { + return Some(HighlightReason::StrongSentiment); + } + + if has_ticker { + return Some(HighlightReason::TickerDetected); + } + + let recent_seconds = Utc::now().timestamp() - published_ts; + if recent_seconds <= 6 * 60 * 60 + && (source_id.contains("sec") + || source_id.contains("fed") + || ["filing", "guidance", "earnings", "policy", "meeting"] + .iter() + .any(|keyword| haystack.contains(keyword))) + { + return Some(HighlightReason::RecentHighValue); + } + + None +} + +fn normalize_url(url: Option<&str>) -> Option { + let raw = url?.trim(); + if raw.is_empty() { + return None; + } + + let mut parsed = Url::parse(raw).ok()?; + parsed.set_fragment(None); + let retained_pairs = parsed + .query_pairs() + .filter(|(key, _)| !key.starts_with("utm_") && key != "cmpid" && key != "ref") + .map(|(key, value)| (key.to_string(), value.to_string())) + .collect::>(); + parsed + .query_pairs_mut() + .clear() + .extend_pairs(retained_pairs); + + let normalized = parsed.to_string().trim_end_matches('?').to_string(); + Some(normalized.trim_end_matches('/').to_string()) +} + +fn normalize_text(value: &str) -> String { + value + .chars() + .map(|character| { + if character.is_ascii_alphanumeric() || character.is_ascii_whitespace() { + character.to_ascii_lowercase() + } else { + ' ' + } + }) + .collect::() + .split_whitespace() + .collect::>() + .join(" ") +} + +#[cfg(test)] +mod tests { + use super::{extract_tickers, sentiment_label, sentiment_score}; + use crate::news::types::NewsSentiment; + + #[test] + fn sentiment_label_should_return_bull_above_threshold() { + let score = sentiment_score( + "NVIDIA beats estimates and raises guidance", + "Shares surge on strong demand", + ); + + assert_eq!(sentiment_label(score), NewsSentiment::Bull); + } + + #[test] + fn 
sentiment_label_should_return_bear_below_threshold() { + let score = sentiment_score( + "Company cuts guidance after fraud investigation", + "Shares plunge on downgrade", + ); + + assert_eq!(sentiment_label(score), NewsSentiment::Bear); + } + + #[test] + fn extract_tickers_should_filter_stoplist_symbols() { + let tickers = extract_tickers("FED mentions $NVDA and (TSLA)", "USA GDP CPI CEO ETF"); + + assert_eq!(tickers, vec!["NVDA".to_string(), "TSLA".to_string()]); + } +} diff --git a/MosaicIQ/src-tauri/src/news/config.rs b/MosaicIQ/src-tauri/src/news/config.rs new file mode 100644 index 0000000..e67a335 --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/config.rs @@ -0,0 +1,91 @@ +use std::collections::HashSet; +use std::fs; +use std::path::Path; + +use crate::news::types::{NewsSourceConfig, NewsSourceConfigFile}; +use crate::news::{NewsError, Result}; + +pub fn load_or_bootstrap_config( + config_path: &Path, + default_config_bytes: &[u8], +) -> Result> { + if !config_path.exists() { + let Some(parent) = config_path.parent() else { + return Err(NewsError::Config(format!( + "config path has no parent: {}", + config_path.display() + ))); + }; + fs::create_dir_all(parent)?; + fs::write(config_path, default_config_bytes)?; + } + + let config = serde_json::from_slice::(&fs::read(config_path)?)?; + validate_config(&config.feeds)?; + Ok(config.feeds) +} + +fn validate_config(feeds: &[NewsSourceConfig]) -> Result<()> { + if feeds.is_empty() { + return Err(NewsError::Config( + "feed configuration must contain at least one feed".to_string(), + )); + } + + let mut seen_ids = HashSet::new(); + for feed in feeds { + if feed.id.trim().is_empty() { + return Err(NewsError::Config("feed id cannot be empty".to_string())); + } + if feed.name.trim().is_empty() { + return Err(NewsError::Config(format!( + "feed {} has an empty name", + feed.id + ))); + } + if !(feed.url.starts_with("http://") || feed.url.starts_with("https://")) { + return Err(NewsError::Config(format!( + "feed {} must use 
http or https", + feed.id + ))); + } + if feed.refresh_minutes == 0 { + return Err(NewsError::Config(format!( + "feed {} must use a positive refreshMinutes", + feed.id + ))); + } + if !seen_ids.insert(feed.id.clone()) { + return Err(NewsError::Config(format!( + "duplicate feed id in config: {}", + feed.id + ))); + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use std::fs; + + use tempfile::tempdir; + + use super::load_or_bootstrap_config; + + #[test] + fn load_or_bootstrap_config_should_copy_default_file_when_missing() { + let temp_dir = tempdir().unwrap(); + let config_path = temp_dir.path().join("news-feeds.json"); + + let feeds = load_or_bootstrap_config( + &config_path, + br#"{"feeds":[{"id":"fed","name":"Fed","url":"https://example.com","refreshMinutes":15}]}"#, + ) + .unwrap(); + + assert_eq!(feeds.len(), 1); + assert!(fs::metadata(config_path).is_ok()); + } +} diff --git a/MosaicIQ/src-tauri/src/news/fetcher.rs b/MosaicIQ/src-tauri/src/news/fetcher.rs new file mode 100644 index 0000000..33d0393 --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/fetcher.rs @@ -0,0 +1,83 @@ +use std::time::Duration; + +use chrono::Utc; +use reqwest::header::{ETAG, IF_MODIFIED_SINCE, IF_NONE_MATCH, LAST_MODIFIED, USER_AGENT}; +use reqwest::{Client, StatusCode}; + +use crate::news::types::{FeedSourceRecord, FetchResultKind, FetchedFeed}; +use crate::news::{NewsError, Result}; + +const DEFAULT_USER_AGENT: &str = "MosaicIQ/0.1 (local-first-news)"; + +#[derive(Clone)] +pub struct FeedFetcher { + client: Client, +} + +impl FeedFetcher { + pub fn new() -> Result { + Self::with_timeout(Duration::from_secs(8)) + } + + pub fn with_timeout(timeout: Duration) -> Result { + let client = Client::builder().timeout(timeout).build()?; + + Ok(Self { client }) + } + + pub async fn fetch(&self, source: &FeedSourceRecord, force: bool) -> Result { + let checked_at = Utc::now().to_rfc3339(); + let mut request = self + .client + .get(&source.url) + .header(USER_AGENT, DEFAULT_USER_AGENT); + + if 
!force { + if let Some(etag) = source.etag.as_deref() { + request = request.header(IF_NONE_MATCH, etag); + } + if let Some(last_modified) = source.last_modified.as_deref() { + request = request.header(IF_MODIFIED_SINCE, last_modified); + } + } + + let response = request.send().await?; + let etag = response + .headers() + .get(ETAG) + .and_then(|value| value.to_str().ok()) + .map(ToString::to_string); + let last_modified = response + .headers() + .get(LAST_MODIFIED) + .and_then(|value| value.to_str().ok()) + .map(ToString::to_string); + + if response.status() == StatusCode::NOT_MODIFIED { + return Ok(FetchedFeed { + kind: FetchResultKind::NotModified, + body: None, + etag, + last_modified, + checked_at, + }); + } + + if !response.status().is_success() { + let status = response.status(); + let detail = response.text().await.unwrap_or_default(); + return Err(NewsError::Parse(format!( + "feed {} returned {} {}", + source.id, status, detail + ))); + } + + Ok(FetchedFeed { + kind: FetchResultKind::Updated, + body: Some(response.text().await?), + etag, + last_modified, + checked_at, + }) + } +} diff --git a/MosaicIQ/src-tauri/src/news/mod.rs b/MosaicIQ/src-tauri/src/news/mod.rs new file mode 100644 index 0000000..a5bd539 --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/mod.rs @@ -0,0 +1,38 @@ +pub mod classifier; +pub mod config; +pub mod fetcher; +pub mod parser; +pub mod repository; +pub mod scheduler; +pub mod service; +pub mod types; + +use thiserror::Error; + +pub use service::NewsService; +pub use types::{ + NewsArticle, QueryNewsFeedRequest, QueryNewsFeedResponse, RefreshNewsFeedRequest, + RefreshNewsFeedResult, UpdateNewsArticleStateRequest, +}; + +#[derive(Debug, Error)] +pub enum NewsError { + #[error("news configuration error: {0}")] + Config(String), + #[error("news I/O failed: {0}")] + Io(#[from] std::io::Error), + #[error("news HTTP request failed: {0}")] + Http(#[from] reqwest::Error), + #[error("news database failed: {0}")] + Db(#[from] rusqlite::Error), + 
#[error("news JSON failed: {0}")] + Json(#[from] serde_json::Error), + #[error("news task join failed: {0}")] + Join(String), + #[error("news parse failed: {0}")] + Parse(String), + #[error("unknown news article: {0}")] + ArticleNotFound(String), +} + +pub type Result = std::result::Result; diff --git a/MosaicIQ/src-tauri/src/news/parser.rs b/MosaicIQ/src-tauri/src/news/parser.rs new file mode 100644 index 0000000..43468c3 --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/parser.rs @@ -0,0 +1,255 @@ +use std::io::Cursor; +use std::sync::OnceLock; + +use atom_syndication::Feed as AtomFeed; +use chrono::{DateTime, Utc}; +use regex::Regex; +use rss::Channel; + +use crate::news::types::{FeedSourceRecord, ParsedFeed, ParsedNewsArticle}; +use crate::news::{NewsError, Result}; + +pub fn parse_feed(source: &FeedSourceRecord, body: &str, fetched_at: &str) -> Result { + if let Ok(channel) = Channel::read_from(Cursor::new(body.as_bytes())) { + return Ok(parse_rss(source, &channel, fetched_at)); + } + + if let Ok(feed) = AtomFeed::read_from(Cursor::new(body.as_bytes())) { + return Ok(parse_atom(source, &feed, fetched_at)); + } + + Err(NewsError::Parse(format!( + "feed {} is neither valid RSS nor Atom", + source.id + ))) +} + +fn parse_rss(source: &FeedSourceRecord, channel: &Channel, fetched_at: &str) -> ParsedFeed { + let mut articles = Vec::new(); + let mut malformed_entries = 0; + + for item in channel.items() { + match parse_rss_item(source, item, fetched_at) { + Ok(article) => articles.push(article), + Err(_) => malformed_entries += 1, + } + } + + ParsedFeed { + articles, + malformed_entries, + } +} + +fn parse_atom(source: &FeedSourceRecord, feed: &AtomFeed, fetched_at: &str) -> ParsedFeed { + let mut articles = Vec::new(); + let mut malformed_entries = 0; + + for entry in feed.entries() { + match parse_atom_entry(source, entry, fetched_at) { + Ok(article) => articles.push(article), + Err(_) => malformed_entries += 1, + } + } + + ParsedFeed { + articles, + 
malformed_entries, + } +} + +fn parse_rss_item( + source: &FeedSourceRecord, + item: &rss::Item, + fetched_at: &str, +) -> Result { + let headline = item + .title() + .map(str::trim) + .filter(|value| !value.is_empty()) + .ok_or_else(|| NewsError::Parse(format!("feed {} item missing title", source.id)))?; + + let summary = item + .content() + .or_else(|| item.description()) + .map(strip_markup) + .unwrap_or_default(); + + let url = item + .link() + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToString::to_string); + let published = item.pub_date().or_else(|| { + item.dublin_core_ext() + .and_then(|ext| ext.dates().first().map(String::as_str)) + }); + + Ok(ParsedNewsArticle { + source_id: source.id.clone(), + source: source.name.clone(), + headline: strip_markup(headline), + summary, + canonical_url: url.clone(), + url, + published_at: resolve_published_at(published, fetched_at)?, + published_ts: resolve_published_ts(published, fetched_at)?, + fetched_at: fetched_at.to_string(), + }) +} + +fn parse_atom_entry( + source: &FeedSourceRecord, + entry: &atom_syndication::Entry, + fetched_at: &str, +) -> Result { + let headline = entry.title().trim(); + if headline.is_empty() { + return Err(NewsError::Parse(format!( + "feed {} entry missing title", + source.id + ))); + } + + let summary = entry + .summary() + .map(|text| strip_markup(text.as_str())) + .or_else(|| { + entry + .content() + .and_then(|content| content.value()) + .map(strip_markup) + }) + .unwrap_or_default(); + let url = entry + .links() + .iter() + .find(|link| link.rel() == "alternate" || link.rel().is_empty() || link.rel() == "self") + .map(|link| link.href().trim().to_string()) + .filter(|value| !value.is_empty()); + let published = entry + .published() + .map(|value| value.to_rfc3339()) + .unwrap_or_else(|| entry.updated().to_rfc3339()); + + Ok(ParsedNewsArticle { + source_id: source.id.clone(), + source: source.name.clone(), + headline: strip_markup(headline), + summary, + canonical_url: 
url.clone(), + url, + published_at: resolve_published_at(Some(published.as_str()), fetched_at)?, + published_ts: resolve_published_ts(Some(published.as_str()), fetched_at)?, + fetched_at: fetched_at.to_string(), + }) +} + +fn resolve_published_at(raw_value: Option<&str>, fallback: &str) -> Result { + match raw_value.and_then(parse_datetime) { + Some(value) => Ok(value.to_rfc3339()), + None if !fallback.is_empty() => Ok(fallback.to_string()), + None => Err(NewsError::Parse( + "feed entry missing publish time".to_string(), + )), + } +} + +fn resolve_published_ts(raw_value: Option<&str>, fallback: &str) -> Result { + match raw_value.and_then(parse_datetime) { + Some(value) => Ok(value.timestamp()), + None if !fallback.is_empty() => DateTime::parse_from_rfc3339(fallback) + .map(|value| value.timestamp()) + .map_err(|error| NewsError::Parse(error.to_string())), + None => Err(NewsError::Parse( + "feed entry missing publish timestamp".to_string(), + )), + } +} + +fn parse_datetime(value: &str) -> Option> { + let trimmed = value.trim(); + if trimmed.is_empty() { + return None; + } + + DateTime::parse_from_rfc2822(trimmed) + .or_else(|_| DateTime::parse_from_rfc3339(trimmed)) + .map(|value| value.with_timezone(&Utc)) + .ok() +} + +fn strip_markup(value: &str) -> String { + static TAG_RE: OnceLock = OnceLock::new(); + static WHITESPACE_RE: OnceLock = OnceLock::new(); + + let without_tags = TAG_RE + .get_or_init(|| Regex::new(r"(?is)<[^>]+>").expect("tag regex should compile")) + .replace_all(value, " "); + + WHITESPACE_RE + .get_or_init(|| Regex::new(r"\s+").expect("whitespace regex should compile")) + .replace_all(without_tags.trim(), " ") + .trim() + .to_string() +} + +#[cfg(test)] +mod tests { + use super::parse_feed; + use crate::news::types::FeedSourceRecord; + + #[test] + fn parse_feed_should_read_rss_fixture() { + let parsed = parse_feed( + &sample_source(), + include_str!("../../tests/fixtures/news/sample.rss"), + "2026-04-08T10:00:00Z", + ) + .unwrap(); + + 
assert_eq!(parsed.articles.len(), 2); + assert_eq!(parsed.articles[0].headline, "Fed signals steady rates"); + } + + #[test] + fn parse_feed_should_read_atom_fixture() { + let parsed = parse_feed( + &sample_source(), + include_str!("../../tests/fixtures/news/sample.atom"), + "2026-04-08T10:00:00Z", + ) + .unwrap(); + + assert_eq!(parsed.articles.len(), 2); + assert_eq!(parsed.articles[0].source, "Sample Feed"); + } + + #[test] + fn parse_feed_should_skip_malformed_entries() { + let parsed = parse_feed( + &sample_source(), + include_str!("../../tests/fixtures/news/malformed.rss"), + "2026-04-08T10:00:00Z", + ) + .unwrap(); + + assert_eq!(parsed.articles.len(), 1); + assert_eq!(parsed.malformed_entries, 1); + } + + fn sample_source() -> FeedSourceRecord { + FeedSourceRecord { + id: "sample".to_string(), + name: "Sample Feed".to_string(), + url: "https://example.com/feed.xml".to_string(), + refresh_minutes: 15, + etag: None, + last_modified: None, + last_checked_at: None, + last_success_at: None, + last_error: None, + failure_count: 0, + } + } +} diff --git a/MosaicIQ/src-tauri/src/news/repository.rs b/MosaicIQ/src-tauri/src/news/repository.rs new file mode 100644 index 0000000..637b5dc --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/repository.rs @@ -0,0 +1,751 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use rusqlite::types::Value; +use rusqlite::{params, params_from_iter, Connection, OptionalExtension}; + +use crate::news::types::{ + ArticleUpsertSummary, ClassifiedNewsArticle, FeedSourceRecord, HighlightReason, NewsArticle, + NewsSentiment, NewsSourceConfig, QueryNewsFeedRequest, QueryNewsFeedResponse, + UpdateNewsArticleStateRequest, +}; +use crate::news::{NewsError, Result}; + +#[derive(Clone)] +pub struct NewsRepository { + db_path: PathBuf, +} + +impl NewsRepository { + pub fn new(db_path: PathBuf) -> Result { + if let Some(parent) = db_path.parent() { + std::fs::create_dir_all(parent)?; + } + + let repository = Self { db_path }; + 
let connection = repository.open_connection()?; + repository.initialize_schema(&connection)?; + Ok(repository) + } + + pub fn sync_sources_blocking(&self, sources: Vec) -> Result<()> { + let mut connection = self.open_connection()?; + sync_sources_in_connection(&mut connection, sources) + } + + pub async fn sync_sources(&self, sources: Vec) -> Result<()> { + self.with_connection(move |connection| sync_sources_in_connection(connection, sources)) + .await + } + + pub async fn list_sources(&self) -> Result> { + self.with_connection(|connection| { + let mut statement = connection.prepare( + "SELECT id, name, url, refresh_minutes, etag, last_modified, + last_checked_at, last_success_at, last_error, failure_count + FROM feed_sources + ORDER BY name ASC", + )?; + let rows = statement.query_map([], |row| { + Ok(FeedSourceRecord { + id: row.get(0)?, + name: row.get(1)?, + url: row.get(2)?, + refresh_minutes: row.get::<_, u32>(3)?, + etag: row.get(4)?, + last_modified: row.get(5)?, + last_checked_at: row.get(6)?, + last_success_at: row.get(7)?, + last_error: row.get(8)?, + failure_count: row.get::<_, u32>(9)?, + }) + })?; + + rows.collect::, _>>() + .map_err(NewsError::from) + }) + .await + } + + pub async fn record_fetch_success( + &self, + source_id: String, + checked_at: String, + etag: Option, + last_modified: Option, + ) -> Result<()> { + self.with_connection(move |connection| { + connection.execute( + "UPDATE feed_sources + SET etag = COALESCE(?2, etag), + last_modified = COALESCE(?3, last_modified), + last_checked_at = ?4, + last_success_at = ?4, + last_error = NULL, + failure_count = 0 + WHERE id = ?1", + params![source_id, etag, last_modified, checked_at], + )?; + Ok(()) + }) + .await + } + + pub async fn record_fetch_failure( + &self, + source_id: String, + checked_at: String, + error_message: String, + ) -> Result<()> { + self.with_connection(move |connection| { + connection.execute( + "UPDATE feed_sources + SET last_checked_at = ?2, + last_error = ?3, + 
failure_count = failure_count + 1 + WHERE id = ?1", + params![source_id, checked_at, error_message], + )?; + Ok(()) + }) + .await + } + + pub async fn upsert_articles( + &self, + articles: Vec, + ) -> Result { + self.with_connection(move |connection| { + let transaction = connection.transaction()?; + let mut summary = ArticleUpsertSummary::default(); + + for article in articles { + let existing = transaction + .query_row( + "SELECT id, source_id, source, headline, summary, url, canonical_url, + published_at, published_ts, sentiment, sentiment_score, highlight_reason + FROM articles + WHERE fingerprint = ?1", + params![article.fingerprint.clone()], + |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, String>(3)?, + row.get::<_, String>(4)?, + row.get::<_, Option>(5)?, + row.get::<_, Option>(6)?, + row.get::<_, String>(7)?, + row.get::<_, i64>(8)?, + row.get::<_, String>(9)?, + row.get::<_, f64>(10)?, + row.get::<_, Option>(11)?, + )) + }, + ) + .optional()?; + + let is_changed = existing + .as_ref() + .map(|existing| { + existing.1 != article.source_id + || existing.2 != article.source + || existing.3 != article.headline + || existing.4 != article.summary + || existing.5 != article.url + || existing.6 != article.canonical_url + || existing.7 != article.published_at + || existing.8 != article.published_ts + || existing.9 != sentiment_to_db(&article.sentiment) + || (existing.10 - article.sentiment_score).abs() > f64::EPSILON + || existing.11 != article.highlight_reason.as_ref().map(highlight_to_db) + }) + .unwrap_or(true); + + if existing.is_none() { + summary.new_articles += 1; + } else if is_changed { + summary.updated_articles += 1; + } else { + summary.unchanged_articles += 1; + } + + transaction.execute( + "INSERT INTO articles ( + id, source_id, source, headline, summary, url, canonical_url, fingerprint, + published_at, published_ts, fetched_at, sentiment, sentiment_score, + highlight_reason, 
is_read, is_saved + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?1, ?8, ?9, ?10, ?11, ?12, ?13, 0, 0) + ON CONFLICT(id) DO UPDATE SET + source_id = excluded.source_id, + source = excluded.source, + headline = excluded.headline, + summary = excluded.summary, + url = excluded.url, + canonical_url = excluded.canonical_url, + published_at = excluded.published_at, + published_ts = excluded.published_ts, + fetched_at = excluded.fetched_at, + sentiment = excluded.sentiment, + sentiment_score = excluded.sentiment_score, + highlight_reason = excluded.highlight_reason", + params![ + article.fingerprint, + article.source_id, + article.source, + article.headline, + article.summary, + article.url, + article.canonical_url, + article.published_at, + article.published_ts, + article.fetched_at, + sentiment_to_db(&article.sentiment), + article.sentiment_score, + article.highlight_reason.as_ref().map(highlight_to_db), + ], + )?; + + transaction.execute( + "DELETE FROM article_tickers WHERE article_id = ?1", + params![article.fingerprint], + )?; + for ticker in &article.tickers { + transaction.execute( + "INSERT OR IGNORE INTO article_tickers (article_id, ticker) VALUES (?1, ?2)", + params![article.fingerprint, ticker], + )?; + } + + transaction.execute( + "DELETE FROM news_fts WHERE article_id = ?1", + params![article.fingerprint], + )?; + transaction.execute( + "INSERT INTO news_fts (article_id, headline, summary) VALUES (?1, ?2, ?3)", + params![article.fingerprint, article.headline, article.summary], + )?; + } + + transaction.commit()?; + Ok(summary) + }) + .await + } + + pub async fn query_articles( + &self, + request: QueryNewsFeedRequest, + ) -> Result { + self.with_connection(move |connection| query_articles(connection, request)) + .await + } + + pub async fn update_article_state(&self, request: UpdateNewsArticleStateRequest) -> Result<()> { + self.with_connection(move |connection| { + let rows_updated = connection.execute( + "UPDATE articles + SET is_read = COALESCE(?2, is_read), + 
is_saved = COALESCE(?3, is_saved) + WHERE id = ?1", + params![ + request.article_id, + request.is_read.map(i64::from), + request.is_saved.map(i64::from), + ], + )?; + + if rows_updated == 0 { + return Err(NewsError::ArticleNotFound(request.article_id)); + } + + Ok(()) + }) + .await + } + + fn open_connection(&self) -> Result { + open_connection(&self.db_path) + } + + fn initialize_schema(&self, connection: &Connection) -> Result<()> { + connection.execute_batch( + "PRAGMA foreign_keys = ON; + PRAGMA journal_mode = WAL; + PRAGMA synchronous = NORMAL; + CREATE TABLE IF NOT EXISTS feed_sources ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + url TEXT NOT NULL, + refresh_minutes INTEGER NOT NULL, + etag TEXT, + last_modified TEXT, + last_checked_at TEXT, + last_success_at TEXT, + last_error TEXT, + failure_count INTEGER NOT NULL DEFAULT 0 + ); + CREATE TABLE IF NOT EXISTS articles ( + id TEXT PRIMARY KEY, + source_id TEXT NOT NULL REFERENCES feed_sources(id) ON DELETE CASCADE, + source TEXT NOT NULL, + headline TEXT NOT NULL, + summary TEXT NOT NULL, + url TEXT, + canonical_url TEXT, + fingerprint TEXT NOT NULL UNIQUE, + published_at TEXT NOT NULL, + published_ts INTEGER NOT NULL, + fetched_at TEXT NOT NULL, + sentiment TEXT NOT NULL, + sentiment_score REAL NOT NULL, + highlight_reason TEXT, + is_read INTEGER NOT NULL DEFAULT 0, + is_saved INTEGER NOT NULL DEFAULT 0 + ); + CREATE INDEX IF NOT EXISTS articles_published_ts_idx ON articles (published_ts DESC); + CREATE INDEX IF NOT EXISTS articles_highlight_idx ON articles (highlight_reason); + CREATE INDEX IF NOT EXISTS articles_saved_idx ON articles (is_saved, published_ts DESC); + CREATE INDEX IF NOT EXISTS articles_read_idx ON articles (is_read, published_ts DESC); + CREATE TABLE IF NOT EXISTS article_tickers ( + article_id TEXT NOT NULL REFERENCES articles(id) ON DELETE CASCADE, + ticker TEXT NOT NULL, + PRIMARY KEY (article_id, ticker) + ); + CREATE INDEX IF NOT EXISTS article_tickers_ticker_idx ON article_tickers 
(ticker); + CREATE VIRTUAL TABLE IF NOT EXISTS news_fts USING fts5(article_id UNINDEXED, headline, summary);", + )?; + Ok(()) + } + + async fn with_connection(&self, task: F) -> Result + where + F: FnOnce(&mut Connection) -> Result + Send + 'static, + T: Send + 'static, + { + let db_path = self.db_path.clone(); + tokio::task::spawn_blocking(move || { + let mut connection = open_connection(&db_path)?; + task(&mut connection) + }) + .await + .map_err(|error| NewsError::Join(error.to_string()))? + } +} + +fn sync_sources_in_connection( + connection: &mut Connection, + sources: Vec, +) -> Result<()> { + let transaction = connection.transaction()?; + for source in sources { + transaction.execute( + "INSERT INTO feed_sources ( + id, name, url, refresh_minutes, failure_count + ) VALUES (?1, ?2, ?3, ?4, 0) + ON CONFLICT(id) DO UPDATE SET + name = excluded.name, + url = excluded.url, + refresh_minutes = excluded.refresh_minutes", + params![ + source.id, + source.name, + source.url, + i64::from(source.refresh_minutes) + ], + )?; + } + transaction.commit()?; + Ok(()) +} + +fn open_connection(path: &Path) -> Result { + let connection = Connection::open(path)?; + connection.execute_batch( + "PRAGMA foreign_keys = ON; + PRAGMA journal_mode = WAL; + PRAGMA synchronous = NORMAL;", + )?; + Ok(connection) +} + +fn query_articles( + connection: &mut Connection, + request: QueryNewsFeedRequest, +) -> Result { + let (where_clause, parameters) = build_article_filters(&request); + let count_sql = format!("SELECT COUNT(*) FROM articles a WHERE {where_clause}"); + let total = connection.query_row(&count_sql, params_from_iter(parameters.iter()), |row| { + row.get::<_, usize>(0) + })?; + + let limit = i64::try_from(request.limit.unwrap_or(50).min(200)) + .map_err(|error| NewsError::Parse(error.to_string()))?; + let offset = i64::try_from(request.offset.unwrap_or(0)) + .map_err(|error| NewsError::Parse(error.to_string()))?; + + let mut query_params = parameters.clone(); + 
query_params.push(Value::Integer(limit)); + query_params.push(Value::Integer(offset)); + + let query_sql = format!( + "SELECT a.id, a.source_id, a.source, a.headline, a.summary, a.url, a.canonical_url, + a.published_at, a.published_ts, a.fetched_at, a.sentiment, a.sentiment_score, + a.highlight_reason, a.is_read, a.is_saved + FROM articles a + WHERE {where_clause} + ORDER BY a.published_ts DESC, a.fetched_at DESC + LIMIT ? OFFSET ?" + ); + + let mut statement = connection.prepare(&query_sql)?; + let rows = statement.query_map(params_from_iter(query_params.iter()), |row| { + Ok(NewsArticle { + id: row.get(0)?, + source_id: row.get(1)?, + source: row.get(2)?, + headline: row.get(3)?, + summary: row.get(4)?, + url: row.get(5)?, + canonical_url: row.get(6)?, + published_at: row.get(7)?, + published_ts: row.get(8)?, + fetched_at: row.get(9)?, + sentiment: sentiment_from_db(&row.get::<_, String>(10)?), + sentiment_score: row.get(11)?, + highlight_reason: row + .get::<_, Option>(12)? + .as_deref() + .map(highlight_from_db), + is_read: row.get::<_, i64>(13)? != 0, + is_saved: row.get::<_, i64>(14)? != 0, + tickers: Vec::new(), + }) + })?; + + let mut articles = rows.collect::, _>>()?; + let ticker_map = load_tickers_for_articles(connection, &articles)?; + for article in &mut articles { + article.tickers = ticker_map.get(&article.id).cloned().unwrap_or_default(); + } + + let last_synced_at = connection + .query_row("SELECT MAX(last_success_at) FROM feed_sources", [], |row| { + row.get::<_, Option>(0) + }) + .optional()? 
+ .flatten(); + let sources = load_source_statuses(connection)?; + + Ok(QueryNewsFeedResponse { + articles, + total, + last_synced_at, + sources, + }) +} + +fn build_article_filters(request: &QueryNewsFeedRequest) -> (String, Vec) { + let mut clauses = vec!["1 = 1".to_string()]; + let mut parameters = Vec::new(); + + if let Some(ticker) = request + .ticker + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + { + clauses.push( + "EXISTS (SELECT 1 FROM article_tickers at WHERE at.article_id = a.id AND at.ticker = ?)" + .to_string(), + ); + parameters.push(Value::Text(ticker.to_ascii_uppercase())); + } + + if request.only_highlighted.unwrap_or(false) { + clauses.push("a.highlight_reason IS NOT NULL".to_string()); + } + if request.only_saved.unwrap_or(false) { + clauses.push("a.is_saved = 1".to_string()); + } + if request.only_unread.unwrap_or(false) { + clauses.push("a.is_read = 0".to_string()); + } + if let Some(search) = request + .search + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + { + clauses.push( + "EXISTS (SELECT 1 FROM news_fts WHERE news_fts.article_id = a.id AND news_fts MATCH ?)" + .to_string(), + ); + parameters.push(Value::Text(fts_query(search))); + } + + (clauses.join(" AND "), parameters) +} + +fn fts_query(input: &str) -> String { + let tokens = input + .split(|character: char| !character.is_ascii_alphanumeric()) + .filter(|value| !value.is_empty()) + .map(|value| format!("{value}*")) + .collect::>(); + + if tokens.is_empty() { + input.to_string() + } else { + tokens.join(" AND ") + } +} + +fn load_tickers_for_articles( + connection: &Connection, + articles: &[NewsArticle], +) -> Result>> { + let mut map = HashMap::new(); + let mut statement = connection + .prepare("SELECT ticker FROM article_tickers WHERE article_id = ?1 ORDER BY ticker ASC")?; + + for article in articles { + let rows = statement.query_map(params![article.id], |row| row.get::<_, String>(0))?; + map.insert( + article.id.clone(), + 
rows.collect::, _>>()?, + ); + } + + Ok(map) +} + +fn load_source_statuses( + connection: &Connection, +) -> Result> { + let mut statement = connection.prepare( + "SELECT id, name, url, refresh_minutes, last_checked_at, last_success_at, last_error, failure_count + FROM feed_sources + ORDER BY name ASC", + )?; + let rows = statement.query_map([], |row| { + Ok(crate::news::types::NewsSourceStatus { + id: row.get(0)?, + name: row.get(1)?, + url: row.get(2)?, + refresh_minutes: row.get::<_, u32>(3)?, + last_checked_at: row.get(4)?, + last_success_at: row.get(5)?, + last_error: row.get(6)?, + failure_count: row.get::<_, u32>(7)?, + }) + })?; + + rows.collect::, _>>() + .map_err(NewsError::from) +} + +fn sentiment_to_db(value: &NewsSentiment) -> &'static str { + match value { + NewsSentiment::Bull => "BULL", + NewsSentiment::Bear => "BEAR", + NewsSentiment::Neutral => "NEUTRAL", + } +} + +fn sentiment_from_db(value: &str) -> NewsSentiment { + match value { + "BULL" => NewsSentiment::Bull, + "BEAR" => NewsSentiment::Bear, + _ => NewsSentiment::Neutral, + } +} + +fn highlight_to_db(value: &HighlightReason) -> String { + match value { + HighlightReason::BreakingKeyword => "breaking_keyword", + HighlightReason::MacroEvent => "macro_event", + HighlightReason::StrongSentiment => "strong_sentiment", + HighlightReason::TickerDetected => "ticker_detected", + HighlightReason::RecentHighValue => "recent_high_value", + } + .to_string() +} + +fn highlight_from_db(value: &str) -> HighlightReason { + match value { + "breaking_keyword" => HighlightReason::BreakingKeyword, + "macro_event" => HighlightReason::MacroEvent, + "strong_sentiment" => HighlightReason::StrongSentiment, + "ticker_detected" => HighlightReason::TickerDetected, + _ => HighlightReason::RecentHighValue, + } +} + +#[cfg(test)] +mod tests { + use std::fs; + use std::path::PathBuf; + use std::time::{SystemTime, UNIX_EPOCH}; + + use super::NewsRepository; + use crate::news::types::{ + ClassifiedNewsArticle, 
HighlightReason, NewsSentiment, NewsSourceConfig, + QueryNewsFeedRequest, UpdateNewsArticleStateRequest, + }; + + #[tokio::test] + async fn upsert_articles_should_preserve_read_and_saved_flags() { + let repository = sample_repository().await; + seed_source(&repository).await; + repository + .upsert_articles(vec![sample_article("article-1", vec!["NVDA".to_string()])]) + .await + .unwrap(); + repository + .update_article_state(UpdateNewsArticleStateRequest { + article_id: "article-1".to_string(), + is_read: Some(true), + is_saved: Some(true), + }) + .await + .unwrap(); + + repository + .upsert_articles(vec![ClassifiedNewsArticle { + headline: "Updated headline".to_string(), + ..sample_article("article-1", vec!["NVDA".to_string()]) + }]) + .await + .unwrap(); + + let response = repository + .query_articles(QueryNewsFeedRequest { + ticker: Some("NVDA".to_string()), + search: None, + only_highlighted: None, + only_saved: Some(true), + only_unread: None, + limit: Some(10), + offset: Some(0), + }) + .await + .unwrap(); + + assert_eq!(response.articles.len(), 1); + assert!(response.articles[0].is_read); + assert!(response.articles[0].is_saved); + } + + #[tokio::test] + async fn query_articles_should_filter_by_ticker_saved_unread_highlight_and_search() { + let repository = sample_repository().await; + seed_source(&repository).await; + repository + .upsert_articles(vec![ + sample_article("article-1", vec!["NVDA".to_string()]), + ClassifiedNewsArticle { + fingerprint: "article-2".to_string(), + headline: "Fed policy update".to_string(), + summary: "Macro event".to_string(), + tickers: vec!["AAPL".to_string()], + highlight_reason: Some(HighlightReason::MacroEvent), + ..sample_article("article-2", vec!["AAPL".to_string()]) + }, + ]) + .await + .unwrap(); + repository + .update_article_state(UpdateNewsArticleStateRequest { + article_id: "article-2".to_string(), + is_read: Some(true), + is_saved: Some(true), + }) + .await + .unwrap(); + + let response = repository + 
.query_articles(QueryNewsFeedRequest { + ticker: Some("AAPL".to_string()), + search: Some("policy".to_string()), + only_highlighted: Some(true), + only_saved: Some(true), + only_unread: Some(false), + limit: Some(10), + offset: Some(0), + }) + .await + .unwrap(); + + assert_eq!(response.total, 1); + assert_eq!(response.articles[0].id, "article-2"); + } + + #[tokio::test] + async fn new_should_create_schema_on_empty_database() { + let root = unique_test_directory("news-repository"); + let repository = NewsRepository::new(root.join("news.sqlite")).unwrap(); + + let response = repository + .query_articles(QueryNewsFeedRequest { + ticker: None, + search: None, + only_highlighted: None, + only_saved: None, + only_unread: None, + limit: Some(5), + offset: Some(0), + }) + .await + .unwrap(); + + assert_eq!(response.total, 0); + } + + async fn sample_repository() -> NewsRepository { + let root = unique_test_directory("news-repository"); + NewsRepository::new(root.join("news.sqlite")).unwrap() + } + + async fn seed_source(repository: &NewsRepository) { + repository + .sync_sources(vec![NewsSourceConfig { + id: "sample".to_string(), + name: "Sample".to_string(), + url: "https://example.com/feed.xml".to_string(), + refresh_minutes: 15, + }]) + .await + .unwrap(); + } + + fn sample_article(fingerprint: &str, tickers: Vec) -> ClassifiedNewsArticle { + ClassifiedNewsArticle { + fingerprint: fingerprint.to_string(), + source_id: "sample".to_string(), + source: "Sample".to_string(), + headline: "NVIDIA beats estimates".to_string(), + summary: "Strong demand lifts outlook".to_string(), + url: Some("https://example.com/story".to_string()), + canonical_url: Some("https://example.com/story".to_string()), + published_at: "2026-04-08T10:00:00Z".to_string(), + published_ts: 1_775_642_400, + fetched_at: "2026-04-08T10:05:00Z".to_string(), + sentiment: NewsSentiment::Bull, + sentiment_score: 0.64, + highlight_reason: Some(HighlightReason::TickerDetected), + tickers, + } + } + + fn 
unique_test_directory(prefix: &str) -> PathBuf { + let suffix = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let path = std::env::temp_dir().join(format!("{prefix}-{suffix}")); + fs::create_dir_all(&path).unwrap(); + path + } +} diff --git a/MosaicIQ/src-tauri/src/news/scheduler.rs b/MosaicIQ/src-tauri/src/news/scheduler.rs new file mode 100644 index 0000000..c97b1c5 --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/scheduler.rs @@ -0,0 +1,28 @@ +use std::sync::Arc; +use std::time::Duration; + +use tokio::time; + +use crate::news::service::NewsService; +use crate::news::types::{RefreshNewsFeedRequest, RefreshNewsFeedResult}; + +pub fn spawn_news_scheduler(service: Arc, on_refresh: F) +where + F: Fn(RefreshNewsFeedResult) + Send + Sync + 'static, +{ + let callback = Arc::new(on_refresh); + tokio::spawn(async move { + time::sleep(Duration::from_secs(5)).await; + + loop { + if let Ok(result) = service + .refresh_feed(RefreshNewsFeedRequest { force: Some(false) }) + .await + { + callback(result); + } + + time::sleep(Duration::from_secs(15 * 60)).await; + } + }); +} diff --git a/MosaicIQ/src-tauri/src/news/service.rs b/MosaicIQ/src-tauri/src/news/service.rs new file mode 100644 index 0000000..2182d72 --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/service.rs @@ -0,0 +1,387 @@ +use std::path::PathBuf; +use std::sync::Arc; + +use chrono::Utc; +use futures::stream::{self, StreamExt}; + +use crate::news::classifier::classify_article; +use crate::news::config::load_or_bootstrap_config; +use crate::news::fetcher::FeedFetcher; +use crate::news::parser::parse_feed; +use crate::news::repository::NewsRepository; +use crate::news::types::{ + ArticleUpsertSummary, FeedSourceRecord, FetchResultKind, QueryNewsFeedRequest, + QueryNewsFeedResponse, RefreshNewsFeedRequest, RefreshNewsFeedResult, + UpdateNewsArticleStateRequest, +}; +use crate::news::Result; + +#[derive(Clone)] +pub struct NewsService { + repository: Arc, + fetcher: FeedFetcher, + 
config_path: PathBuf, + default_config_bytes: Arc>, +} + +impl NewsService { + pub fn new( + db_path: PathBuf, + config_path: PathBuf, + default_config_bytes: &[u8], + ) -> Result { + Self::with_fetcher( + db_path, + config_path, + default_config_bytes, + FeedFetcher::new()?, + ) + } + + pub(crate) fn with_fetcher( + db_path: PathBuf, + config_path: PathBuf, + default_config_bytes: &[u8], + fetcher: FeedFetcher, + ) -> Result { + let feeds = load_or_bootstrap_config(&config_path, default_config_bytes)?; + let repository = Arc::new(NewsRepository::new(db_path)?); + repository.sync_sources_blocking(feeds)?; + + Ok(Self { + repository, + fetcher, + config_path, + default_config_bytes: Arc::new(default_config_bytes.to_vec()), + }) + } + + pub async fn query_feed(&self, request: QueryNewsFeedRequest) -> Result { + self.repository.query_articles(request).await + } + + pub async fn refresh_feed( + &self, + request: RefreshNewsFeedRequest, + ) -> Result { + let force = request.force.unwrap_or(false); + let feeds = load_or_bootstrap_config(&self.config_path, &self.default_config_bytes)?; + self.repository.sync_sources(feeds).await?; + + let now = Utc::now(); + let sources = self.repository.list_sources().await?; + let due_sources = sources + .into_iter() + .filter(|source| force || source.is_due(now)) + .collect::>(); + + if due_sources.is_empty() { + return Ok(RefreshNewsFeedResult { + feeds_checked: 0, + feeds_succeeded: 0, + feeds_failed: 0, + new_articles: 0, + updated_articles: 0, + unchanged_articles: 0, + finished_at: now.to_rfc3339(), + }); + } + + let outcomes = stream::iter(due_sources) + .map(|source| { + let service = self.clone(); + async move { service.refresh_source(source, force).await } + }) + .buffer_unordered(4) + .collect::>() + .await; + + let mut result = RefreshNewsFeedResult { + feeds_checked: 0, + feeds_succeeded: 0, + feeds_failed: 0, + new_articles: 0, + updated_articles: 0, + unchanged_articles: 0, + finished_at: Utc::now().to_rfc3339(), + }; + + 
for outcome in outcomes { + result.feeds_checked += 1; + if outcome.succeeded { + result.feeds_succeeded += 1; + } else { + result.feeds_failed += 1; + } + result.new_articles += outcome.upsert_summary.new_articles; + result.updated_articles += outcome.upsert_summary.updated_articles; + result.unchanged_articles += outcome.upsert_summary.unchanged_articles; + } + + Ok(result) + } + + pub async fn update_article_state(&self, request: UpdateNewsArticleStateRequest) -> Result<()> { + self.repository.update_article_state(request).await + } + + async fn refresh_source(&self, source: FeedSourceRecord, force: bool) -> RefreshOutcome { + let fetched = match self.fetcher.fetch(&source, force).await { + Ok(value) => value, + Err(error) => { + let checked_at = Utc::now().to_rfc3339(); + let _ = self + .repository + .record_fetch_failure(source.id, checked_at, error.to_string()) + .await; + return RefreshOutcome::failed(); + } + }; + + if fetched.kind == FetchResultKind::NotModified { + let _ = self + .repository + .record_fetch_success( + source.id, + fetched.checked_at, + fetched.etag, + fetched.last_modified, + ) + .await; + return RefreshOutcome::succeeded(ArticleUpsertSummary::default()); + } + + let Some(body) = fetched.body.as_deref() else { + let _ = self + .repository + .record_fetch_failure( + source.id, + fetched.checked_at, + "feed body missing after successful fetch".to_string(), + ) + .await; + return RefreshOutcome::failed(); + }; + + let parsed = match parse_feed(&source, body, &fetched.checked_at) { + Ok(value) => value, + Err(error) => { + let _ = self + .repository + .record_fetch_failure(source.id, fetched.checked_at, error.to_string()) + .await; + return RefreshOutcome::failed(); + } + }; + + let articles = parsed + .articles + .into_iter() + .map(classify_article) + .collect::>(); + let upsert_summary = match self.repository.upsert_articles(articles).await { + Ok(value) => value, + Err(error) => { + let _ = self + .repository + 
.record_fetch_failure(source.id, fetched.checked_at, error.to_string()) + .await; + return RefreshOutcome::failed(); + } + }; + + let _ = self + .repository + .record_fetch_success( + source.id, + fetched.checked_at, + fetched.etag, + fetched.last_modified, + ) + .await; + RefreshOutcome::succeeded(upsert_summary) + } +} + +struct RefreshOutcome { + succeeded: bool, + upsert_summary: ArticleUpsertSummary, +} + +impl RefreshOutcome { + fn succeeded(upsert_summary: ArticleUpsertSummary) -> Self { + Self { + succeeded: true, + upsert_summary, + } + } + + fn failed() -> Self { + Self { + succeeded: false, + upsert_summary: ArticleUpsertSummary::default(), + } + } +} + +#[cfg(test)] +mod tests { + use std::fs; + use std::time::Duration; + + use tempfile::tempdir; + use wiremock::matchers::{header, method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + use super::NewsService; + use crate::news::fetcher::FeedFetcher; + use crate::news::types::{QueryNewsFeedRequest, RefreshNewsFeedRequest}; + + #[tokio::test] + async fn refresh_feed_should_continue_when_one_feed_times_out() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/ok.xml")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "application/rss+xml") + .set_body_string(include_str!("../../tests/fixtures/news/sample.rss")), + ) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path("/slow.xml")) + .respond_with( + ResponseTemplate::new(200) + .set_delay(Duration::from_millis(150)) + .set_body_string(include_str!("../../tests/fixtures/news/sample.rss")), + ) + .mount(&server) + .await; + + let temp_dir = tempdir().unwrap(); + let config = format!( + r#"{{ + "feeds": [ + {{"id":"ok","name":"OK Feed","url":"{}/ok.xml","refreshMinutes":15}}, + {{"id":"slow","name":"Slow Feed","url":"{}/slow.xml","refreshMinutes":15}} + ] + }}"#, + server.uri(), + server.uri(), + ); + let service = NewsService::with_fetcher( + 
temp_dir.path().join("news.sqlite"), + temp_dir.path().join("news-feeds.json"), + config.as_bytes(), + FeedFetcher::with_timeout(Duration::from_millis(50)).unwrap(), + ) + .unwrap(); + + let result = service + .refresh_feed(RefreshNewsFeedRequest { force: Some(true) }) + .await + .unwrap(); + let response = service + .query_feed(QueryNewsFeedRequest { + ticker: None, + search: None, + only_highlighted: None, + only_saved: None, + only_unread: None, + limit: Some(10), + offset: Some(0), + }) + .await + .unwrap(); + + assert_eq!(result.feeds_checked, 2); + assert_eq!(result.feeds_succeeded, 1); + assert_eq!(result.feeds_failed, 1); + assert_eq!(response.total, 2); + } + + #[tokio::test] + async fn refresh_source_should_use_conditional_get_headers_after_initial_sync() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/etag.xml")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("etag", "v1") + .set_body_string(include_str!("../../tests/fixtures/news/sample.rss")), + ) + .mount(&server) + .await; + + let temp_dir = tempdir().unwrap(); + let config = format!( + r#"{{"feeds":[{{"id":"etag","name":"ETag Feed","url":"{}/etag.xml","refreshMinutes":15}}]}}"#, + server.uri(), + ); + let service = NewsService::with_fetcher( + temp_dir.path().join("news.sqlite"), + temp_dir.path().join("news-feeds.json"), + config.as_bytes(), + FeedFetcher::with_timeout(Duration::from_millis(100)).unwrap(), + ) + .unwrap(); + + service + .refresh_feed(RefreshNewsFeedRequest { force: Some(true) }) + .await + .unwrap(); + + server.reset().await; + Mock::given(method("GET")) + .and(path("/etag.xml")) + .and(header("if-none-match", "v1")) + .respond_with(ResponseTemplate::new(304)) + .mount(&server) + .await; + + let source = service + .repository + .list_sources() + .await + .unwrap() + .into_iter() + .next() + .unwrap(); + let outcome = service.refresh_source(source, false).await; + + assert!(outcome.succeeded); + 
assert_eq!(outcome.upsert_summary.new_articles, 0); + } + + #[tokio::test] + async fn startup_should_create_database_schema_on_empty_path() { + let temp_dir = tempdir().unwrap(); + let db_path = temp_dir.path().join("news.sqlite"); + let config_path = temp_dir.path().join("news-feeds.json"); + + let service = NewsService::new( + db_path.clone(), + config_path, + br#"{"feeds":[{"id":"sample","name":"Sample Feed","url":"https://example.com/feed.xml","refreshMinutes":15}]}"#, + ) + .unwrap(); + + let metadata = fs::metadata(db_path).unwrap(); + let response = service + .query_feed(QueryNewsFeedRequest { + ticker: None, + search: None, + only_highlighted: None, + only_saved: None, + only_unread: None, + limit: Some(10), + offset: Some(0), + }) + .await + .unwrap(); + + assert!(metadata.is_file()); + assert_eq!(response.total, 0); + } +} diff --git a/MosaicIQ/src-tauri/src/news/types.rs b/MosaicIQ/src-tauri/src/news/types.rs new file mode 100644 index 0000000..22f9464 --- /dev/null +++ b/MosaicIQ/src-tauri/src/news/types.rs @@ -0,0 +1,225 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +pub enum NewsSentiment { + Bull, + Bear, + Neutral, +} + +impl Default for NewsSentiment { + fn default() -> Self { + Self::Neutral + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum HighlightReason { + BreakingKeyword, + MacroEvent, + StrongSentiment, + TickerDetected, + RecentHighValue, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct NewsArticle { + pub id: String, + pub source_id: String, + pub source: String, + pub headline: String, + pub summary: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub canonical_url: Option, + pub 
published_at: String, + pub published_ts: i64, + pub fetched_at: String, + pub sentiment: NewsSentiment, + pub sentiment_score: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub highlight_reason: Option, + pub tickers: Vec, + pub is_read: bool, + pub is_saved: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct QueryNewsFeedRequest { + #[serde(skip_serializing_if = "Option::is_none")] + pub ticker: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub search: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub only_highlighted: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub only_saved: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub only_unread: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub limit: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub offset: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct QueryNewsFeedResponse { + pub articles: Vec, + pub total: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub last_synced_at: Option, + pub sources: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "camelCase")] +pub struct RefreshNewsFeedRequest { + #[serde(skip_serializing_if = "Option::is_none")] + pub force: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct RefreshNewsFeedResult { + pub feeds_checked: usize, + pub feeds_succeeded: usize, + pub feeds_failed: usize, + pub new_articles: usize, + pub updated_articles: usize, + pub unchanged_articles: usize, + pub finished_at: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct UpdateNewsArticleStateRequest { + pub article_id: String, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub is_read: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_saved: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct NewsSourceConfig { + pub id: String, + pub name: String, + pub url: String, + pub refresh_minutes: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct NewsSourceConfigFile { + pub feeds: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct NewsSourceStatus { + pub id: String, + pub name: String, + pub url: String, + pub refresh_minutes: u32, + #[serde(skip_serializing_if = "Option::is_none")] + pub last_checked_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub last_success_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub last_error: Option, + pub failure_count: u32, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct FeedSourceRecord { + pub id: String, + pub name: String, + pub url: String, + pub refresh_minutes: u32, + pub etag: Option, + pub last_modified: Option, + pub last_checked_at: Option, + pub last_success_at: Option, + pub last_error: Option, + pub failure_count: u32, +} + +impl FeedSourceRecord { + pub(crate) fn is_due(&self, now: DateTime) -> bool { + let Some(last_checked_at) = self.last_checked_at.as_deref() else { + return true; + }; + + DateTime::parse_from_rfc3339(last_checked_at) + .map(|value| value.with_timezone(&Utc)) + .map(|value| now >= value + chrono::Duration::minutes(i64::from(self.refresh_minutes))) + .unwrap_or(true) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct ParsedFeed { + pub articles: Vec, + pub malformed_entries: usize, +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct ParsedNewsArticle { + pub source_id: String, + pub source: String, + pub headline: 
String, + pub summary: String, + pub url: Option, + pub canonical_url: Option, + pub published_at: String, + pub published_ts: i64, + pub fetched_at: String, +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct ClassifiedNewsArticle { + pub fingerprint: String, + pub source_id: String, + pub source: String, + pub headline: String, + pub summary: String, + pub url: Option, + pub canonical_url: Option, + pub published_at: String, + pub published_ts: i64, + pub fetched_at: String, + pub sentiment: NewsSentiment, + pub sentiment_score: f64, + pub highlight_reason: Option, + pub tickers: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum FetchResultKind { + Updated, + NotModified, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct FetchedFeed { + pub kind: FetchResultKind, + pub body: Option, + pub etag: Option, + pub last_modified: Option, + pub checked_at: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub(crate) struct ArticleUpsertSummary { + pub new_articles: usize, + pub updated_articles: usize, + pub unchanged_articles: usize, +} diff --git a/MosaicIQ/src-tauri/src/research/ai.rs b/MosaicIQ/src-tauri/src/research/ai.rs new file mode 100644 index 0000000..802eb3c --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/ai.rs @@ -0,0 +1,85 @@ +//! AI enrichment abstraction with a deterministic fallback implementation. 
+ +use crate::research::heuristics::classify_note; +use crate::research::types::{ModelInfo, NoteType, ResearchNote, SourceKind, ValuationRef}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct AiEnrichmentResult { + pub note_type: Option, + pub annotation: Option, + pub tags: Vec, + pub risks: Vec, + pub catalysts: Vec, + pub valuation_refs: Vec, + pub missing_evidence: bool, +} + +pub(crate) trait ResearchAiGateway: Send + Sync { + fn enrich_note(&self, note: &ResearchNote, model_info: Option) -> AiEnrichmentResult; +} + +#[derive(Debug, Clone, Default)] +pub(crate) struct DeterministicResearchAiGateway; + +impl ResearchAiGateway for DeterministicResearchAiGateway { + fn enrich_note(&self, note: &ResearchNote, _model_info: Option) -> AiEnrichmentResult { + let heuristic = classify_note( + ¬e.cleaned_text, + note.provenance.source_kind, + Some(note.note_type), + ); + let annotation = Some(build_annotation(note)); + + AiEnrichmentResult { + note_type: Some(heuristic.note_type), + annotation, + tags: heuristic.tags, + risks: heuristic.risks, + catalysts: heuristic.catalysts, + valuation_refs: if note.valuation_refs.is_empty() { + heuristic.valuation_refs + } else { + note.valuation_refs.clone() + }, + missing_evidence: note.source_id.is_none() + && !matches!(note.note_type, NoteType::Question | NoteType::FollowUpTask | NoteType::SourceReference), + } + } +} + +fn build_annotation(note: &ResearchNote) -> String { + let kind = match note.note_type { + NoteType::ManagementSignal => "Management is signaling a directional read that should be checked against operating evidence.", + NoteType::ValuationPoint => "This note matters if the valuation frame can be tied to a concrete operating driver.", + NoteType::Risk => "This note points to downside that should be sized and sourced before it informs conviction.", + NoteType::Catalyst => "This note suggests a possible stock-moving trigger and should be paired with timing evidence.", + NoteType::Fact => "This datapoint is 
most useful when linked directly into a claim, risk, or valuation bridge.", + NoteType::Quote => "Treat the quote as evidence, then separate the analyst interpretation into linked notes.", + NoteType::Claim | NoteType::Thesis | NoteType::SubThesis => "This statement is an inference until supporting evidence and explicit counterpoints are attached.", + NoteType::ChannelCheck => "This datapoint is potentially informative but should be kept caveated unless corroborated.", + _ => "This note may be more valuable once it is linked into a driver, risk, catalyst, or source trail.", + }; + + if note.source_id.is_none() && !matches!(note.note_type, NoteType::Question | NoteType::FollowUpTask) { + format!("{kind} Evidence is still missing or indirect.") + } else { + kind.to_string() + } +} + +pub(crate) fn build_model_info(model: &str, task_profile: &str) -> Option { + if model.trim().is_empty() { + return None; + } + + Some(ModelInfo { + task_profile: task_profile.to_string(), + model: model.to_string(), + provider: Some("remote".to_string()), + }) +} + +#[allow(dead_code)] +fn _kind_from_source(note: &ResearchNote) -> SourceKind { + note.provenance.source_kind +} diff --git a/MosaicIQ/src-tauri/src/research/errors.rs b/MosaicIQ/src-tauri/src/research/errors.rs new file mode 100644 index 0000000..8dd72c3 --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/errors.rs @@ -0,0 +1,32 @@ +//! Research subsystem error definitions. 
+ +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ResearchError { + #[error("research I/O failed: {0}")] + Io(#[from] std::io::Error), + #[error("research database failed: {0}")] + Db(#[from] rusqlite::Error), + #[error("research JSON failed: {0}")] + Json(#[from] serde_json::Error), + #[error("research HTTP failed: {0}")] + Http(#[from] reqwest::Error), + #[error("research task join failed: {0}")] + Join(String), + #[error("research workspace not found: {0}")] + WorkspaceNotFound(String), + #[error("research note not found: {0}")] + NoteNotFound(String), + #[error("research ghost note not found: {0}")] + GhostNoteNotFound(String), + #[error("research job not found: {0}")] + JobNotFound(String), + #[error("research validation failed: {0}")] + Validation(String), + #[error("research AI gateway failed: {0}")] + Ai(String), +} + +pub type Result = std::result::Result; + diff --git a/MosaicIQ/src-tauri/src/research/events.rs b/MosaicIQ/src-tauri/src/research/events.rs new file mode 100644 index 0000000..cf83c3e --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/events.rs @@ -0,0 +1,33 @@ +//! Tauri event helpers for the research subsystem. 
+ +use serde::Serialize; +use tauri::{AppHandle, Emitter, Runtime}; + +#[derive(Debug, Clone)] +pub struct ResearchEventEmitter { + app_handle: AppHandle, +} + +impl ResearchEventEmitter { + pub fn new(app_handle: &AppHandle) -> Self { + Self { + app_handle: app_handle.clone(), + } + } + + pub fn workspace_updated(&self, payload: &T) { + let _ = self.app_handle.emit("research_workspace_updated", payload); + } + + pub fn note_updated(&self, payload: &T) { + let _ = self.app_handle.emit("research_note_updated", payload); + } + + pub fn ghost_updated(&self, payload: &T) { + let _ = self.app_handle.emit("research_ghost_updated", payload); + } + + pub fn job_updated(&self, payload: &T) { + let _ = self.app_handle.emit("research_job_updated", payload); + } +} diff --git a/MosaicIQ/src-tauri/src/research/export.rs b/MosaicIQ/src-tauri/src/research/export.rs new file mode 100644 index 0000000..55dc23a --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/export.rs @@ -0,0 +1,53 @@ +//! Memo and bundle export helpers. 
+ +use serde_json::json; + +use crate::research::projections::build_memo_blocks; +use crate::research::types::{ + AuditEvent, GhostNote, NoteLink, ResearchBundleExport, ResearchNote, ResearchWorkspace, + SourceRecord, +}; + +pub(crate) fn export_bundle( + workspace: ResearchWorkspace, + notes: Vec, + links: Vec, + ghosts: Vec, + sources: Vec, + audit_events: Vec, +) -> ResearchBundleExport { + let memo_blocks = build_memo_blocks(¬es, &ghosts); + let markdown_memo = memo_blocks + .iter() + .map(|block| { + format!( + "## {}\n\n{}\n\nSources: {}\n", + block.headline, + block.body, + block.citation_refs.join(", ") + ) + }) + .collect::>() + .join("\n"); + + let json_bundle = json!({ + "workspace": workspace, + "notes": notes, + "links": links, + "ghosts": ghosts, + "sources": sources, + "auditEvents": audit_events, + "memoBlocks": memo_blocks, + }); + + ResearchBundleExport { + workspace: workspace.clone(), + notes: notes.clone(), + links: links.clone(), + ghosts: ghosts.clone(), + sources: sources.clone(), + audit_events: audit_events.clone(), + markdown_memo, + json_bundle, + } +} diff --git a/MosaicIQ/src-tauri/src/research/ghosts.rs b/MosaicIQ/src-tauri/src/research/ghosts.rs new file mode 100644 index 0000000..e52f0e0 --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/ghosts.rs @@ -0,0 +1,385 @@ +//! Provisional synthesis generation over note clusters and tensions. 
+ +use std::collections::{BTreeMap, BTreeSet, HashSet}; + +use crate::research::types::{ + GhostLifecycleState, GhostNote, GhostNoteClass, GhostTone, GhostVisibilityState, LinkType, + MemoSectionKind, NoteLink, NoteType, ResearchNote, ResearchWorkspace, +}; +use crate::research::util::{now_rfc3339, sha256_hex}; + +pub(crate) fn generate_ghost_notes( + workspace: &ResearchWorkspace, + notes: &[ResearchNote], + links: &[NoteLink], +) -> Vec { + let mut ghosts = Vec::new(); + ghosts.extend(generate_missing_evidence_prompts(workspace, notes, links)); + ghosts.extend(generate_contradiction_alerts(workspace, notes, links)); + ghosts.extend(generate_candidate_risks(workspace, notes)); + ghosts.extend(generate_candidate_catalysts(workspace, notes)); + ghosts.extend(generate_valuation_bridges(workspace, notes)); + if let Some(thesis) = generate_candidate_thesis(workspace, notes, links) { + ghosts.push(thesis); + } + + rank_and_limit_visibility(&mut ghosts); + ghosts +} + +fn generate_missing_evidence_prompts( + workspace: &ResearchWorkspace, + notes: &[ResearchNote], + links: &[NoteLink], +) -> Vec { + notes.iter() + .filter(|note| matches!(note.note_type, NoteType::Claim | NoteType::Thesis | NoteType::SubThesis)) + .filter(|note| { + !links.iter().any(|link| { + link.from_note_id == note.id && matches!(link.link_type, LinkType::SourcedBy | LinkType::Supports) + }) + }) + .map(|note| ghost( + workspace, + GhostNoteClass::MissingEvidencePrompt, + vec![note.id.clone()], + Vec::new(), + note.source_id.iter().cloned().collect(), + "Missing evidence for claim".to_string(), + format!( + "Observed: this claim is currently unsupported by a linked source note. Inference: the argument may be directionally useful but should not be treated as evidence yet. What would confirm/refute: add a filing, transcript, article, or model-backed source." 
+ ), + 0.42, + false, + GhostVisibilityState::Collapsed, + None, + )) + .collect() +} + +fn generate_contradiction_alerts( + workspace: &ResearchWorkspace, + notes: &[ResearchNote], + links: &[NoteLink], +) -> Vec { + let notes_by_id = notes.iter().map(|note| (note.id.as_str(), note)).collect::>(); + links.iter() + .filter(|link| matches!(link.link_type, LinkType::Contradicts | LinkType::ManagementVsReality)) + .filter_map(|link| { + let left = notes_by_id.get(link.from_note_id.as_str())?; + let right = notes_by_id.get(link.to_note_id.as_str())?; + Some(ghost( + workspace, + GhostNoteClass::ContradictionAlert, + vec![left.id.clone(), right.id.clone()], + vec![right.id.clone()], + collect_source_ids([*left, *right]), + "Contradiction alert".to_string(), + format!( + "Observed: {}. Inference: this conflicts with {}. What would confirm/refute: reconcile the newer datapoint, source freshness, and management framing before treating either statement as settled.", + left.cleaned_text, right.cleaned_text + ), + 0.86, + true, + GhostVisibilityState::Visible, + Some(MemoSectionKind::RiskRegister), + )) + }) + .collect() +} + +fn generate_candidate_risks(workspace: &ResearchWorkspace, notes: &[ResearchNote]) -> Vec { + let risk_notes = notes + .iter() + .filter(|note| matches!(note.note_type, NoteType::Risk | NoteType::Contradiction | NoteType::ChannelCheck)) + .collect::>(); + if risk_notes.len() < 3 { + return Vec::new(); + } + + let supporting_ids = risk_notes.iter().map(|note| note.id.clone()).collect::>(); + vec![ghost( + workspace, + GhostNoteClass::CandidateRisk, + supporting_ids, + Vec::new(), + collect_source_ids(risk_notes.into_iter()), + "Possible emerging risk cluster".to_string(), + "Observed: several notes point to downside pressure around the same operating area. Inference: this may represent an investable risk theme, but it should remain provisional until the source trail is tightened. 
What would confirm/refute: corroborate with fresh operating evidence or management disclosures.".to_string(), + 0.73, + true, + GhostVisibilityState::Visible, + Some(MemoSectionKind::RiskRegister), + )] +} + +fn generate_candidate_catalysts(workspace: &ResearchWorkspace, notes: &[ResearchNote]) -> Vec { + let catalyst_notes = notes + .iter() + .filter(|note| matches!(note.note_type, NoteType::Catalyst | NoteType::EventTakeaway | NoteType::ManagementSignal)) + .collect::>(); + if catalyst_notes.len() < 2 { + return Vec::new(); + } + + let supporting_ids = catalyst_notes.iter().map(|note| note.id.clone()).collect::>(); + vec![ghost( + workspace, + GhostNoteClass::CandidateCatalyst, + supporting_ids, + Vec::new(), + collect_source_ids(catalyst_notes.into_iter()), + "Possible catalyst cluster".to_string(), + "Observed: multiple notes point toward an identifiable event or operating trigger. Inference: this could matter for the next stock move if timing and evidence quality hold. What would confirm/refute: map the catalyst to dated milestones and an observable KPI.".to_string(), + 0.7, + true, + GhostVisibilityState::Visible, + Some(MemoSectionKind::CatalystCalendar), + )] +} + +fn generate_valuation_bridges(workspace: &ResearchWorkspace, notes: &[ResearchNote]) -> Vec { + let valuation_notes = notes + .iter() + .filter(|note| note.note_type == NoteType::ValuationPoint) + .collect::>(); + let driver_notes = notes + .iter() + .filter(|note| matches!(note.note_type, NoteType::IndustryObservation | NoteType::ManagementSignal | NoteType::Fact | NoteType::Catalyst)) + .collect::>(); + + if valuation_notes.len() < 2 || driver_notes.is_empty() { + return Vec::new(); + } + + let mut support = valuation_notes.iter().map(|note| note.id.clone()).collect::>(); + support.extend(driver_notes.iter().take(2).map(|note| note.id.clone())); + vec![ghost( + workspace, + GhostNoteClass::ValuationBridge, + support, + Vec::new(), + 
collect_source_ids(valuation_notes.into_iter().chain(driver_notes.into_iter())), + "Possible valuation bridge".to_string(), + "Observed: valuation notes point to a discount while operating notes suggest a driver that could narrow that gap. Inference: there may be a rerating bridge if the operating evidence persists. What would confirm/refute: track the KPI that should transmit into multiple expansion.".to_string(), + 0.76, + true, + GhostVisibilityState::Visible, + Some(MemoSectionKind::ValuationWriteUp), + )] +} + +fn generate_candidate_thesis( + workspace: &ResearchWorkspace, + notes: &[ResearchNote], + links: &[NoteLink], +) -> Option { + let source_count = notes.iter().filter_map(|note| note.source_id.as_ref()).collect::>().len(); + let family_count = notes + .iter() + .map(|note| note.note_type) + .collect::>() + .len(); + let corroborated_count = notes + .iter() + .filter(|note| matches!(note.note_type, NoteType::Fact | NoteType::Quote | NoteType::ManagementSignal | NoteType::ValuationPoint)) + .count(); + let has_catalyst_or_valuation = notes.iter().any(|note| matches!(note.note_type, NoteType::Catalyst | NoteType::ValuationPoint)); + let has_unresolved_contradiction = links.iter().any(|link| { + matches!(link.link_type, LinkType::Contradicts | LinkType::ManagementVsReality) && link.confidence > 0.75 + }); + + if notes.len() < 4 + || family_count < 2 + || source_count < 2 + || corroborated_count < 2 + || !has_catalyst_or_valuation + { + return None; + } + + let headline = if has_unresolved_contradiction { + "Possible thesis emerging, but tension remains" + } else { + "Possible candidate thesis" + }; + let body = if has_unresolved_contradiction { + "Observed: enough connected evidence exists to suggest an investable pattern, but at least one unresolved contradiction remains. Inference: a thesis may be forming, though conviction should stay tempered until the conflict is resolved. 
What would confirm/refute: close the contradiction with fresher operating evidence." + } else { + "Observed: multiple notes across evidence, catalyst, and valuation categories are pointing in the same direction. Inference: a coherent thesis may be emerging, though it should remain provisional until explicitly accepted by the analyst. What would confirm/refute: one more corroborating datapoint tied to the key driver." + }; + + Some(ghost( + workspace, + GhostNoteClass::CandidateThesis, + notes.iter().take(6).map(|note| note.id.clone()).collect(), + Vec::new(), + collect_source_ids(notes.iter()), + headline.to_string(), + body.to_string(), + if has_unresolved_contradiction { 0.68 } else { 0.82 }, + !has_unresolved_contradiction, + GhostVisibilityState::Visible, + Some(MemoSectionKind::InvestmentMemo), + )) +} + +fn rank_and_limit_visibility(ghosts: &mut [GhostNote]) { + ghosts.sort_by(|left, right| right.confidence.total_cmp(&left.confidence)); + let mut visible_count = 0usize; + for ghost in ghosts { + if matches!(ghost.visibility_state, GhostVisibilityState::Visible | GhostVisibilityState::Pinned) { + if visible_count >= 3 { + ghost.visibility_state = GhostVisibilityState::Hidden; + ghost.state = GhostLifecycleState::Generated; + } else { + visible_count += 1; + ghost.state = GhostLifecycleState::Visible; + } + } + } +} + +fn ghost( + workspace: &ResearchWorkspace, + ghost_class: GhostNoteClass, + supporting_note_ids: Vec, + contradicting_note_ids: Vec, + source_ids: Vec, + headline: String, + body: String, + confidence: f32, + evidence_threshold_met: bool, + visibility_state: GhostVisibilityState, + memo_section_hint: Option, +) -> GhostNote { + let now = now_rfc3339(); + let mut key_parts = BTreeSet::new(); + key_parts.extend(supporting_note_ids.iter().cloned()); + key_parts.extend(contradicting_note_ids.iter().cloned()); + let ghost_key = format!("{ghost_class:?}-{}", key_parts.into_iter().collect::>().join(",")); + + GhostNote { + id: format!("ghost-{}", 
&sha256_hex(&ghost_key)[..16]), + workspace_id: workspace.id.clone(), + ghost_class, + headline, + body, + tone: GhostTone::Tentative, + confidence, + visibility_state, + state: GhostLifecycleState::Generated, + supporting_note_ids, + contradicting_note_ids, + source_ids, + evidence_threshold_met, + created_at: now.clone(), + updated_at: now, + superseded_by_ghost_id: None, + promoted_note_id: None, + memo_section_hint, + } +} + +fn collect_source_ids<'a>(notes: impl IntoIterator) -> Vec { + notes + .into_iter() + .filter_map(|note| note.source_id.clone()) + .collect::>() + .into_iter() + .collect() +} + +#[cfg(test)] +mod tests { + use crate::research::types::{ + AnalystStatus, EvidenceStatus, GhostStatus, NotePriority, NoteProvenance, NoteType, + ProvenanceActor, ResearchNote, ResearchWorkspace, ThesisStatus, WorkspaceScope, + WorkspaceViewKind, + }; + use crate::research::util::{now_rfc3339, sha256_hex}; + + use super::generate_ghost_notes; + + fn workspace() -> ResearchWorkspace { + let now = now_rfc3339(); + ResearchWorkspace { + id: "workspace-1".to_string(), + name: "AAPL".to_string(), + primary_ticker: "AAPL".to_string(), + scope: WorkspaceScope::SingleCompany, + stage: crate::research::types::ResearchStage::Thesis, + default_view: WorkspaceViewKind::ThesisBuilder, + pinned_note_ids: Vec::new(), + archived: false, + created_at: now.clone(), + updated_at: now, + } + } + + fn note(id: &str, note_type: NoteType, source_id: Option<&str>) -> ResearchNote { + let now = now_rfc3339(); + ResearchNote { + id: id.to_string(), + workspace_id: "workspace-1".to_string(), + company_id: None, + ticker: Some("AAPL".to_string()), + source_id: source_id.map(ToOwned::to_owned), + raw_text: id.to_string(), + cleaned_text: id.to_string(), + title: None, + note_type, + subtype: None, + analyst_status: AnalystStatus::Captured, + ai_annotation: None, + confidence: 0.8, + evidence_status: EvidenceStatus::SourceLinked, + inferred_links: Vec::new(), + ghost_status: GhostStatus::None, 
+ thesis_status: ThesisStatus::None, + created_at: now.clone(), + updated_at: now.clone(), + provenance: NoteProvenance { + created_by: ProvenanceActor::Manual, + capture_method: crate::research::types::CaptureMethod::QuickEntry, + source_kind: crate::research::types::SourceKind::Manual, + origin_note_id: None, + origin_ghost_id: None, + model_info: None, + created_at: now, + raw_input_hash: sha256_hex(id), + }, + tags: Vec::new(), + catalysts: Vec::new(), + risks: Vec::new(), + valuation_refs: Vec::new(), + time_horizon: None, + scenario: None, + priority: NotePriority::Normal, + pinned: false, + archived: false, + revision: 1, + source_excerpt: None, + last_enriched_at: None, + last_linked_at: None, + stale_reason: None, + superseded_by_note_id: None, + } + } + + #[test] + fn generate_ghost_notes_should_surface_candidate_thesis_when_evidence_threshold_met() { + let ghosts = generate_ghost_notes( + &workspace(), + &[ + note("fact-1", NoteType::Fact, Some("source-1")), + note("quote-1", NoteType::Quote, Some("source-2")), + note("catalyst-1", NoteType::Catalyst, Some("source-2")), + note("valuation-1", NoteType::ValuationPoint, Some("source-1")), + ], + &[], + ); + + assert!(ghosts.iter().any(|ghost| ghost.ghost_class == crate::research::types::GhostNoteClass::CandidateThesis)); + } +} diff --git a/MosaicIQ/src-tauri/src/research/grounding.rs b/MosaicIQ/src-tauri/src/research/grounding.rs new file mode 100644 index 0000000..197379e --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/grounding.rs @@ -0,0 +1,176 @@ +//! Source grounding helpers for notes and citations. 
+ +use regex::Regex; +use serde_json::json; + +use crate::research::heuristics::derive_title; +use crate::research::types::{ + AnalystStatus, EvidenceStatus, FreshnessBucket, NotePriority, NoteProvenance, NoteType, + ResearchNote, SourceExcerpt, SourceKind, SourceRecord, SourceReferenceInput, +}; +use crate::research::util::{generate_id, now_rfc3339, sha256_hex}; + +pub(crate) fn build_source_record( + workspace_id: &str, + ticker: Option<&str>, + input: &SourceReferenceInput, +) -> SourceRecord { + let now = now_rfc3339(); + let title = input + .title + .clone() + .or_else(|| input.url.as_deref().map(derive_title_from_url)) + .unwrap_or_else(|| "Attached source".to_string()); + + SourceRecord { + id: generate_id("source"), + workspace_id: workspace_id.to_string(), + kind: input.kind, + ticker: ticker.map(ToOwned::to_owned), + title, + publisher: input.url.as_deref().and_then(extract_publisher), + url: input.url.clone(), + canonical_url: input.url.clone(), + filing_accession: input.filing_accession.clone(), + form_type: input.form_type.clone(), + published_at: input.published_at.clone(), + as_of_date: input.published_at.clone(), + ingested_at: now, + freshness_bucket: FreshnessBucket::Fresh, + checksum: input.url.as_deref().map(sha256_hex), + metadata_json: json!({ + "kind": input.kind, + "locationLabel": input.location_label, + }), + superseded_by_source_id: None, + } +} + +pub(crate) fn build_source_reference_note( + workspace_id: &str, + ticker: Option<&str>, + source: &SourceRecord, + excerpt: Option<&SourceExcerpt>, +) -> ResearchNote { + let now = now_rfc3339(); + let raw_text = format!( + "{}{}", + source.title, + source + .url + .as_deref() + .map(|url| format!(" ({url})")) + .unwrap_or_default() + ); + + ResearchNote { + id: generate_id("note"), + workspace_id: workspace_id.to_string(), + company_id: None, + ticker: ticker.map(ToOwned::to_owned), + source_id: Some(source.id.clone()), + raw_text: raw_text.clone(), + cleaned_text: raw_text, + title: 
derive_title(&source.title, NoteType::SourceReference), + note_type: NoteType::SourceReference, + subtype: Some(format!("{:?}", source.kind).to_ascii_lowercase()), + analyst_status: AnalystStatus::Accepted, + ai_annotation: None, + confidence: 1.0, + evidence_status: EvidenceStatus::SourceLinked, + inferred_links: Vec::new(), + ghost_status: crate::research::types::GhostStatus::None, + thesis_status: crate::research::types::ThesisStatus::None, + created_at: now.clone(), + updated_at: now.clone(), + provenance: NoteProvenance { + created_by: crate::research::types::ProvenanceActor::Import, + capture_method: crate::research::types::CaptureMethod::ManualLink, + source_kind: source.kind, + origin_note_id: None, + origin_ghost_id: None, + model_info: None, + created_at: now, + raw_input_hash: sha256_hex(&source.title), + }, + tags: vec!["source".to_string()], + catalysts: Vec::new(), + risks: Vec::new(), + valuation_refs: Vec::new(), + time_horizon: None, + scenario: None, + priority: NotePriority::Low, + pinned: false, + archived: false, + revision: 1, + source_excerpt: excerpt.cloned(), + last_enriched_at: None, + last_linked_at: None, + stale_reason: None, + superseded_by_note_id: None, + } +} + +pub(crate) fn source_excerpt_from_input(source_id: &str, input: &SourceReferenceInput) -> Option { + if input.excerpt_text.is_none() && input.location_label.is_none() { + return None; + } + + Some(SourceExcerpt { + source_id: source_id.to_string(), + excerpt_text: input.excerpt_text.clone(), + location_label: input.location_label.clone(), + start_offset: None, + end_offset: None, + }) +} + +pub(crate) async fn refresh_source_metadata(source: &SourceRecord) -> crate::research::Result { + let Some(url) = source.url.as_deref() else { + return Ok(source.clone()); + }; + + let body = reqwest::get(url).await?.text().await?; + let title = extract_html_title(&body).unwrap_or_else(|| source.title.clone()); + + let mut refreshed = source.clone(); + refreshed.title = title; + 
refreshed.publisher = extract_publisher(url);
+    refreshed.metadata_json = json!({
+        "kind": refreshed.kind,
+        "refreshedFromUrl": url,
+    });
+    Ok(refreshed)
+}
+
+fn derive_title_from_url(url: &str) -> String {
+    let mut trimmed = url
+        .trim_start_matches("https://")
+        .trim_start_matches("http://")
+        .trim_end_matches('/');
+    if let Some((host, path)) = trimmed.split_once('/') {
+        trimmed = if path.is_empty() { host } else { path };
+    }
+
+    trimmed.replace('-', " ")
+}
+
+fn extract_publisher(url: &str) -> Option<String> {
+    let without_scheme = url
+        .trim_start_matches("https://")
+        .trim_start_matches("http://");
+    let host = without_scheme.split('/').next().unwrap_or_default();
+    if host.is_empty() {
+        None
+    } else {
+        Some(host.to_string())
+    }
+}
+
+fn extract_html_title(body: &str) -> Option<String> {
+    let regex = Regex::new(r"(?is)<title>(.*?)</title>").expect("title regex should compile");
+    regex
+        .captures(body)
+        .and_then(|captures| captures.get(1).map(|value| value.as_str().trim().to_string()))
+        .filter(|value| !value.is_empty())
+}
diff --git a/MosaicIQ/src-tauri/src/research/heuristics.rs b/MosaicIQ/src-tauri/src/research/heuristics.rs
new file mode 100644
index 0000000..1409b60
--- /dev/null
+++ b/MosaicIQ/src-tauri/src/research/heuristics.rs
@@ -0,0 +1,249 @@
+//! Deterministic first-pass note typing and extraction rules.
+ +use regex::Regex; + +use crate::research::types::{ + NotePriority, NoteType, ScenarioKind, SourceKind, TimeHorizon, ValuationRef, +}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct HeuristicTypingResult { + pub note_type: NoteType, + pub subtype: Option, + pub confidence: f32, + pub tags: Vec, + pub catalysts: Vec, + pub risks: Vec, + pub valuation_refs: Vec, + pub time_horizon: Option, + pub scenario: Option, + pub priority: NotePriority, +} + +pub(crate) fn classify_note( + cleaned_text: &str, + source_kind: SourceKind, + override_type: Option, +) -> HeuristicTypingResult { + if let Some(note_type) = override_type { + let mut result = baseline_result(note_type, 0.99); + result.tags = extract_tags(cleaned_text); + result.valuation_refs = extract_valuation_refs(cleaned_text); + return result; + } + + let lower = cleaned_text.to_ascii_lowercase(); + let mut result = if looks_like_quote(cleaned_text, source_kind) { + baseline_result(NoteType::Quote, 0.92) + } else if lower.contains("management says") + || lower.contains("mgmt says") + || (matches!(source_kind, SourceKind::Transcript) && has_any(&lower, &["expect", "seeing", "confident", "guidance"])) + { + baseline_result(NoteType::ManagementSignal, 0.83) + } else if has_any(&lower, &["ev/ebitda", "p/e", "fcf yield", "multiple", "price target", "rerating", "valuation"]) { + baseline_result(NoteType::ValuationPoint, 0.87) + } else if has_any(&lower, &["risk", "downside", "headwind", "pressure", "weakness", "inventory"]) { + baseline_result(NoteType::Risk, 0.78) + } else if has_any(&lower, &["catalyst", "launch", "approval", "guidance", "next quarter", "earnings", "rerating"]) { + baseline_result(NoteType::Catalyst, 0.74) + } else if has_any(&lower, &["if ", "assume", "base case", "bull case", "bear case", "scenario"]) { + baseline_result(NoteType::ScenarioAssumption, 0.8) + } else if cleaned_text.ends_with('?') || has_any(&lower, &["what if", "why is", "how does", "question"]) { + 
baseline_result(NoteType::Question, 0.91) + } else if has_any(&lower, &["channel check", "retail", "sell-through", "inventory in channel"]) { + baseline_result(NoteType::ChannelCheck, 0.82) + } else if has_any(&lower, &["peer", "vs ", "versus", "relative to", "competitor"]) { + baseline_result(NoteType::CompetitorComparison, 0.77) + } else if has_any(&lower, &["industry", "category", "market", "sector"]) { + baseline_result(NoteType::IndustryObservation, 0.73) + } else if has_any(&lower, &["thesis", "stock can", "we think", "market is missing"]) { + baseline_result(NoteType::Thesis, 0.71) + } else if has_any(&lower, &["follow up", "check", "verify", "ask ir", "need to"]) { + baseline_result(NoteType::FollowUpTask, 0.85) + } else if has_any(&lower, &["call takeaway", "takeaway", "earnings recap", "event"]) { + baseline_result(NoteType::EventTakeaway, 0.76) + } else if looks_like_fact(cleaned_text, source_kind) { + baseline_result(NoteType::Fact, 0.72) + } else { + baseline_result(NoteType::Claim, 0.58) + }; + + result.tags = extract_tags(cleaned_text); + result.catalysts = extract_keyword_bucket(cleaned_text, &["launch", "approval", "guidance", "margin", "enterprise demand"]); + result.risks = extract_keyword_bucket(cleaned_text, &["inventory", "pricing", "churn", "competition", "demand softness"]); + result.valuation_refs = extract_valuation_refs(cleaned_text); + result.time_horizon = infer_time_horizon(&lower); + result.scenario = infer_scenario(&lower); + result.priority = infer_priority(&result.note_type, &lower); + result +} + +pub(crate) fn derive_title(cleaned_text: &str, note_type: NoteType) -> Option { + if cleaned_text.is_empty() { + return None; + } + + let prefix = match note_type { + NoteType::Risk => "Risk", + NoteType::Catalyst => "Catalyst", + NoteType::ValuationPoint => "Valuation", + NoteType::ManagementSignal => "Mgmt", + NoteType::Question => "Question", + NoteType::Contradiction => "Conflict", + NoteType::FollowUpTask => "Follow up", + _ => 
"Note", + }; + + Some(format!("{prefix}: {}", crate::research::util::clean_title(cleaned_text, 72))) +} + +pub(crate) fn detect_urls(text: &str) -> Vec { + let regex = Regex::new(r"https?://[^\s)]+").expect("URL regex should compile"); + regex + .find_iter(text) + .map(|capture| capture.as_str().trim_end_matches('.').to_string()) + .collect() +} + +pub(crate) fn extract_tickers(text: &str) -> Vec { + let regex = Regex::new(r"\b[A-Z]{2,5}\b").expect("ticker regex should compile"); + regex + .find_iter(text) + .map(|capture| capture.as_str().to_string()) + .collect() +} + +fn baseline_result(note_type: NoteType, confidence: f32) -> HeuristicTypingResult { + HeuristicTypingResult { + note_type, + subtype: None, + confidence, + tags: Vec::new(), + catalysts: Vec::new(), + risks: Vec::new(), + valuation_refs: Vec::new(), + time_horizon: None, + scenario: None, + priority: NotePriority::Normal, + } +} + +fn looks_like_quote(cleaned_text: &str, source_kind: SourceKind) -> bool { + cleaned_text.contains('\"') || matches!(source_kind, SourceKind::Transcript) && cleaned_text.contains(':') +} + +fn looks_like_fact(cleaned_text: &str, source_kind: SourceKind) -> bool { + let lower = cleaned_text.to_ascii_lowercase(); + matches!(source_kind, SourceKind::Filing | SourceKind::Transcript | SourceKind::Article) + || has_any(&lower, &["reported", "was", "were", "increased", "decreased"]) + || Regex::new(r"\b\d+(\.\d+)?(%|x|bps|m|bn)?\b") + .expect("fact regex should compile") + .is_match(cleaned_text) +} + +fn extract_tags(cleaned_text: &str) -> Vec { + let mut tags = Vec::new(); + let lower = cleaned_text.to_ascii_lowercase(); + for tag in [ + "margin", + "demand", + "inventory", + "pricing", + "guidance", + "subscription", + "enterprise", + "valuation", + "peer", + ] { + if lower.contains(tag) { + tags.push(tag.to_string()); + } + } + tags +} + +fn extract_keyword_bucket(cleaned_text: &str, keywords: &[&str]) -> Vec { + let lower = cleaned_text.to_ascii_lowercase(); + keywords + 
.iter() + .filter(|keyword| lower.contains(**keyword)) + .map(|keyword| (*keyword).to_string()) + .collect() +} + +fn extract_valuation_refs(cleaned_text: &str) -> Vec { + let regex = + Regex::new(r"(?P\d+(\.\d+)?)x\s+(?P[A-Za-z/]+)").expect("valuation regex should compile"); + regex + .captures_iter(cleaned_text) + .map(|captures| ValuationRef { + metric: captures["metric"].to_string(), + multiple: captures["multiple"].parse::().ok(), + unit: Some("x".to_string()), + basis: None, + }) + .collect() +} + +fn infer_time_horizon(lower: &str) -> Option { + if has_any(lower, &["next quarter", "next half", "next earnings"]) { + Some(TimeHorizon::NextEarnings) + } else if has_any(lower, &["12 month", "twelve month", "next year"]) { + Some(TimeHorizon::TwelveMonth) + } else if has_any(lower, &["multi-year", "long term", "3 year", "5 year"]) { + Some(TimeHorizon::MultiYear) + } else if has_any(lower, &["near term", "this quarter"]) { + Some(TimeHorizon::NearTerm) + } else { + None + } +} + +fn infer_scenario(lower: &str) -> Option { + if lower.contains("bull case") { + Some(ScenarioKind::Bull) + } else if lower.contains("bear case") { + Some(ScenarioKind::Bear) + } else if lower.contains("downside") { + Some(ScenarioKind::DownsideCase) + } else if lower.contains("upside") { + Some(ScenarioKind::UpsideCase) + } else if lower.contains("base case") { + Some(ScenarioKind::Base) + } else { + None + } +} + +fn infer_priority(note_type: &NoteType, lower: &str) -> NotePriority { + if matches!(note_type, NoteType::Contradiction | NoteType::Risk) && has_any(lower, &["material", "severe", "significant"]) { + NotePriority::Critical + } else if matches!(note_type, NoteType::Risk | NoteType::Catalyst | NoteType::Thesis | NoteType::ManagementSignal) { + NotePriority::High + } else { + NotePriority::Normal + } +} + +fn has_any(input: &str, needles: &[&str]) -> bool { + needles.iter().any(|needle| input.contains(needle)) +} + +#[cfg(test)] +mod tests { + use 
crate::research::types::{NoteType, SourceKind, TimeHorizon}; + + use super::classify_note; + + #[test] + fn classify_note_should_flag_management_signal_for_transcript_language() { + let result = classify_note( + "Mgmt says enterprise demand improved sequentially and expects gross margin to exit above 70% next half.", + SourceKind::Transcript, + None, + ); + + assert_eq!(result.note_type, NoteType::ManagementSignal); + assert_eq!(result.time_horizon, Some(TimeHorizon::NextEarnings)); + } +} diff --git a/MosaicIQ/src-tauri/src/research/links.rs b/MosaicIQ/src-tauri/src/research/links.rs new file mode 100644 index 0000000..a78dc7e --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/links.rs @@ -0,0 +1,300 @@ +//! Deterministic relationship inference across research notes. + +use std::collections::{BTreeSet, HashSet}; + +use crate::research::types::{ + EvidenceBasis, LinkOrigin, LinkStrength, LinkType, NoteLink, NoteType, ResearchNote, +}; +use crate::research::util::{generate_id, now_rfc3339}; + +pub(crate) fn infer_links(notes: &[ResearchNote]) -> Vec { + let mut links = Vec::new(); + + for left_index in 0..notes.len() { + let left = ¬es[left_index]; + for right in ¬es[(left_index + 1)..] 
{ + if left.workspace_id != right.workspace_id || left.archived || right.archived { + continue; + } + + if let Some(link) = infer_pair(left, right) { + links.push(link); + } + + if let Some(link) = infer_reverse_pair(left, right) { + links.push(link); + } + } + } + + links +} + +fn infer_pair(left: &ResearchNote, right: &ResearchNote) -> Option { + if left.note_type != NoteType::SourceReference + && right.note_type == NoteType::SourceReference + && left.source_id.as_deref() == right.source_id.as_deref() + && left.source_id.is_some() + { + return Some(build_link(left, right, LinkType::SourcedBy, 0.98, LinkStrength::Strong, EvidenceBasis::SharedSource)); + } + + if left.note_type == NoteType::ValuationPoint && is_valuation_dependency_target(right) && shares_keywords(left, right) { + return Some(build_link(left, right, LinkType::ValuationDependsOn, 0.8, LinkStrength::Strong, EvidenceBasis::Lexical)); + } + + if left.note_type == NoteType::Risk && is_thesis_family(right) && shares_keywords(left, right) { + return Some(build_link(left, right, LinkType::RiskTo, 0.76, LinkStrength::Strong, EvidenceBasis::Lexical)); + } + + if left.note_type == NoteType::Catalyst && is_thesis_family(right) && shares_keywords(left, right) { + return Some(build_link(left, right, LinkType::CatalystFor, 0.78, LinkStrength::Strong, EvidenceBasis::Temporal)); + } + + if left.note_type == NoteType::ScenarioAssumption && is_assumption_target(right) && shares_keywords(left, right) { + return Some(build_link(left, right, LinkType::AssumptionFor, 0.75, LinkStrength::Strong, EvidenceBasis::Structured)); + } + + if is_evidence_note(left) && is_claim_family(right) && shares_keywords(left, right) { + if signals_contradiction(left, right) { + return Some(build_link(left, right, LinkType::Contradicts, 0.79, LinkStrength::Critical, EvidenceBasis::Lexical)); + } + return Some(build_link(left, right, LinkType::Supports, 0.72, LinkStrength::Strong, EvidenceBasis::Lexical)); + } + + if left.note_type == 
NoteType::ManagementSignal + && matches!(right.note_type, NoteType::Fact | NoteType::ChannelCheck | NoteType::EventTakeaway) + && shares_keywords(left, right) + && signals_contradiction(left, right) + { + return Some(build_link(left, right, LinkType::ManagementVsReality, 0.9, LinkStrength::Critical, EvidenceBasis::Temporal)); + } + + if left.note_type == right.note_type + && left.ticker == right.ticker + && text_similarity(left, right) > 0.94 + && left.revision != right.revision + { + let link_type = if left.updated_at <= right.updated_at { + LinkType::Updates + } else { + LinkType::Supersedes + }; + return Some(build_link(left, right, link_type, 0.7, LinkStrength::Medium, EvidenceBasis::Structured)); + } + + if shares_keywords(left, right) + && left.time_horizon.is_some() + && right.time_horizon.is_some() + && left.time_horizon != right.time_horizon + { + return Some(build_link(left, right, LinkType::TimeframeConflict, 0.67, LinkStrength::Medium, EvidenceBasis::Temporal)); + } + + None +} + +fn infer_reverse_pair(left: &ResearchNote, right: &ResearchNote) -> Option { + if left.note_type == NoteType::ManagementSignal + && matches!(right.note_type, NoteType::Fact | NoteType::ChannelCheck | NoteType::EventTakeaway) + && shares_keywords(left, right) + && signals_contradiction(left, right) + { + return Some(build_link(right, left, LinkType::Contradicts, 0.84, LinkStrength::Critical, EvidenceBasis::Temporal)); + } + + None +} + +fn build_link( + from: &ResearchNote, + to: &ResearchNote, + link_type: LinkType, + confidence: f32, + strength: LinkStrength, + evidence_basis: EvidenceBasis, +) -> NoteLink { + let now = now_rfc3339(); + NoteLink { + id: generate_id("link"), + workspace_id: from.workspace_id.clone(), + from_note_id: from.id.clone(), + to_note_id: to.id.clone(), + link_type, + directional: !matches!(link_type, LinkType::TimeframeConflict), + confidence, + strength, + evidence_basis, + created_by: LinkOrigin::Heuristic, + created_at: now.clone(), + updated_at: 
now, + source_revision_pair: (from.revision, to.revision), + stale: false, + stale_reason: None, + } +} + +fn is_claim_family(note: &ResearchNote) -> bool { + matches!( + note.note_type, + NoteType::Claim + | NoteType::Thesis + | NoteType::SubThesis + | NoteType::Risk + | NoteType::Catalyst + | NoteType::MosaicInsight + | NoteType::ManagementSignal + ) +} + +fn is_evidence_note(note: &ResearchNote) -> bool { + matches!( + note.note_type, + NoteType::Fact + | NoteType::Quote + | NoteType::EventTakeaway + | NoteType::ChannelCheck + | NoteType::IndustryObservation + | NoteType::CompetitorComparison + | NoteType::ManagementSignal + ) +} + +fn is_thesis_family(note: &ResearchNote) -> bool { + matches!(note.note_type, NoteType::Thesis | NoteType::SubThesis | NoteType::Claim | NoteType::MosaicInsight) +} + +fn is_assumption_target(note: &ResearchNote) -> bool { + matches!(note.note_type, NoteType::ValuationPoint | NoteType::Thesis | NoteType::SubThesis | NoteType::Risk) +} + +fn is_valuation_dependency_target(note: &ResearchNote) -> bool { + matches!( + note.note_type, + NoteType::Fact + | NoteType::Catalyst + | NoteType::ScenarioAssumption + | NoteType::IndustryObservation + | NoteType::ManagementSignal + | NoteType::Claim + ) +} + +fn shares_keywords(left: &ResearchNote, right: &ResearchNote) -> bool { + let left_words = significant_words(&left.cleaned_text); + let right_words = significant_words(&right.cleaned_text); + + left_words.intersection(&right_words).count() >= 2 + || left.tags.iter().any(|tag| right.tags.contains(tag)) + || left.ticker == right.ticker && left.ticker.is_some() +} + +fn signals_contradiction(left: &ResearchNote, right: &ResearchNote) -> bool { + let negative = ["decline", "weak", "pressure", "discount", "inventory", "miss", "soft"]; + let positive = ["improve", "improved", "normalized", "strong", "reaccelerat", "above"]; + let left_lower = left.cleaned_text.to_ascii_lowercase(); + let right_lower = right.cleaned_text.to_ascii_lowercase(); + + 
has_any(&left_lower, &positive) && has_any(&right_lower, &negative) + || has_any(&left_lower, &negative) && has_any(&right_lower, &positive) +} + +fn significant_words(text: &str) -> HashSet { + let stop_words = BTreeSet::from([ + "the", "and", "for", "with", "that", "from", "this", "next", "says", "said", + "have", "has", "into", "above", "below", "about", "quarter", "company", + ]); + + text.to_ascii_lowercase() + .split(|character: char| !character.is_ascii_alphanumeric()) + .filter(|value| value.len() >= 4 && !stop_words.contains(*value)) + .map(ToOwned::to_owned) + .collect() +} + +fn text_similarity(left: &ResearchNote, right: &ResearchNote) -> f32 { + let left_words = significant_words(&left.cleaned_text); + let right_words = significant_words(&right.cleaned_text); + if left_words.is_empty() || right_words.is_empty() { + return 0.0; + } + + let intersection = left_words.intersection(&right_words).count() as f32; + let union = left_words.union(&right_words).count() as f32; + intersection / union +} + +fn has_any(input: &str, needles: &[&str]) -> bool { + needles.iter().any(|needle| input.contains(needle)) +} + +#[cfg(test)] +mod tests { + use crate::research::types::{ + AnalystStatus, EvidenceStatus, GhostStatus, NotePriority, NoteProvenance, NoteType, + ProvenanceActor, ResearchNote, ThesisStatus, + }; + use crate::research::util::{now_rfc3339, sha256_hex}; + + use super::infer_links; + + fn note(id: &str, note_type: NoteType, text: &str) -> ResearchNote { + let now = now_rfc3339(); + ResearchNote { + id: id.to_string(), + workspace_id: "workspace-1".to_string(), + company_id: None, + ticker: Some("AAPL".to_string()), + source_id: None, + raw_text: text.to_string(), + cleaned_text: text.to_string(), + title: None, + note_type, + subtype: None, + analyst_status: AnalystStatus::Captured, + ai_annotation: None, + confidence: 0.8, + evidence_status: EvidenceStatus::Unsourced, + inferred_links: Vec::new(), + ghost_status: GhostStatus::None, + thesis_status: 
ThesisStatus::None, + created_at: now.clone(), + updated_at: now.clone(), + provenance: NoteProvenance { + created_by: ProvenanceActor::Manual, + capture_method: crate::research::types::CaptureMethod::QuickEntry, + source_kind: crate::research::types::SourceKind::Manual, + origin_note_id: None, + origin_ghost_id: None, + model_info: None, + created_at: now, + raw_input_hash: sha256_hex(text), + }, + tags: Vec::new(), + catalysts: Vec::new(), + risks: Vec::new(), + valuation_refs: Vec::new(), + time_horizon: None, + scenario: None, + priority: NotePriority::Normal, + pinned: false, + archived: false, + revision: 1, + source_excerpt: None, + last_enriched_at: None, + last_linked_at: None, + stale_reason: None, + superseded_by_note_id: None, + } + } + + #[test] + fn infer_links_should_create_management_vs_reality_for_conflicting_notes() { + let links = infer_links(&[ + note("mgmt", NoteType::ManagementSignal, "Management says inventory is now normalized."), + note("fact", NoteType::Fact, "Inventory days increased 12% sequentially and discounting remains elevated."), + ]); + + assert!(links.iter().any(|link| link.link_type == crate::research::types::LinkType::ManagementVsReality)); + } +} diff --git a/MosaicIQ/src-tauri/src/research/mod.rs b/MosaicIQ/src-tauri/src/research/mod.rs new file mode 100644 index 0000000..2185c58 --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/mod.rs @@ -0,0 +1,29 @@ +//! Local-first equity research workspace subsystem. 
+ +mod ai; +mod events; +mod export; +mod errors; +mod ghosts; +mod grounding; +mod heuristics; +mod links; +mod pipeline; +mod projections; +mod repository; +mod service; +mod types; +mod util; + +pub use errors::{ResearchError, Result}; +pub use events::ResearchEventEmitter; +pub use pipeline::spawn_research_scheduler; +pub use service::ResearchService; +pub use types::{ + ArchiveResearchNoteRequest, AuditEvent, CaptureResearchNoteRequest, CreateResearchWorkspaceRequest, + ExportResearchBundleRequest, GetNoteAuditTrailRequest, GetWorkspaceProjectionRequest, + GhostNote, ListNoteLinksRequest, ListResearchNotesRequest, ListWorkspaceGhostNotesRequest, + MemoBlockCandidate, NoteAuditTrail, NoteCaptureResult, NoteLink, PipelineJob, + PromoteNoteToThesisRequest, ResearchBundleExport, ResearchNote, ResearchWorkspace, + RetryResearchJobsRequest, ReviewGhostNoteRequest, WorkspaceProjection, +}; diff --git a/MosaicIQ/src-tauri/src/research/pipeline.rs b/MosaicIQ/src-tauri/src/research/pipeline.rs new file mode 100644 index 0000000..9d0c6f0 --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/pipeline.rs @@ -0,0 +1,120 @@ +//! Persisted background job queue and scheduler helpers. 
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use serde_json::json;
+use tauri::Runtime;
+
+use crate::research::repository::ResearchRepository;
+use crate::research::types::{JobKind, JobStatus, PipelineJob};
+use crate::research::util::{generate_id, now_rfc3339};
+
+#[derive(Clone)]
+pub struct ResearchPipeline {
+    repository: Arc<ResearchRepository>,
+}
+
+impl ResearchPipeline {
+    pub fn new(repository: Arc<ResearchRepository>) -> Self {
+        Self { repository }
+    }
+
+    pub async fn enqueue_capture_jobs(
+        &self,
+        workspace_id: &str,
+        note_id: &str,
+        revision: u32,
+        refresh_source_id: Option<String>,
+    ) -> crate::research::Result<Vec<PipelineJob>> {
+        let mut jobs = vec![
+            new_job(workspace_id, note_id, JobKind::EnrichNote, json!({ "noteId": note_id, "expectedRevision": revision })),
+            new_job(workspace_id, note_id, JobKind::InferLinks, json!({ "workspaceId": workspace_id, "noteId": note_id, "expectedRevision": revision })),
+            new_job(workspace_id, note_id, JobKind::EvaluateDuplicates, json!({ "workspaceId": workspace_id, "noteId": note_id, "expectedRevision": revision })),
+            new_job(workspace_id, note_id, JobKind::EvaluateGhosts, json!({ "workspaceId": workspace_id })),
+        ];
+        if let Some(source_id) = refresh_source_id {
+            jobs.push(new_job(
+                workspace_id,
+                &source_id,
+                JobKind::RefreshSourceMetadata,
+                json!({ "sourceId": source_id }),
+            ));
+        }
+
+        self.repository.enqueue_jobs(jobs).await
+    }
+
+    pub async fn mark_running(&self, mut job: PipelineJob) -> crate::research::Result<PipelineJob> {
+        job.status = JobStatus::Running;
+        job.attempt_count += 1;
+        job.updated_at = now_rfc3339();
+        self.repository.save_job(job).await
+    }
+
+    pub async fn mark_completed(&self, mut job: PipelineJob) -> crate::research::Result<PipelineJob> {
+        job.status = JobStatus::Completed;
+        job.last_error = None;
+        job.next_attempt_at = None;
+        job.updated_at = now_rfc3339();
+        self.repository.save_job(job).await
+    }
+
+    pub async fn mark_skipped(&self, mut job: PipelineJob, reason: &str) -> crate::research::Result<PipelineJob> {
+        job.status = JobStatus::Skipped;
+        job.last_error = Some(reason.to_string());
+        job.next_attempt_at = None;
+        job.updated_at = now_rfc3339();
+        self.repository.save_job(job).await
+    }
+
+    pub async fn mark_failed(&self, mut job: PipelineJob, error: &str) -> crate::research::Result<PipelineJob> {
+        job.status = JobStatus::Failed;
+        job.last_error = Some(error.to_string());
+        job.next_attempt_at = Some(next_retry_timestamp(job.attempt_count + 1));
+        job.updated_at = now_rfc3339();
+        self.repository.save_job(job).await
+    }
+
+    pub async fn due_jobs(&self, limit: usize) -> crate::research::Result<Vec<PipelineJob>> {
+        self.repository.list_due_jobs(limit).await
+    }
+}
+
+pub fn spawn_research_scheduler<R: Runtime>(
+    service: Arc<crate::research::service::ResearchService<R>>,
+) {
+    tauri::async_runtime::spawn(async move {
+        loop {
+            let _ = service.process_due_jobs().await;
+            tokio::time::sleep(Duration::from_secs(3)).await;
+        }
+    });
+}
+
+fn new_job(workspace_id: &str, entity_id: &str, job_kind: JobKind, payload_json: serde_json::Value) -> PipelineJob {
+    let now = now_rfc3339();
+    PipelineJob {
+        id: generate_id("job"),
+        workspace_id: workspace_id.to_string(),
+        entity_id: entity_id.to_string(),
+        job_kind,
+        status: JobStatus::Queued,
+        attempt_count: 0,
+        max_attempts: 3,
+        next_attempt_at: None,
+        last_error: None,
+        payload_json,
+        created_at: now.clone(),
+        updated_at: now,
+    }
+}
+
+fn next_retry_timestamp(attempt_number: u32) -> String {
+    let seconds = match attempt_number {
+        0 | 1 => 30,
+        2 => 5 * 60,
+        _ => 30 * 60,
+    };
+    (chrono::Utc::now() + chrono::Duration::seconds(i64::from(seconds))).to_rfc3339()
+}
diff --git a/MosaicIQ/src-tauri/src/research/projections.rs b/MosaicIQ/src-tauri/src/research/projections.rs
new file mode 100644
index 0000000..f82c987
--- /dev/null
+++ b/MosaicIQ/src-tauri/src/research/projections.rs
@@ -0,0 +1,239 @@
+//! Read-model projections for frontend workspace views.
+ +use crate::research::types::{ + GhostNote, KanbanColumn, MemoBlockCandidate, MemoSectionKind, NoteLink, NoteType, ResearchNote, + ResearchWorkspace, TimelineEvent, WorkspaceProjection, WorkspaceViewKind, GraphEdge, GraphNode, +}; + +pub(crate) fn build_workspace_projection( + workspace: ResearchWorkspace, + requested_view: WorkspaceViewKind, + notes: Vec, + links: Vec, + ghosts: Vec, +) -> WorkspaceProjection { + let memo_blocks = build_memo_blocks(¬es, &ghosts); + let graph_nodes = notes + .iter() + .map(|note| GraphNode { + id: note.id.clone(), + label: note.title.clone().unwrap_or_else(|| note.cleaned_text.clone()), + kind: format!("{:?}", note.note_type).to_ascii_lowercase(), + confidence: note.confidence, + evidence_status: note.evidence_status, + }) + .collect(); + let graph_edges = links + .iter() + .map(|link| GraphEdge { + id: link.id.clone(), + from: link.from_note_id.clone(), + to: link.to_note_id.clone(), + link_type: link.link_type, + strength: link.strength, + confidence: link.confidence, + }) + .collect(); + let kanban_columns = build_kanban_columns(¬es); + let timeline_events = build_timeline(¬es, &ghosts); + + WorkspaceProjection { + workspace, + active_view: requested_view, + notes, + links, + ghosts, + memo_blocks, + graph_nodes, + graph_edges, + kanban_columns, + timeline_events, + } +} + +pub(crate) fn build_memo_blocks(notes: &[ResearchNote], ghosts: &[GhostNote]) -> Vec { + let mut blocks = notes + .iter() + .filter(|note| { + !note.archived + && !matches!(note.note_type, NoteType::Question | NoteType::FollowUpTask | NoteType::SourceReference) + && !matches!(note.evidence_status, crate::research::types::EvidenceStatus::Unsourced) + }) + .filter_map(|note| { + let section_kind = section_for_note(note.note_type)?; + Some(MemoBlockCandidate { + section_kind, + headline: note.title.clone().unwrap_or_else(|| note.cleaned_text.clone()), + body: note.ai_annotation.clone().unwrap_or_else(|| note.cleaned_text.clone()), + source_note_ids: 
vec![note.id.clone()], + citation_refs: note.source_id.iter().cloned().collect(), + confidence: note.confidence, + accepted: matches!( + note.thesis_status, + crate::research::types::ThesisStatus::AcceptedSupport + | crate::research::types::ThesisStatus::AcceptedCore + | crate::research::types::ThesisStatus::BullCase + | crate::research::types::ThesisStatus::BearCase + ), + }) + }) + .collect::>(); + + blocks.extend( + ghosts + .iter() + .filter(|ghost| matches!(ghost.state, crate::research::types::GhostLifecycleState::Accepted | crate::research::types::GhostLifecycleState::Converted)) + .filter_map(|ghost| { + Some(MemoBlockCandidate { + section_kind: ghost.memo_section_hint?, + headline: ghost.headline.clone(), + body: ghost.body.clone(), + source_note_ids: ghost.supporting_note_ids.clone(), + citation_refs: ghost.source_ids.clone(), + confidence: ghost.confidence, + accepted: true, + }) + }), + ); + + blocks +} + +fn build_kanban_columns(notes: &[ResearchNote]) -> Vec { + let mut columns = Vec::new(); + for note_type in [ + NoteType::Fact, + NoteType::ManagementSignal, + NoteType::Claim, + NoteType::Risk, + NoteType::Catalyst, + NoteType::ValuationPoint, + NoteType::Question, + NoteType::SourceReference, + ] { + columns.push(KanbanColumn { + key: format!("{note_type:?}").to_ascii_lowercase(), + label: format!("{note_type:?}").replace("Point", "").replace("Signal", " Signal"), + notes: notes + .iter() + .filter(|note| note.note_type == note_type && !note.archived) + .cloned() + .collect(), + }); + } + columns +} + +fn build_timeline(notes: &[ResearchNote], ghosts: &[GhostNote]) -> Vec { + let mut timeline = notes + .iter() + .filter(|note| matches!(note.note_type, NoteType::EventTakeaway | NoteType::Catalyst | NoteType::ManagementSignal)) + .map(|note| TimelineEvent { + id: note.id.clone(), + label: note.title.clone().unwrap_or_else(|| note.cleaned_text.clone()), + note_id: note.id.clone(), + at: note.source_excerpt.as_ref().and_then(|excerpt| 
excerpt.location_label.clone()), + }) + .collect::>(); + timeline.extend( + ghosts + .iter() + .filter(|ghost| matches!(ghost.ghost_class, crate::research::types::GhostNoteClass::ContradictionAlert)) + .map(|ghost| TimelineEvent { + id: ghost.id.clone(), + label: ghost.headline.clone(), + note_id: ghost.supporting_note_ids.first().cloned().unwrap_or_else(|| ghost.id.clone()), + at: None, + }), + ); + timeline +} + +fn section_for_note(note_type: NoteType) -> Option { + match note_type { + NoteType::Thesis | NoteType::SubThesis | NoteType::MosaicInsight => Some(MemoSectionKind::InvestmentMemo), + NoteType::Risk | NoteType::Contradiction => Some(MemoSectionKind::RiskRegister), + NoteType::Catalyst => Some(MemoSectionKind::CatalystCalendar), + NoteType::ValuationPoint | NoteType::ScenarioAssumption => Some(MemoSectionKind::ValuationWriteUp), + NoteType::EventTakeaway => Some(MemoSectionKind::EarningsRecap), + NoteType::ChannelCheck => Some(MemoSectionKind::WatchlistUpdate), + NoteType::Fact + | NoteType::Quote + | NoteType::ManagementSignal + | NoteType::Claim + | NoteType::IndustryObservation + | NoteType::CompetitorComparison => Some(MemoSectionKind::StockPitch), + NoteType::Question | NoteType::FollowUpTask | NoteType::SourceReference | NoteType::Uncertainty => None, + } +} + +#[cfg(test)] +mod tests { + use crate::research::types::{EvidenceStatus, MemoSectionKind, NoteType}; + + use super::build_memo_blocks; + + fn note(note_type: NoteType, evidence_status: EvidenceStatus) -> crate::research::types::ResearchNote { + let now = crate::research::util::now_rfc3339(); + crate::research::types::ResearchNote { + id: format!("note-{note_type:?}"), + workspace_id: "workspace-1".to_string(), + company_id: None, + ticker: Some("AAPL".to_string()), + source_id: Some("source-1".to_string()), + raw_text: "sample".to_string(), + cleaned_text: "sample".to_string(), + title: Some("Sample".to_string()), + note_type, + subtype: None, + analyst_status: 
crate::research::types::AnalystStatus::Accepted, + ai_annotation: Some("annotation".to_string()), + confidence: 0.8, + evidence_status, + inferred_links: Vec::new(), + ghost_status: crate::research::types::GhostStatus::None, + thesis_status: crate::research::types::ThesisStatus::AcceptedSupport, + created_at: now.clone(), + updated_at: now.clone(), + provenance: crate::research::types::NoteProvenance { + created_by: crate::research::types::ProvenanceActor::Manual, + capture_method: crate::research::types::CaptureMethod::QuickEntry, + source_kind: crate::research::types::SourceKind::Manual, + origin_note_id: None, + origin_ghost_id: None, + model_info: None, + created_at: now, + raw_input_hash: "hash".to_string(), + }, + tags: Vec::new(), + catalysts: Vec::new(), + risks: Vec::new(), + valuation_refs: Vec::new(), + time_horizon: None, + scenario: None, + priority: crate::research::types::NotePriority::Normal, + pinned: false, + archived: false, + revision: 1, + source_excerpt: None, + last_enriched_at: None, + last_linked_at: None, + stale_reason: None, + superseded_by_note_id: None, + } + } + + #[test] + fn build_memo_blocks_should_exclude_unsourced_questions() { + let blocks = build_memo_blocks( + &[ + note(NoteType::Question, EvidenceStatus::Unsourced), + note(NoteType::Fact, EvidenceStatus::Corroborated), + ], + &[], + ); + + assert_eq!(blocks.len(), 1); + assert_eq!(blocks[0].section_kind, MemoSectionKind::StockPitch); + } +} diff --git a/MosaicIQ/src-tauri/src/research/repository.rs b/MosaicIQ/src-tauri/src/research/repository.rs new file mode 100644 index 0000000..ab045dd --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/repository.rs @@ -0,0 +1,897 @@ +//! SQLite-backed persistence for research workspaces, notes, links, ghosts, and jobs. 
+ +use std::path::{Path, PathBuf}; + +use rusqlite::{params, Connection, OptionalExtension}; + +use crate::research::errors::{ResearchError, Result}; +use crate::research::types::{ + AuditEvent, GhostLifecycleState, GhostNote, GhostVisibilityState, JobKind, JobStatus, NoteLink, + NoteType, PipelineJob, ResearchNote, ResearchWorkspace, SourceRecord, +}; + +#[derive(Clone)] +pub struct ResearchRepository { + db_path: PathBuf, +} + +impl ResearchRepository { + pub fn new(db_path: PathBuf) -> Result { + if let Some(parent) = db_path.parent() { + std::fs::create_dir_all(parent)?; + } + + let repository = Self { db_path }; + let connection = repository.open_connection()?; + repository.initialize_schema(&connection)?; + Ok(repository) + } + + pub async fn create_workspace(&self, workspace: ResearchWorkspace) -> Result { + let value = workspace.clone(); + self.with_connection(move |connection| { + connection.execute( + "INSERT INTO research_workspaces ( + id, name, primary_ticker, stage, default_view, archived, created_at, updated_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + params![ + value.id, + value.name, + value.primary_ticker, + serde_json::to_string(&value.stage)?, + serde_json::to_string(&value.default_view)?, + i64::from(value.archived), + value.created_at, + value.updated_at, + serde_json::to_string(&value)?, + ], + )?; + Ok(value) + }) + .await + } + + pub async fn save_workspace(&self, workspace: ResearchWorkspace) -> Result { + let value = workspace.clone(); + self.with_connection(move |connection| { + connection.execute( + "INSERT INTO research_workspaces ( + id, name, primary_ticker, stage, default_view, archived, created_at, updated_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9) + ON CONFLICT(id) DO UPDATE SET + name = excluded.name, + primary_ticker = excluded.primary_ticker, + stage = excluded.stage, + default_view = excluded.default_view, + archived = excluded.archived, + updated_at = excluded.updated_at, + 
entity_json = excluded.entity_json", + params![ + value.id, + value.name, + value.primary_ticker, + serde_json::to_string(&value.stage)?, + serde_json::to_string(&value.default_view)?, + i64::from(value.archived), + value.created_at, + value.updated_at, + serde_json::to_string(&value)?, + ], + )?; + Ok(value) + }) + .await + } + + pub async fn list_workspaces(&self) -> Result> { + self.with_connection(|connection| { + let mut statement = connection.prepare( + "SELECT entity_json + FROM research_workspaces + WHERE archived = 0 + ORDER BY updated_at DESC", + )?; + let rows = statement.query_map([], |row| row.get::<_, String>(0))?; + rows.collect::, _>>()? + .into_iter() + .map(|json| serde_json::from_str(&json).map_err(ResearchError::from)) + .collect() + }) + .await + } + + pub async fn get_workspace(&self, workspace_id: &str) -> Result { + let workspace_id = workspace_id.to_string(); + self.with_connection(move |connection| { + let json = connection + .query_row( + "SELECT entity_json FROM research_workspaces WHERE id = ?1", + params![workspace_id], + |row| row.get::<_, String>(0), + ) + .optional()? + .ok_or_else(|| ResearchError::WorkspaceNotFound(workspace_id.clone()))?; + Ok(serde_json::from_str(&json)?) 
+ }) + .await + } + + pub async fn create_note(&self, note: ResearchNote) -> Result { + self.save_note(note).await + } + + pub async fn save_note(&self, note: ResearchNote) -> Result { + let value = note.clone(); + self.with_connection(move |connection| { + connection.execute( + "INSERT INTO research_notes ( + id, workspace_id, note_type, ticker, source_id, archived, pinned, revision, + evidence_status, created_at, updated_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12) + ON CONFLICT(id) DO UPDATE SET + workspace_id = excluded.workspace_id, + note_type = excluded.note_type, + ticker = excluded.ticker, + source_id = excluded.source_id, + archived = excluded.archived, + pinned = excluded.pinned, + revision = excluded.revision, + evidence_status = excluded.evidence_status, + updated_at = excluded.updated_at, + entity_json = excluded.entity_json", + params![ + value.id, + value.workspace_id, + serde_json::to_string(&value.note_type)?, + value.ticker, + value.source_id, + i64::from(value.archived), + i64::from(value.pinned), + i64::from(value.revision), + serde_json::to_string(&value.evidence_status)?, + value.created_at, + value.updated_at, + serde_json::to_string(&value)?, + ], + )?; + connection.execute( + "DELETE FROM research_fts WHERE note_id = ?1", + params![value.id.clone()], + )?; + connection.execute( + "INSERT INTO research_fts (note_id, title, cleaned_text, ai_annotation) + VALUES (?1, ?2, ?3, ?4)", + params![ + value.id, + value.title.clone().unwrap_or_default(), + value.cleaned_text, + value.ai_annotation.unwrap_or_default(), + ], + )?; + Ok(value) + }) + .await + } + + pub async fn get_note(&self, note_id: &str) -> Result { + let note_id = note_id.to_string(); + self.with_connection(move |connection| { + let json = connection + .query_row( + "SELECT entity_json FROM research_notes WHERE id = ?1", + params![note_id], + |row| row.get::<_, String>(0), + ) + .optional()? 
+ .ok_or_else(|| ResearchError::NoteNotFound(note_id.clone()))?; + Ok(serde_json::from_str(&json)?) + }) + .await + } + + pub async fn list_notes( + &self, + workspace_id: &str, + include_archived: bool, + note_type: Option, + ) -> Result> { + let workspace_id = workspace_id.to_string(); + self.with_connection(move |connection| { + let mut statement = if note_type.is_some() { + connection.prepare( + "SELECT entity_json FROM research_notes + WHERE workspace_id = ?1 AND (?2 = 1 OR archived = 0) AND note_type = ?3 + ORDER BY pinned DESC, updated_at DESC", + )? + } else { + connection.prepare( + "SELECT entity_json FROM research_notes + WHERE workspace_id = ?1 AND (?2 = 1 OR archived = 0) + ORDER BY pinned DESC, updated_at DESC", + )? + }; + + let note_type_json = note_type + .map(|value| serde_json::to_string(&value)) + .transpose()?; + let rows = if let Some(note_type_json) = note_type_json { + statement.query_map( + params![workspace_id, i64::from(include_archived), note_type_json], + |row| row.get::<_, String>(0), + )? + } else { + statement.query_map(params![workspace_id, i64::from(include_archived)], |row| { + row.get::<_, String>(0) + })? + }; + + rows.collect::, _>>()? 
+ .into_iter() + .map(|json| serde_json::from_str(&json).map_err(ResearchError::from)) + .collect() + }) + .await + } + + pub async fn archive_note(&self, note_id: &str, archived: bool) -> Result { + let mut note = self.get_note(note_id).await?; + note.archived = archived; + note.updated_at = crate::research::util::now_rfc3339(); + self.save_note(note).await + } + + pub async fn find_source_reference_note( + &self, + workspace_id: &str, + source_id: &str, + ) -> Result> { + let workspace_id = workspace_id.to_string(); + let source_id = source_id.to_string(); + self.with_connection(move |connection| { + let json = connection + .query_row( + "SELECT entity_json + FROM research_notes + WHERE workspace_id = ?1 AND source_id = ?2 AND note_type = ?3 + LIMIT 1", + params![ + workspace_id, + source_id, + serde_json::to_string(&NoteType::SourceReference)?, + ], + |row| row.get::<_, String>(0), + ) + .optional()?; + json.map(|value| serde_json::from_str(&value).map_err(ResearchError::from)) + .transpose() + }) + .await + } + + pub async fn save_source(&self, source: SourceRecord) -> Result { + let value = source.clone(); + self.with_connection(move |connection| { + connection.execute( + "INSERT INTO source_records ( + id, workspace_id, ticker, kind, published_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6) + ON CONFLICT(id) DO UPDATE SET + workspace_id = excluded.workspace_id, + ticker = excluded.ticker, + kind = excluded.kind, + published_at = excluded.published_at, + entity_json = excluded.entity_json", + params![ + value.id, + value.workspace_id, + value.ticker, + serde_json::to_string(&value.kind)?, + value.published_at, + serde_json::to_string(&value)?, + ], + )?; + Ok(value) + }) + .await + } + + pub async fn list_sources(&self, workspace_id: &str) -> Result> { + let workspace_id = workspace_id.to_string(); + self.with_connection(move |connection| { + let mut statement = connection.prepare( + "SELECT entity_json FROM source_records WHERE workspace_id = ?1 ORDER BY 
published_at DESC, id DESC", + )?; + let rows = statement.query_map(params![workspace_id], |row| row.get::<_, String>(0))?; + rows.collect::, _>>()? + .into_iter() + .map(|json| serde_json::from_str(&json).map_err(ResearchError::from)) + .collect() + }) + .await + } + + pub async fn list_sources_by_ids(&self, source_ids: &[String]) -> Result> { + if source_ids.is_empty() { + return Ok(Vec::new()); + } + + let ids = source_ids.to_vec(); + self.with_connection(move |connection| { + let mut results = Vec::new(); + let mut statement = connection.prepare("SELECT entity_json FROM source_records WHERE id = ?1")?; + for source_id in ids { + if let Some(json) = statement + .query_row(params![source_id], |row| row.get::<_, String>(0)) + .optional()? + { + results.push(serde_json::from_str(&json)?); + } + } + Ok(results) + }) + .await + } + + pub async fn find_source_by_checksum_or_accession( + &self, + workspace_id: &str, + checksum: Option<&str>, + filing_accession: Option<&str>, + ) -> Result> { + let workspace_id = workspace_id.to_string(); + let checksum = checksum.map(ToOwned::to_owned); + let filing_accession = filing_accession.map(ToOwned::to_owned); + self.with_connection(move |connection| { + let json = if let Some(checksum) = checksum { + connection + .query_row( + "SELECT entity_json FROM source_records + WHERE workspace_id = ?1 AND json_extract(entity_json, '$.checksum') = ?2 + LIMIT 1", + params![workspace_id, checksum], + |row| row.get::<_, String>(0), + ) + .optional()? + } else if let Some(filing_accession) = filing_accession { + connection + .query_row( + "SELECT entity_json FROM source_records + WHERE workspace_id = ?1 AND json_extract(entity_json, '$.filingAccession') = ?2 + LIMIT 1", + params![workspace_id, filing_accession], + |row| row.get::<_, String>(0), + ) + .optional()? 
+ } else { + None + }; + + json.map(|value| serde_json::from_str(&value).map_err(ResearchError::from)) + .transpose() + }) + .await + } + + pub async fn replace_links_for_workspace( + &self, + workspace_id: &str, + links: Vec, + ) -> Result> { + let workspace_id = workspace_id.to_string(); + let values = links.clone(); + self.with_connection(move |connection| { + let transaction = connection.transaction()?; + transaction.execute("DELETE FROM note_links WHERE workspace_id = ?1", params![workspace_id])?; + for link in &values { + transaction.execute( + "INSERT INTO note_links ( + id, workspace_id, from_note_id, to_note_id, link_type, stale, updated_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)", + params![ + link.id, + link.workspace_id, + link.from_note_id, + link.to_note_id, + serde_json::to_string(&link.link_type)?, + i64::from(link.stale), + link.updated_at, + serde_json::to_string(link)?, + ], + )?; + } + transaction.commit()?; + Ok(values) + }) + .await + } + + pub async fn list_links(&self, workspace_id: &str, note_id: Option<&str>) -> Result> { + let workspace_id = workspace_id.to_string(); + let note_id = note_id.map(ToOwned::to_owned); + self.with_connection(move |connection| { + let mut statement = if note_id.is_some() { + connection.prepare( + "SELECT entity_json + FROM note_links + WHERE workspace_id = ?1 AND (from_note_id = ?2 OR to_note_id = ?2) + ORDER BY updated_at DESC", + )? + } else { + connection.prepare( + "SELECT entity_json + FROM note_links + WHERE workspace_id = ?1 + ORDER BY updated_at DESC", + )? + }; + let rows = if let Some(note_id) = note_id { + statement.query_map(params![workspace_id, note_id], |row| row.get::<_, String>(0))? + } else { + statement.query_map(params![workspace_id], |row| row.get::<_, String>(0))? + }; + rows.collect::, _>>()? 
+ .into_iter() + .map(|json| serde_json::from_str(&json).map_err(ResearchError::from)) + .collect() + }) + .await + } + + pub async fn replace_ghosts_for_workspace( + &self, + workspace_id: &str, + ghosts: Vec, + ) -> Result> { + let workspace_id = workspace_id.to_string(); + let generated = ghosts.clone(); + self.with_connection(move |connection| { + let transaction = connection.transaction()?; + + let mut existing_statement = transaction.prepare( + "SELECT id, entity_json FROM ghost_notes WHERE workspace_id = ?1", + )?; + let existing_rows = existing_statement.query_map(params![workspace_id.clone()], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + })?; + let existing = existing_rows.collect::, _>>()?; + transaction.execute("DELETE FROM ghost_notes WHERE workspace_id = ?1", params![workspace_id])?; + + for mut ghost in generated.clone() { + if let Some((_, json)) = existing.iter().find(|(id, _)| *id == ghost.id) { + let prior: GhostNote = serde_json::from_str(json)?; + if matches!( + prior.state, + GhostLifecycleState::Accepted + | GhostLifecycleState::Dismissed + | GhostLifecycleState::Converted + | GhostLifecycleState::Ignored + ) { + ghost.state = prior.state; + } + if matches!(prior.visibility_state, GhostVisibilityState::Pinned) { + ghost.visibility_state = prior.visibility_state; + } + ghost.promoted_note_id = prior.promoted_note_id; + } + + transaction.execute( + "INSERT INTO ghost_notes ( + id, workspace_id, state, visibility_state, updated_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params![ + ghost.id, + ghost.workspace_id, + serde_json::to_string(&ghost.state)?, + serde_json::to_string(&ghost.visibility_state)?, + ghost.updated_at, + serde_json::to_string(&ghost)?, + ], + )?; + } + + transaction.commit()?; + Ok(generated) + }) + .await + } + + pub async fn get_ghost(&self, ghost_note_id: &str) -> Result { + let ghost_note_id = ghost_note_id.to_string(); + self.with_connection(move |connection| { + let json = connection + 
.query_row( + "SELECT entity_json FROM ghost_notes WHERE id = ?1", + params![ghost_note_id], + |row| row.get::<_, String>(0), + ) + .optional()? + .ok_or_else(|| ResearchError::GhostNoteNotFound(ghost_note_id.clone()))?; + Ok(serde_json::from_str(&json)?) + }) + .await + } + + pub async fn save_ghost(&self, ghost: GhostNote) -> Result { + let value = ghost.clone(); + self.with_connection(move |connection| { + connection.execute( + "INSERT INTO ghost_notes ( + id, workspace_id, state, visibility_state, updated_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6) + ON CONFLICT(id) DO UPDATE SET + state = excluded.state, + visibility_state = excluded.visibility_state, + updated_at = excluded.updated_at, + entity_json = excluded.entity_json", + params![ + value.id, + value.workspace_id, + serde_json::to_string(&value.state)?, + serde_json::to_string(&value.visibility_state)?, + value.updated_at, + serde_json::to_string(&value)?, + ], + )?; + Ok(value) + }) + .await + } + + pub async fn list_ghosts(&self, workspace_id: &str, include_hidden: bool) -> Result> { + let workspace_id = workspace_id.to_string(); + self.with_connection(move |connection| { + let mut statement = connection.prepare( + "SELECT entity_json FROM ghost_notes + WHERE workspace_id = ?1 + ORDER BY updated_at DESC", + )?; + let rows = statement.query_map(params![workspace_id], |row| row.get::<_, String>(0))?; + rows.collect::, _>>()? 
+ .into_iter() + .map(|json| serde_json::from_str::(&json).map_err(ResearchError::from)) + .collect::>>() + .map(|ghosts| { + ghosts + .into_iter() + .filter(|ghost| include_hidden || !matches!(ghost.visibility_state, GhostVisibilityState::Hidden)) + .collect() + }) + }) + .await + } + + pub async fn enqueue_jobs(&self, jobs: Vec) -> Result> { + let values = jobs.clone(); + self.with_connection(move |connection| { + let transaction = connection.transaction()?; + for job in &values { + transaction.execute( + "INSERT INTO pipeline_jobs ( + id, workspace_id, entity_id, job_kind, status, next_attempt_at, updated_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) + ON CONFLICT(id) DO UPDATE SET + status = excluded.status, + next_attempt_at = excluded.next_attempt_at, + updated_at = excluded.updated_at, + entity_json = excluded.entity_json", + params![ + job.id, + job.workspace_id, + job.entity_id, + serde_json::to_string(&job.job_kind)?, + serde_json::to_string(&job.status)?, + job.next_attempt_at, + job.updated_at, + serde_json::to_string(job)?, + ], + )?; + } + transaction.commit()?; + Ok(values) + }) + .await + } + + pub async fn save_job(&self, job: PipelineJob) -> Result { + let value = job.clone(); + self.enqueue_jobs(vec![value.clone()]).await?; + Ok(value) + } + + pub async fn list_due_jobs(&self, limit: usize) -> Result> { + self.with_connection(move |connection| { + let mut statement = connection.prepare( + "SELECT entity_json + FROM pipeline_jobs + WHERE status IN (?1, ?2) + AND (next_attempt_at IS NULL OR next_attempt_at <= datetime('now')) + ORDER BY updated_at ASC + LIMIT ?3", + )?; + let rows = statement.query_map( + params![ + serde_json::to_string(&JobStatus::Queued)?, + serde_json::to_string(&JobStatus::Failed)?, + i64::try_from(limit).map_err(|error| ResearchError::Validation(error.to_string()))?, + ], + |row| row.get::<_, String>(0), + )?; + rows.collect::, _>>()? 
+ .into_iter() + .map(|json| serde_json::from_str(&json).map_err(ResearchError::from)) + .collect() + }) + .await + } + + pub async fn list_jobs(&self, workspace_id: &str, job_kind: Option) -> Result> { + let workspace_id = workspace_id.to_string(); + self.with_connection(move |connection| { + let mut statement = if job_kind.is_some() { + connection.prepare( + "SELECT entity_json FROM pipeline_jobs + WHERE workspace_id = ?1 AND job_kind = ?2 + ORDER BY updated_at DESC", + )? + } else { + connection.prepare( + "SELECT entity_json FROM pipeline_jobs + WHERE workspace_id = ?1 + ORDER BY updated_at DESC", + )? + }; + let rows = if let Some(job_kind) = job_kind { + statement.query_map( + params![workspace_id, serde_json::to_string(&job_kind)?], + |row| row.get::<_, String>(0), + )? + } else { + statement.query_map(params![workspace_id], |row| row.get::<_, String>(0))? + }; + rows.collect::, _>>()? + .into_iter() + .map(|json| serde_json::from_str(&json).map_err(ResearchError::from)) + .collect() + }) + .await + } + + pub async fn retry_failed_jobs( + &self, + workspace_id: &str, + job_kind: Option, + ) -> Result> { + let mut jobs = self.list_jobs(workspace_id, job_kind).await?; + let mut retried = Vec::new(); + for job in &mut jobs { + if matches!(job.status, JobStatus::Failed | JobStatus::Skipped) { + job.status = JobStatus::Queued; + job.next_attempt_at = None; + job.last_error = None; + job.updated_at = crate::research::util::now_rfc3339(); + retried.push(job.clone()); + } + } + self.enqueue_jobs(retried.clone()).await?; + Ok(retried) + } + + pub async fn append_audit_event(&self, event: AuditEvent) -> Result { + let value = event.clone(); + self.with_connection(move |connection| { + connection.execute( + "INSERT INTO audit_events ( + id, workspace_id, entity_id, entity_kind, created_at, entity_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params![ + value.id, + value.workspace_id, + value.entity_id, + serde_json::to_string(&value.entity_kind)?, + value.created_at, + 
serde_json::to_string(&value)?, + ], + )?; + Ok(value) + }) + .await + } + + pub async fn list_audit_events_for_entity(&self, entity_id: &str) -> Result> { + let entity_id = entity_id.to_string(); + self.with_connection(move |connection| { + let mut statement = connection.prepare( + "SELECT entity_json FROM audit_events WHERE entity_id = ?1 ORDER BY created_at ASC", + )?; + let rows = statement.query_map(params![entity_id], |row| row.get::<_, String>(0))?; + rows.collect::, _>>()? + .into_iter() + .map(|json| serde_json::from_str(&json).map_err(ResearchError::from)) + .collect() + }) + .await + } + + pub async fn list_audit_events_for_workspace(&self, workspace_id: &str) -> Result> { + let workspace_id = workspace_id.to_string(); + self.with_connection(move |connection| { + let mut statement = connection.prepare( + "SELECT entity_json FROM audit_events WHERE workspace_id = ?1 ORDER BY created_at ASC", + )?; + let rows = statement.query_map(params![workspace_id], |row| row.get::<_, String>(0))?; + rows.collect::, _>>()? 
+ .into_iter() + .map(|json| serde_json::from_str(&json).map_err(ResearchError::from)) + .collect() + }) + .await + } + + fn open_connection(&self) -> Result { + open_connection(&self.db_path) + } + + fn initialize_schema(&self, connection: &Connection) -> Result<()> { + connection.execute_batch( + "PRAGMA foreign_keys = ON; + PRAGMA journal_mode = WAL; + PRAGMA synchronous = NORMAL; + CREATE TABLE IF NOT EXISTS research_workspaces ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + primary_ticker TEXT NOT NULL, + stage TEXT NOT NULL, + default_view TEXT NOT NULL, + archived INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + entity_json TEXT NOT NULL + ); + CREATE TABLE IF NOT EXISTS research_notes ( + id TEXT PRIMARY KEY, + workspace_id TEXT NOT NULL REFERENCES research_workspaces(id) ON DELETE CASCADE, + note_type TEXT NOT NULL, + ticker TEXT, + source_id TEXT, + archived INTEGER NOT NULL DEFAULT 0, + pinned INTEGER NOT NULL DEFAULT 0, + revision INTEGER NOT NULL, + evidence_status TEXT NOT NULL, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + entity_json TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS research_notes_workspace_archived_updated_idx + ON research_notes (workspace_id, archived, updated_at DESC); + CREATE INDEX IF NOT EXISTS research_notes_workspace_type_archived_idx + ON research_notes (workspace_id, note_type, archived); + CREATE INDEX IF NOT EXISTS research_notes_workspace_ticker_updated_idx + ON research_notes (workspace_id, ticker, updated_at DESC); + CREATE TABLE IF NOT EXISTS note_links ( + id TEXT PRIMARY KEY, + workspace_id TEXT NOT NULL REFERENCES research_workspaces(id) ON DELETE CASCADE, + from_note_id TEXT NOT NULL, + to_note_id TEXT NOT NULL, + link_type TEXT NOT NULL, + stale INTEGER NOT NULL DEFAULT 0, + updated_at TEXT NOT NULL, + entity_json TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS note_links_workspace_from_type_idx + ON note_links (workspace_id, from_note_id, link_type); + CREATE 
TABLE IF NOT EXISTS ghost_notes ( + id TEXT PRIMARY KEY, + workspace_id TEXT NOT NULL REFERENCES research_workspaces(id) ON DELETE CASCADE, + state TEXT NOT NULL, + visibility_state TEXT NOT NULL, + updated_at TEXT NOT NULL, + entity_json TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS ghost_notes_workspace_state_visibility_idx + ON ghost_notes (workspace_id, state, visibility_state, updated_at DESC); + CREATE TABLE IF NOT EXISTS source_records ( + id TEXT PRIMARY KEY, + workspace_id TEXT NOT NULL REFERENCES research_workspaces(id) ON DELETE CASCADE, + ticker TEXT, + kind TEXT NOT NULL, + published_at TEXT, + entity_json TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS source_records_workspace_ticker_kind_published_idx + ON source_records (workspace_id, ticker, kind, published_at DESC); + CREATE TABLE IF NOT EXISTS source_excerpts ( + id TEXT PRIMARY KEY, + source_id TEXT NOT NULL, + entity_json TEXT NOT NULL + ); + CREATE TABLE IF NOT EXISTS pipeline_jobs ( + id TEXT PRIMARY KEY, + workspace_id TEXT NOT NULL REFERENCES research_workspaces(id) ON DELETE CASCADE, + entity_id TEXT NOT NULL, + job_kind TEXT NOT NULL, + status TEXT NOT NULL, + next_attempt_at TEXT, + updated_at TEXT NOT NULL, + entity_json TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS pipeline_jobs_status_next_attempt_idx + ON pipeline_jobs (status, next_attempt_at); + CREATE TABLE IF NOT EXISTS workspace_view_state ( + workspace_id TEXT PRIMARY KEY, + entity_json TEXT NOT NULL + ); + CREATE TABLE IF NOT EXISTS memo_exports ( + id TEXT PRIMARY KEY, + workspace_id TEXT NOT NULL, + entity_json TEXT NOT NULL + ); + CREATE TABLE IF NOT EXISTS audit_events ( + id TEXT PRIMARY KEY, + workspace_id TEXT NOT NULL REFERENCES research_workspaces(id) ON DELETE CASCADE, + entity_id TEXT NOT NULL, + entity_kind TEXT NOT NULL, + created_at TEXT NOT NULL, + entity_json TEXT NOT NULL + ); + CREATE VIRTUAL TABLE IF NOT EXISTS research_fts USING fts5(note_id UNINDEXED, title, cleaned_text, ai_annotation);", + )?; + 
Ok(()) + } + + async fn with_connection(&self, task: F) -> Result + where + F: FnOnce(&mut Connection) -> Result + Send + 'static, + T: Send + 'static, + { + let db_path = self.db_path.clone(); + tokio::task::spawn_blocking(move || { + let mut connection = open_connection(&db_path)?; + task(&mut connection) + }) + .await + .map_err(|error| ResearchError::Join(error.to_string()))? + } +} + +fn open_connection(path: &Path) -> Result { + let connection = Connection::open(path)?; + connection.execute_batch( + "PRAGMA foreign_keys = ON; + PRAGMA journal_mode = WAL; + PRAGMA synchronous = NORMAL;", + )?; + Ok(connection) +} + +#[cfg(test)] +mod tests { + use tempfile::tempdir; + + use super::ResearchRepository; + use crate::research::types::{ResearchStage, ResearchWorkspace, WorkspaceScope, WorkspaceViewKind}; + + #[tokio::test] + async fn new_should_initialize_schema() { + let dir = tempdir().unwrap(); + let repository = ResearchRepository::new(dir.path().join("research.sqlite")).unwrap(); + let now = crate::research::util::now_rfc3339(); + repository + .create_workspace(ResearchWorkspace { + id: "workspace-1".to_string(), + name: "AAPL".to_string(), + primary_ticker: "AAPL".to_string(), + scope: WorkspaceScope::SingleCompany, + stage: ResearchStage::Capture, + default_view: WorkspaceViewKind::Canvas, + pinned_note_ids: Vec::new(), + archived: false, + created_at: now.clone(), + updated_at: now, + }) + .await + .unwrap(); + + let workspaces = repository.list_workspaces().await.unwrap(); + assert_eq!(workspaces.len(), 1); + } +} diff --git a/MosaicIQ/src-tauri/src/research/types.rs b/MosaicIQ/src-tauri/src/research/types.rs new file mode 100644 index 0000000..0eff5fb --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/types.rs @@ -0,0 +1,851 @@ +//! Serializable research-domain entities, enums, and command DTOs. 
+ +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum WorkspaceScope { + #[default] + SingleCompany, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum ResearchStage { + #[default] + Capture, + Organize, + Thesis, + Drafting, + Monitor, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum WorkspaceViewKind { + #[default] + Canvas, + Kanban, + Graph, + ThesisBuilder, + MemoDrafting, + EvidenceTrace, + CatalystRiskMap, + Timeline, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum NoteType { + #[default] + Fact, + Quote, + ManagementSignal, + Claim, + Thesis, + SubThesis, + Risk, + Catalyst, + ValuationPoint, + ScenarioAssumption, + IndustryObservation, + CompetitorComparison, + Question, + Contradiction, + Uncertainty, + FollowUpTask, + SourceReference, + EventTakeaway, + ChannelCheck, + MosaicInsight, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum AnalystStatus { + #[default] + Captured, + ReviewQueue, + Reviewed, + Accepted, + NeedsFollowUp, + Dismissed, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum EvidenceStatus { + #[default] + Unsourced, + SourceLinked, + Quoted, + Corroborated, + Inferred, + Contradicted, + Stale, + Superseded, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum GhostStatus { + #[default] + None, + CandidateInput, + GhostGenerated, + GhostPromoted, + GhostDismissed, +} + +#[derive(Debug, 
Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum ThesisStatus { + #[default] + None, + CandidateSupport, + CandidateCore, + AcceptedSupport, + AcceptedCore, + BullCase, + BearCase, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum TimeHorizon { + NearTerm, + NextEarnings, + TwelveMonth, + MultiYear, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum ScenarioKind { + Base, + Bull, + Bear, + DownsideCase, + UpsideCase, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum NotePriority { + Low, + #[default] + Normal, + High, + Critical, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum StaleReason { + NewFiling, + NewGuidance, + NewEarnings, + SourceExpired, + NoteUpdated, + SupersededByAnalyst, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum SourceKind { + Filing, + Transcript, + Article, + NewsFeed, + Model, + #[default] + Manual, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum FreshnessBucket { + Fresh, + Aging, + Stale, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum EvidenceStrength { + PrimaryVerified, + PrimaryExcerpt, + SecondaryReputable, + SecondaryUnverified, + AnalystModel, + ManualUnsourced, + AiInferenceOnly, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum ProvenanceActor { + Manual, + AiEnrichment, + GhostConversion, + Import, +} + +#[derive(Debug, Clone, Copy, 
Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum CaptureMethod { + QuickEntry, + TranscriptClip, + FilingExtract, + NewsImport, + ManualLink, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ModelInfo { + pub task_profile: String, + pub model: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub provider: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct NoteProvenance { + pub created_by: ProvenanceActor, + pub capture_method: CaptureMethod, + pub source_kind: SourceKind, + #[serde(skip_serializing_if = "Option::is_none")] + pub origin_note_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub origin_ghost_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub model_info: Option, + pub created_at: String, + pub raw_input_hash: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct SourceExcerpt { + pub source_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub excerpt_text: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub location_label: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub start_offset: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub end_offset: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ValuationRef { + pub metric: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub multiple: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub unit: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub basis: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ResearchWorkspace { + pub id: String, + pub name: String, + pub 
primary_ticker: String, + pub scope: WorkspaceScope, + pub stage: ResearchStage, + pub default_view: WorkspaceViewKind, + pub pinned_note_ids: Vec, + pub archived: bool, + pub created_at: String, + pub updated_at: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ResearchNote { + pub id: String, + pub workspace_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub company_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ticker: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub source_id: Option, + pub raw_text: String, + pub cleaned_text: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + pub note_type: NoteType, + #[serde(skip_serializing_if = "Option::is_none")] + pub subtype: Option, + pub analyst_status: AnalystStatus, + #[serde(skip_serializing_if = "Option::is_none")] + pub ai_annotation: Option, + pub confidence: f32, + pub evidence_status: EvidenceStatus, + pub inferred_links: Vec, + pub ghost_status: GhostStatus, + pub thesis_status: ThesisStatus, + pub created_at: String, + pub updated_at: String, + pub provenance: NoteProvenance, + pub tags: Vec, + pub catalysts: Vec, + pub risks: Vec, + pub valuation_refs: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub time_horizon: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub scenario: Option, + pub priority: NotePriority, + pub pinned: bool, + pub archived: bool, + pub revision: u32, + #[serde(skip_serializing_if = "Option::is_none")] + pub source_excerpt: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub last_enriched_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub last_linked_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub stale_reason: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub superseded_by_note_id: Option, +} + +#[derive(Debug, Clone, Copy, 
Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum LinkType { + Supports, + Contradicts, + Qualifies, + DerivedFrom, + SourcedBy, + Updates, + Supersedes, + PeerReadthrough, + ValuationDependsOn, + CatalystFor, + RiskTo, + TimeframeConflict, + AssumptionFor, + ManagementVsReality, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum LinkStrength { + Weak, + Medium, + Strong, + Critical, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum EvidenceBasis { + Lexical, + SharedSource, + Temporal, + Numerical, + Structured, + ModelAssisted, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum LinkOrigin { + Heuristic, + Ai, + Analyst, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct NoteLink { + pub id: String, + pub workspace_id: String, + pub from_note_id: String, + pub to_note_id: String, + pub link_type: LinkType, + pub directional: bool, + pub confidence: f32, + pub strength: LinkStrength, + pub evidence_basis: EvidenceBasis, + pub created_by: LinkOrigin, + pub created_at: String, + pub updated_at: String, + pub source_revision_pair: (u32, u32), + pub stale: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub stale_reason: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum GhostNoteClass { + CandidateThesis, + CandidateRisk, + CandidateCatalyst, + MissingEvidencePrompt, + ContradictionAlert, + ValuationBridge, + ScenarioImplication, + MemoOutlineSuggestion, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] +#[serde(rename_all = "snake_case")] +pub enum GhostTone { + #[default] + Tentative, +} + 
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum GhostVisibilityState { + Hidden, + Collapsed, + Visible, + Pinned, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum GhostLifecycleState { + Generated, + Visible, + Ignored, + Dismissed, + Accepted, + Converted, + Superseded, + Stale, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum MemoSectionKind { + StockPitch, + InvestmentMemo, + BullCase, + BearCase, + CatalystCalendar, + RiskRegister, + ValuationWriteUp, + EarningsPreview, + EarningsRecap, + WatchlistUpdate, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct GhostNote { + pub id: String, + pub workspace_id: String, + pub ghost_class: GhostNoteClass, + pub headline: String, + pub body: String, + pub tone: GhostTone, + pub confidence: f32, + pub visibility_state: GhostVisibilityState, + pub state: GhostLifecycleState, + pub supporting_note_ids: Vec, + pub contradicting_note_ids: Vec, + pub source_ids: Vec, + pub evidence_threshold_met: bool, + pub created_at: String, + pub updated_at: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub superseded_by_ghost_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub promoted_note_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub memo_section_hint: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct SourceRecord { + pub id: String, + pub workspace_id: String, + pub kind: SourceKind, + #[serde(skip_serializing_if = "Option::is_none")] + pub ticker: Option, + pub title: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub publisher: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub url: 
Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub canonical_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub filing_accession: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub form_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub published_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub as_of_date: Option, + pub ingested_at: String, + pub freshness_bucket: FreshnessBucket, + #[serde(skip_serializing_if = "Option::is_none")] + pub checksum: Option, + pub metadata_json: Value, + #[serde(skip_serializing_if = "Option::is_none")] + pub superseded_by_source_id: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum JobKind { + EnrichNote, + InferLinks, + EvaluateDuplicates, + EvaluateGhosts, + RefreshSourceMetadata, + RecalculateStaleness, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum JobStatus { + Queued, + Running, + Completed, + Failed, + Skipped, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct PipelineJob { + pub id: String, + pub workspace_id: String, + pub entity_id: String, + pub job_kind: JobKind, + pub status: JobStatus, + pub attempt_count: u32, + pub max_attempts: u32, + #[serde(skip_serializing_if = "Option::is_none")] + pub next_attempt_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub last_error: Option, + pub payload_json: Value, + pub created_at: String, + pub updated_at: String, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum AuditEntityKind { + Workspace, + Note, + Link, + Ghost, + Source, + Job, + MemoExport, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub 
enum AuditActor { + Analyst, + System, + Ai, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct AuditEvent { + pub id: String, + pub workspace_id: String, + pub entity_id: String, + pub entity_kind: AuditEntityKind, + pub action: String, + pub actor: AuditActor, + #[serde(skip_serializing_if = "Option::is_none")] + pub prior_revision: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub new_revision: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub job_id: Option, + pub source_ids: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub detail: Option, + pub created_at: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct PlacementHint { + pub tile_lane: String, + pub kanban_column: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub graph_anchor_note_id: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct NoteCaptureResult { + pub note: ResearchNote, + pub placement_hint: PlacementHint, + pub queued_jobs: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct WorkspaceProjection { + pub workspace: ResearchWorkspace, + pub active_view: WorkspaceViewKind, + pub notes: Vec, + pub links: Vec, + pub ghosts: Vec, + pub memo_blocks: Vec, + pub graph_nodes: Vec, + pub graph_edges: Vec, + pub kanban_columns: Vec, + pub timeline_events: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct GraphNode { + pub id: String, + pub label: String, + pub kind: String, + pub confidence: f32, + pub evidence_status: EvidenceStatus, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct GraphEdge { + pub id: String, + pub from: String, + pub to: String, + pub 
link_type: LinkType, + pub strength: LinkStrength, + pub confidence: f32, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct KanbanColumn { + pub key: String, + pub label: String, + pub notes: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct TimelineEvent { + pub id: String, + pub label: String, + pub note_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub at: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct MemoBlockCandidate { + pub section_kind: MemoSectionKind, + pub headline: String, + pub body: String, + pub source_note_ids: Vec, + pub citation_refs: Vec, + pub confidence: f32, + pub accepted: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ResearchBundleExport { + pub workspace: ResearchWorkspace, + pub notes: Vec, + pub links: Vec, + pub ghosts: Vec, + pub sources: Vec, + pub audit_events: Vec, + pub markdown_memo: String, + pub json_bundle: Value, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct NoteAuditTrail { + pub note: ResearchNote, + pub links: Vec, + pub related_ghosts: Vec, + pub sources: Vec, + pub audit_events: Vec, + pub memo_blocks: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct SourceReferenceInput { + pub kind: SourceKind, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub published_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub filing_accession: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub form_type: Option, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub excerpt_text: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub location_label: Option, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CreateResearchWorkspaceRequest { + pub name: String, + pub primary_ticker: String, + #[serde(default)] + pub stage: ResearchStage, + #[serde(default)] + pub default_view: WorkspaceViewKind, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CaptureResearchNoteRequest { + pub workspace_id: String, + pub raw_text: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub company_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ticker: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub source_ref: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub position_hint: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub user_note_type_override: Option, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct UpdateResearchNoteRequest { + pub note_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_text: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub note_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub subtype: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub analyst_status: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub pinned: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub priority: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub thesis_status: Option, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ArchiveResearchNoteRequest { + pub note_id: String, + pub archived: bool, +} + 
+#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ListResearchNotesRequest { + pub workspace_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_archived: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub note_type: Option, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct GetWorkspaceProjectionRequest { + pub workspace_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub view: Option, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ListNoteLinksRequest { + pub workspace_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub note_id: Option, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ListWorkspaceGhostNotesRequest { + pub workspace_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub include_hidden: Option, +} + +#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum GhostReviewAction { + Accept, + Ignore, + Dismiss, + Pin, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ReviewGhostNoteRequest { + pub ghost_note_id: String, + pub action: GhostReviewAction, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct PromoteNoteToThesisRequest { + pub note_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub thesis_status: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub note_type: Option, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct RetryResearchJobsRequest { + pub workspace_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub job_kind: Option, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = 
"camelCase")] +pub struct GetNoteAuditTrailRequest { + pub note_id: String, +} + +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ExportResearchBundleRequest { + pub workspace_id: String, +} + diff --git a/MosaicIQ/src-tauri/src/research/util.rs b/MosaicIQ/src-tauri/src/research/util.rs new file mode 100644 index 0000000..bcad89a --- /dev/null +++ b/MosaicIQ/src-tauri/src/research/util.rs @@ -0,0 +1,41 @@ +//! Internal helpers shared across the research subsystem. + +use std::sync::atomic::{AtomicU64, Ordering}; + +use chrono::Utc; +use sha2::{Digest, Sha256}; + +static NEXT_ID: AtomicU64 = AtomicU64::new(1); + +pub(crate) fn now_rfc3339() -> String { + Utc::now().to_rfc3339() +} + +pub(crate) fn now_timestamp() -> i64 { + Utc::now().timestamp() +} + +pub(crate) fn generate_id(prefix: &str) -> String { + format!("{prefix}-{}-{}", now_timestamp(), NEXT_ID.fetch_add(1, Ordering::Relaxed)) +} + +pub(crate) fn sha256_hex(input: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(input.as_bytes()); + format!("{:x}", hasher.finalize()) +} + +pub(crate) fn normalize_text(input: &str) -> String { + input.split_whitespace().collect::>().join(" ") +} + +pub(crate) fn clean_title(input: &str, max_len: usize) -> String { + let normalized = normalize_text(input); + if normalized.chars().count() <= max_len { + return normalized; + } + + let mut shortened = normalized.chars().take(max_len.saturating_sub(1)).collect::(); + shortened.push('…'); + shortened +} diff --git a/MosaicIQ/src-tauri/src/state.rs b/MosaicIQ/src-tauri/src/state.rs index d24058d..5b6b78f 100644 --- a/MosaicIQ/src-tauri/src/state.rs +++ b/MosaicIQ/src-tauri/src/state.rs @@ -4,11 +4,12 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; -use tauri::{AppHandle, Wry}; +use tauri::{AppHandle, Manager, Wry}; use tokio::sync::{oneshot, Mutex as AsyncMutex}; use crate::agent::{AgentService, 
AgentSettingsService}; use crate::error::AppError; +use crate::news::NewsService; use crate::portfolio::PortfolioService; use crate::terminal::google_finance::GoogleFinanceLookup; use crate::terminal::sec_edgar::{ @@ -95,7 +96,9 @@ impl SecUserAgentProvider for SettingsBackedSecUserAgentProvider { pub struct AppState { /// Stateful chat service used for per-session conversation history and agent config. pub agent: AsyncMutex>, - /// Slash-command executor backed by shared mock data. + /// Local-first RSS/Atom news runtime backed by SQLite. + pub news_service: Arc, + /// Slash-command executor backed by shared services. pub command_service: Arc, /// Pending approvals for agent-triggered mutating commands. pub pending_agent_tool_approvals: Arc, @@ -113,13 +116,30 @@ impl AppState { )))); let portfolio_service = Arc::new(PortfolioService::new(app_handle, security_lookup.clone())); + let news_root = app_handle + .path() + .app_data_dir() + .map_err(|_| { + AppError::InvalidSettings("news app data directory is unavailable".to_string()) + })? 
+ .join("news"); + let news_service = Arc::new( + NewsService::new( + news_root.join("news.sqlite"), + news_root.join("news-feeds.json"), + include_bytes!("../news-feeds.default.json"), + ) + .map_err(|error| AppError::InvalidSettings(error.to_string()))?, + ); Ok(Self { agent: AsyncMutex::new(AgentService::new(app_handle)?), + news_service: news_service.clone(), command_service: Arc::new(TerminalCommandService::new( security_lookup, sec_edgar_lookup, portfolio_service, + news_service, )), pending_agent_tool_approvals: Arc::new(PendingAgentToolApprovals::new()), }) diff --git a/MosaicIQ/src-tauri/src/terminal/command_service.rs b/MosaicIQ/src-tauri/src/terminal/command_service.rs index b7c05ca..2211e51 100644 --- a/MosaicIQ/src-tauri/src/terminal/command_service.rs +++ b/MosaicIQ/src-tauri/src/terminal/command_service.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use std::time::Duration; +use crate::news::{NewsService, QueryNewsFeedRequest}; use crate::portfolio::{ CashConfirmation, PortfolioCommandError, PortfolioManagement, PortfolioStats, PortfolioTransaction, TradeConfirmation, TransactionKind, @@ -18,6 +19,7 @@ use crate::terminal::{ /// Executes supported slash commands against live search plus shared local fixture data. 
pub struct TerminalCommandService { mock_data: MockFinancialData, + news_service: Arc, security_lookup: Arc, edgar_lookup: Arc, portfolio_service: Arc, @@ -30,12 +32,14 @@ impl TerminalCommandService { security_lookup: Arc, edgar_lookup: Arc, portfolio_service: Arc, + news_service: Arc, ) -> Self { Self::with_dependencies( load_mock_financial_data(), security_lookup, edgar_lookup, portfolio_service, + news_service, DEFAULT_LOOKUP_FOLLOWUP_DELAY, ) } @@ -45,10 +49,12 @@ impl TerminalCommandService { security_lookup: Arc, edgar_lookup: Arc, portfolio_service: Arc, + news_service: Arc, lookup_followup_delay: Duration, ) -> Self { Self { mock_data, + news_service, security_lookup, edgar_lookup, portfolio_service, @@ -114,7 +120,7 @@ impl TerminalCommandService { .await } } - "/news" => self.news(command.args.first().map(String::as_str)), + "/news" => self.news(command.args.first().map(String::as_str)).await, "/analyze" => self.analyze(command.args.first().map(String::as_str)), "/help" => help_response(), _ => TerminalCommandResponse::Text { @@ -241,21 +247,29 @@ impl TerminalCommandService { self.load_search_match(query, selected_match, true).await } - fn news(&self, ticker: Option<&str>) -> TerminalCommandResponse { + async fn news(&self, ticker: Option<&str>) -> TerminalCommandResponse { let normalized_ticker = ticker.map(|value| value.trim().to_uppercase()); - let news_items = match normalized_ticker.as_deref() { - Some(ticker) if !ticker.is_empty() => self - .mock_data - .news_items - .iter() - .filter(|item| { - item.related_tickers - .iter() - .any(|related| related.eq_ignore_ascii_case(ticker)) - }) - .cloned() - .collect(), - _ => self.mock_data.news_items.clone(), + let response = self + .news_service + .query_feed(QueryNewsFeedRequest { + ticker: normalized_ticker.clone().filter(|value| !value.is_empty()), + search: None, + only_highlighted: None, + only_saved: None, + only_unread: None, + limit: Some(50), + offset: Some(0), + }) + .await; + + let news_items 
= match response { + Ok(response) => response.articles, + Err(error) => { + return TerminalCommandResponse::Text { + content: format!("News feed unavailable: {error}"), + portfolio: None, + }; + } }; TerminalCommandResponse::panel(PanelPayload::News { @@ -825,13 +839,19 @@ fn format_quantity(value: f64) -> String { #[cfg(test)] mod tests { + use std::fs; + use std::path::PathBuf; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Duration; + use std::time::{SystemTime, UNIX_EPOCH}; use futures::future::BoxFuture; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; use super::TerminalCommandService; + use crate::news::{NewsService, RefreshNewsFeedRequest}; use crate::portfolio::{ CashConfirmation, PortfolioCommandError, PortfolioManagement, PortfolioStats, PortfolioTransaction, TradeConfirmation, TransactionKind, @@ -1156,6 +1176,7 @@ mod tests { lookup.clone(), Arc::new(FakeEdgarLookup), portfolio_service, + test_news_service(), Duration::ZERO, ), lookup, @@ -1175,6 +1196,7 @@ mod tests { }), Arc::new(FakeEdgarLookup), Arc::new(FakePortfolioService::default()), + test_news_service(), Duration::ZERO, ) } @@ -1193,6 +1215,88 @@ mod tests { futures::executor::block_on(service.lookup_company(symbol)) } + fn test_news_service() -> Arc { + let root = unique_test_directory("terminal-news-service"); + Arc::new( + NewsService::new( + root.join("news.sqlite"), + root.join("news-feeds.json"), + br#"{"feeds":[{"id":"sample","name":"Sample Feed","url":"https://example.com/feed.xml","refreshMinutes":15}]}"#, + ) + .unwrap(), + ) + } + + fn unique_test_directory(prefix: &str) -> PathBuf { + let suffix = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let path = std::env::temp_dir().join(format!("{prefix}-{suffix}")); + fs::create_dir_all(&path).unwrap(); + path + } + + #[tokio::test] + async fn news_command_returns_articles_from_local_database() { + let server = 
MockServer::start().await; + Mock::given(method("GET")) + .and(path("/news.atom")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "application/atom+xml") + .set_body_string(include_str!("../../tests/fixtures/news/sample.atom")), + ) + .mount(&server) + .await; + + let root = unique_test_directory("terminal-news-command"); + let config = format!( + r#"{{"feeds":[{{"id":"sample","name":"Sample Feed","url":"{}/news.atom","refreshMinutes":15}}]}}"#, + server.uri(), + ); + let news_service = Arc::new( + NewsService::new( + root.join("news.sqlite"), + root.join("news-feeds.json"), + config.as_bytes(), + ) + .unwrap(), + ); + news_service + .refresh_feed(RefreshNewsFeedRequest { force: Some(true) }) + .await + .unwrap(); + server.reset().await; + + let service = TerminalCommandService::with_dependencies( + load_mock_financial_data(), + Arc::new(FakeSecurityLookup::successful(vec![])), + Arc::new(FakeEdgarLookup), + Arc::new(FakePortfolioService::default()), + news_service, + Duration::ZERO, + ); + let response = service + .execute(ExecuteTerminalCommandRequest { + workspace_id: "workspace-1".to_string(), + input: "/news NVDA".to_string(), + }) + .await; + + match response { + TerminalCommandResponse::Panel { panel } => match panel.as_ref() { + PanelPayload::News { data, ticker } => { + assert_eq!(ticker.as_deref(), Some("NVDA")); + assert_eq!(data.len(), 1); + assert_eq!(data[0].tickers, vec!["NVDA".to_string()]); + } + other => panic!("expected news panel, got {other:?}"), + }, + other => panic!("expected panel response, got {other:?}"), + } + } + #[test] fn returns_company_panel_for_exact_search_match() { let (service, lookup) = build_service(Ok(vec![SecurityMatch { @@ -1351,6 +1455,7 @@ mod tests { Arc::new(FakeSecurityLookup::successful(vec![])), Arc::new(FakeEdgarLookup), Arc::new(FakePortfolioService::default()), + test_news_service(), Duration::ZERO, ); @@ -1396,6 +1501,7 @@ mod tests { 
Arc::new(FakeSecurityLookup::successful(vec![])), Arc::new(FakeEdgarLookup), Arc::new(FakePortfolioService::default()), + test_news_service(), Duration::ZERO, ); @@ -1416,6 +1522,7 @@ mod tests { Arc::new(FakeSecurityLookup::successful(vec![])), Arc::new(FakeEdgarLookup), Arc::new(FakePortfolioService::default()), + test_news_service(), Duration::ZERO, ); @@ -1436,6 +1543,7 @@ mod tests { Arc::new(FakeSecurityLookup::successful(vec![])), Arc::new(FakeEdgarLookup), Arc::new(FakePortfolioService::default()), + test_news_service(), Duration::ZERO, ); diff --git a/MosaicIQ/src-tauri/src/terminal/mod.rs b/MosaicIQ/src-tauri/src/terminal/mod.rs index 12fb2ef..ebbd6c2 100644 --- a/MosaicIQ/src-tauri/src/terminal/mod.rs +++ b/MosaicIQ/src-tauri/src/terminal/mod.rs @@ -10,6 +10,6 @@ pub use types::{ CashFlowPanelData, CashFlowPeriod, ChatCommandRequest, Company, CompanyPricePoint, CompanyProfile, DividendEvent, DividendsPanelData, EarningsPanelData, EarningsPeriod, ErrorPanel, ExecuteTerminalCommandRequest, FilingRef, FinancialsPanelData, Frequency, Holding, - LookupCompanyRequest, MockFinancialData, NewsItem, PanelPayload, Portfolio, SourceStatus, + LookupCompanyRequest, MockFinancialData, PanelPayload, Portfolio, SourceStatus, StatementPeriod, StockAnalysis, TerminalCommandResponse, }; diff --git a/MosaicIQ/src-tauri/src/terminal/types.rs b/MosaicIQ/src-tauri/src/terminal/types.rs index b3e0bbe..6d9734a 100644 --- a/MosaicIQ/src-tauri/src/terminal/types.rs +++ b/MosaicIQ/src-tauri/src/terminal/types.rs @@ -2,6 +2,8 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; +use crate::news::NewsArticle; + /// Frontend request payload for slash-command execution. 
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] #[serde(rename_all = "camelCase")] @@ -68,7 +70,7 @@ pub enum PanelPayload { data: Portfolio, }, News { - data: Vec, + data: Vec, ticker: Option, }, Analysis { @@ -206,19 +208,6 @@ pub struct Portfolio { pub stale_pricing_symbols: Option>, } -/// News item serialized with an ISO timestamp for transport safety. -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct NewsItem { - pub id: String, - pub source: String, - pub headline: String, - pub timestamp: String, - pub snippet: String, - pub url: Option, - pub related_tickers: Vec, -} - /// Structured analysis payload for the analysis panel. #[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] #[serde(rename_all = "camelCase")] @@ -239,7 +228,6 @@ pub struct StockAnalysis { pub struct MockFinancialData { #[allow(dead_code)] pub companies: Vec, - pub news_items: Vec, pub analyses: HashMap, } diff --git a/MosaicIQ/src-tauri/tests/fixtures/news/malformed.rss b/MosaicIQ/src-tauri/tests/fixtures/news/malformed.rss new file mode 100644 index 0000000..4d6c0b8 --- /dev/null +++ b/MosaicIQ/src-tauri/tests/fixtures/news/malformed.rss @@ -0,0 +1,18 @@ + + + + Malformed RSS Feed + https://example.com + Malformed RSS description + + Valid entry + https://example.com/valid-entry + Valid summary + Tue, 08 Apr 2026 10:00:00 GMT + + + Missing title should be skipped + Tue, 08 Apr 2026 09:00:00 GMT + + + diff --git a/MosaicIQ/src-tauri/tests/fixtures/news/sample.atom b/MosaicIQ/src-tauri/tests/fixtures/news/sample.atom new file mode 100644 index 0000000..ab5cbca --- /dev/null +++ b/MosaicIQ/src-tauri/tests/fixtures/news/sample.atom @@ -0,0 +1,20 @@ + + + Sample Atom Feed + 2026-04-08T10:00:00Z + https://example.com/feed + + SEC current report posts for NVDA + + https://example.com/nvda-8k + 2026-04-08T10:00:00Z + Current report mentions NVDA supply updates. 
+ + + Federal Reserve releases policy minutes + + https://example.com/fed-minutes + 2026-04-08T09:45:00Z + Minutes discuss inflation and labor market trends. + + diff --git a/MosaicIQ/src-tauri/tests/fixtures/news/sample.rss b/MosaicIQ/src-tauri/tests/fixtures/news/sample.rss new file mode 100644 index 0000000..46ae52f --- /dev/null +++ b/MosaicIQ/src-tauri/tests/fixtures/news/sample.rss @@ -0,0 +1,20 @@ + + + + Sample RSS Feed + https://example.com + Sample RSS description + + Fed signals steady rates + https://example.com/fed-rates + The Federal Reserve left policy unchanged.

]]>
+ Tue, 08 Apr 2026 10:00:00 GMT +
+ + SEC highlights new filing activity + https://example.com/sec-filings + Fresh 8-K activity across major issuers. + Tue, 08 Apr 2026 09:30:00 GMT + +
+
diff --git a/MosaicIQ/src/bun-test.d.ts b/MosaicIQ/src/bun-test.d.ts new file mode 100644 index 0000000..3e1c4e3 --- /dev/null +++ b/MosaicIQ/src/bun-test.d.ts @@ -0,0 +1,11 @@ +declare module 'bun:test' { + interface BunExpect { + (value: unknown): any; + any(value: unknown): any; + } + + export const describe: (label: string, run: () => void | Promise) => void; + export const it: (label: string, run: () => void | Promise) => void; + export const expect: BunExpect; + export const mock: any>(fn: T) => T; +} diff --git a/MosaicIQ/src/components/Panels/NewsPanel.test.tsx b/MosaicIQ/src/components/Panels/NewsPanel.test.tsx new file mode 100644 index 0000000..191a208 --- /dev/null +++ b/MosaicIQ/src/components/Panels/NewsPanel.test.tsx @@ -0,0 +1,77 @@ +import { describe, expect, it } from 'bun:test'; +import { renderToStaticMarkup } from 'react-dom/server'; +import { NewsPanel } from './NewsPanel'; +import type { NewsArticle } from '../../types/news'; + +const sampleNews: NewsArticle[] = [ + { + id: '1', + sourceId: 'source-1', + source: 'Source 1', + headline: 'Breaking macro item', + summary: 'Summary 1', + url: 'https://example.com/1', + publishedAt: '2026-04-08T11:00:00Z', + publishedTs: 400, + fetchedAt: '2026-04-08T11:01:00Z', + sentiment: 'BULL', + sentimentScore: 0.7, + highlightReason: 'macro_event', + tickers: ['AAPL'], + isRead: false, + isSaved: false, + }, + { + id: '2', + sourceId: 'source-2', + source: 'Source 2', + headline: 'Recent market item', + summary: 'Summary 2', + publishedAt: '2026-04-08T10:00:00Z', + publishedTs: 300, + fetchedAt: '2026-04-08T10:01:00Z', + sentiment: 'NEUTRAL', + sentimentScore: 0, + tickers: ['MSFT'], + isRead: false, + isSaved: false, + }, +]; + +describe('NewsPanel', () => { + it('renders highlights and recent sections for generic /news', () => { + const html = renderToStaticMarkup(); + + expect(html).toContain('Market News'); + expect(html).toContain('Highlights'); + expect(html).toContain('Recent'); + }); + + it('hides 
highlights when there are no highlighted articles', () => { + const html = renderToStaticMarkup( + ({ + ...article, + highlightReason: undefined, + }))} + />, + ); + + expect(html).not.toContain('Highlights'); + expect(html).toContain('Recent'); + }); + + it('renders ticker mode as a single chronological view', () => { + const html = renderToStaticMarkup(); + + expect(html).toContain('News: NVDA'); + expect(html).not.toContain('>Highlights<'); + expect(html).not.toContain('>Recent<'); + }); + + it('includes the ticker symbol in the empty state for ticker mode', () => { + const html = renderToStaticMarkup(); + + expect(html).toContain('No news articles found for NVDA.'); + }); +}); diff --git a/MosaicIQ/src/components/Panels/NewsPanel.tsx b/MosaicIQ/src/components/Panels/NewsPanel.tsx index 36328c8..c71e25f 100644 --- a/MosaicIQ/src/components/Panels/NewsPanel.tsx +++ b/MosaicIQ/src/components/Panels/NewsPanel.tsx @@ -1,98 +1,348 @@ -import React from 'react'; -import { NewsItem } from '../../types/financial'; +import React, { useEffect, useState } from 'react'; +import { openUrl } from '@tauri-apps/plugin-opener'; +import { newsBridge } from '../../lib/newsBridge'; +import { + buildNewsTickerCommand, + formatNewsRelativeTime, + highlightReasonLabel, + newsSentimentTone, + partitionNewsSummaryArticles, + sortNewsArticlesChronologically, +} from '../../news'; +import type { NewsArticle } from '../../types/news'; +import { SentimentBadge } from '../ui/SentimentBadge'; interface NewsPanelProps { - news: NewsItem[]; + news: NewsArticle[]; + ticker?: string; + onRunCommand?: (command: string) => void; +} + +interface NewsPanelHeaderProps { + title: string; + count: number; ticker?: string; } -const formatTime = (date: Date) => { - const now = new Date(); - const diff = now.getTime() - date.getTime(); - const hours = Math.floor(diff / (1000 * 60 * 60)); +interface NewsSectionProps { + title: string; + children: React.ReactNode; +} - if (hours < 1) { - const minutes = 
Math.floor(diff / (1000 * 60)); - return `${minutes}m ago`; - } - if (hours < 24) { - return `${hours}h ago`; - } - const days = Math.floor(hours / 24); - return `${days}d ago`; +interface NewsArticleCardProps { + article: NewsArticle; + isTickerMode: boolean; + isPending: boolean; + activeTicker?: string; + onOpenArticle: (article: NewsArticle) => Promise; + onMarkRead: (articleId: string) => Promise; + onToggleSaved: (articleId: string, isSaved: boolean) => Promise; + onRunCommand?: (command: string) => void; +} + +const NewsPanelHeader: React.FC = ({ title, count, ticker }) => ( +
+

{title}

+ {ticker ? ( + + {ticker} + + ) : null} + + {count} + +
+); + +const NewsSection: React.FC = ({ title, children }) => ( +
+
+

+ {title} +

+
+
+ {children} +
+); + +const NewsArticleCard: React.FC = ({ + article, + isTickerMode, + isPending, + activeTicker, + onOpenArticle, + onMarkRead, + onToggleSaved, + onRunCommand, +}) => { + const summaryClassName = isTickerMode ? 'line-clamp-3' : 'line-clamp-2'; + const visibleTickers = article.tickers.slice(0, isTickerMode ? article.tickers.length : 3); + + const runTickerCommand = (articleTicker: string) => { + if (!onRunCommand) { + return; + } + + if (activeTicker && articleTicker.toUpperCase() === activeTicker.toUpperCase()) { + return; + } + + onRunCommand(buildNewsTickerCommand(articleTicker)); + }; + + return ( +
+
+ + {article.source} + + + {formatNewsRelativeTime(article.publishedAt)} + + + {article.highlightReason ? ( + + {highlightReasonLabel(article.highlightReason)} + + ) : null} +
+ + {article.url ? ( + + ) : ( +

+ {article.headline} +

+ )} + + {article.summary ? ( +

+ {article.summary} +

+ ) : null} + +
+ {visibleTickers.map((articleTicker) => { + const isActiveTicker = + activeTicker && + articleTicker.toUpperCase() === activeTicker.toUpperCase(); + + return onRunCommand ? ( + + ) : ( + + {articleTicker} + + ); + })} + +
+ + + {article.url ? ( + + ) : null} +
+
+
+ ); }; -export const NewsPanel: React.FC = ({ news, ticker }) => { +export const NewsPanel: React.FC = ({ news, ticker, onRunCommand }) => { + const [articles, setArticles] = useState(news); + const [pendingArticleIds, setPendingArticleIds] = useState>({}); + const normalizedTicker = ticker?.trim().toUpperCase(); + const isTickerMode = Boolean(normalizedTicker); + + useEffect(() => { + setArticles(news); + }, [news]); + + const sortedArticles = sortNewsArticlesChronologically(articles); + const { highlights, recent } = partitionNewsSummaryArticles(sortedArticles); + + const setPending = (articleId: string, pending: boolean) => { + setPendingArticleIds((current) => ({ + ...current, + [articleId]: pending, + })); + }; + + const replaceArticle = (articleId: string, updater: (article: NewsArticle) => NewsArticle) => { + setArticles((current) => + current.map((article) => (article.id === articleId ? updater(article) : article)), + ); + }; + + const toggleSaved = async (articleId: string, isSaved: boolean) => { + const previousArticle = articles.find((article) => article.id === articleId); + if (!previousArticle) { + return; + } + + setPending(articleId, true); + replaceArticle(articleId, (article) => ({ ...article, isSaved })); + try { + await newsBridge.updateNewsArticleState({ articleId, isSaved }); + } catch { + replaceArticle(articleId, () => previousArticle); + } finally { + setPending(articleId, false); + } + }; + + const markRead = async (articleId: string) => { + const previousArticle = articles.find((article) => article.id === articleId); + if (!previousArticle || previousArticle.isRead) { + return; + } + + setPending(articleId, true); + replaceArticle(articleId, (article) => ({ ...article, isRead: true })); + try { + await newsBridge.updateNewsArticleState({ articleId, isRead: true }); + } catch { + replaceArticle(articleId, () => previousArticle); + } finally { + setPending(articleId, false); + } + }; + + const openArticle = async (article: NewsArticle) => { 
+ if (!article.url) { + return; + } + + await markRead(article.id); + await openUrl(article.url); + }; + + const renderArticleCard = (article: NewsArticle) => ( + + ); + + if (sortedArticles.length === 0) { + return ( +
+ +
+

+ {isTickerMode + ? `No news articles found for ${normalizedTicker}.` + : 'No news articles found.'} +

+ {!isTickerMode ? ( +

+ Try again after the next background refresh. +

+ ) : null} +
+
+ ); + } + + if (isTickerMode) { + return ( +
+ +
{sortedArticles.map(renderArticleCard)}
+
+ ); + } + return ( -
- {/* Header - Inline with badges */} -
-

- {ticker ? `News: ${ticker.toUpperCase()}` : 'Market News'} -

- {ticker && ( - - {ticker} - - )} - - {news.length} - -
- - {/* News List - Minimal dividers */} - {news.length > 0 ? ( -
- {news.map((item, idx) => ( -
- {idx > 0 &&
} - - {/* Source & Time */} -
- - {item.source} - - - {formatTime(item.timestamp)} - -
- - {/* Headline */} -

- {item.headline} -

- - {/* Snippet */} -

- {item.snippet} -

- - {/* Related Tickers */} - {item.relatedTickers.length > 0 && ( -
- {item.relatedTickers.map((ticker) => ( - - {ticker} - - ))} -
- )} -
- ))} -
- ) : ( - /* Empty State */ -
-
📰
-

No news articles found

-
- )} +
+ +
+ {highlights.length > 0 ? ( + +
{highlights.map(renderArticleCard)}
+
+ ) : null} + {recent.length > 0 ? ( + +
{recent.map(renderArticleCard)}
+
+ ) : null} +
); }; diff --git a/MosaicIQ/src/components/Terminal/TerminalOutput.tsx b/MosaicIQ/src/components/Terminal/TerminalOutput.tsx index 3ae5df1..e7400b3 100644 --- a/MosaicIQ/src/components/Terminal/TerminalOutput.tsx +++ b/MosaicIQ/src/components/Terminal/TerminalOutput.tsx @@ -265,7 +265,13 @@ export const TerminalOutput: React.FC = ({ /> ); case 'news': - return ; + return ( + + ); case 'analysis': return ; case 'financials': @@ -284,13 +290,13 @@ export const TerminalOutput: React.FC = ({ return (
-
+
{history.map((entry) => (
{ + it('merges filter updates without dropping existing values', () => { + const state = createNewsFeedState({ ticker: 'NVDA', limit: 20 }); + + const nextState = newsFeedReducer(state, { + type: 'filters_merged', + filters: { onlyHighlighted: true }, + }); + + expect(nextState.filters).toEqual({ + ticker: 'NVDA', + limit: 20, + onlyHighlighted: true, + offset: 0, + }); + }); + + it('stores query results and clears loading flags', () => { + const state = newsFeedReducer(createNewsFeedState(), { + type: 'load_started', + refresh: false, + }); + + const nextState = newsFeedReducer(state, { + type: 'load_succeeded', + articles: [sampleArticle], + total: 1, + lastSyncedAt: '2026-04-08T10:05:00Z', + sources: [], + }); + + expect(nextState.isLoading).toBe(false); + expect(nextState.total).toBe(1); + expect(nextState.articles[0]?.id).toBe('article-1'); + }); + + it('applies optimistic article state updates', () => { + const state = newsFeedReducer(createNewsFeedState(), { + type: 'load_succeeded', + articles: [sampleArticle], + total: 1, + lastSyncedAt: '2026-04-08T10:05:00Z', + sources: [], + }); + + const nextState = newsFeedReducer(state, { + type: 'article_updated', + articleId: 'article-1', + patch: { isSaved: true, isRead: true }, + }); + + expect(nextState.articles[0]?.isSaved).toBe(true); + expect(nextState.articles[0]?.isRead).toBe(true); + }); +}); diff --git a/MosaicIQ/src/hooks/useNewsFeed.ts b/MosaicIQ/src/hooks/useNewsFeed.ts new file mode 100644 index 0000000..111e498 --- /dev/null +++ b/MosaicIQ/src/hooks/useNewsFeed.ts @@ -0,0 +1,214 @@ +import { + startTransition, + useDeferredValue, + useEffect, + useEffectEvent, + useReducer, +} from 'react'; +import { newsBridge } from '../lib/newsBridge'; +import type { + NewsArticle, + NewsSourceStatus, + QueryNewsFeedRequest, + RefreshNewsFeedResult, +} from '../types/news'; + +export interface NewsFeedState { + articles: NewsArticle[]; + total: number; + lastSyncedAt?: string; + sources: NewsSourceStatus[]; + 
filters: QueryNewsFeedRequest; + isLoading: boolean; + isRefreshing: boolean; + error?: string; +} + +type NewsFeedAction = + | { type: 'load_started'; refresh: boolean } + | { + type: 'load_succeeded'; + articles: NewsArticle[]; + total: number; + lastSyncedAt?: string; + sources: NewsSourceStatus[]; + } + | { type: 'load_failed'; error: string } + | { type: 'filters_merged'; filters: QueryNewsFeedRequest } + | { type: 'article_updated'; articleId: string; patch: Partial }; + +export const createNewsFeedState = ( + filters: QueryNewsFeedRequest = {}, +): NewsFeedState => ({ + articles: [], + total: 0, + lastSyncedAt: undefined, + sources: [], + filters, + isLoading: true, + isRefreshing: false, + error: undefined, +}); + +export const newsFeedReducer = ( + state: NewsFeedState, + action: NewsFeedAction, +): NewsFeedState => { + switch (action.type) { + case 'load_started': + return { + ...state, + isLoading: !action.refresh, + isRefreshing: action.refresh, + error: undefined, + }; + case 'load_succeeded': + return { + ...state, + articles: action.articles, + total: action.total, + lastSyncedAt: action.lastSyncedAt, + sources: action.sources, + isLoading: false, + isRefreshing: false, + error: undefined, + }; + case 'load_failed': + return { + ...state, + isLoading: false, + isRefreshing: false, + error: action.error, + }; + case 'filters_merged': + return { + ...state, + filters: { + ...state.filters, + ...action.filters, + offset: action.filters.offset ?? state.filters.offset ?? 0, + }, + }; + case 'article_updated': + return { + ...state, + articles: state.articles.map((article) => + article.id === action.articleId ? 
{ ...article, ...action.patch } : article, + ), + }; + default: + return state; + } +}; + +export const useNewsFeed = (initialFilters: QueryNewsFeedRequest = {}) => { + const [state, dispatch] = useReducer( + newsFeedReducer, + initialFilters, + createNewsFeedState, + ); + const deferredSearch = useDeferredValue(state.filters.search); + const deferredFilters = { + ...state.filters, + search: deferredSearch, + }; + + const loadFeed = useEffectEvent(async (refresh: boolean) => { + dispatch({ type: 'load_started', refresh }); + try { + const response = await newsBridge.queryNewsFeed(deferredFilters); + dispatch({ + type: 'load_succeeded', + articles: response.articles, + total: response.total, + lastSyncedAt: response.lastSyncedAt, + sources: response.sources, + }); + } catch (error) { + dispatch({ + type: 'load_failed', + error: error instanceof Error ? error.message : String(error), + }); + } + }); + + useEffect(() => { + void loadFeed(false); + }, [ + deferredFilters.limit, + deferredFilters.offset, + deferredFilters.onlyHighlighted, + deferredFilters.onlySaved, + deferredFilters.onlyUnread, + deferredFilters.search, + deferredFilters.ticker, + loadFeed, + ]); + + useEffect(() => { + let disposed = false; + let unlisten: (() => void) | undefined; + + void newsBridge.listenForUpdates(() => { + if (!disposed) { + void loadFeed(false); + } + }).then((listener) => { + unlisten = listener; + }); + + return () => { + disposed = true; + unlisten?.(); + }; + }, [loadFeed]); + + const setFilters = (filters: QueryNewsFeedRequest) => { + startTransition(() => { + dispatch({ type: 'filters_merged', filters }); + }); + }; + + const refresh = async (force = false): Promise => { + dispatch({ type: 'load_started', refresh: true }); + try { + const result = await newsBridge.refreshNewsFeed({ force }); + await loadFeed(true); + return result; + } catch (error) { + dispatch({ + type: 'load_failed', + error: error instanceof Error ? 
error.message : String(error), + }); + throw error; + } + }; + + const toggleSaved = async (articleId: string, isSaved: boolean) => { + dispatch({ type: 'article_updated', articleId, patch: { isSaved } }); + try { + await newsBridge.updateNewsArticleState({ articleId, isSaved }); + } catch (error) { + await loadFeed(false); + throw error; + } + }; + + const markRead = async (articleId: string, isRead = true) => { + dispatch({ type: 'article_updated', articleId, patch: { isRead } }); + try { + await newsBridge.updateNewsArticleState({ articleId, isRead }); + } catch (error) { + await loadFeed(false); + throw error; + } + }; + + return { + ...state, + refresh, + setFilters, + toggleSaved, + markRead, + }; +}; diff --git a/MosaicIQ/src/hooks/usePortfolioWorkflow.ts b/MosaicIQ/src/hooks/usePortfolioWorkflow.ts index acb996b..fee74c3 100644 --- a/MosaicIQ/src/hooks/usePortfolioWorkflow.ts +++ b/MosaicIQ/src/hooks/usePortfolioWorkflow.ts @@ -4,7 +4,7 @@ import { PortfolioAction, PortfolioActionDraft, PortfolioActionSeed, - ResolvedTerminalCommandResponse, + TerminalCommandResponse, } from '../types/terminal'; export type PortfolioSnapshotStatus = 'idle' | 'loading' | 'ready' | 'error'; @@ -123,7 +123,7 @@ export const usePortfolioWorkflow = () => { ( workspaceId: string, command: string, - response: ResolvedTerminalCommandResponse, + response: TerminalCommandResponse, ) => { const action = commandToPortfolioAction(command); if (!action) { diff --git a/MosaicIQ/src/hooks/useTerminalOrchestrator.ts b/MosaicIQ/src/hooks/useTerminalOrchestrator.ts index 0ac0e84..cdb8ac1 100644 --- a/MosaicIQ/src/hooks/useTerminalOrchestrator.ts +++ b/MosaicIQ/src/hooks/useTerminalOrchestrator.ts @@ -15,7 +15,7 @@ import { PortfolioAction, PortfolioActionDraft, PortfolioActionSeed, - ResolvedTerminalCommandResponse, + TerminalCommandResponse, } from '../types/terminal'; type AppView = 'terminal' | 'settings'; @@ -67,7 +67,7 @@ export const useTerminalOrchestrator = ({ ( workspaceId: string, 
command: string | undefined, - response: ResolvedTerminalCommandResponse, + response: TerminalCommandResponse, ) => { tabs.appendWorkspaceEntry( workspaceId, @@ -196,7 +196,7 @@ export const useTerminalOrchestrator = ({ const processStreamItem = ( event: Omit & { - response?: ResolvedTerminalCommandResponse; + response?: TerminalCommandResponse; }, ) => { if (event.sequence <= lastSequenceSeen) { diff --git a/MosaicIQ/src/lib/chatPanelContext.ts b/MosaicIQ/src/lib/chatPanelContext.ts index 86ea3e1..c85afe8 100644 --- a/MosaicIQ/src/lib/chatPanelContext.ts +++ b/MosaicIQ/src/lib/chatPanelContext.ts @@ -1,23 +1,4 @@ -import { - ChatPanelContext, - PanelPayload, - TerminalEntry, - TransportPanelPayload, -} from '../types/terminal'; - -const toTransportPanelPayload = (panel: PanelPayload): TransportPanelPayload => { - if (panel.type !== 'news') { - return panel; - } - - return { - ...panel, - data: panel.data.map((item) => ({ - ...item, - timestamp: item.timestamp.toISOString(), - })), - }; -}; +import { ChatPanelContext, TerminalEntry } from '../types/terminal'; export const extractChatPanelContext = ( history: TerminalEntry[], @@ -44,7 +25,7 @@ export const extractChatPanelContext = ( return { sourceCommand, capturedAt: entry.timestamp?.toISOString(), - panel: toTransportPanelPayload(entry.content), + panel: entry.content, }; } diff --git a/MosaicIQ/src/lib/newsBridge.test.ts b/MosaicIQ/src/lib/newsBridge.test.ts new file mode 100644 index 0000000..10f8c15 --- /dev/null +++ b/MosaicIQ/src/lib/newsBridge.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it, mock } from 'bun:test'; +import { createNewsBridge } from './newsBridge'; + +describe('createNewsBridge', () => { + it('passes request payloads to the expected Tauri commands', async () => { + const invoke = mock(async ( + command: string, + args?: Record, + ): Promise => { + if (command === 'query_news_feed') { + return { + articles: [], + total: 0, + sources: [], + } as T; + } + + if (command === 
'refresh_news_feed') { + return { + feedsChecked: 1, + feedsSucceeded: 1, + feedsFailed: 0, + newArticles: 0, + updatedArticles: 0, + unchangedArticles: 0, + finishedAt: '2026-04-08T10:00:00Z', + } as T; + } + + expect(command).toBe('update_news_article_state'); + expect(args).toEqual({ + request: { + articleId: 'article-1', + isSaved: true, + }, + }); + return undefined as T; + }); + const bridge = createNewsBridge(invoke); + + const query = await bridge.queryNewsFeed({ ticker: 'NVDA', limit: 20 }); + const refresh = await bridge.refreshNewsFeed({ force: true }); + await bridge.updateNewsArticleState({ articleId: 'article-1', isSaved: true }); + + expect(invoke).toHaveBeenNthCalledWith(1, 'query_news_feed', { + request: { ticker: 'NVDA', limit: 20 }, + }); + expect(invoke).toHaveBeenNthCalledWith(2, 'refresh_news_feed', { + request: { force: true }, + }); + expect(query.total).toBe(0); + expect(refresh.feedsChecked).toBe(1); + }); + + it('subscribes to news updates with the expected event name', async () => { + const listen = mock( + async ( + _event: string, + handler: (event: { payload: T }) => void, + ): Promise<() => void> => { + handler({ + payload: { + feedsChecked: 1, + feedsSucceeded: 1, + feedsFailed: 0, + newArticles: 2, + updatedArticles: 1, + unchangedArticles: 0, + finishedAt: '2026-04-08T10:00:00Z', + } as T, + }); + return () => {}; + }, + ); + const handler = mock(() => {}); + const bridge = createNewsBridge(async () => undefined as never, listen); + + await bridge.listenForUpdates(handler); + + expect(listen).toHaveBeenCalledWith('news_feed_updated', expect.any(Function)); + expect(handler).toHaveBeenCalledWith({ + feedsChecked: 1, + feedsSucceeded: 1, + feedsFailed: 0, + newArticles: 2, + updatedArticles: 1, + unchangedArticles: 0, + finishedAt: '2026-04-08T10:00:00Z', + }); + }); +}); diff --git a/MosaicIQ/src/lib/newsBridge.ts b/MosaicIQ/src/lib/newsBridge.ts new file mode 100644 index 0000000..9934afb --- /dev/null +++ 
b/MosaicIQ/src/lib/newsBridge.ts @@ -0,0 +1,49 @@ +import { invoke } from '@tauri-apps/api/core'; +import { listen, type UnlistenFn } from '@tauri-apps/api/event'; +import type { + QueryNewsFeedRequest, + QueryNewsFeedResponse, + RefreshNewsFeedRequest, + RefreshNewsFeedResult, + UpdateNewsArticleStateRequest, +} from '../types/news'; + +type Invoker = (command: string, args?: Record) => Promise; +type Listener = ( + event: string, + handler: (event: { payload: T }) => void, +) => Promise; + +export interface NewsBridge { + queryNewsFeed(request: QueryNewsFeedRequest): Promise; + refreshNewsFeed(request?: RefreshNewsFeedRequest): Promise; + updateNewsArticleState(request: UpdateNewsArticleStateRequest): Promise; + listenForUpdates( + handler: (payload: RefreshNewsFeedResult) => void, + ): Promise; +} + +export const createNewsBridge = ( + invoker: Invoker = invoke, + listener: Listener = listen, +): NewsBridge => ({ + queryNewsFeed(request) { + return invoker('query_news_feed', { request }); + }, + + refreshNewsFeed(request = {}) { + return invoker('refresh_news_feed', { request }); + }, + + updateNewsArticleState(request) { + return invoker('update_news_article_state', { request }); + }, + + listenForUpdates(handler) { + return listener('news_feed_updated', (event) => + handler(event.payload), + ); + }, +}); + +export const newsBridge = createNewsBridge(); diff --git a/MosaicIQ/src/lib/terminalBridge.ts b/MosaicIQ/src/lib/terminalBridge.ts index 3f865d9..12d7157 100644 --- a/MosaicIQ/src/lib/terminalBridge.ts +++ b/MosaicIQ/src/lib/terminalBridge.ts @@ -1,24 +1,20 @@ import { invoke } from '@tauri-apps/api/core'; import { listen, type UnlistenFn } from '@tauri-apps/api/event'; -import { NewsItem } from '../types/financial'; import { AgentStreamItemEvent, ChatStreamStart, LookupCompanyRequest, ExecuteTerminalCommandRequest, - PanelPayload, ResolveAgentToolApprovalRequest, - ResolvedTerminalCommandResponse, StartChatStreamRequest, TerminalCommandResponse, - 
TransportPanelPayload, } from '../types/terminal'; import { Company } from '../types/financial'; interface StreamCallbacks { workspaceId: string; onStreamItem: (event: Omit & { - response?: ResolvedTerminalCommandResponse; + response?: TerminalCommandResponse; }) => void; } @@ -30,36 +26,6 @@ const createRequestId = (): string => { return `request-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`; }; -const deserializePanelPayload = (payload: TransportPanelPayload): PanelPayload => { - if (payload.type !== 'news') { - return payload; - } - - // News timestamps cross the Tauri boundary as strings and are rehydrated here for panel rendering. - return { - ...payload, - data: payload.data.map( - (item): NewsItem => ({ - ...item, - timestamp: new Date(item.timestamp), - }), - ), - }; -}; - -const deserializeTerminalCommandResponse = ( - response: TerminalCommandResponse, -): ResolvedTerminalCommandResponse => { - if (response.kind === 'text') { - return response; - } - - return { - kind: 'panel', - panel: deserializePanelPayload(response.panel), - }; -}; - class TerminalBridge { private listenersReady: Promise | null = null; private unlistenFns: UnlistenFn[] = []; @@ -78,9 +44,7 @@ class TerminalBridge { } callbacks.onStreamItem({ ...event.payload, - response: event.payload.response - ? 
deserializeTerminalCommandResponse(event.payload.response) - : undefined, + response: event.payload.response, }); if ( event.payload.kind === 'stream_complete' || @@ -98,16 +62,10 @@ class TerminalBridge { async executeTerminalCommand( request: ExecuteTerminalCommandRequest, - ): Promise { - const response = await invoke('execute_terminal_command', { + ): Promise { + return invoke('execute_terminal_command', { request, }); - - if (response.kind === 'text') { - return response; - } - - return deserializeTerminalCommandResponse(response); } async lookupCompany(request: LookupCompanyRequest): Promise { diff --git a/MosaicIQ/src/lib/tickerHistory.ts b/MosaicIQ/src/lib/tickerHistory.ts index 10e8e4f..87c773b 100644 --- a/MosaicIQ/src/lib/tickerHistory.ts +++ b/MosaicIQ/src/lib/tickerHistory.ts @@ -1,6 +1,6 @@ import { Company } from '../types/financial'; import { - ResolvedTerminalCommandResponse, + TerminalCommandResponse, TickerHistorySnapshot, } from '../types/terminal'; @@ -9,7 +9,7 @@ const TICKER_REQUIRED_COMMANDS = new Set(['/fa', '/cf', '/dvd', '/em', '/analyze const FREQUENCY_OPTION_COMMANDS = new Set(['/fa', '/cf', '/em']); export const extractTickerSymbolFromResponse = ( - response: ResolvedTerminalCommandResponse, + response: TerminalCommandResponse, ): string | null => { if (response.kind !== 'panel') { return null; diff --git a/MosaicIQ/src/news/index.test.ts b/MosaicIQ/src/news/index.test.ts new file mode 100644 index 0000000..e71db9e --- /dev/null +++ b/MosaicIQ/src/news/index.test.ts @@ -0,0 +1,102 @@ +import { describe, expect, it } from 'bun:test'; +import { + buildNewsTickerCommand, + formatNewsRelativeTime, + highlightReasonLabel, + newsSentimentLabel, + newsSentimentTone, + partitionNewsSummaryArticles, + sortNewsArticlesChronologically, +} from './index'; +import type { NewsArticle } from '../types/news'; + +const sampleArticles: NewsArticle[] = [ + { + id: '3', + sourceId: 'source-1', + source: 'Source 1', + headline: 'Recent neutral item', + 
summary: 'Summary 3', + publishedAt: '2026-04-08T10:00:00Z', + publishedTs: 300, + fetchedAt: '2026-04-08T10:01:00Z', + sentiment: 'NEUTRAL', + sentimentScore: 0, + tickers: ['SPY'], + isRead: false, + isSaved: false, + }, + { + id: '2', + sourceId: 'source-2', + source: 'Source 2', + headline: 'Older highlighted item', + summary: 'Summary 2', + publishedAt: '2026-04-08T09:00:00Z', + publishedTs: 200, + fetchedAt: '2026-04-08T09:01:00Z', + sentiment: 'BULL', + sentimentScore: 0.7, + highlightReason: 'macro_event', + tickers: ['AAPL'], + isRead: false, + isSaved: false, + }, + { + id: '1', + sourceId: 'source-3', + source: 'Source 3', + headline: 'Newest highlighted item', + summary: 'Summary 1', + publishedAt: '2026-04-08T11:00:00Z', + publishedTs: 400, + fetchedAt: '2026-04-08T11:01:00Z', + sentiment: 'BEAR', + sentimentScore: -0.7, + highlightReason: 'strong_sentiment', + tickers: ['NVDA'], + isRead: false, + isSaved: false, + }, +]; + +describe('news formatting helpers', () => { + it('formats recent and older relative times', () => { + expect( + formatNewsRelativeTime('2026-04-08T09:45:00Z', new Date('2026-04-08T10:00:00Z')), + ).toBe('15m ago'); + expect( + formatNewsRelativeTime('2026-04-07T10:00:00Z', new Date('2026-04-08T10:00:00Z')), + ).toBe('1d ago'); + }); + + it('maps sentiment values to terminal badge tone and label', () => { + expect(newsSentimentTone('BULL')).toBe('bullish'); + expect(newsSentimentTone('BEAR')).toBe('bearish'); + expect(newsSentimentLabel('NEUTRAL')).toBe('Neutral'); + }); + + it('maps highlight reasons to concise labels', () => { + expect(highlightReasonLabel('macro_event')).toBe('Macro'); + expect(highlightReasonLabel('recent_high_value')).toBe('Fresh'); + }); + + it('sorts articles in reverse chronological order', () => { + expect(sortNewsArticlesChronologically(sampleArticles).map((article) => article.id)).toEqual([ + '1', + '3', + '2', + ]); + }); + + it('partitions summary articles without duplicating highlighted items', () => 
{ + const { highlights, recent } = partitionNewsSummaryArticles(sampleArticles); + + expect(highlights.map((article) => article.id)).toEqual(['1', '2']); + expect(recent.map((article) => article.id)).toEqual(['3']); + }); + + it('builds ticker commands from chip values', () => { + expect(buildNewsTickerCommand(' nvda ')).toBe('/news NVDA'); + }); +}); diff --git a/MosaicIQ/src/news/index.ts b/MosaicIQ/src/news/index.ts new file mode 100644 index 0000000..a5e55e5 --- /dev/null +++ b/MosaicIQ/src/news/index.ts @@ -0,0 +1,114 @@ +export { useNewsFeed, newsFeedReducer, createNewsFeedState } from '../hooks/useNewsFeed'; +export type { + HighlightReason, + NewsArticle, + NewsSentiment, + NewsSourceStatus, + QueryNewsFeedRequest, + QueryNewsFeedResponse, + RefreshNewsFeedRequest, + RefreshNewsFeedResult, + UpdateNewsArticleStateRequest, +} from '../types/news'; + +import type { + HighlightReason, + NewsArticle, + NewsSentiment, +} from '../types/news'; + +const compareNewsArticles = (left: NewsArticle, right: NewsArticle) => { + if (right.publishedTs !== left.publishedTs) { + return right.publishedTs - left.publishedTs; + } + + if (left.highlightReason && !right.highlightReason) { + return -1; + } + + if (!left.highlightReason && right.highlightReason) { + return 1; + } + + return left.id.localeCompare(right.id); +}; + +export const sortNewsArticlesChronologically = (articles: NewsArticle[]) => + [...articles].sort(compareNewsArticles); + +export const partitionNewsSummaryArticles = ( + articles: NewsArticle[], +): { highlights: NewsArticle[]; recent: NewsArticle[] } => { + const sortedArticles = sortNewsArticlesChronologically(articles); + const highlights = sortedArticles + .filter((article) => article.highlightReason) + .slice(0, 4); + const highlightIds = new Set(highlights.map((article) => article.id)); + const recent = sortedArticles + .filter((article) => !highlightIds.has(article.id)) + .slice(0, 6); + + return { highlights, recent }; +}; + +export const 
formatNewsRelativeTime = (publishedAt: string, now = new Date()) => { + const publishedDate = new Date(publishedAt); + const diffMs = Math.max(0, now.getTime() - publishedDate.getTime()); + const minutes = Math.floor(diffMs / (1000 * 60)); + + if (minutes < 1) { + return 'just now'; + } + if (minutes < 60) { + return `${minutes}m ago`; + } + + const hours = Math.floor(minutes / 60); + if (hours < 24) { + return `${hours}h ago`; + } + + const days = Math.floor(hours / 24); + return `${days}d ago`; +}; + +export const newsSentimentLabel = (sentiment: NewsSentiment) => { + switch (sentiment) { + case 'BULL': + return 'Bullish'; + case 'BEAR': + return 'Bearish'; + default: + return 'Neutral'; + } +}; + +export const highlightReasonLabel = (reason?: HighlightReason) => { + switch (reason) { + case 'breaking_keyword': + return 'Breaking'; + case 'macro_event': + return 'Macro'; + case 'strong_sentiment': + return 'High Conviction'; + case 'ticker_detected': + return 'Ticker'; + case 'recent_high_value': + return 'Fresh'; + default: + return 'Standard'; + } +}; + +export const newsSentimentTone = (sentiment: NewsSentiment) => { + switch (sentiment) { + case 'BULL': + return 'bullish'; + case 'BEAR': + return 'bearish'; + default: + return 'neutral'; + } +}; + +export const buildNewsTickerCommand = (ticker: string) => `/news ${ticker.trim().toUpperCase()}`; diff --git a/MosaicIQ/src/types/financial.ts b/MosaicIQ/src/types/financial.ts index ff6d42d..9ecbd8f 100644 --- a/MosaicIQ/src/types/financial.ts +++ b/MosaicIQ/src/types/financial.ts @@ -73,16 +73,6 @@ export interface Portfolio { stalePricingSymbols?: string[]; } -export interface NewsItem { - id: string; - source: string; - headline: string; - timestamp: Date; - snippet: string; - url?: string; - relatedTickers: string[]; -} - export interface StockAnalysis { symbol: string; summary: string; @@ -94,14 +84,9 @@ export interface StockAnalysis { targetPrice?: number; } -export interface SerializedNewsItem extends Omit 
{ - timestamp: string; -} - export interface MockFinancialData { companies: Company[]; portfolio: Portfolio; - newsItems: SerializedNewsItem[]; analyses: Record; } diff --git a/MosaicIQ/src/types/news.ts b/MosaicIQ/src/types/news.ts new file mode 100644 index 0000000..ff4ab0d --- /dev/null +++ b/MosaicIQ/src/types/news.ts @@ -0,0 +1,75 @@ +export type NewsSentiment = 'BULL' | 'BEAR' | 'NEUTRAL'; + +export type HighlightReason = + | 'breaking_keyword' + | 'macro_event' + | 'strong_sentiment' + | 'ticker_detected' + | 'recent_high_value'; + +export interface NewsArticle { + id: string; + sourceId: string; + source: string; + headline: string; + summary: string; + url?: string; + canonicalUrl?: string; + publishedAt: string; + publishedTs: number; + fetchedAt: string; + sentiment: NewsSentiment; + sentimentScore: number; + highlightReason?: HighlightReason; + tickers: string[]; + isRead: boolean; + isSaved: boolean; +} + +export interface QueryNewsFeedRequest { + ticker?: string; + search?: string; + onlyHighlighted?: boolean; + onlySaved?: boolean; + onlyUnread?: boolean; + limit?: number; + offset?: number; +} + +export interface NewsSourceStatus { + id: string; + name: string; + url: string; + refreshMinutes: number; + lastCheckedAt?: string; + lastSuccessAt?: string; + lastError?: string; + failureCount: number; +} + +export interface QueryNewsFeedResponse { + articles: NewsArticle[]; + total: number; + lastSyncedAt?: string; + sources: NewsSourceStatus[]; +} + +export interface RefreshNewsFeedRequest { + force?: boolean; +} + +export interface RefreshNewsFeedResult { + feedsChecked: number; + feedsSucceeded: number; + feedsFailed: number; + newArticles: number; + updatedArticles: number; + unchangedArticles: number; + finishedAt: string; +} + +export interface UpdateNewsArticleStateRequest { + articleId: string; + isRead?: boolean; + isSaved?: boolean; +} diff --git a/MosaicIQ/src/types/terminal.ts b/MosaicIQ/src/types/terminal.ts index 3c46700..9ca30c6 100644 
--- a/MosaicIQ/src/types/terminal.ts +++ b/MosaicIQ/src/types/terminal.ts @@ -4,29 +4,17 @@ import { DividendsPanelData, EarningsPanelData, FinancialsPanelData, - NewsItem, Portfolio, - SerializedNewsItem, StockAnalysis, } from './financial'; import { TaskProfile } from './agentSettings'; +import { NewsArticle } from './news'; export type PanelPayload = | { type: 'company'; data: Company } | { type: 'error'; data: ErrorPanel } | { type: 'portfolio'; data: Portfolio } - | { type: 'news'; data: NewsItem[]; ticker?: string } - | { type: 'analysis'; data: StockAnalysis } - | { type: 'financials'; data: FinancialsPanelData } - | { type: 'cashFlow'; data: CashFlowPanelData } - | { type: 'dividends'; data: DividendsPanelData } - | { type: 'earnings'; data: EarningsPanelData }; - -export type TransportPanelPayload = - | { type: 'company'; data: Company } - | { type: 'error'; data: ErrorPanel } - | { type: 'portfolio'; data: Portfolio } - | { type: 'news'; data: SerializedNewsItem[]; ticker?: string } + | { type: 'news'; data: NewsArticle[]; ticker?: string } | { type: 'analysis'; data: StockAnalysis } | { type: 'financials'; data: FinancialsPanelData } | { type: 'cashFlow'; data: CashFlowPanelData } @@ -34,17 +22,13 @@ export type TransportPanelPayload = | { type: 'earnings'; data: EarningsPanelData }; export type TerminalCommandResponse = - | { kind: 'text'; content: string; portfolio?: Portfolio } - | { kind: 'panel'; panel: TransportPanelPayload }; - -export type ResolvedTerminalCommandResponse = | { kind: 'text'; content: string; portfolio?: Portfolio } | { kind: 'panel'; panel: PanelPayload }; export interface ChatPanelContext { sourceCommand?: string; capturedAt?: string; - panel: TransportPanelPayload; + panel: PanelPayload; } export interface ExecuteTerminalCommandRequest {