# news_sentiment.py
# pip install gnews nltk rapidfuzz
"""Fetch recent Google News headlines for a query and score their sentiment.

Headlines are retrieved with ``gnews``, near-duplicate titles are dropped
using a ``rapidfuzz`` similarity ratio, and each remaining title is scored
with NLTK's VADER ``SentimentIntensityAnalyzer``.
"""
from __future__ import annotations

from datetime import datetime, timezone
import time
from typing import List, Dict, Any

from gnews import GNews
from rapidfuzz import fuzz
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

# Ensure VADER is available (safe to call multiple times)
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon")

# Keep one analyzer instance
_SIA = SentimentIntensityAnalyzer()


def _sentiment_label(compound: float) -> str:
    """Map a compound score to "Positive", "Negative" or "Neutral".

    Scores strictly above 0.05 are positive, strictly below -0.05 are
    negative, and everything in between (inclusive) is neutral.
    """
    if compound > 0.05:
        return "Positive"
    if compound < -0.05:
        return "Negative"
    return "Neutral"


def _is_similar(title: str, seen_titles: List[str], threshold: int = 60) -> bool:
    """Return True if *title* matches any seen title above *threshold*.

    Similarity is ``fuzz.ratio``; the comparison is strict (``>``), so a
    ratio exactly equal to *threshold* does not count as a duplicate.
    """
    return any(fuzz.ratio(title, seen) > threshold for seen in seen_titles)


def _fetch_articles(
    query: str,
    *,
    period: str,
    max_results: int,
    language: str,
    country: str,
    retries: int,
    backoff_seconds: int,
) -> List[Dict[str, Any]]:
    """Query GNews up to *retries* times and return the first non-empty result.

    Both exceptions and empty results trigger another attempt. The pause
    between attempts grows linearly (``backoff_seconds * attempt_number``);
    no pause is taken after the final attempt because nothing follows it.
    Returns an empty list when every attempt fails or ``retries`` is 0.
    """
    for attempt in range(retries):
        try:
            client = GNews(
                language=language,
                country=country,
                period=period,
                max_results=max_results,
            )
            articles = client.get_news(query) or []
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and retry
            # rather than propagate it to the caller.
            print(f"[Attempt {attempt+1}] GNews error: {e}")
        else:
            if articles:
                return articles
        if attempt < retries - 1:
            time.sleep(backoff_seconds * (attempt + 1))
    return []


def _extract_url(article: Dict[str, Any]) -> str:
    """Return the first non-empty of ``url``, ``link``, ``source["url"]``, else ""."""
    source = article.get("source")
    # "source" may be present but None (or not a mapping); guard before .get().
    source_url = source.get("url") if isinstance(source, dict) else ""
    return article.get("url") or article.get("link") or source_url or ""


def _format_published(raw: Any) -> str:
    """Render a publication timestamp as a string.

    ``datetime`` values are normalised to UTC and formatted as
    ``YYYY-MM-DD HH:MM``; a datetime without a UTC offset is assumed to
    already be in UTC. Any other value is returned via ``str()``.
    """
    if isinstance(raw, datetime):
        if raw.utcoffset() is None:
            raw = raw.replace(tzinfo=timezone.utc)
        return raw.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M")
    return str(raw)


def get_latest_news_with_sentiment(
    query: str,
    *,
    period: str = "1d",
    max_results: int = 20,
    language: str = "en",
    country: str = "US",
    retries: int = 3,
    backoff_seconds: int = 3,
    similarity_threshold: int = 60,
) -> Dict[str, Any]:
    """Fetch news for *query*, de-duplicate by title, and score sentiment.

    Args:
        query: Search string passed to ``GNews.get_news``.
        period: Passed to ``GNews`` as ``period``.
        max_results: Passed to ``GNews`` as ``max_results``.
        language: Passed to ``GNews`` as ``language``.
        country: Passed to ``GNews`` as ``country``.
        retries: Maximum number of fetch attempts.
        backoff_seconds: Base delay between attempts; the n-th wait lasts
            ``backoff_seconds * n`` seconds.
        similarity_threshold: A title whose ``fuzz.ratio`` against an
            already-kept title exceeds this value is dropped as a duplicate.

    Returns:
        A dict with:
          * ``"overall_news_score"``: mean compound score rescaled with
            ``(mean + 1) * 2.5`` (so -1..1 maps to 0..5), rounded to two
            decimals; ``0.0`` when no item was kept.
          * ``"count"``: number of items kept.
          * ``"items"``: list of dicts with keys ``title``, ``url``,
            ``published``, ``sentiment`` and ``compound`` (rounded to three
            decimals).

    Fetch errors are printed and retried, never raised; if nothing could be
    fetched the empty result described above is returned.
    """
    articles = _fetch_articles(
        query,
        period=period,
        max_results=max_results,
        language=language,
        country=country,
        retries=retries,
        backoff_seconds=backoff_seconds,
    )
    if not articles:
        return {"overall_news_score": 0.0, "count": 0, "items": []}

    seen_titles: List[str] = []
    items: List[Dict[str, Any]] = []
    total_compound = 0.0

    for art in articles:
        title = (art.get("title") or "").strip()
        # Skip untitled entries and near-duplicates of headlines already kept.
        if not title or _is_similar(title, seen_titles, threshold=similarity_threshold):
            continue
        seen_titles.append(title)

        published_raw = (
            art.get("published date")
            or art.get("publishedDate")
            or art.get("datetime")
            or ""
        )
        compound = _SIA.polarity_scores(title)["compound"]
        items.append({
            "title": title,
            "url": _extract_url(art),
            "published": _format_published(published_raw),
            "sentiment": _sentiment_label(compound),
            "compound": round(compound, 3),
        })
        # Accumulate the unrounded score so the mean is not biased by rounding.
        total_compound += compound

    n = len(items)
    overall = round(((total_compound / n) + 1) * 2.5, 2) if n else 0.0
    return {"overall_news_score": overall, "count": n, "items": items}