"""Interactive debug runner that exercises the scraping/alert tools one category at a time.

Run it from the project root so that the ``src`` package can be imported.
"""

import json
import os
import sys
from datetime import datetime

# Ensure we can find the 'src' module from the root
sys.path.append(os.getcwd())

try:
    from src.utils.utils import (
        scrape_facebook,
        scrape_twitter,
        scrape_local_news,
        scrape_reddit,
        scrape_government_gazette,
        scrape_cse_stock_data,
        tool_weather_nowcast,
        tool_dmc_alerts,
        scrape_linkedin,
        scrape_instagram,
    )
    print("✅ Libraries loaded successfully.\n")
except ImportError as e:
    print(f"❌ Error loading libraries: {e}")
    print("Make sure you are running this from the 'Roger-Final' folder.")
    sys.exit(1)


# Width of the horizontal rules and the cap on raw/JSON output per test.
_RULE_WIDTH = 70
_MAX_OUTPUT_CHARS = 1000
_MAX_SAMPLE_RESULTS = 3
_MAX_SNIPPET_CHARS = 150


def print_separator(char="=", length=70):
    """Print a horizontal rule made of ``char`` repeated ``length`` times."""
    print(char * length)


def print_header(text):
    """Print ``text`` framed above and below by separator rules."""
    print_separator()
    print(f" {text}")
    print_separator()


def _print_result(res, elapsed):
    """Pretty-print one tool result.

    ``res`` is expected to be a JSON string. A dict with a ``results`` list is
    summarised (count plus the first few entries); any other JSON value is
    dumped, truncated to ``_MAX_OUTPUT_CHARS``. Anything that is not valid
    JSON is printed as plain text with the same truncation.
    """
    try:
        parsed = json.loads(res)
    except (TypeError, ValueError):
        # Not JSON (json.JSONDecodeError is a ValueError; non-str input raises
        # TypeError). Stringify first so results without len()/slicing still print.
        text = res if isinstance(res, str) else str(res)
        print(text[:_MAX_OUTPUT_CHARS])
        return

    # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
    if not isinstance(parsed, dict):
        print(json.dumps(parsed, indent=2, ensure_ascii=False)[:_MAX_OUTPUT_CHARS])
        return

    results = parsed.get("results")
    if not isinstance(results, list):
        print(f"\n✅ Success in {elapsed:.2f}s")
        print(json.dumps(parsed, indent=2, ensure_ascii=False)[:_MAX_OUTPUT_CHARS])
        return

    print(f"\n✅ Success! Found {len(results)} results in {elapsed:.2f}s")
    print("\nSample Results:")
    for i, item in enumerate(results[:_MAX_SAMPLE_RESULTS], 1):
        if not isinstance(item, dict):
            # Unexpected entry shape: show it verbatim rather than failing.
            print(f"\n [{i}] {item}")
            continue
        print(f"\n [{i}] {item.get('title', 'No title')}")
        if "snippet" in item:
            snippet = str(item["snippet"] or "")
            if len(snippet) > _MAX_SNIPPET_CHARS:
                snippet = snippet[:_MAX_SNIPPET_CHARS] + "..."
            print(f" {snippet}")
        if "url" in item:
            print(f" 🔗 {item['url']}")


def run_test(name, func, description="", **kwargs):
    """Call one tool and print its (truncated) output and elapsed time.

    Args:
        name: Label shown in the test banner.
        func: Either a plain callable, invoked as ``func(**kwargs)``, or an
            object exposing ``.invoke``, invoked as ``func.invoke(kwargs)``.
        description: Optional one-line description printed under the banner.
        **kwargs: Arguments forwarded to the tool.

    Any exception raised by the tool or while printing its output is caught
    and reported, so one failing tool does not abort the remaining tests.
    """
    print(f"\n🔍 Testing: {name}")
    if description:
        print(f" {description}")
    print("-" * _RULE_WIDTH)

    start_time = datetime.now()
    try:
        # Check if it's a LangChain tool (needs .invoke)
        if hasattr(func, "invoke"):
            res = func.invoke(kwargs)
        else:
            res = func(**kwargs)
        elapsed = (datetime.now() - start_time).total_seconds()

        _print_result(res, elapsed)
        print(f"\n⏱️ Completed in {elapsed:.2f} seconds")
    except Exception as e:
        print(f"❌ Error: {e}")

    print("-" * _RULE_WIDTH)


def check_sessions():
    """Report which platform session files exist.

    Looks in ``src/utils/.sessions`` and ``.sessions`` for
    ``<platform>_storage_state.json`` files and prints a per-directory status.

    Returns:
        List of platform names for which a session file was found, each
        listed once even if it exists in both directories.
    """
    print_header("Session Status Check")

    session_paths = [
        "src/utils/.sessions",
        ".sessions",
    ]
    platforms = ["facebook", "twitter", "linkedin", "instagram", "reddit"]
    found_sessions = []

    print("session_path: ", session_paths)
    for path in session_paths:
        if not os.path.exists(path):
            continue
        print(f"\n📁 Checking {path}/")
        for platform in platforms:
            session_file = os.path.join(path, f"{platform}_storage_state.json")
            if os.path.exists(session_file):
                size = os.path.getsize(session_file)
                print(f" ✅ {platform:12} ({size:,} bytes)")
                # The same platform may have a file in both directories.
                if platform not in found_sessions:
                    found_sessions.append(platform)
            else:
                print(f" ❌ {platform:12} (not found)")

    if not found_sessions:
        print("\n⚠️ No session files found!")
        print(" Run 'python src/utils/session_manager.py' to create sessions.")

    print_separator()
    return found_sessions


def main():
    """Show the category menu, read one choice and run the matching tests."""
    print_header("Roger Debug Runner - Comprehensive Tool Testing")

    print("\n📋 Available Test Categories:")
    print(" 1. Weather & Alerts (No auth required)")
    print(" 2. News & Government (No auth required)")
    print(" 3. Financial Data (No auth required)")
    print(" 4. Social Media (Requires auth)")
    print(" 5. Check Sessions")
    print(" 6. Run All Tests")
    print(" q. Quit")

    choice = input("\nSelect category (1-6 or q): ").strip().lower()

    if choice == "q":
        return

    if choice == "5":
        check_sessions()
        return

    if choice not in {"1", "2", "3", "4", "6"}:
        # Previously an unknown choice ran nothing yet reported completion.
        print(f"\n❌ Invalid selection: {choice!r}")
        return

    # === CATEGORY 1: Weather & Alerts ===
    if choice in ["1", "6"]:
        print_header("CATEGORY 1: Weather & Alerts")
        run_test(
            "Weather Nowcast",
            tool_weather_nowcast,
            "Comprehensive weather data from Department of Meteorology",
            location="Colombo",
        )
        run_test(
            "DMC Alerts",
            tool_dmc_alerts,
            "Disaster Management Centre severe weather alerts",
        )

    # === CATEGORY 2: News & Government ===
    if choice in ["2", "6"]:
        print_header("CATEGORY 2: News & Government")
        run_test(
            "Local News",
            scrape_local_news,
            "Scraping Daily Mirror, Daily FT, News First",
            keywords=["economy", "politics"],
            max_articles=5,
        )
        run_test(
            "Government Gazette",
            scrape_government_gazette,
            "Latest gazette notifications",
            keywords=["regulation"],
            max_items=3,
        )

    # === CATEGORY 3: Financial Data ===
    if choice in ["3", "6"]:
        print_header("CATEGORY 3: Financial Data")
        run_test(
            "CSE Stock Data",
            scrape_cse_stock_data,
            "Colombo Stock Exchange - ASPI Index",
            symbol="ASPI",
            period="1d",
        )

    # === CATEGORY 4: Social Media ===
    if choice in ["4", "6"]:
        print_header("CATEGORY 4: Social Media (Authentication Required)")
        available_sessions = check_sessions()

        search_kwargs = {
            "keywords": ["Sri Lanka", "Elon musk", "business"],
            "max_items": 5,
        }
        # (session key, label, tool, description, tool arguments)
        social_tests = [
            ("facebook", "Facebook", scrape_facebook,
             "Facebook search results", search_kwargs),
            ("instagram", "Instagram", scrape_instagram,
             "Instagram search results", search_kwargs),
            ("linkedin", "Linkedin", scrape_linkedin,
             "Linkedin search results", search_kwargs),
            ("twitter", "Twitter", scrape_twitter,
             "Twitter/X search", {"query": "Sri Lanka economy"}),
        ]
        for session_key, label, tool, description, tool_kwargs in social_tests:
            if session_key in available_sessions:
                run_test(label, tool, description, **tool_kwargs)
            else:
                # Name the platform actually being skipped.
                print(f"\n⚠️ {label} session not found - skipping")

        # Reddit doesn't need session
        run_test(
            "Reddit",
            scrape_reddit,
            "Reddit posts (no auth needed)",
            keywords=["Sri Lanka"],
            limit=5,
        )

    print_header("Testing Complete!")
    print(f"\n⏰ Finished at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


if __name__ == "__main__":
    main()