diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..9ac047203a6040483dd372e73682b9b0cff63bfe 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +ModelX[[:space:]]Final[[:space:]]Problem.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/deploy-backend.yaml b/.github/workflows/deploy-backend.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e7257ae03108beb73d34d2f230c9ab1c4bde18c --- /dev/null +++ b/.github/workflows/deploy-backend.yaml @@ -0,0 +1,63 @@ +name: Deploy Backend to Hugging Face Space + +on: + push: + branches: + - main + - master + workflow_dispatch: + +jobs: + sync-to-hub: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + + - name: Setup Git LFS + run: | + git lfs install + git lfs pull + git lfs checkout + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install huggingface_hub + run: pip install huggingface_hub + + - name: Push to Hugging Face Space + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + python -c " + from huggingface_hub import HfApi, login + import os + + token = os.environ['HF_TOKEN'] + login(token=token) + + api = HfApi() + api.upload_folder( + folder_path='.', + repo_id='nivakaran/modelx', + repo_type='space', + token=token, + ignore_patterns=['*.pyc', '__pycache__', '.git', 'node_modules', '*.log', '.env'] + ) + print('✅ Successfully pushed to Hugging Face Space!') + " + + - name: Verify Sync + if: success() + run: echo "✅ Successfully synced to Hugging Face Space!" + + - name: Sync Failed + if: failure() + run: echo "❌ Failed to sync. 
Check if the Space exists at https://huggingface.co/spaces/nivakaran/modelx" diff --git a/.github/workflows/deploy-frontend.yaml b/.github/workflows/deploy-frontend.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40ee352f0241aa7d9e76aa8e6c5b7a665627b9a7 --- /dev/null +++ b/.github/workflows/deploy-frontend.yaml @@ -0,0 +1,68 @@ +name: Deploy Frontend to GitHub (for Vercel) + +on: + push: + branches: + - main + - master + paths: + # Trigger only when frontend files change + - 'frontend/**' + workflow_dispatch: + +jobs: + deploy-frontend: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Configure Git + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + + - name: Extract and Push Frontend + env: + # Set your frontend repository here (format: username/repo) + FRONTEND_REPO: ${{ secrets.FRONTEND_REPO }} + # GitHub PAT with repo permissions for cross-repo push + DEPLOY_TOKEN: ${{ secrets.FRONTEND_DEPLOY_TOKEN }} + run: | + # Default repo if secret not set + REPO=${FRONTEND_REPO:-"Nivakaran-S/modelx-frontend"} + + # Create a new directory for frontend-only repo + mkdir -p /tmp/frontend-deploy + + # Copy frontend contents (not the folder itself) + cp -r frontend/* /tmp/frontend-deploy/ + + # Copy root configs needed for Next.js if they exist + [ -f "frontend/.gitignore" ] && cp frontend/.gitignore /tmp/frontend-deploy/ || true + + # Create .env.production with API URL placeholder + echo "NEXT_PUBLIC_API_URL=\${NEXT_PUBLIC_API_URL:-https://nivakaran-modelx.hf.space}" > /tmp/frontend-deploy/.env.production + + cd /tmp/frontend-deploy + + # Initialize git repo + git init + git add . + git commit -m "Deploy frontend from main repo - $(date +'%Y-%m-%d %H:%M:%S')" + + # Push to frontend repo + git remote add origin https://x-access-token:${DEPLOY_TOKEN}@github.com/${REPO}.git + git branch -M main + git push origin main --force + + - name: Verify Deployment + if: success() + run: echo "✅ Successfully pushed frontend to GitHub repository for Vercel deployment!" + + - name: Deployment Failed + if: failure() + run: echo "❌ Failed to push frontend. Check if FRONTEND_DEPLOY_TOKEN and FRONTEND_REPO secrets are configured." diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a40233c7730c3512cdb8ea58b85675aa4545b3f7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# Virtual environments +.venv + +.env +.env.template + +models/ +models +.langgraph_api +data/ +datasets/ +datasets +data \ No newline at end of file diff --git a/.langgraphignore b/.langgraphignore new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.python-version b/.python-version new file mode 100644 index 0000000000000000000000000000000000000000..2c0733315e415bfb5e5b353f9996ecd964d395b2 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..4f49a91dcacd76c3636ba8fe35bb66a18efe9491 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements +COPY requirements.txt . 
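+# Install dependencies in their own layer so Docker can reuse the cached layer on code-only rebuilds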
+RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Expose port +EXPOSE 8000 + +# Run API server +CMD ["python", "main.py"] \ No newline at end of file diff --git a/ModelX Final Problem.pdf b/ModelX Final Problem.pdf new file mode 100644 index 0000000000000000000000000000000000000000..be46cb35eb8a519dff0db76adf6b52951819deac --- /dev/null +++ b/ModelX Final Problem.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8666a7e263a028de6eb37864f48d8bfcfa78bf8303c72778c68396eade8b2300 +size 4413994 diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 0000000000000000000000000000000000000000..0ff9e221b691ea9a1cfc0c77b394ac0612927949 --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,140 @@ +# 🚀 Roger Quick Start Guide + +## Prerequisites +- Python 3.11+ +- Node.js 18+ +- Groq API Key ([Get Free Key](https://console.groq.com)) + +## Installation & Setup + +### 1. Install Python Dependencies +```bash +pip install -r requirements.txt +``` + +### 2. Configure Environment +```bash +# Copy template +cp .env.template .env + +# Edit .env and add your GROQ_API_KEY +# GROQ_API_KEY=your_key_here +``` + +### 3. Start Backend +```bash +python main.py +``` + +Wait for initialization logs: +``` +[StorageManager] Initializing multi-database storage system +[SQLiteCache] Initialized at data/cache/feeds.db +[ChromaDB] Initialized collection: Roger_feeds +[CombinedAgentNode] Initialized with production storage layer +``` + +### 4. Start Frontend (New Terminal) +```bash +cd frontend +npm install +npm run dev +``` + +### 5. Access Dashboard +Open: http://localhost:3000 + +--- + +## 🎯 What to Expect + +### First 60 Seconds +- System initializes 6 domain agents +- Begins scraping 47+ data sources +- Deduplication pipeline activates + +### After 60-120 Seconds +- First batch of events appears on dashboard +- Risk metrics start calculating +- Real-time WebSocket connects + +### Live Features +- ✅ Real-time intelligence feed +- ✅ Risk vs Opportunity classification +- ✅ 3-tier deduplication (SQLite + ChromaDB + Neo4j\*) +- ✅ CSV exports in `data/feeds/` +- ✅ Operational Risk Radar metrics + +\*Neo4j optional - requires Docker + +--- + +## 🐛 Troubleshooting + +### "ChromaDB not found" +```bash +pip install chromadb sentence-transformers +``` + +### "No events appearing" +- Wait 60-120 seconds for first batch +- Check backend logs for errors +- Verify GROQ_API_KEY is set correctly + +### Frontend can't connect +```bash +# Verify backend running +curl http://localhost:8000/api/status +``` + +--- + +## 📊 Production Features + +### Storage Stats +```bash +curl http://localhost:8000/api/storage/stats +``` + +### CSV Exports +```bash +ls -lh data/feeds/ +cat data/feeds/feed_$(date +%Y-%m-%d).csv +``` + +### Enable Neo4j (Optional) +```bash +# Start Neo4j with Docker +docker-compose -f docker-compose.prod.yml up -d neo4j + +# Update .env +NEO4J_ENABLED=true + +# Restart backend +python main.py + +# Access Neo4j Browser +open http://localhost:7474 +# Login: neo4j / Roger2024 +``` + +--- + +## 🏆 Demo for Judges + +**Show in this order**: +1. Live dashboard (http://localhost:3000) +2. Terminal logs showing deduplication stats +3. Neo4j graph visualization (if enabled) +4. CSV exports in data/feeds/ +5. 
Storage API: http://localhost:8000/api/storage/stats + +**Key talking points**: +- "47+ data sources, 6 domain agents running in parallel" +- "3-tier deduplication: SQLite for speed, ChromaDB for intelligence" +- "90%+ duplicate reduction vs 60% with basic hashing" +- "Production-ready with persistent storage and knowledge graphs" + +--- + +**Ready to win! 🏆** diff --git a/README.md b/README.md index b9f40ded328b6d5ae2bfcec03003dbfddf92966d..9bfc1acc472086e290261f3f36b99b42a34cb399 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,863 @@ +# 🇱🇰 Roger Intelligence Platform + +**Real-Time Situational Awareness for Sri Lanka** + +A multi-agent AI system that aggregates intelligence from 47+ data sources to provide risk analysis and opportunity detection for businesses operating in Sri Lanka. + +## 🌐 Live Demo + +| Component | URL | +|-----------|-----| +| **Frontend Dashboard** | [https://model-x-frontend-snowy.vercel.app/](https://model-x-frontend-snowy.vercel.app/) | +| **Backend API** | [https://nivakaran-Roger.hf.space](https://nivakaran-Roger.hf.space) | + +--- + +## 🎯 Key Features + +✅ **8 Domain Agents** running in parallel: +- Social Media Monitor (Reddit, Twitter, Facebook, Threads, BlueSky) +- Political Intelligence (Gazette, Parliament, District Social Media) +- Economic Analysis (CSE Stock Market + Technical Indicators) +- Meteorological Alerts (DMC Weather + RiverNet + **FloodWatch Integration** 🆕) +- Intelligence Agent (Brand Monitoring + Threat Detection + **User-Configurable Targets**) +- Data Retrieval Orchestrator (Web Scraping) +- Vectorization Agent (Multilingual BERT Embeddings + Anomaly Detection) + +✅ **ML Anomaly Detection Pipeline** (Integrated into Graph): +- Language-specific BERT models (Sinhala, Tamil, English) +- Real-time anomaly inference on every graph cycle +- Clustering (DBSCAN, KMeans, HDBSCAN) +- Anomaly Detection (Isolation Forest, LOF) +- MLflow + DagsHub tracking + +✅ **Weather Prediction ML Pipeline** 🆕: +- LSTM Neural Network (30-day sequences) +- Predicts: Temperature, Rainfall, Flood Risk, Severity +- 21 weather stations → 25 districts +- Airflow DAG runs daily at 4 AM + +✅ **Currency Prediction ML Pipeline** 🆕: +- GRU Neural Network (optimized for 8GB RAM) +- Predicts: USD/LKR exchange rate +- Features: Technical indicators + CSE + Gold + Oil + USD Index +- MLflow tracking + Airflow DAG at 4 AM + +✅ **Stock Price Prediction ML Pipeline** 🆕: +- Multi-Architecture: LSTM, GRU, BiLSTM, BiGRU +- Optuna hyperparameter tuning (30 trials per stock) +- Per-stock best model selection +- 10 top CSE stocks (JKH, COMB, DIAL, HNB, etc.) 
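+
+A rough sketch of the per-stock search loop described above (illustrative only: the architecture set and 30-trial budget come from the bullets, while the layer sizes, search ranges, and synthetic data are assumptions rather than the repository's actual trainer):
+
+```python
+import numpy as np
+import optuna
+from tensorflow import keras
+
+SEQ_LEN, N_FEATURES = 30, 8  # 30-day windows; feature count is assumed
+
+def build_model(arch: str, units: int, dropout: float) -> keras.Model:
+    # "BiLSTM"/"BiGRU" are bidirectional wrappers around the base cell
+    base = {"LSTM": keras.layers.LSTM, "GRU": keras.layers.GRU}[arch.removeprefix("Bi")]
+    rnn = base(units, dropout=dropout)
+    if arch.startswith("Bi"):
+        rnn = keras.layers.Bidirectional(rnn)
+    model = keras.Sequential([keras.Input(shape=(SEQ_LEN, N_FEATURES)), rnn, keras.layers.Dense(1)])
+    model.compile(optimizer="adam", loss="mse")
+    return model
+
+def make_objective(X_tr, y_tr, X_val, y_val):
+    def objective(trial: optuna.Trial) -> float:
+        model = build_model(
+            trial.suggest_categorical("arch", ["LSTM", "GRU", "BiLSTM", "BiGRU"]),
+            trial.suggest_int("units", 32, 128, step=32),
+            trial.suggest_float("dropout", 0.0, 0.4),
+        )
+        model.fit(X_tr, y_tr, epochs=5, batch_size=32, verbose=0)
+        return float(model.evaluate(X_val, y_val, verbose=0))  # validation MSE
+    return objective
+
+# Synthetic stand-in for the real windowed price data.
+rng = np.random.default_rng(0)
+best_params = {}
+for symbol in ["JKH", "COMB"]:
+    X = rng.normal(size=(200, SEQ_LEN, N_FEATURES)).astype("float32")
+    y = X[:, -1, 0]
+    study = optuna.create_study(direction="minimize")
+    study.optimize(make_objective(X[:150], y[:150], X[150:], y[150:]), n_trials=30)
+    best_params[symbol] = study.best_params  # best architecture + hyperparameters per stock
+```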
+ +✅ **RAG-Powered Chatbot** 🆕: +- Chat-history aware Q&A +- Queries all ChromaDB intelligence collections +- Domain filtering (political, economic, weather, social) +- Floating chat UI in dashboard + +✅ **Real-Time Dashboard** with: +- Live Intelligence Feed +- Floating AI Chatbox +- Weather Predictions Tab +- **Live Satellite/Weather Map** (Windy.com) 🆕 +- **National Flood Threat Score** 🆕 +- **30-Year Historical Climate Analysis** 🆕 +- Operational Risk Radar +- ML Anomaly Detection Display +- Market Predictions with Moving Averages +- Risk & Opportunity Classification + +✅ **Weather Data Scraper for ML Training** 🆕: +- Open-Meteo API (free historical data) +- NASA FIRMS (fire/heat detection) +- All 25 districts coverage +- Year-wise CSV export for model training + +--- + +## 🏗️ System Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Roger Combined Graph │ +│ ┌────────────────────────────────────────────────────────────────┐ │ +│ │ Graph Initiator (Reset) │ │ +│ └────────────────────────────────────────────────────────────────┘ │ +│ │ Fan-Out │ +│ ┌────────────┬────────────┼────────────┬────────────┬────────────┐ │ +│ ▼ ▼ ▼ ▼ ▼ ▼ │ +│ ┌──────┐ ┌──────┐ ┌──────────┐ ┌──────┐ ┌──────────┐ ┌────┐│ +│ │Social│ │Econ │ │Political │ │Meteo │ │Intellig- │ │Data││ +│ │Agent │ │Agent │ │Agent │ │Agent │ │ence Agent│ │Retr││ +│ └──────┘ └──────┘ └──────────┘ └──────┘ └──────────┘ └────┘│ +│ │ │ │ │ │ │ │ +│ └────────────┴────────────┴────────────┴────────────┴────────────┘ │ +│ │ Fan-In │ +│ ┌─────────▼──────────┐ │ +│ │ Feed Aggregator │ │ +│ │ (Rank & Dedupe) │ │ +│ └─────────┬──────────┘ │ +│ ┌─────────▼──────────┐ │ +│ │ Vectorization │ ← NEW │ +│ │ Agent (Optional) │ │ +│ └─────────┬──────────┘ │ +│ ┌─────────▼──────────┐ │ +│ │ Router (Loop/End) │ │ +│ └────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 📊 Graph Implementations + +### 1. Combined Agent Graph (`combinedAgentGraph.py`) +**The Mother Graph** - Orchestrates all domain agents in parallel. + +```mermaid +graph TD + A[Graph Initiator] -->|Fan-Out| B[Social Agent] + A -->|Fan-Out| C[Economic Agent] + A -->|Fan-Out| D[Political Agent] + A -->|Fan-Out| E[Meteorological Agent] + A -->|Fan-Out| F[Intelligence Agent] + A -->|Fan-Out| G[Data Retrieval Agent] + B -->|Fan-In| H[Feed Aggregator] + C --> H + D --> H + E --> H + F --> H + G --> H + H --> I[Data Refresher] + I --> J{Router} + J -->|Loop| A + J -->|End| K[END] +``` + +**Key Features:** +- Custom state reducers for parallel execution +- Feed deduplication with content hashing +- Loop control with configurable intervals +- Real-time WebSocket broadcasting + +--- + +### 2. 
Political Agent Graph (`politicalAgentGraph.py`) +**3-Module Hybrid Architecture** + +| Module | Description | Sources | +|--------|-------------|---------| +| **Official Sources** | Government data | Gazette, Parliament Minutes | +| **Social Media** | Political sentiment | Twitter, Facebook, Reddit (National + 25 Districts) | +| **Feed Generation** | LLM Processing | Categorize → Summarize → Format | + +``` +┌─────────────────────────────────────────────┐ +│ Module 1: Official │ Module 2: Social │ +│ ┌─────────────────┐ │ ┌───────────────┐ │ +│ │ Gazette │ │ │ National │ │ +│ │ Parliament │ │ │ Districts (25)│ │ +│ └─────────────────┘ │ │ World Politics│ │ +│ │ └───────────────┘ │ +└────────────┬───────────┴────────┬──────────┘ + │ Fan-In │ + ▼ ▼ + ┌────────────────────────────┐ + │ Module 3: Feed Generation │ + │ Categorize → LLM → Format │ + └────────────────────────────┘ +``` + +--- + +### 3. Economic Agent Graph (`economicalAgentGraph.py`) +**Market Intelligence & Technical Analysis** + +| Component | Description | +|-----------|-------------| +| **Stock Collector** | CSE market data (200+ stocks) | +| **Technical Analyzer** | SMA, EMA, RSI, MACD | +| **Trend Detector** | Bullish/Bearish signals | +| **Feed Generator** | Risk/Opportunity classification | + +**Indicators Calculated:** +- Simple Moving Average (SMA-20, SMA-50) +- Exponential Moving Average (EMA-12, EMA-26) +- Relative Strength Index (RSI) +- MACD with Signal Line + +--- + +### 4. Meteorological Agent Graph (`meteorologicalAgentGraph.py`) +**Weather & Disaster Monitoring + FloodWatch Integration** 🆕 + +``` +┌─────────────────────────────────────┐ +│ DMC Weather Collector │ +│ (Daily forecasts, 25 districts) │ +└─────────────┬───────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ RiverNet Data Collector │ +│ (River levels, flood monitoring) │ +└─────────────┬───────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ FloodWatch Historical Data 🆕 │ +│ (30-year climate analysis) │ +└─────────────┬───────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ National Threat Calculator 🆕 │ +│ (Aggregated flood risk 0-100) │ +└─────────────┬───────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ Alert Generator │ +│ (Severity classification) │ +└─────────────────────────────────────┘ +``` + +**Alert Levels:** +- 🟢 Normal: Standard conditions +- 🟡 Advisory: Watch for developments +- 🟠 Warning: Take precautions +- 🔴 Critical: Immediate action required + +**FloodWatch Features (New):** +| Feature | Description | +|---------|-------------| +| **Historical Analysis** | 30-year climate data (1995-2025) | +| **Decadal Comparison** | 3 periods: 1995-2004, 2005-2014, 2015-2025 | +| **National Threat Score** | 0-100 aggregated risk from rivers + alerts + season | +| **High-Risk Periods** | May-Jun (SW Monsoon), Oct-Nov (NE Monsoon) | + +--- + +### 5. Social Agent Graph (`socialAgentGraph.py`) +**Multi-Platform Social Media Monitoring** + +| Platform | Data Source | Coverage | +|----------|-------------|----------| +| Reddit | PRAW API | r/srilanka, r/colombo | +| Twitter/X | Nitter scraping | #SriLanka, #Colombo | +| Facebook | Profile scraping | News pages | +| Threads | Meta API | Trending topics | +| BlueSky | AT Protocol | Political discourse | + +--- + +### 6. 
Intelligence Agent Graph (`intelligenceAgentGraph.py`) +**Brand & Threat Monitoring + User-Configurable Targets** 🆕 + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Brand Monitor │ │ Threat Scanner │ │ User Targets 🆕 │ +│ - Company news │ │ - Security │ │ - Custom keys │ +│ - Competitor │ │ - Compliance │ │ - User profiles │ +│ - Market share │ │ - Geopolitical │ │ - Products │ +└────────┬────────┘ └────────┬────────┘ └────────┬────────┘ + │ │ │ + └──────────────────────┼──────────────────────┘ + ▼ + ┌─────────────────────┐ + │ Intelligence Report │ + │ (Priority ranked) │ + └─────────────────────┘ +``` + +**User-Configurable Monitoring** 🆕: +Users can define custom monitoring targets via the frontend settings panel or API: + +| Config Type | Description | Example | +|-------------|-------------|---------| +| **Keywords** | Custom search terms | "Colombo Port", "BOI Investment" | +| **Products** | Products to track | "iPhone 15", "Samsung Galaxy" | +| **Profiles** | Social media accounts | @CompetitorX (Twitter), CompanyY (Facebook) | + +**API Endpoints:** +```bash +# Get current config +GET /api/intel/config + +# Update full config +POST /api/intel/config +Body: {"user_keywords": ["keyword1"], "user_profiles": {"twitter": ["@account"]}, "user_products": ["Product"]} + +# Add single target +POST /api/intel/config/add?target_type=keyword&value=Colombo+Port + +# Remove target +DELETE /api/intel/config/remove?target_type=profile&value=CompetitorX&platform=twitter +``` + +**Config File**: `src/config/intel_config.json` + +--- + +### 7. DATA Retrieval Agent Graph (`dataRetrievalAgentGraph.py`) +**Web Scraping Orchestrator** + +**Scraping Tools Available:** +- `scrape_news_site` - Generic news scraper +- `scrape_cse_live` - CSE stock prices +- `scrape_official_data` - Government portals +- `scrape_social_media` - Multi-platform + +**Anti-Bot Features:** +- Random delays (1-3s) +- User-agent rotation +- Retry with exponential backoff +- Headless browser fallback + +--- + +### 8. Vectorization Agent Graph (`vectorizationAgentGraph.py`) 🆕 +**Multilingual Text-to-Vector Conversion + Anomaly Detection** + +``` +┌─────────────────────────────────────────────────┐ +│ Step 1: Language Detection │ +│ FastText + Unicode script analysis │ +│ Supports: English, Sinhala (සිංහල), Tamil (தமிழ்)│ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Step 2: Text Vectorization │ +│ ┌─────────────┬─────────────┬─────────────────┐ │ +│ │ DistilBERT │ SinhalaBERTo│ Tamil-BERT │ │ +│ │ (English) │ (Sinhala) │ (Tamil) │ │ +│ └─────────────┴─────────────┴─────────────────┘ │ +│ Output: 768-dim vector per text │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Step 3: Anomaly Detection (Isolation Forest) 🆕 │ +│ - Runs inference on every graph cycle │ +│ - Outputs anomaly_score (0-1) │ +│ - Graceful fallback if model not trained │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Step 4: Expert Summary (GroqLLM) │ +│ - Opportunity identification │ +│ - Threat detection │ +│ - Sentiment analysis │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Step 5: Format Output │ +│ - Includes anomaly insights in domain_insights │ +│ - Passes results to parent graph │ +└─────────────────────────────────────────────────┘ +``` + +--- + +### 10. 
Weather Prediction Pipeline (`models/weather-prediction/`) 🆕 +**LSTM-Based Multi-District Weather Forecasting** + +``` +┌─────────────────────────────────────────────────┐ +│ Data Source: Tutiempo.net (21 stations) │ +│ Historical data since 1944 │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ LSTM Neural Network │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ Input: 30-day sequence (11 features) │ │ +│ │ Layer 1: LSTM(64) + BatchNorm + Dropout │ │ +│ │ Layer 2: LSTM(32) + BatchNorm + Dropout │ │ +│ │ Output: Dense(3) → temp_max, temp_min, rain │ │ +│ └─────────────────────────────────────────────┘ │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Severity Classifier │ +│ - Combines temp, rainfall, flood risk │ +│ - Outputs: normal/advisory/warning/critical │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Output: 25 District Predictions │ +│ - Temperature (high/low °C) │ +│ - Rainfall (mm + probability) │ +│ - Flood risk (integrated with RiverNet) │ +└─────────────────────────────────────────────────┘ +``` + +**Usage:** +```bash +# Run full pipeline +cd models/weather-prediction +python main.py --mode full + +# Just predictions +python main.py --mode predict + +# Train specific station +python main.py --mode train --station COLOMBO +``` + +--- + +### 11. Currency Prediction Pipeline (`models/currency-volatility-prediction/`) 🆕 +**GRU-Based USD/LKR Exchange Rate Forecasting** + +``` +┌─────────────────────────────────────────────────┐ +│ Data Sources (yfinance) │ +│ - USD/LKR exchange rate │ +│ - CSE stock index (correlation) │ +│ - Gold, Oil prices (global factors) │ +│ - USD strength index │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Feature Engineering (25+ features) │ +│ - SMA, EMA, RSI, MACD, Bollinger Bands │ +│ - Volatility, Momentum indicators │ +│ - Temporal encoding (day/month cycles) │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ GRU Neural Network (8GB RAM optimized) │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ Input: 30-day sequence │ │ +│ │ Layer 1: GRU(64) + BatchNorm + Dropout │ │ +│ │ Layer 2: GRU(32) + BatchNorm + Dropout │ │ +│ │ Output: Dense(1) → next_day_rate │ │ +│ └─────────────────────────────────────────────┘ │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Output: USD/LKR Prediction │ +│ - Current & predicted rate │ +│ - Change % and direction │ +│ - Volatility classification (low/medium/high) │ +└─────────────────────────────────────────────────┘ +``` + +**Usage:** +```bash +# Run full pipeline +cd models/currency-volatility-prediction +python main.py --mode full + +# Just predict +python main.py --mode predict + +# Train GRU model +python main.py --mode train --epochs 100 +``` + +--- + +### 12. RAG Chatbot (`src/rag.py`) +**Chat-History Aware Intelligence Q&A** + +``` +┌─────────────────────────────────────────────────┐ +│ MultiCollectionRetriever │ +│ - Connects to ALL ChromaDB collections │ +│ - Roger_feeds, Roger_rag_collection, etc. 
│ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Question Reformulation (History-Aware) │ +│ - Uses last 3-5 exchanges for context │ +│ - Reformulates follow-up questions │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Groq LLM (llama-3.1-70b-versatile) │ +│ - RAG with source citations │ +│ - Domain-specific analysis │ +└─────────────────────────────────────────────────┘ +``` + +**Usage:** +```bash +# CLI mode +python src/rag.py + +# Or via API +curl -X POST http://localhost:8000/api/rag/chat \ + -H "Content-Type: application/json" \ + -d '{"message": "What are the latest political events?"}' +``` + --- -title: Modelx -emoji: 🐨 -colorFrom: green -colorTo: indigo -sdk: docker -pinned: false + +## 🤖 ML Anomaly Detection Pipeline + +Located in `models/anomaly-detection/` + +### Pipeline Components + +| Component | File | Description | +|-----------|------|-------------| +| Data Ingestion | `data_ingestion.py` | SQLite + CSV fetching | +| Data Validation | `data_validation.py` | Schema-based validation | +| Data Transformation | `data_transformation.py` | Language detection + BERT vectorization | +| Model Trainer | `model_trainer.py` | Optuna + MLflow training | + +### Clustering Models + +| Model | Type | Use Case | +|-------|------|----------| +| **DBSCAN** | Density-based | Noise-robust clustering | +| **KMeans** | Centroid-based | Fast, fixed k clusters | +| **HDBSCAN** | Hierarchical density | Variable density clusters | +| **Isolation Forest** | Anomaly detection | Outlier identification | +| **LOF** | Local outlier | Density-based anomalies | + +### Training with Optuna + +```python +# Hyperparameter optimization +study = optuna.create_study(direction="maximize") +study.optimize(objective, n_trials=50) +``` + +### MLflow Tracking + +```python +mlflow.set_tracking_uri("https://dagshub.com/...") +mlflow.log_params(best_params) +mlflow.log_metrics(metrics) +mlflow.sklearn.log_model(model, "model") +``` + --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +## 🌧️ Weather Data Scraper (`scripts/scrape_weather_data.py`) 🆕 + +**Historical weather data collection for ML model training** + +### Data Sources + +| Source | API Key? 
| Data Available | +|--------|----------|----------------| +| **Open-Meteo** | ❌ Free | Historical weather since 1940 | +| **NASA FIRMS** | ✅ Optional | Fire/heat spot detection | + +### Collected Weather Variables + +- `temperature_2m_max/min/mean` +- `precipitation_sum`, `rain_sum` +- `precipitation_hours` +- `wind_speed_10m_max`, `wind_gusts_10m_max` +- `wind_direction_10m_dominant` + +### Usage + +```bash +# Scrape last 30 days (default) +python scripts/scrape_weather_data.py + +# Scrape specific date range +python scripts/scrape_weather_data.py --start 2020-01-01 --end 2024-12-31 + +# Scrape multiple years for training dataset +python scripts/scrape_weather_data.py --years 2020,2021,2022,2023,2024 + +# Include fire detection data +python scripts/scrape_weather_data.py --years 2023,2024 --fires + +# Hourly resolution (default is daily) +python scripts/scrape_weather_data.py --start 2024-01-01 --end 2024-01-31 --resolution hourly +``` + +### Output + +``` +datasets/weather/ +├── weather_daily_2020-01-01_2020-12-31.csv +├── weather_daily_2021-01-01_2021-12-31.csv +├── weather_combined.csv (merged file) +└── fire_detections_20241207.csv +``` + +### Coverage + +All 25 Sri Lankan districts with coordinates: +- Colombo, Gampaha, Kalutara, Kandy, Matale, Nuwara Eliya +- Galle, Matara, Hambantota, Jaffna, Kilinochchi, Mannar +- Vavuniya, Mullaitivu, Batticaloa, Ampara, Trincomalee +- Kurunegala, Puttalam, Anuradhapura, Polonnaruwa +- Badulla, Monaragala, Ratnapura, Kegalle + +--- + +## 🚀 Quick Start + +### Prerequisites +- Python 3.11+ +- Node.js 18+ +- Docker Desktop (for Airflow) +- Groq API Key + +### Installation + +```bash +# 1. Clone repository +git clone +cd Roger-Final + +# 2. Create virtual environment +python -m venv .venv +source .venv/bin/activate # Linux/Mac +.\.venv\Scripts\activate # Windows + +# 3. Install dependencies +pip install -r requirements.txt + +# 4. Configure environment +cp .env.template .env +# Edit .env with your API keys + +# 5. Download ML models +python models/anomaly-detection/download_models.py + +# 6. 
Launch all services +./start_services.sh # Linux/Mac +.\start_services.ps1 # Windows +``` + +--- + +## 🔧 API Endpoints + +### REST API (FastAPI - Port 8000) + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/api/status` | GET | System health | +| `/api/dashboard` | GET | Risk metrics | +| `/api/feed` | GET | Latest events | +| `/api/feeds` | GET | All feeds with pagination | +| `/api/feeds/by_district` | GET | Feeds filtered by district | +| `/api/rivernet` | GET | River monitoring data | +| `/api/predict` | POST | Run anomaly predictions | +| `/api/anomalies` | GET | Get anomalous feeds | +| `/api/model/status` | GET | ML model status | +| `/api/weather/predictions` | GET | All district forecasts | +| `/api/weather/predictions/{district}` | GET | Single district | +| `/api/weather/model/status` | GET | Weather model info | +| `/api/weather/historical` | GET | 30-year climate analysis 🆕 | +| `/api/weather/threat` | GET | National flood threat score 🆕 | +| `/api/currency/prediction` | GET | USD/LKR next-day forecast | +| `/api/currency/history` | GET | Historical rates | +| `/api/currency/model/status` | GET | Currency model info | +| `/api/stocks/predictions` | GET | All CSE stock forecasts | +| `/api/stocks/predictions/{symbol}` | GET | Single stock prediction | +| `/api/stocks/model/status` | GET | Stock models info | +| `/api/rag/chat` | POST | Chat with RAG | +| `/api/rag/stats` | GET | RAG system stats | +| `/api/rag/clear` | POST | Clear chat history | + +### WebSocket +- `ws://localhost:8000/ws` - Real-time updates + +--- + +## ⏰ Airflow Orchestration + +### DAG: `anomaly_detection_training` + +``` +start → check_records → data_ingestion → data_validation + → data_transformation → model_training → end +``` + +**Triggers:** +- Batch threshold: 1000 new records +- Daily fallback: Every 24 hours + +**Access Dashboard:** +```bash +cd models/anomaly-detection +astro dev start +# Open http://localhost:8080 +``` + +### DAG: `weather_prediction_daily` 🆕 + +``` +ingest_data → train_models → generate_predictions → publish_predictions +``` + +**Schedule:** Daily at 4:00 AM IST + +**Tasks:** +- Scrape Tutiempo.net for latest data +- Train LSTM models (MLflow tracked) +- Generate 25-district predictions +- Save to JSON for API + +### DAG: `currency_prediction_daily` 🆕 + +``` +ingest_data → train_model → generate_prediction → publish_prediction +``` + +**Schedule:** Daily at 4:00 AM IST + +**Tasks:** +- Fetch USD/LKR + indicators from yfinance +- Train GRU model (MLflow tracked) +- Generate next-day prediction +- Save to JSON for API + +--- + +## 📁 Project Structure + +``` +Roger-Ultimate/ +├── src/ +│ ├── graphs/ # LangGraph definitions +│ │ ├── combinedAgentGraph.py # Mother graph +│ │ ├── politicalAgentGraph.py +│ │ ├── economicalAgentGraph.py +│ │ ├── meteorologicalAgentGraph.py +│ │ ├── socialAgentGraph.py +│ │ ├── intelligenceAgentGraph.py +│ │ ├── dataRetrievalAgentGraph.py +│ │ └── vectorizationAgentGraph.py # 5-step with anomaly detection +│ ├── nodes/ # Agent implementations +│ ├── states/ # State definitions +│ ├── llms/ # LLM configurations +│ ├── storage/ # ChromaDB, SQLite, Neo4j stores +│ ├── rag.py # RAG chatbot +│ └── utils/ +│ └── utils.py # Tools incl. 
FloodWatch 🆕 +├── scripts/ +│ └── scrape_weather_data.py # Weather data scraper 🆕 +├── models/ +│ ├── anomaly-detection/ # ML Anomaly Pipeline +│ │ ├── src/ +│ │ │ ├── components/ # Pipeline stages +│ │ │ ├── entity/ # Config/Artifact classes +│ │ │ ├── pipeline/ # Orchestrators +│ │ │ └── utils/ # Vectorizer, metrics +│ │ ├── dags/ # Airflow DAGs +│ │ ├── data_schema/ # Validation schemas +│ │ ├── output/ # Trained models +│ │ └── models_cache/ # Downloaded BERT models +│ ├── weather-prediction/ # Weather ML Pipeline +│ │ ├── src/components/ # data_ingestion, model_trainer, predictor +│ │ ├── dags/ # weather_prediction_dag.py (4 AM) +│ │ ├── artifacts/ # Trained LSTM models (.h5) +│ │ └── main.py # CLI entry point +│ └── currency-volatility-prediction/ # Currency ML Pipeline +│ ├── src/components/ # data_ingestion, model_trainer, predictor +│ ├── dags/ # currency_prediction_dag.py (4 AM) +│ ├── artifacts/ # Trained GRU model +│ └── main.py # CLI entry point +├── datasets/ +│ └── weather/ # Scraped weather CSVs 🆕 +├── frontend/ +│ └── app/ +│ ├── components/ +│ │ ├── dashboard/ +│ │ │ ├── AnomalyDetection.tsx +│ │ │ ├── WeatherPredictions.tsx +│ │ │ ├── CurrencyPrediction.tsx +│ │ │ ├── NationalThreatCard.tsx # Flood threat score 🆕 +│ │ │ ├── HistoricalIntel.tsx # 30-year climate 🆕 +│ │ │ └── ... +│ │ ├── map/ +│ │ │ ├── MapView.tsx +│ │ │ └── SatelliteView.tsx # Windy.com embed 🆕 +│ │ ├── FloatingChatBox.tsx # RAG chat UI +│ │ └── ... +│ └── pages/ +│ └── Index.tsx # 7 tabs incl. SATELLITE 🆕 +├── main.py # FastAPI backend +├── start.sh # Startup script +└── requirements.txt +``` + +--- + +## 🔐 Environment Variables + +```env +# LLM +GROQ_API_KEY=your_groq_key + +# Database +MONGO_DB_URL=mongodb+srv://... +SQLITE_DB_PATH=./feed_cache.db + +# MLflow (DagsHub) +MLFLOW_TRACKING_URI=https://dagshub.com/... +MLFLOW_TRACKING_USERNAME=... +MLFLOW_TRACKING_PASSWORD=... 
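+# For DagsHub, the username is typically your DagsHub account and the "password" is an access token, not your login password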
+ +# Pipeline +BATCH_THRESHOLD=1000 +``` + +--- + +## 🐛 Troubleshooting + +### FastText won't install on Windows +```bash +# Use pre-built wheel instead +pip install fasttext-wheel +``` + +### BERT models downloading slowly +```bash +# Pre-download all models +python models/anomaly-detection/download_models.py +``` + +### Airflow not starting +```bash +# Ensure Docker is running +docker info + +# Initialize Astro project +cd models/anomaly-detection +astro dev init +astro dev start +``` + +--- + +## 📄 License + +MIT License - Built for Production + +--- + +## 🙏 Acknowledgments + +- **Groq** - High-speed LLM inference +- **LangGraph** - Agent orchestration +- **HuggingFace** - SinhalaBERTo, Tamil-BERT, DistilBERT +- **Optuna** - Hyperparameter optimization +- **MLflow** - Experiment tracking +- Sri Lankan government for open data sources diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..9a0a977b62e880e5d6970177262c2c15e2bdbc5f --- /dev/null +++ b/app.py @@ -0,0 +1,361 @@ +""" +app.py +Streamlit Dashboard for Roger Platform +Interactive interface with Infinite Auto-Refresh & Smart Updates +""" +import streamlit as st +import json +import hashlib +from datetime import datetime +import plotly.graph_objects as go +import time + +# Import Roger components +# NOTE: Ensure these imports work in your local environment +from src.graphs.RogerGraph import graph +from src.states.combinedAgentState import CombinedAgentState + +# ============================================ +# PAGE CONFIGURATION +# ============================================ + +st.set_page_config( + page_title="Roger - Situational Awareness Platform", + page_icon="🇱🇰", + layout="wide", + initial_sidebar_state="expanded" +) + +# ============================================ +# CUSTOM CSS +# ============================================ + +st.markdown(""" + +""", unsafe_allow_html=True) + +# ============================================ +# HEADER +# ============================================ + +st.markdown('
<h1 style="text-align: center;">🇱🇰 Roger</h1>', unsafe_allow_html=True)
+st.markdown('<p style="text-align: center;">National Situational Awareness Platform</p>
', unsafe_allow_html=True) + +# ============================================ +# SIDEBAR +# ============================================ + +with st.sidebar: + st.header("⚙️ Configuration") + + # Auto-refresh interval + refresh_rate = st.slider("Polling Interval (s)", 5, 60, 10) + + st.divider() + + # Control Buttons + col_start, col_stop = st.columns(2) + with col_start: + if st.button("▶ START", type="primary", use_container_width=True): + st.session_state.monitoring_active = True + st.rerun() + + with col_stop: + if st.button("⏹ STOP", use_container_width=True): + st.session_state.monitoring_active = False + st.rerun() + + st.divider() + st.info(""" + **Team Adagard** Open Innovation Track + + Roger transforms national-scale noise into actionable business intelligence using autonomous multi-agent architecture. + """) + st.code("START → Fan-Out → [Agents] → Fan-In → Dashboard → Loop", language="text") + +# ============================================ +# SESSION STATE INITIALIZATION +# ============================================ + +if "monitoring_active" not in st.session_state: + st.session_state.monitoring_active = False +if "latest_result" not in st.session_state: + st.session_state.latest_result = None +if "last_hash" not in st.session_state: + st.session_state.last_hash = "" +if "execution_count" not in st.session_state: + st.session_state.execution_count = 0 + +# ============================================ +# HELPER FUNCTIONS +# ============================================ + +def calculate_hash(data_dict): + """Creates a hash of the dashboard data to detect changes.""" + # We focus on the snapshot and the feed length/content + snapshot = data_dict.get("risk_dashboard_snapshot", {}) + feed = data_dict.get("final_ranked_feed", []) + + # Create a simplified string representation to hash + content_str = f"{snapshot.get('last_updated')}-{len(feed)}-{snapshot.get('opportunity_index')}" + return hashlib.md5(content_str.encode()).hexdigest() + +def render_dashboard(container, result): + """Renders the entire dashboard into the provided container.""" + snapshot = result.get("risk_dashboard_snapshot", {}) + feed = result.get("final_ranked_feed", []) + + # Clear the container to ensure clean re-render + container.empty() + + with container.container(): + st.divider() + + # ------------------------------------------------------------------------- + # 1. METRICS ROW + # ------------------------------------------------------------------------- + st.subheader("📊 Operational Metrics") + m1, m2, m3, m4 = st.columns(4) + + with m1: + st.metric("Logistics Friction", f"{snapshot.get('logistics_friction', 0):.3f}", help="Route risk score") + with m2: + st.metric("Compliance Volatility", f"{snapshot.get('compliance_volatility', 0):.3f}", help="Regulatory risk") + with m3: + st.metric("Market Instability", f"{snapshot.get('market_instability', 0):.3f}", help="Economic volatility") + with m4: + opp_val = snapshot.get("opportunity_index", 0.0) + st.metric("Opportunity Index", f"{opp_val:.3f}", delta="Growth Signal" if opp_val > 0.5 else "Neutral", delta_color="normal") + + # ------------------------------------------------------------------------- + # 2. RADAR CHART + # ------------------------------------------------------------------------- + st.divider() + c1, c2 = st.columns([1, 1]) + + with c1: + st.subheader("📡 Risk vs. 
Opportunity Radar") + + categories = ['Logistics', 'Compliance', 'Market', 'Social', 'Weather'] + risk_vals = [ + snapshot.get('logistics_friction', 0), + snapshot.get('compliance_volatility', 0), + snapshot.get('market_instability', 0), + 0.4, 0.2 + ] + + fig = go.Figure() + + # Risk Layer + fig.add_trace(go.Scatterpolar( + r=risk_vals, theta=categories, fill='toself', name='Operational Risk', + line_color='#ff4444' + )) + + # Opportunity Layer + fig.add_trace(go.Scatterpolar( + r=[opp_val] * 5, theta=categories, name='Opportunity Threshold', + line_color='#00CC96', opacity=0.7, line=dict(dash='dot') + )) + + fig.update_layout( + polar=dict(radialaxis=dict(visible=True, range=[0, 1])), + showlegend=True, + height=350, + margin=dict(l=40, r=40, t=20, b=20), + paper_bgcolor='rgba(0,0,0,0)', + plot_bgcolor='rgba(0,0,0,0)', + font=dict(color="white") + ) + st.plotly_chart(fig, use_container_width=True) + + # ------------------------------------------------------------------------- + # 3. INTELLIGENCE FEED + # ------------------------------------------------------------------------- + with c2: + st.subheader("📰 Intelligence Feed") + + tab_all, tab_risk, tab_opp = st.tabs(["All Events", "Risks ⚠️", "Opportunities 🚀"]) + + def render_feed(filter_type=None): + if not feed: + st.info("No events detected.") + return + + count = 0 + for event in feed[:15]: + imp = event.get("impact_type", "risk") + if filter_type and imp != filter_type: continue + + border_color = "#ff4444" if imp == "risk" else "#00CC96" + icon = "⚠️" if imp == "risk" else "🚀" + + summary = event.get("content_summary", "") + domain = event.get("target_agent", "unknown").upper() + score = event.get("confidence_score", 0.0) + + st.markdown( + f""" +
+                <div style="border-left: 4px solid {border_color}; padding: 6px 10px; margin-bottom: 8px;">
+                    <div>
+                        <b>{domain}</b>
+                        <span style="float: right;">SCORE: {score:.2f}</span>
+                    </div>
+                    <div>
+                        {icon} {summary}
+                    </div>
+                </div>
+ """, + unsafe_allow_html=True + ) + count += 1 + + if count == 0: + st.caption("No events in this category.") + + with tab_all: render_feed() + with tab_risk: render_feed("risk") + with tab_opp: render_feed("opportunity") + + st.divider() + st.caption(f"Last Updated: {datetime.utcnow().strftime('%H:%M:%S UTC')} | Run Count: {st.session_state.execution_count}") + +# ============================================ +# MAIN EXECUTION LOGIC +# ============================================ + +# We use a placeholder that we can overwrite dynamically +dashboard_placeholder = st.empty() + +if st.session_state.monitoring_active: + + # --------------------------------------------------------- + # PHASE 1: INITIAL LOAD (Runs only if we have NO data) + # --------------------------------------------------------- + if st.session_state.latest_result is None: + with dashboard_placeholder.container(): + st.markdown("
<br/><br/>
", unsafe_allow_html=True) + col1, col2, col3 = st.columns([1, 2, 1]) + with col2: + st.markdown('
<h3 style="text-align: center;">INITIALIZING NEURAL AGENTS...</h3>
', unsafe_allow_html=True) + st.markdown('
<p style="text-align: center;">Connecting to Roger Graph Network</p>
', unsafe_allow_html=True) + progress_bar = st.progress(0) + + # Visual effect for initialization + steps = ["Loading Social Graph...", "Connecting to Market Data...", "Calibrating Risk Radar...", "Starting Fan-Out Sequence..."] + for i, step in enumerate(steps): + time.sleep(0.3) + progress_bar.progress((i + 1) * 25) + + # --- PERFORM FIRST FETCH --- + try: + current_state = CombinedAgentState(max_runs=1, run_count=0) + result = graph.invoke(current_state) + + # Save to session state + st.session_state.latest_result = result + st.session_state.last_hash = calculate_hash(result) + st.session_state.execution_count = 1 + + except Exception as e: + st.error(f"Initialization Error: {e}") + st.session_state.monitoring_active = False + st.stop() + + # --------------------------------------------------------- + # PHASE 2: CONTINUOUS MONITORING LOOP + # --------------------------------------------------------- + # By this point, st.session_state.latest_result is GUARANTEED to have data. + + while st.session_state.monitoring_active: + + # 1. RENDER CURRENT DATA + # We render whatever is in the state immediately. + # This replaces the loading screen or the previous frame. + render_dashboard(dashboard_placeholder, st.session_state.latest_result) + + # 2. WAIT (The "Background" part) + # The UI is now visible to the user while we sleep. + time.sleep(refresh_rate) + + # 3. FETCH NEW DATA + try: + current_state = CombinedAgentState(max_runs=1, run_count=st.session_state.execution_count) + # Run the graph silently in background + new_result = graph.invoke(current_state) + + # 4. CHECK FOR DIFFERENCES + new_hash = calculate_hash(new_result) + + if new_hash != st.session_state.last_hash: + # DATA CHANGED: Update state + st.session_state.last_hash = new_hash + st.session_state.latest_result = new_result + st.session_state.execution_count += 1 + + # Optional: Pop a toast + st.toast(f"New Intel Detected ({len(new_result.get('final_ranked_feed', []))} events)", icon="⚡") + + # The loop continues... + # The NEXT iteration (Step 1) will render this new data. + else: + # NO CHANGE: + # We do nothing. The loop continues. + # Step 1 will simply re-render the existing stable data. + pass + + except Exception as e: + st.error(f"Monitoring Error: {e}") + time.sleep(5) # Wait before retrying on error + +else: + # --------------------------------------------------------- + # IDLE STATE + # --------------------------------------------------------- + with dashboard_placeholder.container(): + st.markdown("
<br/>
", unsafe_allow_html=True) + col1, col2, col3 = st.columns([1, 4, 1]) + with col2: + st.info("System Standby. Click '▶ START' in the sidebar to begin autonomous monitoring.") + + if st.session_state.latest_result: + st.markdown("### Last Session Snapshot:") + # We use a temporary container here just for the snapshot + with st.container(): + render_dashboard(st.empty(), st.session_state.latest_result) diff --git a/debug_path.py b/debug_path.py new file mode 100644 index 0000000000000000000000000000000000000000..9d324feb704b325c683ccc852ef75ba04945eb53 --- /dev/null +++ b/debug_path.py @@ -0,0 +1,30 @@ +# Debug path calculation +from pathlib import Path + +# Simulate the path from data_transformation.py +file_path = Path(r"C:\Users\LENOVO\Desktop\Roger-Ultimate\models\anomaly-detection\src\components\data_transformation.py") + +print("File:", file_path) +print() +print("1 up (.parent):", file_path.parent) # components +print("2 up:", file_path.parent.parent) # src +print("3 up:", file_path.parent.parent.parent) # anomaly-detection +print("4 up:", file_path.parent.parent.parent.parent) # models +print("5 up:", file_path.parent.parent.parent.parent.parent) # Roger-Ultimate (CORRECT!) +print() + +main_project = file_path.parent.parent.parent.parent.parent +print("Main project root:", main_project) +print("Should be:", r"C:\Users\LENOVO\Desktop\Roger-Ultimate") +print("Match:", str(main_project) == r"C:\Users\LENOVO\Desktop\Roger-Ultimate") + +# Check if src/graphs exists +src_graphs = main_project / "src" / "graphs" +print() +print("src/graphs path:", src_graphs) +print("Exists:", src_graphs.exists()) + +# Check vectorizationAgentGraph +vec_graph = src_graphs / "vectorizationAgentGraph.py" +print("vectorizationAgentGraph.py:", vec_graph) +print("Exists:", vec_graph.exists()) diff --git a/debug_runner.py b/debug_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..46b387a4ce83b8dcde0b96c65ffe8f2e5454779c --- /dev/null +++ b/debug_runner.py @@ -0,0 +1,250 @@ +import os +import sys +import json +from datetime import datetime + +# Ensure we can find the 'src' module from the root +sys.path.append(os.getcwd()) + +try: + from src.utils.utils import ( + scrape_facebook, + scrape_twitter, + scrape_local_news, + scrape_reddit, + scrape_government_gazette, + scrape_cse_stock_data, + tool_weather_nowcast, + tool_dmc_alerts, + scrape_linkedin, + scrape_instagram, + ) + print("✅ Libraries loaded successfully.\n") +except ImportError as e: + print(f"❌ Error loading libraries: {e}") + print("Make sure you are running this from the 'Roger-Final' folder.") + sys.exit(1) + +def print_separator(char="=", length=70): + print(char * length) + +def print_header(text): + print_separator() + print(f" {text}") + print_separator() + +def run_test(name, func, description="", **kwargs): + print(f"\n🔍 Testing: {name}") + if description: + print(f" {description}") + print("-" * 70) + + start_time = datetime.now() + + try: + # Check if it's a LangChain tool (needs .invoke) + if hasattr(func, "invoke"): + res = func.invoke(kwargs) + else: + res = func(**kwargs) + + elapsed = (datetime.now() - start_time).total_seconds() + + # Try to print pretty JSON + try: + parsed = json.loads(res) + + # Custom formatting for better readability + if isinstance(parsed, dict): + if "results" in parsed: + print(f"\n✅ Success! 
Found {len(parsed.get('results', []))} results in {elapsed:.2f}s") + print(f"\nSample Results:") + for i, item in enumerate(parsed['results'][:3], 1): + print(f"\n [{i}] {item.get('title', 'No title')}") + if 'snippet' in item: + snippet = item['snippet'][:150] + "..." if len(item['snippet']) > 150 else item['snippet'] + print(f" {snippet}") + if 'url' in item: + print(f" 🔗 {item['url']}") + else: + print(f"\n✅ Success in {elapsed:.2f}s") + print(json.dumps(parsed, indent=2)[:1000]) + else: + print(json.dumps(parsed, indent=2)[:1000]) + + except: + print(res[:1000] if len(res) > 1000 else res) + + print(f"\n⏱️ Completed in {elapsed:.2f} seconds") + + except Exception as e: + print(f"❌ Error: {e}") + + print("-" * 70) + +def check_sessions(): + """Check which session files exist""" + print_header("Session Status Check") + + session_paths = [ + "src/utils/.sessions", + ".sessions" + ] + + platforms = ["facebook", "twitter", "linkedin", "instagram", "reddit"] + found_sessions = [] + print("session_path: ", session_paths) + + for path in session_paths: + if os.path.exists(path): + print(f"\n📁 Checking {path}/") + for platform in platforms: + session_file = os.path.join(path, f"{platform}_storage_state.json") + if os.path.exists(session_file): + size = os.path.getsize(session_file) + print(f" ✅ {platform:12} ({size:,} bytes)") + found_sessions.append(platform) + else: + print(f" ❌ {platform:12} (not found)") + + if not found_sessions: + print("\n⚠️ No session files found!") + print(" Run 'python src/utils/session_manager.py' to create sessions.") + + print_separator() + return found_sessions + +def main(): + print_header("Roger Debug Runner - Comprehensive Tool Testing") + + print("\n📋 Available Test Categories:") + print(" 1. Weather & Alerts (No auth required)") + print(" 2. News & Government (No auth required)") + print(" 3. Financial Data (No auth required)") + print(" 4. Social Media (Requires auth)") + print(" 5. Check Sessions") + print(" 6. Run All Tests") + print(" q. 
Quit") + + choice = input("\nSelect category (1-6 or q): ").strip() + + if choice == "q": + return + + if choice == "5": + check_sessions() + return + + # === CATEGORY 1: Weather & Alerts === + if choice in ["1", "6"]: + print_header("CATEGORY 1: Weather & Alerts") + + run_test( + "Weather Nowcast", + tool_weather_nowcast, + "Comprehensive weather data from Department of Meteorology", + location="Colombo" + ) + + run_test( + "DMC Alerts", + tool_dmc_alerts, + "Disaster Management Centre severe weather alerts" + ) + + # === CATEGORY 2: News & Government === + if choice in ["2", "6"]: + print_header("CATEGORY 2: News & Government") + + run_test( + "Local News", + scrape_local_news, + "Scraping Daily Mirror, Daily FT, News First", + keywords=["economy", "politics"], + max_articles=5 + ) + + run_test( + "Government Gazette", + scrape_government_gazette, + "Latest gazette notifications", + keywords=["regulation"], + max_items=3 + ) + + # === CATEGORY 3: Financial Data === + if choice in ["3", "6"]: + print_header("CATEGORY 3: Financial Data") + + run_test( + "CSE Stock Data", + scrape_cse_stock_data, + "Colombo Stock Exchange - ASPI Index", + symbol="ASPI", + period="1d" + ) + + # === CATEGORY 4: Social Media === + if choice in ["4", "6"]: + print_header("CATEGORY 4: Social Media (Authentication Required)") + + available_sessions = check_sessions() + + if "facebook" in available_sessions: + run_test( + "Facebook", + scrape_facebook, + "Facebook search results", + keywords=["Sri Lanka", "Elon musk", "business"], + max_items=5 + ) + else: + print("\n⚠️ Facebook session not found - skipping") + + if "instagram" in available_sessions: + run_test( + "Instagram", + scrape_instagram, + "Instagram search results", + keywords=["Sri Lanka", "Elon musk", "business"], + max_items=5 + ) + else: + print("\n⚠️ Facebook session not found - skipping") + + if "linkedin" in available_sessions: + run_test( + "Linkedin", + scrape_linkedin, + "Linkedin search results", + keywords=["Sri Lanka", "Elon musk", "business"], + max_items=5 + ) + else: + print("\n⚠️ Facebook session not found - skipping") + + + if "twitter" in available_sessions: + run_test( + "Twitter", + scrape_twitter, + "Twitter/X search", + query="Sri Lanka economy" + ) + else: + print("\n⚠️ Twitter session not found - skipping") + + # Reddit doesn't need session + run_test( + "Reddit", + scrape_reddit, + "Reddit posts (no auth needed)", + keywords=["Sri Lanka"], + limit=5 + ) + + print_header("Testing Complete!") + print(f"\n⏰ Finished at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + +if __name__ == "__main__": + main() diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml new file mode 100644 index 0000000000000000000000000000000000000000..7222dbfeb77776b86b5f5b00e17b2bc792634048 --- /dev/null +++ b/docker-compose.prod.yml @@ -0,0 +1,65 @@ +version: '3.8' + +services: + # Neo4j Knowledge Graph (Production Feature) + neo4j: + image: neo4j:5.15-community + container_name: modelx-neo4j + ports: + - "7474:7474" # Browser UI + - "7687:7687" # Bolt protocol + environment: + NEO4J_AUTH: neo4j/modelx2024 + NEO4J_PLUGINS: '["apoc"]' + NEO4J_dbms_security_procedures_unrestricted: "apoc.*" + volumes: + - ./data/neo4j/data:/data + - ./data/neo4j/logs:/logs + healthcheck: + test: ["CMD-SHELL", "cypher-shell -u neo4j -p modelx2024 'RETURN 1' || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + + # Backend API + backend: + build: + context: . 
+ dockerfile: Dockerfile + ports: + - "8000:8000" + environment: + - GROQ_API_KEY=${GROQ_API_KEY} + - PYTHONUNBUFFERED=1 + - NEO4J_ENABLED=true + - NEO4J_URI=bolt://neo4j:7687 + - NEO4J_USER=neo4j + - NEO4J_PASSWORD=modelx2024 + volumes: + - ./src:/app/src + - ./data:/app/data # Persist storage data + command: python main.py + depends_on: + neo4j: + condition: service_healthy + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/api/status"] + interval: 30s + timeout: 10s + retries: 3 + + # Frontend (Next.js) + frontend: + build: + context: ./frontend + dockerfile: Dockerfile + ports: + - "3000:3000" + environment: + - NEXT_PUBLIC_API_URL=http://backend:8000 + depends_on: + - backend + volumes: + - ./frontend:/app + - /app/node_modules + - /app/. next diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..75da0b58838f6994f4dcf19ca3c97c4f84b2f6c8 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,38 @@ +version: '3.8' + +services: + # Backend API + backend: + build: + context: . + dockerfile: backend/Dockerfile + ports: + - "8000:8000" + environment: + - GROQ_API_KEY=${GROQ_API_KEY} + - PYTHONUNBUFFERED=1 + volumes: + - ./src:/app/src + - ./backend:/app/backend + command: python backend/api/main.py + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/api/status"] + interval: 30s + timeout: 10s + retries: 3 + + # Frontend (Next.js) + frontend: + build: + context: ./frontend + dockerfile: Dockerfile + ports: + - "3000:3000" + environment: + - NEXT_PUBLIC_API_URL=http://localhost:8000 + depends_on: + - backend + volumes: + - ./frontend:/app + - /app/node_modules + - /app/.next \ No newline at end of file diff --git a/frontend/.gitignore b/frontend/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5ef6a520780202a1d6addd833d800ccb1ecac0bb --- /dev/null +++ b/frontend/.gitignore @@ -0,0 +1,41 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/node_modules +/.pnp +.pnp.* +.yarn/* +!.yarn/patches +!.yarn/plugins +!.yarn/releases +!.yarn/versions + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# env files (can opt-in for committing if needed) +.env* + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/frontend/README.md b/frontend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e215bc4ccf138bbc38ad58ad57e92135484b3c0f --- /dev/null +++ b/frontend/README.md @@ -0,0 +1,36 @@ +This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). + +## Getting Started + +First, run the development server: + +```bash +npm run dev +# or +yarn dev +# or +pnpm dev +# or +bun dev +``` + +Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. + +You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. + +This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. 
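+
+## Configuring the Backend URL
+
+The dashboard components read the Roger backend address from `NEXT_PUBLIC_API_URL` and fall back to `http://localhost:8000` when it is unset. For local development, a minimal `.env.local` (a standard Next.js convention) might contain:
+
+```bash
+NEXT_PUBLIC_API_URL=http://localhost:8000
+```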
+
+## Learn More
+
+To learn more about Next.js, take a look at the following resources:
+
+- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
+- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
+
+You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
+
+## Deploy on Vercel
+
+The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
+
+Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
diff --git a/frontend/app/components/App.tsx b/frontend/app/components/App.tsx
new file mode 100644
index 0000000000000000000000000000000000000000..c4106b637175332b81a394f75e8979954dacfcca
--- /dev/null
+++ b/frontend/app/components/App.tsx
@@ -0,0 +1,19 @@
+'use client'
+
+import { Routes, Route } from "react-router-dom";
+import ClientWrapper from "./ClientWrapper";
+import Index from "../pages/Index";
+import NotFound from "../pages/NotFound";
+import FloatingChatBox from "./FloatingChatBox";
+
+export default function App() {
+  return (
+    <ClientWrapper>
+      <Routes>
+        <Route path="/" element={<Index />} />
+        <Route path="*" element={<NotFound />} />
+      </Routes>
+      <FloatingChatBox />
+    </ClientWrapper>
+  );
+}
diff --git a/frontend/app/components/ClientWrapper.tsx b/frontend/app/components/ClientWrapper.tsx
new file mode 100644
index 0000000000000000000000000000000000000000..ee07c9e09f2769de709b8d6b9e7c338410e9ff02
--- /dev/null
+++ b/frontend/app/components/ClientWrapper.tsx
@@ -0,0 +1,28 @@
+'use client'
+
+import { ReactNode, useState } from 'react';
+import { Toaster } from "./ui/toaster";
+import { Toaster as Sonner } from "./ui/sonner";
+import { TooltipProvider } from "./ui/tooltip";
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import { BrowserRouter } from "react-router-dom";
+
+interface ClientWrapperProps {
+  children: ReactNode;
+}
+
+export default function ClientWrapper({ children }: ClientWrapperProps) {
+  const [queryClient] = useState(() => new QueryClient());
+
+  return (
+    <QueryClientProvider client={queryClient}>
+      <TooltipProvider>
+        <Toaster />
+        <Sonner />
+        <BrowserRouter>
+          {children}
+        </BrowserRouter>
+      </TooltipProvider>
+    </QueryClientProvider>
+  );
+}
diff --git a/frontend/app/components/FloatingChatBox.tsx b/frontend/app/components/FloatingChatBox.tsx
new file mode 100644
index 0000000000000000000000000000000000000000..6e33342c86d6bbdcf221e9b6246681fbb8066340
--- /dev/null
+++ b/frontend/app/components/FloatingChatBox.tsx
@@ -0,0 +1,310 @@
+'use client';
+
+import { useState, useRef, useEffect } from 'react';
+import { Send, Brain, Trash2, Radio } from 'lucide-react';
+import { Badge } from './ui/badge';
+import ReactMarkdown from 'react-markdown';
+import remarkGfm from 'remark-gfm';
+import './Roger.css';
+
+interface Message {
+  id: string;
+  role: 'user' | 'assistant';
+  content: string;
+  sources?: Array<{
+    domain: string;
+    platform: string;
+    similarity: number;
+  }>;
+  timestamp: Date;
+}
+
+const FloatingChatBox = () => {
+  const [isOpen, setIsOpen] = useState(false);
+  const [messages, setMessages] = useState<Message[]>([]);
+  const [input, setInput] = useState('');
+  const [isLoading, setIsLoading] = useState(false);
+  const [domainFilter, setDomainFilter] = useState<string | null>(null);
+  const scrollContainerRef = useRef<HTMLDivElement>(null);
+
+  const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+
+  // Auto-scroll to bottom
+  useEffect(() => {
+    if (scrollContainerRef.current) {
+      scrollContainerRef.current.scrollTop = scrollContainerRef.current.scrollHeight;
+    }
+  }, [messages, isLoading]);
+
+  // Handle body scroll when chat is open (mobile)
+  useEffect(() => {
+    if (isOpen) {
+      document.body.style.overflow = 'hidden';
+    } else {
+      document.body.style.overflow = 'unset';
+    }
+    return () => {
+      document.body.style.overflow = 'unset';
+    };
+  }, [isOpen]);
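+
+  // Response shape assumed by sendMessage below for POST /api/rag/chat,
+  // inferred from how `data` is used (the authoritative response model
+  // lives in the backend):
+  //   { answer: string,
+  //     sources?: Array<{ domain: string; platform: string; similarity: number }> }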
+
+  const sendMessage = async () => {
+    if (!input.trim() || isLoading) return;
+
+    const userMessage: Message = {
+      id: Date.now().toString(),
+      role: 'user',
+      content: input,
+      timestamp: new Date()
+    };
+
+    setMessages(prev => [...prev, userMessage]);
+    const currentInput = input;
+    setInput('');
+    setIsLoading(true);
+
+    try {
+      const response = await fetch(`${API_BASE}/api/rag/chat`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          message: currentInput,
+          domain_filter: domainFilter,
+          use_history: true
+        })
+      });
+
+      const data = await response.json();
+
+      const assistantMessage: Message = {
+        id: (Date.now() + 1).toString(),
+        role: 'assistant',
+        content: data.answer || 'No response received.',
+        sources: data.sources,
+        timestamp: new Date()
+      };
+
+      setMessages(prev => [...prev, assistantMessage]);
+    } catch (error) {
+      const errorMessage: Message = {
+        id: (Date.now() + 1).toString(),
+        role: 'assistant',
+        content: 'Failed to connect to Roger Intelligence. Please ensure the backend is running.',
+        timestamp: new Date()
+      };
+      setMessages(prev => [...prev, errorMessage]);
+    } finally {
+      setIsLoading(false);
+    }
+  };
+
+  const clearHistory = async () => {
+    try {
+      await fetch(`${API_BASE}/api/rag/clear`, { method: 'POST' });
+      setMessages([]);
+    } catch (error) {
+      console.error('Failed to clear history:', error);
+    }
+  };
+
+  const handleKeyDown = (e: React.KeyboardEvent) => {
+    if (e.key === 'Enter' && !e.shiftKey) {
+      e.preventDefault();
+      sendMessage();
+    }
+  };
+
+  const toggleChat = () => {
+    setIsOpen(!isOpen);
+  };
+
+  const domains = ['political', 'economic', 'weather', 'social', 'intelligence'];
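+
+  // The JSX below preserves the recovered structure, text, and handlers;
+  // the exact container markup, Tailwind classes, element keys, and icon
+  // placement (Brain, Trash2) are assumptions.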
+  return (
+    <div>
+      {/* Backdrop */}
+      <div
+        onClick={() => setIsOpen(false)}
+        className={`absolute top-0 left-0 w-screen h-screen bg-black transition-opacity duration-500 ${isOpen ? 'opacity-40 flex' : 'opacity-0 hidden'}`}
+      />
+
+      {/* Roger Button */}
+      <button onClick={toggleChat}>
+        <span>Roger</span>
+      </button>
+
+      {/* Chat Container */}
+      <div>
+        {/* Header - with safe area for iPhone notch */}
+        <div>
+          <div>
+            <Brain />
+            <div>
+              <span>Roger</span>
+              <span>Intelligence Assistant</span>
+            </div>
+          </div>
+          <div>
+            <button onClick={clearHistory}>
+              <Trash2 />
+            </button>
+            <button onClick={toggleChat}>
+              <span>Close</span>
+            </button>
+          </div>
+        </div>
+
+        {/* Domain Filter - scrollable on mobile */}
+        <div>
+          <Badge
+            onClick={() => setDomainFilter(null)}
+          >
+            All
+          </Badge>
+          {domains.map(domain => (
+            <Badge
+              key={domain}
+              onClick={() => setDomainFilter(domain)}
+            >
+              {domain}
+            </Badge>
+          ))}
+        </div>
+
+        {/* Messages Container */}
+        {messages.length > 0 ? (
+          <div ref={scrollContainerRef}>
+            {/* Today Badge */}
+            <div>
+              <span>Today</span>
+            </div>
+
+            {messages.map((msg) => (
+              <div key={msg.id}>
+                {msg.role === 'assistant' ? (
+                  <ReactMarkdown remarkPlugins={[remarkGfm]}>
+                    {msg.content}
+                  </ReactMarkdown>
+                ) : (
+                  <p>{msg.content}</p>
+                )}
+
+                {/* Sources */}
+                {msg.sources && msg.sources.length > 0 && (
+                  <div>
+                    <span>Sources:</span>
+                    <div>
+                      {msg.sources.slice(0, 3).map((src, i) => (
+                        <Badge key={i}>
+                          {src.domain} ({Math.round(src.similarity * 100)}%)
+                        </Badge>
+                      ))}
+                    </div>
+                  </div>
+                )}
+              </div>
+            ))}
+
+            {/* Typing Indicator */}
+            {isLoading && (
+              <div>
+                <span>…</span>
+              </div>
+            )}
+          </div>
+        ) : (
+          <div>
+            <Brain />
+            <div>
+              <p>
+                Hello! I'm Roger, your intelligence assistant.
+              </p>
+              <p>
+                Ask me anything about Sri Lanka's political, economic, weather, or social intelligence data.
+              </p>
+            </div>
+            <div>
+              <span>Try asking:</span>
+              <p>"What are the latest political events?"</p>
+              <p>"Any weather warnings today?"</p>
+            </div>
+          </div>
+        )}
+
+        {/* Input Area - with safe area for bottom */}
+        <div>
+          <div>