Spaces:

Mehedi2
/

new_assignment

Sleeping

App Files Files Community

Mehedi2 committed on Sep 28, 2025

Commit

28c303d

verified ·

1 Parent(s): 58052c9

Update gaia_api.py

Browse files

Files changed (1) hide show

gaia_api.py +200 -98

gaia_api.py CHANGED Viewed

@@ -1,105 +1,207 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from app import run_agent  # Import from your main app
-import logging
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-app = FastAPI(
-    title="GAIA Test Agent API",
-    description="API endpoint for GAIA benchmark evaluation",
-    version="1.0.0"
-)
-class GaiaRequest(BaseModel):
-    prompt: str
-class GaiaResponse(BaseModel):
-    output: str
-@app.post("/predict", response_model=GaiaResponse)
-async def predict(request: GaiaRequest):
     """
-    Main prediction endpoint for GAIA evaluation
-    This is the endpoint that GAIA will call to get answers
     """
     try:
-        logger.info(f"Received question: {request.prompt[:100]}...")
-        # Get answer from your agent
-        answer = run_agent(request.prompt)
-        logger.info(f"Generated answer: {answer[:100]}...")
-        return GaiaResponse(output=answer)
     except Exception as e:
-        logger.error(f"Error processing request: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    return {
-        "status": "healthy",
-        "message": "GAIA Test Agent is running"
-    }
-@app.get("/")
-async def root():
-    """Root endpoint with API information"""
-    return {
-        "name": "GAIA Test Agent",
-        "description": "AI Agent for GAIA Benchmark Evaluation",
-        "endpoints": {
-            "predict": "/predict - Main prediction endpoint (POST)",
-            "health": "/health - Health check (GET)",
-            "docs": "/docs - Interactive API documentation (GET)"
-        },
-        "usage": {
-            "predict": {
-                "method": "POST",
-                "body": {
-                    "prompt": "Your question here"
-                },
-                "response": {
-                    "output": "Agent's answer"
-                }
             }
-        }
-    }
-@app.get("/info")
-async def info():
-    """Get agent information"""
-    return {
-        "agent_type": "General AI Assistant",
-        "model": "DeepSeek V3.1 Terminus via OpenRouter",
-        "capabilities": [
-            "General question answering",
-            "Mathematical calculations",
-            "Factual queries",
-            "Yes/No questions",
-            "Reasoning tasks"
-        ],
-        "optimized_for": "GAIA benchmark evaluation"
-    }
-# For debugging - remove in production
-@app.get("/test")
-async def test_endpoint():
-    """Test endpoint to verify the agent works"""
     try:
-        test_answer = run_agent("What is 2 + 2?")
-        return {
-            "test_question": "What is 2 + 2?",
-            "test_answer": test_answer,
-            "status": "Agent working correctly"
-        }
-    except Exception as e:
-        return {
-            "status": "Error",
-            "error": str(e)
-        }

+from typing import Dict, List, Tuple
+import re
+import tempfile
+from pathlib import Path
+import pandas as pd
+import requests
+from pandas import DataFrame
+# --- Constants ---
+# Base URL of the scoring service; every endpoint below is derived from it.
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# Endpoint that fetch_all_questions() GETs to retrieve the question set.
+QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
+# Endpoint that submit_answers() POSTs the answers payload to.
+SUBMIT_URL = f"{DEFAULT_API_URL}/submit"
+# URL prefix (not a filesystem path, despite the name); process_file() appends a task_id to it.
+FILE_PATH = f"{DEFAULT_API_URL}/files/"
+# --- Helper Methods ---
+def fetch_all_questions() -> Dict:
+    """Fetches all questions from the specified API endpoint.
+    This function retrieves a list of questions from the API, handles potential errors
+    such as network issues, invalid responses, or empty question lists, and returns
+    the questions as a dictionary.
+    Returns:
+        Dict: A dictionary containing the questions data retrieved from the API.
+    Raises:
+        UserWarning: If there is an error fetching the questions, such as network issues,
+            invalid JSON response, or an empty question list.  The exception message
+            provides details about the specific error encountered.
     """
+    print(f"Fetching questions from: {QUESTIONS_URL}")
+    response = requests.get(QUESTIONS_URL, timeout=15)
+    try:
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            print("Fetched questions list is empty.")
+            raise UserWarning("Fetched questions list is empty or invalid format.")
+        print(f"Fetched {len(questions_data)} questions.")
+        return questions_data
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        raise UserWarning(f"Error fetching questions: {e}")
+    except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        raise UserWarning(f"Error decoding server response for questions: {e}")
+    except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        raise UserWarning(f"An unexpected error occurred fetching questions: {e}")
+def submit_answers(submission_data: dict, results_log: list) -> Tuple[str, DataFrame]:
+    """Submits answers to the scoring API and returns the submission status and results.
+    This function sends the provided answers to the scoring API, handles potential errors
+    such as network issues, server errors, or invalid responses, and returns a status
+    message indicating the success or failure of the submission, along with a DataFrame
+    containing the results log.
+    Args:
+        submission_data (dict): A dictionary containing the answers to be submitted.
+            Expected to have a structure compatible with the scoring API.
+        results_log (list): A list of dictionaries containing the results log.
+            This log is converted to a Pandas DataFrame and returned.
+    Returns:
+        Tuple[str, DataFrame]: A tuple containing:
+            - A status message (str) indicating the submission status and any relevant
+              information or error messages.
+            - A Pandas DataFrame containing the results log.
     """
     try:
+        response = requests.post(SUBMIT_URL, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/"
+            f"{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        print("Submission successful.")
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except requests.exceptions.HTTPError as e:
+        error_detail = f"Server responded with status {e.response.status_code}."
+        try:
+            error_json = e.response.json()
+            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
     except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+def run_agent(gaia_agent, questions_data: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
+    """Runs the agent on a list of questions and returns the results and answers.
+    This function iterates through a list of questions, runs the provided agent on each
+    question, and collects the results and answers. It handles potential errors during
+    agent execution and returns the results log and the answers payload.
+    Args:
+        gaia_agent: An instance of the GaiaAgent class, which is responsible for
+            generating answers to the questions.
+        questions_data (List[Dict]): A list of dictionaries, where each dictionary
+            represents a question and contains at least the 'task_id' and 'question' keys.
+    Returns:
+        Tuple[List[Dict], List[Dict]]: A tuple containing:
+            - A list of dictionaries representing the results log, where each dictionary
+              contains the 'Task ID', 'Question', and 'Submitted Answer'.
+            - A list of dictionaries representing the answers payload, where each dictionary
+              contains the 'task_id' and 'submitted_answer'.
+    """
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        question_text = process_file(task_id, question_text)
+        if not task_id or question_text is None:
+            print(f"Skipping invalid item (missing task_id or question): {item}")
+            continue
+        try:
+            submitted_answer = gaia_agent(task_id, question_text)
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+        except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            submitted_answer = f"AGENT ERROR: {e}"
+        results_log.append(
+            {
+                "Task ID": task_id,
+                "Question": question_text,
+                "Submitted Answer": submitted_answer,
             }
+        )
+    return results_log, answers_payload
+def process_file(task_id: str, question_text: str) -> str:
+    """
+    Attempt to download a file associated with a task from the API.
+    - If the file exists (HTTP 200), it is saved to a temp directory and the local file path is returned.
+    - If no file is found (HTTP 404), returns the original question text.
+    - For all other HTTP errors, the exception is propagated to the caller.
+    """
+    file_url = f"{FILE_PATH}{task_id}"
     try:
+        response = requests.get(file_url, timeout=30)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as exc:
+        print(f"Exception in download_file>> {str(exc)}")
+        return question_text # Unable to get the file
+    # Determine filename from 'Content-Disposition' header, fallback to task_id
+    content_disposition = response.headers.get("content-disposition", "")
+    filename = task_id
+    match = re.search(r'filename="([^"]+)"', content_disposition)
+    if match:
+        filename = match.group(1)
+    # Save file in a temp directory
+    temp_storage_dir = Path(tempfile.gettempdir()) / "gaia_cached_files"
+    temp_storage_dir.mkdir(parents=True, exist_ok=True)
+    file_path = temp_storage_dir / filename
+    file_path.write_bytes(response.content)
+    print(f"Downloaded file for task {task_id}: {filename}")
+    return (
+        f"{question_text}\n\n"
+        f"---\n"
+        f"A file was downloaded for this task and saved locally at:\n"
+        f"{str(file_path)}\n"
+        f"---\n\n"
+    )