Spaces:

gaonkarrs
/

RAG_Evaluation_System

Sleeping

App Files Files Community

gaonkarrs committed on Aug 2, 2025

Commit

bc177f1

1 Parent(s): d95e5de

New code

Browse files

Files changed (1) hide show

app.py +23 -47

app.py CHANGED Viewed

@@ -505,22 +505,6 @@ def compute_ragbench_metrics(judge_response: dict, retrieved_sentence_keys: list
         "Adherence": adherence
     }
-# --- Dataset dictionary ---
-domain_datasets = {
-    "Legal": legal_dataset,
-    "Medical": med_dataset,
-    "GK": gk_dataset,
-    "CS": cs_dataset,
-    "Finance": fin_dataset
-}
-# --- Get questions for selected domain ---
-def get_questions_for_domain(domain):
-    dataset = domain_datasets.get(domain, [])
-    if not dataset:
-        return "⚠️ No dataset found for the selected domain."
-    return "\n".join([f"{i}. {item['question']}" for i, item in enumerate(dataset)])
 def evaluate_rag_pipeline(domain, q_indices):
     import torch
@@ -613,47 +597,39 @@ def evaluate_rag_pipeline(domain, q_indices):
 # Updated wrapper
 def evaluate_rag_gradio(domain, q_indices_str):
     log_stream = io.StringIO()
     sys.stdout = log_stream
     try:
         q_indices = [int(x.strip()) for x in q_indices_str.split(",") if x.strip().isdigit()]
         results = evaluate_rag_pipeline(domain, q_indices)
         logs = log_stream.getvalue()
         return results, logs
     except Exception as e:
         traceback.print_exc()
         return {"error": str(e)}, log_stream.getvalue()
-    finally:
-        sys.stdout = sys.__stdout__
-# === Gradio UI using Blocks ===
-    with gr.Blocks(title="RAG Evaluation Dashboard") as demo:
-        gr.Markdown("## 📊 RAG Evaluation Dashboard")
-        gr.Markdown("Evaluate your RAG pipeline and also browse the questions available for each domain.")
-    with gr.Row():
-        domain_input = gr.Dropdown(choices=list(domain_datasets.keys()), label="Select Domain")
-        q_index_input = gr.Textbox(label="Enter Query Indices (e.g., 89,121,245)", lines=1)
-    with gr.Row():
-        view_btn = gr.Button("📋 View Questions for Selected Domain")
-        questions_display = gr.Textbox(label="Domain Questions", lines=10, interactive=False)
-    with gr.Row():
-        run_btn = gr.Button("🚀 Run Evaluation")
-    result_output = gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)")
-    log_output = gr.Textbox(label="Execution Log", lines=10, interactive=True)
-    # Bindings
-    view_btn.click(fn=get_questions_for_domain, inputs=domain_input, outputs=questions_display)
-    run_btn.click(
-        fn=evaluate_rag_gradio,
-        inputs=[domain_input, q_index_input],
-        outputs=[result_output, log_output]
-    )
-# === Launch ===
-demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)

         "Adherence": adherence
     }
 def evaluate_rag_pipeline(domain, q_indices):
     import torch
 # Updated wrapper
 def evaluate_rag_gradio(domain, q_indices_str):
+    # Capture logs
     log_stream = io.StringIO()
     sys.stdout = log_stream
     try:
+        # Parse comma-separated indices
         q_indices = [int(x.strip()) for x in q_indices_str.split(",") if x.strip().isdigit()]
         results = evaluate_rag_pipeline(domain, q_indices)
         logs = log_stream.getvalue()
         return results, logs
     except Exception as e:
         traceback.print_exc()
         return {"error": str(e)}, log_stream.getvalue()
+    finally:
+        sys.stdout = sys.__stdout__  # Restore stdout
# --- Gradio interface -------------------------------------------------------
# Inputs: the domain to evaluate and the query indices as free text.
_input_components = [
    gr.Dropdown(
        label="Domain",
        choices=["Legal", "Medical", "GK", "CS", "Finance"],
    ),
    gr.Textbox(
        lines=1,
        label="Comma-separated Query Indices (e.g. 89,121,245)",
    ),
]

# Outputs: mirror the (results, logs) tuple returned by evaluate_rag_gradio.
_output_components = [
    gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)"),
    gr.Textbox(label="Execution Log", interactive=True, lines=10),
]

iface = gr.Interface(
    fn=evaluate_rag_gradio,
    inputs=_input_components,
    outputs=_output_components,
    title="RAG Evaluation Dashboard",
    description="Evaluate your RAG pipeline across selected queries using GPT-based generation and judgment.",
)

# --- Launch -----------------------------------------------------------------
iface.launch(debug=True, server_name="0.0.0.0", server_port=7860)