Spaces:
Running
Running
| import json | |
| from pathlib import Path | |
| from typing import Any, Dict, List, Tuple | |
| from .llm import LLMClient | |
| from .config import JUDGE_MODEL | |
def format_docs_with_keys(
    documents_sentences: List[List[Tuple[str, str]]]
) -> str:
    """Render keyed sentences as a plain-text block, one sentence per line.

    Args:
        documents_sentences: One inner list per document; each element is a
            ``(key, sentence)`` pair.

    Returns:
        A string in which every pair appears as ``"<key>: <sentence>"`` on
        its own line and each document is followed by one empty line.
        Leading and trailing whitespace of the whole result is stripped, so
        the separator after the last document does not survive.
    """
    rendered: List[str] = []
    for sentences in documents_sentences:
        rendered.extend(f"{key}: {sent}" for key, sent in sentences)
        # An empty entry becomes the blank separator line once joined.
        rendered.append("")
    joined = "\n".join(rendered)
    return joined.strip()
class RAGJudge:
    """Ask an LLM to judge a question/answer pair against keyed document sentences.

    The prompt is built from a template file, sent through ``LLMClient``
    configured with ``JUDGE_MODEL``, and the reply is parsed as JSON and
    checked for a fixed set of top-level keys.
    """

    # Top-level keys that must be present in the judge's JSON reply.
    _REQUIRED_KEYS: Tuple[str, ...] = (
        "relevance_explanation",
        "all_relevant_sentence_keys",
        "overall_supported_explanation",
        "overall_supported",
        "sentence_support_information",
        "all_utilized_sentence_keys",
    )

    def __init__(self, prompt_path: str = "prompts/ragbench_judge_prompt.txt"):
        """Create the LLM client and load the prompt template.

        Args:
            prompt_path: Path of the UTF-8 text file holding the prompt
                template. It is read once, at construction time.
        """
        self.client = LLMClient(JUDGE_MODEL)
        self.prompt_template = Path(prompt_path).read_text(encoding="utf-8")

    def annotate(
        self,
        question: str,
        answer: str,
        docs_sentences: List[List[Tuple[str, str]]],
    ) -> Dict[str, Any]:
        """Run the judge on one example and return its parsed JSON output.

        Args:
            question: Text substituted for ``{question}`` in the template.
            answer: Text substituted for ``{answer}`` in the template.
            docs_sentences: Per-document lists of ``(key, sentence)`` pairs,
                rendered by ``format_docs_with_keys`` and substituted for
                ``{documents}``.

        Returns:
            The decoded JSON object, containing at least every key in
            ``_REQUIRED_KEYS``.

        Raises:
            ValueError: If the reply is not valid JSON, is not a JSON
                object, or lacks one of the required keys.
        """
        docs_block = format_docs_with_keys(docs_sentences)
        # NOTE(review): str.format treats every "{" / "}" in the template as
        # placeholder syntax, so literal braces in the prompt file (e.g. a
        # JSON example) must be doubled ("{{", "}}") - confirm the file does.
        prompt = self.prompt_template.format(
            documents=docs_block,
            question=question,
            answer=answer,
        )
        messages = [
            {
                "role": "system",
                "content": "You are an evaluator that outputs STRICT JSON only.",
            },
            {"role": "user", "content": prompt},
        ]
        # Presumably chat() returns the reply text as a str - verify against
        # LLMClient, which is defined outside this file.
        raw = self.client.chat(messages, max_tokens=2048)
        return self._parse_judge_output(raw)

    @staticmethod
    def _parse_judge_output(raw: str) -> Dict[str, Any]:
        """Decode the judge reply and validate its top-level structure.

        Args:
            raw: Reply text expected to contain a single JSON object.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If ``raw`` is not valid JSON, decodes to something
                other than a JSON object, or is missing a required key.
        """
        try:
            data = json.loads(raw)
        except json.JSONDecodeError as e:
            # Keep the decoder error as __cause__ and show a bounded excerpt
            # of the reply so the message stays readable.
            raise ValueError(
                f"Judge JSON parse error: {e}\nRaw: {raw[:500]}"
            ) from e

        # Valid JSON may still be a list, string, number or null. Without
        # this check the membership test below would either raise TypeError
        # or report a misleading "missing key".
        if not isinstance(data, dict):
            raise ValueError(
                f"Judge output must be a JSON object, got {type(data).__name__}"
            )

        for key in RAGJudge._REQUIRED_KEYS:
            if key not in data:
                raise ValueError(f"Missing key in judge output: {key}")
        return data