Renangi's picture
Initial commit without secrets
c8dfbc0
raw
history blame
1.87 kB
import json
from pathlib import Path
from typing import Any, Dict, List, Tuple
from .llm import LLMClient
from .config import JUDGE_MODEL
def format_docs_with_keys(
    documents_sentences: List[List[Tuple[str, str]]]
) -> str:
    """Render keyed sentences as text, one ``key: sentence`` line per sentence.

    Args:
        documents_sentences: One list per document, each holding
            ``(key, sentence)`` pairs in the order they should appear.

    Returns:
        The lines of every document, with documents separated by a single
        blank line. Leading and trailing whitespace of the overall result
        is stripped.
    """
    paragraphs = []
    for document in documents_sentences:
        lines = [f"{key}: {sent}" for key, sent in document]
        # The trailing empty entry yields the newline that ends the document,
        # which becomes a blank separator line once paragraphs are joined.
        paragraphs.append("\n".join(lines + [""]))
    return "\n".join(paragraphs).strip()
class RAGJudge:
    """LLM-backed judge that annotates a RAG answer against keyed sentences.

    The judge fills a prompt template with the keyed documents, the question
    and the answer, sends it to the model named by ``JUDGE_MODEL`` and checks
    that the reply is a JSON object containing every required key.
    """

    # Keys that must be present in the judge's JSON reply, in the order they
    # are checked (the first missing one is reported).
    REQUIRED_KEYS: Tuple[str, ...] = (
        "relevance_explanation",
        "all_relevant_sentence_keys",
        "overall_supported_explanation",
        "overall_supported",
        "sentence_support_information",
        "all_utilized_sentence_keys",
    )

    def __init__(self, prompt_path: str = "prompts/ragbench_judge_prompt.txt"):
        """Create the LLM client and load the prompt template.

        Args:
            prompt_path: Path to a UTF-8 text file. Its content is used with
                ``str.format`` and is filled with the ``documents``,
                ``question`` and ``answer`` fields, so any literal braces in
                the template must be doubled.
        """
        self.client = LLMClient(JUDGE_MODEL)
        self.prompt_template = Path(prompt_path).read_text(encoding="utf-8")

    def annotate(
        self,
        question: str,
        answer: str,
        docs_sentences: List[List[Tuple[str, str]]],
    ) -> Dict[str, Any]:
        """Ask the judge model to annotate one question/answer pair.

        Args:
            question: The question that was asked.
            answer: The answer to be judged.
            docs_sentences: One list per document of ``(key, sentence)``
                pairs; rendered into the prompt by ``format_docs_with_keys``.

        Returns:
            The judge's reply parsed into a dict that contains at least
            every key in ``REQUIRED_KEYS``.

        Raises:
            ValueError: If the reply is not valid JSON, is not a JSON
                object, or lacks one of the required keys.
        """
        prompt = self.prompt_template.format(
            documents=format_docs_with_keys(docs_sentences),
            question=question,
            answer=answer,
        )
        messages = [
            {
                "role": "system",
                "content": "You are an evaluator that outputs STRICT JSON only.",
            },
            {"role": "user", "content": prompt},
        ]
        raw = self.client.chat(messages, max_tokens=2048)
        return self._parse_judge_output(raw)

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return *text* without a surrounding Markdown code fence, if any."""
        stripped = text.strip()
        if not stripped.startswith("```"):
            return stripped
        # Drop the whole opening fence line; it may carry a language tag
        # such as "json".
        _, _, body = stripped.partition("\n")
        body = body.rstrip()
        if body.endswith("```"):
            body = body[:-3]
        return body.strip()

    @classmethod
    def _parse_judge_output(cls, raw: Any) -> Dict[str, Any]:
        """Parse and validate the raw judge reply; raise ValueError if bad."""
        # Models often wrap JSON in a Markdown fence despite being told not
        # to; unwrap it rather than rejecting an otherwise valid payload.
        payload = cls._strip_code_fence(raw) if isinstance(raw, str) else raw
        try:
            data = json.loads(payload)
        except json.JSONDecodeError as e:
            raise ValueError(
                f"Judge JSON parse error: {e}\nRaw: {raw[:500]}"
            ) from e

        # A top-level list, string or number is valid JSON but would make
        # the key checks below meaningless (or raise TypeError).
        if not isinstance(data, dict):
            raise ValueError(
                f"Judge output must be a JSON object, got {type(data).__name__}"
            )

        for key in cls.REQUIRED_KEYS:
            if key not in data:
                raise ValueError(f"Missing key in judge output: {key}")
        return data