Spaces:

Renangi
/

ragbench-rag-eval

Running

Initial commit without secrets

c8dfbc0 about 1 month ago

1.87 kB

	import json
	from pathlib import Path
	from typing import Any, Dict, List, Tuple

	from .llm import LLMClient
	from .config import JUDGE_MODEL


	def format_docs_with_keys(
	    documents_sentences: List[List[Tuple[str, str]]]
	) -> str:
	    """Render keyed sentences as a plain-text block for the judge prompt.

	    Each ``(key, sentence)`` pair becomes one ``"key: sentence"`` line.
	    The lines of one document are followed by an empty line, so consecutive
	    documents are separated by a blank line. Leading and trailing whitespace
	    of the final text is stripped.

	    Args:
	        documents_sentences: One list per document, each holding
	            ``(key, sentence)`` pairs in display order.

	    Returns:
	        The joined text, or ``""`` when there is nothing to render.
	    """
	    # Every document contributes its labelled lines plus one trailing empty
	    # entry; joining the per-document texts with "\n" therefore yields the
	    # same string as joining one flat list of all lines.
	    per_document = (
	        "\n".join([*(f"{label}: {text}" for label, text in sentences), ""])
	        for sentences in documents_sentences
	    )
	    return "\n".join(per_document).strip()


	class RAGJudge:
	    """LLM-backed judge that annotates a RAG answer against its documents.

	    The judge fills a prompt template with the keyed documents, the question
	    and the answer, sends it to the judge model, and returns the model's JSON
	    reply after checking that it is an object with all required keys.
	    """

	    # Top-level keys that every judge reply must contain.
	    _REQUIRED_KEYS = (
	        "relevance_explanation",
	        "all_relevant_sentence_keys",
	        "overall_supported_explanation",
	        "overall_supported",
	        "sentence_support_information",
	        "all_utilized_sentence_keys",
	    )

	    def __init__(self, prompt_path: str = "prompts/ragbench_judge_prompt.txt"):
	        """Create the LLM client and load the prompt template.

	        Args:
	            prompt_path: Path of the UTF-8 text file holding the prompt
	                template. It is filled with ``str.format`` using the fields
	                ``documents``, ``question`` and ``answer``, so any literal
	                braces in the template must be doubled (``{{`` / ``}}``).
	        """
	        self.client = LLMClient(JUDGE_MODEL)
	        self.prompt_template = Path(prompt_path).read_text(encoding="utf-8")

	    def annotate(
	        self,
	        question: str,
	        answer: str,
	        docs_sentences: List[List[Tuple[str, str]]],
	    ) -> Dict[str, Any]:
	        """Ask the judge model to annotate one question/answer pair.

	        Args:
	            question: The user question.
	            answer: The generated answer to evaluate.
	            docs_sentences: One list per document of ``(key, sentence)``
	                pairs, rendered into the prompt's ``documents`` field.

	        Returns:
	            The parsed judge reply as a dict containing every required key.

	        Raises:
	            ValueError: If the reply is not valid JSON, is not a JSON
	                object, or lacks a required key.
	        """
	        docs_block = format_docs_with_keys(docs_sentences)
	        prompt = self.prompt_template.format(
	            documents=docs_block,
	            question=question,
	            answer=answer,
	        )
	        messages = [
	            {
	                "role": "system",
	                "content": "You are an evaluator that outputs STRICT JSON only.",
	            },
	            {"role": "user", "content": prompt},
	        ]
	        # NOTE(review): assumes LLMClient.chat returns the reply text as a
	        # str -- confirm against the client implementation.
	        raw = self.client.chat(messages, max_tokens=2048)
	        return self._parse_judge_output(raw)

	    @classmethod
	    def _parse_judge_output(cls, raw: str) -> Dict[str, Any]:
	        """Parse the raw judge reply and validate its top-level structure.

	        Args:
	            raw: The reply text returned by the judge model.

	        Returns:
	            The decoded JSON object.

	        Raises:
	            ValueError: If ``raw`` is not valid JSON, decodes to something
	                other than a JSON object, or lacks a required key.
	        """
	        try:
	            data = json.loads(raw)
	        except json.JSONDecodeError as e:
	            # Keep only the head of the reply so the message stays readable.
	            raise ValueError(
	                f"Judge JSON parse error: {e}\nRaw: {raw[:500]}"
	            ) from e

	        # json.loads also accepts arrays, strings, numbers and null. Without
	        # this check the membership test below would do substring/element
	        # matching (or raise TypeError) instead of validating object keys.
	        if not isinstance(data, dict):
	            raise ValueError(
	                "Judge output must be a JSON object, "
	                f"got {type(data).__name__}"
	            )

	        for key in cls._REQUIRED_KEYS:
	            if key not in data:
	                raise ValueError(f"Missing key in judge output: {key}")
	        return data