"""SelfCheckGPT-style hallucination detection.

Generates a main response plus stochastic samples from an LLM downloaded via
Kaggle Hub, then scores each sentence of the main response against the
samples using NLI contradiction probability or BERTScore dissimilarity.
"""

import torch
import nltk
import numpy as np
import kagglehub
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
from bert_score import score as bert_score_calculator

# Ensure the NLTK sentence tokenizer data is available (newer NLTK releases
# ship it as 'punkt_tab'); nltk.data.find raises LookupError when missing.
for resource in ('punkt', 'punkt_tab'):
    try:
        nltk.data.find(f'tokenizers/{resource}')
    except LookupError:
        nltk.download(resource)


class LLM_Generator:
    """Downloads a causal LM from Kaggle Hub and samples responses from it."""

    def __init__(self, model_handle, device='cuda'):
        self.device = device
        print(f"Downloading model from Kaggle Hub: {model_handle}")
        model_path = kagglehub.model_download(model_handle)
        print(f"Model downloaded to: {model_path}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype="auto",
            device_map="auto"
        )

    def generate(self, prompt, num_samples=1, temperature=0.7, max_new_tokens=150):
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True
        )
        # Tokenize the prompt once; num_return_sequences below produces the
        # batch of samples, so the prompt must not also be duplicated here.
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

        generated_ids_batch = self.model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            num_return_sequences=num_samples
        )

        input_ids_len = model_inputs.input_ids.shape[1]
        final_responses = []

        for generated_ids in generated_ids_batch:
            output_ids = generated_ids[input_ids_len:].tolist()

            try:
                # Drop the reasoning block: 151668 is the </think> token id in
                # the Qwen3 tokenizer (rindex via the reversed list).
                index = len(output_ids) - output_ids[::-1].index(151668)
            except ValueError:
                index = 0

            content = self.tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
            final_responses.append(content)

        return final_responses


class SelfCheckGPT:
    """Sentence-level consistency checking: each sentence of a main response
    is compared against stochastically sampled responses; higher scores
    suggest the sentence is more likely hallucinated."""

    def __init__(self, device=None):
        if device:
            self.device = device
        else:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # The NLI model is heavyweight, so it is loaded lazily on first use.
        self.nli_tokenizer = None
        self.nli_model = None

    def _load_nli_model(self):
        if self.nli_model is None:
            # DeBERTa-v3-large NLI checkpoint released with the SelfCheckGPT
            # paper; its labels are 'entailment' and 'contradiction'.
            nli_model_name = "potsawee/deberta-v3-large-mnli"
            try:
                self.nli_tokenizer = AutoTokenizer.from_pretrained(nli_model_name)
                self.nli_model = AutoModelForSequenceClassification.from_pretrained(nli_model_name).to(self.device)
                self.nli_model.eval()
            except Exception as e:
                print(f"Error loading NLI model: {e}")
                raise

    def _check_bertscore(self, sentences, sample_responses):
        # Score each sentence by how poorly the samples reproduce it:
        # 1 - mean BERTScore F1 across samples, so higher = less support.
        all_scores = []
        for sent in sentences:
            refs = [sent] * len(sample_responses)
            cands = sample_responses

            _, _, F1 = bert_score_calculator(
                cands, refs, lang="en", verbose=False, idf=False, device=self.device
            )

            avg_bert_score = F1.mean().item()
            all_scores.append(1.0 - avg_bert_score)
        return all_scores

    def _check_nli(self, sentences, sample_responses):
        self._load_nli_model()
        all_scores = []

        for sent in sentences:
            contradiction_probs = []
            for sample in sample_responses:
                # NLI convention: premise = sampled response, hypothesis = sentence.
                tokenized_input = self.nli_tokenizer(
                    sample, sent, return_tensors="pt", truncation=True, max_length=512
                ).to(self.device)

                with torch.no_grad():
                    logits = self.nli_model(**tokenized_input).logits

                entailment_logit = logits[0, self.nli_model.config.label2id['entailment']]
                contradiction_logit = logits[0, self.nli_model.config.label2id['contradiction']]

                # P(contradiction) from a softmax over {entailment, contradiction};
                # softmax is numerically stabler than raw exp ratios.
                prob_contradiction = torch.softmax(
                    torch.stack([entailment_logit, contradiction_logit]), dim=0
                )[1]
                contradiction_probs.append(prob_contradiction.item())

            # Sentence score: mean contradiction probability across all samples.
            all_scores.append(float(np.mean(contradiction_probs)))

        return all_scores

    def check(self, main_response, sample_responses, method='nli'):
        sentences = nltk.sent_tokenize(main_response)
        if not sentences:
            return []

        if method.lower() == 'bertscore':
            scores = self._check_bertscore(sentences, sample_responses)
        elif method.lower() == 'nli':
            scores = self._check_nli(sentences, sample_responses)
        else:
            raise ValueError(f"Invalid method '{method}'. Choose from 'bertscore', 'nli'.")

        return [{"sentence": sent, "score": score} for sent, score in zip(sentences, scores)]


def main():
    model_handle = "qwen-lm/qwen-3/transformers/0.6b"

    print("Initializing LLM Generator...")
    generator = LLM_Generator(model_handle=model_handle)

    prompt = "Write a short biography of Neil Armstrong, the first man on the moon. Include the name of the spacecraft he used."
    print(f"Generating responses for prompt: '{prompt}'")

    # The first response is the one checked; the rest are stochastic samples.
    responses = generator.generate(prompt, num_samples=6, temperature=0.8, max_new_tokens=150)
    main_response = responses[0]
    sample_responses = responses[1:]

    print("\n--- Generated Main Response ---")
    print(main_response)
    print("\n--- Generated Sample Responses ---")
    for i, r in enumerate(sample_responses):
        print(f"{i+1}. {r[:100]}...")

    checker = SelfCheckGPT()

    print("\n\n--- Running SelfCheckGPT with 'nli' method ---")
    nli_results = checker.check(main_response, sample_responses, method='nli')
    print("Higher scores suggest a higher probability of being a hallucination.")
    for result in nli_results:
        print(f"Score: {result['score']:.4f}\tSentence: {result['sentence']}")

    print("\n--- Running SelfCheckGPT with 'bertscore' method ---")
    bertscore_results = checker.check(main_response, sample_responses, method='bertscore')
    print("Higher scores suggest a higher probability of being a hallucination.")
    for result in bertscore_results:
        print(f"Score: {result['score']:.4f}\tSentence: {result['sentence']}")