File size: 6,791 Bytes

b53c3eb

import torch
import nltk
import numpy as np
import os
import kagglehub
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
from bert_score import score as bert_score_calculator

# Make sure the Punkt sentence-splitter data needed by nltk.sent_tokenize is
# installed. nltk.data.find() reports a missing resource by raising
# LookupError, so that is the exception to catch before downloading.
# "punkt" is the resource older NLTK releases load; "punkt_tab" is the one
# newer releases load, so both are checked to stay version-agnostic.
for _punkt_resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_punkt_resource}")
    except LookupError:
        nltk.download(_punkt_resource)

class LLM_Generator:
    """Sample chat completions from a causal LM downloaded via Kaggle Hub.

    The model is loaded with ``device_map="auto"``, so its placement is
    decided by the loader; inputs are therefore moved to ``self.model.device``
    at generation time.
    """

    # Token id that closes the "thinking" block for Qwen-3; everything up to
    # and including its last occurrence is dropped from each response.
    THINK_END_TOKEN_ID = 151668

    def __init__(self, model_handle, device='cuda'):
        """Download ``model_handle`` from Kaggle Hub and load tokenizer + model.

        Args:
            model_handle: Kaggle Hub model handle passed to
                ``kagglehub.model_download``.
            device: Stored on ``self.device`` for callers that read it. It is
                not used to place the model (``device_map="auto"`` does that).
        """
        self.device = device
        print(f"Downloading model from Kaggle Hub: {model_handle}")
        model_path = kagglehub.model_download(model_handle)
        print(f"Model downloaded to: {model_path}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype="auto",
            device_map="auto"
        )

    def generate(self, prompt, num_samples=1, temperature=0.7, max_new_tokens=150):
        """Sample ``num_samples`` independent responses to ``prompt``.

        Args:
            prompt: User message inserted into the chat template.
            num_samples: Number of responses to sample (must be >= 1).
            temperature: Sampling temperature forwarded to ``model.generate``.
            max_new_tokens: Upper bound on generated tokens per response.

        Returns:
            A list of exactly ``num_samples`` decoded strings with the prompt
            and any leading "thinking" block removed.

        Raises:
            ValueError: If ``num_samples`` is smaller than 1.
        """
        if num_samples < 1:
            raise ValueError(f"num_samples must be >= 1, got {num_samples}")

        messages = [
            {"role": "system", "content": "you are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True
        )

        # Encode the prompt ONCE. num_return_sequences already asks for
        # num_samples continuations per prompt; also repeating the prompt
        # num_samples times would yield num_samples ** 2 sequences.
        # Inputs follow the model's actual device rather than a fixed name,
        # because device_map="auto" chose where the weights live.
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

        generated_ids_batch = self.model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            num_return_sequences=num_samples
        )

        # Each returned row starts with the prompt tokens; slice them off.
        prompt_len = model_inputs.input_ids.shape[1]

        final_responses = []
        for generated_ids in generated_ids_batch:
            output_ids = self._strip_thinking(generated_ids[prompt_len:].tolist())
            content = self.tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
            final_responses.append(content)

        return final_responses

    def _strip_thinking(self, output_ids):
        """Return the tokens after the last end-of-thinking marker.

        If the marker is absent (e.g. generation stopped before the thinking
        block was closed) the token list is returned unchanged.
        """
        try:
            # Search from the end so the LAST marker wins.
            start = len(output_ids) - output_ids[::-1].index(self.THINK_END_TOKEN_ID)
        except ValueError:
            start = 0
        return output_ids[start:]

class SelfCheckGPT:
    """Score each sentence of a response for consistency with sampled responses.

    Two scoring back-ends are available through ``check``:

    * ``'nli'``: mean probability of *contradiction* (versus entailment)
      assigned by an NLI classifier to each (sample, sentence) pair.
    * ``'bertscore'``: ``1 - mean BERTScore F1`` between the sentence and
      each sample.

    In both cases a higher score means the sentence is less supported by the
    samples.
    """

    def __init__(self, device=None, nli_model_name="microsoft/deberta-v3-large-mnli"):
        """Choose the compute device; the NLI model itself is loaded lazily.

        Args:
            device: Torch device string. Defaults to ``'cuda'`` when available,
                otherwise ``'cpu'``.
            nli_model_name: Hugging Face identifier of the sequence
                classification checkpoint used by the ``'nli'`` method.
                NOTE(review): confirm the default id resolves on the Hub.
        """
        if device:
            self.device = device
        else:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.nli_model_name = nli_model_name
        self.nli_tokenizer = None
        self.nli_model = None

    def _load_nli_model(self):
        """Load the NLI tokenizer and model on first use (no-op afterwards)."""
        if self.nli_model is not None:
            return
        try:
            tokenizer = AutoTokenizer.from_pretrained(self.nli_model_name)
            model = AutoModelForSequenceClassification.from_pretrained(self.nli_model_name).to(self.device)
        except Exception as e:
            print(f"Error loading NLI model: {e}")
            raise
        # Assign only after both loads succeeded so a failure never leaves a
        # tokenizer paired with a missing model.
        self.nli_tokenizer = tokenizer
        self.nli_model = model

    def _label_index(self, label):
        """Return the logit index for ``label``, ignoring label-name case.

        Raises:
            KeyError: If the model config has no such label.
        """
        label2id = self.nli_model.config.label2id
        wanted = label.lower()
        for name, index in label2id.items():
            if str(name).lower() == wanted:
                return index
        raise KeyError(f"NLI model has no '{label}' label; available: {list(label2id)}")

    def _check_bertscore(self, sentences, sample_responses):
        """Return ``1 - mean F1`` per sentence, computed in one BERTScore call."""
        if not sentences:
            return []

        num_sentences = len(sentences)
        num_samples = len(sample_responses)

        # Flatten every (sample, sentence) pair into one batch so the scorer
        # is invoked once instead of once per sentence.
        # Pair k corresponds to sentence k // num_samples, sample k % num_samples.
        cands = list(sample_responses) * num_sentences
        refs = [sent for sent in sentences for _ in range(num_samples)]

        _, _, F1 = bert_score_calculator(
            cands, refs, lang="en", verbose=False, idf=False, device=self.device
        )

        per_sentence_f1 = F1.reshape(num_sentences, num_samples).mean(dim=1)
        return [1.0 - f1 for f1 in per_sentence_f1.tolist()]

    def _check_nli(self, sentences, sample_responses):
        """Return the mean contradiction probability per sentence."""
        self._load_nli_model()

        # Label positions are fixed for the model; resolve them once.
        entailment_idx = self._label_index('entailment')
        contradiction_idx = self._label_index('contradiction')

        all_scores = []
        for sent in sentences:
            contradiction_probs = []
            for sample in sample_responses:
                # The sample is the premise, the sentence the hypothesis.
                tokenized_input = self.nli_tokenizer(
                    sample, sent, return_tensors="pt", truncation=True, max_length=512
                ).to(self.device)

                with torch.no_grad():
                    logits = self.nli_model(**tokenized_input).logits

                # Two-way softmax over {entailment, contradiction}:
                #   exp(c) / (exp(e) + exp(c)) == sigmoid(c - e)
                # The sigmoid form cannot overflow to inf/inf = NaN.
                margin = logits[0, contradiction_idx] - logits[0, entailment_idx]
                contradiction_probs.append(torch.sigmoid(margin.float()).item())

            # Plain float keeps the score type identical to the BERTScore path.
            all_scores.append(float(np.mean(contradiction_probs)))

        return all_scores

    def check(self, main_response, sample_responses, method='nli'):
        """Score every sentence of ``main_response`` against the samples.

        Args:
            main_response: Text to evaluate; split into sentences with NLTK.
            sample_responses: Non-empty list of alternative responses.
            method: ``'nli'`` or ``'bertscore'`` (case-insensitive).

        Returns:
            A list of ``{"sentence": str, "score": float}`` dicts in sentence
            order; an empty list when ``main_response`` has no sentences.

        Raises:
            ValueError: If ``method`` is unknown, or ``sample_responses`` is
                empty (an average over zero samples is undefined).
        """
        sentences = nltk.sent_tokenize(main_response)
        if not sentences:
            return []

        scorers = {
            'bertscore': self._check_bertscore,
            'nli': self._check_nli,
        }
        scorer = scorers.get(method.lower())
        if scorer is None:
            raise ValueError(f"Invalid method '{method}'. Choose from 'bertscore', 'nli'.")

        if not sample_responses:
            raise ValueError("sample_responses must contain at least one response.")

        scores = scorer(sentences, sample_responses)
        return [{"sentence": sent, "score": score} for sent, score in zip(sentences, scores)]

def main():
    """Run the demo: sample answers to one prompt, then score the first one.

    The first generated response is treated as the text under test and the
    remaining ones as reference samples. It is scored with the 'nli' method
    and then with the 'bertscore' method, printing per-sentence results.
    """
    print("Initializing LLM Generator...")
    generator = LLM_Generator(model_handle="qwen-lm/qwen-3/transformers/0.6b")

    prompt = "Write a short biography of Neil Armstrong, the first man on the moon. Include the name of the spacecraft he used."
    print(f"Generating responses for prompt: '{prompt}'")

    responses = generator.generate(
        prompt,
        num_samples=6,
        temperature=0.8,
        max_new_tokens=150,
    )
    main_response = responses[0]
    sample_responses = responses[1:]

    print("\n--- Generated Main Response ---")
    print(main_response)

    print("\n--- Generated Sample Responses ---")
    for i, r in enumerate(sample_responses):
        # Only a 100-character preview of each sample is shown.
        print(f"{i+1}. {r[:100]}...")

    checker = SelfCheckGPT()

    # (banner, method) pairs, run in this order; the banners differ in their
    # leading newlines, so they are spelled out rather than formatted.
    runs = (
        ("\n\n--- Running SelfCheckGPT with 'nli' method ---", 'nli'),
        ("\n--- Running SelfCheckGPT with 'bertscore' method ---", 'bertscore'),
    )
    for banner, method in runs:
        print(banner)
        results = checker.check(main_response, sample_responses, method=method)
        print("Higher scores suggest a higher probability of being a hallucination.")
        for result in results:
            print(f"Score: {result['score']:.4f}\tSentence: {result['sentence']}")