# ---------------- Imports ----------------
import torch
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from transformers import BertTokenizer, BertModel
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import json
import io

# ---------------- Load models once ----------------
# Maps the labels shown in the model dropdown to the HuggingFace checkpoint
# names that load_bert_model() passes to `from_pretrained`.
model_options = {
    "BERT Large Uncased": "bert-large-uncased",
    "BERT Large Cased": "bert-large-cased",
    "BERT Base Uncased": "bert-base-uncased",
    "BERT Base Cased": "bert-base-cased"
}

# Checkpoint loaded at import time (must be one of the values above).
current_model_name = "bert-large-uncased"

# Load ELMo (TF Hub) as a frozen Keras layer; it is shared by every request.
elmo = hub.KerasLayer("https://tfhub.dev/google/elmo/3", trainable=False)

# Load BERT (HuggingFace Transformers) for the default checkpoint.
# NOTE(review): no function in this file reads `tokenizer` or `bert_model`;
# the embedding code uses `current_tokenizer` / `current_bert_model`, which
# load_bert_model() fills by loading the checkpoint again. These two names
# therefore look like a redundant second copy of the default model held in
# memory - confirm nothing external imports them before removing.
tokenizer = BertTokenizer.from_pretrained(current_model_name)
bert_model = BertModel.from_pretrained(current_model_name)
bert_model.eval()  # disable training mode

# Module-level state shared by the UI callbacks.
# Each matrix gets one row per stored sentence and stays None until the
# first sentence is added; rows line up with `sentences_storage` by index.
bert_embeddings_matrix = None
elmo_embeddings_matrix = None
sentences_storage = []
# Active BERT model/tokenizer pair; assigned by load_bert_model().
current_bert_model = None
current_tokenizer = None

def load_bert_model(model_name):
    """Load a BERT tokenizer/model pair and make it the active pair.

    Both objects are loaded into locals first and published to the module
    globals only after both loads succeeded. This way a failure while loading
    the model can never leave a freshly loaded tokenizer paired with the
    previously active model (mismatched vocabularies would produce wrong
    embeddings without any error).

    Args:
        model_name: Checkpoint name or path passed to ``from_pretrained``.

    Returns:
        A human-readable status string: "✅ Loaded ..." on success or
        "❌ Error loading ..." on failure. Exceptions are reported through
        the string and never raised, so the caller can show it in the UI.
    """
    global current_bert_model, current_tokenizer
    try:
        new_tokenizer = BertTokenizer.from_pretrained(model_name)
        new_model = BertModel.from_pretrained(model_name)
        new_model.eval()  # inference only: disable dropout etc.
    except Exception as e:
        # UI boundary: report the problem instead of crashing the callback.
        # The previously active pair is left untouched.
        return f"❌ Error loading {model_name}: {str(e)}"

    # Publish both halves together, only once everything succeeded.
    current_tokenizer, current_bert_model = new_tokenizer, new_model
    return f"✅ Loaded {model_name}"

# Initialize the active BERT pair with the default model at import time.
# The returned status string is discarded here; the "Model Status" textbox in
# the UI below starts with a hard-coded success message instead.
load_bert_model(current_model_name)

# ---------------- Single sentence embedding function ----------------
def get_single_embedding(sentence):
    """Embed one sentence with the active BERT model and with ELMo.

    BERT: the last hidden states are averaged over the positions selected by
    the attention mask (mean pooling), then the batch axis is dropped.
    ELMo: the hub layer is called on the raw string; if it returns a dict,
    its 'default' entry is used. Size-1 axes are squeezed away.

    Args:
        sentence: The text to embed (a single string).

    Returns:
        Tuple ``(bert_vector, elmo_vector)`` of numpy arrays; presumably one
        1-D vector each (hidden size depends on the loaded checkpoint).
    """
    # ---- BERT: masked mean over token vectors ----
    encoded = current_tokenizer([sentence], return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        hidden_states = current_bert_model(**encoded).last_hidden_state

    # Trailing axis lets the mask broadcast across the hidden dimension.
    mask = encoded['attention_mask'].unsqueeze(-1)
    token_sum = (hidden_states * mask).sum(1)
    token_count = mask.sum(1)
    bert_vector = (token_sum / token_count).squeeze(0).numpy()

    # ---- ELMo: sentence-level output of the hub layer ----
    elmo_out = elmo(tf.convert_to_tensor([sentence], dtype=tf.string))
    if isinstance(elmo_out, dict):
        elmo_out = elmo_out['default']
    elmo_vector = elmo_out.numpy().squeeze()

    return bert_vector, elmo_vector

def change_bert_model(model_choice):
    """Switch the active BERT model and reset the stored sentences.

    Args:
        model_choice: A label from ``model_options`` (the dropdown value).

    Returns:
        Tuple of three strings for the UI: the model status, a message for
        the results box, and the current-sentences status.

    Notes:
        - An unknown/empty selection is reported as a message (nothing is
          loaded or cleared) instead of surfacing a raw ``KeyError``.
        - The results message only claims "Model changed!" when the load
          actually succeeded; on failure it says so explicitly.
    """
    global bert_embeddings_matrix, elmo_embeddings_matrix, sentences_storage

    model_name = model_options.get(model_choice)
    if model_name is None:
        # No load was attempted, so the stored data is still valid: keep it.
        return (
            f"❌ Unknown model choice: {model_choice!r}",
            "⚠️ Model not changed. Existing sentences were kept.",
            get_current_status(),
        )

    status = load_bert_model(model_name)

    # Reset stored data regardless of the outcome (as before), so rows
    # computed with one checkpoint are never mixed with rows from another.
    bert_embeddings_matrix = None
    elmo_embeddings_matrix = None
    sentences_storage = []

    # load_bert_model() reports success with a leading "✅".
    if status.startswith("✅"):
        clear_status = "🔄 Model changed! Previous embeddings cleared. Please add sentences again."
    else:
        clear_status = (
            "⚠️ Model could not be loaded (see Model Status). "
            "Previous embeddings cleared. Please select a model again before adding sentences."
        )
    return status, clear_status, "📝 No sentences added yet. Please add at least 2 sentences."

# ---------------- Add sentence function ----------------
def add_sentence(sentence):
    """Embed a sentence and append it to the stored matrices and sentence list.

    Args:
        sentence: Raw text from the input box; surrounding whitespace is
            stripped before use.

    Returns:
        Tuple ``(message, status)``: a result message (confirmation, a
        validation hint, or an error description) and the text produced by
        ``get_current_status()``.
    """
    global bert_embeddings_matrix, elmo_embeddings_matrix, sentences_storage

    cleaned = sentence.strip()
    if not cleaned:
        return "Please enter a valid sentence.", get_current_status()

    try:
        bert_vec, elmo_vec = get_single_embedding(cleaned)

        # Each embedding becomes a single 2-D row: [1, features].
        bert_row = bert_vec.reshape(1, -1)
        elmo_row = elmo_vec.reshape(1, -1)

        if bert_embeddings_matrix is not None:
            # Later sentences: stack the new row under the existing ones.
            bert_embeddings_matrix = np.vstack([bert_embeddings_matrix, bert_row])
            elmo_embeddings_matrix = np.vstack([elmo_embeddings_matrix, elmo_row])
        else:
            # First sentence: the row itself is the whole matrix.
            bert_embeddings_matrix = bert_row
            elmo_embeddings_matrix = elmo_row

        sentences_storage.append(cleaned)
        position = len(sentences_storage)
        return f"✓ Added sentence {position}: '{cleaned}'", get_current_status()

    except Exception as e:
        # UI boundary: surface the failure as text rather than raising.
        return f"❌ Error processing sentence: {str(e)}", get_current_status()

# ---------------- Get current status ----------------
def get_current_status():
    """Describe the stored sentences for the status textbox.

    Returns:
        A prompt when nothing is stored, a "1/2 minimum" reminder when one
        sentence is stored, otherwise a numbered list of all sentences
        followed by a ready notice.
    """
    count = len(sentences_storage)

    if count == 0:
        return "📝 No sentences added yet. Please add at least 2 sentences."

    if count == 1:
        return f"📝 Current sentences ({count}/2 minimum):\n1: {sentences_storage[0]}\n\n➕ Add at least 1 more sentence to compute similarity."

    # Header line, then one "<n>: <sentence>" line per stored sentence.
    lines = [f"📝 Current sentences ({count}):"]
    lines.extend(f"{number}: {text}" for number, text in enumerate(sentences_storage, start=1))
    return "\n".join(lines) + "\n\n✅ Ready to compute similarity!"

# ---------------- Compute similarity ----------------
def compute_similarity():
    """Build the pairwise cosine-similarity report for the stored sentences.

    Similarities are computed with PyTorch (row-normalize, then matrix
    product) for both the BERT and the ELMo matrix, and cross-checked against
    sklearn's ``cosine_similarity``.

    Returns:
        The formatted report as one string; a warning string when fewer than
        two sentences are stored; or an error string if anything fails.
    """
    if len(sentences_storage) < 2:
        return "⚠️ Please add at least 2 sentences before computing similarity."

    def cosine_matrix(rows):
        # Scale every row to unit L2 length; the product with the transpose
        # then holds all pairwise cosine similarities.
        as_tensor = torch.tensor(rows, dtype=torch.float32)
        unit_rows = torch.nn.functional.normalize(as_tensor, p=2, dim=1)
        return torch.mm(unit_rows, unit_rows.t()).numpy()

    try:
        bert_sim = cosine_matrix(bert_embeddings_matrix)
        elmo_sim = cosine_matrix(elmo_embeddings_matrix)

        # Independent reference values from sklearn for the sanity check.
        bert_reference = cosine_similarity(bert_embeddings_matrix)
        elmo_reference = cosine_similarity(elmo_embeddings_matrix)
        bert_gap = np.max(np.abs(bert_sim - bert_reference))
        elmo_gap = np.max(np.abs(elmo_sim - elmo_reference))

        pieces = [
            f"🔍 Similarity Analysis for {len(sentences_storage)} sentences:\n\n",
            "🤖 BERT Similarity Matrix (PyTorch):\n",
            f"{np.round(bert_sim, 3)}\n\n",
            "🧠 ELMo Similarity Matrix (PyTorch):\n",
            f"{np.round(elmo_sim, 3)}\n\n",
            "📊 Comparison Check:\n",
            f"BERT torch vs sklearn max diff: {bert_gap:.6f}\n",
            f"ELMo torch vs sklearn max diff: {elmo_gap:.6f}\n\n",
            "📄 Sentences Reference:\n",
        ]
        pieces.extend(
            f"{number}: {text}\n"
            for number, text in enumerate(sentences_storage, start=1)
        )
        pieces += [
            "\n📊 Matrix Details:\n",
            f"BERT embeddings shape: {bert_embeddings_matrix.shape}\n",
            f"ELMo embeddings shape: {elmo_embeddings_matrix.shape}\n",
            f"Similarity matrices shape: {bert_sim.shape}",
        ]
        return "".join(pieces)

    except Exception as e:
        # UI boundary: show the failure in the results box.
        return f"❌ Error computing similarity: {str(e)}"


def clear_all():
    """Forget every stored sentence together with both embedding matrices.

    Returns:
        Tuple ``(message, status)`` for the results box and the status box.
    """
    global bert_embeddings_matrix, elmo_embeddings_matrix, sentences_storage

    # Back to the initial state: no matrices, a fresh empty sentence list.
    bert_embeddings_matrix = elmo_embeddings_matrix = None
    sentences_storage = []

    cleared_message = "🗑️ All sentences cleared."
    empty_status = "📝 No sentences added yet. Please add at least 2 sentences."
    return cleared_message, empty_status

# ---------------- Gradio Interface ----------------
# Layout: model picker on top, then sentence input + action buttons beside a
# live status box, then the results box and a usage guide. The event wiring
# at the bottom connects the widgets to the callbacks defined above.
with gr.Blocks(title="BERT + ELMo Sentence Similarity", theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 🤖 BERT + ELMo Sentence Similarity Analyzer")
    gr.Markdown("Add sentences one by one (minimum 2) and compute pairwise similarity using BERT and ELMo embeddings.")

    # Model selection section
    with gr.Row():
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                choices=list(model_options.keys()),
                value="BERT Large Uncased",
                label="🔧 Select BERT Model",
                info="Choose between cased/uncased and base/large variants"
            )
            model_status = gr.Textbox(
                label="📋 Model Status",
                value="✅ Loaded bert-large-uncased",
                lines=1,
                interactive=False
            )

    # Sentence entry (left) and running list of stored sentences (right)
    with gr.Row():
        with gr.Column(scale=2):
            sentence_input = gr.Textbox(
                label="Enter a sentence",
                placeholder="Type your sentence here... (e.g., 'I love machine learning')",
                lines=2
            )
            with gr.Row():
                add_btn = gr.Button("➕ Add Sentence", variant="primary", size="lg")
                compute_btn = gr.Button("🔍 Compute Similarity", variant="secondary", size="lg")
                clear_btn = gr.Button("🗑️ Clear All", variant="stop", size="lg")

        with gr.Column(scale=1):
            status_output = gr.Textbox(
                label="📋 Current Status",
                value="📝 No sentences added yet. Please add at least 2 sentences.",
                lines=8,
                interactive=False
            )

    # Shared output area: add/clear/model-change messages and the report
    with gr.Row():
        result_output = gr.Textbox(
            label="📊 Similarity Results",
            lines=20,
            interactive=False,
            show_copy_button=True
        )

    # Usage guide. Step 5 describes the copy button configured on the results
    # box; the app has no file export, so none is advertised here.
    gr.Markdown("""
    ### 📖 How to use:
    1. **Choose Model**: Select your preferred BERT variant (uncased recommended for similarity)
    2. **Add sentences**: Type a sentence and click "Add Sentence"
    3. **Repeat**: Add at least 2 sentences (you can add more!)  
    4. **Compute**: Click "Compute Similarity" to see the results
    5. **Copy**: Use the copy button on the results box to take the output elsewhere
    6. **Interpret**: Values closer to 1.0 indicate higher similarity
    
    ### 🔬 Models:
    - **BERT Large Uncased**: Best for semantic similarity (recommended) - 1024 dimensions
    - **BERT Large Cased**: Preserves capitalization, good for proper nouns - 1024 dimensions  
    - **BERT Base Uncased**: Faster, smaller model - 768 dimensions
    - **BERT Base Cased**: Cased version of base model - 768 dimensions
    - **ELMo**: Contextual word representations using LSTM - 1024 dimensions
    """)

    # Event handlers
    # Switching models reloads BERT and resets the stored sentences.
    model_dropdown.change(
        fn=change_bert_model,
        inputs=[model_dropdown],
        outputs=[model_status, result_output, status_output]
    )

    # Add the sentence, then empty the input box. The second step runs after
    # every add attempt, including ones that reported an error.
    add_btn.click(
        fn=add_sentence,
        inputs=[sentence_input],
        outputs=[result_output, status_output]
    ).then(
        lambda: "",  # Clear input after adding
        outputs=[sentence_input]
    )

    compute_btn.click(
        fn=compute_similarity,
        outputs=[result_output]
    )

    clear_btn.click(
        fn=clear_all,
        outputs=[result_output, status_output]
    )
    

# Start the web UI only when this file is run as a script, not on import.
# NOTE(review): share=True is passed to Gradio's launch(); per the Gradio docs
# this requests a publicly reachable share link - confirm that exposing the
# app outside the local machine is intended.
if __name__ == "__main__":
    iface.launch(share=True)