import os

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# =========================================================
# 1️⃣ Model Configuration (optimized for HF Spaces)
# =========================================================
# Note: the Qwen2.5 instruct family ships 0.5B / 1.5B / 3B small checkpoints
# (there is no 2.5-1.8B size), so the 1.5B model is used here.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"

# Hugging Face Space-friendly environment tweaks
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

print(f"🔹 Loading model: {MODEL_ID}")

# Smart device selection
if torch.cuda.is_available():
    device = "cuda"
    dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    print("⚙️ Using GPU (CUDA).")
else:
    device = "cpu"
    dtype = torch.float32
    print("⚙️ Using CPU with memory-efficient loading.")

# =========================================================
# 2️⃣ Load Model and Tokenizer
# =========================================================
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    device_map="auto" if device == "cuda" else {"": "cpu"},
    low_cpu_mem_usage=True,
    offload_folder="./offload" if device == "cpu" else None,
)
model.eval()

# =========================================================
# 3️⃣ Inference Function
# =========================================================
def enhance_prompt(user_prompt, temperature, max_tokens, chat_history):
    """Generate an enhanced version of the user prompt."""
    if not user_prompt.strip():
        return chat_history + [["", "⚠️ Please enter a prompt."]]

    messages = [
        {
            "role": "system",
            "content": "Enhance and expand the following prompt with more detail, vivid context, and style.",
        },
        {"role": "user", "content": user_prompt},
    ]

    # return_dict=True makes apply_chat_template return input_ids and attention_mask,
    # so that `**inputs` and `inputs["input_ids"]` below work as intended.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.05,
        )

    # Decode only the newly generated tokens, skipping the prompt portion.
    result = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    chat_history = chat_history + [[user_prompt, result.strip()]]
    return chat_history

# =========================================================
# 4️⃣ Gradio Interface
# =========================================================
with gr.Blocks(title="Prompt Enhancer – Qwen 1.5B", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # ✨ Prompt Enhancer (Qwen2.5-1.5B-Instruct)
        Enhance and enrich your creative prompts using **Qwen 2.5 1.5B**, a lightweight model
        optimized for reasoning and descriptive text generation.
        ---
        """
    )

    with gr.Row():
        chatbot = gr.Chatbot(height=400, label="Prompt Enhancer Chat")
        with gr.Column(scale=1):
            user_prompt = gr.Textbox(
                placeholder="Enter a prompt to enhance (e.g., 'A cat sitting on a chair').",
                label="Your Prompt",
                lines=3,
            )
            temperature = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Temperature")
            max_tokens = gr.Slider(32, 512, value=128, step=16, label="Max Tokens")
            send_btn = gr.Button("🚀 Enhance Prompt", variant="primary")
            clear_btn = gr.Button("🧹 Clear Chat")

    send_btn.click(enhance_prompt, [user_prompt, temperature, max_tokens, chatbot], chatbot)
    user_prompt.submit(enhance_prompt, [user_prompt, temperature, max_tokens, chatbot], chatbot)
    clear_btn.click(lambda: [], None, chatbot)

    gr.Markdown(
        """
        ---
        💡 **Tips:**
        - Use short base prompts (e.g., *“a futuristic city skyline at sunset”*).
        - The model will expand and enhance them with extra creative context.
        - Works fully on CPU and is Space-friendly (<5 GB memory footprint).
        """
    )

# =========================================================
# 5️⃣ Launch
# =========================================================
if __name__ == "__main__":
    demo.launch(show_error=True, share=True)
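# =========================================================
# 6️⃣ Environment notes (sketch, not enforced by this script)
# =========================================================
# A minimal requirements.txt one might pair with this Space — the exact package set is an
# assumption about the intended environment, not something the script pins itself:
#
#   gradio
#   torch
#   transformers
#   accelerate      # required for the `device_map=...` argument in from_pretrained
#   hf_transfer     # required because HF_HUB_ENABLE_HF_TRANSFER=1 is set above
#
# Quick manual check of the inference function without the UI (hypothetical usage):
#
#   history = enhance_prompt("A cat sitting on a chair", temperature=0.7, max_tokens=128, chat_history=[])
#   print(history[-1][1])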