rahul7star committed
Commit 77384a1 · verified · 1 Parent(s): 35c9e22

Update app_low.py

Files changed (1)
  1. app_low.py +55 -45
app_low.py CHANGED
@@ -1,51 +1,56 @@
 import os
 import torch
 import gradio as gr
-from huggingface_hub import snapshot_download
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # =========================================================
-# 1️⃣ Download model from HF (once)
+# 1️⃣ Model Configuration (optimized for HF Spaces)
 # =========================================================
-MODEL_ID = "rubricreward/mR3-Qwen3-14B-en-prompt-en-thinking"
-print(f"📦 Downloading or loading cached model: {MODEL_ID} ...")
-model_path = snapshot_download(repo_id=MODEL_ID)
-print(f"✅ Model path: {model_path}")
+MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
 
-# =========================================================
-# 2️⃣ Smart device setup (auto CPU/GPU/offload)
-# =========================================================
+# Hugging Face Space-friendly environment tweaks
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
+print(f"🔹 Loading model: {MODEL_ID}")
+
+# Smart device selection
 if torch.cuda.is_available():
     device = "cuda"
     dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
-    print("⚙️ Using CUDA for inference.")
+    print("⚙️ Using GPU (CUDA).")
 else:
     device = "cpu"
     dtype = torch.float32
-    print("⚙️ Using CPU with efficient offloading settings.")
+    print("⚙️ Using CPU with memory-efficient loading.")
 
 # =========================================================
-# 3️⃣ Load model and tokenizer with optimized config
+# 2️⃣ Load Model and Tokenizer
 # =========================================================
-tokenizer = AutoTokenizer.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 model = AutoModelForCausalLM.from_pretrained(
-    model_path,
+    MODEL_ID,
     torch_dtype=dtype,
     device_map="auto" if device == "cuda" else {"": "cpu"},
     low_cpu_mem_usage=True,
     offload_folder="./offload" if device == "cpu" else None,
 )
+model.eval()
 
 # =========================================================
-# 4️⃣ Chat inference function
+# 3️⃣ Inference Function
 # =========================================================
-def chat_with_model(user_input, chat_history):
-    """Run chat-style inference."""
-    if not user_input.strip():
-        return chat_history + [["", "⚠️ Please enter a message."]]
+def enhance_prompt(user_prompt, temperature, max_tokens, chat_history):
+    """Generate an enhanced version of the user prompt."""
+    if not user_prompt.strip():
+        return chat_history + [["", "⚠️ Please enter a prompt."]]
+
+    messages = [
+        {"role": "system", "content": "Enhance and expand the following prompt with more detail, vivid context, and style."},
+        {"role": "user", "content": user_prompt},
+    ]
 
-    messages = [{"role": "user", "content": user_input}]
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -56,54 +61,59 @@ def chat_with_model(user_input, chat_history):
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=128,
-            temperature=0.7,
+            max_new_tokens=int(max_tokens),
+            temperature=float(temperature),
             top_p=0.9,
             do_sample=True,
-            repetition_penalty=1.1,
+            repetition_penalty=1.05,
         )
 
-    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
-    chat_history = chat_history + [[user_input, response.strip()]]
+    result = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
+    chat_history = chat_history + [[user_prompt, result.strip()]]
     return chat_history
 
 # =========================================================
-# 5️⃣ Gradio Chat UI
+# 4️⃣ Gradio Interface
 # =========================================================
-with gr.Blocks(theme=gr.themes.Soft(), title="💬 Qwen3-14B Thinking Chat") as demo:
+with gr.Blocks(title="Prompt Enhancer – Qwen 1.5B", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # 🧠 mR3-Qwen3-14B Prompt-Enhanced Chat
-        A reasoning-ready English chat model from **RubricReward**, optimized for CPU/GPU with offloading.
+        # Prompt Enhancer (Qwen2.5-1.5B-Instruct)
+        Enhance and enrich your creative prompts using **Qwen 2.5 1.5B**,
+        a lightweight model optimized for reasoning and descriptive text generation.
         ---
        """
    )
 
-    chatbot = gr.Chatbot(height=400, label="Chat with mR3-Qwen3-14B")
-    user_input = gr.Textbox(placeholder="Type your message here...", label="Your Message", lines=2)
-    send_button = gr.Button("🚀 Send", variant="primary")
-
-    def clear_history():
-        return []
-
-    clear_btn = gr.Button("🧹 Clear Chat")
+    with gr.Row():
+        chatbot = gr.Chatbot(height=400, label="Prompt Enhancer Chat")
+        with gr.Column(scale=1):
+            user_prompt = gr.Textbox(
+                placeholder="Enter a prompt to enhance (e.g., 'A cat sitting on a chair').",
+                label="Your Prompt",
+                lines=3,
+            )
+            temperature = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Temperature")
+            max_tokens = gr.Slider(32, 512, value=128, step=16, label="Max Tokens")
+            send_btn = gr.Button("🚀 Enhance Prompt", variant="primary")
+            clear_btn = gr.Button("🧹 Clear Chat")
 
-    send_button.click(chat_with_model, [user_input, chatbot], chatbot)
-    user_input.submit(chat_with_model, [user_input, chatbot], chatbot)
-    clear_btn.click(fn=clear_history, outputs=chatbot)
+    send_btn.click(enhance_prompt, [user_prompt, temperature, max_tokens, chatbot], chatbot)
+    user_prompt.submit(enhance_prompt, [user_prompt, temperature, max_tokens, chatbot], chatbot)
+    clear_btn.click(lambda: [], None, chatbot)
 
     gr.Markdown(
         """
        ---
        💡 **Tips:**
-        - Ask it to explain, reason, or summarize complex ideas.
-        - Try: *“Explain quantum computing in simple terms.”*
-        - Try: *“Give me 3 creative ways to promote a science exhibition.”*
+        - Use short base prompts (e.g., *“a futuristic city skyline at sunset”*).
+        - The model will expand and enhance them with extra creative context.
+        - Works fully on CPU and is Space-friendly (<5 GB memory footprint).
        """
    )
 
 # =========================================================
-# 6️⃣ Launch App
+# 5️⃣ Launch
 # =========================================================
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(show_error=True, share=True)
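Both hunks cut off inside the apply_chat_template call, so its closing arguments (old lines 52-55 / new lines 57-60) never appear in the diff. For the later **inputs unpacking into model.generate and the inputs["input_ids"].shape[-1] slice to work, the hidden tail presumably looks like the sketch below; the exact kwargs are an inference from the visible code, not part of this commit.

# Hypothetical reconstruction of the call tail hidden between the two hunks;
# the kwargs are inferred from how `inputs` is used later, not from the commit.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # end the template with an assistant turn marker
    tokenize=True,               # return token IDs rather than a formatted string
    return_dict=True,            # yields input_ids + attention_mask, so **inputs works
    return_tensors="pt",         # PyTorch tensors, as model.generate expects
).to(model.device)               # keep the tensors on the same device as the model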
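Since demo.launch() is guarded by if __name__ == "__main__":, the new handler can also be sanity-checked without starting the Gradio UI. A minimal sketch, assuming the updated file is importable as app_low from the working directory; note that importing it loads the model, which is slow on CPU.

# Hypothetical smoke test for enhance_prompt; not part of the commit.
from app_low import enhance_prompt

history = enhance_prompt(
    "A cat sitting on a chair",  # base prompt to enhance
    temperature=0.7,
    max_tokens=128,
    chat_history=[],             # start from an empty history
)
print(history[-1][1])            # print the model's enhanced prompt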