rahul7star committed (verified)
Commit ebe1e32 · 1 Parent(s): 18ddea6

Update app_low.py

Files changed (1)
  1. app_low.py +27 -32
app_low.py CHANGED
@@ -4,31 +4,30 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # =========================================================
-# 1️⃣ Model Configuration (optimized for HF Spaces)
+# 1️⃣ Configuration
 # =========================================================
 MODEL_ID = "Qwen/Qwen2.5-1.5B"
 
-# Hugging Face Space-friendly environment tweaks
+# Space-friendly settings
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 
 print(f"🔹 Loading model: {MODEL_ID}")
 
-# Smart device selection
+# Device setup
 if torch.cuda.is_available():
     device = "cuda"
     dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
-    print("⚙️ Using GPU (CUDA).")
+    print("⚙️ Using GPU for inference.")
 else:
     device = "cpu"
     dtype = torch.float32
-    print("⚙️ Using CPU with memory-efficient loading.")
+    print("⚙️ Using CPU (with offload folder).")
 
 # =========================================================
-# 2️⃣ Load Model and Tokenizer
+# 2️⃣ Load Model + Tokenizer (streaming from HF Hub)
 # =========================================================
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=dtype,
@@ -41,16 +40,12 @@ model.eval()
 # =========================================================
 # 3️⃣ Inference Function
 # =========================================================
-def enhance_prompt(user_prompt, temperature, max_tokens, chat_history):
-    """Generate an enhanced version of the user prompt."""
-    if not user_prompt.strip():
-        return chat_history + [["", "⚠️ Please enter a prompt."]]
-
-    messages = [
-        {"role": "system", "content": "Enhance and expand the following prompt with more detail, vivid context, and style."},
-        {"role": "user", "content": user_prompt},
-    ]
+def chat_with_qwen(user_input, temperature, max_tokens, chat_history):
+    """Chat or enhance text using Qwen2.5-1.5B."""
+    if not user_input.strip():
+        return chat_history + [["", "⚠️ Please enter some text."]]
 
+    messages = [{"role": "user", "content": user_input}]
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -69,51 +64,51 @@ def enhance_prompt(user_prompt, temperature, max_tokens, chat_history):
     )
 
     result = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
-    chat_history = chat_history + [[user_prompt, result.strip()]]
+    chat_history = chat_history + [[user_input, result.strip()]]
     return chat_history
 
 # =========================================================
 # 4️⃣ Gradio Interface
 # =========================================================
-with gr.Blocks(title="Prompt Enhancer – Qwen 1.8B", theme=gr.themes.Soft()) as demo:
+with gr.Blocks(title="Qwen 2.5 1.5B Chat", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # Prompt Enhancer (Qwen2.5-1.8B-Instruct)
-        Enhance and enrich your creative prompts using **Qwen 2.5 1.8B**,
-        a lightweight model optimized for reasoning and descriptive text generation.
+        # 🧠 Qwen 2.5 1.5B Chat / Prompt Enhancer
+        A lightweight reasoning-capable chat model that works fully on CPU or GPU.
+        Optimized for Hugging Face Spaces with offloading and streaming model load.
         ---
         """
     )
 
     with gr.Row():
-        chatbot = gr.Chatbot(height=400, label="Prompt Enhancer Chat")
+        chatbot = gr.Chatbot(height=420, label="Qwen 2.5 Chat")
         with gr.Column(scale=1):
-            user_prompt = gr.Textbox(
-                placeholder="Enter a prompt to enhance (e.g., 'A cat sitting on a chair').",
-                label="Your Prompt",
+            user_input = gr.Textbox(
+                placeholder="Type your question or prompt here...",
+                label="Your Message",
                 lines=3,
             )
             temperature = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Temperature")
             max_tokens = gr.Slider(32, 512, value=128, step=16, label="Max Tokens")
-            send_btn = gr.Button("🚀 Enhance Prompt", variant="primary")
+            send_btn = gr.Button("🚀 Generate", variant="primary")
             clear_btn = gr.Button("🧹 Clear Chat")
 
-    send_btn.click(enhance_prompt, [user_prompt, temperature, max_tokens, chatbot], chatbot)
-    user_prompt.submit(enhance_prompt, [user_prompt, temperature, max_tokens, chatbot], chatbot)
+    send_btn.click(chat_with_qwen, [user_input, temperature, max_tokens, chatbot], chatbot)
+    user_input.submit(chat_with_qwen, [user_input, temperature, max_tokens, chatbot], chatbot)
     clear_btn.click(lambda: [], None, chatbot)
 
     gr.Markdown(
         """
        ---
        💡 **Tips:**
-        - Use short base prompts (e.g., *“a futuristic city skyline at sunset”*).
-        - The model will expand and enhance them with extra creative context.
-        - Works fully on CPU and is Space-friendly (<5 GB memory footprint).
+        - Works with both creative and factual queries.
+        - Try: *“Describe a futuristic city skyline at dawn.”*
+        - Small enough to run smoothly on CPU (under 5 GB memory).
        """
    )
 
 # =========================================================
-# 5️⃣ Launch
+# 5️⃣ Launch App
 # =========================================================
 if __name__ == "__main__":
     demo.launch(show_error=True, share=True)
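
Note: the hunks above show only the start of the tokenization call and the final decode line; the generation step in between is unchanged and therefore elided by the diff. As a point of reference, below is a minimal sketch of how that elided middle of chat_with_qwen plausibly looks, assuming the standard transformers apply_chat_template / generate APIs. The return_tensors, return_dict, max_new_tokens, temperature, and do_sample arguments, and the explicit tokenizer/model/device parameters, are assumptions for illustration, not lines taken from app_low.py.

# Hypothetical sketch of the elided middle of chat_with_qwen();
# the actual app_low.py lines are not shown in this diff hunk.
import torch

def chat_with_qwen_sketch(user_input, temperature, max_tokens, chat_history,
                          tokenizer, model, device):
    # In app_low.py, tokenizer / model / device are module-level globals.
    if not user_input.strip():
        return chat_history + [["", "⚠️ Please enter some text."]]

    messages = [{"role": "user", "content": user_input}]
    # Tokenize with the model's chat template (shown in the diff up to add_generation_prompt=True).
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",   # assumption: tensors are needed for generate()
        return_dict=True,      # assumption: the diff later indexes inputs["input_ids"]
    ).to(device)

    # Assumption: the Temperature / Max Tokens sliders feed generate() directly.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            do_sample=temperature > 0,
        )

    # These lines appear (modulo variable names) as context in the diff.
    result = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return chat_history + [[user_input, result.strip()]]

Passing do_sample=temperature > 0 is just one way to make the temperature slider meaningful; with greedy decoding the temperature value is ignored.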