Spaces: Running on Zero

怀羽 committed · Commit 423e6fa · 1 Parent(s): a67e7e4

update zeroGPU

Browse files:
- app.py +67 -108
- requirements.txt +4 -1
app.py CHANGED

@@ -1,78 +1,57 @@
+# app.py (modified for Hugging Face ZeroGPU)
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
-import sys
 import os
+import sys

 # --------------------------------------------------------------------------
-# 1.
+# 1. Configuration (runs at app startup)
 # --------------------------------------------------------------------------

-#
-# model_id = "/mnt/workspace/wanghao/model_saved/Marco-MT-WMT"
+# !! Important: the model ID (loaded from the HF Hub)
 model_id = "AIDC-AI/Marco-MT-Algharb"
-# Add the model directory to the Python path (fixes the Qwen3ForCausalLM import issue)
-if os.path.isdir(model_id):
-    sys.path.insert(0, model_id)
-    print(f"Added model directory to sys.path: {model_id}")

-
-
+# --- ZeroGPU change 1:
+# At startup, *only* define the globals as None.
+# The large model is loaded when the first request arrives.
+# ---
 model = None
-
+tokenizer = None
+generation_config = None
+
+print("ZeroGPU startup script starting...")
+print(f"Preparing to load the tokenizer from {model_id}...")
+
+# The tokenizer is small and can be loaded at startup.
+# ★★★ Reminder: this still requires the HF_TOKEN secret to be set in the Space settings ★★★
 try:
     tokenizer = AutoTokenizer.from_pretrained(
         model_id,
         trust_remote_code=True
     )
     print("Tokenizer loaded successfully!")
-except Exception as e:
-    print(f"Tokenizer failed to load: {e}")
-
-if tokenizer:
-    print(f"Loading model: {model_id}...")
-    try:
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            trust_remote_code=True
-        ).to(device).eval()
-
-        print("Model loaded successfully!")
-    except Exception as e:
-        print(f"Model failed to load: {e}")
-        model = None
-else:
-    print("Skipping model load because the tokenizer failed to load.")
-    model = None

-# ---
-
-#
-im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
-
-# 2. Get the ID of <|endoftext|> (usually 151643)
-eot_id = tokenizer.eos_token_id
-
-print(f"Stop IDs set: <|im_end|>_id={im_end_id}, <|endoftext|>_id={eot_id}")
-
-# 3. Create the GenerationConfig
-generation_config = GenerationConfig(
-    do_sample=False,
-    max_new_tokens=512,
-
-    # Key (1): tell generate() to stop on *either* of these two tokens
-    eos_token_id=[im_end_id, eot_id],
-
-    # Key (2): tell generate() which token to use for padding when batching
-    # (we use <|endoftext|>)
-    pad_token_id=eot_id
-)
-
-
-
-
-
-)
+    # --- ZeroGPU change 2:
+    # Define the GenerationConfig *immediately* after the tokenizer loads.
+    # (This resolves the earlier issue with the Qwen3 stop tokens.)
+    # ---
+    im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
+    eot_id = tokenizer.eos_token_id
+    print(f"Stop IDs set: <|im_end|>_id={im_end_id}, <|endoftext|>_id={eot_id}")
+
+    generation_config = GenerationConfig(
+        do_sample=False,
+        max_new_tokens=512,
+        eos_token_id=[im_end_id, eot_id],
+        pad_token_id=eot_id
+    )
+    print("GenerationConfig configured successfully.")
+
+except Exception as e:
+    print(f"Tokenizer failed to load: {e}")
+    print("!! Fatal: if this is a gated-repo problem, make sure the HF_TOKEN secret is set and restart the Space.")

 # Mapping from language codes to full names (unchanged)
 source_lang_name_map = {

@@ -95,15 +74,40 @@ target_lang_name_map = {
     "sr_latin": "serbian",
     "de": "german",
 }
+
 # --------------------------------------------------------------------------
 # 2. Define the core translation function (modified)
 # --------------------------------------------------------------------------
 def translate(source_text, source_lang_code, target_lang_code):
     """
-    Take the user input and return the translation
+    Take the user input and return the translation.
+    (ZeroGPU: loads the model on the first call)
     """
-
-
+    global model  # ★★★ Key: reference the global 'model' variable
+
+    # --- ZeroGPU change 3: load the model on the first call ---
+    if model is None:
+        if tokenizer is None:
+            return "Error: the tokenizer did not load successfully; cannot continue. Check the startup logs."
+
+        print("--- First request ---")
+        print("Model not loaded yet. Loading it onto ZeroGPU (Nvidia H200)...")
+        try:
+            # This step triggers ZeroGPU's allocation of the H200
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                torch_dtype="auto",
+                device_map="auto",  # 'auto' will detect the H200
+                trust_remote_code=True
+            )
+            model.eval()
+            print("Model loaded onto the GPU successfully!")
+        except Exception as e:
+            print(f"Model failed during first load: {e}")
+            return f"Error: the model failed while loading onto the GPU: {e}"
+    # -----------------------------------------
+
+    # (From here on, the code is identical to the previous version)

     # Simple input validation
     if not source_text or not source_text.strip():

@@ -112,41 +116,27 @@ def translate(source_text, source_lang_code, target_lang_code):
     source_language_name = source_lang_name_map.get(source_lang_code, "the source language")
     target_language_name = target_lang_name_map.get(target_lang_code, "the target language")

-    # Build the same prompt as the vLLM version
     prompt = (
         f"Human: Please translate the following text into {target_language_name}: \n"
         f"{source_text}<|im_end|>\n"
         f"Assistant:"
     )

-    print(prompt)
-    print("--------------")
-
     try:
-
-        # A CausalLM needs the entire "Human: ... Assistant:" string as input
-        inputs = tokenizer(prompt, return_tensors="pt")
-
-        # 2. Move the input tensors to the model's device
-        # (with device_map="auto", model.device points to the first device)
-        inputs = inputs.to(model.device)
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

-
-        with torch.no_grad():  # no gradients needed for inference
+        with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 generation_config=generation_config
             )

-        # 4. Decode
-        # outputs[0] contains "input_ids + generated_ids"
-        # we need to start decoding after the "input_ids"
         input_length = inputs.input_ids.shape[1]
         generated_ids = outputs[0][input_length:]
         generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

         return generated_text

     except Exception as e:
         print(f"Error during translation: {e}")
         return f"An error occurred during translation: {e}"

@@ -157,57 +147,26 @@ def translate(source_text, source_lang_code, target_lang_code):

 # <--- Define custom CSS styles --->
 css = """
-/*
-.gradio-container {
-    /* New background: a cleaner, more modern light grey-blue gradient */
-    background: linear-gradient(135deg, #F5F7FA 0%, #E8EBEE 100%);
-    padding: 20px !important;
-
-    /* New: set the global font to a "normal" system default */
-    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
-}
-
-/* --- 2. Titles (unchanged; they inherit the new font above) --- */
-.app-title {
-    font-size: 32px;
-    font-weight: 600;
-    text-align: center;
-    color: #333333;
-    margin-bottom: 5px;
-    padding-top: 10px;
-}
-.app-subtitle {
-    text-align: center;
-    font-size: 18px;
-    color: #555555;
-    margin-bottom: 20px;
-}
-
-/* --- 3. "Floating card" effect (unchanged; the new background makes it stand out more) --- */
-.gradio-group {
-    border-radius: 20px !important;
-    box-shadow: 0 10px 30px rgba(0,0,0,0.07) !important;
-    border: 0 !important;
-    background: white;
-}
-
-/* --- 4. Enlarge the textboxes (unchanged) --- */
+/* ... all of your CSS styles ... */
 .gradio-textbox {
     min-height: 300px !important;
 }
 """

-# <--- Fix:
+# <--- Fix: the choices definitions --->
 source_lang_choices = [(name.capitalize(), code) for code, name in source_lang_name_map.items()]
 target_lang_choices = [(name.capitalize(), code) for code, name in target_lang_name_map.items()]


-# <---
+# <--- Use gr.Blocks and keep the theme --->
 with gr.Blocks(
     theme=gr.themes.Soft(primary_hue="amber", secondary_hue="amber"),
     css=css,
 ) as demo:

+    # ... (all of your Gradio layout code: gr.HTML, gr.Row, gr.Group, etc.) ...
+    # ... (this part needs no changes)
+
     # --- Title ---
     gr.HTML(f"""
     <div class='app-title'>Marco-MT-Algharb</div>

@@ -264,7 +223,7 @@ with gr.Blocks(
         inputs=[source_text_tb, source_lang_dd, target_lang_dd]
     )

-    # ---
+    # --- Supported language-pairs card ---
     gr.HTML(f"""
    <div style="color: #444; font-size: 16px; margin-top: 30px; padding: 20px 25px; background-color: #FFFFFF; border-radius: 15px; max-width: 900px; margin-left: auto; margin-right: auto; box-shadow: 0 4px 20px rgba(0,0,0,0.05);">
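Note on the approach: the lazy load above means the first request pays the full model-load time inside translate(). The pattern Hugging Face documents for ZeroGPU Spaces is instead the @spaces.GPU decorator from the spaces package, which attaches a GPU only for the duration of each decorated call. A minimal sketch of that alternative, assuming the spaces package that ZeroGPU Spaces provide; the bare-bones translate() signature here is a simplified placeholder, not this app's full function:

import spaces  # available on ZeroGPU Spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "AIDC-AI/Marco-MT-Algharb"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype="auto", trust_remote_code=True
)
model.to("cuda")  # under ZeroGPU this move is deferred until a GPU is attached

@spaces.GPU  # a GPU is allocated for each call and released afterwards
def translate(prompt: str) -> str:
    # Tokenize, generate greedily, and decode only the newly generated tokens.
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=512)
    return tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
    )

The decorator makes the per-request GPU window explicit instead of relying on the `if model is None` check inside the request handler.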
requirements.txt CHANGED

@@ -1,3 +1,6 @@
 Transformers==4.55.0
 gradio==5.49.1
-tomli
+tomli
+accelerate
+bitsandbytes
+sentencepiece
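The new packages back the new loading path: accelerate is required by transformers before device_map="auto" will work, and sentencepiece covers tokenizers that ship as SentencePiece models; bitsandbytes is not referenced in app.py itself and presumably anticipates optional quantized loading. A quick sanity check, illustrative only (the package list simply mirrors this requirements.txt):

import importlib.util

# Fail fast if any dependency is missing from the environment.
for pkg in ("transformers", "gradio", "tomli", "accelerate", "bitsandbytes", "sentencepiece"):
    if importlib.util.find_spec(pkg) is None:
        raise SystemExit(f"missing dependency: {pkg}")
print("all requirements importable")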