rahul7star committed (verified)
Commit 334c887
1 Parent(s): b6c66fe

Update app_allfile.py

Files changed (1):
  1. app_allfile.py (+252, -1)
app_allfile.py CHANGED
@@ -1 +1,252 @@
import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler
from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
import math
import os
import tempfile
from huggingface_hub import hf_hub_download

# --- Model & Repo ---
HF_MODEL = os.environ.get("HF_UPLOAD_REPO", "rahul7star/qwen-edit-img-repo")
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
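
# Note: in addition to HF_UPLOAD_REPO, the upload helper below reads
# HUGGINGFACE_HUB_TOKEN from the environment when pushing files to HF_MODEL via HfApi.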

# --- Camera prompts ---
BASE_PROMPTS = {
    "front": "Move the camera to a front-facing position showing the full character. Background is plain white.",
    "back": "Move the camera to a back-facing position showing the full character. Background is plain white.",
    "left": "Move the camera to a side (left) profile view. Background is plain white.",
    "right": "Move the camera to a side (right) profile view. Background is plain white.",
    "45_left": "Rotate camera 45° left",
    "45_right": "Rotate camera 45° right",
    "90_left": "Rotate camera 90° left",
    "90_right": "Rotate camera 90° right",
    "top_down": "Switch to top-down view",
    "low_angle": "Switch to low-angle view",
    "close_up": "Switch to close-up lens",
    "medium_close_up": "Switch to medium close-up lens",
    "zoom_out": "Switch to zoom out lens",
}
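
# Each key above becomes one output image in the UI (labelled key.capitalize()),
# and the optional extra prompt from the textbox is appended to every base prompt.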

# --- Resolution presets ---
RESOLUTIONS = {  # label -> (width, height) in pixels
    "1:4": (512, 2048),
    "1:3": (576, 1728),
    "nealy 9:16": (768, 1344),
    "nealy 2:3": (832, 1216),
    "3:4": (896, 1152),
}

MAX_SEED = np.iinfo(np.int32).max

# --- CPU-only upload function ---
def upload_image_and_prompt_cpu(input_image, prompt_text) -> str:
    from datetime import datetime
    import uuid, shutil
    from huggingface_hub import HfApi

    api = HfApi()
    print(prompt_text)
    today_str = datetime.now().strftime("%Y-%m-%d")
    unique_subfolder = f"Upload-Image-{uuid.uuid4().hex[:8]}"
    hf_folder = f"{today_str}/{unique_subfolder}"

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
        if isinstance(input_image, str):
            shutil.copy(input_image, tmp_img.name)
        else:
            input_image.save(tmp_img.name, format="PNG")
        tmp_img_path = tmp_img.name

    api.upload_file(
        path_or_fileobj=tmp_img_path,
        path_in_repo=f"{hf_folder}/input_image.png",
        repo_id=HF_MODEL,
        repo_type="model",
        token=os.environ.get("HUGGINGFACE_HUB_TOKEN")
    )

    summary_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt").name
    with open(summary_file, "w", encoding="utf-8") as f:
        f.write(prompt_text)

    api.upload_file(
        path_or_fileobj=summary_file,
        path_in_repo=f"{hf_folder}/summary.txt",
        repo_id=HF_MODEL,
        repo_type="model",
        token=os.environ.get("HUGGINGFACE_HUB_TOKEN")
    )

    os.remove(tmp_img_path)
    os.remove(summary_file)
    return hf_folder
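
# Illustrative layout produced in HF_MODEL by the helper above (the date and the
# 8-character suffix vary per call):
#   2025-01-01/Upload-Image-ab12cd34/input_image.png
#   2025-01-01/Upload-Image-ab12cd34/summary.txt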

# --- Scheduler & model load ---
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype
).to(device)

# Load LoRA weights
pipe.load_lora_weights(
    "rahul7star/qwen-char-lora",
    weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
)
pipe.fuse_lora(lora_scale=1.0)
pipe.load_lora_weights(
    "rahul7star/qwen-char-lora",
    weight_name="qwen_lora/qwen-multiple-char.safetensors",
)
pipe.fuse_lora(lora_scale=1.0)

# Swap in the FA3 double-stream attention processor and run optimize_pipeline_
# (from the local optimization module) once with dummy inputs.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")

# --- Helpers ---
def _append_prompt(base: str, extra: str) -> str:
    extra = (extra or "").strip()
    return (base if not extra else f"{base} {extra}").strip()

def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    generator = torch.Generator(device=device).manual_seed(seed)
    result = pipe(
        image=input_images if input_images else None,
        prompt=prompt,
        negative_prompt=" ",
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images
    try:
        upload_image_and_prompt_cpu(result[0], prompt)
    except Exception as e:
        print("Upload failed:", e)
    return result[0]

def resize_to_preset(img: Image.Image, preset_key: str) -> Image.Image:
    w, h = RESOLUTIONS[preset_key]
    return img.resize((w, h), Image.LANCZOS)

def concat_images_horizontally(images, bg_color=(255, 255, 255)):
    images = [img.convert("RGB") for img in images if img is not None]
    if not images:
        return None
    h = max(img.height for img in images)
    resized = []
    for img in images:
        if img.height != h:
            w = int(img.width * (h / img.height))
            img = img.resize((w, h), Image.LANCZOS)
        resized.append(img)
    w_total = sum(img.width for img in resized)
    canvas = Image.new("RGB", (w_total, h), bg_color)
    x = 0
    for img in resized:
        canvas.paste(img, (x, 0))
        x += img.width
    return canvas

# --- Generate all camera angles dynamically ---
@spaces.GPU()
def generate_turnaround(
    image,
    extra_prompt="",
    preset_key="nealy 9:16",
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=4,
    progress=gr.Progress(track_tqdm=True),
):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    if image is None:
        # Return a flat list matching the outputs: one None per view image plus the
        # concatenated image, then the seed and the status message.
        return [None] * (len(BASE_PROMPTS) + 1) + [seed, "❌ Please upload an input image"]

    input_image = image.convert("RGB") if isinstance(image, Image.Image) else Image.open(image).convert("RGB")
    pil_images = [input_image]

    results = {}
    total = len(BASE_PROMPTS)
    for i, (key, base_prompt) in enumerate(BASE_PROMPTS.items(), start=1):
        progress(i / total, desc=f"Generating {key}...")
        prompt_full = _append_prompt(base_prompt, extra_prompt)
        img = generate_single_view(pil_images, prompt_full, seed + i, num_inference_steps, true_guidance_scale)
        results[key] = resize_to_preset(img, preset_key)

    concat = concat_images_horizontally(list(results.values()))
    return [*results.values(), concat, seed, f"✅ Generated {len(results)} views plus a concatenated image"]

# --- UI ---
css = """
#col-container {margin: 0 auto; max-width: 1400px;}
.image-container img {object-fit: contain !important; max-width: 100%; max-height: 100%;}
.notice {background: #fff5f5; border: 1px solid #fca5a5; color: #7f1d1d; padding: 12px 14px; border-radius: 10px; font-weight: 600; line-height: 1.5; margin-bottom: 10px;}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        input_image = gr.Image(label="Input image", type="pil", height=500)
        extra_prompt = gr.Textbox(
            label="Additional prompt (appended to every view prompt)",
            placeholder="high detail, anime style, soft lighting, 4k",
            lines=2
        )
        preset_dropdown = gr.Dropdown(
            label="Output resolution preset",
            choices=list(RESOLUTIONS.keys()),
            value="nealy 9:16"
        )
        run_button = gr.Button("🎨 Start generation", variant="primary")
        status_text = gr.Textbox(label="Status", interactive=False)

        # Dynamic outputs for all angles
        result_images = []
        for key in BASE_PROMPTS.keys():
            result_images.append(gr.Image(label=key.capitalize(), type="pil", format="png", height=400, show_download_button=True))
        result_concat = gr.Image(label="Concatenated image (all views)", type="pil", format="png", height=400, show_download_button=True)

        with gr.Accordion("⚙️ Advanced settings", open=False):
            seed_slider = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True)
            guidance_scale_slider = gr.Slider(label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
            num_steps_slider = gr.Slider(label="Generation steps", minimum=1, maximum=40, step=1, value=4)

        run_button.click(
            fn=generate_turnaround,
            inputs=[input_image, extra_prompt, preset_dropdown, seed_slider, randomize_seed_checkbox, guidance_scale_slider, num_steps_slider],
            outputs=[*result_images, result_concat, seed_slider, status_text]
        )

if __name__ == "__main__":
    demo.launch()