akhaliq HF Staff committed on
Commit defc73b · verified · 1 Parent(s): 8014508

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +252 -0
  2. requirements.txt +38 -0
app.py ADDED
@@ -0,0 +1,252 @@
+ import gradio as gr
+ import torch
+ import spaces
+ import time
+ from diffusers import HunyuanVideo15ImageToVideoPipeline
+ from diffusers.utils import export_to_video, load_image
+ from PIL import Image
+ import os
+ import tempfile
+
+ # Model configuration
+ dtype = torch.bfloat16
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+ # Initialize pipeline (loaded lazily on first use)
+ pipe = None
+
+ def load_model():
+     """Load the HunyuanVideo-1.5 pipeline on demand."""
+     global pipe
+     if pipe is None:
+         pipe = HunyuanVideo15ImageToVideoPipeline.from_pretrained(
+             "hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_i2v_distilled",
+             torch_dtype=dtype
+         )
+         # Offload idle submodules to CPU and decode the VAE in tiles
+         # to keep peak GPU memory manageable
+         pipe.enable_model_cpu_offload()
+         pipe.vae.enable_tiling()
+     return pipe
+
+ @spaces.GPU(duration=120)
+ def generate_video(image, prompt, seed=1, num_frames=121, num_inference_steps=50, fps=24):
+     """
+     Generate a video from an image and a prompt using HunyuanVideo-1.5
+     """
+     if image is None:
+         raise gr.Error("Please upload an image first!")
+
+     if not prompt.strip():
+         raise gr.Error("Please enter a prompt!")
+
+     try:
+         # Load model
+         pipe = load_model()
+
+         # Create generator with seed (gr.Number may deliver a float, so cast to int)
+         generator = torch.Generator(device=device).manual_seed(int(seed))
+
+         # Load and process image
+         if isinstance(image, str):
+             input_image = load_image(image)
+         else:
+             input_image = image
+
+         # Generate video
+         with torch.inference_mode():
+             video_frames = pipe(
+                 prompt=prompt,
+                 image=input_image,
+                 generator=generator,
+                 num_frames=int(num_frames),
+                 num_inference_steps=int(num_inference_steps),
+             ).frames[0]
+
+         # Create temporary file for output
+         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
+             output_path = tmp_file.name
+
+         # Export video
+         export_to_video(video_frames, output_path, fps=int(fps))
+
+         return output_path
+
+     except Exception as e:
+         raise gr.Error(f"Error generating video: {str(e)}")
+     finally:
+         # Cleanup GPU memory
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+
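+ # For reference, a minimal sketch of the same pipeline call without the
+ # Gradio UI (illustrative only; "input.jpg" is a placeholder path):
+ #
+ #     pipe = load_model()
+ #     frames = pipe(
+ #         prompt="A cat relaxing on a surfboard at the beach",
+ #         image=load_image("input.jpg"),
+ #         num_frames=121,
+ #         num_inference_steps=50,
+ #     ).frames[0]
+ #     export_to_video(frames, "output.mp4", fps=24)
+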
+ def create_examples():
+     """Create example inputs for the app."""
+     example_image = "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"
+     example_prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
+
+     return [
+         [example_image, example_prompt, 1, 121, 50, 24],
+         [example_image, "A majestic eagle soaring through mountain peaks at sunset", 42, 121, 50, 24],
+         [example_image, "Anime style, a girl with pink hair dancing in cherry blossom petals", 123, 121, 50, 24],
+     ]
+
+ # Custom theme
+ custom_theme = gr.themes.Soft(
+     primary_hue="blue",
+     secondary_hue="indigo",
+     neutral_hue="slate",
+     font=gr.themes.GoogleFont("Inter"),
+     text_size="lg",
+     spacing_size="lg",
+     radius_size="md"
+ ).set(
+     button_primary_background_fill="*primary_600",
+     button_primary_background_fill_hover="*primary_700",
+     block_title_text_weight="600",
+ )
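+ # Values like "*primary_600" reference the theme's own palette tokens
+ # rather than literal CSS colors.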
+
+ with gr.Blocks(
+     theme=custom_theme,
+     css="""
+     .gradio-container {
+         max-width: 1200px !important;
+         margin: auto !important;
+     }
+     """,
+ ) as demo:
+     # Header with "Built with anycoder" link
+     gr.HTML("""
+     <div style="text-align: center; margin-bottom: 20px;">
+         <h1>🎬 Image to Video Generator</h1>
+         <p style="color: #666;">Transform static images into dynamic videos using HunyuanVideo-1.5</p>
+         <p style="margin-top: 10px;">
+             <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #0066cc; text-decoration: none;">
+                 Built with anycoder
+             </a>
+         </p>
+     </div>
+     """)
+
+     with gr.Row(equal_height=True):
+         with gr.Column(scale=1):
+             gr.Markdown("### 📸 Input Image")
+             input_image = gr.Image(
+                 label="Upload Image",
+                 type="pil",
+                 height=300,
+                 sources=["upload", "webcam", "clipboard"]
+             )
+
+             gr.Markdown("### ✍️ Prompt")
+             input_prompt = gr.Textbox(
+                 label="Describe the video you want to generate",
+                 placeholder="Describe the motion, style, and content...",
+                 lines=4,
+                 max_lines=6
+             )
+
+             with gr.Accordion("⚙️ Advanced Settings", open=False):
+                 seed = gr.Number(
+                     label="Seed",
+                     value=1,
+                     minimum=0,
+                     maximum=999999,
+                     step=1,
+                     info="Random seed for reproducible results"
+                 )
+
+                 num_frames = gr.Slider(
+                     label="Number of Frames",
+                     minimum=49,
+                     maximum=121,
+                     value=121,
+                     step=1,
+                     info="Higher values = longer videos"
+                 )
+
+                 num_inference_steps = gr.Slider(
+                     label="Inference Steps",
+                     minimum=20,
+                     maximum=100,
+                     value=50,
+                     step=1,
+                     info="Higher values = better quality but slower"
+                 )
+
+                 fps = gr.Slider(
+                     label="FPS",
+                     minimum=12,
+                     maximum=30,
+                     value=24,
+                     step=1,
+                     info="Frames per second for the output video"
+                 )
+
+             generate_btn = gr.Button(
+                 "🎬 Generate Video",
+                 variant="primary",
+                 size="lg"
+             )
+
+         with gr.Column(scale=1):
+             gr.Markdown("### 🎥 Generated Video")
+             output_video = gr.Video(
+                 label="Output Video",
+                 height=400,
+                 autoplay=True,
+                 show_download_button=True
+             )
+
+             # Status message
+             status = gr.Markdown("Ready to generate your video!", visible=True)
+
+     # Examples section
+     gr.Markdown("### 💡 Examples")
+     gr.Examples(
+         examples=create_examples(),
+         inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
+         outputs=output_video,
+         fn=generate_video,
+         cache_examples=False,
+         label="Try these examples"
+     )
+
+     # Instructions
+     with gr.Accordion("📖 How to Use", open=False):
+         gr.Markdown("""
+         1. **Upload an Image**: Choose any image as the starting frame
+         2. **Write a Prompt**: Describe the desired video content and motion
+         3. **Adjust Settings**: Optionally modify the seed, frame count, and quality settings
+         4. **Generate**: Click the button and wait for the magic to happen!
+
+         **Tips**:
+         - Use descriptive prompts with motion words (e.g., "flying", "dancing", "flowing")
+         - Higher inference steps improve quality but take longer
+         - The seed controls randomness; reuse the same seed for reproducible results
+         - For best results, use clear, high-quality input images
+         """)
+
+     # Event handler with loading states
+     def generate_with_loading(image, prompt, seed_val, frames, steps, fps_val):
+         status_msg = "🔄 Generating video... This may take a few minutes."
+         yield gr.update(), gr.update(), status_msg
+
+         try:
+             video_path = generate_video(image, prompt, seed_val, frames, steps, fps_val)
+             success_msg = "✅ Video generated successfully!"
+             yield video_path, gr.update(), success_msg
+         except Exception as e:
+             error_msg = f"❌ Error: {str(e)}"
+             yield gr.update(), gr.update(), error_msg
+
+     generate_btn.click(
+         fn=generate_with_loading,
+         inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
+         outputs=[output_video, generate_btn, status],
+         show_progress="full"
+     )
+
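+ # Because generate_with_loading is a generator, Gradio streams each yield to
+ # the outputs, so the status text updates before and after the long-running
+ # generation call.
+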
+ # Launch with Gradio 6 syntax
+ demo.launch(
+     footer_links=[
+         {"label": "HunyuanVideo Model", "url": "https://huggingface.co/tencent/HunyuanVideo-1.5"},
+         {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
+     ]
+ )
requirements.txt ADDED
@@ -0,0 +1,38 @@
+ git+https://github.com/huggingface/diffusers
+ git+https://github.com/huggingface/transformers
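+ # (Assumption: installed from git so that HunyuanVideo-1.5 pipeline support,
+ # which may not yet be in a stable release, is available.)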
+ sentencepiece
+ accelerate
+ torch
+ torchvision
+ torchaudio
+ tokenizers
+ Pillow
+ spaces
+ gradio>=6.0
+ numpy
+ requests
+ opencv-python
+ imageio
+ moviepy
+ safetensors
+ datasets
+ xformers
+ invisible-watermark
+ omegaconf
+ einops
+ ftfy
+ regex
+ timm
+ clip-by-openai
+ transformers-stream-generator
+ protobuf
+ triton
+ bitsandbytes
+ peft
+ diffusers-image-models
+ controlnet-aux
+ mediapipe
+ scipy
+ pillow-heif
+ av
+ ffmpeg-python