akhaliq HF Staff committed on
Commit defc73b · verified · 1 Parent(s): 8014508

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +252 -0
  2. requirements.txt +38 -0
app.py ADDED
@@ -0,0 +1,252 @@
+ import gradio as gr
+ import torch
+ import spaces
+ import time
+ from diffusers import HunyuanVideo15ImageToVideoPipeline
+ from diffusers.utils import export_to_video, load_image
+ from PIL import Image
+ import os
+ import tempfile
+
+ # Model configuration
+ dtype = torch.bfloat16
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+ # Initialize pipeline (loaded lazily on first use)
+ pipe = None
+
+ def load_model():
+     """Load the HunyuanVideo-1.5 pipeline on demand."""
+     global pipe
+     if pipe is None:
+         pipe = HunyuanVideo15ImageToVideoPipeline.from_pretrained(
+             "hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_i2v_distilled",
+             torch_dtype=dtype
+         )
+         # Offload idle submodules to CPU and decode the VAE in tiles
+         # to keep peak GPU memory manageable
+         pipe.enable_model_cpu_offload()
+         pipe.vae.enable_tiling()
+     return pipe
+
+ @spaces.GPU(duration=120)
+ def generate_video(image, prompt, seed=1, num_frames=121, num_inference_steps=50, fps=24):
+     """
+     Generate a video from an image and a prompt using HunyuanVideo-1.5
+     """
+     if image is None:
+         raise gr.Error("Please upload an image first!")
+
+     if not prompt.strip():
+         raise gr.Error("Please enter a prompt!")
+
+     try:
+         # Load model
+         pipe = load_model()
+
+         # Create generator with seed (gr.Number may deliver a float, so cast to int)
+         generator = torch.Generator(device=device).manual_seed(int(seed))
+
+         # Load and process image
+         if isinstance(image, str):
+             input_image = load_image(image)
+         else:
+             input_image = image
+
+         # Generate video
+         with torch.inference_mode():
+             video_frames = pipe(
+                 prompt=prompt,
+                 image=input_image,
+                 generator=generator,
+                 num_frames=int(num_frames),
+                 num_inference_steps=int(num_inference_steps),
+             ).frames[0]
+
+         # Create temporary file for output
+         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
+             output_path = tmp_file.name
+
+         # Export video
+         export_to_video(video_frames, output_path, fps=int(fps))
+
+         return output_path
+
+     except Exception as e:
+         raise gr.Error(f"Error generating video: {str(e)}")
+     finally:
+         # Cleanup GPU memory
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+
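+ # For reference, a minimal sketch of the same pipeline call without the
+ # Gradio UI (illustrative only; "input.jpg" is a placeholder path):
+ #
+ #     pipe = load_model()
+ #     frames = pipe(
+ #         prompt="A cat relaxing on a surfboard at the beach",
+ #         image=load_image("input.jpg"),
+ #         num_frames=121,
+ #         num_inference_steps=50,
+ #     ).frames[0]
+ #     export_to_video(frames, "output.mp4", fps=24)
+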
+ def create_examples():
+     """Create example inputs for the app."""
+     example_image = "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"
+     example_prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
+
+     return [
+         [example_image, example_prompt, 1, 121, 50, 24],
+         [example_image, "A majestic eagle soaring through mountain peaks at sunset", 42, 121, 50, 24],
+         [example_image, "Anime style, a girl with pink hair dancing in cherry blossom petals", 123, 121, 50, 24],
+     ]
+
+ # Custom theme
+ custom_theme = gr.themes.Soft(
+     primary_hue="blue",
+     secondary_hue="indigo",
+     neutral_hue="slate",
+     font=gr.themes.GoogleFont("Inter"),
+     text_size="lg",
+     spacing_size="lg",
+     radius_size="md"
+ ).set(
+     button_primary_background_fill="*primary_600",
+     button_primary_background_fill_hover="*primary_700",
+     block_title_text_weight="600",
+ )
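+ # Values like "*primary_600" reference the theme's own palette tokens
+ # rather than literal CSS colors.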
+
+ with gr.Blocks(
+     theme=custom_theme,
+     css="""
+     .gradio-container {
+         max-width: 1200px !important;
+         margin: auto !important;
+     }
+     """,
+ ) as demo:
+     # Header with "Built with anycoder" link
+     gr.HTML("""
+     <div style="text-align: center; margin-bottom: 20px;">
+         <h1>🎬 Image to Video Generator</h1>
+         <p style="color: #666;">Transform static images into dynamic videos using HunyuanVideo-1.5</p>
+         <p style="margin-top: 10px;">
+             <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #0066cc; text-decoration: none;">
+                 Built with anycoder
+             </a>
+         </p>
+     </div>
+     """)
+
+     with gr.Row(equal_height=True):
+         with gr.Column(scale=1):
+             gr.Markdown("### 📸 Input Image")
+             input_image = gr.Image(
+                 label="Upload Image",
+                 type="pil",
+                 height=300,
+                 sources=["upload", "webcam", "clipboard"]
+             )
+
+             gr.Markdown("### ✍️ Prompt")
+             input_prompt = gr.Textbox(
+                 label="Describe the video you want to generate",
+                 placeholder="Describe the motion, style, and content...",
+                 lines=4,
+                 max_lines=6
+             )
+
+             with gr.Accordion("⚙️ Advanced Settings", open=False):
+                 seed = gr.Number(
+                     label="Seed",
+                     value=1,
+                     minimum=0,
+                     maximum=999999,
+                     step=1,
+                     info="Random seed for reproducible results"
+                 )
+
+                 num_frames = gr.Slider(
+                     label="Number of Frames",
+                     minimum=49,
+                     maximum=121,
+                     value=121,
+                     step=1,
+                     info="Higher values = longer videos"
+                 )
+
+                 num_inference_steps = gr.Slider(
+                     label="Inference Steps",
+                     minimum=20,
+                     maximum=100,
+                     value=50,
+                     step=1,
+                     info="Higher values = better quality but slower"
+                 )
+
+                 fps = gr.Slider(
+                     label="FPS",
+                     minimum=12,
+                     maximum=30,
+                     value=24,
+                     step=1,
+                     info="Frames per second for the output video"
+                 )
+
+             generate_btn = gr.Button(
+                 "🎬 Generate Video",
+                 variant="primary",
+                 size="lg"
+             )
+
+         with gr.Column(scale=1):
+             gr.Markdown("### 🎥 Generated Video")
+             output_video = gr.Video(
+                 label="Output Video",
+                 height=400,
+                 autoplay=True,
+                 show_download_button=True
+             )
+
+             # Status message
+             status = gr.Markdown("Ready to generate your video!", visible=True)
+
+     # Examples section
+     gr.Markdown("### 💡 Examples")
+     gr.Examples(
+         examples=create_examples(),
+         inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
+         outputs=output_video,
+         fn=generate_video,
+         cache_examples=False,
+         label="Try these examples"
+     )
+
+     # Instructions
+     with gr.Accordion("📖 How to Use", open=False):
+         gr.Markdown("""
+         1. **Upload an Image**: Choose any image as the starting frame
+         2. **Write a Prompt**: Describe the desired video content and motion
+         3. **Adjust Settings**: Optionally modify the seed, frame count, and quality settings
+         4. **Generate**: Click the button and wait for the magic to happen!
+
+         **Tips**:
+         - Use descriptive prompts with motion words (e.g., "flying", "dancing", "flowing")
+         - Higher inference steps improve quality but take longer
+         - The seed controls randomness; reuse the same seed for reproducible results
+         - For best results, use clear, high-quality input images
+         """)
+
+     # Event handler with loading states
+     def generate_with_loading(image, prompt, seed_val, frames, steps, fps_val):
+         status_msg = "🔄 Generating video... This may take a few minutes."
+         yield gr.update(), gr.update(), status_msg
+
+         try:
+             video_path = generate_video(image, prompt, seed_val, frames, steps, fps_val)
+             success_msg = "✅ Video generated successfully!"
+             yield video_path, gr.update(), success_msg
+         except Exception as e:
+             error_msg = f"❌ Error: {str(e)}"
+             yield gr.update(), gr.update(), error_msg
+
+     generate_btn.click(
+         fn=generate_with_loading,
+         inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
+         outputs=[output_video, generate_btn, status],
+         show_progress="full"
+     )
+
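+ # Because generate_with_loading is a generator, Gradio streams each yield to
+ # the outputs, so the status text updates before and after the long-running
+ # generation call.
+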
+ # Launch with Gradio 6 syntax
+ demo.launch(
+     footer_links=[
+         {"label": "HunyuanVideo Model", "url": "https://huggingface.co/tencent/HunyuanVideo-1.5"},
+         {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
+     ]
+ )
requirements.txt ADDED
@@ -0,0 +1,38 @@
+ git+https://github.com/huggingface/diffusers
+ git+https://github.com/huggingface/transformers
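+ # (Assumption: installed from git so that HunyuanVideo-1.5 pipeline support,
+ # which may not yet be in a stable release, is available.)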
+ sentencepiece
+ accelerate
+ torch
+ torchvision
+ torchaudio
+ tokenizers
+ Pillow
+ spaces
+ gradio>=6.0
+ numpy
+ requests
+ opencv-python
+ imageio
+ moviepy
+ safetensors
+ datasets
+ xformers
+ invisible-watermark
+ omegaconf
+ einops
+ ftfy
+ regex
+ timm
+ clip-by-openai
+ transformers-stream-generator
+ protobuf
+ triton
+ bitsandbytes
+ peft
+ diffusers-image-models
+ controlnet-aux
+ mediapipe
+ scipy
+ pillow-heif
+ av
+ ffmpeg-python