import tempfile

import gradio as gr
import spaces
import torch
from diffusers import HunyuanVideo15ImageToVideoPipeline
from diffusers.utils import export_to_video, load_image

# Model configuration
dtype = torch.bfloat16
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Initialize pipeline (loaded lazily on first request)
pipe = None

def load_model():
    """Load the official HunyuanVideo-1.5 pipeline on demand."""
    global pipe
    if pipe is None:
        pipe = HunyuanVideo15ImageToVideoPipeline.from_pretrained(
            "tencent/HunyuanVideo-1.5",
            torch_dtype=dtype,
            variant="480p_i2v_step_distilled",
        )
        # Keep idle submodules on the CPU so the pipeline fits in GPU memory
        pipe.enable_model_cpu_offload()
        # Decode the VAE in tiles to reduce peak VRAM usage
        pipe.vae.enable_tiling()
    return pipe
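
# Optional warm-up sketch (an assumption, not part of the original flow;
# it trades slower startup for no first-request delay):
#   load_model()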

@spaces.GPU(duration=120)
def generate_video(image, prompt, seed=1, num_frames=121, num_inference_steps=50, fps=24):
    """
    Generate a video from an image and a prompt using the official HunyuanVideo-1.5.
    """
    if image is None:
        raise gr.Error("Please upload an image first!")
    if not prompt.strip():
        raise gr.Error("Please enter a prompt!")
    try:
        # Load the model (downloads weights on the first call)
        pipe = load_model()
        # Create a seeded generator; cast to int because gr.Number may pass a float
        generator = torch.Generator(device=device).manual_seed(int(seed))
        # Accept either a URL/path or a PIL image
        if isinstance(image, str):
            input_image = load_image(image)
        else:
            input_image = image
        # Generate the video frames
        with torch.inference_mode():
            video_frames = pipe(
                prompt=prompt,
                image=input_image,
                generator=generator,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
            ).frames[0]
        # Write the frames to a temporary .mp4 that Gradio can serve
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name
        export_to_video(video_frames, output_path, fps=fps)
        return output_path
    except Exception as e:
        raise gr.Error(f"Error generating video: {str(e)}")
    finally:
        # Release cached GPU memory between requests
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
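
# Standalone usage sketch (hypothetical prompt; assumes a GPU with enough VRAM):
# generate_video also works outside the UI, e.g.
#   clip = generate_video(
#       load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"),
#       "the cat slowly turns its head toward the camera", seed=42)
#   print(clip)  # path to the temporary .mp4 written by export_to_video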

def create_examples():
    """Create example inputs for the app."""
    example_image = "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"
    example_prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
    return [
        [example_image, example_prompt, 1, 121, 50, 24],
        [example_image, "A majestic eagle soaring through mountain peaks at sunset", 42, 121, 50, 24],
        [example_image, "Anime style, a girl with pink hair dancing in cherry blossom petals", 123, 121, 50, 24],
    ]

# Custom theme
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="md",
).set(
    # "*primary_600" references the shade-600 token of the theme's primary palette
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
)

with gr.Blocks() as demo:
    # Header with "Built with anycoder" link
    gr.HTML("""
        <div style="text-align: center; margin-bottom: 20px;">
            <h1>🎬 Image to Video Generator</h1>
            <p style="color: #666;">Transform static images into dynamic videos using the official HunyuanVideo-1.5</p>
            <p style="margin-top: 10px;">
                <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #0066cc; text-decoration: none;">
                    Built with anycoder
                </a>
            </p>
        </div>
    """)
    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            gr.Markdown("### 📸 Input Image")
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                height=300,
                sources=["upload", "webcam", "clipboard"],
            )
            gr.Markdown("### ✍️ Prompt")
            input_prompt = gr.Textbox(
                label="Describe the video you want to generate",
                placeholder="Describe the motion, style, and content...",
                lines=4,
                max_lines=6,
            )
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                seed = gr.Number(
                    label="Seed",
                    value=1,
                    minimum=0,
                    maximum=999999,
                    step=1,
                    precision=0,  # return an int so it can seed torch.Generator directly
                    info="Random seed for reproducible results",
                )
                num_frames = gr.Slider(
                    label="Number of Frames",
                    minimum=49,
                    maximum=121,
                    value=121,
                    step=1,
                    info="More frames = a longer video (121 frames ≈ 5 s at 24 FPS)",
                )
                num_inference_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=1,
                    info="Higher values improve quality but take longer",
                )
                fps = gr.Slider(
                    label="FPS",
                    minimum=12,
                    maximum=30,
                    value=24,
                    step=1,
                    info="Frames per second of the output video",
                )
            generate_btn = gr.Button(
                "🎬 Generate Video",
                variant="primary",
                size="lg",
            )
        with gr.Column(scale=1):
            gr.Markdown("### 🎥 Generated Video")
            output_video = gr.Video(
                label="Output Video",
                height=400,
                autoplay=True,
            )
            # Status message
            status = gr.Markdown("Ready to generate your video!", visible=True)

    # Examples section
    gr.Markdown("### 💡 Examples")
    gr.Examples(
        examples=create_examples(),
        inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
        outputs=output_video,
        fn=generate_video,
        cache_examples=False,
        label="Try these examples",
    )

    # Instructions
    with gr.Accordion("📖 How to Use", open=False):
        gr.Markdown("""
        1. **Upload an Image**: Choose any image as the starting frame
        2. **Write a Prompt**: Describe the desired video content and motion
        3. **Adjust Settings**: Optionally modify the seed, frame count, and quality settings
        4. **Generate**: Click the button and wait for the magic to happen!

        **Tips**:
        - Use descriptive prompts with motion words (e.g., "flying", "dancing", "flowing")
        - Higher inference steps improve quality but take longer
        - The seed controls randomness; reuse the same seed for reproducible results
        - For best results, use clear, high-quality input images
        - This app uses the official Tencent HunyuanVideo-1.5 model
        """)

    # Event handler with loading states
    def generate_with_loading(image, prompt, seed_val, frames, steps, fps_val):
        # Disable the button and show a status message while the GPU job runs
        yield gr.update(), gr.update(interactive=False), "🔄 Generating video... This may take a few minutes."
        try:
            video_path = generate_video(image, prompt, seed_val, frames, steps, fps_val)
            yield video_path, gr.update(interactive=True), "✅ Video generated successfully!"
        except Exception as e:
            yield gr.update(), gr.update(interactive=True), f"❌ Error: {str(e)}"

    generate_btn.click(
        fn=generate_with_loading,
        inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
        outputs=[output_video, generate_btn, status],
        show_progress="full",
    )

# Launch with Gradio 6 syntax
demo.launch(
    theme=custom_theme,
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    """,
    footer_links=[
        {"label": "Official HunyuanVideo Model", "url": "https://huggingface.co/tencent/HunyuanVideo-1.5"},
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
    ],
)
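
# Portability note (an assumption, not verified against every release): the
# theme/css/footer_links keyword arguments above follow the Gradio 6 launch()
# signature named in the comment; on Gradio < 6, theme and css are passed to
# gr.Blocks(...) instead, and launch() does not accept them.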