import gradio as gr
import torch
import cv2
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt
from PIL import Image
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, StableDiffusionControlNetInpaintPipeline
from transformers import AutoTokenizer
import base64
import requests
import json
from rembg import remove
from scipy import ndimage
from moviepy.editor import ImageSequenceClip
from tqdm import tqdm
import os
import shutil
import time
from huggingface_hub import snapshot_download
import subprocess
import sys
def download_liveportrait():
    """
    Clone the LivePortrait repository and prepare its dependencies.
    """
    liveportrait_path = "./LivePortrait"
    original_cwd = os.getcwd()
    try:
        if not os.path.exists(liveportrait_path):
            print("Cloning LivePortrait repository...")
            os.system(f"git clone https://github.com/KwaiVGI/LivePortrait.git {liveportrait_path}")
            os.chdir(liveportrait_path)
            print("Installing LivePortrait dependencies...")
            os.system("pip install -r requirements.txt")
            # Build the custom MultiScaleDeformableAttention op required by XPose.
            dependency_path = "src/utils/dependencies/XPose/models/UniPose/ops"
            os.chdir(dependency_path)
            print("Building MultiScaleDeformableAttention...")
            os.system("python setup.py build")
            os.system("python setup.py install")
            # Record the absolute path of the ops directory after chdir-ing into it,
            # so the entry added to sys.path actually exists.
            module_path = os.getcwd()
            if module_path not in sys.path:
                sys.path.append(module_path)
            # Restore the directory we started from instead of counting "../" segments.
            os.chdir(original_cwd)
        print("LivePortrait setup completed")
    except Exception as e:
        print("Failed to initialize LivePortrait:", e)
        raise

download_liveportrait()
def download_huggingface_resources():
    """
    Download additional necessary resources from Hugging Face using the CLI.
    """
    try:
        local_dir = "./pretrained_weights"
        os.makedirs(local_dir, exist_ok=True)
        # Use the Hugging Face CLI for downloading
        cmd = [
            "huggingface-cli", "download",
            "KwaiVGI/LivePortrait",
            "--local-dir", local_dir,
            "--exclude", "*.git*", "README.md", "docs"
        ]
        print("Executing command:", " ".join(cmd))
        subprocess.run(cmd, check=True)
        print("Resources successfully downloaded to:", local_dir)
    except subprocess.CalledProcessError as e:
        print("Error during Hugging Face CLI download:", e)
        raise
    except Exception as e:
        print("General error in downloading resources:", e)
        raise

download_huggingface_resources()
def get_project_root():
    """Get the root directory of the current project."""
    return os.path.abspath(os.path.dirname(__file__))

# Ensure working directory is project root
os.chdir(get_project_root())
# Initialize the necessary models and components
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Load ControlNet model
controlnet = ControlNetModel.from_pretrained('lllyasviel/sd-controlnet-openpose', torch_dtype=torch.float16)

# Load Stable Diffusion model with ControlNet
pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
    'runwayml/stable-diffusion-v1-5',
    controlnet=controlnet,
    torch_dtype=torch.float16
)

# Load Inpaint ControlNet
pipe_inpaint_controlnet = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    controlnet=controlnet,
    torch_dtype=torch.float16
)

# Move to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pipe_controlnet.to(device)
pipe_controlnet.enable_attention_slicing()
pipe_inpaint_controlnet.to(device)
pipe_inpaint_controlnet.enable_attention_slicing()
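# Optional memory optimization (a hedged suggestion, not part of the original Space):
# on GPUs with limited VRAM, diffusers pipelines can also offload sub-models to the CPU
# between forward passes. Requires the `accelerate` package to be installed.
# pipe_controlnet.enable_model_cpu_offload()
# pipe_inpaint_controlnet.enable_model_cpu_offload()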
def resize_to_multiple_of_64(width, height):
    # Round both dimensions down to the nearest multiple of 64 for the diffusion pipelines.
    return (width // 64) * 64, (height // 64) * 64

def expand_mask(mask, kernel_size):
    # Dilate the binary mask so the inpainted region extends slightly beyond the subject.
    mask_array = np.array(mask)
    structuring_element = np.ones((kernel_size, kernel_size), dtype=np.uint8)
    expanded_mask_array = ndimage.binary_dilation(
        mask_array, structure=structuring_element
    ).astype(np.uint8) * 255
    return Image.fromarray(expanded_mask_array)
def crop_face_to_square(image_rgb, padding_ratio=0.2, height_multiplier=1.2):
    """
    Detect the face and crop a rectangular region that includes more of the body below the face.
    Instead of centering around the face, we start near the face region and extend downward.
    """
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray_image = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)
    faces = face_cascade.detectMultiScale(gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    if len(faces) == 0:
        print("No face detected.")
        return None
    x, y, w, h = faces[0]
    face_x_center = x + w // 2
    face_y_top = y
    face_side_length = max(w, h)
    padded_side_length = int(face_side_length * (1 + padding_ratio))
    cropped_width = padded_side_length
    cropped_height = int(padded_side_length * height_multiplier)
    top_left_x = max(face_x_center - cropped_width // 2, 0)
    top_margin = int(padded_side_length * 0.1)
    top_left_y = max(face_y_top - top_margin, 0)
    bottom_right_x = min(top_left_x + cropped_width, image_rgb.shape[1])
    bottom_right_y = min(top_left_y + cropped_height, image_rgb.shape[0])
    cropped_image = image_rgb[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
    return cropped_image
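# Illustrative usage sketch (assumes an "example1.jpg" next to this script): note that
# crop_face_to_square returns None when no face is found, so callers fall back to the
# original frame rather than crashing.
# img = cv2.cvtColor(cv2.imread("example1.jpg"), cv2.COLOR_BGR2RGB)
# cropped = crop_face_to_square(img, height_multiplier=1.5)
# img = cropped if cropped is not None else img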
def spirit_animal_baseline(image_path, num_images=4):
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Fall back to the full frame if no face is detected.
    cropped = crop_face_to_square(image_rgb)
    if cropped is not None:
        image_rgb = cropped
    original_height, original_width, _ = image_rgb.shape
    aspect_ratio = original_width / original_height
    if aspect_ratio > 1:
        gen_width = 768
        gen_height = int(gen_width / aspect_ratio)
    else:
        gen_height = 768
        gen_width = int(gen_height * aspect_ratio)
    gen_width, gen_height = resize_to_multiple_of_64(gen_width, gen_height)

    with mp_pose.Pose(static_image_mode=True) as pose:
        results = pose.process(image_rgb)
    if results.pose_landmarks:
        annotated_image = image_rgb.copy()
        mp_drawing.draw_landmarks(
            annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS
        )
    else:
        print("No pose detected.")
        return "No pose detected.", []

    # Draw the detected skeleton on a black canvas to use as the ControlNet condition.
    pose_image = np.zeros_like(image_rgb)
    for connection in mp_pose.POSE_CONNECTIONS:
        start_idx, end_idx = connection
        start, end = results.pose_landmarks.landmark[start_idx], results.pose_landmarks.landmark[end_idx]
        if start.visibility > 0.5 and end.visibility > 0.5:
            x1, y1 = int(start.x * pose_image.shape[1]), int(start.y * pose_image.shape[0])
            x2, y2 = int(end.x * pose_image.shape[1]), int(end.y * pose_image.shape[0])
            cv2.line(pose_image, (x1, y1), (x2, y2), (255, 255, 255), 2)
    pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))

    # Encode as BGR so the JPEG sent to the API has correct colors.
    base64_image = base64.b64encode(cv2.imencode('.jpg', cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR))[1]).decode()
    api_key = os.getenv("GPT_KEY")
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Based on the provided image, think of one spirit animal that is right for the person, and answer in the following format: An ultra-realistic, highly detailed photograph of a single {animal} with facial features characterized by {description}, standing upright in a human-like pose, looking directly at the camera, against a solid, neutral background. Generate one sentence without any other responses or numbering."},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
                ]
            }
        ],
        "max_tokens": 100
    }
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    prompt = response.json()['choices'][0]['message']['content'] if 'choices' in response.json() else "A majestic animal"

    generated_images = []
    with torch.no_grad():
        with torch.autocast(device_type=device.type):
            for _ in range(num_images):
                images = pipe_controlnet(
                    prompt=prompt,
                    negative_prompt=(
                        "multiple heads, two heads, double head, triple head, extra limbs, extra arms, extra legs, "
                        "duplicate faces, multiple faces, mutated anatomy, deformed, disfigured, malformed, "
                        "extra ears, fused ears, blurred, low resolution, cartoonish, watermark, text, logo, "
                        "poorly drawn, distorted, floating limbs, out-of-frame"
                    ),
                    num_inference_steps=20,
                    image=pose_pil,
                    guidance_scale=5,
                    width=gen_width,
                    height=gen_height,
                ).images
                generated_images.append(images[0])
    return prompt, generated_images
def spirit_animal_with_background(image_path, num_images=4):
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # image_rgb = crop_face_to_square(image_rgb)
    original_height, original_width, _ = image_rgb.shape
    aspect_ratio = original_width / original_height
    if aspect_ratio > 1:
        gen_width = 768
        gen_height = int(gen_width / aspect_ratio)
    else:
        gen_height = 768
        gen_width = int(gen_height * aspect_ratio)
    gen_width, gen_height = resize_to_multiple_of_64(gen_width, gen_height)

    with mp_pose.Pose(static_image_mode=True) as pose:
        results = pose.process(image_rgb)
    if results.pose_landmarks:
        annotated_image = image_rgb.copy()
        mp_drawing.draw_landmarks(
            annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS
        )
    else:
        print("No pose detected.")
        return "No pose detected.", []

    pose_image = np.zeros_like(image_rgb)
    for connection in mp_pose.POSE_CONNECTIONS:
        start_idx, end_idx = connection
        start, end = results.pose_landmarks.landmark[start_idx], results.pose_landmarks.landmark[end_idx]
        if start.visibility > 0.5 and end.visibility > 0.5:
            x1, y1 = int(start.x * pose_image.shape[1]), int(start.y * pose_image.shape[0])
            x2, y2 = int(end.x * pose_image.shape[1]), int(end.y * pose_image.shape[0])
            cv2.line(pose_image, (x1, y1), (x2, y2), (255, 255, 255), 2)
    pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))

    # Encode as BGR so the JPEG sent to the API has correct colors.
    base64_image = base64.b64encode(cv2.imencode('.jpg', cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR))[1]).decode()
    api_key = os.getenv("GPT_KEY")
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Based on the provided image, think of one spirit animal that is right for the person, and answer in the following format: An ultra-realistic, highly detailed photograph of a single {animal} with facial features characterized by {description}, standing upright in a human-like pose, looking directly at the camera, against a solid, neutral background. Generate one sentence without any other responses or numbering."},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
                ]
            }
        ],
        "max_tokens": 100
    }
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    prompt = response.json()['choices'][0]['message']['content'] if 'choices' in response.json() else "A majestic animal"

    # Segment the person with rembg and dilate the mask so only the subject is repainted.
    mask_image = remove(Image.fromarray(image_rgb))
    initial_mask = mask_image.split()[-1].convert('L')
    kernel_size = min(gen_width, gen_height) // 15
    expanded_mask = expand_mask(initial_mask, kernel_size)

    generated_images = []
    with torch.no_grad():
        with torch.autocast(device_type=device.type):
            for _ in range(num_images):
                images = pipe_inpaint_controlnet(
                    prompt=prompt,
                    negative_prompt=(
                        "multiple heads, two heads, double head, triple head, extra limbs, extra arms, extra legs, "
                        "duplicate faces, multiple faces, mutated anatomy, deformed, disfigured, malformed, "
                        "extra ears, fused ears, blurred, low resolution, cartoonish, watermark, text, logo, "
                        "poorly drawn, distorted, floating limbs, out-of-frame"
                    ),
                    num_inference_steps=20,
                    image=Image.fromarray(image_rgb),
                    mask_image=expanded_mask,
                    control_image=pose_pil,
                    width=gen_width,
                    height=gen_height,
                    guidance_scale=5,
                ).images
                generated_images.append(images[0])
    return prompt, generated_images
def generate_multiple_animals(image_path, keep_background=True, num_images=4, height_multiplier=1.5):
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Fall back to the full frame if no face is detected.
    cropped = crop_face_to_square(image_rgb, height_multiplier=height_multiplier)
    if cropped is not None:
        image_rgb = cropped
    original_image = Image.fromarray(image_rgb)
    original_width, original_height = original_image.size
    aspect_ratio = original_width / original_height
    if aspect_ratio > 1:
        gen_width = 768
        gen_height = int(gen_width / aspect_ratio)
    else:
        gen_height = 768
        gen_width = int(gen_height * aspect_ratio)
    gen_width, gen_height = resize_to_multiple_of_64(gen_width, gen_height)

    # Encode as BGR so the JPEG sent to the API has correct colors.
    base64_image = base64.b64encode(cv2.imencode('.jpg', cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR))[1]).decode()
    api_key = os.getenv("GPT_KEY")
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Based on the provided image, think of " + str(num_images) + " different spirit animals that are right for the person, and answer in the following format for each: An ultra-realistic, highly detailed photograph of a {animal} with facial features characterized by {description}, standing upright in a human-like pose, looking directly at the camera, against a solid, neutral background. Generate these sentences without any other responses or numbering. For the animal choose between owl, bear, fox, koala, lion, dog"
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                    }
                ]
            }
        ],
        "max_tokens": 500
    }
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    response_json = response.json()
    if 'choices' in response_json and len(response_json['choices']) > 0:
        content = response_json['choices'][0]['message']['content']
        prompts = [prompt.strip() for prompt in content.strip().split('.') if prompt.strip()]
    else:
        # Fall back to a generic prompt if the API call did not return any choices.
        prompts = ["An ultra-realistic, highly detailed photograph of a majestic animal"] * num_images
    negative_prompt = (
        "multiple heads, two heads, double head, triple head, extra limbs, extra arms, extra legs, "
        "duplicate faces, multiple faces, mutated anatomy, deformed, disfigured, malformed, "
        "extra ears, fused ears, blurred, low resolution, cartoonish, watermark, text, logo, "
        "poorly drawn, distorted, floating limbs, out-of-frame"
    )
    formatted_prompts = "\n".join(f"{i+1}. {prompt}" for i, prompt in enumerate(prompts))

    with mp_pose.Pose(static_image_mode=True) as pose:
        results = pose.process(image_rgb)
    if results.pose_landmarks:
        annotated_image = image_rgb.copy()
        mp_drawing.draw_landmarks(
            annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS
        )
    else:
        print("No pose detected.")
        return "No pose detected.", []

    pose_image = np.zeros_like(image_rgb)
    for connection in mp_pose.POSE_CONNECTIONS:
        start_idx, end_idx = connection
        start, end = results.pose_landmarks.landmark[start_idx], results.pose_landmarks.landmark[end_idx]
        if start.visibility > 0.5 and end.visibility > 0.5:
            x1, y1 = int(start.x * pose_image.shape[1]), int(start.y * pose_image.shape[0])
            x2, y2 = int(end.x * pose_image.shape[1]), int(end.y * pose_image.shape[0])
            cv2.line(pose_image, (x1, y1), (x2, y2), (255, 255, 255), 2)
    pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))

    if keep_background:
        mask_image = remove(original_image)
        initial_mask = mask_image.split()[-1].convert('L')
        expanded_mask = expand_mask(initial_mask, kernel_size=min(gen_width, gen_height) // 15)
    else:
        expanded_mask = None

    generated_images = []
    if keep_background:
        with torch.no_grad():
            with torch.autocast(device_type=device.type):
                for prompt in prompts:
                    images = pipe_inpaint_controlnet(
                        prompt=prompt,
                        negative_prompt=negative_prompt,
                        num_inference_steps=20,
                        image=Image.fromarray(image_rgb),
                        mask_image=expanded_mask,
                        control_image=pose_pil,
                        width=gen_width,
                        height=gen_height,
                        guidance_scale=5,
                    ).images
                    generated_images.append(images[0])
    else:
        with torch.no_grad():
            with torch.autocast(device_type=device.type):
                for prompt in prompts:
                    images = pipe_controlnet(
                        prompt=prompt,
                        negative_prompt=negative_prompt,
                        num_inference_steps=20,
                        image=pose_pil,
                        guidance_scale=5,
                        width=gen_width,
                        height=gen_height,
                    ).images
                    generated_images.append(images[0])
    return formatted_prompts, generated_images
def wait_for_file(file_path, timeout=500):
    """
    Wait for a file to be created, with a specified timeout.

    Args:
        file_path (str): The path of the file to wait for.
        timeout (int): Maximum time to wait in seconds.

    Returns:
        bool: True if the file is created, False if timeout occurs.
    """
    start_time = time.time()
    while not os.path.exists(file_path):
        if time.time() - start_time > timeout:
            return False
        time.sleep(0.5)  # Check every 0.5 seconds
    return True
def generate_spirit_animal_video(driving_video_path):
    try:
        # Step 1: Extract the first frame
        cap = cv2.VideoCapture(driving_video_path)
        if not cap.isOpened():
            print("Error: Unable to open video.")
            return None
        ret, frame = cap.read()
        cap.release()
        if not ret:
            print("Error: Unable to read the first frame.")
            return None
        # Save the first frame
        first_frame_path = "./first_frame.jpg"
        cv2.imwrite(first_frame_path, frame)
        print(f"First frame saved to: {first_frame_path}")

        # Step 2: Generate the spirit animal image from the first frame
        _, input_image = generate_multiple_animals(first_frame_path, True, 1, height_multiplier=1)
        if not input_image:
            print("Error: Spirit animal generation failed.")
            return None
        spirit_animal_path = "./animal.jpeg"
        cv2.imwrite(spirit_animal_path, cv2.cvtColor(np.array(input_image[0]), cv2.COLOR_RGB2BGR))
        print(f"Spirit animal image saved to: {spirit_animal_path}")

        # Step 3: Run LivePortrait inference with the driving video
        output_path = "./animations/animal--uploaded_video_compressed.mp4"
        script_path = os.path.abspath("./LivePortrait/inference_animals.py")
        if not os.path.exists(script_path):
            print(f"Error: Inference script not found at {script_path}.")
            return None
        command = f"python {script_path} -s {spirit_animal_path} -d {driving_video_path} --driving_multiplier 1.75 --no_flag_stitching"
        print(f"Running command: {command}")
        result = os.system(command)
        if result != 0:
            print(f"Error: Command failed with exit code {result}.")
            return None

        # Verify output file exists
        if not os.path.exists(output_path):
            print(f"Error: Expected output video not found at {output_path}.")
            return None
        print(f"Output video generated at: {output_path}")
        return output_path
    except Exception as e:
        print(f"Error occurred: {e}")
        return None
def generate_spirit_animal(image, animal_type, background):
    if animal_type == "Single Animal":
        if background == "Preserve Background":
            prompt, generated_images = spirit_animal_with_background(image)
        else:
            prompt, generated_images = spirit_animal_baseline(image)
    elif animal_type == "Multiple Animals":
        if background == "Preserve Background":
            prompt, generated_images = generate_multiple_animals(image, keep_background=True)
        else:
            prompt, generated_images = generate_multiple_animals(image, keep_background=False)
    return prompt, generated_images
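# Illustrative usage (not executed): the dispatcher above can also be called directly,
# e.g. with one of the bundled example images, assuming it exists in the working directory.
# prompt, images = generate_spirit_animal("example1.jpg", "Single Animal", "Preserve Background")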
def compress_video(input_path, output_path, target_size_mb):
    target_size_bytes = target_size_mb * 1024 * 1024
    temp_output = "./temp_compressed.mp4"
    cap = cv2.VideoCapture(input_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    writer = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        writer.write(frame)
        frame_count += 1
    cap.release()
    writer.release()
    current_size = os.path.getsize(temp_output)
    if current_size > target_size_bytes:
        # Pick a video bitrate (bits per second) that fits the target size over the clip duration.
        duration_seconds = max(frame_count / fps, 1) if fps > 0 else 1
        bitrate = int(target_size_bytes * 8 / duration_seconds)
        os.system(f"ffmpeg -i {temp_output} -b:v {bitrate} -y {output_path}")
        os.remove(temp_output)
    else:
        shutil.move(temp_output, output_path)
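# Hedged note: the OpenCV pass above mainly re-encodes the frames; the actual size
# reduction comes from the ffmpeg bitrate pass. A single-pass alternative (not used here)
# would probe the source duration and call ffmpeg once, along the lines of:
#   ffmpeg -i input.mp4 -b:v <target_size_bytes * 8 / duration_seconds> -y output.mp4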
def process_video(video_file):
    compressed_path = "./uploaded_video_compressed.mp4"
    compress_video(video_file, compressed_path, target_size_mb=1)
    print(f"Compressed and moved video to: {compressed_path}")
    output_video_path = "./animations/animal--uploaded_video_compressed.mp4"
    generate_spirit_animal_video(compressed_path)
    # Wait until the output video is generated
    timeout = 1000  # Timeout in seconds
    if not wait_for_file(output_video_path, timeout=timeout):
        print("Timeout occurred while waiting for video generation.")
        return gr.update(value=None, visible=False)  # Hide output if failed
    # Return the generated video path
    print(f"Output video is ready: {output_video_path}")
    return gr.update(value=output_video_path, visible=True)  # Show video
| css = """ | |
| #title-container { | |
| font-family: 'Arial', sans-serif; | |
| color: #4a4a4a; | |
| text-align: center; | |
| margin-bottom: 20px; | |
| } | |
| #title-container h1 { | |
| font-size: 2.5em; | |
| font-weight: bold; | |
| color: #ff9900; | |
| } | |
| #title-container h2 { | |
| font-size: 1.2em; | |
| color: #6c757d; | |
| } | |
| #intro-text { | |
| font-size: 1em; | |
| color: #6c757d; | |
| margin: 50px; | |
| text-align: center; | |
| font-style: italic; | |
| } | |
| #prompt-output { | |
| font-family: 'Courier New', monospace; | |
| color: #5a5a5a; | |
| font-size: 1.1em; | |
| padding: 10px; | |
| background-color: #f9f9f9; | |
| border: 1px solid #ddd; | |
| border-radius: 5px; | |
| margin-top: 10px; | |
| } | |
| .examples-container { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 10px; | |
| justify-content: center; | |
| align-items: flex-start; | |
| } | |
| """ | |
# Title and description
title_html = """
<div id="title-container">
    <h1>Spirit Animal Generator</h1>
    <h2>Create your unique spirit animal with AI-assisted image generation.</h2>
</div>
"""
description_text = """
### Project Overview

Welcome to the Spirit Animal Generator! This tool leverages Stable Diffusion models to create unique visualizations of spirit animals from videos and images.

#### Key Features:
1. **Prompting**: A [GPT model](https://arxiv.org/abs/2305.10435) generates a descriptive prompt for each media input.
2. **Image Creation**: A [ControlNet model](https://arxiv.org/abs/2302.05543) generates animal images with pose control.
3. **Video Transformation**: The [LivePortrait model](https://arxiv.org/abs/2407.03168) generates an animal animation with the same facial expressions as the driving video.

---

### How It Works:
1. **Upload Your Media**:
   - Images: Use clear, high-resolution photos for better results.
   - Videos: Ensure the file is in MP4 format.
2. **Customize Options**:
   - For images, select the type of animal and background settings.
3. **View Your Results**:
   - Images will produce customized visual art along with a generated prompt.
   - Videos will be transformed into animal animations.

Discover your spirit animal and let your imagination run wild!

---
"""
with gr.Blocks(css=css) as demo:
    gr.HTML(title_html)
    gr.Markdown(description_text)
    with gr.Tabs():
        with gr.Tab("Generate Spirit Animal Image"):
            gr.Markdown("Upload an image to generate a spirit animal.")
            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(type="filepath", label="Upload an image")
                    animal_type = gr.Radio(choices=["Single Animal", "Multiple Animals"], label="Animal Type", value="Single Animal")
                    background_option = gr.Radio(choices=["Preserve Background", "Don't Preserve Background"], label="Background Option", value="Preserve Background")
                    generate_image_button = gr.Button("Generate Image")
                    gr.Examples(
                        examples=["example1.jpg", "example2.jpg", "example3.jpg"],
                        inputs=image_input,
                        label="Example Images"
                    )
                with gr.Column(scale=1):
                    generated_prompt = gr.Textbox(label="Generated Prompt")
                    generated_gallery = gr.Gallery(label="Generated Images")
            generate_image_button.click(
                fn=generate_spirit_animal,
                inputs=[image_input, animal_type, background_option],
                outputs=[generated_prompt, generated_gallery],
            )
        with gr.Tab("Generate Spirit Animal Video"):
            gr.Markdown("Upload a driving video to generate a spirit animal video.")
            with gr.Row():
                with gr.Column(scale=1):
                    video_input = gr.Video(label="Upload a driving video (MP4 format)")
                    generate_video_button = gr.Button("Generate Video")
                    gr.Examples(
                        examples=["video1.mp4", "video3.mp4", "video4.mp4"],
                        inputs=video_input,
                        label="Example Videos"
                    )
                with gr.Column(scale=1):
                    video_output = gr.Video(label="Generated Spirit Animal Video")
            generate_video_button.click(
                fn=process_video,
                inputs=video_input,
                outputs=video_output,
            )

demo.launch()