import torch
import gradio as gr
from PIL import Image
import numpy as np
from transformers import CLIPImageProcessor, CLIPVisionModel
from diffusers import AutoencoderKL, DDPMScheduler
from src.diffusers.models.referencenet.referencenet_unet_2d_condition import (
ReferenceNetModel,
)
from src.diffusers.models.referencenet.unet_2d_condition import UNet2DConditionModel
from src.diffusers.pipelines.referencenet.pipeline_referencenet import (
StableDiffusionReferenceNetPipeline,
)
from utils.anonymize_faces_in_image import anonymize_faces_in_image
import face_alignment
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
def load_pipeline():
face_model_id = "hkung/face-anon-simple"
clip_model_id = "openai/clip-vit-large-patch14"
sd_model_id = "stabilityai/stable-diffusion-2-1"
unet = UNet2DConditionModel.from_pretrained(
face_model_id, subfolder="unet", use_safetensors=True
)
referencenet = ReferenceNetModel.from_pretrained(
face_model_id, subfolder="referencenet", use_safetensors=True
)
conditioning_referencenet = ReferenceNetModel.from_pretrained(
face_model_id, subfolder="conditioning_referencenet", use_safetensors=True
)
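    # VAE and noise scheduler are reused from the Stable Diffusion 2.1 base model.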
vae = AutoencoderKL.from_pretrained(
sd_model_id, subfolder="vae", use_safetensors=True
)
scheduler = DDPMScheduler.from_pretrained(
sd_model_id, subfolder="scheduler", use_safetensors=True
)
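    # CLIP image processor and vision encoder provide the image-conditioning features.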
feature_extractor = CLIPImageProcessor.from_pretrained(
clip_model_id, use_safetensors=True
)
image_encoder = CLIPVisionModel.from_pretrained(
clip_model_id, use_safetensors=True
)
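    # Wire all components into the custom ReferenceNet pipeline and move it to the target device.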
pipe = StableDiffusionReferenceNetPipeline(
unet=unet,
referencenet=referencenet,
conditioning_referencenet=conditioning_referencenet,
vae=vae,
feature_extractor=feature_extractor,
image_encoder=image_encoder,
scheduler=scheduler,
)
pipe = pipe.to(DEVICE)
return pipe
# Load the heavy models once at startup (better UX and avoids reloading them on every request)
pipe = load_pipeline()
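# Fixed seed so repeated runs on the same image yield the same anonymization.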
generator = torch.manual_seed(1)
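# 2D landmark detector (SFD face detector) used to locate faces before anonymization.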
fa = face_alignment.FaceAlignment(
face_alignment.LandmarksType.TWO_D,
face_detector="sfd",
device=DEVICE,
)
def anonymize(
image: np.ndarray,
anonymization_degree: float = 1.25,
num_inference_steps: int = 25,
guidance_scale: float = 4.0,
):
"""
Gradio callback: takes an RGB numpy image and returns anonymized PIL image.
"""
if image is None:
return None
pil_image = Image.fromarray(image)
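    # Delegate to the helper, which runs the diffusion pipeline on the detected 512x512 face crops.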
anon_image = anonymize_faces_in_image(
image=pil_image,
face_alignment=fa,
pipe=pipe,
generator=generator,
face_image_size=512,
num_inference_steps=int(num_inference_steps),
guidance_scale=float(guidance_scale),
anonymization_degree=float(anonymization_degree),
)
return anon_image
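# Gradio UI: image input plus sliders for anonymization strength, diffusion steps, and guidance scale.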
demo = gr.Interface(
fn=anonymize,
inputs=[
gr.Image(type="numpy", label="Input image"),
gr.Slider(
minimum=0.5,
maximum=2.0,
step=0.05,
value=1.25,
label="Anonymization strength",
),
gr.Slider(
minimum=10,
maximum=50,
step=1,
value=25,
label="Diffusion steps (speed vs quality)",
),
gr.Slider(
minimum=1.0,
maximum=10.0,
step=0.1,
value=4.0,
label="Guidance scale",
),
],
outputs=gr.Image(type="pil", label="Anonymized image"),
title="Face Anonymization Made Simple",
description=(
"Upload a photo and anonymize all faces using the WACV 2025 "
"\"Face Anonymization Made Simple\" model."
),
)
if __name__ == "__main__":
    demo.launch()