# anonymizeface / app.py
import face_alignment
import gradio as gr
import numpy as np
import torch
from diffusers import AutoencoderKL, DDPMScheduler
from PIL import Image
from transformers import CLIPImageProcessor, CLIPVisionModel

from src.diffusers.models.referencenet.referencenet_unet_2d_condition import (
    ReferenceNetModel,
)
from src.diffusers.models.referencenet.unet_2d_condition import UNet2DConditionModel
from src.diffusers.pipelines.referencenet.pipeline_referencenet import (
    StableDiffusionReferenceNetPipeline,
)
from utils.anonymize_faces_in_image import anonymize_faces_in_image

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def load_pipeline():
    """Assemble the face-anonymization pipeline and move it to DEVICE."""
    face_model_id = "hkung/face-anon-simple"
    clip_model_id = "openai/clip-vit-large-patch14"
    sd_model_id = "stabilityai/stable-diffusion-2-1"

    # Denoising UNet and the two ReferenceNets from the face-anon checkpoint.
    unet = UNet2DConditionModel.from_pretrained(
        face_model_id, subfolder="unet", use_safetensors=True
    )
    referencenet = ReferenceNetModel.from_pretrained(
        face_model_id, subfolder="referencenet", use_safetensors=True
    )
    conditioning_referencenet = ReferenceNetModel.from_pretrained(
        face_model_id, subfolder="conditioning_referencenet", use_safetensors=True
    )

    # VAE and noise scheduler come from the Stable Diffusion 2.1 base model.
    vae = AutoencoderKL.from_pretrained(
        sd_model_id, subfolder="vae", use_safetensors=True
    )
    scheduler = DDPMScheduler.from_pretrained(sd_model_id, subfolder="scheduler")

    # CLIP preprocessing and vision encoder for image conditioning.
    feature_extractor = CLIPImageProcessor.from_pretrained(clip_model_id)
    image_encoder = CLIPVisionModel.from_pretrained(
        clip_model_id, use_safetensors=True
    )

    pipe = StableDiffusionReferenceNetPipeline(
        unet=unet,
        referencenet=referencenet,
        conditioning_referencenet=conditioning_referencenet,
        vae=vae,
        feature_extractor=feature_extractor,
        image_encoder=image_encoder,
        scheduler=scheduler,
    )
    return pipe.to(DEVICE)
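
# Optional memory-saving sketch (an assumption, not verified for this fork):
# if StableDiffusionReferenceNetPipeline inherits the standard diffusers
# DiffusionPipeline helpers, attention slicing can trade a little speed for
# lower VRAM on smaller GPUs:
#     pipe = load_pipeline()
#     pipe.enable_attention_slicing()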

# Load the heavy models once at startup: faster per-request latency and no
# wasted compute from reloading on every call.
pipe = load_pipeline()

# Fixed seed so repeated runs on the same image produce the same faces.
generator = torch.manual_seed(1)

# 2D facial-landmark model with the S3FD face detector, used to locate and
# align faces before they are handed to the diffusion pipeline.
fa = face_alignment.FaceAlignment(
    face_alignment.LandmarksType.TWO_D,
    face_detector="sfd",
    device=DEVICE,
)
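
# Sketch, not required for this app: `generator` above is a CPU generator,
# which standard diffusers pipelines accept even when sampling on CUDA; a
# device-local alternative would be:
#     generator = torch.Generator(device=DEVICE).manual_seed(1)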


def anonymize(
    image: np.ndarray,
    anonymization_degree: float = 1.25,
    num_inference_steps: int = 25,
    guidance_scale: float = 4.0,
):
    """Gradio callback: take an RGB numpy array, return the anonymized image as a PIL.Image."""
    if image is None:
        return None
    pil_image = Image.fromarray(image)
    anon_image = anonymize_faces_in_image(
        image=pil_image,
        face_alignment=fa,
        pipe=pipe,
        generator=generator,
        face_image_size=512,
        num_inference_steps=int(num_inference_steps),
        guidance_scale=float(guidance_scale),
        anonymization_degree=float(anonymization_degree),
    )
    return anon_image
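
# Example of calling the callback directly (hypothetical file names), e.g. as
# a quick smoke test without the UI:
#     img = np.asarray(Image.open("group_photo.jpg").convert("RGB"))
#     anonymize(img).save("group_photo_anon.png")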

demo = gr.Interface(
    fn=anonymize,
    inputs=[
        gr.Image(type="numpy", label="Input image"),
        gr.Slider(
            minimum=0.5,
            maximum=2.0,
            step=0.05,
            value=1.25,
            label="Anonymization strength",
        ),
        gr.Slider(
            minimum=10,
            maximum=50,
            step=1,
            value=25,
            label="Diffusion steps (speed vs quality)",
        ),
        gr.Slider(
            minimum=1.0,
            maximum=10.0,
            step=0.1,
            value=4.0,
            label="Guidance scale",
        ),
    ],
    outputs=gr.Image(type="pil", label="Anonymized image"),
    title="Face Anonymization Made Simple",
    description=(
        "Upload a photo and anonymize all faces using the WACV 2025 "
        "\"Face Anonymization Made Simple\" model."
    ),
)
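
# Note: on Hugging Face Spaces the plain demo.launch() below is what the
# platform expects; when running locally, Gradio can also expose a temporary
# public URL via demo.launch(share=True).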

if __name__ == "__main__":
    demo.launch()