File size: 10,241 Bytes
58fd2d0
 
 
 
453ed2e
 
 
58fd2d0
 
 
6914f7a
453ed2e
00f6a78
453ed2e
58fd2d0
4984c7e
58fd2d0
be85eb8
58fd2d0
453ed2e
58fd2d0
96e351a
c000f9c
96e351a
453ed2e
58fd2d0
 
 
 
 
00f6a78
58fd2d0
 
 
 
 
 
766763f
58fd2d0
 
 
 
 
766763f
58fd2d0
9ad92f4
00f6a78
453ed2e
00f6a78
58fd2d0
 
 
 
 
7391723
58fd2d0
7391723
58fd2d0
7391723
58fd2d0
a29e3ba
58fd2d0
453ed2e
58fd2d0
 
 
453ed2e
 
 
58fd2d0
 
 
 
9ad92f4
58fd2d0
 
 
 
 
9ad92f4
 
 
 
4984c7e
58fd2d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4984c7e
 
58fd2d0
 
 
 
4984c7e
86d5e88
 
 
58fd2d0
b31f6c0
49ad6a5
58fd2d0
 
 
18274c1
453ed2e
811e3ea
453ed2e
 
01e1199
58fd2d0
 
 
4984c7e
453ed2e
58fd2d0
c000f9c
 
453ed2e
b31f6c0
a29e3ba
58fd2d0
7391723
 
a29e3ba
58fd2d0
 
 
 
 
a29e3ba
453ed2e
 
4984c7e
9ad92f4
 
453ed2e
4984c7e
 
 
58fd2d0
4984c7e
58fd2d0
 
4984c7e
58fd2d0
 
4984c7e
 
 
58fd2d0
4984c7e
 
 
 
58fd2d0
4984c7e
 
58fd2d0
4984c7e
c000f9c
58fd2d0
c000f9c
 
 
 
 
 
 
 
 
 
 
 
58fd2d0
c000f9c
 
 
 
49ad6a5
58fd2d0
 
 
 
c000f9c
453ed2e
 
e07df8b
 
58fd2d0
 
 
e07df8b
453ed2e
 
b770306
49ad6a5
 
58fd2d0
453ed2e
 
96e351a
4984c7e
58fd2d0
 
 
 
 
 
453ed2e
 
58fd2d0
 
 
 
 
 
453ed2e
 
4984c7e
96e351a
 
 
 
dc7aed1
58fd2d0
fc70300
86d5e88
 
b31f6c0
ad4d288
1a833ba
a86f74c
58fd2d0
 
 
453ed2e
86d5e88
 
b31f6c0
ad4d288
453ed2e
a86f74c
58fd2d0
 
 
18274c1
c000f9c
 
 
 
 
 
 
58fd2d0
453ed2e
 
e266395
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
import os
import time
import random
import tempfile
import torch
import gradio as gr
from PIL import Image

import spaces
from gradio import processing_utils, utils

from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    StableDiffusionControlNetPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    StableDiffusionLatentUpscalePipeline,
    DPMSolverMultistepScheduler,
    EulerDiscreteScheduler,
)

from share_btn import community_icon_html, loading_icon_html, share_js
import user_history
from illusion_style import css

# -----------------------------
# Device & dtype (GPU/CPU auto)
# -----------------------------
# Use the GPU in half precision when CUDA is available; otherwise run on the
# CPU in full precision.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32

# -----------------------------
# Base / ControlNet models
# -----------------------------
BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"
VAE_ID = "stabilityai/sd-vae-ft-mse"
CONTROLNET_ID = "monster-labs/control_v1p_sd15_qrcode_monster"

# -----------------------------
# Load components
# -----------------------------
# The VAE and the ControlNet are loaded separately and injected into the
# base pipeline below.
vae = AutoencoderKL.from_pretrained(VAE_ID, torch_dtype=dtype)
controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=dtype)

# Warning: the safety checker and its CLIP feature extractor are explicitly
# disabled (both passed as None).
main_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    vae=vae,
    controlnet=controlnet,
    safety_checker=None,
    feature_extractor=None,
).to(device)

# The Img2Img pipeline is built from the components of `main_pipe` instead of
# loading the models a second time.
image_pipe = StableDiffusionControlNetImg2ImgPipeline(**main_pipe.components).to(device)

# -----------------------------
# Sampler map
# -----------------------------
# Maps the sampler name offered in the UI to a factory that takes an existing
# scheduler config and returns the matching diffusers scheduler instance.
SAMPLER_MAP = {
    # DPM-Solver++ (SDE variant) with Karras sigmas. The scheduler option is
    # spelled `use_karras_sigmas`; `use_karras` is not a recognised argument,
    # so passing it leaves the Karras schedule disabled.
    "DPM++ Karras SDE": lambda config: DPMSolverMultistepScheduler.from_config(
        config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
    ),
    "Euler": lambda config: EulerDiscreteScheduler.from_config(config),
}

# -----------------------------
# Helpers
# -----------------------------
def center_crop_resize(img: Image.Image, output_size=(512, 512)):
    """Cut the largest centred square out of *img* and resize it.

    Args:
        img: Source image; only ``size``, ``crop`` and ``resize`` are used.
        output_size: ``(width, height)`` handed to ``resize``.

    Returns:
        The cropped and resized image.
    """
    w, h = img.size
    side = min(w, h)
    # The square is centred on both axes. Coordinates are floats and end in
    # .5 when the difference between the two dimensions is odd.
    box = (
        (w - side) / 2,
        (h - side) / 2,
        (w + side) / 2,
        (h + side) / 2,
    )
    return img.crop(box).resize(output_size)

def common_upscale(samples, width, height, upscale_method, crop=False):
    """Resize a batch of samples to ``(height, width)``.

    Args:
        samples: Tensor indexed as ``[:, :, rows, cols]`` (dim 2 is treated
            as height and dim 3 as width).
        width: Target width.
        height: Target height.
        upscale_method: Passed as ``mode`` to
            ``torch.nn.functional.interpolate``.
        crop: When equal to ``"center"``, the input is first trimmed
            symmetrically so its aspect ratio matches the target. Any other
            value skips the trim.

    Returns:
        The interpolated tensor.
    """
    if crop == "center":
        src_h, src_w = samples.shape[2], samples.shape[3]
        src_ratio = src_w / src_h
        dst_ratio = width / height
        trim_x = trim_y = 0
        if src_ratio > dst_ratio:
            # Source is too wide: drop columns evenly from both sides.
            trim_x = round((src_w - src_w * (dst_ratio / src_ratio)) / 2)
        elif src_ratio < dst_ratio:
            # Source is too tall: drop rows evenly from top and bottom.
            trim_y = round((src_h - src_h * (src_ratio / dst_ratio)) / 2)
        samples = samples[:, :, trim_y : src_h - trim_y, trim_x : src_w - trim_x]
    return torch.nn.functional.interpolate(
        samples, size=(height, width), mode=upscale_method
    )

def upscale(samples, upscale_method, scale_by):
    """Scale ``samples["images"]`` by ``scale_by`` without cropping.

    Args:
        samples: Mapping-like object whose ``"images"`` entry is the tensor
            to resize (dim 2 is treated as height, dim 3 as width).
        upscale_method: Interpolation mode forwarded to ``common_upscale``.
        scale_by: Multiplier applied to both height and width; the results
            are rounded to integers.

    Returns:
        The resized tensor.
    """
    images = samples["images"]
    target_w = round(images.shape[3] * scale_by)
    target_h = round(images.shape[2] * scale_by)
    return common_upscale(images, target_w, target_h, upscale_method, "disabled")

def check_inputs(prompt: str, control_image: Image.Image):
    """Validate the form before inference is started.

    Args:
        prompt: Text prompt; must be non-empty.
        control_image: Input illusion image; must not be ``None``.

    Raises:
        gr.Error: If the image is missing, or (checked second) if the prompt
            is empty.
    """
    message = None
    if control_image is None:
        message = "Please select or upload an Input Illusion"
    elif not prompt:
        message = "Prompt is required"

    if message is not None:
        raise gr.Error(message)

# -----------------------------
# Inference
# -----------------------------
@spaces.GPU
def inference(
    control_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float = 8.0,
    controlnet_conditioning_scale: float = 1.0,
    control_guidance_start: float = 1.0,
    control_guidance_end: float = 1.0,
    upscaler_strength: float = 0.5,
    seed: int = -1,
    sampler: str = "DPM++ Karras SDE",
    progress=gr.Progress(track_tqdm=True),
    profile: gr.OAuthProfile | None = None,
):
    """Generate an illusion image in two passes and store it in the user history.

    The control image is centre-cropped to 512x512 and 1024x1024. A first
    ControlNet pass (15 steps) produces latents at the small size, the
    latents are upscaled 2x with nearest-exact interpolation, and a second
    ControlNet img2img pass (20 steps) produces the final image.

    Args:
        control_image: Illusion pattern used as the ControlNet condition.
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        guidance_scale: Guidance scale used in both passes.
        controlnet_conditioning_scale: ControlNet strength used in both passes.
        control_guidance_start: Forwarded to both pipelines.
        control_guidance_end: Forwarded to both pipelines.
        upscaler_strength: ``strength`` of the second (img2img) pass.
        seed: Seed for the generator; ``-1`` draws a random seed in
            ``[0, 2**32 - 1]``.
        sampler: Key into ``SAMPLER_MAP`` selecting the scheduler.
        progress: Not read in the body; presumably consumed by Gradio to
            display tqdm progress.
        profile: Passed through to ``user_history.save_image``.

    Returns:
        A 4-tuple ``(image, gr.update(visible=True), gr.update(visible=True),
        seed_used)``.

    Raises:
        KeyError: If ``sampler`` is not a key of ``SAMPLER_MAP``.
    """
    # NOTE(review): the defaults for control_guidance_start/end are both 1.0,
    # while the UI sliders pass 0.0 and 1.0. Confirm the defaults are usable
    # before calling this function without explicit values.
    control_image_small = center_crop_resize(control_image, (512, 512))
    control_image_large = center_crop_resize(control_image, (1024, 1024))

    # NOTE(review): only main_pipe's scheduler is replaced here. image_pipe
    # was built from main_pipe.components at import time and presumably keeps
    # the scheduler it was constructed with. Confirm this is intended.
    main_pipe.scheduler = SAMPLER_MAP[sampler](main_pipe.scheduler.config)

    my_seed = random.randint(0, 2**32 - 1) if seed == -1 else int(seed)
    # One generator is shared by both passes, so the second pass continues
    # from the random state left by the first.
    generator = torch.Generator(device=device).manual_seed(my_seed)

    # First pass -> latents
    out = main_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=control_image_small,
        guidance_scale=float(guidance_scale),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        generator=generator,
        control_guidance_start=float(control_guidance_start),
        control_guidance_end=float(control_guidance_end),
        num_inference_steps=15,
        output_type="latent",
    )

    # Upscale latents
    upscaled_latents = upscale(out, "nearest-exact", 2)

    # Second pass -> image
    out_image = image_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        control_image=control_image_large,
        image=upscaled_latents,
        guidance_scale=float(guidance_scale),
        generator=generator,
        num_inference_steps=20,
        strength=float(upscaler_strength),
        control_guidance_start=float(control_guidance_start),
        control_guidance_end=float(control_guidance_end),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
    )
    final_image = out_image["images"][0]

    # Save history
    user_history.save_image(
        label=prompt,
        image=final_image,
        profile=profile,
        metadata={
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "guidance_scale": guidance_scale,
            "controlnet_conditioning_scale": controlnet_conditioning_scale,
            "control_guidance_start": control_guidance_start,
            "control_guidance_end": control_guidance_end,
            "upscaler_strength": upscaler_strength,
            "seed": my_seed,
            "sampler": sampler,
        },
    )

    return final_image, gr.update(visible=True), gr.update(visible=True), my_seed

# -----------------------------
# UI
# -----------------------------
# Demo UI: inputs and options in the left column, the generated image and the
# share controls in the right column.
with gr.Blocks() as app:
    gr.Markdown(
        '''
        <div style="text-align: center;">
            <h1>Illusion Diffusion HQ 🌀</h1>
            <p style="font-size:16px;">Generate high-quality illusion artwork with Stable Diffusion + ControlNet</p>
            <p>A space by AP with contributions from the community.</p>
            <p>This uses <a href="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster">Monster Labs QR ControlNet</a>.</p>
        </div>
        '''
    )

    # NOTE(review): these two states are created but not referenced by any
    # event in this block. They may be used by the share JS; confirm before
    # removing.
    state_img_input = gr.State()
    state_img_output = gr.State()

    with gr.Row():
        with gr.Column():
            control_image = gr.Image(label="Input Illusion", type="pil", elem_id="control_image")
            controlnet_conditioning_scale = gr.Slider(minimum=0.0, maximum=5.0, step=0.01, value=0.8, label="Illusion strength", elem_id="illusion_strength", info="ControlNet conditioning scale")
            gr.Examples(
                examples=["checkers.png", "checkers_mid.jpg", "pattern.png", "ultra_checkers.png", "spiral.jpeg", "funky.jpeg"],
                inputs=control_image
            )
            prompt = gr.Textbox(label="Prompt", elem_id="prompt", info="Type what you want to generate", placeholder="Medieval village scene with busy streets and a castle in the distance")
            negative_prompt = gr.Textbox(label="Negative Prompt", info="What you do NOT want", value="low quality, blurry", elem_id="negative_prompt")
            with gr.Accordion(label="Advanced Options", open=False):
                guidance_scale = gr.Slider(minimum=0.0, maximum=50.0, step=0.25, value=7.5, label="Guidance Scale")
                sampler = gr.Dropdown(choices=list(SAMPLER_MAP.keys()), value="Euler", label="Sampler")
                control_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.0, label="Start of ControlNet")
                control_end = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="End of ControlNet")
                strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Strength of the upscaler")
                seed = gr.Slider(minimum=-1, maximum=9999999999, step=1, value=-1, label="Seed", info="-1 = random")
                used_seed = gr.Number(label="Last seed used", interactive=False)
            run_btn = gr.Button("Run")
        with gr.Column():
            result_image = gr.Image(label="Illusion Diffusion Output", interactive=False, elem_id="output")
            with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
                community_icon = gr.HTML(community_icon_html)
                loading_icon = gr.HTML(loading_icon_html)
                share_button = gr.Button("Share to community", elem_id="share-btn")

    # Wire up
    # The order of `inference_inputs` must match the positional parameters of
    # `inference`.
    inference_inputs = [
        control_image,
        prompt,
        negative_prompt,
        guidance_scale,
        controlnet_conditioning_scale,
        control_start,
        control_end,
        strength,
        seed,
        sampler,
    ]
    # `inference` returns four values. The first two both target
    # `result_image` (the image itself, then a visible=True update), the third
    # reveals the share group and the fourth is the seed that was used.
    inference_outputs = [result_image, result_image, share_group, used_seed]

    # Pressing Enter in the prompt box and clicking "Run" behave identically:
    # validate the form first and run inference only if validation succeeded.
    for trigger in (prompt.submit, run_btn.click):
        trigger(
            check_inputs,
            inputs=[prompt, control_image],
            queue=False,
        ).success(
            inference,
            inputs=inference_inputs,
            outputs=inference_outputs,
        )

    share_button.click(None, [], [], js=share_js)

# Top-level application: the demo UI in one tab and whatever
# `user_history.render()` produces (labelled "Past generations") in another.
# The custom CSS is applied here rather than on the inner `app`.
with gr.Blocks(css=css) as app_with_history:
    with gr.Tab("Demo"):
        # Render the `app` Blocks defined above inside this tab.
        app.render()
    with gr.Tab("Past generations"):
        user_history.render()

# Enable Gradio's queue with max_size=20 and api_open=False.
app_with_history.queue(max_size=20, api_open=False)

# Launch the server only when this file is executed as a script.
if __name__ == "__main__":
    app_with_history.launch(max_threads=400)