Fibo-local / app.py
linoyts's picture
linoyts HF Staff
Update app.py
52cbc87 verified
raw
history blame
10.5 kB
import gradio as gr
import random
import os
import spaces
import torch
import time
import json
import numpy as np
from diffusers import BriaFiboPipeline
from diffusers.modular_pipelines import ModularPipeline
from optimization import optimize_pipeline_
# resolutions=[
# "832 1248",
# "896 1152",
# "960 1088",
# "1024 1024",
# "1088 960",
# "1152 896",
# "1216 832",
# "1280 800",
# "1344 768",
# ]
# Upper bound for the seed slider / random seed: largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# This app only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

dtype = torch.bfloat16
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Pipeline used in `infer` to turn the free-text prompt into a JSON prompt.
vlm_pipe = ModularPipeline.from_pretrained(
    "briaai/FIBO-VLM-prompt-to-JSON",
    trust_remote_code=True,
).to(device)

# Image-generation pipeline, loaded in bfloat16.
pipe = BriaFiboPipeline.from_pretrained(
    "briaai/FIBO",
    trust_remote_code=True,
    torch_dtype=dtype,
).to(device)
# Example structured prompt (JSON text) describing a full scene: objects,
# background, lighting, aesthetics, photographic characteristics and text.
# It is only used as the sample input handed to `optimize_pipeline_` below.
test_prompt_json = """
{
"short_description": "A surreal and whimsical scene featuring a man, a woman, and a dog posed against a tri-colored backdrop. The woman stands in front of the red section, wearing a t-shirt with a Yoda motif and a skirt with birds. The dog, dressed as a superdog, sits on a checkerboard chair in front of the white section, with a blue tennis ball in its mouth. The man, in a purple suit, stands in front of the gold section, holding a tree branch with a blue jay. The backdrop is divided into red, white, and gold sections, with a small metal grating in the top left and a tear in the gold section. A rustic framed oil painting of the pyramids hangs above the dog.",
"objects": [
{
"description": "A woman standing in front of the red backdrop. She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it. Her right hand is holding an axe.",
"location": "Center-left",
"relationship": "She is positioned in front of the red backdrop and to the left of the dog and man.",
"relative_size": "Medium",
"shape_and_color": "Humanoid shape, beige and multicolored clothing.",
"appearance_details": "She has a long skirt with birds on it and is holding an axe.",
"pose": "Standing upright with a slight tilt to the right.",
"expression": "Neutral",
"clothing": "She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it.",
"action": "Standing",
"gender": "Female",
"skin_tone_and_texture": "Fair, smooth."
},
{
"description": "A dog dressed as a superdog, sitting on a checkerboard chair in front of the white backdrop. It has a blue tennis ball in its mouth.",
"location": "Center",
"relationship": "It is positioned in front of the white backdrop and between the woman and the man.",
"relative_size": "Medium",
"shape_and_color": "Canine shape, brown and white fur, blue tennis ball.",
"appearance_details": "It is dressed as a superdog and has a blue tennis ball in its mouth.",
"pose": "Sitting upright.",
"expression": "Neutral",
"clothing": "Superdog costume.",
"action": "Sitting",
"gender": "Male",
"skin_tone_and_texture": "Brown and white fur, soft."
},
{
"description": "A man standing in front of the gold backdrop. He is wearing a three piece purple suit and has spiky blue hair. His left hand is holding a tree branch with a blue jay on it.",
"location": "Center-right",
"relationship": "He is positioned in front of the gold backdrop and to the right of the woman and dog.",
"relative_size": "Medium",
"shape_and_color": "Humanoid shape, purple suit, blue hair.",
"appearance_details": "He has spiky blue hair and is holding a tree branch with a blue jay on it.",
"pose": "Standing upright with a slight tilt to the left.",
"expression": "Neutral",
"clothing": "He is wearing a three piece purple suit.",
"action": "Standing",
"gender": "Male",
"skin_tone_and_texture": "Fair, smooth."
},
{
"description": "A checkerboard armchair in yellow and brown.",
"location": "Bottom-center",
"relationship": "The dog is sitting on the chair.",
"relative_size": "Small",
"shape_and_color": "Chair shape, yellow and brown.",
"texture": "Smooth. End of texture answer.",
"appearance_details": "The chair is a checkerboard armchair in yellow and brown."
},
{
"description": "A rustic framed oil painting of the pyramids.",
"location": "Top-center",
"relationship": "The painting is hanging above the dog.",
"relative_size": "Small",
"shape_and_color": "Rectangular shape, brown frame, yellow and brown pyramids.",
"texture": "Rough. End of texture answer.",
"appearance_details": "The painting is a rustic framed oil painting of the pyramids."
}
],
"background_setting": "The background is a tri-colored backdrop divided equally into red, white, and gold sections. There is a small rectangular metal grating in the top left corner and a subtle tear in the gold backdrop in the bottom right corner.",
"lighting": {
"conditions": "Studio lighting",
"direction": "Front-lit",
"shadows": "Soft shadows are present, indicating diffused lighting."
},
"aesthetics": {
"composition": "The composition is centered, with the three figures arranged in a row. The backdrop is divided into thirds, creating a symmetrical balance.",
"color_scheme": "The color scheme is triadic, with red, white, and gold dominating the backdrop, complemented by the various colors of the figures' clothing and accessories.",
"mood_atmosphere": "The mood is whimsical and surreal, with a touch of humor due to the unusual costumes and props.",
"preference_score": "high",
"aesthetic_score": "high"
},
"photographic_characteristics": {
"depth_of_field": "Deep",
"focus": "Sharp focus on all subjects",
"camera_angle": "Eye-level",
"lens_focal_length": "Standard"
},
"style_medium": "Photograph",
"text_render": [
{
"text": "Yoda",
"location": "Center of the woman's t-shirt",
"size": "Small",
"color": "Beige",
"font": "Cartoonish",
"appearance_details": "The text is part of a graphic design on the t-shirt."
}
],
"context": "This is a surreal and whimsical portrait of a man, a woman, and a dog posed against a tri-colored backdrop. It could be an art piece or a promotional image for a quirky event or product.",
"artistic_style": "Surreal Pop"
}
"""
# Runs once at import time, before the UI is built. The trailing underscore
# suggests `pipe` is modified in place.
# NOTE(review): presumably this compiles/warms up the pipeline using the sample
# prompt above - confirm against the local `optimization` module.
optimize_pipeline_(pipe, test_prompt_json)
def handle_json(text):
    """Return *text* unchanged if it is valid JSON, otherwise ``"Error"``.

    Args:
        text: Candidate JSON document (normally a ``str``).

    Returns:
        The original ``text`` when ``json.loads`` accepts it, or the literal
        string ``"Error"`` when it cannot be parsed.
    """
    try:
        json.loads(text)
    # ValueError covers json.JSONDecodeError, TypeError covers non-string
    # input such as None, RecursionError covers pathologically nested input.
    # Unlike a bare ``except``, KeyboardInterrupt/SystemExit still propagate.
    except (TypeError, ValueError, RecursionError):
        return "Error"
    return text
@spaces.GPU(duration=100)
def infer(prompt,
          negative_prompt="",
          seed=42,
          randomize_seed=False,
          width=1024,
          height=1024,
          guidance_scale=5,
          num_inference_steps=50,
          ):
    """Generate an image from a free-text prompt.

    The prompt is first converted to a JSON prompt by ``vlm_pipe``; that JSON
    prompt is then passed to the image pipeline ``pipe``.

    Args:
        prompt: Free-text description entered by the user.
        negative_prompt: Negative prompt forwarded unchanged to ``pipe``.
        seed: Seed for the image pipeline's random generator.
        randomize_seed: When true, ``seed`` is replaced by a random value in
            ``[0, MAX_SEED]``.
        width: Output width in pixels.
        height: Output height in pixels.
        guidance_scale: Guidance scale forwarded to ``pipe``.
        num_inference_steps: Number of denoising steps.

    Returns:
        A ``(image, json_prompt)`` tuple: the first generated image and the
        JSON prompt produced by ``vlm_pipe``.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # The seed was previously computed but never handed to the pipeline, so
    # the seed controls had no effect. Slider values may arrive as floats,
    # hence the int() coercions.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    with torch.inference_mode():
        # 1. Turn the free-text prompt into a structured JSON prompt.
        output = vlm_pipe(prompt=prompt)
        json_prompt = output.values["json_prompt"]

        # 2. Render the image from the JSON prompt.
        image = pipe(
            prompt=json_prompt,
            num_inference_steps=int(num_inference_steps),
            negative_prompt=negative_prompt,
            width=int(width),
            height=int(height),
            guidance_scale=guidance_scale,
            generator=generator,
        ).images[0]

    return image, json_prompt
# Centers the main column and caps its width.
css = """
#col-container{
margin: 0 auto;
max-width: 768px;
}
"""

# UI layout: prompt box + generated JSON, generate button, output image and
# an "Advanced Settings" accordion holding the sampling controls.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Title corrected from "FOBI": the model served here is briaai/FIBO.
        gr.Markdown("## FIBO")
        with gr.Group():
            with gr.Column():
                with gr.Row():
                    prompt_in = gr.Textbox(label="Prompt")
                    prompt_in_json = gr.JSON(label="Json")
                submit_btn = gr.Button("Generate")
        result = gr.Image(label="output")
        with gr.Accordion("Advanced Settings", open=False):
            with gr.Row():
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="guidance scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=5.0
                )
                num_inference_steps = gr.Slider(
                    label="number of inference steps",
                    minimum=1,
                    maximum=60,
                    step=1,
                    value=50,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=768,
                    maximum=1248,
                    step=32,
                    value=1024,
                )
                width = gr.Slider(
                    label="Width",
                    minimum=832,
                    maximum=1344,
                    step=64,
                    value=1024,
                )
            with gr.Row():
                negative_prompt = gr.Textbox(label="negative prompt", value=json.dumps(''))
                negative_prompt_json = gr.JSON(label="json negative prompt", value=json.dumps(''))
    # prompt_in.change(
    #     handle_json,
    #     inputs=prompt_in,
    #     outputs=prompt_in_json)
    # negative_prompt.change(handle_json, inputs=negative_prompt, outputs=negative_prompt_json)

    # Input order must match the positional parameters of `infer`.
    submit_btn.click(
        fn=infer,
        inputs=[
            prompt_in,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[
            result, prompt_in_json
        ]
    )

demo.queue().launch()