Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import random | |
| import os | |
| import spaces | |
| import torch | |
| import time | |
| import json | |
| import numpy as np | |
| from diffusers import BriaFiboPipeline | |
| from diffusers.modular_pipelines import ModularPipeline | |
| from optimization import optimize_pipeline_ | |
| # resolutions=[ | |
| # "832 1248", | |
| # "896 1152", | |
| # "960 1088", | |
| # "1024 1024", | |
| # "1088 960", | |
| # "1152 896", | |
| # "1216 832", | |
| # "1280 800", | |
| # "1344 768", | |
| # ] | |
# Largest value of a signed 32-bit integer; used as the upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max

# The image pipeline is loaded with bfloat16 weights.
dtype = torch.bfloat16

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# This app only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

# Pipeline that turns a free-text prompt into a structured JSON prompt
# (see its use in `infer`).
vlm_pipe = ModularPipeline.from_pretrained(
    "briaai/FIBO-VLM-prompt-to-JSON",
    trust_remote_code=True,
)
vlm_pipe = vlm_pipe.to(device)

# Image-generation pipeline that consumes the structured prompt.
pipe = BriaFiboPipeline.from_pretrained(
    "briaai/FIBO",
    trust_remote_code=True,
    torch_dtype=dtype,
)
pipe = pipe.to(device)
# Sample structured prompt: a JSON document kept as a plain string. It is the
# second argument of the optimize_pipeline_ call at the end of this block.
| test_prompt_json = """ | |
| { | |
| "short_description": "A surreal and whimsical scene featuring a man, a woman, and a dog posed against a tri-colored backdrop. The woman stands in front of the red section, wearing a t-shirt with a Yoda motif and a skirt with birds. The dog, dressed as a superdog, sits on a checkerboard chair in front of the white section, with a blue tennis ball in its mouth. The man, in a purple suit, stands in front of the gold section, holding a tree branch with a blue jay. The backdrop is divided into red, white, and gold sections, with a small metal grating in the top left and a tear in the gold section. A rustic framed oil painting of the pyramids hangs above the dog.", | |
| "objects": [ | |
| { | |
| "description": "A woman standing in front of the red backdrop. She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it. Her right hand is holding an axe.", | |
| "location": "Center-left", | |
| "relationship": "She is positioned in front of the red backdrop and to the left of the dog and man.", | |
| "relative_size": "Medium", | |
| "shape_and_color": "Humanoid shape, beige and multicolored clothing.", | |
| "appearance_details": "She has a long skirt with birds on it and is holding an axe.", | |
| "pose": "Standing upright with a slight tilt to the right.", | |
| "expression": "Neutral", | |
| "clothing": "She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it.", | |
| "action": "Standing", | |
| "gender": "Female", | |
| "skin_tone_and_texture": "Fair, smooth." | |
| }, | |
| { | |
| "description": "A dog dressed as a superdog, sitting on a checkerboard chair in front of the white backdrop. It has a blue tennis ball in its mouth.", | |
| "location": "Center", | |
| "relationship": "It is positioned in front of the white backdrop and between the woman and the man.", | |
| "relative_size": "Medium", | |
| "shape_and_color": "Canine shape, brown and white fur, blue tennis ball.", | |
| "appearance_details": "It is dressed as a superdog and has a blue tennis ball in its mouth.", | |
| "pose": "Sitting upright.", | |
| "expression": "Neutral", | |
| "clothing": "Superdog costume.", | |
| "action": "Sitting", | |
| "gender": "Male", | |
| "skin_tone_and_texture": "Brown and white fur, soft." | |
| }, | |
| { | |
| "description": "A man standing in front of the gold backdrop. He is wearing a three piece purple suit and has spiky blue hair. His left hand is holding a tree branch with a blue jay on it.", | |
| "location": "Center-right", | |
| "relationship": "He is positioned in front of the gold backdrop and to the right of the woman and dog.", | |
| "relative_size": "Medium", | |
| "shape_and_color": "Humanoid shape, purple suit, blue hair.", | |
| "appearance_details": "He has spiky blue hair and is holding a tree branch with a blue jay on it.", | |
| "pose": "Standing upright with a slight tilt to the left.", | |
| "expression": "Neutral", | |
| "clothing": "He is wearing a three piece purple suit.", | |
| "action": "Standing", | |
| "gender": "Male", | |
| "skin_tone_and_texture": "Fair, smooth." | |
| }, | |
| { | |
| "description": "A checkerboard armchair in yellow and brown.", | |
| "location": "Bottom-center", | |
| "relationship": "The dog is sitting on the chair.", | |
| "relative_size": "Small", | |
| "shape_and_color": "Chair shape, yellow and brown.", | |
| "texture": "Smooth. End of texture answer.", | |
| "appearance_details": "The chair is a checkerboard armchair in yellow and brown." | |
| }, | |
| { | |
| "description": "A rustic framed oil painting of the pyramids.", | |
| "location": "Top-center", | |
| "relationship": "The painting is hanging above the dog.", | |
| "relative_size": "Small", | |
| "shape_and_color": "Rectangular shape, brown frame, yellow and brown pyramids.", | |
| "texture": "Rough. End of texture answer.", | |
| "appearance_details": "The painting is a rustic framed oil painting of the pyramids." | |
| } | |
| ], | |
| "background_setting": "The background is a tri-colored backdrop divided equally into red, white, and gold sections. There is a small rectangular metal grating in the top left corner and a subtle tear in the gold backdrop in the bottom right corner.", | |
| "lighting": { | |
| "conditions": "Studio lighting", | |
| "direction": "Front-lit", | |
| "shadows": "Soft shadows are present, indicating diffused lighting." | |
| }, | |
| "aesthetics": { | |
| "composition": "The composition is centered, with the three figures arranged in a row. The backdrop is divided into thirds, creating a symmetrical balance.", | |
| "color_scheme": "The color scheme is triadic, with red, white, and gold dominating the backdrop, complemented by the various colors of the figures' clothing and accessories.", | |
| "mood_atmosphere": "The mood is whimsical and surreal, with a touch of humor due to the unusual costumes and props.", | |
| "preference_score": "high", | |
| "aesthetic_score": "high" | |
| }, | |
| "photographic_characteristics": { | |
| "depth_of_field": "Deep", | |
| "focus": "Sharp focus on all subjects", | |
| "camera_angle": "Eye-level", | |
| "lens_focal_length": "Standard" | |
| }, | |
| "style_medium": "Photograph", | |
| "text_render": [ | |
| { | |
| "text": "Yoda", | |
| "location": "Center of the woman's t-shirt", | |
| "size": "Small", | |
| "color": "Beige", | |
| "font": "Cartoonish", | |
| "appearance_details": "The text is part of a graphic design on the t-shirt." | |
| } | |
| ], | |
| "context": "This is a surreal and whimsical portrait of a man, a woman, and a dog posed against a tri-colored backdrop. It could be an art piece or a promotional image for a quirky event or product.", | |
| "artistic_style": "Surreal Pop" | |
| } | |
| """ | |
# Runs once at import time with the image pipeline and the sample prompt.
# NOTE(review): presumably a warm-up/compile step that uses the prompt as an
# example input -- confirm against optimization.py.
| optimize_pipeline_(pipe, test_prompt_json) | |
def handle_json(text):
    """Check that ``text`` is a parseable JSON document.

    Args:
        text: Candidate JSON, normally a ``str``.

    Returns:
        ``text`` unchanged when ``json.loads`` accepts it, otherwise the
        literal string ``"Error"``.
    """
    try:
        json.loads(text)
    except (TypeError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError (malformed JSON), TypeError
        # covers non-string input such as None, and RecursionError covers
        # pathologically nested documents.  Interpreter-level signals such as
        # KeyboardInterrupt are intentionally left to propagate.
        return "Error"
    return text
def infer(prompt,
          negative_prompt="",
          seed=42,
          randomize_seed=False,
          width=1024,
          height=1024,
          guidance_scale=5,
          num_inference_steps=50,
          ):
    """Generate an image from a free-text prompt.

    The prompt is first passed to ``vlm_pipe``; the value stored under
    ``"json_prompt"`` in its output is then used as the prompt for ``pipe``.

    Args:
        prompt: Free-text description, forwarded to ``vlm_pipe``.
        negative_prompt: Forwarded unchanged to ``pipe``.
        seed: Seed for the random generator handed to ``pipe``.
        randomize_seed: When true, ``seed`` is replaced by a random integer
            in ``[0, MAX_SEED]``.
        width: Output width, forwarded to ``pipe``.
        height: Output height, forwarded to ``pipe``.
        guidance_scale: Forwarded to ``pipe``.
        num_inference_steps: Forwarded to ``pipe``.

    Returns:
        A ``(image, json_prompt)`` tuple: the first image produced by ``pipe``
        and the structured prompt it was generated from.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Seed the sampler explicitly so the "Seed" / "Randomize seed" controls
    # actually determine the result.  int() guards against a float coming
    # from the UI, which manual_seed would reject.
    generator = torch.Generator().manual_seed(int(seed))

    with torch.inference_mode():
        # 1. Turn the free-text prompt into a structured JSON prompt.
        output = vlm_pipe(prompt=prompt)
        json_prompt = output.values["json_prompt"]

        # 2. Render the image from the structured prompt.
        image = pipe(
            prompt=json_prompt,
            num_inference_steps=num_inference_steps,
            negative_prompt=negative_prompt,
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            generator=generator,
        ).images[0]

    return image, json_prompt
# Centre the main column and cap its width.
css = """
#col-container{
margin: 0 auto;
max-width: 768px;
}
"""

# Gradio UI: prompt input, generated image, structured prompt and advanced
# sampling controls.
# NOTE(review): the container nesting below was reconstructed from a source
# whose indentation was lost -- verify the layout against the running app.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## FIBO")

        with gr.Group():
            with gr.Column():
                with gr.Row():
                    prompt_in = gr.Textbox(label="Prompt")
                    # Receives the second value returned by `infer`.
                    prompt_in_json = gr.JSON(label="Json")
                submit_btn = gr.Button("Generate")
            result = gr.Image(label="output")

        with gr.Accordion("Advanced Settings", open=False):
            with gr.Row():
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="guidance scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=5.0,
                )
                num_inference_steps = gr.Slider(
                    label="number of inference steps",
                    minimum=1,
                    maximum=60,
                    step=1,
                    value=50,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=768,
                    maximum=1248,
                    step=32,
                    value=1024,
                )
                width = gr.Slider(
                    label="Width",
                    minimum=832,
                    maximum=1344,
                    step=64,
                    value=1024,
                )
            with gr.Row():
                negative_prompt = gr.Textbox(label="negative prompt", value=json.dumps(''))
                # Not wired to any event while the handlers below stay disabled.
                negative_prompt_json = gr.JSON(label="json negative prompt", value=json.dumps(''))

        # Live JSON validation of the text boxes (currently disabled):
        # prompt_in.change(
        #     handle_json,
        #     inputs=prompt_in,
        #     outputs=prompt_in_json)
        # negative_prompt.change(handle_json, inputs=negative_prompt, outputs=negative_prompt_json)

        # The input order must match the positional parameters of `infer`.
        submit_btn.click(
            fn=infer,
            inputs=[
                prompt_in,
                negative_prompt,
                seed,
                randomize_seed,
                width,
                height,
                guidance_scale,
                num_inference_steps,
            ],
            outputs=[
                result, prompt_in_json
            ],
        )

# Enable request queuing and start the server.
demo.queue().launch()