| | --- |
| | library_name: diffusers |
| | license: other |
| | license_name: flux-1-dev-non-commercial-license |
| | license_link: LICENSE.md |
| | --- |
| | |
| | > [!NOTE] |
| | > Contains the NF4 checkpoints (`transformer` and `text_encoder_2`) of [`black-forest-labs/FLUX.1-Depth-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev). Please adhere to the original model licensing! |
| |
|
| | <details> |
| | <summary>Code</summary> |
| |
|
| | ```py |
| | # !pip install git+https://github.com/asomoza/image_gen_aux.git |
| | from diffusers import DiffusionPipeline, FluxControlPipeline, FluxTransformer2DModel |
| | import torch |
| | from transformers import T5EncoderModel |
| | from image_gen_aux import DepthPreprocessor |
| | from diffusers.utils import load_image |
| | import fire |
| | |
| | |
def load_pipeline(four_bit=False):
    """Build a ``FluxControlPipeline`` for FLUX.1-Depth-dev.

    The shared components come from the base ``black-forest-labs/FLUX.1-dev``
    checkpoint; the depth-specific transformer (and, in the NF4 path, the T5
    text encoder) are loaded separately and swapped in via ``from_pipe``.

    Args:
        four_bit: When true, load the ``transformer`` and ``text_encoder_2``
            from the ``sayakpaul/FLUX.1-Depth-dev-nf4`` checkpoint. Otherwise
            load the transformer from ``black-forest-labs/FLUX.1-Depth-dev``.

    Returns:
        The assembled pipeline with model CPU offload enabled.
    """
    # Components that are overridden below are passed as `None` so the base
    # checkpoint's copies are not loaded only to be thrown away.
    skipped_components = {"transformer": None}
    if four_bit:
        skipped_components["text_encoder_2"] = None
    orig_pipeline = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, **skipped_components
    )

    if four_bit:
        print("Using four bit.")
        transformer = FluxTransformer2DModel.from_pretrained(
            "sayakpaul/FLUX.1-Depth-dev-nf4", subfolder="transformer", torch_dtype=torch.bfloat16
        )
        text_encoder_2 = T5EncoderModel.from_pretrained(
            "sayakpaul/FLUX.1-Depth-dev-nf4", subfolder="text_encoder_2", torch_dtype=torch.bfloat16
        )
        pipeline = FluxControlPipeline.from_pipe(
            orig_pipeline, transformer=transformer, text_encoder_2=text_encoder_2, torch_dtype=torch.bfloat16
        )
    else:
        transformer = FluxTransformer2DModel.from_pretrained(
            "black-forest-labs/FLUX.1-Depth-dev",
            subfolder="transformer",
            revision="refs/pr/1",
            torch_dtype=torch.bfloat16,
        )
        pipeline = FluxControlPipeline.from_pipe(orig_pipeline, transformer=transformer, torch_dtype=torch.bfloat16)

    pipeline.enable_model_cpu_offload()
    return pipeline
| | |
@torch.no_grad()
def get_depth(control_image):
    """Turn an input image into an RGB depth-map control image.

    Args:
        control_image: The image to run through the depth preprocessor.

    Returns:
        The first result of the preprocessor, converted to RGB mode.
    """
    depth_estimator = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
    depth_maps = depth_estimator(control_image)
    return depth_maps[0].convert("RGB")
| | |
def load_conditions():
    """Return the example prompt and its depth control image.

    Returns:
        A ``(prompt, control_image)`` tuple, where ``control_image`` is the
        result of passing the downloaded example image through ``get_depth``.
    """
    source_image = load_image(
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png"
    )
    prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
    return prompt, get_depth(source_image)
| | |
| | |
def main(four_bit: bool = False):
    """Generate the example image and save it as a PNG in the working directory.

    Args:
        four_bit: Use the NF4 checkpoints instead of the original bf16
            transformer. Adds a ``_4bit`` suffix to the output file name.
    """
    # The file name is derived from the base checkpoint id so that the two
    # runs produce `output_FLUX_1-Depth-dev.png` and
    # `output_FLUX_1-Depth-dev_4bit.png`, the names used in the Outputs table.
    ckpt_id = "black-forest-labs/FLUX.1-Depth-dev"
    pipe = load_pipeline(four_bit=four_bit)
    prompt, control_image = load_conditions()
    image = pipe(
        prompt=prompt,
        control_image=control_image,
        height=1024,
        width=1024,
        num_inference_steps=30,
        guidance_scale=10.0,
        max_sequence_length=512,
        # Fixed CPU seed so both variants start from the same noise.
        generator=torch.Generator("cpu").manual_seed(0),
    ).images[0]
    filename = "output_" + ckpt_id.split("/")[-1].replace(".", "_")
    filename += "_4bit" if four_bit else ""
    image.save(f"{filename}.png")
| | |
| | |
| | if __name__ == "__main__": |
| | fire.Fire(main) |
| | ``` |
| |
|
| | </details> |
| |
|
| | ## Outputs |
| |
|
| | <table> |
| | <thead> |
| | <tr> |
| | <th>Original</th> |
| | <th>NF4</th> |
| | </tr> |
| | </thead> |
| | <tbody> |
| | <tr> |
| | <td> |
| | <img src="./assets/output_FLUX_1-Depth-dev.png" alt="Original"> |
| | </td> |
| | <td> |
| | <img src="./assets/output_FLUX_1-Depth-dev_4bit.png" alt="NF4"> |
| | </td> |
| | </tr> |
| | </tbody> |
| | </table> |