import os
import math
import tqdm
import logging
import argparse
import itertools
import PIL.Image
import numpy as np
from PIL import Image
import safetensors.torch
from datetime import datetime
from typing import Union, List
from spandrel import ModelLoader

import torch
import torch.nn.functional as F
from diffusers.utils import export_to_video

logger = logging.getLogger(__name__)


def get_args():
    parser = argparse.ArgumentParser(description="Simple example of a training script for ConsisID.")

    # ConsisID information
    parser.add_argument("--train_type", choices=["t2v", "i2v"], help="Train text-to-video (t2v) or image-to-video (i2v).")
    parser.add_argument("--is_train_face", action="store_true")
    parser.add_argument("--is_diff_lr", action="store_true")
    parser.add_argument("--is_train_lora", action="store_true")
    parser.add_argument("--is_kps", action="store_true")
    parser.add_argument("--is_shuffle_data", action="store_true")
    parser.add_argument("--enable_mask_loss", action="store_true")
    parser.add_argument("--is_single_face", action="store_true")
    parser.add_argument("--is_cross_face", action="store_true")
    parser.add_argument("--is_align_face", action="store_true")
    parser.add_argument("--is_reserve_face", action="store_true")
    parser.add_argument("--is_accelerator_state_dict", action="store_true")
    parser.add_argument("--is_validation", action="store_true")
    parser.add_argument("--config_path", type=str, default=None)
    parser.add_argument("--mask_path", type=str, default=None)
    parser.add_argument("--pretrained_weight", type=str, default=None)
    parser.add_argument("--sample_stride", type=int, default=3, help="Stride between sampled frames of each training video.")
    parser.add_argument("--skip_frames_start_percent", type=float, default=0.0, help="Fraction of each video (from the start) before which frames are skipped.")
    parser.add_argument("--skip_frames_end_percent", type=float, default=1.0, help="Fraction of each video (from the start) after which frames are skipped.")
    parser.add_argument("--miss_tolerance", type=int, default=6)
    parser.add_argument("--min_distance", type=int, default=3)
    parser.add_argument("--min_frames", type=int, default=1)
    parser.add_argument("--max_frames", type=int, default=5)
    parser.add_argument("--LFE_num_tokens", type=int, default=32)
    parser.add_argument("--LFE_output_dim", type=int, default=768)
    parser.add_argument("--LFE_heads", type=int, default=12)
    parser.add_argument("--cross_attn_interval", type=int, default=1)
    parser.add_argument("--use_ema", action="store_true", help="Whether to use EMA model.")
    parser.add_argument(
        "--non_ema_revision",
        type=str,
        default=None,
        required=False,
        help=(
            "Revision of pretrained non-ema model identifier. Must be a branch, tag or git identifier of the local or"
            " remote repository specified with --pretrained_model_name_or_path."
        ),
    )

    # Model information
    parser.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        default=None,
        required=True,
        help="Path to pretrained model or model identifier from huggingface.co/models.",
    )
    parser.add_argument(
        "--revision",
        type=str,
        default=None,
        required=False,
        help="Revision of pretrained model identifier from huggingface.co/models.",
    )
    parser.add_argument(
        "--variant",
        type=str,
        default=None,
        help="Variant of the model files of the pretrained model identifier from huggingface.co/models, e.g. fp16.",
    )
    parser.add_argument(
        "--cache_dir",
        type=str,
        default=None,
        help="The directory where the downloaded models and datasets will be stored.",
    )

    # Dataset information
    parser.add_argument(
        "--dataset_name",
        type=str,
        default=None,
        help=(
            "The name of the Dataset (from the HuggingFace hub) containing the training data of instance images (could be your own, possibly private,"
            " dataset). It can also be a path pointing to a local copy of a dataset in your filesystem,"
            " or to a folder containing files that 🤗 Datasets can understand."
        ),
    )
    parser.add_argument(
        "--dataset_config_name",
        type=str,
        default=None,
        help="The config of the Dataset, leave as None if there's only one config.",
    )
    parser.add_argument(
        "--instance_data_root",
        type=str,
        default=None,
        help="A folder containing the training data.",
    )
    parser.add_argument(
        "--video_column",
        type=str,
        default="video",
        help="The column of the dataset containing videos. Or, the name of the file in `--instance_data_root` folder containing the line-separated paths to video data.",
    )
    parser.add_argument(
        "--caption_column",
        type=str,
        default="text",
        help="The column of the dataset containing the instance prompt for each video. Or, the name of the file in `--instance_data_root` folder containing the line-separated instance prompts.",
    )
    parser.add_argument(
        "--id_token", type=str, default=None, help="Identifier token prepended to each prompt, if provided."
    )
    parser.add_argument(
        "--dataloader_num_workers",
        type=int,
        default=0,
        help="Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process.",
    )

    # Validation
    parser.add_argument(
        "--validation_prompt",
        type=str,
        default=None,
        help="One or more prompts used during validation to verify that the model is learning. Multiple validation prompts should be separated by the '--validation_prompt_separator' string.",
    )
    parser.add_argument(
        "--validation_images",
        type=str,
        default=None,
        help="One or more image paths used during validation to verify that the model is learning. Multiple validation paths should be separated by the '--validation_prompt_separator' string. These should correspond to the order of the validation prompts.",
    )
    parser.add_argument(
        "--validation_prompt_separator",
        type=str,
        default=":::",
        help="String that separates multiple validation prompts.",
    )
    parser.add_argument(
        "--num_validation_videos",
        type=int,
        default=1,
        help="Number of videos that should be generated during validation per `validation_prompt`.",
    )
    parser.add_argument(
        "--validation_epochs",
        type=int,
        default=50,
        help=(
            "Run validation every X epochs. Validation consists of running `args.validation_prompt`"
            " `args.num_validation_videos` times."
        ),
    )
    parser.add_argument("--low_vram", action="store_true", help="Whether to enable low-VRAM mode.")
    parser.add_argument(
        "--guidance_scale",
        type=float,
        default=6,
        help="The guidance scale to use while sampling validation videos.",
    )
    parser.add_argument(
        "--use_dynamic_cfg",
        action="store_true",
        default=False,
        help="Whether or not to use the cosine dynamic guidance schedule when sampling validation videos.",
    )

    # Training information
    parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
    parser.add_argument(
        "--rank",
        type=int,
        default=128,
        help="The dimension of the LoRA update matrices.",
    )
    parser.add_argument(
        "--lora_alpha",
        type=float,
        default=128,
        help="The scaling factor for the LoRA weight update. The actual scaling factor is `lora_alpha / rank`.",
    )
    parser.add_argument(
        "--mixed_precision",
        type=str,
        default=None,
        choices=["no", "fp16", "bf16"],
        help=(
            "Whether to use mixed precision. Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >="
            " 1.10 and an Nvidia Ampere GPU. Defaults to the value of the accelerate config of the current system or the"
            " flag passed with the `accelerate.launch` command. Use this argument to override the accelerate config."
        ),
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="cogvideox-i2v-lora",
        help="The output directory where the model predictions and checkpoints will be written.",
    )
    parser.add_argument(
        "--height",
        type=int,
        default=480,
        help="All input videos are resized to this height.",
    )
    parser.add_argument(
        "--width",
        type=int,
        default=720,
        help="All input videos are resized to this width.",
    )
    parser.add_argument("--fps", type=int, default=8, help="All input videos will be used at this FPS.")
    parser.add_argument(
        "--max_num_frames", type=int, default=49, help="All input videos will be truncated to at most this many frames."
    )
    parser.add_argument(
        "--skip_frames_start",
        type=int,
        default=0,
        help="Number of frames to skip from the beginning of each input video. Useful if training data contains intro sequences.",
    )
    parser.add_argument(
        "--skip_frames_end",
        type=int,
        default=0,
        help="Number of frames to skip from the end of each input video. Useful if training data contains outro sequences.",
    )
    parser.add_argument(
        "--random_flip",
        action="store_true",
        help="Whether to randomly flip videos horizontally.",
    )
    parser.add_argument(
        "--train_batch_size", type=int, default=4, help="Batch size (per device) for the training dataloader."
    )
| parser.add_argument("--num_train_epochs", type=int, default=1) | |
| parser.add_argument( | |
| "--max_train_steps", | |
| type=int, | |
| default=None, | |
| help="Total number of training steps to perform. If provided, overrides `--num_train_epochs`.", | |
| ) | |
| parser.add_argument( | |
| "--checkpointing_steps", | |
| type=int, | |
| default=500, | |
| help=( | |
| "Save a checkpoint of the training state every X updates. These checkpoints can be used both as final" | |
| " checkpoints in case they are better than the last checkpoint, and are also suitable for resuming" | |
| " training using `--resume_from_checkpoint`." | |
| ), | |
| ) | |
| parser.add_argument( | |
| "--checkpoints_total_limit", | |
| type=int, | |
| default=None, | |
| help=("Max number of checkpoints to store."), | |
| ) | |
| parser.add_argument( | |
| "--resume_from_checkpoint", | |
| type=str, | |
| default=None, | |
| help=( | |
| "Whether training should be resumed from a previous checkpoint. Use a path saved by" | |
| ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.' | |
| ), | |
| ) | |
| parser.add_argument( | |
| "--gradient_accumulation_steps", | |
| type=int, | |
| default=1, | |
| help="Number of updates steps to accumulate before performing a backward/update pass.", | |
| ) | |
| parser.add_argument( | |
| "--gradient_checkpointing", | |
| action="store_true", | |
| help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.", | |
| ) | |
| parser.add_argument( | |
| "--learning_rate", | |
| type=float, | |
| default=3e-5, | |
| help="Initial learning rate (after the potential warmup period) to use.", | |
| ) | |
| parser.add_argument( | |
| "--scale_lr", | |
| action="store_true", | |
| default=False, | |
| help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.", | |
| ) | |
| parser.add_argument( | |
| "--lr_scheduler", | |
| type=str, | |
| default="cosine_with_restarts", | |
| help=( | |
| 'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",' | |
| ' "constant", "constant_with_warmup"]' | |
| ), | |
| ) | |
| parser.add_argument( | |
| "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler." | |
| ) | |
| parser.add_argument( | |
| "--lr_num_cycles", | |
| type=int, | |
| default=1, | |
| help="Number of hard resets of the lr in cosine_with_restarts scheduler.", | |
| ) | |
| parser.add_argument("--lr_power", type=float, default=1.0, help="Power factor of the polynomial scheduler.") | |
| parser.add_argument( | |
| "--enable_slicing", | |
| action="store_true", | |
| default=False, | |
| help="Whether or not to use VAE slicing for saving memory.", | |
| ) | |
| parser.add_argument( | |
| "--enable_tiling", | |
| action="store_true", | |
| default=False, | |
| help="Whether or not to use VAE tiling for saving memory.", | |
| ) | |
| parser.add_argument( | |
| "--noised_image_dropout", | |
| type=float, | |
| default=0.05, | |
| help="Image condition dropout probability.", | |
| ) | |

    # Optimizer
    parser.add_argument(
        "--optimizer",
        type=lambda s: s.lower(),
        default="adam",
        choices=["adam", "adamw", "prodigy"],
        help="The optimizer type to use.",
    )
    parser.add_argument(
        "--use_8bit_adam",
        action="store_true",
        help="Whether or not to use 8-bit Adam from bitsandbytes. Ignored if the optimizer is not set to AdamW.",
    )
    parser.add_argument(
        "--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam and Prodigy optimizers."
    )
    parser.add_argument(
        "--adam_beta2", type=float, default=0.95, help="The beta2 parameter for the Adam and Prodigy optimizers."
    )
    parser.add_argument(
        "--prodigy_beta3",
        type=float,
        default=None,
        help="Coefficient for computing the Prodigy optimizer's stepsize using running averages. If set to None, uses the square root of beta2.",
    )
    parser.add_argument("--prodigy_decouple", action="store_true", help="Use AdamW-style decoupled weight decay.")
    parser.add_argument("--adam_weight_decay", type=float, default=1e-04, help="Weight decay to use for the trainable params.")
    parser.add_argument(
        "--adam_epsilon",
        type=float,
        default=1e-08,
        help="Epsilon value for the Adam and Prodigy optimizers.",
    )
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
    parser.add_argument("--prodigy_use_bias_correction", action="store_true", help="Turn on Adam's bias correction.")
    parser.add_argument(
        "--prodigy_safeguard_warmup",
        action="store_true",
        help="Remove lr from the denominator of the D estimate to avoid issues during the warm-up stage.",
    )

    # Other information
    parser.add_argument("--tracker_name", type=str, default=None, help="Project tracker name.")
    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
    parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
    parser.add_argument(
        "--hub_model_id",
        type=str,
        default=None,
        help="The name of the repository to keep in sync with the local `output_dir`.",
    )
    parser.add_argument(
        "--logging_dir",
        type=str,
        default="logs",
        help="Directory where logs are stored.",
    )
    parser.add_argument(
        "--allow_tf32",
        action="store_true",
        help=(
            "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see"
            " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices"
        ),
    )
    parser.add_argument(
        "--report_to",
        type=str,
        default=None,
        help=(
            'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
            ' `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.'
        ),
    )
    parser.add_argument(
        "--trainable_modules",
        nargs="+",
        help="List of trainable modules.",
    )
    parser.add_argument("--nccl_timeout", type=int, default=600, help="NCCL backend timeout in seconds.")

    return parser.parse_args()
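

# Example invocation of a training entry point that consumes get_args()
# (hypothetical script name, paths, and flag combination, for illustration only):
#   python train.py --pretrained_model_name_or_path <repo_or_local_path> \
#       --train_type i2v --instance_data_root ./data --output_dir ./outputs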


def resize_mask(mask, latent, process_first_frame_only=True):
    """Trilinearly resize a [B, C, T, H, W] mask to a latent's (T, H, W) grid."""
    latent_size = latent.size()

    if process_first_frame_only:
        # Resize the first frame on its own so it maps to exactly one latent frame.
        target_size = list(latent_size[2:])
        target_size[0] = 1
        first_frame_resized = F.interpolate(
            mask[:, :, 0:1, :, :],
            size=target_size,
            mode="trilinear",
            align_corners=False,
        )

        # Resize the remaining frames to fill the rest of the latent's time axis.
        target_size = list(latent_size[2:])
        target_size[0] = target_size[0] - 1
        if target_size[0] != 0:
            remaining_frames_resized = F.interpolate(
                mask[:, :, 1:, :, :],
                size=target_size,
                mode="trilinear",
                align_corners=False,
            )
            resized_mask = torch.cat([first_frame_resized, remaining_frames_resized], dim=2)
        else:
            resized_mask = first_frame_resized
    else:
        target_size = list(latent_size[2:])
        resized_mask = F.interpolate(
            mask,
            size=target_size,
            mode="trilinear",
            align_corners=False,
        )

    return resized_mask
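

# Usage sketch (hypothetical shapes, for illustration): a pixel-space mask is
# downsampled onto a latent's spatiotemporal grid.
#
#   mask = torch.rand(1, 1, 49, 480, 720)        # [B, C, T, H, W]
#   latent = torch.empty(1, 16, 13, 60, 90)      # [B, C', T', H', W']
#   resize_mask(mask, latent).shape              # torch.Size([1, 1, 13, 60, 90])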


def save_tensor_as_image(tensor, file_path):
    """
    Saves a PyTorch tensor as an image file.

    Args:
        tensor (torch.Tensor): The image tensor to save.
        file_path (str): Path to save the image file.
    """
    # Ensure the tensor is in CPU memory and detach it from the computation graph
    tensor = tensor.cpu().detach()

    # Convert from PyTorch to NumPy format, and handle the scaling from [0, 1] to [0, 255]
    tensor = tensor.squeeze()  # Remove unnecessary dimensions if any
    tensor = tensor.permute(1, 2, 0)  # Change from (C, H, W) to (H, W, C)
    tensor = tensor.numpy() * 255  # Scale from [0, 1] to [0, 255]
    tensor = tensor.astype(np.uint8)  # Convert to uint8

    # Convert the NumPy array to a PIL Image and save it
    image = Image.fromarray(tensor)
    image.save(file_path)


def pixel_values_to_pil(pixel_values, frame_index=0):
    """Convert one frame of a [T, C, H, W] tensor in [-1, 1] to a PIL image."""
    if pixel_values.is_cuda:
        pixel_values = pixel_values.clone().cpu()
    # Rescale from [-1, 1] to [0, 255].
    pixel_values = (pixel_values + 1.0) / 2.0 * 255.0
    pixel_values = pixel_values.clamp(0, 255).byte()
    frame = pixel_values[frame_index]  # [C, H, W]
    frame = frame.permute(1, 2, 0)  # [H, W, C]
    frame_np = frame.numpy()
    image = Image.fromarray(frame_np)
    return image
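

# Usage sketch (hypothetical shapes): pick one frame out of a normalized clip.
#
#   frames = torch.rand(49, 3, 480, 720) * 2 - 1   # values in [-1, 1]
#   img = pixel_values_to_pil(frames, frame_index=0)  # PIL.Image, 720x480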


def load_torch_file(ckpt, device=None, dtype=torch.float16):
    if device is None:
        device = torch.device("cpu")
    if ckpt.lower().endswith(".safetensors") or ckpt.lower().endswith(".sft"):
        sd = safetensors.torch.load_file(ckpt, device=device.type)
    else:
        # Only pass weights_only on PyTorch versions that support it.
        if "weights_only" in torch.load.__code__.co_varnames:
            pl_sd = torch.load(ckpt, map_location=device, weights_only=True)
        else:
            logger.warning(
                "torch.load doesn't support weights_only on this pytorch version, loading unsafely."
            )
            pl_sd = torch.load(ckpt, map_location=device)
        if "global_step" in pl_sd:
            logger.debug(f"Global Step: {pl_sd['global_step']}")
        if "state_dict" in pl_sd:
            sd = pl_sd["state_dict"]
        elif "params_ema" in pl_sd:
            sd = pl_sd["params_ema"]
        else:
            sd = pl_sd
    sd = {k: v.to(dtype) for k, v in sd.items()}
    return sd
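

# Usage sketch (hypothetical file names): both safetensors and pickle
# checkpoints come back as a flat {name: tensor} dict cast to `dtype`.
#
#   sd = load_torch_file("upscaler.safetensors")              # fp16 on CPU
#   sd = load_torch_file("model.ckpt", dtype=torch.bfloat16)  # unwraps "state_dict" / "params_ema"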


def state_dict_prefix_replace(state_dict, replace_prefix, filter_keys=False):
    if filter_keys:
        out = {}
    else:
        out = state_dict
    for rp in replace_prefix:
        replace = list(
            map(
                lambda a: (a, "{}{}".format(replace_prefix[rp], a[len(rp):])),
                filter(lambda a: a.startswith(rp), state_dict.keys()),
            )
        )
        for x in replace:
            w = state_dict.pop(x[0])
            out[x[1]] = w
    return out
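

# Usage sketch: strip a DataParallel-style "module." prefix in place.
#
#   sd = {"module.conv.weight": torch.zeros(1)}
#   state_dict_prefix_replace(sd, {"module.": ""})  # -> {"conv.weight": tensor([0.])}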


def module_size(module):
    module_mem = 0
    sd = module.state_dict()
    for k in sd:
        t = sd[k]
        module_mem += t.nelement() * t.element_size()
    return module_mem
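

# Usage sketch: module_size sums parameter/buffer bytes from the state dict.
#
#   size_gb = module_size(model) / 1024 ** 3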


def get_tiled_scale_steps(width, height, tile_x, tile_y, overlap):
    return math.ceil((height / (tile_y - overlap))) * math.ceil((width / (tile_x - overlap)))
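

# Worked example: with a 720x480 frame, 512px tiles, and 32px overlap, the grid
# is ceil(480 / 480) * ceil(720 / 480) = 1 * 2 = 2 tiles per frame.
#
#   get_tiled_scale_steps(720, 480, tile_x=512, tile_y=512, overlap=32)  # 2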


def tiled_scale_multidim(
    samples, function, tile=(64, 64), overlap=8, upscale_amount=4, out_channels=3, output_device="cpu", pbar=None
):
    dims = len(tile)
    logger.debug(f"samples dtype: {samples.dtype}")
    output = torch.empty(
        [samples.shape[0], out_channels] + list(map(lambda a: round(a * upscale_amount), samples.shape[2:])),
        device=output_device,
    )

    for b in range(samples.shape[0]):
        s = samples[b : b + 1]
        out = torch.zeros(
            [s.shape[0], out_channels] + list(map(lambda a: round(a * upscale_amount), s.shape[2:])),
            device=output_device,
        )
        out_div = torch.zeros(
            [s.shape[0], out_channels] + list(map(lambda a: round(a * upscale_amount), s.shape[2:])),
            device=output_device,
        )

        # Slide a tile window over every spatial(-temporal) dim, stepping by tile - overlap.
        for it in itertools.product(*map(lambda a: range(0, a[0], a[1] - overlap), zip(s.shape[2:], tile))):
            s_in = s
            upscaled = []
            for d in range(dims):
                pos = max(0, min(s.shape[d + 2] - overlap, it[d]))
                l = min(tile[d], s.shape[d + 2] - pos)
                s_in = s_in.narrow(d + 2, pos, l)
                upscaled.append(round(pos * upscale_amount))

            ps = function(s_in).to(output_device)

            # Feather the tile borders so overlapping tiles blend linearly.
            mask = torch.ones_like(ps)
            feather = round(overlap * upscale_amount)
            for t in range(feather):
                for d in range(2, dims + 2):
                    m = mask.narrow(d, t, 1)
                    m *= (1.0 / feather) * (t + 1)
                    m = mask.narrow(d, mask.shape[d] - 1 - t, 1)
                    m *= (1.0 / feather) * (t + 1)

            # Accumulate the weighted tile and its weights; dividing at the end normalizes overlaps.
            o = out
            o_d = out_div
            for d in range(dims):
                o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2])
                o_d = o_d.narrow(d + 2, upscaled[d], mask.shape[d + 2])
            o += ps * mask
            o_d += mask

            if pbar is not None:
                pbar.update(1)

        output[b : b + 1] = out / out_div
    return output
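

# Usage sketch (hypothetical 2x model): tile a batch of images, run `function`
# on each tile, and feather-blend the overlaps via the out / out_div buffers.
#
#   up = tiled_scale_multidim(imgs, model, tile=(512, 512), overlap=32, upscale_amount=2)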


def tiled_scale(
    samples,
    function,
    tile_x=64,
    tile_y=64,
    overlap=8,
    upscale_amount=4,
    out_channels=3,
    output_device="cpu",
    pbar=None,
):
    return tiled_scale_multidim(
        samples, function, (tile_y, tile_x), overlap, upscale_amount, out_channels, output_device, pbar
    )


def load_sd_upscale(ckpt, inf_device):
    sd = load_torch_file(ckpt, device=inf_device)
    if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd:
        sd = state_dict_prefix_replace(sd, {"module.": ""})
    out = ModelLoader().load_from_state_dict(sd).half()
    return out
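

# Usage sketch (hypothetical checkpoint path): spandrel infers the architecture
# from the state dict, so any upscaler checkpoint it supports works here.
#
#   upscale_model = load_sd_upscale("realesrgan_x4.pth", inf_device=torch.device("cuda"))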


def upscale(upscale_model, tensor: torch.Tensor, inf_device, output_device="cpu") -> torch.Tensor:
    memory_required = module_size(upscale_model.model)
    memory_required += (
        (512 * 512 * 3) * tensor.element_size() * max(upscale_model.scale, 1.0) * 384.0
    )  # The 384.0 is an estimate of how much some of these models take, TODO: make it more accurate
    memory_required += tensor.nelement() * tensor.element_size()
    logger.debug(f"Upscale memory required: {memory_required / 1024 / 1024 / 1024:.2f} GB")

    upscale_model.to(inf_device)
    tile = 512
    overlap = 32

    steps = tensor.shape[0] * get_tiled_scale_steps(
        tensor.shape[3], tensor.shape[2], tile_x=tile, tile_y=tile, overlap=overlap
    )
    pbar = ProgressBar(steps, desc="Tiling and Upscaling")

    s = tiled_scale(
        samples=tensor.to(torch.float16),
        function=lambda a: upscale_model(a),
        tile_x=tile,
        tile_y=tile,
        overlap=overlap,
        upscale_amount=upscale_model.scale,
        pbar=pbar,
    )

    upscale_model.to(output_device)
    return s
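

# Usage sketch (hypothetical shapes): upscale a [T, C, H, W] frame stack in
# 512px tiles; the result lands on `output_device`.
#
#   frames_up = upscale(upscale_model, frames, inf_device=torch.device("cuda"))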


def upscale_batch_and_concatenate(upscale_model, latents, inf_device, output_device="cpu") -> torch.Tensor:
    upscaled_latents = []
    for i in range(latents.size(0)):
        latent = latents[i]
        upscaled_latent = upscale(upscale_model, latent, inf_device, output_device)
        upscaled_latents.append(upscaled_latent)
    return torch.stack(upscaled_latents)


def save_video(tensor: Union[List[np.ndarray], List[PIL.Image.Image]], fps: int = 8):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    video_path = f"./output/{timestamp}.mp4"
    os.makedirs(os.path.dirname(video_path), exist_ok=True)
    export_to_video(tensor, video_path, fps=fps)
    return video_path
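

# Usage sketch: frames can be a list of np.ndarray or PIL images; the clip is
# written to ./output/<timestamp>.mp4 and the path is returned.
#
#   path = save_video(frames, fps=8)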


class ProgressBar:
    def __init__(self, total, desc=None):
        self.total = total
        self.current = 0
        self.b_unit = tqdm.tqdm(total=total, desc="ProgressBar context index: 0" if desc is None else desc)

    def update(self, value):
        # Treat `value` as an increment, clamped so the bar never exceeds `total`.
        if self.current + value > self.total:
            value = self.total - self.current
        self.current += value
        if self.b_unit is not None:
            self.b_unit.set_description("ProgressBar context index: {}".format(self.current))
            self.b_unit.refresh()
            self.b_unit.update(value)
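

if __name__ == "__main__":
    # Minimal smoke test (illustrative only): drive the ProgressBar through
    # five unit increments, as the tiling loop in tiled_scale_multidim does.
    bar = ProgressBar(5, desc="demo")
    for _ in range(5):
        bar.update(1)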