Commit 4d5065f · Parent(s): 562c833
upload
Files changed:
- util/__init__.py +0 -0
- util/batchsize.py +59 -0
- util/image_util.py +172 -0
- util/seed_all.py +13 -0
util/__init__.py ADDED (empty file)
util/batchsize.py ADDED
@@ -0,0 +1,59 @@
import math

import torch

# Search table for the suggested maximum inference batch size,
# keyed by operating resolution, total VRAM (GiB), and dtype.
bs_search_table = [
    # tested on A100-PCIE-80GB
    {"res": 768, "total_vram": 79, "bs": 35, "dtype": torch.float32},
    {"res": 1024, "total_vram": 79, "bs": 20, "dtype": torch.float32},
    # tested on A100-PCIE-40GB
    {"res": 768, "total_vram": 39, "bs": 15, "dtype": torch.float32},
    {"res": 1024, "total_vram": 39, "bs": 8, "dtype": torch.float32},
    {"res": 768, "total_vram": 39, "bs": 30, "dtype": torch.float16},
    {"res": 1024, "total_vram": 39, "bs": 15, "dtype": torch.float16},
    # tested on RTX3090, RTX4090
    {"res": 512, "total_vram": 23, "bs": 20, "dtype": torch.float32},
    {"res": 768, "total_vram": 23, "bs": 7, "dtype": torch.float32},
    {"res": 1024, "total_vram": 23, "bs": 3, "dtype": torch.float32},
    {"res": 512, "total_vram": 23, "bs": 40, "dtype": torch.float16},
    {"res": 768, "total_vram": 23, "bs": 18, "dtype": torch.float16},
    {"res": 1024, "total_vram": 23, "bs": 10, "dtype": torch.float16},
    # tested on GTX1080Ti
    {"res": 512, "total_vram": 10, "bs": 5, "dtype": torch.float32},
    {"res": 768, "total_vram": 10, "bs": 2, "dtype": torch.float32},
    {"res": 512, "total_vram": 10, "bs": 10, "dtype": torch.float16},
    {"res": 768, "total_vram": 10, "bs": 5, "dtype": torch.float16},
    {"res": 1024, "total_vram": 10, "bs": 3, "dtype": torch.float16},
]


def find_batch_size(ensemble_size: int, input_res: int, dtype: torch.dtype) -> int:
    """
    Automatically search for a suitable inference batch size.

    Args:
        ensemble_size (int): Number of predictions to be ensembled.
        input_res (int): Operating resolution of the input image.
        dtype (torch.dtype): Data type used for inference.

    Returns:
        int: Operating batch size.
    """
    if not torch.cuda.is_available():
        return 1

    total_vram = torch.cuda.mem_get_info()[1] / 1024.0**3
    filtered_bs_search_table = [s for s in bs_search_table if s["dtype"] == dtype]
    for settings in sorted(
        filtered_bs_search_table,
        key=lambda k: (k["res"], -k["total_vram"]),
    ):
        if input_res <= settings["res"] and total_vram >= settings["total_vram"]:
            bs = settings["bs"]
            # Never use a batch larger than the ensemble, and prefer a size
            # that splits the ensemble into evenly sized chunks.
            if bs > ensemble_size:
                bs = ensemble_size
            elif ensemble_size > bs > math.ceil(ensemble_size / 2):
                bs = math.ceil(ensemble_size / 2)
            return bs

    return 1
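Example usage (a minimal sketch; the ensemble size, resolution, and dtype below are illustrative values, not part of the commit):

import torch
from util.batchsize import find_batch_size

# Pick a batch size for a hypothetical 10-member ensemble at 768 px in fp16.
bs = find_batch_size(ensemble_size=10, input_res=768, dtype=torch.float16)
# On a ~24 GiB card the table suggests 18, which is then capped at the
# ensemble size of 10; without CUDA the function falls back to 1.
print(bs)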
util/image_util.py ADDED
@@ -0,0 +1,172 @@
import matplotlib
import numpy as np
import torch
from PIL import Image
from torchvision import transforms


def norm_to_rgb(norm):
    # norm: (3, H, W), values in [-1, 1]
    norm_rgb = ((norm + 1) * 0.5) * 255
    norm_rgb = np.clip(norm_rgb, a_min=0, a_max=255)
    norm_rgb = norm_rgb.astype(np.uint8)
    return norm_rgb


def colorize_depth_maps(
    depth_map, min_depth, max_depth, cmap="Spectral", valid_mask=None
):
    """
    Colorize depth maps with a matplotlib colormap.
    Accepts a torch.Tensor or np.ndarray of shape [(B,) H, W] and returns
    an RGB array of shape [B, 3, H, W] in the same container type.
    """
    assert len(depth_map.shape) >= 2, "Invalid dimension"

    if isinstance(depth_map, torch.Tensor):
        depth = depth_map.detach().clone().squeeze().numpy()
    elif isinstance(depth_map, np.ndarray):
        depth = np.squeeze(depth_map.copy())
    else:
        raise TypeError(f"Unsupported input type: {type(depth_map)}")
    # reshape to [ (B,) H, W ]
    if depth.ndim < 3:
        depth = depth[np.newaxis, :, :]

    # colorize
    cm = matplotlib.colormaps[cmap]
    depth = ((depth - min_depth) / (max_depth - min_depth)).clip(0, 1)
    img_colored_np = cm(depth, bytes=False)[:, :, :, 0:3]  # values from 0 to 1
    img_colored_np = np.rollaxis(img_colored_np, 3, 1)  # [B, 3, H, W]

    if valid_mask is not None:
        if isinstance(depth_map, torch.Tensor):
            valid_mask = valid_mask.detach().numpy()
        valid_mask = np.squeeze(valid_mask)  # [H, W] or [B, H, W]
        if valid_mask.ndim < 3:
            valid_mask = valid_mask[np.newaxis, np.newaxis, :, :]
        else:
            valid_mask = valid_mask[:, np.newaxis, :, :]
        valid_mask = np.repeat(valid_mask, 3, axis=1)
        img_colored_np[~valid_mask] = 0

    if isinstance(depth_map, torch.Tensor):
        img_colored = torch.from_numpy(img_colored_np).float()
    else:
        img_colored = img_colored_np

    return img_colored


def chw2hwc(chw):
    # Convert a (C, H, W) array or tensor to (H, W, C).
    assert 3 == len(chw.shape)
    if isinstance(chw, torch.Tensor):
        hwc = torch.permute(chw, (1, 2, 0))
    elif isinstance(chw, np.ndarray):
        hwc = np.moveaxis(chw, 0, -1)
    return hwc


def resize_max_res(img: Image.Image, max_edge_resolution: int) -> Image.Image:
    """
    Resize an image to limit the longest edge length while keeping the aspect ratio.

    Args:
        img (Image.Image): Image to be resized.
        max_edge_resolution (int): Maximum edge length (px).

    Returns:
        Image.Image: Resized image.
    """
    original_width, original_height = img.size
    downscale_factor = min(
        max_edge_resolution / original_width, max_edge_resolution / original_height
    )

    new_width = int(original_width * downscale_factor)
    new_height = int(original_height * downscale_factor)

    resized_img = img.resize((new_width, new_height))
    return resized_img


def resize_max_res_integer_16(img: Image.Image, max_edge_resolution: int) -> Image.Image:
    """
    Resize an image to limit the longest edge length while keeping the aspect
    ratio, rounding each side down to an integer multiple of 16 (required by PixArt).

    Args:
        img (Image.Image): Image to be resized.
        max_edge_resolution (int): Maximum edge length (px).

    Returns:
        Image.Image: Resized image.
    """
    original_width, original_height = img.size
    downscale_factor = min(
        max_edge_resolution / original_width, max_edge_resolution / original_height
    )

    # Round down to integer multiples of 16, as required for PixArt.
    new_width = int(original_width * downscale_factor) // 16 * 16
    new_height = int(original_height * downscale_factor) // 16 * 16

    resized_img = img.resize((new_width, new_height))
    return resized_img


def resize_res(img: Image.Image, max_edge_resolution: int) -> Image.Image:
    """
    Resize an image to a square of the given edge length.
    Note: this does not preserve the aspect ratio.

    Args:
        img (Image.Image): Image to be resized.
        max_edge_resolution (int): Target edge length (px).

    Returns:
        Image.Image: Resized image.
    """
    resized_img = img.resize((max_edge_resolution, max_edge_resolution))
    return resized_img


class ResizeLongestEdge:
    def __init__(self, max_size, interpolation=transforms.InterpolationMode.BILINEAR):
        self.max_size = max_size
        self.interpolation = interpolation

    def __call__(self, img):
        # Scale so that the longest edge equals max_size.
        scale = self.max_size / max(img.width, img.height)
        new_size = (int(img.height * scale), int(img.width * scale))
        return transforms.functional.resize(img, new_size, self.interpolation)


class ResizeShortestEdge:
    def __init__(self, min_size, interpolation=transforms.InterpolationMode.BILINEAR):
        self.min_size = min_size
        self.interpolation = interpolation

    def __call__(self, img):
        # Scale so that the shortest edge equals min_size.
        scale = self.min_size / min(img.width, img.height)
        new_size = (int(img.height * scale), int(img.width * scale))
        return transforms.functional.resize(img, new_size, self.interpolation)


class ResizeHard:
    def __init__(self, size, interpolation=transforms.InterpolationMode.BILINEAR):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        # Resize to a fixed square, ignoring the aspect ratio.
        new_size = (int(self.size), int(self.size))
        return transforms.functional.resize(img, new_size, self.interpolation)


class ResizeLongestEdgeInteger:
    def __init__(self, max_size, interpolation=transforms.InterpolationMode.BILINEAR, integer=16):
        self.max_size = max_size
        self.interpolation = interpolation
        self.integer = integer

    def __call__(self, img):
        # Scale so that the longest edge is at most max_size, then round each
        # side down to an integer multiple of self.integer.
        scale = self.max_size / max(img.width, img.height)
        new_size_h = int(img.height * scale) // self.integer * self.integer
        new_size_w = int(img.width * scale) // self.integer * self.integer
        new_size = (new_size_h, new_size_w)
        return transforms.functional.resize(img, new_size, self.interpolation)
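Example usage (a minimal sketch; the array shape, value range, and file names are illustrative, not part of the commit):

import numpy as np
from PIL import Image
from util.image_util import chw2hwc, colorize_depth_maps, resize_max_res

# Shrink an input so its longest edge is at most 768 px, keeping aspect ratio.
img = resize_max_res(Image.open("input.jpg"), max_edge_resolution=768)

# Colorize a depth map normalized to [0, 1] and save it as an RGB image.
depth = np.random.rand(480, 640).astype(np.float32)
colored = colorize_depth_maps(depth, min_depth=0.0, max_depth=1.0)  # [1, 3, H, W]
rgb = (chw2hwc(colored[0]) * 255).astype(np.uint8)  # [H, W, 3] uint8
Image.fromarray(rgb).save("depth_vis.png")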
util/seed_all.py ADDED
@@ -0,0 +1,13 @@
import random

import numpy as np
import torch


def seed_all(seed: int = 0):
    """
    Set the random seeds of all components (Python, NumPy, PyTorch CPU and CUDA).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
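Example usage (a minimal sketch; the seed value is arbitrary). Note that this helper seeds the Python, NumPy, and PyTorch RNGs but does not enable cuDNN determinism (torch.backends.cudnn.deterministic), which fully reproducible GPU runs may additionally require:

from util.seed_all import seed_all

# Seed all RNGs before inference so repeated runs produce the same ensemble.
seed_all(2024)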