dylanebert committed
Commit 1bad10f · Parent(s): 5e1c565

Correct final rotation

Files changed:
- lgm/lgm.py   +25 -0
- pipeline.py  +0 -16
lgm/lgm.py CHANGED

@@ -285,6 +285,31 @@ class LGM(ModelMixin, ConfigMixin):
         rotation = self.rot_act(x[..., 7:11])
         rgbs = self.rgb_act(x[..., 11:])
 
+        q = torch.tensor([0, 0, 1, 0], dtype=pos.dtype, device=pos.device)
+        R = torch.tensor(
+            [
+                [-1, 0, 0],
+                [0, -1, 0],
+                [0, 0, 1],
+            ],
+            dtype=pos.dtype,
+            device=pos.device,
+        )
+
+        pos = torch.matmul(pos, R.T)
+
+        def multiply_quat(q1, q2):
+            w1, x1, y1, z1 = q1.unbind(-1)
+            w2, x2, y2, z2 = q2.unbind(-1)
+            w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
+            x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
+            y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
+            z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2
+            return torch.stack([w, x, y, z], dim=-1)
+
+        for i in range(B):
+            rotation[i, :] = multiply_quat(q, rotation[i, :])
+
         gaussians = torch.cat([pos, opacity, scale, rotation, rgbs], dim=-1)
 
         return gaussians
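For reference, the block added above rotates every gaussian position by 180 degrees about the z-axis (R = diag(-1, -1, 1)) and pre-multiplies every orientation quaternion, stored as (w, x, y, z), by the constant q = (0, 0, 1, 0). Below is a minimal standalone sketch of the same correction; the [B, N, ...] layout and the variable names are assumptions for illustration, not values taken from the repository.

import torch
import torch.nn.functional as F

# Dummy gaussians: positions [B, N, 3] and unit quaternions [B, N, 4] in (w, x, y, z) order.
B, N = 2, 4
pos = torch.randn(B, N, 3)
rotation = F.normalize(torch.randn(B, N, 4), dim=-1)

# 180-degree rotation about z applied to positions, as in the diff.
R = torch.tensor([[-1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, 1.0]])
pos = pos @ R.T

def multiply_quat(q1, q2):
    # Hamilton product; mirrors multiply_quat in the diff above.
    w1, x1, y1, z1 = q1.unbind(-1)
    w2, x2, y2, z2 = q2.unbind(-1)
    return torch.stack([
        w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
        w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
        w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2,
        w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2,
    ], dim=-1)

# q broadcasts against [B, N, 4], so no per-batch loop is needed in this sketch.
q = torch.tensor([0.0, 0.0, 1.0, 0.0])
rotation = multiply_quat(q, rotation)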
pipeline.py CHANGED

@@ -1,5 +1,4 @@
 import numpy as np
-import rembg
 import torch
 import torch.nn.functional as F
 import torchvision.transforms.functional as TF
@@ -10,8 +9,6 @@ class LGMPipeline(DiffusionPipeline):
     def __init__(self, lgm):
         super().__init__()
 
-        self.bg_remover = rembg.new_session()
-
         self.imagenet_default_mean = (0.485, 0.456, 0.406)
         self.imagenet_default_std = (0.229, 0.224, 0.225)
 
@@ -23,19 +20,6 @@ class LGMPipeline(DiffusionPipeline):
 
     @torch.no_grad()
     def __call__(self, images):
-        unstacked = []
-        for i in range(4):
-            image = rembg.remove(images[i], session=self.bg_remover)
-            image = images.astype(np.float32) / 255.0
-            image = image[..., :3] * image[..., -1:] + (1 - image[..., -1:])
-            unstacked.append(image)
-        images = np.concatenate(
-            [
-                np.concatenate([unstacked[1], unstacked[2]], axis=1),
-                np.concatenate([unstacked[3], unstacked[0]], axis=1),
-            ],
-            axis=0,
-        )
         images = np.stack([images[1], images[2], images[3], images[0]], axis=0)
         images = torch.from_numpy(images).permute(0, 3, 1, 2).float().cuda()
         images = F.interpolate(
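The pipeline.py change removes rembg and the background-removal/compositing step from LGMPipeline.__call__, so the input views presumably need to arrive already background-removed and composited onto white. A rough caller-side sketch under that assumption follows; preprocess_view is a hypothetical helper, not part of the repository, and it only mirrors the compositing logic of the deleted code.

import numpy as np
import rembg

# One shared rembg session, as the deleted __init__ code did.
_session = rembg.new_session()

def preprocess_view(view_uint8: np.ndarray) -> np.ndarray:
    """Hypothetical helper: strip the background of one HxWx3 uint8 view and composite on white."""
    rgba = rembg.remove(view_uint8, session=_session)  # expected HxWx4 uint8
    rgba = rgba.astype(np.float32) / 255.0
    # Alpha-composite onto a white background, matching the removed pipeline code.
    return rgba[..., :3] * rgba[..., 3:] + (1.0 - rgba[..., 3:])

# views = np.stack([preprocess_view(v) for v in four_rgb_views])  # then pass to the pipeline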