haodongli committed on
Commit
6b12a63
·
1 Parent(s): d9a5049
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete change set.
Files changed (50)
  1. .gitattributes +2 -0
  2. .gitignore +2 -0
  3. app.py +165 -0
  4. assets/badges/icon2.png +3 -0
  5. assets/badges/teaser.jpg +3 -0
  6. assets/demos/a0.png +3 -0
  7. assets/demos/a1.png +3 -0
  8. assets/demos/a10.png +3 -0
  9. assets/demos/a11.png +3 -0
  10. assets/demos/a2.png +3 -0
  11. assets/demos/a3.png +3 -0
  12. assets/demos/a4.png +3 -0
  13. assets/demos/a5.png +3 -0
  14. assets/demos/a6.png +3 -0
  15. assets/demos/a7.png +3 -0
  16. assets/demos/a8.png +3 -0
  17. assets/demos/a9.png +3 -0
  18. assets/demos/b0.png +3 -0
  19. assets/demos/b1.png +3 -0
  20. assets/demos/b2.png +3 -0
  21. assets/demos/b3.png +3 -0
  22. assets/demos/b4.png +3 -0
  23. assets/demos/b5.png +3 -0
  24. assets/masks/b0.png +3 -0
  25. assets/masks/b1.png +3 -0
  26. assets/masks/b2.png +3 -0
  27. assets/masks/b3.png +3 -0
  28. assets/masks/b4.png +3 -0
  29. assets/masks/b5.png +3 -0
  30. configs/accelerate/0.yaml +16 -0
  31. configs/infer.json +39 -0
  32. requirements.txt +1 -0
  33. src/da2.egg-info/PKG-INFO +23 -0
  34. src/da2.egg-info/SOURCES.txt +28 -0
  35. src/da2.egg-info/dependency_links.txt +1 -0
  36. src/da2.egg-info/requires.txt +18 -0
  37. src/da2.egg-info/top_level.txt +1 -0
  38. src/da2/__init__.py +25 -0
  39. src/da2/__pycache__/__init__.cpython-312.pyc +0 -0
  40. src/da2/model/__init__.py +11 -0
  41. src/da2/model/__pycache__/__init__.cpython-312.pyc +0 -0
  42. src/da2/model/__pycache__/base.cpython-312.pyc +0 -0
  43. src/da2/model/__pycache__/sphere.cpython-312.pyc +0 -0
  44. src/da2/model/__pycache__/spherevit.cpython-312.pyc +0 -0
  45. src/da2/model/__pycache__/vit_w_esphere.cpython-312.pyc +0 -0
  46. src/da2/model/base.py +393 -0
  47. src/da2/model/dinov2/__init__.py +13 -0
  48. src/da2/model/dinov2/__pycache__/__init__.cpython-312.pyc +0 -0
  49. src/da2/model/dinov2/__pycache__/attention.cpython-312.pyc +0 -0
  50. src/da2/model/dinov2/__pycache__/block.cpython-312.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ cache/
+ output/
app.py ADDED
@@ -0,0 +1,165 @@
+ import os
+ from accelerate import Accelerator
+ from accelerate.logging import get_logger
+ from accelerate.utils import (
+     InitProcessGroupKwargs,
+     ProjectConfiguration,
+     set_seed
+ )
+ import torch
+ from contextlib import nullcontext
+ import trimesh
+ import gradio as gr
+ from gradio_imageslider import ImageSlider
+ from da2.utils.base import load_config
+ from da2.utils.model import load_model
+ from da2.utils.io import (
+     read_cv2_image,
+     torch_transform,
+     tensorize
+ )
+ from da2.utils.vis import colorize_distance
+ from da2.utils.d2pc import distance2pointcloud
+ from datetime import (
+     timedelta,
+     datetime
+ )
+ import cv2
+ import numpy as np
+
+ last_glb_path = None
+
+ def prepare_to_run_demo():
+     config = load_config('configs/infer.json')
+     kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=config['accelerator']['timeout']))
+     output_dir = f'output/infer'
+     if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
+     accu_steps = config['accelerator']['accumulation_nsteps']
+     accelerator = Accelerator(
+         gradient_accumulation_steps=accu_steps,
+         mixed_precision=config['accelerator']['mixed_precision'],
+         log_with=config['accelerator']['report_to'],
+         project_config=ProjectConfiguration(project_dir=output_dir),
+         kwargs_handlers=[kwargs]
+     )
+     logger = get_logger(__name__, log_level='INFO')
+     config['env']['logger'] = logger
+     set_seed(config['env']['seed'])
+     return config, accelerator
+
+ def read_mask_demo(mask_path, shape):
+     if mask_path is None:
+         return np.ones(shape[1:]) > 0
+     mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
+     mask = mask > 0
+     return mask
+
+ def load_infer_data_demo(image, mask, model_dtype, device):
+     cv2_image = read_cv2_image(image)
+     image = torch_transform(cv2_image)
+     mask = read_mask_demo(mask, image.shape)
+     image = tensorize(image, model_dtype, device)
+     return image, cv2_image, mask
+
+ def ply2glb(ply_path, glb_path):
+     pcd = trimesh.load(ply_path)
+     points = np.asarray(pcd.vertices)
+     colors = np.asarray(pcd.visual.vertex_colors)
+     cloud = trimesh.points.PointCloud(vertices=points, colors=colors)
+     cloud.export(glb_path)
+     os.remove(ply_path)
+
+ def fn(image_path, mask_path):
+     global last_glb_path
+     config, accelerator = prepare_to_run_demo()
+     model = load_model(config, accelerator)
+     image, cv2_image, mask = load_infer_data_demo(image_path, mask_path,
+         model_dtype=config['spherevit']['dtype'], device=accelerator.device)
+     if torch.backends.mps.is_available():
+         autocast_ctx = nullcontext()
+     else:
+         autocast_ctx = torch.autocast(accelerator.device.type)
+     with autocast_ctx, torch.no_grad():
+         distance = model(image).cpu().numpy()[0]
+     if last_glb_path is not None:
+         os.remove(last_glb_path)
+     distance_vis = colorize_distance(distance, mask)
+     save_path = f'cache/tmp_{datetime.now().strftime("%Y%m%d_%H%M%S")}.glb'
+     last_glb_path = save_path
+     normal_image = distance2pointcloud(distance, cv2_image, mask, save_path=save_path.replace('.glb', '.ply'), return_normal=True, save_distance=False)
+     ply2glb(save_path.replace('.glb', '.ply'), save_path)
+     return save_path, [distance_vis, normal_image]
+
+ inputs = [
+     gr.Image(label="Input Image", type="filepath"),
+     gr.Image(label="Input Mask", type="filepath"),
+ ]
+ outputs = [
+     gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="3D Point Cloud"),
+     gr.ImageSlider(
+         label="Output Depth / Normal (transformed from the depth)",
+         type="pil",
+         slider_position=75,
+     )
+ ]
+
+ demo = gr.Interface(
+     fn=fn,
+     title="DA<sup>2</sup>: <u>D</u>epth <u>A</u>nything in <u>A</u>ny <u>D</u>irection",
+     description="""
+     <p align="center">
+     <a title="Project Page" href="https://depth-any-in-any-dir.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://img.shields.io/badge/Project-Website-pink?logo=googlechrome&logoColor=white">
+     </a>
+     <a title="arXiv" href="http://arxiv.org/abs/2509.26618" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=white">
+     </a>
+     <a title="Github" href="https://github.com/EnVision-Research/DA-2" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://img.shields.io/github/stars/EnVision-Research/DA-2?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="badge-github-stars">
+     </a>
+     <a title="Social" href="https://x.com/_akhaliq/status/1973283687652606411" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
+     </a>
+     <a title="Social" href="https://x.com/haodongli00/status/1973287870317338747" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
+     </a>
+     <br>
+     <strong>Please consider starring <span style="color: orange">&#9733;</span> our <a href="https://github.com/EnVision-Research/DA-2" target="_blank" rel="noopener noreferrer">GitHub Repo</a> if you find this demo useful!</strong>
+     </p>
+     <p><strong>Note: the "Input Mask" is optional, all pixels are assumed to be valid if mask is None.</strong></p>
+     """,
+     inputs=inputs,
+     outputs=outputs,
+     examples=[
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a1.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a2.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a3.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a4.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/b0.png"),
+             os.path.join(os.path.dirname(__file__), "assets/masks/b0.png")],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/b1.png"),
+             os.path.join(os.path.dirname(__file__), "assets/masks/b1.png")],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a5.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a6.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a7.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a8.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/b2.png"),
+             os.path.join(os.path.dirname(__file__), "assets/masks/b2.png")],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/b3.png"),
+             os.path.join(os.path.dirname(__file__), "assets/masks/b3.png")],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a9.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a10.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a11.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/a0.png"), None],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/b4.png"),
+             os.path.join(os.path.dirname(__file__), "assets/masks/b4.png")],
+         [os.path.join(os.path.dirname(__file__), "assets/demos/b5.png"),
+             os.path.join(os.path.dirname(__file__), "assets/masks/b5.png")],
+     ],
+     examples_per_page=20
+ )
+
+ demo.launch(
+     server_name="0.0.0.0",
+     server_port=6381,
+ )
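For reference, the same inference path can be exercised without the Gradio UI. The sketch below simply mirrors the calls that `fn()` in `app.py` makes (config loading, `Accelerator` setup, `load_model`, the autocast forward pass, colorization, and point-cloud export). It assumes an editable install of `src/` so that `da2` is importable; the demo image and output paths are illustrative placeholders.

```python
# Minimal, UI-free sketch of the inference path used by app.py's fn().
# Assumes `pip install -e src/` has been run and configs/infer.json is present.
import os
import torch
import numpy as np
from contextlib import nullcontext
from datetime import timedelta
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import InitProcessGroupKwargs, ProjectConfiguration, set_seed

from da2.utils.base import load_config
from da2.utils.model import load_model
from da2.utils.io import read_cv2_image, torch_transform, tensorize
from da2.utils.vis import colorize_distance
from da2.utils.d2pc import distance2pointcloud

config = load_config('configs/infer.json')
os.makedirs('output/infer', exist_ok=True)
accelerator = Accelerator(
    gradient_accumulation_steps=config['accelerator']['accumulation_nsteps'],
    mixed_precision=config['accelerator']['mixed_precision'],
    log_with=config['accelerator']['report_to'],
    project_config=ProjectConfiguration(project_dir='output/infer'),
    kwargs_handlers=[InitProcessGroupKwargs(
        timeout=timedelta(seconds=config['accelerator']['timeout']))],
)
config['env']['logger'] = get_logger(__name__, log_level='INFO')
set_seed(config['env']['seed'])
model = load_model(config, accelerator)

# Preprocess one bundled demo image exactly as load_infer_data_demo() does;
# with no mask, every pixel is treated as valid.
cv2_image = read_cv2_image('assets/demos/a0.png')
image = torch_transform(cv2_image)
mask = np.ones(image.shape[1:]) > 0
# model_dtype follows app.py: config['spherevit']['dtype']
image = tensorize(image, config['spherevit']['dtype'], accelerator.device)

autocast_ctx = nullcontext() if torch.backends.mps.is_available() \
    else torch.autocast(accelerator.device.type)
with autocast_ctx, torch.no_grad():
    distance = model(image).cpu().numpy()[0]

distance_vis = colorize_distance(distance, mask)   # colorized distance map
normal_image = distance2pointcloud(                # also writes the .ply file
    distance, cv2_image, mask,
    save_path='output/infer/a0.ply',
    return_normal=True, save_distance=False,
)
```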
assets/badges/icon2.png ADDED

Git LFS Details

  • SHA256: d254fc5009dd41b367790aa9e45f05770b81ed62c67d8cc713bee4608567218f
  • Pointer size: 129 Bytes
  • Size of remote file: 6.77 kB
assets/badges/teaser.jpg ADDED

Git LFS Details

  • SHA256: 5c6786218d0a17115e6ed1320434b2b47101290a7e244f2eed1ebe70e4822464
  • Pointer size: 132 Bytes
  • Size of remote file: 1.2 MB
assets/demos/a0.png ADDED

Git LFS Details

  • SHA256: eedc66f98cf0a949602f691c3eed51511ae520cf8f63674abe542741ba6090b8
  • Pointer size: 131 Bytes
  • Size of remote file: 744 kB
assets/demos/a1.png ADDED

Git LFS Details

  • SHA256: 906f336ab4c6561ee85b9cb883a6aa34cf11289fc86b6a4e4382baed56981aa7
  • Pointer size: 131 Bytes
  • Size of remote file: 822 kB
assets/demos/a10.png ADDED

Git LFS Details

  • SHA256: d6d058aef9322964f5d36de90ab91470e283acab248604bcd488a43c680a9e7d
  • Pointer size: 131 Bytes
  • Size of remote file: 882 kB
assets/demos/a11.png ADDED

Git LFS Details

  • SHA256: 45af8c71b8d44880503b5da1b5f67a0d5638860b9f9149cae7d16a3a3975d090
  • Pointer size: 131 Bytes
  • Size of remote file: 848 kB
assets/demos/a2.png ADDED

Git LFS Details

  • SHA256: 6fa931d70c6220cec0b56a9cdf651f12fa35436d937cd2cf481d10dddb2a114e
  • Pointer size: 131 Bytes
  • Size of remote file: 810 kB
assets/demos/a3.png ADDED

Git LFS Details

  • SHA256: a85573ac5d51a261d82b23475488e769bd9b3e392948e60e6dc73f0c7ace762b
  • Pointer size: 131 Bytes
  • Size of remote file: 854 kB
assets/demos/a4.png ADDED

Git LFS Details

  • SHA256: d0a544ec4b542c59f1fbfaf99f86eb60b4c0dbce7c8e4b1bac9e6e23e889c7ec
  • Pointer size: 131 Bytes
  • Size of remote file: 813 kB
assets/demos/a5.png ADDED

Git LFS Details

  • SHA256: 7e36ed78b74223eae24f8c85f1cdab00d1a3a5b494fec807240cb7d3427fad87
  • Pointer size: 131 Bytes
  • Size of remote file: 848 kB
assets/demos/a6.png ADDED

Git LFS Details

  • SHA256: e48031fcd3e5a84e4ea4513a23e2ec8150f8ec3fbdae1d4b2d51fc67ac588fe6
  • Pointer size: 131 Bytes
  • Size of remote file: 818 kB
assets/demos/a7.png ADDED

Git LFS Details

  • SHA256: 12b99fdddea8eefb6885114bd386fc4fad0484e13c85c88364a43396f9cef3f9
  • Pointer size: 131 Bytes
  • Size of remote file: 905 kB
assets/demos/a8.png ADDED

Git LFS Details

  • SHA256: 5b29df5b6294742acc43d8ce41073b335e98024459273b77d9b943fd3583ac35
  • Pointer size: 131 Bytes
  • Size of remote file: 784 kB
assets/demos/a9.png ADDED

Git LFS Details

  • SHA256: ba92bf3adf1d1b2a775d5b0f895a16876159fc1a43d98328c923fdc994d6e346
  • Pointer size: 131 Bytes
  • Size of remote file: 910 kB
assets/demos/b0.png ADDED

Git LFS Details

  • SHA256: 3b610ae826372778853553810ef0e07e4f91d8507549dc0f5f32eca038348a37
  • Pointer size: 131 Bytes
  • Size of remote file: 850 kB
assets/demos/b1.png ADDED

Git LFS Details

  • SHA256: 2df3207be859cf8524e9a00a76efb606e626ca4cc9dbd81178fe24de43a6b97b
  • Pointer size: 131 Bytes
  • Size of remote file: 798 kB
assets/demos/b2.png ADDED

Git LFS Details

  • SHA256: 790218133cd507f1f9ca65fcdff60f74325df39ebd0df1d5b6e6261a8dfd29a8
  • Pointer size: 131 Bytes
  • Size of remote file: 863 kB
assets/demos/b3.png ADDED

Git LFS Details

  • SHA256: 843b680077e114451285efc6536e811739cbbab07ade423459a5bc24e747455f
  • Pointer size: 131 Bytes
  • Size of remote file: 651 kB
assets/demos/b4.png ADDED

Git LFS Details

  • SHA256: 5615e49fa1bea5ee049a66bbe577d48dd63f441e86a4ae5b225136e7e2295187
  • Pointer size: 131 Bytes
  • Size of remote file: 804 kB
assets/demos/b5.png ADDED

Git LFS Details

  • SHA256: 7957ee9e54dd6b61b74014412ece3de7bbe999ae0c0be41c4d762d62d8352656
  • Pointer size: 131 Bytes
  • Size of remote file: 669 kB
assets/masks/b0.png ADDED

Git LFS Details

  • SHA256: 7495c6c7672f1b0551f5640a0344a3730744cfa535697307afa917fbf46466ad
  • Pointer size: 129 Bytes
  • Size of remote file: 6.99 kB
assets/masks/b1.png ADDED

Git LFS Details

  • SHA256: 1aea3b6a9a99adbcdb71fcbc9eb5c5f18fbdc36b38829d7ba972183a7ec564e3
  • Pointer size: 129 Bytes
  • Size of remote file: 5.36 kB
assets/masks/b2.png ADDED

Git LFS Details

  • SHA256: 4360d8523cb2309b29ed549c6a7c84dd0d6a3ca5f55720ae43b728668dfe6c9b
  • Pointer size: 129 Bytes
  • Size of remote file: 7.7 kB
assets/masks/b3.png ADDED

Git LFS Details

  • SHA256: d1e6f1d40d8f9e8e5593bf3f5fe67967528b8afcbfaf605658f19004edbdb10d
  • Pointer size: 129 Bytes
  • Size of remote file: 4.57 kB
assets/masks/b4.png ADDED

Git LFS Details

  • SHA256: 8a2a1018ad95749d83193fc0f333e1af04de119857e2564c5fbefa41301f2226
  • Pointer size: 129 Bytes
  • Size of remote file: 5.43 kB
assets/masks/b5.png ADDED

Git LFS Details

  • SHA256: c38cca29eec4baaeb7b765f595f28d13e1fcaf7707bed7ad83277b12eee1f504
  • Pointer size: 129 Bytes
  • Size of remote file: 4.88 kB
configs/accelerate/0.yaml ADDED
@@ -0,0 +1,16 @@
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ distributed_type: 'NO'
+ downcast_bf16: 'no'
+ gpu_ids: '0'
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: 'no'
+ num_machines: 1
+ num_processes: 1
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
configs/infer.json ADDED
@@ -0,0 +1,39 @@
+ {
+     "env": {
+         "seed": 42,
+         "verbose": true
+     },
+     "accelerator": {
+         "report_to": ["tensorboard"],
+         "mixed_precision": "fp16",
+         "accumulation_nsteps": 4,
+         "timeout": 36000
+     },
+     "inference": {
+         "images": "assets/demos",
+         "masks": "assets/masks",
+         "min_pixels": 580000,
+         "max_pixels": 620000
+     },
+     "spherevit": {
+         "vit_w_esphere": {
+             "input_dims": [1024, 1024, 1024, 1024],
+             "hidden_dim": 512,
+             "num_heads": 8,
+             "expansion": 4,
+             "num_layers_head": [2, 2, 2],
+             "dropout": 0.0,
+             "layer_scale": 0.0001,
+             "out_dim": 64,
+             "kernel_size": 3,
+             "num_prompt_blocks": 1,
+             "use_norm": false
+         },
+         "sphere": {
+             "width": 1092,
+             "height": 546,
+             "hfov": 6.2832,
+             "vfov": 3.1416
+         }
+     }
+ }
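The `sphere` block above encodes a full panoramic field of view: `hfov` is approximately 2π radians (360°), `vfov` is approximately π radians (180°), and the 1092×546 grid keeps the usual 2:1 equirectangular aspect. The short check below only interprets the numbers in `configs/infer.json`; how `SphereViT` consumes them is defined in `src/da2/model/sphere.py`, which is not part of this view.

```python
# Sanity check of the "sphere" configuration values shown above.
from math import pi, isclose

sphere = {"width": 1092, "height": 546, "hfov": 6.2832, "vfov": 3.1416}

assert isclose(sphere["hfov"], 2 * pi, rel_tol=1e-4)        # ~360 degrees horizontally
assert isclose(sphere["vfov"], pi, rel_tol=1e-4)            # ~180 degrees vertically
assert sphere["width"] / sphere["height"] == 2.0            # 2:1 equirectangular grid
assert isclose(sphere["hfov"] / sphere["vfov"], 2.0, rel_tol=1e-6)
```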
requirements.txt ADDED
@@ -0,0 +1 @@
+ pip install -e src/
src/da2.egg-info/PKG-INFO ADDED
@@ -0,0 +1,23 @@
+ Metadata-Version: 2.4
+ Name: da2
+ Version: 0.1.0
+ Summary: For the implementation of DA^2: Depth Anything in Any Direction
+ Author-email: "H. Li" <hal211@ucsd.edu>
+ Requires-Dist: torch==2.5.0
+ Requires-Dist: torchvision==0.20.0
+ Requires-Dist: torchaudio==2.5.0
+ Requires-Dist: xformers==0.0.28.post2
+ Requires-Dist: diffusers==0.32.0
+ Requires-Dist: tensorboard==2.18.0
+ Requires-Dist: utils3d@ git+https://github.com/EasternJournalist/utils3d.git@3913c65d81e05e47b9f367250cf8c0f7462a0900
+ Requires-Dist: opencv-python==4.12.0.88
+ Requires-Dist: gradio==5.49.0
+ Requires-Dist: gradio-client==1.13.3
+ Requires-Dist: gradio-imageslider==0.0.20
+ Requires-Dist: accelerate==1.1.1
+ Requires-Dist: omegaconf==2.3.0
+ Requires-Dist: tabulate==0.9.0
+ Requires-Dist: einops==0.8.0
+ Requires-Dist: timm==1.0.15
+ Requires-Dist: trimesh==4.5.2
+ Requires-Dist: transformers==4.46.3
src/da2.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,28 @@
+ pyproject.toml
+ da2/__init__.py
+ da2.egg-info/PKG-INFO
+ da2.egg-info/SOURCES.txt
+ da2.egg-info/dependency_links.txt
+ da2.egg-info/requires.txt
+ da2.egg-info/top_level.txt
+ da2/model/__init__.py
+ da2/model/base.py
+ da2/model/sphere.py
+ da2/model/spherevit.py
+ da2/model/vit_w_esphere.py
+ da2/model/dinov2/__init__.py
+ da2/model/dinov2/attention.py
+ da2/model/dinov2/block.py
+ da2/model/dinov2/dino_head.py
+ da2/model/dinov2/dinovit.py
+ da2/model/dinov2/drop_path.py
+ da2/model/dinov2/layer_scale.py
+ da2/model/dinov2/mlp.py
+ da2/model/dinov2/patch_embed.py
+ da2/model/dinov2/swiglu_ffn.py
+ da2/utils/__init__.py
+ da2/utils/base.py
+ da2/utils/d2pc.py
+ da2/utils/io.py
+ da2/utils/model.py
+ da2/utils/vis.py
src/da2.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
src/da2.egg-info/requires.txt ADDED
@@ -0,0 +1,18 @@
+ torch==2.5.0
+ torchvision==0.20.0
+ torchaudio==2.5.0
+ xformers==0.0.28.post2
+ diffusers==0.32.0
+ tensorboard==2.18.0
+ utils3d@ git+https://github.com/EasternJournalist/utils3d.git@3913c65d81e05e47b9f367250cf8c0f7462a0900
+ opencv-python==4.12.0.88
+ gradio==5.49.0
+ gradio-client==1.13.3
+ gradio-imageslider==0.0.20
+ accelerate==1.1.1
+ omegaconf==2.3.0
+ tabulate==0.9.0
+ einops==0.8.0
+ timm==1.0.15
+ trimesh==4.5.2
+ transformers==4.46.3
src/da2.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ da2
src/da2/__init__.py ADDED
@@ -0,0 +1,25 @@
+ from .utils.base import (
+     prepare_to_run
+ )
+ from .utils.model import (
+     load_model
+ )
+ from .utils.io import (
+     load_infer_data
+ )
+ from .utils.vis import (
+     colorize_distance,
+     concatenate_images
+ )
+ from .utils.d2pc import (
+     distance2pointcloud
+ )
+
+ __all__ = [
+     'prepare_to_run',
+     'load_model',
+     'load_infer_data',
+     'colorize_distance',
+     'concatenate_images',
+     'distance2pointcloud'
+ ]
src/da2/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (494 Bytes).
 
src/da2/model/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .spherevit import (
+     SphereViT
+ )
+ from .vit_w_esphere import (
+     ViT_w_Esphere
+ )
+
+ __all__ = [
+     'SphereViT',
+     'ViT_w_Esphere',
+ ]
src/da2/model/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (294 Bytes).
 
src/da2/model/__pycache__/base.cpython-312.pyc ADDED
Binary file (18.4 kB).
 
src/da2/model/__pycache__/sphere.cpython-312.pyc ADDED
Binary file (2.38 kB).
 
src/da2/model/__pycache__/spherevit.cpython-312.pyc ADDED
Binary file (3.95 kB).
 
src/da2/model/__pycache__/vit_w_esphere.cpython-312.pyc ADDED
Binary file (10.5 kB).
 
src/da2/model/base.py ADDED
@@ -0,0 +1,393 @@
+ import torch
+ import torch.nn as nn
+ from math import log2, pi
+ from typing import Tuple
+ import torch.nn.functional as F
+ from einops import rearrange
+ from functools import partial
+
+
+ def fourier_dimension_expansion(
+     x: torch.Tensor,
+     dim: int = 512,
+     max_freq: int = 64,
+     use_cos: bool = True,
+     use_log: bool = True,
+ ):
+     device, dtype, input_dim = x.device, x.dtype, x.shape[-1]
+     # input_dim: 2
+     num_bands = dim // (2 * input_dim) if use_cos else dim // input_dim
+     # num_bands = 512 // 2 = 256
+     if use_log:
+         scales = 2.0 ** torch.linspace(
+             0.0, log2(max_freq), steps=num_bands, device=device, dtype=dtype
+         )
+     else:
+         scales = torch.linspace(
+             1.0, max_freq / 2, num_bands, device=device, dtype=dtype
+         )
+     x = x.unsqueeze(-1)
+     scales = scales[(*((None,) * (len(x.shape) - 1)), Ellipsis)]
+     x = x * scales * pi
+     x = torch.cat(
+         (
+             [x.sin(), x.cos()]
+             if use_cos
+             else [
+                 x.sin(),
+             ]
+         ),
+         dim=-1,
+     )
+     x = x.flatten(-2)
+     return x
+
+ def flatten(
+     flat_tensor: torch.Tensor,
+     old: Tuple[int, int],
+     new: Tuple[int, int],
+ ) -> torch.Tensor:
+     if old[0] == new[0] and old[1] == new[1]:
+         return flat_tensor
+     tensor = flat_tensor.view(flat_tensor.shape[0], old[0], old[1], -1).permute(
+         0, 3, 1, 2
+     ) # b c h w
+     tensor_interp = F.interpolate(
+         tensor,
+         size=(new[0], new[1]),
+         mode='nearest',
+     )
+     flat_tensor_interp = tensor_interp.view(
+         flat_tensor.shape[0], -1, new[0] * new[1]
+     ).permute(
+         0, 2, 1
+     ) # b (h w) c
+     return flat_tensor_interp.contiguous()
+
+
+ class DimensionAligner(nn.Module):
+     def __init__(self, input_dims: list[int], hidden_dim: int):
+         super().__init__()
+         self.aligners = nn.ModuleList([])
+         self.num_chunks = len(input_dims)
+         self.checkpoint = True
+         for input_dim in input_dims:
+             self.aligners.append(nn.Linear(input_dim, hidden_dim))
+
+     def forward(self, xs: torch.Tensor) -> torch.Tensor:
+         outs = [self.aligners[i](x) for i, x in enumerate(xs)]
+         return outs
+
+
+ class LayerScale(nn.Module):
+     def __init__(
+         self,
+         dim: int,
+         init_values: float | torch.Tensor = 1e-5,
+         inplace: bool = False,
+     ) -> None:
+         super().__init__()
+         self.inplace = inplace
+         self.gamma = nn.Parameter(init_values * torch.ones(dim))
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         return x.mul_(self.gamma) if self.inplace else x * self.gamma
+
+
+ def exists(val):
+     return val is not None
+
+ def default(val, d):
+     if exists(val):
+         return val
+     return d() if callable(d) else d
+
+
+ class SwiGLU(nn.Module):
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x, gates = x.chunk(2, dim=-1)
+         return x * F.silu(gates)
+
+
+ class MLP(nn.Module):
+     def __init__(
+         self,
+         input_dim: int,
+         expansion: int = 4,
+         dropout: float = 0.0,
+         gated: bool = False,
+         output_dim: int | None = None,
+     ):
+         super().__init__()
+         if gated:
+             expansion = int(expansion * 2 / 3)
+         hidden_dim = int(input_dim * expansion)
+         output_dim = default(output_dim, input_dim)
+         self.norm = nn.LayerNorm(input_dim)
+         self.proj1 = nn.Linear(input_dim, hidden_dim)
+         self.proj2 = nn.Linear(hidden_dim, output_dim)
+         self.act = nn.GELU() if not gated else SwiGLU()
+         self.dropout = nn.Dropout(dropout) if dropout > 0.0 else nn.Identity()
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.norm(x)
+         x = self.proj1(x)
+         x = self.act(x)
+         x = self.proj2(x)
+         x = self.dropout(x)
+         return x
+
+
+ class AttentionBlock(nn.Module):
+     def __init__(
+         self,
+         dim: int,
+         num_heads: int = 4,
+         expansion: int = 4,
+         dropout: float = 0.0,
+         cosine: bool = False,
+         gated: bool = False,
+         layer_scale: float = 1.0,
+         context_dim: int | None = None,
+         detach_query: bool = False,
+         residual_ls: bool = False,
+     ):
+         super().__init__()
+         self.dropout = dropout
+         self.num_heads = num_heads
+         self.hidden_dim = dim
+         context_dim = dim if context_dim is None else context_dim
+         self.mlp = MLP(dim, expansion=expansion, dropout=dropout, gated=gated)
+         self.kv = nn.Linear(context_dim, dim * 2, bias=False)
+         self.q = nn.Linear(dim, dim, bias=False)
+         self.norm_attnx = nn.LayerNorm(dim)
+         self.norm_attnctx = nn.LayerNorm(context_dim)
+         self.cosine = cosine
+         self.out = nn.Linear(dim, dim, bias=False)
+         self.ls1_1 = (
+             LayerScale(dim, layer_scale)
+             if layer_scale > 0.0 and not residual_ls
+             else nn.Identity()
+         )
+         self.ls1_2 = (
+             LayerScale(dim, layer_scale)
+             if layer_scale > 0.0 and residual_ls
+             else nn.Identity()
+         )
+         self.ls2 = LayerScale(dim, layer_scale) if layer_scale > 0.0 else nn.Identity()
+         self.detach_query = detach_query
+
+     def attn(
+         self,
+         x: torch.Tensor,
+         attn_bias: torch.Tensor | None = None,
+         context: torch.Tensor | None = None,
+         pos_embed: torch.Tensor | None = None,
+         pos_embed_context: torch.Tensor | None = None,
+         rope: nn.Module | None = None,
+         rope_pos: torch.Tensor | None = None,
+     ) -> torch.Tensor:
+         if self.detach_query:
+             x = x.detach()
+         x = self.norm_attnx(x)
+         context = self.norm_attnctx(context)
+         k, v = rearrange(
+             self.kv(context), 'b n (kv h d) -> b h n d kv', h=self.num_heads, kv=2
+         ).unbind(dim=-1)
+         q = rearrange(self.q(x), 'b n (h d) -> b h n d', h=self.num_heads)
+
+         if rope is not None:
+             q = rope(q.permute(0, 2, 1, 3), input_pos=rope_pos).permute(0, 2, 1, 3)
+             k = rope(k.permute(0, 2, 1, 3), input_pos=rope_pos).permute(0, 2, 1, 3)
+         else:
+             if pos_embed is not None:
+                 pos_embed = rearrange(
+                     pos_embed, 'b n (h d) -> b h n d', h=self.num_heads
+                 )
+                 q = q + pos_embed
+             if pos_embed_context is not None:
+                 pos_embed_context = rearrange(
+                     pos_embed_context, 'b n (h d) -> b h n d', h=self.num_heads
+                 )
+                 k = k + pos_embed_context
+
+         if self.cosine:
+             q, k = map(partial(F.normalize, p=2, dim=-1), (q, k)) # cosine sim
+
+         x = F.scaled_dot_product_attention(
+             q, k, v, dropout_p=self.dropout, attn_mask=attn_bias
+         )
+         x = rearrange(x, 'b h n d -> b n (h d)')
+         x = self.out(x)
+         return x
+
+     def forward(
+         self,
+         x: torch.Tensor,
+         context: torch.Tensor | None = None,
+         pos_embed: torch.Tensor | None = None,
+         pos_embed_context: torch.Tensor | None = None,
+         attn_bias: torch.Tensor | None = None,
+         rope: nn.Module | None = None,
+         rope_pos: torch.Tensor | None = None,
+     ) -> torch.Tensor:
+         context = x if context is None else context
+         x = self.ls1_1(
+             self.attn(
+                 x,
+                 rope=rope,
+                 rope_pos=rope_pos,
+                 attn_bias=attn_bias,
+                 context=context,
+                 pos_embed=pos_embed,
+                 pos_embed_context=pos_embed_context,
+             )
+         ) + self.ls1_2(x)
+         x = self.ls2(self.mlp(x)) + x
+         return x
+
+
+ class AttentionSeq(nn.Module):
+     def __init__(
+         self,
+         num_blocks: int,
+         dim: int,
+         num_heads: int = 4,
+         expansion: int = 4,
+         dropout: float = 0.0,
+         cosine: bool = False,
+         gated: bool = False,
+         layer_scale: float = 1.0,
+         context_dim: int | None = None,
+         detach_query: bool = False,
+         residual_ls: bool = False,
+     ):
+         super().__init__()
+         self.layers = nn.ModuleList(
+             [
+                 AttentionBlock(
+                     dim=dim,
+                     num_heads=num_heads,
+                     expansion=expansion,
+                     dropout=dropout,
+                     cosine=cosine,
+                     gated=gated,
+                     layer_scale=layer_scale,
+                     context_dim=context_dim,
+                     detach_query=detach_query,
+                     residual_ls=residual_ls,
+                 )
+                 for _ in range(num_blocks)
+             ]
+         )
+
+     def forward(
+         self,
+         x: torch.Tensor,
+         context: torch.Tensor | None = None,
+         pos_embed: torch.Tensor | None = None,
+         pos_embed_context: torch.Tensor | None = None,
+         attn_bias: torch.Tensor | None = None,
+         rope: nn.Module | None = None,
+         rope_pos: torch.Tensor | None = None,
+     ) -> torch.Tensor:
+         for layer in self.layers:
+             x = layer(
+                 x,
+                 context=context,
+                 pos_embed=pos_embed,
+                 pos_embed_context=pos_embed_context,
+                 attn_bias=attn_bias,
+                 rope=rope,
+                 rope_pos=rope_pos,
+             )
+         return x
+
+
+ class ResidualConvNet(nn.Module):
+     def __init__(
+         self,
+         dim,
+         kernel_size: int = 3,
+         padding_mode: str = 'zeros',
+         dilation: int = 1,
+         layer_scale: float = 1.0,
+         use_norm: bool = False,
+     ):
+         super().__init__()
+         self.conv1 = nn.Conv2d(
+             dim,
+             dim,
+             kernel_size=kernel_size,
+             padding=dilation * (kernel_size - 1) // 2,
+             dilation=dilation,
+             padding_mode=padding_mode,
+         )
+         self.conv2 = nn.Conv2d(
+             dim,
+             dim,
+             kernel_size=kernel_size,
+             padding=dilation * (kernel_size - 1) // 2,
+             dilation=dilation,
+             padding_mode=padding_mode,
+         )
+         self.activation = nn.LeakyReLU()
+         self.gamma = (
+             nn.Parameter(layer_scale * torch.ones(1, dim, 1, 1))
+             if layer_scale > 0.0
+             else 1.0
+         )
+         self.norm1 = nn.GroupNorm(dim // 16, dim) if use_norm else nn.Identity()
+         self.norm2 = nn.GroupNorm(dim // 16, dim) if use_norm else nn.Identity()
+
+     def forward(self, x):
+         out = self.activation(x)
+         out = self.conv1(out)
+         out = self.norm1(out)
+         out = self.activation(out)
+         out = self.conv2(out)
+         out = self.norm2(out)
+         return self.gamma * out + x
+
+
+ class ResidualUpsampler(nn.Module):
+     def __init__(
+         self,
+         hidden_dim,
+         output_dim: int = None,
+         num_layers: int = 2,
+         kernel_size: int = 3,
+         layer_scale: float = 1.0,
+         padding_mode: str = 'zeros',
+         use_norm: bool = False,
+         **kwargs,
+     ):
+         super().__init__()
+         output_dim = output_dim if output_dim is not None else hidden_dim // 2
+         self.convs = nn.ModuleList([])
+         for _ in range(num_layers):
+             self.convs.append(
+                 ResidualConvNet(
+                     hidden_dim,
+                     kernel_size=kernel_size,
+                     layer_scale=layer_scale,
+                     padding_mode=padding_mode,
+                     use_norm=use_norm,
+                 )
+             )
+         self.up = nn.Sequential(
+             nn.Conv2d(
+                 hidden_dim,
+                 output_dim,
+                 kernel_size=1,
+                 padding=0,
+                 padding_mode=padding_mode,
+             ),
+             nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
+         )
+
+     def forward(self, x: torch.Tensor):
+         for conv in self.convs:
+             x = conv(x)
+         x = self.up(x)
+         return x
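As a quick illustration of the positional-encoding helper defined at the top of this file, the hedged snippet below checks the output shape of `fourier_dimension_expansion` for 2-D input coordinates (e.g. per-pixel spherical angles) with the default `use_cos=True`. It assumes the package is importable after `pip install -e src/`; the dummy tensor is purely illustrative.

```python
# Shape check for fourier_dimension_expansion(): with dim=512 and 2-D inputs,
# each coordinate pair is lifted to a 512-channel sin/cos Fourier embedding.
import torch
from da2.model.base import fourier_dimension_expansion

angles = torch.rand(1, 16, 2)                      # (batch, tokens, 2) dummy coordinates
emb = fourier_dimension_expansion(angles, dim=512, max_freq=64)

# 512 // (2 * 2) = 128 frequency bands, times 2 coordinates, times sin+cos -> 512 channels
assert emb.shape == (1, 16, 512)
```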
src/da2/model/dinov2/__init__.py ADDED
@@ -0,0 +1,13 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from .dinovit import (
+     DINOViT
+ )
+
+ __all__ = [
+     'DINOViT'
+ ]
src/da2/model/dinov2/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (233 Bytes).
 
src/da2/model/dinov2/__pycache__/attention.cpython-312.pyc ADDED
Binary file (4.13 kB).
 
src/da2/model/dinov2/__pycache__/block.cpython-312.pyc ADDED
Binary file (13.5 kB).