responsible AI

app.py (CHANGED)
@@ -1,13 +1,13 @@
 """
-Advanced 3D Reconstruction from Single Images
-
 """
 
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
-from transformers import GLPNForDepthEstimation, GLPNImageProcessor
 import open3d as o3d
 import plotly.graph_objects as go
 import matplotlib.pyplot as plt
@@ -17,182 +17,144 @@ import time
 from pathlib import Path
 import tempfile
 import zipfile
 
 # ============================================================================
-#
 # ============================================================================
-THEORY_TEXT = """
-## Theoretical Background
-
-## About This Tool
-
-This application demonstrates how artificial intelligence can convert single 2D photographs into interactive 3D models automatically.
-
-### What Makes This Special
-
-**Traditional Approach:**
-- Need special equipment (3D scanner, multiple cameras)
-- Requires technical expertise
-- Time-consuming process
-- Expensive
----
-
-## The Technology
-
-### AI Models Used
-
-This tool uses state-of-the-art artificial intelligence models:
-
-### Depth Estimation Technology
-
-**GLPN (Global-Local Path Networks)**
-- Paper: Kim et al., CVPR 2022
-- Optimized for: Indoor/outdoor architectural scenes
-- Training: NYU Depth V2 (urban indoor environments)
-- Best for: Building interiors, street-level views, architectural details
-- Geographic advantage: Fast processing for field documentation
-
-**DPT (Dense Prediction Transformer)**
-- Paper: Ranftl et al., ICCV 2021
-- Optimized for: Complex urban scenes
-- Training: Multiple datasets (urban and natural environments)
-- Best for: Wide-area urban landscapes, complex built environments
-- Geographic advantage: Superior accuracy for planning-grade documentation
-
-### How It Works (Simple)
-1. **AI looks at photo** → Recognizes objects, patterns, perspective
-2. **Estimates distance** → Figures out what's close, what's far
-3. **Creates 3D points** → Places colored dots in 3D space
-4. **Builds surface** → Connects dots into smooth shape
 
-
-**
-
-**
-
-**
-
-1. **Scale Ambiguity**: Requires ground control points for absolute measurements
-2. **Single Viewpoint**: Cannot capture occluded facades or hidden spaces
-3. **No Georeferencing**: Outputs in local coordinates, not global (lat/lon)
-4. **Weather Dependent**: Best results with clear, well-lit conditions
-
-### Comparison with Traditional Geospatial Methods
-
-**vs. Terrestrial Laser Scanning (TLS):**
-- Pro: Much cheaper, faster, more accessible
-- Con: Lower accuracy, no absolute scale
-- Use case: Preliminary surveys, community documentation
 
-
-- Use case: Quick assessments, emergency documentation
 
-
-- Use case: Educational purposes, preliminary studies
 
 
 # ============================================================================
 # MODEL LOADING
 # ============================================================================
 
-print("Loading GLPN model...")
-
 
 # DPT will be loaded on demand
 dpt_model = None
 dpt_processor = None
 
 # ============================================================================
-# CORE 3D RECONSTRUCTION
 # ============================================================================
 
 def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
-    """
 
     def _generate_quality_assessment(metrics):
-        """Generate quality assessment based on metrics"""
         assessment = []
-
-        # Check outlier removal
         outlier_pct = (metrics['outliers_removed'] / metrics['initial_points']) * 100
         if outlier_pct < 5:
-            assessment.append("Very clean depth estimation
         elif outlier_pct < 15:
-            assessment.append("Good depth quality
         else:
             assessment.append("High noise in depth estimation")
 
-        # Check manifold properties
         if metrics['is_edge_manifold'] and metrics['is_vertex_manifold']:
-            assessment.append("Excellent topology
         elif metrics['is_vertex_manifold']:
-            assessment.append("Good local topology
         else:
-            assessment.append("Topology issues present
 
-        # Check watertight
         if metrics['is_watertight']:
             assessment.append("Watertight mesh - ready for 3D printing!")
         else:
-            assessment.append("Not watertight -
-
-        # Check complexity
-        if metrics['triangles'] > 1000000:
-            assessment.append("Very detailed mesh - may be slow in some software")
-        elif metrics['triangles'] > 500000:
-            assessment.append("High detail mesh - good quality")
-        else:
-            assessment.append("Moderate detail - good balance of quality and performance")
 
        return "\n".join(f"- {item}" for item in assessment)
 
-    if
-        return None, None, None, "
 
     try:
-        print(
 
-        #
-        print("Step 1: Preprocessing image...")
        new_height = 480 if image.height > 480 else image.height
        new_height -= (new_height % 32)
        new_width = int(new_height * image.width / image.height)
@@ -200,19 +162,19 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
         new_width = new_width - diff if diff < 16 else new_width + (32 - diff)
         new_size = (new_width, new_height)
         image = image.resize(new_size, Image.LANCZOS)
-        print(f"Image resized to: {new_size}")
 
-        #
-        print("Step 2: Estimating depth...")
         if model_choice == "GLPN (Recommended)":
             processor = glpn_processor
             model = glpn_model
-        else:
             global dpt_model, dpt_processor
             if dpt_model is None:
                 print("Loading DPT model (first time only)...")
                 dpt_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
                 dpt_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
             processor = dpt_processor
             model = dpt_model
 
@@ -222,34 +184,25 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
         with torch.no_grad():
             outputs = model(**inputs)
             predicted_depth = outputs.predicted_depth
-
         depth_time = time.time() - start_time
-        print(f"Depth estimation completed in {depth_time:.2f}s")
 
-        # Process depth
         pad = 16
         output = predicted_depth.squeeze().cpu().numpy() * 1000.0
         output = output[pad:-pad, pad:-pad]
         image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad))
 
-        # Ensure depth and image have same dimensions
         depth_height, depth_width = output.shape
         img_width, img_height = image_cropped.size
 
-        print(f"After crop - Depth shape: {output.shape}, Image size: {image_cropped.size}")
-
-        # Resize depth to match image if needed
         if depth_height != img_height or depth_width != img_width:
-            print(f"Resizing depth from ({depth_height}, {depth_width}) to ({img_height}, {img_width})")
             from scipy import ndimage
             zoom_factors = (img_height / depth_height, img_width / depth_width)
             output = ndimage.zoom(output, zoom_factors, order=1)
-            print(f"Depth resized to: {output.shape}")
 
         image = image_cropped
 
-        #
-        print("Step 3: Creating depth visualization...")
         fig, ax = plt.subplots(1, 2, figsize=(14, 7))
         ax[0].imshow(image)
         ax[0].set_title('Original Image', fontsize=14, fontweight='bold')
@@ -267,13 +220,10 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
         depth_viz = Image.open(buf)
         plt.close()
 
-        #
-        print("Step 4: Generating point cloud...")
         width, height = image.size
 
-        # Ensure depth map matches image size exactly
         if output.shape != (height, width):
-            print(f"Final check - resizing depth from {output.shape} to ({height}, {width})")
             from scipy import ndimage
             zoom_factors = (height / output.shape[0], width / output.shape[1])
             output = ndimage.zoom(output, zoom_factors, order=1)
@@ -281,8 +231,6 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
         depth_image = (output * 255 / np.max(output)).astype(np.uint8)
         image_array = np.array(image)
 
-        print(f"Creating RGBD - Image: {image_array.shape}, Depth: {depth_image.shape}")
-
         depth_o3d = o3d.geometry.Image(depth_image)
         image_o3d = o3d.geometry.Image(image_array)
         rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
@@ -294,46 +242,35 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
 
         pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
         initial_points = len(pcd.points)
-        print(f"Initial point cloud: {initial_points} points")
 
-        #
-        print("Step 5: Cleaning point cloud...")
         cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
         pcd = pcd.select_by_index(ind)
         outliers_removed = initial_points - len(pcd.points)
-        print(f"Removed {outliers_removed} outliers")
 
-        #
-        print("Step 6: Estimating normals...")
         pcd.estimate_normals()
         pcd.orient_normals_to_align_with_direction()
 
-        #
-        print("Step 7: Creating mesh...")
         mesh_start = time.time()
         mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-            pcd, depth=
         )[0]
 
-        # Transfer colors
-        print("Transferring colors to mesh...")
         pcd_tree = o3d.geometry.KDTreeFlann(pcd)
         mesh_colors = []
         for vertex in mesh.vertices:
-            # Find nearest point in point cloud
             [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1)
-            # Get color from nearest point
             mesh_colors.append(pcd.colors[idx[0]])
         mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors))
 
-        # Rotate mesh
         rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
         mesh.rotate(rotation, center=(0, 0, 0))
         mesh_time = time.time() - mesh_start
-        print(f"Mesh created in {mesh_time:.2f}s")
 
-        #
-        print("Step 8: Computing metrics...")
         mesh.compute_vertex_normals()
 
         metrics = {
@@ -351,68 +288,37 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
             'is_watertight': mesh.is_watertight(),
         }
 
-        #
-        surface_area_computed = False
         try:
             surface_area = mesh.get_surface_area()
-            print(f"Surface area (Open3D method): {surface_area}")
             if surface_area > 0:
                 metrics['surface_area'] = float(surface_area)
-                surface_area_computed = True
             else:
-                print("Open3D returned 0, trying manual calculation...")
-        except Exception as e:
-            print(f"Open3D surface area failed: {e}")
-
-        # Fallback: Manual triangle area calculation
-        if not surface_area_computed:
-            try:
                 vertices = np.asarray(mesh.vertices)
                 triangles = np.asarray(mesh.triangles)
-
-                # Get vertices for each triangle
                 v0 = vertices[triangles[:, 0]]
                 v1 = vertices[triangles[:, 1]]
                 v2 = vertices[triangles[:, 2]]
-
-                # Calculate area using cross product
                 cross = np.cross(v1 - v0, v2 - v0)
                 areas = 0.5 * np.linalg.norm(cross, axis=1)
-
-                total_area = np.sum(areas)
-                print(f"Surface area (manual calculation): {total_area}")
-                metrics['surface_area'] = float(total_area)
-                surface_area_computed = True
-            except Exception as e:
-                print(f"Manual surface area calculation failed: {e}")
-                metrics['surface_area'] = "Unable to compute"
-
-        if not surface_area_computed:
             metrics['surface_area'] = "Unable to compute"
 
-        #
         try:
             if mesh.is_watertight():
-                volume = mesh.get_volume()
-                metrics['volume'] = float(volume)
             else:
                 metrics['volume'] = None
-        except Exception as e:
-            print(f"Could not compute volume: {e}")
             metrics['volume'] = None
 
-        print(f"Final surface area: {metrics['surface_area']}")
-        print(f"Edge manifold: {metrics['is_edge_manifold']}")
-        print(f"Watertight: {metrics['is_watertight']}")
-
-        # STEP 9: Create 3D visualization
-        print("Step 9: Creating 3D visualization...")
         points = np.asarray(pcd.points)
         colors = np.asarray(pcd.colors)
 
         if visualization_type == "point_cloud":
-            # Only point cloud
             scatter = go.Scatter3d(
                 x=points[:, 0], y=points[:, 1], z=points[:, 2],
                 mode='markers',
@@ -424,23 +330,18 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
                 name='Point Cloud'
             )
 
-            layout = go.Layout(
                 scene=dict(
                     xaxis=dict(visible=False),
                     yaxis=dict(visible=False),
                     zaxis=dict(visible=False),
-                    aspectmode='data',
-                    camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
                 ),
-                margin=dict(l=0, r=0, t=30, b=0),
                 height=700,
                 title="Point Cloud"
             )
-
-            plotly_fig = go.Figure(data=[scatter], layout=layout)
-
-        elif visualization_type == "mesh":
-            # Only mesh
             vertices = np.asarray(mesh.vertices)
             triangles = np.asarray(mesh.triangles)
 
@@ -453,139 +354,47 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
                 x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
                 i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
                 vertexcolor=colors_rgb,
-                opacity=0.95,
-                name='Mesh',
-                lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2),
-                lightposition=dict(x=100, y=100, z=100)
             )
             else:
                 mesh_trace = go.Mesh3d(
                     x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
                     i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
                     color='lightblue',
-                    opacity=0.9,
-                    name='Mesh'
                 )
 
-            layout = go.Layout(
                 scene=dict(
                     xaxis=dict(visible=False),
                     yaxis=dict(visible=False),
                     zaxis=dict(visible=False),
-                    aspectmode='data',
-                    camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
                 ),
-                margin=dict(l=0, r=0, t=30, b=0),
                 height=700,
                 title="3D Mesh"
             )
-
-            plotly_fig = go.Figure(data=[mesh_trace], layout=layout)
-
-        else:  # both
-            # Create side-by-side subplots
-            from plotly.subplots import make_subplots
-
-            vertices = np.asarray(mesh.vertices)
-            triangles = np.asarray(mesh.triangles)
-
-            # Point cloud scatter
-            scatter = go.Scatter3d(
-                x=points[:, 0], y=points[:, 1], z=points[:, 2],
-                mode='markers',
-                marker=dict(
-                    size=2,
-                    color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
-                           for r, g, b in colors],
-                ),
-                name='Point Cloud'
-            )
-
-            # Mesh trace
-            if mesh.has_vertex_colors():
-                vertex_colors = np.asarray(mesh.vertex_colors)
-                colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
-                              for r, g, b in vertex_colors]
-
-                mesh_trace = go.Mesh3d(
-                    x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
-                    i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
-                    vertexcolor=colors_rgb,
-                    opacity=0.95,
-                    name='Mesh',
-                    lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2),
-                    lightposition=dict(x=100, y=100, z=100)
-                )
-            else:
-                mesh_trace = go.Mesh3d(
-                    x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
-                    i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
-                    color='lightblue',
-                    opacity=0.9,
-                    name='Mesh'
-                )
-
-            # Create side-by-side subplots
-            plotly_fig = make_subplots(
-                rows=1, cols=2,
-                specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}]],
-                subplot_titles=('Point Cloud', '3D Mesh'),
-                horizontal_spacing=0.05
-            )
-
-            # Add traces
-            plotly_fig.add_trace(scatter, row=1, col=1)
-            plotly_fig.add_trace(mesh_trace, row=1, col=2)
-
-            # Update layout
-            plotly_fig.update_layout(
-                scene=dict(
-                    xaxis=dict(visible=False),
-                    yaxis=dict(visible=False),
-                    zaxis=dict(visible=False),
-                    aspectmode='data',
-                    camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
-                ),
-                scene2=dict(
-                    xaxis=dict(visible=False),
-                    yaxis=dict(visible=False),
-                    zaxis=dict(visible=False),
-                    aspectmode='data',
-                    camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
-                ),
-                height=600,
-                showlegend=False,
-                margin=dict(l=0, r=0, t=50, b=0)
-            )
-
-            print("3D visualization created!")
 
-        #
-        print("Step 10: Exporting files...")
         temp_dir = tempfile.mkdtemp()
 
-        # Save point cloud
         pcd_path = Path(temp_dir) / "point_cloud.ply"
         o3d.io.write_point_cloud(str(pcd_path), pcd)
 
-        # Save mesh
         mesh_path = Path(temp_dir) / "mesh.ply"
         o3d.io.write_triangle_mesh(str(mesh_path), mesh)
 
-        # Save mesh as OBJ
         mesh_obj_path = Path(temp_dir) / "mesh.obj"
         o3d.io.write_triangle_mesh(str(mesh_obj_path), mesh)
 
-        # Save mesh as STL
         mesh_stl_path = Path(temp_dir) / "mesh.stl"
         o3d.io.write_triangle_mesh(str(mesh_stl_path), mesh)
 
-        # Save metrics
         metrics_path = Path(temp_dir) / "metrics.json"
         with open(metrics_path, 'w') as f:
             json.dump(metrics, f, indent=2, default=str)
 
-        # Create zip
         zip_path = Path(temp_dir) / "reconstruction_complete.zip"
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
             zipf.write(pcd_path, pcd_path.name)
@@ -594,78 +403,111 @@ def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
             zipf.write(mesh_stl_path, mesh_stl_path.name)
             zipf.write(metrics_path, metrics_path.name)
 
-        print("Files exported!")
-
-        # Create metrics report
         assessment = _generate_quality_assessment(metrics)
 
         report = f"""
 ## Reconstruction Complete!
 
-### Performance
-- **
-- **
-- **Mesh Reconstruction Time**: {metrics['mesh_reconstruction_time']}
-- **Total Processing Time**: {metrics['total_time']}
-
-### Point Cloud Statistics
-- **Initial Points**: {metrics['initial_points']:,}
-- **Outliers Removed**: {metrics['outliers_removed']:,} ({(metrics['outliers_removed']/metrics['initial_points']*100):.1f}%)
-- **Final Points**: {metrics['final_points']:,}
-
-### Mesh Quality
-- **Vertices**: {metrics['vertices']:,}
 - **Triangles**: {metrics['triangles']:,}
-- **Edge Manifold**: {'✓ Good topology' if metrics['is_edge_manifold'] else '✗ Has non-manifold edges'}
-- **Vertex Manifold**: {'✓ Clean vertices' if metrics['is_vertex_manifold'] else '✗ Has non-manifold vertices'}
-- **Watertight**: {'✓ Closed surface (3D printable)' if metrics['is_watertight'] else '✗ Has boundaries (needs repair for 3D printing)'}
-- **Surface Area**: {metrics['surface_area'] if isinstance(metrics['surface_area'], str) else f"{metrics['surface_area']:.2f}"}
-- **Volume**: {f"{metrics['volume']:.2f}" if metrics.get('volume') else 'N/A (not watertight)'}
 
-### Quality
-{assessment}
 
-###
-
-- Mesh: PLY, OBJ, STL formats
-- Quality Metrics: JSON
 
 **Download the complete package below!**
 """
 
-        print("SUCCESS! Returning results...")
         return depth_viz, plotly_fig, str(zip_path), report, json.dumps(metrics, indent=2, default=str)
 
     except Exception as e:
         import traceback
-
 
 # ============================================================================
 # GRADIO INTERFACE
 # ============================================================================
 
-with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as demo:
 
     gr.Markdown("""
-    # 🏗️ 3D
 
     Transform 2D photographs into 3D spatial models
 
-
     """)
 
     with gr.Tabs():
 
-
         with gr.Tab("Reconstruction"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    input_image = gr.Image(
 
-                    gr.Markdown("### Model Settings")
                     model_choice = gr.Radio(
                         choices=["GLPN (Recommended)", "DPT (High Quality)"],
                         value="GLPN (Recommended)",
@@ -673,153 +515,211 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as demo:
                     )
 
                     visualization_type = gr.Radio(
-                        choices=["mesh", "point_cloud"
                         value="mesh",
-                        label="
                     )
 
                     reconstruct_btn = gr.Button("Start Reconstruction", variant="primary", size="lg")
 
                 with gr.Column(scale=2):
-                    depth_output = gr.Image(label="Depth Map
-                    viewer_3d = gr.Plot(label="Interactive 3D Viewer
 
             with gr.Row():
                 with gr.Column():
-                    metrics_output = gr.Markdown(label="
                 with gr.Column():
-                    json_output = gr.Textbox(label="
 
-            download_output = gr.File(label="Download Complete Package (ZIP)")
 
             reconstruct_btn.click(
-                fn=
-                inputs=[input_image, model_choice, visualization_type],
                 outputs=[depth_output, viewer_3d, download_output, metrics_output, json_output]
             )
 
-        # ========== THEORY TAB ==========
         with gr.Tab("Theory & Background"):
-            gr.Markdown(THEORY_TEXT)
-
             gr.Markdown("""
-            ##
 
             3. **Depth Visualization**: Create comparison images
-            4. **Point Cloud Generation**: Back-project using
-            5. **Outlier Removal**: Statistical filtering
-            6. **Normal Estimation**:
-            7. **Mesh Reconstruction**: Poisson surface reconstruction
-            8. **Quality Metrics**: Compute
-            9. **3D Visualization**: Create interactive
-            10. **File Export**: Generate
-
-            ###
-
-            - **Normal Radius**: 0.1 (search radius)
-
-            These parameters are optimized for general use cases and provide good results for most indoor scenes.
-
-            ## Key References
-
-            1. **Kim, D., et al. (2022)**. "Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth." *CVPR 2022*
-            2. **Ranftl, R., et al. (2021)**. "Vision Transformers for Dense Prediction." *ICCV 2021*
-            3. **Kazhdan, M., et al. (2006)**. "Poisson Surface Reconstruction." *Eurographics Symposium on Geometry Processing*
-
-            ## Model Comparison
-
-            | Feature | GLPN (Recommended) | DPT (High Quality) |
-            |---------|-------------------|-------------------|
-            | **Speed** | Fast (~0.3-2.5s) | Slower (~0.8-6.5s) |
-            | **Quality** | Good | Excellent |
-            | **Memory** | Low (~2GB) | High (~5GB) |
-            | **Best For** | Indoor scenes, Real-time | Complex scenes, Highest quality |
-            | **Training** | NYU Depth V2 | Multiple datasets |
-
-            ### When to Use Each Model:
-
-            **Choose GLPN if:**
-            - Processing indoor scenes (rooms, furniture)
-            - Speed is important
-            - Running on limited hardware
-            - Need real-time performance
-
-            **Choose DPT if:**
-            - Need highest quality results
-            - Processing complex/outdoor scenes
-            - Speed is not critical
-            - Have sufficient memory/GPU
             """)
 
-        # ========== USAGE GUIDE TAB ==========
         with gr.Tab("Usage Guide"):
             gr.Markdown("""
             ## How to Use This Application
 
-            ### Step 1:
-            -
-
-            ### Step 2:
-            -
-
-            ### Step
-            - Click "
-
-            ### Step
-            -
 
             ## Viewing Downloaded 3D Files
 
|
@@ -845,31 +745,27 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as de
|
|
| 845 |
- https://www.creators3d.com/online-viewer
|
| 846 |
- Just drag and drop your OBJ/PLY file
|
| 847 |
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
2. Check metrics: Look for "Watertight: β"
|
| 851 |
-
3. Import into your slicer (Cura, PrusaSlicer, etc.)
|
| 852 |
-
4. Scale to desired size
|
| 853 |
-
5. Slice and print!
|
| 854 |
|
| 855 |
## Tips for Best Results
|
| 856 |
|
| 857 |
### DO:
|
| 858 |
-
- Use well-lit images
|
| 859 |
-
- Include
|
| 860 |
-
-
|
| 861 |
-
-
|
| 862 |
-
-
|
| 863 |
-
-
|
| 864 |
|
| 865 |
### AVOID:
|
| 866 |
-
- Motion blur or
|
| 867 |
-
- Reflective surfaces (mirrors,
|
| 868 |
-
-
|
| 869 |
-
-
|
| 870 |
-
-
|
| 871 |
-
-
|
| 872 |
-
|
| 873 |
|
| 874 |
## Understanding the Metrics
|
| 875 |
|
|
@@ -879,16 +775,18 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as de
|
|
| 879 |
- **Final Points**: Clean points used for mesh generation
|
| 880 |
|
| 881 |
### Mesh Quality Indicators:
|
| 882 |
-
-
|
| 883 |
-
-
|
| 884 |
-
-
|
| 885 |
-
-
|
| 886 |
|
| 887 |
### Processing Times:
|
| 888 |
-
- **Depth Estimation**: 0.3-
|
| 889 |
- **Mesh Reconstruction**: 2-10s (depends on point cloud size)
|
| 890 |
- **Total Time**: Usually 10-60 seconds
|
| 891 |
|
|
|
|
|
|
|
| 892 |
## Troubleshooting
|
| 893 |
|
| 894 |
**Problem: No output appears**
|
|
@@ -908,7 +806,6 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as demo:
             - Colors on point cloud are more accurate
 
             **Problem: Processing is very slow**
-            - Try GLPN model instead of DPT
             - Use smaller images
             - This is normal on CPU (GPU is much faster)
 
@@ -916,11 +813,97 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as demo:
             - Common for complex scenes
             - Still usable for visualization
             - For 3D printing: use mesh repair in MeshLab
-
-        with gr.Tab("
             gr.Markdown("""
-
 
             ### For GLPN Model:
             ```bibtex
@@ -932,43 +915,64 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as demo:
             }
             ```
 
-            ### For DPT Model:
             ```bibtex
-            @inproceedings{
-                title={
-                author={
-                booktitle={
-                year={
             }
             ```
 
             ## Open Source Components
 
             This application is built with:
-
             - **Open3D**: Point cloud and mesh processing
             - **PyTorch**: Deep learning framework
             - **Plotly**: Interactive 3D visualization
-            - **Gradio**: Web interface
 
             """)
 
-        # ========== FOOTER ==========
         gr.Markdown("""
         ---
 
-
-        - Research methodology and evaluation
-
         """)
 
-    # ============================================================================
-    # LAUNCH
-    # ============================================================================
-
 if __name__ == "__main__":
     demo.launch(share=True)
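The troubleshooting section above defers mesh repair to MeshLab; as a quicker first pass, Open3D's own TriangleMesh cleanup methods can remove many non-manifold artifacts before re-checking watertightness. A hedged sketch (the input path is hypothetical, standing in for the PLY file this app exports):

```python
# Minimal Open3D mesh cleanup before re-testing watertightness.
import open3d as o3d

mesh = o3d.io.read_triangle_mesh("mesh.ply")  # hypothetical exported file
mesh.remove_degenerate_triangles()            # drop zero-area faces
mesh.remove_duplicated_triangles()
mesh.remove_duplicated_vertices()
mesh.remove_non_manifold_edges()
print("watertight after cleanup:", mesh.is_watertight())
```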
 """
+Advanced 3D Reconstruction from Single Images with Responsible AI Features
+
 """
 
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
+from transformers import GLPNForDepthEstimation, GLPNImageProcessor
 import open3d as o3d
 import plotly.graph_objects as go
 import matplotlib.pyplot as plt
 from pathlib import Path
 import tempfile
 import zipfile
+import hashlib
+from datetime import datetime
 
 # ============================================================================
+# RESPONSIBLE AI GUIDELINES
 # ============================================================================
 
+RESPONSIBLE_AI_NOTICE = """
+## ⚠️ Responsible Use Guidelines
+
+### Privacy & Consent
+- **Do not upload images containing identifiable people without their explicit consent**
+- **Do not use for surveillance, tracking, or monitoring individuals**
+- Facial features may be reconstructed in 3D - consider privacy implications
+- Remove metadata (EXIF) that may contain location or personal information
+
+### Ethical Use
+- This tool is for **educational, research, and creative purposes only**
+- **Prohibited uses:**
+  - Creating deepfakes or misleading 3D content
+  - Unauthorized documentation of private property
+  - Circumventing security systems
+  - Generating 3D models for harassment or stalking
+  - Commercial use without proper rights to source images
+
+### Limitations & Bias
+- Models trained primarily on indoor Western architecture
+- May perform poorly on non-Western architectural styles
+- Scale is relative, not absolute - not suitable for precision measurements
+- Single viewpoint limitations - occluded areas are inferred, not captured
+
+### Data Usage
+- Images are processed locally during your session
+- No images are stored or transmitted to external servers
+- Processing logs contain only technical metrics, no image content
+- You retain all rights to your uploaded images and generated 3D models
+
+**By using this tool, you agree to these responsible use guidelines.**
+"""
 
+# ============================================================================
+# PRIVACY & SAFETY FUNCTIONS
+# ============================================================================
 
+def check_image_safety(image):
+    """Basic safety checks for uploaded images"""
+    warnings = []
+
+    width, height = image.size
+    if width * height > 10_000_000:
+        warnings.append("⚠️ Very large image - consider resizing to improve processing speed")
+
+    aspect_ratio = max(width, height) / min(width, height)
+    if aspect_ratio > 3:
+        warnings.append("⚠️ Unusual aspect ratio detected - ensure image doesn't contain unintended content")
+
+    try:
+        exif = image.getexif()
+        if exif:
+            # 34853 (0x8825) is the EXIF GPS IFD pointer tag
+            has_gps = any(k for k in exif.keys() if k == 34853)
+            if has_gps:
+                warnings.append("⚠️ GPS location data detected in image - consider removing EXIF data for privacy")
+    except Exception:
+        pass
+
+    return True, "\n".join(warnings) if warnings else None
+
+def generate_session_id():
+    """Generate anonymous session ID for logging"""
+    return hashlib.sha256(str(datetime.now()).encode()).hexdigest()[:16]
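`check_image_safety` only warns about embedded GPS data; actually removing the metadata is left to the user. A minimal Pillow sketch for stripping all EXIF before upload (the filenames are hypothetical):

```python
from PIL import Image

def strip_exif(src_path: str, dst_path: str) -> None:
    """Re-save an image from its raw pixels only, dropping EXIF (including GPS tags)."""
    with Image.open(src_path) as img:
        clean = Image.new(img.mode, img.size)
        clean.putdata(list(img.getdata()))  # pixel data only, no metadata
        clean.save(dst_path)

strip_exif("photo.jpg", "photo_clean.jpg")
```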
 
+def content_policy_check(image):
+    """Check if image content violates usage policies"""
+    width, height = image.size
+
+    if width < 100 or height < 100:
+        return False, "Image too small - minimum 100x100 pixels required for meaningful reconstruction"
+
+    return True, None
 
 # ============================================================================
 # MODEL LOADING
 # ============================================================================
 
+print("Loading GLPN model (lightweight)...")
+try:
+    glpn_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
+    glpn_model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")
+    print("✓ GLPN model loaded successfully!")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    glpn_processor = None
+    glpn_model = None
 
 # DPT will be loaded on demand
 dpt_model = None
 dpt_processor = None
 
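For readers who want to try the depth model outside Gradio, here is a minimal standalone sketch of the same GLPN inference call the app performs (the gray test image is a stand-in for a real photo):

```python
import torch
from PIL import Image
from transformers import GLPNForDepthEstimation, GLPNImageProcessor

processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")

image = Image.new("RGB", (640, 480), "gray")         # stand-in input image
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    depth = model(**inputs).predicted_depth          # (1, H, W) relative depth
print(depth.squeeze().shape)
```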
 # ============================================================================
+# CORE 3D RECONSTRUCTION
 # ============================================================================
 
 def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
+    """Optimized processing pipeline"""
     def _generate_quality_assessment(metrics):
         assessment = []
         outlier_pct = (metrics['outliers_removed'] / metrics['initial_points']) * 100
+
         if outlier_pct < 5:
+            assessment.append("Very clean depth estimation")
         elif outlier_pct < 15:
+            assessment.append("Good depth quality")
         else:
             assessment.append("High noise in depth estimation")
 
         if metrics['is_edge_manifold'] and metrics['is_vertex_manifold']:
+            assessment.append("Excellent topology")
         elif metrics['is_vertex_manifold']:
+            assessment.append("Good local topology")
         else:
+            assessment.append("Topology issues present")
 
         if metrics['is_watertight']:
             assessment.append("Watertight mesh - ready for 3D printing!")
         else:
+            assessment.append("Not watertight - needs repair for 3D printing")
 
         return "\n".join(f"- {item}" for item in assessment)
 
+    if glpn_model is None:
+        return None, None, None, "⚠️ Model failed to load. Please refresh the page.", None
     try:
+        print("Starting reconstruction...")
 
+        # Preprocess
         new_height = 480 if image.height > 480 else image.height
         new_height -= (new_height % 32)
         new_width = int(new_height * image.width / image.height)
         new_width = new_width - diff if diff < 16 else new_width + (32 - diff)
         new_size = (new_width, new_height)
         image = image.resize(new_size, Image.LANCZOS)
 
+        # Depth estimation - select model
         if model_choice == "GLPN (Recommended)":
             processor = glpn_processor
             model = glpn_model
+        else:  # DPT (High Quality)
             global dpt_model, dpt_processor
             if dpt_model is None:
                 print("Loading DPT model (first time only)...")
+                from transformers import DPTForDepthEstimation, DPTImageProcessor
                 dpt_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
                 dpt_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+                print("✓ DPT model loaded!")
             processor = dpt_processor
             model = dpt_model
 
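The preprocessing above snaps both image dimensions to multiples of 32, the network's stride requirement. A self-contained sketch of that rounding, assuming the line collapsed in this diff view computes `diff = new_width % 32`:

```python
def snap_size(width: int, height: int, max_h: int = 480):
    """Mirror the app's resize: cap the height, then round both sides to multiples of 32."""
    new_h = min(height, max_h)
    new_h -= new_h % 32                      # round height down
    new_w = int(new_h * width / height)      # preserve aspect ratio
    diff = new_w % 32                        # assumed elided line from the diff
    new_w = new_w - diff if diff < 16 else new_w + (32 - diff)  # round to nearest
    return new_w, new_h

print(snap_size(1920, 1080))  # (864, 480)
```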
         with torch.no_grad():
             outputs = model(**inputs)
             predicted_depth = outputs.predicted_depth
         depth_time = time.time() - start_time
 
+        # Process depth
         pad = 16
         output = predicted_depth.squeeze().cpu().numpy() * 1000.0
         output = output[pad:-pad, pad:-pad]
         image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad))
 
         depth_height, depth_width = output.shape
         img_width, img_height = image_cropped.size
 
         if depth_height != img_height or depth_width != img_width:
             from scipy import ndimage
             zoom_factors = (img_height / depth_height, img_width / depth_width)
             output = ndimage.zoom(output, zoom_factors, order=1)
 
         image = image_cropped
 
+        # Depth visualization
         fig, ax = plt.subplots(1, 2, figsize=(14, 7))
         ax[0].imshow(image)
         ax[0].set_title('Original Image', fontsize=14, fontweight='bold')
         depth_viz = Image.open(buf)
         plt.close()
 
+        # Point cloud generation
         width, height = image.size
 
         if output.shape != (height, width):
             from scipy import ndimage
             zoom_factors = (height / output.shape[0], width / output.shape[1])
             output = ndimage.zoom(output, zoom_factors, order=1)
 
         depth_image = (output * 255 / np.max(output)).astype(np.uint8)
         image_array = np.array(image)
 
         depth_o3d = o3d.geometry.Image(depth_image)
         image_o3d = o3d.geometry.Image(image_array)
         rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
 
         pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
         initial_points = len(pcd.points)
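The RGBD-to-point-cloud step relies on pinhole back-projection; here is a NumPy sketch of the geometry Open3D applies at this point (the intrinsics are illustrative assumptions, not the app's exact values):

```python
import numpy as np

def backproject(depth, fx, fy, cx, cy):
    """Map pixel (u, v) with depth z to X = (u-cx)z/fx, Y = (v-cy)z/fy, Z = z."""
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    return np.stack([x, y, depth], axis=-1).reshape(-1, 3)

pts = backproject(np.ones((480, 640)), fx=500.0, fy=500.0, cx=320.0, cy=240.0)
print(pts.shape)  # (307200, 3)
```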
 
+        # Clean point cloud
         cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
         pcd = pcd.select_by_index(ind)
         outliers_removed = initial_points - len(pcd.points)
 
+        # Estimate normals
         pcd.estimate_normals()
         pcd.orient_normals_to_align_with_direction()
 
+        # Create mesh
         mesh_start = time.time()
         mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
+            pcd, depth=9, n_threads=1
         )[0]
 
+        # Transfer colors
         pcd_tree = o3d.geometry.KDTreeFlann(pcd)
         mesh_colors = []
         for vertex in mesh.vertices:
             [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1)
             mesh_colors.append(pcd.colors[idx[0]])
         mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors))
 
         rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
         mesh.rotate(rotation, center=(0, 0, 0))
         mesh_time = time.time() - mesh_start
 
+        # Metrics
         mesh.compute_vertex_normals()
 
         metrics = {
             'is_watertight': mesh.is_watertight(),
         }
 
+        # Surface area
         try:
             surface_area = mesh.get_surface_area()
             if surface_area > 0:
                 metrics['surface_area'] = float(surface_area)
             else:
                 vertices = np.asarray(mesh.vertices)
                 triangles = np.asarray(mesh.triangles)
                 v0 = vertices[triangles[:, 0]]
                 v1 = vertices[triangles[:, 1]]
                 v2 = vertices[triangles[:, 2]]
                 cross = np.cross(v1 - v0, v2 - v0)
                 areas = 0.5 * np.linalg.norm(cross, axis=1)
+                metrics['surface_area'] = float(np.sum(areas))
+        except Exception:
             metrics['surface_area'] = "Unable to compute"
 
+        # Volume
         try:
             if mesh.is_watertight():
+                metrics['volume'] = float(mesh.get_volume())
             else:
                 metrics['volume'] = None
+        except Exception:
             metrics['volume'] = None
 
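The fallback branch implements the standard cross-product formula for total surface area over all triangles t with vertices v0, v1, v2:

```latex
A = \frac{1}{2} \sum_{t} \left\| \left(\mathbf{v}_1^{(t)} - \mathbf{v}_0^{(t)}\right) \times \left(\mathbf{v}_2^{(t)} - \mathbf{v}_0^{(t)}\right) \right\|
```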
+        # 3D visualization
         points = np.asarray(pcd.points)
         colors = np.asarray(pcd.colors)
 
         if visualization_type == "point_cloud":
             scatter = go.Scatter3d(
                 x=points[:, 0], y=points[:, 1], z=points[:, 2],
                 mode='markers',
                 name='Point Cloud'
             )
 
+            plotly_fig = go.Figure(data=[scatter])
+            plotly_fig.update_layout(
                 scene=dict(
                     xaxis=dict(visible=False),
                     yaxis=dict(visible=False),
                     zaxis=dict(visible=False),
+                    aspectmode='data'
                 ),
                 height=700,
                 title="Point Cloud"
             )
+        else:  # mesh
             vertices = np.asarray(mesh.vertices)
             triangles = np.asarray(mesh.triangles)
 
                 x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
                 i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
                 vertexcolor=colors_rgb,
+                opacity=0.95
             )
             else:
                 mesh_trace = go.Mesh3d(
                     x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
                     i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
                     color='lightblue',
+                    opacity=0.9
                 )
 
+            plotly_fig = go.Figure(data=[mesh_trace])
+            plotly_fig.update_layout(
                 scene=dict(
                     xaxis=dict(visible=False),
                     yaxis=dict(visible=False),
                     zaxis=dict(visible=False),
+                    aspectmode='data'
                 ),
                 height=700,
                 title="3D Mesh"
             )
 
+        # Export files
         temp_dir = tempfile.mkdtemp()
 
         pcd_path = Path(temp_dir) / "point_cloud.ply"
         o3d.io.write_point_cloud(str(pcd_path), pcd)
 
         mesh_path = Path(temp_dir) / "mesh.ply"
         o3d.io.write_triangle_mesh(str(mesh_path), mesh)
 
         mesh_obj_path = Path(temp_dir) / "mesh.obj"
         o3d.io.write_triangle_mesh(str(mesh_obj_path), mesh)
 
         mesh_stl_path = Path(temp_dir) / "mesh.stl"
         o3d.io.write_triangle_mesh(str(mesh_stl_path), mesh)
 
         metrics_path = Path(temp_dir) / "metrics.json"
         with open(metrics_path, 'w') as f:
             json.dump(metrics, f, indent=2, default=str)
 
         zip_path = Path(temp_dir) / "reconstruction_complete.zip"
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
             zipf.write(pcd_path, pcd_path.name)
             zipf.write(mesh_stl_path, mesh_stl_path.name)
             zipf.write(metrics_path, metrics_path.name)
 
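A quick way to inspect the exported bundle after download (the path is hypothetical, and the exact file list depends on the zipf.write calls, two of which are collapsed in this diff view):

```python
import zipfile

# Hypothetical local copy of the ZIP produced above
with zipfile.ZipFile("reconstruction_complete.zip") as z:
    print(z.namelist())  # expect point_cloud.ply, mesh files, metrics.json
```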
         assessment = _generate_quality_assessment(metrics)
 
         report = f"""
 ## Reconstruction Complete!
 
+### Performance
+- **Processing Time**: {metrics['total_time']}
+- **Points**: {metrics['final_points']:,}
 - **Triangles**: {metrics['triangles']:,}
 
+### Quality
+- **Topology**: {'Good' if metrics['is_vertex_manifold'] else 'Issues'}
+- **Watertight**: {'Yes' if metrics['is_watertight'] else 'No'}
 
+### Assessment
+{assessment}
 
 **Download the complete package below!**
 """
 
         return depth_viz, plotly_fig, str(zip_path), report, json.dumps(metrics, indent=2, default=str)
 
     except Exception as e:
         import traceback
+        return None, None, None, f"Error: {str(e)}\n\n{traceback.format_exc()}", None
+
+def process_image_with_safeguards(image, model_choice="GLPN (Recommended)", visualization_type="mesh", consent_given=False):
+    """Main processing with safeguards"""
+    session_id = generate_session_id()
+
+    if not consent_given:
+        return None, None, None, "**You must agree to the Responsible Use Guidelines first.**", None
+
+    if image is None:
+        return None, None, None, "Please upload an image first.", None
+
+    is_safe, safety_warning = check_image_safety(image)
+    passes_policy, policy_message = content_policy_check(image)
+
+    if not passes_policy:
+        return None, None, None, f"{policy_message}", None
+
+    try:
+        result = process_image(image, model_choice, visualization_type)
+        depth_viz, plotly_fig, zip_path, report, json_metrics = result
+
+        if safety_warning:
+            report = f"**Privacy Notice:**\n{safety_warning}\n\n{report}"
+
+        metrics = json.loads(json_metrics)
+        metrics['responsible_ai'] = {
+            'session_id': session_id,
+            'timestamp': datetime.now().isoformat(),
+            'consent_given': True
+        }
+
+        return depth_viz, plotly_fig, zip_path, report, json.dumps(metrics, indent=2)
+
+    except Exception as e:
+        return None, None, None, f"Error: {str(e)}", None
 
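A small smoke test of the consent gate, grounded in the early returns above (purely illustrative, not part of app.py):

```python
from PIL import Image

blank = Image.new("RGB", (256, 256))
out = process_image_with_safeguards(blank, consent_given=False)
assert out[3].startswith("**You must agree")  # processing is refused without consent
```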
 # ============================================================================
 # GRADIO INTERFACE
 # ============================================================================
 
+with gr.Blocks(title="Responsible AI 3D Reconstruction", theme=gr.themes.Soft()) as demo:
 
     gr.Markdown("""
+    # 🏗️ 3D Reconstruction from Single Images
 
     Transform 2D photographs into 3D spatial models
 
+    <div style="background-color: #fff3cd; border: 2px solid #ffc107; padding: 15px; border-radius: 5px; margin: 10px 0;">
+    <h3 style="color: #856404; margin-top: 0;">⚠️ Responsible Use Required</h3>
+    <p style="color: #856404; margin-bottom: 0;">This tool must be used ethically and legally. Review the guidelines in the <b>first tab</b>.</p>
+    </div>
     """)
     with gr.Tabs():
 
+        with gr.Tab("⚠️ Responsible Use (READ FIRST)"):
+            gr.Markdown(RESPONSIBLE_AI_NOTICE)
+            gr.Markdown("""
+            ### Known Limitations & Biases
+            - Trained primarily on Western indoor architecture
+            - May underperform on non-Western styles
+            - Scale is relative, not absolute
+            - Single viewpoint captures only visible surfaces
+            """)
 
         with gr.Tab("Reconstruction"):
+            consent_checkbox = gr.Checkbox(
+                label="**I have read and agree to the Responsible Use Guidelines**",
+                value=False
+            )
 
             with gr.Row():
                 with gr.Column(scale=1):
+                    input_image = gr.Image(
+                        type="pil",
+                        label="Upload Image",
+                        sources=["upload", "clipboard"]
+                    )
 
                     model_choice = gr.Radio(
                         choices=["GLPN (Recommended)", "DPT (High Quality)"],
                         value="GLPN (Recommended)",
                     )
 
                     visualization_type = gr.Radio(
+                        choices=["mesh", "point_cloud"],
                         value="mesh",
+                        label="Visualization Type"
                     )
 
                     reconstruct_btn = gr.Button("Start Reconstruction", variant="primary", size="lg")
 
                 with gr.Column(scale=2):
+                    depth_output = gr.Image(label="Depth Map")
+                    viewer_3d = gr.Plot(label="Interactive 3D Viewer")
 
             with gr.Row():
                 with gr.Column():
+                    metrics_output = gr.Markdown(label="Report")
                 with gr.Column():
+                    json_output = gr.Textbox(label="Metrics (JSON)", lines=8)
 
+            download_output = gr.File(label="Download Package (ZIP)")
 
             reconstruct_btn.click(
+                fn=process_image_with_safeguards,
+                inputs=[input_image, model_choice, visualization_type, consent_checkbox],
                 outputs=[depth_output, viewer_3d, download_output, metrics_output, json_output]
             )
 
with gr.Tab("Theory & Background"):
|
|
|
|
|
|
|
| 544 |
gr.Markdown("""
|
| 545 |
+
## About This Tool
|
| 546 |
+
|
| 547 |
+
This application demonstrates how artificial intelligence can convert single 2D photographs
|
| 548 |
+
into interactive 3D models automatically.
|
| 549 |
+
|
| 550 |
+
### What Makes This Special
|
| 551 |
+
|
| 552 |
+
**Traditional Approach:**
|
| 553 |
+
- Need special equipment (3D scanner, multiple cameras)
|
| 554 |
+
- Requires technical expertise
|
| 555 |
+
- Time-consuming process
|
| 556 |
+
- Expensive
|
| 557 |
+
|
| 558 |
+
**This AI Approach:**
|
| 559 |
+
- Works with any single photograph
|
| 560 |
+
- No special equipment needed
|
| 561 |
+
- Automatic processing
|
| 562 |
+
- Free and accessible
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
|
| 566 |
+
## The Technology
|
| 567 |
+
|
| 568 |
+
### AI Model Used: GLPN
|
| 569 |
+
|
| 570 |
+
**GLPN (Global-Local Path Networks)**
|
| 571 |
+
- Paper: Kim et al., CVPR 2022
|
| 572 |
+
- Optimized for: Indoor/outdoor architectural scenes
|
| 573 |
+
- Training: NYU Depth V2 (urban indoor environments)
|
| 574 |
+
- Best for: Building interiors, street-level views
|
| 575 |
+
- Speed: Fast (~0.3-2.5s)
|
| 576 |
+
|
| 577 |
+
### How It Works (Simplified)
|
| 578 |
+
|
| 579 |
+
1. **AI analyzes photo** β Recognizes objects, patterns, perspective
|
| 580 |
+
2. **Estimates distance** β Figures out what's close, what's far
|
| 581 |
+
3. **Creates 3D points** β Places colored dots in 3D space
|
| 582 |
+
4. **Builds surface** β Connects dots into smooth shape
|
| 583 |
+
|
| 584 |
+
### Spatial Data Pipeline
|
| 585 |
+
|
| 586 |
+
**1. Monocular Depth Estimation**
|
| 587 |
+
- Challenge: Extracting 3D spatial information from 2D photographs
|
| 588 |
+
- Application: Similar to photogrammetry but from single images
|
| 589 |
+
- Output: Relative depth maps for spatial analysis
|
| 590 |
+
|
| 591 |
+
**2. Point Cloud Generation**
|
| 592 |
+
- Creates 3D coordinate system (X, Y, Z) from pixels
|
| 593 |
+
- Each point: Spatial location + RGB color information
|
| 594 |
+
- Compatible with: GIS software, CAD tools, spatial databases
|
| 595 |
+
|
| 596 |
+
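A minimal back-projection sketch, assuming a pinhole camera with the principal point at the image center and an illustrative focal length (the app's actual intrinsics may differ):

```python
import numpy as np
import open3d as o3d

def depth_to_point_cloud(depth, rgb, focal=500.0):
    # depth: (H, W) float array; rgb: (H, W, 3) uint8 array
    h, w = depth.shape
    cx, cy = w / 2.0, h / 2.0          # assumed principal point: image center
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) * depth / focal       # pinhole back-projection
    y = (v - cy) * depth / focal
    points = np.stack([x, y, depth], axis=-1).reshape(-1, 3)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(rgb.reshape(-1, 3) / 255.0)
    return pcd
```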
**3. 3D Mesh Generation** (sketch below)
- Creates a continuous surface from discrete points
- Similar to: digital terrain models (DTMs), but for buildings
- Output formats: compatible with ArcGIS, QGIS, SketchUp
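A sketch of the meshing steps with Open3D; the parameter values here are illustrative, not the app's actual settings:

```python
import open3d as o3d

def point_cloud_to_mesh(pcd, depth=9):
    # Drop statistical outliers before meshing
    pcd, _ = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
    # Poisson reconstruction needs oriented normals
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)
    )
    mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd, depth=depth
    )
    return mesh
```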
+
### Quality Metrics Explained
|
| 602 |
+
|
| 603 |
+
- **Point Cloud Density**: Higher points = better spatial resolution
|
| 604 |
+
- **Geometric Accuracy**: Manifold checks ensure valid topology
|
| 605 |
+
- **Surface Continuity**: Watertight meshes = complete volume calculations
|
| 606 |
+
- **Data Fidelity**: Triangle count indicates level of detail
|
| 607 |
+
|
| 608 |
+
### Limitations for Geographic Applications
|
| 609 |
+
|
| 610 |
+
1. **Scale Ambiguity**: Requires ground control points for absolute measurements
|
| 611 |
+
2. **Single Viewpoint**: Cannot capture occluded facades or hidden spaces
|
| 612 |
+
3. **No Georeferencing**: Outputs in local coordinates, not global (lat/lon)
|
| 613 |
+
4. **Weather Dependent**: Best results with clear, well-lit conditions
|
| 614 |
+
|
| 615 |
+
### Comparison with Traditional Methods
|
| 616 |
+
|
| 617 |
+
**vs. Terrestrial Laser Scanning (TLS):**
|
| 618 |
+
- Much cheaper, faster, more accessible
|
| 619 |
+
- Lower accuracy, no absolute scale
|
| 620 |
+
|
| 621 |
+
**vs. Photogrammetry (Structure-from-Motion):**
|
| 622 |
+
- Works with single image, faster processing
|
| 623 |
+
- Less accurate, cannot resolve scale
|
| 624 |
|
| 625 |
+
**vs. LiDAR:**
|
| 626 |
+
- Much lower cost, consumer cameras sufficient
|
| 627 |
+
- Lower precision, no absolute measurements
|
| 628 |
|
| 629 |
+
|
| 630 |
+
|
| 631 |
+
## Reconstruction Pipeline (10 Steps)
|
| 632 |
+
|
| 633 |
+
1. **Image Preprocessing**: Resize to model requirements
|
| 634 |
+
2. **Depth Estimation**: Neural network inference
|
| 635 |
3. **Depth Visualization**: Create comparison images
|
| 636 |
+
4. **Point Cloud Generation**: Back-project using camera model
|
| 637 |
+
5. **Outlier Removal**: Statistical filtering
|
| 638 |
+
6. **Normal Estimation**: Surface orientation calculation
|
| 639 |
+
7. **Mesh Reconstruction**: Poisson surface reconstruction
|
| 640 |
+
8. **Quality Metrics**: Compute geometric measures
|
| 641 |
+
9. **3D Visualization**: Create interactive viewer
|
| 642 |
+
10. **File Export**: Generate multiple formats
|
| 643 |
+
|
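In code, the depth-estimation step follows the standard Transformers usage for GLPN (the checkpoint name matches the one credited in the Citation tab; the filename is illustrative):

```python
import torch
from PIL import Image
from transformers import GLPNImageProcessor, GLPNForDepthEstimation

processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")

image = Image.open("scene.jpg")                 # illustrative filename
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
depth = outputs.predicted_depth.squeeze().cpu().numpy()  # relative depth, (H, W)
```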
### Key References

1. **Kim, D., et al. (2022)**. "Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth." *CVPR 2022*.
2. **Kazhdan, M., et al. (2006)**. "Poisson Surface Reconstruction." *Eurographics Symposium on Geometry Processing*.
    """)
with gr.Tab("Usage Guide"):
    gr.Markdown("""
## How to Use This Application

### Step 1: Read the Responsible Use Guidelines
- **REQUIRED**: Review the "Responsible Use" tab first
- Understand the privacy implications
- Acknowledge model limitations and biases
- Ensure you have the rights to use your source images

### Step 2: Prepare Your Image

**Best Practices:**
- Remove EXIF metadata (GPS, timestamps) for privacy (see the sketch after the checklist)
- Ensure you have consent if the image contains people
- Use well-lit, clear photographs
- Recommended resolution: 512-1024 pixels
- Indoor scenes work best

**Privacy Checklist:**
- [ ] No identifiable people (or consent obtained)
- [ ] No sensitive/private locations
- [ ] EXIF data removed
- [ ] You own the rights to the image
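One simple way to strip EXIF metadata with Pillow is to re-save only the pixel data (filenames here are illustrative):

```python
from PIL import Image

img = Image.open("photo_with_exif.jpg")   # illustrative filename
clean = Image.new(img.mode, img.size)
clean.putdata(list(img.getdata()))        # copy pixels only; metadata is left behind
clean.save("photo_clean.jpg")
```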
### Step 3: Upload an Image
- Click the "Upload Image" area
- Select a JPG, PNG, or BMP file
- **Note:** the webcam option has been removed for privacy protection
- You can also paste from the clipboard

### Step 4: Check the Consent Box
- Check "I have read and agree to the Responsible Use Guidelines"
- This confirms you have reviewed the ethical guidelines
- Processing won't start without consent

### Step 5: Choose a Visualization
- **Mesh**: solid 3D surface (recommended)
- **Point Cloud**: individual colored 3D points

### Step 6: Start the Reconstruction
- Click "Start Reconstruction"
- Processing takes 10-60 seconds
- All processing is local (no cloud upload)

### Step 7: Explore the Results

**Depth Map** (see the colormap sketch below):
- Yellow/Red = farther objects
- Purple/Blue = closer objects
- Shows the AI's depth understanding
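This color convention matches a plasma-style Matplotlib colormap (an assumption; the app may use a different one). A tiny sketch:

```python
import matplotlib.pyplot as plt
import numpy as np

depth = np.random.rand(240, 320)     # stand-in for a real depth map
plt.imshow(depth, cmap="plasma")     # purple/blue = low (near), yellow = high (far)
plt.colorbar(label="relative depth")
plt.axis("off")
plt.show()
```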
**3D Viewer:**
- Rotate: click and drag
- Zoom: scroll wheel
- Pan: right-click and drag
- Reset: double-click

**Metrics Report:**
- Processing performance
- Quality indicators
- Topology validation

### Step 8: Download the Files
The ZIP package contains (a packaging sketch follows the list):
- Point cloud (PLY)
- Mesh (PLY, OBJ, STL)
- Quality metrics (JSON)
- All files include responsible-AI metadata
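A sketch of how such a package can be assembled with Python's `zipfile` module (the file names and structure here are illustrative, not the app's exact layout):

```python
import json
import zipfile

def package_results(zip_path, files, metrics):
    # files: mapping of archive names to paths on disk, e.g. {"mesh.obj": "/tmp/mesh.obj"}
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for arcname, path in files.items():
            zf.write(path, arcname)
        zf.writestr("metrics.json", json.dumps(metrics, indent=2))
    return zip_path
```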
## Viewing Downloaded 3D Files

- https://www.creators3d.com/online-viewer
- Just drag and drop your OBJ/PLY file

## Tips for Best Results

### DO:
- Use well-lit images
- Include depth cues (corners, edges)
- Prefer indoor scenes
- Use medium resolution (512-1024 px)
- Remove personal metadata
- Obtain consent for any people in your images

### AVOID:
- Motion blur or low resolution
- Reflective surfaces (mirrors, glass)
- Images you lack consent to use
- Private property without permission
- Surveillance or monitoring purposes
- Heavy shadows or darkness

## Understanding the Metrics

- **Final Points**: clean points used for mesh generation

### Mesh Quality Indicators:
These are reported with ✓/⚠ marks (see the code sketch below):
- **✓ Edge Manifold**: each edge connects exactly 2 faces (good topology)
- **✓ Vertex Manifold**: clean vertex connections
- **✓ Watertight**: no holes; ready for 3D printing
- **⚠ marks**: indicate potential issues (still usable, but may need repair)
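These indicators correspond directly to Open3D's topology checks; a minimal sketch (filename illustrative):

```python
import open3d as o3d

mesh = o3d.io.read_triangle_mesh("mesh.ply")    # illustrative filename
print({
    "edge_manifold": mesh.is_edge_manifold(),
    "vertex_manifold": mesh.is_vertex_manifold(),
    "watertight": mesh.is_watertight(),
    "triangles": len(mesh.triangles),
})
```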
### Processing Times:
- **Depth Estimation**: 0.3-2.5 s (GLPN model)
- **Mesh Reconstruction**: 2-10 s (depends on point cloud size)
- **Total Time**: usually 10-60 seconds

---

## Troubleshooting

**Problem: No output appears**

- Colors on the point cloud are more accurate

**Problem: Processing is very slow**
- Use smaller images
- This is normal on CPU (a GPU is much faster; see the device sketch below)
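A standard PyTorch device-selection sketch (the app itself may handle devices differently):

```python
import torch
from transformers import GLPNForDepthEstimation

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu").to(device)
# Input tensors must live on the same device, e.g.:
# inputs = {k: v.to(device) for k, v in inputs.items()}
```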
- Common for complex scenes
- Still usable for visualization
- For 3D printing: run mesh repair in MeshLab
    """)
with gr.Tab("Ethics & Impact"):
    gr.Markdown("""
## Algorithmic Bias & Fairness

### Training Data Representation

**Geographic Bias:**
- Heavily represented: North America, Europe
- Underrepresented: Africa, South Asia, Pacific Islands
- Impact: lower accuracy for non-Western architecture

**Architectural Style Bias:**
- Well represented: modern interiors, Western buildings
- Underrepresented: traditional, vernacular, and indigenous structures
- Impact: may misinterpret non-standard spatial layouts

**Socioeconomic Bias:**
- Training data skews toward middle- and upper-class interiors
- Limited representation of informal settlements
- May not generalize well to all socioeconomic contexts

### Potential Harms

**Privacy Violations:**
- Unauthorized 3D reconstruction of private spaces
- Creating models of individuals without consent
- Surveillance and tracking applications

**Misinformation:**
- Generating fake 3D evidence
- Manipulating spatial understanding
- Creating misleading visualizations

**Property Rights:**
- Unauthorized documentation of copyrighted designs
- Intellectual property theft
- Commercial exploitation without permission

### Harm Prevention

1. **Mandatory consent**: require user acknowledgment
2. **Use-case restriction**: prohibit surveillance and deceptive uses
3. **Privacy protection**: disable the webcam, encourage EXIF removal
4. **Transparency**: clear documentation of limitations

## Accountability & Governance

### User Responsibilities

As a user, you are responsible for:
- Ensuring lawful use of source images
- Obtaining the necessary consents and permissions
- Respecting privacy and intellectual property
- Using outputs ethically and transparently
- Understanding and accounting for model biases

### Developer Responsibilities

This tool implements:
- Clear responsible-use guidelines
- Privacy-protective design (no webcam, local processing)
- Bias documentation and transparency
- Explicitly stated prohibited use cases

## Future Directions

### Improving Fairness
- Train on more geographically diverse datasets
- Include underrepresented architectural styles
- Develop bias-mitigation techniques
- Community-driven model evaluation

### Enhancing Privacy
- Face/person detection and redaction
- Automatic EXIF stripping
- Differential privacy techniques
    """)
with gr.Tab("Citation"):
    gr.Markdown("""
## Academic Citation

### For the GLPN Model:
```bibtex
@inproceedings{kim2022global,
  title={Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth},
  author={Kim, Doyeon and Ga, Woonghyun and Ahn, Pyunghwan and Joo, Donggyu and Chun, Sehwan and Kim, Junmo},
  booktitle={CVPR},
  year={2022}
}
```

### For Poisson Surface Reconstruction:
```bibtex
@inproceedings{kazhdan2006poisson,
  title={Poisson Surface Reconstruction},
  author={Kazhdan, Michael and Bolitho, Matthew and Hoppe, Hugues},
  booktitle={Symposium on Geometry Processing},
  year={2006}
}
```

## Open Source Components

This application is built with:

- **Transformers** (Hugging Face): model inference framework
- **Open3D**: point cloud and mesh processing
- **PyTorch**: deep learning framework
- **Plotly**: interactive 3D visualization
- **Gradio**: web interface framework
- **NumPy** & **SciPy**: numerical computing
- **Matplotlib**: data visualization
- **Pillow (PIL)**: image processing

## Model Credits

**GLPN Model:**
- Developed by: KAIST (Korea Advanced Institute of Science and Technology)
- Hosted by: Hugging Face (vinvino02/glpn-nyu)
- License: Apache 2.0

## Responsible AI Features

This implementation includes:
- Privacy-protective design (no webcam option)
- Mandatory consent acknowledgment
- Bias documentation and transparency
- Ethical use guidelines
    """)
gr.Markdown("""
---

**Version:** 2.0 (Responsible AI Edition - Optimized)

**Last Updated:** 2025

**License:** Educational and Research Use
""")
if __name__ == "__main__":
    print("=" * 60)
    print("RESPONSIBLE AI 3D RECONSTRUCTION")
    print("=" * 60)
    print("✓ Lightweight model (GLPN only)")
    print("✓ No webcam option")
    print("✓ Local processing")
    print("✓ Consent required")
    print("=" * 60)
    demo.launch(share=True)