|
|
""" |
|
|
Advanced 3D Reconstruction from Single Images with Responsible AI Features |
|
|
|
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import torch |
|
|
from PIL import Image |
|
|
from transformers import GLPNForDepthEstimation, GLPNImageProcessor |
|
|
import open3d as o3d |
|
|
import plotly.graph_objects as go |
|
|
import matplotlib.pyplot as plt |
|
|
import io |
|
|
import json |
|
|
import time |
|
|
from pathlib import Path |
|
|
import tempfile |
|
|
import zipfile |
|
|
import hashlib |
|
|
from datetime import datetime |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
RESPONSIBLE_AI_NOTICE = """ |
|
|
## ⚠️ Responsible Use Guidelines |
|
|
|
|
|
### Privacy & Consent |
|
|
- **Do not upload images containing identifiable people without their explicit consent** |
|
|
- **Do not use for surveillance, tracking, or monitoring individuals** |
|
|
- Facial features may be reconstructed in 3D - consider privacy implications |
|
|
- Remove metadata (EXIF) that may contain location or personal information |
|
|
|
|
|
### Ethical Use |
|
|
- This tool is for **educational, research, and creative purposes only** |
|
|
- **Prohibited uses:** |
|
|
- Creating deepfakes or misleading 3D content |
|
|
- Unauthorized documentation of private property |
|
|
- Circumventing security systems |
|
|
- Generating 3D models for harassment or stalking |
|
|
- Commercial use without proper rights to source images |
|
|
|
|
|
### Limitations & Bias |
|
|
- Models trained primarily on indoor Western architecture |
|
|
- May perform poorly on non-Western architectural styles |
|
|
- Scale is relative, not absolute - not suitable for precision measurements |
|
|
- Single viewpoint limitations - occluded areas are inferred, not captured |
|
|
|
|
|
### Data Usage |
|
|
- Images are processed locally during your session |
|
|
- No images are stored or transmitted to external servers |
|
|
- Processing logs contain only technical metrics, no image content |
|
|
- You retain all rights to your uploaded images and generated 3D models |
|
|
|
|
|
|
|
|
**By using this tool, you agree to these responsible use guidelines.** |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
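# Safety, consent, and content-policy helpers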
def check_image_safety(image): |
|
|
"""Basic safety checks for uploaded images""" |
|
|
warnings = [] |
|
|
|
|
|
width, height = image.size |
|
|
if width * height > 10_000_000: |
|
|
warnings.append("⚠️ Very large image - consider resizing to improve processing speed") |
|
|
|
|
|
aspect_ratio = max(width, height) / min(width, height) |
|
|
if aspect_ratio > 3: |
|
|
warnings.append("⚠️ Unusual aspect ratio detected - ensure image doesn't contain unintended content") |
|
|
|
|
|
try: |
|
|
exif = image.getexif() |
|
|
if exif: |
|
|
            has_gps = 34853 in exif  # 34853 (0x8825) is the GPSInfo IFD tag
|
|
if has_gps: |
|
|
warnings.append("⚠️ GPS location data detected in image - consider removing EXIF data for privacy") |
|
|
    except Exception:
|
|
pass |
|
|
|
|
|
return True, "\n".join(warnings) if warnings else None |
|
|
|
|
|
def generate_session_id(): |
|
|
"""Generate anonymous session ID for logging""" |
|
|
return hashlib.sha256(str(datetime.now()).encode()).hexdigest()[:16] |
|
|
|
|
|
def content_policy_check(image): |
|
|
"""Check if image content violates usage policies""" |
|
|
width, height = image.size |
|
|
|
|
|
if width < 100 or height < 100: |
|
|
return False, "Image too small - minimum 100x100 pixels required for meaningful reconstruction" |
|
|
|
|
|
return True, None |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
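# Model loading: GLPN is loaded eagerly at startup; DPT is loaded lazily on first use.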
print("Loading GLPN model (lightweight)...") |
|
|
try: |
|
|
glpn_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu") |
|
|
glpn_model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu") |
|
|
print("✓ GLPN model loaded successfully!") |
|
|
except Exception as e: |
|
|
print(f"Error loading model: {e}") |
|
|
glpn_processor = None |
|
|
glpn_model = None |
|
|
|
|
|
|
|
|
dpt_model = None |
|
|
dpt_processor = None |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"): |
|
|
"""Optimized processing pipeline""" |
|
|
|
|
|
def _generate_quality_assessment(metrics): |
|
|
assessment = [] |
|
|
outlier_pct = (metrics['outliers_removed'] / metrics['initial_points']) * 100 |
|
|
|
|
|
if outlier_pct < 5: |
|
|
assessment.append("Very clean depth estimation") |
|
|
elif outlier_pct < 15: |
|
|
assessment.append("Good depth quality") |
|
|
else: |
|
|
assessment.append("High noise in depth estimation") |
|
|
|
|
|
if metrics['is_edge_manifold'] and metrics['is_vertex_manifold']: |
|
|
assessment.append("Excellent topology") |
|
|
elif metrics['is_vertex_manifold']: |
|
|
assessment.append("Good local topology") |
|
|
else: |
|
|
assessment.append("Topology issues present") |
|
|
|
|
|
if metrics['is_watertight']: |
|
|
assessment.append("Watertight mesh - ready for 3D printing!") |
|
|
else: |
|
|
assessment.append("Not watertight - needs repair for 3D printing") |
|
|
|
|
|
return "\n".join(f"- {item}" for item in assessment) |
|
|
|
|
|
if glpn_model is None: |
|
|
return None, None, None, "❌ Model failed to load. Please refresh the page.", None |
|
|
|
|
|
try: |
|
|
print("Starting reconstruction...") |
|
|
|
|
|
|
|
|
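        # Resize so width and height are multiples of 32 (expected by the depth model),
        # capping height at 480 to keep inference fast.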
new_height = 480 if image.height > 480 else image.height |
|
|
new_height -= (new_height % 32) |
|
|
new_width = int(new_height * image.width / image.height) |
|
|
diff = new_width % 32 |
|
|
new_width = new_width - diff if diff < 16 else new_width + (32 - diff) |
|
|
new_size = (new_width, new_height) |
|
|
image = image.resize(new_size, Image.LANCZOS) |
|
|
|
|
|
|
|
|
if model_choice == "GLPN (Recommended)": |
|
|
processor = glpn_processor |
|
|
model = glpn_model |
|
|
else: |
|
|
global dpt_model, dpt_processor |
|
|
if dpt_model is None: |
|
|
print("Loading DPT model (first time only)...") |
|
|
from transformers import DPTForDepthEstimation, DPTImageProcessor |
|
|
dpt_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large") |
|
|
dpt_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large") |
|
|
print("✓ DPT model loaded!") |
|
|
processor = dpt_processor |
|
|
model = dpt_model |
|
|
|
|
|
inputs = processor(images=image, return_tensors="pt") |
|
|
|
|
|
start_time = time.time() |
|
|
with torch.no_grad(): |
|
|
outputs = model(**inputs) |
|
|
predicted_depth = outputs.predicted_depth |
|
|
depth_time = time.time() - start_time |
|
|
|
|
|
|
|
|
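        # Crop a 16-pixel border: depth predictions are least reliable near image edges.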
pad = 16 |
|
|
output = predicted_depth.squeeze().cpu().numpy() * 1000.0 |
|
|
output = output[pad:-pad, pad:-pad] |
|
|
image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad)) |
|
|
|
|
|
depth_height, depth_width = output.shape |
|
|
img_width, img_height = image_cropped.size |
|
|
|
|
|
if depth_height != img_height or depth_width != img_width: |
|
|
from scipy import ndimage |
|
|
zoom_factors = (img_height / depth_height, img_width / depth_width) |
|
|
output = ndimage.zoom(output, zoom_factors, order=1) |
|
|
|
|
|
image = image_cropped |
|
|
|
|
|
|
|
|
fig, ax = plt.subplots(1, 2, figsize=(14, 7)) |
|
|
ax[0].imshow(image) |
|
|
ax[0].set_title('Original Image', fontsize=14, fontweight='bold') |
|
|
ax[0].axis('off') |
|
|
|
|
|
im = ax[1].imshow(output, cmap='plasma') |
|
|
ax[1].set_title('Estimated Depth Map', fontsize=14, fontweight='bold') |
|
|
ax[1].axis('off') |
|
|
plt.colorbar(im, ax=ax[1], fraction=0.046, pad=0.04) |
|
|
plt.tight_layout() |
|
|
|
|
|
buf = io.BytesIO() |
|
|
plt.savefig(buf, format='png', dpi=150, bbox_inches='tight') |
|
|
buf.seek(0) |
|
|
depth_viz = Image.open(buf) |
|
|
plt.close() |
|
|
|
|
|
|
|
|
width, height = image.size |
|
|
|
|
|
if output.shape != (height, width): |
|
|
from scipy import ndimage |
|
|
zoom_factors = (height / output.shape[0], width / output.shape[1]) |
|
|
output = ndimage.zoom(output, zoom_factors, order=1) |
|
|
|
|
|
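        # Normalize depth to 8-bit for Open3D's RGBD constructor; only relative depth is preserved.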
depth_image = (output * 255 / np.max(output)).astype(np.uint8) |
|
|
image_array = np.array(image) |
|
|
|
|
|
depth_o3d = o3d.geometry.Image(depth_image) |
|
|
image_o3d = o3d.geometry.Image(image_array) |
|
|
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth( |
|
|
image_o3d, depth_o3d, convert_rgb_to_intensity=False |
|
|
) |
|
|
|
|
|
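        # Assumed pinhole intrinsics (focal length ~500 px, principal point at image center);
        # reconstruction scale is therefore relative, not metric.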
camera_intrinsic = o3d.camera.PinholeCameraIntrinsic() |
|
|
camera_intrinsic.set_intrinsics(width, height, 500, 500, width/2, height/2) |
|
|
|
|
|
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic) |
|
|
initial_points = len(pcd.points) |
|
|
|
|
|
|
|
|
cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) |
|
|
pcd = pcd.select_by_index(ind) |
|
|
outliers_removed = initial_points - len(pcd.points) |
|
|
|
|
|
|
|
|
pcd.estimate_normals() |
|
|
pcd.orient_normals_to_align_with_direction() |
|
|
|
|
|
|
|
|
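        # Poisson surface reconstruction (Kazhdan et al., 2006); depth=9 sets the octree
        # resolution and hence the level of detail.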
mesh_start = time.time() |
|
|
mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( |
|
|
pcd, depth=9, n_threads=1 |
|
|
)[0] |
|
|
|
|
|
|
|
|
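        # Transfer color: each mesh vertex takes the color of its nearest point-cloud
        # neighbor, found via a KD-tree lookup.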
pcd_tree = o3d.geometry.KDTreeFlann(pcd) |
|
|
mesh_colors = [] |
|
|
for vertex in mesh.vertices: |
|
|
[_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1) |
|
|
mesh_colors.append(pcd.colors[idx[0]]) |
|
|
mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors)) |
|
|
|
|
|
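        # Rotate 180 degrees about X so the mesh appears upright in the viewer.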
rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0)) |
|
|
mesh.rotate(rotation, center=(0, 0, 0)) |
|
|
mesh_time = time.time() - mesh_start |
|
|
|
|
|
|
|
|
mesh.compute_vertex_normals() |
|
|
|
|
|
metrics = { |
|
|
'model_used': model_choice, |
|
|
'depth_estimation_time': f"{depth_time:.2f}s", |
|
|
'mesh_reconstruction_time': f"{mesh_time:.2f}s", |
|
|
'total_time': f"{depth_time + mesh_time:.2f}s", |
|
|
'initial_points': initial_points, |
|
|
'outliers_removed': outliers_removed, |
|
|
'final_points': len(pcd.points), |
|
|
'vertices': len(mesh.vertices), |
|
|
'triangles': len(mesh.triangles), |
|
|
'is_edge_manifold': mesh.is_edge_manifold(), |
|
|
'is_vertex_manifold': mesh.is_vertex_manifold(), |
|
|
'is_watertight': mesh.is_watertight(), |
|
|
} |
|
|
|
|
|
|
|
|
try: |
|
|
surface_area = mesh.get_surface_area() |
|
|
if surface_area > 0: |
|
|
metrics['surface_area'] = float(surface_area) |
|
|
else: |
|
|
vertices = np.asarray(mesh.vertices) |
|
|
triangles = np.asarray(mesh.triangles) |
|
|
v0 = vertices[triangles[:, 0]] |
|
|
v1 = vertices[triangles[:, 1]] |
|
|
v2 = vertices[triangles[:, 2]] |
|
|
cross = np.cross(v1 - v0, v2 - v0) |
|
|
areas = 0.5 * np.linalg.norm(cross, axis=1) |
|
|
metrics['surface_area'] = float(np.sum(areas)) |
|
|
        except Exception:
|
|
metrics['surface_area'] = "Unable to compute" |
|
|
|
|
|
|
|
|
try: |
|
|
if mesh.is_watertight(): |
|
|
metrics['volume'] = float(mesh.get_volume()) |
|
|
else: |
|
|
metrics['volume'] = None |
|
|
        except Exception:
|
|
metrics['volume'] = None |
|
|
|
|
|
|
|
|
points = np.asarray(pcd.points) |
|
|
colors = np.asarray(pcd.colors) |
|
|
|
|
|
if visualization_type == "point_cloud": |
|
|
scatter = go.Scatter3d( |
|
|
x=points[:, 0], y=points[:, 1], z=points[:, 2], |
|
|
mode='markers', |
|
|
marker=dict( |
|
|
size=2, |
|
|
color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) |
|
|
for r, g, b in colors], |
|
|
), |
|
|
name='Point Cloud' |
|
|
) |
|
|
|
|
|
plotly_fig = go.Figure(data=[scatter]) |
|
|
plotly_fig.update_layout( |
|
|
scene=dict( |
|
|
xaxis=dict(visible=False), |
|
|
yaxis=dict(visible=False), |
|
|
zaxis=dict(visible=False), |
|
|
aspectmode='data' |
|
|
), |
|
|
height=700, |
|
|
title="Point Cloud" |
|
|
) |
|
|
else: |
|
|
vertices = np.asarray(mesh.vertices) |
|
|
triangles = np.asarray(mesh.triangles) |
|
|
|
|
|
if mesh.has_vertex_colors(): |
|
|
vertex_colors = np.asarray(mesh.vertex_colors) |
|
|
colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) |
|
|
for r, g, b in vertex_colors] |
|
|
|
|
|
mesh_trace = go.Mesh3d( |
|
|
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], |
|
|
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], |
|
|
vertexcolor=colors_rgb, |
|
|
opacity=0.95 |
|
|
) |
|
|
else: |
|
|
mesh_trace = go.Mesh3d( |
|
|
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], |
|
|
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], |
|
|
color='lightblue', |
|
|
opacity=0.9 |
|
|
) |
|
|
|
|
|
plotly_fig = go.Figure(data=[mesh_trace]) |
|
|
plotly_fig.update_layout( |
|
|
scene=dict( |
|
|
xaxis=dict(visible=False), |
|
|
yaxis=dict(visible=False), |
|
|
zaxis=dict(visible=False), |
|
|
aspectmode='data' |
|
|
), |
|
|
height=700, |
|
|
title="3D Mesh" |
|
|
) |
|
|
|
|
|
|
|
|
temp_dir = tempfile.mkdtemp() |
|
|
|
|
|
pcd_path = Path(temp_dir) / "point_cloud.ply" |
|
|
o3d.io.write_point_cloud(str(pcd_path), pcd) |
|
|
|
|
|
mesh_path = Path(temp_dir) / "mesh.ply" |
|
|
o3d.io.write_triangle_mesh(str(mesh_path), mesh) |
|
|
|
|
|
mesh_obj_path = Path(temp_dir) / "mesh.obj" |
|
|
o3d.io.write_triangle_mesh(str(mesh_obj_path), mesh) |
|
|
|
|
|
mesh_stl_path = Path(temp_dir) / "mesh.stl" |
|
|
o3d.io.write_triangle_mesh(str(mesh_stl_path), mesh) |
|
|
|
|
|
metrics_path = Path(temp_dir) / "metrics.json" |
|
|
with open(metrics_path, 'w') as f: |
|
|
json.dump(metrics, f, indent=2, default=str) |
|
|
|
|
|
zip_path = Path(temp_dir) / "reconstruction_complete.zip" |
|
|
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: |
|
|
zipf.write(pcd_path, pcd_path.name) |
|
|
zipf.write(mesh_path, mesh_path.name) |
|
|
zipf.write(mesh_obj_path, mesh_obj_path.name) |
|
|
zipf.write(mesh_stl_path, mesh_stl_path.name) |
|
|
zipf.write(metrics_path, metrics_path.name) |
|
|
|
|
|
assessment = _generate_quality_assessment(metrics) |
|
|
|
|
|
report = f""" |
|
|
## Reconstruction Complete! |
|
|
|
|
|
### Performance |
|
|
- **Processing Time**: {metrics['total_time']} |
|
|
- **Points**: {metrics['final_points']:,} |
|
|
- **Triangles**: {metrics['triangles']:,} |
|
|
|
|
|
### Quality |
|
|
- **Topology**: {'Good' if metrics['is_vertex_manifold'] else 'Issues'} |
|
|
- **Watertight**: {'Yes' if metrics['is_watertight'] else 'No'} |
|
|
|
|
|
### Assessment |
|
|
{assessment} |
|
|
|
|
|
**Download the complete package below!** |
|
|
""" |
|
|
|
|
|
return depth_viz, plotly_fig, str(zip_path), report, json.dumps(metrics, indent=2, default=str) |
|
|
|
|
|
except Exception as e: |
|
|
import traceback |
|
|
return None, None, None, f"Error: {str(e)}\n\n{traceback.format_exc()}", None |
|
|
|
|
|
def process_image_with_safeguards(image, model_choice="GLPN (Recommended)", visualization_type="mesh", consent_given=False): |
|
|
"""Main processing with safeguards""" |
|
|
session_id = generate_session_id() |
|
|
|
|
|
if not consent_given: |
|
|
return None, None, None, "**You must agree to the Responsible Use Guidelines first.**", None |
|
|
|
|
|
if image is None: |
|
|
return None, None, None, "Please upload an image first.", None |
|
|
|
|
|
is_safe, safety_warning = check_image_safety(image) |
|
|
passes_policy, policy_message = content_policy_check(image) |
|
|
|
|
|
if not passes_policy: |
|
|
return None, None, None, f"{policy_message}", None |
|
|
|
|
|
try: |
|
|
result = process_image(image, model_choice, visualization_type) |
|
|
depth_viz, plotly_fig, zip_path, report, json_metrics = result |
|
|
|
|
|
if safety_warning: |
|
|
report = f"**Privacy Notice:**\n{safety_warning}\n\n{report}" |
|
|
|
|
|
metrics = json.loads(json_metrics) |
|
|
metrics['responsible_ai'] = { |
|
|
'session_id': session_id, |
|
|
'timestamp': datetime.now().isoformat(), |
|
|
'consent_given': True |
|
|
} |
|
|
|
|
|
return depth_viz, plotly_fig, zip_path, report, json.dumps(metrics, indent=2) |
|
|
|
|
|
except Exception as e: |
|
|
return None, None, None, f"Error: {str(e)}", None |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
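# Gradio interface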
with gr.Blocks(title="Responsible AI 3D Reconstruction", theme=gr.themes.Soft()) as demo: |
|
|
|
|
|
gr.Markdown(""" |
|
|
# 🏗️ 3D Reconstruction from Single Images |
|
|
|
|
|
|
|
|
Transform 2D photographs into 3D spatial models |
|
|
|
|
|
<div style="background-color: #fff3cd; border: 2px solid #ffc107; padding: 15px; border-radius: 5px; margin: 10px 0;"> |
|
|
<h3 style="color: #856404; margin-top: 0;">⚠️ Responsible Use Required</h3> |
|
|
<p style="color: #856404; margin-bottom: 0;">This tool must be used ethically and legally. Review the guidelines in the <b>first tab</b>.</p> |
|
|
</div> |
|
|
""") |
|
|
|
|
|
with gr.Tabs(): |
|
|
|
|
|
with gr.Tab("⚠️ Responsible Use (READ FIRST)"): |
|
|
gr.Markdown(RESPONSIBLE_AI_NOTICE) |
|
|
gr.Markdown(""" |
|
|
### Known Limitations & Biases |
|
|
- Trained primarily on Western indoor architecture |
|
|
- May underperform on non-Western styles |
|
|
- Scale is relative, not absolute |
|
|
- Single viewpoint captures only visible surfaces |
|
|
""") |
|
|
|
|
|
with gr.Tab("Reconstruction"): |
|
|
consent_checkbox = gr.Checkbox( |
|
|
label="**I have read and agree to the Responsible Use Guidelines**", |
|
|
value=False |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
input_image = gr.Image( |
|
|
type="pil", |
|
|
label="Upload Image", |
|
|
sources=["upload", "clipboard"] |
|
|
) |
|
|
|
|
|
model_choice = gr.Radio( |
|
|
choices=["GLPN (Recommended)", "DPT (High Quality)"], |
|
|
value="GLPN (Recommended)", |
|
|
label="Depth Estimation Model" |
|
|
) |
|
|
|
|
|
visualization_type = gr.Radio( |
|
|
choices=["mesh", "point_cloud"], |
|
|
value="mesh", |
|
|
label="Visualization Type" |
|
|
) |
|
|
|
|
|
reconstruct_btn = gr.Button("Start Reconstruction", variant="primary", size="lg") |
|
|
|
|
|
with gr.Column(scale=2): |
|
|
depth_output = gr.Image(label="Depth Map") |
|
|
viewer_3d = gr.Plot(label="Interactive 3D Viewer") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
metrics_output = gr.Markdown(label="Report") |
|
|
with gr.Column(): |
|
|
json_output = gr.Textbox(label="Metrics (JSON)", lines=8) |
|
|
|
|
|
download_output = gr.File(label="Download Package (ZIP)") |
|
|
|
|
|
reconstruct_btn.click( |
|
|
fn=process_image_with_safeguards, |
|
|
inputs=[input_image, model_choice, visualization_type, consent_checkbox], |
|
|
outputs=[depth_output, viewer_3d, download_output, metrics_output, json_output] |
|
|
) |
|
|
|
|
|
with gr.Tab("Theory & Background"): |
|
|
gr.Markdown(""" |
|
|
## About This Tool |
|
|
|
|
|
This application demonstrates how artificial intelligence can convert single 2D photographs |
|
|
into interactive 3D models automatically. |
|
|
|
|
|
### What Makes This Special |
|
|
|
|
|
**Traditional Approach:** |
|
|
- Need special equipment (3D scanner, multiple cameras) |
|
|
- Requires technical expertise |
|
|
- Time-consuming process |
|
|
- Expensive |
|
|
|
|
|
**This AI Approach:** |
|
|
- Works with any single photograph |
|
|
- No special equipment needed |
|
|
- Automatic processing |
|
|
- Free and accessible |
|
|
|
|
|
|
|
|
|
|
|
## The Technology |
|
|
|
|
|
### AI Model Used: GLPN |
|
|
|
|
|
**GLPN (Global-Local Path Networks)** |
|
|
- Paper: Kim et al., CVPR 2022 |
|
|
- Optimized for: Indoor/outdoor architectural scenes |
|
|
- Training: NYU Depth V2 (indoor scenes)
|
|
- Best for: Building interiors, street-level views |
|
|
- Speed: Fast (~0.3-2.5s) |
|
|
|
|
|
### How It Works (Simplified) |
|
|
|
|
|
1. **AI analyzes photo** → Recognizes objects, patterns, perspective |
|
|
2. **Estimates distance** → Figures out what's close, what's far |
|
|
3. **Creates 3D points** → Places colored dots in 3D space |
|
|
4. **Builds surface** → Connects dots into smooth shape |
|
|
|
|
|
### Spatial Data Pipeline |
|
|
|
|
|
**1. Monocular Depth Estimation** |
|
|
- Challenge: Extracting 3D spatial information from 2D photographs |
|
|
- Application: Similar to photogrammetry but from single images |
|
|
- Output: Relative depth maps for spatial analysis |
|
|
|
|
|
**2. Point Cloud Generation** |
|
|
- Creates 3D coordinate system (X, Y, Z) from pixels |
|
|
- Each point: Spatial location + RGB color information |
|
|
- Compatible with: GIS software, CAD tools, spatial databases |
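
A minimal sketch of the back-projection idea (the intrinsics here are assumed defaults, not calibrated values):

```python
# Back-project a depth map into camera-space 3D points (illustrative only).
import numpy as np

def backproject(depth, fx=500.0, fy=500.0):
    h, w = depth.shape
    cx, cy = w / 2, h / 2                      # assume principal point at image center
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    return np.stack([x, y, depth], axis=-1)    # (H, W, 3) points

points = backproject(np.random.rand(48, 64))   # toy depth map
```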
|
|
|
|
|
**3. 3D Mesh Generation** |
|
|
- Creates continuous surface from discrete points |
|
|
- Similar to: Digital terrain models (DTMs) for buildings |
|
|
- Output formats: Compatible with ArcGIS, QGIS, SketchUp |
|
|
|
|
|
### Quality Metrics Explained |
|
|
|
|
|
- **Point Cloud Density**: Higher points = better spatial resolution |
|
|
- **Geometric Accuracy**: Manifold checks ensure valid topology |
|
|
- **Surface Continuity**: Watertight meshes = complete volume calculations |
|
|
- **Data Fidelity**: Triangle count indicates level of detail |
|
|
|
|
|
### Limitations for Geographic Applications |
|
|
|
|
|
1. **Scale Ambiguity**: Requires ground control points for absolute measurements |
|
|
2. **Single Viewpoint**: Cannot capture occluded facades or hidden spaces |
|
|
3. **No Georeferencing**: Outputs in local coordinates, not global (lat/lon) |
|
|
4. **Weather Dependent**: Best results with clear, well-lit conditions |
|
|
|
|
|
### Comparison with Traditional Methods |
|
|
|
|
|
**vs. Terrestrial Laser Scanning (TLS):** |
|
|
- Much cheaper, faster, more accessible |
|
|
- Lower accuracy, no absolute scale |
|
|
|
|
|
**vs. Photogrammetry (Structure-from-Motion):** |
|
|
- Works with single image, faster processing |
|
|
- Less accurate, cannot resolve scale |
|
|
|
|
|
**vs. LiDAR:** |
|
|
- Much lower cost, consumer cameras sufficient |
|
|
- Lower precision, no absolute measurements |
|
|
|
|
|
|
|
|
|
|
|
## Reconstruction Pipeline (10 Steps) |
|
|
|
|
|
1. **Image Preprocessing**: Resize to model requirements |
|
|
2. **Depth Estimation**: Neural network inference |
|
|
3. **Depth Visualization**: Create comparison images |
|
|
4. **Point Cloud Generation**: Back-project using camera model |
|
|
5. **Outlier Removal**: Statistical filtering |
|
|
6. **Normal Estimation**: Surface orientation calculation |
|
|
7. **Mesh Reconstruction**: Poisson surface reconstruction |
|
|
8. **Quality Metrics**: Compute geometric measures |
|
|
9. **3D Visualization**: Create interactive viewer |
|
|
10. **File Export**: Generate multiple formats |
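
Steps 4-7, condensed into an Open3D sketch (synthetic inputs and illustrative parameters that mirror this app's defaults):

```python
import numpy as np
import open3d as o3d

h, w = 120, 160
depth = np.tile(np.linspace(500, 2000, w), (h, 1)).astype(np.uint16)  # toy depth (mm)
color = np.zeros((h, w, 3), dtype=np.uint8)
intr = o3d.camera.PinholeCameraIntrinsic()
intr.set_intrinsics(w, h, 500, 500, w / 2, h / 2)

rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
    o3d.geometry.Image(color), o3d.geometry.Image(depth),
    convert_rgb_to_intensity=False)
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intr)          # step 4
pcd, _ = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)   # step 5
pcd.estimate_normals()                                                    # step 6
pcd.orient_normals_to_align_with_direction()
mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=9)  # step 7
```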
|
|
|
|
|
### Key References |
|
|
|
|
|
1. **Kim, D., et al. (2022)**. "Global-Local Path Networks for Monocular Depth Estimation |
|
|
with Vertical CutDepth." *CVPR 2022* |
|
|
2. **Kazhdan, M., et al. (2006)**. "Poisson Surface Reconstruction." |
|
|
*Eurographics Symposium on Geometry Processing* |
|
|
""") |
|
|
|
|
|
with gr.Tab("Usage Guide"): |
|
|
gr.Markdown(""" |
|
|
## How to Use This Application |
|
|
|
|
|
### Step 1: Read Responsible Use Guidelines |
|
|
- **REQUIRED**: Review the "Responsible Use" tab first |
|
|
- Understand privacy implications |
|
|
- Acknowledge model limitations and biases |
|
|
- Ensure you have rights to use source images |
|
|
|
|
|
### Step 2: Prepare Your Image |
|
|
|
|
|
**Best Practices:** |
|
|
- Remove EXIF metadata (GPS, timestamps) for privacy (see the snippet below)
|
|
- Ensure you have consent if image contains people |
|
|
- Use well-lit, clear photographs |
|
|
- Recommended resolution: 512-1024 pixels |
|
|
- Indoor scenes work best |
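
If you want to strip EXIF data yourself before uploading, here is a minimal sketch using Pillow (file names are illustrative):

```python
# Re-save an image without its EXIF block (GPS, timestamps, camera info).
from PIL import Image

img = Image.open("photo.jpg")
clean = Image.new(img.mode, img.size)
clean.putdata(list(img.getdata()))   # copy pixels only; metadata is dropped
clean.save("photo_clean.jpg")
```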
|
|
|
|
|
**Privacy Checklist:** |
|
|
- [ ] No identifiable people (or consent obtained) |
|
|
- [ ] No sensitive/private locations |
|
|
- [ ] EXIF data removed |
|
|
- [ ] You own rights to the image |
|
|
|
|
|
### Step 3: Upload Image |
|
|
- Click "Upload Image" area |
|
|
- Select JPG, PNG, or BMP file |
|
|
- **Note:** Webcam option removed for privacy protection |
|
|
- You can also paste from clipboard |
|
|
|
|
|
### Step 4: Check Consent Box |
|
|
- Check "I have read and agree to Responsible Use Guidelines" |
|
|
- This confirms you've reviewed ethical guidelines |
|
|
- Processing won't start without consent |
|
|
|
|
|
### Step 5: Choose Visualization |
|
|
- **Mesh**: Solid 3D surface (recommended) |
|
|
- **Point Cloud**: Individual 3D points with colors |
|
|
|
|
|
### Step 6: Start Reconstruction |
|
|
- Click "Start Reconstruction" |
|
|
- Processing takes 10-60 seconds |
|
|
- All processing is local (no cloud upload) |
|
|
|
|
|
### Step 7: Explore Results |
|
|
|
|
|
**Depth Map:** |
|
|
- Yellow/Red = Farther objects |
|
|
- Purple/Blue = Closer objects |
|
|
- Shows AI's depth understanding |
|
|
|
|
|
**3D Viewer:** |
|
|
- Rotate: Click and drag |
|
|
- Zoom: Scroll wheel |
|
|
- Pan: Right-click and drag |
|
|
- Reset: Double-click |
|
|
|
|
|
**Metrics Report:** |
|
|
- Processing performance |
|
|
- Quality indicators |
|
|
- Topology validation |
|
|
|
|
|
### Step 8: Download Files |
|
|
- ZIP package contains: |
|
|
- Point cloud (PLY) |
|
|
- Mesh (PLY, OBJ, STL) |
|
|
- Quality metrics (JSON) |
|
|
- All files include responsible AI metadata |
|
|
|
|
|
|
|
|
|
|
|
## Viewing Downloaded 3D Files |
|
|
|
|
|
### Free Software Options: |
|
|
|
|
|
**MeshLab** (Recommended for beginners) |
|
|
- Download: https://www.meshlab.net/ |
|
|
- Open PLY, OBJ, STL files |
|
|
- Great for viewing and basic editing |
|
|
|
|
|
**Blender** (For advanced users) |
|
|
- Download: https://www.blender.org/ |
|
|
- Import → Wavefront (.obj) or PLY |
|
|
- Full 3D modeling and rendering capabilities |
|
|
|
|
|
**CloudCompare** (For point clouds) |
|
|
- Download: https://www.cloudcompare.org/ |
|
|
- Best for analyzing point cloud data |
|
|
- Measurement and analysis tools |
|
|
|
|
|
**Online Viewers** (No installation) |
|
|
- https://3dviewer.net/ |
|
|
- https://www.creators3d.com/online-viewer |
|
|
- Just drag and drop your OBJ/PLY file |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Tips for Best Results |
|
|
|
|
|
### DO: |
|
|
- Use well-lit images |
|
|
- Include depth cues (corners, edges) |
|
|
- Indoor scenes work best |
|
|
- Medium resolution (512-1024px) |
|
|
- Remove personal metadata |
|
|
- Obtain consent for people in images |
|
|
|
|
|
### AVOID: |
|
|
- Motion blur or low resolution |
|
|
- Reflective surfaces (mirrors, glass) |
|
|
- Images without consent |
|
|
- Private property without permission |
|
|
- Surveillance or monitoring purposes |
|
|
- Heavy shadows or darkness |
|
|
|
|
|
|
|
|
## Understanding the Metrics |
|
|
|
|
|
### Point Cloud Statistics: |
|
|
- **Initial Points**: Raw points generated from depth |
|
|
- **Outliers Removed**: Noisy points filtered out (typically 5-15%) |
|
|
- **Final Points**: Clean points used for mesh generation |
|
|
|
|
|
### Mesh Quality Indicators: |
|
|
- **Edge Manifold**: Each edge connects exactly two faces (good topology)


- **Vertex Manifold**: Clean vertex connections


- **Watertight**: No holes, ready for 3D printing


- **Failed checks**: Indicate potential issues (the mesh is still usable but may need repair)
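
You can re-check these indicators yourself on a downloaded mesh with Open3D (the file name is illustrative):

```python
import open3d as o3d

mesh = o3d.io.read_triangle_mesh("mesh.ply")
print("edge manifold:  ", mesh.is_edge_manifold())
print("vertex manifold:", mesh.is_vertex_manifold())
print("watertight:     ", mesh.is_watertight())
```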
|
|
|
|
|
### Processing Times: |
|
|
- **Depth Estimation**: 0.3-2.5s (GLPN model) |
|
|
- **Mesh Reconstruction**: 2-10s (depends on point cloud size) |
|
|
- **Total Time**: Usually 10-60 seconds |
|
|
|
|
|
--- |
|
|
|
|
|
## Troubleshooting |
|
|
|
|
|
**Problem: No output appears** |
|
|
- Check browser console for errors |
|
|
- Try refreshing the page |
|
|
- Try a smaller/simpler image first |
|
|
- Check that image uploaded successfully |
|
|
|
|
|
**Problem: Mesh has holes or artifacts** |
|
|
- This is normal for single-view reconstruction |
|
|
- Hidden surfaces cannot be reconstructed |
|
|
- Use mesh repair tools in MeshLab if needed |
|
|
|
|
|
**Problem: Colors look wrong on mesh** |
|
|
- Vertex color interpolation is approximate |
|
|
- This is expected behavior |
|
|
- Colors on point cloud are more accurate |
|
|
|
|
|
**Problem: Processing is very slow** |
|
|
- Use smaller images |
|
|
- This is normal on CPU (GPU is much faster) |
|
|
|
|
|
**Problem: "Not watertight" in metrics** |
|
|
- Common for complex scenes |
|
|
- Still usable for visualization |
|
|
- For 3D printing: use mesh repair in MeshLab |
|
|
""") |
|
|
|
|
|
with gr.Tab(" Ethics & Impact"): |
|
|
gr.Markdown(""" |
|
|
|
|
|
## Algorithmic Bias & Fairness |
|
|
|
|
|
### Training Data Representation |
|
|
|
|
|
**Geographic Bias:** |
|
|
- Heavy representation: North America, Europe |
|
|
- Underrepresented: Africa, South Asia, Pacific Islands |
|
|
- Impact: Lower accuracy for non-Western architecture |
|
|
|
|
|
**Architectural Style Bias:** |
|
|
- Well-represented: Modern interiors, Western buildings |
|
|
- Underrepresented: Traditional, vernacular, indigenous structures |
|
|
- Impact: May misinterpret non-standard spatial layouts |
|
|
|
|
|
**Socioeconomic Bias:** |
|
|
- Training data skewed toward middle/upper-class interiors |
|
|
- Limited representation of informal settlements |
|
|
- May not generalize well to all socioeconomic contexts |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Potential Harms |
|
|
|
|
|
**Privacy Violations:**
|
|
- Unauthorized 3D reconstruction of private spaces |
|
|
- Creating models of individuals without consent |
|
|
- Surveillance and tracking applications |
|
|
|
|
|
**Misinformation:**
|
|
- Generating fake 3D evidence |
|
|
- Manipulating spatial understanding |
|
|
- Creating misleading visualizations |
|
|
|
|
|
**Property Rights:**
|
|
- Unauthorized documentation of copyrighted designs |
|
|
- Intellectual property theft |
|
|
- Commercial exploitation without permission |
|
|
|
|
|
### Harm Prevention |
|
|
|
|
|
1. **Mandatory consent**: Require user acknowledgment |
|
|
2. **Use case restriction**: Prohibit surveillance and deceptive uses |
|
|
3. **Privacy protection**: Disable webcam, encourage EXIF removal |
|
|
4. **Transparency**: Clear documentation of limitations |
|
|
|
|
|
|
|
|
|
|
|
## Accountability & Governance |
|
|
|
|
|
### User Responsibilities |
|
|
|
|
|
As a user, you are responsible for: |
|
|
- Ensuring lawful use of source images |
|
|
- Obtaining necessary consents and permissions |
|
|
- Respecting privacy and intellectual property |
|
|
- Using outputs ethically and transparently |
|
|
- Understanding and accounting for model biases |
|
|
|
|
|
### Developer Responsibilities |
|
|
|
|
|
This tool implements: |
|
|
- Clear responsible use guidelines |
|
|
- Privacy-protective design (no webcam, local processing) |
|
|
- Bias documentation and transparency |
|
|
- Prohibited use cases explicitly stated |
|
|
|
|
|
|
|
|
## Future Directions |
|
|
|
|
|
### Improving Fairness |
|
|
- Train on more diverse geographic datasets |
|
|
- Include underrepresented architectural styles |
|
|
- Develop bias mitigation techniques |
|
|
- Community-driven model evaluation |
|
|
|
|
|
### Enhancing Privacy |
|
|
- Face/person detection and redaction |
|
|
- Automatic EXIF stripping |
|
|
- Differential privacy techniques |
|
|
""") |
|
|
|
|
|
with gr.Tab(" Citation"): |
|
|
gr.Markdown(""" |
|
|
## Academic Citation |
|
|
|
|
|
### For GLPN Model: |
|
|
```bibtex |
|
|
@inproceedings{kim2022global, |
|
|
title={Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth}, |
|
|
author={Kim, Doyeon and Ga, Woonghyun and Ahn, Pyungwhan and Joo, Donggyu and Chun, Sehwan and Kim, Junmo}, |
|
|
booktitle={CVPR}, |
|
|
year={2022} |
|
|
} |
|
|
``` |
|
|
|
|
|
### For Poisson Surface Reconstruction: |
|
|
```bibtex |
|
|
@inproceedings{kazhdan2006poisson, |
|
|
title={Poisson Surface Reconstruction}, |
|
|
author={Kazhdan, Michael and Bolitho, Matthew and Hoppe, Hugues}, |
|
|
booktitle={Symposium on Geometry Processing}, |
|
|
year={2006} |
|
|
} |
|
|
``` |
|
|
|
|
|
## Open Source Components |
|
|
|
|
|
This application is built with: |
|
|
|
|
|
- **Transformers** (Hugging Face): Model inference framework |
|
|
- **Open3D**: Point cloud and mesh processing |
|
|
- **PyTorch**: Deep learning framework |
|
|
- **Plotly**: Interactive 3D visualization |
|
|
- **Gradio**: Web interface framework |
|
|
- **NumPy** & **SciPy**: Numerical computing |
|
|
- **Matplotlib**: Data visualization |
|
|
- **Pillow (PIL)**: Image processing |
|
|
|
|
|
## Model Credits |
|
|
|
|
|
**GLPN Model:** |
|
|
- Developed by: KAIST (Korea Advanced Institute of Science and Technology) |
|
|
- Hosted by: Hugging Face (vinvino02/glpn-nyu) |
|
|
- License: Apache 2.0 |
|
|
|
|
|
## Responsible AI Features |
|
|
|
|
|
This implementation includes: |
|
|
- Privacy-protective design (no webcam option) |
|
|
- Mandatory consent acknowledgment |
|
|
- Bias documentation and transparency |
|
|
- Ethical use guidelines |
|
|
|
|
|
|
|
|
|
|
|
""") |
|
|
|
|
|
gr.Markdown(""" |
|
|
--- |
|
|
|
|
|
**Version:** 2.0 (Responsible AI Edition - Optimized) |
|
|
**Last Updated:** 2025 |
|
|
**License:** Educational and Research Use |
|
|
|
|
|
""") |
|
|
|
|
|
if __name__ == "__main__": |
|
|
print("="*60) |
|
|
print("RESPONSIBLE AI 3D RECONSTRUCTION") |
|
|
print("="*60) |
|
|
print("✓ Lightweight model (GLPN only)") |
|
|
print("✓ No webcam option") |
|
|
print("✓ Local processing") |
|
|
print("✓ Consent required") |
|
|
print("="*60) |
|
|
demo.launch(share=True) |