# Commit a1bd718 (Harshith Reddy): Add detailed logging for API requests
# to show in Hugging Face logs.
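"""Inference entry points for the SRMA-Mamba liver segmentation Space.

Each request: load the modality-specific model (T1/T2), canonicalize and
preprocess the NIfTI volume, adapt the sliding-window ROI to the volume,
run AMP inference with progressive OOM fallbacks, threshold and refine the
sigmoid output, and return an overlay image, a saved NIfTI mask, and an
auto-generated medical report.
"""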
import os
import sys
import time
import tempfile
import base64
import io
import warnings
import threading
import numpy as np
import torch
import nibabel as nib
from PIL import Image
from typing import Optional
from torch.amp import autocast
from scipy import ndimage
import model_loader
from processing import preprocess_nifti, calculate_liver_volume, analyze_liver_morphology, generate_medical_report, refine_liver_mask_enhanced
warnings.filterwarnings('ignore', category=UserWarning, message='.*non-tuple sequence for multidimensional indexing.*')
warnings.filterwarnings('ignore', category=FutureWarning, message='.*non-tuple sequence.*')
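# Single global lock so only one inference runs at a time; predict_volume
# acquires it non-blockingly and returns a "busy" message if a run is active.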
PROCESSING_LOCK = threading.Lock()
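# Shrink the sliding-window ROI and overlap to fit the incoming volume: cap
# each ROI dimension at the volume extent, lower overlap for very large
# volumes, and trim ROI depth for thin stacks. Mutates
# model_loader.WINDOW_INFER in place.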
def adjust_roi_for_volume(volume_shape):
if model_loader.WINDOW_INFER is None:
return
if len(volume_shape) < 4:
return
if len(volume_shape) == 5:
_, _, depth, height, width = volume_shape
elif len(volume_shape) == 4:
_, depth, height, width = volume_shape
else:
depth, height, width = volume_shape[-3:]
current_roi = list(model_loader.WINDOW_INFER.roi_size)
original_roi = current_roi.copy()
roi_d, roi_h, roi_w = current_roi
adjusted = False
if roi_d > depth:
current_roi[0] = min(depth, 64)
adjusted = True
print(f" ⚠ ROI depth ({roi_d}) > volume depth ({depth}). Adjusting to {current_roi[0]}")
if roi_h > height:
current_roi[1] = min(height, current_roi[1])
adjusted = True
print(f" ⚠ ROI height ({roi_h}) > volume height ({height}). Adjusting to {current_roi[1]}")
if roi_w > width:
current_roi[2] = min(width, current_roi[2])
adjusted = True
print(f" ⚠ ROI width ({roi_w}) > volume width ({width}). Adjusting to {current_roi[2]}")
total_volume = depth * height * width
roi_volume = current_roi[0] * current_roi[1] * current_roi[2]
if total_volume > 20_000_000:
overlap = 0.1
if model_loader.WINDOW_INFER.overlap > 0.1:
model_loader.WINDOW_INFER.overlap = overlap
adjusted = True
print(f" β†’ Large volume detected ({total_volume:,} voxels). Reducing overlap to {overlap} for faster processing")
elif total_volume > 10_000_000:
overlap = 0.12
if model_loader.WINDOW_INFER.overlap > 0.12:
model_loader.WINDOW_INFER.overlap = overlap
adjusted = True
print(f" β†’ Medium-large volume detected ({total_volume:,} voxels). Reducing overlap to {overlap}")
if depth < 64 and current_roi[0] > depth * 0.8:
current_roi[0] = max(32, int(depth * 0.8))
adjusted = True
print(f" β†’ Small depth dimension ({depth}). Optimizing ROI depth to {current_roi[0]}")
if adjusted:
model_loader.WINDOW_INFER.roi_size = tuple(current_roi)
num_windows_approx = ((depth / current_roi[0]) * (height / current_roi[1]) * (width / current_roi[2])) / (1 - model_loader.WINDOW_INFER.overlap)
print(f" βœ“ ROI adjusted: {original_roi} β†’ {current_roi}")
print(f" β†’ Estimated windows: ~{int(num_windows_approx)} (overlap={model_loader.WINDOW_INFER.overlap:.2f})")
def predict_volume(nifti_file, modality, slice_idx=None):
if not PROCESSING_LOCK.acquire(blocking=False):
return None, "**Already processing a file. Please wait for the current inference to complete.**", "", None
if nifti_file is None:
PROCESSING_LOCK.release()
return None, "Please upload a NIfTI file (.nii.gz)", "", None
image_tensor = None
pred = None
output_path = None
try:
print(f"Starting prediction for modality: {modality}")
if modality == 'T1':
if model_loader.MODEL_T1 is None:
print("Loading T1 model_loader...")
model_loader.MODEL_T1 = model_loader.load_model('T1')
model_instance = model_loader.MODEL_T1
else:
if model_loader.MODEL_T2 is None:
print("Loading T2 model_loader...")
model_loader.MODEL_T2 = model_loader.load_model('T2')
model_instance = model_loader.MODEL_T2
print("Preprocessing NIfTI file...")
from pathlib import Path
fp = getattr(nifti_file, "name", nifti_file)
file_path = str(Path(fp).resolve())
if not os.path.exists(file_path):
raise FileNotFoundError(f"Uploaded file not found: {file_path}")
nifti_img = nib.load(file_path)
nifti_img = nib.as_closest_canonical(nifti_img)
voxel_spacing = nifti_img.header.get_zooms()[:3] if len(nifti_img.header.get_zooms()) >= 3 else (1.0, 1.0, 1.0)
aff = nifti_img.affine
hdr = nifti_img.header.copy()
original_data = nifti_img.get_fdata(dtype=np.float32)
if model_loader.DEVICE.type == 'cuda':
if not torch.cuda.is_available():
raise RuntimeError("CUDA not available but device is set to CUDA")
torch.cuda.empty_cache()
free_mem = (torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / (1024**3)
if free_mem < 0.5:
raise RuntimeError(f"Insufficient GPU memory: {free_mem:.2f} GB free. Please restart.")
image_data = preprocess_nifti(file_path, device=model_loader.DEVICE)
if len(image_data.shape) == 3:
image_tensor = image_data.unsqueeze(0).unsqueeze(0)
elif len(image_data.shape) == 4:
image_tensor = image_data.unsqueeze(0)
else:
image_tensor = image_data
if model_loader.DEVICE.type == 'cuda' and len(image_tensor.shape) >= 4:
try:
image_tensor = image_tensor.contiguous(memory_format=torch.channels_last_3d)
if image_tensor.is_contiguous(memory_format=torch.channels_last_3d):
print(f" β†’ Using channels-last 3D memory layout (optimized for GPU)")
except Exception:
pass
print(f"Input tensor shape: {image_tensor.shape}")
if model_loader.WINDOW_INFER is not None:
print("Adjusting ROI size based on volume dimensions...")
adjust_roi_for_volume(image_tensor.shape)
print("Running inference...")
inference_start = time.time()
y1, y2, y3, y4 = None, None, None, None
gpu_handle = None
if model_loader.DEVICE.type == 'cuda':
allocated = torch.cuda.memory_allocated(0) / (1024**3)
free_memory_gb = (torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / (1024**3)
print(f" β†’ Pre-inference GPU memory: {allocated:.2f} GB allocated, {free_memory_gb:.2f} GB free")
try:
import pynvml
pynvml.nvmlInit()
gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(0)
util = pynvml.nvmlDeviceGetUtilizationRates(gpu_handle)
print(f" β†’ GPU utilization: {util.gpu}%")
except Exception:
pass
if free_memory_gb < 1.0:
print(f" ⚠ WARNING: Very low free memory ({free_memory_gb:.2f} GB). Adjusting settings...")
if model_loader.WINDOW_INFER is not None:
model_loader.WINDOW_INFER.sw_batch_size = 1
if free_memory_gb < 0.5:
model_loader.WINDOW_INFER.roi_size = [192, 192, 32]
model_loader.WINDOW_INFER.overlap = 0.25
if free_memory_gb < 0.5:
torch.cuda.empty_cache()
free_memory_gb = (torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / (1024**3)
print(f" β†’ After cache clear: {free_memory_gb:.2f} GB free")
if free_memory_gb < 0.5:
error_msg = f"Insufficient GPU memory ({free_memory_gb:.2f} GB free). The model is using {allocated:.2f} GB. Please use a smaller input volume."
print(f" βœ— {error_msg}")
raise RuntimeError(error_msg)
import config
timeout_occurred = threading.Event()
def timeout_handler():
timeout_occurred.set()
timeout_timer = threading.Timer(config.INFERENCE_TIMEOUT, timeout_handler)
timeout_timer.start()
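# Note: the timer only sets an event; the flag is checked before and after
# the sliding-window call, so a timeout cannot interrupt a window mid-run.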
try:
with torch.no_grad():
if model_loader.DEVICE.type == 'cuda':
torch.cuda.synchronize()
print(" β†’ AMP enabled: YES")
sys.stdout.flush()
try:
with autocast(device_type='cuda'):
print(f" β†’ Starting inference (timeout: {config.INFERENCE_TIMEOUT}s)...")
sys.stdout.flush()
start_time = time.time()
if timeout_occurred.is_set():
raise TimeoutError(f"Inference timeout: {config.INFERENCE_TIMEOUT}s")
gpu_utils = []
if gpu_handle is not None:
monitor_active = threading.Event()
monitor_active.set()
def monitor_gpu():
# Sample GPU utilization roughly once per second while inference runs.
import pynvml
while monitor_active.is_set():
try:
util = pynvml.nvmlDeviceGetUtilizationRates(gpu_handle)
gpu_utils.append(util.gpu)
time.sleep(1.0)
except Exception:
break
monitor_thread = threading.Thread(target=monitor_gpu, daemon=True)
monitor_thread.start()
torch.cuda.synchronize()
inference_start_sync = time.time()
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
if gpu_handle is not None:
monitor_active.clear()
if monitor_thread.is_alive():
monitor_thread.join(timeout=2.0)
if timeout_occurred.is_set():
raise TimeoutError(f"Inference timeout: {config.INFERENCE_TIMEOUT}s")
torch.cuda.synchronize()
inference_time = time.time() - inference_start_sync
elapsed = time.time() - start_time
if gpu_handle is not None and len(gpu_utils) > 0:
try:
import pynvml
final_util = pynvml.nvmlDeviceGetUtilizationRates(gpu_handle)
avg_util = sum(gpu_utils) / len(gpu_utils) if gpu_utils else final_util.gpu
max_util = max(gpu_utils) if gpu_utils else final_util.gpu
print(f" βœ“ Inference completed in {inference_time:.1f}s (total: {elapsed:.1f}s)")
print(f" β†’ GPU utilization: avg={avg_util:.1f}%, max={max_util:.1f}%, final={final_util.gpu}%")
if avg_util < 50:
print(f" ⚠ WARNING: Low GPU utilization ({avg_util:.1f}%). May be CPU/transfer-bound.")
print(f" β†’ Consider: 1) Install CUDA extensions (mamba_ssm, selective_scan_cuda_oflex)")
print(f" β†’ 2) Check data transfer efficiency (channels-last enabled)")
print(f" β†’ 3) Verify GPU aggregation is enabled (check logs above)")
except:
print(f" βœ“ Inference completed in {inference_time:.1f}s (total: {elapsed:.1f}s)")
else:
print(f" βœ“ Inference completed in {inference_time:.1f}s (total: {elapsed:.1f}s)")
except TimeoutError:
timeout_timer.cancel()
raise
except RuntimeError as e:
if "out of memory" in str(e):
print(f" GPU OOM error: {e}")
print(" Applying progressive OOM fallback...")
torch.cuda.empty_cache()
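# Progressive fallbacks, tried in order until one fits in memory:
# 1) sw_batch_size -> 1, 2) ROI depth -> 48, 3) ROI depth -> 32,
# 4) move window aggregation to CPU. Settings are restored afterwards.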
original_batch = model_loader.WINDOW_INFER.sw_batch_size
original_roi = list(model_loader.WINDOW_INFER.roi_size)
original_overlap = model_loader.WINDOW_INFER.overlap
original_device = model_loader.WINDOW_INFER.device
original_device_str = str(original_device)
fallback_applied = None
try:
if original_batch > 1:
print(f" Fallback 1: Reducing sw_batch_size {original_batch} -> 1")
model_loader.WINDOW_INFER.sw_batch_size = 1
with autocast(device_type='cuda'):
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
torch.cuda.synchronize()
fallback_applied = "sw_batch_size=1"
print(f" Retry successful with {fallback_applied}")
else:
raise RuntimeError("Already at batch_size=1")
except RuntimeError as e2:
if "out of memory" not in str(e2):
raise
try:
if original_roi[0] > 48:
new_depth = 48
print(f" Fallback 2: Reducing ROI depth {original_roi[0]} -> {new_depth}")
model_loader.WINDOW_INFER.roi_size = (new_depth, original_roi[1], original_roi[2])
with autocast(device_type='cuda'):
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
torch.cuda.synchronize()
fallback_applied = f"roi_depth={new_depth}"
print(f" Retry successful with {fallback_applied}")
else:
raise RuntimeError("Already at depth=48")
except RuntimeError as e3:
if "out of memory" not in str(e3):
raise
try:
if original_roi[0] > 32:
new_depth = 32
print(f" Fallback 3: Reducing ROI depth {original_roi[0]} -> {new_depth}")
model_loader.WINDOW_INFER.roi_size = (new_depth, original_roi[1], original_roi[2])
with autocast(device_type='cuda'):
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
torch.cuda.synchronize()
fallback_applied = f"roi_depth={new_depth}"
print(f" Retry successful with {fallback_applied}")
else:
raise RuntimeError("Already at depth=32")
except RuntimeError as e4:
if "out of memory" not in str(e4):
raise
try:
# torch.device == 'cuda' string comparison is unreliable; use the saved string.
if original_device_str.startswith('cuda'):
print(f" Fallback 4: Switching aggregation to CPU")
model_loader.WINDOW_INFER.device = torch.device('cpu')
with autocast(device_type='cuda'):
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
torch.cuda.synchronize()
fallback_applied = "cpu_aggregation"
print(f" Retry successful with {fallback_applied}")
else:
raise RuntimeError("Already using CPU aggregation")
except RuntimeError as e5:
model_loader.WINDOW_INFER.sw_batch_size = original_batch
model_loader.WINDOW_INFER.roi_size = tuple(original_roi)
model_loader.WINDOW_INFER.overlap = original_overlap
model_loader.WINDOW_INFER.device = original_device if isinstance(original_device, torch.device) else torch.device(original_device_str)
allocated = torch.cuda.memory_allocated(0) / (1024**3)
total_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
free_memory_gb = (total_gb - allocated)
error_msg = f"GPU out of memory after all fallbacks. GPU: {allocated:.2f} GB / {total_gb:.2f} GB ({100*allocated/total_gb:.1f}% full), {free_memory_gb:.2f} GB free. Please restart Space or use smaller input. Error: {e5}"
print(f" {error_msg}")
raise RuntimeError(error_msg)
finally:
if fallback_applied:
print(f" Note: Applied fallback ({fallback_applied}). Restoring original settings after inference.")
model_loader.WINDOW_INFER.sw_batch_size = original_batch
model_loader.WINDOW_INFER.roi_size = tuple(original_roi)
model_loader.WINDOW_INFER.overlap = original_overlap
model_loader.WINDOW_INFER.device = original_device
else:
raise
else:
print(" β†’ AMP: Not available on CPU")
print(" β†’ Starting sliding window inference (this may take 5-10 minutes on CPU)...")
start_time = time.time()
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
elapsed = time.time() - start_time
print(f" βœ“ Inference completed in {elapsed:.1f}s")
finally:
timeout_timer.cancel()
if y1 is not None:
print(f" β†’ Raw model output shapes: y1={y1.shape if hasattr(y1, 'shape') else 'N/A'}, y2={y2.shape if hasattr(y2, 'shape') else 'N/A'}")
sys.stdout.flush()
else:
print(" β†’ Inference failed before model output was generated")
sys.stdout.flush()
raise RuntimeError("Inference failed: model did not produce output")
pred = y1
if isinstance(pred, torch.Tensor):
raw_min = pred.min().item()
raw_max = pred.max().item()
raw_mean = pred.mean().item()
print(f" β†’ Raw logits (before sigmoid): min={raw_min:.6f}, max={raw_max:.6f}, mean={raw_mean:.6f}")
sys.stdout.flush()
print(" β†’ Applying sigmoid activation (confirming: using sigmoid, not softmax)")
sys.stdout.flush()
pred = torch.sigmoid(pred)
if len(pred.shape) == 5:
if pred.shape[1] > 1:
print(f" ⚠ WARNING: Multi-channel output detected (C={pred.shape[1]}). Using channel 0 for binary segmentation.")
pred = pred[0, 0]
else:
pred = pred[0, 0]
elif len(pred.shape) == 4:
pred = pred[0] if pred.shape[0] == 1 else pred
print(f" β†’ Final prediction shape after sigmoid: {pred.shape}")
sys.stdout.flush()
inference_time = time.time() - inference_start
print(f"βœ“ Inference completed in {inference_time:.1f} seconds")
del y1, y2, y3, y4
if model_loader.DEVICE.type == 'cuda' and (torch.cuda.memory_allocated(0) / (1024**3)) > 40:
torch.cuda.empty_cache()
print("Post-processing predictions...")
sys.stdout.flush()
pred = pred.detach().float().cpu()
if pred.ndim == 5:
pred_np = pred[0, 0].numpy()
elif pred.ndim == 4 and pred.shape[0] == 1:
pred_np = pred[0].numpy()
else:
pred_np = pred.numpy()
pred_max = pred_np.max()
pred_min = pred_np.min()
pred_mean = pred_np.mean()
if pred_max < 0.3:
print(f" ⚠ WARNING: Maximum prediction value is very low ({pred_max:.4f}). Model confidence may be low.")
print(f" This can occur with:")
print(f" - Low resolution/compressed input (lost texture cues)")
print(f" - Normalization mismatch (input not properly preprocessed)")
print(f" - Missing metadata (wrong voxel spacing/scaling)")
if pred_mean < 0.1:
print(f" ⚠ WARNING: Average prediction is very low ({pred_mean:.4f}). Most voxels have low confidence.")
print(f" Consider checking input quality and preprocessing.")
total_voxels = pred_np.size
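# Threshold grid search: scan modality-specific candidate thresholds and
# keep the one whose mask lands in a plausible liver-fraction band, scoring
# by largest-component size minus a penalty per extra connected component.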
if modality.upper() == "T1":
candidates = np.linspace(0.60, 0.80, 10)
frac_min, frac_max = 0.015, 0.040
else:
candidates = np.linspace(0.30, 0.70, 9)
frac_min, frac_max = 0.008, 0.040
best = None
for t in candidates:
mb = (pred_np > t).astype(np.uint8)
if mb.sum() < 1000:
continue
frac = mb.mean()
if not (frac_min <= frac <= frac_max):
continue
lbl, n = ndimage.label(mb)
if n == 0:
continue
sizes = ndimage.sum(mb, lbl, range(1, n + 1))
score = sizes.max() - (n - 1) * 1e5
if best is None or score > best[0]:
best = (score, t, mb)
if best:
_, threshold, pred_binary = best
print(f" Grid search selected threshold: {threshold:.3f}")
else:
threshold = default_threshold
pred_binary = (pred_np > threshold).astype(np.uint8)
print(f" Grid search failed, using default threshold: {threshold:.3f}")
count_default = pred_binary.sum()
count_035 = (pred_np > 0.35).astype(np.uint8).sum()
count_03 = (pred_np > 0.3).astype(np.uint8).sum()
if count_default == 0 and default_threshold >= 0.5:
if count_035 > 0:
print(f" ⚠ WARNING: No voxels > {default_threshold}, but {count_035:,} voxels > 0.35. Trying threshold 0.35...")
threshold = 0.35
elif count_03 > 0:
print(f" ⚠ WARNING: No voxels > {default_threshold} or 0.35, but {count_03:,} voxels > 0.3. Trying threshold 0.3...")
threshold = 0.3
else:
percentile_thresh = float(np.quantile(pred_np.flatten(), 0.995))
threshold = max(0.3, percentile_thresh)
print(f" ⚠ WARNING: No voxels > {default_threshold}. Using percentile threshold: {threshold:.4f}")
pred_binary = (pred_np > threshold).astype(np.uint8)
elif count_default == 0 and default_threshold < 0.5:
if count_03 > 0:
print(f" ⚠ WARNING: No voxels > {default_threshold}, but {count_03:,} voxels > 0.3. Trying threshold 0.3...")
threshold = 0.3
else:
percentile_thresh = float(np.quantile(pred_np.flatten(), 0.995))
threshold = max(0.3, percentile_thresh)
print(f" ⚠ WARNING: No voxels > {default_threshold}. Using percentile threshold: {threshold:.4f}")
pred_binary = (pred_np > threshold).astype(np.uint8)
if modality.upper() == "T1":
try:
h, w, d = original_data.shape
is_prenormalized = (original_data.max() <= 1.0 and original_data.min() >= 0.0)
if not is_prenormalized:
right_upper_quadrant = original_data[h//4:3*h//4, w//2:, d//3:2*d//3]
if right_upper_quadrant.size > 0 and (right_upper_quadrant > 0).sum() > 0:
median_intensity = np.median(right_upper_quadrant[right_upper_quadrant > 0])
mad = np.median(np.abs(right_upper_quadrant[right_upper_quadrant > 0] - median_intensity))
k = 1.5
intensity_lower = median_intensity - k * mad
intensity_upper = median_intensity + k * mad
if intensity_upper - intensity_lower > 0.5:
if len(pred_np.shape) == 4:
pred_3d = pred_np[0, 0]
elif len(pred_np.shape) == 5:
pred_3d = pred_np[0, 0, 0]
else:
pred_3d = pred_np
if pred_3d.shape == original_data.shape:
intensity_mask = (original_data >= intensity_lower) & (original_data <= intensity_upper)
pred_3d_clamped = pred_3d.copy()
pred_3d_clamped[~intensity_mask] = np.minimum(pred_3d_clamped[~intensity_mask], threshold * 0.6)
if pred_3d_clamped.std() > 1e-6:
if len(pred_np.shape) == 4:
pred_np[0, 0] = pred_3d_clamped
elif len(pred_np.shape) == 5:
pred_np[0, 0, 0] = pred_3d_clamped
else:
pred_np = pred_3d_clamped
print(f" Intensity gate: Clamped predictions outside liver-like intensity range [{intensity_lower:.2f}, {intensity_upper:.2f}]")
else:
print(f" Intensity gate: Shape mismatch (pred: {pred_3d.shape}, original: {original_data.shape}). Skipping intensity gate.")
else:
left_upper_region = original_data[h//4:3*h//4, :w//2, d//3:2*d//3]
if left_upper_region.size > 0 and (left_upper_region > 0).sum() > 100:
stomach_like_intensity = np.percentile(left_upper_region[left_upper_region > 0], 75)
if len(pred_np.shape) == 4:
pred_3d = pred_np[0, 0]
elif len(pred_np.shape) == 5:
pred_3d = pred_np[0, 0, 0]
else:
pred_3d = pred_np
if pred_3d.shape == original_data.shape:
stomach_mask = (original_data > stomach_like_intensity * 0.9) & (original_data < stomach_like_intensity * 1.1)
pred_3d_clamped = pred_3d.copy()
pred_3d_clamped[stomach_mask] = np.minimum(pred_3d_clamped[stomach_mask], threshold * 0.5)
if pred_3d_clamped.std() > 1e-6:
if len(pred_np.shape) == 4:
pred_np[0, 0] = pred_3d_clamped
elif len(pred_np.shape) == 5:
pred_np[0, 0, 0] = pred_3d_clamped
else:
pred_np = pred_3d_clamped
print(f" Intensity gate: Suppressed stomach-like intensities (threshold: {stomach_like_intensity:.3f})")
else:
print(f" Intensity gate: Shape mismatch (pred: {pred_3d.shape}, original: {original_data.shape}). Skipping intensity gate.")
except Exception as e:
print(f" Intensity gate failed (non-critical): {e}")
pred_binary = (pred_np > threshold).astype(np.uint8)
count_after_thresh = pred_binary.sum()
fraction = count_after_thresh / total_voxels if total_voxels > 0 else 0.0
volume_ml = calculate_liver_volume(pred_binary, voxel_spacing)
print(f" After threshold ({threshold:.3f}): {count_after_thresh:,} voxels ({100*fraction:.2f}%), volume: {volume_ml:.1f} ml")
if fraction > 0.040 or volume_ml > 2200:
threshold_max = float(os.environ.get("THRESHOLD_MAX", "0.90"))
new_threshold = min(threshold_max, threshold + 0.05)
if new_threshold > threshold:
print(f" Size-aware auto-tune: Mask too large (fraction={100*fraction:.2f}%, volume={volume_ml:.1f}ml). Increasing threshold {threshold:.3f} -> {new_threshold:.3f}")
threshold = new_threshold
pred_binary = (pred_np > threshold).astype(np.uint8)
count_after_thresh = pred_binary.sum()
fraction = count_after_thresh / total_voxels if total_voxels > 0 else 0.0
volume_ml = calculate_liver_volume(pred_binary, voxel_spacing)
print(f" After auto-tune threshold ({threshold:.3f}): {count_after_thresh:,} voxels ({100*fraction:.2f}%), volume: {volume_ml:.1f} ml")
if fraction < 0.005 or fraction > 0.040:
print(f" WARNING: Mask fraction {100*fraction:.3f}% outside typical range (0.5%-4.0%). May indicate segmentation issues.")
sys.stdout.flush()
print("Refining liver segmentation mask...")
sys.stdout.flush()
pred_np_copy = pred_np.copy()
try:
pred_binary_refined, refinement_metrics, confidence_score = refine_liver_mask_enhanced(
pred_binary, voxel_spacing, pred_np_copy, threshold, modality
)
pred_binary = pred_binary_refined
guards_ok = refinement_metrics.get('guards_ok', True)
print(f" Refinement applied: {refinement_metrics['connected_components_before']} -> {refinement_metrics['connected_components_after']} components")
print(f" Volume change: {refinement_metrics['volume_change_percent']:.2f}% ({refinement_metrics['volume_change_ml']:.2f} ml)")
print(f" Confidence score: {confidence_score:.1f}% {'(guards OK)' if guards_ok else '(guards triggered)'}")
except Exception as e:
print(f" Refinement failed (using original mask): {e}")
import traceback
traceback.print_exc()
confidence_score = 50.0
guards_ok = False
del pred_np, pred_np_copy
print("Loading original volume for visualization...")
original_volume = original_data.copy()
if original_volume.max() > original_volume.min():
original_volume = (original_volume - original_volume.min()) / (original_volume.max() - original_volume.min())
else:
original_volume = np.zeros_like(original_volume)
slice_idx = max(0, min(slice_idx if slice_idx is not None else (pred_binary.shape[-1] // 2), pred_binary.shape[-1] - 1))
if len(original_volume.shape) == 3:
original_slice = original_volume[:, :, slice_idx]
else:
original_slice = original_volume[:, :, slice_idx] if original_volume.shape[2] > slice_idx else original_volume[:, :, 0]
if len(pred_binary.shape) == 4:
pred_slice = pred_binary[0, :, :, slice_idx] if pred_binary.shape[3] > slice_idx else pred_binary[0, :, :, 0]
elif len(pred_binary.shape) == 5:
pred_slice = pred_binary[0, 0, :, :, slice_idx] if pred_binary.shape[4] > slice_idx else pred_binary[0, 0, :, :, 0]
else:
pred_slice = pred_binary[:, :, slice_idx] if pred_binary.shape[2] > slice_idx else pred_binary[:, :, 0]
if pred_slice.shape != original_slice.shape:
print(f" Warning: Shape mismatch in overlay (pred_slice: {pred_slice.shape}, original_slice: {original_slice.shape}). Resizing pred_slice to match.")
from scipy.ndimage import zoom
zoom_factors = (original_slice.shape[0] / pred_slice.shape[0], original_slice.shape[1] / pred_slice.shape[1])
pred_slice = zoom(pred_slice, zoom_factors, order=0)
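# Build the RGB overlay: grayscale anatomy replicated across all three
# channels, with segmented voxels painted pure green.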
overlay = np.zeros((*original_slice.shape, 3), dtype=np.uint8)
overlay[:, :, 0] = (original_slice * 255).astype(np.uint8)
overlay[:, :, 1] = (original_slice * 255).astype(np.uint8)
overlay[:, :, 2] = (original_slice * 255).astype(np.uint8)
mask = pred_slice > 0
overlay[mask, 0] = 0
overlay[mask, 1] = 255
overlay[mask, 2] = 0
overlay_img = Image.fromarray(overlay)
print("Saving segmentation mask...")
aff = nifti_img.affine
hdr = nifti_img.header.copy()
hdr.set_data_dtype(np.uint8)
pred_save = pred_binary[0] if pred_binary.ndim == 4 else pred_binary
if pred_binary.ndim == 5:
pred_save = pred_binary[0, 0]
original_shape = original_data.shape
pred_shape = pred_save.shape
if pred_shape != original_shape:
print(f" Resampling prediction from {pred_shape} to original shape {original_shape}...")
from scipy.ndimage import zoom
zoom_factors = tuple(orig / pred for orig, pred in zip(original_shape, pred_shape))
pred_save = zoom(pred_save.astype(np.float32), zoom_factors, order=0).astype(np.uint8)
print(f" βœ“ Resampled to match original dimensions")
with tempfile.NamedTemporaryFile(suffix='.nii.gz', delete=False) as tmp_file:
nifti_pred = nib.Nifti1Image(pred_save.astype(np.uint8), affine=aff, header=hdr)
nib.save(nifti_pred, tmp_file.name)
output_path = tmp_file.name
total_voxels = pred_binary.size
liver_voxels = pred_binary.sum()
liver_percentage = (liver_voxels / total_voxels) * 100
print(f"βœ“ Segmentation complete. Liver: {liver_voxels:,} voxels ({liver_percentage:.2f}%)")
volume_ml = calculate_liver_volume(pred_binary, voxel_spacing)
if len(pred_binary.shape) == 4:
mask_3d = pred_binary[0]
elif len(pred_binary.shape) == 5:
mask_3d = pred_binary[0, 0]
else:
mask_3d = pred_binary
labels_final, num_components_final = ndimage.label(mask_3d)
morphology = analyze_liver_morphology(pred_binary)
from processing import check_volume_sanity
volume_sanity_status, volume_sanity_msg = check_volume_sanity(volume_ml)
if volume_sanity_status != "OK":
print(f" ⚠ {volume_sanity_status}: {volume_sanity_msg}")
if num_components_final != 1:
print(f" ⚠ WARNING: Expected 1 connected component, found {num_components_final}. Quality check recommended.")
medical_report = generate_medical_report(
{
"volume_shape": list(pred_binary.shape),
"liver_voxels": int(liver_voxels),
"total_voxels": int(total_voxels),
"liver_percentage": float(liver_percentage),
"slice_index": int(slice_idx),
"total_slices": int(pred_binary.shape[-1]),
"modality": modality,
"confidence_score": float(confidence_score)
},
volume_ml,
morphology,
modality,
confidence_score
)
info_text = f"""
## Segmentation Results
- **Volume Shape:** {pred_binary.shape}
- **Liver Voxels:** {liver_voxels:,} ({liver_percentage:.2f}% of volume)
- **Liver Volume:** {volume_ml:.2f} ml
- **Slice:** {slice_idx + 1} of {pred_binary.shape[-1]} shown
- **Modality:** {modality}
"""
severity_text = medical_report['severity'].upper().replace('_', ' ')
confidence = medical_report['measurements'].get('confidence_score', 0.0)
report_text = f"""
## Automated Liver Segmentation Report
---
### Study Information
- **Study Date & Time:** {medical_report['study_date']}
- **Imaging Modality:** {medical_report['modality']}
- **Report Status:** **{severity_text}**
- **Confidence Score:** **{confidence:.1f}%**
---
### Key Findings
{chr(10).join(f"{finding}" for finding in medical_report['findings'])}
---
### Quantitative Measurements
**Volume Analysis:**
- **Liver Volume:** **{medical_report['measurements']['liver_volume_ml']} ml** ({medical_report['measurements']['liver_volume_liters']} L)
- **Liver Percentage of Scan:** {medical_report['measurements']['liver_percentage']}%
- **Segmented Voxels:** {medical_report['measurements']['liver_voxels']:,} / {medical_report['measurements']['total_voxels']:,} total voxels
**Morphological Analysis:**
- **Connected Components:** {medical_report['measurements']['morphology']['connected_components']}
- **Fragmentation Level:** {medical_report['measurements']['morphology']['fragmentation'].upper()}
- **Largest Component Ratio:** {medical_report['measurements']['morphology']['largest_component_ratio']*100:.1f}%
**Image Characteristics:**
- **Volume Dimensions:** {medical_report['measurements']['volume_shape']}
**Confidence Metrics:**
- **Overall Confidence:** {confidence:.1f}%
- **Confidence Interpretation:** {'High' if confidence >= 80 else 'Moderate' if confidence >= 60 else 'Low'} confidence segmentation
---
### Quality Assessment
{chr(10).join(f"- {note}" for note in medical_report.get('quality_assessment', []))}
---
### Clinical Context
{chr(10).join(f"- {note}" for note in medical_report.get('clinical_notes', [])) if medical_report.get('clinical_notes') else "- No additional clinical notes at this time."}
---
### Impression
{medical_report['impression']}
---
{f"### Recommendations{chr(10)}{chr(10)}{chr(10).join(f'- {rec}' for rec in medical_report['recommendations'])}{chr(10)}" if medical_report['recommendations'] else ""}
### Methodology
{medical_report.get('methodology', 'SRMA-Mamba deep learning model for automated liver segmentation')}
---
### Important Notice
{medical_report['disclaimer']}
---
*Report generated automatically by SRMA-Mamba Liver Segmentation System*
"""
del original_volume, pred_binary
if model_loader.DEVICE.type == 'cuda':
torch.cuda.empty_cache()
return overlay_img, info_text, report_text, output_path
except FileNotFoundError as e:
error_msg = f"**Checkpoint Error:** {str(e)}\n\nPlease ensure checkpoint files are uploaded to the Space."
print(f"βœ— {error_msg}")
import traceback
traceback.print_exc()
return None, f"## ❌ Error\n\n{error_msg}", "", None
except RuntimeError as e:
error_msg = f"**Runtime Error:** {str(e)}\n\nThis might be a GPU/CUDA issue. The model will try to use CPU instead."
print(f"βœ— {error_msg}")
import traceback
traceback.print_exc()
return None, f"## ❌ Error\n\n{error_msg}", "", None
except Exception as e:
error_msg = f"**Error during inference:** {str(e)}\n\nPlease check the logs for more details."
print(f"βœ— {error_msg}")
import traceback
tb = traceback.format_exc()
print(f"Full traceback:\n{tb}")
return None, f"## ❌ Error\n\n{error_msg}\n\n**Error Type:** `{type(e).__name__}`", "", None
finally:
PROCESSING_LOCK.release()
try:
if image_tensor is not None:
del image_tensor
except (NameError, UnboundLocalError):
pass
try:
if pred is not None:
del pred
except (NameError, UnboundLocalError):
pass
if model_loader.DEVICE.type == 'cuda':
torch.cuda.empty_cache()
print("βœ“ Memory cleaned up, lock released")
def predict_volume_api(file_path: str, modality: str = 'T1', slice_idx: Optional[int] = None):
confidence_score = 50.0
try:
print("=" * 60)
print(f"API: Starting prediction for modality: {modality}, file: {file_path}")
print("=" * 60)
if modality == 'T1':
if model_loader.MODEL_T1 is None:
print("API: Loading T1 model_loader...")
model_loader.MODEL_T1 = model_loader.load_model('T1')
model_instance = model_loader.MODEL_T1
else:
if model_loader.MODEL_T2 is None:
print("API: Loading T2 model_loader...")
model_loader.MODEL_T2 = model_loader.load_model('T2')
model_instance = model_loader.MODEL_T2
print("API: Preprocessing NIfTI file...")
from pathlib import Path
file_path = str(Path(file_path).resolve())
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
nifti_img = nib.load(file_path)
nifti_img = nib.as_closest_canonical(nifti_img)
voxel_spacing = nifti_img.header.get_zooms()[:3] if len(nifti_img.header.get_zooms()) >= 3 else (1.0, 1.0, 1.0)
aff = nifti_img.affine
hdr = nifti_img.header.copy()
original_data = nifti_img.get_fdata(dtype=np.float32)
if model_loader.DEVICE.type == 'cuda':
if not torch.cuda.is_available():
raise RuntimeError("CUDA not available but device is set to CUDA")
torch.cuda.empty_cache()
free_mem = (torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / (1024**3)
if free_mem < 0.5:
raise RuntimeError(f"Insufficient GPU memory: {free_mem:.2f} GB free. Please restart.")
image_data = preprocess_nifti(file_path, device=model_loader.DEVICE)
if len(image_data.shape) == 3:
image_tensor = image_data.unsqueeze(0).unsqueeze(0)
elif len(image_data.shape) == 4:
image_tensor = image_data.unsqueeze(0)
else:
image_tensor = image_data
print(f"API: Input tensor shape: {image_tensor.shape}")
if model_loader.WINDOW_INFER is not None:
print("API: Adjusting ROI size based on volume dimensions...")
adjust_roi_for_volume(image_tensor.shape)
print("API: Running inference...")
if model_loader.DEVICE.type == 'cuda':
allocated = torch.cuda.memory_allocated(0) / (1024**3)
free_memory_gb = (torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / (1024**3)
print(f"API: Pre-inference GPU memory: {allocated:.2f} GB allocated, {free_memory_gb:.2f} GB free")
if free_memory_gb < 0.5:
print(f"API: ⚠ WARNING: Very low free memory ({free_memory_gb:.2f} GB). Adjusting settings...")
if model_loader.WINDOW_INFER is not None:
model_loader.WINDOW_INFER.sw_batch_size = 1
model_loader.WINDOW_INFER.roi_size = [192, 192, 32]
model_loader.WINDOW_INFER.overlap = 0.25
torch.cuda.empty_cache()
free_memory_gb = (torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / (1024**3)
print(f"API: β†’ After cache clear: {free_memory_gb:.2f} GB free")
if free_memory_gb < 0.3:
error_msg = f"API: Insufficient GPU memory ({free_memory_gb:.2f} GB free). Please use a smaller input volume."
print(f"API: βœ— {error_msg}")
raise RuntimeError(error_msg)
with torch.no_grad():
if model_loader.DEVICE.type == 'cuda':
print("API: AMP (Automatic Mixed Precision) enabled: YES")
sys.stdout.flush()
try:
with autocast(device_type='cuda'):
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
except RuntimeError as e:
if "out of memory" in str(e):
print(f"API: ⚠ GPU OOM error: {e}")
print("API: β†’ Clearing cache and retrying with minimal settings...")
torch.cuda.empty_cache()
torch.cuda.synchronize()
original_batch = model_loader.WINDOW_INFER.sw_batch_size
original_roi = model_loader.WINDOW_INFER.roi_size
original_overlap = model_loader.WINDOW_INFER.overlap
model_loader.WINDOW_INFER.sw_batch_size = 1
model_loader.WINDOW_INFER.roi_size = [192, 192, 32]
model_loader.WINDOW_INFER.overlap = 0.25
try:
with autocast(device_type='cuda'):
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
print("API: βœ“ Retry successful with minimal settings")
except RuntimeError as e2:
model_loader.WINDOW_INFER.sw_batch_size = original_batch
model_loader.WINDOW_INFER.roi_size = original_roi
model_loader.WINDOW_INFER.overlap = original_overlap
allocated = torch.cuda.memory_allocated(0) / (1024**3)
total_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
free_memory_gb = (total_gb - allocated)
error_msg = f"GPU out of memory even with minimal settings. GPU is {allocated:.2f} GB / {total_gb:.2f} GB ({100*allocated/total_gb:.1f}% full), only {free_memory_gb:.2f} GB free. The model requires CUDA. Please restart the Space or use a smaller input volume. Error: {e2}"
print(f"API: βœ— {error_msg}")
raise RuntimeError(error_msg)
finally:
model_loader.WINDOW_INFER.sw_batch_size = original_batch
model_loader.WINDOW_INFER.roi_size = original_roi
model_loader.WINDOW_INFER.overlap = original_overlap
else:
raise
else:
print("API: AMP: Not available on CPU")
y1, y2, y3, y4 = model_loader.WINDOW_INFER(image_tensor, model_instance)
print(f"API: Raw model output shapes: y1={y1.shape if hasattr(y1, 'shape') else 'N/A'}")
sys.stdout.flush()
pred = y1
if isinstance(pred, torch.Tensor):
raw_min = pred.min().item()
raw_max = pred.max().item()
raw_mean = pred.mean().item()
print(f"API: Raw logits (before sigmoid): min={raw_min:.6f}, max={raw_max:.6f}, mean={raw_mean:.6f}")
sys.stdout.flush()
print("API: Applying sigmoid activation (confirming: using sigmoid, not softmax)")
sys.stdout.flush()
pred = torch.sigmoid(pred)
if len(pred.shape) == 5:
if pred.shape[1] > 1:
print(f"API: ⚠ WARNING: Multi-channel output detected (C={pred.shape[1]}). Using channel 0 for binary segmentation.")
pred = pred[0, 0]
else:
pred = pred[0, 0]
elif len(pred.shape) == 4:
pred = pred[0] if pred.shape[0] == 1 else pred
print(f"API: Final prediction shape after sigmoid: {pred.shape}")
sys.stdout.flush()
print("API: Post-processing predictions...")
sys.stdout.flush()
pred_np = pred.detach().cpu().numpy()
print("=" * 60)
print("API: DIAGNOSTIC: SIGMOID OUTPUT STATS (before thresholding)")
print("=" * 60)
print(f" Max: {pred_np.max():.6f}")
print(f" Mean: {pred_np.mean():.6f}")
print(f" Min: {pred_np.min():.6f}")
print(f" Std: {pred_np.std():.6f}")
print(f" Shape: {pred_np.shape}")
sys.stdout.flush()
default_threshold = float(os.environ.get("SEGMENTATION_THRESHOLD", "0.5"))
count_035 = np.sum(pred_np > 0.35)
count_03 = np.sum(pred_np > 0.3)
count_05 = np.sum(pred_np > 0.5)
total_voxels = pred_np.size
threshold = default_threshold
count_default = int(np.sum(pred_np > default_threshold))
if count_default == 0 and default_threshold >= 0.5:
if count_035 > 0:
print(f"API: ⚠ WARNING: No voxels > {default_threshold}, but {count_035:,} voxels > 0.35. Trying threshold 0.35...")
threshold = 0.35
elif count_03 > 0:
print(f"API: ⚠ WARNING: No voxels > {default_threshold} or 0.35, but {count_03:,} voxels > 0.3. Trying threshold 0.3...")
threshold = 0.3
else:
percentile_thresh = float(np.quantile(pred_np.flatten(), 0.995))
threshold = max(0.3, percentile_thresh)
print(f"API: ⚠ WARNING: No voxels > {default_threshold}. Using percentile threshold: {threshold:.4f}")
elif count_default == 0 and default_threshold < 0.5:
if count_03 > 0:
print(f"API: ⚠ WARNING: No voxels > {default_threshold}, but {count_03:,} voxels > 0.3. Trying threshold 0.3...")
threshold = 0.3
else:
percentile_thresh = float(np.quantile(pred_np.flatten(), 0.995))
threshold = max(0.3, percentile_thresh)
print(f"API: ⚠ WARNING: No voxels > {default_threshold}. Using percentile threshold: {threshold:.4f}")
pred_binary = (pred_np > threshold).astype(np.uint8)
fraction = pred_binary.sum() / total_voxels if total_voxels > 0 else 0.0
if fraction < 0.005 or fraction > 0.040:
print(f"API: ⚠ WARNING: Mask fraction {100*fraction:.3f}% outside typical range (0.5%-4.0%). May indicate segmentation issues.")
count_after_thresh = pred_binary.sum()
print(f"API: After threshold ({threshold}): {count_after_thresh:,} voxels ({100*count_after_thresh/total_voxels:.2f}%)")
if count_05 > 0 and count_after_thresh == 0 and threshold == 0.5:
print(f"API: ⚠ WARNING: Had {count_05:,} voxels > 0.5 but 0 after thresholding - check threshold logic!")
sys.stdout.flush()
print("API: Refining liver segmentation mask...")
try:
pred_np_copy = pred_np.copy()
pred_binary_refined, refinement_metrics, confidence_score = refine_liver_mask_enhanced(
pred_binary, voxel_spacing, pred_np_copy, threshold, modality
)
pred_binary = pred_binary_refined
print(f"API: βœ“ Refinement applied: {refinement_metrics['connected_components_before']} β†’ {refinement_metrics['connected_components_after']} components")
print(f"API: β†’ Volume change: {refinement_metrics['volume_change_percent']:.2f}% ({refinement_metrics['volume_change_ml']:.2f} ml)")
print(f"API: β†’ Confidence score: {confidence_score:.1f}%")
except Exception as e:
print(f"API: ⚠ Refinement failed (using original mask): {e}")
confidence_score = 50.0
print("API: Loading original volume for visualization...")
original_volume = original_data.copy()
if original_volume.max() > original_volume.min():
original_volume = (original_volume - original_volume.min()) / (original_volume.max() - original_volume.min())
else:
original_volume = np.zeros_like(original_volume)
slice_idx = max(0, min(slice_idx if slice_idx is not None else (pred_binary.shape[-1] // 2), pred_binary.shape[-1] - 1))
if len(original_volume.shape) == 3:
original_slice = original_volume[:, :, slice_idx]
else:
original_slice = original_volume[:, :, slice_idx] if original_volume.shape[2] > slice_idx else original_volume[:, :, 0]
if len(pred_binary.shape) == 4:
pred_slice = pred_binary[0, :, :, slice_idx] if pred_binary.shape[3] > slice_idx else pred_binary[0, :, :, 0]
elif len(pred_binary.shape) == 5:
pred_slice = pred_binary[0, 0, :, :, slice_idx] if pred_binary.shape[4] > slice_idx else pred_binary[0, 0, :, :, 0]
else:
pred_slice = pred_binary[:, :, slice_idx] if pred_binary.shape[2] > slice_idx else pred_binary[:, :, 0]
if pred_slice.shape != original_slice.shape:
print(f"API: Warning: Shape mismatch in overlay (pred_slice: {pred_slice.shape}, original_slice: {original_slice.shape}). Resizing pred_slice to match.")
from scipy.ndimage import zoom
zoom_factors = (original_slice.shape[0] / pred_slice.shape[0], original_slice.shape[1] / pred_slice.shape[1])
pred_slice = zoom(pred_slice, zoom_factors, order=0)
overlay = np.zeros((*original_slice.shape, 3), dtype=np.uint8)
overlay[:, :, 0] = (original_slice * 255).astype(np.uint8)
overlay[:, :, 1] = (original_slice * 255).astype(np.uint8)
overlay[:, :, 2] = (original_slice * 255).astype(np.uint8)
mask = pred_slice > 0
overlay[mask, 0] = 0
overlay[mask, 1] = 255
overlay[mask, 2] = 0
overlay_img = Image.fromarray(overlay)
img_buffer = io.BytesIO()
overlay_img.save(img_buffer, format='PNG')
img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
print("API: Saving segmentation mask...")
aff = nifti_img.affine
hdr = nifti_img.header.copy()
hdr.set_data_dtype(np.uint8)
pred_save = pred_binary[0] if pred_binary.ndim == 4 else pred_binary
if pred_binary.ndim == 5:
pred_save = pred_binary[0, 0]
original_shape = original_data.shape
pred_shape = pred_save.shape
if pred_shape != original_shape:
print(f"API: Resampling prediction from {pred_shape} to original shape {original_shape}...")
from scipy.ndimage import zoom
zoom_factors = tuple(orig / pred for orig, pred in zip(original_shape, pred_shape))
pred_save = zoom(pred_save.astype(np.float32), zoom_factors, order=0).astype(np.uint8)
print(f"API: βœ“ Resampled to match original dimensions")
with tempfile.NamedTemporaryFile(suffix='.nii.gz', delete=False) as tmp_file:
nifti_pred = nib.Nifti1Image(pred_save.astype(np.uint8), affine=aff, header=hdr)
nib.save(nifti_pred, tmp_file.name)
output_path = tmp_file.name
total_voxels = pred_binary.size
liver_voxels = pred_binary.sum()
liver_percentage = (liver_voxels / total_voxels) * 100
print(f"API: Calculating volume and morphology...")
volume_ml = calculate_liver_volume(pred_binary, voxel_spacing)
if len(pred_binary.shape) == 4:
mask_3d = pred_binary[0]
elif len(pred_binary.shape) == 5:
mask_3d = pred_binary[0, 0]
else:
mask_3d = pred_binary
labels_final, num_components_final = ndimage.label(mask_3d)
morphology = analyze_liver_morphology(pred_binary)
from processing import check_volume_sanity
volume_sanity_status, volume_sanity_msg = check_volume_sanity(volume_ml)
if volume_sanity_status != "OK":
print(f"API: ⚠ {volume_sanity_status}: {volume_sanity_msg}")
if num_components_final != 1:
print(f"API: ⚠ WARNING: Expected 1 connected component, found {num_components_final}. Quality check recommended.")
medical_report = generate_medical_report(
{
"volume_shape": list(pred_binary.shape),
"liver_voxels": int(liver_voxels),
"total_voxels": int(total_voxels),
"liver_percentage": float(liver_percentage),
"slice_index": int(slice_idx),
"total_slices": int(pred_binary.shape[-1]),
"modality": modality,
"confidence_score": float(confidence_score)
},
volume_ml,
morphology,
modality,
confidence_score
)
print(f"API: βœ“ Segmentation complete. Liver: {liver_voxels:,} voxels ({liver_percentage:.2f}%), Volume: {volume_ml:.2f} ml")
return {
"success": True,
"overlay_image": f"data:image/png;base64,{img_base64}",
"segmentation_path": output_path,
"statistics": {
"volume_shape": list(pred_binary.shape),
"liver_voxels": int(liver_voxels),
"total_voxels": int(total_voxels),
"liver_percentage": float(liver_percentage),
"slice_index": int(slice_idx),
"total_slices": int(pred_binary.shape[-1]),
"modality": modality,
"liver_volume_ml": round(volume_ml, 2)
},
"medical_report": medical_report
}
except FileNotFoundError as e:
error_msg = f"Checkpoint file not found: {str(e)}"
print(error_msg)
import traceback
traceback.print_exc()
return {
"success": False,
"error": error_msg,
"error_type": "checkpoint_not_found"
}
except RuntimeError as e:
error_msg = f"Runtime error (possibly CUDA/GPU): {str(e)}"
print(error_msg)
import traceback
traceback.print_exc()
return {
"success": False,
"error": error_msg,
"error_type": "runtime_error"
}
except Exception as e:
error_msg = f"Error during inference: {str(e)}"
print(error_msg)
import traceback
tb = traceback.format_exc()
print(f"Full traceback: {tb}")
return {
"success": False,
"error": error_msg,
"error_type": type(e).__name__
}
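# Defensive wrapper for the UI: guarantees a well-formed 4-tuple even if
# predict_volume raises or returns something unexpected.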
def safe_predict_volume(nifti_file, modality, slice_idx=None):
try:
result = predict_volume(nifti_file, modality, slice_idx)
if result is None or len(result) != 4:
return None, "## ❌ Error\n\nUnexpected return value from prediction function.", "", None
overlay_img, info_text, report_text, output_path = result
if overlay_img is None and info_text and "Error" in info_text:
return None, info_text, report_text or "", output_path or None
return overlay_img, info_text, report_text, output_path
except Exception as e:
error_msg = f"**Unexpected error:** {str(e)}\n\nPlease check the logs for details."
print(f"βœ— Safe wrapper caught error: {error_msg}")
import traceback
traceback.print_exc()
return None, f"## ❌ Error\n\n{error_msg}", "", None