# File size: 2,398 Bytes
import base64
import os
from io import BytesIO
from typing import Any, Dict, List

from PIL import Image
from ultralytics import YOLO

class EndpointHandler:
    """Callable request handler that runs a YOLO detector over one image.

    The handler loads the ``FFDNet-L.pt`` weights once at construction time
    and, per request, returns one dictionary per detected widget
    (``TextBox``, ``ChoiceButton`` or ``Signature``).
    """

    def __init__(self, path=""):
        """Load the model weights found in directory *path*.

        Args:
            path: Directory containing ``FFDNet-L.pt``. An empty value means
                the current working directory.
        """
        # Load the YOLO model
        self.model = YOLO(self._weights_path(path))
        self.id_to_cls = {0: "TextBox", 1: "ChoiceButton", 2: "Signature"}

    @staticmethod
    def _weights_path(path):
        """Return the weights file location inside directory *path*."""
        # os.path.join keeps an empty directory relative ("FFDNet-L.pt");
        # plain "{path}/..." formatting would point at the filesystem root.
        return os.path.join(path or "", "FFDNet-L.pt")

    @staticmethod
    def _load_image(inputs):
        """Turn the request's ``inputs`` value into a source for ``predict``.

        URLs are returned unchanged, any other string is decoded as base64
        into a PIL image, and non-string values are passed through as-is.
        """
        if not isinstance(inputs, str):
            return inputs
        if inputs.startswith(("http://", "https://")):
            return inputs
        if inputs.startswith("data:"):
            # Drop a data-URI header such as "data:image/png;base64,";
            # b64decode would otherwise fold its characters into the payload.
            inputs = inputs.partition(",")[2]
        return Image.open(BytesIO(base64.b64decode(inputs)))

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run detection for a single request.

        Args:
            data: A dictionary containing:
                - "inputs": base64 encoded image (optionally as a ``data:``
                  URI) or an http(s) image URL
                - "parameters": optional dict with any of
                  "conf" (alias "confidence", default 0.3),
                  "iou" (default 0.1), "imgsz" (default 1600) and
                  "augment" (default True)

            The "inputs" and "parameters" keys are removed from *data*.

        Returns:
            List of predictions. Each entry has "widget_type", "confidence"
            and a "bounding_box" dict with "cx", "cy", "w", "h" taken from
            the boxes' normalized ``xywhn`` coordinates.

        Raises:
            KeyError: If the model reports a class id missing from
                ``id_to_cls``.
        """
        # Without an "inputs" key the payload itself is used as the input.
        inputs = data.pop("inputs", data)
        # An explicit null "parameters" is treated like an omitted one.
        parameters = data.pop("parameters", None) or {}

        image = self._load_image(inputs)

        # "conf" wins when both spellings are supplied.
        confidence = parameters.get("conf", parameters.get("confidence", 0.3))

        # Run inference
        results = self.model.predict(
            image,
            conf=confidence,
            iou=parameters.get("iou", 0.1),
            imgsz=parameters.get("imgsz", 1600),
            augment=parameters.get("augment", True),
        )

        # Format results
        predictions = []
        for result in results:
            if result.boxes is None:
                continue
            boxes = result.boxes.cpu().numpy()
            # Walk the per-box arrays in lockstep: one row per detection.
            for (cx, cy, w, h), score, cls_id in zip(
                boxes.xywhn, boxes.conf, boxes.cls
            ):
                predictions.append({
                    "widget_type": self.id_to_cls[int(cls_id)],
                    "confidence": float(score),
                    "bounding_box": {
                        "cx": float(cx),
                        "cy": float(cy),
                        "w": float(w),
                        "h": float(h),
                    },
                })

        return predictions