|
|
import base64
import os
from io import BytesIO
from typing import Dict, List, Any

from PIL import Image
from ultralytics import YOLO
|
|
|
|
|
class EndpointHandler:
    """Inference handler that runs a YOLO detector over a single image.

    The ``FFDNet-L.pt`` weights are loaded once at construction. Each call
    runs prediction and returns one dictionary per detected box, labelled
    through ``id_to_cls``.
    """

    # Strings starting with one of these are handed to the model as URLs;
    # every other string is treated as base64-encoded image bytes.
    _URL_PREFIXES = ("http://", "https://")

    def __init__(self, path=""):
        """Load the detection model.

        Args:
            path: Directory containing ``FFDNet-L.pt``. When empty, the
                weights file is looked up relative to the working directory.
        """
        # os.path.join keeps an empty ``path`` from resolving to the
        # filesystem root ("/FFDNet-L.pt"), which plain string formatting did.
        self.model = YOLO(os.path.join(path or "", "FFDNet-L.pt"))
        self.id_to_cls = {0: "TextBox", 1: "ChoiceButton", 2: "Signature"}

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run detection for one request.

        Args:
            data: A dictionary containing:
                - "inputs": an image URL (http/https), a base64-encoded image
                  (optionally wrapped as a ``data:`` URI), or any other object,
                  which is passed to the model unchanged.
                - "parameters": optional dict with
                    - "conf" (alias "confidence"): confidence threshold,
                      default 0.3
                    - "iou": IoU threshold, default 0.1
                    - "imgsz": inference image size, default 1600
                    - "augment": forwarded as the ``augment`` flag, default True

        Returns:
            List of predictions. Each item has "widget_type", "confidence" and
            "bounding_box" with "cx", "cy", "w", "h" taken from the box's
            ``xywhn`` values.
        """
        # Work on a shallow copy so the caller's request dict is not mutated.
        payload = dict(data)
        inputs = payload.pop("inputs", payload)
        # An explicit ``"parameters": null`` must behave like a missing key.
        parameters = payload.pop("parameters", None) or {}

        image = self._load_image(inputs)

        # "conf" wins when both spellings are supplied; "confidence" is
        # accepted because it is the name the request contract documents.
        confidence = parameters.get("conf", parameters.get("confidence", 0.3))
        results = self.model.predict(
            image,
            conf=confidence,
            iou=parameters.get("iou", 0.1),
            imgsz=parameters.get("imgsz", 1600),
            augment=parameters.get("augment", True),
        )

        predictions = []
        for result in results:
            if result.boxes is None:
                continue
            for box in result.boxes.cpu().numpy():
                cx, cy, width, height = box.xywhn[0]
                cls_id = int(box.cls.item())
                predictions.append({
                    "widget_type": self.id_to_cls[cls_id],
                    "confidence": float(box.conf[0]),
                    "bounding_box": {
                        "cx": float(cx),
                        "cy": float(cy),
                        "w": float(width),
                        "h": float(height),
                    },
                })
        return predictions

    @classmethod
    def _load_image(cls, inputs):
        """Convert the request's ``inputs`` value into a model-ready source."""
        if not isinstance(inputs, str):
            return inputs

        # Only inspect a short prefix: base64 payloads can be very large.
        if inputs[:8].lower().startswith(cls._URL_PREFIXES):
            return inputs

        encoded = inputs
        if encoded.startswith("data:"):
            # Drop a "data:<mime>;base64," header; a comma never occurs
            # inside base64 text, so the first one ends the header.
            _, separator, remainder = encoded.partition(",")
            if separator:
                encoded = remainder

        return Image.open(BytesIO(base64.b64decode(encoded)))