FFDNET-L / handler.py
ogamaniuk's picture
Create handler.py
6322921 verified
import base64
import os
from io import BytesIO
from typing import Dict, List, Any

from PIL import Image
from ultralytics import YOLO
class EndpointHandler:
    """Endpoint handler that detects form widgets in an image with a YOLO model.

    ``__init__`` sets two attributes:
        model: the loaded ``ultralytics.YOLO`` detector.
        id_to_cls: maps the model's integer class ids to widget-type names.
    """

    def __init__(self, path=""):
        """Load the ``FFDNet-L.pt`` weights stored in ``path``.

        Args:
            path: Directory containing the weights file. An empty string
                resolves relative to the current working directory.
        """
        # os.path.join keeps an empty ``path`` relative; formatting it as
        # f"{path}/FFDNet-L.pt" would point at the filesystem root instead.
        self.model = YOLO(os.path.join(path, "FFDNet-L.pt"))
        self.id_to_cls = {0: "TextBox", 1: "ChoiceButton", 2: "Signature"}

    @staticmethod
    def _load_image(inputs):
        """Convert the request's "inputs" value into a source for the model.

        http(s) URLs are returned unchanged, any other string is decoded as
        a base64 image (with or without a ``data:...;base64,`` header), and
        non-string values are passed through untouched.
        """
        if not isinstance(inputs, str):
            return inputs
        # Match the full scheme: "h", "t" and "p" are valid base64 characters,
        # so a bare "http" prefix could also be the start of an encoded image.
        if inputs.startswith(("http://", "https://")):
            return inputs
        if inputs.startswith("data:"):
            # Drop the "data:<mediatype>;base64," header; b64decode would
            # otherwise treat its letters as payload and corrupt the image.
            _, _, inputs = inputs.partition(",")
        return Image.open(BytesIO(base64.b64decode(inputs)))

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run detection on one request and return the detected widgets.

        Args:
            data: Request payload containing:
                - "inputs": an http(s) image URL or a base64-encoded image
                  (optionally wrapped in a data URI); any non-string value
                  is handed to the model as-is.
                - "parameters": optional dict with the keys "conf"
                  (default 0.3), "iou" (default 0.1), "imgsz" (default 1600)
                  and "augment" (default True). ``None`` is treated like an
                  empty dict.

        Returns:
            One dict per detected box with the keys "widget_type",
            "confidence" and "bounding_box"; the latter holds "cx", "cy",
            "w" and "h" read from the boxes' ``xywhn`` values.
        """
        # Read without popping so the caller's payload is left intact.
        inputs = data.get("inputs", data)
        parameters = data.get("parameters") or {}

        results = self.model.predict(
            self._load_image(inputs),
            conf=parameters.get("conf", 0.3),
            iou=parameters.get("iou", 0.1),
            imgsz=parameters.get("imgsz", 1600),
            augment=parameters.get("augment", True),
        )

        predictions = []
        for result in results:
            if result.boxes is None:
                continue
            boxes = result.boxes.cpu().numpy()
            # xywhn, cls and conf are parallel per-box sequences.
            for (cx, cy, w, h), cls_id, conf in zip(
                boxes.xywhn, boxes.cls, boxes.conf
            ):
                predictions.append({
                    "widget_type": self.id_to_cls[int(cls_id)],
                    "confidence": float(conf),
                    "bounding_box": {
                        "cx": float(cx),
                        "cy": float(cy),
                        "w": float(w),
                        "h": float(h),
                    },
                })
        return predictions