# File size: 2,398 Bytes
# 6322921
from typing import Dict, List, Any
from ultralytics import YOLO
import base64
from io import BytesIO
from PIL import Image
class EndpointHandler:
    """Callable request handler that runs the FFDNet-L YOLO model on an image.

    An instance loads the weights once in ``__init__`` and is then called
    with a request dictionary; it returns one prediction dict per detected
    box, labelled as ``TextBox``, ``ChoiceButton`` or ``Signature``.
    """

    def __init__(self, path=""):
        """Load the YOLO weights.

        Args:
            path: Directory that contains ``FFDNet-L.pt``. An empty string
                resolves the file relative to the current working directory.
        """
        # Local import so this class does not depend on a file-level
        # ``import os`` being present.
        import os

        # os.path.join avoids the "/FFDNet-L.pt" (filesystem root) path that
        # plain string formatting produces when ``path`` is empty.
        self.model = YOLO(os.path.join(path, "FFDNet-L.pt"))
        # Maps the model's numeric class ids to the names used in responses.
        self.id_to_cls = {0: "TextBox", 1: "ChoiceButton", 2: "Signature"}

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run detection for one request.

        Args:
            data: A dictionary containing:
                - "inputs": an ``http://``/``https://`` image URL, a
                  base64-encoded image (optionally wrapped in a
                  ``data:...;base64,`` URI), or any non-string object, which
                  is handed to the model unchanged. When the key is absent
                  the whole dictionary is used as the input.
                - "parameters": optional dict with the keys "conf"
                  (default 0.3), "iou" (default 0.1), "imgsz" (default 1600)
                  and "augment" (default True). ``None`` is treated like a
                  missing key.

        Returns:
            List of predictions, each with "widget_type", "confidence" and a
            "bounding_box" dict holding "cx", "cy", "w" and "h".

        Raises:
            KeyError: If the model reports a class id that is not in
                ``self.id_to_cls``.
        """
        # Read with get() rather than pop() so the caller's dict is not
        # modified as a side effect of handling the request.
        inputs = data.get("inputs", data)
        parameters = data.get("parameters") or {}

        results = self.model.predict(
            self._load_image(inputs),
            conf=parameters.get("conf", 0.3),
            iou=parameters.get("iou", 0.1),
            imgsz=parameters.get("imgsz", 1600),
            augment=parameters.get("augment", True),
        )
        return self._format_results(results)

    @staticmethod
    def _load_image(inputs: Any) -> Any:
        """Convert the request's "inputs" value into a source for ``predict``."""
        if not isinstance(inputs, str):
            return inputs
        # Match the full scheme: ":" is not a base64 character, so this can
        # never misclassify base64 data that merely begins with "http".
        if inputs.startswith(("http://", "https://")):
            return inputs
        if inputs.startswith("data:"):
            # Data URI: the base64 payload is everything after the first comma.
            inputs = inputs.partition(",")[2]
        return Image.open(BytesIO(base64.b64decode(inputs)))

    def _format_results(self, results) -> List[Dict[str, Any]]:
        """Flatten the model's results into a list of prediction dicts."""
        predictions = []
        for result in results:
            if result.boxes is None:
                continue
            boxes = result.boxes.cpu().numpy()
            # Walk the per-result arrays in lockstep instead of slicing out
            # one box object at a time. Per the ultralytics Boxes API, each
            # xywhn row is (center x, center y, width, height) normalized by
            # the image size.
            for (cx, cy, w, h), cls_id, conf in zip(
                boxes.xywhn, boxes.cls, boxes.conf
            ):
                predictions.append({
                    "widget_type": self.id_to_cls[int(cls_id)],
                    "confidence": float(conf),
                    "bounding_box": {
                        "cx": float(cx),
                        "cy": float(cy),
                        "w": float(w),
                        "h": float(h),
                    },
                })
        return predictions