Spaces:

BiasLab2025
/

Demo-2025

Sleeping

App Files Files Community

zye0616 committed 20 days ago

Commit

2ab6e0a

1 Parent(s): bfe562f

initial commit

Browse files

Files changed (6) hide show

models/model_loader.py +20 -0
utils/.gitignore +6 -0
utils/app.py +89 -0
utils/inference.py +69 -0
utils/requirements.txt +9 -0
utils/video.py +41 -0

models/model_loader.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import logging
+from typing import Tuple
+import torch
+from transformers import Owlv2ForObjectDetection, Owlv2Processor
+# Checkpoint identifier handed to both from_pretrained() calls below.
+MODEL_NAME = "google/owlv2-large-patch14"
+# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
+_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# NOTE(review): this executes at import time; if the root logger has not been
+# configured yet, this INFO record is probably not emitted -- confirm.
+logging.info("Loading %s onto %s", MODEL_NAME, _DEVICE)
+# Processor and model are built once, at import time, and kept as module-level
+# singletons that load_model() hands out.
+_PROCESSOR = Owlv2Processor.from_pretrained(MODEL_NAME)
+# Half precision when running on CUDA, single precision on CPU.
+torch_dtype = torch.float16 if _DEVICE.type == "cuda" else torch.float32
+_MODEL = Owlv2ForObjectDetection.from_pretrained(MODEL_NAME, torch_dtype=torch_dtype)
+_MODEL.to(_DEVICE)
+# Put the model into evaluation mode.
+_MODEL.eval()
+def load_model() -> Tuple[Owlv2Processor, Owlv2ForObjectDetection, torch.device]:
+    """Expose processor/model singletons so the API never reloads weights."""
+    return _PROCESSOR, _MODEL, _DEVICE

utils/.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+__pycache__/
+.venv/
+*.mp4
+*.log
+*.tmp
+.DS_Store

utils/app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import logging
+import os
+import tempfile
+from pathlib import Path
+from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
+from fastapi.responses import FileResponse, JSONResponse
+import uvicorn
+from inference import run_inference
+# Configure the root logger so that INFO-level records are emitted.
+logging.basicConfig(level=logging.INFO)
+# Application object; the route handler below registers itself on it.
+app = FastAPI(title="Video Processing Backend")
+def _save_upload_to_tmp(upload: UploadFile) -> str:
+    suffix = Path(upload.filename or "upload.mp4").suffix or ".mp4"
+    fd, path = tempfile.mkstemp(prefix="input_", suffix=suffix, dir="/tmp")
+    os.close(fd)
+    with open(path, "wb") as buffer:
+        data = upload.file.read()
+        buffer.write(data)
+    return path
+def _safe_delete(path: str) -> None:
+    try:
+        os.remove(path)
+    except FileNotFoundError:
+        return
+    except Exception:
+        logging.exception("Failed to remove temporary file: %s", path)
+def _schedule_cleanup(background_tasks: BackgroundTasks, path: str) -> None:
+    def _cleanup(target: str = path) -> None:
+        _safe_delete(target)
+    background_tasks.add_task(_cleanup)
+@app.post("/process_video")
+async def process_video(
+    background_tasks: BackgroundTasks,
+    video: UploadFile = File(...),
+    prompt: str = Form(...),
+):
+    if video is None:
+        raise HTTPException(status_code=400, detail="Video file is required.")
+    if not prompt:
+        raise HTTPException(status_code=400, detail="Prompt is required.")
+    try:
+        input_path = _save_upload_to_tmp(video)
+    except Exception:
+        logging.exception("Failed to save uploaded file.")
+        raise HTTPException(status_code=500, detail="Failed to save uploaded video.")
+    finally:
+        await video.close()
+    fd, output_path = tempfile.mkstemp(prefix="output_", suffix=".mp4", dir="/tmp")
+    os.close(fd)
+    try:
+        run_inference(input_path, output_path, prompt, max_frames=10)
+    except ValueError as exc:
+        logging.exception("Video decoding failed.")
+        _safe_delete(input_path)
+        _safe_delete(output_path)
+        raise HTTPException(status_code=500, detail=str(exc))
+    except Exception as exc:
+        logging.exception("Inference failed.")
+        _safe_delete(input_path)
+        _safe_delete(output_path)
+        return JSONResponse(status_code=500, content={"error": str(exc)})
+    _schedule_cleanup(background_tasks, input_path)
+    _schedule_cleanup(background_tasks, output_path)
+    return FileResponse(
+        path=output_path,
+        media_type="video/mp4",
+        filename="processed.mp4",
+    )
+# When run as a script, serve the app with uvicorn on all interfaces, port
+# 7860, with auto-reload disabled.
+# NOTE(review): the "app:app" import string assumes this module is importable
+# as `app` from the working directory -- confirm against the deployment layout.
+if __name__ == "__main__":
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)

utils/inference.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import logging
+from typing import List, Optional
+import cv2
+import numpy as np
+import torch
+from models.model_loader import load_model
+from utils.video import extract_frames, write_video
+def draw_boxes(frame: np.ndarray, boxes: np.ndarray) -> np.ndarray:
+    output = frame.copy()
+    if boxes is None:
+        return output
+    for box in boxes:
+        x1, y1, x2, y2 = [int(coord) for coord in box]
+        cv2.rectangle(output, (x1, y1), (x2, y2), (0, 255, 0), thickness=2)
+    return output
+def infer_frame(frame: np.ndarray, prompt: str) -> np.ndarray:
+    processor, model, device = load_model()
+    try:
+        inputs = processor(text=[prompt], images=frame, return_tensors="pt")
+        if hasattr(inputs, "to"):
+            inputs = inputs.to(device)
+        else:
+            inputs = {k: v.to(device) if hasattr(v, "to") else v for k, v in inputs.items()}
+        with torch.no_grad():
+            outputs = model(**inputs)
+        results = processor.post_process_object_detection(
+            outputs,
+            threshold=0.3,
+            target_sizes=[frame.shape[:2]],
+        )[0]
+        boxes = results["boxes"]
+        if hasattr(boxes, "cpu"):
+            boxes_np = boxes.cpu().numpy()
+        else:
+            boxes_np = np.asarray(boxes)
+    except Exception:
+        logging.exception("Inference failed for prompt '%s'", prompt)
+        raise
+    return draw_boxes(frame, boxes_np)
+def run_inference(
+    input_video_path: str,
+    output_video_path: str,
+    prompt: str,
+    max_frames: Optional[int] = None,
+) -> str:
+    try:
+        frames, fps, width, height = extract_frames(input_video_path)
+    except ValueError as exc:
+        logging.exception("Failed to decode video at %s", input_video_path)
+        raise
+    processed_frames: List[np.ndarray] = []
+    for idx, frame in enumerate(frames):
+        if max_frames is not None and idx >= max_frames:
+            break
+        logging.debug("Processing frame %d", idx)
+        processed_frame = infer_frame(frame, prompt)
+        processed_frames.append(processed_frame)
+    write_video(processed_frames, output_video_path, fps=fps, width=width, height=height)
+    return output_video_path

utils/requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+fastapi
+uvicorn
+torch
+transformers
+opencv-python
+python-multipart
+accelerate
+pillow
+scipy

utils/video.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from typing import List, Tuple
+import cv2
+import numpy as np
+def extract_frames(video_path: str) -> Tuple[List[np.ndarray], float, int, int]:
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        raise ValueError("Unable to open video.")
+    fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    frames: List[np.ndarray] = []
+    success, frame = cap.read()
+    while success:
+        frames.append(frame)
+        success, frame = cap.read()
+    cap.release()
+    if not frames:
+        raise ValueError("Video decode produced zero frames.")
+    return frames, fps, width, height
+def write_video(frames: List[np.ndarray], output_path: str, fps: float, width: int, height: int) -> None:
+    if not frames:
+        raise ValueError("No frames available for writing.")
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    writer = cv2.VideoWriter(output_path, fourcc, fps or 1.0, (width, height))
+    if not writer.isOpened():
+        raise ValueError("Failed to open VideoWriter.")
+    for frame in frames:
+        writer.write(frame)
+    writer.release()