# blender-mask-peoples/server/detector.py

"""
YOLOv8 Face Detector using PyTorch with ROCm support.

This module provides high-performance face detection using
YOLOv8-face model with AMD GPU (ROCm) acceleration.
"""

import os
from typing import List, Tuple, Optional
from pathlib import Path
import numpy as np


class YOLOFaceDetector:
    """
    YOLOv8 face detector with PyTorch ROCm support.

    Features:
    - ROCm GPU acceleration for AMD GPUs
    - High accuracy face detection
    - Automatic NMS for overlapping detections

    The underlying Ultralytics model is loaded lazily on first access to
    ``model``, so constructing a detector performs no I/O.
    """

    # Default model file name; resolved inside the "models" directory located
    # one level above the directory containing this file.
    DEFAULT_MODEL = "yolov8n-face-lindevs.pt"

    def __init__(
        self,
        model_path: Optional[str] = None,
        conf_threshold: float = 0.25,
        iou_threshold: float = 0.45,
        input_size: Tuple[int, int] = (640, 640),
    ):
        """
        Initialize the YOLO face detector.

        Args:
            model_path: Path to PyTorch model file. If None, uses default model.
            conf_threshold: Confidence threshold for detections
            iou_threshold: IoU threshold for NMS
            input_size: Model input size (width, height). Only the first
                element is forwarded to the model as ``imgsz``.
        """
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_size = input_size
        self._model = None
        self._model_path = model_path
        # Set when the model is first loaded ('cuda' or 'cpu'); may later be
        # downgraded to 'cpu' if GPU inference fails.
        self._device = None

    @property
    def model(self):
        """
        Lazy-load YOLO model.

        On first access this resolves the model path, picks the inference
        device and loads the weights; later accesses return the cached model.

        Raises:
            FileNotFoundError: If the resolved model file does not exist.
        """
        if self._model is None:
            from ultralytics import YOLO
            import torch

            # Determine model path
            if self._model_path is None:
                # Assuming models are in ../models relative to server/detector.py
                models_dir = Path(__file__).parent.parent / "models"
                model_path = str(models_dir / self.DEFAULT_MODEL)
            else:
                model_path = self._model_path

            if not os.path.exists(model_path):
                raise FileNotFoundError(f"Model not found: {model_path}")

            # Detect device (ROCm GPU or CPU). ROCm builds of PyTorch expose
            # AMD GPUs through the torch.cuda API, hence the 'cuda' name.
            if torch.cuda.is_available():
                self._device = 'cuda'
                device_name = torch.cuda.get_device_name(0)
                print(f"[FaceMask] Using ROCm GPU for inference: {device_name}")
            else:
                self._device = 'cpu'
                print("[FaceMask] Using CPU for inference (ROCm GPU not available)")

            # Load model (let Ultralytics handle device management)
            try:
                self._model = YOLO(model_path)
                # Don't call .to() - let predict() handle device assignment
                print(f"[FaceMask] Model loaded, will use device: {self._device}")
            except Exception as e:
                print(f"[FaceMask] Error loading model: {e}")
                import traceback
                traceback.print_exc()
                raise

            print(f"[FaceMask] YOLO model loaded: {model_path}")
            print(f"[FaceMask] Device: {self._device}")

        return self._model

    def _predict(self, source, label: str = "inference"):
        """
        Run ``model.predict`` on *source*, retrying once on CPU if a
        non-CPU run fails.

        Args:
            source: A single frame or a list of frames.
            label: Short description used in the error message.

        Returns:
            Whatever ``model.predict`` returns (one result per input image).

        Raises:
            Exception: Re-raised unchanged when inference fails while the
                device is already 'cpu', or when the CPU retry fails too.
        """
        import traceback

        # Resolve the lazily loaded model before the try block so that load
        # failures (e.g. FileNotFoundError) propagate directly instead of
        # being treated as an inference error and retried.
        model = self.model
        options = dict(
            conf=self.conf_threshold,
            iou=self.iou_threshold,
            imgsz=self.input_size[0],
            verbose=False,
        )

        try:
            return model.predict(source, device=self._device, **options)
        except Exception as e:
            print(f"[FaceMask] ERROR during {label}: {e}")
            traceback.print_exc()
            if self._device == 'cpu':
                # Already on CPU: retrying would only repeat the same failure.
                raise
            # Fallback to CPU, and stay there for subsequent calls.
            print("[FaceMask] Falling back to CPU inference...")
            self._device = 'cpu'
            return model.predict(source, device='cpu', **options)

    @staticmethod
    def _boxes_to_detections(boxes) -> List[Tuple[int, int, int, int, float]]:
        """Convert one result's boxes (or None) to (x, y, w, h, conf) tuples."""
        if boxes is None:
            return []

        # Move each tensor to host memory once rather than twice per box.
        corners = boxes.xyxy.cpu().numpy()
        scores = boxes.conf.cpu().numpy()

        # Convert xyxy corners to x, y, width, height (truncated to int).
        return [
            (int(x1), int(y1), int(x2 - x1), int(y2 - y1), float(score))
            for (x1, y1, x2, y2), score in zip(corners, scores)
        ]

    def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
        """
        Detect faces in a frame.

        Args:
            frame: BGR image as numpy array (H, W, C)

        Returns:
            List of detections as (x, y, width, height, confidence)
        """
        results = self._predict(frame, "inference")
        if len(results) == 0:
            return []
        return self._boxes_to_detections(results[0].boxes)

    def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
        """
        Detect faces in multiple frames at once (batch processing).

        Args:
            frames: List of BGR images as numpy arrays (H, W, C)

        Returns:
            List of detection lists, one per frame.
            Each detection: (x, y, width, height, confidence)
        """
        if not frames:
            return []

        results = self._predict(frames, "batch inference")
        return [self._boxes_to_detections(result.boxes) for result in results]

    def generate_mask(
        self,
        frame_shape: Tuple[int, int, int],
        detections: List[Tuple[int, int, int, int, float]],
        mask_scale: float = 1.5,
        feather_radius: int = 20,
    ) -> np.ndarray:
        """
        Generate a mask image from face detections.

        Args:
            frame_shape: Shape of the original frame (height, width, channels)
            detections: List of face detections (x, y, w, h, conf)
            mask_scale: Scale factor for mask region
            feather_radius: Radius for edge feathering

        Returns:
            Grayscale mask image (white = blur, black = keep)
        """
        height, width = frame_shape[:2]
        mask = np.zeros((height, width), dtype=np.uint8)

        # Nothing to draw or feather: the all-black mask is the final result,
        # so OpenCV is not needed at all.
        if len(detections) == 0:
            return mask

        import cv2

        for (x, y, w, h, _conf) in detections:
            # Scale the bounding box around its center
            center = (x + w // 2, y + h // 2)
            half_axes = (int(w * mask_scale) // 2, int(h * mask_scale) // 2)

            # Draw ellipse for natural face shape
            cv2.ellipse(
                mask,
                center,
                half_axes,
                0,  # angle
                0, 360,  # arc
                255,  # color (white)
                -1,  # filled
            )

        # Apply Gaussian blur for feathering
        if feather_radius > 0:
            # 2r + 1 keeps the kernel size odd, as GaussianBlur requires.
            kernel_size = feather_radius * 2 + 1
            mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)

        return mask


# Module-wide detector instance; stays None until get_detector() creates it.
_detector: Optional[YOLOFaceDetector] = None


def get_detector(**kwargs) -> YOLOFaceDetector:
    """
    Return the shared YOLO detector, creating it on the first call.

    Keyword arguments are forwarded to ``YOLOFaceDetector`` only by the call
    that creates the instance; once it exists they are ignored.
    """
    global _detector
    if _detector is not None:
        return _detector
    _detector = YOLOFaceDetector(**kwargs)
    return _detector