"""
YOLOv8 Face Detector using PyTorch with ROCm support.

This module provides high-performance face detection using YOLOv8-face model
with AMD GPU (ROCm) acceleration.
"""

import os
import traceback
from typing import List, Tuple, Optional
from pathlib import Path

import numpy as np

# One detection: (x, y, width, height, confidence), derived from the model's
# xyxy boxes with coordinates truncated to int.
Detection = Tuple[int, int, int, int, float]


class YOLOFaceDetector:
    """
    YOLOv8 face detector with PyTorch ROCm support.

    Features:
    - ROCm GPU acceleration for AMD GPUs
    - High accuracy face detection
    - Automatic NMS for overlapping detections

    The underlying Ultralytics model is loaded lazily on first use (see the
    ``model`` property), so constructing a detector is cheap.
    """

    # Default model file name, looked up in ../models relative to this file
    DEFAULT_MODEL = "yolov8n-face-lindevs.pt"

    def __init__(
        self,
        model_path: Optional[str] = None,
        conf_threshold: float = 0.25,
        iou_threshold: float = 0.45,
        input_size: Tuple[int, int] = (640, 640),
    ):
        """
        Initialize the YOLO face detector.

        Args:
            model_path: Path to PyTorch model file. If None, uses default model.
            conf_threshold: Confidence threshold for detections
            iou_threshold: IoU threshold for NMS
            input_size: Model input size (width, height)
        """
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_size = input_size
        self._model = None  # populated by the `model` property on first access
        self._model_path = model_path
        self._device = None  # 'cuda' or 'cpu'; chosen when the model is loaded

    @property
    def model(self):
        """
        Return the YOLO model, loading it on first access.

        The first access resolves the model path, selects the inference
        device ('cuda' when ``torch.cuda.is_available()``, otherwise 'cpu')
        and constructs the Ultralytics ``YOLO`` object.

        Raises:
            FileNotFoundError: If the model file does not exist.
        """
        if self._model is not None:
            return self._model

        # Determine model path
        if self._model_path is None:
            # Assuming models are in ../models relative to server/detector.py
            models_dir = Path(__file__).parent.parent / "models"
            model_path = str(models_dir / self.DEFAULT_MODEL)
        else:
            model_path = self._model_path

        # Validate the path before the heavy imports so a bad path fails fast.
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model not found: {model_path}")

        from ultralytics import YOLO
        import torch

        # Detect device (ROCm GPU or CPU). ROCm builds of PyTorch expose AMD
        # GPUs through the torch.cuda API.
        if torch.cuda.is_available():
            self._device = 'cuda'
            device_name = torch.cuda.get_device_name(0)
            print(f"[FaceMask] Using ROCm GPU for inference: {device_name}")
        else:
            self._device = 'cpu'
            print("[FaceMask] Using CPU for inference (ROCm GPU not available)")

        # Load model (let Ultralytics handle device management)
        try:
            self._model = YOLO(model_path)
            # Don't call .to() - let predict() handle device assignment
            print(f"[FaceMask] Model loaded, will use device: {self._device}")
        except Exception as e:
            print(f"[FaceMask] Error loading model: {e}")
            traceback.print_exc()
            raise

        print(f"[FaceMask] YOLO model loaded: {model_path}")
        print(f"[FaceMask] Device: {self._device}")
        return self._model

    def _predict(self, source, context: str = "inference"):
        """
        Run Ultralytics prediction with a one-time fallback to CPU.

        Args:
            source: A single frame or a list of frames, passed to ``predict``.
            context: Label used in the error message ("inference" or
                "batch inference").

        Returns:
            The list of result objects returned by ``model.predict``.

        Raises:
            Exception: Whatever ``predict`` raised, if inference fails while
                already running on CPU or if the CPU retry fails as well.
        """
        # Access the property first: the lazy load is what sets self._device.
        model = self.model

        # input_size is (width, height); Ultralytics expects an int for square
        # inputs or a (height, width) tuple otherwise.
        width, height = self.input_size
        imgsz = width if width == height else (height, width)

        options = dict(
            conf=self.conf_threshold,
            iou=self.iou_threshold,
            imgsz=imgsz,
            verbose=False,
        )

        try:
            return model.predict(source, device=self._device, **options)
        except Exception as e:
            # Retrying on CPU is pointless when CPU is what just failed.
            if self._device == 'cpu':
                raise
            print(f"[FaceMask] ERROR during {context}: {e}")
            traceback.print_exc()

            # Fallback to CPU (sticky: later calls stay on CPU as well)
            print("[FaceMask] Falling back to CPU inference...")
            self._device = 'cpu'
            return model.predict(source, device='cpu', **options)

    @staticmethod
    def _boxes_to_detections(boxes) -> List[Detection]:
        """
        Convert an Ultralytics ``Boxes`` object to (x, y, w, h, conf) tuples.

        Args:
            boxes: ``result.boxes`` of a prediction result, or None.

        Returns:
            One tuple per box; an empty list when ``boxes`` is None or empty.
        """
        if boxes is None:
            return []

        # Move all boxes to host memory in one go instead of once per box.
        corners = boxes.xyxy.cpu().numpy()
        scores = boxes.conf.cpu().numpy()

        # Convert xyxy corners to x, y, width, height (truncated to int)
        return [
            (int(x1), int(y1), int(x2 - x1), int(y2 - y1), float(score))
            for (x1, y1, x2, y2), score in zip(corners, scores)
        ]

    def detect(self, frame: np.ndarray) -> List[Detection]:
        """
        Detect faces in a frame.

        Args:
            frame: BGR image as numpy array (H, W, C)

        Returns:
            List of detections as (x, y, width, height, confidence)
        """
        results = self._predict(frame, "inference")
        if len(results) == 0:
            return []
        return self._boxes_to_detections(results[0].boxes)

    def detect_batch(self, frames: List[np.ndarray]) -> List[List[Detection]]:
        """
        Detect faces in multiple frames at once (batch processing).

        Args:
            frames: List of BGR images as numpy arrays (H, W, C)

        Returns:
            List of detection lists, one per frame.
            Each detection: (x, y, width, height, confidence)
        """
        # Nothing to do; also avoids loading the model for an empty batch.
        if not frames:
            return []

        results = self._predict(frames, "batch inference")
        return [self._boxes_to_detections(result.boxes) for result in results]

    def generate_mask(
        self,
        frame_shape: Tuple[int, int, int],
        detections: List[Detection],
        mask_scale: float = 1.5,
        feather_radius: int = 20,
    ) -> np.ndarray:
        """
        Generate a mask image from face detections.

        Args:
            frame_shape: Shape of the original frame (height, width, channels)
            detections: List of face detections (x, y, w, h, conf)
            mask_scale: Scale factor for mask region
            feather_radius: Radius for edge feathering

        Returns:
            Grayscale mask image (white = blur, black = keep)
        """
        height, width = frame_shape[:2]
        mask = np.zeros((height, width), dtype=np.uint8)

        # No faces: the mask stays all black, so skip drawing and blurring.
        if len(detections) == 0:
            return mask

        import cv2

        for (x, y, w, h, _conf) in detections:
            # Scale the bounding box around its center
            center = (x + w // 2, y + h // 2)
            half_axes = (int(w * mask_scale) // 2, int(h * mask_scale) // 2)

            # Draw ellipse for natural face shape
            cv2.ellipse(
                mask,
                center,
                half_axes,
                0,  # angle
                0, 360,  # arc
                255,  # color (white)
                -1,  # filled
            )

        # Apply Gaussian blur for feathering
        if feather_radius > 0:
            # 2 * r + 1 is always odd, as GaussianBlur requires
            kernel_size = feather_radius * 2 + 1
            mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)

        return mask


# Singleton instance
_detector: Optional[YOLOFaceDetector] = None


def get_detector(**kwargs) -> YOLOFaceDetector:
    """
    Get or create the global YOLO detector instance.

    Args:
        **kwargs: Forwarded to ``YOLOFaceDetector`` when the instance is
            created. They are ignored on later calls, because the existing
            instance is returned unchanged.

    Returns:
        The shared ``YOLOFaceDetector`` instance.
    """
    global _detector
    if _detector is None:
        _detector = YOLOFaceDetector(**kwargs)
    return _detector