From 08f20fa6feb7a798f1fff1ba169352bfe2b05941 Mon Sep 17 00:00:00 2001 From: Hare Date: Wed, 18 Feb 2026 20:18:53 +0900 Subject: [PATCH] Change model: face -> pose --- server/detector.py | 237 ++++++++++++++++++++++++++------------------- 1 file changed, 138 insertions(+), 99 deletions(-) diff --git a/server/detector.py b/server/detector.py index 1ca4acd..25c46e1 100644 --- a/server/detector.py +++ b/server/detector.py @@ -1,28 +1,104 @@ """ -YOLOv8 Face Detector using PyTorch with ROCm support. +YOLOv8 Pose Head Detector using PyTorch with ROCm support. -This module provides high-performance face detection using -YOLOv8-face model with AMD GPU (ROCm) acceleration. +Detects human heads from all angles (frontal, profile, rear) by using +YOLOv8 pose estimation and extracting head bounding boxes from keypoints. """ import os from typing import List, Tuple, Optional -from pathlib import Path import numpy as np -class YOLOFaceDetector: - """ - YOLOv8 face detector with PyTorch ROCm support. +# COCO pose keypoint indices +_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear +_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder +_KP_CONF_THRESH = 0.3 - Features: - - ROCm GPU acceleration for AMD GPUs - - High accuracy face detection - - Automatic NMS for overlapping detections + +def _head_bbox_from_pose( + kp_xy: np.ndarray, + kp_conf: np.ndarray, + person_x1: float, + person_y1: float, + person_x2: float, + person_y2: float, +) -> Tuple[int, int, int, int]: + """ + Estimate head bounding box (x, y, w, h) from COCO pose keypoints. + + Strategy: + 1. Use head keypoints (0-4: nose, eyes, ears) if visible. + 2. Fall back to shoulder keypoints (5-6) to infer head position. + 3. Last resort: use top of the person bounding box. + """ + person_w = max(person_x2 - person_x1, 1.0) + + # --- Step 1: head keypoints --- + visible_head = [ + (float(kp_xy[i][0]), float(kp_xy[i][1])) + for i in _HEAD_KP + if float(kp_conf[i]) > _KP_CONF_THRESH + ] + if visible_head: + xs = [p[0] for p in visible_head] + ys = [p[1] for p in visible_head] + kp_x1, kp_y1 = min(xs), min(ys) + kp_x2, kp_y2 = max(xs), max(ys) + span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0) + cx = (kp_x1 + kp_x2) / 2.0 + cy = (kp_y1 + kp_y2) / 2.0 + + # Head radius: inter-landmark span ≈ 80% of head width, so expand by ~1.25 + # Shift center upward slightly to include scalp + r = max(span * 1.25, person_w * 0.20) + x1 = int(cx - r) + y1 = int(cy - r * 1.15) # extra margin above (scalp) + x2 = int(cx + r) + y2 = int(cy + r * 0.85) # less margin below (chin) + return x1, y1, x2 - x1, y2 - y1 + + # --- Step 2: shoulder keypoints --- + visible_shoulder = [ + (float(kp_xy[i][0]), float(kp_xy[i][1])) + for i in _SHOULDER_KP + if float(kp_conf[i]) > _KP_CONF_THRESH + ] + if visible_shoulder: + cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder) + cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder) + if len(visible_shoulder) == 2: + sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0]) + else: + sh_width = person_w * 0.5 + r = max(sh_width * 0.5, person_w * 0.20) + cy = cy_sh - r * 1.3 # head center is above shoulders + x1 = int(cx - r) + y1 = int(cy - r) + x2 = int(cx + r) + y2 = int(cy + r) + return x1, y1, x2 - x1, y2 - y1 + + # --- Step 3: person bbox top --- + r = max(person_w * 0.35, 20.0) + cx = (person_x1 + person_x2) / 2.0 + x1 = int(cx - r) + y1 = int(person_y1) + x2 = int(cx + r) + y2 = int(person_y1 + r * 2.0) + return x1, y1, x2 - x1, y2 - y1 + + +class YOLOPoseHeadDetector: + """ + Head detector using YOLOv8 pose estimation with PyTorch ROCm support. + + Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears) + so that detection works regardless of the person's facing direction. """ - # Default model path relative to this file - DEFAULT_MODEL = "yolov8n-face-lindevs.pt" + # Standard Ultralytics model — auto-downloaded on first use + DEFAULT_MODEL = "yolov8n-pose.pt" def __init__( self, @@ -31,15 +107,6 @@ class YOLOFaceDetector: iou_threshold: float = 0.45, input_size: Tuple[int, int] = (640, 640), ): - """ - Initialize the YOLO face detector. - - Args: - model_path: Path to PyTorch model file. If None, uses default model. - conf_threshold: Confidence threshold for detections - iou_threshold: IoU threshold for NMS - input_size: Model input size (width, height) - """ self.conf_threshold = conf_threshold self.iou_threshold = iou_threshold self.input_size = input_size @@ -49,23 +116,19 @@ class YOLOFaceDetector: @property def model(self): - """Lazy-load YOLO model.""" + """Lazy-load YOLO pose model.""" if self._model is None: from ultralytics import YOLO import torch - # Determine model path - if self._model_path is None: - # Assuming models are in ../models relative to server/detector.py - models_dir = Path(__file__).parent.parent / "models" - model_path = str(models_dir / self.DEFAULT_MODEL) - else: + # Use provided path or let Ultralytics auto-download the default + if self._model_path is not None: + if not os.path.exists(self._model_path): + raise FileNotFoundError(f"Model not found: {self._model_path}") model_path = self._model_path + else: + model_path = self.DEFAULT_MODEL - if not os.path.exists(model_path): - raise FileNotFoundError(f"Model not found: {model_path}") - - # Detect device (ROCm GPU or CPU) if torch.cuda.is_available(): self._device = 'cuda' device_name = torch.cuda.get_device_name(0) @@ -74,25 +137,47 @@ class YOLOFaceDetector: self._device = 'cpu' print("[FaceMask] Using CPU for inference (ROCm GPU not available)") - # Load model (let Ultralytics handle device management) try: self._model = YOLO(model_path) - # Don't call .to() - let predict() handle device assignment - print(f"[FaceMask] Model loaded, will use device: {self._device}") + print(f"[FaceMask] Pose model loaded: {model_path}") + print(f"[FaceMask] Device: {self._device}") except Exception as e: print(f"[FaceMask] Error loading model: {e}") import traceback traceback.print_exc() raise - print(f"[FaceMask] YOLO model loaded: {model_path}") - print(f"[FaceMask] Device: {self._device}") - return self._model + def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]: + """Convert a single YOLO pose result to (x, y, w, h, conf) tuples.""" + detections = [] + if result.boxes is None or result.keypoints is None: + return detections + + boxes = result.boxes + keypoints = result.keypoints + + for i, box in enumerate(boxes): + conf = float(box.conf[0].cpu().numpy()) + x1, y1, x2, y2 = box.xyxy[0].cpu().numpy() + + # Extract keypoints for this person + kp_data = keypoints.data[i].cpu().numpy() # shape (17, 3): x, y, conf + kp_xy = kp_data[:, :2] + kp_conf = kp_data[:, 2] + + hx, hy, hw, hh = _head_bbox_from_pose( + kp_xy, kp_conf, + float(x1), float(y1), float(x2), float(y2), + ) + detections.append((hx, hy, hw, hh, conf)) + + return detections + def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]: """ - Detect faces in a frame. + Detect heads in a frame. Args: frame: BGR image as numpy array (H, W, C) @@ -100,7 +185,6 @@ class YOLOFaceDetector: Returns: List of detections as (x, y, width, height, confidence) """ - # Run inference import torch print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}") try: @@ -116,7 +200,6 @@ class YOLOFaceDetector: print(f"[FaceMask] ERROR during inference: {e}") import traceback traceback.print_exc() - # Fallback to CPU print("[FaceMask] Falling back to CPU inference...") self._device = 'cpu' results = self.model.predict( @@ -128,28 +211,13 @@ class YOLOFaceDetector: device='cpu', ) - # Extract detections - detections = [] - if len(results) > 0 and results[0].boxes is not None: - boxes = results[0].boxes - for box in boxes: - # Get coordinates in xyxy format - x1, y1, x2, y2 = box.xyxy[0].cpu().numpy() - conf = float(box.conf[0].cpu().numpy()) - - # Convert to x, y, width, height - x = int(x1) - y = int(y1) - w = int(x2 - x1) - h = int(y2 - y1) - - detections.append((x, y, w, h, conf)) - - return detections + if results: + return self._results_to_detections(results[0]) + return [] def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]: """ - Detect faces in multiple frames at once (batch processing). + Detect heads in multiple frames at once (batch processing). Args: frames: List of BGR images as numpy arrays (H, W, C) @@ -161,7 +229,6 @@ class YOLOFaceDetector: if not frames: return [] - # Run batch inference try: results = self.model.predict( frames, @@ -175,7 +242,6 @@ class YOLOFaceDetector: print(f"[FaceMask] ERROR during batch inference: {e}") import traceback traceback.print_exc() - # Fallback to CPU print("[FaceMask] Falling back to CPU inference...") self._device = 'cpu' results = self.model.predict( @@ -187,28 +253,7 @@ class YOLOFaceDetector: device='cpu', ) - # Extract detections for each frame - all_detections = [] - for result in results: - detections = [] - if result.boxes is not None: - boxes = result.boxes - for box in boxes: - # Get coordinates in xyxy format - x1, y1, x2, y2 = box.xyxy[0].cpu().numpy() - conf = float(box.conf[0].cpu().numpy()) - - # Convert to x, y, width, height - x = int(x1) - y = int(y1) - w = int(x2 - x1) - h = int(y2 - y1) - - detections.append((x, y, w, h, conf)) - - all_detections.append(detections) - - return all_detections + return [self._results_to_detections(r) for r in results] def generate_mask( self, @@ -218,11 +263,11 @@ class YOLOFaceDetector: feather_radius: int = 20, ) -> np.ndarray: """ - Generate a mask image from face detections. + Generate a mask image from head detections. Args: frame_shape: Shape of the original frame (height, width, channels) - detections: List of face detections (x, y, w, h, conf) + detections: List of head detections (x, y, w, h, conf) mask_scale: Scale factor for mask region feather_radius: Radius for edge feathering @@ -235,25 +280,19 @@ class YOLOFaceDetector: mask = np.zeros((height, width), dtype=np.uint8) for (x, y, w, h, conf) in detections: - # Scale the bounding box center_x = x + w // 2 center_y = y + h // 2 - scaled_w = int(w * mask_scale) scaled_h = int(h * mask_scale) - # Draw ellipse for natural face shape cv2.ellipse( mask, (center_x, center_y), (scaled_w // 2, scaled_h // 2), - 0, # angle - 0, 360, # arc - 255, # color (white) - -1, # filled + 0, 0, 360, + 255, -1, ) - # Apply Gaussian blur for feathering if feather_radius > 0 and len(detections) > 0: kernel_size = feather_radius * 2 + 1 mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0) @@ -262,12 +301,12 @@ class YOLOFaceDetector: # Singleton instance -_detector: Optional[YOLOFaceDetector] = None +_detector: Optional[YOLOPoseHeadDetector] = None -def get_detector(**kwargs) -> YOLOFaceDetector: - """Get or create the global YOLO detector instance.""" +def get_detector(**kwargs) -> YOLOPoseHeadDetector: + """Get or create the global YOLO pose head detector instance.""" global _detector if _detector is None: - _detector = YOLOFaceDetector(**kwargs) + _detector = YOLOPoseHeadDetector(**kwargs) return _detector