Change model: face -> pose

This commit is contained in:
Keisuke Hirata 2026-02-18 20:18:53 +09:00
parent 920695696b
commit 08f20fa6fe

View File

@ -1,28 +1,104 @@
"""
YOLOv8 Face Detector using PyTorch with ROCm support.
YOLOv8 Pose Head Detector using PyTorch with ROCm support.
This module provides high-performance face detection using
YOLOv8-face model with AMD GPU (ROCm) acceleration.
Detects human heads from all angles (frontal, profile, rear) by using
YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
"""
import os
from typing import List, Tuple, Optional
from pathlib import Path
import numpy as np
class YOLOFaceDetector:
"""
YOLOv8 face detector with PyTorch ROCm support.
# COCO pose keypoint indices used by _head_bbox_from_pose.
# Each list is iterated in order; the indices address rows of the per-person
# keypoint arrays (coordinates and confidences share the same indexing).
_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear
_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder
# A keypoint is treated as visible only when its confidence is strictly
# greater than this value.
_KP_CONF_THRESH = 0.3
Features:
- ROCm GPU acceleration for AMD GPUs
- High accuracy face detection
- Automatic NMS for overlapping detections
def _head_bbox_from_pose(
    kp_xy: np.ndarray,
    kp_conf: np.ndarray,
    person_x1: float,
    person_y1: float,
    person_x2: float,
    person_y2: float,
) -> Tuple[int, int, int, int]:
    """
    Derive a head box ``(x, y, w, h)`` for one person from pose keypoints.

    Three sources are tried in order; the first one that has data wins:

    1. Head landmarks (indices in ``_HEAD_KP``) whose confidence exceeds
       ``_KP_CONF_THRESH``.
    2. Shoulders (indices in ``_SHOULDER_KP``) under the same confidence test;
       the head is placed above their mean position.
    3. A square anchored to the top edge of the person box.

    Args:
        kp_xy: Keypoint coordinates, read as ``kp_xy[i][0]`` (x) and
            ``kp_xy[i][1]`` (y).
        kp_conf: Keypoint confidences, read as ``kp_conf[i]``.
        person_x1: Left edge of the person box.
        person_y1: Top edge of the person box.
        person_x2: Right edge of the person box.
        person_y2: Bottom edge of the person box. Accepted for a symmetric
            signature; the computation never reads it.

    Returns:
        ``(x, y, w, h)`` with every component truncated to ``int``. The box is
        not clamped to any image bounds, so ``x`` / ``y`` may be negative.
    """

    def confident_points(indices):
        # Keep only keypoints strictly above the confidence threshold.
        return [
            (float(kp_xy[idx][0]), float(kp_xy[idx][1]))
            for idx in indices
            if float(kp_conf[idx]) > _KP_CONF_THRESH
        ]

    def as_xywh(left, top, right, bottom):
        # Truncate each corner first, then take differences, so that
        # x + w and y + h land exactly on the truncated far corner.
        x0, y0 = int(left), int(top)
        return x0, y0, int(right) - x0, int(bottom) - y0

    # Person width, floored at 1.0 so the proportional minimums below stay
    # positive even for a degenerate person box.
    body_w = max(person_x2 - person_x1, 1.0)

    # --- Source 1: head landmarks ---
    head_pts = confident_points(_HEAD_KP)
    if head_pts:
        xs, ys = zip(*head_pts)
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)
        extent = max(right - left, bottom - top, 1.0)
        mid_x = (left + right) / 2.0
        mid_y = (top + bottom) / 2.0
        # `half` is a half-extent: the resulting box is about 2 * half wide.
        # It is at least 20% of the person width so a single visible landmark
        # (extent == 1.0) still produces a usable box.
        half = max(extent * 1.25, body_w * 0.20)
        # Asymmetric vertical margins: more room above the landmark centre
        # (scalp) than below it (chin).
        return as_xywh(
            mid_x - half,
            mid_y - half * 1.15,
            mid_x + half,
            mid_y + half * 0.85,
        )

    # --- Source 2: shoulders ---
    shoulder_pts = confident_points(_SHOULDER_KP)
    if shoulder_pts:
        count = len(shoulder_pts)
        mid_x = sum(pt[0] for pt in shoulder_pts) / count
        shoulder_y = sum(pt[1] for pt in shoulder_pts) / count
        # With both shoulders the measured span is used; with only one, half
        # the person width stands in for it.
        shoulder_span = (
            abs(shoulder_pts[1][0] - shoulder_pts[0][0])
            if count == 2
            else body_w * 0.5
        )
        half = max(shoulder_span * 0.5, body_w * 0.20)
        # Head centre sits above the shoulder line.
        mid_y = shoulder_y - half * 1.3
        return as_xywh(mid_x - half, mid_y - half, mid_x + half, mid_y + half)

    # --- Source 3: top of the person box ---
    half = max(body_w * 0.35, 20.0)
    mid_x = (person_x1 + person_x2) / 2.0
    return as_xywh(mid_x - half, person_y1, mid_x + half, person_y1 + half * 2.0)
class YOLOPoseHeadDetector:
"""
Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
so that detection works regardless of the person's facing direction.
"""
# Default model path relative to this file
DEFAULT_MODEL = "yolov8n-face-lindevs.pt"
# Standard Ultralytics model — auto-downloaded on first use
DEFAULT_MODEL = "yolov8n-pose.pt"
def __init__(
self,
@ -31,15 +107,6 @@ class YOLOFaceDetector:
iou_threshold: float = 0.45,
input_size: Tuple[int, int] = (640, 640),
):
"""
Initialize the YOLO face detector.
Args:
model_path: Path to PyTorch model file. If None, uses default model.
conf_threshold: Confidence threshold for detections
iou_threshold: IoU threshold for NMS
input_size: Model input size (width, height)
"""
self.conf_threshold = conf_threshold
self.iou_threshold = iou_threshold
self.input_size = input_size
@ -49,23 +116,19 @@ class YOLOFaceDetector:
@property
def model(self):
"""Lazy-load YOLO model."""
"""Lazy-load YOLO pose model."""
if self._model is None:
from ultralytics import YOLO
import torch
# Determine model path
if self._model_path is None:
# Assuming models are in ../models relative to server/detector.py
models_dir = Path(__file__).parent.parent / "models"
model_path = str(models_dir / self.DEFAULT_MODEL)
else:
# Use provided path or let Ultralytics auto-download the default
if self._model_path is not None:
if not os.path.exists(self._model_path):
raise FileNotFoundError(f"Model not found: {self._model_path}")
model_path = self._model_path
else:
model_path = self.DEFAULT_MODEL
if not os.path.exists(model_path):
raise FileNotFoundError(f"Model not found: {model_path}")
# Detect device (ROCm GPU or CPU)
if torch.cuda.is_available():
self._device = 'cuda'
device_name = torch.cuda.get_device_name(0)
@ -74,25 +137,47 @@ class YOLOFaceDetector:
self._device = 'cpu'
print("[FaceMask] Using CPU for inference (ROCm GPU not available)")
# Load model (let Ultralytics handle device management)
try:
self._model = YOLO(model_path)
# Don't call .to() - let predict() handle device assignment
print(f"[FaceMask] Model loaded, will use device: {self._device}")
print(f"[FaceMask] Pose model loaded: {model_path}")
print(f"[FaceMask] Device: {self._device}")
except Exception as e:
print(f"[FaceMask] Error loading model: {e}")
import traceback
traceback.print_exc()
raise
print(f"[FaceMask] YOLO model loaded: {model_path}")
print(f"[FaceMask] Device: {self._device}")
return self._model
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
    """Convert a single YOLO pose result to (x, y, w, h, conf) tuples.

    Args:
        result: One pose result exposing ``boxes`` (with ``conf`` and
            ``xyxy`` tensors) and ``keypoints`` (with a ``data`` tensor).
            Assumes ``keypoints.data`` holds one (num_keypoints, 3) slab of
            x, y, confidence per person, in the same order as ``boxes``
            -- TODO confirm for custom models passed via ``model_path``.

    Returns:
        One ``(x, y, w, h, conf)`` tuple per detected person, where the box
        is the head box estimated by ``_head_bbox_from_pose`` and ``conf``
        is the person-detection confidence. Empty when the result carries
        no boxes or no keypoints.
    """
    if result.boxes is None or result.keypoints is None:
        return []

    # Move each tensor to host memory once for the whole frame rather than
    # issuing separate .cpu() transfers for every detected person.
    confs = result.boxes.conf.cpu().numpy()
    if len(confs) == 0:
        # Nothing detected: stop before touching the keypoint tensor at all.
        return []
    corners = result.boxes.xyxy.cpu().numpy()
    kp_all = result.keypoints.data.cpu().numpy()

    detections = []
    # zip pairs the i-th box with the i-th keypoint set; should the two ever
    # differ in length, iteration stops at the shorter one.
    for conf, (x1, y1, x2, y2), kp in zip(confs, corners, kp_all):
        hx, hy, hw, hh = _head_bbox_from_pose(
            kp[:, :2],  # x, y columns
            kp[:, 2],   # confidence column
            float(x1), float(y1), float(x2), float(y2),
        )
        detections.append((hx, hy, hw, hh, float(conf)))
    return detections
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
"""
Detect faces in a frame.
Detect heads in a frame.
Args:
frame: BGR image as numpy array (H, W, C)
@ -100,7 +185,6 @@ class YOLOFaceDetector:
Returns:
List of detections as (x, y, width, height, confidence)
"""
# Run inference
import torch
print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}")
try:
@ -116,7 +200,6 @@ class YOLOFaceDetector:
print(f"[FaceMask] ERROR during inference: {e}")
import traceback
traceback.print_exc()
# Fallback to CPU
print("[FaceMask] Falling back to CPU inference...")
self._device = 'cpu'
results = self.model.predict(
@ -128,28 +211,13 @@ class YOLOFaceDetector:
device='cpu',
)
# Extract detections
detections = []
if len(results) > 0 and results[0].boxes is not None:
boxes = results[0].boxes
for box in boxes:
# Get coordinates in xyxy format
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
conf = float(box.conf[0].cpu().numpy())
# Convert to x, y, width, height
x = int(x1)
y = int(y1)
w = int(x2 - x1)
h = int(y2 - y1)
detections.append((x, y, w, h, conf))
return detections
if results:
return self._results_to_detections(results[0])
return []
def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
"""
Detect faces in multiple frames at once (batch processing).
Detect heads in multiple frames at once (batch processing).
Args:
frames: List of BGR images as numpy arrays (H, W, C)
@ -161,7 +229,6 @@ class YOLOFaceDetector:
if not frames:
return []
# Run batch inference
try:
results = self.model.predict(
frames,
@ -175,7 +242,6 @@ class YOLOFaceDetector:
print(f"[FaceMask] ERROR during batch inference: {e}")
import traceback
traceback.print_exc()
# Fallback to CPU
print("[FaceMask] Falling back to CPU inference...")
self._device = 'cpu'
results = self.model.predict(
@ -187,28 +253,7 @@ class YOLOFaceDetector:
device='cpu',
)
# Extract detections for each frame
all_detections = []
for result in results:
detections = []
if result.boxes is not None:
boxes = result.boxes
for box in boxes:
# Get coordinates in xyxy format
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
conf = float(box.conf[0].cpu().numpy())
# Convert to x, y, width, height
x = int(x1)
y = int(y1)
w = int(x2 - x1)
h = int(y2 - y1)
detections.append((x, y, w, h, conf))
all_detections.append(detections)
return all_detections
return [self._results_to_detections(r) for r in results]
def generate_mask(
self,
@ -218,11 +263,11 @@ class YOLOFaceDetector:
feather_radius: int = 20,
) -> np.ndarray:
"""
Generate a mask image from face detections.
Generate a mask image from head detections.
Args:
frame_shape: Shape of the original frame (height, width, channels)
detections: List of face detections (x, y, w, h, conf)
detections: List of head detections (x, y, w, h, conf)
mask_scale: Scale factor for mask region
feather_radius: Radius for edge feathering
@ -235,25 +280,19 @@ class YOLOFaceDetector:
mask = np.zeros((height, width), dtype=np.uint8)
for (x, y, w, h, conf) in detections:
# Scale the bounding box
center_x = x + w // 2
center_y = y + h // 2
scaled_w = int(w * mask_scale)
scaled_h = int(h * mask_scale)
# Draw ellipse for natural face shape
cv2.ellipse(
mask,
(center_x, center_y),
(scaled_w // 2, scaled_h // 2),
0, # angle
0, 360, # arc
255, # color (white)
-1, # filled
0, 0, 360,
255, -1,
)
# Apply Gaussian blur for feathering
if feather_radius > 0 and len(detections) > 0:
kernel_size = feather_radius * 2 + 1
mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
@ -262,12 +301,12 @@ class YOLOFaceDetector:
# Singleton instance
_detector: Optional[YOLOFaceDetector] = None
_detector: Optional[YOLOPoseHeadDetector] = None
def get_detector(**kwargs) -> YOLOFaceDetector:
"""Get or create the global YOLO detector instance."""
def get_detector(**kwargs) -> YOLOPoseHeadDetector:
"""Get or create the global YOLO pose head detector instance."""
global _detector
if _detector is None:
_detector = YOLOFaceDetector(**kwargs)
_detector = YOLOPoseHeadDetector(**kwargs)
return _detector