Change model: face -> pose
This commit is contained in:
parent
920695696b
commit
08f20fa6fe
|
|
@ -1,28 +1,104 @@
|
|||
"""
|
||||
YOLOv8 Face Detector using PyTorch with ROCm support.
|
||||
YOLOv8 Pose Head Detector using PyTorch with ROCm support.
|
||||
|
||||
This module provides high-performance face detection using
|
||||
YOLOv8-face model with AMD GPU (ROCm) acceleration.
|
||||
Detects human heads from all angles (frontal, profile, rear) by using
|
||||
YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List, Tuple, Optional
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
|
||||
|
||||
class YOLOFaceDetector:
|
||||
"""
|
||||
YOLOv8 face detector with PyTorch ROCm support.
|
||||
# COCO pose keypoint indices
|
||||
_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear
|
||||
_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder
|
||||
_KP_CONF_THRESH = 0.3
|
||||
|
||||
Features:
|
||||
- ROCm GPU acceleration for AMD GPUs
|
||||
- High accuracy face detection
|
||||
- Automatic NMS for overlapping detections
|
||||
|
||||
def _head_bbox_from_pose(
    kp_xy: np.ndarray,
    kp_conf: np.ndarray,
    person_x1: float,
    person_y1: float,
    person_x2: float,
    person_y2: float,
) -> Tuple[int, int, int, int]:
    """
    Estimate a head bounding box from one person's pose keypoints.

    Three strategies are tried in order; the first that applies wins:

    1. Head keypoints (indices in ``_HEAD_KP``): a box centred on the
       bounding rectangle of the confident head keypoints.
    2. Shoulder keypoints (indices in ``_SHOULDER_KP``): a square box
       placed above the mean shoulder position.
    3. Person box: a box anchored at the top edge of the person box and
       centred horizontally on it.

    Args:
        kp_xy: Keypoint coordinates, indexed as ``kp_xy[i][0]`` (x) and
            ``kp_xy[i][1]`` (y).
        kp_conf: Keypoint confidences, indexed as ``kp_conf[i]``. A keypoint
            is used only when its confidence is strictly greater than
            ``_KP_CONF_THRESH``.
        person_x1: Left edge of the person box.
        person_y1: Top edge of the person box.
        person_x2: Right edge of the person box.
        person_y2: Bottom edge of the person box (not used by the estimate).

    Returns:
        ``(x, y, w, h)`` as ints. The box is not clamped to the person box
        or to any image bounds, so ``x`` / ``y`` can be negative.
    """

    def confident_points(indices):
        # Collect (x, y) as plain Python floats for keypoints above threshold.
        return [
            (float(kp_xy[idx][0]), float(kp_xy[idx][1]))
            for idx in indices
            if float(kp_conf[idx]) > _KP_CONF_THRESH
        ]

    def as_xywh(left, top, right, bottom):
        # Truncate the edges first, then derive the size from the truncated
        # edges so that x + w and y + h land exactly on the truncated far edges.
        x, y = int(left), int(top)
        return x, y, int(right) - x, int(bottom) - y

    # Person width, floored at 1 px so degenerate boxes still give a usable scale.
    body_w = max(person_x2 - person_x1, 1.0)
    # Lower bound on the half-extent used by the keypoint-based strategies.
    min_half = body_w * 0.20

    # --- Strategy 1: head keypoints ---
    face_pts = confident_points(_HEAD_KP)
    if face_pts:
        xs, ys = zip(*face_pts)
        lo_x, hi_x = min(xs), max(xs)
        lo_y, hi_y = min(ys), max(ys)
        # Larger side of the keypoint rectangle; 1.0 floor covers a single keypoint.
        extent = max(hi_x - lo_x, hi_y - lo_y, 1.0)
        mid_x = (lo_x + hi_x) / 2.0
        mid_y = (lo_y + hi_y) / 2.0

        # Half-extent is 1.25x the keypoint span (or 20% of the person width,
        # whichever is larger), giving a box 2.5x the span wide.
        # NOTE(review): the original comment reasons "span is ~80% of head
        # width, so expand by ~1.25", which would describe the full width,
        # not the half-extent used here -- confirm the extra margin is intended.
        half = max(extent * 1.25, min_half)
        # Asymmetric vertically: more room above (scalp) than below (chin).
        return as_xywh(
            mid_x - half,
            mid_y - half * 1.15,
            mid_x + half,
            mid_y + half * 0.85,
        )

    # --- Strategy 2: shoulder keypoints ---
    shoulder_pts = confident_points(_SHOULDER_KP)
    if shoulder_pts:
        count = len(shoulder_pts)
        mid_x = sum(pt[0] for pt in shoulder_pts) / count
        shoulder_y = sum(pt[1] for pt in shoulder_pts) / count
        if count == 2:
            gap = abs(shoulder_pts[1][0] - shoulder_pts[0][0])
        else:
            # Only one shoulder is confident: assume half the person width.
            gap = body_w * 0.5
        half = max(gap * 0.5, min_half)
        # Head centre sits above the shoulder line.
        mid_y = shoulder_y - half * 1.3
        return as_xywh(mid_x - half, mid_y - half, mid_x + half, mid_y + half)

    # --- Strategy 3: top of the person box ---
    half = max(body_w * 0.35, 20.0)
    mid_x = (person_x1 + person_x2) / 2.0
    return as_xywh(mid_x - half, person_y1, mid_x + half, person_y1 + half * 2.0)
|
||||
|
||||
|
||||
class YOLOPoseHeadDetector:
|
||||
"""
|
||||
Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
|
||||
|
||||
Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
|
||||
so that detection works regardless of the person's facing direction.
|
||||
"""
|
||||
|
||||
# Default model path relative to this file
|
||||
DEFAULT_MODEL = "yolov8n-face-lindevs.pt"
|
||||
# Standard Ultralytics model — auto-downloaded on first use
|
||||
DEFAULT_MODEL = "yolov8n-pose.pt"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -31,15 +107,6 @@ class YOLOFaceDetector:
|
|||
iou_threshold: float = 0.45,
|
||||
input_size: Tuple[int, int] = (640, 640),
|
||||
):
|
||||
"""
|
||||
Initialize the YOLO face detector.
|
||||
|
||||
Args:
|
||||
model_path: Path to PyTorch model file. If None, uses default model.
|
||||
conf_threshold: Confidence threshold for detections
|
||||
iou_threshold: IoU threshold for NMS
|
||||
input_size: Model input size (width, height)
|
||||
"""
|
||||
self.conf_threshold = conf_threshold
|
||||
self.iou_threshold = iou_threshold
|
||||
self.input_size = input_size
|
||||
|
|
@ -49,23 +116,19 @@ class YOLOFaceDetector:
|
|||
|
||||
@property
|
||||
def model(self):
|
||||
"""Lazy-load YOLO model."""
|
||||
"""Lazy-load YOLO pose model."""
|
||||
if self._model is None:
|
||||
from ultralytics import YOLO
|
||||
import torch
|
||||
|
||||
# Determine model path
|
||||
if self._model_path is None:
|
||||
# Assuming models are in ../models relative to server/detector.py
|
||||
models_dir = Path(__file__).parent.parent / "models"
|
||||
model_path = str(models_dir / self.DEFAULT_MODEL)
|
||||
else:
|
||||
# Use provided path or let Ultralytics auto-download the default
|
||||
if self._model_path is not None:
|
||||
if not os.path.exists(self._model_path):
|
||||
raise FileNotFoundError(f"Model not found: {self._model_path}")
|
||||
model_path = self._model_path
|
||||
else:
|
||||
model_path = self.DEFAULT_MODEL
|
||||
|
||||
if not os.path.exists(model_path):
|
||||
raise FileNotFoundError(f"Model not found: {model_path}")
|
||||
|
||||
# Detect device (ROCm GPU or CPU)
|
||||
if torch.cuda.is_available():
|
||||
self._device = 'cuda'
|
||||
device_name = torch.cuda.get_device_name(0)
|
||||
|
|
@ -74,25 +137,47 @@ class YOLOFaceDetector:
|
|||
self._device = 'cpu'
|
||||
print("[FaceMask] Using CPU for inference (ROCm GPU not available)")
|
||||
|
||||
# Load model (let Ultralytics handle device management)
|
||||
try:
|
||||
self._model = YOLO(model_path)
|
||||
# Don't call .to() - let predict() handle device assignment
|
||||
print(f"[FaceMask] Model loaded, will use device: {self._device}")
|
||||
print(f"[FaceMask] Pose model loaded: {model_path}")
|
||||
print(f"[FaceMask] Device: {self._device}")
|
||||
except Exception as e:
|
||||
print(f"[FaceMask] Error loading model: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
raise
|
||||
|
||||
print(f"[FaceMask] YOLO model loaded: {model_path}")
|
||||
print(f"[FaceMask] Device: {self._device}")
|
||||
|
||||
return self._model
|
||||
|
||||
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
    """
    Convert a single YOLO pose result to head detections.

    Args:
        result: One per-image result from the pose model. Its ``boxes``
            (person boxes with ``conf`` / ``xyxy``) and ``keypoints``
            (with ``data``) attributes are read.

    Returns:
        One ``(x, y, w, h, conf)`` tuple per detected person, where the box
        is the head box estimated by ``_head_bbox_from_pose`` and ``conf``
        is the person-box confidence. Empty when the result carries no
        boxes or no keypoints.
    """
    boxes = result.boxes
    keypoints = result.keypoints
    if boxes is None or keypoints is None:
        return []

    # Copy each tensor to host memory once per result instead of three
    # times per detected person; per-row values are unchanged.
    confs = boxes.conf.cpu().numpy()
    corners = boxes.xyxy.cpu().numpy()
    # presumably (num_persons, 17, 3) with columns x, y, conf -- the slicing
    # below relies on the last axis having at least three columns.
    kp_all = keypoints.data.cpu().numpy()

    detections = []
    # NOTE: zip stops at the shortest array; boxes and keypoints are expected
    # to be row-aligned (one keypoint set per person box).
    for conf, (x1, y1, x2, y2), kp_data in zip(confs, corners, kp_all):
        hx, hy, hw, hh = _head_bbox_from_pose(
            kp_data[:, :2],
            kp_data[:, 2],
            float(x1), float(y1), float(x2), float(y2),
        )
        detections.append((hx, hy, hw, hh, float(conf)))

    return detections
|
||||
|
||||
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
|
||||
"""
|
||||
Detect faces in a frame.
|
||||
Detect heads in a frame.
|
||||
|
||||
Args:
|
||||
frame: BGR image as numpy array (H, W, C)
|
||||
|
|
@ -100,7 +185,6 @@ class YOLOFaceDetector:
|
|||
Returns:
|
||||
List of detections as (x, y, width, height, confidence)
|
||||
"""
|
||||
# Run inference
|
||||
import torch
|
||||
print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}")
|
||||
try:
|
||||
|
|
@ -116,7 +200,6 @@ class YOLOFaceDetector:
|
|||
print(f"[FaceMask] ERROR during inference: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Fallback to CPU
|
||||
print("[FaceMask] Falling back to CPU inference...")
|
||||
self._device = 'cpu'
|
||||
results = self.model.predict(
|
||||
|
|
@ -128,28 +211,13 @@ class YOLOFaceDetector:
|
|||
device='cpu',
|
||||
)
|
||||
|
||||
# Extract detections
|
||||
detections = []
|
||||
if len(results) > 0 and results[0].boxes is not None:
|
||||
boxes = results[0].boxes
|
||||
for box in boxes:
|
||||
# Get coordinates in xyxy format
|
||||
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
|
||||
conf = float(box.conf[0].cpu().numpy())
|
||||
|
||||
# Convert to x, y, width, height
|
||||
x = int(x1)
|
||||
y = int(y1)
|
||||
w = int(x2 - x1)
|
||||
h = int(y2 - y1)
|
||||
|
||||
detections.append((x, y, w, h, conf))
|
||||
|
||||
return detections
|
||||
if results:
|
||||
return self._results_to_detections(results[0])
|
||||
return []
|
||||
|
||||
def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
|
||||
"""
|
||||
Detect faces in multiple frames at once (batch processing).
|
||||
Detect heads in multiple frames at once (batch processing).
|
||||
|
||||
Args:
|
||||
frames: List of BGR images as numpy arrays (H, W, C)
|
||||
|
|
@ -161,7 +229,6 @@ class YOLOFaceDetector:
|
|||
if not frames:
|
||||
return []
|
||||
|
||||
# Run batch inference
|
||||
try:
|
||||
results = self.model.predict(
|
||||
frames,
|
||||
|
|
@ -175,7 +242,6 @@ class YOLOFaceDetector:
|
|||
print(f"[FaceMask] ERROR during batch inference: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Fallback to CPU
|
||||
print("[FaceMask] Falling back to CPU inference...")
|
||||
self._device = 'cpu'
|
||||
results = self.model.predict(
|
||||
|
|
@ -187,28 +253,7 @@ class YOLOFaceDetector:
|
|||
device='cpu',
|
||||
)
|
||||
|
||||
# Extract detections for each frame
|
||||
all_detections = []
|
||||
for result in results:
|
||||
detections = []
|
||||
if result.boxes is not None:
|
||||
boxes = result.boxes
|
||||
for box in boxes:
|
||||
# Get coordinates in xyxy format
|
||||
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
|
||||
conf = float(box.conf[0].cpu().numpy())
|
||||
|
||||
# Convert to x, y, width, height
|
||||
x = int(x1)
|
||||
y = int(y1)
|
||||
w = int(x2 - x1)
|
||||
h = int(y2 - y1)
|
||||
|
||||
detections.append((x, y, w, h, conf))
|
||||
|
||||
all_detections.append(detections)
|
||||
|
||||
return all_detections
|
||||
return [self._results_to_detections(r) for r in results]
|
||||
|
||||
def generate_mask(
|
||||
self,
|
||||
|
|
@ -218,11 +263,11 @@ class YOLOFaceDetector:
|
|||
feather_radius: int = 20,
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Generate a mask image from face detections.
|
||||
Generate a mask image from head detections.
|
||||
|
||||
Args:
|
||||
frame_shape: Shape of the original frame (height, width, channels)
|
||||
detections: List of face detections (x, y, w, h, conf)
|
||||
detections: List of head detections (x, y, w, h, conf)
|
||||
mask_scale: Scale factor for mask region
|
||||
feather_radius: Radius for edge feathering
|
||||
|
||||
|
|
@ -235,25 +280,19 @@ class YOLOFaceDetector:
|
|||
mask = np.zeros((height, width), dtype=np.uint8)
|
||||
|
||||
for (x, y, w, h, conf) in detections:
|
||||
# Scale the bounding box
|
||||
center_x = x + w // 2
|
||||
center_y = y + h // 2
|
||||
|
||||
scaled_w = int(w * mask_scale)
|
||||
scaled_h = int(h * mask_scale)
|
||||
|
||||
# Draw ellipse for natural face shape
|
||||
cv2.ellipse(
|
||||
mask,
|
||||
(center_x, center_y),
|
||||
(scaled_w // 2, scaled_h // 2),
|
||||
0, # angle
|
||||
0, 360, # arc
|
||||
255, # color (white)
|
||||
-1, # filled
|
||||
0, 0, 360,
|
||||
255, -1,
|
||||
)
|
||||
|
||||
# Apply Gaussian blur for feathering
|
||||
if feather_radius > 0 and len(detections) > 0:
|
||||
kernel_size = feather_radius * 2 + 1
|
||||
mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
|
||||
|
|
@ -262,12 +301,12 @@ class YOLOFaceDetector:
|
|||
|
||||
|
||||
# Singleton instance
|
||||
_detector: Optional[YOLOFaceDetector] = None
|
||||
_detector: Optional[YOLOPoseHeadDetector] = None
|
||||
|
||||
|
||||
def get_detector(**kwargs) -> YOLOFaceDetector:
|
||||
"""Get or create the global YOLO detector instance."""
|
||||
def get_detector(**kwargs) -> YOLOPoseHeadDetector:
|
||||
"""Get or create the global YOLO pose head detector instance."""
|
||||
global _detector
|
||||
if _detector is None:
|
||||
_detector = YOLOFaceDetector(**kwargs)
|
||||
_detector = YOLOPoseHeadDetector(**kwargs)
|
||||
return _detector
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user