# Source: blender-mask-peoples/server/detector.py
# (file-viewer metadata: 231 lines, 7.6 KiB, Python)

"""
YOLOv8 Head Detector using CrowdHuman-trained model with PyTorch ROCm support.
Directly detects human heads (frontal, profile, rear) using the Owen718
CrowdHuman YOLOv8 model, which was trained on dense crowd scenes.
"""
import os
from typing import List, Tuple, Optional
import numpy as np
def _download_model(dest_path: str):
    """Raise an error telling the user how to obtain the model file by hand.

    Despite its name, this function performs no download: it only builds a
    (Japanese, user-facing) message with the Google Drive link and the
    location where the file must be placed, then raises.

    Args:
        dest_path: Path at which the model file is expected to exist.

    Raises:
        RuntimeError: Always.
    """
    gdrive_id = "1qlBmiEU4GBV13fxPhLZqjhjBbREvs8-m"
    # Report the absolute location: a relative path (such as the default
    # "models/...") depends on the working directory of the process, so the
    # bare value does not tell the user where the file actually has to go.
    target_path = os.path.abspath(dest_path)
    raise RuntimeError(
        f"モデルファイルが見つかりません: {target_path}\n"
        "以下の手順でダウンロードしてください:\n"
        f" 1. https://drive.google.com/file/d/{gdrive_id} を開く\n"
        f" 2. ダウンロードしたファイルを {target_path} に配置する"
    )
class YOLOHeadDetector:
    """
    Head detector using CrowdHuman-trained YOLOv8 model with PyTorch ROCm support.

    Directly detects heads (class 0: head) without pose estimation,
    enabling robust detection of rear-facing, side-facing, and partially
    visible people in dense crowd scenes.

    The underlying model is loaded lazily, on first access to ``model``.
    """

    # Default weights location; relative, so it is resolved against the
    # working directory of the process.
    DEFAULT_MODEL = os.path.join("models", "crowdhuman_yolov8_head.pt")

    def __init__(
        self,
        model_path: Optional[str] = None,
        conf_threshold: float = 0.25,
        iou_threshold: float = 0.45,
        input_size: Tuple[int, int] = (640, 640),
    ):
        """
        Store the configuration; nothing is loaded until ``model`` is accessed.

        Args:
            model_path: Path to the weights file, or None to use DEFAULT_MODEL.
            conf_threshold: Value passed to the model as ``conf``.
            iou_threshold: Value passed to the model as ``iou``.
            input_size: Inference size; only the first element is passed to
                the model (as ``imgsz``).
        """
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_size = input_size
        self._model = None
        self._model_path = model_path
        # 'cuda' or 'cpu' once the model has been loaded; None before that.
        self._device = None

    @property
    def model(self):
        """
        Lazy-load YOLO head detection model.

        Returns:
            The loaded ``ultralytics.YOLO`` instance (cached after first load).

        Raises:
            FileNotFoundError: An explicit ``model_path`` was given but does
                not exist.
            RuntimeError: The default model file is missing (raised by
                ``_download_model`` with manual download instructions).
            Exception: Whatever ``YOLO(...)`` raises while loading; it is
                logged and re-raised unchanged.
        """
        if self._model is None:
            # Heavy dependencies are imported here so that importing this
            # module does not require them.
            from ultralytics import YOLO
            import torch

            if self._model_path is not None:
                if not os.path.exists(self._model_path):
                    raise FileNotFoundError(f"Model not found: {self._model_path}")
                model_path = self._model_path
            else:
                model_path = self.DEFAULT_MODEL
                if not os.path.exists(model_path):
                    _download_model(model_path)

            # ROCm builds of PyTorch expose the GPU through the torch.cuda API.
            if torch.cuda.is_available():
                self._device = 'cuda'
                device_name = torch.cuda.get_device_name(0)
                print(f"[FaceMask] Using ROCm GPU for inference: {device_name}")
            else:
                self._device = 'cpu'
                print("[FaceMask] Using CPU for inference (ROCm GPU not available)")

            try:
                self._model = YOLO(model_path)
                print(f"[FaceMask] Head detection model loaded: {model_path}")
                print(f"[FaceMask] Device: {self._device}")
            except Exception as e:
                print(f"[FaceMask] Error loading model: {e}")
                import traceback
                traceback.print_exc()
                raise
        return self._model

    def _predict(self, source, context: str):
        """
        Run ``model.predict`` on ``source``, retrying once on CPU if needed.

        Args:
            source: A single frame or a list of frames.
            context: Short label used in the error log line
                (e.g. "inference", "batch inference").

        Returns:
            The results returned by ``model.predict``.

        Raises:
            Exception: Model-loading errors propagate untouched. Inference
                errors propagate when the device is already 'cpu' or when the
                CPU retry fails as well.
        """
        # Resolve the lazy model outside the try block so that a loading
        # failure is not misreported as an inference error and retried.
        model = self.model
        options = dict(
            conf=self.conf_threshold,
            iou=self.iou_threshold,
            imgsz=self.input_size[0],
            verbose=False,
        )
        try:
            return model.predict(source, device=self._device, **options)
        except Exception as e:
            # Already on CPU: repeating the identical call cannot succeed.
            if self._device == 'cpu':
                raise
            print(f"[FaceMask] ERROR during {context}: {e}")
            import traceback
            traceback.print_exc()
            print("[FaceMask] Falling back to CPU inference...")
            # Sticky: later calls go straight to CPU.
            self._device = 'cpu'
            return model.predict(source, device='cpu', **options)

    def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
        """
        Convert a single YOLO result to (x, y, w, h, conf) tuples.

        Corner-format boxes (x1, y1, x2, y2) are converted to top-left plus
        size; coordinates are truncated to int.
        """
        boxes = result.boxes
        if boxes is None:
            return []
        # One host transfer per result instead of two per box.
        corners = boxes.xyxy.cpu().numpy()
        scores = boxes.conf.cpu().numpy()
        return [
            (int(x1), int(y1), int(x2 - x1), int(y2 - y1), float(score))
            for (x1, y1, x2, y2), score in zip(corners, scores)
        ]

    def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
        """
        Detect heads in a frame.

        If inference fails on a non-CPU device, it is retried once on CPU and
        the detector stays on CPU afterwards.

        Args:
            frame: BGR image as numpy array (H, W, C)

        Returns:
            List of detections as (x, y, width, height, confidence)
        """
        import torch

        # Force the lazy load first so the device reported below is the real
        # one rather than None on the very first call.
        _ = self.model
        print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}")

        results = self._predict(frame, "inference")
        if results:
            return self._results_to_detections(results[0])
        return []

    def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
        """
        Detect heads in multiple frames at once (batch processing).

        If inference fails on a non-CPU device, it is retried once on CPU and
        the detector stays on CPU afterwards.

        Args:
            frames: List of BGR images as numpy arrays (H, W, C)

        Returns:
            List of detection lists, one per frame.
            Each detection: (x, y, width, height, confidence)
        """
        if not frames:
            return []
        results = self._predict(frames, "batch inference")
        return [self._results_to_detections(r) for r in results]

    def generate_mask(
        self,
        frame_shape: Tuple[int, int, int],
        detections: List[Tuple[int, int, int, int, float]],
        mask_scale: float = 1.5,
        feather_radius: int = 20,
    ) -> np.ndarray:
        """
        Generate a mask image from head detections.

        Each detection is drawn as a filled ellipse centred on the box and
        scaled by ``mask_scale``; the result is optionally Gaussian-blurred.

        Args:
            frame_shape: Shape of the original frame (height, width, channels)
            detections: List of head detections (x, y, w, h, conf)
            mask_scale: Scale factor for mask region
            feather_radius: Radius for edge feathering; 0 or less disables it

        Returns:
            Grayscale mask image (white = blur, black = keep)
        """
        import cv2

        height, width = frame_shape[:2]
        mask = np.zeros((height, width), dtype=np.uint8)

        for (x, y, w, h, _conf) in detections:
            center_x = x + w // 2
            center_y = y + h // 2
            scaled_w = int(w * mask_scale)
            scaled_h = int(h * mask_scale)
            # cv2.ellipse takes half-axis lengths; thickness -1 fills it.
            cv2.ellipse(
                mask,
                (center_x, center_y),
                (scaled_w // 2, scaled_h // 2),
                0, 0, 360,
                255, -1,
            )

        # Skip the blur when nothing was drawn.
        if feather_radius > 0 and len(detections) > 0:
            # 2r + 1 is always odd, as GaussianBlur requires of its kernel.
            kernel_size = feather_radius * 2 + 1
            mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
        return mask
# Process-wide detector shared by all callers; populated on first request.
_detector: Optional[YOLOHeadDetector] = None


def get_detector(**kwargs) -> YOLOHeadDetector:
    """Return the shared YOLOHeadDetector, constructing it on the first call.

    Keyword arguments are forwarded to the ``YOLOHeadDetector`` constructor
    only when the instance is created. On later calls the existing instance
    is returned and ``kwargs`` are not used.
    """
    global _detector
    if _detector is not None:
        return _detector
    _detector = YOLOHeadDetector(**kwargs)
    return _detector