姿勢推定モデルを止め、頭部特化モデルに変更してテスト

This commit is contained in:
Keisuke Hirata 2026-02-23 03:56:23 +09:00
parent dc41327cea
commit de99aef9ad
2 changed files with 34 additions and 116 deletions

View File

@@ -239,7 +239,9 @@ class SEQUENCER_PT_face_mask(Panel):
if not selected_movies: if not selected_movies:
return return
count = len(selected_movies) count = len(selected_movies)
label = f"Batch ({count} strip{'s' if count > 1 else ''} selected)" image_count = sum(1 for s in selected_movies if s.type == "IMAGE")
video_count = sum(1 for s in selected_movies if s.type == "MOVIE")
label = f"Batch ({count} selected, image: {image_count}, video: {video_count})"
box = layout.box() box = layout.box()
box.label(text=label, icon='RENDER_ANIMATION') box.label(text=label, icon='RENDER_ANIMATION')
box.operator( box.operator(

View File

@@ -1,8 +1,8 @@
""" """
YOLOv8 Pose Head Detector using PyTorch with ROCm support. YOLOv8 Head Detector using CrowdHuman-trained model with PyTorch ROCm support.
Detects human heads from all angles (frontal, profile, rear) by using Directly detects human heads (frontal, profile, rear) using the Owen718
YOLOv8 pose estimation and extracting head bounding boxes from keypoints. CrowdHuman YOLOv8 model, which was trained on dense crowd scenes.
""" """
import os import os
@@ -10,96 +10,27 @@ from typing import List, Tuple, Optional
import numpy as np import numpy as np
# COCO pose keypoint indices def _download_model(dest_path: str):
_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear """モデルが存在しない場合に手動ダウンロード手順を表示して例外を送出する。"""
_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder gdrive_id = "1qlBmiEU4GBV13fxPhLZqjhjBbREvs8-m"
_KP_CONF_THRESH = 0.3 raise RuntimeError(
f"モデルファイルが見つかりません: {dest_path}\n"
"以下の手順でダウンロードしてください:\n"
f" 1. https://drive.google.com/file/d/{gdrive_id} を開く\n"
f" 2. ダウンロードしたファイルを {dest_path} に配置する"
)
def _head_bbox_from_pose( class YOLOHeadDetector:
kp_xy: np.ndarray,
kp_conf: np.ndarray,
person_x1: float,
person_y1: float,
person_x2: float,
person_y2: float,
) -> Tuple[int, int, int, int]:
""" """
Estimate head bounding box (x, y, w, h) from COCO pose keypoints. Head detector using CrowdHuman-trained YOLOv8 model with PyTorch ROCm support.
Strategy: Directly detects heads (class 0: head) without pose estimation,
1. Use head keypoints (0-4: nose, eyes, ears) if visible. enabling robust detection of rear-facing, side-facing, and partially
2. Fall back to shoulder keypoints (5-6) to infer head position. visible people in dense crowd scenes.
3. Last resort: use top of the person bounding box.
"""
person_w = max(person_x2 - person_x1, 1.0)
# --- Step 1: head keypoints ---
visible_head = [
(float(kp_xy[i][0]), float(kp_xy[i][1]))
for i in _HEAD_KP
if float(kp_conf[i]) > _KP_CONF_THRESH
]
if visible_head:
xs = [p[0] for p in visible_head]
ys = [p[1] for p in visible_head]
kp_x1, kp_y1 = min(xs), min(ys)
kp_x2, kp_y2 = max(xs), max(ys)
span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
cx = (kp_x1 + kp_x2) / 2.0
cy = (kp_y1 + kp_y2) / 2.0
# span はキーポイントの外接幅(≒顔幅)なので、半径 = span/2 で顔と等倍になる
r = max(span * 0.5, person_w * 0.10)
x1 = int(cx - r)
y1 = int(cy - r)
x2 = int(cx + r)
y2 = int(cy + r)
return x1, y1, x2 - x1, y2 - y1
# --- Step 2: shoulder keypoints ---
visible_shoulder = [
(float(kp_xy[i][0]), float(kp_xy[i][1]))
for i in _SHOULDER_KP
if float(kp_conf[i]) > _KP_CONF_THRESH
]
if visible_shoulder:
cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
if len(visible_shoulder) == 2:
sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
else:
sh_width = person_w * 0.5
# 肩幅は顔幅の約1.5〜2倍なので、0.3倍で顔サイズに近い半径になる
r = max(sh_width * 0.3, person_w * 0.12)
cy = cy_sh - r * 1.3 # 頭の中心は肩より上
x1 = int(cx - r)
y1 = int(cy - r)
x2 = int(cx + r)
y2 = int(cy + r)
return x1, y1, x2 - x1, y2 - y1
# --- Step 3: person bbox top ---
# 顔幅は人物幅の約20〜30%なので、半径 = person_w * 0.15 で顔サイズに近い
r = max(person_w * 0.15, 20.0)
cx = (person_x1 + person_x2) / 2.0
x1 = int(cx - r)
y1 = int(person_y1)
x2 = int(cx + r)
y2 = int(person_y1 + r * 2.0)
return x1, y1, x2 - x1, y2 - y1
class YOLOPoseHeadDetector:
"""
Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
so that detection works regardless of the person's facing direction.
""" """
# Standard Ultralytics model — auto-downloaded on first use DEFAULT_MODEL = os.path.join("models", "crowdhuman_yolov8_head.pt")
DEFAULT_MODEL = os.path.join("models", "yolov8n-pose.pt")
def __init__( def __init__(
self, self,
@@ -117,19 +48,19 @@ class YOLOPoseHeadDetector:
@property @property
def model(self): def model(self):
"""Lazy-load YOLO pose model.""" """Lazy-load YOLO head detection model."""
if self._model is None: if self._model is None:
from ultralytics import YOLO from ultralytics import YOLO
import torch import torch
# Use provided path or let Ultralytics auto-download the default
if self._model_path is not None: if self._model_path is not None:
if not os.path.exists(self._model_path): if not os.path.exists(self._model_path):
raise FileNotFoundError(f"Model not found: {self._model_path}") raise FileNotFoundError(f"Model not found: {self._model_path}")
model_path = self._model_path model_path = self._model_path
else: else:
model_path = self.DEFAULT_MODEL model_path = self.DEFAULT_MODEL
os.makedirs(os.path.dirname(model_path), exist_ok=True) if not os.path.exists(model_path):
_download_model(model_path)
if torch.cuda.is_available(): if torch.cuda.is_available():
self._device = 'cuda' self._device = 'cuda'
@@ -141,7 +72,7 @@ class YOLOPoseHeadDetector:
try: try:
self._model = YOLO(model_path) self._model = YOLO(model_path)
print(f"[FaceMask] Pose model loaded: {model_path}") print(f"[FaceMask] Head detection model loaded: {model_path}")
print(f"[FaceMask] Device: {self._device}") print(f"[FaceMask] Device: {self._device}")
except Exception as e: except Exception as e:
print(f"[FaceMask] Error loading model: {e}") print(f"[FaceMask] Error loading model: {e}")
@@ -152,29 +83,14 @@ class YOLOPoseHeadDetector:
return self._model return self._model
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]: def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
"""Convert a single YOLO pose result to (x, y, w, h, conf) tuples.""" """Convert a single YOLO result to (x, y, w, h, conf) tuples."""
if result.boxes is None:
return []
detections = [] detections = []
if result.boxes is None or result.keypoints is None: for box in result.boxes:
return detections
boxes = result.boxes
keypoints = result.keypoints
for i, box in enumerate(boxes):
conf = float(box.conf[0].cpu().numpy()) conf = float(box.conf[0].cpu().numpy())
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy() x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
detections.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1), conf))
# Extract keypoints for this person
kp_data = keypoints.data[i].cpu().numpy() # shape (17, 3): x, y, conf
kp_xy = kp_data[:, :2]
kp_conf = kp_data[:, 2]
hx, hy, hw, hh = _head_bbox_from_pose(
kp_xy, kp_conf,
float(x1), float(y1), float(x2), float(y2),
)
detections.append((hx, hy, hw, hh, conf))
return detections return detections
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]: def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
@@ -303,12 +219,12 @@ class YOLOPoseHeadDetector:
# Singleton instance # Singleton instance
_detector: Optional[YOLOPoseHeadDetector] = None _detector: Optional[YOLOHeadDetector] = None
def get_detector(**kwargs) -> YOLOPoseHeadDetector: def get_detector(**kwargs) -> YOLOHeadDetector:
"""Get or create the global YOLO pose head detector instance.""" """Get or create the global YOLO head detector instance."""
global _detector global _detector
if _detector is None: if _detector is None:
_detector = YOLOPoseHeadDetector(**kwargs) _detector = YOLOHeadDetector(**kwargs)
return _detector return _detector