姿勢推定モデルを止め、頭部特化モデルに変更してテスト

This commit is contained in:
Keisuke Hirata 2026-02-23 03:56:23 +09:00
parent dc41327cea
commit de99aef9ad
2 changed files with 34 additions and 116 deletions

View File

@ -239,7 +239,9 @@ class SEQUENCER_PT_face_mask(Panel):
if not selected_movies:
return
count = len(selected_movies)
label = f"Batch ({count} strip{'s' if count > 1 else ''} selected)"
image_count = sum(1 for s in selected_movies if s.type == "IMAGE")
video_count = sum(1 for s in selected_movies if s.type == "MOVIE")
label = f"Batch ({count} selected, image: {image_count}, video: {video_count})"
box = layout.box()
box.label(text=label, icon='RENDER_ANIMATION')
box.operator(

View File

@ -1,8 +1,8 @@
"""
YOLOv8 Pose Head Detector using PyTorch with ROCm support.
YOLOv8 Head Detector using CrowdHuman-trained model with PyTorch ROCm support.
Detects human heads from all angles (frontal, profile, rear) by using
YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
Directly detects human heads (frontal, profile, rear) using the Owen718
CrowdHuman YOLOv8 model, which was trained on dense crowd scenes.
"""
import os
@ -10,96 +10,27 @@ from typing import List, Tuple, Optional
import numpy as np
# COCO pose keypoint indices
_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear
_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder
_KP_CONF_THRESH = 0.3
def _download_model(dest_path: str):
"""モデルが存在しない場合に手動ダウンロード手順を表示して例外を送出する。"""
gdrive_id = "1qlBmiEU4GBV13fxPhLZqjhjBbREvs8-m"
raise RuntimeError(
f"モデルファイルが見つかりません: {dest_path}\n"
"以下の手順でダウンロードしてください:\n"
f" 1. https://drive.google.com/file/d/{gdrive_id} を開く\n"
f" 2. ダウンロードしたファイルを {dest_path} に配置する"
)
def _head_bbox_from_pose(
kp_xy: np.ndarray,
kp_conf: np.ndarray,
person_x1: float,
person_y1: float,
person_x2: float,
person_y2: float,
) -> Tuple[int, int, int, int]:
class YOLOHeadDetector:
"""
Estimate head bounding box (x, y, w, h) from COCO pose keypoints.
Head detector using CrowdHuman-trained YOLOv8 model with PyTorch ROCm support.
Strategy:
1. Use head keypoints (0-4: nose, eyes, ears) if visible.
2. Fall back to shoulder keypoints (5-6) to infer head position.
3. Last resort: use top of the person bounding box.
"""
person_w = max(person_x2 - person_x1, 1.0)
# --- Step 1: head keypoints ---
visible_head = [
(float(kp_xy[i][0]), float(kp_xy[i][1]))
for i in _HEAD_KP
if float(kp_conf[i]) > _KP_CONF_THRESH
]
if visible_head:
xs = [p[0] for p in visible_head]
ys = [p[1] for p in visible_head]
kp_x1, kp_y1 = min(xs), min(ys)
kp_x2, kp_y2 = max(xs), max(ys)
span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
cx = (kp_x1 + kp_x2) / 2.0
cy = (kp_y1 + kp_y2) / 2.0
# span はキーポイントの外接幅(≒顔幅)なので、半径 = span/2 で顔と等倍になる
r = max(span * 0.5, person_w * 0.10)
x1 = int(cx - r)
y1 = int(cy - r)
x2 = int(cx + r)
y2 = int(cy + r)
return x1, y1, x2 - x1, y2 - y1
# --- Step 2: shoulder keypoints ---
visible_shoulder = [
(float(kp_xy[i][0]), float(kp_xy[i][1]))
for i in _SHOULDER_KP
if float(kp_conf[i]) > _KP_CONF_THRESH
]
if visible_shoulder:
cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
if len(visible_shoulder) == 2:
sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
else:
sh_width = person_w * 0.5
# 肩幅は顔幅の約1.5〜2倍なので、0.3倍で顔サイズに近い半径になる
r = max(sh_width * 0.3, person_w * 0.12)
cy = cy_sh - r * 1.3 # 頭の中心は肩より上
x1 = int(cx - r)
y1 = int(cy - r)
x2 = int(cx + r)
y2 = int(cy + r)
return x1, y1, x2 - x1, y2 - y1
# --- Step 3: person bbox top ---
# 顔幅は人物幅の約20〜30%なので、半径 = person_w * 0.15 で顔サイズに近い
r = max(person_w * 0.15, 20.0)
cx = (person_x1 + person_x2) / 2.0
x1 = int(cx - r)
y1 = int(person_y1)
x2 = int(cx + r)
y2 = int(person_y1 + r * 2.0)
return x1, y1, x2 - x1, y2 - y1
class YOLOPoseHeadDetector:
"""
Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
so that detection works regardless of the person's facing direction.
Directly detects heads (class 0: head) without pose estimation,
enabling robust detection of rear-facing, side-facing, and partially
visible people in dense crowd scenes.
"""
# Standard Ultralytics model — auto-downloaded on first use
DEFAULT_MODEL = os.path.join("models", "yolov8n-pose.pt")
DEFAULT_MODEL = os.path.join("models", "crowdhuman_yolov8_head.pt")
def __init__(
self,
@ -117,19 +48,19 @@ class YOLOPoseHeadDetector:
@property
def model(self):
"""Lazy-load YOLO pose model."""
"""Lazy-load YOLO head detection model."""
if self._model is None:
from ultralytics import YOLO
import torch
# Use provided path or let Ultralytics auto-download the default
if self._model_path is not None:
if not os.path.exists(self._model_path):
raise FileNotFoundError(f"Model not found: {self._model_path}")
model_path = self._model_path
else:
model_path = self.DEFAULT_MODEL
os.makedirs(os.path.dirname(model_path), exist_ok=True)
if not os.path.exists(model_path):
_download_model(model_path)
if torch.cuda.is_available():
self._device = 'cuda'
@ -141,7 +72,7 @@ class YOLOPoseHeadDetector:
try:
self._model = YOLO(model_path)
print(f"[FaceMask] Pose model loaded: {model_path}")
print(f"[FaceMask] Head detection model loaded: {model_path}")
print(f"[FaceMask] Device: {self._device}")
except Exception as e:
print(f"[FaceMask] Error loading model: {e}")
@ -152,29 +83,14 @@ class YOLOPoseHeadDetector:
return self._model
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
"""Convert a single YOLO pose result to (x, y, w, h, conf) tuples."""
"""Convert a single YOLO result to (x, y, w, h, conf) tuples."""
if result.boxes is None:
return []
detections = []
if result.boxes is None or result.keypoints is None:
return detections
boxes = result.boxes
keypoints = result.keypoints
for i, box in enumerate(boxes):
for box in result.boxes:
conf = float(box.conf[0].cpu().numpy())
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
# Extract keypoints for this person
kp_data = keypoints.data[i].cpu().numpy() # shape (17, 3): x, y, conf
kp_xy = kp_data[:, :2]
kp_conf = kp_data[:, 2]
hx, hy, hw, hh = _head_bbox_from_pose(
kp_xy, kp_conf,
float(x1), float(y1), float(x2), float(y2),
)
detections.append((hx, hy, hw, hh, conf))
detections.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1), conf))
return detections
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
@ -303,12 +219,12 @@ class YOLOPoseHeadDetector:
# Singleton instance
_detector: Optional[YOLOPoseHeadDetector] = None
_detector: Optional[YOLOHeadDetector] = None
def get_detector(**kwargs) -> YOLOPoseHeadDetector:
"""Get or create the global YOLO pose head detector instance."""
def get_detector(**kwargs) -> YOLOHeadDetector:
"""Get or create the global YOLO head detector instance."""
global _detector
if _detector is None:
_detector = YOLOPoseHeadDetector(**kwargs)
_detector = YOLOHeadDetector(**kwargs)
return _detector