姿勢推定モデルを止め、頭部特化モデルに変更してテスト
This commit is contained in:
parent
dc41327cea
commit
de99aef9ad
|
|
@ -239,7 +239,9 @@ class SEQUENCER_PT_face_mask(Panel):
|
|||
if not selected_movies:
|
||||
return
|
||||
count = len(selected_movies)
|
||||
label = f"Batch ({count} strip{'s' if count > 1 else ''} selected)"
|
||||
image_count = sum(1 for s in selected_movies if s.type == "IMAGE")
|
||||
video_count = sum(1 for s in selected_movies if s.type == "MOVIE")
|
||||
label = f"Batch ({count} selected, image: {image_count}, video: {video_count})"
|
||||
box = layout.box()
|
||||
box.label(text=label, icon='RENDER_ANIMATION')
|
||||
box.operator(
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
"""
|
||||
YOLOv8 Pose Head Detector using PyTorch with ROCm support.
|
||||
YOLOv8 Head Detector using CrowdHuman-trained model with PyTorch ROCm support.
|
||||
|
||||
Detects human heads from all angles (frontal, profile, rear) by using
|
||||
YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
|
||||
Directly detects human heads (frontal, profile, rear) using the Owen718
|
||||
CrowdHuman YOLOv8 model, which was trained on dense crowd scenes.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
|
@ -10,96 +10,27 @@ from typing import List, Tuple, Optional
|
|||
import numpy as np
|
||||
|
||||
|
||||
# COCO pose keypoint indices
|
||||
_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear
|
||||
_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder
|
||||
_KP_CONF_THRESH = 0.3
|
||||
def _download_model(dest_path: str):
|
||||
"""Raise RuntimeError with manual download instructions; callers invoke this when the model file is missing."""
|
||||
gdrive_id = "1qlBmiEU4GBV13fxPhLZqjhjBbREvs8-m"
|
||||
raise RuntimeError(
|
||||
f"モデルファイルが見つかりません: {dest_path}\n"
|
||||
"以下の手順でダウンロードしてください:\n"
|
||||
f" 1. https://drive.google.com/file/d/{gdrive_id} を開く\n"
|
||||
f" 2. ダウンロードしたファイルを {dest_path} に配置する"
|
||||
)
|
||||
|
||||
|
||||
def _head_bbox_from_pose(
|
||||
kp_xy: np.ndarray,
|
||||
kp_conf: np.ndarray,
|
||||
person_x1: float,
|
||||
person_y1: float,
|
||||
person_x2: float,
|
||||
person_y2: float,
|
||||
) -> Tuple[int, int, int, int]:
|
||||
class YOLOHeadDetector:
|
||||
"""
|
||||
Estimate head bounding box (x, y, w, h) from COCO pose keypoints.
|
||||
Head detector using CrowdHuman-trained YOLOv8 model with PyTorch ROCm support.
|
||||
|
||||
Strategy:
|
||||
1. Use head keypoints (0-4: nose, eyes, ears) if visible.
|
||||
2. Fall back to shoulder keypoints (5-6) to infer head position.
|
||||
3. Last resort: use top of the person bounding box.
|
||||
"""
|
||||
person_w = max(person_x2 - person_x1, 1.0)
|
||||
|
||||
# --- Step 1: head keypoints ---
|
||||
visible_head = [
|
||||
(float(kp_xy[i][0]), float(kp_xy[i][1]))
|
||||
for i in _HEAD_KP
|
||||
if float(kp_conf[i]) > _KP_CONF_THRESH
|
||||
]
|
||||
if visible_head:
|
||||
xs = [p[0] for p in visible_head]
|
||||
ys = [p[1] for p in visible_head]
|
||||
kp_x1, kp_y1 = min(xs), min(ys)
|
||||
kp_x2, kp_y2 = max(xs), max(ys)
|
||||
span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
|
||||
cx = (kp_x1 + kp_x2) / 2.0
|
||||
cy = (kp_y1 + kp_y2) / 2.0
|
||||
|
||||
# span はキーポイントの外接幅(≒顔幅)なので、半径 = span/2 で顔と等倍になる
|
||||
r = max(span * 0.5, person_w * 0.10)
|
||||
x1 = int(cx - r)
|
||||
y1 = int(cy - r)
|
||||
x2 = int(cx + r)
|
||||
y2 = int(cy + r)
|
||||
return x1, y1, x2 - x1, y2 - y1
|
||||
|
||||
# --- Step 2: shoulder keypoints ---
|
||||
visible_shoulder = [
|
||||
(float(kp_xy[i][0]), float(kp_xy[i][1]))
|
||||
for i in _SHOULDER_KP
|
||||
if float(kp_conf[i]) > _KP_CONF_THRESH
|
||||
]
|
||||
if visible_shoulder:
|
||||
cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
|
||||
cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
|
||||
if len(visible_shoulder) == 2:
|
||||
sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
|
||||
else:
|
||||
sh_width = person_w * 0.5
|
||||
# 肩幅は顔幅の約1.5〜2倍なので、0.3倍で顔サイズに近い半径になる
|
||||
r = max(sh_width * 0.3, person_w * 0.12)
|
||||
cy = cy_sh - r * 1.3 # 頭の中心は肩より上
|
||||
x1 = int(cx - r)
|
||||
y1 = int(cy - r)
|
||||
x2 = int(cx + r)
|
||||
y2 = int(cy + r)
|
||||
return x1, y1, x2 - x1, y2 - y1
|
||||
|
||||
# --- Step 3: person bbox top ---
|
||||
# 顔幅は人物幅の約20〜30%なので、半径 = person_w * 0.15 で顔サイズに近い
|
||||
r = max(person_w * 0.15, 20.0)
|
||||
cx = (person_x1 + person_x2) / 2.0
|
||||
x1 = int(cx - r)
|
||||
y1 = int(person_y1)
|
||||
x2 = int(cx + r)
|
||||
y2 = int(person_y1 + r * 2.0)
|
||||
return x1, y1, x2 - x1, y2 - y1
|
||||
|
||||
|
||||
class YOLOPoseHeadDetector:
|
||||
"""
|
||||
Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
|
||||
|
||||
Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
|
||||
so that detection works regardless of the person's facing direction.
|
||||
Directly detects heads (class 0: head) without pose estimation,
|
||||
enabling robust detection of rear-facing, side-facing, and partially
|
||||
visible people in dense crowd scenes.
|
||||
"""
|
||||
|
||||
# CrowdHuman head model — not auto-downloaded; must be placed at this path manually (see _download_model)
|
||||
DEFAULT_MODEL = os.path.join("models", "yolov8n-pose.pt")
|
||||
DEFAULT_MODEL = os.path.join("models", "crowdhuman_yolov8_head.pt")
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -117,19 +48,19 @@ class YOLOPoseHeadDetector:
|
|||
|
||||
@property
|
||||
def model(self):
|
||||
"""Lazy-load YOLO pose model."""
|
||||
"""Lazy-load YOLO head detection model."""
|
||||
if self._model is None:
|
||||
from ultralytics import YOLO
|
||||
import torch
|
||||
|
||||
# Use provided path, or fall back to the default model (which must already exist locally)
|
||||
if self._model_path is not None:
|
||||
if not os.path.exists(self._model_path):
|
||||
raise FileNotFoundError(f"Model not found: {self._model_path}")
|
||||
model_path = self._model_path
|
||||
else:
|
||||
model_path = self.DEFAULT_MODEL
|
||||
os.makedirs(os.path.dirname(model_path), exist_ok=True)
|
||||
if not os.path.exists(model_path):
|
||||
_download_model(model_path)
|
||||
|
||||
if torch.cuda.is_available():
|
||||
self._device = 'cuda'
|
||||
|
|
@ -141,7 +72,7 @@ class YOLOPoseHeadDetector:
|
|||
|
||||
try:
|
||||
self._model = YOLO(model_path)
|
||||
print(f"[FaceMask] Pose model loaded: {model_path}")
|
||||
print(f"[FaceMask] Head detection model loaded: {model_path}")
|
||||
print(f"[FaceMask] Device: {self._device}")
|
||||
except Exception as e:
|
||||
print(f"[FaceMask] Error loading model: {e}")
|
||||
|
|
@ -152,29 +83,14 @@ class YOLOPoseHeadDetector:
|
|||
return self._model
|
||||
|
||||
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
|
||||
"""Convert a single YOLO pose result to (x, y, w, h, conf) tuples."""
|
||||
"""Convert a single YOLO result to (x, y, w, h, conf) tuples."""
|
||||
if result.boxes is None:
|
||||
return []
|
||||
detections = []
|
||||
if result.boxes is None or result.keypoints is None:
|
||||
return detections
|
||||
|
||||
boxes = result.boxes
|
||||
keypoints = result.keypoints
|
||||
|
||||
for i, box in enumerate(boxes):
|
||||
for box in result.boxes:
|
||||
conf = float(box.conf[0].cpu().numpy())
|
||||
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
|
||||
|
||||
# Extract keypoints for this person
|
||||
kp_data = keypoints.data[i].cpu().numpy() # shape (17, 3): x, y, conf
|
||||
kp_xy = kp_data[:, :2]
|
||||
kp_conf = kp_data[:, 2]
|
||||
|
||||
hx, hy, hw, hh = _head_bbox_from_pose(
|
||||
kp_xy, kp_conf,
|
||||
float(x1), float(y1), float(x2), float(y2),
|
||||
)
|
||||
detections.append((hx, hy, hw, hh, conf))
|
||||
|
||||
detections.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1), conf))
|
||||
return detections
|
||||
|
||||
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
|
||||
|
|
@ -303,12 +219,12 @@ class YOLOPoseHeadDetector:
|
|||
|
||||
|
||||
# Singleton instance
|
||||
_detector: Optional[YOLOPoseHeadDetector] = None
|
||||
_detector: Optional[YOLOHeadDetector] = None
|
||||
|
||||
|
||||
def get_detector(**kwargs) -> YOLOPoseHeadDetector:
|
||||
"""Get or create the global YOLO pose head detector instance."""
|
||||
def get_detector(**kwargs) -> YOLOHeadDetector:
|
||||
"""Get or create the global YOLO head detector instance."""
|
||||
global _detector
|
||||
if _detector is None:
|
||||
_detector = YOLOPoseHeadDetector(**kwargs)
|
||||
_detector = YOLOHeadDetector(**kwargs)
|
||||
return _detector
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user