姿勢推定モデルを止め、頭部特化モデルに変更してテスト
This commit is contained in:
parent
dc41327cea
commit
de99aef9ad
|
|
@ -239,7 +239,9 @@ class SEQUENCER_PT_face_mask(Panel):
|
||||||
if not selected_movies:
|
if not selected_movies:
|
||||||
return
|
return
|
||||||
count = len(selected_movies)
|
count = len(selected_movies)
|
||||||
label = f"Batch ({count} strip{'s' if count > 1 else ''} selected)"
|
image_count = sum(1 for s in selected_movies if s.type == "IMAGE")
|
||||||
|
video_count = sum(1 for s in selected_movies if s.type == "MOVIE")
|
||||||
|
label = f"Batch ({count} selected, image: {image_count}, video: {video_count})"
|
||||||
box = layout.box()
|
box = layout.box()
|
||||||
box.label(text=label, icon='RENDER_ANIMATION')
|
box.label(text=label, icon='RENDER_ANIMATION')
|
||||||
box.operator(
|
box.operator(
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
"""
|
"""
|
||||||
YOLOv8 Pose Head Detector using PyTorch with ROCm support.
|
YOLOv8 Head Detector using CrowdHuman-trained model with PyTorch ROCm support.
|
||||||
|
|
||||||
Detects human heads from all angles (frontal, profile, rear) by using
|
Directly detects human heads (frontal, profile, rear) using the Owen718
|
||||||
YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
|
CrowdHuman YOLOv8 model, which was trained on dense crowd scenes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
@ -10,96 +10,27 @@ from typing import List, Tuple, Optional
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
# COCO pose keypoint indices
|
def _download_model(dest_path: str):
|
||||||
_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear
|
"""モデルが存在しない場合に手動ダウンロード手順を表示して例外を送出する。"""
|
||||||
_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder
|
gdrive_id = "1qlBmiEU4GBV13fxPhLZqjhjBbREvs8-m"
|
||||||
_KP_CONF_THRESH = 0.3
|
raise RuntimeError(
|
||||||
|
f"モデルファイルが見つかりません: {dest_path}\n"
|
||||||
|
"以下の手順でダウンロードしてください:\n"
|
||||||
|
f" 1. https://drive.google.com/file/d/{gdrive_id} を開く\n"
|
||||||
|
f" 2. ダウンロードしたファイルを {dest_path} に配置する"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _head_bbox_from_pose(
|
class YOLOHeadDetector:
|
||||||
kp_xy: np.ndarray,
|
|
||||||
kp_conf: np.ndarray,
|
|
||||||
person_x1: float,
|
|
||||||
person_y1: float,
|
|
||||||
person_x2: float,
|
|
||||||
person_y2: float,
|
|
||||||
) -> Tuple[int, int, int, int]:
|
|
||||||
"""
|
"""
|
||||||
Estimate head bounding box (x, y, w, h) from COCO pose keypoints.
|
Head detector using CrowdHuman-trained YOLOv8 model with PyTorch ROCm support.
|
||||||
|
|
||||||
Strategy:
|
Directly detects heads (class 0: head) without pose estimation,
|
||||||
1. Use head keypoints (0-4: nose, eyes, ears) if visible.
|
enabling robust detection of rear-facing, side-facing, and partially
|
||||||
2. Fall back to shoulder keypoints (5-6) to infer head position.
|
visible people in dense crowd scenes.
|
||||||
3. Last resort: use top of the person bounding box.
|
|
||||||
"""
|
|
||||||
person_w = max(person_x2 - person_x1, 1.0)
|
|
||||||
|
|
||||||
# --- Step 1: head keypoints ---
|
|
||||||
visible_head = [
|
|
||||||
(float(kp_xy[i][0]), float(kp_xy[i][1]))
|
|
||||||
for i in _HEAD_KP
|
|
||||||
if float(kp_conf[i]) > _KP_CONF_THRESH
|
|
||||||
]
|
|
||||||
if visible_head:
|
|
||||||
xs = [p[0] for p in visible_head]
|
|
||||||
ys = [p[1] for p in visible_head]
|
|
||||||
kp_x1, kp_y1 = min(xs), min(ys)
|
|
||||||
kp_x2, kp_y2 = max(xs), max(ys)
|
|
||||||
span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
|
|
||||||
cx = (kp_x1 + kp_x2) / 2.0
|
|
||||||
cy = (kp_y1 + kp_y2) / 2.0
|
|
||||||
|
|
||||||
# span はキーポイントの外接幅(≒顔幅)なので、半径 = span/2 で顔と等倍になる
|
|
||||||
r = max(span * 0.5, person_w * 0.10)
|
|
||||||
x1 = int(cx - r)
|
|
||||||
y1 = int(cy - r)
|
|
||||||
x2 = int(cx + r)
|
|
||||||
y2 = int(cy + r)
|
|
||||||
return x1, y1, x2 - x1, y2 - y1
|
|
||||||
|
|
||||||
# --- Step 2: shoulder keypoints ---
|
|
||||||
visible_shoulder = [
|
|
||||||
(float(kp_xy[i][0]), float(kp_xy[i][1]))
|
|
||||||
for i in _SHOULDER_KP
|
|
||||||
if float(kp_conf[i]) > _KP_CONF_THRESH
|
|
||||||
]
|
|
||||||
if visible_shoulder:
|
|
||||||
cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
|
|
||||||
cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
|
|
||||||
if len(visible_shoulder) == 2:
|
|
||||||
sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
|
|
||||||
else:
|
|
||||||
sh_width = person_w * 0.5
|
|
||||||
# 肩幅は顔幅の約1.5〜2倍なので、0.3倍で顔サイズに近い半径になる
|
|
||||||
r = max(sh_width * 0.3, person_w * 0.12)
|
|
||||||
cy = cy_sh - r * 1.3 # 頭の中心は肩より上
|
|
||||||
x1 = int(cx - r)
|
|
||||||
y1 = int(cy - r)
|
|
||||||
x2 = int(cx + r)
|
|
||||||
y2 = int(cy + r)
|
|
||||||
return x1, y1, x2 - x1, y2 - y1
|
|
||||||
|
|
||||||
# --- Step 3: person bbox top ---
|
|
||||||
# 顔幅は人物幅の約20〜30%なので、半径 = person_w * 0.15 で顔サイズに近い
|
|
||||||
r = max(person_w * 0.15, 20.0)
|
|
||||||
cx = (person_x1 + person_x2) / 2.0
|
|
||||||
x1 = int(cx - r)
|
|
||||||
y1 = int(person_y1)
|
|
||||||
x2 = int(cx + r)
|
|
||||||
y2 = int(person_y1 + r * 2.0)
|
|
||||||
return x1, y1, x2 - x1, y2 - y1
|
|
||||||
|
|
||||||
|
|
||||||
class YOLOPoseHeadDetector:
|
|
||||||
"""
|
|
||||||
Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
|
|
||||||
|
|
||||||
Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
|
|
||||||
so that detection works regardless of the person's facing direction.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Standard Ultralytics model — auto-downloaded on first use
|
DEFAULT_MODEL = os.path.join("models", "crowdhuman_yolov8_head.pt")
|
||||||
DEFAULT_MODEL = os.path.join("models", "yolov8n-pose.pt")
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@ -117,19 +48,19 @@ class YOLOPoseHeadDetector:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def model(self):
|
def model(self):
|
||||||
"""Lazy-load YOLO pose model."""
|
"""Lazy-load YOLO head detection model."""
|
||||||
if self._model is None:
|
if self._model is None:
|
||||||
from ultralytics import YOLO
|
from ultralytics import YOLO
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
# Use provided path or let Ultralytics auto-download the default
|
|
||||||
if self._model_path is not None:
|
if self._model_path is not None:
|
||||||
if not os.path.exists(self._model_path):
|
if not os.path.exists(self._model_path):
|
||||||
raise FileNotFoundError(f"Model not found: {self._model_path}")
|
raise FileNotFoundError(f"Model not found: {self._model_path}")
|
||||||
model_path = self._model_path
|
model_path = self._model_path
|
||||||
else:
|
else:
|
||||||
model_path = self.DEFAULT_MODEL
|
model_path = self.DEFAULT_MODEL
|
||||||
os.makedirs(os.path.dirname(model_path), exist_ok=True)
|
if not os.path.exists(model_path):
|
||||||
|
_download_model(model_path)
|
||||||
|
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
self._device = 'cuda'
|
self._device = 'cuda'
|
||||||
|
|
@ -141,7 +72,7 @@ class YOLOPoseHeadDetector:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._model = YOLO(model_path)
|
self._model = YOLO(model_path)
|
||||||
print(f"[FaceMask] Pose model loaded: {model_path}")
|
print(f"[FaceMask] Head detection model loaded: {model_path}")
|
||||||
print(f"[FaceMask] Device: {self._device}")
|
print(f"[FaceMask] Device: {self._device}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[FaceMask] Error loading model: {e}")
|
print(f"[FaceMask] Error loading model: {e}")
|
||||||
|
|
@ -152,29 +83,14 @@ class YOLOPoseHeadDetector:
|
||||||
return self._model
|
return self._model
|
||||||
|
|
||||||
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
|
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
|
||||||
"""Convert a single YOLO pose result to (x, y, w, h, conf) tuples."""
|
"""Convert a single YOLO result to (x, y, w, h, conf) tuples."""
|
||||||
|
if result.boxes is None:
|
||||||
|
return []
|
||||||
detections = []
|
detections = []
|
||||||
if result.boxes is None or result.keypoints is None:
|
for box in result.boxes:
|
||||||
return detections
|
|
||||||
|
|
||||||
boxes = result.boxes
|
|
||||||
keypoints = result.keypoints
|
|
||||||
|
|
||||||
for i, box in enumerate(boxes):
|
|
||||||
conf = float(box.conf[0].cpu().numpy())
|
conf = float(box.conf[0].cpu().numpy())
|
||||||
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
|
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
|
||||||
|
detections.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1), conf))
|
||||||
# Extract keypoints for this person
|
|
||||||
kp_data = keypoints.data[i].cpu().numpy() # shape (17, 3): x, y, conf
|
|
||||||
kp_xy = kp_data[:, :2]
|
|
||||||
kp_conf = kp_data[:, 2]
|
|
||||||
|
|
||||||
hx, hy, hw, hh = _head_bbox_from_pose(
|
|
||||||
kp_xy, kp_conf,
|
|
||||||
float(x1), float(y1), float(x2), float(y2),
|
|
||||||
)
|
|
||||||
detections.append((hx, hy, hw, hh, conf))
|
|
||||||
|
|
||||||
return detections
|
return detections
|
||||||
|
|
||||||
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
|
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
|
||||||
|
|
@ -303,12 +219,12 @@ class YOLOPoseHeadDetector:
|
||||||
|
|
||||||
|
|
||||||
# Singleton instance
|
# Singleton instance
|
||||||
_detector: Optional[YOLOPoseHeadDetector] = None
|
_detector: Optional[YOLOHeadDetector] = None
|
||||||
|
|
||||||
|
|
||||||
def get_detector(**kwargs) -> YOLOPoseHeadDetector:
|
def get_detector(**kwargs) -> YOLOHeadDetector:
|
||||||
"""Get or create the global YOLO pose head detector instance."""
|
"""Get or create the global YOLO head detector instance."""
|
||||||
global _detector
|
global _detector
|
||||||
if _detector is None:
|
if _detector is None:
|
||||||
_detector = YOLOPoseHeadDetector(**kwargs)
|
_detector = YOLOHeadDetector(**kwargs)
|
||||||
return _detector
|
return _detector
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user