姿勢推定モデルを止め、頭部特化モデルに変更してテスト

2026-02-23 03:56:23 +09:00 · 2026-02-23 03:56:23 +09:00 · de99aef9ad
commit de99aef9ad
parent dc41327cea
2 changed files with 34 additions and 116 deletions
--- a/panels/vse_panel.py
+++ b/panels/vse_panel.py
@@ -239,7 +239,9 @@ class SEQUENCER_PT_face_mask(Panel):
        if not selected_movies:
            return
        count = len(selected_movies)
-        label = f"Batch ({count} strip{'s' if count > 1 else ''} selected)"
+        image_count = sum(1 for s in selected_movies if s.type == "IMAGE")
+        video_count = sum(1 for s in selected_movies if s.type == "MOVIE")
+        label = f"Batch ({count} selected, image: {image_count}, video: {video_count})"
        box = layout.box()
        box.label(text=label, icon='RENDER_ANIMATION')
        box.operator(
--- a/server/detector.py
+++ b/server/detector.py
@@ -1,8 +1,8 @@
 """
-YOLOv8 Pose Head Detector using PyTorch with ROCm support.
+YOLOv8 Head Detector using CrowdHuman-trained model with PyTorch ROCm support.
-Detects human heads from all angles (frontal, profile, rear) by using
+Directly detects human heads (frontal, profile, rear) using the Owen718
-YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
+CrowdHuman YOLOv8 model, which was trained on dense crowd scenes.
 """
 import os
@@ -10,96 +10,27 @@ from typing import List, Tuple, Optional
 import numpy as np
-# COCO pose keypoint indices
+def _download_model(dest_path: str):
-_HEAD_KP = [0, 1, 2, 3, 4]      # nose, left_eye, right_eye, left_ear, right_ear
+    """モデルが存在しない場合に手動ダウンロード手順を表示して例外を送出する。"""
-_SHOULDER_KP = [5, 6]            # left_shoulder, right_shoulder
+    gdrive_id = "1qlBmiEU4GBV13fxPhLZqjhjBbREvs8-m"
-_KP_CONF_THRESH = 0.3
+    raise RuntimeError(
        f"モデルファイルが見つかりません: {dest_path}\n"
        "以下の手順でダウンロードしてください:\n"
        f"  1. https://drive.google.com/file/d/{gdrive_id} を開く\n"
        f"  2. ダウンロードしたファイルを {dest_path} に配置する"
    )
-def _head_bbox_from_pose(
+class YOLOHeadDetector:
    kp_xy: np.ndarray,
    kp_conf: np.ndarray,
    person_x1: float,
    person_y1: float,
    person_x2: float,
    person_y2: float,
 ) -> Tuple[int, int, int, int]:
    """
-    Estimate head bounding box (x, y, w, h) from COCO pose keypoints.
+    Head detector using CrowdHuman-trained YOLOv8 model with PyTorch ROCm support.
-    Strategy:
+    Directly detects heads (class 0: head) without pose estimation,
-    1. Use head keypoints (0-4: nose, eyes, ears) if visible.
+    enabling robust detection of rear-facing, side-facing, and partially
-    2. Fall back to shoulder keypoints (5-6) to infer head position.
+    visible people in dense crowd scenes.
    3. Last resort: use top of the person bounding box.
    """
    person_w = max(person_x2 - person_x1, 1.0)
    # --- Step 1: head keypoints ---
    visible_head = [
        (float(kp_xy[i][0]), float(kp_xy[i][1]))
        for i in _HEAD_KP
        if float(kp_conf[i]) > _KP_CONF_THRESH
    ]
    if visible_head:
        xs = [p[0] for p in visible_head]
        ys = [p[1] for p in visible_head]
        kp_x1, kp_y1 = min(xs), min(ys)
        kp_x2, kp_y2 = max(xs), max(ys)
        span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
        cx = (kp_x1 + kp_x2) / 2.0
        cy = (kp_y1 + kp_y2) / 2.0
        # span はキーポイントの外接幅（≒顔幅）なので、半径 = span/2 で顔と等倍になる
        r = max(span * 0.5, person_w * 0.10)
        x1 = int(cx - r)
        y1 = int(cy - r)
        x2 = int(cx + r)
        y2 = int(cy + r)
        return x1, y1, x2 - x1, y2 - y1
    # --- Step 2: shoulder keypoints ---
    visible_shoulder = [
        (float(kp_xy[i][0]), float(kp_xy[i][1]))
        for i in _SHOULDER_KP
        if float(kp_conf[i]) > _KP_CONF_THRESH
    ]
    if visible_shoulder:
        cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
        cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
        if len(visible_shoulder) == 2:
            sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
        else:
            sh_width = person_w * 0.5
        # 肩幅は顔幅の約1.5〜2倍なので、0.3倍で顔サイズに近い半径になる
        r = max(sh_width * 0.3, person_w * 0.12)
        cy = cy_sh - r * 1.3   # 頭の中心は肩より上
        x1 = int(cx - r)
        y1 = int(cy - r)
        x2 = int(cx + r)
        y2 = int(cy + r)
        return x1, y1, x2 - x1, y2 - y1
    # --- Step 3: person bbox top ---
    # 顔幅は人物幅の約20〜30%なので、半径 = person_w * 0.15 で顔サイズに近い
    r = max(person_w * 0.15, 20.0)
    cx = (person_x1 + person_x2) / 2.0
    x1 = int(cx - r)
    y1 = int(person_y1)
    x2 = int(cx + r)
    y2 = int(person_y1 + r * 2.0)
    return x1, y1, x2 - x1, y2 - y1
 class YOLOPoseHeadDetector:
    """
    Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
    Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
    so that detection works regardless of the person's facing direction.
    """
-    # Standard Ultralytics model — auto-downloaded on first use
+    DEFAULT_MODEL = os.path.join("models", "crowdhuman_yolov8_head.pt")
    DEFAULT_MODEL = os.path.join("models", "yolov8n-pose.pt")
    def __init__(
        self,
@@ -117,19 +48,19 @@ class YOLOPoseHeadDetector:
    @property
    def model(self):
-        """Lazy-load YOLO pose model."""
+        """Lazy-load YOLO head detection model."""
        if self._model is None:
            from ultralytics import YOLO
            import torch
            # Use provided path or let Ultralytics auto-download the default
            if self._model_path is not None:
                if not os.path.exists(self._model_path):
                    raise FileNotFoundError(f"Model not found: {self._model_path}")
                model_path = self._model_path
            else:
                model_path = self.DEFAULT_MODEL
-                os.makedirs(os.path.dirname(model_path), exist_ok=True)
+                if not os.path.exists(model_path):
                    _download_model(model_path)
            if torch.cuda.is_available():
                self._device = 'cuda'
@@ -141,7 +72,7 @@ class YOLOPoseHeadDetector:
            try:
                self._model = YOLO(model_path)
-                print(f"[FaceMask] Pose model loaded: {model_path}")
+                print(f"[FaceMask] Head detection model loaded: {model_path}")
                print(f"[FaceMask] Device: {self._device}")
            except Exception as e:
                print(f"[FaceMask] Error loading model: {e}")
@@ -152,29 +83,14 @@ class YOLOPoseHeadDetector:
        return self._model
    def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
-        """Convert a single YOLO pose result to (x, y, w, h, conf) tuples."""
+        """Convert a single YOLO result to (x, y, w, h, conf) tuples."""
        if result.boxes is None:
            return []
        detections = []
-        if result.boxes is None or result.keypoints is None:
+        for box in result.boxes:
            return detections
        boxes = result.boxes
        keypoints = result.keypoints
        for i, box in enumerate(boxes):
            conf = float(box.conf[0].cpu().numpy())
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
-
+            detections.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1), conf))
            # Extract keypoints for this person
            kp_data = keypoints.data[i].cpu().numpy()  # shape (17, 3): x, y, conf
            kp_xy = kp_data[:, :2]
            kp_conf = kp_data[:, 2]
            hx, hy, hw, hh = _head_bbox_from_pose(
                kp_xy, kp_conf,
                float(x1), float(y1), float(x2), float(y2),
            )
            detections.append((hx, hy, hw, hh, conf))
        return detections
    def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
@@ -303,12 +219,12 @@ class YOLOPoseHeadDetector:
 # Singleton instance
-_detector: Optional[YOLOPoseHeadDetector] = None
+_detector: Optional[YOLOHeadDetector] = None
-def get_detector(**kwargs) -> YOLOPoseHeadDetector:
+def get_detector(**kwargs) -> YOLOHeadDetector:
-    """Get or create the global YOLO pose head detector instance."""
+    """Get or create the global YOLO head detector instance."""
    global _detector
    if _detector is None:
-        _detector = YOLOPoseHeadDetector(**kwargs)
+        _detector = YOLOHeadDetector(**kwargs)
    return _detector