姿勢推定と頭部検知を合成する手段を追加
This commit is contained in:
parent
de99aef9ad
commit
eb028ed278
|
|
@ -141,6 +141,90 @@ class AsyncMaskGenerator:
|
||||||
self.worker_thread.start()
|
self.worker_thread.start()
|
||||||
bpy.app.timers.register(self._check_progress, first_interval=0.1)
|
bpy.app.timers.register(self._check_progress, first_interval=0.1)
|
||||||
|
|
||||||
|
def start_augment_pose(
    self,
    detections_path: str,
    total_frames: int,
    conf_threshold: float = 0.5,
    iou_threshold: float = 0.45,
    on_complete=None,
    on_progress=None,
):
    """Start asynchronous pose augmentation of an existing detection cache.

    Spawns a worker thread that asks the inference server to merge
    pose-derived head boxes into the cache at *detections_path*, then
    registers a Blender timer so progress is polled on the main thread.
    """
    # Rebind the module-level bpy name so the timer registration below
    # works even if this module was first imported outside Blender.
    global bpy
    import bpy as _bpy
    bpy = _bpy

    if self.is_running:
        raise RuntimeError("Mask generation already in progress")

    self.is_running = True
    self.total_frames = total_frames
    self.current_frame = 0
    self._on_complete = on_complete
    self._on_progress = on_progress

    # Daemon thread: must not keep Blender alive on shutdown.
    worker = threading.Thread(
        target=self._worker_augment_pose,
        args=(detections_path, conf_threshold, iou_threshold),
        daemon=True,
    )
    self.worker_thread = worker
    worker.start()
    # Poll result/progress queues from Blender's main thread.
    bpy.app.timers.register(self._check_progress, first_interval=0.1)
|
||||||
|
|
||||||
|
def _worker_augment_pose(
    self,
    detections_path: str,
    conf_threshold: float,
    iou_threshold: float,
):
    """Worker thread: call client.augment_pose() and poll the task by id.

    Runs off the main thread; communicates with the UI exclusively via
    self.progress_queue / self.result_queue.
    """
    import time
    from .inference_client import get_client

    try:
        client = get_client()
        task_id = client.augment_pose(
            detections_path=detections_path,
            conf_threshold=conf_threshold,
            iou_threshold=iou_threshold,
        )

        while self.is_running:
            status = client.get_task_status(task_id)
            state = status.get("status")

            # The server learns the real frame count from the cache, so
            # adopt it as soon as it is reported.
            reported_total = status.get("total", 0)
            if reported_total > 0:
                self.total_frames = reported_total

            if state == "completed":
                final_progress = status.get("progress", self.total_frames)
                if final_progress >= 0:
                    self.progress_queue.put(("progress", final_progress))
                self.result_queue.put(
                    ("done", status.get("result_path", detections_path))
                )
                return
            elif state == "failed":
                self.result_queue.put(
                    ("error", status.get("message", "Unknown error"))
                )
                return
            elif state == "cancelled":
                self.result_queue.put(("cancelled", None))
                return

            current = status.get("progress", 0)
            if current >= 0:
                self.progress_queue.put(("progress", current))
            time.sleep(0.5)

        # is_running was cleared externally: propagate the cancellation
        # to the server and report it back to the UI.
        client.cancel_task(task_id)
        self.result_queue.put(("cancelled", None))
    except Exception as e:
        import traceback
        print(f"[FaceMask] Error: {e}")
        traceback.print_exc()
        self.result_queue.put(("error", str(e)))
|
||||||
|
|
||||||
def cancel(self):
    """Cancel the current processing.

    Clears the running flag; the worker loop notices on its next poll
    and shuts the task down cooperatively.
    """
    self.is_running = False
|
||||||
|
|
|
||||||
|
|
@ -237,6 +237,36 @@ class InferenceClient:
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
raise RuntimeError(f"Server error: {e.read().decode('utf-8')}")
|
raise RuntimeError(f"Server error: {e.read().decode('utf-8')}")
|
||||||
|
|
||||||
|
def augment_pose(
    self,
    detections_path: str,
    conf_threshold: float,
    iou_threshold: float,
) -> str:
    """Ask the server to merge pose detections into an existing cache.

    Starts the server if it is not already running. Returns the
    server-side task id used for status polling.
    """
    if not self.is_server_running():
        self.start_server()

    body = json.dumps({
        "detections_path": detections_path,
        "conf_threshold": conf_threshold,
        "iou_threshold": iou_threshold,
    }).encode("utf-8")

    request = urllib.request.Request(
        f"{self.SERVER_URL}/augment_pose",
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request) as response:
            payload = json.loads(response.read().decode("utf-8"))
            return payload["id"]
    except urllib.error.HTTPError as e:
        raise RuntimeError(f"Server error: {e.read().decode('utf-8')}")
|
||||||
|
|
||||||
def get_task_status(self, task_id: str) -> Dict[str, Any]:
|
def get_task_status(self, task_id: str) -> Dict[str, Any]:
|
||||||
"""Get status of a task."""
|
"""Get status of a task."""
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -205,10 +205,88 @@ class SEQUENCER_OT_cancel_mask_generation(Operator):
|
||||||
return {'FINISHED'}
|
return {'FINISHED'}
|
||||||
|
|
||||||
|
|
||||||
|
class SEQUENCER_OT_augment_pose_mask(Operator):
    """Add pose-based head detections to existing detection cache."""

    bl_idname = "sequencer.augment_pose_mask"
    bl_label = "Augment with Pose"
    bl_description = "Run pose estimation and merge results into existing detection cache"
    bl_options = {'REGISTER'}

    @classmethod
    def poll(cls, context):
        # Enabled only for an active MOVIE strip that already has a
        # detection cache to augment.
        seq_editor = context.scene.sequence_editor
        if not seq_editor:
            return False
        active = seq_editor.active_strip
        if not active or active.type != 'MOVIE':
            return False
        return check_detection_cache(active.name)

    def execute(self, context):
        strip = context.scene.sequence_editor.active_strip
        cache_dir = get_cache_dir_for_strip(strip.name)
        detections_path = os.path.join(cache_dir, "detections.msgpack")

        if not os.path.exists(detections_path):
            self.report({'ERROR'}, f"Detection cache not found: {detections_path}")
            return {'CANCELLED'}

        generator = get_generator()
        scene = context.scene
        wm = context.window_manager
        wm.mask_progress = 0
        wm.mask_total = 0  # updated to the real value reported by the server

        def _redraw_sequencers():
            # Force the sequencer UI to repaint so progress is visible.
            for area in context.screen.areas:
                if area.type == 'SEQUENCE_EDITOR':
                    area.tag_redraw()

        def on_complete(status, data):
            # Keep the progress bar consistent with the final frame count.
            wm.mask_total = max(wm.mask_total, generator.total_frames)
            if status == "done":
                wm.mask_progress = wm.mask_total
            elif status in {"error", "cancelled"}:
                wm.mask_progress = min(wm.mask_progress, wm.mask_total)

            if status == "done":
                print(f"[FaceMask] Pose augmentation completed: {data}")
            elif status == "error":
                print(f"[FaceMask] Error: {data}")
            elif status == "cancelled":
                print("[FaceMask] Pose augmentation cancelled")

            _redraw_sequencers()

        def on_progress(current, total_f):
            wm.mask_progress = current
            wm.mask_total = total_f
            _redraw_sequencers()

        try:
            generator.start_augment_pose(
                detections_path=detections_path,
                total_frames=0,
                conf_threshold=scene.facemask_conf_threshold,
                iou_threshold=scene.facemask_iou_threshold,
                on_complete=on_complete,
                on_progress=on_progress,
            )
        except RuntimeError as e:
            # Typically "already in progress" — warn, don't error.
            self.report({'WARNING'}, str(e))
            return {'CANCELLED'}
        except Exception as e:
            self.report({'ERROR'}, f"Failed to start pose augmentation: {e}")
            return {'CANCELLED'}

        self.report({'INFO'}, f"Started pose augmentation for {strip.name}")
        return {'FINISHED'}
|
||||||
|
|
||||||
|
|
||||||
# Registration
# Operator classes handed to bpy.utils.register_class at add-on load.
classes = [
    SEQUENCER_OT_generate_face_mask,
    SEQUENCER_OT_cancel_mask_generation,
    SEQUENCER_OT_augment_pose_mask,
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -289,6 +289,12 @@ class SEQUENCER_PT_face_mask(Panel):
|
||||||
icon='FILE_REFRESH',
|
icon='FILE_REFRESH',
|
||||||
)
|
)
|
||||||
op.force = True
|
op.force = True
|
||||||
|
if strip.type == 'MOVIE':
|
||||||
|
box.operator(
|
||||||
|
"sequencer.augment_pose_mask",
|
||||||
|
text="Augment with Pose",
|
||||||
|
icon='MOD_ARMATURE',
|
||||||
|
)
|
||||||
|
|
||||||
def _draw_blur_controls(self, layout, context, strip):
|
def _draw_blur_controls(self, layout, context, strip):
|
||||||
"""Draw blur application controls."""
|
"""Draw blur application controls."""
|
||||||
|
|
|
||||||
|
|
@ -228,3 +228,205 @@ def get_detector(**kwargs) -> YOLOHeadDetector:
|
||||||
if _detector is None:
|
if _detector is None:
|
||||||
_detector = YOLOHeadDetector(**kwargs)
|
_detector = YOLOHeadDetector(**kwargs)
|
||||||
return _detector
|
return _detector
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pose-based head detector (YOLOv8 pose estimation)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# COCO pose keypoint indices
|
||||||
|
_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear
|
||||||
|
_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder
|
||||||
|
_KP_CONF_THRESH = 0.3
|
||||||
|
|
||||||
|
|
||||||
|
def _head_bbox_from_pose(
|
||||||
|
kp_xy: np.ndarray,
|
||||||
|
kp_conf: np.ndarray,
|
||||||
|
person_x1: float,
|
||||||
|
person_y1: float,
|
||||||
|
person_x2: float,
|
||||||
|
person_y2: float,
|
||||||
|
) -> Tuple[int, int, int, int]:
|
||||||
|
"""
|
||||||
|
Estimate head bounding box (x, y, w, h) from COCO pose keypoints.
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
1. Use head keypoints (0-4: nose, eyes, ears) if visible.
|
||||||
|
2. Fall back to shoulder keypoints (5-6) to infer head position.
|
||||||
|
3. Last resort: use top of the person bounding box.
|
||||||
|
"""
|
||||||
|
person_w = max(person_x2 - person_x1, 1.0)
|
||||||
|
|
||||||
|
# --- Step 1: head keypoints ---
|
||||||
|
visible_head = [
|
||||||
|
(float(kp_xy[i][0]), float(kp_xy[i][1]))
|
||||||
|
for i in _HEAD_KP
|
||||||
|
if float(kp_conf[i]) > _KP_CONF_THRESH
|
||||||
|
]
|
||||||
|
if visible_head:
|
||||||
|
xs = [p[0] for p in visible_head]
|
||||||
|
ys = [p[1] for p in visible_head]
|
||||||
|
kp_x1, kp_y1 = min(xs), min(ys)
|
||||||
|
kp_x2, kp_y2 = max(xs), max(ys)
|
||||||
|
span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
|
||||||
|
cx = (kp_x1 + kp_x2) / 2.0
|
||||||
|
cy = (kp_y1 + kp_y2) / 2.0
|
||||||
|
r = max(span * 0.5, person_w * 0.10)
|
||||||
|
x1 = int(cx - r)
|
||||||
|
y1 = int(cy - r)
|
||||||
|
x2 = int(cx + r)
|
||||||
|
y2 = int(cy + r)
|
||||||
|
return x1, y1, x2 - x1, y2 - y1
|
||||||
|
|
||||||
|
# --- Step 2: shoulder keypoints ---
|
||||||
|
visible_shoulder = [
|
||||||
|
(float(kp_xy[i][0]), float(kp_xy[i][1]))
|
||||||
|
for i in _SHOULDER_KP
|
||||||
|
if float(kp_conf[i]) > _KP_CONF_THRESH
|
||||||
|
]
|
||||||
|
if visible_shoulder:
|
||||||
|
cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
|
||||||
|
cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
|
||||||
|
if len(visible_shoulder) == 2:
|
||||||
|
sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
|
||||||
|
else:
|
||||||
|
sh_width = person_w * 0.5
|
||||||
|
r = max(sh_width * 0.3, person_w * 0.12)
|
||||||
|
cy = cy_sh - r * 1.3
|
||||||
|
x1 = int(cx - r)
|
||||||
|
y1 = int(cy - r)
|
||||||
|
x2 = int(cx + r)
|
||||||
|
y2 = int(cy + r)
|
||||||
|
return x1, y1, x2 - x1, y2 - y1
|
||||||
|
|
||||||
|
# --- Step 3: person bbox top ---
|
||||||
|
r = max(person_w * 0.15, 20.0)
|
||||||
|
cx = (person_x1 + person_x2) / 2.0
|
||||||
|
x1 = int(cx - r)
|
||||||
|
y1 = int(person_y1)
|
||||||
|
x2 = int(cx + r)
|
||||||
|
y2 = int(person_y1 + r * 2.0)
|
||||||
|
return x1, y1, x2 - x1, y2 - y1
|
||||||
|
|
||||||
|
|
||||||
|
class YOLOPoseHeadDetector:
    """
    Head detector using YOLOv8 pose estimation with PyTorch ROCm support.

    Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears).
    yolov8l-pose.pt is auto-downloaded by Ultralytics on first use.
    """

    # Default weights location, relative to the working directory.
    DEFAULT_MODEL = os.path.join("models", "yolov8l-pose.pt")

    def __init__(
        self,
        model_path: Optional[str] = None,
        conf_threshold: float = 0.25,
        iou_threshold: float = 0.45,
        input_size: Tuple[int, int] = (640, 640),
    ):
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_size = input_size
        self._model = None  # created lazily by the `model` property
        self._model_path = model_path
        self._device = None  # 'cuda' or 'cpu', chosen on first model load

    @property
    def model(self):
        """Lazy-load YOLO pose model."""
        if self._model is not None:
            return self._model

        from ultralytics import YOLO
        import torch

        model_path = self._model_path if self._model_path is not None else self.DEFAULT_MODEL

        # Prefer the ROCm/CUDA device when torch reports one available.
        if torch.cuda.is_available():
            self._device = 'cuda'
            device_name = torch.cuda.get_device_name(0)
            print(f"[FaceMask] Using ROCm GPU for pose inference: {device_name}")
        else:
            self._device = 'cpu'
            print("[FaceMask] Using CPU for pose inference (ROCm GPU not available)")

        try:
            self._model = YOLO(model_path)
            print(f"[FaceMask] Pose model loaded: {model_path}")
            print(f"[FaceMask] Device: {self._device}")
        except Exception as e:
            print(f"[FaceMask] Error loading pose model: {e}")
            import traceback
            traceback.print_exc()
            raise

        return self._model

    def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
        """Convert a single YOLO pose result to (x, y, w, h, conf) tuples."""
        if result.boxes is None or result.keypoints is None:
            return []

        detections = []
        for i, box in enumerate(result.boxes):
            conf = float(box.conf[0].cpu().numpy())
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()

            # Keypoint row i has shape (17, 3): x, y, confidence per joint.
            kp_data = result.keypoints.data[i].cpu().numpy()
            head = _head_bbox_from_pose(
                kp_data[:, :2], kp_data[:, 2],
                float(x1), float(y1), float(x2), float(y2),
            )
            detections.append((*head, conf))

        return detections

    def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
        """Detect heads in multiple frames at once (batch processing)."""
        if not frames:
            return []

        def _predict(device):
            # Single place for the predict call, shared by the GPU attempt
            # and the CPU fallback.
            return self.model.predict(
                frames,
                conf=self.conf_threshold,
                iou=self.iou_threshold,
                imgsz=self.input_size[0],
                verbose=False,
                device=device,
            )

        try:
            results = _predict(self._device)
        except Exception as e:
            print(f"[FaceMask] ERROR during pose batch inference: {e}")
            import traceback
            traceback.print_exc()
            print("[FaceMask] Falling back to CPU inference...")
            self._device = 'cpu'
            results = _predict('cpu')

        return [self._results_to_detections(r) for r in results]


# Pose detector singleton
_pose_detector: Optional[YOLOPoseHeadDetector] = None


def get_pose_detector(**kwargs) -> YOLOPoseHeadDetector:
    """Get or create the global YOLO pose head detector instance.

    kwargs are only honoured on the first call that constructs the
    singleton; later calls return the existing instance unchanged.
    """
    global _pose_detector
    if _pose_detector is None:
        _pose_detector = YOLOPoseHeadDetector(**kwargs)
    return _pose_detector
|
||||||
|
|
|
||||||
141
server/main.py
141
server/main.py
|
|
@ -49,7 +49,7 @@ import msgpack # noqa: E402
|
||||||
# Add project root to path for imports if needed
|
# Add project root to path for imports if needed
|
||||||
sys.path.append(str(Path(__file__).parent.parent))
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
from server.detector import get_detector # noqa: E402
|
from server.detector import get_detector, get_pose_detector # noqa: E402
|
||||||
|
|
||||||
app = FastAPI(title="Face Mask Inference Server")
|
app = FastAPI(title="Face Mask Inference Server")
|
||||||
|
|
||||||
|
|
@ -142,6 +142,12 @@ class GenerateImagesRequest(BaseModel):
|
||||||
iou_threshold: float = 0.45
|
iou_threshold: float = 0.45
|
||||||
|
|
||||||
|
|
||||||
|
# Request body for POST /augment_pose: merge pose-derived head detections
# into the existing cache file at detections_path.
class AugmentPoseRequest(BaseModel):
    detections_path: str
    conf_threshold: float = 0.5
    iou_threshold: float = 0.45
|
||||||
|
|
||||||
|
|
||||||
class BakeImagesRequest(BaseModel):
|
class BakeImagesRequest(BaseModel):
|
||||||
image_dir: str
|
image_dir: str
|
||||||
filenames: List[str]
|
filenames: List[str]
|
||||||
|
|
@ -582,6 +588,129 @@ def process_bake_images_task(task_id: str, req: BakeImagesRequest):
|
||||||
del cancel_events[task_id]
|
del cancel_events[task_id]
|
||||||
|
|
||||||
|
|
||||||
|
def augment_pose_task(task_id: str, req: AugmentPoseRequest):
    """Background task: run pose estimation and merge results into existing cache.

    Loads the msgpack detection cache at req.detections_path, re-reads the
    source video it references, runs the pose head detector over the same
    frame range, appends the new boxes to each frame's detection list, and
    rewrites the cache in place. Progress/status is reported through the
    module-level `tasks` registry; cancellation via `cancel_events[task_id]`.
    """
    cap = None
    try:
        tasks[task_id].status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)

        # --- Load and validate the existing cache ---
        if not os.path.exists(req.detections_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Detections file not found: {req.detections_path}"
            return

        with open(req.detections_path, "rb") as f:
            payload = msgpack.unpackb(f.read(), raw=False)

        # One list of [x, y, w, h, conf] boxes per frame.
        existing_frames: List[List[List[float]]] = payload.get("frames", [])
        video_path = payload.get("video_path")
        start_frame = int(payload.get("start_frame", 0))
        total = len(existing_frames)

        if not video_path:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Cache does not contain video_path (image caches not supported)"
            return
        if not os.path.exists(video_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Video not found: {video_path}"
            return
        if total == 0:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Cache has no frames"
            return

        tasks[task_id].total = total

        detector = get_pose_detector(
            conf_threshold=req.conf_threshold,
            iou_threshold=req.iou_threshold,
        )
        _ = detector.model  # force the lazy model load before opening the video

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Failed to open video"
            return

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Seek to the cache's first frame; fall back to reading frames one
        # by one when the backend cannot seek.
        if start_frame > 0:
            seek_ok = cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
            if not seek_ok:
                for _ in range(start_frame):
                    ret, _ = cap.read()
                    if not ret:
                        tasks[task_id].status = TaskStatus.FAILED
                        tasks[task_id].message = f"Failed to seek to start frame: {start_frame}"
                        return

        frame_buffer: List[np.ndarray] = []
        buffer_indices: List[int] = []  # matching indices into existing_frames
        current_count = 0
        batch_size = 5

        def process_pose_batch():
            # Run pose inference on the buffered frames and merge the head
            # boxes (clamped to the frame bounds) into the cache entries.
            nonlocal current_count
            if not frame_buffer:
                return
            batch_detections = detector.detect_batch(frame_buffer)
            for idx, detections in zip(buffer_indices, batch_detections):
                for x, y, w, h, conf in detections:
                    bx, by, bw, bh = int(x), int(y), int(w), int(h)
                    bx = max(0, bx)
                    by = max(0, by)
                    bw = min(width - bx, bw)
                    bh = min(height - by, bh)
                    if bw > 0 and bh > 0:
                        existing_frames[idx].append([bx, by, bw, bh, float(conf)])
                current_count += 1
                tasks[task_id].progress = current_count
            frame_buffer.clear()
            buffer_indices.clear()

        for i in range(total):
            if cancel_event and cancel_event.is_set():
                tasks[task_id].status = TaskStatus.CANCELLED
                tasks[task_id].message = "Cancelled by user"
                break

            ret, frame = cap.read()
            if not ret:
                # Video shorter than the cache: stop early, keep what we have.
                break

            frame_buffer.append(frame)
            buffer_indices.append(i)
            if len(frame_buffer) >= batch_size:
                process_pose_batch()

        # Flush any remaining frames in a final partial batch.
        if frame_buffer:
            process_pose_batch()

        # Persist only when the task was neither cancelled nor failed.
        if tasks[task_id].status == TaskStatus.PROCESSING:
            payload["frames"] = existing_frames
            with open(req.detections_path, "wb") as f:
                f.write(msgpack.packb(payload, use_bin_type=True))

            tasks[task_id].status = TaskStatus.COMPLETED
            tasks[task_id].result_path = req.detections_path
            tasks[task_id].message = "Pose augmentation completed"
            print(f"[FaceMask] Pose augmentation completed: {req.detections_path}")

    except Exception as e:
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        traceback.print_exc()
    finally:
        if cap:
            cap.release()
        # Drop the cancel flag so the registry does not grow unboundedly.
        if task_id in cancel_events:
            del cancel_events[task_id]
|
||||||
|
|
||||||
|
|
||||||
def process_video_task(task_id: str, req: GenerateRequest):
|
def process_video_task(task_id: str, req: GenerateRequest):
|
||||||
"""Background task to detect faces and save bbox cache as msgpack."""
|
"""Background task to detect faces and save bbox cache as msgpack."""
|
||||||
cap = None
|
cap = None
|
||||||
|
|
@ -1261,6 +1390,16 @@ def generate_images_endpoint(req: GenerateImagesRequest, background_tasks: Backg
|
||||||
return task
|
return task
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/augment_pose", response_model=Task)
def augment_pose_endpoint(req: AugmentPoseRequest, background_tasks: BackgroundTasks):
    """Queue a background pose-augmentation task and return its descriptor."""
    new_id = str(uuid.uuid4())
    new_task = Task(id=new_id, status=TaskStatus.PENDING)
    tasks[new_id] = new_task
    # Each task gets its own cancel flag, checked by the worker loop.
    cancel_events[new_id] = threading.Event()
    background_tasks.add_task(augment_pose_task, new_id, req)
    return new_task
|
||||||
|
|
||||||
|
|
||||||
@app.post("/bake_image_blur", response_model=Task)
|
@app.post("/bake_image_blur", response_model=Task)
|
||||||
def bake_image_blur_endpoint(req: BakeImagesRequest, background_tasks: BackgroundTasks):
|
def bake_image_blur_endpoint(req: BakeImagesRequest, background_tasks: BackgroundTasks):
|
||||||
task_id = str(uuid.uuid4())
|
task_id = str(uuid.uuid4())
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user