ROI修正/ハードウェアエンコード

2026-02-16 18:20:31 +09:00 · 2026-02-16 18:20:31 +09:00 · 914667edbf
commit 914667edbf
parent 0d63b2ef6d
2 changed files with 140 additions and 30 deletions
--- a/flake.nix
+++ b/flake.nix
@@ -73,9 +73,11 @@
              pip install --quiet -r "$PWD/requirements.txt"
            fi

-            # opencv-pythonが入っていた場合はheadlessに統一
-            pip uninstall -y opencv-python opencv 2>/dev/null || true
-            pip install --quiet --upgrade opencv-python-headless
+            # OpenCVは壊れやすいので、import失敗時のみheadlessを強制再導入
+            if ! python -c "import cv2" >/dev/null 2>&1; then
+              echo "[Setup] Repairing OpenCV (opencv-python-headless)..."
+              pip install --quiet --force-reinstall --no-cache-dir opencv-python-headless
+            fi

            # Pythonパスにカレントディレクトリを追加
            export PYTHONPATH="$PWD:$PYTHONPATH"
--- a/server/main.py
+++ b/server/main.py
@@ -34,6 +34,7 @@ fix_library_path()
 import threading
 import uuid
 import traceback
+import subprocess
 from typing import Dict, Optional, List
 from pathlib import Path

@@ -92,15 +93,100 @@ class BakeRequest(BaseModel):
    format: str = "mp4"


+class _FFmpegPipeWriter:
+    """Write BGR frames to ffmpeg stdin."""
+
+    def __init__(self, cmd: List[str]):
+        self._proc = subprocess.Popen(
+            cmd,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.PIPE,
+        )
+        if self._proc.stdin is None:
+            self._proc.kill()
+            raise RuntimeError("Failed to open ffmpeg stdin")
+
+    def write(self, frame: np.ndarray) -> None:
+        if self._proc.stdin is None:
+            raise RuntimeError("ffmpeg stdin is not available")
+        self._proc.stdin.write(frame.tobytes())
+
+    def release(self) -> None:
+        if self._proc.stdin:
+            try:
+                self._proc.stdin.close()
+            except Exception:
+                pass
+
+        stderr_output = ""
+        if self._proc.stderr:
+            try:
+                stderr_output = self._proc.stderr.read().decode("utf-8", errors="replace")
+            except Exception:
+                stderr_output = ""
+
+        rc = self._proc.wait()
+        if rc != 0:
+            tail = "\n".join(stderr_output.strip().splitlines()[-8:])
+            raise RuntimeError(f"ffmpeg writer failed (code={rc}): {tail}")
+
+
+def _build_ffmpeg_vaapi_writer(
+    output_path: str,
+    fps: float,
+    width: int,
+    height: int,
+) -> _FFmpegPipeWriter:
+    """Create ffmpeg h264_vaapi writer with QP=24."""
+    cmd = [
+        "ffmpeg",
+        "-hide_banner",
+        "-loglevel",
+        "error",
+        "-y",
+        "-vaapi_device",
+        "/dev/dri/renderD128",
+        "-f",
+        "rawvideo",
+        "-pix_fmt",
+        "bgr24",
+        "-s",
+        f"{width}x{height}",
+        "-r",
+        f"{fps}",
+        "-i",
+        "-",
+        "-an",
+        "-vf",
+        "format=nv12,hwupload",
+        "-c:v",
+        "h264_vaapi",
+        "-qp",
+        "24",
+        output_path,
+    ]
+    return _FFmpegPipeWriter(cmd)
+
+
 def _build_video_writer(
    output_path: str,
    fmt: str,
    fps: float,
    width: int,
    height: int,
-) -> cv2.VideoWriter:
-    """Create VideoWriter with codec fallback per format."""
+) -> object:
+    """Create writer with VAAPI preference and OpenCV fallback."""
    format_key = fmt.lower()
+
+    if format_key in {"mp4", "mov"}:
+        try:
+            writer = _build_ffmpeg_vaapi_writer(output_path, fps, width, height)
+            print("[FaceMask] Using output encoder: ffmpeg h264_vaapi (-qp 24)")
+            return writer
+        except Exception as e:
+            print(f"[FaceMask] VAAPI writer unavailable, fallback to OpenCV: {e}")
+
    codec_candidates = {
        "mp4": ["avc1", "mp4v"],
        "mov": ["avc1", "mp4v"],
@@ -372,52 +458,68 @@ def process_bake_task(task_id: str, req: BakeRequest):
                tasks[task_id].progress = idx + 1
                continue

-            mask_gray = np.zeros((src_height, src_width), dtype=np.uint8)
+            # Step 1: Calculate ROI bounds from all boxes first
+            min_x, min_y = src_width, src_height
+            max_x, max_y = 0, 0
+            valid_boxes = []
+
            for box in frame_boxes:
                if not isinstance(box, list) or len(box) < 4:
                    continue
                x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
                if w <= 0 or h <= 0:
                    continue
-                center = (x + w // 2, y + h // 2)
-                axes = (max(1, w // 2), max(1, h // 2))
-                cv2.ellipse(mask_gray, center, axes, 0, 0, 360, 255, -1)
+                valid_boxes.append((x, y, w, h))
+                min_x = min(min_x, x)
+                min_y = min(min_y, y)
+                max_x = max(max_x, x + w)
+                max_y = max(max_y, y + h)

-            if cv2.countNonZero(mask_gray) == 0:
+            if not valid_boxes:
                writer.write(src_frame)
                tasks[task_id].progress = idx + 1
                continue

-            mask_gray = cv2.GaussianBlur(mask_gray, (feather_kernel, feather_kernel), 0)
-            _, mask_binary = cv2.threshold(mask_gray, 2, 255, cv2.THRESH_BINARY)
-            non_zero_coords = cv2.findNonZero(mask_binary)
-            if non_zero_coords is None:
-                writer.write(src_frame)
-                tasks[task_id].progress = idx + 1
-                continue
-
-            x, y, w, h = cv2.boundingRect(non_zero_coords)
+            # Step 2: Expand ROI bounds with blur margin
            blur_margin = max(1, (blur_size // 2) + feather_radius)
-            x1 = max(0, x - blur_margin)
-            y1 = max(0, y - blur_margin)
-            x2 = min(src_width, x + w + blur_margin)
-            y2 = min(src_height, y + h + blur_margin)
+            roi_x1 = max(0, min_x - blur_margin)
+            roi_y1 = max(0, min_y - blur_margin)
+            roi_x2 = min(src_width, max_x + blur_margin)
+            roi_y2 = min(src_height, max_y + blur_margin)
+            roi_width = roi_x2 - roi_x1
+            roi_height = roi_y2 - roi_y1

-            roi_src = src_frame[y1:y2, x1:x2]
-            roi_mask = mask_gray[y1:y2, x1:x2]
-            if roi_src.size == 0:
+            if roi_width <= 0 or roi_height <= 0:
                writer.write(src_frame)
                tasks[task_id].progress = idx + 1
                continue

+            # Step 3: Create ROI-sized mask only
+            roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
+            for x, y, w, h in valid_boxes:
+                # Convert to ROI coordinate system
+                center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1)
+                axes = (max(1, w // 2), max(1, h // 2))
+                cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
+
+            # Step 4: Apply feathering to ROI mask only
+            roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0)
+
+            # Step 5: Extract ROI from source frame
+            roi_src = src_frame[roi_y1:roi_y2, roi_x1:roi_x2]
+
+            # Step 6: Apply blur to ROI
            roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
+
+            # Step 7: Alpha blend
            roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
            roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
                roi_blurred.astype(np.float32) * roi_alpha
            )
-            output_frame = src_frame.copy()
-            output_frame[y1:y2, x1:x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
-            writer.write(output_frame)
+
+            # Step 8: Write directly to source frame (no copy needed)
+            src_frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
+            writer.write(src_frame)
            tasks[task_id].progress = idx + 1

        if tasks[task_id].status == TaskStatus.PROCESSING:
@@ -435,7 +537,13 @@ def process_bake_task(task_id: str, req: BakeRequest):
        if src_cap:
            src_cap.release()
        if writer:
-            writer.release()
+            try:
+                writer.release()
+            except Exception as e:
+                if tasks[task_id].status not in (TaskStatus.FAILED, TaskStatus.CANCELLED):
+                    tasks[task_id].status = TaskStatus.FAILED
+                    tasks[task_id].message = f"Writer finalization failed: {e}"
+                print(f"[FaceMask] Writer release error for task {task_id}: {e}")
        if task_id in cancel_events:
            del cancel_events[task_id]