ROI修正/ハードウェアエンコード

2026-02-16 18:20:31 +09:00 · 2026-02-16 18:20:31 +09:00 · 914667edbf
commit 914667edbf
parent 0d63b2ef6d
2 changed files with 140 additions and 30 deletions
--- a/flake.nix
+++ b/flake.nix
@ -73,9 +73,11 @@
              pip install --quiet -r "$PWD/requirements.txt"
            fi
-            # opencv-pythonが入っていた場合はheadlessに統一
+            # OpenCVは壊れやすいので、import失敗時のみheadlessを強制再導入
-            pip uninstall -y opencv-python opencv 2>/dev/null || true
+            if ! python -c "import cv2" >/dev/null 2>&1; then
-            pip install --quiet --upgrade opencv-python-headless
+              echo "[Setup] Repairing OpenCV (opencv-python-headless)..."
              pip install --quiet --force-reinstall --no-cache-dir opencv-python-headless
            fi
            # Pythonパスにカレントディレクトリを追加
            export PYTHONPATH="$PWD:$PYTHONPATH"
--- a/server/main.py
+++ b/server/main.py
@ -34,6 +34,7 @@ fix_library_path()
 import threading
 import uuid
 import traceback
 import subprocess
 from typing import Dict, Optional, List
 from pathlib import Path
@ -92,15 +93,100 @@ class BakeRequest(BaseModel):
    format: str = "mp4"
 class _FFmpegPipeWriter:
    """Write BGR frames to ffmpeg stdin."""
    def __init__(self, cmd: List[str]):
        self._proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )
        if self._proc.stdin is None:
            self._proc.kill()
            raise RuntimeError("Failed to open ffmpeg stdin")
    def write(self, frame: np.ndarray) -> None:
        if self._proc.stdin is None:
            raise RuntimeError("ffmpeg stdin is not available")
        self._proc.stdin.write(frame.tobytes())
    def release(self) -> None:
        if self._proc.stdin:
            try:
                self._proc.stdin.close()
            except Exception:
                pass
        stderr_output = ""
        if self._proc.stderr:
            try:
                stderr_output = self._proc.stderr.read().decode("utf-8", errors="replace")
            except Exception:
                stderr_output = ""
        rc = self._proc.wait()
        if rc != 0:
            tail = "\n".join(stderr_output.strip().splitlines()[-8:])
            raise RuntimeError(f"ffmpeg writer failed (code={rc}): {tail}")
 def _build_ffmpeg_vaapi_writer(
    output_path: str,
    fps: float,
    width: int,
    height: int,
 ) -> _FFmpegPipeWriter:
    """Create ffmpeg h264_vaapi writer with QP=24."""
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-y",
        "-vaapi_device",
        "/dev/dri/renderD128",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "bgr24",
        "-s",
        f"{width}x{height}",
        "-r",
        f"{fps}",
        "-i",
        "-",
        "-an",
        "-vf",
        "format=nv12,hwupload",
        "-c:v",
        "h264_vaapi",
        "-qp",
        "24",
        output_path,
    ]
    return _FFmpegPipeWriter(cmd)
 def _build_video_writer(
    output_path: str,
    fmt: str,
    fps: float,
    width: int,
    height: int,
-) -> cv2.VideoWriter:
+) -> object:
-    """Create VideoWriter with codec fallback per format."""
+    """Create writer with VAAPI preference and OpenCV fallback."""
    format_key = fmt.lower()
    if format_key in {"mp4", "mov"}:
        try:
            writer = _build_ffmpeg_vaapi_writer(output_path, fps, width, height)
            print("[FaceMask] Using output encoder: ffmpeg h264_vaapi (-qp 24)")
            return writer
        except Exception as e:
            print(f"[FaceMask] VAAPI writer unavailable, fallback to OpenCV: {e}")
    codec_candidates = {
        "mp4": ["avc1", "mp4v"],
        "mov": ["avc1", "mp4v"],
@ -372,52 +458,68 @@ def process_bake_task(task_id: str, req: BakeRequest):
                tasks[task_id].progress = idx + 1
                continue
-            mask_gray = np.zeros((src_height, src_width), dtype=np.uint8)
+            # Step 1: Calculate ROI bounds from all boxes first
            min_x, min_y = src_width, src_height
            max_x, max_y = 0, 0
            valid_boxes = []
            for box in frame_boxes:
                if not isinstance(box, list) or len(box) < 4:
                    continue
                x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
                if w <= 0 or h <= 0:
                    continue
-                center = (x + w // 2, y + h // 2)
+                valid_boxes.append((x, y, w, h))
-                axes = (max(1, w // 2), max(1, h // 2))
+                min_x = min(min_x, x)
-                cv2.ellipse(mask_gray, center, axes, 0, 0, 360, 255, -1)
+                min_y = min(min_y, y)
                max_x = max(max_x, x + w)
                max_y = max(max_y, y + h)
-            if cv2.countNonZero(mask_gray) == 0:
+            if not valid_boxes:
                writer.write(src_frame)
                tasks[task_id].progress = idx + 1
                continue
-            mask_gray = cv2.GaussianBlur(mask_gray, (feather_kernel, feather_kernel), 0)
+            # Step 2: Expand ROI bounds with blur margin
            _, mask_binary = cv2.threshold(mask_gray, 2, 255, cv2.THRESH_BINARY)
            non_zero_coords = cv2.findNonZero(mask_binary)
            if non_zero_coords is None:
                writer.write(src_frame)
                tasks[task_id].progress = idx + 1
                continue
            x, y, w, h = cv2.boundingRect(non_zero_coords)
            blur_margin = max(1, (blur_size // 2) + feather_radius)
-            x1 = max(0, x - blur_margin)
+            roi_x1 = max(0, min_x - blur_margin)
-            y1 = max(0, y - blur_margin)
+            roi_y1 = max(0, min_y - blur_margin)
-            x2 = min(src_width, x + w + blur_margin)
+            roi_x2 = min(src_width, max_x + blur_margin)
-            y2 = min(src_height, y + h + blur_margin)
+            roi_y2 = min(src_height, max_y + blur_margin)
            roi_width = roi_x2 - roi_x1
            roi_height = roi_y2 - roi_y1
-            roi_src = src_frame[y1:y2, x1:x2]
+            if roi_width <= 0 or roi_height <= 0:
            roi_mask = mask_gray[y1:y2, x1:x2]
            if roi_src.size == 0:
                writer.write(src_frame)
                tasks[task_id].progress = idx + 1
                continue
            # Step 3: Create ROI-sized mask only
            roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
            for x, y, w, h in valid_boxes:
                # Convert to ROI coordinate system
                center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1)
                axes = (max(1, w // 2), max(1, h // 2))
                cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
            # Step 4: Apply feathering to ROI mask only
            roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0)
            # Step 5: Extract ROI from source frame
            roi_src = src_frame[roi_y1:roi_y2, roi_x1:roi_x2]
            # Step 6: Apply blur to ROI
            roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
            # Step 7: Alpha blend
            roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
            roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
                roi_blurred.astype(np.float32) * roi_alpha
            )
-            output_frame = src_frame.copy()
+
-            output_frame[y1:y2, x1:x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
+            # Step 8: Write directly to source frame (no copy needed)
-            writer.write(output_frame)
+            src_frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
            writer.write(src_frame)
            tasks[task_id].progress = idx + 1
        if tasks[task_id].status == TaskStatus.PROCESSING:
@ -435,7 +537,13 @@ def process_bake_task(task_id: str, req: BakeRequest):
        if src_cap:
            src_cap.release()
        if writer:
-            writer.release()
+            try:
                writer.release()
            except Exception as e:
                if tasks[task_id].status not in (TaskStatus.FAILED, TaskStatus.CANCELLED):
                    tasks[task_id].status = TaskStatus.FAILED
                    tasks[task_id].message = f"Writer finalization failed: {e}"
                print(f"[FaceMask] Writer release error for task {task_id}: {e}")
        if task_id in cancel_events:
            del cancel_events[task_id]