YOLO
This commit is contained in:
parent
3c28cb0c94
commit
c0ad2a551d
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -1,7 +1,7 @@
|
||||||
.mask_cache/
|
.mask_cache/
|
||||||
*.mp4
|
*.mp4
|
||||||
test.blend
|
test.blend
|
||||||
wheels/
|
models/
|
||||||
|
|
||||||
# Python
|
# Python
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|
|
||||||
5
README.md
Normal file
5
README.md
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Blender Plugin: Mask Peoples
|
||||||
|
|
||||||
|
街歩き映像に対して自動モザイクを掛けるために開発しました。
|
||||||
|
|
||||||
|
使用:https://github.com/akanametov/yolo-face
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
schema_version = "1.0.0"
|
schema_version = "1.0.0"
|
||||||
|
|
||||||
id = "mask_peoples"
|
id = "mask_peoples"
|
||||||
version = "0.2.0"
|
version = "0.3.0"
|
||||||
name = "Face Mask Blur"
|
name = "Face Mask Blur"
|
||||||
tagline = "Detect faces and apply blur in VSE for privacy protection"
|
tagline = "GPU-accelerated face detection and blur in VSE using YOLOv11"
|
||||||
maintainer = "Hare"
|
maintainer = "Hare"
|
||||||
type = "add-on"
|
type = "add-on"
|
||||||
license = ["SPDX:GPL-3.0-or-later"]
|
license = ["SPDX:GPL-3.0-or-later"]
|
||||||
|
|
@ -15,11 +15,5 @@ copyright = ["2026 Hare"]
|
||||||
# Valid tags from Blender extension platform
|
# Valid tags from Blender extension platform
|
||||||
tags = ["Sequencer"]
|
tags = ["Sequencer"]
|
||||||
|
|
||||||
# Bundled Python wheels - Blender will install these automatically
|
|
||||||
wheels = [
|
|
||||||
"./wheels/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
|
||||||
"./wheels/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl",
|
|
||||||
]
|
|
||||||
|
|
||||||
[permissions]
|
[permissions]
|
||||||
files = "Read video frames and write mask image cache"
|
files = "Read video frames and write mask image cache"
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
"""Core module exports."""
|
"""Core module exports."""
|
||||||
|
|
||||||
from .face_detector import FaceDetector
|
|
||||||
from .async_generator import AsyncMaskGenerator, get_generator
|
from .async_generator import AsyncMaskGenerator, get_generator
|
||||||
from .compositor_setup import create_mask_blur_node_tree, get_or_create_blur_node_tree
|
from .compositor_setup import create_mask_blur_node_tree, get_or_create_blur_node_tree
|
||||||
|
|
|
||||||
|
|
@ -43,14 +43,14 @@ class AsyncMaskGenerator:
|
||||||
start_frame: int,
|
start_frame: int,
|
||||||
end_frame: int,
|
end_frame: int,
|
||||||
fps: float,
|
fps: float,
|
||||||
scale_factor: float = 1.1,
|
conf_threshold: float = 0.5,
|
||||||
min_neighbors: int = 5,
|
iou_threshold: float = 0.45,
|
||||||
mask_scale: float = 1.5,
|
mask_scale: float = 1.5,
|
||||||
on_complete: Optional[Callable] = None,
|
on_complete: Optional[Callable] = None,
|
||||||
on_progress: Optional[Callable] = None,
|
on_progress: Optional[Callable] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Start asynchronous mask generation.
|
Start asynchronous mask generation with YOLO GPU acceleration.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
video_path: Path to source video file
|
video_path: Path to source video file
|
||||||
|
|
@ -58,8 +58,8 @@ class AsyncMaskGenerator:
|
||||||
start_frame: First frame to process
|
start_frame: First frame to process
|
||||||
end_frame: Last frame to process
|
end_frame: Last frame to process
|
||||||
fps: Video frame rate (for seeking)
|
fps: Video frame rate (for seeking)
|
||||||
scale_factor: Face detection scale factor
|
conf_threshold: YOLO confidence threshold
|
||||||
min_neighbors: Face detection min neighbors
|
iou_threshold: YOLO NMS IoU threshold
|
||||||
mask_scale: Mask region scale factor
|
mask_scale: Mask region scale factor
|
||||||
on_complete: Callback when processing completes (called from main thread)
|
on_complete: Callback when processing completes (called from main thread)
|
||||||
on_progress: Callback for progress updates (called from main thread)
|
on_progress: Callback for progress updates (called from main thread)
|
||||||
|
|
@ -93,8 +93,8 @@ class AsyncMaskGenerator:
|
||||||
start_frame,
|
start_frame,
|
||||||
end_frame,
|
end_frame,
|
||||||
fps,
|
fps,
|
||||||
scale_factor,
|
conf_threshold,
|
||||||
min_neighbors,
|
iou_threshold,
|
||||||
mask_scale,
|
mask_scale,
|
||||||
),
|
),
|
||||||
daemon=True,
|
daemon=True,
|
||||||
|
|
@ -120,77 +120,62 @@ class AsyncMaskGenerator:
|
||||||
start_frame: int,
|
start_frame: int,
|
||||||
end_frame: int,
|
end_frame: int,
|
||||||
fps: float,
|
fps: float,
|
||||||
scale_factor: float,
|
conf_threshold: float,
|
||||||
min_neighbors: int,
|
iou_threshold: float,
|
||||||
mask_scale: float,
|
mask_scale: float,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Worker thread function. Runs face detection and saves masks.
|
Worker thread function. Delegates to inference server and polls status.
|
||||||
|
|
||||||
IMPORTANT: Do NOT use bpy in this function!
|
|
||||||
"""
|
"""
|
||||||
try:
|
import time
|
||||||
import cv2
|
from .inference_client import get_client
|
||||||
print(f"[FaceMask] OpenCV loaded: {cv2.__version__}")
|
|
||||||
from .face_detector import FaceDetector
|
|
||||||
except ImportError as e:
|
|
||||||
print(f"[FaceMask] Import error: {e}")
|
|
||||||
self.result_queue.put(("error", str(e)))
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Initialize detector
|
client = get_client()
|
||||||
detector = FaceDetector(
|
|
||||||
scale_factor=scale_factor,
|
# Start task on server
|
||||||
min_neighbors=min_neighbors,
|
print(f"[FaceMask] Requesting generation on server...")
|
||||||
|
task_id = client.generate_mask(
|
||||||
|
video_path=video_path,
|
||||||
|
output_dir=output_dir,
|
||||||
|
start_frame=start_frame,
|
||||||
|
end_frame=end_frame,
|
||||||
|
conf_threshold=conf_threshold,
|
||||||
|
iou_threshold=iou_threshold,
|
||||||
|
mask_scale=mask_scale,
|
||||||
)
|
)
|
||||||
|
print(f"[FaceMask] Task started: {task_id}")
|
||||||
|
|
||||||
# Open video
|
# Poll loop
|
||||||
cap = cv2.VideoCapture(video_path)
|
while self.is_running:
|
||||||
if not cap.isOpened():
|
status = client.get_task_status(task_id)
|
||||||
print(f"[FaceMask] Failed to open video: {video_path}")
|
state = status.get("status")
|
||||||
self.result_queue.put(("error", f"Failed to open video: {video_path}"))
|
|
||||||
return
|
|
||||||
|
|
||||||
total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
if state == "completed":
|
||||||
print(f"[FaceMask] Video opened, total frames: {total_video_frames}")
|
self.result_queue.put(("done", output_dir))
|
||||||
|
return
|
||||||
|
|
||||||
# Process frames
|
elif state == "failed":
|
||||||
for frame_idx in range(start_frame, end_frame + 1):
|
error_msg = status.get("message", "Unknown server error")
|
||||||
if not self.is_running:
|
print(f"[FaceMask] Server task failed: {error_msg}")
|
||||||
|
self.result_queue.put(("error", error_msg))
|
||||||
|
return
|
||||||
|
|
||||||
|
elif state == "cancelled":
|
||||||
self.result_queue.put(("cancelled", None))
|
self.result_queue.put(("cancelled", None))
|
||||||
return
|
return
|
||||||
|
|
||||||
# Seek to frame
|
|
||||||
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
|
|
||||||
ret, frame = cap.read()
|
|
||||||
|
|
||||||
if not ret:
|
|
||||||
# Skip unreadable frames
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Detect faces
|
|
||||||
detections = detector.detect(frame)
|
|
||||||
|
|
||||||
# Generate mask
|
|
||||||
mask = detector.generate_mask(
|
|
||||||
frame.shape,
|
|
||||||
detections,
|
|
||||||
mask_scale=mask_scale,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Save mask
|
|
||||||
mask_filename = f"mask_{frame_idx:06d}.png"
|
|
||||||
mask_path = os.path.join(output_dir, mask_filename)
|
|
||||||
cv2.imwrite(mask_path, mask)
|
|
||||||
|
|
||||||
# Report progress
|
# Report progress
|
||||||
self.progress_queue.put(("progress", frame_idx - start_frame + 1))
|
progress = status.get("progress", 0)
|
||||||
|
if progress > 0:
|
||||||
|
self.progress_queue.put(("progress", progress))
|
||||||
|
|
||||||
cap.release()
|
time.sleep(0.5)
|
||||||
|
|
||||||
# Report completion
|
# If loop exited but task not done, cancel server task
|
||||||
self.result_queue.put(("done", output_dir))
|
print("[FaceMask] Cancelling server task...")
|
||||||
|
client.cancel_task(task_id)
|
||||||
|
self.result_queue.put(("cancelled", None))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import traceback
|
import traceback
|
||||||
|
|
|
||||||
|
|
@ -1,160 +0,0 @@
|
||||||
"""
|
|
||||||
Face detector using OpenCV Haar Cascades.
|
|
||||||
|
|
||||||
This module provides face detection functionality optimized for
|
|
||||||
privacy blur in video editing workflows.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
from typing import List, Tuple, Optional
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
|
||||||
class FaceDetector:
|
|
||||||
"""
|
|
||||||
Face detector using OpenCV Haar Cascades.
|
|
||||||
|
|
||||||
Optimized for privacy blur use case:
|
|
||||||
- Detects frontal faces
|
|
||||||
- Configurable detection sensitivity
|
|
||||||
- Generates feathered masks for smooth blur edges
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
scale_factor: float = 1.1,
|
|
||||||
min_neighbors: int = 5,
|
|
||||||
min_size: Tuple[int, int] = (30, 30),
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Initialize the face detector.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
scale_factor: Image pyramid scale factor
|
|
||||||
min_neighbors: Minimum neighbors for detection
|
|
||||||
min_size: Minimum face size in pixels
|
|
||||||
"""
|
|
||||||
self.scale_factor = scale_factor
|
|
||||||
self.min_neighbors = min_neighbors
|
|
||||||
self.min_size = min_size
|
|
||||||
self._classifier = None
|
|
||||||
|
|
||||||
@property
|
|
||||||
def classifier(self):
|
|
||||||
"""Lazy-load the Haar cascade classifier."""
|
|
||||||
if self._classifier is None:
|
|
||||||
import cv2
|
|
||||||
|
|
||||||
# Use haarcascade for frontal face detection
|
|
||||||
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
|
|
||||||
|
|
||||||
if not os.path.exists(cascade_path):
|
|
||||||
raise RuntimeError(f"Haar cascade not found: {cascade_path}")
|
|
||||||
|
|
||||||
self._classifier = cv2.CascadeClassifier(cascade_path)
|
|
||||||
|
|
||||||
return self._classifier
|
|
||||||
|
|
||||||
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int]]:
|
|
||||||
"""
|
|
||||||
Detect faces in a frame.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
frame: BGR image as numpy array
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of face bounding boxes as (x, y, width, height)
|
|
||||||
"""
|
|
||||||
import cv2
|
|
||||||
|
|
||||||
# Convert to grayscale for detection
|
|
||||||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
|
||||||
|
|
||||||
# Detect faces
|
|
||||||
faces = self.classifier.detectMultiScale(
|
|
||||||
gray,
|
|
||||||
scaleFactor=self.scale_factor,
|
|
||||||
minNeighbors=self.min_neighbors,
|
|
||||||
minSize=self.min_size,
|
|
||||||
flags=cv2.CASCADE_SCALE_IMAGE,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Convert to list of tuples
|
|
||||||
return [tuple(face) for face in faces]
|
|
||||||
|
|
||||||
def generate_mask(
|
|
||||||
self,
|
|
||||||
frame_shape: Tuple[int, int, int],
|
|
||||||
detections: List[Tuple[int, int, int, int]],
|
|
||||||
mask_scale: float = 1.5,
|
|
||||||
feather_radius: int = 20,
|
|
||||||
) -> np.ndarray:
|
|
||||||
"""
|
|
||||||
Generate a mask image from face detections.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
frame_shape: Shape of the original frame (height, width, channels)
|
|
||||||
detections: List of face bounding boxes
|
|
||||||
mask_scale: Scale factor for mask region (1.0 = exact bounding box)
|
|
||||||
feather_radius: Radius for edge feathering
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Grayscale mask image (white = blur, black = keep)
|
|
||||||
"""
|
|
||||||
import cv2
|
|
||||||
|
|
||||||
height, width = frame_shape[:2]
|
|
||||||
mask = np.zeros((height, width), dtype=np.uint8)
|
|
||||||
|
|
||||||
for (x, y, w, h) in detections:
|
|
||||||
# Scale the bounding box
|
|
||||||
center_x = x + w // 2
|
|
||||||
center_y = y + h // 2
|
|
||||||
|
|
||||||
scaled_w = int(w * mask_scale)
|
|
||||||
scaled_h = int(h * mask_scale)
|
|
||||||
|
|
||||||
# Calculate scaled bounding box
|
|
||||||
x1 = max(0, center_x - scaled_w // 2)
|
|
||||||
y1 = max(0, center_y - scaled_h // 2)
|
|
||||||
x2 = min(width, center_x + scaled_w // 2)
|
|
||||||
y2 = min(height, center_y + scaled_h // 2)
|
|
||||||
|
|
||||||
# Draw ellipse for more natural face shape
|
|
||||||
cv2.ellipse(
|
|
||||||
mask,
|
|
||||||
(center_x, center_y),
|
|
||||||
(scaled_w // 2, scaled_h // 2),
|
|
||||||
0, # angle
|
|
||||||
0, 360, # arc
|
|
||||||
255, # color (white)
|
|
||||||
-1, # filled
|
|
||||||
)
|
|
||||||
|
|
||||||
# Apply Gaussian blur for feathering
|
|
||||||
if feather_radius > 0 and len(detections) > 0:
|
|
||||||
# Ensure kernel size is odd
|
|
||||||
kernel_size = feather_radius * 2 + 1
|
|
||||||
mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
|
|
||||||
|
|
||||||
return mask
|
|
||||||
|
|
||||||
|
|
||||||
def detect_faces_batch(
|
|
||||||
frames: List[np.ndarray],
|
|
||||||
detector: Optional[FaceDetector] = None,
|
|
||||||
) -> List[List[Tuple[int, int, int, int]]]:
|
|
||||||
"""
|
|
||||||
Detect faces in multiple frames.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
frames: List of BGR images
|
|
||||||
detector: Optional detector instance (creates one if not provided)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of detection lists, one per frame
|
|
||||||
"""
|
|
||||||
if detector is None:
|
|
||||||
detector = FaceDetector()
|
|
||||||
|
|
||||||
return [detector.detect(frame) for frame in frames]
|
|
||||||
159
core/inference_client.py
Normal file
159
core/inference_client.py
Normal file
|
|
@ -0,0 +1,159 @@
|
||||||
|
"""
|
||||||
|
Client for interacting with the external inference server.
|
||||||
|
|
||||||
|
Manages the server process and handles HTTP communication
|
||||||
|
using standard library (avoiding requests dependency).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
import threading
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
from typing import Optional, Dict, Any, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
class InferenceClient:
|
||||||
|
"""Client for the YOLO inference server."""
|
||||||
|
|
||||||
|
SERVER_URL = "http://127.0.0.1:8181"
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.server_process: Optional[subprocess.Popen] = None
|
||||||
|
self._server_lock = threading.Lock()
|
||||||
|
|
||||||
|
def start_server(self):
|
||||||
|
"""Start the inference server process."""
|
||||||
|
with self._server_lock:
|
||||||
|
if self.is_server_running():
|
||||||
|
return
|
||||||
|
|
||||||
|
print("[FaceMask] Starting inference server...")
|
||||||
|
|
||||||
|
# Find project root
|
||||||
|
# Assuming this file is in core/inference_client.py
|
||||||
|
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
server_script = os.path.join(root_dir, "server", "main.py")
|
||||||
|
|
||||||
|
# Use system python (assumed to have dependencies via Nix/venv)
|
||||||
|
# In user's environment, 'python' should refer to the environment python
|
||||||
|
python_cmd = "python"
|
||||||
|
|
||||||
|
# Start process
|
||||||
|
self.server_process = subprocess.Popen(
|
||||||
|
[python_cmd, server_script],
|
||||||
|
cwd=root_dir,
|
||||||
|
text=True,
|
||||||
|
preexec_fn=os.setsid, # Create new process group
|
||||||
|
)
|
||||||
|
|
||||||
|
# Wait for startup
|
||||||
|
for _ in range(20): # Wait up to 10 seconds
|
||||||
|
if self.is_server_running():
|
||||||
|
print("[FaceMask] Server started successfully")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Check if process died
|
||||||
|
if self.server_process.poll() is not None:
|
||||||
|
raise RuntimeError(f"Server failed to start (rc={self.server_process.returncode})")
|
||||||
|
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
raise RuntimeError("Server startup timed out")
|
||||||
|
|
||||||
|
def stop_server(self):
|
||||||
|
"""Stop the inference server."""
|
||||||
|
with self._server_lock:
|
||||||
|
if self.server_process:
|
||||||
|
print("[FaceMask] Stopping server...")
|
||||||
|
try:
|
||||||
|
os.killpg(os.getpgid(self.server_process.pid), signal.SIGTERM)
|
||||||
|
self.server_process.wait(timeout=3)
|
||||||
|
except (ProcessLookupError, subprocess.TimeoutExpired):
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
self.server_process = None
|
||||||
|
|
||||||
|
def is_server_running(self) -> bool:
|
||||||
|
"""Check if server is responding."""
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(f"{self.SERVER_URL}/status", timeout=1) as response:
|
||||||
|
return response.status == 200
|
||||||
|
except (urllib.error.URLError, ConnectionRefusedError, TimeoutError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def generate_mask(
|
||||||
|
self,
|
||||||
|
video_path: str,
|
||||||
|
output_dir: str,
|
||||||
|
start_frame: int,
|
||||||
|
end_frame: int,
|
||||||
|
conf_threshold: float,
|
||||||
|
iou_threshold: float,
|
||||||
|
mask_scale: float,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Request mask generation.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
task_id (str)
|
||||||
|
"""
|
||||||
|
if not self.is_server_running():
|
||||||
|
self.start_server()
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"video_path": video_path,
|
||||||
|
"output_dir": output_dir,
|
||||||
|
"start_frame": start_frame,
|
||||||
|
"end_frame": end_frame,
|
||||||
|
"conf_threshold": conf_threshold,
|
||||||
|
"iou_threshold": iou_threshold,
|
||||||
|
"mask_scale": mask_scale,
|
||||||
|
}
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{self.SERVER_URL}/generate",
|
||||||
|
data=json.dumps(data).encode('utf-8'),
|
||||||
|
headers={'Content-Type': 'application/json'},
|
||||||
|
method='POST'
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req) as response:
|
||||||
|
result = json.loads(response.read().decode('utf-8'))
|
||||||
|
return result['id']
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
raise RuntimeError(f"Server error: {e.read().decode('utf-8')}")
|
||||||
|
|
||||||
|
def get_task_status(self, task_id: str) -> Dict[str, Any]:
|
||||||
|
"""Get status of a task."""
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(f"{self.SERVER_URL}/tasks/{task_id}") as response:
|
||||||
|
return json.loads(response.read().decode('utf-8'))
|
||||||
|
except urllib.error.HTTPError:
|
||||||
|
return {"status": "unknown"}
|
||||||
|
|
||||||
|
def cancel_task(self, task_id: str):
|
||||||
|
"""Cancel a task."""
|
||||||
|
try:
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{self.SERVER_URL}/tasks/{task_id}/cancel",
|
||||||
|
method='POST'
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(req):
|
||||||
|
pass
|
||||||
|
except urllib.error.HTTPError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_client: Optional[InferenceClient] = None
|
||||||
|
|
||||||
|
def get_client() -> InferenceClient:
|
||||||
|
global _client
|
||||||
|
if _client is None:
|
||||||
|
_client = InferenceClient()
|
||||||
|
return _client
|
||||||
61
flake.lock
Normal file
61
flake.lock
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"flake-utils": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1731533236,
|
||||||
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1770115704,
|
||||||
|
"narHash": "sha256-KHFT9UWOF2yRPlAnSXQJh6uVcgNcWlFqqiAZ7OVlHNc=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "e6eae2ee2110f3d31110d5c222cd395303343b08",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nixpkgs": "nixpkgs"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
||||||
48
flake.nix
Normal file
48
flake.nix
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
{
|
||||||
|
description = "Blender VoiceVox Plugin Development Environment";
|
||||||
|
|
||||||
|
inputs = {
|
||||||
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
|
flake-utils.url = "github:numtide/flake-utils";
|
||||||
|
};
|
||||||
|
|
||||||
|
outputs =
|
||||||
|
{
|
||||||
|
self,
|
||||||
|
nixpkgs,
|
||||||
|
flake-utils,
|
||||||
|
}:
|
||||||
|
flake-utils.lib.eachDefaultSystem (
|
||||||
|
system:
|
||||||
|
let
|
||||||
|
pkgs = nixpkgs.legacyPackages.${system};
|
||||||
|
in
|
||||||
|
{
|
||||||
|
devShells.default = pkgs.mkShell {
|
||||||
|
buildInputs = with pkgs; [
|
||||||
|
python311
|
||||||
|
python311Packages.pip
|
||||||
|
python311Packages.requests
|
||||||
|
python311Packages.fastapi
|
||||||
|
python311Packages.uvicorn
|
||||||
|
python311Packages.numpy
|
||||||
|
python311Packages.opencv4
|
||||||
|
python311Packages.onnxruntime
|
||||||
|
git
|
||||||
|
];
|
||||||
|
|
||||||
|
shellHook = ''
|
||||||
|
python --version
|
||||||
|
blender --version | head -n 1
|
||||||
|
|
||||||
|
# Pythonパスにカレントディレクトリを追加
|
||||||
|
export PYTHONPATH="$PWD:$PYTHONPATH"
|
||||||
|
|
||||||
|
# アドオンのインストールパスを環境変数として設定
|
||||||
|
export BLENDER_USER_SCRIPTS="$HOME/.config/blender/5.0/scripts"
|
||||||
|
export BLENDER_USER_ADDONS="$BLENDER_USER_SCRIPTS/addons"
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
@ -21,21 +21,21 @@ class SEQUENCER_OT_generate_face_mask(Operator):
|
||||||
bl_description = "Detect faces and generate mask image sequence"
|
bl_description = "Detect faces and generate mask image sequence"
|
||||||
bl_options = {'REGISTER', 'UNDO'}
|
bl_options = {'REGISTER', 'UNDO'}
|
||||||
|
|
||||||
# Detection parameters
|
# YOLO Detection parameters
|
||||||
scale_factor: FloatProperty(
|
conf_threshold: FloatProperty(
|
||||||
name="Scale Factor",
|
name="Confidence",
|
||||||
description="Detection scale factor (larger = faster but less accurate)",
|
description="YOLO confidence threshold (higher = fewer false positives)",
|
||||||
default=1.1,
|
default=0.25,
|
||||||
min=1.01,
|
min=0.1,
|
||||||
max=2.0,
|
max=1.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
min_neighbors: IntProperty(
|
iou_threshold: FloatProperty(
|
||||||
name="Min Neighbors",
|
name="IOU Threshold",
|
||||||
description="Minimum neighbors for detection (higher = fewer false positives)",
|
description="Non-maximum suppression IOU threshold",
|
||||||
default=5,
|
default=0.45,
|
||||||
min=1,
|
min=0.1,
|
||||||
max=20,
|
max=1.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
mask_scale: FloatProperty(
|
mask_scale: FloatProperty(
|
||||||
|
|
@ -133,8 +133,8 @@ class SEQUENCER_OT_generate_face_mask(Operator):
|
||||||
start_frame=0, # Frame indices in video
|
start_frame=0, # Frame indices in video
|
||||||
end_frame=end_frame - start_frame,
|
end_frame=end_frame - start_frame,
|
||||||
fps=fps,
|
fps=fps,
|
||||||
scale_factor=self.scale_factor,
|
conf_threshold=self.conf_threshold,
|
||||||
min_neighbors=self.min_neighbors,
|
iou_threshold=self.iou_threshold,
|
||||||
mask_scale=self.mask_scale,
|
mask_scale=self.mask_scale,
|
||||||
on_complete=on_complete,
|
on_complete=on_complete,
|
||||||
on_progress=on_progress,
|
on_progress=on_progress,
|
||||||
|
|
|
||||||
371
server/detector.py
Normal file
371
server/detector.py
Normal file
|
|
@ -0,0 +1,371 @@
|
||||||
|
"""
|
||||||
|
YOLOv11 Face Detector using ONNX Runtime with GPU support.
|
||||||
|
|
||||||
|
This module provides high-performance face detection using
|
||||||
|
YOLOv11-face model with CUDA acceleration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import List, Tuple, Optional
|
||||||
|
from pathlib import Path
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class YOLOFaceDetector:
|
||||||
|
"""
|
||||||
|
YOLOv11 face detector with ONNX Runtime GPU support.
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- CUDA GPU acceleration
|
||||||
|
- High accuracy face detection
|
||||||
|
- NMS for overlapping detections
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Default model path relative to this file
|
||||||
|
DEFAULT_MODEL = "yolov11n-face.onnx"
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
model_path: Optional[str] = None,
|
||||||
|
conf_threshold: float = 0.25,
|
||||||
|
iou_threshold: float = 0.45,
|
||||||
|
input_size: Tuple[int, int] = (640, 640),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initialize the YOLO face detector.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
model_path: Path to ONNX model file. If None, uses default model.
|
||||||
|
conf_threshold: Confidence threshold for detections
|
||||||
|
iou_threshold: IoU threshold for NMS
|
||||||
|
input_size: Model input size (width, height)
|
||||||
|
"""
|
||||||
|
self.conf_threshold = conf_threshold
|
||||||
|
self.iou_threshold = iou_threshold
|
||||||
|
self.input_size = input_size
|
||||||
|
self._session = None
|
||||||
|
self._model_path = model_path
|
||||||
|
|
||||||
|
@property
|
||||||
|
def session(self):
|
||||||
|
"""Lazy-load ONNX Runtime session."""
|
||||||
|
if self._session is None:
|
||||||
|
import onnxruntime as ort
|
||||||
|
|
||||||
|
# Determine model path
|
||||||
|
if self._model_path is None:
|
||||||
|
# Assuming models are in ../models relative to server/detector.py
|
||||||
|
models_dir = Path(__file__).parent.parent / "models"
|
||||||
|
model_path = str(models_dir / self.DEFAULT_MODEL)
|
||||||
|
else:
|
||||||
|
model_path = self._model_path
|
||||||
|
|
||||||
|
if not os.path.exists(model_path):
|
||||||
|
raise FileNotFoundError(f"Model not found: {model_path}")
|
||||||
|
|
||||||
|
# Configure providers (prefer CUDA)
|
||||||
|
providers = []
|
||||||
|
if 'CUDAExecutionProvider' in ort.get_available_providers():
|
||||||
|
providers.append('CUDAExecutionProvider')
|
||||||
|
print("[FaceMask] Using CUDA GPU for inference")
|
||||||
|
providers.append('CPUExecutionProvider')
|
||||||
|
|
||||||
|
# Create session
|
||||||
|
sess_options = ort.SessionOptions()
|
||||||
|
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||||
|
|
||||||
|
self._session = ort.InferenceSession(
|
||||||
|
model_path,
|
||||||
|
sess_options=sess_options,
|
||||||
|
providers=providers,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"[FaceMask] YOLO model loaded: {model_path}")
|
||||||
|
print(f"[FaceMask] Providers: {self._session.get_providers()}")
|
||||||
|
|
||||||
|
return self._session
|
||||||
|
|
||||||
|
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
|
||||||
|
"""
|
||||||
|
Detect faces in a frame.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
frame: BGR image as numpy array (H, W, C)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of detections as (x, y, width, height, confidence)
|
||||||
|
"""
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
original_height, original_width = frame.shape[:2]
|
||||||
|
|
||||||
|
input_tensor = self._preprocess(frame)
|
||||||
|
# print(f"[DEBUG] Input tensor shape: {input_tensor.shape}, Range: [{input_tensor.min():.3f}, {input_tensor.max():.3f}]", flush=True)
|
||||||
|
|
||||||
|
# Run inference
|
||||||
|
input_name = self.session.get_inputs()[0].name
|
||||||
|
outputs = self.session.run(None, {input_name: input_tensor})
|
||||||
|
|
||||||
|
raw_output = outputs[0]
|
||||||
|
# print(f"[DEBUG] Raw output shape: {raw_output.shape}, Range: [{raw_output.min():.3f}, {raw_output.max():.3f}]", flush=True)
|
||||||
|
|
||||||
|
# Postprocess
|
||||||
|
detections = self._postprocess(
|
||||||
|
raw_output,
|
||||||
|
original_width,
|
||||||
|
original_height,
|
||||||
|
)
|
||||||
|
# print(f"[DEBUG] Detections found: {len(detections)}", flush=True)
|
||||||
|
|
||||||
|
return detections
|
||||||
|
|
||||||
|
def _preprocess(self, frame: np.ndarray) -> np.ndarray:
|
||||||
|
"""Preprocess frame for YOLO input with letterbox resizing."""
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
# Letterbox resize
|
||||||
|
shape = frame.shape[:2] # current shape [height, width]
|
||||||
|
new_shape = self.input_size
|
||||||
|
|
||||||
|
# Scale ratio (new / old)
|
||||||
|
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
|
||||||
|
|
||||||
|
# Compute padding
|
||||||
|
ratio = r, r # width, height ratios
|
||||||
|
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
|
||||||
|
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
|
||||||
|
|
||||||
|
dw /= 2 # divide padding into 2 sides
|
||||||
|
dh /= 2
|
||||||
|
|
||||||
|
if shape[::-1] != new_unpad: # resize
|
||||||
|
frame = cv2.resize(frame, new_unpad, interpolation=cv2.INTER_LINEAR)
|
||||||
|
|
||||||
|
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
|
||||||
|
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
|
||||||
|
|
||||||
|
# Add border
|
||||||
|
frame = cv2.copyMakeBorder(frame, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
|
||||||
|
|
||||||
|
# Store metadata for postprocessing
|
||||||
|
self._last_letterbox_meta = {'ratio': ratio, 'dwdh': (dw, dh)}
|
||||||
|
|
||||||
|
# Convert BGR to RGB
|
||||||
|
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||||||
|
|
||||||
|
# Normalize to [0, 1]
|
||||||
|
normalized = rgb.astype(np.float32) / 255.0
|
||||||
|
|
||||||
|
# Transpose to CHW format
|
||||||
|
transposed = np.transpose(normalized, (2, 0, 1))
|
||||||
|
|
||||||
|
# Add batch dimension
|
||||||
|
batched = np.expand_dims(transposed, axis=0)
|
||||||
|
|
||||||
|
return batched
|
||||||
|
|
||||||
|
def _postprocess(
    self,
    output: np.ndarray,
    original_width: int,
    original_height: int,
) -> List[Tuple[int, int, int, int, float]]:
    """
    Postprocess raw YOLO output into pixel-space detections.

    Filters rows by confidence, maps box coordinates from letterbox space
    back to the original frame, and applies NMS.

    Args:
        output: Raw model output, either [1, N, 5+] or [1, 5+, N] where the
            first 5 channels are x_center, y_center, w, h, confidence.
        original_width: Source frame width in pixels.
        original_height: Source frame height in pixels.

    Returns:
        List of (x, y, w, h, confidence) tuples in original-frame pixels.
    """
    # Output shape: [1, num_detections, 5+] where 5 = x_center, y_center, w, h, conf

    # Handle different output formats.
    # NOTE(review): this is a heuristic — it assumes the channel axis is
    # always the smaller one, which holds for typical YOLO exports.
    if output.shape[1] < output.shape[2]:
        # Format: [1, 5+, num_detections] - transpose
        output = np.transpose(output[0], (1, 0))
    else:
        output = output[0]

    # Debug confidence stats
    # if output.shape[1] >= 5:
    #     max_conf = output[:, 4].max()
    #     print(f"[DEBUG] Max confidence in raw output: {max_conf:.4f}", flush=True)

    # Filter by confidence threshold before doing any coordinate math.
    confidences = output[:, 4]
    mask = confidences > self.conf_threshold
    filtered = output[mask]

    if len(filtered) == 0:
        return []

    # Get letterbox metadata stashed by _preprocess (if that path ran).
    if hasattr(self, '_last_letterbox_meta') and self._last_letterbox_meta:
        ratio = self._last_letterbox_meta['ratio']
        dw, dh = self._last_letterbox_meta['dwdh']

        # Extract coordinates (still in letterboxed input space).
        x_center = filtered[:, 0]
        y_center = filtered[:, 1]
        width = filtered[:, 2]
        height = filtered[:, 3]
        confidences = filtered[:, 4]

        # Convert center format to corner format.
        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2

        # Undo the letterbox padding offset.
        x1 -= dw
        y1 -= dh
        x2 -= dw
        y2 -= dh

        # Undo the letterbox resize scale.
        x1 /= ratio[0]
        y1 /= ratio[1]
        x2 /= ratio[0]
        y2 /= ratio[1]

        # Clip to the original image bounds.
        x1 = np.clip(x1, 0, original_width)
        y1 = np.clip(y1, 0, original_height)
        x2 = np.clip(x2, 0, original_width)
        y2 = np.clip(y2, 0, original_height)

        # Convert back to x, y, w, h.
        final_x = x1
        final_y = y1
        final_w = x2 - x1
        final_h = y2 - y1

    else:
        # Fallback for non-letterbox (legacy): plain per-axis scaling.
        scale_x = original_width / self.input_size[0]
        scale_y = original_height / self.input_size[1]

        x_center = filtered[:, 0] * scale_x
        y_center = filtered[:, 1] * scale_y
        width = filtered[:, 2] * scale_x
        height = filtered[:, 3] * scale_y
        confidences = filtered[:, 4]

        final_x = x_center - width / 2
        final_y = y_center - height / 2
        final_w = width
        final_h = height

    # Apply NMS to drop overlapping duplicates.
    boxes = np.stack([final_x, final_y, final_w, final_h], axis=1)
    indices = self._nms(boxes, confidences, self.iou_threshold)

    # Format output as plain int/float tuples.
    detections = []
    for i in indices:
        x = int(final_x[i])
        y = int(final_y[i])
        w = int(final_w[i])
        h = int(final_h[i])
        conf = float(confidences[i])
        detections.append((x, y, w, h, conf))

    return detections
|
||||||
|
|
||||||
|
def _nms(
|
||||||
|
self,
|
||||||
|
boxes: np.ndarray,
|
||||||
|
scores: np.ndarray,
|
||||||
|
iou_threshold: float,
|
||||||
|
) -> List[int]:
|
||||||
|
"""Non-Maximum Suppression."""
|
||||||
|
x1 = boxes[:, 0]
|
||||||
|
y1 = boxes[:, 1]
|
||||||
|
x2 = x1 + boxes[:, 2]
|
||||||
|
y2 = y1 + boxes[:, 3]
|
||||||
|
|
||||||
|
areas = boxes[:, 2] * boxes[:, 3]
|
||||||
|
order = scores.argsort()[::-1]
|
||||||
|
|
||||||
|
keep = []
|
||||||
|
while len(order) > 0:
|
||||||
|
i = order[0]
|
||||||
|
keep.append(i)
|
||||||
|
|
||||||
|
if len(order) == 1:
|
||||||
|
break
|
||||||
|
|
||||||
|
xx1 = np.maximum(x1[i], x1[order[1:]])
|
||||||
|
yy1 = np.maximum(y1[i], y1[order[1:]])
|
||||||
|
xx2 = np.minimum(x2[i], x2[order[1:]])
|
||||||
|
yy2 = np.minimum(y2[i], y2[order[1:]])
|
||||||
|
|
||||||
|
w = np.maximum(0, xx2 - xx1)
|
||||||
|
h = np.maximum(0, yy2 - yy1)
|
||||||
|
inter = w * h
|
||||||
|
|
||||||
|
iou = inter / (areas[i] + areas[order[1:]] - inter)
|
||||||
|
|
||||||
|
inds = np.where(iou <= iou_threshold)[0]
|
||||||
|
order = order[inds + 1]
|
||||||
|
|
||||||
|
return keep
|
||||||
|
|
||||||
|
def generate_mask(
    self,
    frame_shape: Tuple[int, int, int],
    detections: List[Tuple[int, int, int, int, float]],
    mask_scale: float = 1.5,
    feather_radius: int = 20,
) -> np.ndarray:
    """
    Build a grayscale blur mask from face detections.

    Args:
        frame_shape: (height, width, channels) of the source frame.
        detections: Face boxes as (x, y, w, h, conf) tuples.
        mask_scale: Enlargement factor applied to each box before drawing.
        feather_radius: Gaussian blur radius used to soften mask edges.

    Returns:
        uint8 mask the size of the frame; white pixels mark blur regions.
    """
    import cv2

    frame_h, frame_w = frame_shape[:2]
    mask = np.zeros((frame_h, frame_w), dtype=np.uint8)

    for (bx, by, bw, bh, _conf) in detections:
        # A filled ellipse centered on the (enlarged) box tracks the face
        # outline more naturally than a rectangle.
        cx = bx + bw // 2
        cy = by + bh // 2
        axis_w = int(bw * mask_scale) // 2
        axis_h = int(bh * mask_scale) // 2
        cv2.ellipse(mask, (cx, cy), (axis_w, axis_h), 0, 0, 360, 255, -1)

    # Feather the edges so the blur blends smoothly into the frame.
    if feather_radius > 0 and detections:
        ksize = feather_radius * 2 + 1
        mask = cv2.GaussianBlur(mask, (ksize, ksize), 0)

    return mask
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton instance, created lazily by get_detector() so the model is
# only loaded once per process.
_detector: Optional[YOLOFaceDetector] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_detector(**kwargs) -> YOLOFaceDetector:
    """Get or create the global YOLO detector instance.

    Args:
        **kwargs: Forwarded to the YOLOFaceDetector constructor, but only
            on the first call. NOTE(review): later calls silently ignore
            kwargs because the cached instance is returned as-is.

    Returns:
        The process-wide YOLOFaceDetector singleton.
    """
    global _detector
    if _detector is None:
        _detector = YOLOFaceDetector(**kwargs)
    return _detector
|
||||||
180
server/main.py
Normal file
180
server/main.py
Normal file
|
|
@ -0,0 +1,180 @@
|
||||||
|
"""
|
||||||
|
Face Detection Inference Server.
|
||||||
|
|
||||||
|
This FastAPI application runs in a separate process to handle
|
||||||
|
GPU-accelerated face detection using ONNX Runtime.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import uuid
|
||||||
|
import queue
|
||||||
|
import traceback
|
||||||
|
from typing import Dict, Optional, List
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import uvicorn
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Add project root to path for imports if needed
|
||||||
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from server.detector import YOLOFaceDetector, get_detector
|
||||||
|
|
||||||
|
# ASGI application instance served by uvicorn.
app = FastAPI(title="Face Mask Inference Server")
|
||||||
|
|
||||||
|
# Task storage
|
||||||
|
class TaskStatus:
    """String constants describing the lifecycle of a background task."""
    PENDING = "pending"        # queued, not yet picked up by a worker
    PROCESSING = "processing"  # currently running
    COMPLETED = "completed"    # finished successfully
    FAILED = "failed"          # aborted with an error
    CANCELLED = "cancelled"    # stopped at the user's request
|
||||||
|
|
||||||
|
class Task(BaseModel):
    """Progress record for one mask-generation job."""
    id: str                            # unique task identifier (uuid4 string)
    status: str                        # one of the TaskStatus constants
    progress: int = 0                  # frames processed so far
    total: int = 0                     # total frames to process
    message: Optional[str] = None      # human-readable status / error detail
    result_path: Optional[str] = None  # output directory, set on completion
|
||||||
|
|
||||||
|
# In-memory storage (single-process only; state is lost on server restart).
tasks: Dict[str, Task] = {}
# Per-task events used to signal cancellation to the background worker.
cancel_events: Dict[str, threading.Event] = {}
|
||||||
|
|
||||||
|
class GenerateRequest(BaseModel):
    """Parameters for a mask-generation request."""
    video_path: str              # path to the source video file
    output_dir: str              # directory where mask PNGs are written
    start_frame: int             # first frame to process (inclusive)
    end_frame: int               # last frame to process (inclusive)
    conf_threshold: float = 0.5  # detector confidence cutoff
    iou_threshold: float = 0.45  # NMS IoU cutoff
    mask_scale: float = 1.5      # enlargement factor for mask ellipses
|
||||||
|
|
||||||
|
def process_video_task(task_id: str, req: GenerateRequest):
    """Background task: detect faces over a frame range and write mask PNGs.

    Updates the shared ``tasks[task_id]`` record as it goes and honours the
    task's cancellation event. Any exception marks the task FAILED. The
    VideoCapture and the cancellation event are always released.
    """
    cap = None
    try:
        tasks[task_id].status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)

        # Verify the video exists before spending time loading the model.
        if not os.path.exists(req.video_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Video not found: {req.video_path}"
            return

        # Initialize detector (will load model on first run).
        print(f"Loading detector for task {task_id}...")
        detector = get_detector(
            conf_threshold=req.conf_threshold,
            iou_threshold=req.iou_threshold
        )
        # Touch the lazy session so model-load errors surface here.
        _ = detector.session

        # Open video
        cap = cv2.VideoCapture(req.video_path)
        if not cap.isOpened():
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Failed to open video"
            return

        # Clamp the requested range to the actual video length.
        total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        end_frame = min(req.end_frame, total_video_frames - 1)
        frames_to_process = end_frame - req.start_frame + 1

        tasks[task_id].total = frames_to_process

        # Ensure output directory exists
        os.makedirs(req.output_dir, exist_ok=True)

        print(f"Starting processing: {req.video_path} ({frames_to_process} frames)")

        # Seek once, then read sequentially. Seeking before every read
        # (cap.set per frame) forces repeated keyframe decoding and is
        # dramatically slower for consecutive frames.
        cap.set(cv2.CAP_PROP_POS_FRAMES, req.start_frame)

        current_count = 0
        for frame_idx in range(req.start_frame, end_frame + 1):
            if cancel_event and cancel_event.is_set():
                tasks[task_id].status = TaskStatus.CANCELLED
                tasks[task_id].message = "Cancelled by user"
                break

            ret, frame = cap.read()
            if not ret:
                # Decoder could not produce this frame; skip it.
                # NOTE(review): skipped frames leave progress < total.
                continue

            # Detect faces and build the corresponding mask.
            detections = detector.detect(frame)
            mask = detector.generate_mask(
                frame.shape,
                detections,
                mask_scale=req.mask_scale
            )

            # Save. Filenames use the relative index within the range.
            mask_filename = f"mask_{current_count:06d}.png"
            mask_path = os.path.join(req.output_dir, mask_filename)
            cv2.imwrite(mask_path, mask)

            # Update progress
            current_count += 1
            tasks[task_id].progress = current_count

        if tasks[task_id].status == TaskStatus.PROCESSING:
            tasks[task_id].status = TaskStatus.COMPLETED
            tasks[task_id].result_path = req.output_dir
            tasks[task_id].message = "Processing completed successfully"
            print(f"Task {task_id} completed.")

    except Exception as e:
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        print(f"Error in task {task_id}: {e}")
        traceback.print_exc()
    finally:
        # Always release the capture (was leaked on exceptions before)
        # and drop the cancellation event.
        if cap is not None:
            cap.release()
        if task_id in cancel_events:
            del cancel_events[task_id]
|
||||||
|
|
||||||
|
@app.get("/status")
def get_status():
    """Health-check endpoint polled by the client."""
    # NOTE(review): gpu_available is hard-coded; it should query the
    # runtime's available execution providers instead.
    return {"status": "running", "gpu_available": True}  # TODO: check GPU
|
||||||
|
|
||||||
|
@app.post("/generate", response_model=Task)
def generate_mask_endpoint(req: GenerateRequest, background_tasks: BackgroundTasks):
    """Queue a mask-generation job and return its initial task record."""
    new_id = str(uuid.uuid4())
    new_task = Task(id=new_id, status=TaskStatus.PENDING)

    # Register the task and its cancellation handle before scheduling,
    # so status polls and cancel requests can find it immediately.
    tasks[new_id] = new_task
    cancel_events[new_id] = threading.Event()

    background_tasks.add_task(process_video_task, new_id, req)
    return new_task
|
||||||
|
|
||||||
|
@app.get("/tasks/{task_id}", response_model=Task)
def get_task(task_id: str):
    """Return the current state of a task, or 404 if it is unknown."""
    task = tasks.get(task_id)
    if task is None:
        raise HTTPException(status_code=404, detail="Task not found")
    return task
|
||||||
|
|
||||||
|
@app.post("/tasks/{task_id}/cancel")
def cancel_task(task_id: str):
    """Request cancellation of a task (best-effort; worker checks the event)."""
    if task_id not in tasks:
        raise HTTPException(status_code=404, detail="Task not found")

    event = cancel_events.get(task_id)
    if event is not None:
        event.set()

    return {"message": "Cancellation requested"}
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Loopback-only server; clients talk to it over 127.0.0.1:8181.
    uvicorn.run(app, host="127.0.0.1", port=8181)
|
||||||
Loading…
Reference in New Issue
Block a user