9 changed files with 167 additions and 209 deletions
--- a/init.py
+++ b/init.py
@ -40,6 +40,15 @@ def register():
        step=0.01,
    )

+    bpy.types.Scene.facemask_mask_scale = FloatProperty(
+        name="Mask Scale",
+        description="Scale factor for mask region (1.0 = exact face size)",
+        default=1.5,
+        min=1.0,
+        max=3.0,
+        step=0.1,
+    )
+
    bpy.types.Scene.facemask_cache_dir = StringProperty(
        name="Cache Directory",
        description="Optional cache root directory (empty = default .mask_cache)",
@ -55,15 +64,6 @@ def register():
        max=501,
    )

-    bpy.types.Scene.facemask_bake_display_scale = FloatProperty(
-        name="Mask Scale",
-        description="Scale factor for the blur mask ellipse at bake time (1.0 = raw detection size)",
-        default=1.3,
-        min=0.5,
-        max=3.0,
-        step=0.1,
-    )
-
    bpy.types.Scene.facemask_bake_format = EnumProperty(
        name="Bake Format",
        description="Output format for baked blur video",
@ -91,9 +91,9 @@ def unregister():
    # Unregister scene properties
    del bpy.types.Scene.facemask_conf_threshold
    del bpy.types.Scene.facemask_iou_threshold
+    del bpy.types.Scene.facemask_mask_scale
    del bpy.types.Scene.facemask_cache_dir
    del bpy.types.Scene.facemask_bake_blur_size
-    del bpy.types.Scene.facemask_bake_display_scale
    del bpy.types.Scene.facemask_bake_format


--- a/core/async_bake_generator.py
+++ b/core/async_bake_generator.py
@ -32,7 +32,6 @@ class AsyncBakeGenerator:
        detections_path: str,
        output_path: str,
        blur_size: int,
-        display_scale: float,
        fmt: str,
        on_complete: Optional[Callable] = None,
        on_progress: Optional[Callable] = None,
@ -54,7 +53,7 @@ class AsyncBakeGenerator:

        self.worker_thread = threading.Thread(
            target=self._worker,
-            args=(video_path, detections_path, output_path, blur_size, display_scale, fmt),
+            args=(video_path, detections_path, output_path, blur_size, fmt),
            daemon=True,
        )
        self.worker_thread.start()
@ -76,7 +75,6 @@ class AsyncBakeGenerator:
        detections_path: str,
        output_path: str,
        blur_size: int,
-        display_scale: float,
        fmt: str,
    ):
        import time
@ -90,7 +88,6 @@ class AsyncBakeGenerator:
                detections_path=detections_path,
                output_path=output_path,
                blur_size=blur_size,
-                display_scale=display_scale,
                fmt=fmt,
            )

--- a/core/async_generator.py
+++ b/core/async_generator.py
@ -44,6 +44,7 @@ class AsyncMaskGenerator:
        fps: float,
        conf_threshold: float = 0.5,
        iou_threshold: float = 0.45,
+        mask_scale: float = 1.5,
        on_complete: Optional[Callable] = None,
        on_progress: Optional[Callable] = None,
    ):
@ -93,6 +94,7 @@ class AsyncMaskGenerator:
                fps,
                conf_threshold,
                iou_threshold,
+                mask_scale,
            ),
            daemon=True,
        )
@ -119,6 +121,7 @@ class AsyncMaskGenerator:
        fps: float,
        conf_threshold: float,
        iou_threshold: float,
+        mask_scale: float,
    ):
        """
        Worker thread function. Delegates to inference server and polls status.
@ -138,6 +141,7 @@ class AsyncMaskGenerator:
                end_frame=end_frame,
                conf_threshold=conf_threshold,
                iou_threshold=iou_threshold,
+                mask_scale=mask_scale,
            )
            print(f"[FaceMask] Task started: {task_id}")
            
--- a/core/inference_client.py
+++ b/core/inference_client.py
@ -204,6 +204,7 @@ class InferenceClient:
        end_frame: int,
        conf_threshold: float,
        iou_threshold: float,
+        mask_scale: float,
    ) -> str:
        """
        Request mask generation.
@ -221,6 +222,7 @@ class InferenceClient:
            "end_frame": end_frame,
            "conf_threshold": conf_threshold,
            "iou_threshold": iou_threshold,
+            "mask_scale": mask_scale,
        }

        req = urllib.request.Request(
@ -253,7 +255,6 @@ class InferenceClient:
        detections_path: str,
        output_path: str,
        blur_size: int,
-        display_scale: float,
        fmt: str,
    ) -> str:
        """
@ -270,7 +271,6 @@ class InferenceClient:
            "detections_path": detections_path,
            "output_path": output_path,
            "blur_size": blur_size,
-            "display_scale": display_scale,
            "format": fmt,
        }

--- a/operators/apply_blur.py
+++ b/operators/apply_blur.py
@ -20,7 +20,6 @@ KEY_BAKED = "facemask_baked_filepath"
 KEY_MODE = "facemask_source_mode"
 KEY_FORMAT = "facemask_bake_format"
 KEY_BLUR_SIZE = "facemask_bake_blur_size"
-KEY_DISPLAY_SCALE = "facemask_bake_display_scale"


 FORMAT_EXT = {
@ -87,27 +86,20 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
        bake_format = scene.facemask_bake_format
        output_path = _output_path(video_strip, detections_path, bake_format)
        blur_size = int(scene.facemask_bake_blur_size)
-        display_scale = float(scene.facemask_bake_display_scale)

        # Reuse baked cache when parameters match and file still exists.
        cached_baked_path = video_strip.get(KEY_BAKED)
        cached_format = video_strip.get(KEY_FORMAT)
        cached_blur_size = video_strip.get(KEY_BLUR_SIZE)
-        cached_display_scale = video_strip.get(KEY_DISPLAY_SCALE)
        try:
            cached_blur_size_int = int(cached_blur_size)
        except (TypeError, ValueError):
            cached_blur_size_int = None
-        try:
-            cached_display_scale_f = float(cached_display_scale)
-        except (TypeError, ValueError):
-            cached_display_scale_f = None
        if (
            cached_baked_path
            and os.path.exists(cached_baked_path)
            and cached_format == bake_format
            and cached_blur_size_int == blur_size
-            and cached_display_scale_f == display_scale
        ):
            if video_strip.get(KEY_MODE) != "baked":
                video_strip[KEY_MODE] = "baked"
@ -134,7 +126,6 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
                strip[KEY_MODE] = "baked"
                strip[KEY_FORMAT] = bake_format
                strip[KEY_BLUR_SIZE] = blur_size
-                strip[KEY_DISPLAY_SCALE] = display_scale
                _set_strip_source(strip, result_path)
                print(f"[FaceMask] Bake completed and source swapped: {result_path}")
            elif status == "error":
@ -162,7 +153,6 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
                detections_path=detections_path,
                output_path=output_path,
                blur_size=blur_size,
-                display_scale=display_scale,
                fmt=bake_format.lower(),
                on_complete=on_complete,
                on_progress=on_progress,
--- a/operators/generate_mask.py
+++ b/operators/generate_mask.py
@ -110,6 +110,7 @@ class SEQUENCER_OT_generate_face_mask(Operator):
        # Get parameters from scene properties
        conf_threshold = scene.facemask_conf_threshold
        iou_threshold = scene.facemask_iou_threshold
+        mask_scale = scene.facemask_mask_scale

        # Start generation
        generator.start(
@ -120,6 +121,7 @@ class SEQUENCER_OT_generate_face_mask(Operator):
            fps=fps,
            conf_threshold=conf_threshold,
            iou_threshold=iou_threshold,
+            mask_scale=mask_scale,
            on_complete=on_complete,
            on_progress=on_progress,
        )
--- a/panels/vse_panel.py
+++ b/panels/vse_panel.py
@ -74,6 +74,7 @@ class SEQUENCER_PT_face_mask(Panel):
        col = box.column(align=True)
        col.prop(scene, "facemask_conf_threshold")
        col.prop(scene, "facemask_iou_threshold")
+        col.prop(scene, "facemask_mask_scale")

    def _draw_server_status(self, layout):
        """Draw server status and GPU info."""
@ -224,7 +225,6 @@ class SEQUENCER_PT_face_mask(Panel):
        # Bake parameters
        col = box.column(align=True)
        col.prop(context.scene, "facemask_bake_blur_size")
-        col.prop(context.scene, "facemask_bake_display_scale")
        col.prop(context.scene, "facemask_bake_format")

        # Source status
--- a/server/detector.py
+++ b/server/detector.py
@ -1,104 +1,28 @@
 """
-YOLOv8 Pose Head Detector using PyTorch with ROCm support.
+YOLOv8 Face Detector using PyTorch with ROCm support.

-Detects human heads from all angles (frontal, profile, rear) by using
-YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
+This module provides high-performance face detection using
+a YOLOv8-face model with AMD GPU (ROCm) acceleration.
 """

 import os
 from typing import List, Tuple, Optional
+from pathlib import Path
 import numpy as np


-# COCO pose keypoint indices
-_HEAD_KP = [0, 1, 2, 3, 4]      # nose, left_eye, right_eye, left_ear, right_ear
-_SHOULDER_KP = [5, 6]            # left_shoulder, right_shoulder
-_KP_CONF_THRESH = 0.3
-
-
-def _head_bbox_from_pose(
-    kp_xy: np.ndarray,
-    kp_conf: np.ndarray,
-    person_x1: float,
-    person_y1: float,
-    person_x2: float,
-    person_y2: float,
-) -> Tuple[int, int, int, int]:
+class YOLOFaceDetector:
    """
-    Estimate head bounding box (x, y, w, h) from COCO pose keypoints.
+    YOLOv8 face detector with PyTorch ROCm support.

-    Strategy:
-    1. Use head keypoints (0-4: nose, eyes, ears) if visible.
-    2. Fall back to shoulder keypoints (5-6) to infer head position.
-    3. Last resort: use top of the person bounding box.
-    """
-    person_w = max(person_x2 - person_x1, 1.0)
-
-    # --- Step 1: head keypoints ---
-    visible_head = [
-        (float(kp_xy[i][0]), float(kp_xy[i][1]))
-        for i in _HEAD_KP
-        if float(kp_conf[i]) > _KP_CONF_THRESH
-    ]
-    if visible_head:
-        xs = [p[0] for p in visible_head]
-        ys = [p[1] for p in visible_head]
-        kp_x1, kp_y1 = min(xs), min(ys)
-        kp_x2, kp_y2 = max(xs), max(ys)
-        span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
-        cx = (kp_x1 + kp_x2) / 2.0
-        cy = (kp_y1 + kp_y2) / 2.0
-
-        # Head radius: inter-landmark span ≈ 80% of head width, so expand by ~1.25
-        # Shift center upward slightly to include scalp
-        r = max(span * 1.25, person_w * 0.20)
-        x1 = int(cx - r)
-        y1 = int(cy - r * 1.15)   # extra margin above (scalp)
-        x2 = int(cx + r)
-        y2 = int(cy + r * 0.85)   # less margin below (chin)
-        return x1, y1, x2 - x1, y2 - y1
-
-    # --- Step 2: shoulder keypoints ---
-    visible_shoulder = [
-        (float(kp_xy[i][0]), float(kp_xy[i][1]))
-        for i in _SHOULDER_KP
-        if float(kp_conf[i]) > _KP_CONF_THRESH
-    ]
-    if visible_shoulder:
-        cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
-        cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
-        if len(visible_shoulder) == 2:
-            sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
-        else:
-            sh_width = person_w * 0.5
-        r = max(sh_width * 0.5, person_w * 0.20)
-        cy = cy_sh - r * 1.3   # head center is above shoulders
-        x1 = int(cx - r)
-        y1 = int(cy - r)
-        x2 = int(cx + r)
-        y2 = int(cy + r)
-        return x1, y1, x2 - x1, y2 - y1
-
-    # --- Step 3: person bbox top ---
-    r = max(person_w * 0.35, 20.0)
-    cx = (person_x1 + person_x2) / 2.0
-    x1 = int(cx - r)
-    y1 = int(person_y1)
-    x2 = int(cx + r)
-    y2 = int(person_y1 + r * 2.0)
-    return x1, y1, x2 - x1, y2 - y1
-
-
-class YOLOPoseHeadDetector:
-    """
-    Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
-
-    Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
-    so that detection works regardless of the person's facing direction.
+    Features:
+    - ROCm GPU acceleration for AMD GPUs
+    - High accuracy face detection
+    - Automatic NMS for overlapping detections
    """

-    # Standard Ultralytics model — auto-downloaded on first use
-    DEFAULT_MODEL = os.path.join("models", "yolov8n-pose.pt")
+    # Default model filename; looked up in ../models (relative to server/) when no model_path is given
+    DEFAULT_MODEL = "yolov8n-face-lindevs.pt"

    def __init__(
        self,
@ -107,6 +31,15 @@ class YOLOPoseHeadDetector:
        iou_threshold: float = 0.45,
        input_size: Tuple[int, int] = (640, 640),
    ):
+        """
+        Initialize the YOLO face detector.
+
+        Args:
+            model_path: Path to PyTorch model file. If None, uses default model.
+            conf_threshold: Confidence threshold for detections
+            iou_threshold: IoU threshold for NMS
+            input_size: Model input size (width, height)
+        """
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_size = input_size
@ -116,20 +49,23 @@ class YOLOPoseHeadDetector:

    @property
    def model(self):
-        """Lazy-load YOLO pose model."""
+        """Lazy-load YOLO model."""
        if self._model is None:
            from ultralytics import YOLO
            import torch

-            # Use provided path or let Ultralytics auto-download the default
-            if self._model_path is not None:
-                if not os.path.exists(self._model_path):
-                    raise FileNotFoundError(f"Model not found: {self._model_path}")
-                model_path = self._model_path
+            # Determine model path
+            if self._model_path is None:
+                # Assuming models are in ../models relative to server/detector.py
+                models_dir = Path(__file__).parent.parent / "models"
+                model_path = str(models_dir / self.DEFAULT_MODEL)
            else:
-                model_path = self.DEFAULT_MODEL
-                os.makedirs(os.path.dirname(model_path), exist_ok=True)
+                model_path = self._model_path

+            if not os.path.exists(model_path):
+                raise FileNotFoundError(f"Model not found: {model_path}")
+
+            # Detect device (ROCm GPU or CPU)
            if torch.cuda.is_available():
                self._device = 'cuda'
                device_name = torch.cuda.get_device_name(0)
@ -138,47 +74,25 @@ class YOLOPoseHeadDetector:
                self._device = 'cpu'
                print("[FaceMask] Using CPU for inference (ROCm GPU not available)")

+            # Load model (let Ultralytics handle device management)
            try:
                self._model = YOLO(model_path)
-                print(f"[FaceMask] Pose model loaded: {model_path}")
-                print(f"[FaceMask] Device: {self._device}")
+                # Don't call .to() - let predict() handle device assignment
+                print(f"[FaceMask] Model loaded, will use device: {self._device}")
            except Exception as e:
                print(f"[FaceMask] Error loading model: {e}")
                import traceback
                traceback.print_exc()
                raise

+            print(f"[FaceMask] YOLO model loaded: {model_path}")
+            print(f"[FaceMask] Device: {self._device}")
+
        return self._model

-    def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
-        """Convert a single YOLO pose result to (x, y, w, h, conf) tuples."""
-        detections = []
-        if result.boxes is None or result.keypoints is None:
-            return detections
-
-        boxes = result.boxes
-        keypoints = result.keypoints
-
-        for i, box in enumerate(boxes):
-            conf = float(box.conf[0].cpu().numpy())
-            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
-
-            # Extract keypoints for this person
-            kp_data = keypoints.data[i].cpu().numpy()  # shape (17, 3): x, y, conf
-            kp_xy = kp_data[:, :2]
-            kp_conf = kp_data[:, 2]
-
-            hx, hy, hw, hh = _head_bbox_from_pose(
-                kp_xy, kp_conf,
-                float(x1), float(y1), float(x2), float(y2),
-            )
-            detections.append((hx, hy, hw, hh, conf))
-
-        return detections
-
    def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
        """
-        Detect heads in a frame.
+        Detect faces in a frame.

        Args:
            frame: BGR image as numpy array (H, W, C)
@ -186,6 +100,7 @@ class YOLOPoseHeadDetector:
        Returns:
            List of detections as (x, y, width, height, confidence)
        """
+        # Run inference
        import torch
        print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}")
        try:
@ -201,6 +116,7 @@ class YOLOPoseHeadDetector:
            print(f"[FaceMask] ERROR during inference: {e}")
            import traceback
            traceback.print_exc()
+            # Fallback to CPU
            print("[FaceMask] Falling back to CPU inference...")
            self._device = 'cpu'
            results = self.model.predict(
@ -212,13 +128,28 @@ class YOLOPoseHeadDetector:
                device='cpu',
            )

-        if results:
-            return self._results_to_detections(results[0])
-        return []
+        # Extract detections
+        detections = []
+        if len(results) > 0 and results[0].boxes is not None:
+            boxes = results[0].boxes
+            for box in boxes:
+                # Get coordinates in xyxy format
+                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
+                conf = float(box.conf[0].cpu().numpy())
+
+                # Convert to x, y, width, height
+                x = int(x1)
+                y = int(y1)
+                w = int(x2 - x1)
+                h = int(y2 - y1)
+
+                detections.append((x, y, w, h, conf))
+
+        return detections

    def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
        """
-        Detect heads in multiple frames at once (batch processing).
+        Detect faces in multiple frames at once (batch processing).

        Args:
            frames: List of BGR images as numpy arrays (H, W, C)
@ -230,6 +161,7 @@ class YOLOPoseHeadDetector:
        if not frames:
            return []

+        # Run batch inference
        try:
            results = self.model.predict(
                frames,
@ -243,6 +175,7 @@ class YOLOPoseHeadDetector:
            print(f"[FaceMask] ERROR during batch inference: {e}")
            import traceback
            traceback.print_exc()
+            # Fallback to CPU
            print("[FaceMask] Falling back to CPU inference...")
            self._device = 'cpu'
            results = self.model.predict(
@ -254,7 +187,28 @@ class YOLOPoseHeadDetector:
                device='cpu',
            )

-        return [self._results_to_detections(r) for r in results]
+        # Extract detections for each frame
+        all_detections = []
+        for result in results:
+            detections = []
+            if result.boxes is not None:
+                boxes = result.boxes
+                for box in boxes:
+                    # Get coordinates in xyxy format
+                    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
+                    conf = float(box.conf[0].cpu().numpy())
+
+                    # Convert to x, y, width, height
+                    x = int(x1)
+                    y = int(y1)
+                    w = int(x2 - x1)
+                    h = int(y2 - y1)
+
+                    detections.append((x, y, w, h, conf))
+
+            all_detections.append(detections)
+
+        return all_detections

    def generate_mask(
        self,
@ -264,11 +218,11 @@ class YOLOPoseHeadDetector:
        feather_radius: int = 20,
    ) -> np.ndarray:
        """
-        Generate a mask image from head detections.
+        Generate a mask image from face detections.

        Args:
            frame_shape: Shape of the original frame (height, width, channels)
-            detections: List of head detections (x, y, w, h, conf)
+            detections: List of face detections (x, y, w, h, conf)
            mask_scale: Scale factor for mask region
            feather_radius: Radius for edge feathering

@ -281,19 +235,25 @@ class YOLOPoseHeadDetector:
        mask = np.zeros((height, width), dtype=np.uint8)

        for (x, y, w, h, conf) in detections:
+            # Scale the bounding box
            center_x = x + w // 2
            center_y = y + h // 2
+
            scaled_w = int(w * mask_scale)
            scaled_h = int(h * mask_scale)

+            # Draw ellipse for natural face shape
            cv2.ellipse(
                mask,
                (center_x, center_y),
                (scaled_w // 2, scaled_h // 2),
-                0, 0, 360,
-                255, -1,
+                0,  # angle
+                0, 360,  # arc
+                255,  # color (white)
+                -1,  # filled
            )

+        # Apply Gaussian blur for feathering
        if feather_radius > 0 and len(detections) > 0:
            kernel_size = feather_radius * 2 + 1
            mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
@ -302,12 +262,12 @@ class YOLOPoseHeadDetector:


 # Singleton instance
-_detector: Optional[YOLOPoseHeadDetector] = None
+_detector: Optional[YOLOFaceDetector] = None


-def get_detector(**kwargs) -> YOLOPoseHeadDetector:
-    """Get or create the global YOLO pose head detector instance."""
+def get_detector(**kwargs) -> YOLOFaceDetector:
+    """Get or create the global YOLO detector instance (kwargs are used only on first creation)."""
    global _detector
    if _detector is None:
-        _detector = YOLOPoseHeadDetector(**kwargs)
+        _detector = YOLOFaceDetector(**kwargs)
    return _detector
--- a/server/main.py
+++ b/server/main.py
@ -83,6 +83,7 @@ class GenerateRequest(BaseModel):
    end_frame: int
    conf_threshold: float = 0.5
    iou_threshold: float = 0.45
+    mask_scale: float = 1.5


 class BakeRequest(BaseModel):
@ -90,7 +91,6 @@ class BakeRequest(BaseModel):
    detections_path: str
    output_path: str
    blur_size: int = 50
-    display_scale: float = 1.0
    format: str = "mp4"


@ -305,15 +305,20 @@ def process_video_task(task_id: str, req: GenerateRequest):
            for detections in batch_detections:
                packed_detections: List[List[float]] = []
                for x, y, w, h, conf in detections:
-                    # bboxをそのまま保存（表示スケールはBake時に適用）
-                    bx, by, bw, bh = int(x), int(y), int(w), int(h)
-                    bx = max(0, bx)
-                    by = max(0, by)
-                    bw = min(width - bx, bw)
-                    bh = min(height - by, bh)
-                    if bw <= 0 or bh <= 0:
+                    scaled = _scale_bbox(
+                        int(x),
+                        int(y),
+                        int(w),
+                        int(h),
+                        float(req.mask_scale),
+                        width,
+                        height,
+                    )
+                    if scaled is None:
                        continue
-                    packed_detections.append([bx, by, bw, bh, float(conf)])
+                    packed_detections.append(
+                        [scaled[0], scaled[1], scaled[2], scaled[3], float(conf)]
+                    )
                frame_detections.append(packed_detections)
                current_count += 1
                tasks[task_id].progress = current_count
@ -351,7 +356,7 @@ def process_video_task(task_id: str, req: GenerateRequest):
                "width": width,
                "height": height,
                "fps": fps,
-                "mask_scale": 1.0,
+                "mask_scale": float(req.mask_scale),
                "frames": frame_detections,
            }
            with open(output_msgpack_path, "wb") as f:
@ -430,9 +435,9 @@ def process_bake_task(task_id: str, req: BakeRequest):
        blur_size = max(1, int(req.blur_size))
        if blur_size % 2 == 0:
            blur_size += 1
-        display_scale = max(0.1, float(req.display_scale))
-        # blur_margin は境界問題回避のための計算用余白のみ（表示には使わない）
-        blur_margin = blur_size // 2
+        feather_radius = max(3, min(25, blur_size // 3))
+        feather_kernel = feather_radius * 2 + 1
+        blur_margin = max(1, (blur_size // 2) + feather_radius)

        # Queues
        queue_size = 8
@ -487,8 +492,11 @@ def process_bake_task(task_id: str, req: BakeRequest):
                        process_queue.put((idx, frame))
                        continue

-                    # 各人物ごとに個別ROIで処理（全員まとめると離れた人物間が巨大ROIになるため）
+                    # Build a single ROI that encloses every valid box in this frame
+                    min_x, min_y = src_width, src_height
+                    max_x, max_y = 0, 0
                    valid_boxes = []
+
                    for box in frame_boxes:
                        if not isinstance(box, list) or len(box) < 4:
                            continue
@ -496,45 +504,42 @@ def process_bake_task(task_id: str, req: BakeRequest):
                        if w <= 0 or h <= 0:
                            continue
                        valid_boxes.append((x, y, w, h))
+                        min_x = min(min_x, x)
+                        min_y = min(min_y, y)
+                        max_x = max(max_x, x + w)
+                        max_y = max(max_y, y + h)

                    if not valid_boxes:
                        process_queue.put((idx, frame))
                        continue

+                    roi_x1 = max(0, min_x - blur_margin)
+                    roi_y1 = max(0, min_y - blur_margin)
+                    roi_x2 = min(src_width, max_x + blur_margin)
+                    roi_y2 = min(src_height, max_y + blur_margin)
+                    roi_width = roi_x2 - roi_x1
+                    roi_height = roi_y2 - roi_y1
+
+                    if roi_width <= 0 or roi_height <= 0:
+                        process_queue.put((idx, frame))
+                        continue
+
+                    roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
                    for x, y, w, h in valid_boxes:
-                        # display_scale で表示サイズを決定
-                        cx = x + w / 2
-                        cy = y + h / 2
-                        dw = max(1, int(w * display_scale))
-                        dh = max(1, int(h * display_scale))
-                        dx = int(cx - dw / 2)
-                        dy = int(cy - dh / 2)
-
-                        # ROIは表示サイズ + blur_margin（計算用余白、境界問題回避のみ）
-                        roi_x1 = max(0, dx - blur_margin)
-                        roi_y1 = max(0, dy - blur_margin)
-                        roi_x2 = min(src_width, dx + dw + blur_margin)
-                        roi_y2 = min(src_height, dy + dh + blur_margin)
-                        roi_width = roi_x2 - roi_x1
-                        roi_height = roi_y2 - roi_y1
-
-                        if roi_width <= 0 or roi_height <= 0:
-                            continue
-
-                        # ブラーはROI全体で計算（余白があるので端の精度が保証される）
-                        roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
-                        roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
-
-                        # 合成マスクはdisplay_scaleサイズの楕円のみ（featheringなし）
-                        roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
-                        center = (int(cx) - roi_x1, int(cy) - roi_y1)
-                        axes = (max(1, dw // 2), max(1, dh // 2))
+                        center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1)
+                        axes = (max(1, w // 2), max(1, h // 2))
                        cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)

-                        roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
-                        roi_composed = roi_src.astype(np.float32) * (1.0 - roi_alpha) + roi_blurred.astype(np.float32) * roi_alpha
-                        frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
+                    roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0)
+                    roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
+                    roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)

+                    roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
+                    roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
+                        roi_blurred.astype(np.float32) * roi_alpha
+                    )
+
+                    frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
                    process_queue.put((idx, frame))

            except Exception as e: