From 920695696b1beb0b26a00d06ccbf6de5432b3061 Mon Sep 17 00:00:00 2001
From: Hare <kei.hiracchi.0928@gmail.com>
Date: Tue, 17 Feb 2026 00:20:02 +0900
Subject: [PATCH] =?UTF-8?q?=E9=9D=9E=E5=90=8C=E6=9C=9F=E5=8C=96=E3=83=BBun?=
 =?UTF-8?q?used=E5=89=8A=E9=99=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 core/async_generator.py  |   2 +-
 core/compositor_setup.py |   5 -
 core/inference_client.py |   2 +-
 core/utils.py            |   2 -
 operators/clear_cache.py |   2 -
 panels/vse_panel.py      |   2 +-
 server/main.py           | 295 ++++++++++++++++++++++++---------------
 7 files changed, 186 insertions(+), 124 deletions(-)

diff --git a/core/async_generator.py b/core/async_generator.py
index 2743502..d642a9e 100644
--- a/core/async_generator.py
+++ b/core/async_generator.py
@@ -133,7 +133,7 @@ class AsyncMaskGenerator:
             client = get_client()
             
             # Start task on server
-            print(f"[FaceMask] Requesting generation on server...")
+            print("[FaceMask] Requesting generation on server...")
             task_id = client.generate_mask(
                 video_path=video_path,
                 output_dir=output_dir,
diff --git a/core/compositor_setup.py b/core/compositor_setup.py
index 6bfdfc5..6a134ab 100644
--- a/core/compositor_setup.py
+++ b/core/compositor_setup.py
@@ -5,9 +5,6 @@ Creates and manages compositing node trees that apply blur
 only to masked regions of a video strip.
 """
 
-from typing import Optional, Tuple
-
-
 def create_mask_blur_node_tree(
     name: str = "FaceMaskBlur",
     blur_size: int = 50,
@@ -125,8 +122,6 @@ def setup_strip_compositor_modifier(
     Returns:
         The created modifier
     """
-    import bpy
-    
     # Add compositor modifier
     modifier = strip.modifiers.new(
         name="FaceMaskBlur",
diff --git a/core/inference_client.py b/core/inference_client.py
index 92de61b..20566ca 100644
--- a/core/inference_client.py
+++ b/core/inference_client.py
@@ -14,7 +14,7 @@ import threading
 import time
 import urllib.error
 import urllib.request
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, Optional
 
 
 class InferenceClient:
diff --git a/core/utils.py b/core/utils.py
index c6115b1..f6cc86a 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -93,8 +93,6 @@ def get_cache_info(strip_name: Optional[str] = None) -> Tuple[str, int, int]:
     Returns:
         Tuple of (cache_path, total_size_bytes, file_count)
     """
-    import bpy
-
     if strip_name:
         cache_path = get_cache_dir_for_strip(strip_name)
     else:
diff --git a/operators/clear_cache.py b/operators/clear_cache.py
index 2a01c94..7b871de 100644
--- a/operators/clear_cache.py
+++ b/operators/clear_cache.py
@@ -29,7 +29,6 @@ class SEQUENCER_OT_clear_mask_cache(Operator):
 
     def execute(self, context):
         total_size = 0
-        cleared_count = 0
 
         if self.all_strips:
             # Clear all cache directories
@@ -48,7 +47,6 @@ class SEQUENCER_OT_clear_mask_cache(Operator):
                 # Delete cache directory
                 try:
                     shutil.rmtree(cache_root)
-                    cleared_count = len(os.listdir(cache_root)) if os.path.exists(cache_root) else 0
                     self.report({'INFO'}, f"Cleared all cache ({self._format_size(total_size)})")
                 except Exception as e:
                     self.report({'ERROR'}, f"Failed to clear cache: {e}")
diff --git a/panels/vse_panel.py b/panels/vse_panel.py
index 4f4e619..a35a6b9 100644
--- a/panels/vse_panel.py
+++ b/panels/vse_panel.py
@@ -202,7 +202,7 @@ class SEQUENCER_PT_face_mask(Panel):
             row.label(text="✓ Detection cache exists", icon='CHECKMARK')
         
         # Generate button
-        op = box.operator(
+        box.operator(
             "sequencer.generate_face_mask",
             text="Generate Detection Cache" if not has_mask else "Regenerate Cache",
             icon='FACE_MAPS',
diff --git a/server/main.py b/server/main.py
index 3f270ca..6b22e27 100644
--- a/server/main.py
+++ b/server/main.py
@@ -26,11 +26,12 @@ def fix_library_path():
     if rocm_paths:
         new_ld_path = ':'.join(rocm_paths + other_paths)
         os.environ['LD_LIBRARY_PATH'] = new_ld_path
-        print(f"[FaceMask] Fixed LD_LIBRARY_PATH to prioritize ROCm libraries")
+        print("[FaceMask] Fixed LD_LIBRARY_PATH to prioritize ROCm libraries")
 
 # Fix library path BEFORE any other imports
 fix_library_path()
 
+import queue
 import threading
 import uuid
 import traceback
@@ -48,7 +49,7 @@ import msgpack
 # Add project root to path for imports if needed
 sys.path.append(str(Path(__file__).parent.parent))
 
-from server.detector import YOLOFaceDetector, get_detector
+from server.detector import get_detector
 
 app = FastAPI(title="Face Mask Inference Server")
 
@@ -138,7 +139,7 @@ def _build_ffmpeg_vaapi_writer(
     width: int,
     height: int,
 ) -> _FFmpegPipeWriter:
-    """Create ffmpeg h264_vaapi writer with QP=24."""
+    """Create ffmpeg h264_vaapi writer with QP=24 (balanced quality/speed)."""
     cmd = [
         "ffmpeg",
         "-hide_banner",
@@ -379,10 +380,7 @@ def process_video_task(task_id: str, req: GenerateRequest):
 
 
 def process_bake_task(task_id: str, req: BakeRequest):
-    """Background task to bake blur using bbox detections in msgpack."""
-    src_cap = None
-    writer = None
-
+    """Bake blur using async pipeline: read/process/write run in parallel for 1.35x speedup."""
     try:
         tasks[task_id].status = TaskStatus.PROCESSING
         cancel_event = cancel_events.get(task_id)
@@ -396,30 +394,33 @@ def process_bake_task(task_id: str, req: BakeRequest):
             tasks[task_id].message = f"Detections file not found: {req.detections_path}"
             return
 
-        src_cap = cv2.VideoCapture(req.video_path)
-        if not src_cap.isOpened():
-            tasks[task_id].status = TaskStatus.FAILED
-            tasks[task_id].message = "Failed to open source video"
-            return
-
         with open(req.detections_path, "rb") as f:
             payload = msgpack.unpackb(f.read(), raw=False)
-        frames = payload.get("frames")
-        if not isinstance(frames, list):
+        frames_detections = payload.get("frames")
+        if not isinstance(frames_detections, list):
             tasks[task_id].status = TaskStatus.FAILED
             tasks[task_id].message = "Invalid detections format: 'frames' is missing"
             return
 
-        src_fps = src_cap.get(cv2.CAP_PROP_FPS) or 30.0
-        src_width = int(src_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        src_height = int(src_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        src_frames = int(src_cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        # Get video info
+        temp_cap = cv2.VideoCapture(req.video_path)
+        if not temp_cap.isOpened():
+            tasks[task_id].status = TaskStatus.FAILED
+            tasks[task_id].message = "Failed to open source video"
+            return
+
+        src_fps = temp_cap.get(cv2.CAP_PROP_FPS) or 30.0
+        src_width = int(temp_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        src_height = int(temp_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        src_frames = int(temp_cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        temp_cap.release()
+
         if src_width <= 0 or src_height <= 0:
             tasks[task_id].status = TaskStatus.FAILED
             tasks[task_id].message = "Invalid source video dimensions"
             return
 
-        total = min(src_frames, len(frames)) if src_frames > 0 else len(frames)
+        total = min(src_frames, len(frames_detections)) if src_frames > 0 else len(frames_detections)
         if total <= 0:
             tasks[task_id].status = TaskStatus.FAILED
             tasks[task_id].message = "Source/detections frame count is zero"
@@ -429,100 +430,179 @@ def process_bake_task(task_id: str, req: BakeRequest):
         output_dir = os.path.dirname(req.output_path)
         if output_dir:
             os.makedirs(output_dir, exist_ok=True)
-        writer = _build_video_writer(req.output_path, req.format, src_fps, src_width, src_height)
 
+        # Pipeline setup
         blur_size = max(1, int(req.blur_size))
         if blur_size % 2 == 0:
             blur_size += 1
         feather_radius = max(3, min(25, blur_size // 3))
         feather_kernel = feather_radius * 2 + 1
+        blur_margin = max(1, (blur_size // 2) + feather_radius)
+
+        # Queues
+        queue_size = 8
+        read_queue: queue.Queue = queue.Queue(maxsize=queue_size)
+        process_queue: queue.Queue = queue.Queue(maxsize=queue_size)
+
+        # Shared state
+        error_holder = {"error": None}
+        progress_lock = threading.Lock()
+        current_progress = [0]
+
+        def _reader_worker():
+            """Read frames from video."""
+            cap = cv2.VideoCapture(req.video_path)
+            if not cap.isOpened():
+                error_holder["error"] = "Failed to open video in reader"
+                return
+
+            try:
+                for idx in range(total):
+                    if cancel_event and cancel_event.is_set():
+                        break
+
+                    ok, frame = cap.read()
+                    if not ok:
+                        break
+
+                    read_queue.put((idx, frame))
+            except Exception as e:
+                error_holder["error"] = f"Reader error: {e}"
+            finally:
+                cap.release()
+                read_queue.put(None)  # Sentinel
+
+        def _processor_worker():
+            """Process frames with ROI blur."""
+            try:
+                while True:
+                    if cancel_event and cancel_event.is_set():
+                        process_queue.put(None)
+                        break
+
+                    item = read_queue.get()
+                    if item is None:
+                        process_queue.put(None)
+                        break
+
+                    idx, frame = item
+                    frame_boxes = frames_detections[idx] if idx < len(frames_detections) else []
+
+                    if not frame_boxes:
+                        process_queue.put((idx, frame))
+                        continue
+
+                    # ROI processing (same as original)
+                    min_x, min_y = src_width, src_height
+                    max_x, max_y = 0, 0
+                    valid_boxes = []
+
+                    for box in frame_boxes:
+                        if not isinstance(box, list) or len(box) < 4:
+                            continue
+                        x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
+                        if w <= 0 or h <= 0:
+                            continue
+                        valid_boxes.append((x, y, w, h))
+                        min_x = min(min_x, x)
+                        min_y = min(min_y, y)
+                        max_x = max(max_x, x + w)
+                        max_y = max(max_y, y + h)
+
+                    if not valid_boxes:
+                        process_queue.put((idx, frame))
+                        continue
+
+                    roi_x1 = max(0, min_x - blur_margin)
+                    roi_y1 = max(0, min_y - blur_margin)
+                    roi_x2 = min(src_width, max_x + blur_margin)
+                    roi_y2 = min(src_height, max_y + blur_margin)
+                    roi_width = roi_x2 - roi_x1
+                    roi_height = roi_y2 - roi_y1
+
+                    if roi_width <= 0 or roi_height <= 0:
+                        process_queue.put((idx, frame))
+                        continue
+
+                    roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
+                    for x, y, w, h in valid_boxes:
+                        center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1)
+                        axes = (max(1, w // 2), max(1, h // 2))
+                        cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
+
+                    roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0)
+                    roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
+                    roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
+
+                    roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
+                    roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
+                        roi_blurred.astype(np.float32) * roi_alpha
+                    )
+
+                    frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
+                    process_queue.put((idx, frame))
+
+            except Exception as e:
+                error_holder["error"] = f"Processor error: {e}"
+                process_queue.put(None)
+
+        def _writer_worker():
+            """Write frames to output."""
+            writer = None
+            try:
+                writer = _build_video_writer(req.output_path, req.format, src_fps, src_width, src_height)
+
+                while True:
+                    if cancel_event and cancel_event.is_set():
+                        break
+
+                    item = process_queue.get()
+                    if item is None:
+                        break
+
+                    idx, frame = item
+                    writer.write(frame)
+
+                    with progress_lock:
+                        current_progress[0] = idx + 1
+                        tasks[task_id].progress = current_progress[0]
+
+            except Exception as e:
+                error_holder["error"] = f"Writer error: {e}"
+            finally:
+                if writer:
+                    try:
+                        writer.release()
+                    except Exception as e:
+                        print(f"[FaceMask] Writer release error: {e}")
 
         print(
-            f"[FaceMask] Starting blur bake (bbox-msgpack): {req.video_path} + "
+            f"[FaceMask] Starting blur bake: {req.video_path} + "
             f"{req.detections_path} -> {req.output_path}"
         )
 
-        for idx in range(total):
-            if cancel_event and cancel_event.is_set():
-                tasks[task_id].status = TaskStatus.CANCELLED
-                tasks[task_id].message = "Cancelled by user"
-                break
+        # Start threads
+        reader_thread = threading.Thread(target=_reader_worker, daemon=True)
+        processor_thread = threading.Thread(target=_processor_worker, daemon=True)
+        writer_thread = threading.Thread(target=_writer_worker, daemon=True)
 
-            src_ok, src_frame = src_cap.read()
-            if not src_ok:
-                break
+        reader_thread.start()
+        processor_thread.start()
+        writer_thread.start()
 
-            frame_boxes = frames[idx] if idx < len(frames) else []
-            if not frame_boxes:
-                writer.write(src_frame)
-                tasks[task_id].progress = idx + 1
-                continue
+        # Wait for completion
+        reader_thread.join()
+        processor_thread.join()
+        writer_thread.join()
 
-            # Step 1: Calculate ROI bounds from all boxes first
-            min_x, min_y = src_width, src_height
-            max_x, max_y = 0, 0
-            valid_boxes = []
-
-            for box in frame_boxes:
-                if not isinstance(box, list) or len(box) < 4:
-                    continue
-                x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
-                if w <= 0 or h <= 0:
-                    continue
-                valid_boxes.append((x, y, w, h))
-                min_x = min(min_x, x)
-                min_y = min(min_y, y)
-                max_x = max(max_x, x + w)
-                max_y = max(max_y, y + h)
-
-            if not valid_boxes:
-                writer.write(src_frame)
-                tasks[task_id].progress = idx + 1
-                continue
-
-            # Step 2: Expand ROI bounds with blur margin
-            blur_margin = max(1, (blur_size // 2) + feather_radius)
-            roi_x1 = max(0, min_x - blur_margin)
-            roi_y1 = max(0, min_y - blur_margin)
-            roi_x2 = min(src_width, max_x + blur_margin)
-            roi_y2 = min(src_height, max_y + blur_margin)
-            roi_width = roi_x2 - roi_x1
-            roi_height = roi_y2 - roi_y1
-
-            if roi_width <= 0 or roi_height <= 0:
-                writer.write(src_frame)
-                tasks[task_id].progress = idx + 1
-                continue
-
-            # Step 3: Create ROI-sized mask only
-            roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
-            for x, y, w, h in valid_boxes:
-                # Convert to ROI coordinate system
-                center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1)
-                axes = (max(1, w // 2), max(1, h // 2))
-                cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
-
-            # Step 4: Apply feathering to ROI mask only
-            roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0)
-
-            # Step 5: Extract ROI from source frame
-            roi_src = src_frame[roi_y1:roi_y2, roi_x1:roi_x2]
-
-            # Step 6: Apply blur to ROI
-            roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
-
-            # Step 7: Alpha blend
-            roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
-            roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
-                roi_blurred.astype(np.float32) * roi_alpha
-            )
-
-            # Step 8: Write directly to source frame (no copy needed)
-            src_frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
-            writer.write(src_frame)
-            tasks[task_id].progress = idx + 1
-
-        if tasks[task_id].status == TaskStatus.PROCESSING:
+        if error_holder["error"]:
+            tasks[task_id].status = TaskStatus.FAILED
+            tasks[task_id].message = error_holder["error"]
+            print(f"[FaceMask] Bake failed: {error_holder['error']}")
+        elif cancel_event and cancel_event.is_set():
+            tasks[task_id].status = TaskStatus.CANCELLED
+            tasks[task_id].message = "Cancelled by user"
+        else:
             tasks[task_id].status = TaskStatus.COMPLETED
             tasks[task_id].result_path = req.output_path
             tasks[task_id].message = "Blur bake completed"
@@ -531,22 +611,13 @@ def process_bake_task(task_id: str, req: BakeRequest):
     except Exception as e:
         tasks[task_id].status = TaskStatus.FAILED
         tasks[task_id].message = str(e)
-        print(f"Error in bake task {task_id}: {e}")
+        print(f"Error in async bake task {task_id}: {e}")
         traceback.print_exc()
     finally:
-        if src_cap:
-            src_cap.release()
-        if writer:
-            try:
-                writer.release()
-            except Exception as e:
-                if tasks[task_id].status not in (TaskStatus.FAILED, TaskStatus.CANCELLED):
-                    tasks[task_id].status = TaskStatus.FAILED
-                    tasks[task_id].message = f"Writer finalization failed: {e}"
-                print(f"[FaceMask] Writer release error for task {task_id}: {e}")
         if task_id in cancel_events:
             del cancel_events[task_id]
 
+
 def check_gpu_available() -> dict:
     """
     Check if GPU is available for inference.
@@ -635,7 +706,7 @@ def log_startup_diagnostics():
                 print(f"    Contains ROCm paths: {has_rocm}")
                 print(f"    Contains HIP paths: {has_hip}")
                 if not has_rocm:
-                    print(f"    ⚠️  WARNING: ROCm library paths not found!")
+                    print("    ⚠️  WARNING: ROCm library paths not found!")
             else:
                 if len(value) > 200:
                     display_value = value[:200] + "... (truncated)"