パフォーマンスの修正・Blurサイズ問題の修正

2026-02-19 11:29:01 +09:00 · 2026-02-19 11:29:01 +09:00 · a3de61d5ce
commit a3de61d5ce
parent da9de60697
2 changed files with 89 additions and 12 deletions
--- a/server/detector.py
+++ b/server/detector.py
@ -49,13 +49,12 @@ def _head_bbox_from_pose(
        cx = (kp_x1 + kp_x2) / 2.0
        cy = (kp_y1 + kp_y2) / 2.0
-        # Head radius: inter-landmark span ≈ 80% of head width, so expand by ~1.25
+        # span はキーポイントの外接幅（≒顔幅）なので、半径 = span/2 で顔と等倍になる
-        # Shift center upward slightly to include scalp
+        r = max(span * 0.5, person_w * 0.10)
-        r = max(span * 1.25, person_w * 0.20)
        x1 = int(cx - r)
-        y1 = int(cy - r * 1.15)   # extra margin above (scalp)
+        y1 = int(cy - r)
        x2 = int(cx + r)
-        y2 = int(cy + r * 0.85)   # less margin below (chin)
+        y2 = int(cy + r)
        return x1, y1, x2 - x1, y2 - y1
    # --- Step 2: shoulder keypoints ---
@ -71,8 +70,9 @@ def _head_bbox_from_pose(
            sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
        else:
            sh_width = person_w * 0.5
-        r = max(sh_width * 0.5, person_w * 0.20)
+        # 肩幅は顔幅の約1.5〜2倍なので、0.3倍で顔サイズに近い半径になる
-        cy = cy_sh - r * 1.3   # head center is above shoulders
+        r = max(sh_width * 0.3, person_w * 0.12)
+        cy = cy_sh - r * 1.3   # 頭の中心は肩より上
        x1 = int(cx - r)
        y1 = int(cy - r)
        x2 = int(cx + r)
@ -80,7 +80,8 @@ def _head_bbox_from_pose(
        return x1, y1, x2 - x1, y2 - y1
    # --- Step 3: person bbox top ---
-    r = max(person_w * 0.35, 20.0)
+    # 顔幅は人物幅の約20〜30%なので、半径 = person_w * 0.15 で顔サイズに近い
+    r = max(person_w * 0.15, 20.0)
    cx = (person_x1 + person_x2) / 2.0
    x1 = int(cx - r)
    y1 = int(person_y1)
--- a/server/main.py
+++ b/server/main.py
@ -446,36 +446,59 @@ def process_bake_task(task_id: str, req: BakeRequest):
        def _reader_worker():
            """Read frames from the source video and feed them to ``read_queue``.

            Reads up to ``total`` frames from ``req.video_path`` and enqueues each
            one as an ``(idx, frame)`` tuple. Reading stops early when
            ``cancel_event`` is set or when a frame cannot be read.

            A ``None`` sentinel is enqueued on every exit path, including the
            case where the video cannot be opened, so that the thread blocking
            on ``read_queue.get()`` is always woken up. Failures are reported
            through ``error_holder["error"]``; nothing is raised to the caller.
            """
            import time as _time

            cap = cv2.VideoCapture(req.video_path)
            t_read_total = 0.0
            frame_count = 0
            try:
                # The open check lives inside the try block so that the finally
                # clause below still releases the capture and posts the sentinel
                # when opening fails; returning before the try would leave the
                # consumer of read_queue waiting forever.
                if not cap.isOpened():
                    error_holder["error"] = "Failed to open video in reader"
                    return
                for idx in range(total):
                    if cancel_event and cancel_event.is_set():
                        break
                    # Only the decode call is timed, not the queue hand-off.
                    t0 = _time.perf_counter()
                    ok, frame = cap.read()
                    t_read_total += _time.perf_counter() - t0
                    if not ok:
                        break
                    read_queue.put((idx, frame))
                    frame_count += 1
            except Exception as e:
                error_holder["error"] = f"Reader error: {e}"
            finally:
                cap.release()
                read_queue.put(None)  # Sentinel: tells the consumer no more frames follow
                if frame_count > 0:
                    print(
                        f"[Perf/Reader] FINAL frame={frame_count}"
                        f"  read_avg={t_read_total/frame_count*1000:.1f}ms"
                        f"  throughput≈{frame_count/max(t_read_total,1e-9):.1f}fps"
                    )
        def _processor_worker():
            """Process frames with ROI blur."""
            import time as _time
            t_wait_total = 0.0
            t_blur_total = 0.0
            t_blend_total = 0.0
            frame_count = 0
            REPORT_INTERVAL = 50
            try:
                while True:
                    if cancel_event and cancel_event.is_set():
                        process_queue.put(None)
                        break
                    t0 = _time.perf_counter()
                    item = read_queue.get()
                    t_wait_total += _time.perf_counter() - t0
                    if item is None:
                        process_queue.put(None)
                        break
@ -485,6 +508,7 @@ def process_bake_task(task_id: str, req: BakeRequest):
                    if not frame_boxes:
                        process_queue.put((idx, frame))
                        frame_count += 1
                        continue
                    # 各人物ごとに個別ROIで処理（全員まとめると離れた人物間が巨大ROIになるため）
@ -499,6 +523,7 @@ def process_bake_task(task_id: str, req: BakeRequest):
                    if not valid_boxes:
                        process_queue.put((idx, frame))
                        frame_count += 1
                        continue
                    for x, y, w, h in valid_boxes:
@ -523,7 +548,16 @@ def process_bake_task(task_id: str, req: BakeRequest):
                        # ブラーはROI全体で計算（余白があるので端の精度が保証される）
                        roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
-                        roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
+
                        # ダウンサンプル→blur→アップサンプル（同等のぼかしを1/4の計算量で実現）
                        t1 = _time.perf_counter()
                        small_w = max(1, roi_width // 2)
                        small_h = max(1, roi_height // 2)
                        roi_small = cv2.resize(roi_src, (small_w, small_h), interpolation=cv2.INTER_LINEAR)
                        small_blur_size = max(3, (blur_size // 2) | 1)
                        roi_small_blurred = cv2.GaussianBlur(roi_small, (small_blur_size, small_blur_size), 0)
                        roi_blurred = cv2.resize(roi_small_blurred, (roi_width, roi_height), interpolation=cv2.INTER_LINEAR)
                        t_blur_total += _time.perf_counter() - t1
                        # 合成マスクはdisplay_scaleサイズの楕円のみ（featheringなし）
                        roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
@ -531,18 +565,47 @@ def process_bake_task(task_id: str, req: BakeRequest):
                        axes = (max(1, dw // 2), max(1, dh // 2))
                        cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
-                        roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
+                        # バイナリマスクなのでcopyToで高速合成（float32変換不要）
-                        roi_composed = roi_src.astype(np.float32) * (1.0 - roi_alpha) + roi_blurred.astype(np.float32) * roi_alpha
+                        t2 = _time.perf_counter()
-                        frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
+                        result = roi_src.copy()
                        cv2.copyTo(roi_blurred, roi_mask, result)
                        frame[roi_y1:roi_y2, roi_x1:roi_x2] = result
                        t_blend_total += _time.perf_counter() - t2
                    process_queue.put((idx, frame))
                    frame_count += 1
                    if frame_count % REPORT_INTERVAL == 0:
                        n = max(frame_count, 1)
                        fps_proc = frame_count / max(t_wait_total + t_blur_total + t_blend_total, 1e-9)
                        print(
                            f"[Perf/Processor] frame={frame_count}"
                            f"  wait={t_wait_total/n*1000:.1f}ms"
                            f"  blur={t_blur_total/n*1000:.1f}ms"
                            f"  blend={t_blend_total/n*1000:.1f}ms"
                            f"  ROI={roi_width}x{roi_height}"
                            f"  throughput≈{fps_proc:.1f}fps"
                        )
            except Exception as e:
                error_holder["error"] = f"Processor error: {e}"
                process_queue.put(None)
            finally:
                if frame_count > 0:
                    n = max(frame_count, 1)
                    print(
                        f"[Perf/Processor] FINAL frame={frame_count}"
                        f"  wait_avg={t_wait_total/n*1000:.1f}ms"
                        f"  blur_avg={t_blur_total/n*1000:.1f}ms"
                        f"  blend_avg={t_blend_total/n*1000:.1f}ms"
                    )
        def _writer_worker():
            """Write frames to output."""
            import time as _time
            t_wait_total = 0.0
            t_write_total = 0.0
            frame_count = 0
            writer = None
            try:
                writer = _build_video_writer(req.output_path, req.format, src_fps, src_width, src_height)
@ -551,12 +614,18 @@ def process_bake_task(task_id: str, req: BakeRequest):
                    if cancel_event and cancel_event.is_set():
                        break
                    t0 = _time.perf_counter()
                    item = process_queue.get()
                    t_wait_total += _time.perf_counter() - t0
                    if item is None:
                        break
                    idx, frame = item
                    t1 = _time.perf_counter()
                    writer.write(frame)
                    t_write_total += _time.perf_counter() - t1
                    frame_count += 1
                    with progress_lock:
                        current_progress[0] = idx + 1
@ -570,6 +639,13 @@ def process_bake_task(task_id: str, req: BakeRequest):
                        writer.release()
                    except Exception as e:
                        print(f"[FaceMask] Writer release error: {e}")
                if frame_count > 0:
                    n = max(frame_count, 1)
                    print(
                        f"[Perf/Writer] FINAL frame={frame_count}"
                        f"  wait_avg={t_wait_total/n*1000:.1f}ms"
                        f"  write_avg={t_write_total/n*1000:.1f}ms"
                    )
        print(
            f"[FaceMask] Starting blur bake: {req.video_path} + "