パフォーマンスの修正・Blurサイズ問題の修正

This commit is contained in:
Keisuke Hirata 2026-02-19 11:29:01 +09:00
parent da9de60697
commit a3de61d5ce
2 changed files with 89 additions and 12 deletions

View File

@ -49,13 +49,12 @@ def _head_bbox_from_pose(
cx = (kp_x1 + kp_x2) / 2.0 cx = (kp_x1 + kp_x2) / 2.0
cy = (kp_y1 + kp_y2) / 2.0 cy = (kp_y1 + kp_y2) / 2.0
# Head radius: inter-landmark span ≈ 80% of head width, so expand by ~1.25 # span はキーポイントの外接幅(≒顔幅)なので、半径 = span/2 で顔と等倍になる
# Shift center upward slightly to include scalp r = max(span * 0.5, person_w * 0.10)
r = max(span * 1.25, person_w * 0.20)
x1 = int(cx - r) x1 = int(cx - r)
y1 = int(cy - r * 1.15) # extra margin above (scalp) y1 = int(cy - r)
x2 = int(cx + r) x2 = int(cx + r)
y2 = int(cy + r * 0.85) # less margin below (chin) y2 = int(cy + r)
return x1, y1, x2 - x1, y2 - y1 return x1, y1, x2 - x1, y2 - y1
# --- Step 2: shoulder keypoints --- # --- Step 2: shoulder keypoints ---
@ -71,8 +70,9 @@ def _head_bbox_from_pose(
sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0]) sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
else: else:
sh_width = person_w * 0.5 sh_width = person_w * 0.5
r = max(sh_width * 0.5, person_w * 0.20) # 肩幅は顔幅の約1.5〜2倍なので、0.3倍で顔サイズに近い半径になる
cy = cy_sh - r * 1.3 # head center is above shoulders r = max(sh_width * 0.3, person_w * 0.12)
cy = cy_sh - r * 1.3 # 頭の中心は肩より上
x1 = int(cx - r) x1 = int(cx - r)
y1 = int(cy - r) y1 = int(cy - r)
x2 = int(cx + r) x2 = int(cx + r)
@ -80,7 +80,8 @@ def _head_bbox_from_pose(
return x1, y1, x2 - x1, y2 - y1 return x1, y1, x2 - x1, y2 - y1
# --- Step 3: person bbox top --- # --- Step 3: person bbox top ---
r = max(person_w * 0.35, 20.0) # 顔幅は人物幅の約20〜30%なので、半径 = person_w * 0.15 で顔サイズに近い
r = max(person_w * 0.15, 20.0)
cx = (person_x1 + person_x2) / 2.0 cx = (person_x1 + person_x2) / 2.0
x1 = int(cx - r) x1 = int(cx - r)
y1 = int(person_y1) y1 = int(person_y1)

View File

@ -446,36 +446,59 @@ def process_bake_task(task_id: str, req: BakeRequest):
def _reader_worker():
    """Read frames from the source video and feed them to ``read_queue``.

    Each successfully decoded frame is queued as ``(idx, frame)``. A single
    ``None`` sentinel is ALWAYS queued when this worker finishes, whatever the
    reason (end of stream, cancellation, decode error, or failure to open the
    video), because the downstream processor blocks on ``read_queue.get()``
    and only stops when it receives that sentinel.

    Failures are reported through ``error_holder["error"]``; nothing is
    raised to the caller for open/decode errors. When at least one frame was
    read, a one-line timing summary is printed at the end.
    """
    import time as _time

    t_read_total = 0.0  # cumulative seconds spent inside cap.read()
    frame_count = 0     # frames actually handed to read_queue
    cap = None
    try:
        # Opening happens inside the try so that an open failure (or an
        # exception from the constructor) still reaches the finally block
        # and the sentinel is delivered; returning before the try would
        # leave the processor waiting forever.
        cap = cv2.VideoCapture(req.video_path)
        if not cap.isOpened():
            error_holder["error"] = "Failed to open video in reader"
            return

        for idx in range(total):
            if cancel_event and cancel_event.is_set():
                break

            # Time only the decode call, not the (possibly blocking) put.
            t0 = _time.perf_counter()
            ok, frame = cap.read()
            t_read_total += _time.perf_counter() - t0

            if not ok:
                break

            read_queue.put((idx, frame))
            frame_count += 1
    except Exception as e:
        error_holder["error"] = f"Reader error: {e}"
    finally:
        try:
            if cap is not None:
                cap.release()
        finally:
            # Queue the sentinel even if release() itself fails.
            read_queue.put(None)  # Sentinel
        if frame_count > 0:
            print(
                f"[Perf/Reader] FINAL frame={frame_count}"
                f" read_avg={t_read_total/frame_count*1000:.1f}ms"
                f" throughput≈{frame_count/max(t_read_total,1e-9):.1f}fps"
            )
def _processor_worker(): def _processor_worker():
"""Process frames with ROI blur.""" """Process frames with ROI blur."""
import time as _time
t_wait_total = 0.0
t_blur_total = 0.0
t_blend_total = 0.0
frame_count = 0
REPORT_INTERVAL = 50
try: try:
while True: while True:
if cancel_event and cancel_event.is_set(): if cancel_event and cancel_event.is_set():
process_queue.put(None) process_queue.put(None)
break break
t0 = _time.perf_counter()
item = read_queue.get() item = read_queue.get()
t_wait_total += _time.perf_counter() - t0
if item is None: if item is None:
process_queue.put(None) process_queue.put(None)
break break
@ -485,6 +508,7 @@ def process_bake_task(task_id: str, req: BakeRequest):
if not frame_boxes: if not frame_boxes:
process_queue.put((idx, frame)) process_queue.put((idx, frame))
frame_count += 1
continue continue
# 各人物ごとに個別ROIで処理全員まとめると離れた人物間が巨大ROIになるため # 各人物ごとに個別ROIで処理全員まとめると離れた人物間が巨大ROIになるため
@ -499,6 +523,7 @@ def process_bake_task(task_id: str, req: BakeRequest):
if not valid_boxes: if not valid_boxes:
process_queue.put((idx, frame)) process_queue.put((idx, frame))
frame_count += 1
continue continue
for x, y, w, h in valid_boxes: for x, y, w, h in valid_boxes:
@ -523,7 +548,16 @@ def process_bake_task(task_id: str, req: BakeRequest):
# ブラーはROI全体で計算余白があるので端の精度が保証される # ブラーはROI全体で計算余白があるので端の精度が保証される
roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2] roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
# ダウンサンプル→blur→アップサンプル同等のぼかしを1/4の計算量で実現
t1 = _time.perf_counter()
small_w = max(1, roi_width // 2)
small_h = max(1, roi_height // 2)
roi_small = cv2.resize(roi_src, (small_w, small_h), interpolation=cv2.INTER_LINEAR)
small_blur_size = max(3, (blur_size // 2) | 1)
roi_small_blurred = cv2.GaussianBlur(roi_small, (small_blur_size, small_blur_size), 0)
roi_blurred = cv2.resize(roi_small_blurred, (roi_width, roi_height), interpolation=cv2.INTER_LINEAR)
t_blur_total += _time.perf_counter() - t1
# 合成マスクはdisplay_scaleサイズの楕円のみfeatheringなし # 合成マスクはdisplay_scaleサイズの楕円のみfeatheringなし
roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8) roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
@ -531,18 +565,47 @@ def process_bake_task(task_id: str, req: BakeRequest):
axes = (max(1, dw // 2), max(1, dh // 2)) axes = (max(1, dw // 2), max(1, dh // 2))
cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1) cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis] # バイナリマスクなのでcopyToで高速合成float32変換不要
roi_composed = roi_src.astype(np.float32) * (1.0 - roi_alpha) + roi_blurred.astype(np.float32) * roi_alpha t2 = _time.perf_counter()
frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8) result = roi_src.copy()
cv2.copyTo(roi_blurred, roi_mask, result)
frame[roi_y1:roi_y2, roi_x1:roi_x2] = result
t_blend_total += _time.perf_counter() - t2
process_queue.put((idx, frame)) process_queue.put((idx, frame))
frame_count += 1
if frame_count % REPORT_INTERVAL == 0:
n = max(frame_count, 1)
fps_proc = frame_count / max(t_wait_total + t_blur_total + t_blend_total, 1e-9)
print(
f"[Perf/Processor] frame={frame_count}"
f" wait={t_wait_total/n*1000:.1f}ms"
f" blur={t_blur_total/n*1000:.1f}ms"
f" blend={t_blend_total/n*1000:.1f}ms"
f" ROI={roi_width}x{roi_height}"
f" throughput≈{fps_proc:.1f}fps"
)
except Exception as e: except Exception as e:
error_holder["error"] = f"Processor error: {e}" error_holder["error"] = f"Processor error: {e}"
process_queue.put(None) process_queue.put(None)
finally:
if frame_count > 0:
n = max(frame_count, 1)
print(
f"[Perf/Processor] FINAL frame={frame_count}"
f" wait_avg={t_wait_total/n*1000:.1f}ms"
f" blur_avg={t_blur_total/n*1000:.1f}ms"
f" blend_avg={t_blend_total/n*1000:.1f}ms"
)
def _writer_worker(): def _writer_worker():
"""Write frames to output.""" """Write frames to output."""
import time as _time
t_wait_total = 0.0
t_write_total = 0.0
frame_count = 0
writer = None writer = None
try: try:
writer = _build_video_writer(req.output_path, req.format, src_fps, src_width, src_height) writer = _build_video_writer(req.output_path, req.format, src_fps, src_width, src_height)
@ -551,12 +614,18 @@ def process_bake_task(task_id: str, req: BakeRequest):
if cancel_event and cancel_event.is_set(): if cancel_event and cancel_event.is_set():
break break
t0 = _time.perf_counter()
item = process_queue.get() item = process_queue.get()
t_wait_total += _time.perf_counter() - t0
if item is None: if item is None:
break break
idx, frame = item idx, frame = item
t1 = _time.perf_counter()
writer.write(frame) writer.write(frame)
t_write_total += _time.perf_counter() - t1
frame_count += 1
with progress_lock: with progress_lock:
current_progress[0] = idx + 1 current_progress[0] = idx + 1
@ -570,6 +639,13 @@ def process_bake_task(task_id: str, req: BakeRequest):
writer.release() writer.release()
except Exception as e: except Exception as e:
print(f"[FaceMask] Writer release error: {e}") print(f"[FaceMask] Writer release error: {e}")
if frame_count > 0:
n = max(frame_count, 1)
print(
f"[Perf/Writer] FINAL frame={frame_count}"
f" wait_avg={t_wait_total/n*1000:.1f}ms"
f" write_avg={t_write_total/n*1000:.1f}ms"
)
print( print(
f"[FaceMask] Starting blur bake: {req.video_path} + " f"[FaceMask] Starting blur bake: {req.video_path} + "