feat: 中間データのmsgpack移行

This commit is contained in:
Keisuke Hirata 2026-02-16 16:07:38 +09:00
parent 67178e0f52
commit e693f5b694
10 changed files with 281 additions and 395 deletions

View File

@ -29,7 +29,7 @@ class AsyncBakeGenerator:
def start( def start(
self, self,
video_path: str, video_path: str,
mask_path: str, detections_path: str,
output_path: str, output_path: str,
blur_size: int, blur_size: int,
fmt: str, fmt: str,
@ -53,7 +53,7 @@ class AsyncBakeGenerator:
self.worker_thread = threading.Thread( self.worker_thread = threading.Thread(
target=self._worker, target=self._worker,
args=(video_path, mask_path, output_path, blur_size, fmt), args=(video_path, detections_path, output_path, blur_size, fmt),
daemon=True, daemon=True,
) )
self.worker_thread.start() self.worker_thread.start()
@ -72,7 +72,7 @@ class AsyncBakeGenerator:
def _worker( def _worker(
self, self,
video_path: str, video_path: str,
mask_path: str, detections_path: str,
output_path: str, output_path: str,
blur_size: int, blur_size: int,
fmt: str, fmt: str,
@ -85,7 +85,7 @@ class AsyncBakeGenerator:
client = get_client() client = get_client()
task_id = client.bake_blur( task_id = client.bake_blur(
video_path=video_path, video_path=video_path,
mask_path=mask_path, detections_path=detections_path,
output_path=output_path, output_path=output_path,
blur_size=blur_size, blur_size=blur_size,
fmt=fmt, fmt=fmt,

View File

@ -9,8 +9,7 @@ Blender's UI remains responsive via bpy.app.timers.
import os import os
import threading import threading
import queue import queue
from functools import partial from typing import Optional, Callable
from typing import Optional, Callable, Tuple
from pathlib import Path from pathlib import Path
# Will be imported when running inside Blender # Will be imported when running inside Blender
@ -151,8 +150,19 @@ class AsyncMaskGenerator:
status = client.get_task_status(task_id) status = client.get_task_status(task_id)
state = status.get("status") state = status.get("status")
total = status.get("total", 0)
if total > 0:
self.total_frames = total
if state == "completed": if state == "completed":
self.result_queue.put(("done", output_dir)) final_progress = status.get("progress", self.total_frames)
if final_progress >= 0:
self.progress_queue.put(("progress", final_progress))
result_path = status.get(
"result_path",
os.path.join(output_dir, "detections.msgpack"),
)
self.result_queue.put(("done", result_path))
return return
elif state == "failed": elif state == "failed":
@ -167,7 +177,7 @@ class AsyncMaskGenerator:
# Report progress # Report progress
progress = status.get("progress", 0) progress = status.get("progress", 0)
if progress > 0: if progress >= 0:
self.progress_queue.put(("progress", progress)) self.progress_queue.put(("progress", progress))
time.sleep(0.5) time.sleep(0.5)
@ -207,6 +217,16 @@ class AsyncMaskGenerator:
msg_type, data = self.result_queue.get_nowait() msg_type, data = self.result_queue.get_nowait()
self.is_running = False self.is_running = False
# Ensure UI receives a final progress update before completion.
if (
msg_type == "done"
and self.total_frames > 0
and self.current_frame < self.total_frames
and self._on_progress
):
self.current_frame = self.total_frames
self._on_progress(self.current_frame, self.total_frames)
if self._on_complete: if self._on_complete:
self._on_complete(msg_type, data) self._on_complete(msg_type, data)

View File

@ -252,7 +252,7 @@ class InferenceClient:
def bake_blur( def bake_blur(
self, self,
video_path: str, video_path: str,
mask_path: str, detections_path: str,
output_path: str, output_path: str,
blur_size: int, blur_size: int,
fmt: str, fmt: str,
@ -268,7 +268,7 @@ class InferenceClient:
data = { data = {
"video_path": video_path, "video_path": video_path,
"mask_path": mask_path, "detections_path": detections_path,
"output_path": output_path, "output_path": output_path,
"blur_size": blur_size, "blur_size": blur_size,
"format": fmt, "format": fmt,

View File

@ -78,6 +78,11 @@ def get_cache_dir_for_strip(strip_name: str) -> str:
return os.path.join(get_cache_root(), strip_name) return os.path.join(get_cache_root(), strip_name)
def get_detections_path_for_strip(strip_name: str) -> str:
"""Get msgpack detection cache path for a specific strip."""
return os.path.join(get_cache_dir_for_strip(strip_name), "detections.msgpack")
def get_cache_info(strip_name: Optional[str] = None) -> Tuple[str, int, int]: def get_cache_info(strip_name: Optional[str] = None) -> Tuple[str, int, int]:
""" """
Get cache directory information. Get cache directory information.

View File

@ -61,26 +61,22 @@
# venvをアクティベート # venvをアクティベート
source "$VENV_DIR/bin/activate" source "$VENV_DIR/bin/activate"
# 必要なパッケージのインストール確認とインストール # PyTorch ROCm版の導入GPU未認識時のみ
if ! python -c "import torch; print(torch.cuda.is_available())" 2>/dev/null | grep -q "True"; then if ! python -c "import torch; print(torch.cuda.is_available())" 2>/dev/null | grep -q "True"; then
echo "[Setup] Installing Python dependencies..." echo "[Setup] Installing PyTorch ROCm dependencies..."
# まずPyTorch ROCm版をインストールROCm 7.0 nightly - ROCm 7.1.1環境で動作確認済み)
pip install --quiet --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.0 pip install --quiet --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.0
# 次に通常のPyPIから他のパッケージをインストール
pip install --quiet \
ultralytics \
opencv-python-headless \
numpy \
fastapi \
uvicorn \
pydantic
# opencv-pythonがインストールされていたら削除headless版のみ使用
pip uninstall -y opencv-python opencv 2>/dev/null || true
# opencv-python-headlessを再インストールして確実にする
pip install --quiet --force-reinstall opencv-python-headless
echo "[Setup] Dependencies installed successfully"
fi fi
# プロジェクト依存requirements.txtを同期
if [ -f "$PWD/requirements.txt" ]; then
echo "[Setup] Syncing Python dependencies from requirements.txt..."
pip install --quiet -r "$PWD/requirements.txt"
fi
# opencv-pythonが入っていた場合はheadlessに統一
pip uninstall -y opencv-python opencv 2>/dev/null || true
pip install --quiet --upgrade opencv-python-headless
# Pythonパスにカレントディレクトリを追加 # Pythonパスにカレントディレクトリを追加
export PYTHONPATH="$PWD:$PYTHONPATH" export PYTHONPATH="$PWD:$PYTHONPATH"

View File

@ -12,6 +12,7 @@ from bpy.types import Operator
from ..core.async_bake_generator import get_bake_generator from ..core.async_bake_generator import get_bake_generator
from ..core.async_generator import get_generator as get_mask_generator from ..core.async_generator import get_generator as get_mask_generator
from ..core.utils import get_detections_path_for_strip
KEY_ORIGINAL = "facemask_original_filepath" KEY_ORIGINAL = "facemask_original_filepath"
@ -28,19 +29,9 @@ FORMAT_EXT = {
} }
def _find_mask_strip(seq_editor, strip_name: str): def _output_path(video_strip, detections_path: str, fmt: str) -> str:
return seq_editor.strips.get(f"{strip_name}_mask")
def _resolve_mask_path(mask_strip) -> str:
if mask_strip.type == "MOVIE":
return bpy.path.abspath(mask_strip.filepath)
return ""
def _output_path(video_strip, mask_path: str, fmt: str) -> str:
ext = FORMAT_EXT.get(fmt, "mp4") ext = FORMAT_EXT.get(fmt, "mp4")
out_dir = os.path.dirname(mask_path) out_dir = os.path.dirname(detections_path)
safe_name = video_strip.name.replace("/", "_").replace("\\", "_") safe_name = video_strip.name.replace("/", "_").replace("\\", "_")
return os.path.join(out_dir, f"{safe_name}_blurred.{ext}") return os.path.join(out_dir, f"{safe_name}_blurred.{ext}")
@ -83,22 +74,17 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
scene = context.scene scene = context.scene
video_strip = seq_editor.active_strip video_strip = seq_editor.active_strip
mask_strip = _find_mask_strip(seq_editor, video_strip.name)
if not mask_strip:
self.report({"ERROR"}, f"Mask strip not found: {video_strip.name}_mask")
return {"CANCELLED"}
video_path = bpy.path.abspath(video_strip.filepath) video_path = bpy.path.abspath(video_strip.filepath)
mask_path = _resolve_mask_path(mask_strip) detections_path = get_detections_path_for_strip(video_strip.name)
if not os.path.exists(video_path): if not os.path.exists(video_path):
self.report({"ERROR"}, f"Source video not found: {video_path}") self.report({"ERROR"}, f"Source video not found: {video_path}")
return {"CANCELLED"} return {"CANCELLED"}
if not mask_path or not os.path.exists(mask_path): if not os.path.exists(detections_path):
self.report({"ERROR"}, f"Mask video not found: {mask_path}") self.report({"ERROR"}, f"Detection cache not found: {detections_path}")
return {"CANCELLED"} return {"CANCELLED"}
bake_format = scene.facemask_bake_format bake_format = scene.facemask_bake_format
output_path = _output_path(video_strip, mask_path, bake_format) output_path = _output_path(video_strip, detections_path, bake_format)
blur_size = int(scene.facemask_bake_blur_size) blur_size = int(scene.facemask_bake_blur_size)
# Reuse baked cache when parameters match and file still exists. # Reuse baked cache when parameters match and file still exists.
@ -164,7 +150,7 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
try: try:
bake_generator.start( bake_generator.start(
video_path=video_path, video_path=video_path,
mask_path=mask_path, detections_path=detections_path,
output_path=output_path, output_path=output_path,
blur_size=blur_size, blur_size=blur_size,
fmt=bake_format.lower(), fmt=bake_format.lower(),

View File

@ -55,8 +55,7 @@ class SEQUENCER_OT_generate_face_mask(Operator):
# Check cache - if masks already exist, use them # Check cache - if masks already exist, use them
expected_frame_count = strip.frame_final_end - strip.frame_final_start + 1 expected_frame_count = strip.frame_final_end - strip.frame_final_start + 1
if self._check_cache(output_dir, expected_frame_count): if self._check_cache(output_dir, expected_frame_count):
self.report({'INFO'}, f"Using cached masks from {output_dir}") self.report({'INFO'}, f"Using cached detections from {output_dir}")
self._add_mask_strip(context, strip.name, output_dir)
return {'FINISHED'} return {'FINISHED'}
# Get frame range # Get frame range
@ -71,20 +70,26 @@ class SEQUENCER_OT_generate_face_mask(Operator):
self.report({'WARNING'}, "Mask generation already in progress") self.report({'WARNING'}, "Mask generation already in progress")
return {'CANCELLED'} return {'CANCELLED'}
# Store strip name for callback
strip_name = strip.name
def on_complete(status, data): def on_complete(status, data):
"""Called when mask generation completes.""" """Called when mask generation completes."""
wm = context.window_manager
wm.mask_total = max(wm.mask_total, generator.total_frames)
if status == "done":
wm.mask_progress = wm.mask_total
elif status in {"error", "cancelled"}:
wm.mask_progress = min(wm.mask_progress, wm.mask_total)
if status == "done": if status == "done":
# Add mask strip to sequence editor
self._add_mask_strip(context, strip_name, data)
print(f"[FaceMask] Mask generation completed: {data}") print(f"[FaceMask] Mask generation completed: {data}")
elif status == "error": elif status == "error":
print(f"[FaceMask] Error: {data}") print(f"[FaceMask] Error: {data}")
elif status == "cancelled": elif status == "cancelled":
print("[FaceMask] Generation cancelled") print("[FaceMask] Generation cancelled")
for area in context.screen.areas:
if area.type == 'SEQUENCE_EDITOR':
area.tag_redraw()
def on_progress(current, total): def on_progress(current, total):
"""Called on progress updates.""" """Called on progress updates."""
# Update window manager properties for UI # Update window manager properties for UI
@ -143,120 +148,28 @@ class SEQUENCER_OT_generate_face_mask(Operator):
if not os.path.exists(cache_dir): if not os.path.exists(cache_dir):
return False return False
# Check for MP4 video (new format) detections_path = os.path.join(cache_dir, "detections.msgpack")
mask_video = os.path.join(cache_dir, "mask.mp4") if not os.path.exists(detections_path):
if os.path.exists(mask_video):
# Prefer frame-count verification when cv2 is available, but do not
# hard-fail on Blender Python environments without cv2.
try:
import cv2
cap = cv2.VideoCapture(mask_video)
if cap.isOpened():
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
cap.release()
# Accept cache if at least 90% of frames exist
return frame_count >= expected_frames * 0.9
cap.release()
return False return False
# Quick sanity check: non-empty file
try:
if os.path.getsize(detections_path) <= 0:
return False
except OSError:
return False
# Optional frame count verification if msgpack is available
try:
import msgpack
with open(detections_path, "rb") as f:
payload = msgpack.unpackb(f.read(), raw=False)
frames = payload.get("frames", [])
return len(frames) >= expected_frames * 0.9
except Exception: except Exception:
# Fallback: treat existing MP4 cache as valid when cv2 is unavailable.
return True return True
# Fallback: check for PNG sequence (backward compatibility)
mask_files = [f for f in os.listdir(cache_dir)
if f.startswith("mask_") and f.endswith(".png")]
# Accept cache if at least 90% of frames exist
return len(mask_files) >= expected_frames * 0.9
def _add_mask_strip(self, context, source_strip_name: str, mask_path: str):
"""Add mask video as a new strip.
Args:
context: Blender context
source_strip_name: Name of the source video strip
mask_path: Path to mask video file or directory (for backward compatibility)
"""
scene = context.scene
seq_editor = scene.sequence_editor
if not seq_editor:
return
# Find source strip (Blender 5.0 uses 'strips' instead of 'sequences')
source_strip = seq_editor.strips.get(source_strip_name)
if not source_strip:
return
# Check if mask_path is a video file or directory (backward compatibility)
if os.path.isfile(mask_path):
# New format: single MP4 file
mask_video = mask_path
else:
# Old format: directory with PNG sequence (backward compatibility)
mask_video = os.path.join(mask_path, "mask.mp4")
if not os.path.exists(mask_video):
# Fallback to PNG sequence
mask_files = sorted([
f for f in os.listdir(mask_path)
if f.startswith("mask_") and f.endswith(".png")
])
if not mask_files:
return
first_mask = os.path.join(mask_path, mask_files[0])
self._add_mask_strip_png_sequence(context, source_strip_name, mask_path, mask_files, first_mask)
return
# Find an empty channel
used_channels = {s.channel for s in seq_editor.strips}
new_channel = source_strip.channel + 1
while new_channel in used_channels:
new_channel += 1
# Add movie strip (Blender 5.0 API)
mask_strip = seq_editor.strips.new_movie(
name=f"{source_strip_name}_mask",
filepath=mask_video,
channel=new_channel,
frame_start=source_strip.frame_final_start,
)
# Set blend mode for mask
mask_strip.blend_type = 'ALPHA_OVER'
mask_strip.blend_alpha = 0.5
def _add_mask_strip_png_sequence(self, context, source_strip_name, mask_dir, mask_files, first_mask):
"""Backward compatibility: Add PNG sequence as mask strip."""
scene = context.scene
seq_editor = scene.sequence_editor
source_strip = seq_editor.strips.get(source_strip_name)
if not source_strip:
return
# Find an empty channel
used_channels = {s.channel for s in seq_editor.strips}
new_channel = source_strip.channel + 1
while new_channel in used_channels:
new_channel += 1
# Add image sequence (Blender 5.0 API)
mask_strip = seq_editor.strips.new_image(
name=f"{source_strip_name}_mask",
filepath=first_mask,
channel=new_channel,
frame_start=source_strip.frame_final_start,
)
# Add remaining frames
for mask_file in mask_files[1:]:
mask_strip.elements.append(mask_file)
# Set blend mode for mask
mask_strip.blend_type = 'ALPHA_OVER'
mask_strip.blend_alpha = 0.5
class SEQUENCER_OT_cancel_mask_generation(Operator): class SEQUENCER_OT_cancel_mask_generation(Operator):
"""Cancel ongoing mask generation.""" """Cancel ongoing mask generation."""

View File

@ -5,12 +5,18 @@ Provides a sidebar panel in the Video Sequence Editor
for controlling mask generation and blur application. for controlling mask generation and blur application.
""" """
import os
import bpy import bpy
from bpy.types import Panel from bpy.types import Panel
from ..core.async_bake_generator import get_bake_generator from ..core.async_bake_generator import get_bake_generator
from ..core.async_generator import get_generator from ..core.async_generator import get_generator
from ..core.utils import get_server_status, get_cache_info, format_size from ..core.utils import (
get_server_status,
get_cache_info,
format_size,
get_detections_path_for_strip,
)
class SEQUENCER_PT_face_mask(Panel): class SEQUENCER_PT_face_mask(Panel):
@ -186,19 +192,19 @@ class SEQUENCER_PT_face_mask(Panel):
row = box.row() row = box.row()
row.label(text=f"Strip: {strip.name}") row.label(text=f"Strip: {strip.name}")
# Check for existing mask detections_path = get_detections_path_for_strip(strip.name)
seq_editor = context.scene.sequence_editor has_mask = bpy.path.abspath(detections_path) and os.path.exists(
mask_name = f"{strip.name}_mask" bpy.path.abspath(detections_path)
has_mask = mask_name in seq_editor.strips )
if has_mask: if has_mask:
row = box.row() row = box.row()
row.label(text="Mask exists", icon='CHECKMARK') row.label(text="Detection cache exists", icon='CHECKMARK')
# Generate button # Generate button
op = box.operator( op = box.operator(
"sequencer.generate_face_mask", "sequencer.generate_face_mask",
text="Generate Face Mask" if not has_mask else "Regenerate Mask", text="Generate Detection Cache" if not has_mask else "Regenerate Cache",
icon='FACE_MAPS', icon='FACE_MAPS',
) )
@ -207,13 +213,13 @@ class SEQUENCER_PT_face_mask(Panel):
box = layout.box() box = layout.box()
box.label(text="Blur Bake", icon='MATFLUID') box.label(text="Blur Bake", icon='MATFLUID')
# Check for mask strip detections_path = get_detections_path_for_strip(strip.name)
seq_editor = context.scene.sequence_editor has_mask = bpy.path.abspath(detections_path) and os.path.exists(
mask_name = f"{strip.name}_mask" bpy.path.abspath(detections_path)
has_mask = mask_name in seq_editor.strips )
if not has_mask: if not has_mask:
box.label(text="Generate a mask first", icon='INFO') box.label(text="Generate detection cache first", icon='INFO')
return return
# Bake parameters # Bake parameters

7
requirements.txt Normal file
View File

@ -0,0 +1,7 @@
ultralytics
opencv-python-headless
msgpack
numpy
fastapi
uvicorn
pydantic

View File

@ -33,7 +33,6 @@ fix_library_path()
import threading import threading
import uuid import uuid
import queue
import traceback import traceback
from typing import Dict, Optional, List from typing import Dict, Optional, List
from pathlib import Path from pathlib import Path
@ -43,6 +42,7 @@ from pydantic import BaseModel
import uvicorn import uvicorn
import cv2 import cv2
import numpy as np import numpy as np
import msgpack
# Add project root to path for imports if needed # Add project root to path for imports if needed
sys.path.append(str(Path(__file__).parent.parent)) sys.path.append(str(Path(__file__).parent.parent))
@ -86,7 +86,7 @@ class GenerateRequest(BaseModel):
class BakeRequest(BaseModel): class BakeRequest(BaseModel):
video_path: str video_path: str
mask_path: str detections_path: str
output_path: str output_path: str
blur_size: int = 50 blur_size: int = 50
format: str = "mp4" format: str = "mp4"
@ -122,185 +122,79 @@ def _build_video_writer(
raise RuntimeError(f"Failed to create video writer for format='{fmt}'") raise RuntimeError(f"Failed to create video writer for format='{fmt}'")
def _scale_bbox(
x: int,
y: int,
w: int,
h: int,
scale: float,
frame_width: int,
frame_height: int,
) -> Optional[List[int]]:
"""Scale bbox around center and clamp to frame boundaries."""
if w <= 0 or h <= 0:
return None
center_x = x + (w * 0.5)
center_y = y + (h * 0.5)
scaled_w = max(1, int(w * scale))
scaled_h = max(1, int(h * scale))
x1 = max(0, int(center_x - scaled_w * 0.5))
y1 = max(0, int(center_y - scaled_h * 0.5))
x2 = min(frame_width, x1 + scaled_w)
y2 = min(frame_height, y1 + scaled_h)
out_w = x2 - x1
out_h = y2 - y1
if out_w <= 0 or out_h <= 0:
return None
return [x1, y1, out_w, out_h]
def process_video_task(task_id: str, req: GenerateRequest): def process_video_task(task_id: str, req: GenerateRequest):
"""Background task to process video with async MP4 output.""" """Background task to detect faces and save bbox cache as msgpack."""
writer = None cap = None
write_queue = None
writer_thread = None
try: try:
tasks[task_id].status = TaskStatus.PROCESSING tasks[task_id].status = TaskStatus.PROCESSING
cancel_event = cancel_events.get(task_id) cancel_event = cancel_events.get(task_id)
# Verify video exists
if not os.path.exists(req.video_path): if not os.path.exists(req.video_path):
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = f"Video not found: {req.video_path}" tasks[task_id].message = f"Video not found: {req.video_path}"
return return
# Initialize detector (will load model on first run)
print(f"Loading detector for task {task_id}...") print(f"Loading detector for task {task_id}...")
detector = get_detector( detector = get_detector(
conf_threshold=req.conf_threshold, conf_threshold=req.conf_threshold,
iou_threshold=req.iou_threshold iou_threshold=req.iou_threshold,
) )
_ = detector.model _ = detector.model
# Open video
cap = cv2.VideoCapture(req.video_path) cap = cv2.VideoCapture(req.video_path)
if not cap.isOpened(): if not cap.isOpened():
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = "Failed to open video" tasks[task_id].message = "Failed to open video"
return return
# Get video properties fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
end_frame = min(req.end_frame, total_video_frames - 1) end_frame = min(req.end_frame, total_video_frames - 1)
frames_to_process = end_frame - req.start_frame + 1 frames_to_process = end_frame - req.start_frame + 1
if frames_to_process <= 0:
tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = "Invalid frame range"
return
tasks[task_id].total = frames_to_process tasks[task_id].total = frames_to_process
# Ensure output directory exists
os.makedirs(req.output_dir, exist_ok=True) os.makedirs(req.output_dir, exist_ok=True)
output_msgpack_path = os.path.join(req.output_dir, "detections.msgpack")
# Setup MP4 writer (grayscale)
output_video_path = os.path.join(req.output_dir, "mask.mp4")
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height), isColor=False)
if not writer.isOpened():
tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = "Failed to create video writer"
cap.release()
return
# Async writer setup
write_queue = queue.Queue(maxsize=30) # Buffer up to 30 frames
writer_running = threading.Event()
writer_running.set()
def async_writer():
"""Background thread for writing frames to video."""
while writer_running.is_set() or not write_queue.empty():
try:
mask = write_queue.get(timeout=0.1)
if mask is not None:
writer.write(mask)
write_queue.task_done()
except queue.Empty:
continue
writer_thread = threading.Thread(target=async_writer, daemon=True)
writer_thread.start()
print(f"Starting processing: {req.video_path} ({frames_to_process} frames) -> {output_video_path}")
# Batch processing configuration
BATCH_SIZE = 5 # Optimal batch size for 4K video (72.9% improvement)
frame_buffer = []
TEMPORAL_SIDE_WEIGHT = 0.7
TEMPORAL_CENTER_WEIGHT = 1.0
# Temporal blending state (streaming, low-memory)
prev_mask = None
curr_mask = None
wrote_first_frame = False
def _scale_mask(mask: np.ndarray, weight: float) -> np.ndarray:
"""Scale mask intensity for temporal blending."""
if weight == 1.0:
return mask
return cv2.convertScaleAbs(mask, alpha=weight, beta=0)
def _blend_edge(base: np.ndarray, neighbor: np.ndarray) -> np.ndarray:
"""Blend for first/last frame (one-sided temporal context)."""
base_w = _scale_mask(base, TEMPORAL_CENTER_WEIGHT)
neighbor_w = _scale_mask(neighbor, TEMPORAL_SIDE_WEIGHT)
return cv2.max(base_w, neighbor_w)
def _blend_middle(prev: np.ndarray, cur: np.ndarray, nxt: np.ndarray) -> np.ndarray:
"""Blend for middle frames (previous/current/next temporal context)."""
prev_w = _scale_mask(prev, TEMPORAL_SIDE_WEIGHT)
cur_w = _scale_mask(cur, TEMPORAL_CENTER_WEIGHT)
nxt_w = _scale_mask(nxt, TEMPORAL_SIDE_WEIGHT)
return cv2.max(cur_w, cv2.max(prev_w, nxt_w))
def push_mask_temporal(raw_mask: np.ndarray):
"""Push mask and emit blended output in frame order."""
nonlocal prev_mask, curr_mask, wrote_first_frame
if prev_mask is None:
prev_mask = raw_mask
return
if curr_mask is None:
curr_mask = raw_mask
return
if not wrote_first_frame:
write_queue.put(_blend_edge(prev_mask, curr_mask))
wrote_first_frame = True
# Emit blended current frame using prev/current/next
write_queue.put(_blend_middle(prev_mask, curr_mask, raw_mask))
# Slide temporal window
prev_mask = curr_mask
curr_mask = raw_mask
def flush_temporal_tail():
"""Flush remaining masks after all frames are processed."""
if prev_mask is None:
return
# Single-frame case
if curr_mask is None:
write_queue.put(_scale_mask(prev_mask, TEMPORAL_CENTER_WEIGHT))
return
# Two-frame case
if not wrote_first_frame:
write_queue.put(_blend_edge(prev_mask, curr_mask))
# Always emit last frame with one-sided blend
write_queue.put(_blend_edge(curr_mask, prev_mask))
def process_batch():
"""Process accumulated batch of frames."""
if not frame_buffer:
return
# Batch inference at full resolution
batch_detections = detector.detect_batch(frame_buffer)
# Generate masks for each frame
for i, detections in enumerate(batch_detections):
frame = frame_buffer[i]
# Generate mask at original resolution
mask = detector.generate_mask(
frame.shape,
detections,
mask_scale=req.mask_scale
)
# Temporal blend before async write
push_mask_temporal(mask)
# Clear buffer
frame_buffer.clear()
# Seek once to the starting frame. Avoid random-access seek on every frame.
if req.start_frame > 0: if req.start_frame > 0:
seek_ok = cap.set(cv2.CAP_PROP_POS_FRAMES, req.start_frame) seek_ok = cap.set(cv2.CAP_PROP_POS_FRAMES, req.start_frame)
if not seek_ok: if not seek_ok:
print(
f"[FaceMask] Warning: CAP_PROP_POS_FRAMES seek failed, "
f"fallback to sequential skip ({req.start_frame} frames)"
)
for _ in range(req.start_frame): for _ in range(req.start_frame):
ret, _ = cap.read() ret, _ = cap.read()
if not ret: if not ret:
@ -310,49 +204,81 @@ def process_video_task(task_id: str, req: GenerateRequest):
) )
return return
# Process loop with batching frame_buffer: List[np.ndarray] = []
frame_detections: List[List[List[float]]] = []
batch_size = 5
current_count = 0 current_count = 0
for frame_idx in range(req.start_frame, end_frame + 1):
def process_batch():
nonlocal current_count
if not frame_buffer:
return
batch_detections = detector.detect_batch(frame_buffer)
for detections in batch_detections:
packed_detections: List[List[float]] = []
for x, y, w, h, conf in detections:
scaled = _scale_bbox(
int(x),
int(y),
int(w),
int(h),
float(req.mask_scale),
width,
height,
)
if scaled is None:
continue
packed_detections.append(
[scaled[0], scaled[1], scaled[2], scaled[3], float(conf)]
)
frame_detections.append(packed_detections)
current_count += 1
tasks[task_id].progress = current_count
frame_buffer.clear()
print(
f"Starting detection cache generation: {req.video_path} "
f"({frames_to_process} frames) -> {output_msgpack_path}"
)
for _ in range(req.start_frame, end_frame + 1):
if cancel_event and cancel_event.is_set(): if cancel_event and cancel_event.is_set():
tasks[task_id].status = TaskStatus.CANCELLED tasks[task_id].status = TaskStatus.CANCELLED
tasks[task_id].message = "Cancelled by user" tasks[task_id].message = "Cancelled by user"
break break
# Read next frame sequentially (after one-time initial seek)
ret, frame = cap.read() ret, frame = cap.read()
if not ret:
break
if ret:
# Store frame for batch processing
frame_buffer.append(frame) frame_buffer.append(frame)
if len(frame_buffer) >= batch_size:
# Process batch when full
if len(frame_buffer) >= BATCH_SIZE:
process_batch() process_batch()
# Update progress
current_count += 1
tasks[task_id].progress = current_count
# Process remaining frames in buffer
if frame_buffer: if frame_buffer:
process_batch() process_batch()
flush_temporal_tail()
# Cleanup
writer_running.clear()
write_queue.join() # Wait for all frames to be written
if writer_thread:
writer_thread.join(timeout=5)
cap.release()
if writer:
writer.release()
if tasks[task_id].status == TaskStatus.PROCESSING: if tasks[task_id].status == TaskStatus.PROCESSING:
payload = {
"version": 1,
"video_path": req.video_path,
"start_frame": req.start_frame,
"end_frame": req.start_frame + len(frame_detections) - 1,
"width": width,
"height": height,
"fps": fps,
"mask_scale": float(req.mask_scale),
"frames": frame_detections,
}
with open(output_msgpack_path, "wb") as f:
f.write(msgpack.packb(payload, use_bin_type=True))
tasks[task_id].status = TaskStatus.COMPLETED tasks[task_id].status = TaskStatus.COMPLETED
tasks[task_id].result_path = output_video_path # Return video path tasks[task_id].result_path = output_msgpack_path
tasks[task_id].message = "Processing completed successfully" tasks[task_id].message = "Detection cache completed"
print(f"Task {task_id} completed: {output_video_path}") print(f"Task {task_id} completed: {output_msgpack_path}")
except Exception as e: except Exception as e:
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
@ -360,15 +286,15 @@ def process_video_task(task_id: str, req: GenerateRequest):
print(f"Error in task {task_id}: {e}") print(f"Error in task {task_id}: {e}")
traceback.print_exc() traceback.print_exc()
finally: finally:
# Cleanup if cap:
cap.release()
if task_id in cancel_events: if task_id in cancel_events:
del cancel_events[task_id] del cancel_events[task_id]
def process_bake_task(task_id: str, req: BakeRequest): def process_bake_task(task_id: str, req: BakeRequest):
"""Background task to bake blur into a regular video file.""" """Background task to bake blur using bbox detections in msgpack."""
src_cap = None src_cap = None
mask_cap = None
writer = None writer = None
try: try:
@ -379,58 +305,55 @@ def process_bake_task(task_id: str, req: BakeRequest):
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = f"Video not found: {req.video_path}" tasks[task_id].message = f"Video not found: {req.video_path}"
return return
if not os.path.exists(req.detections_path):
if not os.path.exists(req.mask_path):
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = f"Mask video not found: {req.mask_path}" tasks[task_id].message = f"Detections file not found: {req.detections_path}"
return return
src_cap = cv2.VideoCapture(req.video_path) src_cap = cv2.VideoCapture(req.video_path)
mask_cap = cv2.VideoCapture(req.mask_path)
if not src_cap.isOpened(): if not src_cap.isOpened():
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = "Failed to open source video" tasks[task_id].message = "Failed to open source video"
return return
if not mask_cap.isOpened():
with open(req.detections_path, "rb") as f:
payload = msgpack.unpackb(f.read(), raw=False)
frames = payload.get("frames")
if not isinstance(frames, list):
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = "Failed to open mask video" tasks[task_id].message = "Invalid detections format: 'frames' is missing"
return return
src_fps = src_cap.get(cv2.CAP_PROP_FPS) or 30.0 src_fps = src_cap.get(cv2.CAP_PROP_FPS) or 30.0
src_width = int(src_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) src_width = int(src_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
src_height = int(src_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) src_height = int(src_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
src_frames = int(src_cap.get(cv2.CAP_PROP_FRAME_COUNT)) src_frames = int(src_cap.get(cv2.CAP_PROP_FRAME_COUNT))
mask_frames = int(mask_cap.get(cv2.CAP_PROP_FRAME_COUNT))
if src_width <= 0 or src_height <= 0: if src_width <= 0 or src_height <= 0:
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = "Invalid source video dimensions" tasks[task_id].message = "Invalid source video dimensions"
return return
total = min(src_frames, mask_frames) if src_frames > 0 and mask_frames > 0 else 0 total = min(src_frames, len(frames)) if src_frames > 0 else len(frames)
if total <= 0: if total <= 0:
tasks[task_id].status = TaskStatus.FAILED tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = "Source/mask frame count is zero" tasks[task_id].message = "Source/detections frame count is zero"
return return
tasks[task_id].total = total tasks[task_id].total = total
output_dir = os.path.dirname(req.output_path) output_dir = os.path.dirname(req.output_path)
if output_dir: if output_dir:
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
writer = _build_video_writer(req.output_path, req.format, src_fps, src_width, src_height) writer = _build_video_writer(req.output_path, req.format, src_fps, src_width, src_height)
# Kernel size must be odd and >= 1
blur_size = max(1, int(req.blur_size)) blur_size = max(1, int(req.blur_size))
if blur_size % 2 == 0: if blur_size % 2 == 0:
blur_size += 1 blur_size += 1
feather_radius = max(3, min(25, blur_size // 3))
feather_kernel = feather_radius * 2 + 1
print(f"[FaceMask] Starting blur bake: {req.video_path} + {req.mask_path} -> {req.output_path}")
if src_frames != mask_frames:
print( print(
f"[FaceMask] Warning: frame count mismatch " f"[FaceMask] Starting blur bake (bbox-msgpack): {req.video_path} + "
f"(src={src_frames}, mask={mask_frames}), processing {total} frames" f"{req.detections_path} -> {req.output_path}"
) )
for idx in range(total): for idx in range(total):
@ -440,29 +363,61 @@ def process_bake_task(task_id: str, req: BakeRequest):
break break
src_ok, src_frame = src_cap.read() src_ok, src_frame = src_cap.read()
mask_ok, mask_frame = mask_cap.read() if not src_ok:
if not src_ok or not mask_ok:
break break
if mask_frame.ndim == 3: frame_boxes = frames[idx] if idx < len(frames) else []
mask_gray = cv2.cvtColor(mask_frame, cv2.COLOR_BGR2GRAY) if not frame_boxes:
else: writer.write(src_frame)
mask_gray = mask_frame tasks[task_id].progress = idx + 1
continue
if mask_gray.shape[0] != src_height or mask_gray.shape[1] != src_width: mask_gray = np.zeros((src_height, src_width), dtype=np.uint8)
mask_gray = cv2.resize( for box in frame_boxes:
mask_gray, if not isinstance(box, list) or len(box) < 4:
(src_width, src_height), continue
interpolation=cv2.INTER_LINEAR, x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
if w <= 0 or h <= 0:
continue
center = (x + w // 2, y + h // 2)
axes = (max(1, w // 2), max(1, h // 2))
cv2.ellipse(mask_gray, center, axes, 0, 0, 360, 255, -1)
if cv2.countNonZero(mask_gray) == 0:
writer.write(src_frame)
tasks[task_id].progress = idx + 1
continue
mask_gray = cv2.GaussianBlur(mask_gray, (feather_kernel, feather_kernel), 0)
_, mask_binary = cv2.threshold(mask_gray, 2, 255, cv2.THRESH_BINARY)
non_zero_coords = cv2.findNonZero(mask_binary)
if non_zero_coords is None:
writer.write(src_frame)
tasks[task_id].progress = idx + 1
continue
x, y, w, h = cv2.boundingRect(non_zero_coords)
blur_margin = max(1, (blur_size // 2) + feather_radius)
x1 = max(0, x - blur_margin)
y1 = max(0, y - blur_margin)
x2 = min(src_width, x + w + blur_margin)
y2 = min(src_height, y + h + blur_margin)
roi_src = src_frame[y1:y2, x1:x2]
roi_mask = mask_gray[y1:y2, x1:x2]
if roi_src.size == 0:
writer.write(src_frame)
tasks[task_id].progress = idx + 1
continue
roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
roi_blurred.astype(np.float32) * roi_alpha
) )
output_frame = src_frame.copy()
blurred = cv2.GaussianBlur(src_frame, (blur_size, blur_size), 0) output_frame[y1:y2, x1:x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
alpha = (mask_gray.astype(np.float32) / 255.0)[..., np.newaxis] writer.write(output_frame)
composed = (src_frame.astype(np.float32) * (1.0 - alpha)) + (
blurred.astype(np.float32) * alpha
)
writer.write(np.clip(composed, 0, 255).astype(np.uint8))
tasks[task_id].progress = idx + 1 tasks[task_id].progress = idx + 1
if tasks[task_id].status == TaskStatus.PROCESSING: if tasks[task_id].status == TaskStatus.PROCESSING:
@ -479,8 +434,6 @@ def process_bake_task(task_id: str, req: BakeRequest):
finally: finally:
if src_cap: if src_cap:
src_cap.release() src_cap.release()
if mask_cap:
mask_cap.release()
if writer: if writer:
writer.release() writer.release()
if task_id in cancel_events: if task_id in cancel_events: