ROI修正/ハードウェアエンコード

This commit is contained in:
Keisuke Hirata 2026-02-16 18:20:31 +09:00
parent 0d63b2ef6d
commit 914667edbf
2 changed files with 140 additions and 30 deletions

View File

@ -73,9 +73,11 @@
pip install --quiet -r "$PWD/requirements.txt"
fi
# opencv-pythonが入っていた場合はheadlessに統一
pip uninstall -y opencv-python opencv 2>/dev/null || true
pip install --quiet --upgrade opencv-python-headless
# OpenCVは壊れやすいので、import失敗時のみheadlessを強制再導入
if ! python -c "import cv2" >/dev/null 2>&1; then
echo "[Setup] Repairing OpenCV (opencv-python-headless)..."
pip install --quiet --force-reinstall --no-cache-dir opencv-python-headless
fi
# Pythonパスにカレントディレクトリを追加
export PYTHONPATH="$PWD:$PYTHONPATH"

View File

@ -34,6 +34,7 @@ fix_library_path()
import threading
import uuid
import traceback
import subprocess
from typing import Dict, Optional, List
from pathlib import Path
@ -92,15 +93,100 @@ class BakeRequest(BaseModel):
format: str = "mp4"
class _FFmpegPipeWriter:
    """Stream raw frames into an ffmpeg subprocess through its stdin.

    The child process is started immediately from ``cmd``. Its stdout is
    discarded and its stderr is captured so that ``release()`` can include
    the tail of ffmpeg's error output when the process exits with a
    non-zero code.
    """

    def __init__(self, cmd: List[str]):
        """Spawn the process described by the argument vector ``cmd``.

        Raises:
            RuntimeError: if the child was started without a stdin pipe.
            OSError: propagated from ``subprocess.Popen`` (for example
                when the executable cannot be found).
        """
        self._proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )
        if self._proc.stdin is None:
            # Do not leave a zombie process or an open pipe behind.
            self._proc.kill()
            if self._proc.stderr:
                self._proc.stderr.close()
            self._proc.wait()
            raise RuntimeError("Failed to open ffmpeg stdin")

    def write(self, frame: np.ndarray) -> None:
        """Send the raw bytes of ``frame`` to the child's stdin.

        Raises:
            RuntimeError: if the stdin pipe is not available.
            OSError: propagated from the pipe write (e.g. a broken pipe
                when the child has already exited).
        """
        if self._proc.stdin is None:
            raise RuntimeError("ffmpeg stdin is not available")
        self._proc.stdin.write(frame.tobytes())

    def release(self) -> None:
        """Close stdin, collect stderr, and wait for the child to exit.

        Raises:
            RuntimeError: if the child exits with a non-zero code; the
                message carries the last 8 lines of its stderr output.
        """
        proc = self._proc

        # Closing stdin signals end-of-stream. The close flushes buffered
        # data and can fail if the child is already gone; the exit code
        # checked below reports that case.
        if proc.stdin:
            try:
                proc.stdin.close()
            except OSError:
                pass

        stderr_output = ""
        if proc.stderr:
            try:
                stderr_output = proc.stderr.read().decode("utf-8", errors="replace")
            except (OSError, ValueError):
                # ValueError: the pipe was already closed by an earlier
                # release() call.
                stderr_output = ""
            finally:
                # Release the pipe's file descriptor explicitly instead of
                # waiting for garbage collection.
                proc.stderr.close()

        rc = proc.wait()
        if rc != 0:
            tail = "\n".join(stderr_output.strip().splitlines()[-8:])
            raise RuntimeError(f"ffmpeg writer failed (code={rc}): {tail}")
def _build_ffmpeg_vaapi_writer(
    output_path: str,
    fps: float,
    width: int,
    height: int,
    *,
    vaapi_device: str = "/dev/dri/renderD128",
    qp: int = 24,
) -> _FFmpegPipeWriter:
    """Create an ffmpeg ``h264_vaapi`` pipe writer.

    The command reads raw ``bgr24`` frames of ``width`` x ``height`` at
    ``fps`` from stdin, converts them to ``nv12``, uploads them to the
    VAAPI device, and encodes them to ``output_path`` with a constant QP.

    Args:
        output_path: Destination file passed to ffmpeg (overwritten, ``-y``).
        fps: Input frame rate passed to ``-r``.
        width: Frame width in pixels.
        height: Frame height in pixels.
        vaapi_device: DRM render node passed to ``-vaapi_device``. The
            default matches the previously hard-coded node.
        qp: Value passed to ``-qp``. Defaults to 24.

    Returns:
        A ``_FFmpegPipeWriter`` wrapping the started process.
    """
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-y",
        "-vaapi_device",
        vaapi_device,
        # Raw input description: must match the bytes written to stdin.
        "-f",
        "rawvideo",
        "-pix_fmt",
        "bgr24",
        "-s",
        f"{width}x{height}",
        "-r",
        f"{fps}",
        "-i",
        "-",
        "-an",
        # Convert to nv12 before uploading frames to the hardware surface.
        "-vf",
        "format=nv12,hwupload",
        "-c:v",
        "h264_vaapi",
        "-qp",
        f"{qp}",
        output_path,
    ]
    return _FFmpegPipeWriter(cmd)
def _build_video_writer(
output_path: str,
fmt: str,
fps: float,
width: int,
height: int,
) -> cv2.VideoWriter:
"""Create VideoWriter with codec fallback per format."""
) -> object:
"""Create writer with VAAPI preference and OpenCV fallback."""
format_key = fmt.lower()
if format_key in {"mp4", "mov"}:
try:
writer = _build_ffmpeg_vaapi_writer(output_path, fps, width, height)
print("[FaceMask] Using output encoder: ffmpeg h264_vaapi (-qp 24)")
return writer
except Exception as e:
print(f"[FaceMask] VAAPI writer unavailable, fallback to OpenCV: {e}")
codec_candidates = {
"mp4": ["avc1", "mp4v"],
"mov": ["avc1", "mp4v"],
@ -372,52 +458,68 @@ def process_bake_task(task_id: str, req: BakeRequest):
tasks[task_id].progress = idx + 1
continue
mask_gray = np.zeros((src_height, src_width), dtype=np.uint8)
# Step 1: Calculate ROI bounds from all boxes first
min_x, min_y = src_width, src_height
max_x, max_y = 0, 0
valid_boxes = []
for box in frame_boxes:
if not isinstance(box, list) or len(box) < 4:
continue
x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
if w <= 0 or h <= 0:
continue
center = (x + w // 2, y + h // 2)
axes = (max(1, w // 2), max(1, h // 2))
cv2.ellipse(mask_gray, center, axes, 0, 0, 360, 255, -1)
valid_boxes.append((x, y, w, h))
min_x = min(min_x, x)
min_y = min(min_y, y)
max_x = max(max_x, x + w)
max_y = max(max_y, y + h)
if cv2.countNonZero(mask_gray) == 0:
if not valid_boxes:
writer.write(src_frame)
tasks[task_id].progress = idx + 1
continue
mask_gray = cv2.GaussianBlur(mask_gray, (feather_kernel, feather_kernel), 0)
_, mask_binary = cv2.threshold(mask_gray, 2, 255, cv2.THRESH_BINARY)
non_zero_coords = cv2.findNonZero(mask_binary)
if non_zero_coords is None:
writer.write(src_frame)
tasks[task_id].progress = idx + 1
continue
x, y, w, h = cv2.boundingRect(non_zero_coords)
# Step 2: Expand ROI bounds with blur margin
blur_margin = max(1, (blur_size // 2) + feather_radius)
x1 = max(0, x - blur_margin)
y1 = max(0, y - blur_margin)
x2 = min(src_width, x + w + blur_margin)
y2 = min(src_height, y + h + blur_margin)
roi_x1 = max(0, min_x - blur_margin)
roi_y1 = max(0, min_y - blur_margin)
roi_x2 = min(src_width, max_x + blur_margin)
roi_y2 = min(src_height, max_y + blur_margin)
roi_width = roi_x2 - roi_x1
roi_height = roi_y2 - roi_y1
roi_src = src_frame[y1:y2, x1:x2]
roi_mask = mask_gray[y1:y2, x1:x2]
if roi_src.size == 0:
if roi_width <= 0 or roi_height <= 0:
writer.write(src_frame)
tasks[task_id].progress = idx + 1
continue
# Step 3: Create ROI-sized mask only
roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
for x, y, w, h in valid_boxes:
# Convert to ROI coordinate system
center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1)
axes = (max(1, w // 2), max(1, h // 2))
cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
# Step 4: Apply feathering to ROI mask only
roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0)
# Step 5: Extract ROI from source frame
roi_src = src_frame[roi_y1:roi_y2, roi_x1:roi_x2]
# Step 6: Apply blur to ROI
roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
# Step 7: Alpha blend
roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
roi_blurred.astype(np.float32) * roi_alpha
)
output_frame = src_frame.copy()
output_frame[y1:y2, x1:x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
writer.write(output_frame)
# Step 8: Write directly to source frame (no copy needed)
src_frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
writer.write(src_frame)
tasks[task_id].progress = idx + 1
if tasks[task_id].status == TaskStatus.PROCESSING:
@ -435,7 +537,13 @@ def process_bake_task(task_id: str, req: BakeRequest):
if src_cap:
src_cap.release()
if writer:
writer.release()
try:
writer.release()
except Exception as e:
if tasks[task_id].status not in (TaskStatus.FAILED, TaskStatus.CANCELLED):
tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = f"Writer finalization failed: {e}"
print(f"[FaceMask] Writer release error for task {task_id}: {e}")
if task_id in cancel_events:
del cancel_events[task_id]