ROI修正/ハードウェアエンコード

This commit is contained in:
Keisuke Hirata 2026-02-16 18:20:31 +09:00
parent 0d63b2ef6d
commit 914667edbf
2 changed files with 140 additions and 30 deletions

View File

@ -73,9 +73,11 @@
pip install --quiet -r "$PWD/requirements.txt" pip install --quiet -r "$PWD/requirements.txt"
fi fi
# opencv-pythonが入っていた場合はheadlessに統一 # OpenCVは壊れやすいので、import失敗時のみheadlessを強制再導入
pip uninstall -y opencv-python opencv 2>/dev/null || true if ! python -c "import cv2" >/dev/null 2>&1; then
pip install --quiet --upgrade opencv-python-headless echo "[Setup] Repairing OpenCV (opencv-python-headless)..."
pip install --quiet --force-reinstall --no-cache-dir opencv-python-headless
fi
# Pythonパスにカレントディレクトリを追加 # Pythonパスにカレントディレクトリを追加
export PYTHONPATH="$PWD:$PYTHONPATH" export PYTHONPATH="$PWD:$PYTHONPATH"

View File

@ -34,6 +34,7 @@ fix_library_path()
import threading import threading
import uuid import uuid
import traceback import traceback
import subprocess
from typing import Dict, Optional, List from typing import Dict, Optional, List
from pathlib import Path from pathlib import Path
@ -92,15 +93,100 @@ class BakeRequest(BaseModel):
format: str = "mp4" format: str = "mp4"
class _FFmpegPipeWriter:
    """Stream raw BGR frames to an ffmpeg subprocess through its stdin.

    Exposes ``write(frame)`` and ``release()`` so it can be used where the
    bake loop calls those two methods on its video writer.

    ffmpeg's stderr is drained continuously on a background thread. Leaving a
    ``stderr=PIPE`` unread while feeding stdin can deadlock once the OS pipe
    buffer fills, so the pipe is consumed as data arrives and only a bounded
    tail is retained for error reporting.
    """

    # Upper bound on retained stderr bytes; only the tail is ever reported.
    _STDERR_KEEP_BYTES = 16384

    def __init__(self, cmd: List[str]):
        """Spawn *cmd* with a writable stdin and a captured stderr.

        Raises:
            OSError: if the executable cannot be started (propagated from
                ``subprocess.Popen``).
            RuntimeError: if the child has no stdin pipe.
        """
        self._proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )
        if self._proc.stdin is None:
            self._proc.kill()
            # Reap the killed child so it does not linger as a zombie.
            self._proc.wait()
            raise RuntimeError("Failed to open ffmpeg stdin")
        self._stderr_tail = b""
        self._released = False
        self._stderr_thread = threading.Thread(
            target=self._drain_stderr,
            daemon=True,
        )
        self._stderr_thread.start()

    def _drain_stderr(self) -> None:
        """Consume the child's stderr until EOF, keeping only a bounded tail."""
        stream = self._proc.stderr
        if stream is None:
            return
        try:
            while True:
                chunk = stream.read(4096)
                if not chunk:
                    break
                combined = self._stderr_tail + chunk
                self._stderr_tail = combined[-self._STDERR_KEEP_BYTES:]
        except (OSError, ValueError):
            # Best effort: the pipe went away underneath us; whatever was
            # captured so far is still reported.
            pass
        finally:
            try:
                stream.close()
            except OSError:
                pass

    def _stderr_text(self) -> str:
        """Return the captured stderr tail decoded as UTF-8 (lossy)."""
        return self._stderr_tail.decode("utf-8", errors="replace")

    def write(self, frame: np.ndarray) -> None:
        """Send one frame's raw bytes to ffmpeg.

        Raises:
            RuntimeError: if stdin is unavailable/closed, or if ffmpeg has
                exited and the pipe is broken (the message carries ffmpeg's
                captured stderr).
        """
        stdin = self._proc.stdin
        if stdin is None or stdin.closed:
            raise RuntimeError("ffmpeg stdin is not available")
        try:
            stdin.write(frame.tobytes())
        except BrokenPipeError as err:
            # Give the drain thread a moment to collect ffmpeg's last words so
            # the error explains *why* the encoder went away.
            self._stderr_thread.join(timeout=1.0)
            raise RuntimeError(
                "ffmpeg exited while frames were being written: "
                f"{self._stderr_text().strip()}"
            ) from err

    def release(self) -> None:
        """Close stdin, wait for ffmpeg to finish, and report failure.

        Safe to call more than once; only the first call does any work.

        Raises:
            RuntimeError: if ffmpeg exits with a non-zero status. The message
                includes the last lines of its stderr output.
        """
        if self._released:
            return
        self._released = True

        stdin = self._proc.stdin
        if stdin is not None:
            try:
                stdin.close()
            except OSError:
                # Flushing into a dead ffmpeg raises BrokenPipeError; the exit
                # code check below reports the real failure.
                pass

        rc = self._proc.wait()
        # Bounded join: a stray descendant holding stderr open must not hang us.
        self._stderr_thread.join(timeout=5.0)
        if rc != 0:
            tail = "\n".join(self._stderr_text().strip().splitlines()[-8:])
            raise RuntimeError(f"ffmpeg writer failed (code={rc}): {tail}")
def _build_ffmpeg_vaapi_writer(
    output_path: str,
    fps: float,
    width: int,
    height: int,
    vaapi_device: str = "/dev/dri/renderD128",
    qp: int = 24,
) -> "_FFmpegPipeWriter":
    """Create an ffmpeg ``h264_vaapi`` writer fed with raw BGR frames.

    Args:
        output_path: Destination file passed to ffmpeg as the output.
        fps: Input frame rate given to ffmpeg via ``-r``.
        width: Frame width in pixels.
        height: Frame height in pixels.
        vaapi_device: Path of the DRM render node passed to ``-vaapi_device``.
        qp: Constant quantizer passed to ``-qp`` (default 24).

    Returns:
        A writer whose ``write()`` expects frames of ``width`` x ``height``
        in ``bgr24`` layout.

    Raises:
        ValueError: if ``fps``, ``width`` or ``height`` is not positive.
        RuntimeError: if ``vaapi_device`` does not exist.
        OSError: if the ffmpeg executable cannot be started.
    """
    # Fail fast: spawning ffmpeg succeeds even when the encoder can never
    # work, and the failure would otherwise surface only after frames have
    # been written. Raising here lets the caller pick another encoder.
    if width <= 0 or height <= 0 or fps <= 0:
        raise ValueError(
            f"Invalid video geometry for ffmpeg: {width}x{height} @ {fps} fps"
        )
    if not Path(vaapi_device).exists():
        raise RuntimeError(f"VAAPI device not found: {vaapi_device}")

    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-y",
        "-vaapi_device",
        vaapi_device,
        # Input: headerless raw frames on stdin, so format, pixel layout,
        # size and rate must all be declared before "-i -".
        "-f",
        "rawvideo",
        "-pix_fmt",
        "bgr24",
        "-s",
        f"{width}x{height}",
        "-r",
        f"{fps}",
        "-i",
        "-",
        "-an",
        # Convert to NV12 on the CPU, then upload to the GPU surface that
        # the VAAPI encoder consumes.
        "-vf",
        "format=nv12,hwupload",
        "-c:v",
        "h264_vaapi",
        "-qp",
        f"{qp}",
        output_path,
    ]
    return _FFmpegPipeWriter(cmd)
def _build_video_writer( def _build_video_writer(
output_path: str, output_path: str,
fmt: str, fmt: str,
fps: float, fps: float,
width: int, width: int,
height: int, height: int,
) -> cv2.VideoWriter: ) -> object:
"""Create VideoWriter with codec fallback per format.""" """Create writer with VAAPI preference and OpenCV fallback."""
format_key = fmt.lower() format_key = fmt.lower()
if format_key in {"mp4", "mov"}:
try:
writer = _build_ffmpeg_vaapi_writer(output_path, fps, width, height)
print("[FaceMask] Using output encoder: ffmpeg h264_vaapi (-qp 24)")
return writer
except Exception as e:
print(f"[FaceMask] VAAPI writer unavailable, fallback to OpenCV: {e}")
codec_candidates = { codec_candidates = {
"mp4": ["avc1", "mp4v"], "mp4": ["avc1", "mp4v"],
"mov": ["avc1", "mp4v"], "mov": ["avc1", "mp4v"],
@ -372,52 +458,68 @@ def process_bake_task(task_id: str, req: BakeRequest):
tasks[task_id].progress = idx + 1 tasks[task_id].progress = idx + 1
continue continue
mask_gray = np.zeros((src_height, src_width), dtype=np.uint8) # Step 1: Calculate ROI bounds from all boxes first
min_x, min_y = src_width, src_height
max_x, max_y = 0, 0
valid_boxes = []
for box in frame_boxes: for box in frame_boxes:
if not isinstance(box, list) or len(box) < 4: if not isinstance(box, list) or len(box) < 4:
continue continue
x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3]) x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
if w <= 0 or h <= 0: if w <= 0 or h <= 0:
continue continue
center = (x + w // 2, y + h // 2) valid_boxes.append((x, y, w, h))
axes = (max(1, w // 2), max(1, h // 2)) min_x = min(min_x, x)
cv2.ellipse(mask_gray, center, axes, 0, 0, 360, 255, -1) min_y = min(min_y, y)
max_x = max(max_x, x + w)
max_y = max(max_y, y + h)
if cv2.countNonZero(mask_gray) == 0: if not valid_boxes:
writer.write(src_frame) writer.write(src_frame)
tasks[task_id].progress = idx + 1 tasks[task_id].progress = idx + 1
continue continue
mask_gray = cv2.GaussianBlur(mask_gray, (feather_kernel, feather_kernel), 0) # Step 2: Expand ROI bounds with blur margin
_, mask_binary = cv2.threshold(mask_gray, 2, 255, cv2.THRESH_BINARY)
non_zero_coords = cv2.findNonZero(mask_binary)
if non_zero_coords is None:
writer.write(src_frame)
tasks[task_id].progress = idx + 1
continue
x, y, w, h = cv2.boundingRect(non_zero_coords)
blur_margin = max(1, (blur_size // 2) + feather_radius) blur_margin = max(1, (blur_size // 2) + feather_radius)
x1 = max(0, x - blur_margin) roi_x1 = max(0, min_x - blur_margin)
y1 = max(0, y - blur_margin) roi_y1 = max(0, min_y - blur_margin)
x2 = min(src_width, x + w + blur_margin) roi_x2 = min(src_width, max_x + blur_margin)
y2 = min(src_height, y + h + blur_margin) roi_y2 = min(src_height, max_y + blur_margin)
roi_width = roi_x2 - roi_x1
roi_height = roi_y2 - roi_y1
roi_src = src_frame[y1:y2, x1:x2] if roi_width <= 0 or roi_height <= 0:
roi_mask = mask_gray[y1:y2, x1:x2]
if roi_src.size == 0:
writer.write(src_frame) writer.write(src_frame)
tasks[task_id].progress = idx + 1 tasks[task_id].progress = idx + 1
continue continue
# Step 3: Create ROI-sized mask only
roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
for x, y, w, h in valid_boxes:
# Convert to ROI coordinate system
center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1)
axes = (max(1, w // 2), max(1, h // 2))
cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
# Step 4: Apply feathering to ROI mask only
roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0)
# Step 5: Extract ROI from source frame
roi_src = src_frame[roi_y1:roi_y2, roi_x1:roi_x2]
# Step 6: Apply blur to ROI
roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0) roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
# Step 7: Alpha blend
roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis] roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + ( roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
roi_blurred.astype(np.float32) * roi_alpha roi_blurred.astype(np.float32) * roi_alpha
) )
output_frame = src_frame.copy()
output_frame[y1:y2, x1:x2] = np.clip(roi_composed, 0, 255).astype(np.uint8) # Step 8: Write directly to source frame (no copy needed)
writer.write(output_frame) src_frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
writer.write(src_frame)
tasks[task_id].progress = idx + 1 tasks[task_id].progress = idx + 1
if tasks[task_id].status == TaskStatus.PROCESSING: if tasks[task_id].status == TaskStatus.PROCESSING:
@ -435,7 +537,13 @@ def process_bake_task(task_id: str, req: BakeRequest):
if src_cap: if src_cap:
src_cap.release() src_cap.release()
if writer: if writer:
writer.release() try:
writer.release()
except Exception as e:
if tasks[task_id].status not in (TaskStatus.FAILED, TaskStatus.CANCELLED):
tasks[task_id].status = TaskStatus.FAILED
tasks[task_id].message = f"Writer finalization failed: {e}"
print(f"[FaceMask] Writer release error for task {task_id}: {e}")
if task_id in cancel_events: if task_id in cancel_events:
del cancel_events[task_id] del cancel_events[task_id]