blender-mask-peoples/server/main.py

1432 lines
50 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Face Detection Inference Server.
This FastAPI application runs in a separate process to handle
GPU-accelerated face detection using ONNX Runtime.
"""
import os
import sys
import platform
# CRITICAL: Fix LD_LIBRARY_PATH before importing cv2 or torch
# cv2 adds its own lib path to the front, which can override ROCm libraries
def fix_library_path():
    """Reorder LD_LIBRARY_PATH so ROCm/HIP entries come before everything else.

    cv2 prepends its own bundled library directory to the search path, which
    can shadow the ROCm runtime; promoting the GPU paths first avoids that.
    """
    entries = [p for p in os.environ.get('LD_LIBRARY_PATH', '').split(':') if p]

    def _is_gpu_path(entry: str) -> bool:
        lowered = entry.lower()
        return 'rocm' in lowered or 'hip' in lowered or 'clr-' in entry

    gpu_entries = [p for p in entries if _is_gpu_path(p)]
    if not gpu_entries:
        # Nothing to promote; leave the environment untouched.
        return
    rest = [p for p in entries if not _is_gpu_path(p)]
    os.environ['LD_LIBRARY_PATH'] = ':'.join(gpu_entries + rest)
    print("[FaceMask] Fixed LD_LIBRARY_PATH to prioritize ROCm libraries")
# Fix library path BEFORE any other imports
fix_library_path()
import queue # noqa: E402
import threading # noqa: E402
import uuid # noqa: E402
import traceback # noqa: E402
import subprocess # noqa: E402
from typing import Dict, Optional, List # noqa: E402
from pathlib import Path # noqa: E402
from fastapi import FastAPI, HTTPException, BackgroundTasks # noqa: E402
from pydantic import BaseModel # noqa: E402
import uvicorn # noqa: E402
import cv2 # noqa: E402
import numpy as np # noqa: E402
import msgpack # noqa: E402
# Add project root to path for imports if needed
sys.path.append(str(Path(__file__).parent.parent))
from server.detector import get_detector, get_pose_detector # noqa: E402
app = FastAPI(title="Face Mask Inference Server")
def _get_r_frame_rate(video_path: str) -> tuple:
"""ffprobe でコンテナ宣言の r_frame_rate を取得する。
Returns:
(fps_float, fps_str): fps_str は "120/1" のような分数文字列。
取得失敗時は (0.0, "")。
"""
try:
result = subprocess.run(
[
"ffprobe", "-v", "error",
"-select_streams", "v:0",
"-show_entries", "stream=r_frame_rate",
"-of", "default=noprint_wrappers=1:nokey=1",
video_path,
],
capture_output=True,
text=True,
timeout=10,
)
if result.returncode == 0:
rate_str = result.stdout.strip()
if "/" in rate_str:
num, den = rate_str.split("/")
fps_float = float(num) / float(den)
else:
fps_float = float(rate_str)
rate_str = str(fps_float)
return fps_float, rate_str
except Exception:
pass
return 0.0, ""
# GPU status cache
_gpu_status_cache = None
# Task storage
class TaskStatus:
    """String constants for the lifecycle states stored in Task.status."""
    PENDING = "pending"        # created, not yet picked up
    PROCESSING = "processing"  # background work in progress
    COMPLETED = "completed"    # finished successfully; result_path is set
    FAILED = "failed"          # aborted with an error; message holds detail
    CANCELLED = "cancelled"    # stopped via the task's cancel event
class Task(BaseModel):
    """Progress/result record for one background job (kept in `tasks`)."""
    id: str
    status: str                        # one of the TaskStatus constants
    progress: int = 0                  # frames processed so far
    total: int = 0                     # total frames expected
    message: Optional[str] = None      # human-readable status or error detail
    result_path: Optional[str] = None  # output artifact path when completed
# In-memory storage (single-process server; no persistence across restarts)
tasks: Dict[str, Task] = {}                      # task_id -> Task record
cancel_events: Dict[str, threading.Event] = {}   # task_id -> cooperative cancel flag
class GenerateRequest(BaseModel):
    """Request body for generating a face-detection cache from a video."""
    video_path: str               # source video to scan
    output_dir: str               # directory that receives detections.msgpack
    start_frame: int              # first frame index (inclusive)
    end_frame: int                # last frame index (inclusive)
    conf_threshold: float = 0.5   # detector confidence cutoff
    iou_threshold: float = 0.45   # NMS IoU threshold
class VideoInfoRequest(BaseModel):
    """Request carrying the path of a video to inspect."""
    video_path: str
class BakeRequest(BaseModel):
    """Request body for baking face blur into an output video."""
    video_path: str              # source video
    detections_path: str         # msgpack cache produced by detection
    output_path: str             # destination video file
    blur_size: int = 50          # Gaussian kernel size (forced odd at use)
    display_scale: float = 1.0   # bbox scale applied at bake time
    format: str = "mp4"          # output container: mp4 / mov / avi
class GenerateImagesRequest(BaseModel):
    """Request body for generating a face-detection cache from an image sequence."""
    image_dir: str                # directory containing the images
    filenames: List[str]          # ordered image filenames within image_dir
    output_dir: str               # directory that receives detections.msgpack
    start_index: int = 0          # first index into filenames (inclusive)
    end_index: int = -1           # last index (inclusive); -1 means "to the end"
    conf_threshold: float = 0.5   # detector confidence cutoff
    iou_threshold: float = 0.45   # NMS IoU threshold
class AugmentPoseRequest(BaseModel):
    """Request body for merging pose-detection boxes into an existing cache."""
    detections_path: str          # msgpack cache to augment in place
    conf_threshold: float = 0.5   # pose detector confidence cutoff
    iou_threshold: float = 0.45   # NMS IoU threshold
class BakeImagesRequest(BaseModel):
    """Request body for baking face blur into an image sequence."""
    image_dir: str               # directory containing the source images
    filenames: List[str]         # ordered image filenames within image_dir
    output_dir: str              # destination directory (same filenames)
    detections_path: str         # msgpack cache produced by detection
    blur_size: int = 50          # Gaussian kernel size (forced odd at use)
    display_scale: float = 1.0   # bbox scale applied at bake time
class _FFmpegPipeWriter:
"""Write BGR frames to ffmpeg stdin."""
def __init__(self, cmd: List[str]):
self._proc = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
)
if self._proc.stdin is None:
self._proc.kill()
raise RuntimeError("Failed to open ffmpeg stdin")
def write(self, frame: np.ndarray) -> None:
if self._proc.stdin is None:
raise RuntimeError("ffmpeg stdin is not available")
self._proc.stdin.write(frame.tobytes())
def release(self) -> None:
if self._proc.stdin:
try:
self._proc.stdin.close()
except Exception:
pass
stderr_output = ""
if self._proc.stderr:
try:
stderr_output = self._proc.stderr.read().decode("utf-8", errors="replace")
except Exception:
stderr_output = ""
rc = self._proc.wait()
if rc != 0:
tail = "\n".join(stderr_output.strip().splitlines()[-8:])
raise RuntimeError(f"ffmpeg writer failed (code={rc}): {tail}")
def _build_ffmpeg_vaapi_writer(
    output_path: str,
    fps: float,
    width: int,
    height: int,
    out_fps_str: str = "",
) -> _FFmpegPipeWriter:
    """Create an ffmpeg h264_vaapi writer with QP=24 (balanced quality/speed).

    fps: input rate of the raw pipe (source video avg_frame_rate).
    out_fps_str: r_frame_rate to declare on the output container (e.g. "120/1").
        When it differs meaningfully from the source rate, an fps filter is
        inserted to interpolate frames.
    """
    # Insert the fps filter only when the declared output rate differs
    # significantly from the source average rate.
    use_fps_filter = False
    if out_fps_str:
        try:
            if "/" in out_fps_str:
                num, den = out_fps_str.split("/")
                declared = float(num) / float(den)
            else:
                declared = float(out_fps_str)
        except ValueError:
            use_fps_filter = False  # malformed rate string -> skip the filter
        else:
            use_fps_filter = abs(declared - fps) > 0.01
    if use_fps_filter:
        vf = f"format=nv12,fps={out_fps_str},hwupload"
        print(f"[FaceMask] fps filter: {fps:.3f} -> {out_fps_str}")
    else:
        vf = "format=nv12,hwupload"
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel", "error",
        "-y",
        "-vaapi_device", "/dev/dri/renderD128",
        "-f", "rawvideo",
        "-pix_fmt", "bgr24",
        "-s", f"{width}x{height}",
        "-r", f"{fps}",
        "-i", "-",
        "-an",
        "-vf", vf,
        "-c:v", "h264_vaapi",
        "-qp", "24",
        output_path,
    ]
    return _FFmpegPipeWriter(cmd)
def _build_video_writer(
    output_path: str,
    fmt: str,
    fps: float,
    width: int,
    height: int,
    out_fps_str: str = "",
) -> object:
    """Create writer with VAAPI preference and OpenCV fallback."""
    key = fmt.lower()
    # Prefer hardware VAAPI encoding for h264-capable containers.
    if key in {"mp4", "mov"}:
        try:
            vaapi_writer = _build_ffmpeg_vaapi_writer(output_path, fps, width, height, out_fps_str)
        except Exception as e:
            print(f"[FaceMask] VAAPI writer unavailable, fallback to OpenCV: {e}")
        else:
            print("[FaceMask] Using output encoder: ffmpeg h264_vaapi (-qp 24)")
            return vaapi_writer
    # Software fallback: try fourcc candidates per container format.
    candidates = {
        "mp4": ["avc1", "mp4v"],
        "mov": ["avc1", "mp4v"],
        "avi": ["MJPG", "XVID"],
    }.get(key, ["mp4v"])
    for fourcc in candidates:
        cv_writer = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc(*fourcc),
            fps,
            (width, height),
            isColor=True,
        )
        if cv_writer.isOpened():
            print(f"[FaceMask] Using output codec: {fourcc}")
            return cv_writer
        cv_writer.release()
    raise RuntimeError(f"Failed to create video writer for format='{fmt}'")
def _scale_bbox(
x: int,
y: int,
w: int,
h: int,
scale: float,
frame_width: int,
frame_height: int,
) -> Optional[List[int]]:
"""Scale bbox around center and clamp to frame boundaries."""
if w <= 0 or h <= 0:
return None
center_x = x + (w * 0.5)
center_y = y + (h * 0.5)
scaled_w = max(1, int(w * scale))
scaled_h = max(1, int(h * scale))
x1 = max(0, int(center_x - scaled_w * 0.5))
y1 = max(0, int(center_y - scaled_h * 0.5))
x2 = min(frame_width, x1 + scaled_w)
y2 = min(frame_height, y1 + scaled_h)
out_w = x2 - x1
out_h = y2 - y1
if out_w <= 0 or out_h <= 0:
return None
return [x1, y1, out_w, out_h]
def _apply_face_blur_inplace(
    frame: np.ndarray,
    frame_boxes: list,
    src_width: int,
    src_height: int,
    blur_size: int,
    display_scale: float,
    blur_margin: int,
) -> None:
    """Apply an elliptical Gaussian blur over each detected face box (in-place)."""
    if not frame_boxes:
        return
    for box in frame_boxes:
        if not isinstance(box, list) or len(box) < 4:
            continue
        bx, by, bw, bh = int(box[0]), int(box[1]), int(box[2]), int(box[3])
        if bw <= 0 or bh <= 0:
            continue
        # Display-scaled rectangle centered on the detection.
        center_x = bx + bw / 2
        center_y = by + bh / 2
        disp_w = max(1, int(bw * display_scale))
        disp_h = max(1, int(bh * display_scale))
        disp_x = int(center_x - disp_w / 2)
        disp_y = int(center_y - disp_h / 2)
        # ROI = display rect plus margin, clamped to the frame bounds.
        x1 = max(0, disp_x - blur_margin)
        y1 = max(0, disp_y - blur_margin)
        x2 = min(src_width, disp_x + disp_w + blur_margin)
        y2 = min(src_height, disp_y + disp_h + blur_margin)
        roi_w = x2 - x1
        roi_h = y2 - y1
        if roi_w <= 0 or roi_h <= 0:
            continue
        roi = frame[y1:y2, x1:x2]
        # Downsample -> blur -> upsample: comparable blur at ~1/4 the cost.
        down = cv2.resize(
            roi,
            (max(1, roi_w // 2), max(1, roi_h // 2)),
            interpolation=cv2.INTER_LINEAR,
        )
        kernel = max(3, (blur_size // 2) | 1)
        blurred = cv2.resize(
            cv2.GaussianBlur(down, (kernel, kernel), 0),
            (roi_w, roi_h),
            interpolation=cv2.INTER_LINEAR,
        )
        # Composite the blur through an elliptical binary mask.
        mask = np.zeros((roi_h, roi_w), dtype=np.uint8)
        cv2.ellipse(
            mask,
            (int(center_x) - x1, int(center_y) - y1),
            (max(1, disp_w // 2), max(1, disp_h // 2)),
            0, 0, 360, 255, -1,
        )
        blended = roi.copy()
        cv2.copyTo(blurred, mask, blended)
        frame[y1:y2, x1:x2] = blended
def process_images_task(task_id: str, req: GenerateImagesRequest):
    """Background task: detect faces in an image sequence and save a msgpack cache.

    Writes `detections.msgpack` into req.output_dir holding one bbox list per
    image in [start_index, end_index]. Progress is reported through the shared
    `tasks` dict; cooperative cancellation through `cancel_events`.
    """
    try:
        tasks[task_id].status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)
        if not os.path.exists(req.image_dir):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Image directory not found: {req.image_dir}"
            return
        if not req.filenames:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "No filenames provided"
            return
        detector = get_detector(
            conf_threshold=req.conf_threshold,
            iou_threshold=req.iou_threshold,
        )
        _ = detector.model  # trigger lazy model load before the loop
        total_files = len(req.filenames)
        start_idx = max(0, req.start_index)
        end_idx = req.end_index if req.end_index >= 0 else total_files - 1
        end_idx = min(end_idx, total_files - 1)
        if start_idx > end_idx:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Invalid index range"
            return
        indices = list(range(start_idx, end_idx + 1))
        tasks[task_id].total = len(indices)
        os.makedirs(req.output_dir, exist_ok=True)
        output_msgpack_path = os.path.join(req.output_dir, "detections.msgpack")
        # Cache dimensions come from the first image of the range.
        first_path = os.path.join(req.image_dir, req.filenames[start_idx])
        first_img = cv2.imread(first_path)
        if first_img is None:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Cannot read image: {first_path}"
            return
        height, width = first_img.shape[:2]
        frame_buffer: List[np.ndarray] = []
        frame_detections: List[List[List[float]]] = []
        batch_size = 5
        current_count = 0

        def process_batch():
            # Run the detector on buffered frames; clamp each bbox to the
            # image bounds and append one list per frame to frame_detections.
            nonlocal current_count
            if not frame_buffer:
                return
            batch_det = detector.detect_batch(frame_buffer)
            for detections in batch_det:
                packed: List[List[float]] = []
                for x, y, w, h, conf in detections:
                    bx, by, bw, bh = int(x), int(y), int(w), int(h)
                    bx = max(0, bx)
                    by = max(0, by)
                    bw = min(width - bx, bw)
                    bh = min(height - by, bh)
                    if bw <= 0 or bh <= 0:
                        continue
                    packed.append([bx, by, bw, bh, float(conf)])
                frame_detections.append(packed)
                current_count += 1
                tasks[task_id].progress = current_count
            frame_buffer.clear()

        print(
            f"[FaceMask] Starting image detection: {req.image_dir} "
            f"({len(indices)} images) -> {output_msgpack_path}"
        )
        for file_idx in indices:
            if cancel_event and cancel_event.is_set():
                tasks[task_id].status = TaskStatus.CANCELLED
                tasks[task_id].message = "Cancelled by user"
                break
            img_path = os.path.join(req.image_dir, req.filenames[file_idx])
            frame = cv2.imread(img_path)
            if frame is None:
                # BUGFIX: flush any buffered frames first, so the empty entry
                # for this unreadable image lands at its correct position.
                # Previously the placeholder was appended before the pending
                # batch, misaligning all subsequent frames' detections.
                process_batch()
                frame_detections.append([])
                current_count += 1
                tasks[task_id].progress = current_count
                continue
            frame_buffer.append(frame)
            if len(frame_buffer) >= batch_size:
                process_batch()
        if frame_buffer:
            process_batch()
        if tasks[task_id].status == TaskStatus.PROCESSING:
            payload = {
                "version": 1,
                "image_dir": req.image_dir,
                "filenames": req.filenames,
                "start_frame": start_idx,
                "end_frame": start_idx + len(frame_detections) - 1,
                "width": width,
                "height": height,
                "fps": 0.0,
                "mask_scale": 1.0,
                "frames": frame_detections,
            }
            with open(output_msgpack_path, "wb") as f:
                f.write(msgpack.packb(payload, use_bin_type=True))
            tasks[task_id].status = TaskStatus.COMPLETED
            tasks[task_id].result_path = output_msgpack_path
            tasks[task_id].message = "Image detection cache completed"
            print(f"[FaceMask] Image detection done: {output_msgpack_path}")
    except Exception as e:
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        traceback.print_exc()
    finally:
        if task_id in cancel_events:
            del cancel_events[task_id]
def process_bake_images_task(task_id: str, req: BakeImagesRequest):
    """Apply face blur to an image sequence and write results to a new directory.

    Loads the msgpack detection cache, blurs each listed image (boxes looked
    up by the image's position in req.filenames, offset by the cache's
    start_frame), and writes it under req.output_dir with the same filename.
    Progress/cancellation go through the shared `tasks` / `cancel_events`.
    """
    try:
        tasks[task_id].status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)
        if not os.path.exists(req.image_dir):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Image directory not found: {req.image_dir}"
            return
        if not os.path.exists(req.detections_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Detections file not found: {req.detections_path}"
            return
        with open(req.detections_path, "rb") as f:
            payload = msgpack.unpackb(f.read(), raw=False)
        frames_detections = payload.get("frames")
        if not isinstance(frames_detections, list):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Invalid detections format: 'frames' is missing"
            return
        det_start_frame = int(payload.get("start_frame", 0))
        # Gaussian kernel size must be odd.
        blur_size = max(1, int(req.blur_size))
        if blur_size % 2 == 0:
            blur_size += 1
        display_scale = max(0.1, float(req.display_scale))
        # Extra ROI padding so the blur has context near the box edges.
        blur_margin = blur_size // 2
        os.makedirs(req.output_dir, exist_ok=True)
        total = len(req.filenames)
        tasks[task_id].total = total
        print(
            f"[FaceMask] Starting image bake: {req.image_dir} "
            f"({total} images) -> {req.output_dir}"
        )
        for i, filename in enumerate(req.filenames):
            if cancel_event and cancel_event.is_set():
                tasks[task_id].status = TaskStatus.CANCELLED
                tasks[task_id].message = "Cancelled by user"
                return
            src_path = os.path.join(req.image_dir, filename)
            frame = cv2.imread(src_path)
            if frame is None:
                # Unreadable image: skip it but still advance progress.
                tasks[task_id].progress = i + 1
                continue
            h, w = frame.shape[:2]
            # NOTE(review): assumes req.filenames order matches the frame
            # order used when the cache was generated — confirm with caller.
            det_idx = i - det_start_frame
            frame_boxes = (
                frames_detections[det_idx]
                if 0 <= det_idx < len(frames_detections)
                else []
            )
            _apply_face_blur_inplace(frame, frame_boxes, w, h, blur_size, display_scale, blur_margin)
            out_path = os.path.join(req.output_dir, filename)
            cv2.imwrite(out_path, frame)
            tasks[task_id].progress = i + 1
        if tasks[task_id].status == TaskStatus.PROCESSING:
            tasks[task_id].status = TaskStatus.COMPLETED
            tasks[task_id].result_path = req.output_dir
            tasks[task_id].message = "Image blur bake completed"
            print(f"[FaceMask] Image bake completed: {req.output_dir}")
    except Exception as e:
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        traceback.print_exc()
    finally:
        if task_id in cancel_events:
            del cancel_events[task_id]
def augment_pose_task(task_id: str, req: AugmentPoseRequest):
    """Background task: run pose estimation and merge results into existing cache.

    Re-reads the source video referenced by the cache, runs the pose detector
    in batches, appends the resulting boxes to each frame's existing list, and
    rewrites the cache file in place on success.
    """
    cap = None
    try:
        tasks[task_id].status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)
        if not os.path.exists(req.detections_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Detections file not found: {req.detections_path}"
            return
        with open(req.detections_path, "rb") as f:
            payload = msgpack.unpackb(f.read(), raw=False)
        existing_frames: List[List[List[float]]] = payload.get("frames", [])
        video_path = payload.get("video_path")
        start_frame = int(payload.get("start_frame", 0))
        total = len(existing_frames)
        if not video_path:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Cache does not contain video_path (image caches not supported)"
            return
        if not os.path.exists(video_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Video not found: {video_path}"
            return
        if total == 0:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Cache has no frames"
            return
        tasks[task_id].total = total
        detector = get_pose_detector(
            conf_threshold=req.conf_threshold,
            iou_threshold=req.iou_threshold,
        )
        _ = detector.model  # trigger lazy model load up-front
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Failed to open video"
            return
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if start_frame > 0:
            seek_ok = cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
            if not seek_ok:
                # Fallback: step through frames when random seek is unsupported.
                for _ in range(start_frame):
                    ret, _ = cap.read()
                    if not ret:
                        tasks[task_id].status = TaskStatus.FAILED
                        tasks[task_id].message = f"Failed to seek to start frame: {start_frame}"
                        return
        frame_buffer: List[np.ndarray] = []
        buffer_indices: List[int] = []  # parallel indices into existing_frames
        current_count = 0
        batch_size = 5

        def process_pose_batch():
            # Run pose detection on the buffered frames and append the
            # clamped boxes to the matching existing_frames entries.
            nonlocal current_count
            if not frame_buffer:
                return
            batch_detections = detector.detect_batch(frame_buffer)
            for idx, detections in zip(buffer_indices, batch_detections):
                for x, y, w, h, conf in detections:
                    bx, by, bw, bh = int(x), int(y), int(w), int(h)
                    bx = max(0, bx)
                    by = max(0, by)
                    bw = min(width - bx, bw)
                    bh = min(height - by, bh)
                    if bw > 0 and bh > 0:
                        existing_frames[idx].append([bx, by, bw, bh, float(conf)])
                current_count += 1
                tasks[task_id].progress = current_count
            frame_buffer.clear()
            buffer_indices.clear()

        for i in range(total):
            if cancel_event and cancel_event.is_set():
                tasks[task_id].status = TaskStatus.CANCELLED
                tasks[task_id].message = "Cancelled by user"
                break
            ret, frame = cap.read()
            if not ret:
                break
            frame_buffer.append(frame)
            buffer_indices.append(i)
            if len(frame_buffer) >= batch_size:
                process_pose_batch()
        if frame_buffer:
            process_pose_batch()
        if tasks[task_id].status == TaskStatus.PROCESSING:
            # Persist the merged boxes back into the same cache file.
            payload["frames"] = existing_frames
            with open(req.detections_path, "wb") as f:
                f.write(msgpack.packb(payload, use_bin_type=True))
            tasks[task_id].status = TaskStatus.COMPLETED
            tasks[task_id].result_path = req.detections_path
            tasks[task_id].message = "Pose augmentation completed"
            print(f"[FaceMask] Pose augmentation completed: {req.detections_path}")
    except Exception as e:
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        traceback.print_exc()
    finally:
        if cap:
            cap.release()
        if task_id in cancel_events:
            del cancel_events[task_id]
def process_video_task(task_id: str, req: GenerateRequest):
    """Background task to detect faces and save bbox cache as msgpack.

    Reads frames [start_frame, end_frame] of the video, detects faces in
    batches of 5, and writes `detections.msgpack` into req.output_dir.
    """
    cap = None
    try:
        tasks[task_id].status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)
        if not os.path.exists(req.video_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Video not found: {req.video_path}"
            return
        print(f"Loading detector for task {task_id}...")
        detector = get_detector(
            conf_threshold=req.conf_threshold,
            iou_threshold=req.iou_threshold,
        )
        _ = detector.model  # trigger lazy model load before reading frames
        cap = cv2.VideoCapture(req.video_path)
        if not cap.isOpened():
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Failed to open video"
            return
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back to 30 when unknown
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        end_frame = min(req.end_frame, total_video_frames - 1)
        frames_to_process = end_frame - req.start_frame + 1
        if frames_to_process <= 0:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Invalid frame range"
            return
        tasks[task_id].total = frames_to_process
        os.makedirs(req.output_dir, exist_ok=True)
        output_msgpack_path = os.path.join(req.output_dir, "detections.msgpack")
        if req.start_frame > 0:
            seek_ok = cap.set(cv2.CAP_PROP_POS_FRAMES, req.start_frame)
            if not seek_ok:
                # Fallback: read frames one by one when random seek fails.
                for _ in range(req.start_frame):
                    ret, _ = cap.read()
                    if not ret:
                        tasks[task_id].status = TaskStatus.FAILED
                        tasks[task_id].message = (
                            f"Failed to seek to start frame: {req.start_frame}"
                        )
                        return
        frame_buffer: List[np.ndarray] = []
        frame_detections: List[List[List[float]]] = []
        batch_size = 5
        current_count = 0

        def process_batch():
            # Detect on the buffered frames, clamp boxes to frame bounds,
            # and append one list per frame to frame_detections.
            nonlocal current_count
            if not frame_buffer:
                return
            batch_detections = detector.detect_batch(frame_buffer)
            for detections in batch_detections:
                packed_detections: List[List[float]] = []
                for x, y, w, h, conf in detections:
                    # Store raw bboxes; display scale is applied at bake time.
                    bx, by, bw, bh = int(x), int(y), int(w), int(h)
                    bx = max(0, bx)
                    by = max(0, by)
                    bw = min(width - bx, bw)
                    bh = min(height - by, bh)
                    if bw <= 0 or bh <= 0:
                        continue
                    packed_detections.append([bx, by, bw, bh, float(conf)])
                frame_detections.append(packed_detections)
                current_count += 1
                tasks[task_id].progress = current_count
            frame_buffer.clear()

        print(
            f"Starting detection cache generation: {req.video_path} "
            f"({frames_to_process} frames) -> {output_msgpack_path}"
        )
        for _ in range(req.start_frame, end_frame + 1):
            if cancel_event and cancel_event.is_set():
                tasks[task_id].status = TaskStatus.CANCELLED
                tasks[task_id].message = "Cancelled by user"
                break
            ret, frame = cap.read()
            if not ret:
                break
            frame_buffer.append(frame)
            if len(frame_buffer) >= batch_size:
                process_batch()
        if frame_buffer:
            process_batch()
        if tasks[task_id].status == TaskStatus.PROCESSING:
            payload = {
                "version": 1,
                "video_path": req.video_path,
                "start_frame": req.start_frame,
                "end_frame": req.start_frame + len(frame_detections) - 1,
                "width": width,
                "height": height,
                "fps": fps,
                "mask_scale": 1.0,
                "frames": frame_detections,
            }
            with open(output_msgpack_path, "wb") as f:
                f.write(msgpack.packb(payload, use_bin_type=True))
            tasks[task_id].status = TaskStatus.COMPLETED
            tasks[task_id].result_path = output_msgpack_path
            tasks[task_id].message = "Detection cache completed"
            print(f"Task {task_id} completed: {output_msgpack_path}")
    except Exception as e:
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        print(f"Error in task {task_id}: {e}")
        traceback.print_exc()
    finally:
        if cap:
            cap.release()
        if task_id in cancel_events:
            del cancel_events[task_id]
def process_bake_task(task_id: str, req: BakeRequest):
    """Bake blur using async pipeline: read/process/write run in parallel for 1.35x speedup.

    Three daemon threads connected by bounded queues:
      reader  -> read_queue    (decodes frames)
      process -> process_queue (applies per-face ROI blur)
      writer                   (encodes via VAAPI or OpenCV fallback)
    A None sentinel flows through the queues to signal end-of-stream; worker
    errors are reported through error_holder.
    """
    try:
        tasks[task_id].status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)
        if not os.path.exists(req.video_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Video not found: {req.video_path}"
            return
        if not os.path.exists(req.detections_path):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = f"Detections file not found: {req.detections_path}"
            return
        with open(req.detections_path, "rb") as f:
            payload = msgpack.unpackb(f.read(), raw=False)
        frames_detections = payload.get("frames")
        if not isinstance(frames_detections, list):
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Invalid detections format: 'frames' is missing"
            return
        # Start frame of the detection cache (frame index in the source video).
        det_start_frame = int(payload.get("start_frame", 0))
        # Get video info
        temp_cap = cv2.VideoCapture(req.video_path)
        if not temp_cap.isOpened():
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Failed to open source video"
            return
        src_fps = temp_cap.get(cv2.CAP_PROP_FPS) or 30.0
        src_width = int(temp_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        src_height = int(temp_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        src_frames = int(temp_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        temp_cap.release()
        # Query r_frame_rate via ffprobe so the output container declares the
        # same FPS as the source. Example: a 60fps video recorded on a 120fps
        # timebase reports r_frame_rate=120/1 while cv2 returns avg=60fps —
        # without this, Blender would see a different FPS after the bake.
        r_fps_float, r_fps_str = _get_r_frame_rate(req.video_path)
        if r_fps_float > 0:
            print(f"[FaceMask] r_frame_rate={r_fps_str}, avg_fps={src_fps:.3f}")
        else:
            r_fps_str = ""
        if src_width <= 0 or src_height <= 0:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Invalid source video dimensions"
            return
        # Output every frame of the source video (so trim settings keep
        # working after the swapped video replaces the original).
        total = src_frames if src_frames > 0 else (det_start_frame + len(frames_detections))
        if total <= 0:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = "Source/detections frame count is zero"
            return
        tasks[task_id].total = total
        output_dir = os.path.dirname(req.output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        # Pipeline setup: Gaussian kernel must be odd.
        blur_size = max(1, int(req.blur_size))
        if blur_size % 2 == 0:
            blur_size += 1
        display_scale = max(0.1, float(req.display_scale))
        # blur_margin is computation padding only (avoids edge artifacts);
        # it does not enlarge the visible blurred area.
        blur_margin = blur_size // 2
        # Bounded queues give back-pressure between pipeline stages.
        queue_size = 8
        read_queue: queue.Queue = queue.Queue(maxsize=queue_size)
        process_queue: queue.Queue = queue.Queue(maxsize=queue_size)
        # Shared state between the worker threads.
        error_holder = {"error": None}
        progress_lock = threading.Lock()
        current_progress = [0]

        def _reader_worker():
            """Read frames from video."""
            import time as _time
            cap = cv2.VideoCapture(req.video_path)
            if not cap.isOpened():
                error_holder["error"] = "Failed to open video in reader"
                return
            t_read_total = 0.0
            frame_count = 0
            try:
                for idx in range(total):
                    if cancel_event and cancel_event.is_set():
                        break
                    t0 = _time.perf_counter()
                    ok, frame = cap.read()
                    t_read_total += _time.perf_counter() - t0
                    if not ok:
                        break
                    read_queue.put((idx, frame))
                    frame_count += 1
            except Exception as e:
                error_holder["error"] = f"Reader error: {e}"
            finally:
                cap.release()
                read_queue.put(None)  # Sentinel
                if frame_count > 0:
                    print(
                        f"[Perf/Reader] FINAL frame={frame_count}"
                        f" read_avg={t_read_total/frame_count*1000:.1f}ms"
                        f" throughput≈{frame_count/max(t_read_total,1e-9):.1f}fps"
                    )

        def _processor_worker():
            """Process frames with ROI blur."""
            import time as _time
            t_wait_total = 0.0
            t_blur_total = 0.0
            t_blend_total = 0.0
            frame_count = 0
            REPORT_INTERVAL = 50
            try:
                while True:
                    if cancel_event and cancel_event.is_set():
                        process_queue.put(None)
                        break
                    t0 = _time.perf_counter()
                    item = read_queue.get()
                    t_wait_total += _time.perf_counter() - t0
                    if item is None:
                        process_queue.put(None)
                        break
                    idx, frame = item
                    det_idx = idx - det_start_frame
                    frame_boxes = frames_detections[det_idx] if 0 <= det_idx < len(frames_detections) else []
                    if not frame_boxes:
                        # No detections: pass the frame through untouched.
                        process_queue.put((idx, frame))
                        frame_count += 1
                        continue
                    # Process each person with an individual ROI — a single
                    # combined ROI would become huge for far-apart people.
                    valid_boxes = []
                    for box in frame_boxes:
                        if not isinstance(box, list) or len(box) < 4:
                            continue
                        x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
                        if w <= 0 or h <= 0:
                            continue
                        valid_boxes.append((x, y, w, h))
                    if not valid_boxes:
                        process_queue.put((idx, frame))
                        frame_count += 1
                        continue
                    for x, y, w, h in valid_boxes:
                        # display_scale determines the visible blur size.
                        cx = x + w / 2
                        cy = y + h / 2
                        dw = max(1, int(w * display_scale))
                        dh = max(1, int(h * display_scale))
                        dx = int(cx - dw / 2)
                        dy = int(cy - dh / 2)
                        # ROI = display size + blur_margin (computation padding
                        # only, to avoid boundary artifacts).
                        roi_x1 = max(0, dx - blur_margin)
                        roi_y1 = max(0, dy - blur_margin)
                        roi_x2 = min(src_width, dx + dw + blur_margin)
                        roi_y2 = min(src_height, dy + dh + blur_margin)
                        roi_width = roi_x2 - roi_x1
                        roi_height = roi_y2 - roi_y1
                        if roi_width <= 0 or roi_height <= 0:
                            continue
                        # Blur the whole ROI; the margin preserves edge accuracy.
                        roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
                        # Downsample -> blur -> upsample: same visual blur at
                        # roughly 1/4 the compute cost.
                        t1 = _time.perf_counter()
                        small_w = max(1, roi_width // 2)
                        small_h = max(1, roi_height // 2)
                        roi_small = cv2.resize(roi_src, (small_w, small_h), interpolation=cv2.INTER_LINEAR)
                        small_blur_size = max(3, (blur_size // 2) | 1)
                        roi_small_blurred = cv2.GaussianBlur(roi_small, (small_blur_size, small_blur_size), 0)
                        roi_blurred = cv2.resize(roi_small_blurred, (roi_width, roi_height), interpolation=cv2.INTER_LINEAR)
                        t_blur_total += _time.perf_counter() - t1
                        # Composite mask is an ellipse at display_scale size
                        # only (no feathering).
                        roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
                        center = (int(cx) - roi_x1, int(cy) - roi_y1)
                        axes = (max(1, dw // 2), max(1, dh // 2))
                        cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
                        # Binary mask: copyTo composites fast, no float32 pass.
                        t2 = _time.perf_counter()
                        result = roi_src.copy()
                        cv2.copyTo(roi_blurred, roi_mask, result)
                        frame[roi_y1:roi_y2, roi_x1:roi_x2] = result
                        t_blend_total += _time.perf_counter() - t2
                    process_queue.put((idx, frame))
                    frame_count += 1
                    if frame_count % REPORT_INTERVAL == 0:
                        n = max(frame_count, 1)
                        fps_proc = frame_count / max(t_wait_total + t_blur_total + t_blend_total, 1e-9)
                        print(
                            f"[Perf/Processor] frame={frame_count}"
                            f" wait={t_wait_total/n*1000:.1f}ms"
                            f" blur={t_blur_total/n*1000:.1f}ms"
                            f" blend={t_blend_total/n*1000:.1f}ms"
                            f" ROI={roi_width}x{roi_height}"
                            f" throughput≈{fps_proc:.1f}fps"
                        )
            except Exception as e:
                error_holder["error"] = f"Processor error: {e}"
                process_queue.put(None)
            finally:
                if frame_count > 0:
                    n = max(frame_count, 1)
                    print(
                        f"[Perf/Processor] FINAL frame={frame_count}"
                        f" wait_avg={t_wait_total/n*1000:.1f}ms"
                        f" blur_avg={t_blur_total/n*1000:.1f}ms"
                        f" blend_avg={t_blend_total/n*1000:.1f}ms"
                    )

        def _writer_worker():
            """Write frames to output."""
            import time as _time
            t_wait_total = 0.0
            t_write_total = 0.0
            frame_count = 0
            writer = None
            try:
                writer = _build_video_writer(req.output_path, req.format, src_fps, src_width, src_height, r_fps_str)
                while True:
                    if cancel_event and cancel_event.is_set():
                        break
                    t0 = _time.perf_counter()
                    item = process_queue.get()
                    t_wait_total += _time.perf_counter() - t0
                    if item is None:
                        break
                    idx, frame = item
                    t1 = _time.perf_counter()
                    writer.write(frame)
                    t_write_total += _time.perf_counter() - t1
                    frame_count += 1
                    with progress_lock:
                        current_progress[0] = idx + 1
                        tasks[task_id].progress = current_progress[0]
            except Exception as e:
                error_holder["error"] = f"Writer error: {e}"
            finally:
                if writer:
                    try:
                        writer.release()
                    except Exception as e:
                        print(f"[FaceMask] Writer release error: {e}")
                if frame_count > 0:
                    n = max(frame_count, 1)
                    print(
                        f"[Perf/Writer] FINAL frame={frame_count}"
                        f" wait_avg={t_wait_total/n*1000:.1f}ms"
                        f" write_avg={t_write_total/n*1000:.1f}ms"
                    )

        print(
            f"[FaceMask] Starting blur bake: {req.video_path} + "
            f"{req.detections_path} -> {req.output_path}"
        )
        # Start threads
        reader_thread = threading.Thread(target=_reader_worker, daemon=True)
        processor_thread = threading.Thread(target=_processor_worker, daemon=True)
        writer_thread = threading.Thread(target=_writer_worker, daemon=True)
        reader_thread.start()
        processor_thread.start()
        writer_thread.start()
        # Wait for completion
        reader_thread.join()
        processor_thread.join()
        writer_thread.join()
        if error_holder["error"]:
            tasks[task_id].status = TaskStatus.FAILED
            tasks[task_id].message = error_holder["error"]
            print(f"[FaceMask] Bake failed: {error_holder['error']}")
        elif cancel_event and cancel_event.is_set():
            tasks[task_id].status = TaskStatus.CANCELLED
            tasks[task_id].message = "Cancelled by user"
        else:
            tasks[task_id].status = TaskStatus.COMPLETED
            tasks[task_id].result_path = req.output_path
            tasks[task_id].message = "Blur bake completed"
            print(f"[FaceMask] Bake completed: {req.output_path}")
    except Exception as e:
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        print(f"Error in async bake task {task_id}: {e}")
        traceback.print_exc()
    finally:
        if task_id in cancel_events:
            del cancel_events[task_id]
def check_gpu_available() -> dict:
    """Probe torch for GPU availability, caching the answer module-wide.

    Returns a dict:
        {
            "available": bool,
            "device_name": str or None,
            "device_count": int,
            "rocm_version": str or None
        }
    """
    global _gpu_status_cache
    # Serve the cached probe result when one exists.
    if _gpu_status_cache is not None:
        return _gpu_status_cache
    status = {
        "available": False,
        "device_name": None,
        "device_count": 0,
        "rocm_version": None
    }
    try:
        import torch
        status["available"] = torch.cuda.is_available()
        if status["available"]:
            status["device_count"] = torch.cuda.device_count()
            if status["device_count"] > 0:
                status["device_name"] = torch.cuda.get_device_name(0)
            # ROCm builds expose the HIP version here.
            if hasattr(torch.version, 'hip'):
                status["rocm_version"] = torch.version.hip
    except Exception as e:
        print(f"[FaceMask] Warning: GPU detection failed: {e}")
        status["available"] = False
    _gpu_status_cache = status
    return status
def log_startup_diagnostics():
    """Print environment, ROCm, and GPU diagnostics to stdout at startup."""
    banner = "=" * 70
    print(banner)
    print("[FaceMask Server] Startup Diagnostics")
    print(banner)

    # --- Python environment ---
    print("\n[Python Environment]")
    print(f" Python Version: {sys.version.split()[0]}")
    print(f" Python Executable: {sys.executable}")
    print(f" Platform: {platform.platform()}")
    print(f" Working Directory: {os.getcwd()}")
    in_virtualenv = sys.prefix != sys.base_prefix
    print(f" Virtual Environment: {'Yes' if in_virtualenv else 'No'}")
    if in_virtualenv:
        print(f" venv path: {sys.prefix}")

    # --- ROCm environment variables ---
    print("\n[ROCm Environment Variables]")
    for var in ('ROCM_PATH', 'HSA_OVERRIDE_GFX_VERSION', 'PYTORCH_ROCM_ARCH',
                'ROCBLAS_TENSILE_LIBPATH', 'LD_LIBRARY_PATH'):
        value = os.environ.get(var)
        if not value:
            print(f" {var}: (not set)")
            continue
        if var == 'LD_LIBRARY_PATH':
            # Report whether the (possibly very long) path includes ROCm/HIP dirs.
            has_rocm = 'rocm' in value.lower() or 'clr-' in value
            has_hip = 'hip' in value.lower()
            print(f" {var}: {value[:100]}...")
            print(f" Contains ROCm paths: {has_rocm}")
            print(f" Contains HIP paths: {has_hip}")
            if not has_rocm:
                print(" ⚠️ WARNING: ROCm library paths not found!")
        else:
            display_value = value[:200] + "... (truncated)" if len(value) > 200 else value
            print(f" {var}: {display_value}")

    # --- GPU detection ---
    print("\n[GPU Detection]")
    try:
        import torch
        cuda_available = torch.cuda.is_available()
        print(f" torch.cuda.is_available(): {cuda_available}")
        if cuda_available:
            device_count = torch.cuda.device_count()
            print(f" GPU Device Count: {device_count}")
            if device_count > 0:
                device_name = torch.cuda.get_device_name(0)
                print(f" GPU Device 0: {device_name}")
            # ROCm exposes its version through torch.version.hip
            if hasattr(torch.version, 'hip'):
                print(f" ROCm Version (HIP): {torch.version.hip}")
            # CUDA version (might be emulated by ROCm)
            if torch.version.cuda:
                print(f" CUDA Version: {torch.version.cuda}")
        else:
            print(" WARNING: GPU not detected!")
            print(" Server will use CPU for inference (slower)")
            print(" Troubleshooting:")
            print(" - Check ROCm environment variables above")
            print(" - Run: python -c 'import torch; print(torch.cuda.is_available())'")
    except ImportError as e:
        print(f" ERROR: Cannot import torch: {e}")
        print(" PyTorch must be installed for inference")
    except Exception as e:
        print(f" ERROR during GPU detection: {e}")
    print(banner)
    print()
@app.get("/status")
def get_status():
gpu_info = check_gpu_available()
return {
"status": "running",
"gpu_available": gpu_info["available"],
"gpu_device": gpu_info["device_name"],
"gpu_count": gpu_info["device_count"],
"rocm_version": gpu_info["rocm_version"]
}
@app.post("/video_info")
def get_video_info(req: VideoInfoRequest):
if not os.path.exists(req.video_path):
raise HTTPException(status_code=404, detail=f"Video not found: {req.video_path}")
cap = cv2.VideoCapture(req.video_path)
if not cap.isOpened():
raise HTTPException(status_code=400, detail="Failed to open video")
try:
avg_fps = float(cap.get(cv2.CAP_PROP_FPS) or 0.0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
finally:
cap.release()
# Blender は r_frame_rate でタイムライン配置を計算するため、
# cv2 の avg_frame_rate ではなく r_frame_rate を fps として返す。
# 例: 120fps タイムベース記録の 60fps 動画で r_frame_rate=120 を返すことで
# compute_strip_frame_range の fps_ratio が Blender の解釈と一致する。
r_fps_float, _ = _get_r_frame_rate(req.video_path)
fps = r_fps_float if r_fps_float > 0 else avg_fps
return {
"video_path": req.video_path,
"fps": fps,
"width": width,
"height": height,
"frame_count": frame_count,
}
@app.post("/generate", response_model=Task)
def generate_mask_endpoint(req: GenerateRequest, background_tasks: BackgroundTasks):
task_id = str(uuid.uuid4())
task = Task(id=task_id, status=TaskStatus.PENDING)
tasks[task_id] = task
cancel_events[task_id] = threading.Event()
background_tasks.add_task(process_video_task, task_id, req)
return task
@app.post("/bake_blur", response_model=Task)
def bake_blur_endpoint(req: BakeRequest, background_tasks: BackgroundTasks):
task_id = str(uuid.uuid4())
task = Task(id=task_id, status=TaskStatus.PENDING)
tasks[task_id] = task
cancel_events[task_id] = threading.Event()
background_tasks.add_task(process_bake_task, task_id, req)
return task
@app.post("/generate_images", response_model=Task)
def generate_images_endpoint(req: GenerateImagesRequest, background_tasks: BackgroundTasks):
task_id = str(uuid.uuid4())
task = Task(id=task_id, status=TaskStatus.PENDING)
tasks[task_id] = task
cancel_events[task_id] = threading.Event()
background_tasks.add_task(process_images_task, task_id, req)
return task
@app.post("/augment_pose", response_model=Task)
def augment_pose_endpoint(req: AugmentPoseRequest, background_tasks: BackgroundTasks):
task_id = str(uuid.uuid4())
task = Task(id=task_id, status=TaskStatus.PENDING)
tasks[task_id] = task
cancel_events[task_id] = threading.Event()
background_tasks.add_task(augment_pose_task, task_id, req)
return task
@app.post("/bake_image_blur", response_model=Task)
def bake_image_blur_endpoint(req: BakeImagesRequest, background_tasks: BackgroundTasks):
task_id = str(uuid.uuid4())
task = Task(id=task_id, status=TaskStatus.PENDING)
tasks[task_id] = task
cancel_events[task_id] = threading.Event()
background_tasks.add_task(process_bake_images_task, task_id, req)
return task
@app.get("/tasks/{task_id}", response_model=Task)
def get_task(task_id: str):
if task_id not in tasks:
raise HTTPException(status_code=404, detail="Task not found")
return tasks[task_id]
@app.post("/tasks/{task_id}/cancel")
def cancel_task(task_id: str):
if task_id not in tasks:
raise HTTPException(status_code=404, detail="Task not found")
if task_id in cancel_events:
cancel_events[task_id].set()
return {"message": "Cancellation requested"}
if __name__ == "__main__":
log_startup_diagnostics()
uvicorn.run(app, host="127.0.0.1", port=8181)