blender-mask-peoples/server/main.py

632 lines
21 KiB
Python

"""
Face Detection Inference Server.
This FastAPI application runs in a separate process to handle
GPU-accelerated face detection using ONNX Runtime.
"""
import os
import sys
import platform
# CRITICAL: Fix LD_LIBRARY_PATH before importing cv2 or torch
# cv2 adds its own lib path to the front, which can override ROCm libraries
def fix_library_path():
    """Move ROCm/HIP entries to the front of ``LD_LIBRARY_PATH``.

    Empty entries are dropped and the relative order inside each group is
    kept. The variable is rewritten (and a message printed) only when at
    least one ROCm-looking entry is present; otherwise it is left untouched.

    NOTE(review): the dynamic loader normally reads ``LD_LIBRARY_PATH`` when
    the process starts -- confirm that rewriting it here really influences
    libraries loaded later in this same process.
    """

    def _looks_like_gpu_path(entry):
        # Substring heuristics: "rocm"/"hip" case-insensitively, "clr-" as-is.
        lowered = entry.lower()
        return 'rocm' in lowered or 'clr-' in entry or 'hip' in lowered

    gpu_entries = []
    remaining_entries = []
    for entry in os.environ.get('LD_LIBRARY_PATH', '').split(':'):
        if not entry:
            continue
        bucket = gpu_entries if _looks_like_gpu_path(entry) else remaining_entries
        bucket.append(entry)

    # Nothing to prioritize: keep the environment exactly as it was.
    if not gpu_entries:
        return

    os.environ['LD_LIBRARY_PATH'] = ':'.join(gpu_entries + remaining_entries)
    print("[FaceMask] Fixed LD_LIBRARY_PATH to prioritize ROCm libraries")
# Reorder LD_LIBRARY_PATH now, at import time, so it happens before cv2
# (imported further down) and any other import below gets a chance to run.
fix_library_path()
import threading
import uuid
import traceback
from typing import Dict, Optional, List
from pathlib import Path
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
import uvicorn
import cv2
import numpy as np
import msgpack
# Add project root to path for imports if needed
sys.path.append(str(Path(__file__).parent.parent))
from server.detector import YOLOFaceDetector, get_detector
# FastAPI application object; the route handlers below register themselves
# on it through the @app.get / @app.post decorators.
app = FastAPI(title="Face Mask Inference Server")
# GPU status cache: None until check_gpu_available() has run once, then the
# dict it produced (reused for every later call).
_gpu_status_cache: Optional[dict] = None
# Task storage
class TaskStatus:
    """String constants for the lifecycle states stored in ``Task.status``."""

    # Registered by an endpoint, worker has not started yet.
    PENDING = "pending"
    # Worker function is running.
    PROCESSING = "processing"
    # Worker finished and produced its output.
    COMPLETED = "completed"
    # Worker stopped on a validation error or an exception.
    FAILED = "failed"
    # Worker observed the task's cancel event and stopped.
    CANCELLED = "cancelled"
class Task(BaseModel):
    """State of one background job, as returned by the task endpoints."""

    # Unique identifier (a UUID4 string generated by the creating endpoint).
    id: str
    # One of the TaskStatus string constants.
    status: str
    # Number of frames handled so far.
    progress: int = 0
    # Number of frames the worker plans to handle (0 until it is known).
    total: int = 0
    # Human-readable outcome or error text, set by the worker.
    message: Optional[str] = None
    # Path of the produced file once the task has completed.
    result_path: Optional[str] = None
# In-memory storage, keyed by task id. Nothing here survives a restart.
# `tasks` entries are not removed anywhere in the visible code.
tasks: Dict[str, Task] = {}
# One cancellation flag per task; the worker functions remove their own entry
# when they finish.
cancel_events: Dict[str, threading.Event] = {}
class GenerateRequest(BaseModel):
    """Request body for ``POST /generate`` (build a face-detection cache)."""

    # Video file to analyse; must exist on the server's filesystem.
    video_path: str
    # Directory that receives "detections.msgpack" (created if missing).
    output_dir: str
    # First and last frame index to process; the range is inclusive.
    start_frame: int
    end_frame: int
    # Thresholds handed to the detector factory.
    conf_threshold: float = 0.5
    iou_threshold: float = 0.45
    # Factor by which each detected box is enlarged around its centre.
    mask_scale: float = 1.5
class BakeRequest(BaseModel):
    """Request body for ``POST /bake_blur`` (render a blurred copy of a video)."""

    # Source video to read frames from.
    video_path: str
    # msgpack detection cache; its "frames" list supplies the boxes per frame.
    detections_path: str
    # Destination video file (its parent directory is created if missing).
    output_path: str
    # Gaussian blur kernel size; the worker forces it to be odd and >= 1.
    blur_size: int = 50
    # Container key used to pick codec candidates ("mp4", "mov", "avi";
    # anything else falls back to "mp4v").
    format: str = "mp4"
def _build_video_writer(
    output_path: str,
    fmt: str,
    fps: float,
    width: int,
    height: int,
) -> cv2.VideoWriter:
    """Open a colour ``cv2.VideoWriter``, trying each codec known for ``fmt``.

    Args:
        output_path: File the writer should produce.
        fmt: Container key, matched case-insensitively ("mp4", "mov", "avi").
            Unknown keys fall back to the single candidate "mp4v".
        fps: Frame rate of the output.
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        The first writer that reports itself as opened.

    Raises:
        RuntimeError: If no candidate codec could be opened.
    """
    fourcc_by_format = {
        "mp4": ["avc1", "mp4v"],
        "mov": ["avc1", "mp4v"],
        "avi": ["MJPG", "XVID"],
    }
    fourcc_names = fourcc_by_format.get(fmt.lower(), ["mp4v"])
    frame_size = (width, height)

    for fourcc_name in fourcc_names:
        candidate = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc(*fourcc_name),
            fps,
            frame_size,
            isColor=True,
        )
        if not candidate.isOpened():
            # This codec is unavailable; free the handle and try the next one.
            candidate.release()
            continue
        print(f"[FaceMask] Using output codec: {fourcc_name}")
        return candidate

    raise RuntimeError(f"Failed to create video writer for format='{fmt}'")
def _scale_bbox(
x: int,
y: int,
w: int,
h: int,
scale: float,
frame_width: int,
frame_height: int,
) -> Optional[List[int]]:
"""Scale bbox around center and clamp to frame boundaries."""
if w <= 0 or h <= 0:
return None
center_x = x + (w * 0.5)
center_y = y + (h * 0.5)
scaled_w = max(1, int(w * scale))
scaled_h = max(1, int(h * scale))
x1 = max(0, int(center_x - scaled_w * 0.5))
y1 = max(0, int(center_y - scaled_h * 0.5))
x2 = min(frame_width, x1 + scaled_w)
y2 = min(frame_height, y1 + scaled_h)
out_w = x2 - x1
out_h = y2 - y1
if out_w <= 0 or out_h <= 0:
return None
return [x1, y1, out_w, out_h]
def process_video_task(task_id: str, req: GenerateRequest):
    """Background task to detect faces and save bbox cache as msgpack.

    Reads frames ``req.start_frame`` .. ``req.end_frame`` (inclusive) from
    ``req.video_path``, runs the detector on them in small batches, scales
    each detected box with ``_scale_bbox`` and writes the result to
    ``<req.output_dir>/detections.msgpack``. Progress and outcome are
    reported through ``tasks[task_id]``; nothing is returned.

    Args:
        task_id: Key of the task in ``tasks`` / ``cancel_events``.
        req: Parameters of the generation request.
    """
    cap = None
    try:
        task = tasks[task_id]
        task.status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)

        def fail(message: str) -> None:
            # Record a terminal failure on the task.
            task.status = TaskStatus.FAILED
            task.message = message

        if not os.path.exists(req.video_path):
            fail(f"Video not found: {req.video_path}")
            return

        # A negative start would be read from frame 0 but recorded as a
        # negative index in the cache, so reject it up front.
        if req.start_frame < 0:
            fail("Invalid frame range")
            return

        print(f"Loading detector for task {task_id}...")
        detector = get_detector(
            conf_threshold=req.conf_threshold,
            iou_threshold=req.iou_threshold,
        )
        # Attribute access only; presumably forces a lazy model load before
        # the video is opened -- TODO confirm against the detector module.
        _ = detector.model

        cap = cv2.VideoCapture(req.video_path)
        if not cap.isOpened():
            fail("Failed to open video")
            return

        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Clamp to the video length only when the backend reports one. The
        # bake task treats a non-positive frame count as "unknown" too; in
        # that case the read loop below simply stops at the first failed read.
        end_frame = req.end_frame
        if total_video_frames > 0:
            end_frame = min(end_frame, total_video_frames - 1)

        frames_to_process = end_frame - req.start_frame + 1
        if frames_to_process <= 0:
            fail("Invalid frame range")
            return
        task.total = frames_to_process

        os.makedirs(req.output_dir, exist_ok=True)
        output_msgpack_path = os.path.join(req.output_dir, "detections.msgpack")

        if req.start_frame > 0:
            seek_ok = cap.set(cv2.CAP_PROP_POS_FRAMES, req.start_frame)
            if not seek_ok:
                # Seeking is unsupported: skip frames by decoding them.
                for _ in range(req.start_frame):
                    skipped_ok, _skipped = cap.read()
                    if not skipped_ok:
                        fail(f"Failed to seek to start frame: {req.start_frame}")
                        return

        frame_buffer: List[np.ndarray] = []
        frame_detections: List[List[List[float]]] = []
        batch_size = 5

        def process_batch():
            # Run the detector on the buffered frames and append one list of
            # scaled boxes per frame to `frame_detections`.
            if not frame_buffer:
                return
            for detections in detector.detect_batch(frame_buffer):
                packed_detections: List[List[float]] = []
                # Each detection is unpacked as (x, y, w, h, confidence).
                for x, y, w, h, conf in detections:
                    scaled = _scale_bbox(
                        int(x),
                        int(y),
                        int(w),
                        int(h),
                        float(req.mask_scale),
                        width,
                        height,
                    )
                    if scaled is None:
                        continue
                    packed_detections.append(
                        [scaled[0], scaled[1], scaled[2], scaled[3], float(conf)]
                    )
                frame_detections.append(packed_detections)
                # One entry per processed frame, so the length is the count.
                task.progress = len(frame_detections)
            frame_buffer.clear()

        print(
            f"Starting detection cache generation: {req.video_path} "
            f"({frames_to_process} frames) -> {output_msgpack_path}"
        )

        for _ in range(req.start_frame, end_frame + 1):
            if cancel_event and cancel_event.is_set():
                task.status = TaskStatus.CANCELLED
                task.message = "Cancelled by user"
                # Drop undetected frames; no point running inference on them.
                frame_buffer.clear()
                break
            ret, frame = cap.read()
            if not ret:
                break
            frame_buffer.append(frame)
            if len(frame_buffer) >= batch_size:
                process_batch()

        # Flush the final, partially filled batch.
        if frame_buffer:
            process_batch()

        # Still PROCESSING means neither cancelled nor failed.
        if task.status == TaskStatus.PROCESSING:
            payload = {
                "version": 1,
                "video_path": req.video_path,
                "start_frame": req.start_frame,
                # Reflects the frames actually decoded, which can be fewer
                # than requested when the video ends early.
                "end_frame": req.start_frame + len(frame_detections) - 1,
                "width": width,
                "height": height,
                "fps": fps,
                "mask_scale": float(req.mask_scale),
                "frames": frame_detections,
            }
            with open(output_msgpack_path, "wb") as f:
                f.write(msgpack.packb(payload, use_bin_type=True))
            task.status = TaskStatus.COMPLETED
            task.result_path = output_msgpack_path
            task.message = "Detection cache completed"
            print(f"Task {task_id} completed: {output_msgpack_path}")
    except Exception as e:
        # Worker boundary: report the error on the task instead of raising.
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        print(f"Error in task {task_id}: {e}")
        traceback.print_exc()
    finally:
        if cap is not None:
            cap.release()
        # pop() avoids the separate membership test before deleting.
        cancel_events.pop(task_id, None)
def _blur_faces_in_frame(
    frame: np.ndarray,
    boxes,
    blur_size: int,
    feather_radius: int,
) -> np.ndarray:
    """Return ``frame`` with a feathered Gaussian blur inside each box's ellipse.

    Args:
        frame: Decoded source frame; it is never modified.
        boxes: Iterable of ``[x, y, w, h, ...]`` entries. Entries that are not
            a list/tuple of at least four values, or that have a non-positive
            size, are ignored.
        blur_size: Odd Gaussian kernel size used for the blur itself.
        feather_radius: Radius used to soften the mask edge.

    Returns:
        The original ``frame`` object when there is nothing to blur,
        otherwise a new array with the blurred region composited in.
    """
    if not boxes:
        return frame

    frame_height, frame_width = frame.shape[:2]
    mask_gray = np.zeros((frame_height, frame_width), dtype=np.uint8)
    for box in boxes:
        if not isinstance(box, (list, tuple)) or len(box) < 4:
            continue
        x, y, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
        if w <= 0 or h <= 0:
            continue
        center = (x + w // 2, y + h // 2)
        axes = (max(1, w // 2), max(1, h // 2))
        # Filled ellipse inscribed in the box.
        cv2.ellipse(mask_gray, center, axes, 0, 0, 360, 255, -1)

    if cv2.countNonZero(mask_gray) == 0:
        return frame

    # Soften the mask edge so the blur fades into the sharp image.
    feather_kernel = feather_radius * 2 + 1
    mask_gray = cv2.GaussianBlur(mask_gray, (feather_kernel, feather_kernel), 0)

    # Work only on the region the feathered mask actually touches.
    _, mask_binary = cv2.threshold(mask_gray, 2, 255, cv2.THRESH_BINARY)
    non_zero_coords = cv2.findNonZero(mask_binary)
    if non_zero_coords is None:
        return frame

    bx, by, bw, bh = cv2.boundingRect(non_zero_coords)
    # Extra margin so the blur kernel has real pixels around the mask.
    blur_margin = max(1, (blur_size // 2) + feather_radius)
    x1 = max(0, bx - blur_margin)
    y1 = max(0, by - blur_margin)
    x2 = min(frame_width, bx + bw + blur_margin)
    y2 = min(frame_height, by + bh + blur_margin)

    roi_src = frame[y1:y2, x1:x2]
    if roi_src.size == 0:
        return frame

    roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
    # Per-pixel blend weight in [0, 1], broadcast over the colour channels.
    roi_alpha = (mask_gray[y1:y2, x1:x2].astype(np.float32) / 255.0)[..., np.newaxis]
    roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
        roi_blurred.astype(np.float32) * roi_alpha
    )

    output_frame = frame.copy()
    output_frame[y1:y2, x1:x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
    return output_frame


def process_bake_task(task_id: str, req: BakeRequest):
    """Background task to bake blur using bbox detections in msgpack.

    Reads ``req.video_path`` frame by frame, blurs the boxes listed for the
    frame with the same index in the detection cache and writes the result to
    ``req.output_path``. Progress and outcome are reported through
    ``tasks[task_id]``; nothing is returned.

    NOTE(review): the cache written by the generate task carries a
    ``start_frame``, but this function always reads the source from its first
    frame and pairs it with ``frames[0]``. Confirm that callers only bake
    caches generated from frame 0, or that the source is pre-trimmed.

    Args:
        task_id: Key of the task in ``tasks`` / ``cancel_events``.
        req: Parameters of the bake request.
    """
    src_cap = None
    writer = None
    try:
        task = tasks[task_id]
        task.status = TaskStatus.PROCESSING
        cancel_event = cancel_events.get(task_id)

        def fail(message: str) -> None:
            # Record a terminal failure on the task.
            task.status = TaskStatus.FAILED
            task.message = message

        if not os.path.exists(req.video_path):
            fail(f"Video not found: {req.video_path}")
            return
        if not os.path.exists(req.detections_path):
            fail(f"Detections file not found: {req.detections_path}")
            return

        src_cap = cv2.VideoCapture(req.video_path)
        if not src_cap.isOpened():
            fail("Failed to open source video")
            return

        with open(req.detections_path, "rb") as f:
            payload = msgpack.unpackb(f.read(), raw=False)
        # A cache whose top level is not a mapping has no "frames" either.
        frames = payload.get("frames") if isinstance(payload, dict) else None
        if not isinstance(frames, list):
            fail("Invalid detections format: 'frames' is missing")
            return

        src_fps = src_cap.get(cv2.CAP_PROP_FPS) or 30.0
        src_width = int(src_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        src_height = int(src_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        src_frames = int(src_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if src_width <= 0 or src_height <= 0:
            fail("Invalid source video dimensions")
            return

        # Never more frames than the cache covers; a non-positive frame count
        # from the backend is treated as unknown.
        total = min(src_frames, len(frames)) if src_frames > 0 else len(frames)
        if total <= 0:
            fail("Source/detections frame count is zero")
            return
        task.total = total

        output_dir = os.path.dirname(req.output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        writer = _build_video_writer(
            req.output_path, req.format, src_fps, src_width, src_height
        )

        # The kernel size is bumped to the next odd number (and at least 1).
        blur_size = max(1, int(req.blur_size))
        if blur_size % 2 == 0:
            blur_size += 1
        feather_radius = max(3, min(25, blur_size // 3))

        print(
            f"[FaceMask] Starting blur bake (bbox-msgpack): {req.video_path} + "
            f"{req.detections_path} -> {req.output_path}"
        )

        # `total` never exceeds len(frames), so frames[idx] is always valid.
        for idx in range(total):
            if cancel_event and cancel_event.is_set():
                task.status = TaskStatus.CANCELLED
                task.message = "Cancelled by user"
                break
            src_ok, src_frame = src_cap.read()
            if not src_ok:
                break
            writer.write(
                _blur_faces_in_frame(src_frame, frames[idx], blur_size, feather_radius)
            )
            task.progress = idx + 1

        # Still PROCESSING means neither cancelled nor failed.
        if task.status == TaskStatus.PROCESSING:
            task.status = TaskStatus.COMPLETED
            task.result_path = req.output_path
            task.message = "Blur bake completed"
            print(f"[FaceMask] Bake completed: {req.output_path}")
    except Exception as e:
        # Worker boundary: report the error on the task instead of raising.
        tasks[task_id].status = TaskStatus.FAILED
        tasks[task_id].message = str(e)
        print(f"Error in bake task {task_id}: {e}")
        traceback.print_exc()
    finally:
        if src_cap is not None:
            src_cap.release()
        if writer is not None:
            writer.release()
        # pop() avoids the separate membership test before deleting.
        cancel_events.pop(task_id, None)
def check_gpu_available() -> dict:
    """
    Check if GPU is available for inference.

    The probe runs once; its result (success or failure) is stored in the
    module-level ``_gpu_status_cache`` and returned by every later call.

    Returns a dict with GPU information:
    {
        "available": bool,
        "device_name": str or None,
        "device_count": int,
        "rocm_version": str or None
    }
    If the probe raises, every field keeps its "no GPU" default.
    """
    global _gpu_status_cache
    # Return cached result if available
    if _gpu_status_cache is not None:
        return _gpu_status_cache

    result = {
        "available": False,
        "device_name": None,
        "device_count": 0,
        "rocm_version": None
    }
    try:
        import torch

        if torch.cuda.is_available():
            # Collect into a separate dict and merge only once every query
            # succeeded, so a failure halfway cannot leave `result`
            # half-filled (e.g. available=False but device_count=1).
            probed = {"available": True}
            probed["device_count"] = torch.cuda.device_count()
            if probed["device_count"] > 0:
                probed["device_name"] = torch.cuda.get_device_name(0)
            if hasattr(torch.version, 'hip'):
                probed["rocm_version"] = torch.version.hip
            result.update(probed)
    except Exception as e:
        print(f"[FaceMask] Warning: GPU detection failed: {e}")

    # Cache the result
    _gpu_status_cache = result
    return result
def log_startup_diagnostics():
    """Log diagnostic information about the environment and GPU.

    Prints three sections to stdout: the Python environment, the ROCm-related
    environment variables and the result of probing the GPU through torch.
    Errors while importing or querying torch are printed, never raised.
    """
    banner = "=" * 70
    print(banner)
    print("[FaceMask Server] Startup Diagnostics")
    print(banner)

    # Python Environment
    print("\n[Python Environment]")
    print(f" Python Version: {sys.version.split()[0]}")
    print(f" Python Executable: {sys.executable}")
    print(f" Platform: {platform.platform()}")
    print(f" Working Directory: {os.getcwd()}")
    # Inside a venv the interpreter prefix differs from the base prefix.
    in_venv = sys.prefix != sys.base_prefix
    print(f" Virtual Environment: {'Yes' if in_venv else 'No'}")
    if in_venv:
        print(f" venv path: {sys.prefix}")

    # ROCm Environment Variables
    print("\n[ROCm Environment Variables]")
    rocm_vars = [
        'ROCM_PATH',
        'HSA_OVERRIDE_GFX_VERSION',
        'PYTORCH_ROCM_ARCH',
        'ROCBLAS_TENSILE_LIBPATH',
        'LD_LIBRARY_PATH'
    ]
    for var in rocm_vars:
        value = os.environ.get(var)
        if not value:
            print(f" {var}: (not set)")
            continue

        if var == 'LD_LIBRARY_PATH':
            # For LD_LIBRARY_PATH, show if ROCm paths are included
            has_rocm = 'rocm' in value.lower() or 'clr-' in value
            has_hip = 'hip' in value.lower()
            # Append the ellipsis only when something was actually cut off.
            shown = value if len(value) <= 100 else value[:100] + "..."
            print(f" {var}: {shown}")
            print(f" Contains ROCm paths: {has_rocm}")
            print(f" Contains HIP paths: {has_hip}")
            if not has_rocm:
                print(" ⚠️ WARNING: ROCm library paths not found!")
        else:
            if len(value) > 200:
                display_value = value[:200] + "... (truncated)"
            else:
                display_value = value
            print(f" {var}: {display_value}")

    # GPU Detection
    print("\n[GPU Detection]")
    try:
        import torch

        cuda_available = torch.cuda.is_available()
        print(f" torch.cuda.is_available(): {cuda_available}")
        if cuda_available:
            device_count = torch.cuda.device_count()
            print(f" GPU Device Count: {device_count}")
            if device_count > 0:
                device_name = torch.cuda.get_device_name(0)
                print(f" GPU Device 0: {device_name}")
            # ROCm version; skipped when the attribute is missing or empty so
            # no "None" line is printed on builds without HIP.
            hip_version = getattr(torch.version, 'hip', None)
            if hip_version:
                print(f" ROCm Version (HIP): {hip_version}")
            # CUDA version (might be emulated by ROCm)
            if torch.version.cuda:
                print(f" CUDA Version: {torch.version.cuda}")
        else:
            print(" WARNING: GPU not detected!")
            print(" Server will use CPU for inference (slower)")
            print(" Troubleshooting:")
            print(" - Check ROCm environment variables above")
            print(" - Run: python -c 'import torch; print(torch.cuda.is_available())'")
    except ImportError as e:
        print(f" ERROR: Cannot import torch: {e}")
        print(" PyTorch must be installed for inference")
    except Exception as e:
        print(f" ERROR during GPU detection: {e}")

    print(banner)
    print()
@app.get("/status")
def get_status():
    """Report that the server is up, together with the cached GPU probe."""
    gpu = check_gpu_available()
    status_body = {"status": "running"}
    status_body["gpu_available"] = gpu["available"]
    status_body["gpu_device"] = gpu["device_name"]
    status_body["gpu_count"] = gpu["device_count"]
    status_body["rocm_version"] = gpu["rocm_version"]
    return status_body
@app.post("/generate", response_model=Task)
def generate_mask_endpoint(req: GenerateRequest, background_tasks: BackgroundTasks):
    """Register a detection-cache task and schedule it in the background.

    Returns the freshly created task (status "pending"); clients use its id
    with the task endpoints to follow or cancel the work.
    """
    new_id = str(uuid.uuid4())
    # Register the task and its cancel flag before the worker can start.
    tasks[new_id] = Task(id=new_id, status=TaskStatus.PENDING)
    cancel_events[new_id] = threading.Event()
    background_tasks.add_task(process_video_task, new_id, req)
    return tasks[new_id]
@app.post("/bake_blur", response_model=Task)
def bake_blur_endpoint(req: BakeRequest, background_tasks: BackgroundTasks):
    """Register a blur-bake task and schedule it in the background.

    Returns the freshly created task (status "pending"); clients use its id
    with the task endpoints to follow or cancel the work.
    """
    new_id = str(uuid.uuid4())
    # Register the task and its cancel flag before the worker can start.
    tasks[new_id] = Task(id=new_id, status=TaskStatus.PENDING)
    cancel_events[new_id] = threading.Event()
    background_tasks.add_task(process_bake_task, new_id, req)
    return tasks[new_id]
@app.get("/tasks/{task_id}", response_model=Task)
def get_task(task_id: str):
    """Return the current state of a task, or respond 404 if the id is unknown."""
    found = tasks.get(task_id)
    if found is None:
        raise HTTPException(status_code=404, detail="Task not found")
    return found
@app.post("/tasks/{task_id}/cancel")
def cancel_task(task_id: str):
    """Ask a running task to stop; responds 404 if the id is unknown.

    Cancellation is cooperative: this only sets the task's event, which the
    worker checks between frames. For a task that has already finished (its
    event has been removed) the call is a no-op that still returns the same
    message.
    """
    if task_id not in tasks:
        raise HTTPException(status_code=404, detail="Task not found")
    # Single lookup: the worker thread removes the event when it finishes, so
    # a membership test followed by indexing could raise KeyError in between.
    event = cancel_events.get(task_id)
    if event is not None:
        event.set()
    return {"message": "Cancellation requested"}
if __name__ == "__main__":
    # Print the environment/GPU report once, before the server starts.
    log_startup_diagnostics()
    # Serve on the loopback interface only, on port 8181.
    uvicorn.run(app, host="127.0.0.1", port=8181)