Compare commits

...

3 Commits

Author SHA1 Message Date
da9de60697 Blurサイズ問題の修正 2026-02-19 09:45:05 +09:00
9ce6ec99d3 fix: ROIの実装ミス 2026-02-18 20:21:53 +09:00
08f20fa6fe Change model: face -> pose 2026-02-18 20:18:53 +09:00
9 changed files with 210 additions and 168 deletions

View File

@ -40,15 +40,6 @@ def register():
step=0.01, step=0.01,
) )
bpy.types.Scene.facemask_mask_scale = FloatProperty(
name="Mask Scale",
description="Scale factor for mask region (1.0 = exact face size)",
default=1.5,
min=1.0,
max=3.0,
step=0.1,
)
bpy.types.Scene.facemask_cache_dir = StringProperty( bpy.types.Scene.facemask_cache_dir = StringProperty(
name="Cache Directory", name="Cache Directory",
description="Optional cache root directory (empty = default .mask_cache)", description="Optional cache root directory (empty = default .mask_cache)",
@ -64,6 +55,15 @@ def register():
max=501, max=501,
) )
bpy.types.Scene.facemask_bake_display_scale = FloatProperty(
name="Mask Scale",
description="Scale factor for the blur mask ellipse at bake time (1.0 = raw detection size)",
default=1.3,
min=0.5,
max=3.0,
step=0.1,
)
bpy.types.Scene.facemask_bake_format = EnumProperty( bpy.types.Scene.facemask_bake_format = EnumProperty(
name="Bake Format", name="Bake Format",
description="Output format for baked blur video", description="Output format for baked blur video",
@ -91,9 +91,9 @@ def unregister():
# Unregister scene properties # Unregister scene properties
del bpy.types.Scene.facemask_conf_threshold del bpy.types.Scene.facemask_conf_threshold
del bpy.types.Scene.facemask_iou_threshold del bpy.types.Scene.facemask_iou_threshold
del bpy.types.Scene.facemask_mask_scale
del bpy.types.Scene.facemask_cache_dir del bpy.types.Scene.facemask_cache_dir
del bpy.types.Scene.facemask_bake_blur_size del bpy.types.Scene.facemask_bake_blur_size
del bpy.types.Scene.facemask_bake_display_scale
del bpy.types.Scene.facemask_bake_format del bpy.types.Scene.facemask_bake_format

View File

@ -32,6 +32,7 @@ class AsyncBakeGenerator:
detections_path: str, detections_path: str,
output_path: str, output_path: str,
blur_size: int, blur_size: int,
display_scale: float,
fmt: str, fmt: str,
on_complete: Optional[Callable] = None, on_complete: Optional[Callable] = None,
on_progress: Optional[Callable] = None, on_progress: Optional[Callable] = None,
@ -53,7 +54,7 @@ class AsyncBakeGenerator:
self.worker_thread = threading.Thread( self.worker_thread = threading.Thread(
target=self._worker, target=self._worker,
args=(video_path, detections_path, output_path, blur_size, fmt), args=(video_path, detections_path, output_path, blur_size, display_scale, fmt),
daemon=True, daemon=True,
) )
self.worker_thread.start() self.worker_thread.start()
@ -75,6 +76,7 @@ class AsyncBakeGenerator:
detections_path: str, detections_path: str,
output_path: str, output_path: str,
blur_size: int, blur_size: int,
display_scale: float,
fmt: str, fmt: str,
): ):
import time import time
@ -88,6 +90,7 @@ class AsyncBakeGenerator:
detections_path=detections_path, detections_path=detections_path,
output_path=output_path, output_path=output_path,
blur_size=blur_size, blur_size=blur_size,
display_scale=display_scale,
fmt=fmt, fmt=fmt,
) )

View File

@ -44,7 +44,6 @@ class AsyncMaskGenerator:
fps: float, fps: float,
conf_threshold: float = 0.5, conf_threshold: float = 0.5,
iou_threshold: float = 0.45, iou_threshold: float = 0.45,
mask_scale: float = 1.5,
on_complete: Optional[Callable] = None, on_complete: Optional[Callable] = None,
on_progress: Optional[Callable] = None, on_progress: Optional[Callable] = None,
): ):
@ -94,7 +93,6 @@ class AsyncMaskGenerator:
fps, fps,
conf_threshold, conf_threshold,
iou_threshold, iou_threshold,
mask_scale,
), ),
daemon=True, daemon=True,
) )
@ -121,7 +119,6 @@ class AsyncMaskGenerator:
fps: float, fps: float,
conf_threshold: float, conf_threshold: float,
iou_threshold: float, iou_threshold: float,
mask_scale: float,
): ):
""" """
Worker thread function. Delegates to inference server and polls status. Worker thread function. Delegates to inference server and polls status.
@ -141,7 +138,6 @@ class AsyncMaskGenerator:
end_frame=end_frame, end_frame=end_frame,
conf_threshold=conf_threshold, conf_threshold=conf_threshold,
iou_threshold=iou_threshold, iou_threshold=iou_threshold,
mask_scale=mask_scale,
) )
print(f"[FaceMask] Task started: {task_id}") print(f"[FaceMask] Task started: {task_id}")

View File

@ -204,7 +204,6 @@ class InferenceClient:
end_frame: int, end_frame: int,
conf_threshold: float, conf_threshold: float,
iou_threshold: float, iou_threshold: float,
mask_scale: float,
) -> str: ) -> str:
""" """
Request mask generation. Request mask generation.
@ -222,7 +221,6 @@ class InferenceClient:
"end_frame": end_frame, "end_frame": end_frame,
"conf_threshold": conf_threshold, "conf_threshold": conf_threshold,
"iou_threshold": iou_threshold, "iou_threshold": iou_threshold,
"mask_scale": mask_scale,
} }
req = urllib.request.Request( req = urllib.request.Request(
@ -255,6 +253,7 @@ class InferenceClient:
detections_path: str, detections_path: str,
output_path: str, output_path: str,
blur_size: int, blur_size: int,
display_scale: float,
fmt: str, fmt: str,
) -> str: ) -> str:
""" """
@ -271,6 +270,7 @@ class InferenceClient:
"detections_path": detections_path, "detections_path": detections_path,
"output_path": output_path, "output_path": output_path,
"blur_size": blur_size, "blur_size": blur_size,
"display_scale": display_scale,
"format": fmt, "format": fmt,
} }

View File

@ -20,6 +20,7 @@ KEY_BAKED = "facemask_baked_filepath"
KEY_MODE = "facemask_source_mode" KEY_MODE = "facemask_source_mode"
KEY_FORMAT = "facemask_bake_format" KEY_FORMAT = "facemask_bake_format"
KEY_BLUR_SIZE = "facemask_bake_blur_size" KEY_BLUR_SIZE = "facemask_bake_blur_size"
KEY_DISPLAY_SCALE = "facemask_bake_display_scale"
FORMAT_EXT = { FORMAT_EXT = {
@ -86,20 +87,27 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
bake_format = scene.facemask_bake_format bake_format = scene.facemask_bake_format
output_path = _output_path(video_strip, detections_path, bake_format) output_path = _output_path(video_strip, detections_path, bake_format)
blur_size = int(scene.facemask_bake_blur_size) blur_size = int(scene.facemask_bake_blur_size)
display_scale = float(scene.facemask_bake_display_scale)
# Reuse baked cache when parameters match and file still exists. # Reuse baked cache when parameters match and file still exists.
cached_baked_path = video_strip.get(KEY_BAKED) cached_baked_path = video_strip.get(KEY_BAKED)
cached_format = video_strip.get(KEY_FORMAT) cached_format = video_strip.get(KEY_FORMAT)
cached_blur_size = video_strip.get(KEY_BLUR_SIZE) cached_blur_size = video_strip.get(KEY_BLUR_SIZE)
cached_display_scale = video_strip.get(KEY_DISPLAY_SCALE)
try: try:
cached_blur_size_int = int(cached_blur_size) cached_blur_size_int = int(cached_blur_size)
except (TypeError, ValueError): except (TypeError, ValueError):
cached_blur_size_int = None cached_blur_size_int = None
try:
cached_display_scale_f = float(cached_display_scale)
except (TypeError, ValueError):
cached_display_scale_f = None
if ( if (
cached_baked_path cached_baked_path
and os.path.exists(cached_baked_path) and os.path.exists(cached_baked_path)
and cached_format == bake_format and cached_format == bake_format
and cached_blur_size_int == blur_size and cached_blur_size_int == blur_size
and cached_display_scale_f == display_scale
): ):
if video_strip.get(KEY_MODE) != "baked": if video_strip.get(KEY_MODE) != "baked":
video_strip[KEY_MODE] = "baked" video_strip[KEY_MODE] = "baked"
@ -126,6 +134,7 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
strip[KEY_MODE] = "baked" strip[KEY_MODE] = "baked"
strip[KEY_FORMAT] = bake_format strip[KEY_FORMAT] = bake_format
strip[KEY_BLUR_SIZE] = blur_size strip[KEY_BLUR_SIZE] = blur_size
strip[KEY_DISPLAY_SCALE] = display_scale
_set_strip_source(strip, result_path) _set_strip_source(strip, result_path)
print(f"[FaceMask] Bake completed and source swapped: {result_path}") print(f"[FaceMask] Bake completed and source swapped: {result_path}")
elif status == "error": elif status == "error":
@ -153,6 +162,7 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
detections_path=detections_path, detections_path=detections_path,
output_path=output_path, output_path=output_path,
blur_size=blur_size, blur_size=blur_size,
display_scale=display_scale,
fmt=bake_format.lower(), fmt=bake_format.lower(),
on_complete=on_complete, on_complete=on_complete,
on_progress=on_progress, on_progress=on_progress,

View File

@ -110,7 +110,6 @@ class SEQUENCER_OT_generate_face_mask(Operator):
# Get parameters from scene properties # Get parameters from scene properties
conf_threshold = scene.facemask_conf_threshold conf_threshold = scene.facemask_conf_threshold
iou_threshold = scene.facemask_iou_threshold iou_threshold = scene.facemask_iou_threshold
mask_scale = scene.facemask_mask_scale
# Start generation # Start generation
generator.start( generator.start(
@ -121,7 +120,6 @@ class SEQUENCER_OT_generate_face_mask(Operator):
fps=fps, fps=fps,
conf_threshold=conf_threshold, conf_threshold=conf_threshold,
iou_threshold=iou_threshold, iou_threshold=iou_threshold,
mask_scale=mask_scale,
on_complete=on_complete, on_complete=on_complete,
on_progress=on_progress, on_progress=on_progress,
) )

View File

@ -74,7 +74,6 @@ class SEQUENCER_PT_face_mask(Panel):
col = box.column(align=True) col = box.column(align=True)
col.prop(scene, "facemask_conf_threshold") col.prop(scene, "facemask_conf_threshold")
col.prop(scene, "facemask_iou_threshold") col.prop(scene, "facemask_iou_threshold")
col.prop(scene, "facemask_mask_scale")
def _draw_server_status(self, layout): def _draw_server_status(self, layout):
"""Draw server status and GPU info.""" """Draw server status and GPU info."""
@ -225,6 +224,7 @@ class SEQUENCER_PT_face_mask(Panel):
# Bake parameters # Bake parameters
col = box.column(align=True) col = box.column(align=True)
col.prop(context.scene, "facemask_bake_blur_size") col.prop(context.scene, "facemask_bake_blur_size")
col.prop(context.scene, "facemask_bake_display_scale")
col.prop(context.scene, "facemask_bake_format") col.prop(context.scene, "facemask_bake_format")
# Source status # Source status

View File

@ -1,28 +1,104 @@
""" """
YOLOv8 Face Detector using PyTorch with ROCm support. YOLOv8 Pose Head Detector using PyTorch with ROCm support.
This module provides high-performance face detection using Detects human heads from all angles (frontal, profile, rear) by using
YOLOv8-face model with AMD GPU (ROCm) acceleration. YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
""" """
import os import os
from typing import List, Tuple, Optional from typing import List, Tuple, Optional
from pathlib import Path
import numpy as np import numpy as np
class YOLOFaceDetector: # COCO pose keypoint indices
""" _HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear
YOLOv8 face detector with PyTorch ROCm support. _SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder
_KP_CONF_THRESH = 0.3
Features:
- ROCm GPU acceleration for AMD GPUs def _head_bbox_from_pose(
- High accuracy face detection kp_xy: np.ndarray,
- Automatic NMS for overlapping detections kp_conf: np.ndarray,
person_x1: float,
person_y1: float,
person_x2: float,
person_y2: float,
) -> Tuple[int, int, int, int]:
"""
Estimate head bounding box (x, y, w, h) from COCO pose keypoints.
Strategy:
1. Use head keypoints (0-4: nose, eyes, ears) if visible.
2. Fall back to shoulder keypoints (5-6) to infer head position.
3. Last resort: use top of the person bounding box.
"""
person_w = max(person_x2 - person_x1, 1.0)
# --- Step 1: head keypoints ---
visible_head = [
(float(kp_xy[i][0]), float(kp_xy[i][1]))
for i in _HEAD_KP
if float(kp_conf[i]) > _KP_CONF_THRESH
]
if visible_head:
xs = [p[0] for p in visible_head]
ys = [p[1] for p in visible_head]
kp_x1, kp_y1 = min(xs), min(ys)
kp_x2, kp_y2 = max(xs), max(ys)
span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
cx = (kp_x1 + kp_x2) / 2.0
cy = (kp_y1 + kp_y2) / 2.0
# Head radius: inter-landmark span ≈ 80% of head width, so expand by ~1.25
# Shift center upward slightly to include scalp
r = max(span * 1.25, person_w * 0.20)
x1 = int(cx - r)
y1 = int(cy - r * 1.15) # extra margin above (scalp)
x2 = int(cx + r)
y2 = int(cy + r * 0.85) # less margin below (chin)
return x1, y1, x2 - x1, y2 - y1
# --- Step 2: shoulder keypoints ---
visible_shoulder = [
(float(kp_xy[i][0]), float(kp_xy[i][1]))
for i in _SHOULDER_KP
if float(kp_conf[i]) > _KP_CONF_THRESH
]
if visible_shoulder:
cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
if len(visible_shoulder) == 2:
sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
else:
sh_width = person_w * 0.5
r = max(sh_width * 0.5, person_w * 0.20)
cy = cy_sh - r * 1.3 # head center is above shoulders
x1 = int(cx - r)
y1 = int(cy - r)
x2 = int(cx + r)
y2 = int(cy + r)
return x1, y1, x2 - x1, y2 - y1
# --- Step 3: person bbox top ---
r = max(person_w * 0.35, 20.0)
cx = (person_x1 + person_x2) / 2.0
x1 = int(cx - r)
y1 = int(person_y1)
x2 = int(cx + r)
y2 = int(person_y1 + r * 2.0)
return x1, y1, x2 - x1, y2 - y1
class YOLOPoseHeadDetector:
"""
Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
so that detection works regardless of the person's facing direction.
""" """
# Default model path relative to this file # Standard Ultralytics model — auto-downloaded on first use
DEFAULT_MODEL = "yolov8n-face-lindevs.pt" DEFAULT_MODEL = os.path.join("models", "yolov8n-pose.pt")
def __init__( def __init__(
self, self,
@ -31,15 +107,6 @@ class YOLOFaceDetector:
iou_threshold: float = 0.45, iou_threshold: float = 0.45,
input_size: Tuple[int, int] = (640, 640), input_size: Tuple[int, int] = (640, 640),
): ):
"""
Initialize the YOLO face detector.
Args:
model_path: Path to PyTorch model file. If None, uses default model.
conf_threshold: Confidence threshold for detections
iou_threshold: IoU threshold for NMS
input_size: Model input size (width, height)
"""
self.conf_threshold = conf_threshold self.conf_threshold = conf_threshold
self.iou_threshold = iou_threshold self.iou_threshold = iou_threshold
self.input_size = input_size self.input_size = input_size
@ -49,23 +116,20 @@ class YOLOFaceDetector:
@property @property
def model(self): def model(self):
"""Lazy-load YOLO model.""" """Lazy-load YOLO pose model."""
if self._model is None: if self._model is None:
from ultralytics import YOLO from ultralytics import YOLO
import torch import torch
# Determine model path # Use provided path or let Ultralytics auto-download the default
if self._model_path is None: if self._model_path is not None:
# Assuming models are in ../models relative to server/detector.py if not os.path.exists(self._model_path):
models_dir = Path(__file__).parent.parent / "models" raise FileNotFoundError(f"Model not found: {self._model_path}")
model_path = str(models_dir / self.DEFAULT_MODEL)
else:
model_path = self._model_path model_path = self._model_path
else:
model_path = self.DEFAULT_MODEL
os.makedirs(os.path.dirname(model_path), exist_ok=True)
if not os.path.exists(model_path):
raise FileNotFoundError(f"Model not found: {model_path}")
# Detect device (ROCm GPU or CPU)
if torch.cuda.is_available(): if torch.cuda.is_available():
self._device = 'cuda' self._device = 'cuda'
device_name = torch.cuda.get_device_name(0) device_name = torch.cuda.get_device_name(0)
@ -74,25 +138,47 @@ class YOLOFaceDetector:
self._device = 'cpu' self._device = 'cpu'
print("[FaceMask] Using CPU for inference (ROCm GPU not available)") print("[FaceMask] Using CPU for inference (ROCm GPU not available)")
# Load model (let Ultralytics handle device management)
try: try:
self._model = YOLO(model_path) self._model = YOLO(model_path)
# Don't call .to() - let predict() handle device assignment print(f"[FaceMask] Pose model loaded: {model_path}")
print(f"[FaceMask] Model loaded, will use device: {self._device}") print(f"[FaceMask] Device: {self._device}")
except Exception as e: except Exception as e:
print(f"[FaceMask] Error loading model: {e}") print(f"[FaceMask] Error loading model: {e}")
import traceback import traceback
traceback.print_exc() traceback.print_exc()
raise raise
print(f"[FaceMask] YOLO model loaded: {model_path}")
print(f"[FaceMask] Device: {self._device}")
return self._model return self._model
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
"""Convert a single YOLO pose result to (x, y, w, h, conf) tuples."""
detections = []
if result.boxes is None or result.keypoints is None:
return detections
boxes = result.boxes
keypoints = result.keypoints
for i, box in enumerate(boxes):
conf = float(box.conf[0].cpu().numpy())
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
# Extract keypoints for this person
kp_data = keypoints.data[i].cpu().numpy() # shape (17, 3): x, y, conf
kp_xy = kp_data[:, :2]
kp_conf = kp_data[:, 2]
hx, hy, hw, hh = _head_bbox_from_pose(
kp_xy, kp_conf,
float(x1), float(y1), float(x2), float(y2),
)
detections.append((hx, hy, hw, hh, conf))
return detections
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]: def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
""" """
Detect faces in a frame. Detect heads in a frame.
Args: Args:
frame: BGR image as numpy array (H, W, C) frame: BGR image as numpy array (H, W, C)
@ -100,7 +186,6 @@ class YOLOFaceDetector:
Returns: Returns:
List of detections as (x, y, width, height, confidence) List of detections as (x, y, width, height, confidence)
""" """
# Run inference
import torch import torch
print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}") print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}")
try: try:
@ -116,7 +201,6 @@ class YOLOFaceDetector:
print(f"[FaceMask] ERROR during inference: {e}") print(f"[FaceMask] ERROR during inference: {e}")
import traceback import traceback
traceback.print_exc() traceback.print_exc()
# Fallback to CPU
print("[FaceMask] Falling back to CPU inference...") print("[FaceMask] Falling back to CPU inference...")
self._device = 'cpu' self._device = 'cpu'
results = self.model.predict( results = self.model.predict(
@ -128,28 +212,13 @@ class YOLOFaceDetector:
device='cpu', device='cpu',
) )
# Extract detections if results:
detections = [] return self._results_to_detections(results[0])
if len(results) > 0 and results[0].boxes is not None: return []
boxes = results[0].boxes
for box in boxes:
# Get coordinates in xyxy format
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
conf = float(box.conf[0].cpu().numpy())
# Convert to x, y, width, height
x = int(x1)
y = int(y1)
w = int(x2 - x1)
h = int(y2 - y1)
detections.append((x, y, w, h, conf))
return detections
def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]: def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
""" """
Detect faces in multiple frames at once (batch processing). Detect heads in multiple frames at once (batch processing).
Args: Args:
frames: List of BGR images as numpy arrays (H, W, C) frames: List of BGR images as numpy arrays (H, W, C)
@ -161,7 +230,6 @@ class YOLOFaceDetector:
if not frames: if not frames:
return [] return []
# Run batch inference
try: try:
results = self.model.predict( results = self.model.predict(
frames, frames,
@ -175,7 +243,6 @@ class YOLOFaceDetector:
print(f"[FaceMask] ERROR during batch inference: {e}") print(f"[FaceMask] ERROR during batch inference: {e}")
import traceback import traceback
traceback.print_exc() traceback.print_exc()
# Fallback to CPU
print("[FaceMask] Falling back to CPU inference...") print("[FaceMask] Falling back to CPU inference...")
self._device = 'cpu' self._device = 'cpu'
results = self.model.predict( results = self.model.predict(
@ -187,28 +254,7 @@ class YOLOFaceDetector:
device='cpu', device='cpu',
) )
# Extract detections for each frame return [self._results_to_detections(r) for r in results]
all_detections = []
for result in results:
detections = []
if result.boxes is not None:
boxes = result.boxes
for box in boxes:
# Get coordinates in xyxy format
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
conf = float(box.conf[0].cpu().numpy())
# Convert to x, y, width, height
x = int(x1)
y = int(y1)
w = int(x2 - x1)
h = int(y2 - y1)
detections.append((x, y, w, h, conf))
all_detections.append(detections)
return all_detections
def generate_mask( def generate_mask(
self, self,
@ -218,11 +264,11 @@ class YOLOFaceDetector:
feather_radius: int = 20, feather_radius: int = 20,
) -> np.ndarray: ) -> np.ndarray:
""" """
Generate a mask image from face detections. Generate a mask image from head detections.
Args: Args:
frame_shape: Shape of the original frame (height, width, channels) frame_shape: Shape of the original frame (height, width, channels)
detections: List of face detections (x, y, w, h, conf) detections: List of head detections (x, y, w, h, conf)
mask_scale: Scale factor for mask region mask_scale: Scale factor for mask region
feather_radius: Radius for edge feathering feather_radius: Radius for edge feathering
@ -235,25 +281,19 @@ class YOLOFaceDetector:
mask = np.zeros((height, width), dtype=np.uint8) mask = np.zeros((height, width), dtype=np.uint8)
for (x, y, w, h, conf) in detections: for (x, y, w, h, conf) in detections:
# Scale the bounding box
center_x = x + w // 2 center_x = x + w // 2
center_y = y + h // 2 center_y = y + h // 2
scaled_w = int(w * mask_scale) scaled_w = int(w * mask_scale)
scaled_h = int(h * mask_scale) scaled_h = int(h * mask_scale)
# Draw ellipse for natural face shape
cv2.ellipse( cv2.ellipse(
mask, mask,
(center_x, center_y), (center_x, center_y),
(scaled_w // 2, scaled_h // 2), (scaled_w // 2, scaled_h // 2),
0, # angle 0, 0, 360,
0, 360, # arc 255, -1,
255, # color (white)
-1, # filled
) )
# Apply Gaussian blur for feathering
if feather_radius > 0 and len(detections) > 0: if feather_radius > 0 and len(detections) > 0:
kernel_size = feather_radius * 2 + 1 kernel_size = feather_radius * 2 + 1
mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0) mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
@ -262,12 +302,12 @@ class YOLOFaceDetector:
# Singleton instance # Singleton instance
_detector: Optional[YOLOFaceDetector] = None _detector: Optional[YOLOPoseHeadDetector] = None
def get_detector(**kwargs) -> YOLOFaceDetector: def get_detector(**kwargs) -> YOLOPoseHeadDetector:
"""Get or create the global YOLO detector instance.""" """Get or create the global YOLO pose head detector instance."""
global _detector global _detector
if _detector is None: if _detector is None:
_detector = YOLOFaceDetector(**kwargs) _detector = YOLOPoseHeadDetector(**kwargs)
return _detector return _detector

View File

@ -83,7 +83,6 @@ class GenerateRequest(BaseModel):
end_frame: int end_frame: int
conf_threshold: float = 0.5 conf_threshold: float = 0.5
iou_threshold: float = 0.45 iou_threshold: float = 0.45
mask_scale: float = 1.5
class BakeRequest(BaseModel): class BakeRequest(BaseModel):
@ -91,6 +90,7 @@ class BakeRequest(BaseModel):
detections_path: str detections_path: str
output_path: str output_path: str
blur_size: int = 50 blur_size: int = 50
display_scale: float = 1.0
format: str = "mp4" format: str = "mp4"
@ -305,20 +305,15 @@ def process_video_task(task_id: str, req: GenerateRequest):
for detections in batch_detections: for detections in batch_detections:
packed_detections: List[List[float]] = [] packed_detections: List[List[float]] = []
for x, y, w, h, conf in detections: for x, y, w, h, conf in detections:
scaled = _scale_bbox( # bboxをそのまま保存表示スケールはBake時に適用
int(x), bx, by, bw, bh = int(x), int(y), int(w), int(h)
int(y), bx = max(0, bx)
int(w), by = max(0, by)
int(h), bw = min(width - bx, bw)
float(req.mask_scale), bh = min(height - by, bh)
width, if bw <= 0 or bh <= 0:
height,
)
if scaled is None:
continue continue
packed_detections.append( packed_detections.append([bx, by, bw, bh, float(conf)])
[scaled[0], scaled[1], scaled[2], scaled[3], float(conf)]
)
frame_detections.append(packed_detections) frame_detections.append(packed_detections)
current_count += 1 current_count += 1
tasks[task_id].progress = current_count tasks[task_id].progress = current_count
@ -356,7 +351,7 @@ def process_video_task(task_id: str, req: GenerateRequest):
"width": width, "width": width,
"height": height, "height": height,
"fps": fps, "fps": fps,
"mask_scale": float(req.mask_scale), "mask_scale": 1.0,
"frames": frame_detections, "frames": frame_detections,
} }
with open(output_msgpack_path, "wb") as f: with open(output_msgpack_path, "wb") as f:
@ -435,9 +430,9 @@ def process_bake_task(task_id: str, req: BakeRequest):
blur_size = max(1, int(req.blur_size)) blur_size = max(1, int(req.blur_size))
if blur_size % 2 == 0: if blur_size % 2 == 0:
blur_size += 1 blur_size += 1
feather_radius = max(3, min(25, blur_size // 3)) display_scale = max(0.1, float(req.display_scale))
feather_kernel = feather_radius * 2 + 1 # blur_margin は境界問題回避のための計算用余白のみ(表示には使わない)
blur_margin = max(1, (blur_size // 2) + feather_radius) blur_margin = blur_size // 2
# Queues # Queues
queue_size = 8 queue_size = 8
@ -492,11 +487,8 @@ def process_bake_task(task_id: str, req: BakeRequest):
process_queue.put((idx, frame)) process_queue.put((idx, frame))
continue continue
# ROI processing (same as original) # 各人物ごとに個別ROIで処理全員まとめると離れた人物間が巨大ROIになるため
min_x, min_y = src_width, src_height
max_x, max_y = 0, 0
valid_boxes = [] valid_boxes = []
for box in frame_boxes: for box in frame_boxes:
if not isinstance(box, list) or len(box) < 4: if not isinstance(box, list) or len(box) < 4:
continue continue
@ -504,42 +496,45 @@ def process_bake_task(task_id: str, req: BakeRequest):
if w <= 0 or h <= 0: if w <= 0 or h <= 0:
continue continue
valid_boxes.append((x, y, w, h)) valid_boxes.append((x, y, w, h))
min_x = min(min_x, x)
min_y = min(min_y, y)
max_x = max(max_x, x + w)
max_y = max(max_y, y + h)
if not valid_boxes: if not valid_boxes:
process_queue.put((idx, frame)) process_queue.put((idx, frame))
continue continue
roi_x1 = max(0, min_x - blur_margin)
roi_y1 = max(0, min_y - blur_margin)
roi_x2 = min(src_width, max_x + blur_margin)
roi_y2 = min(src_height, max_y + blur_margin)
roi_width = roi_x2 - roi_x1
roi_height = roi_y2 - roi_y1
if roi_width <= 0 or roi_height <= 0:
process_queue.put((idx, frame))
continue
roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
for x, y, w, h in valid_boxes: for x, y, w, h in valid_boxes:
center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1) # display_scale で表示サイズを決定
axes = (max(1, w // 2), max(1, h // 2)) cx = x + w / 2
cy = y + h / 2
dw = max(1, int(w * display_scale))
dh = max(1, int(h * display_scale))
dx = int(cx - dw / 2)
dy = int(cy - dh / 2)
# ROIは表示サイズ + blur_margin計算用余白、境界問題回避のみ
roi_x1 = max(0, dx - blur_margin)
roi_y1 = max(0, dy - blur_margin)
roi_x2 = min(src_width, dx + dw + blur_margin)
roi_y2 = min(src_height, dy + dh + blur_margin)
roi_width = roi_x2 - roi_x1
roi_height = roi_y2 - roi_y1
if roi_width <= 0 or roi_height <= 0:
continue
# ブラーはROI全体で計算余白があるので端の精度が保証される
roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
# 合成マスクはdisplay_scaleサイズの楕円のみfeatheringなし
roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
center = (int(cx) - roi_x1, int(cy) - roi_y1)
axes = (max(1, dw // 2), max(1, dh // 2))
cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1) cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0) roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2] roi_composed = roi_src.astype(np.float32) * (1.0 - roi_alpha) + roi_blurred.astype(np.float32) * roi_alpha
roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0) frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
roi_blurred.astype(np.float32) * roi_alpha
)
frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
process_queue.put((idx, frame)) process_queue.put((idx, frame))
except Exception as e: except Exception as e: