Compare commits
3 Commits
920695696b
...
da9de60697
| Author | SHA1 | Date | |
|---|---|---|---|
| da9de60697 | |||
| 9ce6ec99d3 | |||
| 08f20fa6fe |
20
__init__.py
20
__init__.py
|
|
@ -40,15 +40,6 @@ def register():
|
||||||
step=0.01,
|
step=0.01,
|
||||||
)
|
)
|
||||||
|
|
||||||
bpy.types.Scene.facemask_mask_scale = FloatProperty(
|
|
||||||
name="Mask Scale",
|
|
||||||
description="Scale factor for mask region (1.0 = exact face size)",
|
|
||||||
default=1.5,
|
|
||||||
min=1.0,
|
|
||||||
max=3.0,
|
|
||||||
step=0.1,
|
|
||||||
)
|
|
||||||
|
|
||||||
bpy.types.Scene.facemask_cache_dir = StringProperty(
|
bpy.types.Scene.facemask_cache_dir = StringProperty(
|
||||||
name="Cache Directory",
|
name="Cache Directory",
|
||||||
description="Optional cache root directory (empty = default .mask_cache)",
|
description="Optional cache root directory (empty = default .mask_cache)",
|
||||||
|
|
@ -64,6 +55,15 @@ def register():
|
||||||
max=501,
|
max=501,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
bpy.types.Scene.facemask_bake_display_scale = FloatProperty(
|
||||||
|
name="Mask Scale",
|
||||||
|
description="Scale factor for the blur mask ellipse at bake time (1.0 = raw detection size)",
|
||||||
|
default=1.3,
|
||||||
|
min=0.5,
|
||||||
|
max=3.0,
|
||||||
|
step=0.1,
|
||||||
|
)
|
||||||
|
|
||||||
bpy.types.Scene.facemask_bake_format = EnumProperty(
|
bpy.types.Scene.facemask_bake_format = EnumProperty(
|
||||||
name="Bake Format",
|
name="Bake Format",
|
||||||
description="Output format for baked blur video",
|
description="Output format for baked blur video",
|
||||||
|
|
@ -91,9 +91,9 @@ def unregister():
|
||||||
# Unregister scene properties
|
# Unregister scene properties
|
||||||
del bpy.types.Scene.facemask_conf_threshold
|
del bpy.types.Scene.facemask_conf_threshold
|
||||||
del bpy.types.Scene.facemask_iou_threshold
|
del bpy.types.Scene.facemask_iou_threshold
|
||||||
del bpy.types.Scene.facemask_mask_scale
|
|
||||||
del bpy.types.Scene.facemask_cache_dir
|
del bpy.types.Scene.facemask_cache_dir
|
||||||
del bpy.types.Scene.facemask_bake_blur_size
|
del bpy.types.Scene.facemask_bake_blur_size
|
||||||
|
del bpy.types.Scene.facemask_bake_display_scale
|
||||||
del bpy.types.Scene.facemask_bake_format
|
del bpy.types.Scene.facemask_bake_format
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ class AsyncBakeGenerator:
|
||||||
detections_path: str,
|
detections_path: str,
|
||||||
output_path: str,
|
output_path: str,
|
||||||
blur_size: int,
|
blur_size: int,
|
||||||
|
display_scale: float,
|
||||||
fmt: str,
|
fmt: str,
|
||||||
on_complete: Optional[Callable] = None,
|
on_complete: Optional[Callable] = None,
|
||||||
on_progress: Optional[Callable] = None,
|
on_progress: Optional[Callable] = None,
|
||||||
|
|
@ -53,7 +54,7 @@ class AsyncBakeGenerator:
|
||||||
|
|
||||||
self.worker_thread = threading.Thread(
|
self.worker_thread = threading.Thread(
|
||||||
target=self._worker,
|
target=self._worker,
|
||||||
args=(video_path, detections_path, output_path, blur_size, fmt),
|
args=(video_path, detections_path, output_path, blur_size, display_scale, fmt),
|
||||||
daemon=True,
|
daemon=True,
|
||||||
)
|
)
|
||||||
self.worker_thread.start()
|
self.worker_thread.start()
|
||||||
|
|
@ -75,6 +76,7 @@ class AsyncBakeGenerator:
|
||||||
detections_path: str,
|
detections_path: str,
|
||||||
output_path: str,
|
output_path: str,
|
||||||
blur_size: int,
|
blur_size: int,
|
||||||
|
display_scale: float,
|
||||||
fmt: str,
|
fmt: str,
|
||||||
):
|
):
|
||||||
import time
|
import time
|
||||||
|
|
@ -88,6 +90,7 @@ class AsyncBakeGenerator:
|
||||||
detections_path=detections_path,
|
detections_path=detections_path,
|
||||||
output_path=output_path,
|
output_path=output_path,
|
||||||
blur_size=blur_size,
|
blur_size=blur_size,
|
||||||
|
display_scale=display_scale,
|
||||||
fmt=fmt,
|
fmt=fmt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,6 @@ class AsyncMaskGenerator:
|
||||||
fps: float,
|
fps: float,
|
||||||
conf_threshold: float = 0.5,
|
conf_threshold: float = 0.5,
|
||||||
iou_threshold: float = 0.45,
|
iou_threshold: float = 0.45,
|
||||||
mask_scale: float = 1.5,
|
|
||||||
on_complete: Optional[Callable] = None,
|
on_complete: Optional[Callable] = None,
|
||||||
on_progress: Optional[Callable] = None,
|
on_progress: Optional[Callable] = None,
|
||||||
):
|
):
|
||||||
|
|
@ -94,7 +93,6 @@ class AsyncMaskGenerator:
|
||||||
fps,
|
fps,
|
||||||
conf_threshold,
|
conf_threshold,
|
||||||
iou_threshold,
|
iou_threshold,
|
||||||
mask_scale,
|
|
||||||
),
|
),
|
||||||
daemon=True,
|
daemon=True,
|
||||||
)
|
)
|
||||||
|
|
@ -121,7 +119,6 @@ class AsyncMaskGenerator:
|
||||||
fps: float,
|
fps: float,
|
||||||
conf_threshold: float,
|
conf_threshold: float,
|
||||||
iou_threshold: float,
|
iou_threshold: float,
|
||||||
mask_scale: float,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Worker thread function. Delegates to inference server and polls status.
|
Worker thread function. Delegates to inference server and polls status.
|
||||||
|
|
@ -141,7 +138,6 @@ class AsyncMaskGenerator:
|
||||||
end_frame=end_frame,
|
end_frame=end_frame,
|
||||||
conf_threshold=conf_threshold,
|
conf_threshold=conf_threshold,
|
||||||
iou_threshold=iou_threshold,
|
iou_threshold=iou_threshold,
|
||||||
mask_scale=mask_scale,
|
|
||||||
)
|
)
|
||||||
print(f"[FaceMask] Task started: {task_id}")
|
print(f"[FaceMask] Task started: {task_id}")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -204,7 +204,6 @@ class InferenceClient:
|
||||||
end_frame: int,
|
end_frame: int,
|
||||||
conf_threshold: float,
|
conf_threshold: float,
|
||||||
iou_threshold: float,
|
iou_threshold: float,
|
||||||
mask_scale: float,
|
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Request mask generation.
|
Request mask generation.
|
||||||
|
|
@ -222,7 +221,6 @@ class InferenceClient:
|
||||||
"end_frame": end_frame,
|
"end_frame": end_frame,
|
||||||
"conf_threshold": conf_threshold,
|
"conf_threshold": conf_threshold,
|
||||||
"iou_threshold": iou_threshold,
|
"iou_threshold": iou_threshold,
|
||||||
"mask_scale": mask_scale,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
|
|
@ -255,6 +253,7 @@ class InferenceClient:
|
||||||
detections_path: str,
|
detections_path: str,
|
||||||
output_path: str,
|
output_path: str,
|
||||||
blur_size: int,
|
blur_size: int,
|
||||||
|
display_scale: float,
|
||||||
fmt: str,
|
fmt: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -271,6 +270,7 @@ class InferenceClient:
|
||||||
"detections_path": detections_path,
|
"detections_path": detections_path,
|
||||||
"output_path": output_path,
|
"output_path": output_path,
|
||||||
"blur_size": blur_size,
|
"blur_size": blur_size,
|
||||||
|
"display_scale": display_scale,
|
||||||
"format": fmt,
|
"format": fmt,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ KEY_BAKED = "facemask_baked_filepath"
|
||||||
KEY_MODE = "facemask_source_mode"
|
KEY_MODE = "facemask_source_mode"
|
||||||
KEY_FORMAT = "facemask_bake_format"
|
KEY_FORMAT = "facemask_bake_format"
|
||||||
KEY_BLUR_SIZE = "facemask_bake_blur_size"
|
KEY_BLUR_SIZE = "facemask_bake_blur_size"
|
||||||
|
KEY_DISPLAY_SCALE = "facemask_bake_display_scale"
|
||||||
|
|
||||||
|
|
||||||
FORMAT_EXT = {
|
FORMAT_EXT = {
|
||||||
|
|
@ -86,20 +87,27 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
|
||||||
bake_format = scene.facemask_bake_format
|
bake_format = scene.facemask_bake_format
|
||||||
output_path = _output_path(video_strip, detections_path, bake_format)
|
output_path = _output_path(video_strip, detections_path, bake_format)
|
||||||
blur_size = int(scene.facemask_bake_blur_size)
|
blur_size = int(scene.facemask_bake_blur_size)
|
||||||
|
display_scale = float(scene.facemask_bake_display_scale)
|
||||||
|
|
||||||
# Reuse baked cache when parameters match and file still exists.
|
# Reuse baked cache when parameters match and file still exists.
|
||||||
cached_baked_path = video_strip.get(KEY_BAKED)
|
cached_baked_path = video_strip.get(KEY_BAKED)
|
||||||
cached_format = video_strip.get(KEY_FORMAT)
|
cached_format = video_strip.get(KEY_FORMAT)
|
||||||
cached_blur_size = video_strip.get(KEY_BLUR_SIZE)
|
cached_blur_size = video_strip.get(KEY_BLUR_SIZE)
|
||||||
|
cached_display_scale = video_strip.get(KEY_DISPLAY_SCALE)
|
||||||
try:
|
try:
|
||||||
cached_blur_size_int = int(cached_blur_size)
|
cached_blur_size_int = int(cached_blur_size)
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
cached_blur_size_int = None
|
cached_blur_size_int = None
|
||||||
|
try:
|
||||||
|
cached_display_scale_f = float(cached_display_scale)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
cached_display_scale_f = None
|
||||||
if (
|
if (
|
||||||
cached_baked_path
|
cached_baked_path
|
||||||
and os.path.exists(cached_baked_path)
|
and os.path.exists(cached_baked_path)
|
||||||
and cached_format == bake_format
|
and cached_format == bake_format
|
||||||
and cached_blur_size_int == blur_size
|
and cached_blur_size_int == blur_size
|
||||||
|
and cached_display_scale_f == display_scale
|
||||||
):
|
):
|
||||||
if video_strip.get(KEY_MODE) != "baked":
|
if video_strip.get(KEY_MODE) != "baked":
|
||||||
video_strip[KEY_MODE] = "baked"
|
video_strip[KEY_MODE] = "baked"
|
||||||
|
|
@ -126,6 +134,7 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
|
||||||
strip[KEY_MODE] = "baked"
|
strip[KEY_MODE] = "baked"
|
||||||
strip[KEY_FORMAT] = bake_format
|
strip[KEY_FORMAT] = bake_format
|
||||||
strip[KEY_BLUR_SIZE] = blur_size
|
strip[KEY_BLUR_SIZE] = blur_size
|
||||||
|
strip[KEY_DISPLAY_SCALE] = display_scale
|
||||||
_set_strip_source(strip, result_path)
|
_set_strip_source(strip, result_path)
|
||||||
print(f"[FaceMask] Bake completed and source swapped: {result_path}")
|
print(f"[FaceMask] Bake completed and source swapped: {result_path}")
|
||||||
elif status == "error":
|
elif status == "error":
|
||||||
|
|
@ -153,6 +162,7 @@ class SEQUENCER_OT_bake_and_swap_blur_source(Operator):
|
||||||
detections_path=detections_path,
|
detections_path=detections_path,
|
||||||
output_path=output_path,
|
output_path=output_path,
|
||||||
blur_size=blur_size,
|
blur_size=blur_size,
|
||||||
|
display_scale=display_scale,
|
||||||
fmt=bake_format.lower(),
|
fmt=bake_format.lower(),
|
||||||
on_complete=on_complete,
|
on_complete=on_complete,
|
||||||
on_progress=on_progress,
|
on_progress=on_progress,
|
||||||
|
|
|
||||||
|
|
@ -110,7 +110,6 @@ class SEQUENCER_OT_generate_face_mask(Operator):
|
||||||
# Get parameters from scene properties
|
# Get parameters from scene properties
|
||||||
conf_threshold = scene.facemask_conf_threshold
|
conf_threshold = scene.facemask_conf_threshold
|
||||||
iou_threshold = scene.facemask_iou_threshold
|
iou_threshold = scene.facemask_iou_threshold
|
||||||
mask_scale = scene.facemask_mask_scale
|
|
||||||
|
|
||||||
# Start generation
|
# Start generation
|
||||||
generator.start(
|
generator.start(
|
||||||
|
|
@ -121,7 +120,6 @@ class SEQUENCER_OT_generate_face_mask(Operator):
|
||||||
fps=fps,
|
fps=fps,
|
||||||
conf_threshold=conf_threshold,
|
conf_threshold=conf_threshold,
|
||||||
iou_threshold=iou_threshold,
|
iou_threshold=iou_threshold,
|
||||||
mask_scale=mask_scale,
|
|
||||||
on_complete=on_complete,
|
on_complete=on_complete,
|
||||||
on_progress=on_progress,
|
on_progress=on_progress,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,6 @@ class SEQUENCER_PT_face_mask(Panel):
|
||||||
col = box.column(align=True)
|
col = box.column(align=True)
|
||||||
col.prop(scene, "facemask_conf_threshold")
|
col.prop(scene, "facemask_conf_threshold")
|
||||||
col.prop(scene, "facemask_iou_threshold")
|
col.prop(scene, "facemask_iou_threshold")
|
||||||
col.prop(scene, "facemask_mask_scale")
|
|
||||||
|
|
||||||
def _draw_server_status(self, layout):
|
def _draw_server_status(self, layout):
|
||||||
"""Draw server status and GPU info."""
|
"""Draw server status and GPU info."""
|
||||||
|
|
@ -225,6 +224,7 @@ class SEQUENCER_PT_face_mask(Panel):
|
||||||
# Bake parameters
|
# Bake parameters
|
||||||
col = box.column(align=True)
|
col = box.column(align=True)
|
||||||
col.prop(context.scene, "facemask_bake_blur_size")
|
col.prop(context.scene, "facemask_bake_blur_size")
|
||||||
|
col.prop(context.scene, "facemask_bake_display_scale")
|
||||||
col.prop(context.scene, "facemask_bake_format")
|
col.prop(context.scene, "facemask_bake_format")
|
||||||
|
|
||||||
# Source status
|
# Source status
|
||||||
|
|
|
||||||
|
|
@ -1,28 +1,104 @@
|
||||||
"""
|
"""
|
||||||
YOLOv8 Face Detector using PyTorch with ROCm support.
|
YOLOv8 Pose Head Detector using PyTorch with ROCm support.
|
||||||
|
|
||||||
This module provides high-performance face detection using
|
Detects human heads from all angles (frontal, profile, rear) by using
|
||||||
YOLOv8-face model with AMD GPU (ROCm) acceleration.
|
YOLOv8 pose estimation and extracting head bounding boxes from keypoints.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from typing import List, Tuple, Optional
|
from typing import List, Tuple, Optional
|
||||||
from pathlib import Path
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class YOLOFaceDetector:
|
# COCO pose keypoint indices
|
||||||
"""
|
_HEAD_KP = [0, 1, 2, 3, 4] # nose, left_eye, right_eye, left_ear, right_ear
|
||||||
YOLOv8 face detector with PyTorch ROCm support.
|
_SHOULDER_KP = [5, 6] # left_shoulder, right_shoulder
|
||||||
|
_KP_CONF_THRESH = 0.3
|
||||||
|
|
||||||
Features:
|
|
||||||
- ROCm GPU acceleration for AMD GPUs
|
def _head_bbox_from_pose(
|
||||||
- High accuracy face detection
|
kp_xy: np.ndarray,
|
||||||
- Automatic NMS for overlapping detections
|
kp_conf: np.ndarray,
|
||||||
|
person_x1: float,
|
||||||
|
person_y1: float,
|
||||||
|
person_x2: float,
|
||||||
|
person_y2: float,
|
||||||
|
) -> Tuple[int, int, int, int]:
|
||||||
|
"""
|
||||||
|
Estimate head bounding box (x, y, w, h) from COCO pose keypoints.
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
1. Use head keypoints (0-4: nose, eyes, ears) if visible.
|
||||||
|
2. Fall back to shoulder keypoints (5-6) to infer head position.
|
||||||
|
3. Last resort: use top of the person bounding box.
|
||||||
|
"""
|
||||||
|
person_w = max(person_x2 - person_x1, 1.0)
|
||||||
|
|
||||||
|
# --- Step 1: head keypoints ---
|
||||||
|
visible_head = [
|
||||||
|
(float(kp_xy[i][0]), float(kp_xy[i][1]))
|
||||||
|
for i in _HEAD_KP
|
||||||
|
if float(kp_conf[i]) > _KP_CONF_THRESH
|
||||||
|
]
|
||||||
|
if visible_head:
|
||||||
|
xs = [p[0] for p in visible_head]
|
||||||
|
ys = [p[1] for p in visible_head]
|
||||||
|
kp_x1, kp_y1 = min(xs), min(ys)
|
||||||
|
kp_x2, kp_y2 = max(xs), max(ys)
|
||||||
|
span = max(kp_x2 - kp_x1, kp_y2 - kp_y1, 1.0)
|
||||||
|
cx = (kp_x1 + kp_x2) / 2.0
|
||||||
|
cy = (kp_y1 + kp_y2) / 2.0
|
||||||
|
|
||||||
|
# Head radius: inter-landmark span ≈ 80% of head width, so expand by ~1.25
|
||||||
|
# Shift center upward slightly to include scalp
|
||||||
|
r = max(span * 1.25, person_w * 0.20)
|
||||||
|
x1 = int(cx - r)
|
||||||
|
y1 = int(cy - r * 1.15) # extra margin above (scalp)
|
||||||
|
x2 = int(cx + r)
|
||||||
|
y2 = int(cy + r * 0.85) # less margin below (chin)
|
||||||
|
return x1, y1, x2 - x1, y2 - y1
|
||||||
|
|
||||||
|
# --- Step 2: shoulder keypoints ---
|
||||||
|
visible_shoulder = [
|
||||||
|
(float(kp_xy[i][0]), float(kp_xy[i][1]))
|
||||||
|
for i in _SHOULDER_KP
|
||||||
|
if float(kp_conf[i]) > _KP_CONF_THRESH
|
||||||
|
]
|
||||||
|
if visible_shoulder:
|
||||||
|
cx = sum(p[0] for p in visible_shoulder) / len(visible_shoulder)
|
||||||
|
cy_sh = sum(p[1] for p in visible_shoulder) / len(visible_shoulder)
|
||||||
|
if len(visible_shoulder) == 2:
|
||||||
|
sh_width = abs(visible_shoulder[1][0] - visible_shoulder[0][0])
|
||||||
|
else:
|
||||||
|
sh_width = person_w * 0.5
|
||||||
|
r = max(sh_width * 0.5, person_w * 0.20)
|
||||||
|
cy = cy_sh - r * 1.3 # head center is above shoulders
|
||||||
|
x1 = int(cx - r)
|
||||||
|
y1 = int(cy - r)
|
||||||
|
x2 = int(cx + r)
|
||||||
|
y2 = int(cy + r)
|
||||||
|
return x1, y1, x2 - x1, y2 - y1
|
||||||
|
|
||||||
|
# --- Step 3: person bbox top ---
|
||||||
|
r = max(person_w * 0.35, 20.0)
|
||||||
|
cx = (person_x1 + person_x2) / 2.0
|
||||||
|
x1 = int(cx - r)
|
||||||
|
y1 = int(person_y1)
|
||||||
|
x2 = int(cx + r)
|
||||||
|
y2 = int(person_y1 + r * 2.0)
|
||||||
|
return x1, y1, x2 - x1, y2 - y1
|
||||||
|
|
||||||
|
|
||||||
|
class YOLOPoseHeadDetector:
|
||||||
|
"""
|
||||||
|
Head detector using YOLOv8 pose estimation with PyTorch ROCm support.
|
||||||
|
|
||||||
|
Extracts head bounding boxes from COCO pose keypoints (nose, eyes, ears)
|
||||||
|
so that detection works regardless of the person's facing direction.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Default model path relative to this file
|
# Standard Ultralytics model — auto-downloaded on first use
|
||||||
DEFAULT_MODEL = "yolov8n-face-lindevs.pt"
|
DEFAULT_MODEL = os.path.join("models", "yolov8n-pose.pt")
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@ -31,15 +107,6 @@ class YOLOFaceDetector:
|
||||||
iou_threshold: float = 0.45,
|
iou_threshold: float = 0.45,
|
||||||
input_size: Tuple[int, int] = (640, 640),
|
input_size: Tuple[int, int] = (640, 640),
|
||||||
):
|
):
|
||||||
"""
|
|
||||||
Initialize the YOLO face detector.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_path: Path to PyTorch model file. If None, uses default model.
|
|
||||||
conf_threshold: Confidence threshold for detections
|
|
||||||
iou_threshold: IoU threshold for NMS
|
|
||||||
input_size: Model input size (width, height)
|
|
||||||
"""
|
|
||||||
self.conf_threshold = conf_threshold
|
self.conf_threshold = conf_threshold
|
||||||
self.iou_threshold = iou_threshold
|
self.iou_threshold = iou_threshold
|
||||||
self.input_size = input_size
|
self.input_size = input_size
|
||||||
|
|
@ -49,23 +116,20 @@ class YOLOFaceDetector:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def model(self):
|
def model(self):
|
||||||
"""Lazy-load YOLO model."""
|
"""Lazy-load YOLO pose model."""
|
||||||
if self._model is None:
|
if self._model is None:
|
||||||
from ultralytics import YOLO
|
from ultralytics import YOLO
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
# Determine model path
|
# Use provided path or let Ultralytics auto-download the default
|
||||||
if self._model_path is None:
|
if self._model_path is not None:
|
||||||
# Assuming models are in ../models relative to server/detector.py
|
if not os.path.exists(self._model_path):
|
||||||
models_dir = Path(__file__).parent.parent / "models"
|
raise FileNotFoundError(f"Model not found: {self._model_path}")
|
||||||
model_path = str(models_dir / self.DEFAULT_MODEL)
|
|
||||||
else:
|
|
||||||
model_path = self._model_path
|
model_path = self._model_path
|
||||||
|
else:
|
||||||
|
model_path = self.DEFAULT_MODEL
|
||||||
|
os.makedirs(os.path.dirname(model_path), exist_ok=True)
|
||||||
|
|
||||||
if not os.path.exists(model_path):
|
|
||||||
raise FileNotFoundError(f"Model not found: {model_path}")
|
|
||||||
|
|
||||||
# Detect device (ROCm GPU or CPU)
|
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
self._device = 'cuda'
|
self._device = 'cuda'
|
||||||
device_name = torch.cuda.get_device_name(0)
|
device_name = torch.cuda.get_device_name(0)
|
||||||
|
|
@ -74,25 +138,47 @@ class YOLOFaceDetector:
|
||||||
self._device = 'cpu'
|
self._device = 'cpu'
|
||||||
print("[FaceMask] Using CPU for inference (ROCm GPU not available)")
|
print("[FaceMask] Using CPU for inference (ROCm GPU not available)")
|
||||||
|
|
||||||
# Load model (let Ultralytics handle device management)
|
|
||||||
try:
|
try:
|
||||||
self._model = YOLO(model_path)
|
self._model = YOLO(model_path)
|
||||||
# Don't call .to() - let predict() handle device assignment
|
print(f"[FaceMask] Pose model loaded: {model_path}")
|
||||||
print(f"[FaceMask] Model loaded, will use device: {self._device}")
|
print(f"[FaceMask] Device: {self._device}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[FaceMask] Error loading model: {e}")
|
print(f"[FaceMask] Error loading model: {e}")
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
print(f"[FaceMask] YOLO model loaded: {model_path}")
|
|
||||||
print(f"[FaceMask] Device: {self._device}")
|
|
||||||
|
|
||||||
return self._model
|
return self._model
|
||||||
|
|
||||||
|
def _results_to_detections(self, result) -> List[Tuple[int, int, int, int, float]]:
|
||||||
|
"""Convert a single YOLO pose result to (x, y, w, h, conf) tuples."""
|
||||||
|
detections = []
|
||||||
|
if result.boxes is None or result.keypoints is None:
|
||||||
|
return detections
|
||||||
|
|
||||||
|
boxes = result.boxes
|
||||||
|
keypoints = result.keypoints
|
||||||
|
|
||||||
|
for i, box in enumerate(boxes):
|
||||||
|
conf = float(box.conf[0].cpu().numpy())
|
||||||
|
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
|
||||||
|
|
||||||
|
# Extract keypoints for this person
|
||||||
|
kp_data = keypoints.data[i].cpu().numpy() # shape (17, 3): x, y, conf
|
||||||
|
kp_xy = kp_data[:, :2]
|
||||||
|
kp_conf = kp_data[:, 2]
|
||||||
|
|
||||||
|
hx, hy, hw, hh = _head_bbox_from_pose(
|
||||||
|
kp_xy, kp_conf,
|
||||||
|
float(x1), float(y1), float(x2), float(y2),
|
||||||
|
)
|
||||||
|
detections.append((hx, hy, hw, hh, conf))
|
||||||
|
|
||||||
|
return detections
|
||||||
|
|
||||||
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
|
def detect(self, frame: np.ndarray) -> List[Tuple[int, int, int, int, float]]:
|
||||||
"""
|
"""
|
||||||
Detect faces in a frame.
|
Detect heads in a frame.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
frame: BGR image as numpy array (H, W, C)
|
frame: BGR image as numpy array (H, W, C)
|
||||||
|
|
@ -100,7 +186,6 @@ class YOLOFaceDetector:
|
||||||
Returns:
|
Returns:
|
||||||
List of detections as (x, y, width, height, confidence)
|
List of detections as (x, y, width, height, confidence)
|
||||||
"""
|
"""
|
||||||
# Run inference
|
|
||||||
import torch
|
import torch
|
||||||
print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}")
|
print(f"[FaceMask] Inference device: {self._device}, CUDA available: {torch.cuda.is_available()}")
|
||||||
try:
|
try:
|
||||||
|
|
@ -116,7 +201,6 @@ class YOLOFaceDetector:
|
||||||
print(f"[FaceMask] ERROR during inference: {e}")
|
print(f"[FaceMask] ERROR during inference: {e}")
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
# Fallback to CPU
|
|
||||||
print("[FaceMask] Falling back to CPU inference...")
|
print("[FaceMask] Falling back to CPU inference...")
|
||||||
self._device = 'cpu'
|
self._device = 'cpu'
|
||||||
results = self.model.predict(
|
results = self.model.predict(
|
||||||
|
|
@ -128,28 +212,13 @@ class YOLOFaceDetector:
|
||||||
device='cpu',
|
device='cpu',
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract detections
|
if results:
|
||||||
detections = []
|
return self._results_to_detections(results[0])
|
||||||
if len(results) > 0 and results[0].boxes is not None:
|
return []
|
||||||
boxes = results[0].boxes
|
|
||||||
for box in boxes:
|
|
||||||
# Get coordinates in xyxy format
|
|
||||||
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
|
|
||||||
conf = float(box.conf[0].cpu().numpy())
|
|
||||||
|
|
||||||
# Convert to x, y, width, height
|
|
||||||
x = int(x1)
|
|
||||||
y = int(y1)
|
|
||||||
w = int(x2 - x1)
|
|
||||||
h = int(y2 - y1)
|
|
||||||
|
|
||||||
detections.append((x, y, w, h, conf))
|
|
||||||
|
|
||||||
return detections
|
|
||||||
|
|
||||||
def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
|
def detect_batch(self, frames: List[np.ndarray]) -> List[List[Tuple[int, int, int, int, float]]]:
|
||||||
"""
|
"""
|
||||||
Detect faces in multiple frames at once (batch processing).
|
Detect heads in multiple frames at once (batch processing).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
frames: List of BGR images as numpy arrays (H, W, C)
|
frames: List of BGR images as numpy arrays (H, W, C)
|
||||||
|
|
@ -161,7 +230,6 @@ class YOLOFaceDetector:
|
||||||
if not frames:
|
if not frames:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Run batch inference
|
|
||||||
try:
|
try:
|
||||||
results = self.model.predict(
|
results = self.model.predict(
|
||||||
frames,
|
frames,
|
||||||
|
|
@ -175,7 +243,6 @@ class YOLOFaceDetector:
|
||||||
print(f"[FaceMask] ERROR during batch inference: {e}")
|
print(f"[FaceMask] ERROR during batch inference: {e}")
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
# Fallback to CPU
|
|
||||||
print("[FaceMask] Falling back to CPU inference...")
|
print("[FaceMask] Falling back to CPU inference...")
|
||||||
self._device = 'cpu'
|
self._device = 'cpu'
|
||||||
results = self.model.predict(
|
results = self.model.predict(
|
||||||
|
|
@ -187,28 +254,7 @@ class YOLOFaceDetector:
|
||||||
device='cpu',
|
device='cpu',
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract detections for each frame
|
return [self._results_to_detections(r) for r in results]
|
||||||
all_detections = []
|
|
||||||
for result in results:
|
|
||||||
detections = []
|
|
||||||
if result.boxes is not None:
|
|
||||||
boxes = result.boxes
|
|
||||||
for box in boxes:
|
|
||||||
# Get coordinates in xyxy format
|
|
||||||
x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
|
|
||||||
conf = float(box.conf[0].cpu().numpy())
|
|
||||||
|
|
||||||
# Convert to x, y, width, height
|
|
||||||
x = int(x1)
|
|
||||||
y = int(y1)
|
|
||||||
w = int(x2 - x1)
|
|
||||||
h = int(y2 - y1)
|
|
||||||
|
|
||||||
detections.append((x, y, w, h, conf))
|
|
||||||
|
|
||||||
all_detections.append(detections)
|
|
||||||
|
|
||||||
return all_detections
|
|
||||||
|
|
||||||
def generate_mask(
|
def generate_mask(
|
||||||
self,
|
self,
|
||||||
|
|
@ -218,11 +264,11 @@ class YOLOFaceDetector:
|
||||||
feather_radius: int = 20,
|
feather_radius: int = 20,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Generate a mask image from face detections.
|
Generate a mask image from head detections.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
frame_shape: Shape of the original frame (height, width, channels)
|
frame_shape: Shape of the original frame (height, width, channels)
|
||||||
detections: List of face detections (x, y, w, h, conf)
|
detections: List of head detections (x, y, w, h, conf)
|
||||||
mask_scale: Scale factor for mask region
|
mask_scale: Scale factor for mask region
|
||||||
feather_radius: Radius for edge feathering
|
feather_radius: Radius for edge feathering
|
||||||
|
|
||||||
|
|
@ -235,25 +281,19 @@ class YOLOFaceDetector:
|
||||||
mask = np.zeros((height, width), dtype=np.uint8)
|
mask = np.zeros((height, width), dtype=np.uint8)
|
||||||
|
|
||||||
for (x, y, w, h, conf) in detections:
|
for (x, y, w, h, conf) in detections:
|
||||||
# Scale the bounding box
|
|
||||||
center_x = x + w // 2
|
center_x = x + w // 2
|
||||||
center_y = y + h // 2
|
center_y = y + h // 2
|
||||||
|
|
||||||
scaled_w = int(w * mask_scale)
|
scaled_w = int(w * mask_scale)
|
||||||
scaled_h = int(h * mask_scale)
|
scaled_h = int(h * mask_scale)
|
||||||
|
|
||||||
# Draw ellipse for natural face shape
|
|
||||||
cv2.ellipse(
|
cv2.ellipse(
|
||||||
mask,
|
mask,
|
||||||
(center_x, center_y),
|
(center_x, center_y),
|
||||||
(scaled_w // 2, scaled_h // 2),
|
(scaled_w // 2, scaled_h // 2),
|
||||||
0, # angle
|
0, 0, 360,
|
||||||
0, 360, # arc
|
255, -1,
|
||||||
255, # color (white)
|
|
||||||
-1, # filled
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Apply Gaussian blur for feathering
|
|
||||||
if feather_radius > 0 and len(detections) > 0:
|
if feather_radius > 0 and len(detections) > 0:
|
||||||
kernel_size = feather_radius * 2 + 1
|
kernel_size = feather_radius * 2 + 1
|
||||||
mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
|
mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
|
||||||
|
|
@ -262,12 +302,12 @@ class YOLOFaceDetector:
|
||||||
|
|
||||||
|
|
||||||
# Singleton instance
|
# Singleton instance
|
||||||
_detector: Optional[YOLOFaceDetector] = None
|
_detector: Optional[YOLOPoseHeadDetector] = None
|
||||||
|
|
||||||
|
|
||||||
def get_detector(**kwargs) -> YOLOFaceDetector:
|
def get_detector(**kwargs) -> YOLOPoseHeadDetector:
|
||||||
"""Get or create the global YOLO detector instance."""
|
"""Get or create the global YOLO pose head detector instance."""
|
||||||
global _detector
|
global _detector
|
||||||
if _detector is None:
|
if _detector is None:
|
||||||
_detector = YOLOFaceDetector(**kwargs)
|
_detector = YOLOPoseHeadDetector(**kwargs)
|
||||||
return _detector
|
return _detector
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,6 @@ class GenerateRequest(BaseModel):
|
||||||
end_frame: int
|
end_frame: int
|
||||||
conf_threshold: float = 0.5
|
conf_threshold: float = 0.5
|
||||||
iou_threshold: float = 0.45
|
iou_threshold: float = 0.45
|
||||||
mask_scale: float = 1.5
|
|
||||||
|
|
||||||
|
|
||||||
class BakeRequest(BaseModel):
|
class BakeRequest(BaseModel):
|
||||||
|
|
@ -91,6 +90,7 @@ class BakeRequest(BaseModel):
|
||||||
detections_path: str
|
detections_path: str
|
||||||
output_path: str
|
output_path: str
|
||||||
blur_size: int = 50
|
blur_size: int = 50
|
||||||
|
display_scale: float = 1.0
|
||||||
format: str = "mp4"
|
format: str = "mp4"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -305,20 +305,15 @@ def process_video_task(task_id: str, req: GenerateRequest):
|
||||||
for detections in batch_detections:
|
for detections in batch_detections:
|
||||||
packed_detections: List[List[float]] = []
|
packed_detections: List[List[float]] = []
|
||||||
for x, y, w, h, conf in detections:
|
for x, y, w, h, conf in detections:
|
||||||
scaled = _scale_bbox(
|
# bboxをそのまま保存(表示スケールはBake時に適用)
|
||||||
int(x),
|
bx, by, bw, bh = int(x), int(y), int(w), int(h)
|
||||||
int(y),
|
bx = max(0, bx)
|
||||||
int(w),
|
by = max(0, by)
|
||||||
int(h),
|
bw = min(width - bx, bw)
|
||||||
float(req.mask_scale),
|
bh = min(height - by, bh)
|
||||||
width,
|
if bw <= 0 or bh <= 0:
|
||||||
height,
|
|
||||||
)
|
|
||||||
if scaled is None:
|
|
||||||
continue
|
continue
|
||||||
packed_detections.append(
|
packed_detections.append([bx, by, bw, bh, float(conf)])
|
||||||
[scaled[0], scaled[1], scaled[2], scaled[3], float(conf)]
|
|
||||||
)
|
|
||||||
frame_detections.append(packed_detections)
|
frame_detections.append(packed_detections)
|
||||||
current_count += 1
|
current_count += 1
|
||||||
tasks[task_id].progress = current_count
|
tasks[task_id].progress = current_count
|
||||||
|
|
@ -356,7 +351,7 @@ def process_video_task(task_id: str, req: GenerateRequest):
|
||||||
"width": width,
|
"width": width,
|
||||||
"height": height,
|
"height": height,
|
||||||
"fps": fps,
|
"fps": fps,
|
||||||
"mask_scale": float(req.mask_scale),
|
"mask_scale": 1.0,
|
||||||
"frames": frame_detections,
|
"frames": frame_detections,
|
||||||
}
|
}
|
||||||
with open(output_msgpack_path, "wb") as f:
|
with open(output_msgpack_path, "wb") as f:
|
||||||
|
|
@ -435,9 +430,9 @@ def process_bake_task(task_id: str, req: BakeRequest):
|
||||||
blur_size = max(1, int(req.blur_size))
|
blur_size = max(1, int(req.blur_size))
|
||||||
if blur_size % 2 == 0:
|
if blur_size % 2 == 0:
|
||||||
blur_size += 1
|
blur_size += 1
|
||||||
feather_radius = max(3, min(25, blur_size // 3))
|
display_scale = max(0.1, float(req.display_scale))
|
||||||
feather_kernel = feather_radius * 2 + 1
|
# blur_margin は境界問題回避のための計算用余白のみ(表示には使わない)
|
||||||
blur_margin = max(1, (blur_size // 2) + feather_radius)
|
blur_margin = blur_size // 2
|
||||||
|
|
||||||
# Queues
|
# Queues
|
||||||
queue_size = 8
|
queue_size = 8
|
||||||
|
|
@ -492,11 +487,8 @@ def process_bake_task(task_id: str, req: BakeRequest):
|
||||||
process_queue.put((idx, frame))
|
process_queue.put((idx, frame))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# ROI processing (same as original)
|
# 各人物ごとに個別ROIで処理(全員まとめると離れた人物間が巨大ROIになるため)
|
||||||
min_x, min_y = src_width, src_height
|
|
||||||
max_x, max_y = 0, 0
|
|
||||||
valid_boxes = []
|
valid_boxes = []
|
||||||
|
|
||||||
for box in frame_boxes:
|
for box in frame_boxes:
|
||||||
if not isinstance(box, list) or len(box) < 4:
|
if not isinstance(box, list) or len(box) < 4:
|
||||||
continue
|
continue
|
||||||
|
|
@ -504,42 +496,45 @@ def process_bake_task(task_id: str, req: BakeRequest):
|
||||||
if w <= 0 or h <= 0:
|
if w <= 0 or h <= 0:
|
||||||
continue
|
continue
|
||||||
valid_boxes.append((x, y, w, h))
|
valid_boxes.append((x, y, w, h))
|
||||||
min_x = min(min_x, x)
|
|
||||||
min_y = min(min_y, y)
|
|
||||||
max_x = max(max_x, x + w)
|
|
||||||
max_y = max(max_y, y + h)
|
|
||||||
|
|
||||||
if not valid_boxes:
|
if not valid_boxes:
|
||||||
process_queue.put((idx, frame))
|
process_queue.put((idx, frame))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
roi_x1 = max(0, min_x - blur_margin)
|
|
||||||
roi_y1 = max(0, min_y - blur_margin)
|
|
||||||
roi_x2 = min(src_width, max_x + blur_margin)
|
|
||||||
roi_y2 = min(src_height, max_y + blur_margin)
|
|
||||||
roi_width = roi_x2 - roi_x1
|
|
||||||
roi_height = roi_y2 - roi_y1
|
|
||||||
|
|
||||||
if roi_width <= 0 or roi_height <= 0:
|
|
||||||
process_queue.put((idx, frame))
|
|
||||||
continue
|
|
||||||
|
|
||||||
roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
|
|
||||||
for x, y, w, h in valid_boxes:
|
for x, y, w, h in valid_boxes:
|
||||||
center = (x + w // 2 - roi_x1, y + h // 2 - roi_y1)
|
# display_scale で表示サイズを決定
|
||||||
axes = (max(1, w // 2), max(1, h // 2))
|
cx = x + w / 2
|
||||||
|
cy = y + h / 2
|
||||||
|
dw = max(1, int(w * display_scale))
|
||||||
|
dh = max(1, int(h * display_scale))
|
||||||
|
dx = int(cx - dw / 2)
|
||||||
|
dy = int(cy - dh / 2)
|
||||||
|
|
||||||
|
# ROIは表示サイズ + blur_margin(計算用余白、境界問題回避のみ)
|
||||||
|
roi_x1 = max(0, dx - blur_margin)
|
||||||
|
roi_y1 = max(0, dy - blur_margin)
|
||||||
|
roi_x2 = min(src_width, dx + dw + blur_margin)
|
||||||
|
roi_y2 = min(src_height, dy + dh + blur_margin)
|
||||||
|
roi_width = roi_x2 - roi_x1
|
||||||
|
roi_height = roi_y2 - roi_y1
|
||||||
|
|
||||||
|
if roi_width <= 0 or roi_height <= 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# ブラーはROI全体で計算(余白があるので端の精度が保証される)
|
||||||
|
roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
|
||||||
|
roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
|
||||||
|
|
||||||
|
# 合成マスクはdisplay_scaleサイズの楕円のみ(featheringなし)
|
||||||
|
roi_mask = np.zeros((roi_height, roi_width), dtype=np.uint8)
|
||||||
|
center = (int(cx) - roi_x1, int(cy) - roi_y1)
|
||||||
|
axes = (max(1, dw // 2), max(1, dh // 2))
|
||||||
cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
|
cv2.ellipse(roi_mask, center, axes, 0, 0, 360, 255, -1)
|
||||||
|
|
||||||
roi_mask = cv2.GaussianBlur(roi_mask, (feather_kernel, feather_kernel), 0)
|
roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
|
||||||
roi_src = frame[roi_y1:roi_y2, roi_x1:roi_x2]
|
roi_composed = roi_src.astype(np.float32) * (1.0 - roi_alpha) + roi_blurred.astype(np.float32) * roi_alpha
|
||||||
roi_blurred = cv2.GaussianBlur(roi_src, (blur_size, blur_size), 0)
|
frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
|
||||||
|
|
||||||
roi_alpha = (roi_mask.astype(np.float32) / 255.0)[..., np.newaxis]
|
|
||||||
roi_composed = (roi_src.astype(np.float32) * (1.0 - roi_alpha)) + (
|
|
||||||
roi_blurred.astype(np.float32) * roi_alpha
|
|
||||||
)
|
|
||||||
|
|
||||||
frame[roi_y1:roi_y2, roi_x1:roi_x2] = np.clip(roi_composed, 0, 255).astype(np.uint8)
|
|
||||||
process_queue.put((idx, frame))
|
process_queue.put((idx, frame))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user