Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 23 additions & 22 deletions src/supervision/key_points/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,9 +537,10 @@ def from_mediapipe(
pose landmark detection inference result.

Args:
mediapipe_results: The output results from Mediapipe. It supports pose
and face landmarks from `PoseLandmarker`, `FaceLandmarker` and the
legacy ones from `Pose` and `FaceMesh`.
mediapipe_results: The output results from Mediapipe. It supports pose,
face, and hand landmarks from `PoseLandmarker`, `FaceLandmarker`,
`HandLandmarker`, and the legacy ones from `Pose`, `FaceMesh`, and
`Hands`.
resolution_wh: A tuple of the form `(width, height)` representing the
resolution of the frame.

Expand Down Expand Up @@ -606,28 +607,28 @@ def from_mediapipe(
```

"""
if hasattr(mediapipe_results, "pose_landmarks"):
if getattr(mediapipe_results, "pose_landmarks", None) is not None:
results = mediapipe_results.pose_landmarks
if not isinstance(mediapipe_results.pose_landmarks, list):
if mediapipe_results.pose_landmarks is None:
results = []
else:
results = [
[
landmark
for landmark in mediapipe_results.pose_landmarks.landmark
]
]
elif hasattr(mediapipe_results, "face_landmarks"):
results = mediapipe_results.face_landmarks
elif hasattr(mediapipe_results, "multi_face_landmarks"):
if mediapipe_results.multi_face_landmarks is None:
results = []
else:
results = [
face_landmark.landmark
for face_landmark in mediapipe_results.multi_face_landmarks
[landmark for landmark in mediapipe_results.pose_landmarks.landmark]
]
elif getattr(mediapipe_results, "face_landmarks", None) is not None:
results = mediapipe_results.face_landmarks
elif getattr(mediapipe_results, "hand_landmarks", None) is not None:
results = mediapipe_results.hand_landmarks
elif getattr(mediapipe_results, "multi_face_landmarks", None) is not None:
results = [
face_landmark.landmark
for face_landmark in mediapipe_results.multi_face_landmarks
]
elif getattr(mediapipe_results, "multi_hand_landmarks", None) is not None:
results = [
hand_landmark.landmark
for hand_landmark in mediapipe_results.multi_hand_landmarks
]
else:
results = []

if len(results) == 0:
return cls.empty()
Expand All @@ -643,7 +644,7 @@ def from_mediapipe(
landmark.y * resolution_wh[1],
]
prediction_xy.append(keypoint_xy)
prediction_confidence.append(landmark.visibility)
prediction_confidence.append(getattr(landmark, "visibility", 1.0))

xy.append(prediction_xy)
confidence.append(prediction_confidence)
Expand Down
23 changes: 23 additions & 0 deletions src/supervision/key_points/skeletons.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,29 @@ class Skeleton(Enum):
(17, 15),
)

# Hand skeleton: 20 edges over 21 vertices, numbered from 1.
# The edges form five chains of four edges each; every chain starts at
# vertex 1 and then walks four consecutive vertices (2-5, 6-9, 10-13,
# 14-17, 18-21).
# NOTE(review): MediaPipe's published hand connections also link neighbouring
# finger bases across the palm, whereas this table is a tree rooted at
# vertex 1 — confirm the simplified layout is intended.
HAND = (
(1, 2),
(2, 3),
(3, 4),
(4, 5),
(1, 6),
(6, 7),
(7, 8),
(8, 9),
(1, 10),
(10, 11),
(11, 12),
(12, 13),
(1, 14),
(14, 15),
(15, 16),
(16, 17),
(1, 18),
(18, 19),
(19, 20),
(20, 21),
)

GHUM = (
(1, 2),
(1, 5),
Expand Down
12 changes: 11 additions & 1 deletion tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,12 @@ def __init__(self, x, y, visibility=1.0):
self.visibility = visibility


class _FakeMediapipeLandmarkWithoutVisibility:
def __init__(self, x, y):
self.x = x
self.y = y


class _FakeMediapipePose:
def __init__(self, landmarks: list[_FakeMediapipeLandmark]):
self.landmark = landmarks
Expand All @@ -341,11 +347,15 @@ def __init__(
| _FakeMediapipePose
| None = None,
face_landmarks: _FakeMediapipeLandmark | None = None,
multi_face_landmarks: list[_FakeMediapipeLandmark] | None = None,
hand_landmarks: list[list[_FakeMediapipeLandmark]] | None = None,
multi_face_landmarks: list[_FakeMediapipePose] | None = None,
multi_hand_landmarks: list[_FakeMediapipePose] | None = None,
):
self.pose_landmarks = pose_landmarks
self.face_landmarks = face_landmarks
self.hand_landmarks = hand_landmarks
self.multi_face_landmarks = multi_face_landmarks
self.multi_hand_landmarks = multi_hand_landmarks


def create_yolo_dataset(
Expand Down
35 changes: 35 additions & 0 deletions tests/key_points/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tests.helpers import (
_create_key_points,
_FakeMediapipeLandmark,
_FakeMediapipeLandmarkWithoutVisibility,
_FakeMediapipePose,
_FakeMediapipeResults,
_FakeYoloNasKeyPoint,
Expand Down Expand Up @@ -758,6 +759,40 @@ def test_from_yolo_nas_input(yolo_nas_results, expected_key_points):
class_id=None,
),
),
(
_FakeMediapipeResults(
hand_landmarks=[
[
_FakeMediapipeLandmarkWithoutVisibility(0.1, 0.2),
_FakeMediapipeLandmarkWithoutVisibility(0.3, 0.4),
]
]
),
(100, 200),
_create_key_points(
xy=[[[10.0, 40.0], [30.0, 80.0]]],
confidence=[[1.0, 1.0]],
class_id=None,
),
),
(
_FakeMediapipeResults(
multi_hand_landmarks=[
_FakeMediapipePose(
landmarks=[
_FakeMediapipeLandmarkWithoutVisibility(0.1, 0.2),
_FakeMediapipeLandmarkWithoutVisibility(0.3, 0.4),
]
)
]
),
(100, 200),
_create_key_points(
xy=[[[10.0, 40.0], [30.0, 80.0]]],
confidence=[[1.0, 1.0]],
class_id=None,
),
),
],
)
def test_from_mediapipe_input(mediapipe_results, resolution_wh, expected_key_points):
Expand Down
34 changes: 34 additions & 0 deletions tests/key_points/test_skeletons.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,37 @@ def test_skeletons_by_vertex_count_mapping_behaviour(self):
# For each vertex count, the stored skeleton should be the last one encountered
for vertex_count, skeleton_value in expected_mapping.items():
assert SKELETONS_BY_VERTEX_COUNT[vertex_count] == skeleton_value

def test_hand_skeleton_definition(self):
    """Check the hand skeleton's size and its lookup-table registrations."""
    edges = Skeleton.HAND.value

    # 20 edges spanning 21 distinct vertices.
    assert len(edges) == 20
    distinct_vertices = set()
    for edge in edges:
        distinct_vertices.update(edge)
    assert len(distinct_vertices) == 21

    # The skeleton must be reachable through both count-based lookup tables.
    assert SKELETONS_BY_VERTEX_COUNT[21] == edges
    assert SKELETONS_BY_EDGE_COUNT[20] == edges

def test_hand_skeleton_edges(self):
    """Check that `Skeleton.HAND` is five 4-edge chains rooted at vertex 1."""
    # Each chain starts at vertex 1 and then walks four consecutive
    # vertices beginning at `first`:
    # (1, first), (first, first + 1), (first + 1, first + 2), ...
    expected_edges = []
    for first in (2, 6, 10, 14, 18):
        chain = [1, first, first + 1, first + 2, first + 3]
        expected_edges.extend(zip(chain, chain[1:]))

    assert Skeleton.HAND.value == tuple(expected_edges)
Loading