From d154217f9c7fc022572cf3117f1e90d22cb964eb Mon Sep 17 00:00:00 2001
From: Tarun Bommawar <tarunbommawar027@gmail.com>
Date: Fri, 29 May 2026 17:09:13 -0400
Subject: [PATCH] feat: add MediaPipe hand landmark support

---
 src/supervision/key_points/core.py      | 45 +++++++++++++------------
 src/supervision/key_points/skeletons.py | 23 +++++++++++++
 tests/helpers.py                        | 12 ++++++-
 tests/key_points/test_core.py           | 35 +++++++++++++++++++
 tests/key_points/test_skeletons.py      | 34 +++++++++++++++++++
 5 files changed, 126 insertions(+), 23 deletions(-)

diff --git a/src/supervision/key_points/core.py b/src/supervision/key_points/core.py
index 823173352b..c0bd7392cf 100644
--- a/src/supervision/key_points/core.py
+++ b/src/supervision/key_points/core.py
@@ -537,9 +537,10 @@ def from_mediapipe(
         pose landmark detection inference result.
 
         Args:
-            mediapipe_results: The output results from Mediapipe. It supports pose
-                and face landmarks from `PoseLandmarker`, `FaceLandmarker` and the
-                legacy ones from `Pose` and `FaceMesh`.
+            mediapipe_results: The output results from MediaPipe. It supports pose,
+                face, and hand landmarks from `PoseLandmarker`, `FaceLandmarker`,
+                `HandLandmarker`, and the legacy ones from `Pose`, `FaceMesh`, and
+                `Hands`.
             resolution_wh: A tuple of the form `(width, height)` representing the
                 resolution of the frame.
 
@@ -606,28 +607,28 @@ def from_mediapipe(
             ```
 
         """
-        if hasattr(mediapipe_results, "pose_landmarks"):
+        if getattr(mediapipe_results, "pose_landmarks", None) is not None:
             results = mediapipe_results.pose_landmarks
             if not isinstance(mediapipe_results.pose_landmarks, list):
-                if mediapipe_results.pose_landmarks is None:
-                    results = []
-                else:
-                    results = [
-                        [
-                            landmark
-                            for landmark in mediapipe_results.pose_landmarks.landmark
-                        ]
-                    ]
-        elif hasattr(mediapipe_results, "face_landmarks"):
-            results = mediapipe_results.face_landmarks
-        elif hasattr(mediapipe_results, "multi_face_landmarks"):
-            if mediapipe_results.multi_face_landmarks is None:
-                results = []
-            else:
                 results = [
-                    face_landmark.landmark
-                    for face_landmark in mediapipe_results.multi_face_landmarks
+                    [landmark for landmark in mediapipe_results.pose_landmarks.landmark]
                 ]
+        elif getattr(mediapipe_results, "face_landmarks", None) is not None:
+            results = mediapipe_results.face_landmarks  # used as-is
+        elif getattr(mediapipe_results, "hand_landmarks", None) is not None:
+            results = mediapipe_results.hand_landmarks  # used as-is
+        elif getattr(mediapipe_results, "multi_face_landmarks", None) is not None:
+            results = [  # unwrap `.landmark` of each entry
+                face_landmark.landmark
+                for face_landmark in mediapipe_results.multi_face_landmarks
+            ]
+        elif getattr(mediapipe_results, "multi_hand_landmarks", None) is not None:
+            results = [  # unwrap `.landmark` of each entry
+                hand_landmark.landmark
+                for hand_landmark in mediapipe_results.multi_hand_landmarks
+            ]
+        else:
+            results = []  # no landmark field is set; returns cls.empty() below
 
         if len(results) == 0:
             return cls.empty()
@@ -643,7 +644,10 @@ def from_mediapipe(
                     landmark.y * resolution_wh[1],
                 ]
                 prediction_xy.append(keypoint_xy)
-                prediction_confidence.append(landmark.visibility)
+                # Hand landmarks may lack `visibility` or report it as None
+                # (NOTE(review): confirm per MediaPipe version); default to 1.0.
+                visibility = getattr(landmark, "visibility", None)
+                prediction_confidence.append(1.0 if visibility is None else visibility)
 
             xy.append(prediction_xy)
             confidence.append(prediction_confidence)
diff --git a/src/supervision/key_points/skeletons.py b/src/supervision/key_points/skeletons.py
index 71edfd657b..71e473710a 100644
--- a/src/supervision/key_points/skeletons.py
+++ b/src/supervision/key_points/skeletons.py
@@ -24,6 +24,29 @@ class Skeleton(Enum):
         (17, 15),
     )
 
+    HAND = (  # vertex ids are 1-based (1..21); five 4-edge chains share vertex 1
+        (1, 2),  # chain 1-2-3-4-5 (presumably thumb; confirm landmark order)
+        (2, 3),
+        (3, 4),
+        (4, 5),
+        (1, 6),  # chain 1-6-7-8-9
+        (6, 7),
+        (7, 8),
+        (8, 9),
+        (1, 10),  # chain 1-10-11-12-13
+        (10, 11),
+        (11, 12),
+        (12, 13),
+        (1, 14),  # chain 1-14-15-16-17
+        (14, 15),
+        (15, 16),
+        (16, 17),
+        (1, 18),  # chain 1-18-19-20-21
+        (18, 19),
+        (19, 20),
+        (20, 21),
+    )  # NOTE(review): no palm links (cf. MediaPipe HAND_CONNECTIONS) - confirm
+
     GHUM = (
         (1, 2),
         (1, 5),
diff --git a/tests/helpers.py b/tests/helpers.py
index 1ce209b39c..b6506995f2 100644
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -329,6 +329,12 @@ def __init__(self, x, y, visibility=1.0):
         self.visibility = visibility
 
 
+class _FakeMediapipeLandmarkWithoutVisibility:  # test stub: x/y only
+    def __init__(self, x, y):  # sets only x and y; no `visibility` attribute
+        self.x = x
+        self.y = y
+
+
 class _FakeMediapipePose:
     def __init__(self, landmarks: list[_FakeMediapipeLandmark]):
         self.landmark = landmarks
@@ -341,11 +347,15 @@ def __init__(
         | _FakeMediapipePose
         | None = None,
         face_landmarks: _FakeMediapipeLandmark | None = None,
-        multi_face_landmarks: list[_FakeMediapipeLandmark] | None = None,
+        hand_landmarks: list[list[_FakeMediapipeLandmark]] | None = None,
+        multi_face_landmarks: list[_FakeMediapipePose] | None = None,
+        multi_hand_landmarks: list[_FakeMediapipePose] | None = None,
     ):
         self.pose_landmarks = pose_landmarks
         self.face_landmarks = face_landmarks
+        self.hand_landmarks = hand_landmarks
         self.multi_face_landmarks = multi_face_landmarks
+        self.multi_hand_landmarks = multi_hand_landmarks
 
 
 def create_yolo_dataset(
diff --git a/tests/key_points/test_core.py b/tests/key_points/test_core.py
index e0f559f80d..82a496c23e 100644
--- a/tests/key_points/test_core.py
+++ b/tests/key_points/test_core.py
@@ -8,6 +8,7 @@
 from tests.helpers import (
     _create_key_points,
     _FakeMediapipeLandmark,
+    _FakeMediapipeLandmarkWithoutVisibility,
     _FakeMediapipePose,
     _FakeMediapipeResults,
     _FakeYoloNasKeyPoint,
@@ -758,6 +759,40 @@ def test_from_yolo_nas_input(yolo_nas_results, expected_key_points):
                 class_id=None,
             ),
         ),
+        (
+            _FakeMediapipeResults(
+                hand_landmarks=[
+                    [
+                        _FakeMediapipeLandmarkWithoutVisibility(0.1, 0.2),
+                        _FakeMediapipeLandmarkWithoutVisibility(0.3, 0.4),
+                    ]
+                ]
+            ),
+            (100, 200),
+            _create_key_points(
+                xy=[[[10.0, 40.0], [30.0, 80.0]]],
+                confidence=[[1.0, 1.0]],
+                class_id=None,
+            ),
+        ),
+        (
+            _FakeMediapipeResults(
+                multi_hand_landmarks=[
+                    _FakeMediapipePose(
+                        landmarks=[
+                            _FakeMediapipeLandmarkWithoutVisibility(0.1, 0.2),
+                            _FakeMediapipeLandmarkWithoutVisibility(0.3, 0.4),
+                        ]
+                    )
+                ]
+            ),
+            (100, 200),
+            _create_key_points(
+                xy=[[[10.0, 40.0], [30.0, 80.0]]],
+                confidence=[[1.0, 1.0]],
+                class_id=None,
+            ),
+        ),
     ],
 )
 def test_from_mediapipe_input(mediapipe_results, resolution_wh, expected_key_points):
diff --git a/tests/key_points/test_skeletons.py b/tests/key_points/test_skeletons.py
index e97562888f..ff9e16bb5b 100644
--- a/tests/key_points/test_skeletons.py
+++ b/tests/key_points/test_skeletons.py
@@ -58,3 +58,37 @@ def test_skeletons_by_vertex_count_mapping_behaviour(self):
         # For each vertex count, the stored skeleton should be the last one encountered
         for vertex_count, skeleton_value in expected_mapping.items():
             assert SKELETONS_BY_VERTEX_COUNT[vertex_count] == skeleton_value
+
+    def test_hand_skeleton_definition(self):
+        """Check HAND's edge/vertex counts and its entries in both count lookups."""
+        hand_skeleton = Skeleton.HAND.value
+
+        assert len(hand_skeleton) == 20
+        assert len({vertex for edge in hand_skeleton for vertex in edge}) == 21
+        assert SKELETONS_BY_VERTEX_COUNT[21] == hand_skeleton  # last-registered wins
+        assert SKELETONS_BY_EDGE_COUNT[20] == hand_skeleton
+
+    def test_hand_skeleton_edges(self):
+        """Pin the exact HAND edge tuple (five 4-edge chains from vertex 1)."""
+        assert Skeleton.HAND.value == (
+            (1, 2),
+            (2, 3),
+            (3, 4),
+            (4, 5),
+            (1, 6),
+            (6, 7),
+            (7, 8),
+            (8, 9),
+            (1, 10),
+            (10, 11),
+            (11, 12),
+            (12, 13),
+            (1, 14),
+            (14, 15),
+            (15, 16),
+            (16, 17),
+            (1, 18),
+            (18, 19),
+            (19, 20),
+            (20, 21),
+        )