isaac-sim · lgulich · Mar 20, 2026 · Mar 20, 2026
diff --git a/docker/setup/download_wbc_models.sh b/docker/setup/download_wbc_models.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+set -euo pipefail
+
+# Script to download external WBC policy models.
+# This script is called from the Dockerfile or can be run manually.
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
+MODELS_DIR="${REPO_ROOT}/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/models"
+
+# --- WBC-AGILE e2e velocity policy for G1 ---
+AGILE_MODEL_DIR="${MODELS_DIR}/agile"
+AGILE_MODEL_PATH="${AGILE_MODEL_DIR}/unitree_g1_velocity_e2e.onnx"
+AGILE_MODEL_URL="https://github.com/nvidia-isaac/WBC-AGILE/raw/main/agile/data/policy/velocity_g1/unitree_g1_velocity_e2e.onnx"
+AGILE_MODEL_SHA256="8995f2462ba2d0d83afe08905148f6373990d50018610663a539225d268ef33b"
+
+download_model() {
+    local url="$1"
+    local dest="$2"
+    local expected_sha256="$3"
+
+    if [ -f "$dest" ]; then
+        echo "Model already exists: $dest"
+        return 0
+    fi
+
+    mkdir -p "$(dirname "$dest")"
+    echo "Downloading $(basename "$dest") from ${url} ..."
+    curl -L -o "$dest" "$url"
+
+    actual_sha256=$(sha256sum "$dest" | awk '{print $1}')
+    if [ "$actual_sha256" != "$expected_sha256" ]; then
+        echo "ERROR: SHA256 mismatch for $dest"
+        echo "  expected: $expected_sha256"
+        echo "  actual:   $actual_sha256"
+        rm -f "$dest"
+        return 1
+    fi
+
+    echo "Downloaded and verified: $dest"
+}
+
+download_model "$AGILE_MODEL_URL" "$AGILE_MODEL_PATH" "$AGILE_MODEL_SHA256"
+
+echo "All WBC models downloaded successfully."
diff --git a/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/config/configs.py b/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/config/configs.py
@@ -71,3 +71,22 @@ class HomieV2Config(BaseConfig):
 
     policy_config_path: str = "config/g1_homie_v2.yaml"
     """Policy related configuration to specify inputs/outputs dim"""
+
+
+@dataclass
+class AgileConfig(BaseConfig):
+    """Config for the WBC-AGILE end-to-end velocity policy for G1."""
+
+    # WBC Configuration
+    wbc_version: Literal["agile"] = "agile"
+    """Version of the whole body controller."""
+
+    wbc_model_path: str = "models/agile/unitree_g1_velocity_e2e.onnx"
+    """Path to WBC model file (relative to wbc_policy directory)"""
+
+    # Robot Configuration
+    enable_waist: bool = False
+    """Whether to include waist joints in IK."""
+
+    policy_config_path: str = "config/g1_agile.yaml"
+    """Policy related configuration to specify inputs/outputs dim"""
diff --git a/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/config/g1_agile.yaml b/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/config/g1_agile.yaml
@@ -0,0 +1,61 @@
+# Copyright (c) 2025, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Joint ordering for ONNX model inputs (29 body joints in agile order).
+# Must match the element_names from unitree_g1_velocity_e2e.yaml.
+onnx_input_joint_names:
+  - left_hip_pitch_joint
+  - right_hip_pitch_joint
+  - waist_yaw_joint
+  - left_hip_roll_joint
+  - right_hip_roll_joint
+  - waist_roll_joint
+  - left_hip_yaw_joint
+  - right_hip_yaw_joint
+  - waist_pitch_joint
+  - left_knee_joint
+  - right_knee_joint
+  - left_shoulder_pitch_joint
+  - right_shoulder_pitch_joint
+  - left_ankle_pitch_joint
+  - right_ankle_pitch_joint
+  - left_shoulder_roll_joint
+  - right_shoulder_roll_joint
+  - left_ankle_roll_joint
+  - right_ankle_roll_joint
+  - left_shoulder_yaw_joint
+  - right_shoulder_yaw_joint
+  - left_elbow_joint
+  - right_elbow_joint
+  - left_wrist_roll_joint
+  - right_wrist_roll_joint
+  - left_wrist_pitch_joint
+  - right_wrist_pitch_joint
+  - left_wrist_yaw_joint
+  - right_wrist_yaw_joint
+
+# Joint ordering for ONNX model outputs (14 controlled joints in agile output order).
+# Must match the action_joint_pos element_names from unitree_g1_velocity_e2e.yaml.
+controlled_joint_names:
+  - left_hip_pitch_joint
+  - right_hip_pitch_joint
+  - left_hip_roll_joint
+  - right_hip_roll_joint
+  - waist_roll_joint
+  - left_hip_yaw_joint
+  - right_hip_yaw_joint
+  - waist_pitch_joint
+  - left_knee_joint
+  - right_knee_joint
+  - left_ankle_pitch_joint
+  - right_ankle_pitch_joint
+  - left_ankle_roll_joint
+  - right_ankle_roll_joint
+
+num_actions: 14
+num_body_joints: 29
+
+# Initial commands
+cmd_init: [0.0, 0.0, 0.0]
diff --git a/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/policy/g1_agile_policy.py b/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/policy/g1_agile_policy.py
@@ -0,0 +1,228 @@
+# Copyright (c) 2025, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pathlib
+import torch
+from typing import Any
+
+import onnxruntime as ort
+
+from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.base import WBCPolicy
+from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.utils.homie_utils import load_config
+
+
+class G1AgilePolicy(WBCPolicy):
+    """G1 robot policy using the WBC-AGILE end-to-end neural network.
+
+    This policy uses a single ONNX model that takes raw sensor inputs and
+    manages observation history internally via feedback connections. The model
+    outputs target joint positions along with per-joint Kp/Kd gains for 14
+    controlled joints (legs + waist_roll + waist_pitch).
+    """
+
+    # Names of ONNX state inputs that receive feedback from the previous step's outputs.
+    _STATE_KEYS = [
+        "last_actions",
+        "base_ang_vel_history",
+        "projected_gravity_history",
+        "velocity_commands_history",
+        "controlled_joint_pos_history",
+        "controlled_joint_vel_history",
+        "actions_history",
+    ]
+
+    def __init__(self, robot_model, config_path: str, model_path: str, num_envs: int = 1):
+        """Initialize G1AgilePolicy.
+
+        Args:
+            robot_model: Robot model containing joint ordering info.
+            config_path: Path to policy YAML configuration file (relative to wbc_policy dir).
+            model_path: Path to the ONNX model file (relative to wbc_policy dir).
+            num_envs: Number of environments.
+        """
+        self.parent_dir = pathlib.Path(__file__).parent.parent
+        self.config = load_config(str(self.parent_dir / config_path))
+        self.robot_model = robot_model
+        self.num_envs = num_envs
+
+        # Load ONNX model (must be downloaded beforehand via docker/setup/download_wbc_models.sh)
+        model_full_path = self.parent_dir / model_path
+        if not model_full_path.exists():
+            raise FileNotFoundError(
+                f"AGILE ONNX model not found at {model_full_path}. "
+                "Run docker/setup/download_wbc_models.sh to download it."
+            )
+        self.session = ort.InferenceSession(str(model_full_path))
+        self.output_names = [out.name for out in self.session.get_outputs()]
+        print(f"Successfully loaded ONNX policy from {model_full_path}")
+
+        # Build joint index mappings between WBC order and agile ONNX order
+        self._build_joint_mappings()
+
+        # Initialize state
+        self._init_state()
+
+    def _build_joint_mappings(self):
+        """Build index mappings between WBC joint order and agile ONNX joint order."""
+        wbc_order = self.robot_model.wbc_g1_joints_order  # {joint_name: wbc_index}
+
+        # Mapping for ONNX input: indices into the WBC-ordered observation to select
+        # the 29 body joints in the order the ONNX model expects.
+        onnx_input_names = self.config["onnx_input_joint_names"]
+        self.wbc_to_agile_input = [wbc_order[name] for name in onnx_input_names]
+
+        # Mapping for ONNX output: for each of the 14 agile output joints, the
+        # position in the 15-element lower_body array to write to.
+        controlled_names = self.config["controlled_joint_names"]
+        lower_body_indices = self.robot_model.get_joint_group_indices("lower_body")
+        self.agile_output_to_lower_body = []
+        for name in controlled_names:
+            wbc_idx = wbc_order[name]
+            lb_pos = lower_body_indices.index(wbc_idx)
+            self.agile_output_to_lower_body.append(lb_pos)
+
+        self.num_lower_body = len(lower_body_indices)
+
+    def _init_state(self):
+        """Initialize all per-environment state variables."""
+        self.observation = None
+        self.use_policy_action = True
+        self.cmd = np.tile(self.config["cmd_init"], (self.num_envs, 1))
+
+        # Per-environment ONNX feedback state. Each entry is shaped for batch=1
+        # as the ONNX model expects, matching the input tensor shapes from the YAML.
+        self.states = [self._make_zero_state() for _ in range(self.num_envs)]
+
+    def _make_zero_state(self) -> dict[str, np.ndarray]:
+        """Create a zeroed feedback state dict for one environment."""
+        return {
+            "last_actions": np.zeros((1, 14), dtype=np.float32),
+            "base_ang_vel_history": np.zeros((1, 5, 3), dtype=np.float32),
+            "projected_gravity_history": np.zeros((1, 5, 3), dtype=np.float32),
+            "velocity_commands_history": np.zeros((1, 5, 3), dtype=np.float32),
+            "controlled_joint_pos_history": np.zeros((1, 5, 14), dtype=np.float32),
+            "controlled_joint_vel_history": np.zeros((1, 5, 14), dtype=np.float32),
+            "actions_history": np.zeros((1, 5, 14), dtype=np.float32),
+        }
+
+    def reset(self, env_ids: torch.Tensor):
+        """Reset the policy state for the given environment ids.
+
+        Args:
+            env_ids: The environment ids to reset.
+        """
+        for env_id in env_ids:
+            idx = int(env_id)
+            self.states[idx] = self._make_zero_state()
+        self.cmd = np.tile(self.config["cmd_init"], (self.num_envs, 1))
+
+    def set_observation(self, observation: dict[str, Any]):
+        """Store the current observation for the next get_action call.
+
+        Args:
+            observation: Dictionary containing robot state from prepare_observations().
+        """
+        self.observation = observation
+
+    def set_goal(self, goal: dict[str, Any]):
+        """Set the goal for the policy.
+
+        Args:
+            goal: Dictionary containing goals. Supported keys:
+                - "navigate_cmd": velocity command array of shape (num_envs, 3)
+                - "toggle_policy_action": bool to toggle policy action on/off
+        """
+        if "toggle_policy_action" in goal:
+            if goal["toggle_policy_action"]:
+                self.use_policy_action = not self.use_policy_action
+
+        if "navigate_cmd" in goal:
+            self.cmd = goal["navigate_cmd"]
+
+    def get_action(self) -> dict[str, Any]:
+        """Compute and return the next action based on current observation.
+
+        Returns:
+            Dictionary with "body_action" key containing joint position targets
+            of shape (num_envs, num_lower_body_joints).
+        """
+        if self.observation is None:
+            raise ValueError("No observation set. Call set_observation() first.")
+
+        obs = self.observation
+        body_action = np.zeros((self.num_envs, self.num_lower_body), dtype=np.float32)
+
+        for env_idx in range(self.num_envs):
+            # Build ONNX inputs for this environment
+            ort_inputs = self._build_onnx_inputs(obs, env_idx)
+
+            # Run inference
+            outputs = self.session.run(self.output_names, ort_inputs)
+            result = dict(zip(self.output_names, outputs))
+
+            # Extract action joint positions (shape [1, 14])
+            action_joint_pos = result["action_joint_pos"]
+
+            # Update feedback state for next step
+            state = self.states[env_idx]
+            state["last_actions"] = result["last_actions_out"]
+            state["base_ang_vel_history"] = result["base_ang_vel_history_out"]
+            state["projected_gravity_history"] = result["projected_gravity_history_out"]
+            state["velocity_commands_history"] = result["velocity_commands_history_out"]
+            state["controlled_joint_pos_history"] = result["controlled_joint_pos_history_out"]
+            state["controlled_joint_vel_history"] = result["controlled_joint_vel_history_out"]
+            state["actions_history"] = result["actions_history_out"]
+
+            # Map 14 agile output joints to the 15-joint lower_body array.
+            # waist_yaw (not controlled by agile) stays at 0.0.
+            assert self.use_policy_action
+            if self.use_policy_action:
+                for agile_idx, lb_idx in enumerate(self.agile_output_to_lower_body):
+                    body_action[env_idx, lb_idx] = action_joint_pos[0, agile_idx]
+            else:
+                body_action[env_idx] = obs["q"][env_idx, : self.num_lower_body]
+
+        return {"body_action": body_action}
+
+    def _build_onnx_inputs(self, obs: dict[str, Any], env_idx: int) -> dict[str, np.ndarray]:
+        """Build the ONNX input dict for a single environment.
+
+        Args:
+            obs: Observation dictionary from prepare_observations().
+            env_idx: Environment index.
+
+        Returns:
+            Dictionary mapping ONNX input names to numpy arrays.
+        """
+        # Quaternion (w, x, y, z) from floating base pose
+        root_link_quat_w = obs["floating_base_pose"][env_idx : env_idx + 1, 3:7].astype(np.float32)
+
+        # Angular velocity in body frame
+        root_ang_vel_b = obs["floating_base_vel"][env_idx : env_idx + 1, 3:6].astype(np.float32)
+
+        # Velocity commands
+        velocity_commands = self.cmd[env_idx : env_idx + 1].astype(np.float32)
+
+        # Joint positions and velocities: select 29 body joints and reorder to agile order
+        joint_pos = obs["q"][env_idx : env_idx + 1, self.wbc_to_agile_input].astype(np.float32)
+        joint_vel = obs["dq"][env_idx : env_idx + 1, self.wbc_to_agile_input].astype(np.float32)
+
+        state = self.states[env_idx]
+
+        return {
+            "root_link_quat_w": root_link_quat_w,
+            "root_ang_vel_b": root_ang_vel_b,
+            "velocity_commands": velocity_commands,
+            "joint_pos": joint_pos,
+            "joint_vel": joint_vel,
+            "last_actions": state["last_actions"],
+            "base_ang_vel_history": state["base_ang_vel_history"],
+            "projected_gravity_history": state["projected_gravity_history"],
+            "velocity_commands_history": state["velocity_commands_history"],
+            "controlled_joint_pos_history": state["controlled_joint_pos_history"],
+            "controlled_joint_vel_history": state["controlled_joint_vel_history"],
+            "actions_history": state["actions_history"],
+        }
diff --git a/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/policy/wbc_policy_factory.py b/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/policy/wbc_policy_factory.py
@@ -9,6 +9,7 @@
 from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.g1_decoupled_whole_body_policy import (
     G1DecoupledWholeBodyPolicy,
 )
+from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.g1_agile_policy import G1AgilePolicy
 from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.g1_homie_policy import G1HomiePolicyV2
 from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.identity_policy import IdentityPolicy
 
@@ -39,9 +40,16 @@ def get_wbc_policy(robot_type: str, robot_model: RobotModel, wbc_config: BaseCon
                 model_path=wbc_config.wbc_model_path,
                 num_envs=num_envs,
             )
+        elif lower_body_policy_type == "agile":
+            lower_body_policy = G1AgilePolicy(
+                robot_model=robot_model,
+                config_path=wbc_config.policy_config_path,
+                model_path=wbc_config.wbc_model_path,
+                num_envs=num_envs,
+            )
         else:
             raise ValueError(
-                f"Invalid lower body policy type: {lower_body_policy_type}, Supported lower body policy types: homie_v2"
+                f"Invalid lower body policy type: {lower_body_policy_type}, Supported lower body policy types: homie_v2, agile"
             )
 
         wbc_policy = G1DecoupledWholeBodyPolicy(