Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions docker/setup/download_wbc_models.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
set -euo pipefail

# Script to download external WBC policy models.
# This script is called from the Dockerfile or can be run manually.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
MODELS_DIR="${REPO_ROOT}/isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/models"

# --- WBC-AGILE e2e velocity policy for G1 ---
AGILE_MODEL_DIR="${MODELS_DIR}/agile"
AGILE_MODEL_PATH="${AGILE_MODEL_DIR}/unitree_g1_velocity_e2e.onnx"
AGILE_MODEL_URL="https://github.com/nvidia-isaac/WBC-AGILE/raw/main/agile/data/policy/velocity_g1/unitree_g1_velocity_e2e.onnx"
AGILE_MODEL_SHA256="8995f2462ba2d0d83afe08905148f6373990d50018610663a539225d268ef33b"

download_model() {
local url="$1"
local dest="$2"
local expected_sha256="$3"

if [ -f "$dest" ]; then
echo "Model already exists: $dest"
return 0
fi

mkdir -p "$(dirname "$dest")"
echo "Downloading $(basename "$dest") from ${url} ..."
curl -L -o "$dest" "$url"

actual_sha256=$(sha256sum "$dest" | awk '{print $1}')
if [ "$actual_sha256" != "$expected_sha256" ]; then
echo "ERROR: SHA256 mismatch for $dest"
echo " expected: $expected_sha256"
echo " actual: $actual_sha256"
rm -f "$dest"
return 1
fi

echo "Downloaded and verified: $dest"
}

download_model "$AGILE_MODEL_URL" "$AGILE_MODEL_PATH" "$AGILE_MODEL_SHA256"

echo "All WBC models downloaded successfully."
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,22 @@ class HomieV2Config(BaseConfig):

policy_config_path: str = "config/g1_homie_v2.yaml"
"""Policy related configuration to specify inputs/outputs dim"""


@dataclass
class AgileConfig(BaseConfig):
"""Config for the WBC-AGILE end-to-end velocity policy for G1."""

# WBC Configuration
wbc_version: Literal["agile"] = "agile"
"""Version of the whole body controller."""

wbc_model_path: str = "models/agile/unitree_g1_velocity_e2e.onnx"
"""Path to WBC model file (relative to wbc_policy directory)"""

# Robot Configuration
enable_waist: bool = False
"""Whether to include waist joints in IK."""

policy_config_path: str = "config/g1_agile.yaml"
"""Policy related configuration to specify inputs/outputs dim"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2025, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# Joint ordering for ONNX model inputs (29 body joints in agile order).
# Must match the element_names from unitree_g1_velocity_e2e.yaml.
onnx_input_joint_names:
- left_hip_pitch_joint
- right_hip_pitch_joint
- waist_yaw_joint
- left_hip_roll_joint
- right_hip_roll_joint
- waist_roll_joint
- left_hip_yaw_joint
- right_hip_yaw_joint
- waist_pitch_joint
- left_knee_joint
- right_knee_joint
- left_shoulder_pitch_joint
- right_shoulder_pitch_joint
- left_ankle_pitch_joint
- right_ankle_pitch_joint
- left_shoulder_roll_joint
- right_shoulder_roll_joint
- left_ankle_roll_joint
- right_ankle_roll_joint
- left_shoulder_yaw_joint
- right_shoulder_yaw_joint
- left_elbow_joint
- right_elbow_joint
- left_wrist_roll_joint
- right_wrist_roll_joint
- left_wrist_pitch_joint
- right_wrist_pitch_joint
- left_wrist_yaw_joint
- right_wrist_yaw_joint

# Joint ordering for ONNX model outputs (14 controlled joints in agile output order).
# Must match the action_joint_pos element_names from unitree_g1_velocity_e2e.yaml.
controlled_joint_names:
- left_hip_pitch_joint
- right_hip_pitch_joint
- left_hip_roll_joint
- right_hip_roll_joint
- waist_roll_joint
- left_hip_yaw_joint
- right_hip_yaw_joint
- waist_pitch_joint
- left_knee_joint
- right_knee_joint
- left_ankle_pitch_joint
- right_ankle_pitch_joint
- left_ankle_roll_joint
- right_ankle_roll_joint

num_actions: 14
num_body_joints: 29

# Initial commands
cmd_init: [0.0, 0.0, 0.0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
# Copyright (c) 2025, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import pathlib
import torch
from typing import Any

import onnxruntime as ort

from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.base import WBCPolicy
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.utils.homie_utils import load_config


class G1AgilePolicy(WBCPolicy):
"""G1 robot policy using the WBC-AGILE end-to-end neural network.

This policy uses a single ONNX model that takes raw sensor inputs and
manages observation history internally via feedback connections. The model
outputs target joint positions along with per-joint Kp/Kd gains for 14
controlled joints (legs + waist_roll + waist_pitch).
"""

# Names of ONNX state inputs that receive feedback from the previous step's outputs.
_STATE_KEYS = [
"last_actions",
"base_ang_vel_history",
"projected_gravity_history",
"velocity_commands_history",
"controlled_joint_pos_history",
"controlled_joint_vel_history",
"actions_history",
]

def __init__(self, robot_model, config_path: str, model_path: str, num_envs: int = 1):
"""Initialize G1AgilePolicy.

Args:
robot_model: Robot model containing joint ordering info.
config_path: Path to policy YAML configuration file (relative to wbc_policy dir).
model_path: Path to the ONNX model file (relative to wbc_policy dir).
num_envs: Number of environments.
"""
self.parent_dir = pathlib.Path(__file__).parent.parent
self.config = load_config(str(self.parent_dir / config_path))
self.robot_model = robot_model
self.num_envs = num_envs

# Load ONNX model (must be downloaded beforehand via docker/setup/download_wbc_models.sh)
model_full_path = self.parent_dir / model_path
if not model_full_path.exists():
raise FileNotFoundError(
f"AGILE ONNX model not found at {model_full_path}. "
"Run docker/setup/download_wbc_models.sh to download it."
)
self.session = ort.InferenceSession(str(model_full_path))
self.output_names = [out.name for out in self.session.get_outputs()]
print(f"Successfully loaded ONNX policy from {model_full_path}")

# Build joint index mappings between WBC order and agile ONNX order
self._build_joint_mappings()

# Initialize state
self._init_state()

def _build_joint_mappings(self):
"""Build index mappings between WBC joint order and agile ONNX joint order."""
wbc_order = self.robot_model.wbc_g1_joints_order # {joint_name: wbc_index}

# Mapping for ONNX input: indices into the WBC-ordered observation to select
# the 29 body joints in the order the ONNX model expects.
onnx_input_names = self.config["onnx_input_joint_names"]
self.wbc_to_agile_input = [wbc_order[name] for name in onnx_input_names]

# Mapping for ONNX output: for each of the 14 agile output joints, the
# position in the 15-element lower_body array to write to.
controlled_names = self.config["controlled_joint_names"]
lower_body_indices = self.robot_model.get_joint_group_indices("lower_body")
self.agile_output_to_lower_body = []
for name in controlled_names:
wbc_idx = wbc_order[name]
lb_pos = lower_body_indices.index(wbc_idx)
self.agile_output_to_lower_body.append(lb_pos)

self.num_lower_body = len(lower_body_indices)

def _init_state(self):
"""Initialize all per-environment state variables."""
self.observation = None
self.use_policy_action = True
self.cmd = np.tile(self.config["cmd_init"], (self.num_envs, 1))

# Per-environment ONNX feedback state. Each entry is shaped for batch=1
# as the ONNX model expects, matching the input tensor shapes from the YAML.
self.states = [self._make_zero_state() for _ in range(self.num_envs)]

def _make_zero_state(self) -> dict[str, np.ndarray]:
"""Create a zeroed feedback state dict for one environment."""
return {
"last_actions": np.zeros((1, 14), dtype=np.float32),
"base_ang_vel_history": np.zeros((1, 5, 3), dtype=np.float32),
"projected_gravity_history": np.zeros((1, 5, 3), dtype=np.float32),
"velocity_commands_history": np.zeros((1, 5, 3), dtype=np.float32),
"controlled_joint_pos_history": np.zeros((1, 5, 14), dtype=np.float32),
"controlled_joint_vel_history": np.zeros((1, 5, 14), dtype=np.float32),
"actions_history": np.zeros((1, 5, 14), dtype=np.float32),
}

def reset(self, env_ids: torch.Tensor):
"""Reset the policy state for the given environment ids.

Args:
env_ids: The environment ids to reset.
"""
for env_id in env_ids:
idx = int(env_id)
self.states[idx] = self._make_zero_state()
self.cmd = np.tile(self.config["cmd_init"], (self.num_envs, 1))

def set_observation(self, observation: dict[str, Any]):
"""Store the current observation for the next get_action call.

Args:
observation: Dictionary containing robot state from prepare_observations().
"""
self.observation = observation

def set_goal(self, goal: dict[str, Any]):
"""Set the goal for the policy.

Args:
goal: Dictionary containing goals. Supported keys:
- "navigate_cmd": velocity command array of shape (num_envs, 3)
- "toggle_policy_action": bool to toggle policy action on/off
"""
if "toggle_policy_action" in goal:
if goal["toggle_policy_action"]:
self.use_policy_action = not self.use_policy_action

if "navigate_cmd" in goal:
self.cmd = goal["navigate_cmd"]

def get_action(self) -> dict[str, Any]:
"""Compute and return the next action based on current observation.

Returns:
Dictionary with "body_action" key containing joint position targets
of shape (num_envs, num_lower_body_joints).
"""
if self.observation is None:
raise ValueError("No observation set. Call set_observation() first.")

obs = self.observation
body_action = np.zeros((self.num_envs, self.num_lower_body), dtype=np.float32)

for env_idx in range(self.num_envs):
# Build ONNX inputs for this environment
ort_inputs = self._build_onnx_inputs(obs, env_idx)

# Run inference
outputs = self.session.run(self.output_names, ort_inputs)
result = dict(zip(self.output_names, outputs))

# Extract action joint positions (shape [1, 14])
action_joint_pos = result["action_joint_pos"]

# Update feedback state for next step
state = self.states[env_idx]
state["last_actions"] = result["last_actions_out"]
state["base_ang_vel_history"] = result["base_ang_vel_history_out"]
state["projected_gravity_history"] = result["projected_gravity_history_out"]
state["velocity_commands_history"] = result["velocity_commands_history_out"]
state["controlled_joint_pos_history"] = result["controlled_joint_pos_history_out"]
state["controlled_joint_vel_history"] = result["controlled_joint_vel_history_out"]
state["actions_history"] = result["actions_history_out"]

# Map 14 agile output joints to the 15-joint lower_body array.
# waist_yaw (not controlled by agile) stays at 0.0.
assert self.use_policy_action
if self.use_policy_action:
for agile_idx, lb_idx in enumerate(self.agile_output_to_lower_body):
body_action[env_idx, lb_idx] = action_joint_pos[0, agile_idx]
else:
body_action[env_idx] = obs["q"][env_idx, : self.num_lower_body]

return {"body_action": body_action}

def _build_onnx_inputs(self, obs: dict[str, Any], env_idx: int) -> dict[str, np.ndarray]:
"""Build the ONNX input dict for a single environment.

Args:
obs: Observation dictionary from prepare_observations().
env_idx: Environment index.

Returns:
Dictionary mapping ONNX input names to numpy arrays.
"""
# Quaternion (w, x, y, z) from floating base pose
root_link_quat_w = obs["floating_base_pose"][env_idx : env_idx + 1, 3:7].astype(np.float32)

# Angular velocity in body frame
root_ang_vel_b = obs["floating_base_vel"][env_idx : env_idx + 1, 3:6].astype(np.float32)

# Velocity commands
velocity_commands = self.cmd[env_idx : env_idx + 1].astype(np.float32)

# Joint positions and velocities: select 29 body joints and reorder to agile order
joint_pos = obs["q"][env_idx : env_idx + 1, self.wbc_to_agile_input].astype(np.float32)
joint_vel = obs["dq"][env_idx : env_idx + 1, self.wbc_to_agile_input].astype(np.float32)

state = self.states[env_idx]

return {
"root_link_quat_w": root_link_quat_w,
"root_ang_vel_b": root_ang_vel_b,
"velocity_commands": velocity_commands,
"joint_pos": joint_pos,
"joint_vel": joint_vel,
"last_actions": state["last_actions"],
"base_ang_vel_history": state["base_ang_vel_history"],
"projected_gravity_history": state["projected_gravity_history"],
"velocity_commands_history": state["velocity_commands_history"],
"controlled_joint_pos_history": state["controlled_joint_pos_history"],
"controlled_joint_vel_history": state["controlled_joint_vel_history"],
"actions_history": state["actions_history"],
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.g1_decoupled_whole_body_policy import (
G1DecoupledWholeBodyPolicy,
)
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.g1_agile_policy import G1AgilePolicy
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.g1_homie_policy import G1HomiePolicyV2
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.identity_policy import IdentityPolicy

Expand Down Expand Up @@ -39,9 +40,16 @@ def get_wbc_policy(robot_type: str, robot_model: RobotModel, wbc_config: BaseCon
model_path=wbc_config.wbc_model_path,
num_envs=num_envs,
)
elif lower_body_policy_type == "agile":
lower_body_policy = G1AgilePolicy(
robot_model=robot_model,
config_path=wbc_config.policy_config_path,
model_path=wbc_config.wbc_model_path,
num_envs=num_envs,
)
else:
raise ValueError(
f"Invalid lower body policy type: {lower_body_policy_type}, Supported lower body policy types: homie_v2"
f"Invalid lower body policy type: {lower_body_policy_type}, Supported lower body policy types: homie_v2, agile"
)

wbc_policy = G1DecoupledWholeBodyPolicy(
Expand Down
Loading
Loading