diff --git a/docker/Dockerfile.isaaclab_arena b/docker/Dockerfile.isaaclab_arena index e5018fbf5..57c3d6be8 100644 --- a/docker/Dockerfile.isaaclab_arena +++ b/docker/Dockerfile.isaaclab_arena @@ -1,7 +1,10 @@ -ARG BASE_IMAGE=nvcr.io/nvidia/isaac-sim:5.0.0 +ARG BASE_IMAGE=nvcr.io/nvidia/isaac-sim:5.1.0 FROM ${BASE_IMAGE} +# Isaac Sim 5.1.0+ runs as a non-root user; switch to root for installation steps. +USER root + # GR00T Policy Build Arguments, these are only used if INSTALL_GROOT is true ARG INSTALL_GROOT=false @@ -22,9 +25,6 @@ RUN apt-get update && apt-get install -y \ sudo \ python3-pip -# Update pip to the latest version -RUN pip3 install --upgrade pip - ################################ # Install Isaac Lab ################################ @@ -37,9 +37,10 @@ ENV TERM=xterm # Symlink isaac sim to IsaacLab RUN ln -s /isaac-sim/ ${WORKDIR}/submodules/IsaacLab/_isaac_sim # Install IsaacLab dependencies -RUN for DIR in ${WORKDIR}/submodules/IsaacLab/source/isaaclab*/; do pip install --no-deps -e "$DIR"; done +RUN for DIR in ${WORKDIR}/submodules/IsaacLab/source/isaaclab*/; do /isaac-sim/python.sh -m pip install --no-deps -e "$DIR"; done # Logs and other stuff appear under dist-packages per default, so this dir has to be writeable. RUN chmod 777 -R /isaac-sim/kit/ +RUN chmod a+x /isaac-sim # NOTE(alexmillane, 2026-02-10): We started having issues with flatdict 4.0.1 installation # during IsaacLab install. We install here with build isolation which seems to fix the issue. RUN /isaac-sim/python.sh -m pip install flatdict==4.0.1 --no-build-isolation @@ -49,7 +50,7 @@ RUN ${ISAACLAB_PATH}/isaaclab.sh -i # Patch for osqp in IsaacLab. Downgrade qpsolvers # TODO(alexmillane): Watch the thread here: https://nvidia.slack.com/archives/C06HLQ6CB41/p1764680205807019 # and remove this thread when IsaacLab has a fix. 
-RUN if python -c "import qpsolvers; print(qpsolvers.available_solvers)" | grep -q "osqp"; then \ +RUN if /isaac-sim/python.sh -c "import qpsolvers; print(qpsolvers.available_solvers)" | grep -q "osqp"; then \ echo "OSQP is installed. You can remove this clause from the Arena dockerfile."; \ else \ echo "OSQP missing, installing... This is a patch for an Isaac Lab bug."; \ @@ -79,7 +80,7 @@ ENV LW_API_ENDPOINT="https://api-dev.lightwheel.net" # HuggingFace for downloading datasets and models. # NOTE(alexmillane, 2025-10-28): For some reason the CLI has issues when installed in the IsaacSim version of python. -RUN pip install huggingface-hub[cli] +RUN pip install huggingface-hub[cli] --break-system-packages # Create alias for hf command to use the system-installed version RUN echo "alias hf='/usr/local/bin/hf'" >> /etc/bash.bashrc @@ -136,7 +137,7 @@ RUN echo "alias pytest='/isaac-sim/python.sh -m pytest'" >> /etc/bash.bashrc # It will pause waiting for the debugger to attach. # 3) Attach to the running container with VSCode using the "Attach to debugpy session" # configuration from the Run and Debug panel. 
-RUN pip3 install debugpy +RUN /isaac-sim/python.sh -m pip install debugpy RUN echo "alias debugpy='python -Xfrozen_modules=off -m debugpy --listen localhost:5678 --wait-for-client'" >> /etc/bash.bashrc # Change prompt so it's obvious we're inside the arena container diff --git a/docker/setup/entrypoint.sh b/docker/setup/entrypoint.sh index 3aa0c3320..ce226e326 100755 --- a/docker/setup/entrypoint.sh +++ b/docker/setup/entrypoint.sh @@ -21,7 +21,7 @@ userdel ubuntu || true useradd --no-log-init \ --uid "$DOCKER_RUN_USER_ID" \ --gid "$DOCKER_RUN_GROUP_NAME" \ - --groups sudo \ + --groups sudo,isaac-sim \ --shell /bin/bash \ $DOCKER_RUN_USER_NAME chown $DOCKER_RUN_USER_NAME:$DOCKER_RUN_GROUP_NAME /home/$DOCKER_RUN_USER_NAME diff --git a/docs/README.md b/docs/README.md index aa9669ce6..1120527a2 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,45 +1,42 @@ -# `isaaclab_arena` Dox - Developer Guide +# `isaaclab_arena` Docs - Developer Guide -To build the `isaaclab_arena` docs locally follow the following instructions. +The docs are built on the **host machine** (not inside Docker) using a dedicated Python 3.11 venv. -Enter the `isaaclab_arena` docker. +## Prerequisites -``` -./docker/run_docker.sh -``` - -The version of sphinx that we use requires a newer version of python. -Install a newer version of `python` and `venv`: +`python3.11` and `python3.11-venv` must be installed on the host: -``` -sudo apt-get install python3.11 python3.11-venv +```bash +sudo apt-get install -y python3.11 python3.11-venv ``` -> It looks like this actually overwrites the currently installed version of python -> inside. 
+## First-time setup -Create a `venv` and install the dependencies +From the repo root, create the venv and install dependencies: -``` +```bash +cd docs python3.11 -m venv venv_docs -source venv_docs/bin/activate -cd ./docs -python3.11 -m pip install -r requirements.txt +venv_docs/bin/pip install -r requirements.txt ``` -To make the current version of docs -``` -make html +## Build and view + +```bash +cd docs +venv_docs/bin/sphinx-build -M html . _build/current +xdg-open _build/current/html/index.html ``` -To view the docs, navigate to `isaaclab_arena/docs/_build/current/html/index.html`, and double-click. -To make the multi version docs. Note that this will only build docs for the set branches, such -as release, main etc. Only docs committed to these branches will be reflected. +## Multi-version docs -``` +Builds docs for committed branches only (e.g. `main`, `release`). Local uncommitted changes are **not** reflected. + +```bash +cd docs +source venv_docs/bin/activate make multi-docs +xdg-open _build/index.html ``` - -To view the multi version docs, navigate to `isaaclab_arena/docs/_build/index.html`, and double-click. diff --git a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst index 7107c6f0e..41dea4fb7 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst @@ -155,17 +155,18 @@ See :doc:`../../concepts/concept_environment_design` for environment composition Validation: Run Random Policy ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To validate the environment setup, we can run a policy with random weights to ensure everything loads correctly: +To validate the environment loads correctly, run one training iteration and check for errors: .. 
code-block:: bash - python isaaclab_arena/scripts/reinforcement_learning/train.py \ + /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ --num_envs 64 \ --max_iterations 1 \ - lift_object + --headless -This command will load the environment, initialize 64 parallel environments, and exit immediately -(``max_iterations=1``). If successful, the environment is ready for training. +If the environment is set up correctly, you will see one iteration of training output before the script exits. You should see output indicating the start of training: diff --git a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst index b28aefff4..0d4e4b090 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst @@ -1,135 +1,72 @@ Policy Training --------------- -This workflow covers training an RL policy from scratch using RSL-RL's PPO implementation. -The training is fully parallelized across hundreds of environments for sample-efficient learning. - **Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details) :docker_run_default: - -Training Overview -^^^^^^^^^^^^^^^^^ - -We use **Proximal Policy Optimization (PPO)** from the `RSL-RL `_ library, -a proven on-policy RL algorithm for robot learning. The training process: - -1. **Parallel Simulation**: Runs 512 parallel environments simultaneously -2. **Dense Rewards**: Provides shaped rewards for reaching, grasping, lifting, and goal achievement -3. **Command Sampling**: Randomly samples target positions within a workspace range -4. **Automatic Checkpointing**: Saves model checkpoints every 500 iterations -5. 
**Tensorboard Logging**: Monitors training progress in real-time - Training Command ^^^^^^^^^^^^^^^^ -To train the policy, run: +Training uses IsaacLab's RSL-RL training script directly. The ``--external_callback`` argument +points to an Arena function that runs before training starts — it reads the ``--task`` argument, +builds the environment, and registers it with gym so IsaacLab's script can find it by name. .. code-block:: bash - python isaaclab_arena/scripts/reinforcement_learning/train.py \ - --env_spacing 5.0 \ + /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ --num_envs 512 \ --max_iterations 12000 \ - --save_interval 500 \ - --headless \ - lift_object - -**Command Breakdown:** - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--env_spacing 5.0`` - - Spacing between parallel environments (meters) - * - ``--num_envs 512`` - - Number of parallel environments for training - * - ``--max_iterations 12000`` - - Total training iterations (each iteration = 24 timesteps × 512 envs = 12,288 samples) - * - ``--save_interval 500`` - - Save checkpoint every 500 iterations - * - ``--headless`` - - Run without GUI for faster training - * - ``lift_object`` - - Environment name (must be last argument) - -**Additional Arguments (Optional):** - -.. 
list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--seed `` - - Random seed for reproducibility (default: 42) - * - ``--device `` - - Device to use: 'cuda' or 'cpu' (default: 'cuda') - * - ``--video`` - - Record training videos periodically - * - ``--video_interval 2000`` - - Interval for recording videos (iterations) - - -Training Configuration -^^^^^^^^^^^^^^^^^^^^^^ - -The training uses the default RSL-RL PPO configuration, which can be found at: - -``isaaclab_arena/policy/rl_policy/generic_policy.json`` - -Key hyperparameters: - -.. code-block:: json - - { - "algorithm": { - "class_name": "PPO", - "num_learning_epochs": 5, - "num_mini_batches": 4, - "learning_rate": 0.001, - "gamma": 0.99, - "lam": 0.95, - "clip_param": 0.2 - }, - "policy": { - "class_name": "ActorCritic", - "activation": "elu", - "actor_hidden_dims": [256, 256, 256], - "critic_hidden_dims": [256, 256, 256] - } - } - -To use a custom configuration, specify the path with ``--agent_cfg_path ``. + --headless +Checkpoints are written to ``logs/rsl_rl/generic_experiment/<timestamp>/``. +The agent configuration is saved alongside as ``params/agent.yaml``, +which the evaluation script uses to reconstruct the policy at inference time. -Monitoring Training -^^^^^^^^^^^^^^^^^^^ -Training logs are saved to ``logs/rsl_rl/generic_experiment//``. +Overriding Hyperparameters +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Hyperparameters come from ``RLPolicyCfg`` in ``isaaclab_arena_examples/policy/base_rsl_rl_policy.py`` +and can be overridden with Hydra syntax appended to the training command: + +.. code-block:: bash -**1. 
View Training Metrics with Tensorboard** + # Change network activation function to relu (default: elu) + agent.policy.activation=relu -Launch Tensorboard to monitor training progress: + # Adjust the learning rate (default: 0.0001) + agent.algorithm.learning_rate=0.001 + + # Save a checkpoint less frequently (default: every 200 iterations) + agent.save_interval=500 + +For example, to train with relu activation and a higher learning rate: .. code-block:: bash - tensorboard --logdir logs/rsl_rl + /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ + --num_envs 512 \ + --max_iterations 12000 \ + --headless \ + agent.policy.activation=relu \ + agent.algorithm.learning_rate=0.001 + -Navigate to ``http://localhost:6006`` in your browser to view: +Monitoring Training +^^^^^^^^^^^^^^^^^^^ + +Launch Tensorboard to monitor progress: -- **Episode rewards**: Total reward per episode -- **Episode length**: Steps per episode -- **Policy loss**: Actor and critic losses -- **Learning rate**: Current learning rate schedule +.. code-block:: bash -**2. Training Output** + /isaac-sim/python.sh -m tensorboard.main --logdir logs/rsl_rl -During training, you'll see periodic console output: +During training, each iteration prints a summary to the console: .. code-block:: text @@ -159,43 +96,28 @@ During training, you'll see periodic console output: Time elapsed: 00:00:04 ETA: 00:00:49 - [INFO] Saved checkpoint to: logs/rsl_rl/generic_experiment//model_.pt - -**3. Checkpoints** - -Model checkpoints are saved to: - -``logs/rsl_rl/generic_experiment//model_.pt`` - -Example: ``logs/rsl_rl/generic_experiment/2026-01-29_12-30-00/model_2000.pt`` - Multi-GPU Training ^^^^^^^^^^^^^^^^^^ -For faster training on multi-GPU systems, use the ``--distributed`` flag: +Add ``--distributed`` to spread environments across all available GPUs: .. 
code-block:: bash - python isaaclab_arena/scripts/reinforcement_learning/train.py \ - --env_spacing 5.0 \ + /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ --num_envs 512 \ --max_iterations 12000 \ - --save_interval 500 \ --headless \ - --distributed \ - lift_object - -This automatically distributes environments across available GPUs. + --distributed Expected Results ^^^^^^^^^^^^^^^^ -After 12,000 iterations (~6 hours on a single GPU with 512 environments): - -The trained policy should reliably grasp and lift objects to commanded target positions. -Please refer to the following gif for an example of the trained policy: +After 12,000 iterations (~6 hours on a single GPU with 512 environments), the trained +policy should reliably grasp and lift objects to commanded target positions. .. image:: ../../../images/lift_object_rl_task.gif :align: center diff --git a/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst b/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst index 05e2e08c1..5f71a4913 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst @@ -1,9 +1,6 @@ Closed-Loop Policy Inference and Evaluation ------------------------------------------- -This workflow demonstrates running the trained RSL-RL policy in closed-loop -and evaluating it in the Lift Object environment. 
- **Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details) :docker_run_default: @@ -15,16 +12,12 @@ Once inside the container, set the models directory if you plan to download pre- export MODELS_DIR=models/isaaclab_arena/reinforcement_learning mkdir -p $MODELS_DIR -Note that this tutorial assumes that you've completed the -:doc:`preceding step (Policy Training) ` and have a trained checkpoint available, -or you can download a pre-trained checkpoint as described below. +This tutorial assumes you've completed :doc:`step_2_policy_training` and have a trained checkpoint, +or you can download a pre-trained one as described below. .. dropdown:: Download Pre-trained Model (skip preceding steps) :animate: fade-in - These commands can be used to download a pre-trained RSL-RL policy checkpoint, - such that the preceding training step can be skipped. - .. code-block:: bash hf download \ @@ -32,73 +25,29 @@ or you can download a pre-trained checkpoint as described below. model_11999.pt \ --local-dir $MODELS_DIR/lift_object_checkpoint - After downloading, you can use the checkpoint at: + After downloading, the checkpoint is at: ``$MODELS_DIR/lift_object_checkpoint/model_11999.pt`` - Replace checkpoint paths in the examples below with this path to evaluate the pre-trained model. + Replace checkpoint paths in the examples below with this path. Evaluation Methods ^^^^^^^^^^^^^^^^^^ -Isaac Lab Arena provides multiple ways to evaluate trained RL policies: - -1. **Quick Visualization (play.py)**: Fast visual inspection of policy behavior -2. **Single Environment Evaluation (policy_runner.py)**: Detailed evaluation with metrics -3. **Parallel Environment Evaluation (policy_runner.py)**: Large-scale statistical evaluation -4. 
**Batch Evaluation (eval_runner.py)**: Automated evaluation of multiple checkpoints - - -Method 1: Quick Visualization -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``play.py`` script provides the fastest way to visually inspect your trained policy. -This is useful for debugging and quick quality checks. - -.. code-block:: bash - - python isaaclab_arena/scripts/reinforcement_learning/play.py \ - --env_spacing 30.0 \ - --num_envs 16 \ - --checkpoint logs/rsl_rl/generic_experiment/2026-01-28_17-26-10/model_11999.pt \ - lift_object - -**Key Features:** +There are three ways to evaluate a trained policy: -- Fast startup with GUI enabled by default -- Visualizes policy rollouts in real-time -- No metrics computation (pure visualization) -- Useful for debugging policy behavior +1. **Single environment** (``policy_runner.py``): detailed evaluation with metrics +2. **Parallel environments** (``policy_runner.py``): larger-scale statistical evaluation +3. **Batch evaluation** (``eval_runner.py``): automated evaluation across multiple checkpoints -**Command Arguments:** -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--env_spacing 30.0`` - - Larger spacing for visualization (avoids visual clutter) - * - ``--num_envs 16`` - - Number of parallel environments to visualize - * - ``--checkpoint `` - - Path to the trained model checkpoint (.pt file) - * - ``lift_object`` - - Environment name (must be last) - -You should see multiple Franka robots simultaneously attempting to lift objects to various target positions. - - -Method 2: Single Environment Evaluation +Method 1: Single Environment Evaluation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``policy_runner.py`` provides comprehensive evaluation with task-specific metrics. - .. 
code-block:: bash - python isaaclab_arena/evaluation/policy_runner.py \ + /isaac-sim/python.sh isaaclab_arena/evaluation/policy_runner.py \ --policy_type rsl_rl \ --num_steps 1000 \ --checkpoint_path logs/rsl_rl/generic_experiment/2026-01-28_17-26-10/model_11999.pt \ @@ -107,54 +56,28 @@ The ``policy_runner.py`` provides comprehensive evaluation with task-specific me .. note:: - If you downloaded the pre-trained model from Hugging Face, replace the checkpoint path: - - ``--checkpoint_path $MODELS_DIR/lift_object_checkpoint/model_11999.pt`` - -**Important: Argument Order** - -Policy-specific arguments (``--policy_type``, ``--checkpoint_path``, etc.) must come **before** the environment name. -Environment-specific arguments (``--rl_training_mode``, ``--object``, etc.) must come **after** the environment name. - -**Command Breakdown:** - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--policy_type rsl_rl`` - - Policy type to load (RSL-RL trained policy) - * - ``--num_steps 1000`` - - Total simulation steps to run - * - ``--checkpoint_path `` - - Path to the model checkpoint - * - ``lift_object`` - - Environment name - * - ``--rl_training_mode False`` - - Enable success termination for evaluation + If you downloaded the pre-trained model from Hugging Face, replace the checkpoint path with: + ``$MODELS_DIR/lift_object_checkpoint/model_11999.pt`` -**Expected Output:** +Policy-specific arguments (``--policy_type``, ``--checkpoint_path``, etc.) must come **before** the +environment name. Environment-specific arguments (``--rl_training_mode``, ``--object``, etc.) must +come **after** it. -At the end of evaluation, you should see metrics similar to: +At the end of the run, metrics are printed to the console: .. code-block:: text Metrics: {'success_rate': 0.85, 'num_episodes': 12} -This indicates that 85% of episodes successfully lifted the object to the target position, -across 12 completed episodes in 1000 steps. 
- -Method 3: Parallel Environment Evaluation +Method 2: Parallel Environment Evaluation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For more statistically significant results, evaluate across many parallel environments: +For more statistically significant results, run across many environments in parallel: .. code-block:: bash - python isaaclab_arena/evaluation/policy_runner.py \ + /isaac-sim/python.sh isaaclab_arena/evaluation/policy_runner.py \ --policy_type rsl_rl \ --num_steps 5000 \ --num_envs 64 \ @@ -163,37 +86,17 @@ For more statistically significant results, evaluate across many parallel enviro lift_object \ --rl_training_mode False -**Additional Arguments:** - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--num_envs 64`` - - Run 64 parallel environments simultaneously - * - ``--headless`` - - Run without GUI for faster evaluation - * - ``--num_steps 5000`` - - More steps for more episodes - -**Expected Output:** - .. code-block:: text Metrics: {'success_rate': 0.83, 'num_episodes': 156} -Running more environments and steps provides better statistical estimates of policy performance. - -Method 4: Batch Evaluation with JSON Configuration -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Method 3: Batch Evaluation +^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For systematic evaluation of multiple checkpoints or hyperparameter sweeps, use ``eval_runner.py`` -with a JSON configuration file. +To evaluate multiple checkpoints in sequence, use ``eval_runner.py`` with a JSON config. -**1. Create Evaluation Configuration** +**1. Create an evaluation config** Create a file ``eval_config.json``: @@ -224,16 +127,11 @@ Create a file ``eval_config.json``: ] } -**2. Run Batch Evaluation** +**2. Run** .. code-block:: bash - python isaaclab_arena/evaluation/eval_runner.py --eval_jobs_config eval_config.json - -This will automatically evaluate all checkpoints listed in the configuration and output -a summary of metrics for each. 
- -**Expected Output:** + /isaac-sim/python.sh isaaclab_arena/evaluation/eval_runner.py --eval_jobs_config eval_config.json .. code-block:: text @@ -252,49 +150,15 @@ a summary of metrics for each. Understanding the Metrics ^^^^^^^^^^^^^^^^^^^^^^^^^^ -The Lift Object task reports the following metrics: - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Metric - - Description - * - ``success_rate`` - - Fraction of episodes where object reached target position within tolerance - * - ``num_episodes`` - - Total number of episodes completed during evaluation - -A well-trained policy should achieve: - -- **Success rate**: 70-90% (depends on target range difficulty) -- **Consistent performance**: Success rate stable across multiple evaluation runs - - -Troubleshooting -^^^^^^^^^^^^^^^ - -**Issue: Low success rate (<50%)** - -- Increase training iterations (try 20,000+) -- Check reward configuration in task definition -- Verify command sampling ranges are reasonable -- Try different random seeds - -**Issue: Policy gets stuck or drops object** - -- Ensure object mass and friction are reasonable -- Check gripper force limits -- Visualize with ``play.py`` to diagnose behavior -- Review episode recordings if ``--video`` was enabled during training +The Lift Object task reports two metrics: -**Issue: "Checkpoint not found" error** +- ``success_rate``: fraction of episodes where the object reached the target position within tolerance +- ``num_episodes``: total number of completed episodes during the evaluation run -- Verify checkpoint path is correct -- Use absolute paths if relative paths fail -- Check that training completed and saved checkpoints +A well-trained policy should reach 70–90% success rate. Results will vary with the target range, +random seed, and hardware. .. note:: - When running evaluation, always set ``--rl_training_mode False`` to enable success termination. - During training, this flag is ``True`` by default to prevent early episode termination. 
+ Always set ``--rl_training_mode False`` when evaluating. During training this flag is ``True`` + to disable success termination; setting it to ``False`` re-enables it for proper evaluation. diff --git a/isaaclab_arena/environments/arena_env_builder.py b/isaaclab_arena/environments/arena_env_builder.py index 772923ae6..8898e63da 100644 --- a/isaaclab_arena/environments/arena_env_builder.py +++ b/isaaclab_arena/environments/arena_env_builder.py @@ -275,10 +275,17 @@ def build_registered( # THIS WILL BE REMOVED IN THE FUTURE. cfg_entry = self.modify_env_cfg(cfg_entry) entry_point = self.get_entry_point() + # Register the environment with the Gym registry. + kwargs = { + "env_cfg_entry_point": cfg_entry, + } + if self.arena_env.rl_framework is not None: + assert self.arena_env.rl_policy_cfg is not None + kwargs[self.arena_env.rl_framework.get_entry_point_string()] = self.arena_env.rl_policy_cfg gym.register( id=name, entry_point=entry_point, - kwargs={"env_cfg_entry_point": cfg_entry}, + kwargs=kwargs, disable_env_checker=True, ) cfg = parse_env_cfg( diff --git a/isaaclab_arena/environments/isaaclab_arena_environment.py b/isaaclab_arena/environments/isaaclab_arena_environment.py index a3ddc315a..9eb603777 100644 --- a/isaaclab_arena/environments/isaaclab_arena_environment.py +++ b/isaaclab_arena/environments/isaaclab_arena_environment.py @@ -13,6 +13,7 @@ from isaaclab_arena.embodiments.embodiment_base import EmbodimentBase from isaaclab_arena.environments.isaaclab_arena_manager_based_env import IsaacLabArenaManagerBasedRLEnvCfg from isaaclab_arena.orchestrator.orchestrator_base import OrchestratorBase + from isaaclab_arena.reinforcement_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene from isaaclab_arena.tasks.task_base import TaskBase @@ -29,6 +30,8 @@ def __init__( teleop_device: TeleopDeviceBase | None = None, orchestrator: OrchestratorBase | None = None, env_cfg_callback: Callable[IsaacLabArenaManagerBasedRLEnvCfg] | None = 
None, + rl_framework: RLFramework | None = None, + rl_policy_cfg: str | None = None, ): """ Args: @@ -47,3 +50,5 @@ def __init__( self.teleop_device = teleop_device self.orchestrator = orchestrator self.env_cfg_callback = env_cfg_callback + self.rl_framework = rl_framework + self.rl_policy_cfg = rl_policy_cfg diff --git a/isaaclab_arena/environments/isaaclab_interop.py b/isaaclab_arena/environments/isaaclab_interop.py new file mode 100644 index 000000000..6fd924fe3 --- /dev/null +++ b/isaaclab_arena/environments/isaaclab_interop.py @@ -0,0 +1,51 @@ +# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +from isaaclab_arena_environments.cli import ExampleEnvironments + + +def environment_registration_callback() -> list[str]: + """This function is for use with Isaac Lab scripts to register an IsaacLab Arena environment. + + This function is passed to an Isaac Lab script as an external callback function. Example: + + python IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback + --task lift_object + --num_envs 512 + + In this case the "lift_object" environment is registered with Isaac Lab before + running the RSL RL training script. The training script will then run the + training for the lift_object environment. + + """ + from isaaclab.app import AppLauncher + + from isaaclab_arena.cli.isaaclab_arena_cli import add_isaac_lab_cli_args, add_isaaclab_arena_cli_args + from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder + + # Get the requested environment from the CLI. + parser = argparse.ArgumentParser() + # NOTE(alexmillane, 2026.02.12): With the Isaac Lab interop, we use the task name to + # determine the environment to register. The environment is also registered under this name. 
+ # The result is that a single argument tells Arena what to register, and Lab what to run. + parser.add_argument("--task", type=str, required=True, help="Name of the IsaacLab Arena environment to register.") + environment_name = parser.parse_known_args()[0].task + environment = ExampleEnvironments[environment_name]() + # Get the full list of environment-specific CLI args. + AppLauncher.add_app_launcher_args(parser) + add_isaac_lab_cli_args(parser) + add_isaaclab_arena_cli_args(parser) + environment.add_cli_args(parser) + args, remaining_args = parser.parse_known_args() + # Create the environment config + isaaclab_arena_environment = environment.get_env(args) + # Build and register the environment + env_builder = ArenaEnvBuilder(isaaclab_arena_environment, args) + env_builder.build_registered() + # Return the arguments that were not consumed by this callback + return remaining_args diff --git a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py b/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py deleted file mode 100644 index 181c7fd2f..000000000 --- a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import json -from dataclasses import field -from typing import Any - -from isaaclab.utils import configclass -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg - - -@configclass -class RLPolicyCfg(RslRlOnPolicyRunnerCfg): - - num_steps_per_env: int = 24 - max_iterations: int = 4000 - save_interval: int = 200 - experiment_name: str = "generic_experiment" - obs_groups = field( - default_factory=lambda: { - "policy": ["policy"], - "critic": ["policy"], - } - ) - policy: RslRlPpoActorCriticCfg = field(default_factory=RslRlPpoActorCriticCfg) - algorithm: RslRlPpoAlgorithmCfg = field(default_factory=RslRlPpoAlgorithmCfg) - - @classmethod - def update_cfg( - cls, - policy_cfg: dict[str, Any], - algorithm_cfg: dict[str, Any], - obs_groups: dict[str, list[str]], - num_steps_per_env: int, - max_iterations: int, - save_interval: int, - experiment_name: str, - ): - cfg = cls() - cfg.policy = RslRlPpoActorCriticCfg(**policy_cfg) - cfg.algorithm = RslRlPpoAlgorithmCfg(**algorithm_cfg) - cfg.obs_groups = obs_groups - cfg.num_steps_per_env = num_steps_per_env - cfg.max_iterations = max_iterations - cfg.save_interval = save_interval - cfg.experiment_name = experiment_name - return cfg - - -def get_agent_cfg(args_cli: argparse.Namespace) -> Any: - """Get the environment and agent configuration from the command line arguments.""" - - # Read a json file containing the agent configuration - with open(args_cli.agent_cfg_path) as f: - agent_cfg_dict = json.load(f) - - policy_cfg = agent_cfg_dict["policy_cfg"] - algorithm_cfg = agent_cfg_dict["algorithm_cfg"] - obs_groups = agent_cfg_dict["obs_groups"] - # Load all other arguments if they are in args_cli as policy arguments - num_steps_per_env = args_cli.num_steps_per_env - max_iterations = args_cli.max_iterations - save_interval = args_cli.save_interval - experiment_name = args_cli.experiment_name - - agent_cfg = RLPolicyCfg.update_cfg( - policy_cfg, algorithm_cfg, 
obs_groups, num_steps_per_env, max_iterations, save_interval, experiment_name - ) - - return agent_cfg diff --git a/isaaclab_arena/policy/rl_policy/generic_policy.json b/isaaclab_arena/policy/rl_policy/generic_policy.json deleted file mode 100644 index 6fdf9d2fa..000000000 --- a/isaaclab_arena/policy/rl_policy/generic_policy.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "policy_cfg": { - "init_noise_std": 1.0, - "actor_obs_normalization": false, - "critic_obs_normalization": false, - "actor_hidden_dims": [256, 128, 64], - "critic_hidden_dims": [256, 128, 64], - "activation": "elu" - }, - "algorithm_cfg": { - "value_loss_coef": 1.0, - "use_clipped_value_loss": true, - "clip_param": 0.2, - "entropy_coef": 0.006, - "num_learning_epochs": 5, - "num_mini_batches": 4, - "learning_rate": 0.0001, - "schedule": "adaptive", - "gamma": 0.98, - "lam": 0.95, - "desired_kl": 0.01, - "max_grad_norm": 1.0 - }, - "obs_groups": { - "policy": ["policy", "task_obs"], - "critic": ["policy", "task_obs"] - } - } diff --git a/isaaclab_arena/policy/rsl_rl_action_policy.py b/isaaclab_arena/policy/rsl_rl_action_policy.py index e5bb4b441..aee721404 100644 --- a/isaaclab_arena/policy/rsl_rl_action_policy.py +++ b/isaaclab_arena/policy/rsl_rl_action_policy.py @@ -5,67 +5,50 @@ import argparse import gymnasium as gym +import os import torch from dataclasses import dataclass from gymnasium.spaces.dict import Dict as GymSpacesDict -from pathlib import Path from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.io import load_yaml from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper from rsl_rl.runners import DistillationRunner, OnPolicyRunner from isaaclab_arena.assets.register import register_policy from isaaclab_arena.policy.policy_base import PolicyBase -from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg -from isaaclab_arena.scripts.reinforcement_learning import cli_args @dataclass class RslRlActionPolicyConfig: - """ - Configuration dataclass for RSL-RL 
action policy. - - This dataclass serves as the single source of truth for policy configuration, - supporting both dict-based (from JSON) and CLI-based configuration paths. - """ + """Configuration dataclass for RSL-RL action policy.""" checkpoint_path: str - """Path to the RSL-RL checkpoint file.""" + """Path to the RSL-RL checkpoint file. - agent_cfg_path: Path = Path("isaaclab_arena/policy/rl_policy/generic_policy.json") - """Path to the RL agent configuration file.""" + The agent config is loaded automatically from ``params/agent.yaml`` in the + same directory, which is saved by IsaacLab's ``train.py`` alongside the checkpoint. + """ device: str = "cuda:0" """Device to run the policy on.""" @classmethod def from_cli_args(cls, args: argparse.Namespace) -> "RslRlActionPolicyConfig": - """ - Create configuration from parsed CLI arguments. - - Args: - args: Parsed command line arguments - - Returns: - RslRlActionPolicyConfig instance - """ return cls( checkpoint_path=args.checkpoint_path, - agent_cfg_path=args.agent_cfg_path, device=args.device if hasattr(args, "device") else "cuda:0", ) @register_policy class RslRlActionPolicy(PolicyBase): - """ - Policy that uses a trained RSL-RL model for inference. + """Policy that uses a trained RSL-RL model for inference. - This policy loads a checkpoint from RSL-RL training and uses it to generate - actions. It expects the environment to already be wrapped with RslRlVecEnvWrapper - if called from evaluation scripts. + Loads the checkpoint and agent config (``params/agent.yaml``) produced by + IsaacLab's ``train.py``. No separate JSON config file is required. - Example JSON configuration for eval runner: + Example configuration for eval runner: .. 
code-block:: json @@ -75,9 +58,8 @@ class RslRlActionPolicy(PolicyBase): "name": "eval_lift_cube", "policy_type": "rsl_rl", "policy_config_dict": { - "checkpoint_path": "logs/rsl_rl/lift_object/model_1000.pt", - "agent_cfg_path": "isaaclab_arena/policy/rl_policy/generic_policy.json", - "device": "cuda:0", + "checkpoint_path": "logs/rsl_rl/lift_object/2026-01-28_17-26-10/model_1000.pt", + "device": "cuda:0" }, "arena_env_args": ["lift_object", "--embodiment", "franka"] } @@ -88,137 +70,58 @@ class RslRlActionPolicy(PolicyBase): name = "rsl_rl" config_class = RslRlActionPolicyConfig - def __init__(self, config: RslRlActionPolicyConfig, args_cli: argparse.Namespace | None = None): - """ - Initialize RSL-RL action policy from a configuration dataclass. - - Args: - config: RslRlActionPolicyConfig configuration dataclass - args_cli: Optional CLI arguments namespace. If provided, uses get_agent_cfg(). - If None, loads agent config directly from JSON file. - """ + def __init__(self, config: RslRlActionPolicyConfig): super().__init__(config) self.config: RslRlActionPolicyConfig = config self._policy = None self._runner = None - self._env_is_wrapped = False - self.args_cli = args_cli def _load_policy(self, env: gym.Env) -> None: - """ - Load the RSL-RL policy from checkpoint. 
- - Args: - env: The gym environment (should already be wrapped with RslRlVecEnvWrapper) - """ - import json - - # Load agent configuration - # Prefer using get_agent_cfg() if args_cli is available (more robust) - # Otherwise, load directly from JSON (for from_dict() path) - if self.args_cli is not None: - agent_cfg = get_agent_cfg(self.args_cli) - else: - # Fallback: Load agent configuration directly from JSON file - with open(self.config.agent_cfg_path) as f: - agent_cfg_dict = json.load(f) - - # Import the config class and create agent config - from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import RLPolicyCfg - - policy_cfg = agent_cfg_dict["policy_cfg"] - algorithm_cfg = agent_cfg_dict["algorithm_cfg"] - obs_groups = agent_cfg_dict.get("obs_groups", {}) - - # Use defaults for training-specific parameters (not needed for inference) - num_steps_per_env = agent_cfg_dict.get("num_steps_per_env", 24) - max_iterations = agent_cfg_dict.get("max_iterations", 1500) - save_interval = agent_cfg_dict.get("save_interval", 100) - experiment_name = agent_cfg_dict.get("experiment_name", "rsl_rl") + """Load the RSL-RL policy from checkpoint and its accompanying agent.yaml.""" + checkpoint_path = retrieve_file_path(self.config.checkpoint_path) + agent_yaml_path = os.path.join(os.path.dirname(checkpoint_path), "params", "agent.yaml") - agent_cfg = RLPolicyCfg.update_cfg( - policy_cfg, algorithm_cfg, obs_groups, num_steps_per_env, max_iterations, save_interval, experiment_name + if not os.path.exists(agent_yaml_path): + raise FileNotFoundError( + f"No agent config found at {agent_yaml_path}. " + "Ensure the checkpoint was produced by IsaacLab's train.py." 
) - # Override device from config - agent_cfg.device = self.config.device + agent_cfg_dict = load_yaml(agent_yaml_path) + agent_cfg_dict["device"] = self.config.device - # Check if environment is already wrapped - if isinstance(env, RslRlVecEnvWrapper): - wrapped_env = env - self._env_is_wrapped = True - else: - # Wrap if needed (for standalone policy runner usage) - wrapped_env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) - self._env_is_wrapped = False + clip_actions = agent_cfg_dict.get("clip_actions") + class_name = agent_cfg_dict.get("class_name", "OnPolicyRunner") - # Create the appropriate runner - if agent_cfg.class_name == "OnPolicyRunner": - self._runner = OnPolicyRunner( - wrapped_env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device # type: ignore[attr-defined] - ) - elif agent_cfg.class_name == "DistillationRunner": - self._runner = DistillationRunner( - wrapped_env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device # type: ignore[attr-defined] - ) + wrapped_env = RslRlVecEnvWrapper(env, clip_actions=clip_actions) + + if class_name == "OnPolicyRunner": + self._runner = OnPolicyRunner(wrapped_env, agent_cfg_dict, log_dir=None, device=self.config.device) + elif class_name == "DistillationRunner": + self._runner = DistillationRunner(wrapped_env, agent_cfg_dict, log_dir=None, device=self.config.device) else: - raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + raise ValueError(f"Unsupported runner class: {class_name}") - # Load the checkpoint - checkpoint_path = retrieve_file_path(self.config.checkpoint_path) print(f"[INFO] Loading RSL-RL checkpoint from: {checkpoint_path}") self._runner.load(checkpoint_path) - - # Get the inference policy self._policy = self._runner.get_inference_policy(device=wrapped_env.unwrapped.device) def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor: - """ - Get the action from the RSL-RL policy. 
- - Args: - env: The gym environment - observation: Current observation from the environment - - Returns: - Action tensor from the policy - """ - # Load policy on first call if self._policy is None: self._load_policy(env) - # Type checker doesn't know _policy is not None after _load_policy assert self._policy is not None, "Policy should be loaded after _load_policy()" with torch.inference_mode(): return self._policy(observation) def reset(self, env_ids: torch.Tensor | None = None) -> None: - """ - Reset the policy state for specific environments. - - Args: - env_ids: Indices of environments to reset. If None, reset all. - """ - # RSL-RL policies are typically stateless for evaluation - # Override if your policy has recurrent components pass @classmethod def from_dict(cls, config_dict: dict) -> "RslRlActionPolicy": - """ - Create a policy instance from a configuration dictionary. - - This override ensures args_cli is None when loading from JSON config. - - Args: - config_dict: Dictionary containing the configuration fields - - Returns: - RslRlActionPolicy instance - """ config = RslRlActionPolicyConfig(**config_dict) - return cls(config, args_cli=None) + return cls(config) @staticmethod def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: @@ -228,31 +131,14 @@ def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars "--checkpoint_path", type=str, required=True, - help="Path to the checkpoint file containing the RSL-RL policy", - ) - rsl_rl_group.add_argument( - "--agent_cfg_path", - type=Path, - default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"), - help="Path to the RL agent configuration file.", + help=( + "Path to the checkpoint file. Agent config is loaded automatically from params/agent.yaml in the same" + " directory." 
+ ), ) - # append RSL-RL cli arguments - cli_args.add_rsl_rl_args(parser) - cli_args.add_rsl_rl_policy_args(parser) return parser @staticmethod def from_args(args: argparse.Namespace) -> "RslRlActionPolicy": - """ - Create a RSL-RL action policy instance from parsed CLI arguments. - - Path: CLI args → ConfigDataclass → init cls - - Args: - args: Parsed command line arguments - - Returns: - RslRlActionPolicy instance - """ config = RslRlActionPolicyConfig.from_cli_args(args) - return RslRlActionPolicy(config, args_cli=args) + return RslRlActionPolicy(config) diff --git a/isaaclab_arena/reinforcement_learning/frameworks.py b/isaaclab_arena/reinforcement_learning/frameworks.py new file mode 100644 index 000000000..7d4aaa08f --- /dev/null +++ b/isaaclab_arena/reinforcement_learning/frameworks.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + + +from enum import Enum + + +class RLFramework(Enum): + RSL_RL = "rsl_rl" + SKRL = "skrl" + RL_GAMES = "rl_games" + SB3 = "sb3" + + def get_entry_point_string(self) -> str: + return f"{self.value}_cfg_entry_point" diff --git a/isaaclab_arena/scripts/reinforcement_learning/cli_args.py b/isaaclab_arena/scripts/reinforcement_learning/cli_args.py deleted file mode 100644 index 8148c1226..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/cli_args.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. 
-# -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import argparse -import random -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg - - -def add_rsl_rl_args(parser: argparse.ArgumentParser): - """Add RSL-RL arguments to the parser. - - Args: - parser: The parser to add the arguments to. - """ - # create a new argument group - arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.") - arg_group.add_argument("--run_name", type=str, default=None, help="Run name suffix to the log directory.") - # -- load arguments - arg_group.add_argument("--resume", action="store_true", default=False, help="Whether to resume from a checkpoint.") - arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.") - arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.") - # -- logger arguments - arg_group.add_argument( - "--logger", type=str, default=None, choices={"wandb", "tensorboard", "neptune"}, help="Logger module to use." - ) - arg_group.add_argument( - "--log_project_name", type=str, default=None, help="Name of the logging project when using wandb or neptune." - ) - - -def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlBaseRunnerCfg: - """Parse configuration for RSL-RL agent based on inputs. - - Args: - task_name: The name of the environment. - args_cli: The command line arguments. - - Returns: - The parsed configuration for RSL-RL agent based on inputs. 
- """ - from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry - - # load the default configuration - rslrl_cfg: RslRlBaseRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") - rslrl_cfg = update_rsl_rl_cfg(rslrl_cfg, args_cli) - return rslrl_cfg - - -def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespace): - """Update configuration for RSL-RL agent based on inputs. - - Args: - agent_cfg: The configuration for RSL-RL agent. - args_cli: The command line arguments. - - Returns: - The updated configuration for RSL-RL agent based on inputs. - """ - # override the default configuration with CLI arguments - if hasattr(args_cli, "seed") and args_cli.seed is not None: - # randomly sample a seed if seed = -1 - if args_cli.seed == -1: - args_cli.seed = random.randint(0, 10000) - agent_cfg.seed = args_cli.seed - if args_cli.resume is not None: - agent_cfg.resume = args_cli.resume - if args_cli.load_run is not None: - agent_cfg.load_run = args_cli.load_run - if args_cli.checkpoint is not None: - agent_cfg.load_checkpoint = args_cli.checkpoint - if args_cli.run_name is not None: - agent_cfg.run_name = args_cli.run_name - if args_cli.logger is not None: - agent_cfg.logger = args_cli.logger - # set the project name for wandb and neptune - if agent_cfg.logger in {"wandb", "neptune"} and args_cli.log_project_name: - agent_cfg.wandb_project = args_cli.log_project_name - agent_cfg.neptune_project = args_cli.log_project_name - - return agent_cfg - - -def add_rsl_rl_policy_args(parser: argparse.ArgumentParser): - """Add RSL-RL policy arguments to the parser. - - Args: - parser: The parser to add the arguments to. 
- """ - arg_group = parser.add_argument_group("rsl_rl_policy", description="Arguments for RSL-RL policy.") - arg_group.add_argument("--num_steps_per_env", type=int, default=24, help="Number of steps per environment.") - arg_group.add_argument("--max_iterations", type=int, default=4000, help="Maximum number of iterations.") - arg_group.add_argument("--save_interval", type=int, default=200, help="Save interval.") - arg_group.add_argument( - "--experiment_name", - type=str, - default="generic_experiment", - help="Name of the experiment folder where logs will be stored.", - ) - return arg_group diff --git a/isaaclab_arena/scripts/reinforcement_learning/play.py b/isaaclab_arena/scripts/reinforcement_learning/play.py deleted file mode 100644 index e8edb3196..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/play.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. 
-# -# SPDX-License-Identifier: Apache-2.0 - -"""Script to play a checkpoint if an RL agent from RSL-RL.""" - -"""Launch Isaac Sim Simulator first.""" - -from pathlib import Path - -from isaaclab.app import AppLauncher - -from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser -from isaaclab_arena_environments.cli import add_example_environments_cli_args - -# local imports -import cli_args # isort: skip - -# add argparse arguments -parser = get_isaaclab_arena_cli_parser() -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") -parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") -parser.add_argument( - "--agent_cfg_path", - type=Path, - default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"), - help="Path to the RL agent configuration file.", -) -parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") -# append RSL-RL cli arguments -cli_args.add_rsl_rl_args(parser) -cli_args.add_rsl_rl_policy_args(parser) -# Add the example environments CLI args -# NOTE(alexmillane, 2025.09.04): This has to be added last, because -# of the app specific flags being parsed after the global flags. 
-add_example_environments_cli_args(parser) -args_cli = parser.parse_args() - -# always enable cameras to record video -if args_cli.video: - args_cli.enable_cameras = True - -if args_cli.enable_pinocchio: - # Import pinocchio before AppLauncher to force the use of the version installed by IsaacLab and not the one installed by Isaac Sim - # pinocchio is required by the Pink IK controllers and the GR1T2 retargeter - import pinocchio # noqa: F401 - -# launch omniverse app -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app - -"""Rest everything follows.""" - -import gymnasium as gym -import os -import time -import torch - -import isaaclab_tasks # noqa: F401 -import omni.log -from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent -from isaaclab.utils.assets import retrieve_file_path -from isaaclab.utils.dict import print_dict -from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx -from isaaclab_tasks.utils import get_checkpoint_path -from rsl_rl.runners import DistillationRunner, OnPolicyRunner - -from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg -from isaaclab_arena_environments.cli import get_arena_builder_from_cli - -# PLACEHOLDER: Extension template (do not remove this comment) - - -def main(): - """Play with RSL-RL agent.""" - # We dont use hydra for the environment configuration, so we need to parse it manually - # parse configuration - try: - arena_builder = get_arena_builder_from_cli(args_cli) - env_name, env_cfg = arena_builder.build_registered() - - except Exception as e: - omni.log.error(f"Failed to parse environment configuration: {e}") - exit(1) - - agent_cfg = get_agent_cfg(args_cli) - - # override configurations with non-hydra CLI arguments - agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) - env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs - - # set 
the environment seed - # note: certain randomizations occur in the environment initialization so we set the seed here - env_cfg.seed = agent_cfg.seed - env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device - - # specify directory for logging experiments - log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) - log_root_path = os.path.abspath(log_root_path) - print(f"[INFO] Loading experiment from directory: {log_root_path}") - if args_cli.checkpoint: - resume_path = retrieve_file_path(args_cli.checkpoint) - else: - resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) - - log_dir = os.path.dirname(resume_path) - - # set the log directory for the environment (works for all environment types) - env_cfg.log_dir = log_dir - - # create isaac environment - env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) - - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - - # wrap for video recording - if args_cli.video: - video_kwargs = { - "video_folder": os.path.join(log_dir, "videos", "play"), - "step_trigger": lambda step: step == 0, - "video_length": args_cli.video_length, - "disable_logger": True, - } - print("[INFO] Recording videos during training.") - print_dict(video_kwargs, nesting=4) - env = gym.wrappers.RecordVideo(env, **video_kwargs) - - # wrap around environment for rsl-rl - env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) - - print(f"[INFO]: Loading model checkpoint from: {resume_path}") - # load previously trained model - if agent_cfg.class_name == "OnPolicyRunner": - runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - elif agent_cfg.class_name == "DistillationRunner": - runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - 
else: - raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") - runner.load(resume_path) - - # obtain the trained policy for inference - policy = runner.get_inference_policy(device=env.unwrapped.device) - - # extract the neural network module - # we do this in a try-except to maintain backwards compatibility. - try: - # version 2.3 onwards - policy_nn = runner.alg.policy - except AttributeError: - # version 2.2 and below - policy_nn = runner.alg.actor_critic - - # extract the normalizer - if hasattr(policy_nn, "actor_obs_normalizer"): - normalizer = policy_nn.actor_obs_normalizer - elif hasattr(policy_nn, "student_obs_normalizer"): - normalizer = policy_nn.student_obs_normalizer - else: - normalizer = None - - # export policy to onnx/jit - export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") - export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") - export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") - - dt = env.unwrapped.step_dt - - # reset environment - obs = env.get_observations() - timestep = 0 - # simulate environment - while simulation_app.is_running(): - start_time = time.time() - # run everything in inference mode - with torch.inference_mode(): - # agent stepping - actions = policy(obs) - # env stepping - obs, _, _, _ = env.step(actions) - if args_cli.video: - timestep += 1 - # Exit the play loop after recording one video - if timestep == args_cli.video_length: - break - - # time delay for real-time evaluation - sleep_time = dt - (time.time() - start_time) - if args_cli.real_time and sleep_time > 0: - time.sleep(sleep_time) - - # close the simulator - env.close() - - -if __name__ == "__main__": - # run the main function - main() - # close sim app - simulation_app.close() diff --git a/isaaclab_arena/scripts/reinforcement_learning/train.py b/isaaclab_arena/scripts/reinforcement_learning/train.py deleted file mode 100644 index 
a7ad52391..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/train.py +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - -"""Script to train RL agent with RSL-RL.""" - -"""Launch Isaac Sim Simulator first.""" - -from pathlib import Path - -from isaaclab.app import AppLauncher - -from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser -from isaaclab_arena_environments.cli import add_example_environments_cli_args - -# local imports -import cli_args # isort: skip - -# add argparse arguments -parser = get_isaaclab_arena_cli_parser() -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") -parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") -parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") -parser.add_argument( - "--agent_cfg_path", - type=Path, - default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"), - help="Path to the RL agent configuration file.", -) -parser.add_argument( - "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." 
-) -parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") -# append RSL-RL cli arguments -cli_args.add_rsl_rl_args(parser) -cli_args.add_rsl_rl_policy_args(parser) -# Add the example environments CLI args -# NOTE(alexmillane, 2025.09.04): This has to be added last, because -# of the app specific flags being parsed after the global flags. -add_example_environments_cli_args(parser) -args_cli = parser.parse_args() - -# always enable cameras to record video -if args_cli.video: - args_cli.enable_cameras = True - -if args_cli.enable_pinocchio: - # Import pinocchio before AppLauncher to force the use of the version installed by IsaacLab and not the one installed by Isaac Sim - # pinocchio is required by the Pink IK controllers and the GR1T2 retargeter - import pinocchio # noqa: F401 - -# launch omniverse app -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app - -"""Check for minimum supported RSL-RL version.""" - -import importlib.metadata as metadata -import platform - -from packaging import version - -# check minimum supported rsl-rl version -RSL_RL_VERSION = "3.0.1" -installed_version = metadata.version("rsl-rl-lib") -if version.parse(installed_version) < version.parse(RSL_RL_VERSION): - if platform.system() == "Windows": - cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] - else: - cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] - print( - f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'" - f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:" - f"\n\n\t{' '.join(cmd)}\n" - ) - exit(1) - -"""Rest everything follows.""" - -import gymnasium as gym -import os -import torch -from datetime import datetime - -import isaaclab_tasks # noqa: F401 -import omni.log -from isaaclab.envs import DirectMARLEnv, ManagerBasedRLEnvCfg, 
multi_agent_to_single_agent -from isaaclab.utils.dict import print_dict -from isaaclab.utils.io import dump_yaml -from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper -from isaaclab_tasks.utils import get_checkpoint_path -from rsl_rl.runners import DistillationRunner, OnPolicyRunner - -from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg -from isaaclab_arena_environments.cli import get_arena_builder_from_cli - -# PLACEHOLDER: Extension template (do not remove this comment) - -torch.backends.cuda.matmul.allow_tf32 = True -torch.backends.cudnn.allow_tf32 = True -torch.backends.cudnn.deterministic = False -torch.backends.cudnn.benchmark = False - - -def main(): - # We dont use hydra for the environment configuration, so we need to parse it manually - # parse configuration - try: - arena_builder = get_arena_builder_from_cli(args_cli) - env_name, env_cfg = arena_builder.build_registered() - - except Exception as e: - omni.log.error(f"Failed to parse environment configuration: {e}") - exit(1) - - agent_cfg = get_agent_cfg(args_cli) - - # set the environment seed - # note: certain randomizations occur in the environment initialization so we set the seed here - env_cfg.seed = agent_cfg.seed - env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device - # check for invalid combination of CPU device with distributed training - if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device: - raise ValueError( - "Distributed training is not supported when using CPU device. " - "Please use GPU device (e.g., --device cuda) for distributed training." 
- ) - - # multi-gpu training configuration - if args_cli.distributed: - env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" - agent_cfg.device = f"cuda:{app_launcher.local_rank}" - - # set seed to have diversity in different threads - seed = agent_cfg.seed + app_launcher.local_rank - env_cfg.seed = seed - agent_cfg.seed = seed - - # specify directory for logging experiments - log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) - log_root_path = os.path.abspath(log_root_path) - print(f"[INFO] Logging experiment in directory: {log_root_path}") - # specify directory for logging runs: {time-stamp}_{run_name} - log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - # The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849) - print(f"Exact experiment name requested from command line: {log_dir}") - if agent_cfg.run_name: - log_dir += f"_{agent_cfg.run_name}" - log_dir = os.path.join(log_root_path, log_dir) - - # set the IO descriptors export flag if requested - if isinstance(env_cfg, ManagerBasedRLEnvCfg): - env_cfg.export_io_descriptors = args_cli.export_io_descriptors - else: - omni.log.warn( - "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported." 
- ) - - # set the log directory for the environment (works for all environment types) - env_cfg.log_dir = log_dir - - # create isaac environment - env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) - - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - - # save resume path before creating a new log_dir - if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": - resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) - - # wrap for video recording - if args_cli.video: - video_kwargs = { - "video_folder": os.path.join(log_dir, "videos", "train"), - "step_trigger": lambda step: step % args_cli.video_interval == 0, - "video_length": args_cli.video_length, - "disable_logger": True, - } - print("[INFO] Recording videos during training.") - print_dict(video_kwargs, nesting=4) - env = gym.wrappers.RecordVideo(env, **video_kwargs) - - # wrap around environment for rsl-rl - env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) - - # create runner from rsl-rl - if agent_cfg.class_name == "OnPolicyRunner": - runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) - elif agent_cfg.class_name == "DistillationRunner": - runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) - else: - raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") - # write git state to logs - runner.add_git_repo_to_log(__file__) - # load the checkpoint - if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": - print(f"[INFO]: Loading model checkpoint from: {resume_path}") - # load previously trained model - runner.load(resume_path) - - # dump the configuration into log-directory - dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) - dump_yaml(os.path.join(log_dir, 
"params", "agent.yaml"), agent_cfg) - - # run training - runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) - - # close the simulator - env.close() - - -if __name__ == "__main__": - # run the main function - main() - # close sim app - simulation_app.close() diff --git a/isaaclab_arena/utils/cameras.py b/isaaclab_arena/utils/cameras.py index fcfd7b786..4567bb7e0 100644 --- a/isaaclab_arena/utils/cameras.py +++ b/isaaclab_arena/utils/cameras.py @@ -117,6 +117,10 @@ def get_viewer_cfg_look_at_object(lookat_object: Asset, offset: np.ndarray) -> V if isinstance(initial_pose, PoseRange): initial_pose = initial_pose.get_midpoint() - lookat = initial_pose.position_xyz - camera_position = tuple(np.array(lookat) + offset) + # TODO(cvolk): Add float coercion to Pose.__post_init__ so this conversion is unnecessary. + # Ensure we only pass primitive Python floats (not NumPy scalars) into ViewerCfg, + # since downstream config systems like Hydra/OmegaConf don't support np.float64. 
+ lookat = tuple(float(x) for x in initial_pose.position_xyz) + camera_vec = np.array(lookat, dtype=float) + np.array(offset, dtype=float) + camera_position = tuple(float(x) for x in camera_vec.tolist()) return ViewerCfg(eye=camera_position, lookat=lookat, origin_type="env") diff --git a/isaaclab_arena_environments/lift_object_environment.py b/isaaclab_arena_environments/lift_object_environment.py index 17857b678..69e768a12 100644 --- a/isaaclab_arena_environments/lift_object_environment.py +++ b/isaaclab_arena_environments/lift_object_environment.py @@ -19,7 +19,9 @@ class LiftObjectEnvironment(ExampleEnvironmentBase): name: str = "lift_object" def get_env(self, args_cli: argparse.Namespace): # -> IsaacLabArenaEnvironment: + import isaaclab_arena_examples.policy.base_rsl_rl_policy as base_rsl_rl_policy from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment + from isaaclab_arena.reinforcement_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene from isaaclab_arena.tasks.lift_object_task import LiftObjectTaskRL from isaaclab_arena.utils.pose import Pose @@ -64,6 +66,8 @@ def get_env(self, args_cli: argparse.Namespace): # -> IsaacLabArenaEnvironment: scene=scene, task=task, teleop_device=teleop_device, + rl_framework=RLFramework.RSL_RL, + rl_policy_cfg=f"{base_rsl_rl_policy.__name__}:RLPolicyCfg", ) return isaaclab_arena_environment diff --git a/isaaclab_arena/scripts/reinforcement_learning/__init__.py b/isaaclab_arena_examples/policy/__init__.py similarity index 100% rename from isaaclab_arena/scripts/reinforcement_learning/__init__.py rename to isaaclab_arena_examples/policy/__init__.py diff --git a/isaaclab_arena_examples/policy/base_rsl_rl_policy.py b/isaaclab_arena_examples/policy/base_rsl_rl_policy.py new file mode 100644 index 000000000..7ae23a428 --- /dev/null +++ b/isaaclab_arena_examples/policy/base_rsl_rl_policy.py @@ -0,0 +1,51 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project 
Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import field + +from isaaclab.utils import configclass +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class RLPolicyCfg(RslRlOnPolicyRunnerCfg): + """Default RSL-RL runner configuration for Arena environments. + + Used as the ``rsl_rl_cfg_entry_point`` when registering environments with gym, + allowing IsaacLab's ``train.py`` to load it via ``@hydra_task_config``. + """ + + num_steps_per_env: int = 24 + max_iterations: int = 4000 + save_interval: int = 200 + experiment_name: str = "generic_experiment" + obs_groups = field( + default_factory=lambda: { + "policy": ["policy"], + "critic": ["policy"], + } + ) + policy: RslRlPpoActorCriticCfg = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm: RslRlPpoAlgorithmCfg = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=0.0001, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/submodules/IsaacLab b/submodules/IsaacLab index 6acdd82a1..e7607ed15 160000 --- a/submodules/IsaacLab +++ b/submodules/IsaacLab @@ -1 +1 @@ -Subproject commit 6acdd82a1633732d32bb575e3d792e34fdeb437e +Subproject commit e7607ed155853a64f824302456cd5975cccf36ee