From 46a7551a3f1ea2e85c070ad80a22ee292631e01b Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 12 Feb 2026 15:20:24 +0100 Subject: [PATCH 01/14] Works but dirty. --- .../step_2_policy_training.rst | 2 +- .../environments/arena_env_builder.py | 15 +++- isaaclab_arena/evaluation/policy_runner.py | 2 +- .../examples/compile_env_notebook.py | 52 ++++++++++-- isaaclab_arena/examples/training_interop.py | 79 +++++++++++++++++++ .../policy/rl_policy/base_rsl_rl_policy.py | 26 +++++- isaaclab_arena/utils/cameras.py | 10 ++- 7 files changed, 172 insertions(+), 14 deletions(-) create mode 100644 isaaclab_arena/examples/training_interop.py diff --git a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst index b28aefff4..7fa0e4704 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst @@ -2,7 +2,7 @@ Policy Training --------------- This workflow covers training an RL policy from scratch using RSL-RL's PPO implementation. -The training is fully parallelized across hundreds of environments for sample-efficient learning. +The training is fully parallelized across hundreds of environments for efficient learning. **Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details) diff --git a/isaaclab_arena/environments/arena_env_builder.py b/isaaclab_arena/environments/arena_env_builder.py index f7b89dfac..c22350193 100644 --- a/isaaclab_arena/environments/arena_env_builder.py +++ b/isaaclab_arena/environments/arena_env_builder.py @@ -263,10 +263,23 @@ def build_registered( # THIS WILL BE REMOVED IN THE FUTURE. 
cfg_entry = self.modify_env_cfg(cfg_entry) entry_point = self.get_entry_point() + + import isaaclab_arena.policy.rl_policy.base_rsl_rl_policy as base_rsl_rl_policy + + policy_cfg_entry_point = f"{base_rsl_rl_policy.__name__}:RLPolicyCfg" + + # import isaaclab_tasks.manager_based.manipulation.lift.config.franka.joint_pos_env_cfg as joint_pos_env_cfg + # import isaaclab_tasks.manager_based.manipulation.lift.config.franka.agents.rsl_rl_ppo_cfg as rsl_rl_ppo_cfg + # cfg_entry = f"{joint_pos_env_cfg.__name__}:FrankaCubeLiftEnvCfg" + # policy_cfg_entry_point = f"{rsl_rl_ppo_cfg.__name__}:LiftCubePPORunnerCfg" + gym.register( id=name, entry_point=entry_point, - kwargs={"env_cfg_entry_point": cfg_entry}, + kwargs={ + "env_cfg_entry_point": cfg_entry, + "rsl_rl_cfg_entry_point": policy_cfg_entry_point, + }, disable_env_checker=True, ) cfg = parse_env_cfg( diff --git a/isaaclab_arena/evaluation/policy_runner.py b/isaaclab_arena/evaluation/policy_runner.py index 135529baf..e091ce4d1 100644 --- a/isaaclab_arena/evaluation/policy_runner.py +++ b/isaaclab_arena/evaluation/policy_runner.py @@ -112,7 +112,7 @@ def main(): """Script to run an IsaacLab Arena environment with a zero-action agent.""" args_parser = get_isaaclab_arena_cli_parser() # We do this as the parser is shared between the example environment and policy runner - args_cli, unknown = args_parser.parse_known_args() + # args_cli, unknown = args_parser.parse_known_args() # Start the simulation app with SimulationAppContext(args_cli): diff --git a/isaaclab_arena/examples/compile_env_notebook.py b/isaaclab_arena/examples/compile_env_notebook.py index 409cdc901..7ee00df23 100644 --- a/isaaclab_arena/examples/compile_env_notebook.py +++ b/isaaclab_arena/examples/compile_env_notebook.py @@ -15,11 +15,14 @@ simulation_app = AppLauncher() from isaaclab_arena.assets.asset_registry import AssetRegistry +from isaaclab_arena.assets.object_reference import ObjectReference +from isaaclab_arena.assets.object_set import 
RigidObjectSet from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment from isaaclab_arena.relations.relations import IsAnchor, On from isaaclab_arena.scene.scene import Scene +from isaaclab_arena.tasks.pick_and_place_task import PickAndPlaceTask from isaaclab_arena.utils.pose import Pose asset_registry = AssetRegistry() @@ -29,18 +32,53 @@ cracker_box = asset_registry.get_asset_by_name("cracker_box")() tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")() -cracker_box.set_initial_pose(Pose(position_xyz=(0.4, 0.0, 0.1), rotation_wxyz=(1.0, 0.0, 0.0, 0.0))) -cracker_box.add_relation(IsAnchor()) -tomato_soup_can.add_relation(On(cracker_box)) +# cracker_box.set_initial_pose(Pose(position_xyz=(0.4, 0.0, 0.1), rotation_wxyz=(1.0, 0.0, 0.0, 0.0))) +# cracker_box.add_relation(IsAnchor()) +# tomato_soup_can.add_relation(On(cracker_box)) -scene = Scene(assets=[background, cracker_box, tomato_soup_can]) +# object_set = RigidObjectSet( +# name="object_set", +# objects=[cracker_box, cracker_box], +# ) +OBJECT_SET_1_PRIM_PATH = "/World/envs/env_.*/ObjectSet_1" +# object_set = RigidObjectSet( +# name="single_object_set", objects=[cracker_box, cracker_box], prim_path=OBJECT_SET_1_PRIM_PATH +# ) + +# # scene = Scene(assets=[background, cracker_box, tomato_soup_can]) +# scene = Scene(assets=[background, object_set]) +# isaaclab_arena_environment = IsaacLabArenaEnvironment( +# name="reference_object_test", +# embodiment=embodiment, +# scene=scene, +# ) + +asset_registry = AssetRegistry() +background = asset_registry.get_asset_by_name("kitchen")() +embodiment = asset_registry.get_asset_by_name("franka")() +cracker_box = asset_registry.get_asset_by_name("cracker_box")() +destination_location = ObjectReference( + name="destination_location", + 
prim_path="{ENV_REGEX_NS}/kitchen/Cabinet_B_02", + parent_asset=background, +) +obj_set = RigidObjectSet( + name="single_object_set", objects=[cracker_box, tomato_soup_can], prim_path=OBJECT_SET_1_PRIM_PATH +) +obj_set.set_initial_pose(Pose(position_xyz=(0.1, 0.0, 0.1), rotation_wxyz=(1.0, 0.0, 0.0, 0.0))) +scene = Scene(assets=[background, obj_set]) isaaclab_arena_environment = IsaacLabArenaEnvironment( - name="reference_object_test", + name="single_object_set_test", embodiment=embodiment, scene=scene, + task=PickAndPlaceTask( + pick_up_object=obj_set, destination_location=destination_location, background_scene=background + ), + teleop_device=None, ) -args_cli = get_isaaclab_arena_cli_parser().parse_args([]) + +args_cli = get_isaaclab_arena_cli_parser().parse_args(["--num_envs", "3"]) args_cli.solve_relations = True env_builder = ArenaEnvBuilder(isaaclab_arena_environment, args_cli) env = env_builder.make_registered() @@ -49,7 +87,7 @@ # %% # Run some zero actions. -NUM_STEPS = 1000 +NUM_STEPS = 500 for _ in tqdm.tqdm(range(NUM_STEPS)): with torch.inference_mode(): actions = torch.zeros(env.action_space.shape, device=env.unwrapped.device) diff --git a/isaaclab_arena/examples/training_interop.py b/isaaclab_arena/examples/training_interop.py new file mode 100644 index 000000000..3a654acd9 --- /dev/null +++ b/isaaclab_arena/examples/training_interop.py @@ -0,0 +1,79 @@ +# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +# from isaaclab_arena_environments.lift_object_environment import LiftObjectEnvironment +from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment +from isaaclab_arena_environments.cli import ExampleEnvironments + + +# def add_environment_registration_args(parser: argparse.ArgumentParser) -> argparse.ArgumentGroup: +# def get_environment(argv: list[str]) -> tuple[IsaacLabArenaEnvironment, list[str]]: +def get_environment( + arena_environment_name: str, remaining_args: list[str] +) -> tuple[IsaacLabArenaEnvironment, list[str]]: + # Get the environment class + environment = ExampleEnvironments[arena_environment_name]() + # Get arguments associated with this environment + parser = argparse.ArgumentParser() + environment.add_cli_args(parser) + # args, remaining_args = parser.parse_known_args(remaining_args) + args, remaining_args = parser.parse_known_args(remaining_args) + # Build the environment (from the args) + isaaclab_arena_environment = environment.get_env(args) + return isaaclab_arena_environment, remaining_args + + +# def my_env_registration_callback(argv: list[str]) -> list[str]: +def my_env_registration_callback() -> list[str]: + """Parse arena-specific CLI args, register env, and return remaining args. + + This function is designed to be called from the main training script with + the list of arguments that have not yet been consumed. It parses only the + arguments it knows about and returns the leftover list for downstream use + (e.g. Hydra / other parsers). 
+ """ + + from isaaclab.app import AppLauncher + + from isaaclab_arena.cli.isaaclab_arena_cli import add_isaac_lab_cli_args, add_isaaclab_arena_cli_args + from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder + + # print("Hello from my_env_registration_callback!") + # Build parser for arena-specific CLI args and parse only from the provided argv + # parser = get_isaaclab_arena_cli_parser() + # LiftObjectEnvironment.add_cli_args(parser) + # args, remaining_args = parser.parse_known_args(argv) + # # args, _ = get_isaaclab_arena_environments_cli_parser().parse_known_args() + # print(f"args: {args}") + # isaaclab_arena_environment = LiftObjectEnvironment().get_env(args) + # print(f"isaaclab_arena_environment: {isaaclab_arena_environment}") + # isaaclab_arena_environment, remaining_args = get_environment(argv) + parser = argparse.ArgumentParser() + # NOTE(alexmillane, 2026.02.12): With the Isaac Lab interop, we use the task name to + # determine the environment to register. The environment is also registered under this name. + # The result is that a single arugment tells Arena what to register, and Lab what to run. 
+ parser.add_argument("--task", type=str, required=True, help="Name of the IsaacLab Arena environment to register.") + # Get the environment class + environment_name = parser.parse_known_args()[0].task + environment = ExampleEnvironments[environment_name]() + # Get the full list of arguments + AppLauncher.add_app_launcher_args(parser) + add_isaac_lab_cli_args(parser) + add_isaaclab_arena_cli_args(parser) + environment.add_cli_args(parser) + args, remaining_args = parser.parse_known_args() + + # Get the environment + # isaaclab_arena_environment, remaining_args = get_environment(args.task, remaining_args) + isaaclab_arena_environment = environment.get_env(args) + + # Build and register the environment (from the args) + env_builder = ArenaEnvBuilder(isaaclab_arena_environment, args) + env_builder.build_registered() + # print(f"env_cfg: {env_cfg}") + # Return only the arguments that were not consumed by this callback + return remaining_args diff --git a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py b/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py index 181c7fd2f..114454e69 100644 --- a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py +++ b/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py @@ -39,8 +39,30 @@ class RLPolicyCfg(RslRlOnPolicyRunnerCfg): "critic": ["policy"], } ) - policy: RslRlPpoActorCriticCfg = field(default_factory=RslRlPpoActorCriticCfg) - algorithm: RslRlPpoAlgorithmCfg = field(default_factory=RslRlPpoAlgorithmCfg) + # policy: RslRlPpoActorCriticCfg = field(default_factory=RslRlPpoActorCriticCfg) + # algorithm: RslRlPpoAlgorithmCfg = field(default_factory=RslRlPpoAlgorithmCfg) + policy: RslRlPpoActorCriticCfg = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm: RslRlPpoAlgorithmCfg = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, 
+ clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=0.0001, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) @classmethod def update_cfg( diff --git a/isaaclab_arena/utils/cameras.py b/isaaclab_arena/utils/cameras.py index fcfd7b786..d92977729 100644 --- a/isaaclab_arena/utils/cameras.py +++ b/isaaclab_arena/utils/cameras.py @@ -117,6 +117,12 @@ def get_viewer_cfg_look_at_object(lookat_object: Asset, offset: np.ndarray) -> V if isinstance(initial_pose, PoseRange): initial_pose = initial_pose.get_midpoint() - lookat = initial_pose.position_xyz - camera_position = tuple(np.array(lookat) + offset) + # WHEN STUFF WORKS, LOOK INTO WHY WE"RE GETTING np.float64 in the first place. + # probably need a validation step in the Pose object. + + # Ensure we only pass primitive Python floats (not NumPy scalars) into ViewerCfg, + # since downstream config systems like Hydra/OmegaConf don't support np.float64. + lookat = tuple(float(x) for x in initial_pose.position_xyz) + camera_vec = np.array(lookat, dtype=float) + np.array(offset, dtype=float) + camera_position = tuple(float(x) for x in camera_vec.tolist()) return ViewerCfg(eye=camera_position, lookat=lookat, origin_type="env") From 1a260a1bbde5ac51ff6cda44fc16b3a2ecbbef1d Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 12 Feb 2026 16:22:08 +0100 Subject: [PATCH 02/14] Get hard coding of the policy out of the compiler. 
--- .../environments/arena_env_builder.py | 22 +++++++------------ .../isaaclab_arena_environment.py | 6 +++++ .../lift_object_environment.py | 4 ++++ submodules/IsaacLab | 2 +- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/isaaclab_arena/environments/arena_env_builder.py b/isaaclab_arena/environments/arena_env_builder.py index c22350193..3c843ab08 100644 --- a/isaaclab_arena/environments/arena_env_builder.py +++ b/isaaclab_arena/environments/arena_env_builder.py @@ -263,23 +263,17 @@ def build_registered( # THIS WILL BE REMOVED IN THE FUTURE. cfg_entry = self.modify_env_cfg(cfg_entry) entry_point = self.get_entry_point() - - import isaaclab_arena.policy.rl_policy.base_rsl_rl_policy as base_rsl_rl_policy - - policy_cfg_entry_point = f"{base_rsl_rl_policy.__name__}:RLPolicyCfg" - - # import isaaclab_tasks.manager_based.manipulation.lift.config.franka.joint_pos_env_cfg as joint_pos_env_cfg - # import isaaclab_tasks.manager_based.manipulation.lift.config.franka.agents.rsl_rl_ppo_cfg as rsl_rl_ppo_cfg - # cfg_entry = f"{joint_pos_env_cfg.__name__}:FrankaCubeLiftEnvCfg" - # policy_cfg_entry_point = f"{rsl_rl_ppo_cfg.__name__}:LiftCubePPORunnerCfg" - + # Register the environment with the Gym registry. 
+ kwargs = { + "env_cfg_entry_point": cfg_entry, + } + if self.arena_env.rl_framework is not None: + assert self.arena_env.rl_policy_cfg is not None + kwargs[self.arena_env.rl_framework.get_entry_point_string()] = self.arena_env.rl_policy_cfg gym.register( id=name, entry_point=entry_point, - kwargs={ - "env_cfg_entry_point": cfg_entry, - "rsl_rl_cfg_entry_point": policy_cfg_entry_point, - }, + kwargs=kwargs, disable_env_checker=True, ) cfg = parse_env_cfg( diff --git a/isaaclab_arena/environments/isaaclab_arena_environment.py b/isaaclab_arena/environments/isaaclab_arena_environment.py index a3ddc315a..e778b9fe5 100644 --- a/isaaclab_arena/environments/isaaclab_arena_environment.py +++ b/isaaclab_arena/environments/isaaclab_arena_environment.py @@ -16,6 +16,8 @@ from isaaclab_arena.scene.scene import Scene from isaaclab_arena.tasks.task_base import TaskBase +from isaaclab_arena.reinforment_learning.frameworks import RLFramework + class IsaacLabArenaEnvironment: """Describes an environment in IsaacLab Arena.""" @@ -29,6 +31,8 @@ def __init__( teleop_device: TeleopDeviceBase | None = None, orchestrator: OrchestratorBase | None = None, env_cfg_callback: Callable[IsaacLabArenaManagerBasedRLEnvCfg] | None = None, + rl_framework: RLFramework | None = None, + rl_policy_cfg: str | None = None, ): """ Args: @@ -47,3 +51,5 @@ def __init__( self.teleop_device = teleop_device self.orchestrator = orchestrator self.env_cfg_callback = env_cfg_callback + self.rl_framework = rl_framework + self.rl_policy_cfg = rl_policy_cfg diff --git a/isaaclab_arena_environments/lift_object_environment.py b/isaaclab_arena_environments/lift_object_environment.py index 17857b678..8f17a8918 100644 --- a/isaaclab_arena_environments/lift_object_environment.py +++ b/isaaclab_arena_environments/lift_object_environment.py @@ -19,7 +19,9 @@ class LiftObjectEnvironment(ExampleEnvironmentBase): name: str = "lift_object" def get_env(self, args_cli: argparse.Namespace): # -> IsaacLabArenaEnvironment: + import 
isaaclab_arena.policy.rl_policy.base_rsl_rl_policy as base_rsl_rl_policy from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment + from isaaclab_arena.reinforment_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene from isaaclab_arena.tasks.lift_object_task import LiftObjectTaskRL from isaaclab_arena.utils.pose import Pose @@ -64,6 +66,8 @@ def get_env(self, args_cli: argparse.Namespace): # -> IsaacLabArenaEnvironment: scene=scene, task=task, teleop_device=teleop_device, + rl_framework=RLFramework.RSL_RL, + rl_policy_cfg=f"{base_rsl_rl_policy.__name__}:RLPolicyCfg", ) return isaaclab_arena_environment diff --git a/submodules/IsaacLab b/submodules/IsaacLab index 6acdd82a1..cafbfb890 160000 --- a/submodules/IsaacLab +++ b/submodules/IsaacLab @@ -1 +1 @@ -Subproject commit 6acdd82a1633732d32bb575e3d792e34fdeb437e +Subproject commit cafbfb890f27255003d1a4913ea8dd65d5db278f From fd4eca86d9b39053b10a2b511a080f746d9b3a4f Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 12 Feb 2026 16:25:41 +0100 Subject: [PATCH 03/14] Cleanup --- isaaclab_arena/evaluation/policy_runner.py | 2 +- .../examples/compile_env_notebook.py | 52 +++---------------- 2 files changed, 8 insertions(+), 46 deletions(-) diff --git a/isaaclab_arena/evaluation/policy_runner.py b/isaaclab_arena/evaluation/policy_runner.py index e091ce4d1..135529baf 100644 --- a/isaaclab_arena/evaluation/policy_runner.py +++ b/isaaclab_arena/evaluation/policy_runner.py @@ -112,7 +112,7 @@ def main(): """Script to run an IsaacLab Arena environment with a zero-action agent.""" args_parser = get_isaaclab_arena_cli_parser() # We do this as the parser is shared between the example environment and policy runner - # args_cli, unknown = args_parser.parse_known_args() + args_cli, unknown = args_parser.parse_known_args() # Start the simulation app with SimulationAppContext(args_cli): diff --git a/isaaclab_arena/examples/compile_env_notebook.py 
b/isaaclab_arena/examples/compile_env_notebook.py index 7ee00df23..409cdc901 100644 --- a/isaaclab_arena/examples/compile_env_notebook.py +++ b/isaaclab_arena/examples/compile_env_notebook.py @@ -15,14 +15,11 @@ simulation_app = AppLauncher() from isaaclab_arena.assets.asset_registry import AssetRegistry -from isaaclab_arena.assets.object_reference import ObjectReference -from isaaclab_arena.assets.object_set import RigidObjectSet from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment from isaaclab_arena.relations.relations import IsAnchor, On from isaaclab_arena.scene.scene import Scene -from isaaclab_arena.tasks.pick_and_place_task import PickAndPlaceTask from isaaclab_arena.utils.pose import Pose asset_registry = AssetRegistry() @@ -32,53 +29,18 @@ cracker_box = asset_registry.get_asset_by_name("cracker_box")() tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")() -# cracker_box.set_initial_pose(Pose(position_xyz=(0.4, 0.0, 0.1), rotation_wxyz=(1.0, 0.0, 0.0, 0.0))) -# cracker_box.add_relation(IsAnchor()) -# tomato_soup_can.add_relation(On(cracker_box)) +cracker_box.set_initial_pose(Pose(position_xyz=(0.4, 0.0, 0.1), rotation_wxyz=(1.0, 0.0, 0.0, 0.0))) +cracker_box.add_relation(IsAnchor()) +tomato_soup_can.add_relation(On(cracker_box)) -# object_set = RigidObjectSet( -# name="object_set", -# objects=[cracker_box, cracker_box], -# ) -OBJECT_SET_1_PRIM_PATH = "/World/envs/env_.*/ObjectSet_1" -# object_set = RigidObjectSet( -# name="single_object_set", objects=[cracker_box, cracker_box], prim_path=OBJECT_SET_1_PRIM_PATH -# ) - -# # scene = Scene(assets=[background, cracker_box, tomato_soup_can]) -# scene = Scene(assets=[background, object_set]) -# isaaclab_arena_environment = IsaacLabArenaEnvironment( -# name="reference_object_test", -# embodiment=embodiment, 
-# scene=scene, -# ) - -asset_registry = AssetRegistry() -background = asset_registry.get_asset_by_name("kitchen")() -embodiment = asset_registry.get_asset_by_name("franka")() -cracker_box = asset_registry.get_asset_by_name("cracker_box")() -destination_location = ObjectReference( - name="destination_location", - prim_path="{ENV_REGEX_NS}/kitchen/Cabinet_B_02", - parent_asset=background, -) -obj_set = RigidObjectSet( - name="single_object_set", objects=[cracker_box, tomato_soup_can], prim_path=OBJECT_SET_1_PRIM_PATH -) -obj_set.set_initial_pose(Pose(position_xyz=(0.1, 0.0, 0.1), rotation_wxyz=(1.0, 0.0, 0.0, 0.0))) -scene = Scene(assets=[background, obj_set]) +scene = Scene(assets=[background, cracker_box, tomato_soup_can]) isaaclab_arena_environment = IsaacLabArenaEnvironment( - name="single_object_set_test", + name="reference_object_test", embodiment=embodiment, scene=scene, - task=PickAndPlaceTask( - pick_up_object=obj_set, destination_location=destination_location, background_scene=background - ), - teleop_device=None, ) - -args_cli = get_isaaclab_arena_cli_parser().parse_args(["--num_envs", "3"]) +args_cli = get_isaaclab_arena_cli_parser().parse_args([]) args_cli.solve_relations = True env_builder = ArenaEnvBuilder(isaaclab_arena_environment, args_cli) env = env_builder.make_registered() @@ -87,7 +49,7 @@ # %% # Run some zero actions. -NUM_STEPS = 500 +NUM_STEPS = 1000 for _ in tqdm.tqdm(range(NUM_STEPS)): with torch.inference_mode(): actions = torch.zeros(env.action_space.shape, device=env.unwrapped.device) From 4f7fe027847bf74762f0ff76353c3d13cee86ae1 Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 12 Feb 2026 16:41:25 +0100 Subject: [PATCH 04/14] Move environment callback somewhere more sensible. 
--- .../isaaclab_arena_environment.py | 3 +- .../environments/isaaclab_interop.py | 51 ++++++++++++ isaaclab_arena/examples/training_interop.py | 79 ------------------- .../reinforcement_learning/frameworks.py | 17 ++++ .../lift_object_environment.py | 2 +- 5 files changed, 70 insertions(+), 82 deletions(-) create mode 100644 isaaclab_arena/environments/isaaclab_interop.py delete mode 100644 isaaclab_arena/examples/training_interop.py create mode 100644 isaaclab_arena/reinforcement_learning/frameworks.py diff --git a/isaaclab_arena/environments/isaaclab_arena_environment.py b/isaaclab_arena/environments/isaaclab_arena_environment.py index e778b9fe5..9eb603777 100644 --- a/isaaclab_arena/environments/isaaclab_arena_environment.py +++ b/isaaclab_arena/environments/isaaclab_arena_environment.py @@ -13,11 +13,10 @@ from isaaclab_arena.embodiments.embodiment_base import EmbodimentBase from isaaclab_arena.environments.isaaclab_arena_manager_based_env import IsaacLabArenaManagerBasedRLEnvCfg from isaaclab_arena.orchestrator.orchestrator_base import OrchestratorBase + from isaaclab_arena.reinforcement_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene from isaaclab_arena.tasks.task_base import TaskBase -from isaaclab_arena.reinforment_learning.frameworks import RLFramework - class IsaacLabArenaEnvironment: """Describes an environment in IsaacLab Arena.""" diff --git a/isaaclab_arena/environments/isaaclab_interop.py b/isaaclab_arena/environments/isaaclab_interop.py new file mode 100644 index 000000000..6fd924fe3 --- /dev/null +++ b/isaaclab_arena/environments/isaaclab_interop.py @@ -0,0 +1,51 @@ +# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +from isaaclab_arena_environments.cli import ExampleEnvironments + + +def environment_registration_callback() -> list[str]: + """This function is for use with Isaac Lab scripts to register an IsaacLab Arena environment. + + This function is passed to an Isaac Lab script as an external callback function. Example: + + python IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback + --task lift_object + --num_envs 512 + + In this case the "lift_object" environment is registered with Isaac Lab before + running the RSL RL training script. The training script will then run the + training for the lift_object environment. + + """ + from isaaclab.app import AppLauncher + + from isaaclab_arena.cli.isaaclab_arena_cli import add_isaac_lab_cli_args, add_isaaclab_arena_cli_args + from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder + + # Get the requested environment from the CLI. + parser = argparse.ArgumentParser() + # NOTE(alexmillane, 2026.02.12): With the Isaac Lab interop, we use the task name to + # determine the environment to register. The environment is also registered under this name. + # The result is that a single argument tells Arena what to register, and Lab what to run. + parser.add_argument("--task", type=str, required=True, help="Name of the IsaacLab Arena environment to register.") + environment_name = parser.parse_known_args()[0].task + environment = ExampleEnvironments[environment_name]() + # Get the full list of environment-specific CLI args. 
+ AppLauncher.add_app_launcher_args(parser) + add_isaac_lab_cli_args(parser) + add_isaaclab_arena_cli_args(parser) + environment.add_cli_args(parser) + args, remaining_args = parser.parse_known_args() + # Create the environment config + isaaclab_arena_environment = environment.get_env(args) + # Build and register the environment + env_builder = ArenaEnvBuilder(isaaclab_arena_environment, args) + env_builder.build_registered() + # Return the arguments that were not consumed by this callback + return remaining_args diff --git a/isaaclab_arena/examples/training_interop.py b/isaaclab_arena/examples/training_interop.py deleted file mode 100644 index 3a654acd9..000000000 --- a/isaaclab_arena/examples/training_interop.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -import argparse - -# from isaaclab_arena_environments.lift_object_environment import LiftObjectEnvironment -from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment -from isaaclab_arena_environments.cli import ExampleEnvironments - - -# def add_environment_registration_args(parser: argparse.ArgumentParser) -> argparse.ArgumentGroup: -# def get_environment(argv: list[str]) -> tuple[IsaacLabArenaEnvironment, list[str]]: -def get_environment( - arena_environment_name: str, remaining_args: list[str] -) -> tuple[IsaacLabArenaEnvironment, list[str]]: - # Get the environment class - environment = ExampleEnvironments[arena_environment_name]() - # Get arguments associated with this environment - parser = argparse.ArgumentParser() - environment.add_cli_args(parser) - # args, remaining_args = parser.parse_known_args(remaining_args) - args, remaining_args = parser.parse_known_args(remaining_args) - # Build the environment (from the args) - isaaclab_arena_environment = environment.get_env(args) - return 
isaaclab_arena_environment, remaining_args - - -# def my_env_registration_callback(argv: list[str]) -> list[str]: -def my_env_registration_callback() -> list[str]: - """Parse arena-specific CLI args, register env, and return remaining args. - - This function is designed to be called from the main training script with - the list of arguments that have not yet been consumed. It parses only the - arguments it knows about and returns the leftover list for downstream use - (e.g. Hydra / other parsers). - """ - - from isaaclab.app import AppLauncher - - from isaaclab_arena.cli.isaaclab_arena_cli import add_isaac_lab_cli_args, add_isaaclab_arena_cli_args - from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder - - # print("Hello from my_env_registration_callback!") - # Build parser for arena-specific CLI args and parse only from the provided argv - # parser = get_isaaclab_arena_cli_parser() - # LiftObjectEnvironment.add_cli_args(parser) - # args, remaining_args = parser.parse_known_args(argv) - # # args, _ = get_isaaclab_arena_environments_cli_parser().parse_known_args() - # print(f"args: {args}") - # isaaclab_arena_environment = LiftObjectEnvironment().get_env(args) - # print(f"isaaclab_arena_environment: {isaaclab_arena_environment}") - # isaaclab_arena_environment, remaining_args = get_environment(argv) - parser = argparse.ArgumentParser() - # NOTE(alexmillane, 2026.02.12): With the Isaac Lab interop, we use the task name to - # determine the environment to register. The environment is also registered under this name. - # The result is that a single arugment tells Arena what to register, and Lab what to run. 
- parser.add_argument("--task", type=str, required=True, help="Name of the IsaacLab Arena environment to register.") - # Get the environment class - environment_name = parser.parse_known_args()[0].task - environment = ExampleEnvironments[environment_name]() - # Get the full list of arguments - AppLauncher.add_app_launcher_args(parser) - add_isaac_lab_cli_args(parser) - add_isaaclab_arena_cli_args(parser) - environment.add_cli_args(parser) - args, remaining_args = parser.parse_known_args() - - # Get the environment - # isaaclab_arena_environment, remaining_args = get_environment(args.task, remaining_args) - isaaclab_arena_environment = environment.get_env(args) - - # Build and register the environment (from the args) - env_builder = ArenaEnvBuilder(isaaclab_arena_environment, args) - env_builder.build_registered() - # print(f"env_cfg: {env_cfg}") - # Return only the arguments that were not consumed by this callback - return remaining_args diff --git a/isaaclab_arena/reinforcement_learning/frameworks.py b/isaaclab_arena/reinforcement_learning/frameworks.py new file mode 100644 index 000000000..7d4aaa08f --- /dev/null +++ b/isaaclab_arena/reinforcement_learning/frameworks.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + + +from enum import Enum + + +class RLFramework(Enum): + RSL_RL = "rsl_rl" + SKRL = "skrl" + RL_GAMES = "rl_games" + SB3 = "sb3" + + def get_entry_point_string(self) -> str: + return f"{self.value}_cfg_entry_point" diff --git a/isaaclab_arena_environments/lift_object_environment.py b/isaaclab_arena_environments/lift_object_environment.py index 8f17a8918..9fcc4f789 100644 --- a/isaaclab_arena_environments/lift_object_environment.py +++ b/isaaclab_arena_environments/lift_object_environment.py @@ -21,7 +21,7 @@ class LiftObjectEnvironment(ExampleEnvironmentBase): def get_env(self, args_cli: argparse.Namespace): # -> IsaacLabArenaEnvironment: import isaaclab_arena.policy.rl_policy.base_rsl_rl_policy as base_rsl_rl_policy from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment - from isaaclab_arena.reinforment_learning.frameworks import RLFramework + from isaaclab_arena.reinforcement_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene from isaaclab_arena.tasks.lift_object_task import LiftObjectTaskRL from isaaclab_arena.utils.pose import Pose From 16738cc079a88f8deec7db3adadead20152c053c Mon Sep 17 00:00:00 2001 From: alex Date: Fri, 13 Feb 2026 10:36:43 +0100 Subject: [PATCH 05/14] Remove RL scripts. 
--- .../examples/rigid_object_variant.py | 42 ++++ .../policy/rl_policy/base_rsl_rl_policy.py | 2 - .../reinforcement_learning/__init__.py | 4 - .../reinforcement_learning/cli_args.py | 106 --------- .../scripts/reinforcement_learning/play.py | 202 ---------------- .../scripts/reinforcement_learning/train.py | 224 ------------------ 6 files changed, 42 insertions(+), 538 deletions(-) create mode 100644 isaaclab_arena/examples/rigid_object_variant.py delete mode 100644 isaaclab_arena/scripts/reinforcement_learning/__init__.py delete mode 100644 isaaclab_arena/scripts/reinforcement_learning/cli_args.py delete mode 100644 isaaclab_arena/scripts/reinforcement_learning/play.py delete mode 100644 isaaclab_arena/scripts/reinforcement_learning/train.py diff --git a/isaaclab_arena/examples/rigid_object_variant.py b/isaaclab_arena/examples/rigid_object_variant.py new file mode 100644 index 000000000..37b59ae2a --- /dev/null +++ b/isaaclab_arena/examples/rigid_object_variant.py @@ -0,0 +1,42 @@ + +rigid_object_variant_cfg = RigidObjectVariantCfg( + assets={ + "box": RigidObjectCfg( + name="box", + spawn=UsdFileCfg( + usd_path="path/to/box.usd", + ), + scale=(1.0, 1.0, 1.0), + initial_pose=Pose(position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)), + ), + "sphere": RigidObjectCfg( + name="sphere", + spawn=UsdFileCfg( + usd_path="path/to/sphere.usd", + ), + scale=(2.0, 2.0, 2.0), + initial_pose=Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)), + ), + } +} + + + +cracker_box = asset_registry.get_asset_by_name("cracker_box")() +tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")() +object_set = RigidObjectSet(name="object_set", objects=[cracker_box, tomato_soup_can]) +object_set.set_initial_pose(Pose(position=(0.0, 0.0, 0.0), orientation=(1.0, 0.0, 0.0, 0.0))) + + + + +cracker_box = asset_registry.get_asset_by_name("cracker_box")() +tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")() +object_set = 
RigidObjectSet(name="object_set", objects=[cracker_box, tomato_soup_can]) +object_set.set_initial_pose( + PoseVariant(poses={ + cracker_box: Pose(position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)), + tomato_soup_can: Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)), + }) +) + diff --git a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py b/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py index 114454e69..90d2ff93d 100644 --- a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py +++ b/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py @@ -39,8 +39,6 @@ class RLPolicyCfg(RslRlOnPolicyRunnerCfg): "critic": ["policy"], } ) - # policy: RslRlPpoActorCriticCfg = field(default_factory=RslRlPpoActorCriticCfg) - # algorithm: RslRlPpoAlgorithmCfg = field(default_factory=RslRlPpoAlgorithmCfg) policy: RslRlPpoActorCriticCfg = RslRlPpoActorCriticCfg( init_noise_std=1.0, actor_obs_normalization=False, diff --git a/isaaclab_arena/scripts/reinforcement_learning/__init__.py b/isaaclab_arena/scripts/reinforcement_learning/__init__.py deleted file mode 100644 index fee3a6a9f..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 diff --git a/isaaclab_arena/scripts/reinforcement_learning/cli_args.py b/isaaclab_arena/scripts/reinforcement_learning/cli_args.py deleted file mode 100644 index 8148c1226..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/cli_args.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. 
-# -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import argparse -import random -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg - - -def add_rsl_rl_args(parser: argparse.ArgumentParser): - """Add RSL-RL arguments to the parser. - - Args: - parser: The parser to add the arguments to. - """ - # create a new argument group - arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.") - arg_group.add_argument("--run_name", type=str, default=None, help="Run name suffix to the log directory.") - # -- load arguments - arg_group.add_argument("--resume", action="store_true", default=False, help="Whether to resume from a checkpoint.") - arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.") - arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.") - # -- logger arguments - arg_group.add_argument( - "--logger", type=str, default=None, choices={"wandb", "tensorboard", "neptune"}, help="Logger module to use." - ) - arg_group.add_argument( - "--log_project_name", type=str, default=None, help="Name of the logging project when using wandb or neptune." - ) - - -def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlBaseRunnerCfg: - """Parse configuration for RSL-RL agent based on inputs. - - Args: - task_name: The name of the environment. - args_cli: The command line arguments. - - Returns: - The parsed configuration for RSL-RL agent based on inputs. 
- """ - from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry - - # load the default configuration - rslrl_cfg: RslRlBaseRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") - rslrl_cfg = update_rsl_rl_cfg(rslrl_cfg, args_cli) - return rslrl_cfg - - -def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespace): - """Update configuration for RSL-RL agent based on inputs. - - Args: - agent_cfg: The configuration for RSL-RL agent. - args_cli: The command line arguments. - - Returns: - The updated configuration for RSL-RL agent based on inputs. - """ - # override the default configuration with CLI arguments - if hasattr(args_cli, "seed") and args_cli.seed is not None: - # randomly sample a seed if seed = -1 - if args_cli.seed == -1: - args_cli.seed = random.randint(0, 10000) - agent_cfg.seed = args_cli.seed - if args_cli.resume is not None: - agent_cfg.resume = args_cli.resume - if args_cli.load_run is not None: - agent_cfg.load_run = args_cli.load_run - if args_cli.checkpoint is not None: - agent_cfg.load_checkpoint = args_cli.checkpoint - if args_cli.run_name is not None: - agent_cfg.run_name = args_cli.run_name - if args_cli.logger is not None: - agent_cfg.logger = args_cli.logger - # set the project name for wandb and neptune - if agent_cfg.logger in {"wandb", "neptune"} and args_cli.log_project_name: - agent_cfg.wandb_project = args_cli.log_project_name - agent_cfg.neptune_project = args_cli.log_project_name - - return agent_cfg - - -def add_rsl_rl_policy_args(parser: argparse.ArgumentParser): - """Add RSL-RL policy arguments to the parser. - - Args: - parser: The parser to add the arguments to. 
- """ - arg_group = parser.add_argument_group("rsl_rl_policy", description="Arguments for RSL-RL policy.") - arg_group.add_argument("--num_steps_per_env", type=int, default=24, help="Number of steps per environment.") - arg_group.add_argument("--max_iterations", type=int, default=4000, help="Maximum number of iterations.") - arg_group.add_argument("--save_interval", type=int, default=200, help="Save interval.") - arg_group.add_argument( - "--experiment_name", - type=str, - default="generic_experiment", - help="Name of the experiment folder where logs will be stored.", - ) - return arg_group diff --git a/isaaclab_arena/scripts/reinforcement_learning/play.py b/isaaclab_arena/scripts/reinforcement_learning/play.py deleted file mode 100644 index e8edb3196..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/play.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. 
-# -# SPDX-License-Identifier: Apache-2.0 - -"""Script to play a checkpoint if an RL agent from RSL-RL.""" - -"""Launch Isaac Sim Simulator first.""" - -from pathlib import Path - -from isaaclab.app import AppLauncher - -from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser -from isaaclab_arena_environments.cli import add_example_environments_cli_args - -# local imports -import cli_args # isort: skip - -# add argparse arguments -parser = get_isaaclab_arena_cli_parser() -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") -parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") -parser.add_argument( - "--agent_cfg_path", - type=Path, - default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"), - help="Path to the RL agent configuration file.", -) -parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") -# append RSL-RL cli arguments -cli_args.add_rsl_rl_args(parser) -cli_args.add_rsl_rl_policy_args(parser) -# Add the example environments CLI args -# NOTE(alexmillane, 2025.09.04): This has to be added last, because -# of the app specific flags being parsed after the global flags. 
-add_example_environments_cli_args(parser) -args_cli = parser.parse_args() - -# always enable cameras to record video -if args_cli.video: - args_cli.enable_cameras = True - -if args_cli.enable_pinocchio: - # Import pinocchio before AppLauncher to force the use of the version installed by IsaacLab and not the one installed by Isaac Sim - # pinocchio is required by the Pink IK controllers and the GR1T2 retargeter - import pinocchio # noqa: F401 - -# launch omniverse app -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app - -"""Rest everything follows.""" - -import gymnasium as gym -import os -import time -import torch - -import isaaclab_tasks # noqa: F401 -import omni.log -from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent -from isaaclab.utils.assets import retrieve_file_path -from isaaclab.utils.dict import print_dict -from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx -from isaaclab_tasks.utils import get_checkpoint_path -from rsl_rl.runners import DistillationRunner, OnPolicyRunner - -from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg -from isaaclab_arena_environments.cli import get_arena_builder_from_cli - -# PLACEHOLDER: Extension template (do not remove this comment) - - -def main(): - """Play with RSL-RL agent.""" - # We dont use hydra for the environment configuration, so we need to parse it manually - # parse configuration - try: - arena_builder = get_arena_builder_from_cli(args_cli) - env_name, env_cfg = arena_builder.build_registered() - - except Exception as e: - omni.log.error(f"Failed to parse environment configuration: {e}") - exit(1) - - agent_cfg = get_agent_cfg(args_cli) - - # override configurations with non-hydra CLI arguments - agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) - env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs - - # set 
the environment seed - # note: certain randomizations occur in the environment initialization so we set the seed here - env_cfg.seed = agent_cfg.seed - env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device - - # specify directory for logging experiments - log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) - log_root_path = os.path.abspath(log_root_path) - print(f"[INFO] Loading experiment from directory: {log_root_path}") - if args_cli.checkpoint: - resume_path = retrieve_file_path(args_cli.checkpoint) - else: - resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) - - log_dir = os.path.dirname(resume_path) - - # set the log directory for the environment (works for all environment types) - env_cfg.log_dir = log_dir - - # create isaac environment - env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) - - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - - # wrap for video recording - if args_cli.video: - video_kwargs = { - "video_folder": os.path.join(log_dir, "videos", "play"), - "step_trigger": lambda step: step == 0, - "video_length": args_cli.video_length, - "disable_logger": True, - } - print("[INFO] Recording videos during training.") - print_dict(video_kwargs, nesting=4) - env = gym.wrappers.RecordVideo(env, **video_kwargs) - - # wrap around environment for rsl-rl - env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) - - print(f"[INFO]: Loading model checkpoint from: {resume_path}") - # load previously trained model - if agent_cfg.class_name == "OnPolicyRunner": - runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - elif agent_cfg.class_name == "DistillationRunner": - runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - 
else: - raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") - runner.load(resume_path) - - # obtain the trained policy for inference - policy = runner.get_inference_policy(device=env.unwrapped.device) - - # extract the neural network module - # we do this in a try-except to maintain backwards compatibility. - try: - # version 2.3 onwards - policy_nn = runner.alg.policy - except AttributeError: - # version 2.2 and below - policy_nn = runner.alg.actor_critic - - # extract the normalizer - if hasattr(policy_nn, "actor_obs_normalizer"): - normalizer = policy_nn.actor_obs_normalizer - elif hasattr(policy_nn, "student_obs_normalizer"): - normalizer = policy_nn.student_obs_normalizer - else: - normalizer = None - - # export policy to onnx/jit - export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") - export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") - export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") - - dt = env.unwrapped.step_dt - - # reset environment - obs = env.get_observations() - timestep = 0 - # simulate environment - while simulation_app.is_running(): - start_time = time.time() - # run everything in inference mode - with torch.inference_mode(): - # agent stepping - actions = policy(obs) - # env stepping - obs, _, _, _ = env.step(actions) - if args_cli.video: - timestep += 1 - # Exit the play loop after recording one video - if timestep == args_cli.video_length: - break - - # time delay for real-time evaluation - sleep_time = dt - (time.time() - start_time) - if args_cli.real_time and sleep_time > 0: - time.sleep(sleep_time) - - # close the simulator - env.close() - - -if __name__ == "__main__": - # run the main function - main() - # close sim app - simulation_app.close() diff --git a/isaaclab_arena/scripts/reinforcement_learning/train.py b/isaaclab_arena/scripts/reinforcement_learning/train.py deleted file mode 100644 index 
a7ad52391..000000000 --- a/isaaclab_arena/scripts/reinforcement_learning/train.py +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - -"""Script to train RL agent with RSL-RL.""" - -"""Launch Isaac Sim Simulator first.""" - -from pathlib import Path - -from isaaclab.app import AppLauncher - -from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser -from isaaclab_arena_environments.cli import add_example_environments_cli_args - -# local imports -import cli_args # isort: skip - -# add argparse arguments -parser = get_isaaclab_arena_cli_parser() -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") -parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") -parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") -parser.add_argument( - "--agent_cfg_path", - type=Path, - default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"), - help="Path to the RL agent configuration file.", -) -parser.add_argument( - "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." 
-) -parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") -# append RSL-RL cli arguments -cli_args.add_rsl_rl_args(parser) -cli_args.add_rsl_rl_policy_args(parser) -# Add the example environments CLI args -# NOTE(alexmillane, 2025.09.04): This has to be added last, because -# of the app specific flags being parsed after the global flags. -add_example_environments_cli_args(parser) -args_cli = parser.parse_args() - -# always enable cameras to record video -if args_cli.video: - args_cli.enable_cameras = True - -if args_cli.enable_pinocchio: - # Import pinocchio before AppLauncher to force the use of the version installed by IsaacLab and not the one installed by Isaac Sim - # pinocchio is required by the Pink IK controllers and the GR1T2 retargeter - import pinocchio # noqa: F401 - -# launch omniverse app -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app - -"""Check for minimum supported RSL-RL version.""" - -import importlib.metadata as metadata -import platform - -from packaging import version - -# check minimum supported rsl-rl version -RSL_RL_VERSION = "3.0.1" -installed_version = metadata.version("rsl-rl-lib") -if version.parse(installed_version) < version.parse(RSL_RL_VERSION): - if platform.system() == "Windows": - cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] - else: - cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] - print( - f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'" - f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:" - f"\n\n\t{' '.join(cmd)}\n" - ) - exit(1) - -"""Rest everything follows.""" - -import gymnasium as gym -import os -import torch -from datetime import datetime - -import isaaclab_tasks # noqa: F401 -import omni.log -from isaaclab.envs import DirectMARLEnv, ManagerBasedRLEnvCfg, 
multi_agent_to_single_agent -from isaaclab.utils.dict import print_dict -from isaaclab.utils.io import dump_yaml -from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper -from isaaclab_tasks.utils import get_checkpoint_path -from rsl_rl.runners import DistillationRunner, OnPolicyRunner - -from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg -from isaaclab_arena_environments.cli import get_arena_builder_from_cli - -# PLACEHOLDER: Extension template (do not remove this comment) - -torch.backends.cuda.matmul.allow_tf32 = True -torch.backends.cudnn.allow_tf32 = True -torch.backends.cudnn.deterministic = False -torch.backends.cudnn.benchmark = False - - -def main(): - # We dont use hydra for the environment configuration, so we need to parse it manually - # parse configuration - try: - arena_builder = get_arena_builder_from_cli(args_cli) - env_name, env_cfg = arena_builder.build_registered() - - except Exception as e: - omni.log.error(f"Failed to parse environment configuration: {e}") - exit(1) - - agent_cfg = get_agent_cfg(args_cli) - - # set the environment seed - # note: certain randomizations occur in the environment initialization so we set the seed here - env_cfg.seed = agent_cfg.seed - env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device - # check for invalid combination of CPU device with distributed training - if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device: - raise ValueError( - "Distributed training is not supported when using CPU device. " - "Please use GPU device (e.g., --device cuda) for distributed training." 
- ) - - # multi-gpu training configuration - if args_cli.distributed: - env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" - agent_cfg.device = f"cuda:{app_launcher.local_rank}" - - # set seed to have diversity in different threads - seed = agent_cfg.seed + app_launcher.local_rank - env_cfg.seed = seed - agent_cfg.seed = seed - - # specify directory for logging experiments - log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) - log_root_path = os.path.abspath(log_root_path) - print(f"[INFO] Logging experiment in directory: {log_root_path}") - # specify directory for logging runs: {time-stamp}_{run_name} - log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - # The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849) - print(f"Exact experiment name requested from command line: {log_dir}") - if agent_cfg.run_name: - log_dir += f"_{agent_cfg.run_name}" - log_dir = os.path.join(log_root_path, log_dir) - - # set the IO descriptors export flag if requested - if isinstance(env_cfg, ManagerBasedRLEnvCfg): - env_cfg.export_io_descriptors = args_cli.export_io_descriptors - else: - omni.log.warn( - "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported." 
- ) - - # set the log directory for the environment (works for all environment types) - env_cfg.log_dir = log_dir - - # create isaac environment - env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) - - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - - # save resume path before creating a new log_dir - if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": - resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) - - # wrap for video recording - if args_cli.video: - video_kwargs = { - "video_folder": os.path.join(log_dir, "videos", "train"), - "step_trigger": lambda step: step % args_cli.video_interval == 0, - "video_length": args_cli.video_length, - "disable_logger": True, - } - print("[INFO] Recording videos during training.") - print_dict(video_kwargs, nesting=4) - env = gym.wrappers.RecordVideo(env, **video_kwargs) - - # wrap around environment for rsl-rl - env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) - - # create runner from rsl-rl - if agent_cfg.class_name == "OnPolicyRunner": - runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) - elif agent_cfg.class_name == "DistillationRunner": - runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) - else: - raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") - # write git state to logs - runner.add_git_repo_to_log(__file__) - # load the checkpoint - if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": - print(f"[INFO]: Loading model checkpoint from: {resume_path}") - # load previously trained model - runner.load(resume_path) - - # dump the configuration into log-directory - dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) - dump_yaml(os.path.join(log_dir, 
"params", "agent.yaml"), agent_cfg) - - # run training - runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) - - # close the simulator - env.close() - - -if __name__ == "__main__": - # run the main function - main() - # close sim app - simulation_app.close() From 74fc4c1e682a87797765782f971ff71ce8fdc57b Mon Sep 17 00:00:00 2001 From: alex Date: Fri, 13 Feb 2026 10:50:50 +0100 Subject: [PATCH 06/14] Remove bug to get working. --- isaaclab_arena/examples/rigid_object_variant.py | 5 ++++- isaaclab_arena/policy/rsl_rl_action_policy.py | 3 ++- submodules/IsaacLab | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/isaaclab_arena/examples/rigid_object_variant.py b/isaaclab_arena/examples/rigid_object_variant.py index 37b59ae2a..e7a4aad40 100644 --- a/isaaclab_arena/examples/rigid_object_variant.py +++ b/isaaclab_arena/examples/rigid_object_variant.py @@ -1,3 +1,7 @@ +# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 rigid_object_variant_cfg = RigidObjectVariantCfg( assets={ @@ -39,4 +43,3 @@ tomato_soup_can: Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)), }) ) - diff --git a/isaaclab_arena/policy/rsl_rl_action_policy.py b/isaaclab_arena/policy/rsl_rl_action_policy.py index e5bb4b441..132c94f1a 100644 --- a/isaaclab_arena/policy/rsl_rl_action_policy.py +++ b/isaaclab_arena/policy/rsl_rl_action_policy.py @@ -17,7 +17,8 @@ from isaaclab_arena.assets.register import register_policy from isaaclab_arena.policy.policy_base import PolicyBase from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg -from isaaclab_arena.scripts.reinforcement_learning import cli_args + +# from isaaclab_arena.scripts.reinforcement_learning import cli_args @dataclass diff --git a/submodules/IsaacLab b/submodules/IsaacLab index cafbfb890..018a78a8d 160000 --- a/submodules/IsaacLab +++ b/submodules/IsaacLab @@ -1 +1 @@ -Subproject commit cafbfb890f27255003d1a4913ea8dd65d5db278f +Subproject commit 018a78a8d35f2e90d53b9009c09d4067bfd03d30 From 5045dc40daff8bb76e12d8e415d6bbf45e596c2b Mon Sep 17 00:00:00 2001 From: Clemens Volk Date: Mon, 2 Mar 2026 16:44:39 +0100 Subject: [PATCH 07/14] Load RSL-RL agent config from checkpoint params/agent.yaml Remove the separate JSON agent config file. RslRlActionPolicy now auto-detects params/agent.yaml saved alongside the checkpoint by IsaacLab's train.py, making the checkpoint the single source of truth. 
Signed-off-by: Clemens Volk --- .../policy/rl_policy/base_rsl_rl_policy.py | 66 +----- .../policy/rl_policy/generic_policy.json | 28 --- isaaclab_arena/policy/rsl_rl_action_policy.py | 191 ++++-------------- 3 files changed, 43 insertions(+), 242 deletions(-) delete mode 100644 isaaclab_arena/policy/rl_policy/generic_policy.json diff --git a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py b/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py index 90d2ff93d..7ae23a428 100644 --- a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py +++ b/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py @@ -3,24 +3,7 @@ # # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json from dataclasses import field -from typing import Any from isaaclab.utils import configclass from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg @@ -28,6 +11,11 @@ @configclass class RLPolicyCfg(RslRlOnPolicyRunnerCfg): + """Default RSL-RL runner configuration for Arena environments. + + Used as the ``rsl_rl_cfg_entry_point`` when registering environments with gym, + allowing IsaacLab's ``train.py`` to load it via ``@hydra_task_config``. 
+ """ num_steps_per_env: int = 24 max_iterations: int = 4000 @@ -61,47 +49,3 @@ class RLPolicyCfg(RslRlOnPolicyRunnerCfg): desired_kl=0.01, max_grad_norm=1.0, ) - - @classmethod - def update_cfg( - cls, - policy_cfg: dict[str, Any], - algorithm_cfg: dict[str, Any], - obs_groups: dict[str, list[str]], - num_steps_per_env: int, - max_iterations: int, - save_interval: int, - experiment_name: str, - ): - cfg = cls() - cfg.policy = RslRlPpoActorCriticCfg(**policy_cfg) - cfg.algorithm = RslRlPpoAlgorithmCfg(**algorithm_cfg) - cfg.obs_groups = obs_groups - cfg.num_steps_per_env = num_steps_per_env - cfg.max_iterations = max_iterations - cfg.save_interval = save_interval - cfg.experiment_name = experiment_name - return cfg - - -def get_agent_cfg(args_cli: argparse.Namespace) -> Any: - """Get the environment and agent configuration from the command line arguments.""" - - # Read a json file containing the agent configuration - with open(args_cli.agent_cfg_path) as f: - agent_cfg_dict = json.load(f) - - policy_cfg = agent_cfg_dict["policy_cfg"] - algorithm_cfg = agent_cfg_dict["algorithm_cfg"] - obs_groups = agent_cfg_dict["obs_groups"] - # Load all other arguments if they are in args_cli as policy arguments - num_steps_per_env = args_cli.num_steps_per_env - max_iterations = args_cli.max_iterations - save_interval = args_cli.save_interval - experiment_name = args_cli.experiment_name - - agent_cfg = RLPolicyCfg.update_cfg( - policy_cfg, algorithm_cfg, obs_groups, num_steps_per_env, max_iterations, save_interval, experiment_name - ) - - return agent_cfg diff --git a/isaaclab_arena/policy/rl_policy/generic_policy.json b/isaaclab_arena/policy/rl_policy/generic_policy.json deleted file mode 100644 index 6fdf9d2fa..000000000 --- a/isaaclab_arena/policy/rl_policy/generic_policy.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "policy_cfg": { - "init_noise_std": 1.0, - "actor_obs_normalization": false, - "critic_obs_normalization": false, - "actor_hidden_dims": [256, 128, 64], - 
"critic_hidden_dims": [256, 128, 64], - "activation": "elu" - }, - "algorithm_cfg": { - "value_loss_coef": 1.0, - "use_clipped_value_loss": true, - "clip_param": 0.2, - "entropy_coef": 0.006, - "num_learning_epochs": 5, - "num_mini_batches": 4, - "learning_rate": 0.0001, - "schedule": "adaptive", - "gamma": 0.98, - "lam": 0.95, - "desired_kl": 0.01, - "max_grad_norm": 1.0 - }, - "obs_groups": { - "policy": ["policy", "task_obs"], - "critic": ["policy", "task_obs"] - } - } diff --git a/isaaclab_arena/policy/rsl_rl_action_policy.py b/isaaclab_arena/policy/rsl_rl_action_policy.py index 132c94f1a..aee721404 100644 --- a/isaaclab_arena/policy/rsl_rl_action_policy.py +++ b/isaaclab_arena/policy/rsl_rl_action_policy.py @@ -5,68 +5,50 @@ import argparse import gymnasium as gym +import os import torch from dataclasses import dataclass from gymnasium.spaces.dict import Dict as GymSpacesDict -from pathlib import Path from isaaclab.utils.assets import retrieve_file_path +from isaaclab.utils.io import load_yaml from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper from rsl_rl.runners import DistillationRunner, OnPolicyRunner from isaaclab_arena.assets.register import register_policy from isaaclab_arena.policy.policy_base import PolicyBase -from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg - -# from isaaclab_arena.scripts.reinforcement_learning import cli_args @dataclass class RslRlActionPolicyConfig: - """ - Configuration dataclass for RSL-RL action policy. - - This dataclass serves as the single source of truth for policy configuration, - supporting both dict-based (from JSON) and CLI-based configuration paths. - """ + """Configuration dataclass for RSL-RL action policy.""" checkpoint_path: str - """Path to the RSL-RL checkpoint file.""" + """Path to the RSL-RL checkpoint file. 
- agent_cfg_path: Path = Path("isaaclab_arena/policy/rl_policy/generic_policy.json") - """Path to the RL agent configuration file.""" + The agent config is loaded automatically from ``params/agent.yaml`` in the + same directory, which is saved by IsaacLab's ``train.py`` alongside the checkpoint. + """ device: str = "cuda:0" """Device to run the policy on.""" @classmethod def from_cli_args(cls, args: argparse.Namespace) -> "RslRlActionPolicyConfig": - """ - Create configuration from parsed CLI arguments. - - Args: - args: Parsed command line arguments - - Returns: - RslRlActionPolicyConfig instance - """ return cls( checkpoint_path=args.checkpoint_path, - agent_cfg_path=args.agent_cfg_path, device=args.device if hasattr(args, "device") else "cuda:0", ) @register_policy class RslRlActionPolicy(PolicyBase): - """ - Policy that uses a trained RSL-RL model for inference. + """Policy that uses a trained RSL-RL model for inference. - This policy loads a checkpoint from RSL-RL training and uses it to generate - actions. It expects the environment to already be wrapped with RslRlVecEnvWrapper - if called from evaluation scripts. + Loads the checkpoint and agent config (``params/agent.yaml``) produced by + IsaacLab's ``train.py``. No separate JSON config file is required. - Example JSON configuration for eval runner: + Example configuration for eval runner: .. 
code-block:: json @@ -76,9 +58,8 @@ class RslRlActionPolicy(PolicyBase): "name": "eval_lift_cube", "policy_type": "rsl_rl", "policy_config_dict": { - "checkpoint_path": "logs/rsl_rl/lift_object/model_1000.pt", - "agent_cfg_path": "isaaclab_arena/policy/rl_policy/generic_policy.json", - "device": "cuda:0", + "checkpoint_path": "logs/rsl_rl/lift_object/2026-01-28_17-26-10/model_1000.pt", + "device": "cuda:0" }, "arena_env_args": ["lift_object", "--embodiment", "franka"] } @@ -89,137 +70,58 @@ class RslRlActionPolicy(PolicyBase): name = "rsl_rl" config_class = RslRlActionPolicyConfig - def __init__(self, config: RslRlActionPolicyConfig, args_cli: argparse.Namespace | None = None): - """ - Initialize RSL-RL action policy from a configuration dataclass. - - Args: - config: RslRlActionPolicyConfig configuration dataclass - args_cli: Optional CLI arguments namespace. If provided, uses get_agent_cfg(). - If None, loads agent config directly from JSON file. - """ + def __init__(self, config: RslRlActionPolicyConfig): super().__init__(config) self.config: RslRlActionPolicyConfig = config self._policy = None self._runner = None - self._env_is_wrapped = False - self.args_cli = args_cli def _load_policy(self, env: gym.Env) -> None: - """ - Load the RSL-RL policy from checkpoint. 
- - Args: - env: The gym environment (should already be wrapped with RslRlVecEnvWrapper) - """ - import json - - # Load agent configuration - # Prefer using get_agent_cfg() if args_cli is available (more robust) - # Otherwise, load directly from JSON (for from_dict() path) - if self.args_cli is not None: - agent_cfg = get_agent_cfg(self.args_cli) - else: - # Fallback: Load agent configuration directly from JSON file - with open(self.config.agent_cfg_path) as f: - agent_cfg_dict = json.load(f) - - # Import the config class and create agent config - from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import RLPolicyCfg - - policy_cfg = agent_cfg_dict["policy_cfg"] - algorithm_cfg = agent_cfg_dict["algorithm_cfg"] - obs_groups = agent_cfg_dict.get("obs_groups", {}) - - # Use defaults for training-specific parameters (not needed for inference) - num_steps_per_env = agent_cfg_dict.get("num_steps_per_env", 24) - max_iterations = agent_cfg_dict.get("max_iterations", 1500) - save_interval = agent_cfg_dict.get("save_interval", 100) - experiment_name = agent_cfg_dict.get("experiment_name", "rsl_rl") + """Load the RSL-RL policy from checkpoint and its accompanying agent.yaml.""" + checkpoint_path = retrieve_file_path(self.config.checkpoint_path) + agent_yaml_path = os.path.join(os.path.dirname(checkpoint_path), "params", "agent.yaml") - agent_cfg = RLPolicyCfg.update_cfg( - policy_cfg, algorithm_cfg, obs_groups, num_steps_per_env, max_iterations, save_interval, experiment_name + if not os.path.exists(agent_yaml_path): + raise FileNotFoundError( + f"No agent config found at {agent_yaml_path}. " + "Ensure the checkpoint was produced by IsaacLab's train.py." 
) - # Override device from config - agent_cfg.device = self.config.device + agent_cfg_dict = load_yaml(agent_yaml_path) + agent_cfg_dict["device"] = self.config.device - # Check if environment is already wrapped - if isinstance(env, RslRlVecEnvWrapper): - wrapped_env = env - self._env_is_wrapped = True - else: - # Wrap if needed (for standalone policy runner usage) - wrapped_env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) - self._env_is_wrapped = False + clip_actions = agent_cfg_dict.get("clip_actions") + class_name = agent_cfg_dict.get("class_name", "OnPolicyRunner") - # Create the appropriate runner - if agent_cfg.class_name == "OnPolicyRunner": - self._runner = OnPolicyRunner( - wrapped_env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device # type: ignore[attr-defined] - ) - elif agent_cfg.class_name == "DistillationRunner": - self._runner = DistillationRunner( - wrapped_env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device # type: ignore[attr-defined] - ) + wrapped_env = RslRlVecEnvWrapper(env, clip_actions=clip_actions) + + if class_name == "OnPolicyRunner": + self._runner = OnPolicyRunner(wrapped_env, agent_cfg_dict, log_dir=None, device=self.config.device) + elif class_name == "DistillationRunner": + self._runner = DistillationRunner(wrapped_env, agent_cfg_dict, log_dir=None, device=self.config.device) else: - raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + raise ValueError(f"Unsupported runner class: {class_name}") - # Load the checkpoint - checkpoint_path = retrieve_file_path(self.config.checkpoint_path) print(f"[INFO] Loading RSL-RL checkpoint from: {checkpoint_path}") self._runner.load(checkpoint_path) - - # Get the inference policy self._policy = self._runner.get_inference_policy(device=wrapped_env.unwrapped.device) def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor: - """ - Get the action from the RSL-RL policy. 
- - Args: - env: The gym environment - observation: Current observation from the environment - - Returns: - Action tensor from the policy - """ - # Load policy on first call if self._policy is None: self._load_policy(env) - # Type checker doesn't know _policy is not None after _load_policy assert self._policy is not None, "Policy should be loaded after _load_policy()" with torch.inference_mode(): return self._policy(observation) def reset(self, env_ids: torch.Tensor | None = None) -> None: - """ - Reset the policy state for specific environments. - - Args: - env_ids: Indices of environments to reset. If None, reset all. - """ - # RSL-RL policies are typically stateless for evaluation - # Override if your policy has recurrent components pass @classmethod def from_dict(cls, config_dict: dict) -> "RslRlActionPolicy": - """ - Create a policy instance from a configuration dictionary. - - This override ensures args_cli is None when loading from JSON config. - - Args: - config_dict: Dictionary containing the configuration fields - - Returns: - RslRlActionPolicy instance - """ config = RslRlActionPolicyConfig(**config_dict) - return cls(config, args_cli=None) + return cls(config) @staticmethod def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: @@ -229,31 +131,14 @@ def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars "--checkpoint_path", type=str, required=True, - help="Path to the checkpoint file containing the RSL-RL policy", - ) - rsl_rl_group.add_argument( - "--agent_cfg_path", - type=Path, - default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"), - help="Path to the RL agent configuration file.", + help=( + "Path to the checkpoint file. Agent config is loaded automatically from params/agent.yaml in the same" + " directory." 
+ ), ) - # append RSL-RL cli arguments - cli_args.add_rsl_rl_args(parser) - cli_args.add_rsl_rl_policy_args(parser) return parser @staticmethod def from_args(args: argparse.Namespace) -> "RslRlActionPolicy": - """ - Create a RSL-RL action policy instance from parsed CLI arguments. - - Path: CLI args → ConfigDataclass → init cls - - Args: - args: Parsed command line arguments - - Returns: - RslRlActionPolicy instance - """ config = RslRlActionPolicyConfig.from_cli_args(args) - return RslRlActionPolicy(config, args_cli=args) + return RslRlActionPolicy(config) From 0fedddb02756a7c50a2296161c167e992b09bc18 Mon Sep 17 00:00:00 2001 From: Clemens Volk Date: Tue, 3 Mar 2026 11:14:55 +0100 Subject: [PATCH 08/14] Address PR review comments - Delete rigid_object_variant.py (prototype scratch file) - Move base_rsl_rl_policy.py to isaaclab_arena_examples/policy/ - Update lift_object_environment.py import for new module path - Replace WIP comment in cameras.py with clean TODO(cvolk) - Add TODO(cvolk) to RL workflow docs for follow-up rewrite Signed-off-by: Clemens Volk --- .../step_1_environment_setup.rst | 3 ++ .../step_2_policy_training.rst | 6 +++ .../examples/rigid_object_variant.py | 45 ------------------- isaaclab_arena/utils/cameras.py | 4 +- .../lift_object_environment.py | 2 +- isaaclab_arena_examples/policy/__init__.py | 4 ++ .../policy}/base_rsl_rl_policy.py | 0 7 files changed, 15 insertions(+), 49 deletions(-) delete mode 100644 isaaclab_arena/examples/rigid_object_variant.py create mode 100644 isaaclab_arena_examples/policy/__init__.py rename {isaaclab_arena/policy/rl_policy => isaaclab_arena_examples/policy}/base_rsl_rl_policy.py (100%) diff --git a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst index 7107c6f0e..0ccccc018 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst +++ 
b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst @@ -1,3 +1,6 @@ +.. TODO(cvolk): Update the Validation section to use IsaacLab's train.py with --external_callback +.. instead of the removed isaaclab_arena/scripts/reinforcement_learning/train.py. + Environment Setup and Validation -------------------------------- diff --git a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst index 7fa0e4704..50b6ee708 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst @@ -1,3 +1,9 @@ +.. TODO(cvolk): Rewrite this page to reflect the new training workflow: +.. - Replace isaaclab_arena/scripts/reinforcement_learning/train.py with +.. IsaacLab's train.py using --external_callback. +.. - Remove references to generic_policy.json and --agent_cfg_path (both deleted). +.. - Document Hydra CLI overrides (agent.policy.activation=relu etc.) instead. + Policy Training --------------- diff --git a/isaaclab_arena/examples/rigid_object_variant.py b/isaaclab_arena/examples/rigid_object_variant.py deleted file mode 100644 index e7a4aad40..000000000 --- a/isaaclab_arena/examples/rigid_object_variant.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). -# All rights reserved. 
-# -# SPDX-License-Identifier: Apache-2.0 - -rigid_object_variant_cfg = RigidObjectVariantCfg( - assets={ - "box": RigidObjectCfg( - name="box", - spawn=UsdFileCfg( - usd_path="path/to/box.usd", - ), - scale=(1.0, 1.0, 1.0), - initial_pose=Pose(position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)), - ), - "sphere": RigidObjectCfg( - name="sphere", - spawn=UsdFileCfg( - usd_path="path/to/sphere.usd", - ), - scale=(2.0, 2.0, 2.0), - initial_pose=Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)), - ), - } -} - - - -cracker_box = asset_registry.get_asset_by_name("cracker_box")() -tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")() -object_set = RigidObjectSet(name="object_set", objects=[cracker_box, tomato_soup_can]) -object_set.set_initial_pose(Pose(position=(0.0, 0.0, 0.0), orientation=(1.0, 0.0, 0.0, 0.0))) - - - - -cracker_box = asset_registry.get_asset_by_name("cracker_box")() -tomato_soup_can = asset_registry.get_asset_by_name("tomato_soup_can")() -object_set = RigidObjectSet(name="object_set", objects=[cracker_box, tomato_soup_can]) -object_set.set_initial_pose( - PoseVariant(poses={ - cracker_box: Pose(position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)), - tomato_soup_can: Pose(position=(4.0, 5.0, 6.0), orientation=(0.0, 0.0, 0.0, 1.0)), - }) -) diff --git a/isaaclab_arena/utils/cameras.py b/isaaclab_arena/utils/cameras.py index d92977729..4567bb7e0 100644 --- a/isaaclab_arena/utils/cameras.py +++ b/isaaclab_arena/utils/cameras.py @@ -117,9 +117,7 @@ def get_viewer_cfg_look_at_object(lookat_object: Asset, offset: np.ndarray) -> V if isinstance(initial_pose, PoseRange): initial_pose = initial_pose.get_midpoint() - # WHEN STUFF WORKS, LOOK INTO WHY WE"RE GETTING np.float64 in the first place. - # probably need a validation step in the Pose object. - + # TODO(cvolk): Add float coercion to Pose.__post_init__ so this conversion is unnecessary. 
# Ensure we only pass primitive Python floats (not NumPy scalars) into ViewerCfg, # since downstream config systems like Hydra/OmegaConf don't support np.float64. lookat = tuple(float(x) for x in initial_pose.position_xyz) diff --git a/isaaclab_arena_environments/lift_object_environment.py b/isaaclab_arena_environments/lift_object_environment.py index 9fcc4f789..69e768a12 100644 --- a/isaaclab_arena_environments/lift_object_environment.py +++ b/isaaclab_arena_environments/lift_object_environment.py @@ -19,7 +19,7 @@ class LiftObjectEnvironment(ExampleEnvironmentBase): name: str = "lift_object" def get_env(self, args_cli: argparse.Namespace): # -> IsaacLabArenaEnvironment: - import isaaclab_arena.policy.rl_policy.base_rsl_rl_policy as base_rsl_rl_policy + import isaaclab_arena_examples.policy.base_rsl_rl_policy as base_rsl_rl_policy from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment from isaaclab_arena.reinforcement_learning.frameworks import RLFramework from isaaclab_arena.scene.scene import Scene diff --git a/isaaclab_arena_examples/policy/__init__.py b/isaaclab_arena_examples/policy/__init__.py new file mode 100644 index 000000000..fee3a6a9f --- /dev/null +++ b/isaaclab_arena_examples/policy/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 diff --git a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py b/isaaclab_arena_examples/policy/base_rsl_rl_policy.py similarity index 100% rename from isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py rename to isaaclab_arena_examples/policy/base_rsl_rl_policy.py From 03fb17b4d50ff516d9b910cf4d3a47041cedda37 Mon Sep 17 00:00:00 2001 From: Clemens Volk Date: Tue, 3 Mar 2026 11:15:02 +0100 Subject: [PATCH 09/14] Update docs README to reflect host-based build workflow Replace the outdated Docker-based instructions with the correct host workflow using a Python 3.11 venv. Signed-off-by: Clemens Volk --- docs/README.md | 51 ++++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/docs/README.md b/docs/README.md index aa9669ce6..1120527a2 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,45 +1,42 @@ -# `isaaclab_arena` Dox - Developer Guide +# `isaaclab_arena` Docs - Developer Guide -To build the `isaaclab_arena` docs locally follow the following instructions. +The docs are built on the **host machine** (not inside Docker) using a dedicated Python 3.11 venv. -Enter the `isaaclab_arena` docker. +## Prerequisites -``` -./docker/run_docker.sh -``` - -The version of sphinx that we use requires a newer version of python. -Install a newer version of `python` and `venv`: +`python3.11` and `python3.11-venv` must be installed on the host: -``` -sudo apt-get install python3.11 python3.11-venv +```bash +sudo apt-get install -y python3.11 python3.11-venv ``` -> It looks like this actually overwrites the currently installed version of python -> inside. 
+## First-time setup -Create a `venv` and install the dependencies +From the repo root, create the venv and install dependencies: -``` +```bash +cd docs python3.11 -m venv venv_docs -source venv_docs/bin/activate -cd ./docs -python3.11 -m pip install -r requirements.txt +venv_docs/bin/pip install -r requirements.txt ``` -To make the current version of docs -``` -make html +## Build and view + +```bash +cd docs +venv_docs/bin/sphinx-build -M html . _build/current +xdg-open _build/current/html/index.html ``` -To view the docs, navigate to `isaaclab_arena/docs/_build/current/html/index.html`, and double-click. -To make the multi version docs. Note that this will only build docs for the set branches, such -as release, main etc. Only docs committed to these branches will be reflected. +## Multi-version docs -``` +Builds docs for committed branches only (e.g. `main`, `release`). Local uncommitted changes are **not** reflected. + +```bash +cd docs +source venv_docs/bin/activate make multi-docs +xdg-open _build/index.html ``` - -To view the multi version docs, navigate to `isaaclab_arena/docs/_build/index.html`, and double-click. From 040aea8ed9895a1e9d977fee05507d053c815880 Mon Sep 17 00:00:00 2001 From: Clemens Volk Date: Tue, 3 Mar 2026 14:40:53 +0100 Subject: [PATCH 10/14] Disable AppLauncher pinocchio patch to fix pxr import failure AppLauncher's enable_pinocchio path wraps _start_app() with a patch that calls from pxr import Gf immediately after startup. If Isaac Sim's extension loading is incomplete (e.g. due to a version constraint in the experience file), pxr is never added to sys.path and the patch crashes with ModuleNotFoundError. Setting disable_pinocchio_patch=True tells AppLauncher to skip the patch. Pinocchio is already imported before AppLauncher is constructed, which is sufficient for it to work correctly. 
Signed-off-by: Clemens Volk --- isaaclab_arena/utils/isaaclab_utils/simulation_app.py | 9 +++++++++ submodules/IsaacLab | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/isaaclab_arena/utils/isaaclab_utils/simulation_app.py b/isaaclab_arena/utils/isaaclab_utils/simulation_app.py index e3533375f..df8ef4749 100644 --- a/isaaclab_arena/utils/isaaclab_utils/simulation_app.py +++ b/isaaclab_arena/utils/isaaclab_utils/simulation_app.py @@ -22,9 +22,18 @@ def get_app_launcher(args: argparse.Namespace) -> AppLauncher: """Get an app launcher.""" # NOTE(alexmillane, 2025.11.10): Import pinocchio before launching the app appears still to be required. # Monitor this and see if we can get rid of it. + # NOTE: We disable AppLauncher's pxr.Gf.Matrix4d patch here. That patch does `from pxr import Gf` + # immediately after _start_app(), which can fail when Isaac Sim's extension loading is incomplete + # (e.g. due to a version constraint mismatch in the experience file, such as + # isaacsim.asset.importer.urdf being pinned to a version not present in the container). + # Pinocchio is already imported at this point, which is sufficient for it to work correctly. + # The long-term fix is to keep the Arena Docker image in sync with what IsaacLab's experience + # files require, at which point this workaround can be revisited. 
if hasattr(args, "enable_pinocchio") and args.enable_pinocchio: import pinocchio # noqa: F401 + args.disable_pinocchio_patch = True + app_launcher = AppLauncher(args) if get_isaac_sim_version() != "5.1.0": print(f"WARNING: IsaacSim has been upgraded to {get_isaac_sim_version()}.") diff --git a/submodules/IsaacLab b/submodules/IsaacLab index 018a78a8d..e7607ed15 160000 --- a/submodules/IsaacLab +++ b/submodules/IsaacLab @@ -1 +1 @@ -Subproject commit 018a78a8d35f2e90d53b9009c09d4067bfd03d30 +Subproject commit e7607ed155853a64f824302456cd5975cccf36ee From e6ba7babd742bd3e461e00a26ffff88db1a43b82 Mon Sep 17 00:00:00 2001 From: Clemens Volk Date: Tue, 3 Mar 2026 14:47:53 +0100 Subject: [PATCH 11/14] Rework RL workflow docs to use IsaacLab train.py with --external_callback - Replace Arena's removed train.py with IsaacLab's train.py + --external_callback - Add explanation of how the callback registers the environment before training - Add Hydra override examples for hyperparameter tuning - Update tensorboard command to use /isaac-sim/python.sh -m tensorboard.main - Rewrite evaluation section: drop removed play.py method, update commands to remove --agent_cfg_path (checkpoint now auto-loads params/agent.yaml) - Update step 1 validation command to use IsaacLab train.py Signed-off-by: Clemens Volk --- .../step_1_environment_setup.rst | 11 +- .../step_2_policy_training.rst | 182 +++++----------- .../step_3_evaluation.rst | 200 +++--------------- 3 files changed, 87 insertions(+), 306 deletions(-) diff --git a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst index 0ccccc018..49d3840a3 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst @@ -158,17 +158,18 @@ See :doc:`../../concepts/concept_environment_design` for environment composition 
Validation: Run Random Policy ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To validate the environment setup, we can run a policy with random weights to ensure everything loads correctly: +To validate the environment loads correctly, run one training iteration and check for errors: .. code-block:: bash - python isaaclab_arena/scripts/reinforcement_learning/train.py \ + /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ --num_envs 64 \ --max_iterations 1 \ - lift_object + --headless -This command will load the environment, initialize 64 parallel environments, and exit immediately -(``max_iterations=1``). If successful, the environment is ready for training. +If the environment is set up correctly, you will see one iteration of training output before the script exits. You should see output indicating the start of training: diff --git a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst index 50b6ee708..0d4e4b090 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst @@ -1,141 +1,72 @@ -.. TODO(cvolk): Rewrite this page to reflect the new training workflow: -.. - Replace isaaclab_arena/scripts/reinforcement_learning/train.py with -.. IsaacLab's train.py using --external_callback. -.. - Remove references to generic_policy.json and --agent_cfg_path (both deleted). -.. - Document Hydra CLI overrides (agent.policy.activation=relu etc.) instead. - Policy Training --------------- -This workflow covers training an RL policy from scratch using RSL-RL's PPO implementation. -The training is fully parallelized across hundreds of environments for efficient learning. 
- **Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details) :docker_run_default: - -Training Overview -^^^^^^^^^^^^^^^^^ - -We use **Proximal Policy Optimization (PPO)** from the `RSL-RL `_ library, -a proven on-policy RL algorithm for robot learning. The training process: - -1. **Parallel Simulation**: Runs 512 parallel environments simultaneously -2. **Dense Rewards**: Provides shaped rewards for reaching, grasping, lifting, and goal achievement -3. **Command Sampling**: Randomly samples target positions within a workspace range -4. **Automatic Checkpointing**: Saves model checkpoints every 500 iterations -5. **Tensorboard Logging**: Monitors training progress in real-time - Training Command ^^^^^^^^^^^^^^^^ -To train the policy, run: +Training uses IsaacLab's RSL-RL training script directly. The ``--external_callback`` argument +points to an Arena function that runs before training starts — it reads the ``--task`` argument, +builds the environment, and registers it with gym so IsaacLab's script can find it by name. .. code-block:: bash - python isaaclab_arena/scripts/reinforcement_learning/train.py \ - --env_spacing 5.0 \ + /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ --num_envs 512 \ --max_iterations 12000 \ - --save_interval 500 \ - --headless \ - lift_object - -**Command Breakdown:** - -.. 
list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--env_spacing 5.0`` - - Spacing between parallel environments (meters) - * - ``--num_envs 512`` - - Number of parallel environments for training - * - ``--max_iterations 12000`` - - Total training iterations (each iteration = 24 timesteps × 512 envs = 12,288 samples) - * - ``--save_interval 500`` - - Save checkpoint every 500 iterations - * - ``--headless`` - - Run without GUI for faster training - * - ``lift_object`` - - Environment name (must be last argument) - -**Additional Arguments (Optional):** - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--seed `` - - Random seed for reproducibility (default: 42) - * - ``--device `` - - Device to use: 'cuda' or 'cpu' (default: 'cuda') - * - ``--video`` - - Record training videos periodically - * - ``--video_interval 2000`` - - Interval for recording videos (iterations) - - -Training Configuration -^^^^^^^^^^^^^^^^^^^^^^ - -The training uses the default RSL-RL PPO configuration, which can be found at: - -``isaaclab_arena/policy/rl_policy/generic_policy.json`` - -Key hyperparameters: - -.. code-block:: json - - { - "algorithm": { - "class_name": "PPO", - "num_learning_epochs": 5, - "num_mini_batches": 4, - "learning_rate": 0.001, - "gamma": 0.99, - "lam": 0.95, - "clip_param": 0.2 - }, - "policy": { - "class_name": "ActorCritic", - "activation": "elu", - "actor_hidden_dims": [256, 256, 256], - "critic_hidden_dims": [256, 256, 256] - } - } - -To use a custom configuration, specify the path with ``--agent_cfg_path ``. + --headless +Checkpoints are written to ``logs/rsl_rl/generic_experiment//``. +The agent configuration is saved alongside as ``params/agent.yaml``, +which the evaluation script uses to reconstruct the policy at inference time. -Monitoring Training -^^^^^^^^^^^^^^^^^^^ -Training logs are saved to ``logs/rsl_rl/generic_experiment//``. 
+Overriding Hyperparameters +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Hyperparameters come from ``RLPolicyCfg`` in ``isaaclab_arena_examples/policy/base_rsl_rl_policy.py`` +and can be overridden with Hydra syntax appended to the training command: + +.. code-block:: bash -**1. View Training Metrics with Tensorboard** + # Change network activation function to relu (default: elu) -Launch Tensorboard to monitor training progress: + agent.policy.activation=relu + + # Adjust the learning rate (default: 0.0001) + agent.algorithm.learning_rate=0.001 + + # Change how often a checkpoint is saved (default: every 200 iterations) + agent.save_interval=500 +For example, to train with relu activation and a higher learning rate: .. code-block:: bash - tensorboard --logdir logs/rsl_rl + /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ + --num_envs 512 \ + --max_iterations 12000 \ + --headless \ + agent.policy.activation=relu \ + agent.algorithm.learning_rate=0.001 -Navigate to ``http://localhost:6006`` in your browser to view: -- **Episode rewards**: Total reward per episode -- **Episode length**: Steps per episode -- **Policy loss**: Actor and critic losses -- **Learning rate**: Current learning rate schedule +Monitoring Training +^^^^^^^^^^^^^^^^^^^ + +Launch Tensorboard to monitor progress: + +.. code-block:: bash -**2. Training Output** + /isaac-sim/python.sh -m tensorboard.main --logdir logs/rsl_rl -During training, you'll see periodic console output: +During training, each iteration prints a summary to the console: .. code-block:: text @@ -165,43 +96,28 @@ During training, you'll see periodic console output: Time elapsed: 00:00:04 ETA: 00:00:49 - [INFO] Saved checkpoint to: logs/rsl_rl/generic_experiment//model_.pt - -**3. 
Checkpoints** - -Model checkpoints are saved to: - -``logs/rsl_rl/generic_experiment//model_.pt`` - -Example: ``logs/rsl_rl/generic_experiment/2026-01-29_12-30-00/model_2000.pt`` - Multi-GPU Training ^^^^^^^^^^^^^^^^^^ -For faster training on multi-GPU systems, use the ``--distributed`` flag: +Add ``--distributed`` to spread environments across all available GPUs: .. code-block:: bash - python isaaclab_arena/scripts/reinforcement_learning/train.py \ - --env_spacing 5.0 \ + /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \ + --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \ + --task lift_object \ --num_envs 512 \ --max_iterations 12000 \ - --save_interval 500 \ --headless \ - --distributed \ - lift_object - -This automatically distributes environments across available GPUs. + --distributed Expected Results ^^^^^^^^^^^^^^^^ -After 12,000 iterations (~6 hours on a single GPU with 512 environments): - -The trained policy should reliably grasp and lift objects to commanded target positions. -Please refer to the following gif for an example of the trained policy: +After 12,000 iterations (~6 hours on a single GPU with 512 environments), the trained +policy should reliably grasp and lift objects to commanded target positions. .. image:: ../../../images/lift_object_rl_task.gif :align: center diff --git a/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst b/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst index 05e2e08c1..5f71a4913 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst @@ -1,9 +1,6 @@ Closed-Loop Policy Inference and Evaluation ------------------------------------------- -This workflow demonstrates running the trained RSL-RL policy in closed-loop -and evaluating it in the Lift Object environment. 
- **Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details) :docker_run_default: @@ -15,16 +12,12 @@ Once inside the container, set the models directory if you plan to download pre- export MODELS_DIR=models/isaaclab_arena/reinforcement_learning mkdir -p $MODELS_DIR -Note that this tutorial assumes that you've completed the -:doc:`preceding step (Policy Training) ` and have a trained checkpoint available, -or you can download a pre-trained checkpoint as described below. +This tutorial assumes you've completed :doc:`step_2_policy_training` and have a trained checkpoint, +or you can download a pre-trained one as described below. .. dropdown:: Download Pre-trained Model (skip preceding steps) :animate: fade-in - These commands can be used to download a pre-trained RSL-RL policy checkpoint, - such that the preceding training step can be skipped. - .. code-block:: bash hf download \ @@ -32,73 +25,29 @@ or you can download a pre-trained checkpoint as described below. model_11999.pt \ --local-dir $MODELS_DIR/lift_object_checkpoint - After downloading, you can use the checkpoint at: + After downloading, the checkpoint is at: ``$MODELS_DIR/lift_object_checkpoint/model_11999.pt`` - Replace checkpoint paths in the examples below with this path to evaluate the pre-trained model. + Replace checkpoint paths in the examples below with this path. Evaluation Methods ^^^^^^^^^^^^^^^^^^ -Isaac Lab Arena provides multiple ways to evaluate trained RL policies: - -1. **Quick Visualization (play.py)**: Fast visual inspection of policy behavior -2. **Single Environment Evaluation (policy_runner.py)**: Detailed evaluation with metrics -3. **Parallel Environment Evaluation (policy_runner.py)**: Large-scale statistical evaluation -4. 
**Batch Evaluation (eval_runner.py)**: Automated evaluation of multiple checkpoints - - -Method 1: Quick Visualization -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``play.py`` script provides the fastest way to visually inspect your trained policy. -This is useful for debugging and quick quality checks. - -.. code-block:: bash - - python isaaclab_arena/scripts/reinforcement_learning/play.py \ - --env_spacing 30.0 \ - --num_envs 16 \ - --checkpoint logs/rsl_rl/generic_experiment/2026-01-28_17-26-10/model_11999.pt \ - lift_object - -**Key Features:** +There are three ways to evaluate a trained policy: -- Fast startup with GUI enabled by default -- Visualizes policy rollouts in real-time -- No metrics computation (pure visualization) -- Useful for debugging policy behavior +1. **Single environment** (``policy_runner.py``): detailed evaluation with metrics +2. **Parallel environments** (``policy_runner.py``): larger-scale statistical evaluation +3. **Batch evaluation** (``eval_runner.py``): automated evaluation across multiple checkpoints -**Command Arguments:** -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--env_spacing 30.0`` - - Larger spacing for visualization (avoids visual clutter) - * - ``--num_envs 16`` - - Number of parallel environments to visualize - * - ``--checkpoint `` - - Path to the trained model checkpoint (.pt file) - * - ``lift_object`` - - Environment name (must be last) - -You should see multiple Franka robots simultaneously attempting to lift objects to various target positions. - - -Method 2: Single Environment Evaluation +Method 1: Single Environment Evaluation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``policy_runner.py`` provides comprehensive evaluation with task-specific metrics. - .. 
code-block:: bash - python isaaclab_arena/evaluation/policy_runner.py \ + /isaac-sim/python.sh isaaclab_arena/evaluation/policy_runner.py \ --policy_type rsl_rl \ --num_steps 1000 \ --checkpoint_path logs/rsl_rl/generic_experiment/2026-01-28_17-26-10/model_11999.pt \ @@ -107,54 +56,28 @@ The ``policy_runner.py`` provides comprehensive evaluation with task-specific me .. note:: - If you downloaded the pre-trained model from Hugging Face, replace the checkpoint path: - - ``--checkpoint_path $MODELS_DIR/lift_object_checkpoint/model_11999.pt`` - -**Important: Argument Order** - -Policy-specific arguments (``--policy_type``, ``--checkpoint_path``, etc.) must come **before** the environment name. -Environment-specific arguments (``--rl_training_mode``, ``--object``, etc.) must come **after** the environment name. - -**Command Breakdown:** - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--policy_type rsl_rl`` - - Policy type to load (RSL-RL trained policy) - * - ``--num_steps 1000`` - - Total simulation steps to run - * - ``--checkpoint_path `` - - Path to the model checkpoint - * - ``lift_object`` - - Environment name - * - ``--rl_training_mode False`` - - Enable success termination for evaluation + If you downloaded the pre-trained model from Hugging Face, replace the checkpoint path with: + ``$MODELS_DIR/lift_object_checkpoint/model_11999.pt`` -**Expected Output:** +Policy-specific arguments (``--policy_type``, ``--checkpoint_path``, etc.) must come **before** the +environment name. Environment-specific arguments (``--rl_training_mode``, ``--object``, etc.) must +come **after** it. -At the end of evaluation, you should see metrics similar to: +At the end of the run, metrics are printed to the console: .. code-block:: text Metrics: {'success_rate': 0.85, 'num_episodes': 12} -This indicates that 85% of episodes successfully lifted the object to the target position, -across 12 completed episodes in 1000 steps. 
- -Method 3: Parallel Environment Evaluation +Method 2: Parallel Environment Evaluation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For more statistically significant results, evaluate across many parallel environments: +For more statistically significant results, run across many environments in parallel: .. code-block:: bash - python isaaclab_arena/evaluation/policy_runner.py \ + /isaac-sim/python.sh isaaclab_arena/evaluation/policy_runner.py \ --policy_type rsl_rl \ --num_steps 5000 \ --num_envs 64 \ @@ -163,37 +86,17 @@ For more statistically significant results, evaluate across many parallel enviro lift_object \ --rl_training_mode False -**Additional Arguments:** - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Argument - - Description - * - ``--num_envs 64`` - - Run 64 parallel environments simultaneously - * - ``--headless`` - - Run without GUI for faster evaluation - * - ``--num_steps 5000`` - - More steps for more episodes - -**Expected Output:** - .. code-block:: text Metrics: {'success_rate': 0.83, 'num_episodes': 156} -Running more environments and steps provides better statistical estimates of policy performance. - -Method 4: Batch Evaluation with JSON Configuration -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Method 3: Batch Evaluation +^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For systematic evaluation of multiple checkpoints or hyperparameter sweeps, use ``eval_runner.py`` -with a JSON configuration file. +To evaluate multiple checkpoints in sequence, use ``eval_runner.py`` with a JSON config. -**1. Create Evaluation Configuration** +**1. Create an evaluation config** Create a file ``eval_config.json``: @@ -224,16 +127,11 @@ Create a file ``eval_config.json``: ] } -**2. Run Batch Evaluation** +**2. Run** .. code-block:: bash - python isaaclab_arena/evaluation/eval_runner.py --eval_jobs_config eval_config.json - -This will automatically evaluate all checkpoints listed in the configuration and output -a summary of metrics for each. 
- -**Expected Output:** + /isaac-sim/python.sh isaaclab_arena/evaluation/eval_runner.py --eval_jobs_config eval_config.json .. code-block:: text @@ -252,49 +150,15 @@ a summary of metrics for each. Understanding the Metrics ^^^^^^^^^^^^^^^^^^^^^^^^^^ -The Lift Object task reports the following metrics: - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Metric - - Description - * - ``success_rate`` - - Fraction of episodes where object reached target position within tolerance - * - ``num_episodes`` - - Total number of episodes completed during evaluation - -A well-trained policy should achieve: - -- **Success rate**: 70-90% (depends on target range difficulty) -- **Consistent performance**: Success rate stable across multiple evaluation runs - - -Troubleshooting -^^^^^^^^^^^^^^^ - -**Issue: Low success rate (<50%)** - -- Increase training iterations (try 20,000+) -- Check reward configuration in task definition -- Verify command sampling ranges are reasonable -- Try different random seeds - -**Issue: Policy gets stuck or drops object** - -- Ensure object mass and friction are reasonable -- Check gripper force limits -- Visualize with ``play.py`` to diagnose behavior -- Review episode recordings if ``--video`` was enabled during training +The Lift Object task reports two metrics: -**Issue: "Checkpoint not found" error** +- ``success_rate``: fraction of episodes where the object reached the target position within tolerance +- ``num_episodes``: total number of completed episodes during the evaluation run -- Verify checkpoint path is correct -- Use absolute paths if relative paths fail -- Check that training completed and saved checkpoints +A well-trained policy should reach 70–90% success rate. Results will vary with the target range, +random seed, and hardware. .. note:: - When running evaluation, always set ``--rl_training_mode False`` to enable success termination. - During training, this flag is ``True`` by default to prevent early episode termination. 
+ Always set ``--rl_training_mode False`` when evaluating. During training this flag is ``True`` + to disable success termination; setting it to ``False`` re-enables it for proper evaluation. From 369785c63b2eac6b1b5cd5affef9baf55f5cec2b Mon Sep 17 00:00:00 2001 From: Clemens Volk Date: Tue, 3 Mar 2026 14:50:04 +0100 Subject: [PATCH 12/14] Remove stale TODO from step_1 environment setup doc Signed-off-by: Clemens Volk --- .../reinforcement_learning/step_1_environment_setup.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst index 49d3840a3..41dea4fb7 100644 --- a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst +++ b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst @@ -1,6 +1,3 @@ -.. TODO(cvolk): Update the Validation section to use IsaacLab's train.py with --external_callback -.. instead of the removed isaaclab_arena/scripts/reinforcement_learning/train.py. 
- Environment Setup and Validation -------------------------------- From 3cefa82d7872fb277164f7c79f7645a0475bb056 Mon Sep 17 00:00:00 2001 From: Clemens Volk Date: Tue, 3 Mar 2026 17:20:53 +0100 Subject: [PATCH 13/14] Upgrade Docker base image to Isaac Sim 5.1.0 - Switch base image from nvcr.io/nvidia/isaac-sim:5.0.0 to 5.1.0 - Add USER root after FROM (5.1.0 runs as non-root by default) - Replace bare pip/pip3 calls with /isaac-sim/python.sh -m pip to target the Isaac Sim Python environment; add --break-system-packages where the system pip is still required (huggingface-hub CLI) - Add chmod a+x /isaac-sim needed by 5.1.0 - Fix OSQP check to use /isaac-sim/python.sh -c - Add isaac-sim group to the runtime user in entrypoint.sh Signed-off-by: Clemens Volk --- docker/Dockerfile.isaaclab_arena | 17 +++++++++-------- docker/setup/entrypoint.sh | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docker/Dockerfile.isaaclab_arena b/docker/Dockerfile.isaaclab_arena index e5018fbf5..57c3d6be8 100644 --- a/docker/Dockerfile.isaaclab_arena +++ b/docker/Dockerfile.isaaclab_arena @@ -1,7 +1,10 @@ -ARG BASE_IMAGE=nvcr.io/nvidia/isaac-sim:5.0.0 +ARG BASE_IMAGE=nvcr.io/nvidia/isaac-sim:5.1.0 FROM ${BASE_IMAGE} +# Isaac Sim 5.1.0+ runs as a non-root user; switch to root for installation steps. 
+USER root + # GR00T Policy Build Arguments, these are only used if INSTALL_GROOT is true ARG INSTALL_GROOT=false @@ -22,9 +25,6 @@ RUN apt-get update && apt-get install -y \ sudo \ python3-pip -# Update pip to the latest version -RUN pip3 install --upgrade pip - ################################ # Install Isaac Lab ################################ @@ -37,9 +37,10 @@ ENV TERM=xterm # Symlink isaac sim to IsaacLab RUN ln -s /isaac-sim/ ${WORKDIR}/submodules/IsaacLab/_isaac_sim # Install IsaacLab dependencies -RUN for DIR in ${WORKDIR}/submodules/IsaacLab/source/isaaclab*/; do pip install --no-deps -e "$DIR"; done +RUN for DIR in ${WORKDIR}/submodules/IsaacLab/source/isaaclab*/; do /isaac-sim/python.sh -m pip install --no-deps -e "$DIR"; done # Logs and other stuff appear under dist-packages per default, so this dir has to be writeable. RUN chmod 777 -R /isaac-sim/kit/ +RUN chmod a+x /isaac-sim # NOTE(alexmillane, 2026-02-10): We started having issues with flatdict 4.0.1 installation # during IsaacLab install. We install here with build isolation which seems to fix the issue. RUN /isaac-sim/python.sh -m pip install flatdict==4.0.1 --no-build-isolation @@ -49,7 +50,7 @@ RUN ${ISAACLAB_PATH}/isaaclab.sh -i # Patch for osqp in IsaacLab. Downgrade qpsolvers # TODO(alexmillane): Watch the thread here: https://nvidia.slack.com/archives/C06HLQ6CB41/p1764680205807019 # and remove this thread when IsaacLab has a fix. -RUN if python -c "import qpsolvers; print(qpsolvers.available_solvers)" | grep -q "osqp"; then \ +RUN if /isaac-sim/python.sh -c "import qpsolvers; print(qpsolvers.available_solvers)" | grep -q "osqp"; then \ echo "OSQP is installed. You can remove this clause from the Arena dockerfile."; \ else \ echo "OSQP missing, installing... This is a patch for an Isaac Lab bug."; \ @@ -79,7 +80,7 @@ ENV LW_API_ENDPOINT="https://api-dev.lightwheel.net" # HuggingFace for downloading datasets and models. 
# NOTE(alexmillane, 2025-10-28): For some reason the CLI has issues when installed in the IsaacSim version of python. -RUN pip install huggingface-hub[cli] +RUN pip install huggingface-hub[cli] --break-system-packages # Create alias for hf command to use the system-installed version RUN echo "alias hf='/usr/local/bin/hf'" >> /etc/bash.bashrc @@ -136,7 +137,7 @@ RUN echo "alias pytest='/isaac-sim/python.sh -m pytest'" >> /etc/bash.bashrc # It will pause waiting for the debugger to attach. # 3) Attach to the running container with VSCode using the "Attach to debugpy session" # configuration from the Run and Debug panel. -RUN pip3 install debugpy +RUN /isaac-sim/python.sh -m pip install debugpy RUN echo "alias debugpy='python -Xfrozen_modules=off -m debugpy --listen localhost:5678 --wait-for-client'" >> /etc/bash.bashrc # Change prompt so it's obvious we're inside the arena container diff --git a/docker/setup/entrypoint.sh b/docker/setup/entrypoint.sh index 3aa0c3320..ce226e326 100755 --- a/docker/setup/entrypoint.sh +++ b/docker/setup/entrypoint.sh @@ -21,7 +21,7 @@ userdel ubuntu || true useradd --no-log-init \ --uid "$DOCKER_RUN_USER_ID" \ --gid "$DOCKER_RUN_GROUP_NAME" \ - --groups sudo \ + --groups sudo,isaac-sim \ --shell /bin/bash \ $DOCKER_RUN_USER_NAME chown $DOCKER_RUN_USER_NAME:$DOCKER_RUN_GROUP_NAME /home/$DOCKER_RUN_USER_NAME From 1d56150e1880c3ee9d607626fed0a577cb6260d0 Mon Sep 17 00:00:00 2001 From: Clemens Volk Date: Tue, 3 Mar 2026 17:21:06 +0100 Subject: [PATCH 14/14] Remove pinocchio patch workaround now fixed by Isaac Sim 5.1.0 upgrade The disable_pinocchio_patch workaround was added to prevent AppLauncher's pxr.Gf.Matrix4d patch from crashing when extension loading was incomplete due to the isaacsim.asset.importer.urdf version mismatch. With Isaac Sim 5.1.0 the correct URDF extension version is present, so extensions load cleanly and the patch works as intended. 
Signed-off-by: Clemens Volk --- isaaclab_arena/utils/isaaclab_utils/simulation_app.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/isaaclab_arena/utils/isaaclab_utils/simulation_app.py b/isaaclab_arena/utils/isaaclab_utils/simulation_app.py index df8ef4749..e3533375f 100644 --- a/isaaclab_arena/utils/isaaclab_utils/simulation_app.py +++ b/isaaclab_arena/utils/isaaclab_utils/simulation_app.py @@ -22,18 +22,9 @@ def get_app_launcher(args: argparse.Namespace) -> AppLauncher: """Get an app launcher.""" # NOTE(alexmillane, 2025.11.10): Import pinocchio before launching the app appears still to be required. # Monitor this and see if we can get rid of it. - # NOTE: We disable AppLauncher's pxr.Gf.Matrix4d patch here. That patch does `from pxr import Gf` - # immediately after _start_app(), which can fail when Isaac Sim's extension loading is incomplete - # (e.g. due to a version constraint mismatch in the experience file, such as - # isaacsim.asset.importer.urdf being pinned to a version not present in the container). - # Pinocchio is already imported at this point, which is sufficient for it to work correctly. - # The long-term fix is to keep the Arena Docker image in sync with what IsaacLab's experience - # files require, at which point this workaround can be revisited. if hasattr(args, "enable_pinocchio") and args.enable_pinocchio: import pinocchio # noqa: F401 - args.disable_pinocchio_patch = True - app_launcher = AppLauncher(args) if get_isaac_sim_version() != "5.1.0": print(f"WARNING: IsaacSim has been upgraded to {get_isaac_sim_version()}.")