diff --git a/docker/Dockerfile.isaaclab_arena b/docker/Dockerfile.isaaclab_arena
index e5018fbf5..57c3d6be8 100644
--- a/docker/Dockerfile.isaaclab_arena
+++ b/docker/Dockerfile.isaaclab_arena
@@ -1,7 +1,10 @@
-ARG BASE_IMAGE=nvcr.io/nvidia/isaac-sim:5.0.0
+ARG BASE_IMAGE=nvcr.io/nvidia/isaac-sim:5.1.0
 
 FROM ${BASE_IMAGE}
 
+# Isaac Sim 5.1.0+ runs as a non-root user; switch to root for installation steps.
+USER root
+
 # GR00T Policy Build Arguments, these are only used if INSTALL_GROOT is true
 ARG INSTALL_GROOT=false
 
@@ -22,9 +25,6 @@ RUN apt-get update && apt-get install -y \
   sudo \
   python3-pip
 
-# Update pip to the latest version
-RUN pip3 install --upgrade pip
-
 ################################
 # Install Isaac Lab
 ################################
@@ -37,9 +37,10 @@ ENV TERM=xterm
 # Symlink isaac sim to IsaacLab
 RUN ln -s /isaac-sim/ ${WORKDIR}/submodules/IsaacLab/_isaac_sim
 # Install IsaacLab dependencies
-RUN for DIR in ${WORKDIR}/submodules/IsaacLab/source/isaaclab*/; do pip install --no-deps -e "$DIR"; done
+RUN for DIR in ${WORKDIR}/submodules/IsaacLab/source/isaaclab*/; do /isaac-sim/python.sh -m pip install --no-deps -e "$DIR"; done
 # Logs and other stuff appear under dist-packages per default, so this dir has to be writeable.
 RUN chmod 777 -R /isaac-sim/kit/
+RUN chmod a+x /isaac-sim
 # NOTE(alexmillane, 2026-02-10): We started having issues with flatdict 4.0.1 installation
 # during IsaacLab install. We install here with build isolation which seems to fix the issue.
 RUN /isaac-sim/python.sh -m pip install flatdict==4.0.1 --no-build-isolation
@@ -49,7 +50,7 @@ RUN ${ISAACLAB_PATH}/isaaclab.sh -i
 # Patch for osqp in IsaacLab. Downgrade qpsolvers
 # TODO(alexmillane): Watch the thread here: https://nvidia.slack.com/archives/C06HLQ6CB41/p1764680205807019
 #                    and remove this thread when IsaacLab has a fix.
-RUN if python -c "import qpsolvers; print(qpsolvers.available_solvers)" | grep -q "osqp"; then \
+RUN if /isaac-sim/python.sh -c "import qpsolvers; print(qpsolvers.available_solvers)" | grep -q "osqp"; then \
         echo "OSQP is installed. You can remove this clause from the Arena dockerfile."; \
     else \
         echo "OSQP missing, installing... This is a patch for an Isaac Lab bug."; \
@@ -79,7 +80,7 @@ ENV LW_API_ENDPOINT="https://api-dev.lightwheel.net"
 
 # HuggingFace for downloading datasets and models.
 # NOTE(alexmillane, 2025-10-28): For some reason the CLI has issues when installed in the IsaacSim version of python.
-RUN pip install huggingface-hub[cli]
+RUN pip install huggingface-hub[cli] --break-system-packages
 # Create alias for hf command to use the system-installed version
 RUN echo "alias hf='/usr/local/bin/hf'" >> /etc/bash.bashrc
 
@@ -136,7 +137,7 @@ RUN echo "alias pytest='/isaac-sim/python.sh -m pytest'" >> /etc/bash.bashrc
 #    It will pause waiting for the debugger to attach.
 # 3) Attach to the running container with VSCode using the "Attach to debugpy session"
 #    configuration from the Run and Debug panel.
-RUN pip3 install debugpy
+RUN /isaac-sim/python.sh -m pip install debugpy
 RUN echo "alias debugpy='python -Xfrozen_modules=off -m debugpy --listen localhost:5678 --wait-for-client'" >> /etc/bash.bashrc
 
 # Change prompt so it's obvious we're inside the arena container
diff --git a/docker/setup/entrypoint.sh b/docker/setup/entrypoint.sh
index 3aa0c3320..ce226e326 100755
--- a/docker/setup/entrypoint.sh
+++ b/docker/setup/entrypoint.sh
@@ -21,7 +21,7 @@ userdel ubuntu || true
 useradd --no-log-init \
         --uid "$DOCKER_RUN_USER_ID" \
         --gid "$DOCKER_RUN_GROUP_NAME" \
-        --groups sudo \
+        --groups sudo,isaac-sim \
         --shell /bin/bash \
         $DOCKER_RUN_USER_NAME
 chown $DOCKER_RUN_USER_NAME:$DOCKER_RUN_GROUP_NAME /home/$DOCKER_RUN_USER_NAME
diff --git a/docs/README.md b/docs/README.md
index aa9669ce6..1120527a2 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,45 +1,42 @@
-# `isaaclab_arena` Dox - Developer Guide
+# `isaaclab_arena` Docs - Developer Guide
 
-To build the `isaaclab_arena` docs locally follow the following instructions.
+The docs are built on the **host machine** (not inside Docker) using a dedicated Python 3.11 venv.
 
-Enter the `isaaclab_arena` docker.
+## Prerequisites
 
-```
-./docker/run_docker.sh
-```
-
-The version of sphinx that we use requires a newer version of python.
-Install a newer version of `python` and `venv`:
+`python3.11` and `python3.11-venv` must be installed on the host:
 
-```
-sudo apt-get install python3.11 python3.11-venv
+```bash
+sudo apt-get install -y python3.11 python3.11-venv
 ```
 
-> It looks like this actually overwrites the currently installed version of python
-> inside.
+## First-time setup
 
-Create a `venv` and install the dependencies
+From the repo root, create the venv and install dependencies:
 
-```
+```bash
+cd docs
 python3.11 -m venv venv_docs
-source venv_docs/bin/activate
-cd ./docs
-python3.11 -m pip install -r requirements.txt
+venv_docs/bin/pip install -r requirements.txt
 ```
 
-To make the current version of docs
 
-```
-make html
+## Build and view
+
+```bash
+cd docs
+venv_docs/bin/sphinx-build -M html . _build/current
+xdg-open _build/current/html/index.html
 ```
 
-To view the docs, navigate to `isaaclab_arena/docs/_build/current/html/index.html`, and double-click.
 
-To make the multi version docs. Note that this will only build docs for the set branches, such
-as release, main etc. Only docs committed to these branches will be reflected.
+## Multi-version docs
 
-```
+Builds docs for committed branches only (e.g. `main`, `release`). Local uncommitted changes are **not** reflected.
+
+```bash
+cd docs
+source venv_docs/bin/activate
 make multi-docs
+xdg-open _build/index.html
 ```
-
-To view the multi version docs, navigate to `isaaclab_arena/docs/_build/index.html`, and double-click.
diff --git a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst
index 7107c6f0e..41dea4fb7 100644
--- a/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst
+++ b/docs/pages/example_workflows/reinforcement_learning/step_1_environment_setup.rst
@@ -155,17 +155,18 @@ See :doc:`../../concepts/concept_environment_design` for environment composition
 Validation: Run Random Policy
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-To validate the environment setup, we can run a policy with random weights to ensure everything loads correctly:
+To validate the environment loads correctly, run one training iteration and check for errors:
 
 .. code-block:: bash
 
-   python isaaclab_arena/scripts/reinforcement_learning/train.py \
+   /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \
+     --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \
+     --task lift_object \
      --num_envs 64 \
      --max_iterations 1 \
-     lift_object
+     --headless
 
-This command will load the environment, initialize 64 parallel environments, and exit immediately
-(``max_iterations=1``). If successful, the environment is ready for training.
+If the environment is set up correctly, you will see one iteration of training output before the script exits.
 
 You should see output indicating the start of training:
 
diff --git a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst
index b28aefff4..0d4e4b090 100644
--- a/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst
+++ b/docs/pages/example_workflows/reinforcement_learning/step_2_policy_training.rst
@@ -1,135 +1,72 @@
 Policy Training
 ---------------
 
-This workflow covers training an RL policy from scratch using RSL-RL's PPO implementation.
-The training is fully parallelized across hundreds of environments for sample-efficient learning.
-
 **Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details)
 
 :docker_run_default:
 
-
-Training Overview
-^^^^^^^^^^^^^^^^^
-
-We use **Proximal Policy Optimization (PPO)** from the `RSL-RL <https://github.com/leggedrobotics/rsl_rl>`_ library,
-a proven on-policy RL algorithm for robot learning. The training process:
-
-1. **Parallel Simulation**: Runs 512 parallel environments simultaneously
-2. **Dense Rewards**: Provides shaped rewards for reaching, grasping, lifting, and goal achievement
-3. **Command Sampling**: Randomly samples target positions within a workspace range
-4. **Automatic Checkpointing**: Saves model checkpoints every 500 iterations
-5. **Tensorboard Logging**: Monitors training progress in real-time
-
 Training Command
 ^^^^^^^^^^^^^^^^
 
-To train the policy, run:
+Training uses IsaacLab's RSL-RL training script directly. The ``--external_callback`` argument
+points to an Arena function that runs before training starts — it reads the ``--task`` argument,
+builds the environment, and registers it with gym so IsaacLab's script can find it by name.
 
 .. code-block:: bash
 
-   python isaaclab_arena/scripts/reinforcement_learning/train.py \
-     --env_spacing 5.0 \
+   /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \
+     --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \
+     --task lift_object \
      --num_envs 512 \
      --max_iterations 12000 \
-     --save_interval 500 \
-     --headless \
-     lift_object
-
-**Command Breakdown:**
-
-.. list-table::
-   :widths: 30 70
-   :header-rows: 1
-
-   * - Argument
-     - Description
-   * - ``--env_spacing 5.0``
-     - Spacing between parallel environments (meters)
-   * - ``--num_envs 512``
-     - Number of parallel environments for training
-   * - ``--max_iterations 12000``
-     - Total training iterations (each iteration = 24 timesteps × 512 envs = 12,288 samples)
-   * - ``--save_interval 500``
-     - Save checkpoint every 500 iterations
-   * - ``--headless``
-     - Run without GUI for faster training
-   * - ``lift_object``
-     - Environment name (must be last argument)
-
-**Additional Arguments (Optional):**
-
-.. list-table::
-   :widths: 30 70
-   :header-rows: 1
-
-   * - Argument
-     - Description
-   * - ``--seed <int>``
-     - Random seed for reproducibility (default: 42)
-   * - ``--device <str>``
-     - Device to use: 'cuda' or 'cpu' (default: 'cuda')
-   * - ``--video``
-     - Record training videos periodically
-   * - ``--video_interval 2000``
-     - Interval for recording videos (iterations)
-
-
-Training Configuration
-^^^^^^^^^^^^^^^^^^^^^^
-
-The training uses the default RSL-RL PPO configuration, which can be found at:
-
-``isaaclab_arena/policy/rl_policy/generic_policy.json``
-
-Key hyperparameters:
-
-.. code-block:: json
-
-   {
-     "algorithm": {
-       "class_name": "PPO",
-       "num_learning_epochs": 5,
-       "num_mini_batches": 4,
-       "learning_rate": 0.001,
-       "gamma": 0.99,
-       "lam": 0.95,
-       "clip_param": 0.2
-     },
-     "policy": {
-       "class_name": "ActorCritic",
-       "activation": "elu",
-       "actor_hidden_dims": [256, 256, 256],
-       "critic_hidden_dims": [256, 256, 256]
-     }
-   }
-
-To use a custom configuration, specify the path with ``--agent_cfg_path <path>``.
+     --headless
 
+Checkpoints are written to ``logs/rsl_rl/generic_experiment/<timestamp>/``.
+The agent configuration is saved alongside as ``params/agent.yaml``,
+which the evaluation script uses to reconstruct the policy at inference time.
 
-Monitoring Training
-^^^^^^^^^^^^^^^^^^^
 
-Training logs are saved to ``logs/rsl_rl/generic_experiment/<timestamp>/``.
+Overriding Hyperparameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Hyperparameters come from ``RLPolicyCfg`` in ``isaaclab_arena_examples/policy/base_rsl_rl_policy.py``
+and can be overridden with Hydra syntax appended to the training command:
+
+.. code-block:: bash
 
-**1. View Training Metrics with Tensorboard**
+   # Change network activation function to relu (default: elu)
+   agent.policy.activation=relu
 
-Launch Tensorboard to monitor training progress:
+   # Adjust the learning rate (default: 0.0001)
+   agent.algorithm.learning_rate=0.001
+
+   # Save a checkpoint less frequently (default: every 200 iterations)
+   agent.save_interval=500
+
+For example, to train with relu activation and a higher learning rate:
 
 .. code-block:: bash
 
-   tensorboard --logdir logs/rsl_rl
+   /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \
+     --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \
+     --task lift_object \
+     --num_envs 512 \
+     --max_iterations 12000 \
+     --headless \
+     agent.policy.activation=relu \
+     agent.algorithm.learning_rate=0.001
+
 
-Navigate to ``http://localhost:6006`` in your browser to view:
+Monitoring Training
+^^^^^^^^^^^^^^^^^^^
+
+Launch Tensorboard to monitor progress:
 
-- **Episode rewards**: Total reward per episode
-- **Episode length**: Steps per episode
-- **Policy loss**: Actor and critic losses
-- **Learning rate**: Current learning rate schedule
+.. code-block:: bash
 
-**2. Training Output**
+   /isaac-sim/python.sh -m tensorboard.main --logdir logs/rsl_rl
 
-During training, you'll see periodic console output:
+During training, each iteration prints a summary to the console:
 
 .. code-block:: text
 
@@ -159,43 +96,28 @@ During training, you'll see periodic console output:
                             Time elapsed: 00:00:04
                                      ETA: 00:00:49
 
-   [INFO] Saved checkpoint to: logs/rsl_rl/generic_experiment/<timestamp>/model_<iteration>.pt
-
-**3. Checkpoints**
-
-Model checkpoints are saved to:
-
-``logs/rsl_rl/generic_experiment/<timestamp>/model_<iteration>.pt``
-
-Example: ``logs/rsl_rl/generic_experiment/2026-01-29_12-30-00/model_2000.pt``
-
 
 Multi-GPU Training
 ^^^^^^^^^^^^^^^^^^
 
-For faster training on multi-GPU systems, use the ``--distributed`` flag:
+Add ``--distributed`` to spread environments across all available GPUs:
 
 .. code-block:: bash
 
-   python isaaclab_arena/scripts/reinforcement_learning/train.py \
-     --env_spacing 5.0 \
+   /isaac-sim/python.sh submodules/IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py \
+     --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback \
+     --task lift_object \
      --num_envs 512 \
      --max_iterations 12000 \
-     --save_interval 500 \
      --headless \
-     --distributed \
-     lift_object
-
-This automatically distributes environments across available GPUs.
+     --distributed
 
 
 Expected Results
 ^^^^^^^^^^^^^^^^
 
-After 12,000 iterations (~6 hours on a single GPU with 512 environments):
-
-The trained policy should reliably grasp and lift objects to commanded target positions.
-Please refer to the following gif for an example of the trained policy:
+After 12,000 iterations (~6 hours on a single GPU with 512 environments), the trained
+policy should reliably grasp and lift objects to commanded target positions.
 
 .. image:: ../../../images/lift_object_rl_task.gif
    :align: center
diff --git a/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst b/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst
index 05e2e08c1..5f71a4913 100644
--- a/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst
+++ b/docs/pages/example_workflows/reinforcement_learning/step_3_evaluation.rst
@@ -1,9 +1,6 @@
 Closed-Loop Policy Inference and Evaluation
 -------------------------------------------
 
-This workflow demonstrates running the trained RSL-RL policy in closed-loop
-and evaluating it in the Lift Object environment.
-
 **Docker Container**: Base (see :doc:`../../quickstart/docker_containers` for more details)
 
 :docker_run_default:
@@ -15,16 +12,12 @@ Once inside the container, set the models directory if you plan to download pre-
     export MODELS_DIR=models/isaaclab_arena/reinforcement_learning
     mkdir -p $MODELS_DIR
 
-Note that this tutorial assumes that you've completed the
-:doc:`preceding step (Policy Training)   <step_2_policy_training>` and have a trained checkpoint available,
-or you can download a pre-trained checkpoint as described below.
+This tutorial assumes you've completed :doc:`step_2_policy_training` and have a trained checkpoint.
+Alternatively, download a pre-trained checkpoint as described below.
 
 .. dropdown:: Download Pre-trained Model (skip preceding steps)
    :animate: fade-in
 
-   These commands can be used to download a pre-trained RSL-RL policy checkpoint,
-   such that the preceding training step can be skipped.
-
    .. code-block:: bash
 
       hf download \
@@ -32,73 +25,29 @@ or you can download a pre-trained checkpoint as described below.
          model_11999.pt \
          --local-dir $MODELS_DIR/lift_object_checkpoint
 
-   After downloading, you can use the checkpoint at:
+   After downloading, the checkpoint is at:
 
    ``$MODELS_DIR/lift_object_checkpoint/model_11999.pt``
 
-   Replace checkpoint paths in the examples below with this path to evaluate the pre-trained model.
+   Replace checkpoint paths in the examples below with this path.
 
 
 Evaluation Methods
 ^^^^^^^^^^^^^^^^^^
 
-Isaac Lab Arena provides multiple ways to evaluate trained RL policies:
-
-1. **Quick Visualization (play.py)**: Fast visual inspection of policy behavior
-2. **Single Environment Evaluation (policy_runner.py)**: Detailed evaluation with metrics
-3. **Parallel Environment Evaluation (policy_runner.py)**: Large-scale statistical evaluation
-4. **Batch Evaluation (eval_runner.py)**: Automated evaluation of multiple checkpoints
-
-
-Method 1: Quick Visualization
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The ``play.py`` script provides the fastest way to visually inspect your trained policy.
-This is useful for debugging and quick quality checks.
-
-.. code-block:: bash
-
-   python isaaclab_arena/scripts/reinforcement_learning/play.py \
-     --env_spacing 30.0 \
-     --num_envs 16 \
-     --checkpoint logs/rsl_rl/generic_experiment/2026-01-28_17-26-10/model_11999.pt \
-     lift_object
-
-**Key Features:**
+There are three ways to evaluate a trained policy:
 
-- Fast startup with GUI enabled by default
-- Visualizes policy rollouts in real-time
-- No metrics computation (pure visualization)
-- Useful for debugging policy behavior
+1. **Single environment** (``policy_runner.py``): detailed evaluation with metrics
+2. **Parallel environments** (``policy_runner.py``): larger-scale statistical evaluation
+3. **Batch evaluation** (``eval_runner.py``): automated evaluation across multiple checkpoints
 
-**Command Arguments:**
 
-.. list-table::
-   :widths: 30 70
-   :header-rows: 1
-
-   * - Argument
-     - Description
-   * - ``--env_spacing 30.0``
-     - Larger spacing for visualization (avoids visual clutter)
-   * - ``--num_envs 16``
-     - Number of parallel environments to visualize
-   * - ``--checkpoint <path>``
-     - Path to the trained model checkpoint (.pt file)
-   * - ``lift_object``
-     - Environment name (must be last)
-
-You should see multiple Franka robots simultaneously attempting to lift objects to various target positions.
-
-
-Method 2: Single Environment Evaluation
+Method 1: Single Environment Evaluation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The ``policy_runner.py`` provides comprehensive evaluation with task-specific metrics.
-
 .. code-block:: bash
 
-   python isaaclab_arena/evaluation/policy_runner.py \
+   /isaac-sim/python.sh isaaclab_arena/evaluation/policy_runner.py \
      --policy_type rsl_rl \
      --num_steps 1000 \
      --checkpoint_path logs/rsl_rl/generic_experiment/2026-01-28_17-26-10/model_11999.pt \
@@ -107,54 +56,28 @@ The ``policy_runner.py`` provides comprehensive evaluation with task-specific me
 
 .. note::
 
-   If you downloaded the pre-trained model from Hugging Face, replace the checkpoint path:
-
-   ``--checkpoint_path $MODELS_DIR/lift_object_checkpoint/model_11999.pt``
-
-**Important: Argument Order**
-
-Policy-specific arguments (``--policy_type``, ``--checkpoint_path``, etc.) must come **before** the environment name.
-Environment-specific arguments (``--rl_training_mode``, ``--object``, etc.) must come **after** the environment name.
-
-**Command Breakdown:**
-
-.. list-table::
-   :widths: 30 70
-   :header-rows: 1
-
-   * - Argument
-     - Description
-   * - ``--policy_type rsl_rl``
-     - Policy type to load (RSL-RL trained policy)
-   * - ``--num_steps 1000``
-     - Total simulation steps to run
-   * - ``--checkpoint_path <path>``
-     - Path to the model checkpoint
-   * - ``lift_object``
-     - Environment name
-   * - ``--rl_training_mode False``
-     - Enable success termination for evaluation
+   If you downloaded the pre-trained model from Hugging Face, replace the checkpoint path with:
+   ``$MODELS_DIR/lift_object_checkpoint/model_11999.pt``
 
-**Expected Output:**
+Policy-specific arguments (``--policy_type``, ``--checkpoint_path``, etc.) must come **before** the
+environment name. Environment-specific arguments (``--rl_training_mode``, ``--object``, etc.) must
+come **after** it.
 
-At the end of evaluation, you should see metrics similar to:
+At the end of the run, metrics are printed to the console:
 
 .. code-block:: text
 
    Metrics: {'success_rate': 0.85, 'num_episodes': 12}
 
-This indicates that 85% of episodes successfully lifted the object to the target position,
-across 12 completed episodes in 1000 steps.
-
 
-Method 3: Parallel Environment Evaluation
+Method 2: Parallel Environment Evaluation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-For more statistically significant results, evaluate across many parallel environments:
+For more statistically significant results, run across many environments in parallel:
 
 .. code-block:: bash
 
-   python isaaclab_arena/evaluation/policy_runner.py \
+   /isaac-sim/python.sh isaaclab_arena/evaluation/policy_runner.py \
      --policy_type rsl_rl \
      --num_steps 5000 \
      --num_envs 64 \
@@ -163,37 +86,17 @@ For more statistically significant results, evaluate across many parallel enviro
      lift_object \
      --rl_training_mode False
 
-**Additional Arguments:**
-
-.. list-table::
-   :widths: 30 70
-   :header-rows: 1
-
-   * - Argument
-     - Description
-   * - ``--num_envs 64``
-     - Run 64 parallel environments simultaneously
-   * - ``--headless``
-     - Run without GUI for faster evaluation
-   * - ``--num_steps 5000``
-     - More steps for more episodes
-
-**Expected Output:**
-
 .. code-block:: text
 
    Metrics: {'success_rate': 0.83, 'num_episodes': 156}
 
-Running more environments and steps provides better statistical estimates of policy performance.
-
 
-Method 4: Batch Evaluation with JSON Configuration
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Method 3: Batch Evaluation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-For systematic evaluation of multiple checkpoints or hyperparameter sweeps, use ``eval_runner.py``
-with a JSON configuration file.
+To evaluate multiple checkpoints in sequence, use ``eval_runner.py`` with a JSON config.
 
-**1. Create Evaluation Configuration**
+**1. Create an evaluation config**
 
 Create a file ``eval_config.json``:
 
@@ -224,16 +127,11 @@ Create a file ``eval_config.json``:
      ]
    }
 
-**2. Run Batch Evaluation**
+**2. Run**
 
 .. code-block:: bash
 
-   python isaaclab_arena/evaluation/eval_runner.py --eval_jobs_config eval_config.json
-
-This will automatically evaluate all checkpoints listed in the configuration and output
-a summary of metrics for each.
-
-**Expected Output:**
+   /isaac-sim/python.sh isaaclab_arena/evaluation/eval_runner.py --eval_jobs_config eval_config.json
 
 .. code-block:: text
 
@@ -252,49 +150,15 @@ a summary of metrics for each.
 Understanding the Metrics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The Lift Object task reports the following metrics:
-
-.. list-table::
-   :widths: 30 70
-   :header-rows: 1
-
-   * - Metric
-     - Description
-   * - ``success_rate``
-     - Fraction of episodes where object reached target position within tolerance
-   * - ``num_episodes``
-     - Total number of episodes completed during evaluation
-
-A well-trained policy should achieve:
-
-- **Success rate**: 70-90% (depends on target range difficulty)
-- **Consistent performance**: Success rate stable across multiple evaluation runs
-
-
-Troubleshooting
-^^^^^^^^^^^^^^^
-
-**Issue: Low success rate (<50%)**
-
-- Increase training iterations (try 20,000+)
-- Check reward configuration in task definition
-- Verify command sampling ranges are reasonable
-- Try different random seeds
-
-**Issue: Policy gets stuck or drops object**
-
-- Ensure object mass and friction are reasonable
-- Check gripper force limits
-- Visualize with ``play.py`` to diagnose behavior
-- Review episode recordings if ``--video`` was enabled during training
+The Lift Object task reports two metrics:
 
-**Issue: "Checkpoint not found" error**
+- ``success_rate``: fraction of episodes where the object reached the target position within tolerance
+- ``num_episodes``: total number of completed episodes during the evaluation run
 
-- Verify checkpoint path is correct
-- Use absolute paths if relative paths fail
-- Check that training completed and saved checkpoints
+A well-trained policy should reach a 70–90% success rate. Results will vary with the target range,
+random seed, and hardware.
 
 .. note::
 
-   When running evaluation, always set ``--rl_training_mode False`` to enable success termination.
-   During training, this flag is ``True`` by default to prevent early episode termination.
+   Always set ``--rl_training_mode False`` when evaluating. During training this flag is ``True``
+   to disable success termination; setting it to ``False`` re-enables it for proper evaluation.
diff --git a/isaaclab_arena/environments/arena_env_builder.py b/isaaclab_arena/environments/arena_env_builder.py
index 772923ae6..8898e63da 100644
--- a/isaaclab_arena/environments/arena_env_builder.py
+++ b/isaaclab_arena/environments/arena_env_builder.py
@@ -275,10 +275,17 @@ def build_registered(
         # THIS WILL BE REMOVED IN THE FUTURE.
         cfg_entry = self.modify_env_cfg(cfg_entry)
         entry_point = self.get_entry_point()
+        # Register the environment with the Gym registry.
+        kwargs = {
+            "env_cfg_entry_point": cfg_entry,
+        }
+        if self.arena_env.rl_framework is not None:
+            assert self.arena_env.rl_policy_cfg is not None
+            kwargs[self.arena_env.rl_framework.get_entry_point_string()] = self.arena_env.rl_policy_cfg
         gym.register(
             id=name,
             entry_point=entry_point,
-            kwargs={"env_cfg_entry_point": cfg_entry},
+            kwargs=kwargs,
             disable_env_checker=True,
         )
         cfg = parse_env_cfg(
diff --git a/isaaclab_arena/environments/isaaclab_arena_environment.py b/isaaclab_arena/environments/isaaclab_arena_environment.py
index a3ddc315a..9eb603777 100644
--- a/isaaclab_arena/environments/isaaclab_arena_environment.py
+++ b/isaaclab_arena/environments/isaaclab_arena_environment.py
@@ -13,6 +13,7 @@
     from isaaclab_arena.embodiments.embodiment_base import EmbodimentBase
     from isaaclab_arena.environments.isaaclab_arena_manager_based_env import IsaacLabArenaManagerBasedRLEnvCfg
     from isaaclab_arena.orchestrator.orchestrator_base import OrchestratorBase
+    from isaaclab_arena.reinforcement_learning.frameworks import RLFramework
     from isaaclab_arena.scene.scene import Scene
     from isaaclab_arena.tasks.task_base import TaskBase
 
@@ -29,6 +30,8 @@ def __init__(
         teleop_device: TeleopDeviceBase | None = None,
         orchestrator: OrchestratorBase | None = None,
         env_cfg_callback: Callable[IsaacLabArenaManagerBasedRLEnvCfg] | None = None,
+        rl_framework: RLFramework | None = None,
+        rl_policy_cfg: str | None = None,
     ):
         """
         Args:
@@ -47,3 +50,5 @@ def __init__(
         self.teleop_device = teleop_device
         self.orchestrator = orchestrator
         self.env_cfg_callback = env_cfg_callback
+        self.rl_framework = rl_framework
+        self.rl_policy_cfg = rl_policy_cfg
diff --git a/isaaclab_arena/environments/isaaclab_interop.py b/isaaclab_arena/environments/isaaclab_interop.py
new file mode 100644
index 000000000..6fd924fe3
--- /dev/null
+++ b/isaaclab_arena/environments/isaaclab_interop.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import argparse
+
+from isaaclab_arena_environments.cli import ExampleEnvironments
+
+
+def environment_registration_callback() -> list[str]:
+    """Register an IsaacLab Arena environment on behalf of an Isaac Lab script.
+
+    This function is passed to an Isaac Lab script as an external callback function. Example:
+
+    python IsaacLab/scripts/reinforcement_learning/rsl_rl/train.py
+        --external_callback isaaclab_arena.environments.isaaclab_interop.environment_registration_callback
+        --task lift_object
+        --num_envs 512
+
+    ``--task`` (read from ``sys.argv``) selects the ``ExampleEnvironments`` entry that is built and registered.
+
+    Returns:
+        The command-line arguments that this callback's parser did not recognize.
+    """
+    from isaaclab.app import AppLauncher
+
+    from isaaclab_arena.cli.isaaclab_arena_cli import add_isaac_lab_cli_args, add_isaaclab_arena_cli_args
+    from isaaclab_arena.environments.arena_env_builder import ArenaEnvBuilder
+
+    # Get the requested environment from the CLI.
+    parser = argparse.ArgumentParser()
+    # NOTE(alexmillane, 2026.02.12): With the Isaac Lab interop, we use the task name to
+    # determine the environment to register. The environment is also registered under this name.
+    # The result is that a single argument tells Arena what to register, and Lab what to run.
+    parser.add_argument("--task", type=str, required=True, help="Name of the IsaacLab Arena environment to register.")
+    environment_name = parser.parse_known_args()[0].task
+    environment = ExampleEnvironments[environment_name]()
+    # Now that the environment is known, add every argument group and re-parse the full command line.
+    AppLauncher.add_app_launcher_args(parser)
+    add_isaac_lab_cli_args(parser)
+    add_isaaclab_arena_cli_args(parser)
+    environment.add_cli_args(parser)
+    args, remaining_args = parser.parse_known_args()
+    # Create the environment config
+    isaaclab_arena_environment = environment.get_env(args)
+    # Build and register the environment
+    env_builder = ArenaEnvBuilder(isaaclab_arena_environment, args)
+    env_builder.build_registered()
+    # Return the arguments that were not consumed by this callback
+    return remaining_args
diff --git a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py b/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py
deleted file mode 100644
index 181c7fd2f..000000000
--- a/isaaclab_arena/policy/rl_policy/base_rsl_rl_policy.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import json
-from dataclasses import field
-from typing import Any
-
-from isaaclab.utils import configclass
-from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
-
-
-@configclass
-class RLPolicyCfg(RslRlOnPolicyRunnerCfg):
-
-    num_steps_per_env: int = 24
-    max_iterations: int = 4000
-    save_interval: int = 200
-    experiment_name: str = "generic_experiment"
-    obs_groups = field(
-        default_factory=lambda: {
-            "policy": ["policy"],
-            "critic": ["policy"],
-        }
-    )
-    policy: RslRlPpoActorCriticCfg = field(default_factory=RslRlPpoActorCriticCfg)
-    algorithm: RslRlPpoAlgorithmCfg = field(default_factory=RslRlPpoAlgorithmCfg)
-
-    @classmethod
-    def update_cfg(
-        cls,
-        policy_cfg: dict[str, Any],
-        algorithm_cfg: dict[str, Any],
-        obs_groups: dict[str, list[str]],
-        num_steps_per_env: int,
-        max_iterations: int,
-        save_interval: int,
-        experiment_name: str,
-    ):
-        cfg = cls()
-        cfg.policy = RslRlPpoActorCriticCfg(**policy_cfg)
-        cfg.algorithm = RslRlPpoAlgorithmCfg(**algorithm_cfg)
-        cfg.obs_groups = obs_groups
-        cfg.num_steps_per_env = num_steps_per_env
-        cfg.max_iterations = max_iterations
-        cfg.save_interval = save_interval
-        cfg.experiment_name = experiment_name
-        return cfg
-
-
-def get_agent_cfg(args_cli: argparse.Namespace) -> Any:
-    """Get the environment and agent configuration from the command line arguments."""
-
-    # Read a json file containing the agent configuration
-    with open(args_cli.agent_cfg_path) as f:
-        agent_cfg_dict = json.load(f)
-
-    policy_cfg = agent_cfg_dict["policy_cfg"]
-    algorithm_cfg = agent_cfg_dict["algorithm_cfg"]
-    obs_groups = agent_cfg_dict["obs_groups"]
-    # Load all other arguments if they are in args_cli as policy arguments
-    num_steps_per_env = args_cli.num_steps_per_env
-    max_iterations = args_cli.max_iterations
-    save_interval = args_cli.save_interval
-    experiment_name = args_cli.experiment_name
-
-    agent_cfg = RLPolicyCfg.update_cfg(
-        policy_cfg, algorithm_cfg, obs_groups, num_steps_per_env, max_iterations, save_interval, experiment_name
-    )
-
-    return agent_cfg
diff --git a/isaaclab_arena/policy/rl_policy/generic_policy.json b/isaaclab_arena/policy/rl_policy/generic_policy.json
deleted file mode 100644
index 6fdf9d2fa..000000000
--- a/isaaclab_arena/policy/rl_policy/generic_policy.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-    "policy_cfg": {
-      "init_noise_std": 1.0,
-      "actor_obs_normalization": false,
-      "critic_obs_normalization": false,
-      "actor_hidden_dims": [256, 128, 64],
-      "critic_hidden_dims": [256, 128, 64],
-      "activation": "elu"
-    },
-    "algorithm_cfg": {
-      "value_loss_coef": 1.0,
-      "use_clipped_value_loss": true,
-      "clip_param": 0.2,
-      "entropy_coef": 0.006,
-      "num_learning_epochs": 5,
-      "num_mini_batches": 4,
-      "learning_rate": 0.0001,
-      "schedule": "adaptive",
-      "gamma": 0.98,
-      "lam": 0.95,
-      "desired_kl": 0.01,
-      "max_grad_norm": 1.0
-    },
-    "obs_groups": {
-      "policy": ["policy", "task_obs"],
-      "critic": ["policy", "task_obs"]
-    }
-  }
diff --git a/isaaclab_arena/policy/rsl_rl_action_policy.py b/isaaclab_arena/policy/rsl_rl_action_policy.py
index e5bb4b441..aee721404 100644
--- a/isaaclab_arena/policy/rsl_rl_action_policy.py
+++ b/isaaclab_arena/policy/rsl_rl_action_policy.py
@@ -5,67 +5,50 @@
 
 import argparse
 import gymnasium as gym
+import os
 import torch
 from dataclasses import dataclass
 from gymnasium.spaces.dict import Dict as GymSpacesDict
-from pathlib import Path
 
 from isaaclab.utils.assets import retrieve_file_path
+from isaaclab.utils.io import load_yaml
 from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
 from rsl_rl.runners import DistillationRunner, OnPolicyRunner
 
 from isaaclab_arena.assets.register import register_policy
 from isaaclab_arena.policy.policy_base import PolicyBase
-from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg
-from isaaclab_arena.scripts.reinforcement_learning import cli_args
 
 
 @dataclass
 class RslRlActionPolicyConfig:
-    """
-    Configuration dataclass for RSL-RL action policy.
-
-    This dataclass serves as the single source of truth for policy configuration,
-    supporting both dict-based (from JSON) and CLI-based configuration paths.
-    """
+    """Configuration dataclass for RSL-RL action policy."""
 
     checkpoint_path: str
-    """Path to the RSL-RL checkpoint file."""
+    """Path to the RSL-RL checkpoint file.
 
-    agent_cfg_path: Path = Path("isaaclab_arena/policy/rl_policy/generic_policy.json")
-    """Path to the RL agent configuration file."""
+    The agent config is loaded automatically from ``params/agent.yaml`` in the
+    same directory, which is saved by IsaacLab's ``train.py`` alongside the checkpoint.
+    """
 
     device: str = "cuda:0"
     """Device to run the policy on."""
 
     @classmethod
     def from_cli_args(cls, args: argparse.Namespace) -> "RslRlActionPolicyConfig":
-        """
-        Create configuration from parsed CLI arguments.
-
-        Args:
-            args: Parsed command line arguments
-
-        Returns:
-            RslRlActionPolicyConfig instance
-        """
         return cls(
             checkpoint_path=args.checkpoint_path,
-            agent_cfg_path=args.agent_cfg_path,
             device=args.device if hasattr(args, "device") else "cuda:0",
         )
 
 
 @register_policy
 class RslRlActionPolicy(PolicyBase):
-    """
-    Policy that uses a trained RSL-RL model for inference.
+    """Policy that uses a trained RSL-RL model for inference.
 
-    This policy loads a checkpoint from RSL-RL training and uses it to generate
-    actions. It expects the environment to already be wrapped with RslRlVecEnvWrapper
-    if called from evaluation scripts.
+    Loads the checkpoint and agent config (``params/agent.yaml``) produced by
+    IsaacLab's ``train.py``. No separate JSON config file is required.
 
-    Example JSON configuration for eval runner:
+    Example configuration for eval runner:
 
     .. code-block:: json
 
@@ -75,9 +58,8 @@ class RslRlActionPolicy(PolicyBase):
               "name": "eval_lift_cube",
               "policy_type": "rsl_rl",
               "policy_config_dict": {
-                "checkpoint_path": "logs/rsl_rl/lift_object/model_1000.pt",
-                "agent_cfg_path": "isaaclab_arena/policy/rl_policy/generic_policy.json",
-                "device": "cuda:0",
+                "checkpoint_path": "logs/rsl_rl/lift_object/2026-01-28_17-26-10/model_1000.pt",
+                "device": "cuda:0"
               },
               "arena_env_args": ["lift_object", "--embodiment", "franka"]
             }
@@ -88,137 +70,58 @@ class RslRlActionPolicy(PolicyBase):
     name = "rsl_rl"
     config_class = RslRlActionPolicyConfig
 
-    def __init__(self, config: RslRlActionPolicyConfig, args_cli: argparse.Namespace | None = None):
-        """
-        Initialize RSL-RL action policy from a configuration dataclass.
-
-        Args:
-            config: RslRlActionPolicyConfig configuration dataclass
-            args_cli: Optional CLI arguments namespace. If provided, uses get_agent_cfg().
-                     If None, loads agent config directly from JSON file.
-        """
+    def __init__(self, config: RslRlActionPolicyConfig):
         super().__init__(config)
         self.config: RslRlActionPolicyConfig = config
         self._policy = None
         self._runner = None
-        self._env_is_wrapped = False
-        self.args_cli = args_cli
 
     def _load_policy(self, env: gym.Env) -> None:
-        """
-        Load the RSL-RL policy from checkpoint.
-
-        Args:
-            env: The gym environment (should already be wrapped with RslRlVecEnvWrapper)
-        """
-        import json
-
-        # Load agent configuration
-        # Prefer using get_agent_cfg() if args_cli is available (more robust)
-        # Otherwise, load directly from JSON (for from_dict() path)
-        if self.args_cli is not None:
-            agent_cfg = get_agent_cfg(self.args_cli)
-        else:
-            # Fallback: Load agent configuration directly from JSON file
-            with open(self.config.agent_cfg_path) as f:
-                agent_cfg_dict = json.load(f)
-
-            # Import the config class and create agent config
-            from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import RLPolicyCfg
-
-            policy_cfg = agent_cfg_dict["policy_cfg"]
-            algorithm_cfg = agent_cfg_dict["algorithm_cfg"]
-            obs_groups = agent_cfg_dict.get("obs_groups", {})
-
-            # Use defaults for training-specific parameters (not needed for inference)
-            num_steps_per_env = agent_cfg_dict.get("num_steps_per_env", 24)
-            max_iterations = agent_cfg_dict.get("max_iterations", 1500)
-            save_interval = agent_cfg_dict.get("save_interval", 100)
-            experiment_name = agent_cfg_dict.get("experiment_name", "rsl_rl")
+        """Load the RSL-RL policy from checkpoint and its accompanying agent.yaml."""
+        checkpoint_path = retrieve_file_path(self.config.checkpoint_path)
+        agent_yaml_path = os.path.join(os.path.dirname(checkpoint_path), "params", "agent.yaml")
 
-            agent_cfg = RLPolicyCfg.update_cfg(
-                policy_cfg, algorithm_cfg, obs_groups, num_steps_per_env, max_iterations, save_interval, experiment_name
+        if not os.path.exists(agent_yaml_path):
+            raise FileNotFoundError(
+                f"No agent config found at {agent_yaml_path}. "
+                "Ensure the checkpoint was produced by IsaacLab's train.py."
             )
 
-        # Override device from config
-        agent_cfg.device = self.config.device
+        agent_cfg_dict = load_yaml(agent_yaml_path)
+        agent_cfg_dict["device"] = self.config.device
 
-        # Check if environment is already wrapped
-        if isinstance(env, RslRlVecEnvWrapper):
-            wrapped_env = env
-            self._env_is_wrapped = True
-        else:
-            # Wrap if needed (for standalone policy runner usage)
-            wrapped_env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
-            self._env_is_wrapped = False
+        clip_actions = agent_cfg_dict.get("clip_actions")
+        class_name = agent_cfg_dict.get("class_name", "OnPolicyRunner")
 
-        # Create the appropriate runner
-        if agent_cfg.class_name == "OnPolicyRunner":
-            self._runner = OnPolicyRunner(
-                wrapped_env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device  # type: ignore[attr-defined]
-            )
-        elif agent_cfg.class_name == "DistillationRunner":
-            self._runner = DistillationRunner(
-                wrapped_env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device  # type: ignore[attr-defined]
-            )
+        wrapped_env = RslRlVecEnvWrapper(env, clip_actions=clip_actions)
+        # NOTE(review): env is wrapped unconditionally above; confirm callers never pass a pre-wrapped env.
+        if class_name == "OnPolicyRunner":
+            self._runner = OnPolicyRunner(wrapped_env, agent_cfg_dict, log_dir=None, device=self.config.device)
+        elif class_name == "DistillationRunner":
+            self._runner = DistillationRunner(wrapped_env, agent_cfg_dict, log_dir=None, device=self.config.device)
         else:
-            raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
+            raise ValueError(f"Unsupported runner class: {class_name}")
 
-        # Load the checkpoint
-        checkpoint_path = retrieve_file_path(self.config.checkpoint_path)
         print(f"[INFO] Loading RSL-RL checkpoint from: {checkpoint_path}")
         self._runner.load(checkpoint_path)
-
-        # Get the inference policy
         self._policy = self._runner.get_inference_policy(device=wrapped_env.unwrapped.device)
 
     def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor:
-        """
-        Get the action from the RSL-RL policy.
-
-        Args:
-            env: The gym environment
-            observation: Current observation from the environment
-
-        Returns:
-            Action tensor from the policy
-        """
-        # Load policy on first call
         if self._policy is None:
             self._load_policy(env)
 
-        # Type checker doesn't know _policy is not None after _load_policy
         assert self._policy is not None, "Policy should be loaded after _load_policy()"
 
         with torch.inference_mode():
             return self._policy(observation)
 
     def reset(self, env_ids: torch.Tensor | None = None) -> None:
-        """
-        Reset the policy state for specific environments.
-
-        Args:
-            env_ids: Indices of environments to reset. If None, reset all.
-        """
-        # RSL-RL policies are typically stateless for evaluation
-        # Override if your policy has recurrent components
         pass
 
     @classmethod
     def from_dict(cls, config_dict: dict) -> "RslRlActionPolicy":
-        """
-        Create a policy instance from a configuration dictionary.
-
-        This override ensures args_cli is None when loading from JSON config.
-
-        Args:
-            config_dict: Dictionary containing the configuration fields
-
-        Returns:
-            RslRlActionPolicy instance
-        """
         config = RslRlActionPolicyConfig(**config_dict)
-        return cls(config, args_cli=None)
+        return cls(config)
 
     @staticmethod
     def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
@@ -228,31 +131,14 @@ def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars
             "--checkpoint_path",
             type=str,
             required=True,
-            help="Path to the checkpoint file containing the RSL-RL policy",
-        )
-        rsl_rl_group.add_argument(
-            "--agent_cfg_path",
-            type=Path,
-            default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"),
-            help="Path to the RL agent configuration file.",
+            help=(
+                "Path to the checkpoint file. Agent config is loaded automatically from params/agent.yaml in the same"
+                " directory."
+            ),
         )
-        # append RSL-RL cli arguments
-        cli_args.add_rsl_rl_args(parser)
-        cli_args.add_rsl_rl_policy_args(parser)
         return parser
 
     @staticmethod
     def from_args(args: argparse.Namespace) -> "RslRlActionPolicy":
-        """
-        Create a RSL-RL action policy instance from parsed CLI arguments.
-
-        Path: CLI args → ConfigDataclass → init cls
-
-        Args:
-            args: Parsed command line arguments
-
-        Returns:
-            RslRlActionPolicy instance
-        """
         config = RslRlActionPolicyConfig.from_cli_args(args)
-        return RslRlActionPolicy(config, args_cli=args)
+        return RslRlActionPolicy(config)
diff --git a/isaaclab_arena/reinforcement_learning/frameworks.py b/isaaclab_arena/reinforcement_learning/frameworks.py
new file mode 100644
index 000000000..7d4aaa08f
--- /dev/null
+++ b/isaaclab_arena/reinforcement_learning/frameworks.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+from enum import Enum
+
+
+class RLFramework(Enum):
+    """Identifiers for reinforcement-learning frameworks.
+
+    Each member's value is the framework's short name string, which
+    :meth:`get_entry_point_string` uses to build a ``*_cfg_entry_point`` key.
+    """
+
+    RSL_RL = "rsl_rl"
+    SKRL = "skrl"
+    RL_GAMES = "rl_games"
+    SB3 = "sb3"
+
+    def get_entry_point_string(self) -> str:
+        """Return this framework's config entry-point key.
+
+        Returns:
+            The member value suffixed with ``_cfg_entry_point``, e.g.
+            ``"rsl_rl_cfg_entry_point"`` for :attr:`RSL_RL`.
+        """
+        return f"{self.value}_cfg_entry_point"
diff --git a/isaaclab_arena/scripts/reinforcement_learning/cli_args.py b/isaaclab_arena/scripts/reinforcement_learning/cli_args.py
deleted file mode 100644
index 8148c1226..000000000
--- a/isaaclab_arena/scripts/reinforcement_learning/cli_args.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from __future__ import annotations
-
-import argparse
-import random
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg
-
-
-def add_rsl_rl_args(parser: argparse.ArgumentParser):
-    """Add RSL-RL arguments to the parser.
-
-    Args:
-        parser: The parser to add the arguments to.
-    """
-    # create a new argument group
-    arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.")
-    arg_group.add_argument("--run_name", type=str, default=None, help="Run name suffix to the log directory.")
-    # -- load arguments
-    arg_group.add_argument("--resume", action="store_true", default=False, help="Whether to resume from a checkpoint.")
-    arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.")
-    arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.")
-    # -- logger arguments
-    arg_group.add_argument(
-        "--logger", type=str, default=None, choices={"wandb", "tensorboard", "neptune"}, help="Logger module to use."
-    )
-    arg_group.add_argument(
-        "--log_project_name", type=str, default=None, help="Name of the logging project when using wandb or neptune."
-    )
-
-
-def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlBaseRunnerCfg:
-    """Parse configuration for RSL-RL agent based on inputs.
-
-    Args:
-        task_name: The name of the environment.
-        args_cli: The command line arguments.
-
-    Returns:
-        The parsed configuration for RSL-RL agent based on inputs.
-    """
-    from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry
-
-    # load the default configuration
-    rslrl_cfg: RslRlBaseRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point")
-    rslrl_cfg = update_rsl_rl_cfg(rslrl_cfg, args_cli)
-    return rslrl_cfg
-
-
-def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespace):
-    """Update configuration for RSL-RL agent based on inputs.
-
-    Args:
-        agent_cfg: The configuration for RSL-RL agent.
-        args_cli: The command line arguments.
-
-    Returns:
-        The updated configuration for RSL-RL agent based on inputs.
-    """
-    # override the default configuration with CLI arguments
-    if hasattr(args_cli, "seed") and args_cli.seed is not None:
-        # randomly sample a seed if seed = -1
-        if args_cli.seed == -1:
-            args_cli.seed = random.randint(0, 10000)
-        agent_cfg.seed = args_cli.seed
-    if args_cli.resume is not None:
-        agent_cfg.resume = args_cli.resume
-    if args_cli.load_run is not None:
-        agent_cfg.load_run = args_cli.load_run
-    if args_cli.checkpoint is not None:
-        agent_cfg.load_checkpoint = args_cli.checkpoint
-    if args_cli.run_name is not None:
-        agent_cfg.run_name = args_cli.run_name
-    if args_cli.logger is not None:
-        agent_cfg.logger = args_cli.logger
-    # set the project name for wandb and neptune
-    if agent_cfg.logger in {"wandb", "neptune"} and args_cli.log_project_name:
-        agent_cfg.wandb_project = args_cli.log_project_name
-        agent_cfg.neptune_project = args_cli.log_project_name
-
-    return agent_cfg
-
-
-def add_rsl_rl_policy_args(parser: argparse.ArgumentParser):
-    """Add RSL-RL policy arguments to the parser.
-
-    Args:
-        parser: The parser to add the arguments to.
-    """
-    arg_group = parser.add_argument_group("rsl_rl_policy", description="Arguments for RSL-RL policy.")
-    arg_group.add_argument("--num_steps_per_env", type=int, default=24, help="Number of steps per environment.")
-    arg_group.add_argument("--max_iterations", type=int, default=4000, help="Maximum number of iterations.")
-    arg_group.add_argument("--save_interval", type=int, default=200, help="Save interval.")
-    arg_group.add_argument(
-        "--experiment_name",
-        type=str,
-        default="generic_experiment",
-        help="Name of the experiment folder where logs will be stored.",
-    )
-    return arg_group
diff --git a/isaaclab_arena/scripts/reinforcement_learning/play.py b/isaaclab_arena/scripts/reinforcement_learning/play.py
deleted file mode 100644
index e8edb3196..000000000
--- a/isaaclab_arena/scripts/reinforcement_learning/play.py
+++ /dev/null
@@ -1,202 +0,0 @@
-# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Script to play a checkpoint if an RL agent from RSL-RL."""
-
-"""Launch Isaac Sim Simulator first."""
-
-from pathlib import Path
-
-from isaaclab.app import AppLauncher
-
-from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser
-from isaaclab_arena_environments.cli import add_example_environments_cli_args
-
-# local imports
-import cli_args  # isort: skip
-
-# add argparse arguments
-parser = get_isaaclab_arena_cli_parser()
-parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
-parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
-parser.add_argument(
-    "--agent_cfg_path",
-    type=Path,
-    default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"),
-    help="Path to the RL agent configuration file.",
-)
-parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
-# append RSL-RL cli arguments
-cli_args.add_rsl_rl_args(parser)
-cli_args.add_rsl_rl_policy_args(parser)
-# Add the example environments CLI args
-# NOTE(alexmillane, 2025.09.04): This has to be added last, because
-# of the app specific flags being parsed after the global flags.
-add_example_environments_cli_args(parser)
-args_cli = parser.parse_args()
-
-# always enable cameras to record video
-if args_cli.video:
-    args_cli.enable_cameras = True
-
-if args_cli.enable_pinocchio:
-    # Import pinocchio before AppLauncher to force the use of the version installed by IsaacLab and not the one installed by Isaac Sim
-    # pinocchio is required by the Pink IK controllers and the GR1T2 retargeter
-    import pinocchio  # noqa: F401
-
-# launch omniverse app
-app_launcher = AppLauncher(args_cli)
-simulation_app = app_launcher.app
-
-"""Rest everything follows."""
-
-import gymnasium as gym
-import os
-import time
-import torch
-
-import isaaclab_tasks  # noqa: F401
-import omni.log
-from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent
-from isaaclab.utils.assets import retrieve_file_path
-from isaaclab.utils.dict import print_dict
-from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
-from isaaclab_tasks.utils import get_checkpoint_path
-from rsl_rl.runners import DistillationRunner, OnPolicyRunner
-
-from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg
-from isaaclab_arena_environments.cli import get_arena_builder_from_cli
-
-# PLACEHOLDER: Extension template (do not remove this comment)
-
-
-def main():
-    """Play with RSL-RL agent."""
-    # We dont use hydra for the environment configuration, so we need to parse it manually
-    # parse configuration
-    try:
-        arena_builder = get_arena_builder_from_cli(args_cli)
-        env_name, env_cfg = arena_builder.build_registered()
-
-    except Exception as e:
-        omni.log.error(f"Failed to parse environment configuration: {e}")
-        exit(1)
-
-    agent_cfg = get_agent_cfg(args_cli)
-
-    # override configurations with non-hydra CLI arguments
-    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
-    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
-
-    # set the environment seed
-    # note: certain randomizations occur in the environment initialization so we set the seed here
-    env_cfg.seed = agent_cfg.seed
-    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-
-    # specify directory for logging experiments
-    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
-    log_root_path = os.path.abspath(log_root_path)
-    print(f"[INFO] Loading experiment from directory: {log_root_path}")
-    if args_cli.checkpoint:
-        resume_path = retrieve_file_path(args_cli.checkpoint)
-    else:
-        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
-
-    log_dir = os.path.dirname(resume_path)
-
-    # set the log directory for the environment (works for all environment types)
-    env_cfg.log_dir = log_dir
-
-    # create isaac environment
-    env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
-
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
-    # wrap for video recording
-    if args_cli.video:
-        video_kwargs = {
-            "video_folder": os.path.join(log_dir, "videos", "play"),
-            "step_trigger": lambda step: step == 0,
-            "video_length": args_cli.video_length,
-            "disable_logger": True,
-        }
-        print("[INFO] Recording videos during training.")
-        print_dict(video_kwargs, nesting=4)
-        env = gym.wrappers.RecordVideo(env, **video_kwargs)
-
-    # wrap around environment for rsl-rl
-    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
-
-    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
-    # load previously trained model
-    if agent_cfg.class_name == "OnPolicyRunner":
-        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
-    elif agent_cfg.class_name == "DistillationRunner":
-        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
-    else:
-        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
-    runner.load(resume_path)
-
-    # obtain the trained policy for inference
-    policy = runner.get_inference_policy(device=env.unwrapped.device)
-
-    # extract the neural network module
-    # we do this in a try-except to maintain backwards compatibility.
-    try:
-        # version 2.3 onwards
-        policy_nn = runner.alg.policy
-    except AttributeError:
-        # version 2.2 and below
-        policy_nn = runner.alg.actor_critic
-
-    # extract the normalizer
-    if hasattr(policy_nn, "actor_obs_normalizer"):
-        normalizer = policy_nn.actor_obs_normalizer
-    elif hasattr(policy_nn, "student_obs_normalizer"):
-        normalizer = policy_nn.student_obs_normalizer
-    else:
-        normalizer = None
-
-    # export policy to onnx/jit
-    export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
-    export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
-    export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
-
-    dt = env.unwrapped.step_dt
-
-    # reset environment
-    obs = env.get_observations()
-    timestep = 0
-    # simulate environment
-    while simulation_app.is_running():
-        start_time = time.time()
-        # run everything in inference mode
-        with torch.inference_mode():
-            # agent stepping
-            actions = policy(obs)
-            # env stepping
-            obs, _, _, _ = env.step(actions)
-        if args_cli.video:
-            timestep += 1
-            # Exit the play loop after recording one video
-            if timestep == args_cli.video_length:
-                break
-
-        # time delay for real-time evaluation
-        sleep_time = dt - (time.time() - start_time)
-        if args_cli.real_time and sleep_time > 0:
-            time.sleep(sleep_time)
-
-    # close the simulator
-    env.close()
-
-
-if __name__ == "__main__":
-    # run the main function
-    main()
-    # close sim app
-    simulation_app.close()
diff --git a/isaaclab_arena/scripts/reinforcement_learning/train.py b/isaaclab_arena/scripts/reinforcement_learning/train.py
deleted file mode 100644
index a7ad52391..000000000
--- a/isaaclab_arena/scripts/reinforcement_learning/train.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
-"""Script to train RL agent with RSL-RL."""
-
-"""Launch Isaac Sim Simulator first."""
-
-from pathlib import Path
-
-from isaaclab.app import AppLauncher
-
-from isaaclab_arena.cli.isaaclab_arena_cli import get_isaaclab_arena_cli_parser
-from isaaclab_arena_environments.cli import add_example_environments_cli_args
-
-# local imports
-import cli_args  # isort: skip
-
-# add argparse arguments
-parser = get_isaaclab_arena_cli_parser()
-parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
-parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
-parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
-parser.add_argument(
-    "--agent_cfg_path",
-    type=Path,
-    default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"),
-    help="Path to the RL agent configuration file.",
-)
-parser.add_argument(
-    "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
-)
-parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.")
-# append RSL-RL cli arguments
-cli_args.add_rsl_rl_args(parser)
-cli_args.add_rsl_rl_policy_args(parser)
-# Add the example environments CLI args
-# NOTE(alexmillane, 2025.09.04): This has to be added last, because
-# of the app specific flags being parsed after the global flags.
-add_example_environments_cli_args(parser)
-args_cli = parser.parse_args()
-
-# always enable cameras to record video
-if args_cli.video:
-    args_cli.enable_cameras = True
-
-if args_cli.enable_pinocchio:
-    # Import pinocchio before AppLauncher to force the use of the version installed by IsaacLab and not the one installed by Isaac Sim
-    # pinocchio is required by the Pink IK controllers and the GR1T2 retargeter
-    import pinocchio  # noqa: F401
-
-# launch omniverse app
-app_launcher = AppLauncher(args_cli)
-simulation_app = app_launcher.app
-
-"""Check for minimum supported RSL-RL version."""
-
-import importlib.metadata as metadata
-import platform
-
-from packaging import version
-
-# check minimum supported rsl-rl version
-RSL_RL_VERSION = "3.0.1"
-installed_version = metadata.version("rsl-rl-lib")
-if version.parse(installed_version) < version.parse(RSL_RL_VERSION):
-    if platform.system() == "Windows":
-        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
-    else:
-        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
-    print(
-        f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'"
-        f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:"
-        f"\n\n\t{' '.join(cmd)}\n"
-    )
-    exit(1)
-
-"""Rest everything follows."""
-
-import gymnasium as gym
-import os
-import torch
-from datetime import datetime
-
-import isaaclab_tasks  # noqa: F401
-import omni.log
-from isaaclab.envs import DirectMARLEnv, ManagerBasedRLEnvCfg, multi_agent_to_single_agent
-from isaaclab.utils.dict import print_dict
-from isaaclab.utils.io import dump_yaml
-from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
-from isaaclab_tasks.utils import get_checkpoint_path
-from rsl_rl.runners import DistillationRunner, OnPolicyRunner
-
-from isaaclab_arena.policy.rl_policy.base_rsl_rl_policy import get_agent_cfg
-from isaaclab_arena_environments.cli import get_arena_builder_from_cli
-
-# PLACEHOLDER: Extension template (do not remove this comment)
-
-torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.allow_tf32 = True
-torch.backends.cudnn.deterministic = False
-torch.backends.cudnn.benchmark = False
-
-
-def main():
-    # We dont use hydra for the environment configuration, so we need to parse it manually
-    # parse configuration
-    try:
-        arena_builder = get_arena_builder_from_cli(args_cli)
-        env_name, env_cfg = arena_builder.build_registered()
-
-    except Exception as e:
-        omni.log.error(f"Failed to parse environment configuration: {e}")
-        exit(1)
-
-    agent_cfg = get_agent_cfg(args_cli)
-
-    # set the environment seed
-    # note: certain randomizations occur in the environment initialization so we set the seed here
-    env_cfg.seed = agent_cfg.seed
-    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-    # check for invalid combination of CPU device with distributed training
-    if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device:
-        raise ValueError(
-            "Distributed training is not supported when using CPU device. "
-            "Please use GPU device (e.g., --device cuda) for distributed training."
-        )
-
-    # multi-gpu training configuration
-    if args_cli.distributed:
-        env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
-        agent_cfg.device = f"cuda:{app_launcher.local_rank}"
-
-        # set seed to have diversity in different threads
-        seed = agent_cfg.seed + app_launcher.local_rank
-        env_cfg.seed = seed
-        agent_cfg.seed = seed
-
-    # specify directory for logging experiments
-    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
-    log_root_path = os.path.abspath(log_root_path)
-    print(f"[INFO] Logging experiment in directory: {log_root_path}")
-    # specify directory for logging runs: {time-stamp}_{run_name}
-    log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-    # The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849)
-    print(f"Exact experiment name requested from command line: {log_dir}")
-    if agent_cfg.run_name:
-        log_dir += f"_{agent_cfg.run_name}"
-    log_dir = os.path.join(log_root_path, log_dir)
-
-    # set the IO descriptors export flag if requested
-    if isinstance(env_cfg, ManagerBasedRLEnvCfg):
-        env_cfg.export_io_descriptors = args_cli.export_io_descriptors
-    else:
-        omni.log.warn(
-            "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported."
-        )
-
-    # set the log directory for the environment (works for all environment types)
-    env_cfg.log_dir = log_dir
-
-    # create isaac environment
-    env = gym.make(env_name, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
-
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
-    # save resume path before creating a new log_dir
-    if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation":
-        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
-
-    # wrap for video recording
-    if args_cli.video:
-        video_kwargs = {
-            "video_folder": os.path.join(log_dir, "videos", "train"),
-            "step_trigger": lambda step: step % args_cli.video_interval == 0,
-            "video_length": args_cli.video_length,
-            "disable_logger": True,
-        }
-        print("[INFO] Recording videos during training.")
-        print_dict(video_kwargs, nesting=4)
-        env = gym.wrappers.RecordVideo(env, **video_kwargs)
-
-    # wrap around environment for rsl-rl
-    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
-
-    # create runner from rsl-rl
-    if agent_cfg.class_name == "OnPolicyRunner":
-        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
-    elif agent_cfg.class_name == "DistillationRunner":
-        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
-    else:
-        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
-    # write git state to logs
-    runner.add_git_repo_to_log(__file__)
-    # load the checkpoint
-    if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation":
-        print(f"[INFO]: Loading model checkpoint from: {resume_path}")
-        # load previously trained model
-        runner.load(resume_path)
-
-    # dump the configuration into log-directory
-    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
-    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
-
-    # run training
-    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
-
-    # close the simulator
-    env.close()
-
-
-if __name__ == "__main__":
-    # run the main function
-    main()
-    # close sim app
-    simulation_app.close()
diff --git a/isaaclab_arena/utils/cameras.py b/isaaclab_arena/utils/cameras.py
index fcfd7b786..4567bb7e0 100644
--- a/isaaclab_arena/utils/cameras.py
+++ b/isaaclab_arena/utils/cameras.py
@@ -117,6 +117,10 @@ def get_viewer_cfg_look_at_object(lookat_object: Asset, offset: np.ndarray) -> V
     if isinstance(initial_pose, PoseRange):
         initial_pose = initial_pose.get_midpoint()
 
-    lookat = initial_pose.position_xyz
-    camera_position = tuple(np.array(lookat) + offset)
+    # TODO(cvolk): Add float coercion to Pose.__post_init__ so this conversion is unnecessary.
+    # Ensure we only pass primitive Python floats (not NumPy scalars) into ViewerCfg,
+    # since downstream config systems like Hydra/OmegaConf don't support np.float64.
+    lookat = tuple(float(x) for x in initial_pose.position_xyz)
+    camera_vec = np.array(lookat, dtype=float) + np.array(offset, dtype=float)
+    camera_position = tuple(float(x) for x in camera_vec.tolist())
     return ViewerCfg(eye=camera_position, lookat=lookat, origin_type="env")
diff --git a/isaaclab_arena_environments/lift_object_environment.py b/isaaclab_arena_environments/lift_object_environment.py
index 17857b678..69e768a12 100644
--- a/isaaclab_arena_environments/lift_object_environment.py
+++ b/isaaclab_arena_environments/lift_object_environment.py
@@ -19,7 +19,9 @@ class LiftObjectEnvironment(ExampleEnvironmentBase):
     name: str = "lift_object"
 
     def get_env(self, args_cli: argparse.Namespace):  # -> IsaacLabArenaEnvironment:
+        import isaaclab_arena_examples.policy.base_rsl_rl_policy as base_rsl_rl_policy
         from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment
+        from isaaclab_arena.reinforcement_learning.frameworks import RLFramework
         from isaaclab_arena.scene.scene import Scene
         from isaaclab_arena.tasks.lift_object_task import LiftObjectTaskRL
         from isaaclab_arena.utils.pose import Pose
@@ -64,6 +66,8 @@ def get_env(self, args_cli: argparse.Namespace):  # -> IsaacLabArenaEnvironment:
             scene=scene,
             task=task,
             teleop_device=teleop_device,
+            rl_framework=RLFramework.RSL_RL,
+            rl_policy_cfg=f"{base_rsl_rl_policy.__name__}:RLPolicyCfg",
         )
 
         return isaaclab_arena_environment
diff --git a/isaaclab_arena/scripts/reinforcement_learning/__init__.py b/isaaclab_arena_examples/policy/__init__.py
similarity index 100%
rename from isaaclab_arena/scripts/reinforcement_learning/__init__.py
rename to isaaclab_arena_examples/policy/__init__.py
diff --git a/isaaclab_arena_examples/policy/base_rsl_rl_policy.py b/isaaclab_arena_examples/policy/base_rsl_rl_policy.py
new file mode 100644
index 000000000..7ae23a428
--- /dev/null
+++ b/isaaclab_arena_examples/policy/base_rsl_rl_policy.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from dataclasses import field
+
+from isaaclab.utils import configclass
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+
+
+@configclass
+class RLPolicyCfg(RslRlOnPolicyRunnerCfg):
+    """Default RSL-RL runner configuration for Arena environments.
+
+    Used as the ``rsl_rl_cfg_entry_point`` when registering environments with gym,
+    allowing IsaacLab's ``train.py`` to load it via ``@hydra_task_config``.
+    """
+
+    num_steps_per_env: int = 24
+    max_iterations: int = 4000
+    save_interval: int = 200
+    experiment_name: str = "generic_experiment"
+    obs_groups: dict[str, list[str]] = field(  # annotated explicitly, like every other field here
+        default_factory=lambda: {
+            "policy": ["policy"],
+            "critic": ["policy"],  # the critic entry reuses the "policy" observation group
+        }
+    )
+    policy: RslRlPpoActorCriticCfg = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_obs_normalization=False,
+        critic_obs_normalization=False,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    )
+    algorithm: RslRlPpoAlgorithmCfg = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.006,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=0.0001,
+        schedule="adaptive",
+        gamma=0.98,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
diff --git a/submodules/IsaacLab b/submodules/IsaacLab
index 6acdd82a1..e7607ed15 160000
--- a/submodules/IsaacLab
+++ b/submodules/IsaacLab
@@ -1 +1 @@
-Subproject commit 6acdd82a1633732d32bb575e3d792e34fdeb437e
+Subproject commit e7607ed155853a64f824302456cd5975cccf36ee