From 8f4c68f1fb404d512820a2a13999747ae3f9df56 Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Tue, 10 Mar 2026 19:23:52 -0700 Subject: [PATCH 01/11] Add --video support for kitless renderers backends --- .../isaaclab/isaaclab/envs/direct_marl_env.py | 60 +++++++++++++++-- .../isaaclab/isaaclab/envs/direct_rl_env.py | 62 +++++++++++++++-- .../isaaclab/envs/manager_based_rl_env.py | 66 ++++++++++++++++--- 3 files changed, 168 insertions(+), 20 deletions(-) diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index eb0a359e4f5..7994a1a5b88 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -521,13 +521,18 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # check that if any render could have happened + # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) + # and produces consistent, scene-content frames. Fall back to the omni.replicator + # viewer-camera path only when no TiledCamera with RGB output exists in the scene. + if self._find_video_camera() is not None: + return self._render_tiled_camera_rgb_array() if not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( - f"Cannot render '{self.render_mode}' - no GUI and offscreen rendering not enabled." - " If running headless, make sure --enable_cameras is set." + "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" + " the scene, and neither GUI nor offscreen rendering is available." + " Add a TiledCamera sensor to the scene configuration to enable video recording." ) - # create the annotator if it does not exist + # Kit-based fallback: use an omni.replicator annotator on the viewer camera. 
if not hasattr(self, "_rgb_annotator"): import omni.replicator.core as rep @@ -542,7 +547,6 @@ def render(self, recompute: bool = False) -> np.ndarray | None: rgb_data = self._rgb_annotator.get_data() # convert to numpy array rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # return the rgb data # note: initially the renderer is warming up and returns empty data if rgb_data.size == 0: return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) @@ -553,6 +557,52 @@ def render(self, recompute: bool = False) -> np.ndarray | None: f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." ) + def _find_video_camera(self): + """ + Locates and caches the first TiledCamera sensor with RGB output. + Previously used the omni.replicator viewer camera which had RGB output. + Returns ``None`` if absent. + """ + if not hasattr(self, "_video_camera"): + from isaaclab.sensors.camera import TiledCamera + + self._video_camera = None + for sensor in self.scene.sensors.values(): + if isinstance(sensor, TiledCamera): + output = sensor.data.output + if "rgb" in output or "rgba" in output: + self._video_camera = sensor + break + return self._video_camera + + def _render_tiled_camera_rgb_array(self) -> np.ndarray: + """Return a square tile-grid of RGB frames from the scene's TiledCamera. + + Create a square grid of tiles. This method reads directly from the + TiledCamera sensor buffer to generate the tiles. + + Returns: + RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where + ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. 
+ """ + output = self._video_camera.data.output + # shape: [num_envs, H, W, 3], uint8 + rgb_all = output["rgb"] if "rgb" in output else output["rgba"][..., :3] + + n_envs = int(rgb_all.shape[0]) + grid_size = math.ceil(math.sqrt(n_envs)) + n_slots = grid_size * grid_size + tiles = rgb_all.cpu().numpy() # [n_envs, H, W, 3] + H, W = tiles.shape[1], tiles.shape[2] + # Pad unused slots with black to fill the square grid. + pad = n_slots - n_envs + if pad > 0: + tiles = np.concatenate([tiles, np.zeros((pad, H, W, 3), dtype=tiles.dtype)], axis=0) + # [grid_size, grid_size, H, W, 3] → [grid_size*H, grid_size*W, 3] + grid = tiles.reshape(grid_size, grid_size, H, W, 3) + grid = grid.transpose(0, 2, 1, 3, 4) # [grid_size, H, grid_size, W, 3] + return grid.reshape(grid_size * H, grid_size * W, 3) + def close(self): """Cleanup for the environment.""" if not self._is_closed: diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index b362ac72bc2..013b9281ac0 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -489,13 +489,18 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # check that if any render could have happened + # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) + # and produces consistent, scene-content frames. Fall back to the omni.replicator + # viewer-camera path only when no TiledCamera with RGB output exists in the scene. + if self._find_video_camera() is not None: + return self._render_tiled_camera_rgb_array() if not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( - f"Cannot render '{self.render_mode}' - no GUI and offscreen rendering not enabled." - " If running headless, make sure --enable_cameras is set." 
+ "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" + " the scene, and neither GUI nor offscreen rendering is available." + " Add a TiledCamera sensor to the scene configuration to enable video recording." ) - # create the annotator if it does not exist + # Kit-based fallback: use an omni.replicator annotator on the viewer camera. if not hasattr(self, "_rgb_annotator"): import omni.replicator.core as rep @@ -510,8 +515,7 @@ def render(self, recompute: bool = False) -> np.ndarray | None: rgb_data = self._rgb_annotator.get_data() # convert to numpy array rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # return the rgb data - # note: initially the renerer is warming up and returns empty data + # note: initially the renderer is warming up and returns empty data if rgb_data.size == 0: return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) else: @@ -521,6 +525,52 @@ def render(self, recompute: bool = False) -> np.ndarray | None: f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." ) + def _find_video_camera(self): + """ + Locates and caches the first TiledCamera sensor with RGB output. + Previously used the omni.replicator viewer camera which had RGB output. + Returns ``None`` if absent. + """ + if not hasattr(self, "_video_camera"): + from isaaclab.sensors.camera import TiledCamera + + self._video_camera = None + for sensor in self.scene.sensors.values(): + if isinstance(sensor, TiledCamera): + output = sensor.data.output + if "rgb" in output or "rgba" in output: + self._video_camera = sensor + break + return self._video_camera + + def _render_tiled_camera_rgb_array(self) -> np.ndarray: + """Return a square tile-grid of RGB frames from the scene's TiledCamera. + + Create a square grid of tiles. This method reads directly from the + TiledCamera sensor buffer to generate the tiles. 
+ + Returns: + RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where + ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. + """ + output = self._video_camera.data.output + # shape: [num_envs, H, W, 3], uint8 + rgb_all = output["rgb"] if "rgb" in output else output["rgba"][..., :3] + + n_envs = int(rgb_all.shape[0]) + grid_size = math.ceil(math.sqrt(n_envs)) + n_slots = grid_size * grid_size + tiles = rgb_all.cpu().numpy() # [n_envs, H, W, 3] + H, W = tiles.shape[1], tiles.shape[2] + # Pad unused slots with black to fill the square grid. + pad = n_slots - n_envs + if pad > 0: + tiles = np.concatenate([tiles, np.zeros((pad, H, W, 3), dtype=tiles.dtype)], axis=0) + # [grid_size, grid_size, H, W, 3] → [grid_size*H, grid_size*W, 3] + grid = tiles.reshape(grid_size, grid_size, H, W, 3) + grid = grid.transpose(0, 2, 1, 3, 4) # [grid_size, H, grid_size, W, 3] + return grid.reshape(grid_size * H, grid_size * W, 3) + def close(self): """Cleanup for the environment.""" if not self._is_closed: diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index d08b7e3be3a..30ac1ea88d2 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -270,15 +270,18 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # check that if any render could have happened - # Check for GUI, offscreen rendering, or visualizers - has_visualizers = bool(self.sim.get_setting("/isaaclab/visualizer")) - if not (self.sim.has_gui or self.sim.has_offscreen_render or has_visualizers): + # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) + # and produces consistent, scene-content frames. 
Fall back to the omni.replicator + # viewer-camera path only when no TiledCamera with RGB output exists in the scene. + if self._find_video_camera() is not None: + return self._render_tiled_camera_rgb_array() + if not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( - f"Cannot render '{self.render_mode}' - no GUI and offscreen rendering not enabled." - " If running headless, make sure --enable_cameras is set." + "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" + " the scene, and neither GUI nor offscreen rendering is available." + " Add a TiledCamera sensor to the scene configuration to enable video recording." ) - # create the annotator if it does not exist + # Kit-based fallback: use an omni.replicator annotator on the viewer camera. if not hasattr(self, "_rgb_annotator"): import omni.replicator.core as rep @@ -293,8 +296,7 @@ def render(self, recompute: bool = False) -> np.ndarray | None: rgb_data = self._rgb_annotator.get_data() # convert to numpy array rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # return the rgb data - # note: initially the renerer is warming up and returns empty data + # note: initially the renderer is warming up and returns empty data if rgb_data.size == 0: return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) else: @@ -304,6 +306,52 @@ def render(self, recompute: bool = False) -> np.ndarray | None: f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." ) + def _find_video_camera(self): + """ + Locates and caches the first TiledCamera sensor with RGB output. + Previously used the omni.replicator viewer camera which had RGB output. + Returns ``None`` if absent. 
+ """ + if not hasattr(self, "_video_camera"): + from isaaclab.sensors.camera import TiledCamera + + self._video_camera = None + for sensor in self.scene.sensors.values(): + if isinstance(sensor, TiledCamera): + output = sensor.data.output + if "rgb" in output or "rgba" in output: + self._video_camera = sensor + break + return self._video_camera + + def _render_tiled_camera_rgb_array(self) -> np.ndarray: + """Return a square tile-grid of RGB frames from the scene's TiledCamera. + + Create a square grid of tiles. This method reads directly from the + TiledCamera sensor buffer to generate the tiles. + + Returns: + RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where + ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. + """ + output = self._video_camera.data.output + # shape: [num_envs, H, W, 3], uint8 + rgb_all = output["rgb"] if "rgb" in output else output["rgba"][..., :3] + + n_envs = int(rgb_all.shape[0]) + grid_size = math.ceil(math.sqrt(n_envs)) + n_slots = grid_size * grid_size + tiles = rgb_all.cpu().numpy() # [n_envs, H, W, 3] + H, W = tiles.shape[1], tiles.shape[2] + # Pad unused slots with black to fill the square grid. 
+ pad = n_slots - n_envs + if pad > 0: + tiles = np.concatenate([tiles, np.zeros((pad, H, W, 3), dtype=tiles.dtype)], axis=0) + # [grid_size, grid_size, H, W, 3] → [grid_size*H, grid_size*W, 3] + grid = tiles.reshape(grid_size, grid_size, H, W, 3) + grid = grid.transpose(0, 2, 1, 3, 4) # [grid_size, H, grid_size, W, 3] + return grid.reshape(grid_size * H, grid_size * W, 3) + def close(self): if not self._is_closed: # destructor is order-sensitive From b628c4d36f2fc180e5c54fa3fb16b7c9b48fe6b8 Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Wed, 11 Mar 2026 14:44:33 -0700 Subject: [PATCH 02/11] Refactor video recording into VideoRecorder class with caching and contiguous memory --- .../isaaclab/isaaclab/envs/direct_marl_env.py | 62 +++------- .../isaaclab/envs/direct_marl_env_cfg.py | 14 +++ .../isaaclab/isaaclab/envs/direct_rl_env.py | 62 +++------- .../isaaclab/envs/direct_rl_env_cfg.py | 14 +++ .../isaaclab/envs/manager_based_env_cfg.py | 14 +++ .../isaaclab/envs/manager_based_rl_env.py | 63 +++------- .../isaaclab/envs/utils/video_recorder.py | 115 ++++++++++++++++++ .../isaaclab/envs/utils/video_recorder_cfg.py | 44 +++++++ 8 files changed, 244 insertions(+), 144 deletions(-) create mode 100644 source/isaaclab/isaaclab/envs/utils/video_recorder.py create mode 100644 source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index 7994a1a5b88..284ed815ab6 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -35,6 +35,8 @@ from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType from .direct_marl_env_cfg import DirectMARLEnvCfg from .ui import ViewportCameraController +from .utils.video_recorder import VideoRecorder +from .utils.video_recorder_cfg import VideoRecorderCfg from .utils.spaces import sample_space, spec_to_gym_space # import logger @@ -226,6 +228,14 @@ 
def _init_sim(self, render_mode: str | None = None, **kwargs): if noise_model is not None } + # instantiate the viewport recorder for rgb_array video capture + if self.cfg.video_recorder is not None: + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + # perform events at the start of the simulation if self.cfg.events: # we print it here to make the logging consistent @@ -524,8 +534,10 @@ def render(self, recompute: bool = False) -> np.ndarray | None: # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) # and produces consistent, scene-content frames. Fall back to the omni.replicator # viewer-camera path only when no TiledCamera with RGB output exists in the scene. - if self._find_video_camera() is not None: - return self._render_tiled_camera_rgb_array() + if self.video_recorder is not None: + frame = self.video_recorder.render_rgb_array() + if frame is not None: + return frame if not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" @@ -557,52 +569,6 @@ def render(self, recompute: bool = False) -> np.ndarray | None: f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." ) - def _find_video_camera(self): - """ - Locates and caches the first TiledCamera sensor with RGB output. - Previously used the omni.replicator viewer camera which had RGB output. - Returns ``None`` if absent. 
- """ - if not hasattr(self, "_video_camera"): - from isaaclab.sensors.camera import TiledCamera - - self._video_camera = None - for sensor in self.scene.sensors.values(): - if isinstance(sensor, TiledCamera): - output = sensor.data.output - if "rgb" in output or "rgba" in output: - self._video_camera = sensor - break - return self._video_camera - - def _render_tiled_camera_rgb_array(self) -> np.ndarray: - """Return a square tile-grid of RGB frames from the scene's TiledCamera. - - Create a square grid of tiles. This method reads directly from the - TiledCamera sensor buffer to generate the tiles. - - Returns: - RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where - ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. - """ - output = self._video_camera.data.output - # shape: [num_envs, H, W, 3], uint8 - rgb_all = output["rgb"] if "rgb" in output else output["rgba"][..., :3] - - n_envs = int(rgb_all.shape[0]) - grid_size = math.ceil(math.sqrt(n_envs)) - n_slots = grid_size * grid_size - tiles = rgb_all.cpu().numpy() # [n_envs, H, W, 3] - H, W = tiles.shape[1], tiles.shape[2] - # Pad unused slots with black to fill the square grid. 
- pad = n_slots - n_envs - if pad > 0: - tiles = np.concatenate([tiles, np.zeros((pad, H, W, 3), dtype=tiles.dtype)], axis=0) - # [grid_size, grid_size, H, W, 3] → [grid_size*H, grid_size*W, 3] - grid = tiles.reshape(grid_size, grid_size, H, W, 3) - grid = grid.transpose(0, 2, 1, 3, 4) # [grid_size, H, grid_size, W, 3] - return grid.reshape(grid_size * H, grid_size * W, 3) - def close(self): """Cleanup for the environment.""" if not self._is_closed: diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py index b22a6169d7a..962c1ffb99b 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py @@ -17,6 +17,7 @@ from isaaclab.utils.noise import NoiseModelCfg from .common import AgentID, SpaceType, ViewerCfg +from .utils.video_recorder_cfg import VideoRecorderCfg @configclass @@ -234,3 +235,16 @@ class DirectMARLEnvCfg: log_dir: str | None = None """Directory for logging experiment artifacts. Defaults to None, in which case no specific log directory is set.""" + + video_recorder: VideoRecorderCfg = VideoRecorderCfg() + """Configuration for the viewport recorder used when ``render_mode="rgb_array"``. + + Defaults to a :class:`~isaaclab.envs.VideoRecorderCfg` that captures all environments + in a square tile-grid using :class:`~isaaclab.envs.VideoRecorder`. + + Set :attr:`~isaaclab.envs.VideoRecorderCfg.class_type` to a custom subclass to swap the + capture implementation without modifying environment code. Set to ``None`` to disable + TiledCamera-based recording entirely and fall back to the Kit-based omni.replicator path. 
+ + CLI example: ``env.video_recorder.video_num_tiles=9`` + """ diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index 013b9281ac0..af4a800b691 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -32,6 +32,8 @@ from .common import VecEnvObs, VecEnvStepReturn from .direct_rl_env_cfg import DirectRLEnvCfg from .ui import ViewportCameraController +from .utils.video_recorder import VideoRecorder +from .utils.video_recorder_cfg import VideoRecorderCfg from .utils.spaces import sample_space, spec_to_gym_space if has_kit(): @@ -229,6 +231,14 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): self.cfg.observation_noise_model, num_envs=self.num_envs, device=self.device ) + # instantiate the viewport recorder for rgb_array video capture + if self.cfg.video_recorder is not None: + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + # perform events at the start of the simulation if self.cfg.events: # we print it here to make the logging consistent @@ -492,8 +502,10 @@ def render(self, recompute: bool = False) -> np.ndarray | None: # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) # and produces consistent, scene-content frames. Fall back to the omni.replicator # viewer-camera path only when no TiledCamera with RGB output exists in the scene. 
- if self._find_video_camera() is not None: - return self._render_tiled_camera_rgb_array() + if self.video_recorder is not None: + frame = self.video_recorder.render_rgb_array() + if frame is not None: + return frame if not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" @@ -525,52 +537,6 @@ def render(self, recompute: bool = False) -> np.ndarray | None: f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." ) - def _find_video_camera(self): - """ - Locates and caches the first TiledCamera sensor with RGB output. - Previously used the omni.replicator viewer camera which had RGB output. - Returns ``None`` if absent. - """ - if not hasattr(self, "_video_camera"): - from isaaclab.sensors.camera import TiledCamera - - self._video_camera = None - for sensor in self.scene.sensors.values(): - if isinstance(sensor, TiledCamera): - output = sensor.data.output - if "rgb" in output or "rgba" in output: - self._video_camera = sensor - break - return self._video_camera - - def _render_tiled_camera_rgb_array(self) -> np.ndarray: - """Return a square tile-grid of RGB frames from the scene's TiledCamera. - - Create a square grid of tiles. This method reads directly from the - TiledCamera sensor buffer to generate the tiles. - - Returns: - RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where - ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. - """ - output = self._video_camera.data.output - # shape: [num_envs, H, W, 3], uint8 - rgb_all = output["rgb"] if "rgb" in output else output["rgba"][..., :3] - - n_envs = int(rgb_all.shape[0]) - grid_size = math.ceil(math.sqrt(n_envs)) - n_slots = grid_size * grid_size - tiles = rgb_all.cpu().numpy() # [n_envs, H, W, 3] - H, W = tiles.shape[1], tiles.shape[2] - # Pad unused slots with black to fill the square grid. 
- pad = n_slots - n_envs - if pad > 0: - tiles = np.concatenate([tiles, np.zeros((pad, H, W, 3), dtype=tiles.dtype)], axis=0) - # [grid_size, grid_size, H, W, 3] → [grid_size*H, grid_size*W, 3] - grid = tiles.reshape(grid_size, grid_size, H, W, 3) - grid = grid.transpose(0, 2, 1, 3, 4) # [grid_size, H, grid_size, W, 3] - return grid.reshape(grid_size * H, grid_size * W, 3) - def close(self): """Cleanup for the environment.""" if not self._is_closed: diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py index fd40b3104c2..c7c11bdb2e9 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py @@ -16,6 +16,7 @@ from isaaclab.utils.noise import NoiseModelCfg from .common import SpaceType, ViewerCfg +from .utils.video_recorder_cfg import VideoRecorderCfg @configclass @@ -254,3 +255,16 @@ class DirectRLEnvCfg: log_dir: str | None = None """Directory for logging experiment artifacts. Defaults to None, in which case no specific log directory is set.""" + + video_recorder: VideoRecorderCfg = VideoRecorderCfg() + """Configuration for the viewport recorder used when ``render_mode="rgb_array"``. + + Defaults to a :class:`~isaaclab.envs.VideoRecorderCfg` that captures all environments + in a square tile-grid using :class:`~isaaclab.envs.VideoRecorder`. + + Set :attr:`~isaaclab.envs.VideoRecorderCfg.class_type` to a custom subclass to swap the + capture implementation without modifying environment code. Set to ``None`` to disable + TiledCamera-based recording entirely and fall back to the Kit-based omni.replicator path. 
+ + CLI example: ``env.video_recorder.video_num_tiles=9`` + """ diff --git a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py index 24a88d5e72c..b231d278e44 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py @@ -26,6 +26,7 @@ from isaaclab.utils import configclass from .common import ViewerCfg +from .utils.video_recorder_cfg import VideoRecorderCfg @configclass @@ -163,3 +164,16 @@ class ManagerBasedEnvCfg: log_dir: str | None = None """Directory for logging experiment artifacts. Defaults to None, in which case no specific log directory is set.""" + + video_recorder: VideoRecorderCfg = VideoRecorderCfg() + """Configuration for the viewport recorder used when ``render_mode="rgb_array"``. + + Defaults to a :class:`~isaaclab.envs.VideoRecorderCfg` that captures all environments + in a square tile-grid using :class:`~isaaclab.envs.VideoRecorder`. + + Set :attr:`~isaaclab.envs.VideoRecorderCfg.class_type` to a custom subclass to swap the + capture implementation without modifying environment code. Set to ``None`` to disable + TiledCamera-based recording entirely and fall back to the Kit-based omni.replicator path. 
+ + CLI example: ``env.video_recorder.video_num_tiles=9`` + """ diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index 30ac1ea88d2..e2b5635b0f9 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -20,6 +20,8 @@ from .common import VecEnvStepReturn from .manager_based_env import ManagerBasedEnv from .manager_based_rl_env_cfg import ManagerBasedRLEnvCfg +from .utils.video_recorder import VideoRecorder +from .utils.video_recorder_cfg import VideoRecorderCfg class ManagerBasedRLEnv(ManagerBasedEnv, gym.Env): @@ -86,6 +88,15 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, ** # produced video matches the simulation self.metadata["render_fps"] = 1 / self.step_dt self.has_rtx_sensors = self.sim.get_setting("/isaaclab/render/rtx_sensors") + + # instantiate the viewport recorder for rgb_array video capture + if self.cfg.video_recorder is not None: + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + print("[INFO]: Completed setting up the environment...") """ @@ -273,8 +284,10 @@ def render(self, recompute: bool = False) -> np.ndarray | None: # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) # and produces consistent, scene-content frames. Fall back to the omni.replicator # viewer-camera path only when no TiledCamera with RGB output exists in the scene. 
- if self._find_video_camera() is not None: - return self._render_tiled_camera_rgb_array() + if self.video_recorder is not None: + frame = self.video_recorder.render_rgb_array() + if frame is not None: + return frame if not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" @@ -306,52 +319,6 @@ def render(self, recompute: bool = False) -> np.ndarray | None: f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." ) - def _find_video_camera(self): - """ - Locates and caches the first TiledCamera sensor with RGB output. - Previously used the omni.replicator viewer camera which had RGB output. - Returns ``None`` if absent. - """ - if not hasattr(self, "_video_camera"): - from isaaclab.sensors.camera import TiledCamera - - self._video_camera = None - for sensor in self.scene.sensors.values(): - if isinstance(sensor, TiledCamera): - output = sensor.data.output - if "rgb" in output or "rgba" in output: - self._video_camera = sensor - break - return self._video_camera - - def _render_tiled_camera_rgb_array(self) -> np.ndarray: - """Return a square tile-grid of RGB frames from the scene's TiledCamera. - - Create a square grid of tiles. This method reads directly from the - TiledCamera sensor buffer to generate the tiles. - - Returns: - RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where - ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. - """ - output = self._video_camera.data.output - # shape: [num_envs, H, W, 3], uint8 - rgb_all = output["rgb"] if "rgb" in output else output["rgba"][..., :3] - - n_envs = int(rgb_all.shape[0]) - grid_size = math.ceil(math.sqrt(n_envs)) - n_slots = grid_size * grid_size - tiles = rgb_all.cpu().numpy() # [n_envs, H, W, 3] - H, W = tiles.shape[1], tiles.shape[2] - # Pad unused slots with black to fill the square grid. 
- pad = n_slots - n_envs - if pad > 0: - tiles = np.concatenate([tiles, np.zeros((pad, H, W, 3), dtype=tiles.dtype)], axis=0) - # [grid_size, grid_size, H, W, 3] → [grid_size*H, grid_size*W, 3] - grid = tiles.reshape(grid_size, grid_size, H, W, 3) - grid = grid.transpose(0, 2, 1, 3, 4) # [grid_size, H, grid_size, W, 3] - return grid.reshape(grid_size * H, grid_size * W, 3) - def close(self): if not self._is_closed: # destructor is order-sensitive diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py new file mode 100644 index 00000000000..d328f7c374b --- /dev/null +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -0,0 +1,115 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Viewport recorder for capturing video frames from a :class:`~isaaclab.sensors.camera.TiledCamera`.""" + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from isaaclab.scene import InteractiveScene + from .video_recorder_cfg import VideoRecorderCfg + + +class VideoRecorder: + """Records video frames from the scene's :class:`~isaaclab.sensors.camera.TiledCamera`. + + On the first :meth:`render_rgb_array` call this class searches the scene for the first + ``TiledCamera`` sensor with ``"rgb"`` or ``"rgba"`` output and caches the camera reference + together with all grid-layout constants so subsequent calls are allocation-free (except for + the unavoidable GPU-to-CPU transfer and the final tile-stitch reshape). + + The default implementation reads *all* ``num_envs`` frames from the TiledCamera buffer on + the GPU and slices the first ``cfg.video_num_tiles`` on the CPU (Option A). 
Swap + ``cfg.class_type`` for a custom subclass to change this behaviour without touching any + environment code. + + Args: + cfg: Configuration for this recorder. + scene: The interactive scene that owns the sensors. + """ + + def __init__(self, cfg: VideoRecorderCfg, scene: InteractiveScene): + self.cfg = cfg + self._scene = scene + + def render_rgb_array(self) -> np.ndarray | None: + """Return a square tile-grid RGB frame, or ``None`` if no suitable camera exists. + + Returns: + RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where + ``G = ceil(sqrt(video_num_tiles))`` and ``(H, W)`` is the per-tile resolution, + or ``None`` when no :class:`~isaaclab.sensors.camera.TiledCamera` with RGB output + is present in the scene. + """ + if self._find_video_camera() is None: + return None + return self._render_tiled_camera_rgb_array() + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _find_video_camera(self): + """ + Locates and caches the first TiledCamera sensor with RGB output. + Previously used the omni.replicator viewer camera which had RGB output. + Returns ``None`` if absent. + """ + if not hasattr(self, "_video_camera"): + from isaaclab.sensors.camera import TiledCamera + + self._video_camera = None + for sensor in self._scene.sensors.values(): + if isinstance(sensor, TiledCamera): + output = sensor.data.output + if "rgb" in output or "rgba" in output: + self._video_camera = sensor + self._video_rgb_key = "rgb" if "rgb" in output else "rgba" + # Cache all grid constants — these are fixed for the lifetime of the env. 
+ n_total = int(sensor.data.output[self._video_rgb_key].shape[0]) + n_envs = n_total if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, n_total) + self._video_n_envs = n_envs + self._video_grid_size = math.ceil(math.sqrt(n_envs)) + n_slots = self._video_grid_size * self._video_grid_size + H = int(sensor.data.output[self._video_rgb_key].shape[1]) + W = int(sensor.data.output[self._video_rgb_key].shape[2]) + self._video_H = H + self._video_W = W + # Pre-allocate the black padding block (zero-copy when pad == 0). + pad = n_slots - n_envs + self._video_pad = np.zeros((pad, H, W, 3), dtype=np.uint8) if pad > 0 else None + break + return self._video_camera + + def _render_tiled_camera_rgb_array(self) -> np.ndarray: + """Return a square tile-grid of RGB frames from the scene's TiledCamera. + + Create a square grid of tiles. This method reads directly from the + TiledCamera sensor buffer to generate the tiles. + + Returns: + RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where + ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. + """ + rgb_all = self._video_camera.data.output[self._video_rgb_key] + # Drop alpha channel once on GPU before the CPU transfer. + if self._video_rgb_key == "rgba": + rgb_all = rgb_all[..., :3] + + # .contiguous() ensures the reshape below returns a zero-copy view. + tiles = rgb_all[: self._video_n_envs].contiguous().cpu().numpy() # [n_envs, H, W, 3] + if self._video_pad is not None: + tiles = np.concatenate([tiles, self._video_pad], axis=0) + # [grid_size, grid_size, H, W, 3] → [grid_size*H, grid_size*W, 3] + g, H, W = self._video_grid_size, self._video_H, self._video_W + grid = tiles.reshape(g, g, H, W, 3) + grid = grid.transpose(0, 2, 1, 3, 4) + # after transpose the strides are non-standard; reshape must copy here. 
+ return grid.reshape(g * H, g * W, 3) diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py new file mode 100644 index 00000000000..0d9b6287728 --- /dev/null +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py @@ -0,0 +1,44 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`.""" + +from __future__ import annotations + +from isaaclab.utils import configclass + +from .video_recorder import VideoRecorder + + +@configclass +class VideoRecorderCfg: + """Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`. + + Set :attr:`class_type` to a custom subclass of + :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder` to swap the + video-capture implementation (e.g. an Option-B pipeline that only renders + ``video_num_tiles`` cameras on the GPU) without modifying any environment code. + """ + + class_type: type = VideoRecorder + """The recorder class to instantiate. Must accept ``(cfg, scene)`` as constructor arguments. + Defaults to :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`. + """ + + video_num_tiles: int = -1 + """Number of environment tiles to include in each video frame when using ``render_mode="rgb_array"``. + Defaults to -1, which renders all environments. + + Environments are arranged into a square grid of size + ``ceil(sqrt(video_num_tiles)) * ceil(sqrt(video_num_tiles))``, with unused slots filled with + black. 
For example: + + * ``-1``: all environments (default) + * ``1``: single environment (1*1) + * ``4``: first 4 environments (2*2 grid) + * ``9``: first 9 environments (3*3 grid) + + CLI example: ``env.video_recorder.video_num_tiles=9`` + """ From 556154273c9cd4ed3604c941960b1bf4a31ff996 Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 12:59:34 -0700 Subject: [PATCH 03/11] tiled camera-based video recording functionality --- .../isaaclab/isaaclab/envs/direct_marl_env.py | 20 ++- .../isaaclab/isaaclab/envs/direct_rl_env.py | 20 ++- .../isaaclab/envs/manager_based_env.py | 11 ++ .../isaaclab/envs/manager_based_rl_env.py | 14 +- .../isaaclab/envs/utils/video_recorder.py | 139 +++++++++++++++--- .../isaaclab/envs/utils/video_recorder_cfg.py | 73 +++++++++ 6 files changed, 232 insertions(+), 45 deletions(-) diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index 284ed815ab6..9eebdc89e18 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -170,6 +170,18 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): if "prestartup" in self.event_manager.available_modes: self.event_manager.apply(mode="prestartup") + # Instantiate the video recorder before sim.reset() so that any fallback TiledCamera + # (used for state-based envs without an observation camera) is spawned into the USD + # stage and registered for the PHYSICS_READY callback before physics initialises. + # Forward render_mode so VideoRecorder only spawns fallback cameras when --video is active. 
+ if self.cfg.video_recorder is not None: + self.cfg.video_recorder.render_mode = render_mode + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + # play the simulator to activate physics handles # note: this activates the physics simulation view that exposes TensorAPIs # note: when started in extension mode, first call sim.reset_async() and then initialize the managers @@ -228,14 +240,6 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): if noise_model is not None } - # instantiate the viewport recorder for rgb_array video capture - if self.cfg.video_recorder is not None: - self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( - self.cfg.video_recorder, self.scene - ) - else: - self.video_recorder = None - # perform events at the start of the simulation if self.cfg.events: # we print it here to make the logging consistent diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index af4a800b691..e821bc92a6f 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -175,6 +175,18 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): if "prestartup" in self.event_manager.available_modes: self.event_manager.apply(mode="prestartup") + # Instantiate the video recorder before sim.reset() so that any fallback TiledCamera + # (used for state-based envs without an observation camera) is spawned into the USD + # stage and registered for the PHYSICS_READY callback before physics initialises. + # Forward render_mode so VideoRecorder only spawns fallback cameras when --video is active. 
+ if self.cfg.video_recorder is not None: + self.cfg.video_recorder.render_mode = render_mode + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + # play the simulator to activate physics handles # note: this activates the physics simulation view that exposes TensorAPIs # note: when started in extension mode, first call sim.reset_async() and then initialize the managers @@ -231,14 +243,6 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): self.cfg.observation_noise_model, num_envs=self.num_envs, device=self.device ) - # instantiate the viewport recorder for rgb_array video capture - if self.cfg.video_recorder is not None: - self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( - self.cfg.video_recorder, self.scene - ) - else: - self.video_recorder = None - # perform events at the start of the simulation if self.cfg.events: # we print it here to make the logging consistent diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py index 33327dc0186..c47dbfd89a5 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_env.py @@ -26,6 +26,7 @@ from .manager_based_env_cfg import ManagerBasedEnvCfg from .ui import ViewportCameraController from .utils.io_descriptors import export_articulations_data, export_scene_data +from .utils.video_recorder import VideoRecorder # import logger logger = logging.getLogger(__name__) @@ -182,6 +183,16 @@ def _init_sim(self): if "prestartup" in self.event_manager.available_modes: self.event_manager.apply(mode="prestartup") + # Instantiate the video recorder before sim.reset() so that any fallback TiledCamera + # (used for state-based envs without an observation camera) is spawned into the USD + # stage and registered for the PHYSICS_READY callback before physics initialises. 
+ if self.cfg.video_recorder is not None: + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + # play the simulator to activate physics handles # note: this activates the physics simulation view that exposes TensorAPIs # note: when started in extension mode, first call sim.reset_async() and then initialize the managers diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index e2b5635b0f9..fb5e586d6f3 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -21,7 +21,6 @@ from .manager_based_env import ManagerBasedEnv from .manager_based_rl_env_cfg import ManagerBasedRLEnvCfg from .utils.video_recorder import VideoRecorder -from .utils.video_recorder_cfg import VideoRecorderCfg class ManagerBasedRLEnv(ManagerBasedEnv, gym.Env): @@ -78,6 +77,11 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, ** # initialize the episode length buffer BEFORE loading the managers to use it in mdp functions. self.episode_length_buf = torch.zeros(cfg.scene.num_envs, device=cfg.sim.device, dtype=torch.long) + # Forward render_mode to VideoRecorderCfg before super().__init__() creates VideoRecorder, + # so fallback cameras are only spawned when --video is active (render_mode="rgb_array"). + if cfg.video_recorder is not None: + cfg.video_recorder.render_mode = render_mode + # initialize the base class to setup the scene. 
super().__init__(cfg=cfg) # store the render mode @@ -89,14 +93,6 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, ** self.metadata["render_fps"] = 1 / self.step_dt self.has_rtx_sensors = self.sim.get_setting("/isaaclab/render/rtx_sensors") - # instantiate the viewport recorder for rgb_array video capture - if self.cfg.video_recorder is not None: - self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( - self.cfg.video_recorder, self.scene - ) - else: - self.video_recorder = None - print("[INFO]: Completed setting up the environment...") """ diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py index d328f7c374b..e0475223ecc 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -30,6 +30,18 @@ class VideoRecorder: ``cfg.class_type`` for a custom subclass to change this behaviour without touching any environment code. + **Camera selection priority:** + + 1. An existing :class:`~isaaclab.sensors.camera.TiledCamera` found in the scene sensors + (vision-based env path — the observation camera is reused for free). + 2. A dedicated video camera grid instantiated from ``cfg.fallback_camera_cfg`` + (state-based env path — no observation camera exists, so one camera per environment + is spawned, up to ``cfg.video_num_tiles``). + + For the fallback cameras to be initialised correctly they **must** be created before + ``sim.reset()`` is called, so :class:`VideoRecorder` must be instantiated before + ``sim.reset()`` in the environment setup. The environment base classes handle this. + Args: cfg: Configuration for this recorder. scene: The interactive scene that owns the sensors. 
@@ -38,6 +50,14 @@ class VideoRecorder: def __init__(self, cfg: VideoRecorderCfg, scene: InteractiveScene): self.cfg = cfg self._scene = scene + self._fallback_tiled_camera = None + + # Spawn fallback cameras only when video recording is actually requested. + # cfg.render_mode is set to "rgb_array" by the env base class when --video is active + # (forwarded from the render_mode argument of gym.make / the env constructor). + # Gating here avoids GPU overhead in ordinary training runs that don't record video. + if cfg.fallback_camera_cfg is not None and cfg.render_mode == "rgb_array": + self._fallback_tiled_camera = self._spawn_fallback_cameras(cfg, scene) def render_rgb_array(self) -> np.ndarray | None: """Return a square tile-grid RGB frame, or ``None`` if no suitable camera exists. @@ -46,7 +66,7 @@ def render_rgb_array(self) -> np.ndarray | None: RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where ``G = ceil(sqrt(video_num_tiles))`` and ``(H, W)`` is the per-tile resolution, or ``None`` when no :class:`~isaaclab.sensors.camera.TiledCamera` with RGB output - is present in the scene. + is present in the scene or configured as a fallback. """ if self._find_video_camera() is None: return None @@ -56,48 +76,127 @@ def render_rgb_array(self) -> np.ndarray | None: # Internal helpers # ------------------------------------------------------------------ - def _find_video_camera(self): + @staticmethod + def _spawn_fallback_cameras(cfg: VideoRecorderCfg, scene: InteractiveScene): + """Spawn one video camera prim per environment (up to ``cfg.video_num_tiles``) and + return a single :class:`~isaaclab.sensors.camera.TiledCamera` covering all of them. + + Camera prims are spawned at ``/World/envs/env_{i}/VideoCamera`` for + ``i in range(n_cameras)``, then a ``TiledCamera`` with the regex prim path + ``/World/envs/env_.*/VideoCamera`` is created so that all spawned prims are + discovered and rendered as tiles. 
+ + This must be called **before** ``sim.reset()`` so the prims exist in the USD stage + and the ``TiledCamera`` can register for the ``PHYSICS_READY`` callback. """ - Locates and caches the first TiledCamera sensor with RGB output. - Previously used the omni.replicator viewer camera which had RGB output. - Returns ``None`` if absent. + import torch + + from isaaclab.sensors.camera import TiledCamera + from isaaclab.utils.math import convert_camera_frame_orientation_convention + + camera_cfg = cfg.fallback_camera_cfg + + # Pre-compute the OpenGL rotation offset (mirrors Camera.__init__ logic). + n_total_envs = scene.num_envs + rot = torch.tensor(camera_cfg.offset.rot, dtype=torch.float32, device="cpu").unsqueeze(0) + rot_offset = convert_camera_frame_orientation_convention( + rot, origin=camera_cfg.offset.convention, target="opengl" + ) + rot_offset = rot_offset.squeeze(0).cpu().numpy() + + # Ensure vertical_aperture is set before calling the spawn func. + spawn_cfg = camera_cfg.spawn + if spawn_cfg.vertical_aperture is None: + spawn_cfg = spawn_cfg.replace( + vertical_aperture=spawn_cfg.horizontal_aperture * camera_cfg.height / camera_cfg.width + ) + + # TiledCamera requires exactly one camera prim per environment (count == num_envs). + # We must therefore spawn cameras for ALL environments, not just video_num_tiles of them. + # The video_num_tiles limit is applied at render time in _render_tiled_camera_rgb_array, + # which only reads the first N tiles — the same behaviour as vision-based observation cameras. + for i in range(n_total_envs): + prim_path_i = f"/World/envs/env_{i}/VideoCamera" + spawn_cfg.func(prim_path_i, spawn_cfg, translation=camera_cfg.offset.pos, orientation=rot_offset) + + # Create one TiledCamera that discovers all spawned prims via the regex path. + # spawn=None tells Camera.__init__ to skip re-spawning; it will verify the prims exist. 
+ tiled_cfg = camera_cfg.replace( + prim_path="/World/envs/env_.*/VideoCamera", + spawn=None, + ) + return TiledCamera(tiled_cfg) + + def _find_video_camera(self): + """Locate and cache the TiledCamera to use for video recording. + + Search order: + 1. Observation TiledCamera already in the scene (vision-based env path, zero extra cost). + 2. Dedicated fallback TiledCamera from ``cfg.fallback_camera_cfg`` (state-based env path). + + Returns ``None`` if neither source is available. + + Previously used the omni.replicator viewer camera which had RGB output only for + Kit-based backends (``physx`` / ``newton,isaacsim_rtx_renderer``). """ if not hasattr(self, "_video_camera"): from isaaclab.sensors.camera import TiledCamera self._video_camera = None + + # Priority 1: observation TiledCamera in the scene (vision-based env path). for sensor in self._scene.sensors.values(): if isinstance(sensor, TiledCamera): output = sensor.data.output if "rgb" in output or "rgba" in output: self._video_camera = sensor - self._video_rgb_key = "rgb" if "rgb" in output else "rgba" - # Cache all grid constants — these are fixed for the lifetime of the env. - n_total = int(sensor.data.output[self._video_rgb_key].shape[0]) - n_envs = n_total if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, n_total) - self._video_n_envs = n_envs - self._video_grid_size = math.ceil(math.sqrt(n_envs)) - n_slots = self._video_grid_size * self._video_grid_size - H = int(sensor.data.output[self._video_rgb_key].shape[1]) - W = int(sensor.data.output[self._video_rgb_key].shape[2]) - self._video_H = H - self._video_W = W - # Pre-allocate the black padding block (zero-copy when pad == 0). - pad = n_slots - n_envs - self._video_pad = np.zeros((pad, H, W, 3), dtype=np.uint8) if pad > 0 else None break + + # Priority 2: fallback video camera (state-based env path). 
+ if self._video_camera is None and self._fallback_tiled_camera is not None: + if self._fallback_tiled_camera.is_initialized: + output = self._fallback_tiled_camera.data.output + if "rgb" in output or "rgba" in output: + self._video_camera = self._fallback_tiled_camera + + # Cache all grid constants — these are fixed for the lifetime of the env. + if self._video_camera is not None: + output = self._video_camera.data.output + self._video_rgb_key = "rgb" if "rgb" in output else "rgba" + n_total = int(output[self._video_rgb_key].shape[0]) + n_envs = n_total if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, n_total) + self._video_n_envs = n_envs + self._video_grid_size = math.ceil(math.sqrt(n_envs)) + n_slots = self._video_grid_size * self._video_grid_size + H = int(output[self._video_rgb_key].shape[1]) + W = int(output[self._video_rgb_key].shape[2]) + self._video_H = H + self._video_W = W + # Pre-allocate the black padding block (zero-copy when pad == 0). + pad = n_slots - n_envs + self._video_pad = np.zeros((pad, H, W, 3), dtype=np.uint8) if pad > 0 else None + return self._video_camera def _render_tiled_camera_rgb_array(self) -> np.ndarray: - """Return a square tile-grid of RGB frames from the scene's TiledCamera. + """Return a square tile-grid of RGB frames from the TiledCamera. Create a square grid of tiles. This method reads directly from the TiledCamera sensor buffer to generate the tiles. + If using the dedicated fallback video cameras (not observation sensors), + this method calls ``update()`` on them first to trigger a fresh render pass. + Observation TiledCameras are updated by ``scene.update()`` during the + environment step and do not need an extra update here. + Returns: RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. """ + # Fallback cameras are not updated by scene.update(), so drive them manually. 
+ if self._video_camera is self._fallback_tiled_camera: + self._fallback_tiled_camera.update(dt=0.0, force_recompute=True) + rgb_all = self._video_camera.data.output[self._video_rgb_key] # Drop alpha channel once on GPU before the CPU transfer. if self._video_rgb_key == "rgba": diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py index 0d9b6287728..07726ed5a14 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py @@ -7,11 +7,47 @@ from __future__ import annotations +import isaaclab.sim as sim_utils +from isaaclab.sensors.camera import TiledCameraCfg from isaaclab.utils import configclass from .video_recorder import VideoRecorder +DEFAULT_VIDEO_FALLBACK_CAMERA_CFG = TiledCameraCfg( + prim_path="/World/envs/env_0/VideoCamera", + update_period=0.0, + height=480, + width=640, + data_types=["rgb"], + spawn=sim_utils.PinholeCameraCfg( + focal_length=24.0, + focus_distance=400.0, + horizontal_aperture=20.955, + clipping_range=(0.1, 1.0e5), + ), + offset=TiledCameraCfg.OffsetCfg(pos=(-7.0, 0.0, 3.0), rot=(0.0, 0.1045, 0.0, 0.9945), convention="world"), +) +"""Default fallback :class:`~isaaclab.sensors.camera.TiledCameraCfg` for state-based video recording. + +Places a pinhole camera at ``/World/envs/env_0/VideoCamera`` offset ``(-7, 0, 3)`` from +env_0's origin, angled ~12° downward in the world frame. This matches the camera position used +by ``Isaac-Cartpole-RGB-v0`` and gives a reasonable side view for medium-scale environments +(env spacing ~4 m). + +This is the **default** value of :attr:`VideoRecorderCfg.fallback_camera_cfg`. No action is +needed in task configs — fallback cameras are automatically available for all state-based +environments. Spawning only occurs when :attr:`VideoRecorderCfg.render_mode` is ``"rgb_array"`` +(i.e. ``--video`` is active), so ordinary training runs incur zero overhead. 
+ +To customise the pose for a different environment scale, override in the task's ``__post_init__``:: + + self.video_recorder.fallback_camera_cfg = self.video_recorder.fallback_camera_cfg.replace( + offset=TiledCameraCfg.OffsetCfg(pos=(-3.0, 0.0, 2.0), rot=(0.0, 0.1045, 0.0, 0.9945), convention="world"), + ) +""" + + @configclass class VideoRecorderCfg: """Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`. @@ -27,6 +63,19 @@ class VideoRecorderCfg: Defaults to :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`. """ + render_mode: str | None = None + """The render mode forwarded from the environment constructor. + + Populated automatically by the environment base classes from the ``render_mode`` argument + passed to :func:`gymnasium.make` (or the environment constructor directly). User code + should not set this field manually. + + When ``None`` (the default, i.e. ``--video`` was **not** passed), :class:`VideoRecorder` + skips spawning any fallback cameras so that state-based runs incur zero overhead. + Only when this is ``"rgb_array"`` does the recorder allocate GPU resources for the + fallback camera grid. + """ + video_num_tiles: int = -1 """Number of environment tiles to include in each video frame when using ``render_mode="rgb_array"``. Defaults to -1, which renders all environments. @@ -42,3 +91,27 @@ class VideoRecorderCfg: CLI example: ``env.video_recorder.video_num_tiles=9`` """ + + fallback_camera_cfg: object = DEFAULT_VIDEO_FALLBACK_CAMERA_CFG + """Optional :class:`~isaaclab.sensors.camera.TiledCameraCfg` used to spawn a dedicated + video-only camera for state-based environments (no observation ``TiledCamera`` in the scene). + + Defaults to :data:`DEFAULT_VIDEO_FALLBACK_CAMERA_CFG` — a pinhole camera placed at + ``(-7, 0, 3)`` relative to env_0's origin, giving a reasonable side view for environments + with ~4 m spacing. Set to ``None`` to disable fallback cameras entirely (e.g. 
for + vision-based tasks that already have an observation :class:`~isaaclab.sensors.camera.TiledCamera`). + + Spawning is **gated on** :attr:`render_mode` ``== "rgb_array"`` (i.e. ``--video`` must be + active), so the default value causes zero overhead during ordinary training runs. + + To customise the pose for a different environment scale, override in the task's ``__post_init__``:: + + self.video_recorder.fallback_camera_cfg = self.video_recorder.fallback_camera_cfg.replace( + offset=TiledCameraCfg.OffsetCfg(pos=(-3.0, 0.0, 2.0), rot=(0.0, 0.1045, 0.0, 0.9945), convention="world"), + ) + + .. note:: + The prim path in the cfg must start with ``/World/envs/env_0/`` so that the OVRTX + renderer path check succeeds and ``TiledCamera`` correctly infers ``num_envs`` from + the scene. + """ From d03f028b2b045718779a94e12d4bc58f3af45abc Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 16:17:56 -0700 Subject: [PATCH 04/11] Add --video=perspective mode for perspective recording --- scripts/benchmarks/benchmark_non_rl.py | 13 +- scripts/benchmarks/benchmark_rlgames.py | 13 +- scripts/benchmarks/benchmark_rsl_rl.py | 13 +- .../reinforcement_learning/rl_games/play.py | 13 +- .../reinforcement_learning/rl_games/train.py | 13 +- scripts/reinforcement_learning/rlinf/play.py | 9 +- scripts/reinforcement_learning/rsl_rl/play.py | 13 +- .../reinforcement_learning/rsl_rl/train.py | 13 +- scripts/reinforcement_learning/sb3/play.py | 13 +- scripts/reinforcement_learning/sb3/train.py | 13 +- scripts/reinforcement_learning/skrl/play.py | 13 +- scripts/reinforcement_learning/skrl/train.py | 13 +- scripts/sim2sim_transfer/rsl_rl_transfer.py | 13 +- .../isaaclab/isaaclab/envs/direct_marl_env.py | 19 +- .../isaaclab/isaaclab/envs/direct_rl_env.py | 22 +- .../isaaclab/envs/manager_based_rl_env.py | 19 +- .../isaaclab/envs/utils/video_recorder.py | 268 +++++++++++++++--- .../isaaclab/envs/utils/video_recorder_cfg.py | 53 +++- 18 files changed, 484 insertions(+), 62 
deletions(-) diff --git a/scripts/benchmarks/benchmark_non_rl.py b/scripts/benchmarks/benchmark_non_rl.py index aee3be21a40..dfda247a0db 100644 --- a/scripts/benchmarks/benchmark_non_rl.py +++ b/scripts/benchmarks/benchmark_non_rl.py @@ -16,7 +16,14 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -140,6 +147,10 @@ def main( task_startup_time_begin = time.perf_counter_ns() + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) # wrap for video recording diff --git a/scripts/benchmarks/benchmark_rlgames.py b/scripts/benchmarks/benchmark_rlgames.py index 86786026493..e68db05187e 100644 --- a/scripts/benchmarks/benchmark_rlgames.py +++ b/scripts/benchmarks/benchmark_rlgames.py @@ -16,7 +16,14 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -195,6 +202,10 @@ def main( task_startup_time_begin = time.perf_counter_ns() + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) # wrap for video recording diff --git a/scripts/benchmarks/benchmark_rsl_rl.py b/scripts/benchmarks/benchmark_rsl_rl.py index e0c6eb68b5d..a1582950462 100644 --- a/scripts/benchmarks/benchmark_rsl_rl.py +++ b/scripts/benchmarks/benchmark_rsl_rl.py @@ -19,7 +19,14 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=4096, help="Number of environments to simulate.") @@ -182,6 +189,10 @@ def main( task_startup_time_begin = time.perf_counter_ns() + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) # wrap for video recording diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py index eb2390af90d..5762d45f801 100644 --- a/scripts/reinforcement_learning/rl_games/play.py +++ b/scripts/reinforcement_learning/rl_games/play.py @@ -32,7 +32,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RL-Games.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during playing. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." @@ -114,6 +121,10 @@ def main(): obs_groups = agent_cfg["params"]["env"].get("obs_groups") concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True) + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py index 5ad13b401bb..cfc260941db 100644 --- a/scripts/reinforcement_learning/rl_games/train.py +++ b/scripts/reinforcement_learning/rl_games/train.py @@ -36,7 +36,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -158,6 +165,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = os.path.join(log_root_path, log_dir) + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+    if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None:
+        env_cfg.video_recorder.video_mode = args_cli.video
+
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
diff --git a/scripts/reinforcement_learning/rlinf/play.py b/scripts/reinforcement_learning/rlinf/play.py
index f63e02d3e1f..c3782567617 100644
--- a/scripts/reinforcement_learning/rlinf/play.py
+++ b/scripts/reinforcement_learning/rlinf/play.py
@@ -50,7 +50,14 @@
 parser.add_argument(
     "--num_episodes", type=int, default=None, help="Number of evaluation episodes (overrides config if set)."
 )
-parser.add_argument("--video", action="store_true", default=False, help="Enable video recording.")
+parser.add_argument(
+    "--video",
+    nargs="?",
+    const="tiled",
+    default=None,
+    metavar="MODE",
+    help="Enable video recording. MODE is 'tiled' (default) or 'perspective' (not yet supported for rlinf).",
+)
 cli_args.add_rlinf_args(parser)
 args_cli = parser.parse_args()
diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py
index f790f627a22..3b87e88e170 100644
--- a/scripts/reinforcement_learning/rsl_rl/play.py
+++ b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -40,7 +40,14 @@
 # -- argparse ----------------------------------------------------------------
 parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
-parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
+parser.add_argument(
+    "--video",
+    nargs="?",
+    const="perspective",
+    default=None,
+    metavar="MODE",
+    help="Record videos during playing. 
MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." @@ -109,6 +116,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index 7ca2d3156da..fac9142dd6a 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -45,7 +45,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. 
MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -147,6 +154,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # set the log directory for the environment (works for all environment types) env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/sb3/play.py b/scripts/reinforcement_learning/sb3/play.py index a6f222d346c..73d56f5ccd2 100644 --- a/scripts/reinforcement_learning/sb3/play.py +++ b/scripts/reinforcement_learning/sb3/play.py @@ -30,7 +30,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from Stable-Baselines3.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during playing. 
MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." @@ -107,6 +114,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/sb3/train.py b/scripts/reinforcement_learning/sb3/train.py index bd79599d1fd..98148db3708 100644 --- a/scripts/reinforcement_learning/sb3/train.py +++ b/scripts/reinforcement_learning/sb3/train.py @@ -38,7 +38,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with Stable-Baselines3.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. 
MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -137,6 +144,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index 0349d405967..da7f36dd5a7 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -35,7 +35,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from skrl.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during playing. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." 
@@ -150,6 +157,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 750ebeb8798..cb5eeb85752 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -40,7 +40,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with skrl.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -173,6 +180,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/sim2sim_transfer/rsl_rl_transfer.py b/scripts/sim2sim_transfer/rsl_rl_transfer.py index 4de3c42b7a8..78021ebf518 100644 --- a/scripts/sim2sim_transfer/rsl_rl_transfer.py +++ b/scripts/sim2sim_transfer/rsl_rl_transfer.py @@ -19,7 +19,14 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Play an RL agent with RSL-RL with policy transfer.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during transfer. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." @@ -171,6 +178,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # set the log directory for the environment (works for all environment types) env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index 9eebdc89e18..1bb2400d94d 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -535,20 +535,31 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) - # and produces consistent, scene-content frames. Fall back to the omni.replicator - # viewer-camera path only when no TiledCamera with RGB output exists in the scene. + # Prefer TiledCamera in tiled mode; works for all backends and produces + # consistent per-agent frames. In perspective mode the recorder returns None + # intentionally (bypassing TiledCamera entirely) so we always reach the + # omni.replicator Kit-viewport path below. if self.video_recorder is not None: frame = self.video_recorder.render_rgb_array() if frame is not None: return frame - if not self.sim.has_gui and not self.sim.has_offscreen_render: + _perspective_mode = ( + self.video_recorder is not None + and self.cfg.video_recorder is not None + and getattr(self.cfg.video_recorder, "video_mode", "tiled") == "perspective" + ) + if not _perspective_mode and not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" " the scene, and neither GUI nor offscreen rendering is available." " Add a TiledCamera sensor to the scene configuration to enable video recording." 
) # Kit-based fallback: use an omni.replicator annotator on the viewer camera. + # /OmniverseKit_Persp is NOT an RTX sensor, so the guard above may have skipped + # sim.render() when has_rtx_sensors=True (e.g., vision tasks with TiledCamera). + # Force a render pass here so the annotator receives non-empty data. + if self.has_rtx_sensors: + self.sim.render() if not hasattr(self, "_rgb_annotator"): import omni.replicator.core as rep diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index e821bc92a6f..f2812062e69 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -503,20 +503,34 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) - # and produces consistent, scene-content frames. Fall back to the omni.replicator - # viewer-camera path only when no TiledCamera with RGB output exists in the scene. + # Prefer TiledCamera in tiled mode; works for all backends and produces + # consistent per-agent frames. In perspective mode the recorder returns None + # intentionally (bypassing TiledCamera entirely) so we always reach the + # omni.replicator Kit-viewport path below. if self.video_recorder is not None: frame = self.video_recorder.render_rgb_array() if frame is not None: return frame - if not self.sim.has_gui and not self.sim.has_offscreen_render: + # In perspective mode the recorder returns None intentionally so we fall through + # to the omni.replicator viewport path below. Skip the has_offscreen_render guard + # in that case; the annotator works in non-headless Kit sessions too. 
+ _perspective_mode = ( + self.video_recorder is not None + and self.cfg.video_recorder is not None + and getattr(self.cfg.video_recorder, "video_mode", "tiled") == "perspective" + ) + if not _perspective_mode and not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" " the scene, and neither GUI nor offscreen rendering is available." " Add a TiledCamera sensor to the scene configuration to enable video recording." ) # Kit-based fallback: use an omni.replicator annotator on the viewer camera. + # /OmniverseKit_Persp is NOT an RTX sensor, so the guard above may have skipped + # sim.render() when has_rtx_sensors=True (e.g., vision tasks with TiledCamera). + # Force a render pass here so the annotator receives non-empty data. + if self.has_rtx_sensors: + self.sim.render() if not hasattr(self, "_rgb_annotator"): import omni.replicator.core as rep diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index fb5e586d6f3..68f9be35aac 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -277,20 +277,31 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # Prefer TiledCamera when available — works for all backends (kitless and Kit-based) - # and produces consistent, scene-content frames. Fall back to the omni.replicator - # viewer-camera path only when no TiledCamera with RGB output exists in the scene. + # Prefer TiledCamera in tiled mode; works for all backends and produces + # consistent per-agent frames. In perspective mode the recorder returns None + # intentionally (bypassing TiledCamera entirely) so we always reach the + # omni.replicator Kit-viewport path below. 
if self.video_recorder is not None: frame = self.video_recorder.render_rgb_array() if frame is not None: return frame - if not self.sim.has_gui and not self.sim.has_offscreen_render: + _perspective_mode = ( + self.video_recorder is not None + and self.cfg.video_recorder is not None + and getattr(self.cfg.video_recorder, "video_mode", "tiled") == "perspective" + ) + if not _perspective_mode and not self.sim.has_gui and not self.sim.has_offscreen_render: raise RuntimeError( "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" " the scene, and neither GUI nor offscreen rendering is available." " Add a TiledCamera sensor to the scene configuration to enable video recording." ) # Kit-based fallback: use an omni.replicator annotator on the viewer camera. + # /OmniverseKit_Persp is NOT an RTX sensor, so the guard above may have skipped + # sim.render() when has_rtx_sensors=True (e.g., vision tasks with TiledCamera). + # Force a render pass here so the annotator receives non-empty data. 
+ if self.has_rtx_sensors: + self.sim.render() if not hasattr(self, "_rgb_annotator"): import omni.replicator.core as rep diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py index e0475223ecc..158c71ef371 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -3,10 +3,12 @@ # # SPDX-License-Identifier: BSD-3-Clause -"""Viewport recorder for capturing video frames from a :class:`~isaaclab.sensors.camera.TiledCamera`.""" +"""Video recorder for capturing video frames from either a Newton OpenGL perspective +viewer or a :class:`~isaaclab.sensors.camera.TiledCamera` sensor.""" from __future__ import annotations +import logging import math from typing import TYPE_CHECKING @@ -16,31 +18,43 @@ from isaaclab.scene import InteractiveScene from .video_recorder_cfg import VideoRecorderCfg +logger = logging.getLogger(__name__) + class VideoRecorder: - """Records video frames from the scene's :class:`~isaaclab.sensors.camera.TiledCamera`. + """Records video frames from the scene's active renderer. + + The recording strategy is determined by :attr:`~VideoRecorderCfg.video_mode`: + + **``video_mode = "perspective"`` (default)** + + The TiledCamera is **bypassed** entirely, even when one is in the scene. + + * **Newton backends** - headless :class:`newton.viewer.ViewerGL` renders an isometric + wide-angle view of all environments (limited to ``video_num_tiles`` when set). + * **Kit backends** - returns ``None`` so that the environment's ``render()`` method + falls through to the ``omni.replicator.core`` Kit viewport camera path + (``/OmniverseKit_Persp``). 
+ + **``video_mode = "tiled"``** - On the first :meth:`render_rgb_array` call this class searches the scene for the first - ``TiledCamera`` sensor with ``"rgb"`` or ``"rgba"`` output and caches the camera reference - together with all grid-layout constants so subsequent calls are allocation-free (except for - the unavoidable GPU-to-CPU transfer and the final tile-stitch reshape). + Frame sources are tried in priority order on every :meth:`render_rgb_array` call: - The default implementation reads *all* ``num_envs`` frames from the TiledCamera buffer on - the GPU and slices the first ``cfg.video_num_tiles`` on the CPU (Option A). Swap - ``cfg.class_type`` for a custom subclass to change this behaviour without touching any - environment code. + 1. **Observation** :class:`~isaaclab.sensors.camera.TiledCamera` already present in + the scene; vision-based env path. Reuses the agent's own camera sensor at zero + extra cost and produces a square tile-grid of per-agent views. - **Camera selection priority:** + 2. **Newton OpenGL perspective viewer** - Newton backends with no observation + ``TiledCamera``. A headless :class:`newton.viewer.ViewerGL` is lazy-initialised + on the first call and renders an isometric perspective of all environments + (limited to ``video_num_tiles`` when that field is set). - 1. An existing :class:`~isaaclab.sensors.camera.TiledCamera` found in the scene sensors - (vision-based env path — the observation camera is reused for free). - 2. A dedicated video camera grid instantiated from ``cfg.fallback_camera_cfg`` - (state-based env path — no observation camera exists, so one camera per environment - is spawned, up to ``cfg.video_num_tiles``). + 3. **Fallback** :class:`~isaaclab.sensors.camera.TiledCamera` - state-based env path + with Kit-based backends. A camera prim is spawned per environment before + ``sim.reset()``. 
- For the fallback cameras to be initialised correctly they **must** be created before - ``sim.reset()`` is called, so :class:`VideoRecorder` must be instantiated before - ``sim.reset()`` in the environment setup. The environment base classes handle this. + For fallback cameras to initialise correctly they **must** be created before + ``sim.reset()`` is called; the environment base classes handle this. Args: cfg: Configuration for this recorder. @@ -52,28 +66,212 @@ def __init__(self, cfg: VideoRecorderCfg, scene: InteractiveScene): self._scene = scene self._fallback_tiled_camera = None - # Spawn fallback cameras only when video recording is actually requested. - # cfg.render_mode is set to "rgb_array" by the env base class when --video is active - # (forwarded from the render_mode argument of gym.make / the env constructor). - # Gating here avoids GPU overhead in ordinary training runs that don't record video. - if cfg.fallback_camera_cfg is not None and cfg.render_mode == "rgb_array": - self._fallback_tiled_camera = self._spawn_fallback_cameras(cfg, scene) + # Newton GL perspective viewer; lazy-initialised on first render call. + self._gl_viewer = None + self._gl_viewer_initialized = False # True once _try_init_gl_viewer() has run + + if cfg.render_mode == "rgb_array": + # Enable EGL-backed headless rendering for pyglet before ViewerGL is ever + # imported. Must be set before the first 'import pyglet.window'. This is a + # no-op when pyglet is not installed (GL viewer simply stays None). + try: + import pyglet + + if not pyglet.options.get("headless", False): + pyglet.options["headless"] = True + except ImportError: + pass + + # Skip spawning fallback TiledCameras when: + # (a) a Newton backend is active; the GL perspective viewer handles state-based + # rendering so creating per-env camera prims would waste GPU resources, or + # (b) perspective mode is requested; TiledCamera is not used in that path. 
+        _newton_backend = self._is_newton_backend()
+        if cfg.fallback_camera_cfg is not None and not _newton_backend and cfg.video_mode == "tiled":
+            self._fallback_tiled_camera = self._spawn_fallback_cameras(cfg, scene)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
 
     def render_rgb_array(self) -> np.ndarray | None:
-        """Return a square tile-grid RGB frame, or ``None`` if no suitable camera exists.
+        """Return an RGB frame for video recording, or ``None`` when unavailable.
 
-        Returns:
-            RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where
-            ``G = ceil(sqrt(video_num_tiles))`` and ``(H, W)`` is the per-tile resolution,
-            or ``None`` when no :class:`~isaaclab.sensors.camera.TiledCamera` with RGB output
-            is present in the scene or configured as a fallback.
+        The frame source depends on :attr:`~VideoRecorderCfg.video_mode`:
+
+        **``"tiled"`` mode**:
+
+        * Source 1 - observation :class:`~isaaclab.sensors.camera.TiledCamera`:
+          returns a square tile-grid ``(G*H, G*W, 3)`` uint8 array,
+          where ``G = ceil(sqrt(video_num_tiles))``.
+        * Source 2 - Newton GL perspective viewer (state-based + Newton backend):
+          returns ``(gl_viewer_height, gl_viewer_width, 3)`` uint8.
+        * Source 3 - fallback :class:`~isaaclab.sensors.camera.TiledCamera`
+          (state-based + Kit backend): same tile-grid shape as source 1.
+
+        **``"perspective"`` mode** (default):
+
+        * Newton backends: Newton GL perspective viewer (same shape as source 2).
+        * Kit backends: returns ``None`` so the environment's ``render()`` method
+          falls through to the ``omni.replicator.core`` viewport camera path.
        """
-        if self._find_video_camera() is None:
+        if self.cfg.video_mode == "perspective":
+            # Perspective mode: bypass TiledCamera entirely.
+            # Newton backends → GL viewer; Kit backends → return None (env render() continues).
+            if not self._gl_viewer_initialized:
+                self._try_init_gl_viewer()
+            if self._gl_viewer is not None:
+                return self._render_newton_gl_rgb_array()
+            # No GL viewer (Kit backend) → signal the env to use its Kit perspective path.
+            return None
+
+        # --- Tiled mode - priority chain. ---------------------------------
+
+        # Source 1: observation TiledCamera (vision-based path).
+        # _find_video_camera() sets self._video_camera and caches grid constants.
+        video_camera = self._find_video_camera()
+        has_obs_camera = video_camera is not None and video_camera is not self._fallback_tiled_camera
+        if has_obs_camera:
+            return self._render_tiled_camera_rgb_array()
+
+        # Source 2: Newton GL perspective viewer (state-based + Newton backend).
+        if not self._gl_viewer_initialized:
+            self._try_init_gl_viewer()
+        if self._gl_viewer is not None:
+            return self._render_newton_gl_rgb_array()
+
+        # Source 3: fallback TiledCamera (state-based + Kit backend).
+        if video_camera is None:
             return None
         return self._render_tiled_camera_rgb_array()
 
     # ------------------------------------------------------------------
-    # Internal helpers
+    # Internal helpers - Newton GL viewer
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _is_newton_backend() -> bool:
+        """Return ``True`` when the active scene data provider is Newton-based.
+
+        Detected by duck-typing: Newton providers expose ``get_newton_model()``,
+        while PhysX providers do not. Safe to call before ``sim.reset()`` since
+        the provider is registered during scene setup.
+        """
+        try:
+            from isaaclab.sim import SimulationContext
+
+            sdp = SimulationContext.instance().initialize_scene_data_provider()
+            return hasattr(sdp, "get_newton_model")
+        except Exception:
+            return False
+
+    def _try_init_gl_viewer(self) -> None:
+        """Lazy-initialise the Newton OpenGL perspective viewer.
+ + Called once on the first :meth:`render_rgb_array` invocation, at which point + ``sim.reset()`` has already been called so the Newton model is fully built. + On failure the viewer stays ``None`` and the caller falls through to the next + source: source 3 (fallback TiledCamera) in tiled mode, or ``None`` (Kit + viewport path) in perspective mode. + """ + self._gl_viewer_initialized = True + try: + from isaaclab.sim import SimulationContext + + sdp = SimulationContext.instance().initialize_scene_data_provider() + model = sdp.get_newton_model() + if model is None: + return + + import pyglet + + pyglet.options["headless"] = True + from newton.viewer import ViewerGL + + max_worlds = ( + None if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, model.world_count) + ) + + viewer = ViewerGL( + width=self.cfg.gl_viewer_width, + height=self.cfg.gl_viewer_height, + headless=True, + ) + # set_model() auto-computes per-world visual offsets from body positions. + viewer.set_model(model, max_worlds=max_worlds) + # Zero additional spacing - world positions are already in model body_q. + viewer.set_world_offsets((0.0, 0.0, 0.0)) + viewer.up_axis = 2 # Z-up + + self._gl_viewer = viewer + + # Frame the camera once using the current (reset) physics state. + # 1. Set an isometric CAD-viewport angle (pitch/yaw in degrees) so that + # _frame_camera_on_model() preserves the viewing direction and only + # adjusts the distance to fit the scene. + # 2. Run a throwaway begin_frame/log_state/end_frame cycle so the viewer + # has geometry context (needed for accurate bounding-box computation). + # 3. Call _frame_camera_on_model() to auto-set the distance. + # All subsequent renders in _render_newton_gl_rgb_array() reuse this camera. 
+ try: + import warp as wp + + sim = SimulationContext.instance() + state = sdp.get_newton_state() + dt = sim.get_physics_dt() + # Match the Kit /OmniverseKit_Persp default FOV (60°) so the distance + # computed by _frame_camera_on_model() is consistent. Newton GL defaults + # to 45°, which places the camera ~1.3× further back for the same extent. + viewer.camera.fov = 60.0 + # Isometric angle: ~35° down, 45° to the right - matches the style of + # the Kit /OmniverseKit_Persp default "user" viewport camera. + viewer.set_camera(pos=wp.vec3(0.0, 0.0, 0.0), pitch=-35.0, yaw=45.0) + viewer.begin_frame(dt) + viewer.log_state(state) + viewer.end_frame() + viewer._frame_camera_on_model() + except Exception as frame_exc: + logger.warning("[VideoRecorder] GL viewer camera framing failed: %s", frame_exc) + + logger.info( + "[VideoRecorder] Newton GL perspective viewer ready (%dx%d, max_worlds=%s).", + self.cfg.gl_viewer_width, + self.cfg.gl_viewer_height, + max_worlds, + ) + except Exception as exc: + logger.warning("[VideoRecorder] Newton GL viewer unavailable: %s", exc) + + def _render_newton_gl_rgb_array(self) -> np.ndarray | None: + """Render one perspective frame from the Newton OpenGL viewer. + + Returns: + RGB array of shape ``(gl_viewer_height, gl_viewer_width, 3)`` and + dtype ``uint8``, or ``None`` on error. + """ + try: + from isaaclab.sim import SimulationContext + + sim = SimulationContext.instance() + sdp = sim.initialize_scene_data_provider() + state = sdp.get_newton_state() + + # Use the actual physics timestep so that the viewer does not treat + # dt=0 as a no-op and skip drawing geometry on frames after the first. 
+ dt = sim.get_physics_dt() + + viewer = self._gl_viewer + viewer.begin_frame(dt) + viewer.log_state(state) + viewer.end_frame() # renders scene geometry to the off-screen FBO + frame = viewer.get_frame() # wp.array (H, W, 3) uint8 - GPU readback via PBO + return frame.numpy() + except Exception as exc: + logger.warning("[VideoRecorder] GL frame capture failed: %s", exc) + return None + + # ------------------------------------------------------------------ + # Internal helpers - TiledCamera (sources 1 and 3) # ------------------------------------------------------------------ @staticmethod @@ -114,7 +312,7 @@ def _spawn_fallback_cameras(cfg: VideoRecorderCfg, scene: InteractiveScene): # TiledCamera requires exactly one camera prim per environment (count == num_envs). # We must therefore spawn cameras for ALL environments, not just video_num_tiles of them. # The video_num_tiles limit is applied at render time in _render_tiled_camera_rgb_array, - # which only reads the first N tiles — the same behaviour as vision-based observation cameras. + # which only reads the first N tiles - the same behaviour as vision-based observation cameras. for i in range(n_total_envs): prim_path_i = f"/World/envs/env_{i}/VideoCamera" spawn_cfg.func(prim_path_i, spawn_cfg, translation=camera_cfg.offset.pos, orientation=rot_offset) @@ -159,7 +357,7 @@ def _find_video_camera(self): if "rgb" in output or "rgba" in output: self._video_camera = self._fallback_tiled_camera - # Cache all grid constants — these are fixed for the lifetime of the env. + # Cache all grid constants - these are fixed for the lifetime of the env. 
if self._video_camera is not None: output = self._video_camera.data.output self._video_rgb_key = "rgb" if "rgb" in output else "rgba" diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py index 07726ed5a14..b0d0b57cbfc 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py @@ -36,7 +36,7 @@ (env spacing ~4 m). This is the **default** value of :attr:`VideoRecorderCfg.fallback_camera_cfg`. No action is -needed in task configs — fallback cameras are automatically available for all state-based +needed in task configs - fallback cameras are automatically available for all state-based environments. Spawning only occurs when :attr:`VideoRecorderCfg.render_mode` is ``"rgb_array"`` (i.e. ``--video`` is active), so ordinary training runs incur zero overhead. @@ -76,6 +76,33 @@ class VideoRecorderCfg: fallback camera grid. """ + video_mode: str = "perspective" + """Video recording mode. One of ``"tiled"`` or ``"perspective"``. + + * ``"perspective"`` *(default)* - captures a single wide-angle isometric view of the + scene. + + * **Newton backends** (Newton Warp or OVRTX renderer): a headless + :class:`newton.viewer.ViewerGL` renders an isometric perspective of all + environments (or the first ``video_num_tiles`` when that field is set). + * **Kit backends** (PhysX + RTX renderer): the Kit viewport camera + ``/OmniverseKit_Persp`` is captured via ``omni.replicator.core``. + + The TiledCamera sensor is **bypassed** entirely, even when one is present in the + scene (e.g. vision-based tasks), giving a human-readable view instead of the + agent's raw pixel observations. + + * ``"tiled"`` - reads pixel data from a + :class:`~isaaclab.sensors.camera.TiledCamera`. On vision-based tasks the agent's + own observation camera is reused at zero extra cost and the output is a square + tile-grid of per-agent views. 
On state-based tasks with Kit-based backends a + fallback :class:`~isaaclab.sensors.camera.TiledCamera` (``fallback_camera_cfg``) is + spawned. On Newton backends the Newton OpenGL perspective viewer is used instead. + + Set via the ``--video`` CLI flag (``--video=perspective`` / ``--video=tiled``), or + as a Hydra override: ``env.video_recorder.video_mode=tiled``. + """ + video_num_tiles: int = -1 """Number of environment tiles to include in each video frame when using ``render_mode="rgb_array"``. Defaults to -1, which renders all environments. @@ -96,7 +123,7 @@ class VideoRecorderCfg: """Optional :class:`~isaaclab.sensors.camera.TiledCameraCfg` used to spawn a dedicated video-only camera for state-based environments (no observation ``TiledCamera`` in the scene). - Defaults to :data:`DEFAULT_VIDEO_FALLBACK_CAMERA_CFG` — a pinhole camera placed at + Defaults to :data:`DEFAULT_VIDEO_FALLBACK_CAMERA_CFG` - a pinhole camera placed at ``(-7, 0, 3)`` relative to env_0's origin, giving a reasonable side view for environments with ~4 m spacing. Set to ``None`` to disable fallback cameras entirely (e.g. for vision-based tasks that already have an observation :class:`~isaaclab.sensors.camera.TiledCamera`). @@ -104,6 +131,9 @@ class VideoRecorderCfg: Spawning is **gated on** :attr:`render_mode` ``== "rgb_array"`` (i.e. ``--video`` must be active), so the default value causes zero overhead during ordinary training runs. + For Newton-based backends (Newton Warp or OVRTX renderer), the Newton OpenGL perspective + viewer is used instead of fallback TiledCameras - see :attr:`gl_viewer_width`. + To customise the pose for a different environment scale, override in the task's ``__post_init__``:: self.video_recorder.fallback_camera_cfg = self.video_recorder.fallback_camera_cfg.replace( @@ -115,3 +145,22 @@ class VideoRecorderCfg: renderer path check succeeds and ``TiledCamera`` correctly infers ``num_envs`` from the scene. 
""" + + gl_viewer_width: int = 1280 + """Width in pixels of the Newton OpenGL perspective video frame. + + Only used when the active physics/renderer backend exposes a Newton model + (i.e. Newton Warp or OVRTX renderer presets). In that case :class:`VideoRecorder` + spawns a headless :class:`newton.viewer.ViewerGL` instance that renders an isometric + perspective view of all environments (limited to :attr:`video_num_tiles` when set), + replacing the fallback :class:`~isaaclab.sensors.camera.TiledCamera` grid. + + This perspective path is activated only when ``render_mode == "rgb_array"`` + (i.e. ``--video`` is active). Regular training runs are unaffected. + """ + + gl_viewer_height: int = 720 + """Height in pixels of the Newton OpenGL perspective video frame. + + See :attr:`gl_viewer_width` for full description. + """ From 2cca98d1c9eec5daaa6dc1a5ec975f856a458c4b Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 18:59:00 -0700 Subject: [PATCH 05/11] Match Newton GL perspective camera to OmniverseKit_Persp camera through FOV conversion and position --- .../isaaclab/envs/utils/video_recorder.py | 44 +++++++++---------- .../isaaclab/envs/utils/video_recorder_cfg.py | 20 +++++++++ 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py index 158c71ef371..14faf241c58 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -205,33 +205,31 @@ def _try_init_gl_viewer(self) -> None: self._gl_viewer = viewer - # Frame the camera once using the current (reset) physics state. - # 1. Set an isometric CAD-viewport angle (pitch/yaw in degrees) so that - # _frame_camera_on_model() preserves the viewing direction and only - # adjusts the distance to fit the scene. - # 2. 
Run a throwaway begin_frame/log_state/end_frame cycle so the viewer - # has geometry context (needed for accurate bounding-box computation). - # 3. Call _frame_camera_on_model() to auto-set the distance. - # All subsequent renders in _render_newton_gl_rgb_array() reuse this camera. + # Position the camera to match the Kit /OmniverseKit_Persp viewport. + # Convert cfg.camera_eye / cfg.camera_lookat (same defaults as ViewerCfg) + # into Newton GL pitch/yaw (Z-up convention, degrees). try: import warp as wp - sim = SimulationContext.instance() - state = sdp.get_newton_state() - dt = sim.get_physics_dt() - # Match the Kit /OmniverseKit_Persp default FOV (60°) so the distance - # computed by _frame_camera_on_model() is consistent. Newton GL defaults - # to 45°, which places the camera ~1.3× further back for the same extent. - viewer.camera.fov = 60.0 - # Isometric angle: ~35° down, 45° to the right - matches the style of - # the Kit /OmniverseKit_Persp default "user" viewport camera. - viewer.set_camera(pos=wp.vec3(0.0, 0.0, 0.0), pitch=-35.0, yaw=45.0) - viewer.begin_frame(dt) - viewer.log_state(state) - viewer.end_frame() - viewer._frame_camera_on_model() + ex, ey, ez = self.cfg.camera_eye + lx, ly, lz = self.cfg.camera_lookat + dx, dy, dz = lx - ex, ly - ey, lz - ez + length = math.sqrt(dx**2 + dy**2 + dz**2) + dx, dy, dz = dx / length, dy / length, dz / length + pitch = math.degrees(math.asin(max(-1.0, min(1.0, dz)))) + yaw = math.degrees(math.atan2(dy, dx)) + + # Kit's /OmniverseKit_Persp uses a *horizontal* FOV of 60° (derived + # from its default focal_length=18.15 mm / horizontal_aperture=20.955 mm). + # pyglet / Newton GL use *vertical* FOV. Convert so both cameras see + # the same scene extent. 
+ aspect = self.cfg.gl_viewer_width / self.cfg.gl_viewer_height + kit_h_fov_rad = math.radians(60.0) + v_fov_deg = math.degrees(2.0 * math.atan(math.tan(kit_h_fov_rad / 2.0) / aspect)) + viewer.camera.fov = v_fov_deg # ≈ 36° for 1280×720 + viewer.set_camera(pos=wp.vec3(ex, ey, ez), pitch=pitch, yaw=yaw) except Exception as frame_exc: - logger.warning("[VideoRecorder] GL viewer camera framing failed: %s", frame_exc) + logger.warning("[VideoRecorder] GL viewer camera setup failed: %s", frame_exc) logger.info( "[VideoRecorder] Newton GL perspective viewer ready (%dx%d, max_worlds=%s).", diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py index b0d0b57cbfc..a39923f6334 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py @@ -146,6 +146,26 @@ class VideoRecorderCfg: the scene. """ + camera_eye: tuple[float, float, float] = (7.5, 7.5, 7.5) + """World-space position of the Newton GL perspective camera (in metres). + + Defaults to ``(7.5, 7.5, 7.5)`` — the same value as :attr:`~isaaclab.envs.common.ViewerCfg.eye` + — so the Newton GL video matches the Kit ``/OmniverseKit_Persp`` viewport exactly. + + Override to reposition the camera for tasks with a very different scene scale:: + + self.video_recorder.camera_eye = (20.0, 20.0, 20.0) + self.video_recorder.camera_lookat = (0.0, 0.0, 0.0) + + Only used by Newton backends in perspective mode. + """ + + camera_lookat: tuple[float, float, float] = (0.0, 0.0, 0.0) + """World-space point the Newton GL perspective camera looks at (in metres). + + Defaults to ``(0.0, 0.0, 0.0)`` — the same as :attr:`~isaaclab.envs.common.ViewerCfg.lookat`. + """ + gl_viewer_width: int = 1280 """Width in pixels of the Newton OpenGL perspective video frame. 
From ed55c9e39063c3e565b05abdbd955c0616f7e8f8 Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 20:16:24 -0700 Subject: [PATCH 06/11] Refactor video recording: move OV camera into VideoRecorder, clean up routing --- .../isaaclab/isaaclab/envs/direct_marl_env.py | 47 +-- .../isaaclab/envs/direct_marl_env_cfg.py | 13 +- .../isaaclab/isaaclab/envs/direct_rl_env.py | 50 +-- .../isaaclab/envs/direct_rl_env_cfg.py | 13 +- .../isaaclab/envs/manager_based_env_cfg.py | 13 +- .../isaaclab/envs/manager_based_rl_env.py | 48 +-- .../isaaclab/envs/utils/video_recorder.py | 309 ++++++------------ .../isaaclab/envs/utils/video_recorder_cfg.py | 165 +++------- 8 files changed, 164 insertions(+), 494 deletions(-) diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index 1bb2400d94d..412228b5c74 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -176,6 +176,8 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): # Forward render_mode so VideoRecorder only spawns fallback cameras when --video is active. if self.cfg.video_recorder is not None: self.cfg.video_recorder.render_mode = render_mode + self.cfg.video_recorder.kit_cam_prim_path = self.cfg.viewer.cam_prim_path + self.cfg.video_recorder.kit_resolution = self.cfg.viewer.resolution self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( self.cfg.video_recorder, self.scene ) @@ -535,50 +537,7 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # Prefer TiledCamera in tiled mode; works for all backends and produces - # consistent per-agent frames. In perspective mode the recorder returns None - # intentionally (bypassing TiledCamera entirely) so we always reach the - # omni.replicator Kit-viewport path below. 
- if self.video_recorder is not None: - frame = self.video_recorder.render_rgb_array() - if frame is not None: - return frame - _perspective_mode = ( - self.video_recorder is not None - and self.cfg.video_recorder is not None - and getattr(self.cfg.video_recorder, "video_mode", "tiled") == "perspective" - ) - if not _perspective_mode and not self.sim.has_gui and not self.sim.has_offscreen_render: - raise RuntimeError( - "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" - " the scene, and neither GUI nor offscreen rendering is available." - " Add a TiledCamera sensor to the scene configuration to enable video recording." - ) - # Kit-based fallback: use an omni.replicator annotator on the viewer camera. - # /OmniverseKit_Persp is NOT an RTX sensor, so the guard above may have skipped - # sim.render() when has_rtx_sensors=True (e.g., vision tasks with TiledCamera). - # Force a render pass here so the annotator receives non-empty data. - if self.has_rtx_sensors: - self.sim.render() - if not hasattr(self, "_rgb_annotator"): - import omni.replicator.core as rep - - # create render product - self._render_product = rep.create.render_product( - self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution - ) - # create rgb annotator -- used to read data from the render product - self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") - self._rgb_annotator.attach([self._render_product]) - # obtain the rgb data - rgb_data = self._rgb_annotator.get_data() - # convert to numpy array - rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # note: initially the renderer is warming up and returns empty data - if rgb_data.size == 0: - return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) - else: - return rgb_data[:, :, :3] + return self.video_recorder.render_rgb_array() else: raise NotImplementedError( f"Render mode '{self.render_mode}' is not supported. 
Please use: {self.metadata['render_modes']}." diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py index 962c1ffb99b..d697c7fad93 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py @@ -237,14 +237,9 @@ class DirectMARLEnvCfg: """Directory for logging experiment artifacts. Defaults to None, in which case no specific log directory is set.""" video_recorder: VideoRecorderCfg = VideoRecorderCfg() - """Configuration for the viewport recorder used when ``render_mode="rgb_array"``. + """Configuration for video recording when ``render_mode="rgb_array"`` (i.e. ``--video``). - Defaults to a :class:`~isaaclab.envs.VideoRecorderCfg` that captures all environments - in a square tile-grid using :class:`~isaaclab.envs.VideoRecorder`. - - Set :attr:`~isaaclab.envs.VideoRecorderCfg.class_type` to a custom subclass to swap the - capture implementation without modifying environment code. Set to ``None`` to disable - TiledCamera-based recording entirely and fall back to the Kit-based omni.replicator path. - - CLI example: ``env.video_recorder.video_num_tiles=9`` + See :class:`~isaaclab.envs.VideoRecorderCfg` for available options including + ``video_mode`` (``"perspective"`` or ``"tiled"``), ``camera_eye``/``camera_lookat``, + and ``video_num_tiles``. Set to ``None`` to disable the recorder entirely. """ diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index f2812062e69..1115d64415b 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -181,6 +181,8 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): # Forward render_mode so VideoRecorder only spawns fallback cameras when --video is active. 
if self.cfg.video_recorder is not None: self.cfg.video_recorder.render_mode = render_mode + self.cfg.video_recorder.kit_cam_prim_path = self.cfg.viewer.cam_prim_path + self.cfg.video_recorder.kit_resolution = self.cfg.viewer.resolution self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( self.cfg.video_recorder, self.scene ) @@ -503,53 +505,7 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # Prefer TiledCamera in tiled mode; works for all backends and produces - # consistent per-agent frames. In perspective mode the recorder returns None - # intentionally (bypassing TiledCamera entirely) so we always reach the - # omni.replicator Kit-viewport path below. - if self.video_recorder is not None: - frame = self.video_recorder.render_rgb_array() - if frame is not None: - return frame - # In perspective mode the recorder returns None intentionally so we fall through - # to the omni.replicator viewport path below. Skip the has_offscreen_render guard - # in that case; the annotator works in non-headless Kit sessions too. - _perspective_mode = ( - self.video_recorder is not None - and self.cfg.video_recorder is not None - and getattr(self.cfg.video_recorder, "video_mode", "tiled") == "perspective" - ) - if not _perspective_mode and not self.sim.has_gui and not self.sim.has_offscreen_render: - raise RuntimeError( - "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" - " the scene, and neither GUI nor offscreen rendering is available." - " Add a TiledCamera sensor to the scene configuration to enable video recording." - ) - # Kit-based fallback: use an omni.replicator annotator on the viewer camera. - # /OmniverseKit_Persp is NOT an RTX sensor, so the guard above may have skipped - # sim.render() when has_rtx_sensors=True (e.g., vision tasks with TiledCamera). 
- # Force a render pass here so the annotator receives non-empty data. - if self.has_rtx_sensors: - self.sim.render() - if not hasattr(self, "_rgb_annotator"): - import omni.replicator.core as rep - - # create render product - self._render_product = rep.create.render_product( - self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution - ) - # create rgb annotator -- used to read data from the render product - self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") - self._rgb_annotator.attach([self._render_product]) - # obtain the rgb data - rgb_data = self._rgb_annotator.get_data() - # convert to numpy array - rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # note: initially the renderer is warming up and returns empty data - if rgb_data.size == 0: - return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) - else: - return rgb_data[:, :, :3] + return self.video_recorder.render_rgb_array() else: raise NotImplementedError( f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py index c7c11bdb2e9..acc597dd3dd 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py @@ -257,14 +257,9 @@ class DirectRLEnvCfg: """Directory for logging experiment artifacts. Defaults to None, in which case no specific log directory is set.""" video_recorder: VideoRecorderCfg = VideoRecorderCfg() - """Configuration for the viewport recorder used when ``render_mode="rgb_array"``. + """Configuration for video recording when ``render_mode="rgb_array"`` (i.e. ``--video``). - Defaults to a :class:`~isaaclab.envs.VideoRecorderCfg` that captures all environments - in a square tile-grid using :class:`~isaaclab.envs.VideoRecorder`. 
- - Set :attr:`~isaaclab.envs.VideoRecorderCfg.class_type` to a custom subclass to swap the - capture implementation without modifying environment code. Set to ``None`` to disable - TiledCamera-based recording entirely and fall back to the Kit-based omni.replicator path. - - CLI example: ``env.video_recorder.video_num_tiles=9`` + See :class:`~isaaclab.envs.VideoRecorderCfg` for available options including + ``video_mode`` (``"perspective"`` or ``"tiled"``), ``camera_eye``/``camera_lookat``, + and ``video_num_tiles``. Set to ``None`` to disable the recorder entirely. """ diff --git a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py index b231d278e44..2df177f2238 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py @@ -166,14 +166,9 @@ class ManagerBasedEnvCfg: """Directory for logging experiment artifacts. Defaults to None, in which case no specific log directory is set.""" video_recorder: VideoRecorderCfg = VideoRecorderCfg() - """Configuration for the viewport recorder used when ``render_mode="rgb_array"``. + """Configuration for video recording when ``render_mode="rgb_array"`` (i.e. ``--video``). - Defaults to a :class:`~isaaclab.envs.VideoRecorderCfg` that captures all environments - in a square tile-grid using :class:`~isaaclab.envs.VideoRecorder`. - - Set :attr:`~isaaclab.envs.VideoRecorderCfg.class_type` to a custom subclass to swap the - capture implementation without modifying environment code. Set to ``None`` to disable - TiledCamera-based recording entirely and fall back to the Kit-based omni.replicator path. - - CLI example: ``env.video_recorder.video_num_tiles=9`` + See :class:`~isaaclab.envs.VideoRecorderCfg` for available options including + ``video_mode`` (``"perspective"`` or ``"tiled"``), ``camera_eye``/``camera_lookat``, + and ``video_num_tiles``. Set to ``None`` to disable the recorder entirely. 
""" diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index 68f9be35aac..669ac93032f 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -81,6 +81,8 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, ** # so fallback cameras are only spawned when --video is active (render_mode="rgb_array"). if cfg.video_recorder is not None: cfg.video_recorder.render_mode = render_mode + cfg.video_recorder.kit_cam_prim_path = cfg.viewer.cam_prim_path + cfg.video_recorder.kit_resolution = cfg.viewer.resolution # initialize the base class to setup the scene. super().__init__(cfg=cfg) @@ -92,7 +94,6 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, ** # produced video matches the simulation self.metadata["render_fps"] = 1 / self.step_dt self.has_rtx_sensors = self.sim.get_setting("/isaaclab/render/rtx_sensors") - print("[INFO]: Completed setting up the environment...") """ @@ -277,50 +278,7 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # Prefer TiledCamera in tiled mode; works for all backends and produces - # consistent per-agent frames. In perspective mode the recorder returns None - # intentionally (bypassing TiledCamera entirely) so we always reach the - # omni.replicator Kit-viewport path below. 
- if self.video_recorder is not None: - frame = self.video_recorder.render_rgb_array() - if frame is not None: - return frame - _perspective_mode = ( - self.video_recorder is not None - and self.cfg.video_recorder is not None - and getattr(self.cfg.video_recorder, "video_mode", "tiled") == "perspective" - ) - if not _perspective_mode and not self.sim.has_gui and not self.sim.has_offscreen_render: - raise RuntimeError( - "Cannot render 'rgb_array': no TiledCamera sensor with RGB output was found in" - " the scene, and neither GUI nor offscreen rendering is available." - " Add a TiledCamera sensor to the scene configuration to enable video recording." - ) - # Kit-based fallback: use an omni.replicator annotator on the viewer camera. - # /OmniverseKit_Persp is NOT an RTX sensor, so the guard above may have skipped - # sim.render() when has_rtx_sensors=True (e.g., vision tasks with TiledCamera). - # Force a render pass here so the annotator receives non-empty data. - if self.has_rtx_sensors: - self.sim.render() - if not hasattr(self, "_rgb_annotator"): - import omni.replicator.core as rep - - # create render product - self._render_product = rep.create.render_product( - self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution - ) - # create rgb annotator -- used to read data from the render product - self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") - self._rgb_annotator.attach([self._render_product]) - # obtain the rgb data - rgb_data = self._rgb_annotator.get_data() - # convert to numpy array - rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # note: initially the renderer is warming up and returns empty data - if rgb_data.size == 0: - return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) - else: - return rgb_data[:, :, :3] + return self.video_recorder.render_rgb_array() else: raise NotImplementedError( f"Render mode '{self.render_mode}' is not supported. 
Please use: {self.metadata['render_modes']}." diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py index 14faf241c58..325b174bb06 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -3,8 +3,17 @@ # # SPDX-License-Identifier: BSD-3-Clause -"""Video recorder for capturing video frames from either a Newton OpenGL perspective -viewer or a :class:`~isaaclab.sensors.camera.TiledCamera` sensor.""" +"""Video recorder implementation. + +* **Perspective view** (``video_mode="perspective"``) — captures a single wide-angle + view of the scene using the Newton GL viewer (Newton backends) or the Kit viewport + camera ``/OmniverseKit_Persp`` via ``omni.replicator.core`` (Kit backends). +* **Camera sensor / tiled** (``video_mode="tiled"``) — reads pixel data from a + :class:`~isaaclab.sensors.camera.TiledCamera` sensor, producing a grid of per-agent + views. + +See :mod:`video_recorder_cfg` for configuration and full mode descriptions. +""" from __future__ import annotations @@ -24,40 +33,11 @@ class VideoRecorder: """Records video frames from the scene's active renderer. - The recording strategy is determined by :attr:`~VideoRecorderCfg.video_mode`: - - **``video_mode = "perspective"`` (default)** - - The TiledCamera is **bypassed** entirely, even when one is in the scene. - - * **Newton backends** - headless :class:`newton.viewer.ViewerGL` renders an isometric - wide-angle view of all environments (limited to ``video_num_tiles`` when set). - * **Kit backends** - returns ``None`` so that the environment's ``render()`` method - falls through to the ``omni.replicator.core`` Kit viewport camera path - (``/OmniverseKit_Persp``). - - **``video_mode = "tiled"``** - - Frame sources are tried in priority order on every :meth:`render_rgb_array` call: - - 1. 
**Observation** :class:`~isaaclab.sensors.camera.TiledCamera` already present in - the scene; vision-based env path. Reuses the agent's own camera sensor at zero - extra cost and produces a square tile-grid of per-agent views. - - 2. **Newton OpenGL perspective viewer** - Newton backends with no observation - ``TiledCamera``. A headless :class:`newton.viewer.ViewerGL` is lazy-initialised - on the first call and renders an isometric perspective of all environments - (limited to ``video_num_tiles`` when that field is set). - - 3. **Fallback** :class:`~isaaclab.sensors.camera.TiledCamera` - state-based env path - with Kit-based backends. A camera prim is spawned per environment before - ``sim.reset()``. - - For fallback cameras to initialise correctly they **must** be created before - ``sim.reset()`` is called; the environment base classes handle this. + See :class:`~isaaclab.envs.utils.video_recorder_cfg.VideoRecorderCfg` for the full + description of ``video_mode`` and the fallback priority chain. Args: - cfg: Configuration for this recorder. + cfg: Recorder configuration. scene: The interactive scene that owns the sensors. """ @@ -65,15 +45,11 @@ def __init__(self, cfg: VideoRecorderCfg, scene: InteractiveScene): self.cfg = cfg self._scene = scene self._fallback_tiled_camera = None - - # Newton GL perspective viewer; lazy-initialised on first render call. self._gl_viewer = None - self._gl_viewer_initialized = False # True once _try_init_gl_viewer() has run + self._gl_viewer_init_attempted = False if cfg.render_mode == "rgb_array": - # Enable EGL-backed headless rendering for pyglet before ViewerGL is ever - # imported. Must be set before the first 'import pyglet.window'. This is a - # no-op when pyglet is not installed (GL viewer simply stays None). + # enable EGL headless rendering for pyglet before any pyglet.window import. 
try: import pyglet @@ -82,99 +58,40 @@ def __init__(self, cfg: VideoRecorderCfg, scene: InteractiveScene): except ImportError: pass - # Skip spawning fallback TiledCameras when: - # (a) a Newton backend is active; the GL perspective viewer handles state-based - # rendering so creating per-env camera prims would waste GPU resources, or - # (b) perspective mode is requested; TiledCamera is not used in that path. - _newton_backend = self._is_newton_backend() - if cfg.fallback_camera_cfg is not None and not _newton_backend and cfg.video_mode == "tiled": + # pre-spawn fallback TiledCamera; must exist in USD stage before physics initialises. + # whether it is actually used is decided lazily in _find_video_camera(). + if cfg.fallback_camera_cfg is not None and cfg.video_mode == "tiled": self._fallback_tiled_camera = self._spawn_fallback_cameras(cfg, scene) - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - def render_rgb_array(self) -> np.ndarray | None: - """Return an RGB frame for video recording, or ``None`` when unavailable. - - The frame source depends on :attr:`~VideoRecorderCfg.video_mode`: - - **``"tiled"`` mode** (default): - - * Source 1 - observation :class:`~isaaclab.sensors.camera.TiledCamera`: - returns a square tile-grid ``(G*H, G*W, 3)`` uint8 array, - where ``G = ceil(sqrt(video_num_tiles))``. - * Source 2 - Newton GL perspective viewer (state-based + Newton backend): - returns ``(gl_viewer_height, gl_viewer_width, 3)`` uint8. - * Source 3 - fallback :class:`~isaaclab.sensors.camera.TiledCamera` - (state-based + Kit backend): same tile-grid shape as source 1. - - **``"perspective"`` mode**: - - * Newton backends: Newton GL perspective viewer (same shape as source 2). - * Kit backends: returns ``None`` so the environment's ``render()`` method - falls through to the ``omni.replicator.core`` viewport camera path. 
- """ + """Return an RGB frame for video recording, or ``None`` on transient Kit warmup.""" if self.cfg.video_mode == "perspective": - # Perspective mode: bypass TiledCamera entirely. - # Newton backends → GL viewer; Kit backends → return None (env render() continues). - if not self._gl_viewer_initialized: + if not self._gl_viewer_init_attempted: self._try_init_gl_viewer() if self._gl_viewer is not None: return self._render_newton_gl_rgb_array() - # No GL viewer (Kit backend) → signal the env to use its Kit perspective path. - return None - - # --- Tiled mode (default) - priority chain. --------------------------------- + return self._render_kit_perspective_rgb_array() - # Source 1: observation TiledCamera (vision-based path). - # _find_video_camera() sets self._video_camera and caches grid constants. + # tiled mode: use observation TiledCamera if available, then fallback. video_camera = self._find_video_camera() - has_obs_camera = video_camera is not None and video_camera is not self._fallback_tiled_camera - if has_obs_camera: - return self._render_tiled_camera_rgb_array() - - # Source 2: Newton GL perspective viewer (state-based + Newton backend). - if not self._gl_viewer_initialized: - self._try_init_gl_viewer() - if self._gl_viewer is not None: - return self._render_newton_gl_rgb_array() - - # Source 3: fallback TiledCamera (state-based + Kit backend). if video_camera is None: - return None + raise RuntimeError( + "Cannot record video in tiled mode: no TiledCamera sensor with RGB output was found" + " in the scene. Add a TiledCamera sensor or switch to perspective mode (--video=perspective)." 
+ ) + if video_camera is not self._fallback_tiled_camera: + logger.debug("[VideoRecorder] tiled source: observation TiledCamera") + else: + logger.debug("[VideoRecorder] tiled source: fallback TiledCamera") return self._render_tiled_camera_rgb_array() - # ------------------------------------------------------------------ - # Internal helpers - Newton GL viewer - # ------------------------------------------------------------------ - - @staticmethod - def _is_newton_backend() -> bool: - """Return ``True`` when the active scene data provider is Newton-based. - - Detected by duck-typing: Newton providers expose ``get_newton_model()``, - while PhysX providers do not. Safe to call before ``sim.reset()`` since - the provider is registered during scene setup. - """ - try: - from isaaclab.sim import SimulationContext - - sdp = SimulationContext.instance().initialize_scene_data_provider() - return hasattr(sdp, "get_newton_model") - except Exception: - return False - def _try_init_gl_viewer(self) -> None: - """Lazy-initialise the Newton OpenGL perspective viewer. + """Lazy-initialise the Newton GL viewer on the first render call. - Called once on the first :meth:`render_rgb_array` invocation, at which point - ``sim.reset()`` has already been called so the Newton model is fully built. - On failure the viewer stays ``None`` and the caller falls through to the next - source: source 3 (fallback TiledCamera) in tiled mode, or ``None`` (Kit - viewport path) in perspective mode. + Called after ``sim.reset()`` so the Newton model is fully built. + Leaves ``_gl_viewer`` as ``None`` on failure so callers fall through gracefully. 
""" - self._gl_viewer_initialized = True + self._gl_viewer_init_attempted = True try: from isaaclab.sim import SimulationContext @@ -192,22 +109,13 @@ def _try_init_gl_viewer(self) -> None: None if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, model.world_count) ) - viewer = ViewerGL( - width=self.cfg.gl_viewer_width, - height=self.cfg.gl_viewer_height, - headless=True, - ) - # set_model() auto-computes per-world visual offsets from body positions. + viewer = ViewerGL(width=self.cfg.gl_viewer_width, height=self.cfg.gl_viewer_height, headless=True) viewer.set_model(model, max_worlds=max_worlds) - # Zero additional spacing - world positions are already in model body_q. - viewer.set_world_offsets((0.0, 0.0, 0.0)) + viewer.set_world_offsets((0.0, 0.0, 0.0)) # world positions already in body_q viewer.up_axis = 2 # Z-up - self._gl_viewer = viewer - # Position the camera to match the Kit /OmniverseKit_Persp viewport. - # Convert cfg.camera_eye / cfg.camera_lookat (same defaults as ViewerCfg) - # into Newton GL pitch/yaw (Z-up convention, degrees). + # place camera to match Kit /OmniverseKit_Persp (same eye/lookat as ViewerCfg). try: import warp as wp @@ -219,20 +127,16 @@ def _try_init_gl_viewer(self) -> None: pitch = math.degrees(math.asin(max(-1.0, min(1.0, dz)))) yaw = math.degrees(math.atan2(dy, dx)) - # Kit's /OmniverseKit_Persp uses a *horizontal* FOV of 60° (derived - # from its default focal_length=18.15 mm / horizontal_aperture=20.955 mm). - # pyglet / Newton GL use *vertical* FOV. Convert so both cameras see - # the same scene extent. + # Kit uses horizontal FOV (60°); pyglet/Newton GL uses vertical FOV. 
aspect = self.cfg.gl_viewer_width / self.cfg.gl_viewer_height - kit_h_fov_rad = math.radians(60.0) - v_fov_deg = math.degrees(2.0 * math.atan(math.tan(kit_h_fov_rad / 2.0) / aspect)) + v_fov_deg = math.degrees(2.0 * math.atan(math.tan(math.radians(60.0) / 2.0) / aspect)) viewer.camera.fov = v_fov_deg # ≈ 36° for 1280×720 viewer.set_camera(pos=wp.vec3(ex, ey, ez), pitch=pitch, yaw=yaw) - except Exception as frame_exc: - logger.warning("[VideoRecorder] GL viewer camera setup failed: %s", frame_exc) + except Exception as exc: + logger.warning("[VideoRecorder] GL viewer camera setup failed: %s", exc) logger.info( - "[VideoRecorder] Newton GL perspective viewer ready (%dx%d, max_worlds=%s).", + "[VideoRecorder] Newton GL viewer ready (%dx%d, max_worlds=%s).", self.cfg.gl_viewer_width, self.cfg.gl_viewer_height, max_worlds, @@ -241,49 +145,61 @@ def _try_init_gl_viewer(self) -> None: logger.warning("[VideoRecorder] Newton GL viewer unavailable: %s", exc) def _render_newton_gl_rgb_array(self) -> np.ndarray | None: - """Render one perspective frame from the Newton OpenGL viewer. - - Returns: - RGB array of shape ``(gl_viewer_height, gl_viewer_width, 3)`` and - dtype ``uint8``, or ``None`` on error. - """ + """Return one RGB frame from the Newton GL viewer, or ``None`` on error.""" try: from isaaclab.sim import SimulationContext sim = SimulationContext.instance() sdp = sim.initialize_scene_data_provider() state = sdp.get_newton_state() - - # Use the actual physics timestep so that the viewer does not treat - # dt=0 as a no-op and skip drawing geometry on frames after the first. 
dt = sim.get_physics_dt()
 
             viewer = self._gl_viewer
             viewer.begin_frame(dt)
             viewer.log_state(state)
-            viewer.end_frame()  # renders scene geometry to the off-screen FBO
-            frame = viewer.get_frame()  # wp.array (H, W, 3) uint8 - GPU readback via PBO
-            return frame.numpy()
+            viewer.end_frame()
+            return viewer.get_frame().numpy()
         except Exception as exc:
             logger.warning("[VideoRecorder] GL frame capture failed: %s", exc)
             return None
 
-    # ------------------------------------------------------------------
-    # Internal helpers - TiledCamera (sources 1 and 3)
-    # ------------------------------------------------------------------
+    def _render_kit_perspective_rgb_array(self) -> np.ndarray | None:
+        """Return one RGB frame from the Kit /OmniverseKit_Persp camera via omni.replicator.
+
+        Returns a blank (black) frame while the renderer is warming up, or ``None`` if capture fails.
+        """
+        try:
+            import omni.replicator.core as rep
+
+            from isaaclab.sim import SimulationContext
+
+            # /OmniverseKit_Persp is not an RTX sensor; always force a render pass for fresh data. 
+ SimulationContext.instance().render() + + if not hasattr(self, "_rgb_annotator"): + self._render_product = rep.create.render_product( + self.cfg.kit_cam_prim_path, self.cfg.kit_resolution + ) + self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") + self._rgb_annotator.attach([self._render_product]) + + rgb_data = self._rgb_annotator.get_data() + rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) + if rgb_data.size == 0: + # renderer is warming up; return blank frame + h, w = self.cfg.kit_resolution[1], self.cfg.kit_resolution[0] + return np.zeros((h, w, 3), dtype=np.uint8) + return rgb_data[:, :, :3] + except Exception as exc: + logger.warning("[VideoRecorder] Kit perspective capture failed: %s", exc) + return None @staticmethod def _spawn_fallback_cameras(cfg: VideoRecorderCfg, scene: InteractiveScene): - """Spawn one video camera prim per environment (up to ``cfg.video_num_tiles``) and - return a single :class:`~isaaclab.sensors.camera.TiledCamera` covering all of them. - - Camera prims are spawned at ``/World/envs/env_{i}/VideoCamera`` for - ``i in range(n_cameras)``, then a ``TiledCamera`` with the regex prim path - ``/World/envs/env_.*/VideoCamera`` is created so that all spawned prims are - discovered and rendered as tiles. + """Spawn one video camera prim per environment and return a single TiledCamera. - This must be called **before** ``sim.reset()`` so the prims exist in the USD stage - and the ``TiledCamera`` can register for the ``PHYSICS_READY`` callback. + Must be called **before** ``sim.reset()`` so the prims exist when the TiledCamera + registers for its ``PHYSICS_READY`` callback. """ import torch @@ -291,56 +207,37 @@ def _spawn_fallback_cameras(cfg: VideoRecorderCfg, scene: InteractiveScene): from isaaclab.utils.math import convert_camera_frame_orientation_convention camera_cfg = cfg.fallback_camera_cfg - - # Pre-compute the OpenGL rotation offset (mirrors Camera.__init__ logic). 
n_total_envs = scene.num_envs + rot = torch.tensor(camera_cfg.offset.rot, dtype=torch.float32, device="cpu").unsqueeze(0) rot_offset = convert_camera_frame_orientation_convention( rot, origin=camera_cfg.offset.convention, target="opengl" - ) - rot_offset = rot_offset.squeeze(0).cpu().numpy() + ).squeeze(0).cpu().numpy() - # Ensure vertical_aperture is set before calling the spawn func. spawn_cfg = camera_cfg.spawn if spawn_cfg.vertical_aperture is None: spawn_cfg = spawn_cfg.replace( vertical_aperture=spawn_cfg.horizontal_aperture * camera_cfg.height / camera_cfg.width ) - # TiledCamera requires exactly one camera prim per environment (count == num_envs). - # We must therefore spawn cameras for ALL environments, not just video_num_tiles of them. - # The video_num_tiles limit is applied at render time in _render_tiled_camera_rgb_array, - # which only reads the first N tiles - the same behaviour as vision-based observation cameras. for i in range(n_total_envs): - prim_path_i = f"/World/envs/env_{i}/VideoCamera" - spawn_cfg.func(prim_path_i, spawn_cfg, translation=camera_cfg.offset.pos, orientation=rot_offset) - - # Create one TiledCamera that discovers all spawned prims via the regex path. - # spawn=None tells Camera.__init__ to skip re-spawning; it will verify the prims exist. - tiled_cfg = camera_cfg.replace( - prim_path="/World/envs/env_.*/VideoCamera", - spawn=None, - ) + spawn_cfg.func(f"/World/envs/env_{i}/VideoCamera", spawn_cfg, + translation=camera_cfg.offset.pos, orientation=rot_offset) + + tiled_cfg = camera_cfg.replace(prim_path="/World/envs/env_.*/VideoCamera", spawn=None) return TiledCamera(tiled_cfg) def _find_video_camera(self): """Locate and cache the TiledCamera to use for video recording. - Search order: - 1. Observation TiledCamera already in the scene (vision-based env path, zero extra cost). - 2. Dedicated fallback TiledCamera from ``cfg.fallback_camera_cfg`` (state-based env path). - - Returns ``None`` if neither source is available. 
- - Previously used the omni.replicator viewer camera which had RGB output only for - Kit-based backends (``physx`` / ``newton,isaacsim_rtx_renderer``). + Priority: (1) observation TiledCamera already in the scene, (2) fallback camera. + Returns ``None`` if neither is available. """ if not hasattr(self, "_video_camera"): from isaaclab.sensors.camera import TiledCamera self._video_camera = None - # Priority 1: observation TiledCamera in the scene (vision-based env path). for sensor in self._scene.sensors.values(): if isinstance(sensor, TiledCamera): output = sensor.data.output @@ -348,14 +245,12 @@ def _find_video_camera(self): self._video_camera = sensor break - # Priority 2: fallback video camera (state-based env path). if self._video_camera is None and self._fallback_tiled_camera is not None: if self._fallback_tiled_camera.is_initialized: output = self._fallback_tiled_camera.data.output if "rgb" in output or "rgba" in output: self._video_camera = self._fallback_tiled_camera - # Cache all grid constants - these are fixed for the lifetime of the env. if self._video_camera is not None: output = self._video_camera.data.output self._video_rgb_key = "rgb" if "rgb" in output else "rgba" @@ -363,48 +258,28 @@ def _find_video_camera(self): n_envs = n_total if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, n_total) self._video_n_envs = n_envs self._video_grid_size = math.ceil(math.sqrt(n_envs)) - n_slots = self._video_grid_size * self._video_grid_size + n_slots = self._video_grid_size ** 2 H = int(output[self._video_rgb_key].shape[1]) W = int(output[self._video_rgb_key].shape[2]) self._video_H = H self._video_W = W - # Pre-allocate the black padding block (zero-copy when pad == 0). pad = n_slots - n_envs self._video_pad = np.zeros((pad, H, W, 3), dtype=np.uint8) if pad > 0 else None return self._video_camera def _render_tiled_camera_rgb_array(self) -> np.ndarray: - """Return a square tile-grid of RGB frames from the TiledCamera. 
- - Create a square grid of tiles. This method reads directly from the - TiledCamera sensor buffer to generate the tiles. - - If using the dedicated fallback video cameras (not observation sensors), - this method calls ``update()`` on them first to trigger a fresh render pass. - Observation TiledCameras are updated by ``scene.update()`` during the - environment step and do not need an extra update here. - - Returns: - RGB image of shape ``(G*H, G*W, 3)`` and dtype ``uint8``, where - ``G = ceil(sqrt(num_envs))`` and ``(H, W)`` is the per-tile resolution. - """ - # Fallback cameras are not updated by scene.update(), so drive them manually. + """Return a square tile-grid ``(G*H, G*W, 3)`` from the cached TiledCamera.""" if self._video_camera is self._fallback_tiled_camera: self._fallback_tiled_camera.update(dt=0.0, force_recompute=True) rgb_all = self._video_camera.data.output[self._video_rgb_key] - # Drop alpha channel once on GPU before the CPU transfer. if self._video_rgb_key == "rgba": rgb_all = rgb_all[..., :3] - # .contiguous() ensures the reshape below returns a zero-copy view. - tiles = rgb_all[: self._video_n_envs].contiguous().cpu().numpy() # [n_envs, H, W, 3] + tiles = rgb_all[: self._video_n_envs].contiguous().cpu().numpy() if self._video_pad is not None: tiles = np.concatenate([tiles, self._video_pad], axis=0) - # [grid_size, grid_size, H, W, 3] → [grid_size*H, grid_size*W, 3] + g, H, W = self._video_grid_size, self._video_H, self._video_W - grid = tiles.reshape(g, g, H, W, 3) - grid = grid.transpose(0, 2, 1, 3, 4) - # after transpose the strides are non-standard; reshape must copy here. 
- return grid.reshape(g * H, g * W, 3) + return tiles.reshape(g, g, H, W, 3).transpose(0, 2, 1, 3, 4).reshape(g * H, g * W, 3) diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py index a39923f6334..3436b2a333a 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py @@ -3,7 +3,17 @@ # # SPDX-License-Identifier: BSD-3-Clause -"""Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`.""" +"""Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`. + +Two recording modes are supported (set via :attr:`VideoRecorderCfg.video_mode`): + +* **Perspective view** (``"perspective"``, default) - a single wide-angle viewport + camera. Uses the Newton GL viewer on Newton backends; falls back to the Kit + ``/OmniverseKit_Persp`` camera via ``omni.replicator.core`` on Kit backends. +* **Camera sensor / tiled** (``"tiled"``) - reads pixel data from a + :class:`~isaaclab.sensors.camera.TiledCamera` sensor and arranges the per-agent + frames into a square grid. +""" from __future__ import annotations @@ -14,7 +24,7 @@ from .video_recorder import VideoRecorder -DEFAULT_VIDEO_FALLBACK_CAMERA_CFG = TiledCameraCfg( +DEFAULT_TILED_RECORDING_CAMERA_CFG = TiledCameraCfg( prim_path="/World/envs/env_0/VideoCamera", update_period=0.0, height=480, @@ -28,19 +38,12 @@ ), offset=TiledCameraCfg.OffsetCfg(pos=(-7.0, 0.0, 3.0), rot=(0.0, 0.1045, 0.0, 0.9945), convention="world"), ) -"""Default fallback :class:`~isaaclab.sensors.camera.TiledCameraCfg` for state-based video recording. +"""Default :class:`~isaaclab.sensors.camera.TiledCameraCfg` for tiled state-based video recording. -Places a pinhole camera at ``/World/envs/env_0/VideoCamera`` offset ``(-7, 0, 3)`` from -env_0's origin, angled ~12° downward in the world frame. 
This matches the camera position used -by ``Isaac-Cartpole-RGB-v0`` and gives a reasonable side view for medium-scale environments -(env spacing ~4 m). +Places a pinhole camera at ``(-7, 0, 3)`` m relative to env_0's origin, angled ~12° downward. +Only spawned when ``--video=tiled`` is active and no observation TiledCamera exists in the scene. -This is the **default** value of :attr:`VideoRecorderCfg.fallback_camera_cfg`. No action is -needed in task configs - fallback cameras are automatically available for all state-based -environments. Spawning only occurs when :attr:`VideoRecorderCfg.render_mode` is ``"rgb_array"`` -(i.e. ``--video`` is active), so ordinary training runs incur zero overhead. - -To customise the pose for a different environment scale, override in the task's ``__post_init__``:: +Override pose in ``__post_init__`` for tasks with different scene scales:: self.video_recorder.fallback_camera_cfg = self.video_recorder.fallback_camera_cfg.replace( offset=TiledCameraCfg.OffsetCfg(pos=(-3.0, 0.0, 2.0), rot=(0.0, 0.1045, 0.0, 0.9945), convention="world"), @@ -50,137 +53,71 @@ @configclass class VideoRecorderCfg: - """Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`. - - Set :attr:`class_type` to a custom subclass of - :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder` to swap the - video-capture implementation (e.g. an Option-B pipeline that only renders - ``video_num_tiles`` cameras on the GPU) without modifying any environment code. - """ + """Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`.""" class_type: type = VideoRecorder - """The recorder class to instantiate. Must accept ``(cfg, scene)`` as constructor arguments. - Defaults to :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`. - """ + """Recorder class to instantiate; must accept ``(cfg, scene)``.""" render_mode: str | None = None - """The render mode forwarded from the environment constructor. 
- - Populated automatically by the environment base classes from the ``render_mode`` argument - passed to :func:`gymnasium.make` (or the environment constructor directly). User code - should not set this field manually. + """Render mode forwarded from the environment constructor (``"rgb_array"`` when ``--video`` is active). - When ``None`` (the default, i.e. ``--video`` was **not** passed), :class:`VideoRecorder` - skips spawning any fallback cameras so that state-based runs incur zero overhead. - Only when this is ``"rgb_array"`` does the recorder allocate GPU resources for the - fallback camera grid. + Set automatically by the environment base classes; do not set manually. """ video_mode: str = "perspective" - """Video recording mode. One of ``"tiled"`` or ``"perspective"``. - - * ``"perspective"`` *(default)* - captures a single wide-angle isometric view of the - scene. - - * **Newton backends** (Newton Warp or OVRTX renderer): a headless - :class:`newton.viewer.ViewerGL` renders an isometric perspective of all - environments (or the first ``video_num_tiles`` when that field is set). - * **Kit backends** (PhysX + RTX renderer): the Kit viewport camera - ``/OmniverseKit_Persp`` is captured via ``omni.replicator.core``. - - The TiledCamera sensor is **bypassed** entirely, even when one is present in the - scene (e.g. vision-based tasks), giving a human-readable view instead of the - agent's raw pixel observations. - - * ``"tiled"`` - reads pixel data from a - :class:`~isaaclab.sensors.camera.TiledCamera`. On vision-based tasks the agent's - own observation camera is reused at zero extra cost and the output is a square - tile-grid of per-agent views. On state-based tasks with Kit-based backends a - fallback :class:`~isaaclab.sensors.camera.TiledCamera` (``fallback_camera_cfg``) is - spawned. On Newton backends the Newton OpenGL perspective viewer is used instead. 
- - Set via the ``--video`` CLI flag (``--video=perspective`` / ``--video=tiled``), or - as a Hydra override: ``env.video_recorder.video_mode=tiled``. - """ + """Recording mode: ``"perspective"`` (default) or ``"tiled"``. - video_num_tiles: int = -1 - """Number of environment tiles to include in each video frame when using ``render_mode="rgb_array"``. - Defaults to -1, which renders all environments. + * ``"perspective"`` - single wide-angle view of the scene. Newton backends use the Newton GL + viewer; Kit backends use ``/OmniverseKit_Persp`` via ``omni.replicator.core``. TiledCamera + is bypassed even when present. + * ``"tiled"`` - square tile-grid from a :class:`~isaaclab.sensors.camera.TiledCamera`. + Reuses the observation camera on vision-based tasks; spawns ``fallback_camera_cfg`` on + state-based Kit tasks; uses the Newton GL viewer on Newton backends. - Environments are arranged into a square grid of size - ``ceil(sqrt(video_num_tiles)) * ceil(sqrt(video_num_tiles))``, with unused slots filled with - black. For example: + Set via CLI: ``--video=perspective`` / ``--video=tiled``. + """ - * ``-1``: all environments (default) - * ``1``: single environment (1*1) - * ``4``: first 4 environments (2*2 grid) - * ``9``: first 9 environments (3*3 grid) + video_num_tiles: int = -1 + """Max environments to include per frame (``-1`` = all). + Tiles are arranged into a ``ceil(sqrt(N)) × ceil(sqrt(N))`` grid with black padding. CLI example: ``env.video_recorder.video_num_tiles=9`` """ - fallback_camera_cfg: object = DEFAULT_VIDEO_FALLBACK_CAMERA_CFG - """Optional :class:`~isaaclab.sensors.camera.TiledCameraCfg` used to spawn a dedicated - video-only camera for state-based environments (no observation ``TiledCamera`` in the scene). + fallback_camera_cfg: object = DEFAULT_TILED_RECORDING_CAMERA_CFG + """Side-view :class:`~isaaclab.sensors.camera.TiledCameraCfg` for tiled state-based recording. 
 
-    Defaults to :data:`DEFAULT_VIDEO_FALLBACK_CAMERA_CFG` - a pinhole camera placed at
-    ``(-7, 0, 3)`` relative to env_0's origin, giving a reasonable side view for environments
-    with ~4 m spacing. Set to ``None`` to disable fallback cameras entirely (e.g. for
-    vision-based tasks that already have an observation :class:`~isaaclab.sensors.camera.TiledCamera`).
-
-    Spawning is **gated on** :attr:`render_mode` ``== "rgb_array"`` (i.e. ``--video`` must be
-    active), so the default value causes zero overhead during ordinary training runs.
-
-    For Newton-based backends (Newton Warp or OVRTX renderer), the Newton OpenGL perspective
-    viewer is used instead of fallback TiledCameras - see :attr:`gl_viewer_width`.
-
-    To customise the pose for a different environment scale, override in the task's ``__post_init__``::
-
-        self.video_recorder.fallback_camera_cfg = self.video_recorder.fallback_camera_cfg.replace(
-            offset=TiledCameraCfg.OffsetCfg(pos=(-3.0, 0.0, 2.0), rot=(0.0, 0.1045, 0.0, 0.9945), convention="world"),
-        )
-
-    .. note::
-        The prim path in the cfg must start with ``/World/envs/env_0/`` so that the OVRTX
-        renderer path check succeeds and ``TiledCamera`` correctly infers ``num_envs`` from
-        the scene.
+    Pre-spawned whenever tiled-mode recording is active; an observation TiledCamera, if present, takes priority.
+    Set to ``None`` to disable.
     """
 
    camera_eye: tuple[float, float, float] = (7.5, 7.5, 7.5)
-    """World-space position of the Newton GL perspective camera (in metres).
-
-    Defaults to ``(7.5, 7.5, 7.5)`` — the same value as :attr:`~isaaclab.envs.common.ViewerCfg.eye`
-    — so the Newton GL video matches the Kit ``/OmniverseKit_Persp`` viewport exactly.
+    """Newton GL perspective camera position in world space (metres).
 
-    Override to reposition the camera for tasks with a very different scene scale::
-
-        self.video_recorder.camera_eye = (20.0, 20.0, 20.0)
-        self.video_recorder.camera_lookat = (0.0, 0.0, 0.0)
-
-    Only used by Newton backends in perspective mode. 
+ Matches :attr:`~isaaclab.envs.common.ViewerCfg.eye` so the Newton GL video aligns with + the Kit ``/OmniverseKit_Persp`` viewport. Only used by Newton backends in perspective mode. """ camera_lookat: tuple[float, float, float] = (0.0, 0.0, 0.0) - """World-space point the Newton GL perspective camera looks at (in metres). + """Newton GL perspective camera look-at point in world space (metres). - Defaults to ``(0.0, 0.0, 0.0)`` — the same as :attr:`~isaaclab.envs.common.ViewerCfg.lookat`. + Matches :attr:`~isaaclab.envs.common.ViewerCfg.lookat`. Only used by Newton backends in perspective mode. """ gl_viewer_width: int = 1280 - """Width in pixels of the Newton OpenGL perspective video frame. + """Width in pixels of the Newton GL perspective frame. Only active when ``--video`` is set.""" - Only used when the active physics/renderer backend exposes a Newton model - (i.e. Newton Warp or OVRTX renderer presets). In that case :class:`VideoRecorder` - spawns a headless :class:`newton.viewer.ViewerGL` instance that renders an isometric - perspective view of all environments (limited to :attr:`video_num_tiles` when set), - replacing the fallback :class:`~isaaclab.sensors.camera.TiledCamera` grid. + gl_viewer_height: int = 720 + """Height in pixels of the Newton GL perspective frame. Only active when ``--video`` is set.""" - This perspective path is activated only when ``render_mode == "rgb_array"`` - (i.e. ``--video`` is active). Regular training runs are unaffected. + kit_cam_prim_path: str = "/OmniverseKit_Persp" + """USD prim path of the Kit viewport camera used for perspective recording on Kit backends. + + Set automatically from :attr:`~isaaclab.envs.common.ViewerCfg.cam_prim_path`; do not set manually. """ - gl_viewer_height: int = 720 - """Height in pixels of the Newton OpenGL perspective video frame. + kit_resolution: tuple[int, int] = (1280, 720) + """Resolution ``(width, height)`` of the Kit perspective frame. - See :attr:`gl_viewer_width` for full description. 
+ Set automatically from :attr:`~isaaclab.envs.common.ViewerCfg.resolution`; do not set manually. """ From 9988962fc1d757f05a6ad3419240f50337b04576 Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 20:29:30 -0700 Subject: [PATCH 07/11] Clean up video recorder: drop redundant kit cfg injection, debug logs, and stale comments --- source/isaaclab/isaaclab/envs/direct_marl_env.py | 2 -- source/isaaclab/isaaclab/envs/direct_rl_env.py | 2 -- .../isaaclab/envs/manager_based_rl_env.py | 2 -- .../isaaclab/envs/utils/video_recorder.py | 12 ++++-------- .../isaaclab/envs/utils/video_recorder_cfg.py | 15 ++------------- 5 files changed, 6 insertions(+), 27 deletions(-) diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index 412228b5c74..fa067df4b42 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -176,8 +176,6 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): # Forward render_mode so VideoRecorder only spawns fallback cameras when --video is active. if self.cfg.video_recorder is not None: self.cfg.video_recorder.render_mode = render_mode - self.cfg.video_recorder.kit_cam_prim_path = self.cfg.viewer.cam_prim_path - self.cfg.video_recorder.kit_resolution = self.cfg.viewer.resolution self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( self.cfg.video_recorder, self.scene ) diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index 1115d64415b..237f94e3f61 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -181,8 +181,6 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): # Forward render_mode so VideoRecorder only spawns fallback cameras when --video is active. 
if self.cfg.video_recorder is not None: self.cfg.video_recorder.render_mode = render_mode - self.cfg.video_recorder.kit_cam_prim_path = self.cfg.viewer.cam_prim_path - self.cfg.video_recorder.kit_resolution = self.cfg.viewer.resolution self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( self.cfg.video_recorder, self.scene ) diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index 669ac93032f..db4ff7a13de 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -81,8 +81,6 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, ** # so fallback cameras are only spawned when --video is active (render_mode="rgb_array"). if cfg.video_recorder is not None: cfg.video_recorder.render_mode = render_mode - cfg.video_recorder.kit_cam_prim_path = cfg.viewer.cam_prim_path - cfg.video_recorder.kit_resolution = cfg.viewer.resolution # initialize the base class to setup the scene. super().__init__(cfg=cfg) diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py index 325b174bb06..20492ba0585 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -79,17 +79,14 @@ def render_rgb_array(self) -> np.ndarray | None: "Cannot record video in tiled mode: no TiledCamera sensor with RGB output was found" " in the scene. Add a TiledCamera sensor or switch to perspective mode (--video=perspective)." ) - if video_camera is not self._fallback_tiled_camera: - logger.debug("[VideoRecorder] tiled source: observation TiledCamera") - else: - logger.debug("[VideoRecorder] tiled source: fallback TiledCamera") return self._render_tiled_camera_rgb_array() def _try_init_gl_viewer(self) -> None: """Lazy-initialise the Newton GL viewer on the first render call. 
Called after ``sim.reset()`` so the Newton model is fully built. - Leaves ``_gl_viewer`` as ``None`` on failure so callers fall through gracefully. + Leaves ``_gl_viewer`` as ``None`` on Kit backends; ``render_rgb_array`` then + calls ``_render_kit_perspective_rgb_array`` instead. """ self._gl_viewer_init_attempted = True try: @@ -178,7 +175,7 @@ def _render_kit_perspective_rgb_array(self) -> np.ndarray | None: if not hasattr(self, "_rgb_annotator"): self._render_product = rep.create.render_product( - self.cfg.kit_cam_prim_path, self.cfg.kit_resolution + "/OmniverseKit_Persp", (1280, 720) ) self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") self._rgb_annotator.attach([self._render_product]) @@ -187,8 +184,7 @@ def _render_kit_perspective_rgb_array(self) -> np.ndarray | None: rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) if rgb_data.size == 0: # renderer is warming up; return blank frame - h, w = self.cfg.kit_resolution[1], self.cfg.kit_resolution[0] - return np.zeros((h, w, 3), dtype=np.uint8) + return np.zeros((720, 1280, 3), dtype=np.uint8) return rgb_data[:, :, :3] except Exception as exc: logger.warning("[VideoRecorder] Kit perspective capture failed: %s", exc) diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py index 3436b2a333a..501df00a5a8 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py @@ -71,8 +71,8 @@ class VideoRecorderCfg: viewer; Kit backends use ``/OmniverseKit_Persp`` via ``omni.replicator.core``. TiledCamera is bypassed even when present. * ``"tiled"`` - square tile-grid from a :class:`~isaaclab.sensors.camera.TiledCamera`. - Reuses the observation camera on vision-based tasks; spawns ``fallback_camera_cfg`` on - state-based Kit tasks; uses the Newton GL viewer on Newton backends. 
+ Reuses the observation camera on vision-based tasks; spawns ``fallback_camera_cfg`` for + state-based tasks. Raises ``RuntimeError`` if no TiledCamera is available. Set via CLI: ``--video=perspective`` / ``--video=tiled``. """ @@ -110,14 +110,3 @@ class VideoRecorderCfg: gl_viewer_height: int = 720 """Height in pixels of the Newton GL perspective frame. Only active when ``--video`` is set.""" - kit_cam_prim_path: str = "/OmniverseKit_Persp" - """USD prim path of the Kit viewport camera used for perspective recording on Kit backends. - - Set automatically from :attr:`~isaaclab.envs.common.ViewerCfg.cam_prim_path`; do not set manually. - """ - - kit_resolution: tuple[int, int] = (1280, 720) - """Resolution ``(width, height)`` of the Kit perspective frame. - - Set automatically from :attr:`~isaaclab.envs.common.ViewerCfg.resolution`; do not set manually. - """ From ad35aa007969ed53e06ba97f6b4326a721baade5 Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 20:34:11 -0700 Subject: [PATCH 08/11] greptile fix: fix video recorder bugs --- scripts/reinforcement_learning/rlinf/play.py | 2 +- .../isaaclab/isaaclab/envs/direct_marl_env.py | 2 + .../isaaclab/isaaclab/envs/direct_rl_env.py | 2 + .../isaaclab/envs/manager_based_rl_env.py | 2 + .../isaaclab/envs/utils/video_recorder.py | 75 ++++++++++--------- 5 files changed, 47 insertions(+), 36 deletions(-) diff --git a/scripts/reinforcement_learning/rlinf/play.py b/scripts/reinforcement_learning/rlinf/play.py index c3782567617..dcade9a3237 100644 --- a/scripts/reinforcement_learning/rlinf/play.py +++ b/scripts/reinforcement_learning/rlinf/play.py @@ -56,7 +56,7 @@ const="perspective", default=None, metavar="MODE", - help="Enable video recording. MODE is 'tiled' (default) or 'perspective' (not yet supported for rlinf).", + help="Enable video recording. 
MODE is 'perspective' (default) or 'tiled'.", ) cli_args.add_rlinf_args(parser) args_cli = parser.parse_args() diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index fa067df4b42..33541c3cd44 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -535,6 +535,8 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": + if self.video_recorder is None: + return None return self.video_recorder.render_rgb_array() else: raise NotImplementedError( diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index 237f94e3f61..58456e72fb2 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -503,6 +503,8 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": + if self.video_recorder is None: + return None return self.video_recorder.render_rgb_array() else: raise NotImplementedError( diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index db4ff7a13de..132fa4d97fb 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -276,6 +276,8 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": + if self.video_recorder is None: + return None return self.video_recorder.render_rgb_array() else: raise NotImplementedError( diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py index 
20492ba0585..ab59707e384 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -64,7 +64,7 @@ def __init__(self, cfg: VideoRecorderCfg, scene: InteractiveScene): self._fallback_tiled_camera = self._spawn_fallback_cameras(cfg, scene) def render_rgb_array(self) -> np.ndarray | None: - """Return an RGB frame for video recording, or ``None`` on transient Kit warmup.""" + """Return an RGB frame for video recording, or ``None`` when no GL viewer and no Kit runtime.""" if self.cfg.video_mode == "perspective": if not self._gl_viewer_init_attempted: self._try_init_gl_viewer() @@ -188,7 +188,7 @@ def _render_kit_perspective_rgb_array(self) -> np.ndarray | None: return rgb_data[:, :, :3] except Exception as exc: logger.warning("[VideoRecorder] Kit perspective capture failed: %s", exc) - return None + return np.zeros((720, 1280, 3), dtype=np.uint8) @staticmethod def _spawn_fallback_cameras(cfg: VideoRecorderCfg, scene: InteractiveScene): @@ -227,41 +227,46 @@ def _find_video_camera(self): """Locate and cache the TiledCamera to use for video recording. Priority: (1) observation TiledCamera already in the scene, (2) fallback camera. - Returns ``None`` if neither is available. + Returns ``None`` if neither is available yet (retried on the next call). 
""" - if not hasattr(self, "_video_camera"): - from isaaclab.sensors.camera import TiledCamera - - self._video_camera = None - - for sensor in self._scene.sensors.values(): - if isinstance(sensor, TiledCamera): - output = sensor.data.output - if "rgb" in output or "rgba" in output: - self._video_camera = sensor - break - - if self._video_camera is None and self._fallback_tiled_camera is not None: - if self._fallback_tiled_camera.is_initialized: - output = self._fallback_tiled_camera.data.output - if "rgb" in output or "rgba" in output: - self._video_camera = self._fallback_tiled_camera - - if self._video_camera is not None: - output = self._video_camera.data.output - self._video_rgb_key = "rgb" if "rgb" in output else "rgba" - n_total = int(output[self._video_rgb_key].shape[0]) - n_envs = n_total if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, n_total) - self._video_n_envs = n_envs - self._video_grid_size = math.ceil(math.sqrt(n_envs)) - n_slots = self._video_grid_size ** 2 - H = int(output[self._video_rgb_key].shape[1]) - W = int(output[self._video_rgb_key].shape[2]) - self._video_H = H - self._video_W = W - pad = n_slots - n_envs - self._video_pad = np.zeros((pad, H, W, 3), dtype=np.uint8) if pad > 0 else None + if hasattr(self, "_video_camera"): + return self._video_camera + + from isaaclab.sensors.camera import TiledCamera + + camera = None + + for sensor in self._scene.sensors.values(): + if isinstance(sensor, TiledCamera): + output = sensor.data.output + if "rgb" in output or "rgba" in output: + camera = sensor + break + + if camera is None and self._fallback_tiled_camera is not None: + if self._fallback_tiled_camera.is_initialized: + output = self._fallback_tiled_camera.data.output + if "rgb" in output or "rgba" in output: + camera = self._fallback_tiled_camera + + if camera is None: + return None + # cache only once a camera is confirmed available. 
+ self._video_camera = camera + output = camera.data.output + self._video_rgb_key = "rgb" if "rgb" in output else "rgba" + n_total = int(output[self._video_rgb_key].shape[0]) + n_envs = n_total if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, n_total) + self._video_n_envs = n_envs + self._video_grid_size = math.ceil(math.sqrt(n_envs)) + n_slots = self._video_grid_size ** 2 + H = int(output[self._video_rgb_key].shape[1]) + W = int(output[self._video_rgb_key].shape[2]) + self._video_H = H + self._video_W = W + pad = n_slots - n_envs + self._video_pad = np.zeros((pad, H, W, 3), dtype=np.uint8) if pad > 0 else None return self._video_camera def _render_tiled_camera_rgb_array(self) -> np.ndarray: From 9714614d9cf6c26442d984ef9552339951f76802 Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 20:45:53 -0700 Subject: [PATCH 09/11] fix: return blank frame instead of None from _render_newton_gl_rgb_array on error --- source/isaaclab/isaaclab/envs/utils/video_recorder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py index ab59707e384..837563b4c9b 100644 --- a/source/isaaclab/isaaclab/envs/utils/video_recorder.py +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -64,7 +64,7 @@ def __init__(self, cfg: VideoRecorderCfg, scene: InteractiveScene): self._fallback_tiled_camera = self._spawn_fallback_cameras(cfg, scene) def render_rgb_array(self) -> np.ndarray | None: - """Return an RGB frame for video recording, or ``None`` when no GL viewer and no Kit runtime.""" + """Return an RGB frame for video recording, or ``None`` when neither GL viewer nor Kit runtime is available.""" if self.cfg.video_mode == "perspective": if not self._gl_viewer_init_attempted: self._try_init_gl_viewer() @@ -141,8 +141,8 @@ def _try_init_gl_viewer(self) -> None: except Exception as exc: logger.warning("[VideoRecorder] 
Newton GL viewer unavailable: %s", exc) - def _render_newton_gl_rgb_array(self) -> np.ndarray | None: - """Return one RGB frame from the Newton GL viewer, or ``None`` on error.""" + def _render_newton_gl_rgb_array(self) -> np.ndarray: + """Return one RGB frame from the Newton GL viewer, or a blank frame on error.""" try: from isaaclab.sim import SimulationContext @@ -158,7 +158,7 @@ def _render_newton_gl_rgb_array(self) -> np.ndarray | None: return viewer.get_frame().numpy() except Exception as exc: logger.warning("[VideoRecorder] GL frame capture failed: %s", exc) - return None + return np.zeros((self.cfg.gl_viewer_height, self.cfg.gl_viewer_width, 3), dtype=np.uint8) def _render_kit_perspective_rgb_array(self) -> np.ndarray | None: """Return one RGB frame from the Kit /OmniverseKit_Persp camera via omni.replicator. From e1a3b640ae514a2d949c5d34b1ed8ef43d032a15 Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 20:51:14 -0700 Subject: [PATCH 10/11] fix: clarify --video help text in rlinf/play.py; mode selection not yet supported --- scripts/reinforcement_learning/rlinf/play.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/reinforcement_learning/rlinf/play.py b/scripts/reinforcement_learning/rlinf/play.py index dcade9a3237..5d5e9682c9f 100644 --- a/scripts/reinforcement_learning/rlinf/play.py +++ b/scripts/reinforcement_learning/rlinf/play.py @@ -56,7 +56,7 @@ const="perspective", default=None, metavar="MODE", - help="Enable video recording. MODE is 'perspective' (default) or 'tiled'.", + help="Enable video recording. MODE is 'perspective' (default) or 'tiled'. 
Note: mode selection is not yet supported for rlinf; any non-None value enables recording.", ) cli_args.add_rlinf_args(parser) args_cli = parser.parse_args() From 2fee9dc2392d54f8c4d908ebd69713bb006899df Mon Sep 17 00:00:00 2001 From: Brian Dilinila Date: Thu, 12 Mar 2026 21:00:49 -0700 Subject: [PATCH 11/11] Added unit tests --- .../envs/utils/test_video_recorder.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 source/isaaclab/isaaclab/envs/utils/test_video_recorder.py diff --git a/source/isaaclab/isaaclab/envs/utils/test_video_recorder.py b/source/isaaclab/isaaclab/envs/utils/test_video_recorder.py new file mode 100644 index 00000000000..398dc0ee045 --- /dev/null +++ b/source/isaaclab/isaaclab/envs/utils/test_video_recorder.py @@ -0,0 +1,116 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +"""Unit tests for VideoRecorder.""" +import importlib.util, pathlib, sys +from types import SimpleNamespace +from unittest.mock import MagicMock, patch +import numpy as np +import pytest + +_spec = importlib.util.spec_from_file_location("_vr", pathlib.Path(__file__).parent / "video_recorder.py") +_module = importlib.util.module_from_spec(_spec); _spec.loader.exec_module(_module); VideoRecorder = _module.VideoRecorder + +_BLANK_720p = np.zeros((720, 1280, 3), dtype=np.uint8) +_DEFAULT_CFG = dict( + render_mode="rgb_array", video_mode="perspective", fallback_camera_cfg=None, + video_num_tiles=-1, camera_eye=(7.5, 7.5, 7.5), camera_lookat=(0.0, 0.0, 0.0), + gl_viewer_width=1280, gl_viewer_height=720, +) + + +def _create_recorder(**kw): + """Return a VideoRecorder with __init__ bypassed and all deps mocked out.""" + recorder = object.__new__(VideoRecorder) + recorder.cfg = SimpleNamespace(**{**_DEFAULT_CFG, **kw}) + recorder._scene = MagicMock(); recorder._scene.sensors = {} + 
recorder._fallback_tiled_camera = None + recorder._gl_viewer = None + recorder._gl_viewer_init_attempted = False + return recorder + + +def test_init_perspective_mode_does_not_spawn_fallback(): + """In perspective mode, __init__ never spawns a TiledCamera fallback.""" + scene = MagicMock(); scene.sensors = {}; scene.num_envs = 1 + cfg = SimpleNamespace(**{**_DEFAULT_CFG, "fallback_camera_cfg": MagicMock()}) + with patch.dict(sys.modules, {"pyglet": MagicMock()}): + with patch.object(VideoRecorder, "_spawn_fallback_cameras") as mock_spawn: + VideoRecorder(cfg, scene) + mock_spawn.assert_not_called() + + +def test_init_tiled_mode_spawns_fallback_when_configured(): + """In tiled mode with a fallback_camera_cfg, __init__ calls _spawn_fallback_cameras.""" + scene = MagicMock(); scene.sensors = {}; scene.num_envs = 1 + cfg = SimpleNamespace(**{**_DEFAULT_CFG, "video_mode": "tiled", "fallback_camera_cfg": MagicMock()}) + with patch.dict(sys.modules, {"pyglet": MagicMock()}): + with patch.object(VideoRecorder, "_spawn_fallback_cameras", return_value=MagicMock()) as mock_spawn: + VideoRecorder(cfg, scene) + mock_spawn.assert_called_once() + + +def test_render_rgb_array_perspective_uses_gl_viewer_when_available(): + """Perspective mode returns a GL viewer frame when _gl_viewer is set.""" + recorder = _create_recorder() + recorder._gl_viewer = MagicMock(); recorder._gl_viewer_init_attempted = True + with patch.object(recorder, "_render_newton_gl_rgb_array", return_value=_BLANK_720p) as mock_gl: + result = recorder.render_rgb_array() + mock_gl.assert_called_once() + assert result.shape == (720, 1280, 3) + + +def test_render_rgb_array_perspective_falls_through_to_kit_when_no_gl_viewer(): + """Kit capture path is used when no GL viewer is available (Kit backend).""" + recorder = _create_recorder(); recorder._gl_viewer_init_attempted = True + with patch.object(recorder, "_render_kit_perspective_rgb_array", return_value=_BLANK_720p) as mock_kit: + recorder.render_rgb_array() + 
mock_kit.assert_called_once() + + +def test_render_rgb_array_tiled_raises_when_no_camera(): + """Tiled mode with no TiledCamera raises RuntimeError with a descriptive message.""" + recorder = _create_recorder(video_mode="tiled") + with patch.object(recorder, "_find_video_camera", return_value=None): + with pytest.raises(RuntimeError, match="tiled mode"): + recorder.render_rgb_array() + + +def test_gl_exception_returns_blank_ndarray_not_none(): + """GL renderer crash must return a blank ndarray, never None, so RecordVideo never sees None.""" + recorder = _create_recorder(); recorder._gl_viewer = MagicMock(); recorder._gl_viewer_init_attempted = True + with patch.dict(sys.modules, {"isaaclab.sim": MagicMock(SimulationContext=MagicMock(instance=MagicMock(side_effect=RuntimeError)))}): + frame = recorder._render_newton_gl_rgb_array() + assert isinstance(frame, np.ndarray) and frame.shape == (720, 1280, 3) + + +def test_find_video_camera_does_not_cache_none(): + """A None result is not cached, allowing retry on the next call.""" + recorder = _create_recorder(video_mode="tiled") + FakeTiledCamera = type("TiledCamera", (), {}) + with patch.dict(sys.modules, {"isaaclab": MagicMock(), "isaaclab.sensors": MagicMock(), "isaaclab.sensors.camera": MagicMock(TiledCamera=FakeTiledCamera)}): + result = recorder._find_video_camera() + assert result is None and not hasattr(recorder, "_video_camera") + + +def test_find_video_camera_caches_result_when_found(): + """A found camera is cached so the scene is not re-scanned on subsequent calls.""" + recorder = _create_recorder(video_mode="tiled") + FakeTiledCamera = type("TiledCamera", (), {}) + camera = MagicMock(); camera.__class__ = FakeTiledCamera + camera.is_initialized = True; camera.data.output = {"rgb": MagicMock(shape=(4, 64, 64, 3))} + recorder._scene.sensors = {"cam": camera} + with patch.dict(sys.modules, {"isaaclab": MagicMock(), "isaaclab.sensors": MagicMock(), "isaaclab.sensors.camera": 
MagicMock(TiledCamera=FakeTiledCamera)}): + result = recorder._find_video_camera() + assert result is camera and hasattr(recorder, "_video_camera") + + +def test_gl_viewer_init_attempted_only_once(): + """_try_init_gl_viewer is called at most once regardless of render call count.""" + recorder = _create_recorder(); recorder._gl_viewer_init_attempted = False + def _set_flag(): recorder._gl_viewer_init_attempted = True + with patch.object(recorder, "_try_init_gl_viewer", side_effect=_set_flag) as mock_init, \ + patch.object(recorder, "_render_kit_perspective_rgb_array", return_value=_BLANK_720p): + for _ in range(3): recorder.render_rgb_array() + mock_init.assert_called_once()