diff --git a/scripts/benchmarks/benchmark_non_rl.py b/scripts/benchmarks/benchmark_non_rl.py index aee3be21a40..dfda247a0db 100644 --- a/scripts/benchmarks/benchmark_non_rl.py +++ b/scripts/benchmarks/benchmark_non_rl.py @@ -16,7 +16,14 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -140,6 +147,10 @@ def main( task_startup_time_begin = time.perf_counter_ns() + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) # wrap for video recording diff --git a/scripts/benchmarks/benchmark_rlgames.py b/scripts/benchmarks/benchmark_rlgames.py index f6c000a8fce..0f2c3aa8246 100644 --- a/scripts/benchmarks/benchmark_rlgames.py +++ b/scripts/benchmarks/benchmark_rlgames.py @@ -16,7 +16,14 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -205,6 +212,10 @@ def main( task_startup_time_begin = time.perf_counter_ns() + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) # wrap for video recording diff --git a/scripts/benchmarks/benchmark_rsl_rl.py b/scripts/benchmarks/benchmark_rsl_rl.py index e0c7361231b..cbb5de339fe 100644 --- a/scripts/benchmarks/benchmark_rsl_rl.py +++ b/scripts/benchmarks/benchmark_rsl_rl.py @@ -19,7 +19,14 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=4096, help="Number of environments to simulate.") @@ -192,6 +199,10 @@ def main( task_startup_time_begin = time.perf_counter_ns() + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) # wrap for video recording diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py index eb2390af90d..5762d45f801 100644 --- a/scripts/reinforcement_learning/rl_games/play.py +++ b/scripts/reinforcement_learning/rl_games/play.py @@ -32,7 +32,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RL-Games.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during playing. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." @@ -114,6 +121,10 @@ def main(): obs_groups = agent_cfg["params"]["env"].get("obs_groups") concate_obs_groups = agent_cfg["params"]["env"].get("concate_obs_groups", True) + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py index 5ad13b401bb..cfc260941db 100644 --- a/scripts/reinforcement_learning/rl_games/train.py +++ b/scripts/reinforcement_learning/rl_games/train.py @@ -36,7 +36,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -158,6 +165,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = os.path.join(log_root_path, log_dir) + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/rlinf/play.py b/scripts/reinforcement_learning/rlinf/play.py index f63e02d3e1f..5d5e9682c9f 100644 --- a/scripts/reinforcement_learning/rlinf/play.py +++ b/scripts/reinforcement_learning/rlinf/play.py @@ -50,7 +50,14 @@ parser.add_argument( "--num_episodes", type=int, default=None, help="Number of evaluation episodes (overrides config if set)." ) -parser.add_argument("--video", action="store_true", default=False, help="Enable video recording.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Enable video recording. MODE is 'perspective' (default) or 'tiled'. Note: mode selection is not yet supported for rlinf; any non-None value enables recording.", +) cli_args.add_rlinf_args(parser) args_cli = parser.parse_args() diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py index f790f627a22..3b87e88e170 100644 --- a/scripts/reinforcement_learning/rsl_rl/play.py +++ b/scripts/reinforcement_learning/rsl_rl/play.py @@ -40,7 +40,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during playing. 
MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." @@ -109,6 +116,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index 7ca2d3156da..fac9142dd6a 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -45,7 +45,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. 
MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -147,6 +154,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # set the log directory for the environment (works for all environment types) env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/sb3/play.py b/scripts/reinforcement_learning/sb3/play.py index a6f222d346c..73d56f5ccd2 100644 --- a/scripts/reinforcement_learning/sb3/play.py +++ b/scripts/reinforcement_learning/sb3/play.py @@ -30,7 +30,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from Stable-Baselines3.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during playing. 
MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." @@ -107,6 +114,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/sb3/train.py b/scripts/reinforcement_learning/sb3/train.py index bd79599d1fd..98148db3708 100644 --- a/scripts/reinforcement_learning/sb3/train.py +++ b/scripts/reinforcement_learning/sb3/train.py @@ -38,7 +38,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with Stable-Baselines3.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. 
MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -137,6 +144,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index 0349d405967..da7f36dd5a7 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -35,7 +35,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from skrl.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during playing. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." 
@@ -150,6 +157,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. + if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 9eab1712df9..5ab1255c6cc 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -40,7 +40,14 @@ # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with skrl.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during training. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") @@ -173,6 +180,10 @@ def main(): # set the log directory for the environment env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/scripts/sim2sim_transfer/rsl_rl_transfer.py b/scripts/sim2sim_transfer/rsl_rl_transfer.py index 4de3c42b7a8..78021ebf518 100644 --- a/scripts/sim2sim_transfer/rsl_rl_transfer.py +++ b/scripts/sim2sim_transfer/rsl_rl_transfer.py @@ -19,7 +19,14 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Play an RL agent with RSL-RL with policy transfer.") -parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") +parser.add_argument( + "--video", + nargs="?", + const="perspective", + default=None, + metavar="MODE", + help="Record videos during transfer. MODE is 'perspective' (default, wide-angle isometric view) or 'tiled' (camera-sensor tile-grid).", +) parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." @@ -171,6 +178,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # set the log directory for the environment (works for all environment types) env_cfg.log_dir = log_dir + # Forward the video mode ("tiled" / "perspective") to the recorder config before env creation. 
+ if args_cli.video and hasattr(env_cfg, "video_recorder") and env_cfg.video_recorder is not None: + env_cfg.video_recorder.video_mode = args_cli.video + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index eb0a359e4f5..33541c3cd44 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -35,6 +35,8 @@ from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType from .direct_marl_env_cfg import DirectMARLEnvCfg from .ui import ViewportCameraController +from .utils.video_recorder import VideoRecorder +from .utils.video_recorder_cfg import VideoRecorderCfg from .utils.spaces import sample_space, spec_to_gym_space # import logger @@ -168,6 +170,18 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): if "prestartup" in self.event_manager.available_modes: self.event_manager.apply(mode="prestartup") + # Instantiate the video recorder before sim.reset() so that any fallback TiledCamera + # (used for state-based envs without an observation camera) is spawned into the USD + # stage and registered for the PHYSICS_READY callback before physics initialises. + # Forward render_mode so VideoRecorder only spawns fallback cameras when --video is active. 
+ if self.cfg.video_recorder is not None: + self.cfg.video_recorder.render_mode = render_mode + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + # play the simulator to activate physics handles # note: this activates the physics simulation view that exposes TensorAPIs # note: when started in extension mode, first call sim.reset_async() and then initialize the managers @@ -521,33 +535,9 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # check that if any render could have happened - if not self.sim.has_gui and not self.sim.has_offscreen_render: - raise RuntimeError( - f"Cannot render '{self.render_mode}' - no GUI and offscreen rendering not enabled." - " If running headless, make sure --enable_cameras is set." - ) - # create the annotator if it does not exist - if not hasattr(self, "_rgb_annotator"): - import omni.replicator.core as rep - - # create render product - self._render_product = rep.create.render_product( - self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution - ) - # create rgb annotator -- used to read data from the render product - self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") - self._rgb_annotator.attach([self._render_product]) - # obtain the rgb data - rgb_data = self._rgb_annotator.get_data() - # convert to numpy array - rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # return the rgb data - # note: initially the renderer is warming up and returns empty data - if rgb_data.size == 0: - return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) - else: - return rgb_data[:, :, :3] + if self.video_recorder is None: + return None + return self.video_recorder.render_rgb_array() else: raise NotImplementedError( f"Render 
mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py index b22a6169d7a..d697c7fad93 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py @@ -17,6 +17,7 @@ from isaaclab.utils.noise import NoiseModelCfg from .common import AgentID, SpaceType, ViewerCfg +from .utils.video_recorder_cfg import VideoRecorderCfg @configclass @@ -234,3 +235,11 @@ class DirectMARLEnvCfg: log_dir: str | None = None """Directory for logging experiment artifacts. Defaults to None, in which case no specific log directory is set.""" + + video_recorder: VideoRecorderCfg = VideoRecorderCfg() + """Configuration for video recording when ``render_mode="rgb_array"`` (i.e. ``--video``). + + See :class:`~isaaclab.envs.VideoRecorderCfg` for available options including + ``video_mode`` (``"perspective"`` or ``"tiled"``), ``camera_eye``/``camera_lookat``, + and ``video_num_tiles``. Set to ``None`` to disable the recorder entirely. 
+ """ diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index b362ac72bc2..58456e72fb2 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -32,6 +32,8 @@ from .common import VecEnvObs, VecEnvStepReturn from .direct_rl_env_cfg import DirectRLEnvCfg from .ui import ViewportCameraController +from .utils.video_recorder import VideoRecorder +from .utils.video_recorder_cfg import VideoRecorderCfg from .utils.spaces import sample_space, spec_to_gym_space if has_kit(): @@ -173,6 +175,18 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): if "prestartup" in self.event_manager.available_modes: self.event_manager.apply(mode="prestartup") + # Instantiate the video recorder before sim.reset() so that any fallback TiledCamera + # (used for state-based envs without an observation camera) is spawned into the USD + # stage and registered for the PHYSICS_READY callback before physics initialises. + # Forward render_mode so VideoRecorder only spawns fallback cameras when --video is active. 
+ if self.cfg.video_recorder is not None: + self.cfg.video_recorder.render_mode = render_mode + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + # play the simulator to activate physics handles # note: this activates the physics simulation view that exposes TensorAPIs # note: when started in extension mode, first call sim.reset_async() and then initialize the managers @@ -489,33 +503,9 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # check that if any render could have happened - if not self.sim.has_gui and not self.sim.has_offscreen_render: - raise RuntimeError( - f"Cannot render '{self.render_mode}' - no GUI and offscreen rendering not enabled." - " If running headless, make sure --enable_cameras is set." - ) - # create the annotator if it does not exist - if not hasattr(self, "_rgb_annotator"): - import omni.replicator.core as rep - - # create render product - self._render_product = rep.create.render_product( - self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution - ) - # create rgb annotator -- used to read data from the render product - self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") - self._rgb_annotator.attach([self._render_product]) - # obtain the rgb data - rgb_data = self._rgb_annotator.get_data() - # convert to numpy array - rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # return the rgb data - # note: initially the renerer is warming up and returns empty data - if rgb_data.size == 0: - return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) - else: - return rgb_data[:, :, :3] + if self.video_recorder is None: + return None + return self.video_recorder.render_rgb_array() else: raise NotImplementedError( f"Render 
mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py index fd40b3104c2..acc597dd3dd 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py @@ -16,6 +16,7 @@ from isaaclab.utils.noise import NoiseModelCfg from .common import SpaceType, ViewerCfg +from .utils.video_recorder_cfg import VideoRecorderCfg @configclass @@ -254,3 +255,11 @@ class DirectRLEnvCfg: log_dir: str | None = None """Directory for logging experiment artifacts. Defaults to None, in which case no specific log directory is set.""" + + video_recorder: VideoRecorderCfg = VideoRecorderCfg() + """Configuration for video recording when ``render_mode="rgb_array"`` (i.e. ``--video``). + + See :class:`~isaaclab.envs.VideoRecorderCfg` for available options including + ``video_mode`` (``"perspective"`` or ``"tiled"``), ``camera_eye``/``camera_lookat``, + and ``video_num_tiles``. Set to ``None`` to disable the recorder entirely. 
+ """ diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py index 996e88216e1..a5397f22314 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_env.py @@ -26,6 +26,7 @@ from .manager_based_env_cfg import ManagerBasedEnvCfg from .ui import ViewportCameraController from .utils.io_descriptors import export_articulations_data, export_scene_data +from .utils.video_recorder import VideoRecorder # import logger logger = logging.getLogger(__name__) @@ -182,6 +183,16 @@ def _init_sim(self): if "prestartup" in self.event_manager.available_modes: self.event_manager.apply(mode="prestartup") + # Instantiate the video recorder before sim.reset() so that any fallback TiledCamera + # (used for state-based envs without an observation camera) is spawned into the USD + # stage and registered for the PHYSICS_READY callback before physics initialises. + if self.cfg.video_recorder is not None: + self.video_recorder: VideoRecorder = self.cfg.video_recorder.class_type( + self.cfg.video_recorder, self.scene + ) + else: + self.video_recorder = None + # play the simulator to activate physics handles # note: this activates the physics simulation view that exposes TensorAPIs # note: when started in extension mode, first call sim.reset_async() and then initialize the managers diff --git a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py index 24a88d5e72c..2df177f2238 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py @@ -26,6 +26,7 @@ from isaaclab.utils import configclass from .common import ViewerCfg +from .utils.video_recorder_cfg import VideoRecorderCfg @configclass @@ -163,3 +164,11 @@ class ManagerBasedEnvCfg: log_dir: str | None = None """Directory for logging experiment artifacts. 
Defaults to None, in which case no specific log directory is set.""" + + video_recorder: VideoRecorderCfg = VideoRecorderCfg() + """Configuration for video recording when ``render_mode="rgb_array"`` (i.e. ``--video``). + + See :class:`~isaaclab.envs.VideoRecorderCfg` for available options including + ``video_mode`` (``"perspective"`` or ``"tiled"``), ``camera_eye``/``camera_lookat``, + and ``video_num_tiles``. Set to ``None`` to disable the recorder entirely. + """ diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index d08b7e3be3a..132fa4d97fb 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -20,6 +20,7 @@ from .common import VecEnvStepReturn from .manager_based_env import ManagerBasedEnv from .manager_based_rl_env_cfg import ManagerBasedRLEnvCfg +from .utils.video_recorder import VideoRecorder class ManagerBasedRLEnv(ManagerBasedEnv, gym.Env): @@ -76,6 +77,11 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, ** # initialize the episode length buffer BEFORE loading the managers to use it in mdp functions. self.episode_length_buf = torch.zeros(cfg.scene.num_envs, device=cfg.sim.device, dtype=torch.long) + # Forward render_mode to VideoRecorderCfg before super().__init__() creates VideoRecorder, + # so fallback cameras are only spawned when --video is active (render_mode="rgb_array"). + if cfg.video_recorder is not None: + cfg.video_recorder.render_mode = render_mode + # initialize the base class to setup the scene. 
super().__init__(cfg=cfg) # store the render mode @@ -270,35 +276,9 @@ def render(self, recompute: bool = False) -> np.ndarray | None: if self.render_mode == "human" or self.render_mode is None: return None elif self.render_mode == "rgb_array": - # check that if any render could have happened - # Check for GUI, offscreen rendering, or visualizers - has_visualizers = bool(self.sim.get_setting("/isaaclab/visualizer")) - if not (self.sim.has_gui or self.sim.has_offscreen_render or has_visualizers): - raise RuntimeError( - f"Cannot render '{self.render_mode}' - no GUI and offscreen rendering not enabled." - " If running headless, make sure --enable_cameras is set." - ) - # create the annotator if it does not exist - if not hasattr(self, "_rgb_annotator"): - import omni.replicator.core as rep - - # create render product - self._render_product = rep.create.render_product( - self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution - ) - # create rgb annotator -- used to read data from the render product - self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") - self._rgb_annotator.attach([self._render_product]) - # obtain the rgb data - rgb_data = self._rgb_annotator.get_data() - # convert to numpy array - rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) - # return the rgb data - # note: initially the renerer is warming up and returns empty data - if rgb_data.size == 0: - return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) - else: - return rgb_data[:, :, :3] + if self.video_recorder is None: + return None + return self.video_recorder.render_rgb_array() else: raise NotImplementedError( f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." 
diff --git a/source/isaaclab/isaaclab/envs/utils/test_video_recorder.py b/source/isaaclab/isaaclab/envs/utils/test_video_recorder.py new file mode 100644 index 00000000000..398dc0ee045 --- /dev/null +++ b/source/isaaclab/isaaclab/envs/utils/test_video_recorder.py @@ -0,0 +1,116 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +"""Unit tests for VideoRecorder.""" +import importlib.util, pathlib, sys +from types import SimpleNamespace +from unittest.mock import MagicMock, patch +import numpy as np +import pytest + +_spec = importlib.util.spec_from_file_location("_vr", pathlib.Path(__file__).parent / "video_recorder.py") +_module = importlib.util.module_from_spec(_spec); _spec.loader.exec_module(_module); VideoRecorder = _module.VideoRecorder + +_BLANK_720p = np.zeros((720, 1280, 3), dtype=np.uint8) +_DEFAULT_CFG = dict( + render_mode="rgb_array", video_mode="perspective", fallback_camera_cfg=None, + video_num_tiles=-1, camera_eye=(7.5, 7.5, 7.5), camera_lookat=(0.0, 0.0, 0.0), + gl_viewer_width=1280, gl_viewer_height=720, +) + + +def _create_recorder(**kw): + """Return a VideoRecorder with __init__ bypassed and all deps mocked out.""" + recorder = object.__new__(VideoRecorder) + recorder.cfg = SimpleNamespace(**{**_DEFAULT_CFG, **kw}) + recorder._scene = MagicMock(); recorder._scene.sensors = {} + recorder._fallback_tiled_camera = None + recorder._gl_viewer = None + recorder._gl_viewer_init_attempted = False + return recorder + + +def test_init_perspective_mode_does_not_spawn_fallback(): + """In perspective mode, __init__ never spawns a TiledCamera fallback.""" + scene = MagicMock(); scene.sensors = {}; scene.num_envs = 1 + cfg = SimpleNamespace(**{**_DEFAULT_CFG, "fallback_camera_cfg": MagicMock()}) + with patch.dict(sys.modules, {"pyglet": MagicMock()}): + with patch.object(VideoRecorder, "_spawn_fallback_cameras") as 
mock_spawn: + VideoRecorder(cfg, scene) + mock_spawn.assert_not_called() + + +def test_init_tiled_mode_spawns_fallback_when_configured(): + """In tiled mode with a fallback_camera_cfg, __init__ calls _spawn_fallback_cameras.""" + scene = MagicMock(); scene.sensors = {}; scene.num_envs = 1 + cfg = SimpleNamespace(**{**_DEFAULT_CFG, "video_mode": "tiled", "fallback_camera_cfg": MagicMock()}) + with patch.dict(sys.modules, {"pyglet": MagicMock()}): + with patch.object(VideoRecorder, "_spawn_fallback_cameras", return_value=MagicMock()) as mock_spawn: + VideoRecorder(cfg, scene) + mock_spawn.assert_called_once() + + +def test_render_rgb_array_perspective_uses_gl_viewer_when_available(): + """Perspective mode returns a GL viewer frame when _gl_viewer is set.""" + recorder = _create_recorder() + recorder._gl_viewer = MagicMock(); recorder._gl_viewer_init_attempted = True + with patch.object(recorder, "_render_newton_gl_rgb_array", return_value=_BLANK_720p) as mock_gl: + result = recorder.render_rgb_array() + mock_gl.assert_called_once() + assert result.shape == (720, 1280, 3) + + +def test_render_rgb_array_perspective_falls_through_to_kit_when_no_gl_viewer(): + """Kit capture path is used when no GL viewer is available (Kit backend).""" + recorder = _create_recorder(); recorder._gl_viewer_init_attempted = True + with patch.object(recorder, "_render_kit_perspective_rgb_array", return_value=_BLANK_720p) as mock_kit: + recorder.render_rgb_array() + mock_kit.assert_called_once() + + +def test_render_rgb_array_tiled_raises_when_no_camera(): + """Tiled mode with no TiledCamera raises RuntimeError with a descriptive message.""" + recorder = _create_recorder(video_mode="tiled") + with patch.object(recorder, "_find_video_camera", return_value=None): + with pytest.raises(RuntimeError, match="tiled mode"): + recorder.render_rgb_array() + + +def test_gl_exception_returns_blank_ndarray_not_none(): + """GL renderer crash must return a blank ndarray, never None, so RecordVideo never 
sees None.""" + recorder = _create_recorder(); recorder._gl_viewer = MagicMock(); recorder._gl_viewer_init_attempted = True + with patch.dict(sys.modules, {"isaaclab.sim": MagicMock(SimulationContext=MagicMock(instance=MagicMock(side_effect=RuntimeError)))}): + frame = recorder._render_newton_gl_rgb_array() + assert isinstance(frame, np.ndarray) and frame.shape == (720, 1280, 3) + + +def test_find_video_camera_does_not_cache_none(): + """A None result is not cached, allowing retry on the next call.""" + recorder = _create_recorder(video_mode="tiled") + FakeTiledCamera = type("TiledCamera", (), {}) + with patch.dict(sys.modules, {"isaaclab": MagicMock(), "isaaclab.sensors": MagicMock(), "isaaclab.sensors.camera": MagicMock(TiledCamera=FakeTiledCamera)}): + result = recorder._find_video_camera() + assert result is None and not hasattr(recorder, "_video_camera") + + +def test_find_video_camera_caches_result_when_found(): + """A found camera is cached so the scene is not re-scanned on subsequent calls.""" + recorder = _create_recorder(video_mode="tiled") + FakeTiledCamera = type("TiledCamera", (), {}) + camera = MagicMock(); camera.__class__ = FakeTiledCamera + camera.is_initialized = True; camera.data.output = {"rgb": MagicMock(shape=(4, 64, 64, 3))} + recorder._scene.sensors = {"cam": camera} + with patch.dict(sys.modules, {"isaaclab": MagicMock(), "isaaclab.sensors": MagicMock(), "isaaclab.sensors.camera": MagicMock(TiledCamera=FakeTiledCamera)}): + result = recorder._find_video_camera() + assert result is camera and hasattr(recorder, "_video_camera") + + +def test_gl_viewer_init_attempted_only_once(): + """_try_init_gl_viewer is called at most once regardless of render call count.""" + recorder = _create_recorder(); recorder._gl_viewer_init_attempted = False + def _set_flag(): recorder._gl_viewer_init_attempted = True + with patch.object(recorder, "_try_init_gl_viewer", side_effect=_set_flag) as mock_init, \ + patch.object(recorder, 
"_render_kit_perspective_rgb_array", return_value=_BLANK_720p): + for _ in range(3): recorder.render_rgb_array() + mock_init.assert_called_once() diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder.py b/source/isaaclab/isaaclab/envs/utils/video_recorder.py new file mode 100644 index 00000000000..837563b4c9b --- /dev/null +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder.py @@ -0,0 +1,286 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Video recorder implementation. + +* **Perspective view** (``video_mode="perspective"``) — captures a single wide-angle + view of the scene using the Newton GL viewer (Newton backends) or the Kit viewport + camera ``/OmniverseKit_Persp`` via ``omni.replicator.core`` (Kit backends). +* **Camera sensor / tiled** (``video_mode="tiled"``) — reads pixel data from a + :class:`~isaaclab.sensors.camera.TiledCamera` sensor, producing a grid of per-agent + views. + +See :mod:`video_recorder_cfg` for configuration and full mode descriptions. +""" + +from __future__ import annotations + +import logging +import math +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from isaaclab.scene import InteractiveScene + from .video_recorder_cfg import VideoRecorderCfg + +logger = logging.getLogger(__name__) + + +class VideoRecorder: + """Records video frames from the scene's active renderer. + + See :class:`~isaaclab.envs.utils.video_recorder_cfg.VideoRecorderCfg` for the full + description of ``video_mode`` and the fallback priority chain. + + Args: + cfg: Recorder configuration. + scene: The interactive scene that owns the sensors. 
+ """ + + def __init__(self, cfg: VideoRecorderCfg, scene: InteractiveScene): + self.cfg = cfg + self._scene = scene + self._fallback_tiled_camera = None + self._gl_viewer = None + self._gl_viewer_init_attempted = False + + if cfg.render_mode == "rgb_array": + # enable EGL headless rendering for pyglet before any pyglet.window import. + try: + import pyglet + + if not pyglet.options.get("headless", False): + pyglet.options["headless"] = True + except ImportError: + pass + + # pre-spawn fallback TiledCamera; must exist in USD stage before physics initialises. + # whether it is actually used is decided lazily in _find_video_camera(). + if cfg.fallback_camera_cfg is not None and cfg.video_mode == "tiled": + self._fallback_tiled_camera = self._spawn_fallback_cameras(cfg, scene) + + def render_rgb_array(self) -> np.ndarray | None: + """Return an RGB frame for video recording, or ``None`` when neither GL viewer nor Kit runtime is available.""" + if self.cfg.video_mode == "perspective": + if not self._gl_viewer_init_attempted: + self._try_init_gl_viewer() + if self._gl_viewer is not None: + return self._render_newton_gl_rgb_array() + return self._render_kit_perspective_rgb_array() + + # tiled mode: use observation TiledCamera if available, then fallback. + video_camera = self._find_video_camera() + if video_camera is None: + raise RuntimeError( + "Cannot record video in tiled mode: no TiledCamera sensor with RGB output was found" + " in the scene. Add a TiledCamera sensor or switch to perspective mode (--video=perspective)." + ) + return self._render_tiled_camera_rgb_array() + + def _try_init_gl_viewer(self) -> None: + """Lazy-initialise the Newton GL viewer on the first render call. + + Called after ``sim.reset()`` so the Newton model is fully built. + Leaves ``_gl_viewer`` as ``None`` on Kit backends; ``render_rgb_array`` then + calls ``_render_kit_perspective_rgb_array`` instead. 
+ """ + self._gl_viewer_init_attempted = True + try: + from isaaclab.sim import SimulationContext + + sdp = SimulationContext.instance().initialize_scene_data_provider() + model = sdp.get_newton_model() + if model is None: + return + + import pyglet + + pyglet.options["headless"] = True + from newton.viewer import ViewerGL + + max_worlds = ( + None if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, model.world_count) + ) + + viewer = ViewerGL(width=self.cfg.gl_viewer_width, height=self.cfg.gl_viewer_height, headless=True) + viewer.set_model(model, max_worlds=max_worlds) + viewer.set_world_offsets((0.0, 0.0, 0.0)) # world positions already in body_q + viewer.up_axis = 2 # Z-up + self._gl_viewer = viewer + + # place camera to match Kit /OmniverseKit_Persp (same eye/lookat as ViewerCfg). + try: + import warp as wp + + ex, ey, ez = self.cfg.camera_eye + lx, ly, lz = self.cfg.camera_lookat + dx, dy, dz = lx - ex, ly - ey, lz - ez + length = math.sqrt(dx**2 + dy**2 + dz**2) + dx, dy, dz = dx / length, dy / length, dz / length + pitch = math.degrees(math.asin(max(-1.0, min(1.0, dz)))) + yaw = math.degrees(math.atan2(dy, dx)) + + # Kit uses horizontal FOV (60°); pyglet/Newton GL uses vertical FOV. 
+ aspect = self.cfg.gl_viewer_width / self.cfg.gl_viewer_height + v_fov_deg = math.degrees(2.0 * math.atan(math.tan(math.radians(60.0) / 2.0) / aspect)) + viewer.camera.fov = v_fov_deg # ≈ 36° for 1280×720 + viewer.set_camera(pos=wp.vec3(ex, ey, ez), pitch=pitch, yaw=yaw) + except Exception as exc: + logger.warning("[VideoRecorder] GL viewer camera setup failed: %s", exc) + + logger.info( + "[VideoRecorder] Newton GL viewer ready (%dx%d, max_worlds=%s).", + self.cfg.gl_viewer_width, + self.cfg.gl_viewer_height, + max_worlds, + ) + except Exception as exc: + logger.warning("[VideoRecorder] Newton GL viewer unavailable: %s", exc) + + def _render_newton_gl_rgb_array(self) -> np.ndarray: + """Return one RGB frame from the Newton GL viewer, or a blank frame on error.""" + try: + from isaaclab.sim import SimulationContext + + sim = SimulationContext.instance() + sdp = sim.initialize_scene_data_provider() + state = sdp.get_newton_state() + dt = sim.get_physics_dt() + + viewer = self._gl_viewer + viewer.begin_frame(dt) + viewer.log_state(state) + viewer.end_frame() + return viewer.get_frame().numpy() + except Exception as exc: + logger.warning("[VideoRecorder] GL frame capture failed: %s", exc) + return np.zeros((self.cfg.gl_viewer_height, self.cfg.gl_viewer_width, 3), dtype=np.uint8) + + def _render_kit_perspective_rgb_array(self) -> np.ndarray | None: + """Return one RGB frame from the Kit /OmniverseKit_Persp camera via omni.replicator. + + Returns a blank (all-zero) frame during the initial warmup frames when the renderer returns empty data, and on capture errors. + """ + try: + import omni.replicator.core as rep + + from isaaclab.sim import SimulationContext + + # /OmniverseKit_Persp is not an RTX sensor; always force a render pass for fresh data. 
+ SimulationContext.instance().render() + + if not hasattr(self, "_rgb_annotator"): + self._render_product = rep.create.render_product( + "/OmniverseKit_Persp", (1280, 720) + ) + self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") + self._rgb_annotator.attach([self._render_product]) + + rgb_data = self._rgb_annotator.get_data() + rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) + if rgb_data.size == 0: + # renderer is warming up; return blank frame + return np.zeros((720, 1280, 3), dtype=np.uint8) + return rgb_data[:, :, :3] + except Exception as exc: + logger.warning("[VideoRecorder] Kit perspective capture failed: %s", exc) + return np.zeros((720, 1280, 3), dtype=np.uint8) + + @staticmethod + def _spawn_fallback_cameras(cfg: VideoRecorderCfg, scene: InteractiveScene): + """Spawn one video camera prim per environment and return a single TiledCamera. + + Must be called **before** ``sim.reset()`` so the prims exist when the TiledCamera + registers for its ``PHYSICS_READY`` callback. 
+ """ + import torch + + from isaaclab.sensors.camera import TiledCamera + from isaaclab.utils.math import convert_camera_frame_orientation_convention + + camera_cfg = cfg.fallback_camera_cfg + n_total_envs = scene.num_envs + + rot = torch.tensor(camera_cfg.offset.rot, dtype=torch.float32, device="cpu").unsqueeze(0) + rot_offset = convert_camera_frame_orientation_convention( + rot, origin=camera_cfg.offset.convention, target="opengl" + ).squeeze(0).cpu().numpy() + + spawn_cfg = camera_cfg.spawn + if spawn_cfg.vertical_aperture is None: + spawn_cfg = spawn_cfg.replace( + vertical_aperture=spawn_cfg.horizontal_aperture * camera_cfg.height / camera_cfg.width + ) + + for i in range(n_total_envs): + spawn_cfg.func(f"/World/envs/env_{i}/VideoCamera", spawn_cfg, + translation=camera_cfg.offset.pos, orientation=rot_offset) + + tiled_cfg = camera_cfg.replace(prim_path="/World/envs/env_.*/VideoCamera", spawn=None) + return TiledCamera(tiled_cfg) + + def _find_video_camera(self): + """Locate and cache the TiledCamera to use for video recording. + + Priority: (1) observation TiledCamera already in the scene, (2) fallback camera. + Returns ``None`` if neither is available yet (retried on the next call). + """ + if hasattr(self, "_video_camera"): + return self._video_camera + + from isaaclab.sensors.camera import TiledCamera + + camera = None + + for sensor in self._scene.sensors.values(): + if isinstance(sensor, TiledCamera): + output = sensor.data.output + if "rgb" in output or "rgba" in output: + camera = sensor + break + + if camera is None and self._fallback_tiled_camera is not None: + if self._fallback_tiled_camera.is_initialized: + output = self._fallback_tiled_camera.data.output + if "rgb" in output or "rgba" in output: + camera = self._fallback_tiled_camera + + if camera is None: + return None + + # cache only once a camera is confirmed available. 
+ self._video_camera = camera + output = camera.data.output + self._video_rgb_key = "rgb" if "rgb" in output else "rgba" + n_total = int(output[self._video_rgb_key].shape[0]) + n_envs = n_total if self.cfg.video_num_tiles < 0 else min(self.cfg.video_num_tiles, n_total) + self._video_n_envs = n_envs + self._video_grid_size = math.ceil(math.sqrt(n_envs)) + n_slots = self._video_grid_size ** 2 + H = int(output[self._video_rgb_key].shape[1]) + W = int(output[self._video_rgb_key].shape[2]) + self._video_H = H + self._video_W = W + pad = n_slots - n_envs + self._video_pad = np.zeros((pad, H, W, 3), dtype=np.uint8) if pad > 0 else None + return self._video_camera + + def _render_tiled_camera_rgb_array(self) -> np.ndarray: + """Return a square tile-grid ``(G*H, G*W, 3)`` from the cached TiledCamera.""" + if self._video_camera is self._fallback_tiled_camera: + self._fallback_tiled_camera.update(dt=0.0, force_recompute=True) + + rgb_all = self._video_camera.data.output[self._video_rgb_key] + if self._video_rgb_key == "rgba": + rgb_all = rgb_all[..., :3] + + tiles = rgb_all[: self._video_n_envs].contiguous().cpu().numpy() + if self._video_pad is not None: + tiles = np.concatenate([tiles, self._video_pad], axis=0) + + g, H, W = self._video_grid_size, self._video_H, self._video_W + return tiles.reshape(g, g, H, W, 3).transpose(0, 2, 1, 3, 4).reshape(g * H, g * W, 3) diff --git a/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py new file mode 100644 index 00000000000..501df00a5a8 --- /dev/null +++ b/source/isaaclab/isaaclab/envs/utils/video_recorder_cfg.py @@ -0,0 +1,112 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`. 
+ +Two recording modes are supported (set via :attr:`VideoRecorderCfg.video_mode`): + +* **Perspective view** (``"perspective"``, default) - a single wide-angle viewport + camera. Uses the Newton GL viewer on Newton backends; falls back to the Kit + ``/OmniverseKit_Persp`` camera via ``omni.replicator.core`` on Kit backends. +* **Camera sensor / tiled** (``"tiled"``) - reads pixel data from a + :class:`~isaaclab.sensors.camera.TiledCamera` sensor and arranges the per-agent + frames into a square grid. +""" + +from __future__ import annotations + +import isaaclab.sim as sim_utils +from isaaclab.sensors.camera import TiledCameraCfg +from isaaclab.utils import configclass + +from .video_recorder import VideoRecorder + + +DEFAULT_TILED_RECORDING_CAMERA_CFG = TiledCameraCfg( + prim_path="/World/envs/env_0/VideoCamera", + update_period=0.0, + height=480, + width=640, + data_types=["rgb"], + spawn=sim_utils.PinholeCameraCfg( + focal_length=24.0, + focus_distance=400.0, + horizontal_aperture=20.955, + clipping_range=(0.1, 1.0e5), + ), + offset=TiledCameraCfg.OffsetCfg(pos=(-7.0, 0.0, 3.0), rot=(0.0, 0.1045, 0.0, 0.9945), convention="world"), +) +"""Default :class:`~isaaclab.sensors.camera.TiledCameraCfg` for tiled state-based video recording. + +Places a pinhole camera at ``(-7, 0, 3)`` m relative to env_0's origin, angled ~12° downward. +Only spawned when ``--video=tiled`` is active and no observation TiledCamera exists in the scene. 
+ +Override pose in ``__post_init__`` for tasks with different scene scales:: + + self.video_recorder.fallback_camera_cfg = self.video_recorder.fallback_camera_cfg.replace( + offset=TiledCameraCfg.OffsetCfg(pos=(-3.0, 0.0, 2.0), rot=(0.0, 0.1045, 0.0, 0.9945), convention="world"), + ) +""" + + +@configclass +class VideoRecorderCfg: + """Configuration for :class:`~isaaclab.envs.utils.video_recorder.VideoRecorder`.""" + + class_type: type = VideoRecorder + """Recorder class to instantiate; must accept ``(cfg, scene)``.""" + + render_mode: str | None = None + """Render mode forwarded from the environment constructor (``"rgb_array"`` when ``--video`` is active). + + Set automatically by the environment base classes; do not set manually. + """ + + video_mode: str = "perspective" + """Recording mode: ``"perspective"`` (default) or ``"tiled"``. + + * ``"perspective"`` - single wide-angle view of the scene. Newton backends use the Newton GL + viewer; Kit backends use ``/OmniverseKit_Persp`` via ``omni.replicator.core``. TiledCamera + is bypassed even when present. + * ``"tiled"`` - square tile-grid from a :class:`~isaaclab.sensors.camera.TiledCamera`. + Reuses the observation camera on vision-based tasks; spawns ``fallback_camera_cfg`` for + state-based tasks. Raises ``RuntimeError`` if no TiledCamera is available. + + Set via CLI: ``--video=perspective`` / ``--video=tiled``. + """ + + video_num_tiles: int = -1 + """Max environments to include per frame (``-1`` = all). + + Tiles are arranged into a ``ceil(sqrt(N)) × ceil(sqrt(N))`` grid with black padding. + CLI example: ``env.video_recorder.video_num_tiles=9`` + """ + + fallback_camera_cfg: object = DEFAULT_TILED_RECORDING_CAMERA_CFG + """Side-view :class:`~isaaclab.sensors.camera.TiledCameraCfg` for tiled state-based recording. + + Spawned when ``video_mode="tiled"`` and no observation TiledCamera exists in the scene. + Set to ``None`` to disable. 
+ """ + + camera_eye: tuple[float, float, float] = (7.5, 7.5, 7.5) + """Newton GL perspective camera position in world space (metres). + + Matches :attr:`~isaaclab.envs.common.ViewerCfg.eye` so the Newton GL video aligns with + the Kit ``/OmniverseKit_Persp`` viewport. Only used by Newton backends in perspective mode. + """ + + camera_lookat: tuple[float, float, float] = (0.0, 0.0, 0.0) + """Newton GL perspective camera look-at point in world space (metres). + + Matches :attr:`~isaaclab.envs.common.ViewerCfg.lookat`. Only used by Newton backends in perspective mode. + """ + + gl_viewer_width: int = 1280 + """Width in pixels of the Newton GL perspective frame. Only active when ``--video`` is set.""" + + gl_viewer_height: int = 720 + """Height in pixels of the Newton GL perspective frame. Only active when ``--video`` is set.""" +