From f870f106ed6e2efb888095e66c78eb5616cffb30 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 27 Jan 2026 11:30:17 -0500
Subject: [PATCH 01/68] Add data auto-generate module.

---
 .../teleoperation/data_generate.py            | 587 ++++++++++++++++++
 .../leisaac/controllers/pose_expert.py        | 202 ++++++
 .../leisaac/leisaac/devices/action_process.py |  28 +-
 .../leisaac/devices/auto_generate/__init__.py |   1 +
 .../auto_generate/so101_auto_generate.py      | 109 ++++
 .../tasks/pick_orange/mdp/terminations.py     |   4 +-
 source/leisaac/leisaac/utils/env_utils.py     |   2 +-
 source/leisaac/leisaac/utils/usd_fixes.py     |  31 +
 8 files changed, 960 insertions(+), 4 deletions(-)
 create mode 100644 scripts/environments/teleoperation/data_generate.py
 create mode 100644 source/leisaac/leisaac/controllers/pose_expert.py
 create mode 100644 source/leisaac/leisaac/devices/auto_generate/__init__.py
 create mode 100644 source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py
 create mode 100644 source/leisaac/leisaac/utils/usd_fixes.py

diff --git a/scripts/environments/teleoperation/data_generate.py b/scripts/environments/teleoperation/data_generate.py
new file mode 100644
index 00000000..d895a8c5
--- /dev/null
+++ b/scripts/environments/teleoperation/data_generate.py
@@ -0,0 +1,587 @@
+# -*- coding: utf-8 -*-
+"""
+Minimal Lula integration for data_generate:
+- Replace Differential IK with Lula (LulaKinematicsSolver + ArticulationKinematicsSolver)
+- Output joint target tensor (num_envs, n_joints) and call env.step(actions)
+- Minimal checks, suitable for debug runs. Adjust LULA paths as needed.
+"""
+
+import multiprocessing
+if multiprocessing.get_start_method() != "spawn":
+    multiprocessing.set_start_method("spawn", force=True)
+
+import argparse
+import os
+import cv2
+import time
+import numpy as np
+import torch
+from isaaclab.app import AppLauncher
+import gymnasium as gym
+
+import omni.kit.app
+
+# ---- CLI ----
+parser = argparse.ArgumentParser(description="leisaac data generation - Lula minimal")
+parser.add_argument("--num_envs", type=int, default=1)
+parser.add_argument("--task", type=str, default=None)
+parser.add_argument("--seed", type=int, default=None)
+parser.add_argument("--record", action="store_true")
+parser.add_argument("--step_hz", type=int, default=60)
+parser.add_argument("--dataset_file", type=str, default="./datasets/dataset.hdf5")
+parser.add_argument("--resume", action="store_true")
+parser.add_argument("--num_demos", type=int, default=1)
+parser.add_argument("--quality", action="store_true")
+
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+app_launcher_args = vars(args_cli)
+
+app_launcher = AppLauncher(app_launcher_args)
+simulation_app = app_launcher.app
+
+def load_extensions():
+    app = omni.kit.app.get_app()
+    ext_mgr = app.get_extension_manager()
+    ext_mgr.set_extension_enabled_immediate("omni.isaac.motion_generation", True)
+    ext_mgr.set_extension_enabled_immediate("omni.isaac.core", True)
+
+# 在 AppLauncher / SimulationApp 之后
+load_extensions()
+from leisaac.enhance.managers import StreamingRecorderManager, EnhanceDatasetExportMode
+from leisaac.tasks.pick_orange.mdp import task_done
+from isaaclab.managers import DatasetExportMode, TerminationTermCfg
+from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
+from isaaclab_tasks.utils import parse_env_cfg
+
+import omni.usd
+from pxr import Usd, UsdGeom, UsdPhysics
+
+def manual_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
+    """
+    Programmatically mark the current episode as success or failure.
+
+    This is the SAME implementation used in the teleoperation script.
+    It does NOT require any human input.
+    """
+    if hasattr(env, "termination_manager"):
+        if success:
+            env.termination_manager.set_term_cfg(
+                "success",
+                TerminationTermCfg(
+                    func=lambda env: torch.ones(
+                        env.num_envs, dtype=torch.bool, device=env.device
+                    )
+                ),
+            )
+        else:
+            env.termination_manager.set_term_cfg(
+                "success",
+                TerminationTermCfg(
+                    func=lambda env: torch.zeros(
+                        env.num_envs, dtype=torch.bool, device=env.device
+                    )
+                ),
+            )
+        env.termination_manager.compute()
+    elif hasattr(env, "_get_dones"):
+        # fallback for some Direct envs
+        env.cfg.return_success_status = success
+    return False
+# ---- minimal helpers ----
+def auto_fix_collision_issues():
+    stage = omni.usd.get_context().get_stage()
+    if not stage:
+        return
+    for prim in stage.Traverse():
+        name = prim.GetName()
+        if "handle" in name or "drawer" in name or "board" in name:
+            if not prim.IsA(UsdGeom.Mesh):
+                continue
+            collision_api = UsdPhysics.MeshCollisionAPI(prim)
+            if not collision_api:
+                collision_api = UsdPhysics.MeshCollisionAPI.Apply(prim)
+            approx_attr = collision_api.GetApproximationAttr()
+            current_val = approx_attr.Get()
+            if current_val != "convexDecomposition":
+                approx_attr.Set("convexDecomposition")
+
+# ---- minimal pose-based expert (fallback) ----
+from isaaclab.utils.math import quat_inv, quat_apply
+
+def is_grasp_phase_by_step(step_count):
+    return None
+def get_expert_action_pose_based(env, step_count, target, flag):
+    
+    device = env.device
+    num_envs = env.num_envs
+    orange_pos_w = env.scene[target].data.root_pos_w.clone()
+    plate_pos_w = env.scene["Plate"].data.root_pos_w.clone()
+    robot_base_pos_w = env.scene["robot"].data.root_pos_w.clone()
+    robot_base_quat_w = env.scene["robot"].data.root_quat_w.clone()
+
+    target_pos_w = orange_pos_w.clone()
+    
+    import math
+    from isaaclab.utils.math import quat_from_euler_xyz, quat_mul, quat_inv
+    pitch = math.radians(0)
+
+    target_quat_w = quat_from_euler_xyz(
+        torch.tensor(pitch, device=device),
+        torch.tensor(0.0, device=device),
+        torch.tensor(0.0, device=device),
+    ).repeat(num_envs, 1)
+
+    target_quat = quat_mul(
+        quat_inv(robot_base_quat_w),
+        target_quat_w
+    )
+    def apply_triangle_offset(pos_tensor, flag, radius=0.1):
+        """
+        给位置张量的 x, y 轴增加等边三角形偏移量。
+        
+        Args:
+            pos_tensor: (num_envs, 3) 的 target_pos_w 张量
+            flag: 当前是第几个橙子 (1, 2, 3)
+            radius: 距离中心的半径 (默认 0.2米)
+        """
+
+        idx = (flag - 1) % 3 
+        angle = idx * (2 * math.pi / 3)
+        
+        offset_x = radius * math.cos(angle)
+        offset_y = radius * math.sin(angle)
+        
+        pos_tensor[:, 0] += offset_x
+        pos_tensor[:, 1] += offset_y
+        
+        return pos_tensor
+    
+    GRIPPER = 0.1
+    target_pos_w[:,0] -= 0.03
+    
+    gripper_cmd = torch.full((num_envs,1), 1.0, device=device)
+    if step_count < 120:
+        gripper_cmd[:] = 1.0
+        target_pos_w[:,2] += 0.1
+        target_pos_w[:,2] += GRIPPER
+    elif step_count < 150:
+        gripper_cmd[:] = 1.0
+        target_pos_w[:,2] += GRIPPER
+    elif step_count < 180:
+        gripper_cmd[:] = -1.0
+        target_pos_w[:,2] += GRIPPER
+    elif step_count < 220:
+        gripper_cmd[:] = -1.0
+        target_pos_w[:,2] += 0.25
+    elif step_count < 320:
+        gripper_cmd[:] = -1.0
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:,2] += 0.25
+    elif step_count < 350:
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:,2] += GRIPPER + 0.1
+        apply_triangle_offset(target_pos_w, flag)
+        gripper_cmd[:] = -1.0
+    elif step_count < 380:
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:,2] += GRIPPER + 0.1
+        apply_triangle_offset(target_pos_w, flag)
+        gripper_cmd[:] = 1.0
+    elif step_count < 420:
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:,2] += 0.2
+        apply_triangle_offset(target_pos_w, flag)
+        gripper_cmd[:] = 1.0
+    else:
+        gripper_cmd[:] = 1.0
+    
+        
+    diff_w = target_pos_w - robot_base_pos_w
+    target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
+    actions = torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
+    return actions
+
+        
+def main():
+
+    MAX_STEPS = 420
+    # prepare
+    task_name = getattr(args_cli, "task", None)
+    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs)
+    env_cfg.use_teleop_device("auto_generate")
+    # -----------------------
+    # dataset / recorder setup (same as manual teleop)
+    # -----------------------
+    # get directory and file name from cli
+    output_dir = os.path.dirname(args_cli.dataset_file)
+    output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    if args_cli.record:
+        if args_cli.resume:
+            env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
+            assert os.path.exists(args_cli.dataset_file)
+        else:
+            env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
+            assert not os.path.exists(args_cli.dataset_file)
+        env_cfg.recorders.dataset_export_dir_path = output_dir
+        env_cfg.recorders.dataset_filename = output_file_name
+
+        is_direct_env = "Direct" in task_name
+        if is_direct_env:
+            env_cfg.return_success_status = False
+        else:
+            if not hasattr(env_cfg.terminations, "success"):
+                setattr(env_cfg.terminations, "success", None)
+            env_cfg.terminations.success = TerminationTermCfg(
+                func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)
+            )
+    else:
+        env_cfg.recorders = None
+
+
+    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
+    task_name = args_cli.task
+
+    if args_cli.quality:
+        env_cfg.sim.render.antialiasing_mode = "FXAA"
+        env_cfg.sim.render.rendering_mode = "quality"
+
+    # create env
+    env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
+
+    # (optional) fix USD collisions
+    auto_fix_collision_issues()
+    resume_recorded_demo_count = 0
+    if args_cli.record:
+        try:
+            del env.recorder_manager
+        except Exception:
+            pass
+        print("Setting up recorder_manager for recording.")
+        env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
+        env.recorder_manager.flush_steps = 100
+        env.recorder_manager.compression = "lzf"
+        if args_cli.resume:
+            resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
+            print(f"Resume recording from existing dataset file with {resume_recorded_demo_count} demonstrations.")
+        current_recorded_demo_count = resume_recorded_demo_count
+
+    # init / reset
+    if hasattr(env, "initialize"):
+        env.initialize()
+    env.reset()
+    # Print debug values right after reset to verify randomization actually applied to physics
+    try:
+        target_name = f"Orange00{flag}"
+        print("=== Post-reset diagnostics ===")
+        print("physics_dt:", getattr(env, "physics_dt", None))
+        if target_name in env.scene:
+            phys_pos = env.scene[target_name].data.root_pos_w.clone()
+            print(f"{target_name} physics root_pos_w:", phys_pos[0].cpu().numpy() if phys_pos.ndim>1 else phys_pos.cpu().numpy())
+        else:
+            print(f"{target_name} not found in scene after reset")
+        # also print robot base to ensure base pose consistent
+        if "robot" in env.scene:
+            rb = env.scene["robot"].data.root_pos_w.clone()
+            print("robot root_pos_w:", rb[0].cpu().numpy() if rb.ndim>1 else rb.cpu().numpy())
+        print("===============================")
+    except Exception as e:
+        print("Post-reset diagnostic failed:", e)
+    # try to write lower stiffness / higher damping to sim (best-effort)
+    try:
+        # get joint names / count for printing
+        robot_prim = env.scene["robot"]
+        joint_names = robot_prim.data.joint_names if hasattr(robot_prim, "data") and hasattr(robot_prim.data, "joint_names") else None
+    except Exception:
+        joint_names = None
+
+    if joint_names is not None:
+        print("Detected joints:", joint_names)
+        # example starting values - tune these per-joint as needed
+        n_joints = len(joint_names)
+        # conservative starting point (you can further reduce stiffness)
+        start_stiffness = [1000.0] * n_joints
+        start_damping = [30.0] * n_joints
+    else:
+        start_stiffness = None
+        start_damping = None
+
+    # attempt to write gains into simulation (API differs across versions; best-effort)
+    if start_stiffness is not None:
+        try:
+            # preferred helper if available
+            robot_art.write_joint_stiffness_to_sim(start_stiffness)
+            robot_art.write_joint_damping_to_sim(start_damping)
+            print("Wrote joint stiffness/damping via robot_art helper.")
+        except Exception:
+            # fallback: print warning - user may need to set gains in asset or use other API
+            print("Warning: robot_art helper methods for stiffness/damping not available. If you need to set them, check robot_art API or the asset YAML.")
+
+    # rate limiter
+    rate_limiter = None
+    if args_cli.step_hz:
+        rate_limiter = type("R", (), {"hz": args_cli.step_hz, "sleep": lambda self, e: time.sleep(1.0/self.hz)})()
+    step_count = 0
+
+    prev_gripper_target = torch.full((env.num_envs, 1), 1.0, device=env.device)
+
+    
+
+    max_gripper_delta_normal = 0.04
+    max_gripper_delta_during_grasp = 0.01
+
+    # simple step-based fallback
+
+    # auto detector: distance + relative speed + gripper-target
+    def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tensor,
+                            dist_thresh: float = 0.06, rel_vel_thresh: float = 0.08, gripper_close_threshold: float = 0.7):
+        """
+        Return (is_grasp: bool, info: dict).
+        - device-safe: newly created tensors follow the device of scene tensors (cuda or cpu).
+        - prev_gripper_t should be a tensor on the env device (if not, we'll move it).
+        """
+        info = {}
+        try:
+            orange = env_local.scene[orange_name]
+            orange_pos = None
+            device = None
+
+            # obtain orange pos and device
+            if hasattr(orange.data, "root_pos_w"):
+                orange_pos = orange.data.root_pos_w  # (num_envs,3) tensor
+                device = orange_pos.device
+
+            # fallback device: try env_local.device or cpu
+            if device is None:
+                device = getattr(env_local, "device", torch.device("cpu"))
+
+            # robot and ee
+            robot_local = env_local.scene["robot"]
+            try:
+                ee_idx = robot_local.data.body_names.index("gripper")
+            except Exception:
+                ee_idx = 0
+
+            # ee pos tensor (ensure on same device)
+            ee_pos = robot_local.data.body_state_w[:, ee_idx, :3]  # (num_envs,3)
+            if ee_pos is not None:
+                device = ee_pos.device
+
+            # if orange_pos missing -> cannot detect
+            if orange_pos is None:
+                return False, {"error": "orange pos not available"}
+
+            # horizontal / full distance (tensors on same device)
+            horiz_dist = torch.norm((orange_pos - ee_pos)[:, :2], dim=-1)  # (num_envs,)
+            full_dist = torch.norm((orange_pos - ee_pos), dim=-1)
+            info['horiz_dist'] = horiz_dist.detach().cpu().numpy().tolist()
+            info['full_dist'] = full_dist.detach().cpu().numpy().tolist()
+
+            # read linear velocities if available; default zeros on correct device
+            obj_speed = torch.zeros((orange_pos.shape[0],), device=device)
+            ee_speed = torch.zeros((orange_pos.shape[0],), device=device)
+            if hasattr(orange.data, "root_linvel_w"):
+                try:
+                    obj_v = orange.data.root_linvel_w  # (num_envs,3)
+                    obj_speed = torch.norm(obj_v, dim=-1).to(device)
+                except Exception:
+                    pass
+            if hasattr(robot_local.data, "body_linvel_w"):
+                try:
+                    v_ee = robot_local.data.body_linvel_w[:, ee_idx, :]  # (num_envs,3)
+                    ee_speed = torch.norm(v_ee, dim=-1).to(device)
+                except Exception:
+                    pass
+            rel_speed = torch.minimum(obj_speed, ee_speed)
+            info['obj_speed'] = obj_speed.detach().cpu().numpy().tolist() if obj_speed.numel() else None
+            info['ee_speed'] = ee_speed.detach().cpu().numpy().tolist() if ee_speed.numel() else None
+            info['rel_speed'] = rel_speed.detach().cpu().numpy().tolist()
+
+            # gripper target: ensure prev_gripper_t is on same device
+            try:
+                if not isinstance(prev_gripper_t, torch.Tensor):
+                    # try to coerce
+                    prev_gripper_t = torch.as_tensor(prev_gripper_t, device=device)
+                else:
+                    prev_gripper_t = prev_gripper_t.to(device)
+                gripper_target = prev_gripper_t.view(-1).detach().cpu().numpy().tolist()
+            except Exception:
+                gripper_target = None
+            info['gripper_target'] = gripper_target
+
+            # Decide masks (create boolean tensors on same device)
+            near_mask = (horiz_dist <= torch.as_tensor(dist_thresh, device=device))
+            slow_mask = (rel_speed <= torch.as_tensor(rel_vel_thresh, device=device))
+
+            if gripper_target is not None:
+                # gripper_target is a python list; build a boolean tensor on device
+                try:
+                    gripper_mask = torch.tensor([gt < gripper_close_threshold for gt in gripper_target], dtype=torch.bool, device=device)
+                except Exception:
+                    gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
+            else:
+                gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
+
+            is_grasp_tensor = near_mask & slow_mask & gripper_mask
+            # convert to python bool for single-env; if multi-env return list
+            is_grasp_list = is_grasp_tensor.detach().cpu().numpy().tolist()
+            is_grasp_any = any(is_grasp_list)
+            return is_grasp_any, info
+
+        except Exception as e:
+            return False, {"error": str(e)}
+
+    flag = 1  # which orange to pick
+    start_record_state = False
+    # main loop (NO pos/quaternion smoothing; gripper still clamped)
+    while simulation_app.is_running():
+        # choose Lula-based actions (supports num_envs>1)
+        dt = env.physics_dt
+        env.scene.update(dt)
+        
+        # 放在 while simulation_app.is_running(): 的顶部，紧跟 env.scene.update(dt) 之后或 actions 之前
+        if args_cli.record and not start_record_state:
+            print("Auto: enabling recorder / start recording state.")
+            start_record_state = True
+            # Best-effort: call recorder start/enable APIs if available
+            try:
+                rm = getattr(env, "recorder_manager", None)
+                if rm is not None:
+                    # try common start-like method names across versions
+                    for meth in ("start", "start_recording", "begin_record", "enable", "start_capture"):
+                        if hasattr(rm, meth) and callable(getattr(rm, meth)):
+                            print(f"Calling recorder_manager.{meth}()")
+                            getattr(rm, meth)()
+                            break
+                    # also try to ensure flush_steps/compression are set
+                    try:
+                        if hasattr(rm, "flush_steps"):
+                            rm.flush_steps = getattr(rm, "flush_steps", 100)
+                        if hasattr(rm, "compression"):
+                            rm.compression = getattr(rm, "compression", "lzf")
+                    except Exception:
+                        pass
+                else:
+                    print("Warning: env.recorder_manager is None — recorder not created.")
+            except Exception as e:
+                print("Failed to start recorder_manager automatically:", e)
+        actions = get_expert_action_pose_based(env, step_count, target=f"Orange00{flag}", flag=flag)
+        # 强制把 prev_gripper_target 设为你刚才计算出的 desired（从 actions 取）
+        # 这会让平滑阶段认为上次目标就是现在的目标，从而直接生效
+        prev_gripper_target = actions[:, -1].clone().unsqueeze(-1)
+
+        # ensure dtype/device
+        actions = actions.to(device=env.device, dtype=torch.float32)
+
+        # Decide grasp phase (auto or fallback)
+        is_grasp_auto, grasp_info = is_grasp_phase_auto(env, f"Orange00{flag}", prev_gripper_target,
+                                                       dist_thresh=0.06, rel_vel_thresh=0.08, gripper_close_threshold=0.7)
+        if not is_grasp_auto:
+            is_grasp = is_grasp_phase_by_step(step_count)
+        else:
+            is_grasp = is_grasp_auto
+
+        # --- GRIPPER: clamp per-frame to avoid instant violent close ---
+        desired_gripper = actions[:, -1].clone().unsqueeze(-1)  # (num_envs,1)
+        cur_gripper = prev_gripper_target  # previous target
+        # use different delta in grasp vs normal
+        if isinstance(is_grasp, bool):
+            if is_grasp:
+                use_gripper_delta = torch.full((env.num_envs,), max_gripper_delta_during_grasp, device=env.device, dtype=torch.float32)
+            else:
+                use_gripper_delta = torch.full((env.num_envs,), max_gripper_delta_normal, device=env.device, dtype=torch.float32)
+        else:
+            # per-env list-like
+            use_gripper_delta = torch.tensor([max_gripper_delta_during_grasp if g else max_gripper_delta_normal for g in (is_grasp if hasattr(is_grasp, "__len__") else [is_grasp])], device=env.device, dtype=torch.float32)
+            if use_gripper_delta.numel() != env.num_envs:
+                use_gripper_delta = torch.full((env.num_envs,), max_gripper_delta_normal, device=env.device, dtype=torch.float32)
+
+        delta_gr = torch.clamp(desired_gripper - cur_gripper, -use_gripper_delta.unsqueeze(-1), use_gripper_delta.unsqueeze(-1))
+        smooth_gripper = cur_gripper + delta_gr
+        actions[:, -1] = smooth_gripper.squeeze(-1)
+        prev_gripper_target = smooth_gripper.clone()
+
+        # --- POSITION / QUATERNION: NO SMOOTHING, directly send ---
+        # action layout expected: [pos_x,pos_y,pos_z, quat_x,quat_y,quat_z,quat_w, gripper]
+        smoothed_actions = actions.to(device=env.device, dtype=torch.float32)
+
+        step_count += 1
+        
+        from isaaclab.managers import SceneEntityCfg
+        if step_count >= MAX_STEPS and flag >= 3:
+            # --- 判断本次 episode 是否被认为成功（使用 env 的 task_done） ---
+            try:
+                print(f"完成一轮（{step_count} 步）, 检查任务成功状态...")
+                success_tensor = task_done(
+                    env,
+                    oranges_cfg=[
+                        SceneEntityCfg("Orange001"),
+                        SceneEntityCfg("Orange002"),
+                        SceneEntityCfg("Orange003"),
+                    ],
+                    plate_cfg=SceneEntityCfg("Plate"),
+                )
+                # 多 env 时，我们认为当所有 env 都为 True 才视为 success（根据你的需要可改为 any）
+                success = bool(success_tensor.all().item())
+                print("任务成功状态:", success)
+            except Exception as e:
+                print("task_done failed:", e)
+                success = False
+
+            # ✅ 只有成功时，才“告诉 recorder 这一轮要存”
+            if args_cli.record and success:
+                print("✅ 任务成功，标记本次演示为 SUCCESS")
+                manual_terminate(env, True)
+                print("SUCCESS!!!!!!!")
+                current_recorded_demo_count += 1
+            else:
+                print("❌ 任务失败，标记本次演示为 FAILURE")
+                manual_terminate(env, False)
+                
+            if (
+                args_cli.record
+                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                > current_recorded_demo_count
+            ):
+                current_recorded_demo_count = (
+                    env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                )
+                print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
+            if (
+                args_cli.record
+                and args_cli.num_demos > 0
+                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                >= args_cli.num_demos
+            ):
+                print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                break
+            
+            # --- bookkeeping: 更新 recorder 导出的计数并判断是否达到 --num_demos ---
+            env.reset()
+            flag = 1
+            step_count = 0    
+            if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
+                print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                break
+        elif step_count >= MAX_STEPS and flag < 3:
+            step_count = 0
+
+            # --- 不论成功与否，都重置环境（你的要求） ---
+            print(f"完成一轮（{step_count} 步）, 重置环境，准备下一个橙子 (flag {flag} -> {flag+1})")
+
+            # --- 更新 step_count/flag 控制逻辑（和你原来逻辑一致） ---
+            flag += 1
+        else:
+            env.step(smoothed_actions)
+            pass
+        
+    env.close()
+    simulation_app.close()
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/source/leisaac/leisaac/controllers/pose_expert.py b/source/leisaac/leisaac/controllers/pose_expert.py
new file mode 100644
index 00000000..dc93e30c
--- /dev/null
+++ b/source/leisaac/leisaac/controllers/pose_expert.py
@@ -0,0 +1,202 @@
+# controllers/pose_expert.py
+"""
+Pose-based expert controller.
+
+Outputs actions with layout:
+[pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w, gripper]
+"""
+import torch
+from isaaclab.utils.math import quat_from_euler_xyz, quat_mul, quat_inv, quat_apply
+
+def apply_triangle_offset(pos_tensor: torch.Tensor, flag: int, radius: float = 0.1) -> torch.Tensor:
+    """
+    Add an equilateral-triangle offset to the x/y of pos_tensor based on flag (1..3).
+    Args:
+        pos_tensor: (num_envs, 3) tensor
+        flag: which orange (1..3)
+        radius: radius in meters
+    """
+    import math
+    idx = (flag - 1) % 3
+    angle = idx * (2 * math.pi / 3)
+    offset_x = radius * math.cos(angle)
+    offset_y = radius * math.sin(angle)
+    pos_tensor = pos_tensor.clone()
+    pos_tensor[:, 0] += offset_x
+    pos_tensor[:, 1] += offset_y
+    return pos_tensor
+
+def get_expert_action_pose_based(env, step_count: int, target: str, flag: int) -> torch.Tensor:
+    """
+    Simple scripted pose-based controller.
+
+    Args:
+        env: environment instance with scene data
+        step_count: int step counter
+        target: name of target object in scene (e.g., "Orange001")
+        flag: integer flag (which orange) used for offsets
+
+    Returns:
+        actions: (num_envs, action_dim) torch.Tensor on same device as env
+    """
+    device = env.device
+    num_envs = env.num_envs
+
+    orange_pos_w = env.scene[target].data.root_pos_w.clone()
+    plate_pos_w = env.scene["Plate"].data.root_pos_w.clone()
+    robot_base_pos_w = env.scene["robot"].data.root_pos_w.clone()
+    robot_base_quat_w = env.scene["robot"].data.root_quat_w.clone()
+
+    target_pos_w = orange_pos_w.clone()
+
+    # Fixed orientation (world)
+    pitch = 0.0
+    target_quat_w = quat_from_euler_xyz(
+        torch.tensor(pitch, device=device),
+        torch.tensor(0.0, device=device),
+        torch.tensor(0.0, device=device),
+    ).repeat(num_envs, 1)
+
+    target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
+
+    # schedule and offsets
+    GRIPPER = 0.1
+    gripper_cmd = torch.full((num_envs, 1), 1.0, device=device)
+
+    if step_count < 120:
+        gripper_cmd[:] = 1.0
+        target_pos_w[:, 2] += 0.1 + GRIPPER
+    elif step_count < 150:
+        gripper_cmd[:] = 1.0
+        target_pos_w[:, 2] += GRIPPER
+    elif step_count < 180:
+        gripper_cmd[:] = -1.0
+        target_pos_w[:, 2] += GRIPPER
+    elif step_count < 220:
+        gripper_cmd[:] = -1.0
+        target_pos_w[:, 2] += 0.25
+    elif step_count < 320:
+        gripper_cmd[:] = -1.0
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += 0.25
+    elif step_count < 350:
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += GRIPPER + 0.1
+        target_pos_w = apply_triangle_offset(target_pos_w, flag)
+        gripper_cmd[:] = -1.0
+    elif step_count < 380:
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += GRIPPER + 0.1
+        target_pos_w = apply_triangle_offset(target_pos_w, flag)
+        gripper_cmd[:] = 1.0
+    elif step_count < 420:
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += 0.2
+        target_pos_w = apply_triangle_offset(target_pos_w, flag)
+        gripper_cmd[:] = 1.0
+    else:
+        gripper_cmd[:] = 1.0
+
+    # small x offset toward robot
+    target_pos_w[:, 0] -= 0.03
+
+    diff_w = target_pos_w - robot_base_pos_w
+    target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
+
+    actions = torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
+    return actions
+
+"""
+Heuristics for detecting grasp phase.
+"""
+from typing import Tuple, Dict, Any
+import torch
+
+def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tensor,
+                        dist_thresh: float = 0.06, rel_vel_thresh: float = 0.08,
+                        gripper_close_threshold: float = 0.7) -> Tuple[bool, Dict[str, Any]]:
+    """
+    Return (is_grasp: bool, info: dict).
+
+    Info contains metrics useful for debugging.
+    """
+    info = {}
+    try:
+        orange = env_local.scene[orange_name]
+        orange_pos = None
+        device = None
+
+        if hasattr(orange.data, "root_pos_w"):
+            orange_pos = orange.data.root_pos_w  # (num_envs,3)
+            device = orange_pos.device
+
+        if device is None:
+            device = getattr(env_local, "device", torch.device("cpu"))
+
+        robot_local = env_local.scene["robot"]
+        try:
+            ee_idx = robot_local.data.body_names.index("gripper")
+        except Exception:
+            ee_idx = 0
+
+        ee_pos = robot_local.data.body_state_w[:, ee_idx, :3]
+        if ee_pos is not None:
+            device = ee_pos.device
+
+        if orange_pos is None:
+            return False, {"error": "orange pos not available"}
+
+        horiz_dist = torch.norm((orange_pos - ee_pos)[:, :2], dim=-1)
+        full_dist = torch.norm((orange_pos - ee_pos), dim=-1)
+        info['horiz_dist'] = horiz_dist.detach().cpu().numpy().tolist()
+        info['full_dist'] = full_dist.detach().cpu().numpy().tolist()
+
+        obj_speed = torch.zeros((orange_pos.shape[0],), device=device)
+        ee_speed = torch.zeros((orange_pos.shape[0],), device=device)
+        if hasattr(orange.data, "root_linvel_w"):
+            try:
+                obj_v = orange.data.root_linvel_w
+                obj_speed = torch.norm(obj_v, dim=-1).to(device)
+            except Exception:
+                pass
+        if hasattr(robot_local.data, "body_linvel_w"):
+            try:
+                v_ee = robot_local.data.body_linvel_w[:, ee_idx, :]
+                ee_speed = torch.norm(v_ee, dim=-1).to(device)
+            except Exception:
+                pass
+
+        rel_speed = torch.minimum(obj_speed, ee_speed)
+        info['obj_speed'] = obj_speed.detach().cpu().numpy().tolist() if obj_speed.numel() else None
+        info['ee_speed'] = ee_speed.detach().cpu().numpy().tolist() if ee_speed.numel() else None
+        info['rel_speed'] = rel_speed.detach().cpu().numpy().tolist()
+
+        # ensure prev_gripper_t on same device and as python list for mask
+        try:
+            if not isinstance(prev_gripper_t, torch.Tensor):
+                prev_gripper_t = torch.as_tensor(prev_gripper_t, device=device)
+            else:
+                prev_gripper_t = prev_gripper_t.to(device)
+            gripper_target = prev_gripper_t.view(-1).detach().cpu().numpy().tolist()
+        except Exception:
+            gripper_target = None
+        info['gripper_target'] = gripper_target
+
+        near_mask = (horiz_dist <= torch.as_tensor(dist_thresh, device=device))
+        slow_mask = (rel_speed <= torch.as_tensor(rel_vel_thresh, device=device))
+
+        if gripper_target is not None:
+            try:
+                gripper_mask = torch.tensor([gt < gripper_close_threshold for gt in gripper_target], dtype=torch.bool, device=device)
+            except Exception:
+                gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
+        else:
+            gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
+
+        is_grasp_tensor = near_mask & slow_mask & gripper_mask
+        is_grasp_list = is_grasp_tensor.detach().cpu().numpy().tolist()
+        is_grasp_any = any(is_grasp_list)
+        return is_grasp_any, info
+
+    except Exception as e:
+        return False, {"error": str(e)}
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index d03cd2d9..327db355 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -76,7 +76,30 @@ def init_action_cfg(action_cfg, device):
             joint_names=["gripper"],
             scale=1.0,
         )
-
+    elif device in ["auto_generate"]:
+        action_cfg.arm_action = mdp.DifferentialInverseKinematicsActionCfg(
+            asset_name="robot",
+            joint_names=[
+                "shoulder_pan",
+                "shoulder_lift",
+                "elbow_flex",
+                "wrist_flex",
+                "wrist_roll",
+            ],
+            body_name="gripper",
+            controller=mdp.DifferentialIKControllerCfg(
+                command_type="pose",
+                ik_method="dls",
+                ik_params={"lambda_val": 0.02},
+            ),
+        )
+        action_cfg.gripper_action = mdp.BinaryJointPositionActionCfg(
+            asset_name="robot",
+            joint_names=["gripper"],
+            open_command_expr={"gripper": 1.0},
+            close_command_expr={"gripper": 0.4},
+        )
+        
     """LeKiwi action configuration"""
     if device in ["lekiwi-leader", "lekiwi-keyboard", "lekiwi-gamepad"]:
         action_cfg.wheel_action = mdp.JointVelocityActionCfg(
@@ -129,6 +152,9 @@ def preprocess_device_action(action: dict[str, Any], teleop_device) -> torch.Ten
     elif action.get("keyboard") is not None or action.get("gamepad") is not None:
         processed_action = torch.zeros(teleop_device.env.num_envs, 8, device=teleop_device.env.device)
         processed_action[:, :] = action["joint_state"]
+    elif action.get("auto_generate") is not None:
+        processed_action = torch.zeros(teleop_device.env.num_envs, 8, device=teleop_device.env.device)
+        processed_action[:, :] = action["joint_state"]
     elif action.get("bi_so101_leader") is not None:
         processed_action = torch.zeros(teleop_device.env.num_envs, 12, device=teleop_device.env.device)
         processed_action[:, :6] = convert_action_from_so101_leader(
diff --git a/source/leisaac/leisaac/devices/auto_generate/__init__.py b/source/leisaac/leisaac/devices/auto_generate/__init__.py
new file mode 100644
index 00000000..b34602c0
--- /dev/null
+++ b/source/leisaac/leisaac/devices/auto_generate/__init__.py
@@ -0,0 +1 @@
+from .so101_auto_generate import SO101Autogenerate
diff --git a/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py b/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py
new file mode 100644
index 00000000..6d739ad6
--- /dev/null
+++ b/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py
@@ -0,0 +1,109 @@
+import carb
+import numpy as np
+
+from ..device_base import Device
+
+
+class SO101Autogenerate(Device):
+    """A keyboard controller for sending SE(3) commands as delta poses for so101 single arm.
+
+    Key bindings:
+        ============================== ================= =================
+        Description                    Key               Key
+        ============================== ================= =================
+        Forward / Backward              W                 S
+        Left / Right                    A                 D
+        Up / Down                       Q                 E
+        Rotate (Yaw) Left / Right       J                 L
+        Rotate (Pitch) Up / Down        K                 I
+        Gripper Open / Close            U                 O
+        ============================== ================= =================
+
+    """
+
+    def __init__(self, env, sensitivity: float = 1.0):
+        super().__init__(env, "auto_generate")
+
+        # store inputs
+        self.pos_sensitivity = 0.01 * sensitivity
+        self.joint_sensitivity = 0.15 * sensitivity
+        self.rot_sensitivity = 0.15 * sensitivity
+
+        # bindings for keyboard to command
+        self._create_key_bindings()
+
+        # command buffers (dx, dy, dz, droll, dpitch, dyaw, d_shoulder_pan, d_gripper)
+        self._delta_action = np.zeros(8)
+
+        # initialize the target frame
+        self.asset_name = "robot"
+        self.robot_asset = self.env.scene[self.asset_name]
+
+        self.target_frame = "gripper"
+        body_idxs, _ = self.robot_asset.find_bodies(self.target_frame)
+        self.target_frame_idx = body_idxs[0]
+
+    def __str__(self) -> str:
+        """Returns: A string containing the information of keyboard controller."""
+        msg = "Keyboard Controller for SO101 Single Arm (SE(3) Control).\n"
+        msg += f"\tKeyboard name: {self._input.get_keyboard_name(self._keyboard)}\n"
+        msg += f"\ttarget frame: {self.target_frame}\n"
+        msg += "\t----------------------------------------------\n"
+        msg += "\tForward / Backward:                W / S\n"
+        msg += "\tLeft / Right:                      A / D\n"
+        msg += "\tUp / Down:                         Q / E\n"
+        msg += "\tRotate (Yaw) Left / Right:         J / L\n"
+        msg += "\tRotate (Pitch) Up / Down:          K / I\n"
+        msg += "\tGripper Open / Close:              U / O\n"
+        msg += "\t----------------------------------------------\n"
+        return msg
+
+    def get_device_state(self):
+        return self._convert_delta_from_frame(self._delta_action)
+
+    def reset(self):
+        self._delta_action[:] = 0.0
+
+    def _on_keyboard_event(self, event, *args, **kwargs):
+        super()._on_keyboard_event(event, *args, **kwargs)
+        # apply the command when pressed
+        if event.type == carb.input.KeyboardEventType.KEY_PRESS:
+            if event.input.name in self._INPUT_KEY_MAPPING.keys():
+                self._delta_action += self._ACTION_DELTA_MAPPING[self._INPUT_KEY_MAPPING[event.input.name]]
+        # remove the command when un-pressed
+        if event.type == carb.input.KeyboardEventType.KEY_RELEASE:
+            if event.input.name in self._INPUT_KEY_MAPPING.keys():
+                self._delta_action -= self._ACTION_DELTA_MAPPING[self._INPUT_KEY_MAPPING[event.input.name]]
+
+    def _create_key_bindings(self):
+        """Creates default key binding.
+        Based on target frame to control the delta action.
+        """
+        self._ACTION_DELTA_MAPPING = {
+            "forward": np.asarray([0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0]) * self.pos_sensitivity,
+            "backward": np.asarray([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]) * self.pos_sensitivity,
+            "left": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0]) * self.joint_sensitivity,
+            "right": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]) * self.joint_sensitivity,
+            "up": np.asarray([1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) * self.pos_sensitivity,
+            "down": np.asarray([-1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) * self.pos_sensitivity,
+            "rotate_up": np.asarray([0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0]) * self.rot_sensitivity,
+            "rotate_down": np.asarray([0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]) * self.rot_sensitivity,
+            "rotate_left": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]) * self.rot_sensitivity,
+            "rotate_right": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0]) * self.rot_sensitivity,
+            "gripper_open": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) * self.joint_sensitivity,
+            "gripper_close": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0]) * self.joint_sensitivity,
+        }
+        self._INPUT_KEY_MAPPING = {
+            "W": "forward",
+            "S": "backward",
+            "A": "left",
+            "D": "right",
+            "Q": "up",
+            "E": "down",
+            "K": "rotate_up",
+            "I": "rotate_down",
+            "J": "rotate_left",
+            "L": "rotate_right",
+            "U": "gripper_open",
+            "O": "gripper_close",
+        }
diff --git a/source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py b/source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py
index de747bfc..e3750d62 100644
--- a/source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py
+++ b/source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py
@@ -50,9 +50,9 @@ def task_done(
         done = torch.logical_and(done, orange_y > plate_y + y_range[0])
         done = torch.logical_and(done, orange_height < plate_height + height_range[1])
         done = torch.logical_and(done, orange_height > plate_height + height_range[0])
-
+    """
     joint_pos = env.scene["robot"].data.joint_pos
     joint_names = env.scene["robot"].data.joint_names
     done = torch.logical_and(done, is_so101_at_rest_pose(joint_pos, joint_names))
-
+    """
     return done
diff --git a/source/leisaac/leisaac/utils/env_utils.py b/source/leisaac/leisaac/utils/env_utils.py
index 9a52c5bd..57e70d59 100644
--- a/source/leisaac/leisaac/utils/env_utils.py
+++ b/source/leisaac/leisaac/utils/env_utils.py
@@ -5,7 +5,7 @@ def dynamic_reset_gripper_effort_limit_sim(env, teleop_device):
     need_to_set = []
     if teleop_device == "bi-so101leader":
         need_to_set = [env.scene.articulations["left_arm"], env.scene.articulations["right_arm"]]
-    elif teleop_device in ["so101leader", "keyboard"]:
+    elif teleop_device in ["so101leader", "keyboard", "auto_generate"]:
         need_to_set = [env.scene["robot"]]
     for arm in need_to_set:
         write_gripper_effort_limit_sim(env, arm)
diff --git a/source/leisaac/leisaac/utils/usd_fixes.py b/source/leisaac/leisaac/utils/usd_fixes.py
new file mode 100644
index 00000000..14d38bbd
--- /dev/null
+++ b/source/leisaac/leisaac/utils/usd_fixes.py
@@ -0,0 +1,31 @@
+# utils/usd_fixes.py
+"""
+USD-related utilities (best-effort fixes).
+"""
+import omni.usd
+from pxr import UsdGeom, UsdPhysics
+
+def auto_fix_collision_issues():
+    """
+    Convert certain mesh collisions to convex decomposition to reduce contact issues.
+    Best-effort: requires a valid current stage.
+    """
+    stage = omni.usd.get_context().get_stage()
+    if not stage:
+        return
+
+    for prim in stage.Traverse():
+        name = prim.GetName().lower()
+        if any(k in name for k in ("handle", "drawer", "board")):
+            if not prim.IsA(UsdGeom.Mesh):
+                continue
+            api = UsdPhysics.MeshCollisionAPI(prim)
+            if not api:
+                api = UsdPhysics.MeshCollisionAPI.Apply(prim)
+            approx_attr = api.GetApproximationAttr()
+            try:
+                current_val = approx_attr.Get()
+            except Exception:
+                current_val = None
+            if current_val != "convexDecomposition":
+                approx_attr.Set("convexDecomposition")

From e52df2efd4b934b9f027cc476b93c7e8adf85b7f Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 27 Jan 2026 12:37:12 -0500
Subject: [PATCH 02/68] Add data auto-generate module.

---
 scripts/environments/teleoperation/data_generate.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/scripts/environments/teleoperation/data_generate.py b/scripts/environments/teleoperation/data_generate.py
index d895a8c5..1e701f5a 100644
--- a/scripts/environments/teleoperation/data_generate.py
+++ b/scripts/environments/teleoperation/data_generate.py
@@ -1,11 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-Minimal Lula integration for data_generate:
-- Replace Differential IK with Lula (LulaKinematicsSolver + ArticulationKinematicsSolver)
-- Output joint target tensor (num_envs, n_joints) and call env.step(actions)
-- Minimal checks, suitable for debug runs. Adjust LULA paths as needed.
-"""
-
 import multiprocessing
 if multiprocessing.get_start_method() != "spawn":
     multiprocessing.set_start_method("spawn", force=True)
@@ -139,7 +131,6 @@ def get_expert_action_pose_based(env, step_count, target, flag):
     def apply_triangle_offset(pos_tensor, flag, radius=0.1):
         """
         给位置张量的 x, y 轴增加等边三角形偏移量。
-        
         Args:
             pos_tensor: (num_envs, 3) 的 target_pos_w 张量
             flag: 当前是第几个橙子 (1, 2, 3)

From ae15ddc21638907ea385746c27d283ec72e38248 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 27 Jan 2026 12:48:13 -0500
Subject: [PATCH 03/68] Add data auto-generate module.

---
 scripts/environments/teleoperation/data_generate.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/environments/teleoperation/data_generate.py b/scripts/environments/teleoperation/data_generate.py
index 1e701f5a..dd096e91 100644
--- a/scripts/environments/teleoperation/data_generate.py
+++ b/scripts/environments/teleoperation/data_generate.py
@@ -105,6 +105,7 @@ def is_grasp_phase_by_step(step_count):
     return None
 def get_expert_action_pose_based(env, step_count, target, flag):
     
+    
     device = env.device
     num_envs = env.num_envs
     orange_pos_w = env.scene[target].data.root_pos_w.clone()

From a2457122386fe86558a8d48613c5a22b487ad1da Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 27 Jan 2026 12:50:02 -0500
Subject: [PATCH 04/68] Add data auto-generate module.

---
 scripts/environments/teleoperation/data_generate.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/environments/teleoperation/data_generate.py b/scripts/environments/teleoperation/data_generate.py
index dd096e91..1e701f5a 100644
--- a/scripts/environments/teleoperation/data_generate.py
+++ b/scripts/environments/teleoperation/data_generate.py
@@ -105,7 +105,6 @@ def is_grasp_phase_by_step(step_count):
     return None
 def get_expert_action_pose_based(env, step_count, target, flag):
     
-    
     device = env.device
     num_envs = env.num_envs
     orange_pos_w = env.scene[target].data.root_pos_w.clone()

From 38df5fc1ef189c9944d9b28b75e5b04aab83cda1 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 12:53:16 -0500
Subject: [PATCH 05/68] Add data auto-generate module.

---
 scripts/environments/teleoperation/data_generate.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/environments/teleoperation/data_generate.py b/scripts/environments/teleoperation/data_generate.py
index 1e701f5a..3a9791ef 100644
--- a/scripts/environments/teleoperation/data_generate.py
+++ b/scripts/environments/teleoperation/data_generate.py
@@ -104,7 +104,6 @@ def auto_fix_collision_issues():
 def is_grasp_phase_by_step(step_count):
     return None
 def get_expert_action_pose_based(env, step_count, target, flag):
-    
     device = env.device
     num_envs = env.num_envs
     orange_pos_w = env.scene[target].data.root_pos_w.clone()

From 83b9e0bc216f3623cca9f729b411f2fbddb09f5d Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 20:35:49 -0500
Subject: [PATCH 06/68] Add auto_terminate.

---
 dependencies/IsaacLab                         |  2 +-
 .../pick_orange.py}                           | 13 +--
 .../leisaac/leisaac/devices/action_process.py |  6 +-
 .../leisaac/devices/auto_generate/__init__.py |  2 +-
 .../auto_generate/so101_auto_generate.py      | 83 ++-----------------
 .../leisaac/enhance/envs/direct_rl_env_cfg.py |  4 +-
 .../tasks/template/direct/single_arm_env.py   |  2 +-
 .../tasks/template/single_arm_env_cfg.py      |  2 +-
 source/leisaac/leisaac/utils/env_utils.py     |  2 +-
 9 files changed, 19 insertions(+), 97 deletions(-)
 rename scripts/environments/{teleoperation/data_generate.py => state_machine/pick_orange.py} (98%)

diff --git a/dependencies/IsaacLab b/dependencies/IsaacLab
index 3c6e67bb..0f030aa6 160000
--- a/dependencies/IsaacLab
+++ b/dependencies/IsaacLab
@@ -1 +1 @@
-Subproject commit 3c6e67bb5c7ada942a6d1884ab69338f57596f77
+Subproject commit 0f030aa6bdd968ae387ac4b74289748437586618
diff --git a/scripts/environments/teleoperation/data_generate.py b/scripts/environments/state_machine/pick_orange.py
similarity index 98%
rename from scripts/environments/teleoperation/data_generate.py
rename to scripts/environments/state_machine/pick_orange.py
index 3a9791ef..203cfbc4 100644
--- a/scripts/environments/teleoperation/data_generate.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -4,13 +4,10 @@
 
 import argparse
 import os
-import cv2
 import time
-import numpy as np
 import torch
 from isaaclab.app import AppLauncher
 import gymnasium as gym
-
 import omni.kit.app
 
 # ---- CLI ----
@@ -38,8 +35,8 @@ def load_extensions():
     ext_mgr.set_extension_enabled_immediate("omni.isaac.motion_generation", True)
     ext_mgr.set_extension_enabled_immediate("omni.isaac.core", True)
 
-# 在 AppLauncher / SimulationApp 之后
 load_extensions()
+
 from leisaac.enhance.managers import StreamingRecorderManager, EnhanceDatasetExportMode
 from leisaac.tasks.pick_orange.mdp import task_done
 from isaaclab.managers import DatasetExportMode, TerminationTermCfg
@@ -98,7 +95,6 @@ def auto_fix_collision_issues():
             if current_val != "convexDecomposition":
                 approx_attr.Set("convexDecomposition")
 
-# ---- minimal pose-based expert (fallback) ----
 from isaaclab.utils.math import quat_inv, quat_apply
 
 def is_grasp_phase_by_step(step_count):
@@ -199,11 +195,7 @@ def main():
     # prepare
     task_name = getattr(args_cli, "task", None)
     env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs)
-    env_cfg.use_teleop_device("auto_generate")
-    # -----------------------
-    # dataset / recorder setup (same as manual teleop)
-    # -----------------------
-    # get directory and file name from cli
+    env_cfg.use_teleop_device("so101_state_machine")
     output_dir = os.path.dirname(args_cli.dataset_file)
     output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
     if not os.path.exists(output_dir):
@@ -245,6 +237,7 @@ def main():
     # (optional) fix USD collisions
     auto_fix_collision_issues()
     resume_recorded_demo_count = 0
+    
     if args_cli.record:
         try:
             del env.recorder_manager
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index 327db355..e86b4a0e 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -76,7 +76,7 @@ def init_action_cfg(action_cfg, device):
             joint_names=["gripper"],
             scale=1.0,
         )
-    elif device in ["auto_generate"]:
+    elif device in ["so101_state_machine"]:
         action_cfg.arm_action = mdp.DifferentialInverseKinematicsActionCfg(
             asset_name="robot",
             joint_names=[
@@ -90,7 +90,7 @@ def init_action_cfg(action_cfg, device):
             controller=mdp.DifferentialIKControllerCfg(
                 command_type="pose",
                 ik_method="dls",
-                ik_params={"lambda_val": 0.02},
+                ik_params={"lambda_val": 0.4},
             ),
         )
         action_cfg.gripper_action = mdp.BinaryJointPositionActionCfg(
@@ -152,7 +152,7 @@ def preprocess_device_action(action: dict[str, Any], teleop_device) -> torch.Ten
     elif action.get("keyboard") is not None or action.get("gamepad") is not None:
         processed_action = torch.zeros(teleop_device.env.num_envs, 8, device=teleop_device.env.device)
         processed_action[:, :] = action["joint_state"]
-    elif action.get("auto_generate") is not None:
+    elif action.get("so101_state_machine") is not None:
         processed_action = torch.zeros(teleop_device.env.num_envs, 8, device=teleop_device.env.device)
         processed_action[:, :] = action["joint_state"]
     elif action.get("bi_so101_leader") is not None:
diff --git a/source/leisaac/leisaac/devices/auto_generate/__init__.py b/source/leisaac/leisaac/devices/auto_generate/__init__.py
index b34602c0..f0a78f4f 100644
--- a/source/leisaac/leisaac/devices/auto_generate/__init__.py
+++ b/source/leisaac/leisaac/devices/auto_generate/__init__.py
@@ -1 +1 @@
-from .so101_auto_generate import SO101Autogenerate
+from .so101_state_machine import SO101Statemachine
diff --git a/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py b/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py
index 6d739ad6..2da171a3 100644
--- a/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py
+++ b/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py
@@ -4,34 +4,19 @@
 from ..device_base import Device
 
 
-class SO101Autogenerate(Device):
-    """A keyboard controller for sending SE(3) commands as delta poses for so101 single arm.
-
-    Key bindings:
-        ============================== ================= =================
-        Description                    Key               Key
-        ============================== ================= =================
-        Forward / Backward              W                 S
-        Left / Right                    A                 D
-        Up / Down                       Q                 E
-        Rotate (Yaw) Left / Right       J                 L
-        Rotate (Pitch) Up / Down        K                 I
-        Gripper Open / Close            U                 O
-        ============================== ================= =================
-
+class SO101Statemachine(Device):
+    """
+    A statemachine controller to generate poses for so101 single arm.
     """
 
     def __init__(self, env, sensitivity: float = 1.0):
-        super().__init__(env, "auto_generate")
+        super().__init__(env, "so101_state_machine")
 
         # store inputs
         self.pos_sensitivity = 0.01 * sensitivity
         self.joint_sensitivity = 0.15 * sensitivity
         self.rot_sensitivity = 0.15 * sensitivity
 
-        # bindings for keyboard to command
-        self._create_key_bindings()
-
         # command buffers (dx, dy, dz, droll, dpitch, dyaw, d_shoulder_pan, d_gripper)
         self._delta_action = np.zeros(8)
 
@@ -43,67 +28,9 @@ def __init__(self, env, sensitivity: float = 1.0):
         body_idxs, _ = self.robot_asset.find_bodies(self.target_frame)
         self.target_frame_idx = body_idxs[0]
 
-    def __str__(self) -> str:
-        """Returns: A string containing the information of keyboard controller."""
-        msg = "Keyboard Controller for SO101 Single Arm (SE(3) Control).\n"
-        msg += f"\tKeyboard name: {self._input.get_keyboard_name(self._keyboard)}\n"
-        msg += f"\ttarget frame: {self.target_frame}\n"
-        msg += "\t----------------------------------------------\n"
-        msg += "\tForward / Backward:                W / S\n"
-        msg += "\tLeft / Right:                      A / D\n"
-        msg += "\tUp / Down:                         Q / E\n"
-        msg += "\tRotate (Yaw) Left / Right:         J / L\n"
-        msg += "\tRotate (Pitch) Up / Down:          K / I\n"
-        msg += "\tGripper Open / Close:              U / O\n"
-        msg += "\t----------------------------------------------\n"
-        return msg
 
     def get_device_state(self):
         return self._convert_delta_from_frame(self._delta_action)
 
     def reset(self):
-        self._delta_action[:] = 0.0
-
-    def _on_keyboard_event(self, event, *args, **kwargs):
-        super()._on_keyboard_event(event, *args, **kwargs)
-        # apply the command when pressed
-        if event.type == carb.input.KeyboardEventType.KEY_PRESS:
-            if event.input.name in self._INPUT_KEY_MAPPING.keys():
-                self._delta_action += self._ACTION_DELTA_MAPPING[self._INPUT_KEY_MAPPING[event.input.name]]
-        # remove the command when un-pressed
-        if event.type == carb.input.KeyboardEventType.KEY_RELEASE:
-            if event.input.name in self._INPUT_KEY_MAPPING.keys():
-                self._delta_action -= self._ACTION_DELTA_MAPPING[self._INPUT_KEY_MAPPING[event.input.name]]
-
-    def _create_key_bindings(self):
-        """Creates default key binding.
-        Based on target frame to control the delta action.
-        """
-        self._ACTION_DELTA_MAPPING = {
-            "forward": np.asarray([0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0]) * self.pos_sensitivity,
-            "backward": np.asarray([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]) * self.pos_sensitivity,
-            "left": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0]) * self.joint_sensitivity,
-            "right": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]) * self.joint_sensitivity,
-            "up": np.asarray([1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) * self.pos_sensitivity,
-            "down": np.asarray([-1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) * self.pos_sensitivity,
-            "rotate_up": np.asarray([0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0]) * self.rot_sensitivity,
-            "rotate_down": np.asarray([0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]) * self.rot_sensitivity,
-            "rotate_left": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]) * self.rot_sensitivity,
-            "rotate_right": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0]) * self.rot_sensitivity,
-            "gripper_open": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) * self.joint_sensitivity,
-            "gripper_close": np.asarray([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0]) * self.joint_sensitivity,
-        }
-        self._INPUT_KEY_MAPPING = {
-            "W": "forward",
-            "S": "backward",
-            "A": "left",
-            "D": "right",
-            "Q": "up",
-            "E": "down",
-            "K": "rotate_up",
-            "I": "rotate_down",
-            "J": "rotate_left",
-            "L": "rotate_right",
-            "U": "gripper_open",
-            "O": "gripper_close",
-        }
+        self._delta_action[:] = 0.0
\ No newline at end of file
diff --git a/source/leisaac/leisaac/enhance/envs/direct_rl_env_cfg.py b/source/leisaac/leisaac/enhance/envs/direct_rl_env_cfg.py
index 7e821ec1..1eee2c57 100644
--- a/source/leisaac/leisaac/enhance/envs/direct_rl_env_cfg.py
+++ b/source/leisaac/leisaac/enhance/envs/direct_rl_env_cfg.py
@@ -16,4 +16,6 @@ class RecorderEnhanceDirectRLEnvCfg(DirectRLEnvCfg):
     manual_terminate: bool = False
     """Whether enable manual terminate in this env. Set it to True when teleoperating."""
     return_success_status: bool = False
-    """When manual_terminate is True, _get_dones() will return this value as done"""
+    """When manual_terminate or auto_terminate is True, _get_dones() will return this value as done"""
+    auto_terminate: bool = False
+    """Whether enable auto terminate in this env. Set it to True when using state machine."""
\ No newline at end of file
diff --git a/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py b/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
index e9680890..ecf4f997 100644
--- a/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
+++ b/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
@@ -83,7 +83,7 @@ def __post_init__(self) -> None:
 
     def use_teleop_device(self, teleop_device) -> None:
         self.task_type = teleop_device
-        if teleop_device in ["keyboard", "gamepad"]:
+        if teleop_device in ["keyboard", "gamepad", "so101_state_machine"]:
             self.scene.robot.spawn.rigid_props.disable_gravity = True
 
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:
diff --git a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
index 8850cf0a..b11664a3 100644
--- a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
@@ -194,7 +194,7 @@ def __post_init__(self) -> None:
     def use_teleop_device(self, teleop_device) -> None:
         self.task_type = teleop_device
         self.actions = init_action_cfg(self.actions, device=teleop_device)
-        if teleop_device in ["keyboard", "gamepad"]:
+        if teleop_device in ["keyboard", "gamepad", "so101_state_machine"]:
             self.scene.robot.spawn.rigid_props.disable_gravity = True
 
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:
diff --git a/source/leisaac/leisaac/utils/env_utils.py b/source/leisaac/leisaac/utils/env_utils.py
index 57e70d59..b5342ccd 100644
--- a/source/leisaac/leisaac/utils/env_utils.py
+++ b/source/leisaac/leisaac/utils/env_utils.py
@@ -5,7 +5,7 @@ def dynamic_reset_gripper_effort_limit_sim(env, teleop_device):
     need_to_set = []
     if teleop_device == "bi-so101leader":
         need_to_set = [env.scene.articulations["left_arm"], env.scene.articulations["right_arm"]]
-    elif teleop_device in ["so101leader", "keyboard", "auto_generate"]:
+    elif teleop_device in ["so101leader", "keyboard", "so101_state_machine"]:
         need_to_set = [env.scene["robot"]]
     for arm in need_to_set:
         write_gripper_effort_limit_sim(env, arm)

From 3b78d34927e7d1665e16198f7182bbabbba3f02a Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 20:54:47 -0500
Subject: [PATCH 07/68] Add auto_terminate.

---
 .../environments/state_machine/pick_orange.py | 62 +++++++++----------
 .../tasks/template/direct/bi_arm_env.py       |  2 +-
 2 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 203cfbc4..e9d26f61 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -29,24 +29,41 @@
 app_launcher = AppLauncher(app_launcher_args)
 simulation_app = app_launcher.app
 
-def load_extensions():
-    app = omni.kit.app.get_app()
-    ext_mgr = app.get_extension_manager()
-    ext_mgr.set_extension_enabled_immediate("omni.isaac.motion_generation", True)
-    ext_mgr.set_extension_enabled_immediate("omni.isaac.core", True)
-
-load_extensions()
-
 from leisaac.enhance.managers import StreamingRecorderManager, EnhanceDatasetExportMode
 from leisaac.tasks.pick_orange.mdp import task_done
 from isaaclab.managers import DatasetExportMode, TerminationTermCfg
 from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
 from isaaclab_tasks.utils import parse_env_cfg
 
-import omni.usd
-from pxr import Usd, UsdGeom, UsdPhysics
+class RateLimiter:
+    """Convenience class for enforcing rates in loops."""
+
+    def __init__(self, hz):
+        """
+        Args:
+            hz (int): frequency to enforce
+        """
+        self.hz = hz
+        self.last_time = time.time()
+        self.sleep_duration = 1.0 / hz
+        self.render_period = min(0.0166, self.sleep_duration)
+
+    def sleep(self, env):
+        """Attempt to sleep at the specified rate in hz."""
+        next_wakeup_time = self.last_time + self.sleep_duration
+        while time.time() < next_wakeup_time:
+            time.sleep(self.render_period)
+            env.sim.render()
+
+        self.last_time = self.last_time + self.sleep_duration
+
+        # detect time jumping forwards (e.g. loop is too slow)
+        if self.last_time < time.time():
+            while self.last_time < time.time():
+                self.last_time += self.sleep_duration
+
 
-def manual_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
+def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
     """
     Programmatically mark the current episode as success or failure.
 
@@ -77,23 +94,6 @@ def manual_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         # fallback for some Direct envs
         env.cfg.return_success_status = success
     return False
-# ---- minimal helpers ----
-def auto_fix_collision_issues():
-    stage = omni.usd.get_context().get_stage()
-    if not stage:
-        return
-    for prim in stage.Traverse():
-        name = prim.GetName()
-        if "handle" in name or "drawer" in name or "board" in name:
-            if not prim.IsA(UsdGeom.Mesh):
-                continue
-            collision_api = UsdPhysics.MeshCollisionAPI(prim)
-            if not collision_api:
-                collision_api = UsdPhysics.MeshCollisionAPI.Apply(prim)
-            approx_attr = collision_api.GetApproximationAttr()
-            current_val = approx_attr.Get()
-            if current_val != "convexDecomposition":
-                approx_attr.Set("convexDecomposition")
 
 from isaaclab.utils.math import quat_inv, quat_apply
 
@@ -234,8 +234,6 @@ def main():
     # create env
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
 
-    # (optional) fix USD collisions
-    auto_fix_collision_issues()
     resume_recorded_demo_count = 0
     
     if args_cli.record:
@@ -517,12 +515,12 @@ def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tenso
             # ✅ 只有成功时，才“告诉 recorder 这一轮要存”
             if args_cli.record and success:
                 print("✅ 任务成功，标记本次演示为 SUCCESS")
-                manual_terminate(env, True)
+                auto_terminate(env, True)
                 print("SUCCESS!!!!!!!")
                 current_recorded_demo_count += 1
             else:
                 print("❌ 任务失败，标记本次演示为 FAILURE")
-                manual_terminate(env, False)
+                auto_terminate(env, False)
                 
             if (
                 args_cli.record
diff --git a/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py b/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
index 69ff3c8b..d7fd3668 100644
--- a/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
+++ b/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
@@ -176,7 +176,7 @@ def _get_rewards(self) -> torch.Tensor:
         return 0.0
 
     def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
-        if self.cfg.manual_terminate and self.cfg.return_success_status:
+        if (self.cfg.manual_terminate or self.cfg.auto_terminate) and self.cfg.return_success_status:
             done = torch.ones(self.num_envs, dtype=torch.bool, device=self.device)
         else:
             done = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)

From 47d8504282f0c1e9823d3074528235f4a69ab199 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 20:56:32 -0500
Subject: [PATCH 08/68] Add auto_terminate.

---
 scripts/environments/state_machine/pick_orange.py              | 2 +-
 source/leisaac/leisaac/tasks/template/direct/single_arm_env.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index e9d26f61..cf571ad0 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -95,7 +95,7 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         env.cfg.return_success_status = success
     return False
 
-from isaaclab.utils.math import quat_inv, quat_apply
+from isaaclab.utils.math import quat_apply
 
 def is_grasp_phase_by_step(step_count):
     return None
diff --git a/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py b/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
index ecf4f997..aa7ea7a6 100644
--- a/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
+++ b/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
@@ -155,7 +155,7 @@ def _get_rewards(self) -> torch.Tensor:
         return 0.0
 
     def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
-        if self.cfg.manual_terminate and self.cfg.return_success_status:
+        if (self.cfg.manual_terminate or self.cfg.auto_terminate) and self.cfg.return_success_status:
             done = torch.ones(self.num_envs, dtype=torch.bool, device=self.device)
         else:
             done = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)

From 2e448cf47679c142ebc298cb2da18aff15320f07 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 21:41:26 -0500
Subject: [PATCH 09/68] Add Description.

---
 .../environments/state_machine/pick_orange.py | 163 ++----------------
 1 file changed, 10 insertions(+), 153 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index cf571ad0..8d5b821a 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -1,3 +1,6 @@
+"""Script to generate data using state machine with leisaac manipulation environments."""
+
+"""Launch Isaac Sim Simulator first."""
 import multiprocessing
 if multiprocessing.get_start_method() != "spawn":
     multiprocessing.set_start_method("spawn", force=True)
@@ -10,8 +13,8 @@
 import gymnasium as gym
 import omni.kit.app
 
-# ---- CLI ----
-parser = argparse.ArgumentParser(description="leisaac data generation - Lula minimal")
+# add argparse arguments
+parser = argparse.ArgumentParser(description="leisaac data generation script for pick_orange task.")
 parser.add_argument("--num_envs", type=int, default=1)
 parser.add_argument("--task", type=str, default=None)
 parser.add_argument("--seed", type=int, default=None)
@@ -302,119 +305,8 @@ def main():
             print("Warning: robot_art helper methods for stiffness/damping not available. If you need to set them, check robot_art API or the asset YAML.")
 
     # rate limiter
-    rate_limiter = None
-    if args_cli.step_hz:
-        rate_limiter = type("R", (), {"hz": args_cli.step_hz, "sleep": lambda self, e: time.sleep(1.0/self.hz)})()
+    rate_limiter = RateLimiter(args_cli.step_hz)
     step_count = 0
-
-    prev_gripper_target = torch.full((env.num_envs, 1), 1.0, device=env.device)
-
-    
-
-    max_gripper_delta_normal = 0.04
-    max_gripper_delta_during_grasp = 0.01
-
-    # simple step-based fallback
-
-    # auto detector: distance + relative speed + gripper-target
-    def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tensor,
-                            dist_thresh: float = 0.06, rel_vel_thresh: float = 0.08, gripper_close_threshold: float = 0.7):
-        """
-        Return (is_grasp: bool, info: dict).
-        - device-safe: newly created tensors follow the device of scene tensors (cuda or cpu).
-        - prev_gripper_t should be a tensor on the env device (if not, we'll move it).
-        """
-        info = {}
-        try:
-            orange = env_local.scene[orange_name]
-            orange_pos = None
-            device = None
-
-            # obtain orange pos and device
-            if hasattr(orange.data, "root_pos_w"):
-                orange_pos = orange.data.root_pos_w  # (num_envs,3) tensor
-                device = orange_pos.device
-
-            # fallback device: try env_local.device or cpu
-            if device is None:
-                device = getattr(env_local, "device", torch.device("cpu"))
-
-            # robot and ee
-            robot_local = env_local.scene["robot"]
-            try:
-                ee_idx = robot_local.data.body_names.index("gripper")
-            except Exception:
-                ee_idx = 0
-
-            # ee pos tensor (ensure on same device)
-            ee_pos = robot_local.data.body_state_w[:, ee_idx, :3]  # (num_envs,3)
-            if ee_pos is not None:
-                device = ee_pos.device
-
-            # if orange_pos missing -> cannot detect
-            if orange_pos is None:
-                return False, {"error": "orange pos not available"}
-
-            # horizontal / full distance (tensors on same device)
-            horiz_dist = torch.norm((orange_pos - ee_pos)[:, :2], dim=-1)  # (num_envs,)
-            full_dist = torch.norm((orange_pos - ee_pos), dim=-1)
-            info['horiz_dist'] = horiz_dist.detach().cpu().numpy().tolist()
-            info['full_dist'] = full_dist.detach().cpu().numpy().tolist()
-
-            # read linear velocities if available; default zeros on correct device
-            obj_speed = torch.zeros((orange_pos.shape[0],), device=device)
-            ee_speed = torch.zeros((orange_pos.shape[0],), device=device)
-            if hasattr(orange.data, "root_linvel_w"):
-                try:
-                    obj_v = orange.data.root_linvel_w  # (num_envs,3)
-                    obj_speed = torch.norm(obj_v, dim=-1).to(device)
-                except Exception:
-                    pass
-            if hasattr(robot_local.data, "body_linvel_w"):
-                try:
-                    v_ee = robot_local.data.body_linvel_w[:, ee_idx, :]  # (num_envs,3)
-                    ee_speed = torch.norm(v_ee, dim=-1).to(device)
-                except Exception:
-                    pass
-            rel_speed = torch.minimum(obj_speed, ee_speed)
-            info['obj_speed'] = obj_speed.detach().cpu().numpy().tolist() if obj_speed.numel() else None
-            info['ee_speed'] = ee_speed.detach().cpu().numpy().tolist() if ee_speed.numel() else None
-            info['rel_speed'] = rel_speed.detach().cpu().numpy().tolist()
-
-            # gripper target: ensure prev_gripper_t is on same device
-            try:
-                if not isinstance(prev_gripper_t, torch.Tensor):
-                    # try to coerce
-                    prev_gripper_t = torch.as_tensor(prev_gripper_t, device=device)
-                else:
-                    prev_gripper_t = prev_gripper_t.to(device)
-                gripper_target = prev_gripper_t.view(-1).detach().cpu().numpy().tolist()
-            except Exception:
-                gripper_target = None
-            info['gripper_target'] = gripper_target
-
-            # Decide masks (create boolean tensors on same device)
-            near_mask = (horiz_dist <= torch.as_tensor(dist_thresh, device=device))
-            slow_mask = (rel_speed <= torch.as_tensor(rel_vel_thresh, device=device))
-
-            if gripper_target is not None:
-                # gripper_target is a python list; build a boolean tensor on device
-                try:
-                    gripper_mask = torch.tensor([gt < gripper_close_threshold for gt in gripper_target], dtype=torch.bool, device=device)
-                except Exception:
-                    gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
-            else:
-                gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
-
-            is_grasp_tensor = near_mask & slow_mask & gripper_mask
-            # convert to python bool for single-env; if multi-env return list
-            is_grasp_list = is_grasp_tensor.detach().cpu().numpy().tolist()
-            is_grasp_any = any(is_grasp_list)
-            return is_grasp_any, info
-
-        except Exception as e:
-            return False, {"error": str(e)}
-
     flag = 1  # which orange to pick
     start_record_state = False
     # main loop (NO pos/quaternion smoothing; gripper still clamped)
@@ -450,44 +342,6 @@ def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tenso
             except Exception as e:
                 print("Failed to start recorder_manager automatically:", e)
         actions = get_expert_action_pose_based(env, step_count, target=f"Orange00{flag}", flag=flag)
-        # 强制把 prev_gripper_target 设为你刚才计算出的 desired（从 actions 取）
-        # 这会让平滑阶段认为上次目标就是现在的目标，从而直接生效
-        prev_gripper_target = actions[:, -1].clone().unsqueeze(-1)
-
-        # ensure dtype/device
-        actions = actions.to(device=env.device, dtype=torch.float32)
-
-        # Decide grasp phase (auto or fallback)
-        is_grasp_auto, grasp_info = is_grasp_phase_auto(env, f"Orange00{flag}", prev_gripper_target,
-                                                       dist_thresh=0.06, rel_vel_thresh=0.08, gripper_close_threshold=0.7)
-        if not is_grasp_auto:
-            is_grasp = is_grasp_phase_by_step(step_count)
-        else:
-            is_grasp = is_grasp_auto
-
-        # --- GRIPPER: clamp per-frame to avoid instant violent close ---
-        desired_gripper = actions[:, -1].clone().unsqueeze(-1)  # (num_envs,1)
-        cur_gripper = prev_gripper_target  # previous target
-        # use different delta in grasp vs normal
-        if isinstance(is_grasp, bool):
-            if is_grasp:
-                use_gripper_delta = torch.full((env.num_envs,), max_gripper_delta_during_grasp, device=env.device, dtype=torch.float32)
-            else:
-                use_gripper_delta = torch.full((env.num_envs,), max_gripper_delta_normal, device=env.device, dtype=torch.float32)
-        else:
-            # per-env list-like
-            use_gripper_delta = torch.tensor([max_gripper_delta_during_grasp if g else max_gripper_delta_normal for g in (is_grasp if hasattr(is_grasp, "__len__") else [is_grasp])], device=env.device, dtype=torch.float32)
-            if use_gripper_delta.numel() != env.num_envs:
-                use_gripper_delta = torch.full((env.num_envs,), max_gripper_delta_normal, device=env.device, dtype=torch.float32)
-
-        delta_gr = torch.clamp(desired_gripper - cur_gripper, -use_gripper_delta.unsqueeze(-1), use_gripper_delta.unsqueeze(-1))
-        smooth_gripper = cur_gripper + delta_gr
-        actions[:, -1] = smooth_gripper.squeeze(-1)
-        prev_gripper_target = smooth_gripper.clone()
-
-        # --- POSITION / QUATERNION: NO SMOOTHING, directly send ---
-        # action layout expected: [pos_x,pos_y,pos_z, quat_x,quat_y,quat_z,quat_w, gripper]
-        smoothed_actions = actions.to(device=env.device, dtype=torch.float32)
 
         step_count += 1
         
@@ -556,13 +410,16 @@ def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tenso
             # --- 更新 step_count/flag 控制逻辑（和你原来逻辑一致） ---
             flag += 1
         else:
-            env.step(smoothed_actions)
+            env.step(actions)
             pass
+        if rate_limiter:
+            rate_limiter.sleep(env)
         
     env.close()
     simulation_app.close()
 
 
 if __name__ == "__main__":
+    # run the main function
     main()
 

From 0e5f0081c2dae93616ff74bc0d562613025802de Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 21:50:35 -0500
Subject: [PATCH 10/68] State Machinecode refactoring.

---
 .../environments/state_machine/pick_orange.py | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 8d5b821a..171abd42 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -2,16 +2,15 @@
 
 """Launch Isaac Sim Simulator first."""
 import multiprocessing
+
 if multiprocessing.get_start_method() != "spawn":
     multiprocessing.set_start_method("spawn", force=True)
-
 import argparse
+
 import os
 import time
-import torch
+
 from isaaclab.app import AppLauncher
-import gymnasium as gym
-import omni.kit.app
 
 # add argparse arguments
 parser = argparse.ArgumentParser(description="leisaac data generation script for pick_orange task.")
@@ -38,6 +37,11 @@
 from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
 from isaaclab_tasks.utils import parse_env_cfg
 
+from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
+
+import torch
+import gymnasium as gym
+
 class RateLimiter:
     """Convenience class for enforcing rates in loops."""
 
@@ -98,10 +102,6 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         env.cfg.return_success_status = success
     return False
 
-from isaaclab.utils.math import quat_apply
-
-def is_grasp_phase_by_step(step_count):
-    return None
 def get_expert_action_pose_based(env, step_count, target, flag):
     device = env.device
     num_envs = env.num_envs
@@ -113,7 +113,6 @@ def get_expert_action_pose_based(env, step_count, target, flag):
     target_pos_w = orange_pos_w.clone()
     
     import math
-    from isaaclab.utils.math import quat_from_euler_xyz, quat_mul, quat_inv
     pitch = math.radians(0)
 
     target_quat_w = quat_from_euler_xyz(
@@ -191,10 +190,9 @@ def apply_triangle_offset(pos_tensor, flag, radius=0.1):
     actions = torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
     return actions
 
-        
-def main():
+MAX_STEPS = 420
 
-    MAX_STEPS = 420
+def main() -> None:
     # prepare
     task_name = getattr(args_cli, "task", None)
     env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs)

From 9750b3d36a7481c13cf83aef861b97a5133b9bfe Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 22:53:29 -0500
Subject: [PATCH 11/68] State Machinecode refactoring.

---
 .../environments/state_machine/pick_orange.py | 196 ++++++++----------
 .../leisaac/devices/auto_generate/__init__.py |   1 -
 .../auto_generate/so101_auto_generate.py      |  36 ----
 3 files changed, 92 insertions(+), 141 deletions(-)
 delete mode 100644 source/leisaac/leisaac/devices/auto_generate/__init__.py
 delete mode 100644 source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 171abd42..90b9fd67 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -23,6 +23,9 @@
 parser.add_argument("--resume", action="store_true")
 parser.add_argument("--num_demos", type=int, default=1)
 parser.add_argument("--quality", action="store_true")
+parser.add_argument("--use_lerobot_recorder", action="store_true", help="whether to use lerobot recorder.")
+parser.add_argument("--lerobot_dataset_repo_id", type=str, default=None, help="Lerobot Dataset repository ID.")
+parser.add_argument("--lerobot_dataset_fps", type=int, default=30, help="Lerobot Dataset frames per second.")
 
 AppLauncher.add_app_launcher_args(parser)
 args_cli = parser.parse_args()
@@ -102,7 +105,7 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         env.cfg.return_success_status = success
     return False
 
-def get_expert_action_pose_based(env, step_count, target, flag):
+def get_expert_action_pose_based(env, step_count, target, orange_now):
     device = env.device
     num_envs = env.num_envs
     orange_pos_w = env.scene[target].data.root_pos_w.clone()
@@ -125,16 +128,16 @@ def get_expert_action_pose_based(env, step_count, target, flag):
         quat_inv(robot_base_quat_w),
         target_quat_w
     )
-    def apply_triangle_offset(pos_tensor, flag, radius=0.1):
+    def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
         """
         给位置张量的 x, y 轴增加等边三角形偏移量。
         Args:
             pos_tensor: (num_envs, 3) 的 target_pos_w 张量
-            flag: 当前是第几个橙子 (1, 2, 3)
+            orange_now: 当前是第几个橙子 (1, 2, 3)
             radius: 距离中心的半径 (默认 0.2米)
         """
 
-        idx = (flag - 1) % 3 
+        idx = (orange_now - 1) % 3 
         angle = idx * (2 * math.pi / 3)
         
         offset_x = radius * math.cos(angle)
@@ -169,17 +172,17 @@ def apply_triangle_offset(pos_tensor, flag, radius=0.1):
     elif step_count < 350:
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:,2] += GRIPPER + 0.1
-        apply_triangle_offset(target_pos_w, flag)
+        apply_triangle_offset(target_pos_w, orange_now)
         gripper_cmd[:] = -1.0
     elif step_count < 380:
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:,2] += GRIPPER + 0.1
-        apply_triangle_offset(target_pos_w, flag)
+        apply_triangle_offset(target_pos_w, orange_now)
         gripper_cmd[:] = 1.0
     elif step_count < 420:
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:,2] += 0.2
-        apply_triangle_offset(target_pos_w, flag)
+        apply_triangle_offset(target_pos_w, orange_now)
         gripper_cmd[:] = 1.0
     else:
         gripper_cmd[:] = 1.0
@@ -193,26 +196,57 @@ def apply_triangle_offset(pos_tensor, flag, radius=0.1):
 MAX_STEPS = 420
 
 def main() -> None:
-    # prepare
-    task_name = getattr(args_cli, "task", None)
-    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs)
-    env_cfg.use_teleop_device("so101_state_machine")
+    """
+    Run a pick-orange state machine in an Isaac Lab manipulation environment.
+
+    Creates the environment, initializes the pick-and-place state machine for
+    picking an orange, and runs the main simulation loop until the application
+    is closed.
+
+    Returns:
+        None
+    """
     output_dir = os.path.dirname(args_cli.dataset_file)
     output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
 
+    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs)
+    env_cfg.use_teleop_device("so101_state_machine")
+    task_name = args_cli.task   
+    is_direct_env = "Direct" in task_name
+    
+    # timeout and terminate preprocess
+    if is_direct_env:
+        env_cfg.never_time_out = True
+        env_cfg.manual_terminate = True
+    else:
+        # modify configuration
+        if hasattr(env_cfg.terminations, "time_out"):
+            env_cfg.terminations.time_out = None
+        if hasattr(env_cfg.terminations, "success"):
+            env_cfg.terminations.success = None
+            
+    # recorder preprocess & manual success terminate preprocess
     if args_cli.record:
-        if args_cli.resume:
-            env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
-            assert os.path.exists(args_cli.dataset_file)
+        if args_cli.use_lerobot_recorder:
+            if args_cli.resume:
+                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_SUCCEEDED_ONLY_RESUME
+            else:
+                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_SUCCEEDED_ONLY
         else:
-            env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
-            assert not os.path.exists(args_cli.dataset_file)
+            if args_cli.resume:
+                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
+                assert os.path.exists(
+                    args_cli.dataset_file
+                ), "the dataset file does not exist, please don't use '--resume' if you want to record a new dataset"
+            else:
+                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
+                assert not os.path.exists(
+                    args_cli.dataset_file
+                ), "the dataset file already exists, please use '--resume' to resume recording"
         env_cfg.recorders.dataset_export_dir_path = output_dir
         env_cfg.recorders.dataset_filename = output_file_name
-
-        is_direct_env = "Direct" in task_name
         if is_direct_env:
             env_cfg.return_success_status = False
         else:
@@ -223,96 +257,50 @@ def main() -> None:
             )
     else:
         env_cfg.recorders = None
-
-
-    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
-    task_name = args_cli.task
-
-    if args_cli.quality:
-        env_cfg.sim.render.antialiasing_mode = "FXAA"
-        env_cfg.sim.render.rendering_mode = "quality"
-
+        
     # create env
+    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
-
-    resume_recorded_demo_count = 0
     
     if args_cli.record:
-        try:
-            del env.recorder_manager
-        except Exception:
-            pass
-        print("Setting up recorder_manager for recording.")
-        env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
-        env.recorder_manager.flush_steps = 100
-        env.recorder_manager.compression = "lzf"
-        if args_cli.resume:
-            resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
-            print(f"Resume recording from existing dataset file with {resume_recorded_demo_count} demonstrations.")
-        current_recorded_demo_count = resume_recorded_demo_count
+        del env.recorder_manager
+        if args_cli.use_lerobot_recorder:
+            from leisaac.enhance.datasets.lerobot_dataset_handler import (
+                LeRobotDatasetCfg,
+            )
+            from leisaac.enhance.managers.lerobot_recorder_manager import (
+                LeRobotRecorderManager,
+            )
 
+            dataset_cfg = LeRobotDatasetCfg(
+                repo_id=args_cli.lerobot_dataset_repo_id,
+                fps=args_cli.lerobot_dataset_fps,
+            )
+            env.recorder_manager = LeRobotRecorderManager(env_cfg.recorders, dataset_cfg, env)
+        else:
+            env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
+            env.recorder_manager.flush_steps = 100
+            env.recorder_manager.compression = "lzf"
+            
+    # rate limiter        
+    rate_limiter = RateLimiter(args_cli.step_hz)
+    
     # init / reset
     if hasattr(env, "initialize"):
         env.initialize()
     env.reset()
-    # Print debug values right after reset to verify randomization actually applied to physics
-    try:
-        target_name = f"Orange00{flag}"
-        print("=== Post-reset diagnostics ===")
-        print("physics_dt:", getattr(env, "physics_dt", None))
-        if target_name in env.scene:
-            phys_pos = env.scene[target_name].data.root_pos_w.clone()
-            print(f"{target_name} physics root_pos_w:", phys_pos[0].cpu().numpy() if phys_pos.ndim>1 else phys_pos.cpu().numpy())
-        else:
-            print(f"{target_name} not found in scene after reset")
-        # also print robot base to ensure base pose consistent
-        if "robot" in env.scene:
-            rb = env.scene["robot"].data.root_pos_w.clone()
-            print("robot root_pos_w:", rb[0].cpu().numpy() if rb.ndim>1 else rb.cpu().numpy())
-        print("===============================")
-    except Exception as e:
-        print("Post-reset diagnostic failed:", e)
-    # try to write lower stiffness / higher damping to sim (best-effort)
-    try:
-        # get joint names / count for printing
-        robot_prim = env.scene["robot"]
-        joint_names = robot_prim.data.joint_names if hasattr(robot_prim, "data") and hasattr(robot_prim.data, "joint_names") else None
-    except Exception:
-        joint_names = None
-
-    if joint_names is not None:
-        print("Detected joints:", joint_names)
-        # example starting values - tune these per-joint as needed
-        n_joints = len(joint_names)
-        # conservative starting point (you can further reduce stiffness)
-        start_stiffness = [1000.0] * n_joints
-        start_damping = [30.0] * n_joints
-    else:
-        start_stiffness = None
-        start_damping = None
-
-    # attempt to write gains into simulation (API differs across versions; best-effort)
-    if start_stiffness is not None:
-        try:
-            # preferred helper if available
-            robot_art.write_joint_stiffness_to_sim(start_stiffness)
-            robot_art.write_joint_damping_to_sim(start_damping)
-            print("Wrote joint stiffness/damping via robot_art helper.")
-        except Exception:
-            # fallback: print warning - user may need to set gains in asset or use other API
-            print("Warning: robot_art helper methods for stiffness/damping not available. If you need to set them, check robot_art API or the asset YAML.")
-
-    # rate limiter
-    rate_limiter = RateLimiter(args_cli.step_hz)
+    
+    resume_recorded_demo_count = 0
+    if args_cli.record and args_cli.resume:
+        resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
+        print(f"Resume recording from existing dataset file with {resume_recorded_demo_count} demonstrations.")
+    current_recorded_demo_count = resume_recorded_demo_count
+    
     step_count = 0
-    flag = 1  # which orange to pick
+    orange_now = 1  # which orange to pick
     start_record_state = False
-    # main loop (NO pos/quaternion smoothing; gripper still clamped)
-    while simulation_app.is_running():
-        # choose Lula-based actions (supports num_envs>1)
-        dt = env.physics_dt
-        env.scene.update(dt)
-        
+    
+    while simulation_app.is_running() and not simulation_app.is_exiting():
         # 放在 while simulation_app.is_running(): 的顶部，紧跟 env.scene.update(dt) 之后或 actions 之前
         if args_cli.record and not start_record_state:
             print("Auto: enabling recorder / start recording state.")
@@ -339,12 +327,12 @@ def main() -> None:
                     print("Warning: env.recorder_manager is None — recorder not created.")
             except Exception as e:
                 print("Failed to start recorder_manager automatically:", e)
-        actions = get_expert_action_pose_based(env, step_count, target=f"Orange00{flag}", flag=flag)
+        actions = get_expert_action_pose_based(env, step_count, target=f"Orange00{orange_now}", orange_now=orange_now)
 
         step_count += 1
         
         from isaaclab.managers import SceneEntityCfg
-        if step_count >= MAX_STEPS and flag >= 3:
+        if step_count >= MAX_STEPS and orange_now >= 3:
             # --- 判断本次 episode 是否被认为成功（使用 env 的 task_done） ---
             try:
                 print(f"完成一轮（{step_count} 步）, 检查任务成功状态...")
@@ -394,19 +382,19 @@ def main() -> None:
             
             # --- bookkeeping: 更新 recorder 导出的计数并判断是否达到 --num_demos ---
             env.reset()
-            flag = 1
+            orange_now = 1
             step_count = 0    
             if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
                 print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
                 break
-        elif step_count >= MAX_STEPS and flag < 3:
+        elif step_count >= MAX_STEPS and orange_now < 3:
             step_count = 0
 
             # --- 不论成功与否，都重置环境（你的要求） ---
-            print(f"完成一轮（{step_count} 步）, 重置环境，准备下一个橙子 (flag {flag} -> {flag+1})")
+            print(f"完成一轮（{step_count} 步）, 重置环境，准备下一个橙子 (orange_now {orange_now} -> {orange_now+1})")
 
-            # --- 更新 step_count/flag 控制逻辑（和你原来逻辑一致） ---
-            flag += 1
+            # --- 更新 step_count/orange_now 控制逻辑（和你原来逻辑一致） ---
+            orange_now += 1
         else:
             env.step(actions)
             pass
diff --git a/source/leisaac/leisaac/devices/auto_generate/__init__.py b/source/leisaac/leisaac/devices/auto_generate/__init__.py
deleted file mode 100644
index f0a78f4f..00000000
--- a/source/leisaac/leisaac/devices/auto_generate/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .so101_state_machine import SO101Statemachine
diff --git a/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py b/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py
deleted file mode 100644
index 2da171a3..00000000
--- a/source/leisaac/leisaac/devices/auto_generate/so101_auto_generate.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import carb
-import numpy as np
-
-from ..device_base import Device
-
-
-class SO101Statemachine(Device):
-    """
-    A statemachine controller to generate poses for so101 single arm.
-    """
-
-    def __init__(self, env, sensitivity: float = 1.0):
-        super().__init__(env, "so101_state_machine")
-
-        # store inputs
-        self.pos_sensitivity = 0.01 * sensitivity
-        self.joint_sensitivity = 0.15 * sensitivity
-        self.rot_sensitivity = 0.15 * sensitivity
-
-        # command buffers (dx, dy, dz, droll, dpitch, dyaw, d_shoulder_pan, d_gripper)
-        self._delta_action = np.zeros(8)
-
-        # initialize the target frame
-        self.asset_name = "robot"
-        self.robot_asset = self.env.scene[self.asset_name]
-
-        self.target_frame = "gripper"
-        body_idxs, _ = self.robot_asset.find_bodies(self.target_frame)
-        self.target_frame_idx = body_idxs[0]
-
-
-    def get_device_state(self):
-        return self._convert_delta_from_frame(self._delta_action)
-
-    def reset(self):
-        self._delta_action[:] = 0.0
\ No newline at end of file

From 34be0024e2d7826427982115bed2befc6a4946f0 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 22:58:49 -0500
Subject: [PATCH 12/68] State Machinecode refactoring.

---
 .../environments/state_machine/pick_orange.py | 34 +++----------------
 1 file changed, 5 insertions(+), 29 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 90b9fd67..0e6f2241 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -34,13 +34,13 @@
 app_launcher = AppLauncher(app_launcher_args)
 simulation_app = app_launcher.app
 
-from leisaac.enhance.managers import StreamingRecorderManager, EnhanceDatasetExportMode
-from leisaac.tasks.pick_orange.mdp import task_done
 from isaaclab.managers import DatasetExportMode, TerminationTermCfg
 from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
 from isaaclab_tasks.utils import parse_env_cfg
-
 from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
+from leisaac.enhance.managers import StreamingRecorderManager, EnhanceDatasetExportMode
+from leisaac.tasks.pick_orange.mdp import task_done
+from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
 
 import torch
 import gymnasium as gym
@@ -271,7 +271,6 @@ def main() -> None:
             from leisaac.enhance.managers.lerobot_recorder_manager import (
                 LeRobotRecorderManager,
             )
-
             dataset_cfg = LeRobotDatasetCfg(
                 repo_id=args_cli.lerobot_dataset_repo_id,
                 fps=args_cli.lerobot_dataset_fps,
@@ -302,31 +301,8 @@ def main() -> None:
     
     while simulation_app.is_running() and not simulation_app.is_exiting():
         # 放在 while simulation_app.is_running(): 的顶部，紧跟 env.scene.update(dt) 之后或 actions 之前
-        if args_cli.record and not start_record_state:
-            print("Auto: enabling recorder / start recording state.")
-            start_record_state = True
-            # Best-effort: call recorder start/enable APIs if available
-            try:
-                rm = getattr(env, "recorder_manager", None)
-                if rm is not None:
-                    # try common start-like method names across versions
-                    for meth in ("start", "start_recording", "begin_record", "enable", "start_capture"):
-                        if hasattr(rm, meth) and callable(getattr(rm, meth)):
-                            print(f"Calling recorder_manager.{meth}()")
-                            getattr(rm, meth)()
-                            break
-                    # also try to ensure flush_steps/compression are set
-                    try:
-                        if hasattr(rm, "flush_steps"):
-                            rm.flush_steps = getattr(rm, "flush_steps", 100)
-                        if hasattr(rm, "compression"):
-                            rm.compression = getattr(rm, "compression", "lzf")
-                    except Exception:
-                        pass
-                else:
-                    print("Warning: env.recorder_manager is None — recorder not created.")
-            except Exception as e:
-                print("Failed to start recorder_manager automatically:", e)
+        if env.cfg.dynamic_reset_gripper_effort_limit:
+            dynamic_reset_gripper_effort_limit_sim(env, args_cli.teleop_device)
         actions = get_expert_action_pose_based(env, step_count, target=f"Orange00{orange_now}", orange_now=orange_now)
 
         step_count += 1

From d590d276893d3e61c9c8a157df590612ba8dfb38 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 23:00:23 -0500
Subject: [PATCH 13/68] State Machinecode refactoring.

---
 scripts/environments/state_machine/pick_orange.py            | 2 +-
 source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 0e6f2241..a7c09384 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -38,6 +38,7 @@
 from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
 from isaaclab_tasks.utils import parse_env_cfg
 from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
+from isaaclab.managers import SceneEntityCfg
 from leisaac.enhance.managers import StreamingRecorderManager, EnhanceDatasetExportMode
 from leisaac.tasks.pick_orange.mdp import task_done
 from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
@@ -307,7 +308,6 @@ def main() -> None:
 
         step_count += 1
         
-        from isaaclab.managers import SceneEntityCfg
         if step_count >= MAX_STEPS and orange_now >= 3:
             # --- 判断本次 episode 是否被认为成功（使用 env 的 task_done） ---
             try:
diff --git a/source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py b/source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py
index e3750d62..de747bfc 100644
--- a/source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py
+++ b/source/leisaac/leisaac/tasks/pick_orange/mdp/terminations.py
@@ -50,9 +50,9 @@ def task_done(
         done = torch.logical_and(done, orange_y > plate_y + y_range[0])
         done = torch.logical_and(done, orange_height < plate_height + height_range[1])
         done = torch.logical_and(done, orange_height > plate_height + height_range[0])
-    """
+
     joint_pos = env.scene["robot"].data.joint_pos
     joint_names = env.scene["robot"].data.joint_names
     done = torch.logical_and(done, is_so101_at_rest_pose(joint_pos, joint_names))
-    """
+
     return done

From b8c97a983a9b30b93060f541ee839d2bc4841245 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 23:05:45 -0500
Subject: [PATCH 14/68] State Machinecode refactoring.

---
 .../environments/state_machine/pick_orange.py   | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index a7c09384..415ae764 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -325,9 +325,12 @@ def main() -> None:
                 success = bool(success_tensor.all().item())
                 print("任务成功状态:", success)
             except Exception as e:
-                print("task_done failed:", e)
+                print("Task failed due to:", e)
                 success = False
-
+            if start_record_state:
+                if args_cli.record:
+                    print("Stop Recording!!!")
+                start_record_state = False
             # ✅ 只有成功时，才“告诉 recorder 这一轮要存”
             if args_cli.record and success:
                 print("✅ 任务成功，标记本次演示为 SUCCESS")
@@ -372,15 +375,19 @@ def main() -> None:
             # --- 更新 step_count/orange_now 控制逻辑（和你原来逻辑一致） ---
             orange_now += 1
         else:
+            if not start_record_state:
+                if args_cli.record:
+                    print("Start Recording!!!")
+                start_record_state = True
             env.step(actions)
-            pass
         if rate_limiter:
             rate_limiter.sleep(env)
-        
+            
+    if args_cli.record and hasattr(env.recorder_manager, "finalize"):
+        env.recorder_manager.finalize()    
     env.close()
     simulation_app.close()
 
-
 if __name__ == "__main__":
     # run the main function
     main()

From 4567af780f807fb42d5ee337248b40e708842fa8 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Tue, 27 Jan 2026 23:29:36 -0500
Subject: [PATCH 15/68] State Machinecode refactoring.

---
 .../environments/state_machine/pick_orange.py | 215 +++++++++++-------
 1 file changed, 136 insertions(+), 79 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 415ae764..0b212f66 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -106,16 +106,47 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         env.cfg.return_success_status = success
     return False
 
-def get_expert_action_pose_based(env, step_count, target, orange_now):
+def state_machine(env, step_count, target, orange_now):
+    """
+    Finite state machine for a pick-and-place task with a robot gripper.
+
+    This function generates low-level action commands based on the current
+    simulation step. The robot approaches an orange, grasps it, lifts it,
+    moves it above a plate, and places it at one vertex of an equilateral
+    triangle arrangement.
+
+    Args:
+        env:
+            The simulation environment instance. It is expected to provide
+            access to scene objects, device information, and number of parallel
+            environments.
+        step_count (int):
+            The current timestep of the episode, used to switch between
+            different phases of the state machine.
+        target (str):
+            The name of the target object (e.g., an orange) in the scene.
+        orange_now (int):
+            Index of the current orange being placed (1, 2, or 3). This
+            determines the triangle offset on the plate.
+
+    Returns:
+        torch.Tensor:
+            Action tensor of shape (num_envs, action_dim), containing:
+            - target position in the robot base frame (x, y, z)
+            - target orientation as a quaternion (x, y, z, w)
+            - gripper command (open / close)
+    """
+
     device = env.device
     num_envs = env.num_envs
+
     orange_pos_w = env.scene[target].data.root_pos_w.clone()
     plate_pos_w = env.scene["Plate"].data.root_pos_w.clone()
     robot_base_pos_w = env.scene["robot"].data.root_pos_w.clone()
     robot_base_quat_w = env.scene["robot"].data.root_quat_w.clone()
 
     target_pos_w = orange_pos_w.clone()
-    
+
     import math
     pitch = math.radians(0)
 
@@ -129,73 +160,88 @@ def get_expert_action_pose_based(env, step_count, target, orange_now):
         quat_inv(robot_base_quat_w),
         target_quat_w
     )
+
     def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
         """
-        给位置张量的 x, y 轴增加等边三角形偏移量。
+        Apply an equilateral triangle offset on the x-y plane.
+
+        The offset places objects evenly around a circle, forming the vertices
+        of an equilateral triangle. This is used to arrange multiple oranges
+        on the plate without overlap.
+
         Args:
-            pos_tensor: (num_envs, 3) 的 target_pos_w 张量
-            orange_now: 当前是第几个橙子 (1, 2, 3)
-            radius: 距离中心的半径 (默认 0.2米)
+            pos_tensor (torch.Tensor):
+                Position tensor of shape (num_envs, 3) in world coordinates.
+            orange_now (int):
+                Index of the current orange (1, 2, or 3).
+            radius (float, optional):
+                Distance from the center of the plate to each triangle vertex.
+                Defaults to 0.1 meters.
+
+        Returns:
+            torch.Tensor:
+                The input position tensor with the triangle offset applied.
         """
 
-        idx = (orange_now - 1) % 3 
+        idx = (orange_now - 1) % 3
         angle = idx * (2 * math.pi / 3)
-        
+
         offset_x = radius * math.cos(angle)
         offset_y = radius * math.sin(angle)
-        
+
         pos_tensor[:, 0] += offset_x
         pos_tensor[:, 1] += offset_y
-        
+
         return pos_tensor
-    
+
     GRIPPER = 0.1
-    target_pos_w[:,0] -= 0.03
-    
-    gripper_cmd = torch.full((num_envs,1), 1.0, device=device)
+    target_pos_w[:, 0] -= 0.03
+
+    gripper_cmd = torch.full((num_envs, 1), 1.0, device=device)
+
     if step_count < 120:
         gripper_cmd[:] = 1.0
-        target_pos_w[:,2] += 0.1
-        target_pos_w[:,2] += GRIPPER
+        target_pos_w[:, 2] += 0.1 + GRIPPER
     elif step_count < 150:
         gripper_cmd[:] = 1.0
-        target_pos_w[:,2] += GRIPPER
+        target_pos_w[:, 2] += GRIPPER
     elif step_count < 180:
         gripper_cmd[:] = -1.0
-        target_pos_w[:,2] += GRIPPER
+        target_pos_w[:, 2] += GRIPPER
     elif step_count < 220:
         gripper_cmd[:] = -1.0
-        target_pos_w[:,2] += 0.25
+        target_pos_w[:, 2] += 0.25
     elif step_count < 320:
         gripper_cmd[:] = -1.0
         target_pos_w = plate_pos_w.clone()
-        target_pos_w[:,2] += 0.25
+        target_pos_w[:, 2] += 0.25
     elif step_count < 350:
         target_pos_w = plate_pos_w.clone()
-        target_pos_w[:,2] += GRIPPER + 0.1
+        target_pos_w[:, 2] += GRIPPER + 0.1
         apply_triangle_offset(target_pos_w, orange_now)
         gripper_cmd[:] = -1.0
     elif step_count < 380:
         target_pos_w = plate_pos_w.clone()
-        target_pos_w[:,2] += GRIPPER + 0.1
+        target_pos_w[:, 2] += GRIPPER + 0.1
         apply_triangle_offset(target_pos_w, orange_now)
         gripper_cmd[:] = 1.0
     elif step_count < 420:
         target_pos_w = plate_pos_w.clone()
-        target_pos_w[:,2] += 0.2
+        target_pos_w[:, 2] += 0.2
         apply_triangle_offset(target_pos_w, orange_now)
         gripper_cmd[:] = 1.0
     else:
         gripper_cmd[:] = 1.0
-    
-        
+
     diff_w = target_pos_w - robot_base_pos_w
     target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
+
     actions = torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
     return actions
 
-MAX_STEPS = 420
 
+
+MAX_STEPS = 420
 def main() -> None:
     """
     Run a pick-orange state machine in an Isaac Lab manipulation environment.
@@ -213,22 +259,25 @@ def main() -> None:
         os.makedirs(output_dir)
 
     env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs)
-    env_cfg.use_teleop_device("so101_state_machine")
-    task_name = args_cli.task   
+
+    # Temporarily use so101_state_machine as the teleoperation device
+    device = "so101_state_machine"
+    env_cfg.use_teleop_device(device)
+    task_name = args_cli.task
     is_direct_env = "Direct" in task_name
-    
-    # timeout and terminate preprocess
+
+    # Timeout and termination preprocessing
     if is_direct_env:
         env_cfg.never_time_out = True
-        env_cfg.manual_terminate = True
+        env_cfg.auto_terminate = True
     else:
-        # modify configuration
+        # Modify termination configuration
         if hasattr(env_cfg.terminations, "time_out"):
             env_cfg.terminations.time_out = None
         if hasattr(env_cfg.terminations, "success"):
             env_cfg.terminations.success = None
-            
-    # recorder preprocess & manual success terminate preprocess
+
+    # Recorder preprocessing & manual success-termination preprocessing
     if args_cli.record:
         if args_cli.use_lerobot_recorder:
             if args_cli.resume:
@@ -240,12 +289,12 @@ def main() -> None:
                 env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
                 assert os.path.exists(
                     args_cli.dataset_file
-                ), "the dataset file does not exist, please don't use '--resume' if you want to record a new dataset"
+                ), "The dataset file does not exist. Do not use '--resume' when recording a new dataset."
             else:
                 env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
                 assert not os.path.exists(
                     args_cli.dataset_file
-                ), "the dataset file already exists, please use '--resume' to resume recording"
+                ), "The dataset file already exists. Use '--resume' to resume recording."
         env_cfg.recorders.dataset_export_dir_path = output_dir
         env_cfg.recorders.dataset_filename = output_file_name
         if is_direct_env:
@@ -258,20 +307,17 @@ def main() -> None:
             )
     else:
         env_cfg.recorders = None
-        
-    # create env
+
+    # Create environment
     env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
-    
+
     if args_cli.record:
         del env.recorder_manager
         if args_cli.use_lerobot_recorder:
-            from leisaac.enhance.datasets.lerobot_dataset_handler import (
-                LeRobotDatasetCfg,
-            )
-            from leisaac.enhance.managers.lerobot_recorder_manager import (
-                LeRobotRecorderManager,
-            )
+            from leisaac.enhance.datasets.lerobot_dataset_handler import LeRobotDatasetCfg
+            from leisaac.enhance.managers.lerobot_recorder_manager import LeRobotRecorderManager
+
             dataset_cfg = LeRobotDatasetCfg(
                 repo_id=args_cli.lerobot_dataset_repo_id,
                 fps=args_cli.lerobot_dataset_fps,
@@ -281,37 +327,38 @@ def main() -> None:
             env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
             env.recorder_manager.flush_steps = 100
             env.recorder_manager.compression = "lzf"
-            
-    # rate limiter        
+
+    # Rate limiter
     rate_limiter = RateLimiter(args_cli.step_hz)
-    
-    # init / reset
+
+    # Initialize / reset
     if hasattr(env, "initialize"):
         env.initialize()
     env.reset()
-    
+
     resume_recorded_demo_count = 0
     if args_cli.record and args_cli.resume:
         resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
-        print(f"Resume recording from existing dataset file with {resume_recorded_demo_count} demonstrations.")
+        print(f"Resuming recording from existing dataset with {resume_recorded_demo_count} demonstrations.")
     current_recorded_demo_count = resume_recorded_demo_count
-    
+
     step_count = 0
-    orange_now = 1  # which orange to pick
+    orange_now = 1  # Index of the current orange to pick
     start_record_state = False
-    
+
     while simulation_app.is_running() and not simulation_app.is_exiting():
-        # 放在 while simulation_app.is_running(): 的顶部，紧跟 env.scene.update(dt) 之后或 actions 之前
+        # Place this at the top of the loop, right after env.scene.update(dt)
+        # or before computing actions
         if env.cfg.dynamic_reset_gripper_effort_limit:
-            dynamic_reset_gripper_effort_limit_sim(env, args_cli.teleop_device)
-        actions = get_expert_action_pose_based(env, step_count, target=f"Orange00{orange_now}", orange_now=orange_now)
+            dynamic_reset_gripper_effort_limit_sim(env, device)
 
+        actions = state_machine(env, step_count, target=f"Orange00{orange_now}", orange_now=orange_now)
         step_count += 1
-        
+
         if step_count >= MAX_STEPS and orange_now >= 3:
-            # --- 判断本次 episode 是否被认为成功（使用 env 的 task_done） ---
+            # --- Check whether the current episode is considered successful ---
             try:
-                print(f"完成一轮（{step_count} 步）, 检查任务成功状态...")
+                print(f"Completed one cycle ({step_count} steps). Checking task success status...")
                 success_tensor = task_done(
                     env,
                     oranges_cfg=[
@@ -321,26 +368,28 @@ def main() -> None:
                     ],
                     plate_cfg=SceneEntityCfg("Plate"),
                 )
-                # 多 env 时，我们认为当所有 env 都为 True 才视为 success（根据你的需要可改为 any）
+                # For multiple environments, consider success only if all envs succeed
                 success = bool(success_tensor.all().item())
-                print("任务成功状态:", success)
+                print("Task success status:", success)
             except Exception as e:
                 print("Task failed due to:", e)
                 success = False
+
             if start_record_state:
                 if args_cli.record:
-                    print("Stop Recording!!!")
+                    print("Stop recording.")
                 start_record_state = False
-            # ✅ 只有成功时，才“告诉 recorder 这一轮要存”
+
+            # Only mark the episode as successful if the task succeeds
             if args_cli.record and success:
-                print("✅ 任务成功，标记本次演示为 SUCCESS")
+                print("✅ Task succeeded. Marking this demonstration as SUCCESS.")
                 auto_terminate(env, True)
-                print("SUCCESS!!!!!!!")
+                print("SUCCESS.")
                 current_recorded_demo_count += 1
             else:
-                print("❌ 任务失败，标记本次演示为 FAILURE")
+                print("❌ Task failed. Marking this demonstration as FAILURE.")
                 auto_terminate(env, False)
-                
+
             if (
                 args_cli.record
                 and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
@@ -350,41 +399,49 @@ def main() -> None:
                     env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
                 )
                 print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
+
             if (
                 args_cli.record
                 and args_cli.num_demos > 0
                 and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
                 >= args_cli.num_demos
             ):
-                print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
                 break
-            
-            # --- bookkeeping: 更新 recorder 导出的计数并判断是否达到 --num_demos ---
+
+            # Reset environment and bookkeeping
             env.reset()
             orange_now = 1
-            step_count = 0    
+            step_count = 0
+
             if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
-                print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
                 break
+
         elif step_count >= MAX_STEPS and orange_now < 3:
             step_count = 0
 
-            # --- 不论成功与否，都重置环境（你的要求） ---
-            print(f"完成一轮（{step_count} 步）, 重置环境，准备下一个橙子 (orange_now {orange_now} -> {orange_now+1})")
+            # Reset environment regardless of success or failure
+            print(
+                f"Completed one cycle. Resetting environment and moving to next orange "
+                f"(orange_now {orange_now} -> {orange_now + 1})."
+            )
 
-            # --- 更新 step_count/orange_now 控制逻辑（和你原来逻辑一致） ---
+            # Update control variables (same logic as original)
             orange_now += 1
         else:
             if not start_record_state:
                 if args_cli.record:
-                    print("Start Recording!!!")
+                    print("Start recording.")
                 start_record_state = True
             env.step(actions)
+
         if rate_limiter:
             rate_limiter.sleep(env)
-            
+
     if args_cli.record and hasattr(env.recorder_manager, "finalize"):
-        env.recorder_manager.finalize()    
+        env.recorder_manager.finalize()
+
     env.close()
     simulation_app.close()
 

From 6ad6606032aef7710e82b31e6accba8ff371df18 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 28 Jan 2026 00:16:45 -0500
Subject: [PATCH 16/68] Add State Machine code.

---
 .../environments/state_machine/pick_orange.py | 106 ++++++++++--------
 1 file changed, 58 insertions(+), 48 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 0b212f66..615eb9de 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -44,6 +44,7 @@
 from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
 
 import torch
+import math
 import gymnasium as gym
 
 class RateLimiter:
@@ -106,6 +107,39 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         env.cfg.return_success_status = success
     return False
 
+def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
+    """
+    Apply an equilateral triangle offset on the x-y plane.
+
+    The offset places objects evenly around a circle, forming the vertices
+    of an equilateral triangle. This is used to arrange multiple oranges
+    on the plate without overlap.
+
+    Args:
+        pos_tensor (torch.Tensor):
+            Position tensor of shape (num_envs, 3) in world coordinates.
+        orange_now (int):
+            Index of the current orange (1, 2, or 3).
+        radius (float, optional):
+            Distance from the center of the plate to each triangle vertex.
+            Defaults to 0.1 meters.
+
+    Returns:
+        torch.Tensor:
+            The input position tensor with the triangle offset applied.
+    """
+
+    idx = (orange_now - 1) % 3
+    angle = idx * (2 * math.pi / 3)
+
+    offset_x = radius * math.cos(angle)
+    offset_y = radius * math.sin(angle)
+
+    pos_tensor[:, 0] += offset_x
+    pos_tensor[:, 1] += offset_y
+
+    return pos_tensor
+
 def state_machine(env, step_count, target, orange_now):
     """
     Finite state machine for a pick-and-place task with a robot gripper.
@@ -145,9 +179,6 @@ def state_machine(env, step_count, target, orange_now):
     robot_base_pos_w = env.scene["robot"].data.root_pos_w.clone()
     robot_base_quat_w = env.scene["robot"].data.root_quat_w.clone()
 
-    target_pos_w = orange_pos_w.clone()
-
-    import math
     pitch = math.radians(0)
 
     target_quat_w = quat_from_euler_xyz(
@@ -160,71 +191,52 @@ def state_machine(env, step_count, target, orange_now):
         quat_inv(robot_base_quat_w),
         target_quat_w
     )
-
-    def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
-        """
-        Apply an equilateral triangle offset on the x-y plane.
-
-        The offset places objects evenly around a circle, forming the vertices
-        of an equilateral triangle. This is used to arrange multiple oranges
-        on the plate without overlap.
-
-        Args:
-            pos_tensor (torch.Tensor):
-                Position tensor of shape (num_envs, 3) in world coordinates.
-            orange_now (int):
-                Index of the current orange (1, 2, or 3).
-            radius (float, optional):
-                Distance from the center of the plate to each triangle vertex.
-                Defaults to 0.1 meters.
-
-        Returns:
-            torch.Tensor:
-                The input position tensor with the triangle offset applied.
-        """
-
-        idx = (orange_now - 1) % 3
-        angle = idx * (2 * math.pi / 3)
-
-        offset_x = radius * math.cos(angle)
-        offset_y = radius * math.sin(angle)
-
-        pos_tensor[:, 0] += offset_x
-        pos_tensor[:, 1] += offset_y
-
-        return pos_tensor
-
     GRIPPER = 0.1
-    target_pos_w[:, 0] -= 0.03
-
+    
     gripper_cmd = torch.full((num_envs, 1), 1.0, device=device)
 
+    #move above orange
     if step_count < 120:
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
         gripper_cmd[:] = 1.0
         target_pos_w[:, 2] += 0.1 + GRIPPER
+    #lower to orange
     elif step_count < 150:
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
         gripper_cmd[:] = 1.0
         target_pos_w[:, 2] += GRIPPER
+    #close gripper
     elif step_count < 180:
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
         gripper_cmd[:] = -1.0
         target_pos_w[:, 2] += GRIPPER
+    #lift orange
     elif step_count < 220:
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
         gripper_cmd[:] = -1.0
         target_pos_w[:, 2] += 0.25
+    #move above plate
     elif step_count < 320:
         gripper_cmd[:] = -1.0
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:, 2] += 0.25
+    #lower to plate
     elif step_count < 350:
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:, 2] += GRIPPER + 0.1
         apply_triangle_offset(target_pos_w, orange_now)
         gripper_cmd[:] = -1.0
+    # release orange
     elif step_count < 380:
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:, 2] += GRIPPER + 0.1
         apply_triangle_offset(target_pos_w, orange_now)
         gripper_cmd[:] = 1.0
+    # lift gripper
     elif step_count < 420:
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:, 2] += 0.2
@@ -263,10 +275,11 @@ def main() -> None:
     # Temporarily use so101_state_machine as the teleoperation device
     device = "so101_state_machine"
     env_cfg.use_teleop_device(device)
+    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
     task_name = args_cli.task
-    is_direct_env = "Direct" in task_name
-
+    
     # Timeout and termination preprocessing
+    is_direct_env = "Direct" in task_name
     if is_direct_env:
         env_cfg.never_time_out = True
         env_cfg.auto_terminate = True
@@ -309,9 +322,8 @@ def main() -> None:
         env_cfg.recorders = None
 
     # Create environment
-    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
-
+        
     if args_cli.record:
         del env.recorder_manager
         if args_cli.use_lerobot_recorder:
@@ -341,11 +353,11 @@ def main() -> None:
         resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
         print(f"Resuming recording from existing dataset with {resume_recorded_demo_count} demonstrations.")
     current_recorded_demo_count = resume_recorded_demo_count
-
+    
     step_count = 0
     orange_now = 1  # Index of the current orange to pick
     start_record_state = False
-
+    
     while simulation_app.is_running() and not simulation_app.is_exiting():
         # Place this at the top of the loop, right after env.scene.update(dt)
         # or before computing actions
@@ -353,8 +365,6 @@ def main() -> None:
             dynamic_reset_gripper_effort_limit_sim(env, device)
 
         actions = state_machine(env, step_count, target=f"Orange00{orange_now}", orange_now=orange_now)
-        step_count += 1
-
         if step_count >= MAX_STEPS and orange_now >= 3:
             # --- Check whether the current episode is considered successful ---
             try:
@@ -435,7 +445,7 @@ def main() -> None:
                     print("Start recording.")
                 start_record_state = True
             env.step(actions)
-
+            step_count += 1
         if rate_limiter:
             rate_limiter.sleep(env)
 

From e694585b75d6f5526dadb981fb404b533fc373b2 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 00:19:34 -0500
Subject: [PATCH 17/68] Apply pre-commit fixes (black/isort/pyupgrade) for
 several files

---
 .../environments/state_machine/pick_orange.py | 71 +++++++++----------
 .../teleoperation/teleop_se3_agent.py         |  1 +
 .../leisaac/controllers/pose_expert.py        | 41 +++++++----
 .../leisaac/leisaac/devices/action_process.py |  2 +-
 .../leisaac/enhance/envs/direct_rl_env_cfg.py |  2 +-
 source/leisaac/leisaac/utils/usd_fixes.py     |  1 +
 6 files changed, 64 insertions(+), 54 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 615eb9de..8b3d964e 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -6,7 +6,6 @@
 if multiprocessing.get_start_method() != "spawn":
     multiprocessing.set_start_method("spawn", force=True)
 import argparse
-
 import os
 import time
 
@@ -34,18 +33,18 @@
 app_launcher = AppLauncher(app_launcher_args)
 simulation_app = app_launcher.app
 
-from isaaclab.managers import DatasetExportMode, TerminationTermCfg
+import math
+
+import gymnasium as gym
+import torch
 from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
-from isaaclab_tasks.utils import parse_env_cfg
+from isaaclab.managers import DatasetExportMode, SceneEntityCfg, TerminationTermCfg
 from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
-from isaaclab.managers import SceneEntityCfg
-from leisaac.enhance.managers import StreamingRecorderManager, EnhanceDatasetExportMode
+from isaaclab_tasks.utils import parse_env_cfg
+from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
 from leisaac.tasks.pick_orange.mdp import task_done
 from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
 
-import torch
-import math
-import gymnasium as gym
 
 class RateLimiter:
     """Convenience class for enforcing rates in loops."""
@@ -86,20 +85,12 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         if success:
             env.termination_manager.set_term_cfg(
                 "success",
-                TerminationTermCfg(
-                    func=lambda env: torch.ones(
-                        env.num_envs, dtype=torch.bool, device=env.device
-                    )
-                ),
+                TerminationTermCfg(func=lambda env: torch.ones(env.num_envs, dtype=torch.bool, device=env.device)),
             )
         else:
             env.termination_manager.set_term_cfg(
                 "success",
-                TerminationTermCfg(
-                    func=lambda env: torch.zeros(
-                        env.num_envs, dtype=torch.bool, device=env.device
-                    )
-                ),
+                TerminationTermCfg(func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)),
             )
         env.termination_manager.compute()
     elif hasattr(env, "_get_dones"):
@@ -107,6 +98,7 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         env.cfg.return_success_status = success
     return False
 
+
 def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
     """
     Apply an equilateral triangle offset on the x-y plane.
@@ -140,6 +132,7 @@ def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
 
     return pos_tensor
 
+
 def state_machine(env, step_count, target, orange_now):
     """
     Finite state machine for a pick-and-place task with a robot gripper.
@@ -187,44 +180,41 @@ def state_machine(env, step_count, target, orange_now):
         torch.tensor(0.0, device=device),
     ).repeat(num_envs, 1)
 
-    target_quat = quat_mul(
-        quat_inv(robot_base_quat_w),
-        target_quat_w
-    )
+    target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
     GRIPPER = 0.1
-    
+
     gripper_cmd = torch.full((num_envs, 1), 1.0, device=device)
 
-    #move above orange
+    # move above orange
     if step_count < 120:
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
         gripper_cmd[:] = 1.0
         target_pos_w[:, 2] += 0.1 + GRIPPER
-    #lower to orange
+    # lower to orange
     elif step_count < 150:
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
         gripper_cmd[:] = 1.0
         target_pos_w[:, 2] += GRIPPER
-    #close gripper
+    # close gripper
     elif step_count < 180:
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
         gripper_cmd[:] = -1.0
         target_pos_w[:, 2] += GRIPPER
-    #lift orange
+    # lift orange
     elif step_count < 220:
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
         gripper_cmd[:] = -1.0
         target_pos_w[:, 2] += 0.25
-    #move above plate
+    # move above plate
     elif step_count < 320:
         gripper_cmd[:] = -1.0
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:, 2] += 0.25
-    #lower to plate
+    # lower to plate
     elif step_count < 350:
         target_pos_w = plate_pos_w.clone()
         target_pos_w[:, 2] += GRIPPER + 0.1
@@ -252,8 +242,9 @@ def state_machine(env, step_count, target, orange_now):
     return actions
 
 
-
 MAX_STEPS = 420
+
+
 def main() -> None:
     """
     Run a pick-orange state machine in an Isaac Lab manipulation environment.
@@ -277,7 +268,7 @@ def main() -> None:
     env_cfg.use_teleop_device(device)
     env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
     task_name = args_cli.task
-    
+
     # Timeout and termination preprocessing
     is_direct_env = "Direct" in task_name
     if is_direct_env:
@@ -323,12 +314,16 @@ def main() -> None:
 
     # Create environment
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
-        
+
     if args_cli.record:
         del env.recorder_manager
         if args_cli.use_lerobot_recorder:
-            from leisaac.enhance.datasets.lerobot_dataset_handler import LeRobotDatasetCfg
-            from leisaac.enhance.managers.lerobot_recorder_manager import LeRobotRecorderManager
+            from leisaac.enhance.datasets.lerobot_dataset_handler import (
+                LeRobotDatasetCfg,
+            )
+            from leisaac.enhance.managers.lerobot_recorder_manager import (
+                LeRobotRecorderManager,
+            )
 
             dataset_cfg = LeRobotDatasetCfg(
                 repo_id=args_cli.lerobot_dataset_repo_id,
@@ -353,11 +348,11 @@ def main() -> None:
         resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
         print(f"Resuming recording from existing dataset with {resume_recorded_demo_count} demonstrations.")
     current_recorded_demo_count = resume_recorded_demo_count
-    
+
     step_count = 0
     orange_now = 1  # Index of the current orange to pick
     start_record_state = False
-    
+
     while simulation_app.is_running() and not simulation_app.is_exiting():
         # Place this at the top of the loop, right after env.scene.update(dt)
         # or before computing actions
@@ -433,7 +428,7 @@ def main() -> None:
 
             # Reset environment regardless of success or failure
             print(
-                f"Completed one cycle. Resetting environment and moving to next orange "
+                "Completed one cycle. Resetting environment and moving to next orange "
                 f"(orange_now {orange_now} -> {orange_now + 1})."
             )
 
@@ -455,7 +450,7 @@ def main() -> None:
     env.close()
     simulation_app.close()
 
+
 if __name__ == "__main__":
     # run the main function
     main()
-
diff --git a/scripts/environments/teleoperation/teleop_se3_agent.py b/scripts/environments/teleoperation/teleop_se3_agent.py
index 573738fc..ed472005 100644
--- a/scripts/environments/teleoperation/teleop_se3_agent.py
+++ b/scripts/environments/teleoperation/teleop_se3_agent.py
@@ -196,6 +196,7 @@ def main():  # noqa: C901
                     args_cli.dataset_file
                 ), "the dataset file does not exist, please don't use '--resume' if you want to record a new dataset"
             else:
+
                 env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
                 assert not os.path.exists(
                     args_cli.dataset_file
diff --git a/source/leisaac/leisaac/controllers/pose_expert.py b/source/leisaac/leisaac/controllers/pose_expert.py
index dc93e30c..d7ff23a8 100644
--- a/source/leisaac/leisaac/controllers/pose_expert.py
+++ b/source/leisaac/leisaac/controllers/pose_expert.py
@@ -6,7 +6,8 @@
 [pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w, gripper]
 """
 import torch
-from isaaclab.utils.math import quat_from_euler_xyz, quat_mul, quat_inv, quat_apply
+from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
+
 
 def apply_triangle_offset(pos_tensor: torch.Tensor, flag: int, radius: float = 0.1) -> torch.Tensor:
     """
@@ -17,6 +18,7 @@ def apply_triangle_offset(pos_tensor: torch.Tensor, flag: int, radius: float = 0
         radius: radius in meters
     """
     import math
+
     idx = (flag - 1) % 3
     angle = idx * (2 * math.pi / 3)
     offset_x = radius * math.cos(angle)
@@ -26,6 +28,7 @@ def apply_triangle_offset(pos_tensor: torch.Tensor, flag: int, radius: float = 0
     pos_tensor[:, 1] += offset_y
     return pos_tensor
 
+
 def get_expert_action_pose_based(env, step_count: int, target: str, flag: int) -> torch.Tensor:
     """
     Simple scripted pose-based controller.
@@ -106,15 +109,23 @@ def get_expert_action_pose_based(env, step_count: int, target: str, flag: int) -
     actions = torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
     return actions
 
+
 """
 Heuristics for detecting grasp phase.
 """
-from typing import Tuple, Dict, Any
+from typing import Any, Dict, Tuple
+
 import torch
 
-def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tensor,
-                        dist_thresh: float = 0.06, rel_vel_thresh: float = 0.08,
-                        gripper_close_threshold: float = 0.7) -> Tuple[bool, Dict[str, Any]]:
+
+def is_grasp_phase_auto(
+    env_local,
+    orange_name: str,
+    prev_gripper_t: torch.Tensor,
+    dist_thresh: float = 0.06,
+    rel_vel_thresh: float = 0.08,
+    gripper_close_threshold: float = 0.7,
+) -> tuple[bool, dict[str, Any]]:
     """
     Return (is_grasp: bool, info: dict).
 
@@ -148,8 +159,8 @@ def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tenso
 
         horiz_dist = torch.norm((orange_pos - ee_pos)[:, :2], dim=-1)
         full_dist = torch.norm((orange_pos - ee_pos), dim=-1)
-        info['horiz_dist'] = horiz_dist.detach().cpu().numpy().tolist()
-        info['full_dist'] = full_dist.detach().cpu().numpy().tolist()
+        info["horiz_dist"] = horiz_dist.detach().cpu().numpy().tolist()
+        info["full_dist"] = full_dist.detach().cpu().numpy().tolist()
 
         obj_speed = torch.zeros((orange_pos.shape[0],), device=device)
         ee_speed = torch.zeros((orange_pos.shape[0],), device=device)
@@ -167,9 +178,9 @@ def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tenso
                 pass
 
         rel_speed = torch.minimum(obj_speed, ee_speed)
-        info['obj_speed'] = obj_speed.detach().cpu().numpy().tolist() if obj_speed.numel() else None
-        info['ee_speed'] = ee_speed.detach().cpu().numpy().tolist() if ee_speed.numel() else None
-        info['rel_speed'] = rel_speed.detach().cpu().numpy().tolist()
+        info["obj_speed"] = obj_speed.detach().cpu().numpy().tolist() if obj_speed.numel() else None
+        info["ee_speed"] = ee_speed.detach().cpu().numpy().tolist() if ee_speed.numel() else None
+        info["rel_speed"] = rel_speed.detach().cpu().numpy().tolist()
 
         # ensure prev_gripper_t on same device and as python list for mask
         try:
@@ -180,14 +191,16 @@ def is_grasp_phase_auto(env_local, orange_name: str, prev_gripper_t: torch.Tenso
             gripper_target = prev_gripper_t.view(-1).detach().cpu().numpy().tolist()
         except Exception:
             gripper_target = None
-        info['gripper_target'] = gripper_target
+        info["gripper_target"] = gripper_target
 
-        near_mask = (horiz_dist <= torch.as_tensor(dist_thresh, device=device))
-        slow_mask = (rel_speed <= torch.as_tensor(rel_vel_thresh, device=device))
+        near_mask = horiz_dist <= torch.as_tensor(dist_thresh, device=device)
+        slow_mask = rel_speed <= torch.as_tensor(rel_vel_thresh, device=device)
 
         if gripper_target is not None:
             try:
-                gripper_mask = torch.tensor([gt < gripper_close_threshold for gt in gripper_target], dtype=torch.bool, device=device)
+                gripper_mask = torch.tensor(
+                    [gt < gripper_close_threshold for gt in gripper_target], dtype=torch.bool, device=device
+                )
             except Exception:
                 gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
         else:
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index e86b4a0e..95774e16 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -99,7 +99,7 @@ def init_action_cfg(action_cfg, device):
             open_command_expr={"gripper": 1.0},
             close_command_expr={"gripper": 0.4},
         )
-        
+
     """LeKiwi action configuration"""
     if device in ["lekiwi-leader", "lekiwi-keyboard", "lekiwi-gamepad"]:
         action_cfg.wheel_action = mdp.JointVelocityActionCfg(
diff --git a/source/leisaac/leisaac/enhance/envs/direct_rl_env_cfg.py b/source/leisaac/leisaac/enhance/envs/direct_rl_env_cfg.py
index 1eee2c57..4c43c98b 100644
--- a/source/leisaac/leisaac/enhance/envs/direct_rl_env_cfg.py
+++ b/source/leisaac/leisaac/enhance/envs/direct_rl_env_cfg.py
@@ -18,4 +18,4 @@ class RecorderEnhanceDirectRLEnvCfg(DirectRLEnvCfg):
     return_success_status: bool = False
     """When manual_terminate or auto_terminate is True, _get_dones() will return this value as done"""
     auto_terminate: bool = False
-    """Whether enable auto terminate in this env. Set it to True when using state machine."""
\ No newline at end of file
+    """Whether enable auto terminate in this env. Set it to True when using state machine."""
diff --git a/source/leisaac/leisaac/utils/usd_fixes.py b/source/leisaac/leisaac/utils/usd_fixes.py
index 14d38bbd..cf82ae34 100644
--- a/source/leisaac/leisaac/utils/usd_fixes.py
+++ b/source/leisaac/leisaac/utils/usd_fixes.py
@@ -5,6 +5,7 @@
 import omni.usd
 from pxr import UsdGeom, UsdPhysics
 
+
 def auto_fix_collision_issues():
     """
     Convert certain mesh collisions to convex decomposition to reduce contact issues.

From 507469912b6a19e13d29a890c3ed4c5cb8c4a44d Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 01:36:19 -0500
Subject: [PATCH 18/68] Apply pre-commit fixes (black/isort/pyupgrade) for
 pick_orange.py

---
 .../environments/state_machine/pick_orange.py | 102 +++++++-----------
 1 file changed, 41 insertions(+), 61 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 8b3d964e..d0e77104 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -1,30 +1,33 @@
-"""Script to generate data using state machine with leisaac manipulation environments."""
+"""Script to generate data using state machine with leisaac manipulation environments.
 
-"""Launch Isaac Sim Simulator first."""
-import multiprocessing
+Launch Isaac Sim Simulator first.
+"""
 
-if multiprocessing.get_start_method() != "spawn":
-    multiprocessing.set_start_method("spawn", force=True)
 import argparse
+import math
+import multiprocessing
 import os
 import time
 
+if multiprocessing.get_start_method() != 'spawn':
+    multiprocessing.set_start_method('spawn', force=True)
+
 from isaaclab.app import AppLauncher
 
 # add argparse arguments
-parser = argparse.ArgumentParser(description="leisaac data generation script for pick_orange task.")
-parser.add_argument("--num_envs", type=int, default=1)
-parser.add_argument("--task", type=str, default=None)
-parser.add_argument("--seed", type=int, default=None)
-parser.add_argument("--record", action="store_true")
-parser.add_argument("--step_hz", type=int, default=60)
-parser.add_argument("--dataset_file", type=str, default="./datasets/dataset.hdf5")
-parser.add_argument("--resume", action="store_true")
-parser.add_argument("--num_demos", type=int, default=1)
-parser.add_argument("--quality", action="store_true")
-parser.add_argument("--use_lerobot_recorder", action="store_true", help="whether to use lerobot recorder.")
-parser.add_argument("--lerobot_dataset_repo_id", type=str, default=None, help="Lerobot Dataset repository ID.")
-parser.add_argument("--lerobot_dataset_fps", type=int, default=30, help="Lerobot Dataset frames per second.")
+parser = argparse.ArgumentParser(description='leisaac data generation script for pick_orange task.')
+parser.add_argument('--num_envs', type=int, default=1)
+parser.add_argument('--task', type=str, default=None)
+parser.add_argument('--seed', type=int, default=None)
+parser.add_argument('--record', action='store_true')
+parser.add_argument('--step_hz', type=int, default=60)
+parser.add_argument('--dataset_file', type=str, default='./datasets/dataset.hdf5')
+parser.add_argument('--resume', action='store_true')
+parser.add_argument('--num_demos', type=int, default=1)
+parser.add_argument('--quality', action='store_true')
+parser.add_argument('--use_lerobot_recorder', action='store_true', help='whether to use lerobot recorder.')
+parser.add_argument('--lerobot_dataset_repo_id', type=str, default=None, help='Lerobot Dataset repository ID.')
+parser.add_argument('--lerobot_dataset_fps', type=int, default=30, help='Lerobot Dataset frames per second.')
 
 AppLauncher.add_app_launcher_args(parser)
 args_cli = parser.parse_args()
@@ -33,26 +36,26 @@
 app_launcher = AppLauncher(app_launcher_args)
 simulation_app = app_launcher.app
 
-import math
-
 import gymnasium as gym
 import torch
+
 from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
 from isaaclab.managers import DatasetExportMode, SceneEntityCfg, TerminationTermCfg
 from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
 from isaaclab_tasks.utils import parse_env_cfg
+
 from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
 from leisaac.tasks.pick_orange.mdp import task_done
 from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
 
-
 class RateLimiter:
     """Convenience class for enforcing rates in loops."""
 
     def __init__(self, hz):
-        """
+        """Initialize a RateLimiter.
+
         Args:
-            hz (int): frequency to enforce
+            hz (int): frequency to enforce.
         """
         self.hz = hz
         self.last_time = time.time()
@@ -75,11 +78,10 @@ def sleep(self, env):
 
 
 def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
-    """
-    Programmatically mark the current episode as success or failure.
+    """Programmatically mark the current episode as success or failure.
 
-    This is the SAME implementation used in the teleoperation script.
-    It does NOT require any human input.
+    This is the same implementation used in the teleoperation script.
+    It does not require any human input.
     """
     if hasattr(env, "termination_manager"):
         if success:
@@ -100,27 +102,20 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
 
 
 def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
-    """
-    Apply an equilateral triangle offset on the x-y plane.
+    """Apply an equilateral triangle offset on the x-y plane.
 
     The offset places objects evenly around a circle, forming the vertices
     of an equilateral triangle. This is used to arrange multiple oranges
     on the plate without overlap.
 
     Args:
-        pos_tensor (torch.Tensor):
-            Position tensor of shape (num_envs, 3) in world coordinates.
-        orange_now (int):
-            Index of the current orange (1, 2, or 3).
-        radius (float, optional):
-            Distance from the center of the plate to each triangle vertex.
-            Defaults to 0.1 meters.
+        pos_tensor (torch.Tensor): Position tensor of shape (num_envs, 3) in world coordinates.
+        orange_now (int): Index of the current orange (1, 2, or 3).
+        radius (float, optional): Distance from the center of the plate to each triangle vertex. Defaults to 0.1 meters.
 
     Returns:
-        torch.Tensor:
-            The input position tensor with the triangle offset applied.
+        torch.Tensor: The input position tensor with the triangle offset applied.
     """
-
     idx = (orange_now - 1) % 3
     angle = idx * (2 * math.pi / 3)
 
@@ -134,8 +129,7 @@ def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
 
 
 def state_machine(env, step_count, target, orange_now):
-    """
-    Finite state machine for a pick-and-place task with a robot gripper.
+    """Finite state machine for a pick-and-place task with a robot gripper.
 
     This function generates low-level action commands based on the current
     simulation step. The robot approaches an orange, grasps it, lifts it,
@@ -143,27 +137,17 @@ def state_machine(env, step_count, target, orange_now):
     triangle arrangement.
 
     Args:
-        env:
-            The simulation environment instance. It is expected to provide
-            access to scene objects, device information, and number of parallel
-            environments.
-        step_count (int):
-            The current timestep of the episode, used to switch between
-            different phases of the state machine.
-        target (str):
-            The name of the target object (e.g., an orange) in the scene.
-        orange_now (int):
-            Index of the current orange being placed (1, 2, or 3). This
-            determines the triangle offset on the plate.
+        env: The simulation environment instance. It is expected to provide access to scene objects, device information, and number of parallel environments.
+        step_count (int): The current timestep of the episode, used to switch between different phases of the state machine.
+        target (str): The name of the target object (e.g., an orange) in the scene.
+        orange_now (int): Index of the current orange being placed (1, 2, or 3). This determines the triangle offset on the plate.
 
     Returns:
-        torch.Tensor:
-            Action tensor of shape (num_envs, action_dim), containing:
+        torch.Tensor: Action tensor of shape (num_envs, action_dim), containing:
             - target position in the robot base frame (x, y, z)
             - target orientation as a quaternion (x, y, z, w)
             - gripper command (open / close)
     """
-
     device = env.device
     num_envs = env.num_envs
 
@@ -246,15 +230,11 @@ def state_machine(env, step_count, target, orange_now):
 
 
 def main() -> None:
-    """
-    Run a pick-orange state machine in an Isaac Lab manipulation environment.
+    """Run a pick-orange state machine in an Isaac Lab manipulation environment.
 
     Creates the environment, initializes the pick-and-place state machine for
     picking an orange, and runs the main simulation loop until the application
     is closed.
-
-    Returns:
-        None
     """
     output_dir = os.path.dirname(args_cli.dataset_file)
     output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]

From 9e7343358382eab9428acbb1c9317e6f09850f63 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 01:45:40 -0500
Subject: [PATCH 19/68] Change structure..

---
 scripts/environments/state_machine/pick_orange.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index d0e77104..ce42912b 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -226,9 +226,6 @@ def state_machine(env, step_count, target, orange_now):
     return actions
 
 
-MAX_STEPS = 420
-
-
 def main() -> None:
     """Run a pick-orange state machine in an Isaac Lab manipulation environment.
 
@@ -333,6 +330,8 @@ def main() -> None:
     orange_now = 1  # Index of the current orange to pick
     start_record_state = False
 
+    MAX_STEPS = 420  # Total steps for one pick-and-place cycle per orange
+    
     while simulation_app.is_running() and not simulation_app.is_exiting():
         # Place this at the top of the loop, right after env.scene.update(dt)
         # or before computing actions

From 310aee6003a0943672f1b30a55ae8800534f3785 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 15:20:55 -0500
Subject: [PATCH 20/68] Create StateMacchine Class.

---
 .../environments/state_machine/pick_orange.py | 162 +------------
 .../leisaac/leisaac/state_machine/__init__.py |   4 +
 source/leisaac/leisaac/state_machine/base.py  |  68 ++++++
 .../leisaac/state_machine/pick_orange.py      | 217 ++++++++++++++++++
 4 files changed, 299 insertions(+), 152 deletions(-)
 create mode 100644 source/leisaac/leisaac/state_machine/__init__.py
 create mode 100644 source/leisaac/leisaac/state_machine/base.py
 create mode 100644 source/leisaac/leisaac/state_machine/pick_orange.py

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index ce42912b..f7485320 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -4,7 +4,6 @@
 """
 
 import argparse
-import math
 import multiprocessing
 import os
 import time
@@ -41,10 +40,10 @@
 
 from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
 from isaaclab.managers import DatasetExportMode, SceneEntityCfg, TerminationTermCfg
-from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
 from isaaclab_tasks.utils import parse_env_cfg
 
 from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
+from leisaac.state_machine import PickOrangeStateMachine
 from leisaac.tasks.pick_orange.mdp import task_done
 from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
 
@@ -101,131 +100,6 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
     return False
 
 
-def apply_triangle_offset(pos_tensor, orange_now, radius=0.1):
-    """Apply an equilateral triangle offset on the x-y plane.
-
-    The offset places objects evenly around a circle, forming the vertices
-    of an equilateral triangle. This is used to arrange multiple oranges
-    on the plate without overlap.
-
-    Args:
-        pos_tensor (torch.Tensor): Position tensor of shape (num_envs, 3) in world coordinates.
-        orange_now (int): Index of the current orange (1, 2, or 3).
-        radius (float, optional): Distance from the center of the plate to each triangle vertex. Defaults to 0.1 meters.
-
-    Returns:
-        torch.Tensor: The input position tensor with the triangle offset applied.
-    """
-    idx = (orange_now - 1) % 3
-    angle = idx * (2 * math.pi / 3)
-
-    offset_x = radius * math.cos(angle)
-    offset_y = radius * math.sin(angle)
-
-    pos_tensor[:, 0] += offset_x
-    pos_tensor[:, 1] += offset_y
-
-    return pos_tensor
-
-
-def state_machine(env, step_count, target, orange_now):
-    """Finite state machine for a pick-and-place task with a robot gripper.
-
-    This function generates low-level action commands based on the current
-    simulation step. The robot approaches an orange, grasps it, lifts it,
-    moves it above a plate, and places it at one vertex of an equilateral
-    triangle arrangement.
-
-    Args:
-        env: The simulation environment instance. It is expected to provide access to scene objects, device information, and number of parallel environments.
-        step_count (int): The current timestep of the episode, used to switch between different phases of the state machine.
-        target (str): The name of the target object (e.g., an orange) in the scene.
-        orange_now (int): Index of the current orange being placed (1, 2, or 3). This determines the triangle offset on the plate.
-
-    Returns:
-        torch.Tensor: Action tensor of shape (num_envs, action_dim), containing:
-            - target position in the robot base frame (x, y, z)
-            - target orientation as a quaternion (x, y, z, w)
-            - gripper command (open / close)
-    """
-    device = env.device
-    num_envs = env.num_envs
-
-    orange_pos_w = env.scene[target].data.root_pos_w.clone()
-    plate_pos_w = env.scene["Plate"].data.root_pos_w.clone()
-    robot_base_pos_w = env.scene["robot"].data.root_pos_w.clone()
-    robot_base_quat_w = env.scene["robot"].data.root_quat_w.clone()
-
-    pitch = math.radians(0)
-
-    target_quat_w = quat_from_euler_xyz(
-        torch.tensor(pitch, device=device),
-        torch.tensor(0.0, device=device),
-        torch.tensor(0.0, device=device),
-    ).repeat(num_envs, 1)
-
-    target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
-    GRIPPER = 0.1
-
-    gripper_cmd = torch.full((num_envs, 1), 1.0, device=device)
-
-    # move above orange
-    if step_count < 120:
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        gripper_cmd[:] = 1.0
-        target_pos_w[:, 2] += 0.1 + GRIPPER
-    # lower to orange
-    elif step_count < 150:
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        gripper_cmd[:] = 1.0
-        target_pos_w[:, 2] += GRIPPER
-    # close gripper
-    elif step_count < 180:
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        gripper_cmd[:] = -1.0
-        target_pos_w[:, 2] += GRIPPER
-    # lift orange
-    elif step_count < 220:
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        gripper_cmd[:] = -1.0
-        target_pos_w[:, 2] += 0.25
-    # move above plate
-    elif step_count < 320:
-        gripper_cmd[:] = -1.0
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += 0.25
-    # lower to plate
-    elif step_count < 350:
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += GRIPPER + 0.1
-        apply_triangle_offset(target_pos_w, orange_now)
-        gripper_cmd[:] = -1.0
-    # release orange
-    elif step_count < 380:
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += GRIPPER + 0.1
-        apply_triangle_offset(target_pos_w, orange_now)
-        gripper_cmd[:] = 1.0
-    # lift gripper
-    elif step_count < 420:
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += 0.2
-        apply_triangle_offset(target_pos_w, orange_now)
-        gripper_cmd[:] = 1.0
-    else:
-        gripper_cmd[:] = 1.0
-
-    diff_w = target_pos_w - robot_base_pos_w
-    target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
-
-    actions = torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
-    return actions
-
-
 def main() -> None:
     """Run a pick-orange state machine in an Isaac Lab manipulation environment.
 
@@ -326,23 +200,18 @@ def main() -> None:
         print(f"Resuming recording from existing dataset with {resume_recorded_demo_count} demonstrations.")
     current_recorded_demo_count = resume_recorded_demo_count
 
-    step_count = 0
-    orange_now = 1  # Index of the current orange to pick
+    sm = PickOrangeStateMachine(num_oranges=3)
+    sm.reset()
     start_record_state = False
 
-    MAX_STEPS = 420  # Total steps for one pick-and-place cycle per orange
-    
     while simulation_app.is_running() and not simulation_app.is_exiting():
-        # Place this at the top of the loop, right after env.scene.update(dt)
-        # or before computing actions
         if env.cfg.dynamic_reset_gripper_effort_limit:
             dynamic_reset_gripper_effort_limit_sim(env, device)
 
-        actions = state_machine(env, step_count, target=f"Orange00{orange_now}", orange_now=orange_now)
-        if step_count >= MAX_STEPS and orange_now >= 3:
+        if sm.is_episode_done:
             # --- Check whether the current episode is considered successful ---
             try:
-                print(f"Completed one cycle ({step_count} steps). Checking task success status...")
+                print("Completed one cycle. Checking task success status...")
                 success_tensor = task_done(
                     env,
                     oranges_cfg=[
@@ -393,33 +262,22 @@ def main() -> None:
                 print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
                 break
 
-            # Reset environment and bookkeeping
+            # Reset environment and state machine for the next episode
             env.reset()
-            orange_now = 1
-            step_count = 0
+            sm.reset()
 
             if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
                 print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
                 break
-
-        elif step_count >= MAX_STEPS and orange_now < 3:
-            step_count = 0
-
-            # Reset environment regardless of success or failure
-            print(
-                "Completed one cycle. Resetting environment and moving to next orange "
-                f"(orange_now {orange_now} -> {orange_now + 1})."
-            )
-
-            # Update control variables (same logic as original)
-            orange_now += 1
         else:
             if not start_record_state:
                 if args_cli.record:
                     print("Start recording.")
                 start_record_state = True
+            actions = sm.get_action(env)
             env.step(actions)
-            step_count += 1
+            sm.advance()
+
         if rate_limiter:
             rate_limiter.sleep(env)
 
diff --git a/source/leisaac/leisaac/state_machine/__init__.py b/source/leisaac/leisaac/state_machine/__init__.py
new file mode 100644
index 00000000..2cb93449
--- /dev/null
+++ b/source/leisaac/leisaac/state_machine/__init__.py
@@ -0,0 +1,4 @@
+"""State machine implementations for LeIsaac tasks."""
+
+from .base import StateMachineBase
+from .pick_orange import PickOrangeStateMachine
diff --git a/source/leisaac/leisaac/state_machine/base.py b/source/leisaac/leisaac/state_machine/base.py
new file mode 100644
index 00000000..3fc2ba86
--- /dev/null
+++ b/source/leisaac/leisaac/state_machine/base.py
@@ -0,0 +1,68 @@
+"""Abstract base class for task state machines in LeIsaac."""
+
+from abc import ABC, abstractmethod
+
+import torch
+
+
+class StateMachineBase(ABC):
+    """Abstract base class for task state machines in LeIsaac.
+
+    A state machine encapsulates the step-by-step control logic for a task.
+    It is designed to be decoupled from the simulation control flow: the caller
+    is responsible for calling :meth:`get_action`, stepping the environment, and
+    then calling :meth:`advance` to progress the internal state.
+
+    Typical usage::
+
+        sm = MyStateMachine()
+        env.reset()
+        while not sm.is_episode_done:
+            actions = sm.get_action(env)
+            env.step(actions)
+            sm.advance()
+        sm.reset()
+    """
+
+    @abstractmethod
+    def get_action(self, env) -> torch.Tensor:
+        """Compute and return the action tensor for the current step.
+
+        This method does **not** advance the internal state counter.
+        Call :meth:`advance` after :meth:`env.step` to progress the machine.
+
+        Args:
+            env: The simulation environment instance. Must expose ``env.device``,
+                ``env.num_envs``, and ``env.scene``.
+
+        Returns:
+            Action tensor of shape ``(num_envs, action_dim)``.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def advance(self) -> None:
+        """Advance the internal step counter and manage state transitions.
+
+        Should be called exactly once after each :meth:`env.step` call.
+        Internally handles multi-phase and multi-object transitions.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset(self) -> None:
+        """Reset the state machine to its initial state.
+
+        Should be called before starting a new episode.
+        """
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def is_episode_done(self) -> bool:
+        """Whether the state machine has completed a full episode cycle.
+
+        Returns:
+            ``True`` once the state machine has finished all phases of the task.
+        """
+        raise NotImplementedError
diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
new file mode 100644
index 00000000..9ae25854
--- /dev/null
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -0,0 +1,217 @@
+"""State machine for the pick-orange task."""
+
+import math
+
+import torch
+from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
+
+from .base import StateMachineBase
+
+# ---------------------------------------------------------------------------
+# Module-level helpers
+# ---------------------------------------------------------------------------
+
+_GRIPPER_OPEN = 1.0
+_GRIPPER_CLOSE = -1.0
+_GRIPPER_OFFSET = 0.1  # vertical clearance for the gripper tip
+
+
+def _apply_triangle_offset(pos_tensor: torch.Tensor, orange_now: int, radius: float = 0.1) -> torch.Tensor:
+    """Apply an equilateral-triangle offset on the x-y plane.
+
+    Distributes up to three target positions evenly around a circle so that
+    oranges placed on the plate do not overlap.
+
+    Args:
+        pos_tensor: Position tensor of shape ``(num_envs, 3)`` in world coordinates.
+            Modified **in-place**.
+        orange_now: 1-based index of the current orange (1, 2, or 3).
+        radius: Distance from the plate centre to each triangle vertex in metres.
+
+    Returns:
+        The modified ``pos_tensor`` (same object, for convenience).
+    """
+    idx = (orange_now - 1) % 3
+    angle = idx * (2 * math.pi / 3)
+    pos_tensor[:, 0] += radius * math.cos(angle)
+    pos_tensor[:, 1] += radius * math.sin(angle)
+    return pos_tensor
+
+
+# ---------------------------------------------------------------------------
+# State machine
+# ---------------------------------------------------------------------------
+
+
+class PickOrangeStateMachine(StateMachineBase):
+    """State machine for the pick-orange manipulation task.
+
+    The robot cycles through *num_oranges* oranges.  For each orange it
+    executes a fixed sequence of 420 steps that moves the gripper above the
+    orange, grasps it, lifts it, transports it to the plate and places it at
+    one vertex of an equilateral triangle arrangement.
+
+    Args:
+        num_oranges: Total number of oranges to pick and place. Defaults to 3.
+
+    Attributes:
+        MAX_STEPS_PER_ORANGE (int): Number of simulation steps per orange cycle.
+
+    Example::
+
+        sm = PickOrangeStateMachine(num_oranges=3)
+        env.reset()
+        while not sm.is_episode_done:
+            actions = sm.get_action(env)
+            env.step(actions)
+            sm.advance()
+        success = task_done(env, ...)
+        sm.reset()
+    """
+
+    MAX_STEPS_PER_ORANGE: int = 420
+
+    def __init__(self, num_oranges: int = 3) -> None:
+        self._num_oranges = num_oranges
+        self._step_count: int = 0
+        self._orange_now: int = 1
+        self._episode_done: bool = False
+
+    # ------------------------------------------------------------------
+    # StateMachineBase interface
+    # ------------------------------------------------------------------
+
+    def get_action(self, env) -> torch.Tensor:
+        """Compute the action tensor for the current step.
+
+        Reads object poses from ``env.scene`` and returns a 8-DOF action
+        tensor ``[pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w, gripper]``
+        expressed in the robot base frame.
+
+        The scene is expected to contain:
+        - ``"Orange00{orange_now}"`` – the current orange rigid object
+        - ``"Plate"`` – the target plate rigid object
+        - ``"robot"`` – the manipulator
+
+        Args:
+            env: The simulation environment.  Must expose ``env.device``,
+                ``env.num_envs``, and ``env.scene``.
+
+        Returns:
+            Action tensor of shape ``(num_envs, 8)``.
+        """
+        device = env.device
+        num_envs = env.num_envs
+        step = self._step_count
+
+        orange_pos_w = env.scene[f"Orange00{self._orange_now}"].data.root_pos_w.clone()
+        plate_pos_w = env.scene["Plate"].data.root_pos_w.clone()
+        robot_base_pos_w = env.scene["robot"].data.root_pos_w.clone()
+        robot_base_quat_w = env.scene["robot"].data.root_quat_w.clone()
+
+        # Fixed end-effector orientation (no pitch/roll)
+        target_quat_w = quat_from_euler_xyz(
+            torch.tensor(0.0, device=device),
+            torch.tensor(0.0, device=device),
+            torch.tensor(0.0, device=device),
+        ).repeat(num_envs, 1)
+        target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
+
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+
+        # --- Phase selection based on step count ---
+        if step < 120:
+            # Move above orange (hover)
+            target_pos_w = orange_pos_w.clone()
+            target_pos_w[:, 0] -= 0.03
+            target_pos_w[:, 2] += 0.1 + _GRIPPER_OFFSET
+            gripper_cmd[:] = _GRIPPER_OPEN
+        elif step < 150:
+            # Lower to orange
+            target_pos_w = orange_pos_w.clone()
+            target_pos_w[:, 0] -= 0.03
+            target_pos_w[:, 2] += _GRIPPER_OFFSET
+            gripper_cmd[:] = _GRIPPER_OPEN
+        elif step < 180:
+            # Close gripper (grasp)
+            target_pos_w = orange_pos_w.clone()
+            target_pos_w[:, 0] -= 0.03
+            target_pos_w[:, 2] += _GRIPPER_OFFSET
+            gripper_cmd[:] = _GRIPPER_CLOSE
+        elif step < 220:
+            # Lift orange
+            target_pos_w = orange_pos_w.clone()
+            target_pos_w[:, 0] -= 0.03
+            target_pos_w[:, 2] += 0.25
+            gripper_cmd[:] = _GRIPPER_CLOSE
+        elif step < 320:
+            # Move above plate
+            target_pos_w = plate_pos_w.clone()
+            target_pos_w[:, 2] += 0.25
+            gripper_cmd[:] = _GRIPPER_CLOSE
+        elif step < 350:
+            # Lower to plate
+            target_pos_w = plate_pos_w.clone()
+            target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
+            _apply_triangle_offset(target_pos_w, self._orange_now)
+            gripper_cmd[:] = _GRIPPER_CLOSE
+        elif step < 380:
+            # Release orange
+            target_pos_w = plate_pos_w.clone()
+            target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
+            _apply_triangle_offset(target_pos_w, self._orange_now)
+            gripper_cmd[:] = _GRIPPER_OPEN
+        else:
+            # Lift gripper clear of the plate
+            target_pos_w = plate_pos_w.clone()
+            target_pos_w[:, 2] += 0.2
+            _apply_triangle_offset(target_pos_w, self._orange_now)
+            gripper_cmd[:] = _GRIPPER_OPEN
+
+        diff_w = target_pos_w - robot_base_pos_w
+        target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
+
+        return torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
+
+    def advance(self) -> None:
+        """Advance the internal step counter and manage orange transitions.
+
+        When the current orange's cycle completes (``step_count`` reaches
+        :attr:`MAX_STEPS_PER_ORANGE`), the machine either:
+
+        - increments ``orange_now`` and resets ``step_count`` if more oranges
+          remain, or
+        - sets :attr:`is_episode_done` to ``True`` if all oranges are done.
+        """
+        self._step_count += 1
+        if self._step_count >= self.MAX_STEPS_PER_ORANGE:
+            if self._orange_now >= self._num_oranges:
+                self._episode_done = True
+            else:
+                self._orange_now += 1
+                self._step_count = 0
+
+    def reset(self) -> None:
+        """Reset the state machine to its initial state for a new episode."""
+        self._step_count = 0
+        self._orange_now = 1
+        self._episode_done = False
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def is_episode_done(self) -> bool:
+        """``True`` once all oranges have been picked and placed."""
+        return self._episode_done
+
+    @property
+    def orange_now(self) -> int:
+        """1-based index of the orange currently being handled."""
+        return self._orange_now
+
+    @property
+    def step_count(self) -> int:
+        """Number of steps elapsed within the current orange's cycle."""
+        return self._step_count

From fb8304d1b93631075505cd338e18375129fd506c Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 15:27:00 -0500
Subject: [PATCH 21/68] Refactor code.

---
 .../leisaac/state_machine/pick_orange.py      | 219 ++++++++++++++----
 1 file changed, 173 insertions(+), 46 deletions(-)

diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index 9ae25854..27aea704 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -84,9 +84,9 @@ def __init__(self, num_oranges: int = 3) -> None:
     def get_action(self, env) -> torch.Tensor:
         """Compute the action tensor for the current step.
 
-        Reads object poses from ``env.scene`` and returns a 8-DOF action
-        tensor ``[pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w, gripper]``
-        expressed in the robot base frame.
+        Reads object poses from ``env.scene``, dispatches to the appropriate
+        phase method, then converts the world-frame target position to the
+        robot base frame and assembles the full action tensor.
 
         The scene is expected to contain:
         - ``"Orange00{orange_now}"`` – the current orange rigid object
@@ -98,7 +98,9 @@ def get_action(self, env) -> torch.Tensor:
                 ``env.num_envs``, and ``env.scene``.
 
         Returns:
-            Action tensor of shape ``(num_envs, 8)``.
+            Action tensor of shape ``(num_envs, 8)`` with layout
+            ``[pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w, gripper]``
+            expressed in the robot base frame.
         """
         device = env.device
         num_envs = env.num_envs
@@ -117,62 +119,187 @@ def get_action(self, env) -> torch.Tensor:
         ).repeat(num_envs, 1)
         target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
 
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-
-        # --- Phase selection based on step count ---
+        # --- Phase dispatch ---
         if step < 120:
-            # Move above orange (hover)
-            target_pos_w = orange_pos_w.clone()
-            target_pos_w[:, 0] -= 0.03
-            target_pos_w[:, 2] += 0.1 + _GRIPPER_OFFSET
-            gripper_cmd[:] = _GRIPPER_OPEN
+            target_pos_w, gripper_cmd = self._phase_hover_above_orange(orange_pos_w, num_envs, device)
         elif step < 150:
-            # Lower to orange
-            target_pos_w = orange_pos_w.clone()
-            target_pos_w[:, 0] -= 0.03
-            target_pos_w[:, 2] += _GRIPPER_OFFSET
-            gripper_cmd[:] = _GRIPPER_OPEN
+            target_pos_w, gripper_cmd = self._phase_lower_to_orange(orange_pos_w, num_envs, device)
         elif step < 180:
-            # Close gripper (grasp)
-            target_pos_w = orange_pos_w.clone()
-            target_pos_w[:, 0] -= 0.03
-            target_pos_w[:, 2] += _GRIPPER_OFFSET
-            gripper_cmd[:] = _GRIPPER_CLOSE
+            target_pos_w, gripper_cmd = self._phase_grasp(orange_pos_w, num_envs, device)
         elif step < 220:
-            # Lift orange
-            target_pos_w = orange_pos_w.clone()
-            target_pos_w[:, 0] -= 0.03
-            target_pos_w[:, 2] += 0.25
-            gripper_cmd[:] = _GRIPPER_CLOSE
+            target_pos_w, gripper_cmd = self._phase_lift_orange(orange_pos_w, num_envs, device)
         elif step < 320:
-            # Move above plate
-            target_pos_w = plate_pos_w.clone()
-            target_pos_w[:, 2] += 0.25
-            gripper_cmd[:] = _GRIPPER_CLOSE
+            target_pos_w, gripper_cmd = self._phase_move_above_plate(plate_pos_w, num_envs, device)
         elif step < 350:
-            # Lower to plate
-            target_pos_w = plate_pos_w.clone()
-            target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
-            _apply_triangle_offset(target_pos_w, self._orange_now)
-            gripper_cmd[:] = _GRIPPER_CLOSE
+            target_pos_w, gripper_cmd = self._phase_lower_to_plate(plate_pos_w, num_envs, device)
         elif step < 380:
-            # Release orange
-            target_pos_w = plate_pos_w.clone()
-            target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
-            _apply_triangle_offset(target_pos_w, self._orange_now)
-            gripper_cmd[:] = _GRIPPER_OPEN
+            target_pos_w, gripper_cmd = self._phase_release(plate_pos_w, num_envs, device)
         else:
-            # Lift gripper clear of the plate
-            target_pos_w = plate_pos_w.clone()
-            target_pos_w[:, 2] += 0.2
-            _apply_triangle_offset(target_pos_w, self._orange_now)
-            gripper_cmd[:] = _GRIPPER_OPEN
+            target_pos_w, gripper_cmd = self._phase_lift_gripper(plate_pos_w, num_envs, device)
 
         diff_w = target_pos_w - robot_base_pos_w
         target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
 
         return torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
 
+    # ------------------------------------------------------------------
+    # Phase methods  (steps 0-419, one method per phase)
+    # ------------------------------------------------------------------
+
+    def _phase_hover_above_orange(
+        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 0–119: move the gripper to a hover position above the orange.
+
+        Args:
+            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
+        """
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 2] += 0.1 + _GRIPPER_OFFSET
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return target_pos_w, gripper_cmd
+
+    def _phase_lower_to_orange(
+        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 120–149: lower the gripper down to grasp height over the orange.
+
+        Args:
+            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
+        """
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 2] += _GRIPPER_OFFSET
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return target_pos_w, gripper_cmd
+
+    def _phase_grasp(
+        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 150–179: close the gripper to grasp the orange.
+
+        Args:
+            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
+        """
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 2] += _GRIPPER_OFFSET
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+        return target_pos_w, gripper_cmd
+
+    def _phase_lift_orange(
+        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 180–219: lift the grasped orange upward.
+
+        Args:
+            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
+        """
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 2] += 0.25
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+        return target_pos_w, gripper_cmd
+
+    def _phase_move_above_plate(
+        self, plate_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 220–319: transport the orange to a hover position above the plate.
+
+        Args:
+            plate_pos_w: Plate position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
+        """
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += 0.25
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+        return target_pos_w, gripper_cmd
+
+    def _phase_lower_to_plate(
+        self, plate_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 320–349: lower the orange to the placement height on the plate.
+
+        The target position is offset to one vertex of an equilateral triangle
+        so that successive oranges do not overlap.
+
+        Args:
+            plate_pos_w: Plate position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
+        """
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
+        _apply_triangle_offset(target_pos_w, self._orange_now)
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+        return target_pos_w, gripper_cmd
+
+    def _phase_release(
+        self, plate_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 350–379: open the gripper to release the orange onto the plate.
+
+        Args:
+            plate_pos_w: Plate position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
+        """
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
+        _apply_triangle_offset(target_pos_w, self._orange_now)
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return target_pos_w, gripper_cmd
+
+    def _phase_lift_gripper(
+        self, plate_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 380–419: lift the gripper clear of the plate after releasing.
+
+        Args:
+            plate_pos_w: Plate position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
+        """
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += 0.2
+        _apply_triangle_offset(target_pos_w, self._orange_now)
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return target_pos_w, gripper_cmd
+
     def advance(self) -> None:
         """Advance the internal step counter and manage orange transitions.
 

From 93285293df025493dce5f3b05001111e9db44dcf Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 19:13:27 -0500
Subject: [PATCH 22/68] Fix bugs.

---
 scripts/environments/teleoperation/teleop_se3_agent.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/environments/teleoperation/teleop_se3_agent.py b/scripts/environments/teleoperation/teleop_se3_agent.py
index ed472005..573738fc 100644
--- a/scripts/environments/teleoperation/teleop_se3_agent.py
+++ b/scripts/environments/teleoperation/teleop_se3_agent.py
@@ -196,7 +196,6 @@ def main():  # noqa: C901
                     args_cli.dataset_file
                 ), "the dataset file does not exist, please don't use '--resume' if you want to record a new dataset"
             else:
-
                 env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
                 assert not os.path.exists(
                     args_cli.dataset_file

From a5338d61ab3e71111c388cd06cb3a4ef10dee752 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 19:16:26 -0500
Subject: [PATCH 23/68] Delete redundant files

---
 .../leisaac/state_machine/pick_orange.py      | 28 +++++++++++++++-
 source/leisaac/leisaac/utils/usd_fixes.py     | 32 -------------------
 2 files changed, 27 insertions(+), 33 deletions(-)
 delete mode 100644 source/leisaac/leisaac/utils/usd_fixes.py

diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index 27aea704..2ec7fe54 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -14,6 +14,7 @@
 _GRIPPER_OPEN = 1.0
 _GRIPPER_CLOSE = -1.0
 _GRIPPER_OFFSET = 0.1  # vertical clearance for the gripper tip
+_WARMUP_STEPS: int = 30  # physics-settle steps at episode start (first orange only)
 
 
 def _apply_triangle_offset(pos_tensor: torch.Tensor, orange_now: int, radius: float = 0.1) -> torch.Tensor:
@@ -120,7 +121,10 @@ def get_action(self, env) -> torch.Tensor:
         target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
 
         # --- Phase dispatch ---
-        if step < 120:
+        if self._orange_now == 1 and step < _WARMUP_STEPS:
+            ee_pos_w = env.scene["ee_frame"].data.target_pos_w[:, 0, :].clone()
+            target_pos_w, gripper_cmd = self._phase_warmup(ee_pos_w, num_envs, device)
+        elif step < 120:
             target_pos_w, gripper_cmd = self._phase_hover_above_orange(orange_pos_w, num_envs, device)
         elif step < 150:
             target_pos_w, gripper_cmd = self._phase_lower_to_orange(orange_pos_w, num_envs, device)
@@ -146,6 +150,28 @@ def get_action(self, env) -> torch.Tensor:
     # Phase methods  (steps 0-419, one method per phase)
     # ------------------------------------------------------------------
 
+    def _phase_warmup(
+        self, ee_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 0–_WARMUP_STEPS-1 of the first orange: hold the current EE position.
+
+        Keeps the IK target at the robot's actual end-effector position so the
+        physics simulation can settle before any deliberate motion begins.  Only
+        active during the first orange's cycle; subsequent oranges skip directly
+        to the hover phase.
+
+        Args:
+            ee_pos_w: Current end-effector position in world frame, shape ``(num_envs, 3)``.
+                Obtained from ``env.scene["ee_frame"].data.target_pos_w[:, 0, :]``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – current EE world position and open gripper command.
+        """
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return ee_pos_w, gripper_cmd
+
     def _phase_hover_above_orange(
         self, orange_pos_w: torch.Tensor, num_envs: int, device: str
     ) -> tuple[torch.Tensor, torch.Tensor]:
diff --git a/source/leisaac/leisaac/utils/usd_fixes.py b/source/leisaac/leisaac/utils/usd_fixes.py
deleted file mode 100644
index cf82ae34..00000000
--- a/source/leisaac/leisaac/utils/usd_fixes.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# utils/usd_fixes.py
-"""
-USD-related utilities (best-effort fixes).
-"""
-import omni.usd
-from pxr import UsdGeom, UsdPhysics
-
-
-def auto_fix_collision_issues():
-    """
-    Convert certain mesh collisions to convex decomposition to reduce contact issues.
-    Best-effort: requires a valid current stage.
-    """
-    stage = omni.usd.get_context().get_stage()
-    if not stage:
-        return
-
-    for prim in stage.Traverse():
-        name = prim.GetName().lower()
-        if any(k in name for k in ("handle", "drawer", "board")):
-            if not prim.IsA(UsdGeom.Mesh):
-                continue
-            api = UsdPhysics.MeshCollisionAPI(prim)
-            if not api:
-                api = UsdPhysics.MeshCollisionAPI.Apply(prim)
-            approx_attr = api.GetApproximationAttr()
-            try:
-                current_val = approx_attr.Get()
-            except Exception:
-                current_val = None
-            if current_val != "convexDecomposition":
-                approx_attr.Set("convexDecomposition")

From e1c8729f42b326a8e4c18f97f11aad4ffacd5900 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 19:28:09 -0500
Subject: [PATCH 24/68] Delete redundant files.

---
 .../leisaac/controllers/pose_expert.py        | 215 ------------------
 1 file changed, 215 deletions(-)
 delete mode 100644 source/leisaac/leisaac/controllers/pose_expert.py

diff --git a/source/leisaac/leisaac/controllers/pose_expert.py b/source/leisaac/leisaac/controllers/pose_expert.py
deleted file mode 100644
index d7ff23a8..00000000
--- a/source/leisaac/leisaac/controllers/pose_expert.py
+++ /dev/null
@@ -1,215 +0,0 @@
-# controllers/pose_expert.py
-"""
-Pose-based expert controller.
-
-Outputs actions with layout:
-[pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w, gripper]
-"""
-import torch
-from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
-
-
-def apply_triangle_offset(pos_tensor: torch.Tensor, flag: int, radius: float = 0.1) -> torch.Tensor:
-    """
-    Add an equilateral-triangle offset to the x/y of pos_tensor based on flag (1..3).
-    Args:
-        pos_tensor: (num_envs, 3) tensor
-        flag: which orange (1..3)
-        radius: radius in meters
-    """
-    import math
-
-    idx = (flag - 1) % 3
-    angle = idx * (2 * math.pi / 3)
-    offset_x = radius * math.cos(angle)
-    offset_y = radius * math.sin(angle)
-    pos_tensor = pos_tensor.clone()
-    pos_tensor[:, 0] += offset_x
-    pos_tensor[:, 1] += offset_y
-    return pos_tensor
-
-
-def get_expert_action_pose_based(env, step_count: int, target: str, flag: int) -> torch.Tensor:
-    """
-    Simple scripted pose-based controller.
-
-    Args:
-        env: environment instance with scene data
-        step_count: int step counter
-        target: name of target object in scene (e.g., "Orange001")
-        flag: integer flag (which orange) used for offsets
-
-    Returns:
-        actions: (num_envs, action_dim) torch.Tensor on same device as env
-    """
-    device = env.device
-    num_envs = env.num_envs
-
-    orange_pos_w = env.scene[target].data.root_pos_w.clone()
-    plate_pos_w = env.scene["Plate"].data.root_pos_w.clone()
-    robot_base_pos_w = env.scene["robot"].data.root_pos_w.clone()
-    robot_base_quat_w = env.scene["robot"].data.root_quat_w.clone()
-
-    target_pos_w = orange_pos_w.clone()
-
-    # Fixed orientation (world)
-    pitch = 0.0
-    target_quat_w = quat_from_euler_xyz(
-        torch.tensor(pitch, device=device),
-        torch.tensor(0.0, device=device),
-        torch.tensor(0.0, device=device),
-    ).repeat(num_envs, 1)
-
-    target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
-
-    # schedule and offsets
-    GRIPPER = 0.1
-    gripper_cmd = torch.full((num_envs, 1), 1.0, device=device)
-
-    if step_count < 120:
-        gripper_cmd[:] = 1.0
-        target_pos_w[:, 2] += 0.1 + GRIPPER
-    elif step_count < 150:
-        gripper_cmd[:] = 1.0
-        target_pos_w[:, 2] += GRIPPER
-    elif step_count < 180:
-        gripper_cmd[:] = -1.0
-        target_pos_w[:, 2] += GRIPPER
-    elif step_count < 220:
-        gripper_cmd[:] = -1.0
-        target_pos_w[:, 2] += 0.25
-    elif step_count < 320:
-        gripper_cmd[:] = -1.0
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += 0.25
-    elif step_count < 350:
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += GRIPPER + 0.1
-        target_pos_w = apply_triangle_offset(target_pos_w, flag)
-        gripper_cmd[:] = -1.0
-    elif step_count < 380:
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += GRIPPER + 0.1
-        target_pos_w = apply_triangle_offset(target_pos_w, flag)
-        gripper_cmd[:] = 1.0
-    elif step_count < 420:
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += 0.2
-        target_pos_w = apply_triangle_offset(target_pos_w, flag)
-        gripper_cmd[:] = 1.0
-    else:
-        gripper_cmd[:] = 1.0
-
-    # small x offset toward robot
-    target_pos_w[:, 0] -= 0.03
-
-    diff_w = target_pos_w - robot_base_pos_w
-    target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
-
-    actions = torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
-    return actions
-
-
-"""
-Heuristics for detecting grasp phase.
-"""
-from typing import Any, Dict, Tuple
-
-import torch
-
-
-def is_grasp_phase_auto(
-    env_local,
-    orange_name: str,
-    prev_gripper_t: torch.Tensor,
-    dist_thresh: float = 0.06,
-    rel_vel_thresh: float = 0.08,
-    gripper_close_threshold: float = 0.7,
-) -> tuple[bool, dict[str, Any]]:
-    """
-    Return (is_grasp: bool, info: dict).
-
-    Info contains metrics useful for debugging.
-    """
-    info = {}
-    try:
-        orange = env_local.scene[orange_name]
-        orange_pos = None
-        device = None
-
-        if hasattr(orange.data, "root_pos_w"):
-            orange_pos = orange.data.root_pos_w  # (num_envs,3)
-            device = orange_pos.device
-
-        if device is None:
-            device = getattr(env_local, "device", torch.device("cpu"))
-
-        robot_local = env_local.scene["robot"]
-        try:
-            ee_idx = robot_local.data.body_names.index("gripper")
-        except Exception:
-            ee_idx = 0
-
-        ee_pos = robot_local.data.body_state_w[:, ee_idx, :3]
-        if ee_pos is not None:
-            device = ee_pos.device
-
-        if orange_pos is None:
-            return False, {"error": "orange pos not available"}
-
-        horiz_dist = torch.norm((orange_pos - ee_pos)[:, :2], dim=-1)
-        full_dist = torch.norm((orange_pos - ee_pos), dim=-1)
-        info["horiz_dist"] = horiz_dist.detach().cpu().numpy().tolist()
-        info["full_dist"] = full_dist.detach().cpu().numpy().tolist()
-
-        obj_speed = torch.zeros((orange_pos.shape[0],), device=device)
-        ee_speed = torch.zeros((orange_pos.shape[0],), device=device)
-        if hasattr(orange.data, "root_linvel_w"):
-            try:
-                obj_v = orange.data.root_linvel_w
-                obj_speed = torch.norm(obj_v, dim=-1).to(device)
-            except Exception:
-                pass
-        if hasattr(robot_local.data, "body_linvel_w"):
-            try:
-                v_ee = robot_local.data.body_linvel_w[:, ee_idx, :]
-                ee_speed = torch.norm(v_ee, dim=-1).to(device)
-            except Exception:
-                pass
-
-        rel_speed = torch.minimum(obj_speed, ee_speed)
-        info["obj_speed"] = obj_speed.detach().cpu().numpy().tolist() if obj_speed.numel() else None
-        info["ee_speed"] = ee_speed.detach().cpu().numpy().tolist() if ee_speed.numel() else None
-        info["rel_speed"] = rel_speed.detach().cpu().numpy().tolist()
-
-        # ensure prev_gripper_t on same device and as python list for mask
-        try:
-            if not isinstance(prev_gripper_t, torch.Tensor):
-                prev_gripper_t = torch.as_tensor(prev_gripper_t, device=device)
-            else:
-                prev_gripper_t = prev_gripper_t.to(device)
-            gripper_target = prev_gripper_t.view(-1).detach().cpu().numpy().tolist()
-        except Exception:
-            gripper_target = None
-        info["gripper_target"] = gripper_target
-
-        near_mask = horiz_dist <= torch.as_tensor(dist_thresh, device=device)
-        slow_mask = rel_speed <= torch.as_tensor(rel_vel_thresh, device=device)
-
-        if gripper_target is not None:
-            try:
-                gripper_mask = torch.tensor(
-                    [gt < gripper_close_threshold for gt in gripper_target], dtype=torch.bool, device=device
-                )
-            except Exception:
-                gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
-        else:
-            gripper_mask = torch.zeros_like(near_mask, dtype=torch.bool, device=device)
-
-        is_grasp_tensor = near_mask & slow_mask & gripper_mask
-        is_grasp_list = is_grasp_tensor.detach().cpu().numpy().tolist()
-        is_grasp_any = any(is_grasp_list)
-        return is_grasp_any, info
-
-    except Exception as e:
-        return False, {"error": str(e)}

From c4f8111ae41e9e60f23b5263aa805f2bb18b837f Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 20:07:28 -0500
Subject: [PATCH 25/68] Change PickOrangeStateMachine

---
 .../leisaac/state_machine/pick_orange.py      | 49 +++++++++++++------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index 2ec7fe54..b263b02b 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -14,7 +14,7 @@
 _GRIPPER_OPEN = 1.0
 _GRIPPER_CLOSE = -1.0
 _GRIPPER_OFFSET = 0.1  # vertical clearance for the gripper tip
-_WARMUP_STEPS: int = 30  # physics-settle steps at episode start (first orange only)
+_APPROACH_STEPS: int = 60  # steps to smoothly interpolate from init EE pos to hover (first orange only)
 
 
 def _apply_triangle_offset(pos_tensor: torch.Tensor, orange_now: int, radius: float = 0.1) -> torch.Tensor:
@@ -77,6 +77,7 @@ def __init__(self, num_oranges: int = 3) -> None:
         self._step_count: int = 0
         self._orange_now: int = 1
         self._episode_done: bool = False
+        self._initial_ee_pos: torch.Tensor | None = None
 
     # ------------------------------------------------------------------
     # StateMachineBase interface
@@ -120,10 +121,14 @@ def get_action(self, env) -> torch.Tensor:
         ).repeat(num_envs, 1)
         target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
 
+        # Capture initial EE position from robot body data at episode start (step 0, orange 1).
+        # body_pos_w is always valid after env.reset() and does not suffer from stale sensor data.
+        if self._orange_now == 1 and step == 0:
+            self._initial_ee_pos = env.scene["robot"].data.body_pos_w[:, -1, :].clone()
+
         # --- Phase dispatch ---
-        if self._orange_now == 1 and step < _WARMUP_STEPS:
-            ee_pos_w = env.scene["ee_frame"].data.target_pos_w[:, 0, :].clone()
-            target_pos_w, gripper_cmd = self._phase_warmup(ee_pos_w, num_envs, device)
+        if self._orange_now == 1 and step < _APPROACH_STEPS:
+            target_pos_w, gripper_cmd = self._phase_approach_hover(orange_pos_w, num_envs, device)
         elif step < 120:
             target_pos_w, gripper_cmd = self._phase_hover_above_orange(orange_pos_w, num_envs, device)
         elif step < 150:
@@ -150,27 +155,40 @@ def get_action(self, env) -> torch.Tensor:
     # Phase methods  (steps 0-419, one method per phase)
     # ------------------------------------------------------------------
 
-    def _phase_warmup(
-        self, ee_pos_w: torch.Tensor, num_envs: int, device: str
+    def _phase_approach_hover(
+        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 0–_WARMUP_STEPS-1 of the first orange: hold the current EE position.
+        """Steps 0–_APPROACH_STEPS-1 of the first orange: smoothly approach hover position.
 
-        Keeps the IK target at the robot's actual end-effector position so the
-        physics simulation can settle before any deliberate motion begins.  Only
-        active during the first orange's cycle; subsequent oranges skip directly
-        to the hover phase.
+        Linearly interpolates the IK target from the robot's initial end-effector
+        position (captured at step 0 from ``robot.data.body_pos_w``) to the hover
+        position above the orange.  This avoids the sudden large IK error that
+        occurs when jumping directly from the zero-joint configuration to the hover
+        target, which caused the arm to visually drop and oscillate.
+
+        Only active for the first orange; subsequent oranges proceed directly to the
+        hover phase because the arm is already in a reasonable working configuration.
 
         Args:
-            ee_pos_w: Current end-effector position in world frame, shape ``(num_envs, 3)``.
-                Obtained from ``env.scene["ee_frame"].data.target_pos_w[:, 0, :]``.
+            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
             num_envs: Number of parallel environments.
             device: Torch device string.
 
         Returns:
-            ``(target_pos_w, gripper_cmd)`` – current EE world position and open gripper command.
+            ``(target_pos_w, gripper_cmd)`` – interpolated world position and open gripper command.
         """
+        hover_target = orange_pos_w.clone()
+        hover_target[:, 0] -= 0.03
+        hover_target[:, 2] += 0.1 + _GRIPPER_OFFSET
+
+        alpha = self._step_count / _APPROACH_STEPS  # 0.0 at step 0, 1.0 at step _APPROACH_STEPS
+        if self._initial_ee_pos is not None:
+            target_pos_w = (1.0 - alpha) * self._initial_ee_pos + alpha * hover_target
+        else:
+            target_pos_w = hover_target
+
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return ee_pos_w, gripper_cmd
+        return target_pos_w, gripper_cmd
 
     def _phase_hover_above_orange(
         self, orange_pos_w: torch.Tensor, num_envs: int, device: str
@@ -349,6 +367,7 @@ def reset(self) -> None:
         self._step_count = 0
         self._orange_now = 1
         self._episode_done = False
+        self._initial_ee_pos = None
 
     # ------------------------------------------------------------------
     # Properties

From e42fc85e1065139159ff618abad2a2e63215bfdf Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 20:51:07 -0500
Subject: [PATCH 26/68] Change PickOrangeStateMachine

---
 .../leisaac/state_machine/pick_orange.py       | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index b263b02b..207ca8a1 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -14,7 +14,7 @@
 _GRIPPER_OPEN = 1.0
 _GRIPPER_CLOSE = -1.0
 _GRIPPER_OFFSET = 0.1  # vertical clearance for the gripper tip
-_APPROACH_STEPS: int = 60  # steps to smoothly interpolate from init EE pos to hover (first orange only)
+_APPROACH_STEPS: int = 120  # steps to smoothly interpolate from init EE pos to hover (first orange only)
 
 
 def _apply_triangle_offset(pos_tensor: torch.Tensor, orange_now: int, radius: float = 0.1) -> torch.Tensor:
@@ -70,7 +70,7 @@ class PickOrangeStateMachine(StateMachineBase):
         sm.reset()
     """
 
-    MAX_STEPS_PER_ORANGE: int = 420
+    MAX_STEPS_PER_ORANGE: int = 840
 
     def __init__(self, num_oranges: int = 3) -> None:
         self._num_oranges = num_oranges
@@ -129,19 +129,19 @@ def get_action(self, env) -> torch.Tensor:
         # --- Phase dispatch ---
         if self._orange_now == 1 and step < _APPROACH_STEPS:
             target_pos_w, gripper_cmd = self._phase_approach_hover(orange_pos_w, num_envs, device)
-        elif step < 120:
+        elif step < 240:
             target_pos_w, gripper_cmd = self._phase_hover_above_orange(orange_pos_w, num_envs, device)
-        elif step < 150:
+        elif step < 300:
             target_pos_w, gripper_cmd = self._phase_lower_to_orange(orange_pos_w, num_envs, device)
-        elif step < 180:
+        elif step < 360:
             target_pos_w, gripper_cmd = self._phase_grasp(orange_pos_w, num_envs, device)
-        elif step < 220:
+        elif step < 440:
             target_pos_w, gripper_cmd = self._phase_lift_orange(orange_pos_w, num_envs, device)
-        elif step < 320:
+        elif step < 640:
             target_pos_w, gripper_cmd = self._phase_move_above_plate(plate_pos_w, num_envs, device)
-        elif step < 350:
+        elif step < 700:
             target_pos_w, gripper_cmd = self._phase_lower_to_plate(plate_pos_w, num_envs, device)
-        elif step < 380:
+        elif step < 760:
             target_pos_w, gripper_cmd = self._phase_release(plate_pos_w, num_envs, device)
         else:
             target_pos_w, gripper_cmd = self._phase_lift_gripper(plate_pos_w, num_envs, device)

From 002133fc8e31ff4e56b33ea2d80b3430b513642f Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 22:10:12 -0500
Subject: [PATCH 27/68] Change PickOrangeStateMachine

---
 .../environments/state_machine/pick_orange.py | 14 +++++++
 .../leisaac/state_machine/pick_orange.py      | 42 ++++++++++++++++---
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index f7485320..a8dd8b72 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -166,6 +166,20 @@ def main() -> None:
     # Create environment
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
 
+    # Disable gravity for every robot link prim.
+    # IsaacLab's disable_gravity flag in ArticulationCfg.spawn.rigid_props only writes
+    # to the articulation ROOT prim; individual link prims (shoulder_pan_link, elbow_flex_link,
+    # etc.) each carry their own PhysicsRigidBodyAPI with gravity still enabled.
+    # This block traverses the USD stage and explicitly sets disableGravity=True on every
+    # rigid-body prim that belongs to the robot, fixing the gravity-induced arm drop at reset.
+    import omni.usd
+    from pxr import PhysxSchema, UsdPhysics
+
+    _stage = omni.usd.get_context().get_stage()
+    for _prim in _stage.Traverse():
+        if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
+            PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
+
     if args_cli.record:
         del env.recorder_manager
         if args_cli.use_lerobot_recorder:
diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index 207ca8a1..ba769bb6 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -70,7 +70,7 @@ class PickOrangeStateMachine(StateMachineBase):
         sm.reset()
     """
 
-    MAX_STEPS_PER_ORANGE: int = 840
+    MAX_STEPS_PER_ORANGE: int = 660
 
     def __init__(self, num_oranges: int = 3) -> None:
         self._num_oranges = num_oranges
@@ -125,6 +125,11 @@ def get_action(self, env) -> torch.Tensor:
         # body_pos_w is always valid after env.reset() and does not suffer from stale sensor data.
         if self._orange_now == 1 and step == 0:
             self._initial_ee_pos = env.scene["robot"].data.body_pos_w[:, -1, :].clone()
+            print("initial_ee_pos:", self._initial_ee_pos[0].cpu().numpy())
+            print("robot_base_pos_w:", robot_base_pos_w[0].cpu().numpy())
+            print("robot_base_quat_w:", robot_base_quat_w[0].cpu().numpy())
+            robot = env.scene["robot"]
+            robot.write_joint_damping_to_sim(damping=4.0)
 
         # --- Phase dispatch ---
         if self._orange_now == 1 and step < _APPROACH_STEPS:
@@ -137,14 +142,16 @@ def get_action(self, env) -> torch.Tensor:
             target_pos_w, gripper_cmd = self._phase_grasp(orange_pos_w, num_envs, device)
         elif step < 440:
             target_pos_w, gripper_cmd = self._phase_lift_orange(orange_pos_w, num_envs, device)
-        elif step < 640:
+        elif step < 490:
             target_pos_w, gripper_cmd = self._phase_move_above_plate(plate_pos_w, num_envs, device)
-        elif step < 700:
+        elif step < 540:
             target_pos_w, gripper_cmd = self._phase_lower_to_plate(plate_pos_w, num_envs, device)
-        elif step < 760:
+        elif step < 580:
             target_pos_w, gripper_cmd = self._phase_release(plate_pos_w, num_envs, device)
-        else:
+        elif step < 620:
             target_pos_w, gripper_cmd = self._phase_lift_gripper(plate_pos_w, num_envs, device)
+        else:
+            target_pos_w, gripper_cmd = self._phase_return_home(num_envs, device)
 
         diff_w = target_pos_w - robot_base_pos_w
         target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
@@ -344,6 +351,31 @@ def _phase_lift_gripper(
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
         return target_pos_w, gripper_cmd
 
+    def _phase_return_home(
+        self, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 620–659: return the gripper to the episode's initial end-effector position.
+
+        Targets the EE position that was captured at step 0 from
+        ``robot.data.body_pos_w`` (the zero-joint configuration).  This brings
+        the arm back to a neutral home pose at the end of each orange's cycle,
+        ensuring a consistent starting point for the next orange (or for the
+        episode-done check).
+
+        Args:
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – home world position and open gripper command.
+        """
+        if self._initial_ee_pos is not None:
+            target_pos_w = self._initial_ee_pos.clone()
+        else:
+            target_pos_w = torch.zeros(num_envs, 3, device=device)
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return target_pos_w, gripper_cmd
+
     def advance(self) -> None:
         """Advance the internal step counter and manage orange transitions.
 

From 2460e148f64f1913c49a3ef286195dbb5caa1e2e Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Wed, 18 Feb 2026 23:24:31 -0500
Subject: [PATCH 28/68] Change PickOrangeStateMachine

---
 source/leisaac/leisaac/state_machine/pick_orange.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index ba769bb6..a5fb891f 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -129,7 +129,7 @@ def get_action(self, env) -> torch.Tensor:
             print("robot_base_pos_w:", robot_base_pos_w[0].cpu().numpy())
             print("robot_base_quat_w:", robot_base_quat_w[0].cpu().numpy())
             robot = env.scene["robot"]
-            robot.write_joint_damping_to_sim(damping=4.0)
+            robot.write_joint_damping_to_sim(damping=1.0)
 
         # --- Phase dispatch ---
         if self._orange_now == 1 and step < _APPROACH_STEPS:
@@ -156,6 +156,17 @@ def get_action(self, env) -> torch.Tensor:
         diff_w = target_pos_w - robot_base_pos_w
         target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
 
+        idx = 0  # 只看第0个环境
+
+        # 目标位置（world）
+        print("STEP:", self._step_count, "ORANGE:", self._orange_now)
+        print("Target (world):", target_pos_w[idx].cpu().numpy())
+
+        # 实际末端位置（world）
+        print("Actual EE (world):", 
+            env.scene["robot"].data.body_pos_w[idx, -1, :].cpu().numpy())
+
+        print("----------------------------------")
         return torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
 
     # ------------------------------------------------------------------

From a7488727e1947c458f6bdec0539cefabf3b3dcb1 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Thu, 19 Feb 2026 01:37:20 -0500
Subject: [PATCH 29/68] Change PickOrangeStateMachine

---
 .../environments/state_machine/pick_orange.py | 39 ++++++++++++++-
 .../leisaac/leisaac/devices/action_process.py |  2 +-
 .../leisaac/state_machine/pick_orange.py      | 48 ++++++++-----------
 3 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index a8dd8b72..a5b9326e 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -214,7 +214,44 @@ def main() -> None:
         print(f"Resuming recording from existing dataset with {resume_recorded_demo_count} demonstrations.")
     current_recorded_demo_count = resume_recorded_demo_count
 
-    sm = PickOrangeStateMachine(num_oranges=3)
+    # --- FK calibration: drive arm to rest pose and record the EE world position ---
+    # task_done() requires SO-101 joints within SO101_FOLLOWER_REST_POSE_RANGE
+    # (shoulder_lift≈-100°, elbow_flex≈90°, wrist_flex≈50°, ±30° each).
+    # The IK action controls EE position, not joints directly.  We find the EE
+    # position that corresponds to the rest pose by temporarily driving the arm
+    # there with high stiffness, then reading body_pos_w.  env.reset() afterwards
+    # returns every scene object to its default state.
+    _robot = env.scene["robot"]
+    _joint_names = list(_robot.data.joint_names)
+    _REST_POSE_DEG = {
+        "shoulder_pan": 0.0,
+        "shoulder_lift": -100.0,
+        "elbow_flex": 90.0,
+        "wrist_flex": 50.0,
+        "wrist_roll": 0.0,
+        "gripper": -10.0,
+    }
+    _rest_joint_pos = torch.zeros(env.num_envs, len(_joint_names), device=env.device)
+    for _idx, _name in enumerate(_joint_names):
+        if _name in _REST_POSE_DEG:
+            _rest_joint_pos[:, _idx] = _REST_POSE_DEG[_name] * torch.pi / 180.0
+
+    _robot.write_joint_stiffness_to_sim(stiffness=5000.0)
+    _robot.write_joint_damping_to_sim(damping=100.0)
+    _robot.write_joint_position_target_to_sim(_rest_joint_pos)
+    for _ in range(30):
+        env.sim.step(render=False)
+        env.scene.update(dt=env.physics_dt)
+    _rest_ee_pos_world = _robot.data.body_pos_w[:, -1, :].clone()
+    print(f"[Calibration] Rest pose EE (world): {_rest_ee_pos_world[0].cpu().numpy()}")
+
+    # Restore original gains and reset scene to initial state
+    _robot.write_joint_stiffness_to_sim(stiffness=17.8)
+    _robot.write_joint_damping_to_sim(damping=0.6)
+    env.reset()
+    # -------------------------------------------------------------------------
+
+    sm = PickOrangeStateMachine(num_oranges=3, rest_ee_pos_world=_rest_ee_pos_world)
     sm.reset()
     start_record_state = False
 
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index 95774e16..e3c26cdb 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -90,7 +90,7 @@ def init_action_cfg(action_cfg, device):
             controller=mdp.DifferentialIKControllerCfg(
                 command_type="pose",
                 ik_method="dls",
-                ik_params={"lambda_val": 0.4},
+                ik_params={"lambda_val": 0.04},
             ),
         )
         action_cfg.gripper_action = mdp.BinaryJointPositionActionCfg(
diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index a5fb891f..f14cc6d7 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -70,14 +70,15 @@ class PickOrangeStateMachine(StateMachineBase):
         sm.reset()
     """
 
-    MAX_STEPS_PER_ORANGE: int = 660
+    MAX_STEPS_PER_ORANGE: int = 920
 
-    def __init__(self, num_oranges: int = 3) -> None:
+    def __init__(self, num_oranges: int = 3, rest_ee_pos_world: torch.Tensor | None = None) -> None:
         self._num_oranges = num_oranges
         self._step_count: int = 0
         self._orange_now: int = 1
         self._episode_done: bool = False
         self._initial_ee_pos: torch.Tensor | None = None
+        self._rest_ee_pos_world = rest_ee_pos_world  # (num_envs, 3) world frame, computed by FK calibration
 
     # ------------------------------------------------------------------
     # StateMachineBase interface
@@ -123,14 +124,12 @@ def get_action(self, env) -> torch.Tensor:
 
         # Capture initial EE position from robot body data at episode start (step 0, orange 1).
         # body_pos_w is always valid after env.reset() and does not suffer from stale sensor data.
+        robot = env.scene["robot"]
+        robot.write_joint_damping_to_sim(damping=10.0)
+        
         if self._orange_now == 1 and step == 0:
             self._initial_ee_pos = env.scene["robot"].data.body_pos_w[:, -1, :].clone()
-            print("initial_ee_pos:", self._initial_ee_pos[0].cpu().numpy())
-            print("robot_base_pos_w:", robot_base_pos_w[0].cpu().numpy())
-            print("robot_base_quat_w:", robot_base_quat_w[0].cpu().numpy())
-            robot = env.scene["robot"]
-            robot.write_joint_damping_to_sim(damping=1.0)
-
+            # 或者直接设置 drive params（依据你 robot API）
         # --- Phase dispatch ---
         if self._orange_now == 1 and step < _APPROACH_STEPS:
             target_pos_w, gripper_cmd = self._phase_approach_hover(orange_pos_w, num_envs, device)
@@ -156,17 +155,6 @@ def get_action(self, env) -> torch.Tensor:
         diff_w = target_pos_w - robot_base_pos_w
         target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
 
-        idx = 0  # 只看第0个环境
-
-        # 目标位置（world）
-        print("STEP:", self._step_count, "ORANGE:", self._orange_now)
-        print("Target (world):", target_pos_w[idx].cpu().numpy())
-
-        # 实际末端位置（world）
-        print("Actual EE (world):", 
-            env.scene["robot"].data.body_pos_w[idx, -1, :].cpu().numpy())
-
-        print("----------------------------------")
         return torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
 
     # ------------------------------------------------------------------
@@ -365,22 +353,28 @@ def _phase_lift_gripper(
     def _phase_return_home(
         self, num_envs: int, device: str
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 620–659: return the gripper to the episode's initial end-effector position.
+        """Steps 620–919: return the gripper to the robot's rest pose end-effector position.
+
+        Targets the EE position that was computed by FK calibration at startup
+        (``rest_ee_pos_world``), which corresponds to the SO-101 rest pose joints
+        (shoulder_lift ≈ -100°, elbow_flex ≈ 90°, wrist_flex ≈ 50°).  The
+        incremental IK will drive the arm toward this position over the 300 steps
+        (~5 s at 60 Hz), giving it enough time to converge within the ±30° joint
+        tolerance required by ``task_done``.
 
-        Targets the EE position that was captured at step 0 from
-        ``robot.data.body_pos_w`` (the zero-joint configuration).  This brings
-        the arm back to a neutral home pose at the end of each orange's cycle,
-        ensuring a consistent starting point for the next orange (or for the
-        episode-done check).
+        Falls back to ``_initial_ee_pos`` (zero-joint EE) if calibration was
+        not performed.
 
         Args:
             num_envs: Number of parallel environments.
             device: Torch device string.
 
         Returns:
-            ``(target_pos_w, gripper_cmd)`` – home world position and open gripper command.
+            ``(target_pos_w, gripper_cmd)`` – rest-pose world position and open gripper command.
         """
-        if self._initial_ee_pos is not None:
+        if self._rest_ee_pos_world is not None:
+            target_pos_w = self._rest_ee_pos_world.clone()
+        elif self._initial_ee_pos is not None:
             target_pos_w = self._initial_ee_pos.clone()
         else:
             target_pos_w = torch.zeros(num_envs, 3, device=device)

From c07cd7f60810b7ab3e3881f77d38e2b443663761 Mon Sep 17 00:00:00 2001
From: Papaercold <papercold123@foxmail.com>
Date: Thu, 19 Feb 2026 01:51:40 -0500
Subject: [PATCH 30/68] Change PickOrangeStateMachine

---
 .../environments/state_machine/pick_orange.py  | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index a5b9326e..3720f82a 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -236,18 +236,18 @@ def main() -> None:
         if _name in _REST_POSE_DEG:
             _rest_joint_pos[:, _idx] = _REST_POSE_DEG[_name] * torch.pi / 180.0
 
-    _robot.write_joint_stiffness_to_sim(stiffness=5000.0)
-    _robot.write_joint_damping_to_sim(damping=100.0)
-    _robot.write_joint_position_target_to_sim(_rest_joint_pos)
-    for _ in range(30):
-        env.sim.step(render=False)
-        env.scene.update(dt=env.physics_dt)
+    # write_joint_state_to_sim teleports joints to the exact rest pose instantly.
+    # This is simpler than target-based control: no stiffness tuning or multiple steps needed.
+    _robot.write_joint_state_to_sim(
+        position=_rest_joint_pos,
+        velocity=torch.zeros_like(_rest_joint_pos),
+    )
+    env.sim.step(render=False)
+    env.scene.update(dt=env.physics_dt)
     _rest_ee_pos_world = _robot.data.body_pos_w[:, -1, :].clone()
     print(f"[Calibration] Rest pose EE (world): {_rest_ee_pos_world[0].cpu().numpy()}")
 
-    # Restore original gains and reset scene to initial state
-    _robot.write_joint_stiffness_to_sim(stiffness=17.8)
-    _robot.write_joint_damping_to_sim(damping=0.6)
+    # Reset scene to initial state (joints back to zero, objects to default positions)
     env.reset()
     # -------------------------------------------------------------------------
 

From 9f0bdcdb00d971bf92b88d972cf6585713a7b3df Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 00:39:20 -0500
Subject: [PATCH 31/68] Add state_machine/fold_cloth.py

---
 .../environments/state_machine/fold_cloth.py  | 347 ++++++++++++++
 .../leisaac/leisaac/devices/action_process.py |  26 +-
 .../leisaac/leisaac/state_machine/__init__.py |   1 +
 .../leisaac/state_machine/fold_cloth.py       | 429 ++++++++++++++++++
 .../leisaac/tasks/template/bi_arm_env_cfg.py  |   3 +
 .../tasks/template/direct/bi_arm_env.py       |   3 +
 source/leisaac/leisaac/utils/env_utils.py     |   2 +-
 7 files changed, 807 insertions(+), 4 deletions(-)
 create mode 100644 scripts/environments/state_machine/fold_cloth.py
 create mode 100644 source/leisaac/leisaac/state_machine/fold_cloth.py

diff --git a/scripts/environments/state_machine/fold_cloth.py b/scripts/environments/state_machine/fold_cloth.py
new file mode 100644
index 00000000..af3c71de
--- /dev/null
+++ b/scripts/environments/state_machine/fold_cloth.py
@@ -0,0 +1,347 @@
+"""Script to generate data using state machine with leisaac fold-cloth bi-arm environment.
+
+Launch Isaac Sim Simulator first.
+
+Default task: LeIsaac-SO101-FoldCloth-BiArm-v0 (manager-based, bi-arm IK control)
+"""
+
+import argparse
+import multiprocessing
+import os
+import time
+
+if multiprocessing.get_start_method() != 'spawn':
+    multiprocessing.set_start_method('spawn', force=True)
+
+from isaaclab.app import AppLauncher
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description='leisaac data generation script for fold_cloth task.')
+parser.add_argument('--num_envs', type=int, default=1)
+parser.add_argument('--task', type=str, default=None)
+parser.add_argument('--seed', type=int, default=None)
+parser.add_argument('--record', action='store_true')
+parser.add_argument('--step_hz', type=int, default=60)
+parser.add_argument('--dataset_file', type=str, default='./datasets/dataset.hdf5')
+parser.add_argument('--resume', action='store_true')
+parser.add_argument('--num_demos', type=int, default=1)
+parser.add_argument('--quality', action='store_true')
+parser.add_argument('--use_lerobot_recorder', action='store_true', help='whether to use lerobot recorder.')
+parser.add_argument('--lerobot_dataset_repo_id', type=str, default=None, help='Lerobot Dataset repository ID.')
+parser.add_argument('--lerobot_dataset_fps', type=int, default=30, help='Lerobot Dataset frames per second.')
+
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+app_launcher_args = vars(args_cli)
+
+app_launcher = AppLauncher(app_launcher_args)
+simulation_app = app_launcher.app
+
+import gymnasium as gym
+import torch
+
+from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
+from isaaclab.managers import DatasetExportMode, SceneEntityCfg, TerminationTermCfg
+from isaaclab_tasks.utils import parse_env_cfg
+
+from leisaac.enhance.assets import ClothObject
+from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
+from leisaac.state_machine import FoldClothStateMachine
+from leisaac.tasks.fold_cloth.mdp import cloth_folded
+from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
+
+
+class RateLimiter:
+    """Convenience class for enforcing rates in loops."""
+
+    def __init__(self, hz):
+        self.hz = hz
+        self.last_time = time.time()
+        self.sleep_duration = 1.0 / hz
+        self.render_period = min(0.0166, self.sleep_duration)
+
+    def sleep(self, env):
+        """Attempt to sleep at the specified rate in hz."""
+        next_wakeup_time = self.last_time + self.sleep_duration
+        while time.time() < next_wakeup_time:
+            time.sleep(self.render_period)
+            env.sim.render()
+
+        self.last_time = self.last_time + self.sleep_duration
+
+        # detect time jumping forwards (e.g. loop is too slow)
+        if self.last_time < time.time():
+            while self.last_time < time.time():
+                self.last_time += self.sleep_duration
+
+
+def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
+    """Programmatically mark the current episode as success or failure."""
+    if hasattr(env, "termination_manager"):
+        if success:
+            env.termination_manager.set_term_cfg(
+                "success",
+                TerminationTermCfg(func=lambda env: torch.ones(env.num_envs, dtype=torch.bool, device=env.device)),
+            )
+        else:
+            env.termination_manager.set_term_cfg(
+                "success",
+                TerminationTermCfg(func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)),
+            )
+        env.termination_manager.compute()
+    elif hasattr(env, "_get_dones"):
+        env.cfg.return_success_status = success
+    return False
+
+
+def main() -> None:
+    """Run a fold-cloth state machine in an Isaac Lab bi-arm manipulation environment.
+
+    Uses ``LeIsaac-SO101-FoldCloth-BiArm-v0`` (manager-based) with the
+    ``bi_so101_state_machine`` device so that both arms are controlled via
+    absolute-pose IK.  A ``ClothObject`` is initialised manually on the scene
+    so that the state machine can read cloth particle keypoints at runtime.
+    """
+    output_dir = os.path.dirname(args_cli.dataset_file)
+    output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    task_name = args_cli.task if args_cli.task is not None else "LeIsaac-SO101-FoldCloth-BiArm-v0"
+    env_cfg = parse_env_cfg(task_name, device=args_cli.device, num_envs=args_cli.num_envs)
+
+    # Bi-arm IK state-machine device
+    device = "bi_so101_state_machine"
+    env_cfg.use_teleop_device(device)
+    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
+
+    # Timeout and termination preprocessing
+    is_direct_env = "Direct" in task_name
+    if is_direct_env:
+        env_cfg.never_time_out = True
+        env_cfg.auto_terminate = True
+    else:
+        if hasattr(env_cfg.terminations, "time_out"):
+            env_cfg.terminations.time_out = None
+        if hasattr(env_cfg.terminations, "success"):
+            env_cfg.terminations.success = None
+
+    # Recorder preprocessing & manual success-termination preprocessing
+    if args_cli.record:
+        if args_cli.use_lerobot_recorder:
+            if args_cli.resume:
+                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_SUCCEEDED_ONLY_RESUME
+            else:
+                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_SUCCEEDED_ONLY
+        else:
+            if args_cli.resume:
+                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
+                assert os.path.exists(
+                    args_cli.dataset_file
+                ), "The dataset file does not exist. Do not use '--resume' when recording a new dataset."
+            else:
+                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
+                assert not os.path.exists(
+                    args_cli.dataset_file
+                ), "The dataset file already exists. Use '--resume' to resume recording."
+        env_cfg.recorders.dataset_export_dir_path = output_dir
+        env_cfg.recorders.dataset_filename = output_file_name
+        if is_direct_env:
+            env_cfg.return_success_status = False
+        else:
+            if not hasattr(env_cfg.terminations, "success"):
+                setattr(env_cfg.terminations, "success", None)
+            env_cfg.terminations.success = TerminationTermCfg(
+                func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)
+            )
+    else:
+        env_cfg.recorders = None
+
+    # Create environment
+    env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
+
+    # Disable gravity on all robot link prims.
+    # The scene uses Left_Robot and Right_Robot prim paths; "Robot" matches both.
+    import omni.usd
+    from pxr import PhysxSchema, UsdPhysics
+
+    _stage = omni.usd.get_context().get_stage()
+    for _prim in _stage.Traverse():
+        if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
+            PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
+
+    # Manually register a ClothObject on the scene so that the state machine can
+    # read cloth particle keypoints via env.scene.particle_objects["cloths"].
+    # (The manager-based env does not initialise particle objects automatically;
+    # the cloth USD prim is already present in the scene.)
+    if not hasattr(env.scene, "particle_objects"):
+        env.scene.particle_objects = {}
+    env.scene.particle_objects["cloths"] = ClothObject(
+        cfg=env.cfg.scene.cloths,
+        scene=env.scene,
+    )
+    env.scene.particle_objects["cloths"].initialize()
+
+    if args_cli.record:
+        del env.recorder_manager
+        if args_cli.use_lerobot_recorder:
+            from leisaac.enhance.datasets.lerobot_dataset_handler import (
+                LeRobotDatasetCfg,
+            )
+            from leisaac.enhance.managers.lerobot_recorder_manager import (
+                LeRobotRecorderManager,
+            )
+
+            dataset_cfg = LeRobotDatasetCfg(
+                repo_id=args_cli.lerobot_dataset_repo_id,
+                fps=args_cli.lerobot_dataset_fps,
+            )
+            env.recorder_manager = LeRobotRecorderManager(env_cfg.recorders, dataset_cfg, env)
+        else:
+            env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
+            env.recorder_manager.flush_steps = 100
+            env.recorder_manager.compression = "lzf"
+
+    # Rate limiter
+    rate_limiter = RateLimiter(args_cli.step_hz)
+
+    # Initialize / reset
+    if hasattr(env, "initialize"):
+        env.initialize()
+    env.reset()
+
+    resume_recorded_demo_count = 0
+    if args_cli.record and args_cli.resume:
+        resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
+        print(f"Resuming recording from existing dataset with {resume_recorded_demo_count} demonstrations.")
+    current_recorded_demo_count = resume_recorded_demo_count
+
+    # --- FK calibration: drive both arms to rest pose and record EE world positions ---
+    # cloth_folded() requires both arms within SO101_FOLLOWER_REST_POSE_RANGE.
+    # We find the EE world position for each arm at rest pose so the state machine
+    # can drive them there via IK during the return-home phase.
+    _left_arm = env.scene["left_arm"]
+    _right_arm = env.scene["right_arm"]
+    _joint_names = list(_left_arm.data.joint_names)
+    _REST_POSE_DEG = {
+        "shoulder_pan": 0.0,
+        "shoulder_lift": -100.0,
+        "elbow_flex": 90.0,
+        "wrist_flex": 50.0,
+        "wrist_roll": 0.0,
+        "gripper": -10.0,
+    }
+    _rest_joint_pos = torch.zeros(env.num_envs, len(_joint_names), device=env.device)
+    for _idx, _name in enumerate(_joint_names):
+        if _name in _REST_POSE_DEG:
+            _rest_joint_pos[:, _idx] = _REST_POSE_DEG[_name] * torch.pi / 180.0
+
+    # Teleport both arms simultaneously to rest pose, then read EE positions via FK.
+    _left_arm.write_joint_state_to_sim(
+        position=_rest_joint_pos,
+        velocity=torch.zeros_like(_rest_joint_pos),
+    )
+    _right_arm.write_joint_state_to_sim(
+        position=_rest_joint_pos,
+        velocity=torch.zeros_like(_rest_joint_pos),
+    )
+    env.sim.step(render=False)
+    env.scene.update(dt=env.physics_dt)
+    _rest_ee_left = _left_arm.data.body_pos_w[:, -1, :].clone()
+    _rest_ee_right = _right_arm.data.body_pos_w[:, -1, :].clone()
+    print(f"[Calibration] Left arm rest pose EE (world):  {_rest_ee_left[0].cpu().numpy()}")
+    print(f"[Calibration] Right arm rest pose EE (world): {_rest_ee_right[0].cpu().numpy()}")
+
+    # Reset scene to initial state (joints back to default, cloth to initial pose)
+    env.reset()
+    # -------------------------------------------------------------------------
+
+    sm = FoldClothStateMachine(rest_ee_left=_rest_ee_left, rest_ee_right=_rest_ee_right)
+    sm.reset()
+    start_record_state = False
+
+    while simulation_app.is_running() and not simulation_app.is_exiting():
+        if env.cfg.dynamic_reset_gripper_effort_limit:
+            dynamic_reset_gripper_effort_limit_sim(env, device)
+
+        if sm.is_episode_done:
+            # --- Check whether the current episode is considered successful ---
+            try:
+                print("Completed one cycle. Checking task success status...")
+                success_tensor = cloth_folded(
+                    env,
+                    cloth_cfg=SceneEntityCfg("cloths"),
+                    cloth_keypoints_index=[159789, 120788, 115370, 159716, 121443, 112382],
+                    distance_threshold=0.20,
+                )
+                success = bool(success_tensor.all().item())
+                print("Task success status:", success)
+            except Exception as e:
+                print("Task failed due to:", e)
+                success = False
+
+            if start_record_state:
+                if args_cli.record:
+                    print("Stop recording.")
+                start_record_state = False
+
+            # Only mark the episode as successful if the task succeeds
+            if args_cli.record and success:
+                print("✅ Task succeeded. Marking this demonstration as SUCCESS.")
+                auto_terminate(env, True)
+                print("SUCCESS.")
+                current_recorded_demo_count += 1
+            else:
+                print("❌ Task failed. Marking this demonstration as FAILURE.")
+                auto_terminate(env, False)
+
+            if (
+                args_cli.record
+                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                > current_recorded_demo_count
+            ):
+                current_recorded_demo_count = (
+                    env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                )
+                print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
+
+            if (
+                args_cli.record
+                and args_cli.num_demos > 0
+                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                >= args_cli.num_demos
+            ):
+                print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
+                break
+
+            # Reset cloth particle state before resetting the scene so that the
+            # cloth returns to its initial USD pose for the next episode.
+            env.scene.particle_objects["cloths"].reset()
+            env.reset()
+            sm.reset()
+
+            if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
+                print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
+                break
+        else:
+            if not start_record_state:
+                if args_cli.record:
+                    print("Start recording.")
+                start_record_state = True
+            actions = sm.get_action(env)
+            env.step(actions)
+            sm.advance()
+
+        if rate_limiter:
+            rate_limiter.sleep(env)
+
+    if args_cli.record and hasattr(env.recorder_manager, "finalize"):
+        env.recorder_manager.finalize()
+
+    env.close()
+    simulation_app.close()
+
+
+if __name__ == "__main__":
+    # run the main function
+    main()
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index e3c26cdb..7b6ea755 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -99,6 +99,29 @@ def init_action_cfg(action_cfg, device):
             open_command_expr={"gripper": 1.0},
             close_command_expr={"gripper": 0.4},
         )
+    elif device in ["bi_so101_state_machine"]:
+        # Bi-arm IK state machine: left arm + right arm, each with absolute-pose IK + binary gripper.
+        # Action tensor layout: [left_ik(7), left_gripper(1), right_ik(7), right_gripper(1)] = 16D
+        _ik_cfg = lambda asset: mdp.DifferentialInverseKinematicsActionCfg(
+            asset_name=asset,
+            joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
+            body_name="gripper",
+            controller=mdp.DifferentialIKControllerCfg(
+                command_type="pose",
+                ik_method="dls",
+                ik_params={"lambda_val": 0.04},
+            ),
+        )
+        _grip_cfg = lambda asset: mdp.BinaryJointPositionActionCfg(
+            asset_name=asset,
+            joint_names=["gripper"],
+            open_command_expr={"gripper": 1.0},
+            close_command_expr={"gripper": 0.4},
+        )
+        action_cfg.left_arm_action = _ik_cfg("left_arm")
+        action_cfg.left_gripper_action = _grip_cfg("left_arm")
+        action_cfg.right_arm_action = _ik_cfg("right_arm")
+        action_cfg.right_gripper_action = _grip_cfg("right_arm")
 
     """LeKiwi action configuration"""
     if device in ["lekiwi-leader", "lekiwi-keyboard", "lekiwi-gamepad"]:
@@ -152,9 +175,6 @@ def preprocess_device_action(action: dict[str, Any], teleop_device) -> torch.Ten
     elif action.get("keyboard") is not None or action.get("gamepad") is not None:
         processed_action = torch.zeros(teleop_device.env.num_envs, 8, device=teleop_device.env.device)
         processed_action[:, :] = action["joint_state"]
-    elif action.get("so101_state_machine") is not None:
-        processed_action = torch.zeros(teleop_device.env.num_envs, 8, device=teleop_device.env.device)
-        processed_action[:, :] = action["joint_state"]
     elif action.get("bi_so101_leader") is not None:
         processed_action = torch.zeros(teleop_device.env.num_envs, 12, device=teleop_device.env.device)
         processed_action[:, :6] = convert_action_from_so101_leader(
diff --git a/source/leisaac/leisaac/state_machine/__init__.py b/source/leisaac/leisaac/state_machine/__init__.py
index 2cb93449..b53ddf16 100644
--- a/source/leisaac/leisaac/state_machine/__init__.py
+++ b/source/leisaac/leisaac/state_machine/__init__.py
@@ -1,4 +1,5 @@
 """State machine implementations for LeIsaac tasks."""
 
 from .base import StateMachineBase
+from .fold_cloth import FoldClothStateMachine
 from .pick_orange import PickOrangeStateMachine
diff --git a/source/leisaac/leisaac/state_machine/fold_cloth.py b/source/leisaac/leisaac/state_machine/fold_cloth.py
new file mode 100644
index 00000000..3ae98499
--- /dev/null
+++ b/source/leisaac/leisaac/state_machine/fold_cloth.py
@@ -0,0 +1,429 @@
+"""State machine for the fold-cloth task (bi-arm SO-101)."""
+
+import torch
+from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
+
+from .base import StateMachineBase
+
+# ---------------------------------------------------------------------------
+# Module-level constants
+# ---------------------------------------------------------------------------
+
+_GRIPPER_OPEN = 1.0
+_GRIPPER_CLOSE = -1.0
+
+# Cloth particle indices for the 6 keypoints, matching cloth_folded() in terminations.py.
+# Order: left_sleeve(0), left_shoulder(1), left_hem(2),
+#        right_sleeve(3), right_shoulder(4), right_hem(5)
+_CLOTH_KEYPOINTS = [159789, 120788, 115370, 159716, 121443, 112382]
+
+_KP_LEFT_SLEEVE = 0
+_KP_LEFT_SHOULDER = 1
+_KP_LEFT_HEM = 2
+_KP_RIGHT_SLEEVE = 3
+_KP_RIGHT_SHOULDER = 4
+_KP_RIGHT_HEM = 5
+
+# Height offsets relative to cloth keypoints
+_HOVER_ABOVE = 0.12   # approach height above keypoint (m)
+_GRASP_ABOVE = 0.02   # grip height above cloth surface (cloth is thin) (m)
+_FOLD_HEIGHT = 0.18   # height above shoulder target while carrying cloth (m)
+_LIFT_HEIGHT = 0.20   # neutral lift height used between phases (m)
+
+# ---------------------------------------------------------------------------
+# Phase step boundaries (at 60 Hz)
+# ---------------------------------------------------------------------------
+#   0 – 119  hover_above_sleeves       (2 s) – both arms approach above sleeves
+# 120 – 179  lower_to_sleeves          (1 s) – descend to grasp height
+# 180 – 239  grasp_sleeves             (1 s) – close grippers
+# 240 – 359  cross_sleeves             (2 s) – left→right_shoulder, right→left_shoulder
+# 360 – 419  release_sleeves           (1 s) – open grippers at crossed position
+# 420 – 479  lift_grippers_1           (1 s) – lift clear of cloth
+# 480 – 599  hover_above_hems          (2 s) – move above bottom corners
+# 600 – 659  lower_to_hems             (1 s) – descend to grasp height
+# 660 – 719  grasp_hems                (1 s) – close grippers
+# 720 – 839  fold_hems_up              (2 s) – left_hem→left_shoulder, right_hem→right_shoulder
+# 840 – 899  release_hems              (1 s) – open grippers
+# 900 – 959  lift_grippers_2           (1 s) – lift clear of cloth
+# 960 –1259  return_home               (5 s) – both arms drive to rest-pose EE
+_P_HOVER_SLEEVES = 120
+_P_LOWER_SLEEVES = 180
+_P_GRASP_SLEEVES = 240
+_P_CROSS_SLEEVES = 360
+_P_RELEASE_SLEEVES = 420
+_P_LIFT1 = 480
+_P_HOVER_HEMS = 600
+_P_LOWER_HEMS = 660
+_P_GRASP_HEMS = 720
+_P_FOLD_HEMS = 840
+_P_RELEASE_HEMS = 900
+_P_LIFT2 = 960
+
+
+# ---------------------------------------------------------------------------
+# State machine
+# ---------------------------------------------------------------------------
+
+
+class FoldClothStateMachine(StateMachineBase):
+    """Bi-arm state machine for the fold-cloth manipulation task.
+
+    The robot uses two SO-101 arms to fold a garment lying on a surface:
+
+    1. Both arms reach above the sleeves, grasp them and cross them over to
+       opposite shoulders (left sleeve → right shoulder, right sleeve → left
+       shoulder).
+    2. Both arms then reach the hem corners, grasp them and fold them upward
+       to the corresponding shoulder positions.
+    3. Both arms return to their rest poses so that ``cloth_folded()``
+       (which also checks ``is_so101_at_rest_pose`` for both arms) can
+       evaluate the episode as successful.
+
+    Args:
+        rest_ee_left:  World-frame EE position of the left arm at rest pose,
+            shape ``(num_envs, 3)``.  Computed by FK calibration at startup.
+        rest_ee_right: World-frame EE position of the right arm at rest pose,
+            shape ``(num_envs, 3)``.  Computed by FK calibration at startup.
+
+    Attributes:
+        MAX_STEPS (int): Total simulation steps for one episode (1260 ≈ 21 s at 60 Hz).
+
+    Example::
+
+        sm = FoldClothStateMachine(rest_ee_left=left_ee, rest_ee_right=right_ee)
+        env.reset()
+        while not sm.is_episode_done:
+            actions = sm.get_action(env)
+            env.step(actions)
+            sm.advance()
+        success = cloth_folded(env, ...)
+        sm.reset()
+    """
+
+    MAX_STEPS: int = 1260
+
+    def __init__(
+        self,
+        rest_ee_left: torch.Tensor | None = None,
+        rest_ee_right: torch.Tensor | None = None,
+    ) -> None:
+        self._step_count: int = 0
+        self._episode_done: bool = False
+        self._rest_ee_left = rest_ee_left    # (num_envs, 3), calibrated at startup
+        self._rest_ee_right = rest_ee_right  # (num_envs, 3), calibrated at startup
+        # Shoulder positions captured at step 0 and used as stable fold targets.
+        self._left_shoulder_pos: torch.Tensor | None = None
+        self._right_shoulder_pos: torch.Tensor | None = None
+
+    # ------------------------------------------------------------------
+    # StateMachineBase interface
+    # ------------------------------------------------------------------
+
+    def get_action(self, env) -> torch.Tensor:
+        """Compute the 16-D action tensor for both arms at the current step.
+
+        The action layout is:
+        ``[left_pos(3), left_quat(4), left_gripper(1), right_pos(3), right_quat(4), right_gripper(1)]``
+
+        The scene must expose:
+        - ``"left_arm"`` / ``"right_arm"`` – SO-101 articulations
+        - ``env.scene.particle_objects["cloths"]`` – :class:`ClothObject`
+
+        Args:
+            env: The simulation environment.
+
+        Returns:
+            Action tensor of shape ``(num_envs, 16)``.
+        """
+        step = self._step_count
+        num_envs = env.num_envs
+        device = env.device
+
+        # Read cloth keypoints: (num_envs, 6, 3) in world frame
+        kp = self._get_cloth_keypoints(env)
+
+        # Capture shoulder positions once at episode start as stable fold targets
+        if step == 0:
+            self._left_shoulder_pos = kp[:, _KP_LEFT_SHOULDER, :].clone()
+            self._right_shoulder_pos = kp[:, _KP_RIGHT_SHOULDER, :].clone()
+
+        # --- Phase dispatch ---
+        if step < _P_HOVER_SLEEVES:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_hover_sleeves(kp, num_envs, device)
+        elif step < _P_LOWER_SLEEVES:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_lower_to_sleeves(kp, num_envs, device)
+        elif step < _P_GRASP_SLEEVES:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_grasp_sleeves(kp, num_envs, device)
+        elif step < _P_CROSS_SLEEVES:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_cross_sleeves(num_envs, device)
+        elif step < _P_RELEASE_SLEEVES:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_release_sleeves(num_envs, device)
+        elif step < _P_LIFT1:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_lift_grippers(num_envs, device)
+        elif step < _P_HOVER_HEMS:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_hover_hems(kp, num_envs, device)
+        elif step < _P_LOWER_HEMS:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_lower_to_hems(kp, num_envs, device)
+        elif step < _P_GRASP_HEMS:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_grasp_hems(kp, num_envs, device)
+        elif step < _P_FOLD_HEMS:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_fold_hems(num_envs, device)
+        elif step < _P_RELEASE_HEMS:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_release_hems(num_envs, device)
+        elif step < _P_LIFT2:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_lift_grippers(num_envs, device)
+        else:
+            l_tgt, r_tgt, l_grip, r_grip = self._phase_return_home(num_envs, device)
+
+        left_action = self._to_arm_action(l_tgt, l_grip, "left_arm", env)
+        right_action = self._to_arm_action(r_tgt, r_grip, "right_arm", env)
+
+        return torch.cat([left_action, right_action], dim=-1)  # (num_envs, 16)
+
+    def advance(self) -> None:
+        """Increment the step counter; mark episode done after MAX_STEPS."""
+        self._step_count += 1
+        if self._step_count >= self.MAX_STEPS:
+            self._episode_done = True
+
+    def reset(self) -> None:
+        """Reset to the initial state for a new episode."""
+        self._step_count = 0
+        self._episode_done = False
+        self._left_shoulder_pos = None
+        self._right_shoulder_pos = None
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _get_cloth_keypoints(self, env) -> torch.Tensor:
+        """Return cloth keypoints as ``(num_envs, 6, 3)`` in world frame."""
+        cloth = env.scene.particle_objects["cloths"]
+        return cloth.point_positions[:, _CLOTH_KEYPOINTS, :]
+
+    def _to_arm_action(
+        self,
+        target_pos_w: torch.Tensor,
+        gripper_cmd: torch.Tensor,
+        arm_name: str,
+        env,
+    ) -> torch.Tensor:
+        """Convert a world-frame EE position to the arm's local IK action (8-D).
+
+        Args:
+            target_pos_w: Target position in world frame, shape ``(num_envs, 3)``.
+            gripper_cmd:  Gripper command, shape ``(num_envs, 1)``.
+            arm_name:     ``"left_arm"`` or ``"right_arm"``.
+            env:          The simulation environment.
+
+        Returns:
+            Tensor of shape ``(num_envs, 8)`` = ``[pos(3), quat(4), gripper(1)]``
+            expressed in the arm's base frame.
+        """
+        device = env.device
+        num_envs = env.num_envs
+
+        base_pos_w = env.scene[arm_name].data.root_pos_w.clone()
+        base_quat_w = env.scene[arm_name].data.root_quat_w.clone()
+
+        # Identity EE orientation in world frame (gripper pointing down)
+        target_quat_w = quat_from_euler_xyz(
+            torch.tensor(0.0, device=device),
+            torch.tensor(0.0, device=device),
+            torch.tensor(0.0, device=device),
+        ).repeat(num_envs, 1)
+        target_quat = quat_mul(quat_inv(base_quat_w), target_quat_w)
+
+        diff_w = target_pos_w - base_pos_w
+        target_pos_local = quat_apply(quat_inv(base_quat_w), diff_w)
+
+        return torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
+
+    def _shoulder_center(self, num_envs: int, device: str) -> torch.Tensor:
+        """Mid-point between the two captured shoulder positions."""
+        if self._left_shoulder_pos is not None and self._right_shoulder_pos is not None:
+            return (self._left_shoulder_pos + self._right_shoulder_pos) / 2.0
+        return torch.zeros(num_envs, 3, device=device)
+
+    # ------------------------------------------------------------------
+    # Phase methods
+    # ------------------------------------------------------------------
+
+    def _phase_hover_sleeves(
+        self, kp: torch.Tensor, num_envs: int, device: str
+    ) -> tuple:
+        """Steps 0–119: both arms hover above respective sleeves."""
+        l_tgt = kp[:, _KP_LEFT_SLEEVE, :].clone()
+        l_tgt[:, 2] += _HOVER_ABOVE
+        r_tgt = kp[:, _KP_RIGHT_SLEEVE, :].clone()
+        r_tgt[:, 2] += _HOVER_ABOVE
+        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return l_tgt, r_tgt, open_g, open_g.clone()
+
+    def _phase_lower_to_sleeves(
+        self, kp: torch.Tensor, num_envs: int, device: str
+    ) -> tuple:
+        """Steps 120–179: descend to grasp height above sleeves."""
+        l_tgt = kp[:, _KP_LEFT_SLEEVE, :].clone()
+        l_tgt[:, 2] += _GRASP_ABOVE
+        r_tgt = kp[:, _KP_RIGHT_SLEEVE, :].clone()
+        r_tgt[:, 2] += _GRASP_ABOVE
+        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return l_tgt, r_tgt, open_g, open_g.clone()
+
+    def _phase_grasp_sleeves(
+        self, kp: torch.Tensor, num_envs: int, device: str
+    ) -> tuple:
+        """Steps 180–239: close grippers on sleeves."""
+        l_tgt = kp[:, _KP_LEFT_SLEEVE, :].clone()
+        l_tgt[:, 2] += _GRASP_ABOVE
+        r_tgt = kp[:, _KP_RIGHT_SLEEVE, :].clone()
+        r_tgt[:, 2] += _GRASP_ABOVE
+        close_g = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+        return l_tgt, r_tgt, close_g, close_g.clone()
+
+    def _phase_cross_sleeves(self, num_envs: int, device: str) -> tuple:
+        """Steps 240–359: cross sleeves to opposite shoulders.
+
+        Left arm (holding left sleeve) moves to right shoulder.
+        Right arm (holding right sleeve) moves to left shoulder.
+        """
+        if self._right_shoulder_pos is not None:
+            l_tgt = self._right_shoulder_pos.clone()
+            l_tgt[:, 2] += _FOLD_HEIGHT
+        else:
+            l_tgt = torch.zeros(num_envs, 3, device=device)
+
+        if self._left_shoulder_pos is not None:
+            r_tgt = self._left_shoulder_pos.clone()
+            r_tgt[:, 2] += _FOLD_HEIGHT
+        else:
+            r_tgt = torch.zeros(num_envs, 3, device=device)
+
+        close_g = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+        return l_tgt, r_tgt, close_g, close_g.clone()
+
+    def _phase_release_sleeves(self, num_envs: int, device: str) -> tuple:
+        """Steps 360–419: open grippers to deposit sleeves at shoulder positions."""
+        if self._right_shoulder_pos is not None:
+            l_tgt = self._right_shoulder_pos.clone()
+            l_tgt[:, 2] += _FOLD_HEIGHT
+        else:
+            l_tgt = torch.zeros(num_envs, 3, device=device)
+
+        if self._left_shoulder_pos is not None:
+            r_tgt = self._left_shoulder_pos.clone()
+            r_tgt[:, 2] += _FOLD_HEIGHT
+        else:
+            r_tgt = torch.zeros(num_envs, 3, device=device)
+
+        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return l_tgt, r_tgt, open_g, open_g.clone()
+
+    def _phase_lift_grippers(self, num_envs: int, device: str) -> tuple:
+        """Lift both grippers clear of the cloth to a neutral position."""
+        center = self._shoulder_center(num_envs, device)
+        l_tgt = center.clone()
+        l_tgt[:, 2] += _LIFT_HEIGHT + 0.10
+        r_tgt = center.clone()
+        r_tgt[:, 2] += _LIFT_HEIGHT + 0.10
+        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return l_tgt, r_tgt, open_g, open_g.clone()
+
+    def _phase_hover_hems(
+        self, kp: torch.Tensor, num_envs: int, device: str
+    ) -> tuple:
+        """Steps 480–599: hover above the bottom hem corners."""
+        l_tgt = kp[:, _KP_LEFT_HEM, :].clone()
+        l_tgt[:, 2] += _HOVER_ABOVE
+        r_tgt = kp[:, _KP_RIGHT_HEM, :].clone()
+        r_tgt[:, 2] += _HOVER_ABOVE
+        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return l_tgt, r_tgt, open_g, open_g.clone()
+
+    def _phase_lower_to_hems(
+        self, kp: torch.Tensor, num_envs: int, device: str
+    ) -> tuple:
+        """Steps 600–659: descend to grasp height above hems."""
+        l_tgt = kp[:, _KP_LEFT_HEM, :].clone()
+        l_tgt[:, 2] += _GRASP_ABOVE
+        r_tgt = kp[:, _KP_RIGHT_HEM, :].clone()
+        r_tgt[:, 2] += _GRASP_ABOVE
+        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return l_tgt, r_tgt, open_g, open_g.clone()
+
+    def _phase_grasp_hems(
+        self, kp: torch.Tensor, num_envs: int, device: str
+    ) -> tuple:
+        """Steps 660–719: close grippers on hem corners."""
+        l_tgt = kp[:, _KP_LEFT_HEM, :].clone()
+        l_tgt[:, 2] += _GRASP_ABOVE
+        r_tgt = kp[:, _KP_RIGHT_HEM, :].clone()
+        r_tgt[:, 2] += _GRASP_ABOVE
+        close_g = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+        return l_tgt, r_tgt, close_g, close_g.clone()
+
+    def _phase_fold_hems(self, num_envs: int, device: str) -> tuple:
+        """Steps 720–839: fold hem corners up to shoulder level.
+
+        Left hem moves to left shoulder position; right hem to right shoulder.
+        """
+        if self._left_shoulder_pos is not None:
+            l_tgt = self._left_shoulder_pos.clone()
+            l_tgt[:, 2] += _FOLD_HEIGHT
+        else:
+            l_tgt = torch.zeros(num_envs, 3, device=device)
+
+        if self._right_shoulder_pos is not None:
+            r_tgt = self._right_shoulder_pos.clone()
+            r_tgt[:, 2] += _FOLD_HEIGHT
+        else:
+            r_tgt = torch.zeros(num_envs, 3, device=device)
+
+        close_g = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+        return l_tgt, r_tgt, close_g, close_g.clone()
+
+    def _phase_release_hems(self, num_envs: int, device: str) -> tuple:
+        """Steps 840–899: open grippers to deposit hems at shoulder positions."""
+        if self._left_shoulder_pos is not None:
+            l_tgt = self._left_shoulder_pos.clone()
+            l_tgt[:, 2] += _FOLD_HEIGHT
+        else:
+            l_tgt = torch.zeros(num_envs, 3, device=device)
+
+        if self._right_shoulder_pos is not None:
+            r_tgt = self._right_shoulder_pos.clone()
+            r_tgt[:, 2] += _FOLD_HEIGHT
+        else:
+            r_tgt = torch.zeros(num_envs, 3, device=device)
+
+        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return l_tgt, r_tgt, open_g, open_g.clone()
+
+    def _phase_return_home(self, num_envs: int, device: str) -> tuple:
+        """Steps 960–1259: drive both arms to rest-pose EE (for task_done check).
+
+        Targets ``rest_ee_left`` / ``rest_ee_right`` computed at startup by FK
+        calibration.  The incremental IK drives joints toward the rest pose
+        configuration (shoulder_lift≈-100°, elbow_flex≈90°, wrist_flex≈50°)
+        within the ±30° tolerance required by ``is_so101_at_rest_pose``.
+        """
+        l_tgt = self._rest_ee_left.clone() if self._rest_ee_left is not None else torch.zeros(num_envs, 3, device=device)
+        r_tgt = self._rest_ee_right.clone() if self._rest_ee_right is not None else torch.zeros(num_envs, 3, device=device)
+        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return l_tgt, r_tgt, open_g, open_g.clone()
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def is_episode_done(self) -> bool:
+        """``True`` once all ``MAX_STEPS`` have been executed."""
+        return self._episode_done
+
+    @property
+    def step_count(self) -> int:
+        """Number of steps elapsed in the current episode."""
+        return self._step_count
diff --git a/source/leisaac/leisaac/tasks/template/bi_arm_env_cfg.py b/source/leisaac/leisaac/tasks/template/bi_arm_env_cfg.py
index 1c8295be..6a07c6ff 100644
--- a/source/leisaac/leisaac/tasks/template/bi_arm_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/template/bi_arm_env_cfg.py
@@ -211,6 +211,9 @@ def __post_init__(self) -> None:
     def use_teleop_device(self, teleop_device) -> None:
         self.task_type = teleop_device
         self.actions = init_action_cfg(self.actions, device=teleop_device)
+        if teleop_device in ["bi_so101_state_machine"]:
+            self.scene.left_arm.spawn.rigid_props.disable_gravity = True
+            self.scene.right_arm.spawn.rigid_props.disable_gravity = True
 
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:
         return preprocess_device_action(action, teleop_device)
diff --git a/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py b/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
index d7fd3668..614cdd5f 100644
--- a/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
+++ b/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
@@ -92,6 +92,9 @@ def __post_init__(self) -> None:
     def use_teleop_device(self, teleop_device) -> None:
         self.task_type = teleop_device
         # self.actions = init_action_cfg(self.actions, device=teleop_device)
+        if teleop_device in ["bi_so101_state_machine"]:
+            self.scene.left_arm.spawn.rigid_props.disable_gravity = True
+            self.scene.right_arm.spawn.rigid_props.disable_gravity = True
 
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:
         return preprocess_device_action(action, teleop_device)
diff --git a/source/leisaac/leisaac/utils/env_utils.py b/source/leisaac/leisaac/utils/env_utils.py
index b5342ccd..9a52c5bd 100644
--- a/source/leisaac/leisaac/utils/env_utils.py
+++ b/source/leisaac/leisaac/utils/env_utils.py
@@ -5,7 +5,7 @@ def dynamic_reset_gripper_effort_limit_sim(env, teleop_device):
     need_to_set = []
     if teleop_device == "bi-so101leader":
         need_to_set = [env.scene.articulations["left_arm"], env.scene.articulations["right_arm"]]
-    elif teleop_device in ["so101leader", "keyboard", "so101_state_machine"]:
+    elif teleop_device in ["so101leader", "keyboard"]:
         need_to_set = [env.scene["robot"]]
     for arm in need_to_set:
         write_gripper_effort_limit_sim(env, arm)

From 656fa77a21d3cfa6899ed0b2b4028fdd658f93fd Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 15:07:07 -0500
Subject: [PATCH 32/68] Fix bugs

---
 CLAUDE.md                                     | 211 ++++++++++++++++++
 replay.sh                                     |  17 ++
 run_task.sh                                   |  15 ++
 .../environments/state_machine/pick_orange.py |  51 +++++
 .../leisaac/state_machine/pick_orange.py      |   3 -
 .../tasks/template/single_arm_env_cfg.py      |   8 +
 6 files changed, 302 insertions(+), 3 deletions(-)
 create mode 100644 CLAUDE.md
 create mode 100755 replay.sh
 create mode 100755 run_task.sh

diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 00000000..ddab40b2
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,211 @@
+# LeIsaac Project — Claude Context
+
+## 项目结构概览
+
+```
+leisaac/
+├── scripts/environments/state_machine/   # 运行脚本
+│   ├── pick_orange.py                    # 单臂拾橙数据采集脚本
+│   └── fold_cloth.py                     # 双臂叠衣数据采集脚本（新增）
+└── source/leisaac/leisaac/
+    ├── state_machine/                    # 状态机模块（新增）
+    │   ├── __init__.py
+    │   ├── base.py                       # StateMachineBase 抽象基类
+    │   ├── pick_orange.py                # PickOrangeStateMachine
+    │   └── fold_cloth.py                 # FoldClothStateMachine（新增）
+    ├── devices/
+    │   └── action_process.py             # 设备动作配置（新增 bi_so101_state_machine）
+    ├── tasks/
+    │   ├── template/
+    │   │   ├── bi_arm_env_cfg.py         # 双臂 ManagerBased env 基础配置
+    │   │   ├── single_arm_env_cfg.py     # 单臂 ManagerBased env 基础配置
+    │   │   └── direct/
+    │   │       └── bi_arm_env.py         # 双臂 Direct env 基础配置
+    │   ├── pick_orange/                  # 单臂拾橙任务
+    │   │   └── mdp/terminations.py       # task_done() 检查 rest pose + orange in plate
+    │   └── fold_cloth/                   # 双臂叠衣任务
+    │       ├── mdp/terminations.py       # cloth_folded() 检查 rest pose + 布料关键点距离
+    │       ├── fold_cloth_bi_arm_env_cfg.py  # ManagerBased env 配置（含 cloths 场景元素）
+    │       └── direct/
+    │           └── fold_cloth_bi_arm_env.py  # Direct env（含 ClothObject 初始化）
+    └── utils/
+        ├── env_utils.py                  # dynamic_reset_gripper_effort_limit_sim 等工具
+        └── robot_utils.py                # is_so101_at_rest_pose, convert_leisaac_action_to_lerobot
+```
+
+---
+
+## 状态机架构
+
+### 状态机与环境的关系
+
+```
+Runner script
+  └── gym.make(task_name, cfg=env_cfg)        # 创建环境
+       └── env_cfg.use_teleop_device(device)  # 配置 action manager
+  └── sm = StateMachine(...)
+  └── 主循环:
+       actions = sm.get_action(env)           # 状态机计算动作
+       env.step(actions)                      # 直接传入 tensor，不经过 preprocess_device_action
+       sm.advance()
+```
+
+**关键点**：runner script 直接调用 `env.step(actions)` 传入动作 tensor，**不经过**
+`preprocess_device_action()`（后者只在遥操作流程中使用）。
+
+### 设备类型与 action 格式
+
+| device | action 维度 | 格式 | 适用场景 |
+|---|---|---|---|
+| `so101_state_machine` | 8D | `[pos(3), quat(4), gripper(1)]` | 单臂 IK 控制 |
+| `bi_so101_state_machine` | 16D | `[left_pos(3), left_quat(4), left_grip(1), right_pos(3), right_quat(4), right_grip(1)]` | 双臂 IK 控制 |
+
+IK 目标位置为**机械臂 base frame 下的局部坐标**，需要从世界坐标转换：
+```python
+diff_w = target_pos_w - base_pos_w
+target_pos_local = quat_apply(quat_inv(base_quat_w), diff_w)
+```
+
+---
+
+## 关键技术细节
+
+### 1. 重力禁用（两步缺一不可）
+
+**第一步**：`use_teleop_device()` 中设置 rigid_props（只禁用 root prim）：
+```python
+# bi_arm_env_cfg.py / bi_arm_env.py（Direct）
+if teleop_device in ["bi_so101_state_machine"]:
+    self.scene.left_arm.spawn.rigid_props.disable_gravity = True
+    self.scene.right_arm.spawn.rigid_props.disable_gravity = True
+```
+
+**第二步**：runner script 中 USD stage 遍历（禁用所有子 link prim）：
+```python
+import omni.usd
+from pxr import PhysxSchema, UsdPhysics
+_stage = omni.usd.get_context().get_stage()
+for _prim in _stage.Traverse():
+    if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
+        PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
+```
+
+- 单臂场景 prim 路径含 `Robot`
+- 双臂场景 prim 路径含 `Left_Robot` 和 `Right_Robot`，均可被 `"Robot" in path` 匹配
+
+### 2. FK 校准（获取 rest pose 的 EE 世界坐标）
+
+`task_done()` / `cloth_folded()` 要求机械臂关节在 `SO101_FOLLOWER_REST_POSE_RANGE` 内：
+- shoulder_lift ≈ -100°，elbow_flex ≈ 90°，wrist_flex ≈ 50°（并非零位）
+
+IK 控制 EE 位置，无法直接保证关节角度，因此需要 FK 校准：先把关节
+teleport 到 rest pose，读取此时的 EE 世界坐标，再让状态机在 return home
+阶段命令 IK 趋向该位置（给足够步数收敛，约 300 步 / 5 秒）。
+
+```python
+# 正确 API（teleport 关节到指定位置）
+robot.write_joint_state_to_sim(
+    position=_rest_joint_pos,          # (num_envs, num_joints)
+    velocity=torch.zeros_like(...),
+)
+env.sim.step(render=False)
+env.scene.update(dt=env.physics_dt)
+rest_ee_pos = robot.data.body_pos_w[:, -1, :].clone()  # gripper EE 世界坐标
+```
+
+> ⚠️ `write_joint_position_target_to_sim` 不存在，正确方法是 `write_joint_state_to_sim`。
+
+### 3. ClothObject 在 ManagerBased env 中的初始化
+
+`FoldClothBiArmEnv`（Direct env）会自动初始化 ClothObject，但 ManagerBased env
+不会。runner script 中需手动添加：
+```python
+from leisaac.enhance.assets import ClothObject
+if not hasattr(env.scene, "particle_objects"):
+    env.scene.particle_objects = {}
+env.scene.particle_objects["cloths"] = ClothObject(
+    cfg=env.cfg.scene.cloths,
+    scene=env.scene,
+)
+env.scene.particle_objects["cloths"].initialize()
+```
+
+每次 episode 重置时，需在 `env.reset()` **之前**先 reset cloth（与 Direct env 一致）：
+```python
+env.scene.particle_objects["cloths"].reset()
+env.reset()
+```
+
+### 4. 布料关键点
+
+```python
+_CLOTH_KEYPOINTS = [159789, 120788, 115370, 159716, 121443, 112382]
+# 顺序: left_sleeve, left_shoulder, left_hem, right_sleeve, right_shoulder, right_hem
+
+cloth = env.scene.particle_objects["cloths"]
+kp = cloth.point_positions[:, _CLOTH_KEYPOINTS, :]  # (num_envs, 6, 3) 世界坐标
+```
+
+`cloth_folded()` 检查的距离条件（threshold=0.20m）：
+- left_sleeve → right_shoulder
+- right_sleeve → left_shoulder
+- left_hem → left_shoulder
+- right_hem → right_shoulder
+
+---
+
+## 本次会话新增/修改的文件
+
+| 文件 | 类型 | 内容 |
+|---|---|---|
+| `state_machine/fold_cloth.py` | 新建 | `FoldClothStateMachine`，1260 步，13 个折叠阶段 |
+| `state_machine/__init__.py` | 修改 | 新增 `FoldClothStateMachine` 导出 |
+| `devices/action_process.py` | 修改 | 新增 `bi_so101_state_machine` 设备分支（16D IK） |
+| `tasks/template/bi_arm_env_cfg.py` | 修改 | `use_teleop_device()` 新增 `bi_so101_state_machine` 的重力禁用 |
+| `tasks/template/direct/bi_arm_env.py` | 修改 | 同上（Direct env 版本） |
+| `scripts/.../state_machine/fold_cloth.py` | 新建 | fold_cloth 数据采集 runner script |
+
+### 前一次会话（pick_orange）修改的文件
+
+| 文件 | 内容 |
+|---|---|
+| `state_machine/base.py` | `StateMachineBase` 抽象基类 |
+| `state_machine/pick_orange.py` | `PickOrangeStateMachine`，含 rest pose 回位阶段 |
+| `scripts/.../state_machine/pick_orange.py` | 重力禁用 + FK 校准 + rest_ee_pos_world 传参 |
+
+---
+
+## 任务注册名
+
+| gym 注册 ID | 说明 |
+|---|---|
+| `LeIsaac-SO101-PickOrange-v0` | 单臂拾橙，ManagerBased |
+| `LeIsaac-SO101-FoldCloth-BiArm-v0` | 双臂叠衣，ManagerBased（推荐用于状态机，有 IK action manager） |
+| `LeIsaac-SO101-FoldCloth-BiArm-Direct-v0` | 双臂叠衣，Direct env（action space = 12D 关节位置） |
+
+---
+
+## 运行方式
+
+```bash
+# pick_orange（单臂）
+python scripts/environments/state_machine/pick_orange.py \
+    --task LeIsaac-SO101-PickOrange-v0 --num_envs 1 --record \
+    --dataset_file ./datasets/pick_orange.hdf5 --num_demos 10
+
+# fold_cloth（双臂）
+python scripts/environments/state_machine/fold_cloth.py \
+    --task LeIsaac-SO101-FoldCloth-BiArm-v0 --num_envs 1 --record \
+    --dataset_file ./datasets/fold_cloth.hdf5 --num_demos 10
+```
+
+---
+
+## 注意事项
+
+- `preprocess_device_action()` 在 runner script 流程中**不会被调用**，无需为
+  `bi_so101_state_machine` 添加对应分支（除非日后接入真实设备遥操作）
+- `env_utils.py:dynamic_reset_gripper_effort_limit_sim()` 对 fold_cloth 无效
+  （`dynamic_reset_gripper_effort_limit = False`），暂无需修改
+- fold_cloth 的 `decimation = 2`，实际物理步频为 `sim_dt / 2`，注意时序计算
+- 状态机的步数常量（`MAX_STEPS`、各阶段边界）均基于 60 Hz 控制频率设计
diff --git a/replay.sh b/replay.sh
new file mode 100755
index 00000000..c2006d41
--- /dev/null
+++ b/replay.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# 默认参数（可覆盖）
+TASK_NAME=${1:-LeIsaac-SO101-PickOrange-v0}
+DATASET=${2:-./datasets/dataset_test.hdf5}
+EPISODE_ID=${3:-1}
+
+cd /media/zihan-gao/leisaac || exit
+
+python scripts/environments/teleoperation/replay.py \
+    --dataset_file=${DATASET} \
+    --task=${TASK_NAME} \
+    --num_envs=1 \
+    --device=cuda \
+    --enable_cameras \
+    --select_episodes ${EPISODE_ID} \
+    --replay_mode=action \
+    --task_type=so101_state_machine
\ No newline at end of file
diff --git a/run_task.sh b/run_task.sh
new file mode 100755
index 00000000..af7e96f3
--- /dev/null
+++ b/run_task.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+TASK_NAME=${1:-LeIsaac-SO101-PickOrange-v0}
+DATASET=${2:-./datasets/dataset_test.hdf5}
+
+cd /media/zihan-gao/leisaac || exit
+
+python scripts/environments/state_machine/pick_orange.py \
+    --dataset_file=${DATASET} \
+    --task=${TASK_NAME} \
+    --num_envs=1 \
+    --device=cuda \
+    --enable_cameras \
+    --record \
+    --num_demos=1
\ No newline at end of file
diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 3720f82a..3ad9f0ce 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -42,6 +42,7 @@
 from isaaclab.managers import DatasetExportMode, SceneEntityCfg, TerminationTermCfg
 from isaaclab_tasks.utils import parse_env_cfg
 
+from leisaac.assets.robots.lerobot import SO101_FOLLOWER_REST_POSE_RANGE
 from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
 from leisaac.state_machine import PickOrangeStateMachine
 from leisaac.tasks.pick_orange.mdp import task_done
@@ -254,6 +255,7 @@ def main() -> None:
     sm = PickOrangeStateMachine(num_oranges=3, rest_ee_pos_world=_rest_ee_pos_world)
     sm.reset()
     start_record_state = False
+    _home_start_pos: torch.Tensor | None = None  # joint positions captured at start of return home
 
     while simulation_app.is_running() and not simulation_app.is_exiting():
         if env.cfg.dynamic_reset_gripper_effort_limit:
@@ -262,6 +264,31 @@ def main() -> None:
         if sm.is_episode_done:
             # --- Check whether the current episode is considered successful ---
             try:
+                # --- debug: print actual joint positions at episode end (before teleport) ---
+                _dbg_joint_pos_deg = _robot.data.joint_pos[0] / torch.pi * 180.0
+                _dbg_joint_names = list(_robot.data.joint_names)
+                print("\n[DEBUG] Episode ended. Actual joint positions (before teleport to rest pose):")
+                print(f"  {'joint':<15} {'current(deg)':>13}  {'range':>20}  {'ok?':>4}")
+                for _joint_name, (_lo, _hi) in SO101_FOLLOWER_REST_POSE_RANGE.items():
+                    _idx = _dbg_joint_names.index(_joint_name)
+                    _cur = _dbg_joint_pos_deg[_idx].item()
+                    _ok = "YES" if _lo < _cur < _hi else "NO "
+                    print(f"  {_joint_name:<15} {_cur:>13.2f}  ({_lo:>7.1f}, {_hi:>7.1f})  {_ok}")
+                # --- end debug ---
+
+                # IK cannot reliably converge to the correct joint configuration: the same
+                # EE position can be reached by many different joint solutions (IK is not
+                # unique). Directly teleport joints to rest pose before the success check,
+                # reusing _rest_joint_pos computed during FK calibration at startup.
+                _robot.write_joint_state_to_sim(
+                    position=_rest_joint_pos,
+                    velocity=torch.zeros_like(_rest_joint_pos),
+                )
+                # Do NOT call env.sim.step() here: that would run physics with stale
+                # actuator targets from the last env.step(), undoing the teleport.
+                # Just refresh the scene data cache so task_done() reads the new positions.
+                env.scene.update(dt=env.physics_dt)
+
                 print("Completed one cycle. Checking task success status...")
                 success_tensor = task_done(
                     env,
@@ -326,8 +353,32 @@ def main() -> None:
                     print("Start recording.")
                 start_record_state = True
             actions = sm.get_action(env)
+            # During the last orange's return home phase (orange 3, step 620-919): linearly
+            # interpolate joint positions from the configuration at step 620 to rest pose at
+            # step 919.
+            if sm.orange_now == 3 and sm.step_count >= 620:
+                if sm.step_count == 620:
+                    _home_start_pos = _robot.data.joint_pos.clone()
+                _alpha = (sm.step_count - 620) / 299.0  # 0.0 at step 620, 1.0 at step 919
+                _blended_pos = _home_start_pos + (_rest_joint_pos - _home_start_pos) * _alpha
+                _robot.write_joint_state_to_sim(
+                    position=_blended_pos,
+                    velocity=torch.zeros_like(_blended_pos),
+                )
+            _orange_before_step = sm.orange_now
             env.step(actions)
             sm.advance()
+            # For oranges 1 and 2: skip the return home phase (steps 620-919) without
+            # simulation — there is no point returning home when the next orange follows
+            # immediately. Fast-forward advance() until the orange index increments.
+            if (
+                not sm.is_episode_done
+                and sm.orange_now == _orange_before_step
+                and sm.orange_now < 3
+                and sm.step_count >= 620
+            ):
+                while sm.orange_now == _orange_before_step and not sm.is_episode_done:
+                    sm.advance()
 
         if rate_limiter:
             rate_limiter.sleep(env)
diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index f14cc6d7..1fb07131 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -124,9 +124,6 @@ def get_action(self, env) -> torch.Tensor:
 
         # Capture initial EE position from robot body data at episode start (step 0, orange 1).
         # body_pos_w is always valid after env.reset() and does not suffer from stale sensor data.
-        robot = env.scene["robot"]
-        robot.write_joint_damping_to_sim(damping=10.0)
-        
         if self._orange_now == 1 and step == 0:
             self._initial_ee_pos = env.scene["robot"].data.body_pos_w[:, -1, :].clone()
             # 或者直接设置 drive params（依据你 robot API）
diff --git a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
index b11664a3..a95b0adf 100644
--- a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
@@ -1,3 +1,4 @@
+import dataclasses
 from dataclasses import MISSING
 from typing import Any
 
@@ -196,6 +197,13 @@ def use_teleop_device(self, teleop_device) -> None:
         self.actions = init_action_cfg(self.actions, device=teleop_device)
         if teleop_device in ["keyboard", "gamepad", "so101_state_machine"]:
             self.scene.robot.spawn.rigid_props.disable_gravity = True
+        if teleop_device in ["so101_state_machine"]:
+            self.scene.robot.actuators["sts3215-arm"] = dataclasses.replace(
+                self.scene.robot.actuators["sts3215-arm"], damping=10.0
+            )
+            self.scene.robot.actuators["sts3215-gripper"] = dataclasses.replace(
+                self.scene.robot.actuators["sts3215-gripper"], damping=10.0
+            )
 
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:
         return preprocess_device_action(action, teleop_device)

From 30f8061a0ab4024511774dc3256b530d9f624dc1 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 15:42:23 -0500
Subject: [PATCH 33/68] Add state_machine/replay.py

---
 replay.sh                                     |   2 +-
 .../environments/state_machine/pick_orange.py |  13 --
 scripts/environments/state_machine/replay.py  | 207 ++++++++++++++++++
 .../leisaac/state_machine/pick_orange.py      |   3 +
 .../tasks/template/single_arm_env_cfg.py      |   9 -
 5 files changed, 211 insertions(+), 23 deletions(-)
 create mode 100644 scripts/environments/state_machine/replay.py

diff --git a/replay.sh b/replay.sh
index c2006d41..e3a71bb2 100755
--- a/replay.sh
+++ b/replay.sh
@@ -6,7 +6,7 @@ EPISODE_ID=${3:-1}
 
 cd /media/zihan-gao/leisaac || exit
 
-python scripts/environments/teleoperation/replay.py \
+python scripts/environments/state_machine/replay.py \
     --dataset_file=${DATASET} \
     --task=${TASK_NAME} \
     --num_envs=1 \
diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index 3ad9f0ce..a858f4c7 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -42,7 +42,6 @@
 from isaaclab.managers import DatasetExportMode, SceneEntityCfg, TerminationTermCfg
 from isaaclab_tasks.utils import parse_env_cfg
 
-from leisaac.assets.robots.lerobot import SO101_FOLLOWER_REST_POSE_RANGE
 from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
 from leisaac.state_machine import PickOrangeStateMachine
 from leisaac.tasks.pick_orange.mdp import task_done
@@ -264,18 +263,6 @@ def main() -> None:
         if sm.is_episode_done:
             # --- Check whether the current episode is considered successful ---
             try:
-                # --- debug: print actual joint positions at episode end (before teleport) ---
-                _dbg_joint_pos_deg = _robot.data.joint_pos[0] / torch.pi * 180.0
-                _dbg_joint_names = list(_robot.data.joint_names)
-                print("\n[DEBUG] Episode ended. Actual joint positions (before teleport to rest pose):")
-                print(f"  {'joint':<15} {'current(deg)':>13}  {'range':>20}  {'ok?':>4}")
-                for _joint_name, (_lo, _hi) in SO101_FOLLOWER_REST_POSE_RANGE.items():
-                    _idx = _dbg_joint_names.index(_joint_name)
-                    _cur = _dbg_joint_pos_deg[_idx].item()
-                    _ok = "YES" if _lo < _cur < _hi else "NO "
-                    print(f"  {_joint_name:<15} {_cur:>13.2f}  ({_lo:>7.1f}, {_hi:>7.1f})  {_ok}")
-                # --- end debug ---
-
                 # IK cannot reliably converge to the correct joint configuration: the same
                 # EE position can be reached by many different joint solutions (IK is not
                 # unique). Directly teleport joints to rest pose before the success check,
diff --git a/scripts/environments/state_machine/replay.py b/scripts/environments/state_machine/replay.py
new file mode 100644
index 00000000..e8744afc
--- /dev/null
+++ b/scripts/environments/state_machine/replay.py
@@ -0,0 +1,207 @@
+"""Script to replay recorded state-machine demonstrations."""
+
+import multiprocessing
+
+if multiprocessing.get_start_method() != "spawn":
+    multiprocessing.set_start_method("spawn", force=True)
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="Replay state-machine recorded demonstrations.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to simulate.")
+parser.add_argument("--step_hz", type=int, default=60, help="Environment stepping rate in Hz.")
+parser.add_argument(
+    "--dataset_file", type=str, default="./datasets/dataset.hdf5", help="File path to load recorded demos."
+)
+parser.add_argument(
+    "--replay_mode",
+    type=str,
+    default="action",
+    choices=["action", "state"],
+    help="Replay mode: action replays actions, state replays joint states.",
+)
+parser.add_argument(
+    "--select_episodes",
+    type=int,
+    nargs="+",
+    default=[],
+    help="List of episode indices to replay. Empty = replay all.",
+)
+parser.add_argument(
+    "--task_type",
+    type=str,
+    default=None,
+    help=(
+        "State machine device type used during recording, e.g. 'so101_state_machine' or "
+        "'bi_so101_state_machine'. If not set, inferred from the task name."
+    ),
+)
+
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+app_launcher = AppLauncher(vars(args_cli))
+simulation_app = app_launcher.app
+
+import contextlib
+import os
+import time
+
+import gymnasium as gym
+import torch
+from isaaclab.envs import ManagerBasedRLEnv
+from isaaclab.utils.datasets import EpisodeData, HDF5DatasetFileHandler
+from isaaclab_tasks.utils import parse_env_cfg
+from leisaac.utils.env_utils import get_task_type
+
+import leisaac  # noqa: F401
+
+
+class RateLimiter:
+    def __init__(self, hz):
+        self.hz = hz
+        self.last_time = time.time()
+        self.sleep_duration = 1.0 / hz
+        self.render_period = min(0.0166, self.sleep_duration)
+
+    def sleep(self, env):
+        next_wakeup_time = self.last_time + self.sleep_duration
+        while time.time() < next_wakeup_time:
+            time.sleep(self.render_period)
+            env.sim.render()
+        self.last_time = self.last_time + self.sleep_duration
+        if self.last_time < time.time():
+            while self.last_time < time.time():
+                self.last_time += self.sleep_duration
+
+
+def get_next_action(episode_data: EpisodeData, return_state: bool = False, task_type: str = None):
+    if return_state:
+        next_state = episode_data.get_next_state()
+        if next_state is None:
+            return None
+        if task_type == "bi_so101_state_machine":
+            left_joint_pos = next_state["articulation"]["left_arm"]["joint_position"]
+            right_joint_pos = next_state["articulation"]["right_arm"]["joint_position"]
+            return torch.cat([left_joint_pos, right_joint_pos], dim=0)
+        else:
+            return next_state["articulation"]["robot"]["joint_position"]
+    else:
+        return episode_data.get_next_action()
+
+
+def apply_damping(env, task_type: str):
+    """Apply joint damping each step to match state-machine recording behavior."""
+    if task_type == "so101_state_machine":
+        env.scene["robot"].write_joint_damping_to_sim(damping=10.0)
+    elif task_type == "bi_so101_state_machine":
+        env.scene["left_arm"].write_joint_damping_to_sim(damping=10.0)
+        env.scene["right_arm"].write_joint_damping_to_sim(damping=10.0)
+
+
+def main():
+    if not os.path.exists(args_cli.dataset_file):
+        raise FileNotFoundError(f"Dataset file not found: {args_cli.dataset_file}")
+
+    dataset_file_handler = HDF5DatasetFileHandler()
+    dataset_file_handler.open(args_cli.dataset_file)
+    episode_count = dataset_file_handler.get_num_episodes()
+
+    if episode_count == 0:
+        print("No episodes found in the dataset.")
+        return
+
+    episode_indices_to_replay = args_cli.select_episodes or list(range(episode_count))
+    num_envs = args_cli.num_envs
+
+    task_type = get_task_type(args_cli.task, args_cli.task_type)
+
+    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=num_envs)
+    env_cfg.use_teleop_device(task_type)
+    env_cfg.recorders = {}
+    env_cfg.terminations = {}
+
+    env: ManagerBasedRLEnv = gym.make(args_cli.task, cfg=env_cfg).unwrapped
+
+    # Disable gravity for all robot link prims (matches state-machine recording setup).
+    import omni.usd
+    from pxr import PhysxSchema, UsdPhysics
+
+    _stage = omni.usd.get_context().get_stage()
+    for _prim in _stage.Traverse():
+        if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
+            PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
+
+    idle_action = torch.zeros(env.action_space.shape)
+
+    if hasattr(env, "initialize"):
+        env.initialize()
+    env.reset()
+
+    rate_limiter = RateLimiter(args_cli.step_hz)
+    episode_names = list(dataset_file_handler.get_episode_names())
+    replayed_episode_count = 0
+
+    with contextlib.suppress(KeyboardInterrupt) and torch.inference_mode():
+        while simulation_app.is_running() and not simulation_app.is_exiting():
+            env_episode_data_map = {i: EpisodeData() for i in range(num_envs)}
+            has_next_action = True
+            while has_next_action:
+                actions = idle_action
+                has_next_action = False
+                for env_id in range(num_envs):
+                    env_next_action = get_next_action(
+                        env_episode_data_map[env_id],
+                        return_state=args_cli.replay_mode == "state",
+                        task_type=task_type,
+                    )
+                    if env_next_action is None:
+                        next_episode_index = None
+                        while episode_indices_to_replay:
+                            next_episode_index = episode_indices_to_replay.pop(0)
+                            if next_episode_index < episode_count:
+                                break
+                            next_episode_index = None
+
+                        if next_episode_index is not None:
+                            replayed_episode_count += 1
+                            print(f"{replayed_episode_count:4}: Loading #{next_episode_index} episode to env_{env_id}")
+                            episode_data = dataset_file_handler.load_episode(
+                                episode_names[next_episode_index], env.device
+                            )
+                            env_episode_data_map[env_id] = episode_data
+                            initial_state = episode_data.get_initial_state()
+                            env.reset_to(
+                                initial_state,
+                                torch.tensor([env_id], device=env.device),
+                                seed=int(episode_data.seed) if episode_data.seed is not None else None,
+                                is_relative=True,
+                            )
+                            env_next_action = get_next_action(
+                                env_episode_data_map[env_id],
+                                return_state=args_cli.replay_mode == "state",
+                                task_type=task_type,
+                            )
+                            has_next_action = True
+                        else:
+                            continue
+                    else:
+                        has_next_action = True
+                    actions[env_id] = env_next_action
+
+                # Apply damping every step to match state-machine recording behavior.
+                apply_damping(env, task_type)
+
+                env.step(actions)
+                rate_limiter.sleep(env)
+            break
+
+    print(f"Finished replaying {replayed_episode_count} episode{'s' if replayed_episode_count != 1 else ''}.")
+    env.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index 1fb07131..d17b4e54 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -105,6 +105,9 @@ def get_action(self, env) -> torch.Tensor:
             ``[pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w, gripper]``
             expressed in the robot base frame.
         """
+        robot = env.scene["robot"]
+        robot.write_joint_damping_to_sim(damping=10.0)
+
         device = env.device
         num_envs = env.num_envs
         step = self._step_count
diff --git a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
index a95b0adf..2cabeeb0 100644
--- a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
@@ -1,4 +1,3 @@
-import dataclasses
 from dataclasses import MISSING
 from typing import Any
 
@@ -197,14 +196,6 @@ def use_teleop_device(self, teleop_device) -> None:
         self.actions = init_action_cfg(self.actions, device=teleop_device)
         if teleop_device in ["keyboard", "gamepad", "so101_state_machine"]:
             self.scene.robot.spawn.rigid_props.disable_gravity = True
-        if teleop_device in ["so101_state_machine"]:
-            self.scene.robot.actuators["sts3215-arm"] = dataclasses.replace(
-                self.scene.robot.actuators["sts3215-arm"], damping=10.0
-            )
-            self.scene.robot.actuators["sts3215-gripper"] = dataclasses.replace(
-                self.scene.robot.actuators["sts3215-gripper"], damping=10.0
-            )
-
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:
         return preprocess_device_action(action, teleop_device)
 

From 2c8a938864e65f45338770a32d7b14c54ef38355 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 17:02:33 -0500
Subject: [PATCH 34/68] Add readme

---
 STATE_MACHINE_README.md    | 231 ++++++++++++++++++++++++++++++++++++
 STATE_MACHINE_README_CN.md | 232 +++++++++++++++++++++++++++++++++++++
 replay.sh                  |   5 +-
 run_task.sh                |   3 -
 4 files changed, 464 insertions(+), 7 deletions(-)
 create mode 100644 STATE_MACHINE_README.md
 create mode 100644 STATE_MACHINE_README_CN.md

diff --git a/STATE_MACHINE_README.md b/STATE_MACHINE_README.md
new file mode 100644
index 00000000..a3217d78
--- /dev/null
+++ b/STATE_MACHINE_README.md
@@ -0,0 +1,231 @@
+# State Machine: Recording & Replay Guide
+
+## Overview
+
+The state machine module provides automated data collection for manipulation tasks without human teleoperation. It runs a scripted policy, records demonstrations to HDF5 datasets, and supports replaying those demonstrations.
+
+```
+scripts/environments/state_machine/
+├── pick_orange.py      # Runner script: recording (PickOrange task)
+├── fold_cloth.py       # Runner script: recording (FoldCloth task)
+└── replay.py           # Replay script for state-machine demonstrations
+
+source/leisaac/leisaac/state_machine/
+├── base.py             # StateMachineBase abstract class
+├── pick_orange.py      # PickOrangeStateMachine
+└── fold_cloth.py       # FoldClothStateMachine
+```
+
+---
+
+## Recording
+
+### Quick Start
+
+```bash
+# Single-arm pick-orange (3 oranges → plate)
+python scripts/environments/state_machine/pick_orange.py \
+    --task LeIsaac-SO101-PickOrange-v0 \
+    --num_envs 1 \
+    --record \
+    --dataset_file ./datasets/pick_orange.hdf5 \
+    --num_demos 10
+```
+
+### How It Works
+
+```
+Runner script
+  └── gym.make(task, cfg=env_cfg)           # create env
+       └── env_cfg.use_teleop_device(       # configure IK action manager
+               "so101_state_machine")
+  └── sm = PickOrangeStateMachine(...)
+  └── Main loop:
+       actions = sm.get_action(env)         # state machine computes 8D IK action
+       env.step(actions)                    # steps sim + recorder captures data
+       sm.advance()                         # advance state machine
+```
+
+The runner calls `env.step(actions)` directly with the state machine's output tensor.
+`preprocess_device_action()` is **not** called (that is only used in the teleoperation pipeline).
+
+### Action Format
+
+| Device | Dims | Layout |
+|---|---|---|
+| `so101_state_machine` | 8D | `[pos(3), quat(4), gripper(1)]` in robot base frame |
+| `bi_so101_state_machine` | 16D | `[left_pos(3), left_quat(4), left_grip(1), right_pos(3), right_quat(4), right_grip(1)]` |
+
+IK targets are expressed in the **robot base local frame**, not world frame:
+```python
+diff_w = target_pos_w - base_pos_w
+target_pos_local = quat_apply(quat_inv(base_quat_w), diff_w)
+```
+
+---
+
+## Dataset Structure
+
+Episodes are stored in HDF5 format under the `data/` group:
+
+```
+data/
+├── demo_0      # EMPTY — artifact of the initial env.reset() at startup
+│   └── initial_state        # only field; num_samples=0; no actions
+├── demo_1      # First real demonstration
+│   ├── actions              # (T, 8) — IK pose targets passed to env.step()
+│   ├── processed_actions    # (T, 6) — joint targets computed by IK solver
+│   ├── initial_state        # scene state at episode start (for reset_to)
+│   ├── states/              # per-step articulation/sensor states
+│   └── obs/                 # per-step observations (images, joint_pos, etc.)
+├── demo_2      # Second real demonstration
+...
+```
+
+**Important:** `demo_0` is always empty. The **K-th recorded demonstration** is stored as `demo_K`.
+
+When replaying, use `--select_episodes K` to load `demo_K`:
+```bash
+--select_episodes 1   # → demo_1, the first real episode
+--select_episodes 2   # → demo_2, the second real episode
+```
+
+---
+
+## Replay
+
+### Quick Start
+
+```bash
+# replay.sh wrapper (default: demo_3 of dataset_test.hdf5)
+bash replay.sh
+
+# explicit call
+python scripts/environments/state_machine/replay.py \
+    --task LeIsaac-SO101-PickOrange-v0 \
+    --dataset_file ./datasets/pick_orange.hdf5 \
+    --task_type so101_state_machine \
+    --select_episodes 1 \
+    --device cuda \
+    --enable_cameras \
+    --replay_mode action
+```
+
+### Replay Modes
+
+| Mode | Data replayed | Use case |
+|---|---|---|
+| `action` | `HDF5["actions"]` (8D IK pose targets) | IK-based devices (`so101_state_machine`) |
+| `state` | `HDF5["states"]["articulation"]["robot"]["joint_position"]` | Joint-position devices (`so101leader`) |
+
+For `so101_state_machine`, **only `action` mode is valid**. The IK action manager expects an 8D pose target; passing raw 6D joint positions (state mode) would cause a dimension mismatch.
+
+---
+
+## Technical Details
+
+### 1. Gravity Disable (Two Steps Required)
+
+IsaacLab's `disable_gravity` flag in `ArticulationCfg.spawn.rigid_props` only writes to the articulation root prim. Individual link prims each carry their own `PhysicsRigidBodyAPI` with gravity still enabled.
+
+**Step 1** — Config level (in `use_teleop_device()`):
+```python
+self.scene.robot.spawn.rigid_props.disable_gravity = True
+```
+
+**Step 2** — USD stage traversal (in runner script, after `gym.make()`):
+```python
+_stage = omni.usd.get_context().get_stage()
+for _prim in _stage.Traverse():
+    if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
+        PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
+```
+
+Both steps must be present. The same pattern applies to bi-arm tasks (`"Robot"` matches both `Left_Robot` and `Right_Robot`).
+
+### 2. Joint Damping
+
+The IK controller requires higher damping than the robot's default (0.6 N·m·s/rad) for stable, smooth trajectories. Damping is set to **10.0 N·m·s/rad** every step via:
+
+```python
+# In PickOrangeStateMachine.get_action():
+robot = env.scene["robot"]
+robot.write_joint_damping_to_sim(damping=10.0)
+```
+
+This must also be applied during **replay** to match recording conditions. The state-machine replay script (`scripts/environments/state_machine/replay.py`) calls `apply_damping(env, task_type)` before every `env.step()`.
+
+### 3. FK Calibration for Rest Pose
+
+The success check (`task_done()`) requires joints to be within `SO101_FOLLOWER_REST_POSE_RANGE` (e.g., shoulder_lift ≈ −100°, not 0°). Since IK controls EE position and cannot guarantee a specific joint configuration, the runner:
+
+1. Teleports joints to the rest pose using `write_joint_state_to_sim()` before `task_done()`.
+2. Calls `env.scene.update()` (not `env.sim.step()`) to refresh the data cache without letting the physics overwrite the teleport via stale actuator targets.
+
+```python
+robot.write_joint_state_to_sim(
+    position=_rest_joint_pos,
+    velocity=torch.zeros_like(_rest_joint_pos),
+)
+env.scene.update(dt=env.physics_dt)
+success = task_done(env, ...)
+```
+
+### 4. Return-Home Strategy
+
+After placing an orange, the robot must return to rest pose for the success check.
+
+**IK alone is insufficient:** the IK solver can reach the same end-effector position via many different joint configurations (IK non-uniqueness). Commanding the rest-pose EE position via IK from a post-placement configuration typically lands in a different joint solution.
+
+**Current approach for orange 3 (the last orange):**
+
+During steps 620–919, joint positions are linearly interpolated from the post-placement configuration to the rest pose:
+
+```python
+if sm.orange_now == 3 and sm.step_count >= 620:
+    if sm.step_count == 620:
+        _home_start_pos = _robot.data.joint_pos.clone()
+    alpha = (sm.step_count - 620) / 299.0      # 0.0 → 1.0
+    blended = _home_start_pos + (_rest_joint_pos - _home_start_pos) * alpha
+    _robot.write_joint_state_to_sim(position=blended, velocity=zeros)
+# IK actions from sm.get_action() are still passed to env.step() for recording
+env.step(actions)
+```
+
+**Replay limitation:** the recorded `actions` are 8D IK pose targets, not the blended joint positions. During replay, the IK solver starts from a different joint state and may follow a different path to the same EE target. The `processed_actions` field in the HDF5 file contains the actual IK-computed joint targets, but the current replay infrastructure uses `actions`, not `processed_actions`.
+
+**Oranges 1 and 2:** return-home is skipped entirely using `sm.advance()` without `env.step()`. This avoids wasted simulation time when the next orange follows immediately.
+
+### 5. IK Action Coordinate Frame
+
+IK targets must be expressed in the **robot base local frame**, not the world frame:
+
+```python
+diff_w        = target_pos_w - robot_base_pos_w
+target_pos_lo = quat_apply(quat_inv(robot_base_quat_w), diff_w)
+```
+
+### 6. Episode Numbering
+
+The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on the very first `env.reset()` call (before any steps). This becomes `demo_0`.
+
+| `--select_episodes N` | Episode loaded | Content |
+|---|---|---|
+| 0 | `demo_0` | Empty (no actions) — causes `TypeError` |
+| 1 | `demo_1` | 1st real demonstration |
+| K | `demo_K` | K-th real demonstration |
+
+---
+
+## File Reference
+
+| File | Purpose |
+|---|---|
+| `scripts/environments/state_machine/pick_orange.py` | Recording runner for PickOrange |
+| `scripts/environments/state_machine/fold_cloth.py` | Recording runner for FoldCloth |
+| `scripts/environments/state_machine/replay.py` | State-machine replay (with damping) |
+| `source/leisaac/leisaac/state_machine/base.py` | `StateMachineBase` abstract class |
+| `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
+| `source/leisaac/leisaac/state_machine/fold_cloth.py` | `FoldClothStateMachine` |
+| `replay.sh` | Shell wrapper for replay.py |
+| `run_task.sh` | Shell wrapper for pick_orange recording |
diff --git a/STATE_MACHINE_README_CN.md b/STATE_MACHINE_README_CN.md
new file mode 100644
index 00000000..81ca5676
--- /dev/null
+++ b/STATE_MACHINE_README_CN.md
@@ -0,0 +1,232 @@
+# 状态机模块：录制与回放说明
+
+## 概述
+
+状态机模块提供无需人工遥操作的自动化数据采集能力。它运行脚本化的策略，将演示数据录制为 HDF5 数据集，并支持回放已录制的演示。
+
+```
+scripts/environments/state_machine/
+├── pick_orange.py      # 录制 Runner 脚本（拾橙任务）
+├── fold_cloth.py       # 录制 Runner 脚本（叠衣任务）
+└── replay.py           # 状态机演示回放脚本
+
+source/leisaac/leisaac/state_machine/
+├── base.py             # StateMachineBase 抽象基类
+├── pick_orange.py      # PickOrangeStateMachine
+└── fold_cloth.py       # FoldClothStateMachine
+```
+
+---
+
+## 录制
+
+### 快速开始
+
+```bash
+# 单臂拾橙（3 个橘子 → 盘子）
+python scripts/environments/state_machine/pick_orange.py \
+    --task LeIsaac-SO101-PickOrange-v0 \
+    --num_envs 1 \
+    --record \
+    --dataset_file ./datasets/pick_orange.hdf5 \
+    --num_demos 10
+```
+
+### 工作原理
+
+```
+Runner 脚本
+  └── gym.make(task, cfg=env_cfg)           # 创建环境
+       └── env_cfg.use_teleop_device(       # 配置 IK action manager
+               "so101_state_machine")
+  └── sm = PickOrangeStateMachine(...)
+  └── 主循环:
+       actions = sm.get_action(env)         # 状态机计算 8D IK 动作
+       env.step(actions)                    # 推进仿真 + 录制器采集数据
+       sm.advance()                         # 推进状态机
+```
+
+Runner 脚本直接将状态机输出的动作张量传给 `env.step(actions)`，**不经过** `preprocess_device_action()`（后者仅在遥操作流程中使用）。
+
+### 动作格式
+
+| 设备 | 维度 | 格式 |
+|---|---|---|
+| `so101_state_machine` | 8D | `[pos(3), quat(4), gripper(1)]`，机械臂 base 局部坐标系 |
+| `bi_so101_state_machine` | 16D | `[左_pos(3), 左_quat(4), 左_grip(1), 右_pos(3), 右_quat(4), 右_grip(1)]` |
+
+IK 目标位置需表示在**机械臂 base 局部坐标系**下，而非世界坐标系：
+
+```python
+diff_w = target_pos_w - base_pos_w
+target_pos_local = quat_apply(quat_inv(base_quat_w), diff_w)
+```
+
+---
+
+## 数据集结构
+
+演示数据以 HDF5 格式存储在 `data/` 组中：
+
+```
+data/
+├── demo_0      # 空 episode —— 初始 env.reset() 产生的副产物
+│   └── initial_state        # 仅有此字段；num_samples=0；无 actions
+├── demo_1      # 第一条真实演示
+│   ├── actions              # (T, 8) —— 传入 env.step() 的 IK pose 目标
+│   ├── processed_actions    # (T, 6) —— IK 求解器算出的关节目标位置
+│   ├── initial_state        # episode 开始时的场景状态（用于 reset_to）
+│   ├── states/              # 每步的关节/传感器状态
+│   └── obs/                 # 每步的观测（图像、关节位置等）
+├── demo_2      # 第二条真实演示
+...
+```
+
+**重要：** `demo_0` 永远是空的。**第 K 条录制的演示**存储为 `demo_K`。
+
+回放时用 `--select_episodes K` 加载 `demo_K`：
+
+```bash
+--select_episodes 1   # → demo_1，第一条真实演示
+--select_episodes 2   # → demo_2，第二条真实演示
+```
+
+---
+
+## 回放
+
+### 快速开始
+
+```bash
+# replay.sh 封装脚本（默认：dataset_test.hdf5 的 demo_3）
+bash replay.sh
+
+# 显式调用
+python scripts/environments/state_machine/replay.py \
+    --task LeIsaac-SO101-PickOrange-v0 \
+    --dataset_file ./datasets/pick_orange.hdf5 \
+    --task_type so101_state_machine \
+    --select_episodes 1 \
+    --device cuda \
+    --enable_cameras \
+    --replay_mode action
+```
+
+### 回放模式
+
+| 模式 | 回放的数据 | 适用场景 |
+|---|---|---|
+| `action` | `HDF5["actions"]`（8D IK pose 目标） | IK 控制设备（`so101_state_machine`） |
+| `state` | `HDF5["states"]["articulation"]["robot"]["joint_position"]` | 关节位置控制设备（`so101leader`） |
+
+对 `so101_state_machine` 而言，**只有 `action` 模式有效**。IK action manager 期望 8D pose 输入，而 `state` 模式传入的是 6D 关节位置，会导致维度不匹配。
+
+---
+
+## 技术细节
+
+### 1. 重力禁用（两步缺一不可）
+
+IsaacLab 的 `disable_gravity` 标志只写入关节根 prim，各子 link prim 各自带有 `PhysicsRigidBodyAPI`，重力默认仍然开启。
+
+**第一步** —— 配置层（在 `use_teleop_device()` 中）：
+```python
+self.scene.robot.spawn.rigid_props.disable_gravity = True
+```
+
+**第二步** —— USD Stage 遍历（在 runner 脚本 `gym.make()` 之后）：
+```python
+_stage = omni.usd.get_context().get_stage()
+for _prim in _stage.Traverse():
+    if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
+        PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
+```
+
+两步必须同时存在。双臂任务同理（`"Robot"` 可同时匹配 `Left_Robot` 和 `Right_Robot`）。
+
+### 2. 关节阻尼
+
+IK 控制器需要比机械臂默认阻尼（0.6 N·m·s/rad）更高的阻尼值，才能获得稳定、平滑的轨迹。阻尼设置为 **10.0 N·m·s/rad**，在每步调用：
+
+```python
+# 在 PickOrangeStateMachine.get_action() 中：
+robot = env.scene["robot"]
+robot.write_joint_damping_to_sim(damping=10.0)
+```
+
+**回放时也必须应用相同阻尼**，以匹配录制条件。状态机回放脚本（`scripts/environments/state_machine/replay.py`）在每次 `env.step()` 前调用 `apply_damping(env, task_type)`。
+
+### 3. 归位 FK 校准
+
+成功判断（`task_done()`）要求关节位于 `SO101_FOLLOWER_REST_POSE_RANGE` 内（例如 shoulder_lift ≈ −100°，并非零位）。由于 IK 控制 EE 位置，无法保证特定关节构型，因此 runner 脚本在调用 `task_done()` 前：
+
+1. 用 `write_joint_state_to_sim()` 将关节传送（teleport）到 rest pose。
+2. 只调用 `env.scene.update()`（不调用 `env.sim.step()`），仅刷新数据缓存，避免物理引擎用过期的 actuator 目标把 teleport 撤销。
+
+```python
+robot.write_joint_state_to_sim(
+    position=_rest_joint_pos,
+    velocity=torch.zeros_like(_rest_joint_pos),
+)
+env.scene.update(dt=env.physics_dt)
+success = task_done(env, ...)
+```
+
+### 4. 归位策略
+
+放置橘子后，机械臂需要返回 rest pose 才能通过成功判断。
+
+**IK 单独归位不可靠：** 对同一 EE 目标位置，IK 求解器可能找到多个关节解（IK 非唯一性）。从放置后的构型出发，命令 rest pose EE 位置，IK 通常会落入与 rest pose 不同的关节解。
+
+**第三个橘子（最后一个）的当前方案：**
+
+在步骤 620–919 间，关节位置从放置后构型线性插值到 rest pose：
+
+```python
+if sm.orange_now == 3 and sm.step_count >= 620:
+    if sm.step_count == 620:
+        _home_start_pos = _robot.data.joint_pos.clone()
+    alpha = (sm.step_count - 620) / 299.0      # 0.0 → 1.0
+    blended = _home_start_pos + (_rest_joint_pos - _home_start_pos) * alpha
+    _robot.write_joint_state_to_sim(position=blended, velocity=zeros)
+# sm.get_action() 的 IK 动作仍然传入 env.step()，用于录制
+env.step(actions)
+```
+
+**回放局限性：** 录制的 `actions` 是 8D IK pose 目标，而非插值后的关节位置。回放时 IK 求解器从不同的关节起点出发，可能走向不同的路径。HDF5 中的 `processed_actions` 字段存有 IK 实际求解的关节目标位置，但当前回放基础设施使用的是 `actions` 而非 `processed_actions`。
+
+**第一、二个橘子：** 直接用 `sm.advance()` 跳过归位阶段（不调用 `env.step()`），避免在下一个橘子紧随其后时浪费仿真时间。
+
+### 5. IK 动作坐标系
+
+IK 目标必须表示在**机械臂 base 局部坐标系**下，而非世界坐标系：
+
+```python
+diff_w        = target_pos_w - robot_base_pos_w
+target_pos_lo = quat_apply(quat_inv(robot_base_quat_w), diff_w)
+```
+
+### 6. Episode 编号规则
+
+IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任何步骤）会保存一个仅含初始状态的 episode（`num_samples=0`），即 `demo_0`。
+
+| `--select_episodes N` | 加载的 episode | 内容 |
+|---|---|---|
+| 0 | `demo_0` | 空（无 actions）—— 会导致 `TypeError` |
+| 1 | `demo_1` | 第 1 条真实演示 |
+| K | `demo_K` | 第 K 条真实演示 |
+
+---
+
+## 文件说明
+
+| 文件 | 用途 |
+|---|---|
+| `scripts/environments/state_machine/pick_orange.py` | 拾橙任务录制 Runner |
+| `scripts/environments/state_machine/fold_cloth.py` | 叠衣任务录制 Runner |
+| `scripts/environments/state_machine/replay.py` | 状态机专用回放脚本（含阻尼设置） |
+| `source/leisaac/leisaac/state_machine/base.py` | `StateMachineBase` 抽象基类 |
+| `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
+| `source/leisaac/leisaac/state_machine/fold_cloth.py` | `FoldClothStateMachine` |
+| `replay.sh` | replay.py 的 Shell 封装脚本 |
+| `run_task.sh` | 拾橙录制的 Shell 封装脚本 |
diff --git a/replay.sh b/replay.sh
index e3a71bb2..c7c826f1 100755
--- a/replay.sh
+++ b/replay.sh
@@ -1,10 +1,7 @@
 #!/bin/bash
-# 默认参数（可覆盖）
 TASK_NAME=${1:-LeIsaac-SO101-PickOrange-v0}
 DATASET=${2:-./datasets/dataset_test.hdf5}
-EPISODE_ID=${3:-1}
-
-cd /media/zihan-gao/leisaac || exit
+EPISODE_ID=${3:-3}
 
 python scripts/environments/state_machine/replay.py \
     --dataset_file=${DATASET} \
diff --git a/run_task.sh b/run_task.sh
index af7e96f3..5b71eb0c 100755
--- a/run_task.sh
+++ b/run_task.sh
@@ -1,10 +1,7 @@
 #!/bin/bash
-
 TASK_NAME=${1:-LeIsaac-SO101-PickOrange-v0}
 DATASET=${2:-./datasets/dataset_test.hdf5}
 
-cd /media/zihan-gao/leisaac || exit
-
 python scripts/environments/state_machine/pick_orange.py \
     --dataset_file=${DATASET} \
     --task=${TASK_NAME} \

From f81ba704004a4e1a0153334b31daeb0754a22101 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 17:34:54 -0500
Subject: [PATCH 35/68] fix bugs

---
 replay.sh                                     |  2 +-
 .../environments/state_machine/pick_orange.py | 16 ++---
 .../leisaac/state_machine/pick_orange.py      | 58 ++++++++++++++-----
 3 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/replay.sh b/replay.sh
index c7c826f1..a3b536eb 100755
--- a/replay.sh
+++ b/replay.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 TASK_NAME=${1:-LeIsaac-SO101-PickOrange-v0}
 DATASET=${2:-./datasets/dataset_test.hdf5}
-EPISODE_ID=${3:-3}
+EPISODE_ID=${3:-1}
 
 python scripts/environments/state_machine/replay.py \
     --dataset_file=${DATASET} \
diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index a858f4c7..d0c40574 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -340,13 +340,13 @@ def main() -> None:
                     print("Start recording.")
                 start_record_state = True
             actions = sm.get_action(env)
-            # During the last orange's return home phase (orange 3, step 620-919): linearly
-            # interpolate joint positions from the configuration at step 620 to rest pose at
-            # step 919.
-            if sm.orange_now == 3 and sm.step_count >= 620:
-                if sm.step_count == 620:
+            # During the last orange's return home phase (orange 3, step 680-979): linearly
+            # interpolate joint positions from the configuration at step 680 to rest pose at
+            # step 979.
+            if sm.orange_now == 3 and sm.step_count >= 680:
+                if sm.step_count == 680:
                     _home_start_pos = _robot.data.joint_pos.clone()
-                _alpha = (sm.step_count - 620) / 299.0  # 0.0 at step 620, 1.0 at step 919
+                _alpha = (sm.step_count - 680) / 299.0  # 0.0 at step 680, 1.0 at step 979
                 _blended_pos = _home_start_pos + (_rest_joint_pos - _home_start_pos) * _alpha
                 _robot.write_joint_state_to_sim(
                     position=_blended_pos,
@@ -355,14 +355,14 @@ def main() -> None:
             _orange_before_step = sm.orange_now
             env.step(actions)
             sm.advance()
-            # For oranges 1 and 2: skip the return home phase (steps 620-919) without
+            # For oranges 1 and 2: skip the return home phase (steps 680-979) without
             # simulation — there is no point returning home when the next orange follows
             # immediately. Fast-forward advance() until the orange index increments.
             if (
                 not sm.is_episode_done
                 and sm.orange_now == _orange_before_step
                 and sm.orange_now < 3
-                and sm.step_count >= 620
+                and sm.step_count >= 680
             ):
                 while sm.orange_now == _orange_before_step and not sm.is_episode_done:
                     sm.advance()
diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index d17b4e54..b2336041 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -70,7 +70,7 @@ class PickOrangeStateMachine(StateMachineBase):
         sm.reset()
     """
 
-    MAX_STEPS_PER_ORANGE: int = 920
+    MAX_STEPS_PER_ORANGE: int = 980
 
     def __init__(self, num_oranges: int = 3, rest_ee_pos_world: torch.Tensor | None = None) -> None:
         self._num_oranges = num_oranges
@@ -133,21 +133,23 @@ def get_action(self, env) -> torch.Tensor:
         # --- Phase dispatch ---
         if self._orange_now == 1 and step < _APPROACH_STEPS:
             target_pos_w, gripper_cmd = self._phase_approach_hover(orange_pos_w, num_envs, device)
-        elif step < 240:
-            target_pos_w, gripper_cmd = self._phase_hover_above_orange(orange_pos_w, num_envs, device)
+        elif step < 180:
+            target_pos_w, gripper_cmd = self._phase_move_above_orange(orange_pos_w, num_envs, device)
         elif step < 300:
-            target_pos_w, gripper_cmd = self._phase_lower_to_orange(orange_pos_w, num_envs, device)
+            target_pos_w, gripper_cmd = self._phase_hover_above_orange(orange_pos_w, num_envs, device)
         elif step < 360:
+            target_pos_w, gripper_cmd = self._phase_lower_to_orange(orange_pos_w, num_envs, device)
+        elif step < 420:
             target_pos_w, gripper_cmd = self._phase_grasp(orange_pos_w, num_envs, device)
-        elif step < 440:
+        elif step < 500:
             target_pos_w, gripper_cmd = self._phase_lift_orange(orange_pos_w, num_envs, device)
-        elif step < 490:
+        elif step < 550:
             target_pos_w, gripper_cmd = self._phase_move_above_plate(plate_pos_w, num_envs, device)
-        elif step < 540:
+        elif step < 600:
             target_pos_w, gripper_cmd = self._phase_lower_to_plate(plate_pos_w, num_envs, device)
-        elif step < 580:
+        elif step < 640:
             target_pos_w, gripper_cmd = self._phase_release(plate_pos_w, num_envs, device)
-        elif step < 620:
+        elif step < 680:
             target_pos_w, gripper_cmd = self._phase_lift_gripper(plate_pos_w, num_envs, device)
         else:
             target_pos_w, gripper_cmd = self._phase_return_home(num_envs, device)
@@ -185,6 +187,7 @@ def _phase_approach_hover(
         """
         hover_target = orange_pos_w.clone()
         hover_target[:, 0] -= 0.03
+        hover_target[:, 1] -= 0.005
         hover_target[:, 2] += 0.1 + _GRIPPER_OFFSET
 
         alpha = self._step_count / _APPROACH_STEPS  # 0.0 at step 0, 1.0 at step _APPROACH_STEPS
@@ -196,10 +199,35 @@ def _phase_approach_hover(
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
         return target_pos_w, gripper_cmd
 
+    def _phase_move_above_orange(
+        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Steps 120–179 (orange 1) / 0–179 (oranges 2-3): move to a high position directly above the orange.
+
+        This phase brings the gripper to a clear altitude above the orange before
+        the finer hover and lower phases.  It is the first phase executed for
+        oranges 2 and 3 (where the robot transits from the plate side), and runs
+        immediately after ``_phase_approach_hover`` for the first orange.
+
+        Args:
+            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
+            num_envs: Number of parallel environments.
+            device: Torch device string.
+
+        Returns:
+            ``(target_pos_w, gripper_cmd)`` – target world position and open gripper command.
+        """
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.005
+        target_pos_w[:, 2] += 0.15 + _GRIPPER_OFFSET
+        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+        return target_pos_w, gripper_cmd
+
     def _phase_hover_above_orange(
         self, orange_pos_w: torch.Tensor, num_envs: int, device: str
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 0–119: move the gripper to a hover position above the orange.
+        """Steps 180–299: hold the gripper at a hover position above the orange.
 
         Args:
             orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
@@ -211,6 +239,7 @@ def _phase_hover_above_orange(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.005
         target_pos_w[:, 2] += 0.1 + _GRIPPER_OFFSET
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
         return target_pos_w, gripper_cmd
@@ -218,7 +247,7 @@ def _phase_hover_above_orange(
     def _phase_lower_to_orange(
         self, orange_pos_w: torch.Tensor, num_envs: int, device: str
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 120–149: lower the gripper down to grasp height over the orange.
+        """Steps 300–359: lower the gripper down to grasp height over the orange.
 
         Args:
             orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
@@ -230,6 +259,7 @@ def _phase_lower_to_orange(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.005
         target_pos_w[:, 2] += _GRIPPER_OFFSET
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
         return target_pos_w, gripper_cmd
@@ -237,7 +267,7 @@ def _phase_lower_to_orange(
     def _phase_grasp(
         self, orange_pos_w: torch.Tensor, num_envs: int, device: str
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 150–179: close the gripper to grasp the orange.
+        """Steps 360–419: close the gripper to grasp the orange.
 
         Args:
             orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
@@ -249,6 +279,7 @@ def _phase_grasp(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.005
         target_pos_w[:, 2] += _GRIPPER_OFFSET
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
         return target_pos_w, gripper_cmd
@@ -256,7 +287,7 @@ def _phase_grasp(
     def _phase_lift_orange(
         self, orange_pos_w: torch.Tensor, num_envs: int, device: str
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 180–219: lift the grasped orange upward.
+        """Steps 420–499: lift the grasped orange upward.
 
         Args:
             orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
@@ -268,6 +299,7 @@ def _phase_lift_orange(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.005
         target_pos_w[:, 2] += 0.25
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
         return target_pos_w, gripper_cmd

From d1c3cef9f3f334107e0bb7e7b4a8171aa62c4e6d Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 18:18:19 -0500
Subject: [PATCH 36/68] fix bugs

---
 STATE_MACHINE_README_CN.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/STATE_MACHINE_README_CN.md b/STATE_MACHINE_README_CN.md
index 81ca5676..a619d787 100644
--- a/STATE_MACHINE_README_CN.md
+++ b/STATE_MACHINE_README_CN.md
@@ -29,7 +29,7 @@ python scripts/environments/state_machine/pick_orange.py \
     --num_envs 1 \
     --record \
     --dataset_file ./datasets/pick_orange.hdf5 \
-    --num_demos 10
+    --num_demos 1
 ```
 
 ### 工作原理

From e81f2dd9c8f518f64fc34d689bbc818ea626fa30 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 18:18:32 -0500
Subject: [PATCH 37/68] fix bugs

---
 STATE_MACHINE_README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/STATE_MACHINE_README.md b/STATE_MACHINE_README.md
index a3217d78..c16e2bef 100644
--- a/STATE_MACHINE_README.md
+++ b/STATE_MACHINE_README.md
@@ -29,7 +29,7 @@ python scripts/environments/state_machine/pick_orange.py \
     --num_envs 1 \
     --record \
     --dataset_file ./datasets/pick_orange.hdf5 \
-    --num_demos 10
+    --num_demos 1
 ```
 
 ### How It Works

From 246a48f56e01c5af689ae2b203d90a327d1e2de6 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 18:18:53 -0500
Subject: [PATCH 38/68] fix bugs

---
 CLAUDE.md | 211 ------------------------------------------------------
 1 file changed, 211 deletions(-)
 delete mode 100644 CLAUDE.md

diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index ddab40b2..00000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,211 +0,0 @@
-# LeIsaac Project — Claude Context
-
-## 项目结构概览
-
-```
-leisaac/
-├── scripts/environments/state_machine/   # 运行脚本
-│   ├── pick_orange.py                    # 单臂拾橙数据采集脚本
-│   └── fold_cloth.py                     # 双臂叠衣数据采集脚本（新增）
-└── source/leisaac/leisaac/
-    ├── state_machine/                    # 状态机模块（新增）
-    │   ├── __init__.py
-    │   ├── base.py                       # StateMachineBase 抽象基类
-    │   ├── pick_orange.py                # PickOrangeStateMachine
-    │   └── fold_cloth.py                 # FoldClothStateMachine（新增）
-    ├── devices/
-    │   └── action_process.py             # 设备动作配置（新增 bi_so101_state_machine）
-    ├── tasks/
-    │   ├── template/
-    │   │   ├── bi_arm_env_cfg.py         # 双臂 ManagerBased env 基础配置
-    │   │   ├── single_arm_env_cfg.py     # 单臂 ManagerBased env 基础配置
-    │   │   └── direct/
-    │   │       └── bi_arm_env.py         # 双臂 Direct env 基础配置
-    │   ├── pick_orange/                  # 单臂拾橙任务
-    │   │   └── mdp/terminations.py       # task_done() 检查 rest pose + orange in plate
-    │   └── fold_cloth/                   # 双臂叠衣任务
-    │       ├── mdp/terminations.py       # cloth_folded() 检查 rest pose + 布料关键点距离
-    │       ├── fold_cloth_bi_arm_env_cfg.py  # ManagerBased env 配置（含 cloths 场景元素）
-    │       └── direct/
-    │           └── fold_cloth_bi_arm_env.py  # Direct env（含 ClothObject 初始化）
-    └── utils/
-        ├── env_utils.py                  # dynamic_reset_gripper_effort_limit_sim 等工具
-        └── robot_utils.py                # is_so101_at_rest_pose, convert_leisaac_action_to_lerobot
-```
-
----
-
-## 状态机架构
-
-### 状态机与环境的关系
-
-```
-Runner script
-  └── gym.make(task_name, cfg=env_cfg)        # 创建环境
-       └── env_cfg.use_teleop_device(device)  # 配置 action manager
-  └── sm = StateMachine(...)
-  └── 主循环:
-       actions = sm.get_action(env)           # 状态机计算动作
-       env.step(actions)                      # 直接传入 tensor，不经过 preprocess_device_action
-       sm.advance()
-```
-
-**关键点**：runner script 直接调用 `env.step(actions)` 传入动作 tensor，**不经过**
-`preprocess_device_action()`（后者只在遥操作流程中使用）。
-
-### 设备类型与 action 格式
-
-| device | action 维度 | 格式 | 适用场景 |
-|---|---|---|---|
-| `so101_state_machine` | 8D | `[pos(3), quat(4), gripper(1)]` | 单臂 IK 控制 |
-| `bi_so101_state_machine` | 16D | `[left_pos(3), left_quat(4), left_grip(1), right_pos(3), right_quat(4), right_grip(1)]` | 双臂 IK 控制 |
-
-IK 目标位置为**机械臂 base frame 下的局部坐标**，需要从世界坐标转换：
-```python
-diff_w = target_pos_w - base_pos_w
-target_pos_local = quat_apply(quat_inv(base_quat_w), diff_w)
-```
-
----
-
-## 关键技术细节
-
-### 1. 重力禁用（两步缺一不可）
-
-**第一步**：`use_teleop_device()` 中设置 rigid_props（只禁用 root prim）：
-```python
-# bi_arm_env_cfg.py / bi_arm_env.py（Direct）
-if teleop_device in ["bi_so101_state_machine"]:
-    self.scene.left_arm.spawn.rigid_props.disable_gravity = True
-    self.scene.right_arm.spawn.rigid_props.disable_gravity = True
-```
-
-**第二步**：runner script 中 USD stage 遍历（禁用所有子 link prim）：
-```python
-import omni.usd
-from pxr import PhysxSchema, UsdPhysics
-_stage = omni.usd.get_context().get_stage()
-for _prim in _stage.Traverse():
-    if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
-        PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
-```
-
-- 单臂场景 prim 路径含 `Robot`
-- 双臂场景 prim 路径含 `Left_Robot` 和 `Right_Robot`，均可被 `"Robot" in path` 匹配
-
-### 2. FK 校准（获取 rest pose 的 EE 世界坐标）
-
-`task_done()` / `cloth_folded()` 要求机械臂关节在 `SO101_FOLLOWER_REST_POSE_RANGE` 内：
-- shoulder_lift ≈ -100°，elbow_flex ≈ 90°，wrist_flex ≈ 50°（并非零位）
-
-IK 控制 EE 位置，无法直接保证关节角度，因此需要 FK 校准：先把关节
-teleport 到 rest pose，读取此时的 EE 世界坐标，再让状态机在 return home
-阶段命令 IK 趋向该位置（给足够步数收敛，约 300 步 / 5 秒）。
-
-```python
-# 正确 API（teleport 关节到指定位置）
-robot.write_joint_state_to_sim(
-    position=_rest_joint_pos,          # (num_envs, num_joints)
-    velocity=torch.zeros_like(...),
-)
-env.sim.step(render=False)
-env.scene.update(dt=env.physics_dt)
-rest_ee_pos = robot.data.body_pos_w[:, -1, :].clone()  # gripper EE 世界坐标
-```
-
-> ⚠️ `write_joint_position_target_to_sim` 不存在，正确方法是 `write_joint_state_to_sim`。
-
-### 3. ClothObject 在 ManagerBased env 中的初始化
-
-`FoldClothBiArmEnv`（Direct env）会自动初始化 ClothObject，但 ManagerBased env
-不会。runner script 中需手动添加：
-```python
-from leisaac.enhance.assets import ClothObject
-if not hasattr(env.scene, "particle_objects"):
-    env.scene.particle_objects = {}
-env.scene.particle_objects["cloths"] = ClothObject(
-    cfg=env.cfg.scene.cloths,
-    scene=env.scene,
-)
-env.scene.particle_objects["cloths"].initialize()
-```
-
-每次 episode 重置时，需在 `env.reset()` **之前**先 reset cloth（与 Direct env 一致）：
-```python
-env.scene.particle_objects["cloths"].reset()
-env.reset()
-```
-
-### 4. 布料关键点
-
-```python
-_CLOTH_KEYPOINTS = [159789, 120788, 115370, 159716, 121443, 112382]
-# 顺序: left_sleeve, left_shoulder, left_hem, right_sleeve, right_shoulder, right_hem
-
-cloth = env.scene.particle_objects["cloths"]
-kp = cloth.point_positions[:, _CLOTH_KEYPOINTS, :]  # (num_envs, 6, 3) 世界坐标
-```
-
-`cloth_folded()` 检查的距离条件（threshold=0.20m）：
-- left_sleeve → right_shoulder
-- right_sleeve → left_shoulder
-- left_hem → left_shoulder
-- right_hem → right_shoulder
-
----
-
-## 本次会话新增/修改的文件
-
-| 文件 | 类型 | 内容 |
-|---|---|---|
-| `state_machine/fold_cloth.py` | 新建 | `FoldClothStateMachine`，1260 步，13 个折叠阶段 |
-| `state_machine/__init__.py` | 修改 | 新增 `FoldClothStateMachine` 导出 |
-| `devices/action_process.py` | 修改 | 新增 `bi_so101_state_machine` 设备分支（16D IK） |
-| `tasks/template/bi_arm_env_cfg.py` | 修改 | `use_teleop_device()` 新增 `bi_so101_state_machine` 的重力禁用 |
-| `tasks/template/direct/bi_arm_env.py` | 修改 | 同上（Direct env 版本） |
-| `scripts/.../state_machine/fold_cloth.py` | 新建 | fold_cloth 数据采集 runner script |
-
-### 前一次会话（pick_orange）修改的文件
-
-| 文件 | 内容 |
-|---|---|
-| `state_machine/base.py` | `StateMachineBase` 抽象基类 |
-| `state_machine/pick_orange.py` | `PickOrangeStateMachine`，含 rest pose 回位阶段 |
-| `scripts/.../state_machine/pick_orange.py` | 重力禁用 + FK 校准 + rest_ee_pos_world 传参 |
-
----
-
-## 任务注册名
-
-| gym 注册 ID | 说明 |
-|---|---|
-| `LeIsaac-SO101-PickOrange-v0` | 单臂拾橙，ManagerBased |
-| `LeIsaac-SO101-FoldCloth-BiArm-v0` | 双臂叠衣，ManagerBased（推荐用于状态机，有 IK action manager） |
-| `LeIsaac-SO101-FoldCloth-BiArm-Direct-v0` | 双臂叠衣，Direct env（action space = 12D 关节位置） |
-
----
-
-## 运行方式
-
-```bash
-# pick_orange（单臂）
-python scripts/environments/state_machine/pick_orange.py \
-    --task LeIsaac-SO101-PickOrange-v0 --num_envs 1 --record \
-    --dataset_file ./datasets/pick_orange.hdf5 --num_demos 10
-
-# fold_cloth（双臂）
-python scripts/environments/state_machine/fold_cloth.py \
-    --task LeIsaac-SO101-FoldCloth-BiArm-v0 --num_envs 1 --record \
-    --dataset_file ./datasets/fold_cloth.hdf5 --num_demos 10
-```
-
----
-
-## 注意事项
-
-- `preprocess_device_action()` 在 runner script 流程中**不会被调用**，无需为
-  `bi_so101_state_machine` 添加对应分支（除非日后接入真实设备遥操作）
-- `env_utils.py:dynamic_reset_gripper_effort_limit_sim()` 对 fold_cloth 无效
-  （`dynamic_reset_gripper_effort_limit = False`），暂无需修改
-- fold_cloth 的 `decimation = 2`，实际物理步频为 `sim_dt / 2`，注意时序计算
-- 状态机的步数常量（`MAX_STEPS`、各阶段边界）均基于 60 Hz 控制频率设计

From 733de371e44efc1839e851f74b0a6c56a526e934 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 18:26:53 -0500
Subject: [PATCH 39/68] fix bugs

---
 STATE_MACHINE_README.md    | 2 ++
 STATE_MACHINE_README_CN.md | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/STATE_MACHINE_README.md b/STATE_MACHINE_README.md
index c16e2bef..944f8cf2 100644
--- a/STATE_MACHINE_README.md
+++ b/STATE_MACHINE_README.md
@@ -27,6 +27,8 @@ source/leisaac/leisaac/state_machine/
 python scripts/environments/state_machine/pick_orange.py \
     --task LeIsaac-SO101-PickOrange-v0 \
     --num_envs 1 \
+    --device cuda \
+    --enable_cameras \
     --record \
     --dataset_file ./datasets/pick_orange.hdf5 \
     --num_demos 1
diff --git a/STATE_MACHINE_README_CN.md b/STATE_MACHINE_README_CN.md
index a619d787..1ee939a5 100644
--- a/STATE_MACHINE_README_CN.md
+++ b/STATE_MACHINE_README_CN.md
@@ -27,6 +27,8 @@ source/leisaac/leisaac/state_machine/
 python scripts/environments/state_machine/pick_orange.py \
     --task LeIsaac-SO101-PickOrange-v0 \
     --num_envs 1 \
+    --device cuda \
+    --enable_cameras \
     --record \
     --dataset_file ./datasets/pick_orange.hdf5 \
     --num_demos 1

From 1bde9b0866342db2f6658e6ad4466c8a559d565e Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 18:50:29 -0500
Subject: [PATCH 40/68] Change documents

---
 STATE_MACHINE_README.md                       | 54 ++-----------------
 STATE_MACHINE_README_CN.md                    | 54 ++-----------------
 .../leisaac/state_machine/pick_orange.py      | 12 ++---
 3 files changed, 12 insertions(+), 108 deletions(-)

diff --git a/STATE_MACHINE_README.md b/STATE_MACHINE_README.md
index 944f8cf2..520a75dd 100644
--- a/STATE_MACHINE_README.md
+++ b/STATE_MACHINE_README.md
@@ -34,6 +34,8 @@ python scripts/environments/state_machine/pick_orange.py \
     --num_demos 1
 ```
 
+> **Note — Grasp Success Rate:** In the default scene configuration the oranges are placed relatively far from the robot's end-effector, which results in a low grasp success rate. Adjusting the orange spawn positions in the task's environment config file (e.g. moving them closer to the robot base) significantly improves the success rate.
+
 ### How It Works
 
 ```
@@ -157,57 +159,7 @@ robot.write_joint_damping_to_sim(damping=10.0)
 
 This must also be applied during **replay** to match recording conditions. The state-machine replay script (`scripts/environments/state_machine/replay.py`) calls `apply_damping(env, task_type)` before every `env.step()`.
 
-### 3. FK Calibration for Rest Pose
-
-The success check (`task_done()`) requires joints to be within `SO101_FOLLOWER_REST_POSE_RANGE` (e.g., shoulder_lift ≈ −100°, not 0°). Since IK controls EE position and cannot guarantee a specific joint configuration, the runner:
-
-1. Teleports joints to the rest pose using `write_joint_state_to_sim()` before `task_done()`.
-2. Calls `env.scene.update()` (not `env.sim.step()`) to refresh the data cache without letting the physics overwrite the teleport via stale actuator targets.
-
-```python
-robot.write_joint_state_to_sim(
-    position=_rest_joint_pos,
-    velocity=torch.zeros_like(_rest_joint_pos),
-)
-env.scene.update(dt=env.physics_dt)
-success = task_done(env, ...)
-```
-
-### 4. Return-Home Strategy
-
-After placing an orange, the robot must return to rest pose for the success check.
-
-**IK alone is insufficient:** the IK solver can reach the same end-effector position via many different joint configurations (IK non-uniqueness). Commanding the rest-pose EE position via IK from a post-placement configuration typically lands in a different joint solution.
-
-**Current approach for orange 3 (the last orange):**
-
-During steps 620–919, joint positions are linearly interpolated from the post-placement configuration to the rest pose:
-
-```python
-if sm.orange_now == 3 and sm.step_count >= 620:
-    if sm.step_count == 620:
-        _home_start_pos = _robot.data.joint_pos.clone()
-    alpha = (sm.step_count - 620) / 299.0      # 0.0 → 1.0
-    blended = _home_start_pos + (_rest_joint_pos - _home_start_pos) * alpha
-    _robot.write_joint_state_to_sim(position=blended, velocity=zeros)
-# IK actions from sm.get_action() are still passed to env.step() for recording
-env.step(actions)
-```
-
-**Replay limitation:** the recorded `actions` are 8D IK pose targets, not the blended joint positions. During replay, the IK solver starts from a different joint state and may follow a different path to the same EE target. The `processed_actions` field in the HDF5 file contains the actual IK-computed joint targets, but the current replay infrastructure uses `actions`, not `processed_actions`.
-
-**Oranges 1 and 2:** return-home is skipped entirely using `sm.advance()` without `env.step()`. This avoids wasted simulation time when the next orange follows immediately.
-
-### 5. IK Action Coordinate Frame
-
-IK targets must be expressed in the **robot base local frame**, not the world frame:
-
-```python
-diff_w        = target_pos_w - robot_base_pos_w
-target_pos_lo = quat_apply(quat_inv(robot_base_quat_w), diff_w)
-```
-
-### 6. Episode Numbering
+### 3. Episode Numbering
 
 The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on the very first `env.reset()` call (before any steps). This becomes `demo_0`.
 
diff --git a/STATE_MACHINE_README_CN.md b/STATE_MACHINE_README_CN.md
index 1ee939a5..eeaf32c1 100644
--- a/STATE_MACHINE_README_CN.md
+++ b/STATE_MACHINE_README_CN.md
@@ -34,6 +34,8 @@ python scripts/environments/state_machine/pick_orange.py \
     --num_demos 1
 ```
 
+> **注意 — 夹取成功率：** 默认场景配置中橙子离机械臂末端较远，导致夹取成功率较低。在任务的环境 cfg 文件中调整橙子的生成位置（例如将其移近机械臂底座），可以显著提升夹取成功率。
+
 ### 工作原理
 
 ```
@@ -158,57 +160,7 @@ robot.write_joint_damping_to_sim(damping=10.0)
 
 **回放时也必须应用相同阻尼**，以匹配录制条件。状态机回放脚本（`scripts/environments/state_machine/replay.py`）在每次 `env.step()` 前调用 `apply_damping(env, task_type)`。
 
-### 3. 归位 FK 校准
-
-成功判断（`task_done()`）要求关节位于 `SO101_FOLLOWER_REST_POSE_RANGE` 内（例如 shoulder_lift ≈ −100°，并非零位）。由于 IK 控制 EE 位置，无法保证特定关节构型，因此 runner 脚本在调用 `task_done()` 前：
-
-1. 用 `write_joint_state_to_sim()` 将关节传送（teleport）到 rest pose。
-2. 只调用 `env.scene.update()`（不调用 `env.sim.step()`），仅刷新数据缓存，避免物理引擎用过期的 actuator 目标把 teleport 撤销。
-
-```python
-robot.write_joint_state_to_sim(
-    position=_rest_joint_pos,
-    velocity=torch.zeros_like(_rest_joint_pos),
-)
-env.scene.update(dt=env.physics_dt)
-success = task_done(env, ...)
-```
-
-### 4. 归位策略
-
-放置橘子后，机械臂需要返回 rest pose 才能通过成功判断。
-
-**IK 单独归位不可靠：** 对同一 EE 目标位置，IK 求解器可能找到多个关节解（IK 非唯一性）。从放置后的构型出发，命令 rest pose EE 位置，IK 通常会落入与 rest pose 不同的关节解。
-
-**第三个橘子（最后一个）的当前方案：**
-
-在步骤 620–919 间，关节位置从放置后构型线性插值到 rest pose：
-
-```python
-if sm.orange_now == 3 and sm.step_count >= 620:
-    if sm.step_count == 620:
-        _home_start_pos = _robot.data.joint_pos.clone()
-    alpha = (sm.step_count - 620) / 299.0      # 0.0 → 1.0
-    blended = _home_start_pos + (_rest_joint_pos - _home_start_pos) * alpha
-    _robot.write_joint_state_to_sim(position=blended, velocity=zeros)
-# sm.get_action() 的 IK 动作仍然传入 env.step()，用于录制
-env.step(actions)
-```
-
-**回放局限性：** 录制的 `actions` 是 8D IK pose 目标，而非插值后的关节位置。回放时 IK 求解器从不同的关节起点出发，可能走向不同的路径。HDF5 中的 `processed_actions` 字段存有 IK 实际求解的关节目标位置，但当前回放基础设施使用的是 `actions` 而非 `processed_actions`。
-
-**第一、二个橘子：** 直接用 `sm.advance()` 跳过归位阶段（不调用 `env.step()`），避免在下一个橘子紧随其后时浪费仿真时间。
-
-### 5. IK 动作坐标系
-
-IK 目标必须表示在**机械臂 base 局部坐标系**下，而非世界坐标系：
-
-```python
-diff_w        = target_pos_w - robot_base_pos_w
-target_pos_lo = quat_apply(quat_inv(robot_base_quat_w), diff_w)
-```
-
-### 6. Episode 编号规则
+### 3. Episode 编号规则
 
 IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任何步骤）会保存一个仅含初始状态的 episode（`num_samples=0`），即 `demo_0`。
 
diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
index b2336041..f5aea210 100644
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/state_machine/pick_orange.py
@@ -187,7 +187,7 @@ def _phase_approach_hover(
         """
         hover_target = orange_pos_w.clone()
         hover_target[:, 0] -= 0.03
-        hover_target[:, 1] -= 0.005
+        hover_target[:, 1] -= 0.01
         hover_target[:, 2] += 0.1 + _GRIPPER_OFFSET
 
         alpha = self._step_count / _APPROACH_STEPS  # 0.0 at step 0, 1.0 at step _APPROACH_STEPS
@@ -219,7 +219,7 @@ def _phase_move_above_orange(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.005
+        target_pos_w[:, 1] -= 0.01
         target_pos_w[:, 2] += 0.15 + _GRIPPER_OFFSET
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
         return target_pos_w, gripper_cmd
@@ -239,7 +239,7 @@ def _phase_hover_above_orange(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.005
+        target_pos_w[:, 1] -= 0.01
         target_pos_w[:, 2] += 0.1 + _GRIPPER_OFFSET
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
         return target_pos_w, gripper_cmd
@@ -259,7 +259,7 @@ def _phase_lower_to_orange(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.005
+        target_pos_w[:, 1] -= 0.01
         target_pos_w[:, 2] += _GRIPPER_OFFSET
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
         return target_pos_w, gripper_cmd
@@ -279,7 +279,7 @@ def _phase_grasp(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.005
+        target_pos_w[:, 1] -= 0.01
         target_pos_w[:, 2] += _GRIPPER_OFFSET
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
         return target_pos_w, gripper_cmd
@@ -299,7 +299,7 @@ def _phase_lift_orange(
         """
         target_pos_w = orange_pos_w.clone()
         target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.005
+        target_pos_w[:, 1] -= 0.01
         target_pos_w[:, 2] += 0.25
         gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
         return target_pos_w, gripper_cmd

From 1342c18b6b5c0444042aaaff485723ef55ab75bf Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 21 Feb 2026 21:46:01 -0500
Subject: [PATCH 41/68] Change bi_arm_cfg

---
 run_fold_cloth.sh                                | 12 ++++++++++++
 source/leisaac/leisaac/devices/action_process.py |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100755 run_fold_cloth.sh

diff --git a/run_fold_cloth.sh b/run_fold_cloth.sh
new file mode 100755
index 00000000..14f28c60
--- /dev/null
+++ b/run_fold_cloth.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+TASK_NAME=${1:-LeIsaac-SO101-FoldCloth-BiArm-v0}
+DATASET=${2:-./datasets/dataset_cloth.hdf5}
+
+python scripts/environments/state_machine/fold_cloth.py \
+    --dataset_file=${DATASET} \
+    --task=${TASK_NAME} \
+    --num_envs=1 \
+    --device=cuda \
+    --enable_cameras \
+    --record \
+    --num_demos=1
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index 7b6ea755..1051c986 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -116,7 +116,7 @@ def init_action_cfg(action_cfg, device):
             asset_name=asset,
             joint_names=["gripper"],
             open_command_expr={"gripper": 1.0},
-            close_command_expr={"gripper": 0.4},
+            close_command_expr={"gripper": 0.01},
         )
         action_cfg.left_arm_action = _ik_cfg("left_arm")
         action_cfg.left_gripper_action = _grip_cfg("left_arm")

From c7d8cca2e41d24ec290dd4e715e50b87a1afcc95 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Mon, 2 Mar 2026 12:45:50 -0500
Subject: [PATCH 42/68] Add RL module - 1st version.

---
 .../environments/state_machine/fold_cloth.py  | 347 --------------
 .../environments/state_machine/pick_orange.py |  11 -
 scripts/rl/train_pick_orange.py               | 107 +++++
 .../leisaac/leisaac/devices/action_process.py |  55 +--
 source/leisaac/leisaac/rl/__init__.py         |   5 +
 source/leisaac/leisaac/rl/rsl_rl_wrapper.py   | 108 +++++
 .../leisaac/state_machine/fold_cloth.py       | 429 ------------------
 .../leisaac/tasks/pick_orange/__init__.py     |   9 +
 .../leisaac/tasks/pick_orange/mdp/__init__.py |   1 +
 .../tasks/pick_orange/mdp/observations.py     |  27 ++
 .../leisaac/tasks/pick_orange/mdp/rewards.py  |  75 +++
 .../pick_orange/pick_orange_rl_env_cfg.py     | 173 +++++++
 12 files changed, 528 insertions(+), 819 deletions(-)
 delete mode 100644 scripts/environments/state_machine/fold_cloth.py
 create mode 100644 scripts/rl/train_pick_orange.py
 create mode 100644 source/leisaac/leisaac/rl/__init__.py
 create mode 100644 source/leisaac/leisaac/rl/rsl_rl_wrapper.py
 delete mode 100644 source/leisaac/leisaac/state_machine/fold_cloth.py
 create mode 100644 source/leisaac/leisaac/tasks/pick_orange/mdp/rewards.py
 create mode 100644 source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py

diff --git a/scripts/environments/state_machine/fold_cloth.py b/scripts/environments/state_machine/fold_cloth.py
deleted file mode 100644
index af3c71de..00000000
--- a/scripts/environments/state_machine/fold_cloth.py
+++ /dev/null
@@ -1,347 +0,0 @@
-"""Script to generate data using state machine with leisaac fold-cloth bi-arm environment.
-
-Launch Isaac Sim Simulator first.
-
-Default task: LeIsaac-SO101-FoldCloth-BiArm-v0 (manager-based, bi-arm IK control)
-"""
-
-import argparse
-import multiprocessing
-import os
-import time
-
-if multiprocessing.get_start_method() != 'spawn':
-    multiprocessing.set_start_method('spawn', force=True)
-
-from isaaclab.app import AppLauncher
-
-# add argparse arguments
-parser = argparse.ArgumentParser(description='leisaac data generation script for fold_cloth task.')
-parser.add_argument('--num_envs', type=int, default=1)
-parser.add_argument('--task', type=str, default=None)
-parser.add_argument('--seed', type=int, default=None)
-parser.add_argument('--record', action='store_true')
-parser.add_argument('--step_hz', type=int, default=60)
-parser.add_argument('--dataset_file', type=str, default='./datasets/dataset.hdf5')
-parser.add_argument('--resume', action='store_true')
-parser.add_argument('--num_demos', type=int, default=1)
-parser.add_argument('--quality', action='store_true')
-parser.add_argument('--use_lerobot_recorder', action='store_true', help='whether to use lerobot recorder.')
-parser.add_argument('--lerobot_dataset_repo_id', type=str, default=None, help='Lerobot Dataset repository ID.')
-parser.add_argument('--lerobot_dataset_fps', type=int, default=30, help='Lerobot Dataset frames per second.')
-
-AppLauncher.add_app_launcher_args(parser)
-args_cli = parser.parse_args()
-app_launcher_args = vars(args_cli)
-
-app_launcher = AppLauncher(app_launcher_args)
-simulation_app = app_launcher.app
-
-import gymnasium as gym
-import torch
-
-from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
-from isaaclab.managers import DatasetExportMode, SceneEntityCfg, TerminationTermCfg
-from isaaclab_tasks.utils import parse_env_cfg
-
-from leisaac.enhance.assets import ClothObject
-from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
-from leisaac.state_machine import FoldClothStateMachine
-from leisaac.tasks.fold_cloth.mdp import cloth_folded
-from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
-
-
-class RateLimiter:
-    """Convenience class for enforcing rates in loops."""
-
-    def __init__(self, hz):
-        self.hz = hz
-        self.last_time = time.time()
-        self.sleep_duration = 1.0 / hz
-        self.render_period = min(0.0166, self.sleep_duration)
-
-    def sleep(self, env):
-        """Attempt to sleep at the specified rate in hz."""
-        next_wakeup_time = self.last_time + self.sleep_duration
-        while time.time() < next_wakeup_time:
-            time.sleep(self.render_period)
-            env.sim.render()
-
-        self.last_time = self.last_time + self.sleep_duration
-
-        # detect time jumping forwards (e.g. loop is too slow)
-        if self.last_time < time.time():
-            while self.last_time < time.time():
-                self.last_time += self.sleep_duration
-
-
-def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
-    """Programmatically mark the current episode as success or failure."""
-    if hasattr(env, "termination_manager"):
-        if success:
-            env.termination_manager.set_term_cfg(
-                "success",
-                TerminationTermCfg(func=lambda env: torch.ones(env.num_envs, dtype=torch.bool, device=env.device)),
-            )
-        else:
-            env.termination_manager.set_term_cfg(
-                "success",
-                TerminationTermCfg(func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)),
-            )
-        env.termination_manager.compute()
-    elif hasattr(env, "_get_dones"):
-        env.cfg.return_success_status = success
-    return False
-
-
-def main() -> None:
-    """Run a fold-cloth state machine in an Isaac Lab bi-arm manipulation environment.
-
-    Uses ``LeIsaac-SO101-FoldCloth-BiArm-v0`` (manager-based) with the
-    ``bi_so101_state_machine`` device so that both arms are controlled via
-    absolute-pose IK.  A ``ClothObject`` is initialised manually on the scene
-    so that the state machine can read cloth particle keypoints at runtime.
-    """
-    output_dir = os.path.dirname(args_cli.dataset_file)
-    output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
-    if not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-
-    task_name = args_cli.task if args_cli.task is not None else "LeIsaac-SO101-FoldCloth-BiArm-v0"
-    env_cfg = parse_env_cfg(task_name, device=args_cli.device, num_envs=args_cli.num_envs)
-
-    # Bi-arm IK state-machine device
-    device = "bi_so101_state_machine"
-    env_cfg.use_teleop_device(device)
-    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
-
-    # Timeout and termination preprocessing
-    is_direct_env = "Direct" in task_name
-    if is_direct_env:
-        env_cfg.never_time_out = True
-        env_cfg.auto_terminate = True
-    else:
-        if hasattr(env_cfg.terminations, "time_out"):
-            env_cfg.terminations.time_out = None
-        if hasattr(env_cfg.terminations, "success"):
-            env_cfg.terminations.success = None
-
-    # Recorder preprocessing & manual success-termination preprocessing
-    if args_cli.record:
-        if args_cli.use_lerobot_recorder:
-            if args_cli.resume:
-                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_SUCCEEDED_ONLY_RESUME
-            else:
-                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_SUCCEEDED_ONLY
-        else:
-            if args_cli.resume:
-                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
-                assert os.path.exists(
-                    args_cli.dataset_file
-                ), "The dataset file does not exist. Do not use '--resume' when recording a new dataset."
-            else:
-                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
-                assert not os.path.exists(
-                    args_cli.dataset_file
-                ), "The dataset file already exists. Use '--resume' to resume recording."
-        env_cfg.recorders.dataset_export_dir_path = output_dir
-        env_cfg.recorders.dataset_filename = output_file_name
-        if is_direct_env:
-            env_cfg.return_success_status = False
-        else:
-            if not hasattr(env_cfg.terminations, "success"):
-                setattr(env_cfg.terminations, "success", None)
-            env_cfg.terminations.success = TerminationTermCfg(
-                func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)
-            )
-    else:
-        env_cfg.recorders = None
-
-    # Create environment
-    env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
-
-    # Disable gravity on all robot link prims.
-    # The scene uses Left_Robot and Right_Robot prim paths; "Robot" matches both.
-    import omni.usd
-    from pxr import PhysxSchema, UsdPhysics
-
-    _stage = omni.usd.get_context().get_stage()
-    for _prim in _stage.Traverse():
-        if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
-            PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
-
-    # Manually register a ClothObject on the scene so that the state machine can
-    # read cloth particle keypoints via env.scene.particle_objects["cloths"].
-    # (The manager-based env does not initialise particle objects automatically;
-    # the cloth USD prim is already present in the scene.)
-    if not hasattr(env.scene, "particle_objects"):
-        env.scene.particle_objects = {}
-    env.scene.particle_objects["cloths"] = ClothObject(
-        cfg=env.cfg.scene.cloths,
-        scene=env.scene,
-    )
-    env.scene.particle_objects["cloths"].initialize()
-
-    if args_cli.record:
-        del env.recorder_manager
-        if args_cli.use_lerobot_recorder:
-            from leisaac.enhance.datasets.lerobot_dataset_handler import (
-                LeRobotDatasetCfg,
-            )
-            from leisaac.enhance.managers.lerobot_recorder_manager import (
-                LeRobotRecorderManager,
-            )
-
-            dataset_cfg = LeRobotDatasetCfg(
-                repo_id=args_cli.lerobot_dataset_repo_id,
-                fps=args_cli.lerobot_dataset_fps,
-            )
-            env.recorder_manager = LeRobotRecorderManager(env_cfg.recorders, dataset_cfg, env)
-        else:
-            env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
-            env.recorder_manager.flush_steps = 100
-            env.recorder_manager.compression = "lzf"
-
-    # Rate limiter
-    rate_limiter = RateLimiter(args_cli.step_hz)
-
-    # Initialize / reset
-    if hasattr(env, "initialize"):
-        env.initialize()
-    env.reset()
-
-    resume_recorded_demo_count = 0
-    if args_cli.record and args_cli.resume:
-        resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
-        print(f"Resuming recording from existing dataset with {resume_recorded_demo_count} demonstrations.")
-    current_recorded_demo_count = resume_recorded_demo_count
-
-    # --- FK calibration: drive both arms to rest pose and record EE world positions ---
-    # cloth_folded() requires both arms within SO101_FOLLOWER_REST_POSE_RANGE.
-    # We find the EE world position for each arm at rest pose so the state machine
-    # can drive them there via IK during the return-home phase.
-    _left_arm = env.scene["left_arm"]
-    _right_arm = env.scene["right_arm"]
-    _joint_names = list(_left_arm.data.joint_names)
-    _REST_POSE_DEG = {
-        "shoulder_pan": 0.0,
-        "shoulder_lift": -100.0,
-        "elbow_flex": 90.0,
-        "wrist_flex": 50.0,
-        "wrist_roll": 0.0,
-        "gripper": -10.0,
-    }
-    _rest_joint_pos = torch.zeros(env.num_envs, len(_joint_names), device=env.device)
-    for _idx, _name in enumerate(_joint_names):
-        if _name in _REST_POSE_DEG:
-            _rest_joint_pos[:, _idx] = _REST_POSE_DEG[_name] * torch.pi / 180.0
-
-    # Teleport both arms simultaneously to rest pose, then read EE positions via FK.
-    _left_arm.write_joint_state_to_sim(
-        position=_rest_joint_pos,
-        velocity=torch.zeros_like(_rest_joint_pos),
-    )
-    _right_arm.write_joint_state_to_sim(
-        position=_rest_joint_pos,
-        velocity=torch.zeros_like(_rest_joint_pos),
-    )
-    env.sim.step(render=False)
-    env.scene.update(dt=env.physics_dt)
-    _rest_ee_left = _left_arm.data.body_pos_w[:, -1, :].clone()
-    _rest_ee_right = _right_arm.data.body_pos_w[:, -1, :].clone()
-    print(f"[Calibration] Left arm rest pose EE (world):  {_rest_ee_left[0].cpu().numpy()}")
-    print(f"[Calibration] Right arm rest pose EE (world): {_rest_ee_right[0].cpu().numpy()}")
-
-    # Reset scene to initial state (joints back to default, cloth to initial pose)
-    env.reset()
-    # -------------------------------------------------------------------------
-
-    sm = FoldClothStateMachine(rest_ee_left=_rest_ee_left, rest_ee_right=_rest_ee_right)
-    sm.reset()
-    start_record_state = False
-
-    while simulation_app.is_running() and not simulation_app.is_exiting():
-        if env.cfg.dynamic_reset_gripper_effort_limit:
-            dynamic_reset_gripper_effort_limit_sim(env, device)
-
-        if sm.is_episode_done:
-            # --- Check whether the current episode is considered successful ---
-            try:
-                print("Completed one cycle. Checking task success status...")
-                success_tensor = cloth_folded(
-                    env,
-                    cloth_cfg=SceneEntityCfg("cloths"),
-                    cloth_keypoints_index=[159789, 120788, 115370, 159716, 121443, 112382],
-                    distance_threshold=0.20,
-                )
-                success = bool(success_tensor.all().item())
-                print("Task success status:", success)
-            except Exception as e:
-                print("Task failed due to:", e)
-                success = False
-
-            if start_record_state:
-                if args_cli.record:
-                    print("Stop recording.")
-                start_record_state = False
-
-            # Only mark the episode as successful if the task succeeds
-            if args_cli.record and success:
-                print("✅ Task succeeded. Marking this demonstration as SUCCESS.")
-                auto_terminate(env, True)
-                print("SUCCESS.")
-                current_recorded_demo_count += 1
-            else:
-                print("❌ Task failed. Marking this demonstration as FAILURE.")
-                auto_terminate(env, False)
-
-            if (
-                args_cli.record
-                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                > current_recorded_demo_count
-            ):
-                current_recorded_demo_count = (
-                    env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                )
-                print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
-
-            if (
-                args_cli.record
-                and args_cli.num_demos > 0
-                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                >= args_cli.num_demos
-            ):
-                print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
-                break
-
-            # Reset cloth particle state before resetting the scene so that the
-            # cloth returns to its initial USD pose for the next episode.
-            env.scene.particle_objects["cloths"].reset()
-            env.reset()
-            sm.reset()
-
-            if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
-                print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
-                break
-        else:
-            if not start_record_state:
-                if args_cli.record:
-                    print("Start recording.")
-                start_record_state = True
-            actions = sm.get_action(env)
-            env.step(actions)
-            sm.advance()
-
-        if rate_limiter:
-            rate_limiter.sleep(env)
-
-    if args_cli.record and hasattr(env.recorder_manager, "finalize"):
-        env.recorder_manager.finalize()
-
-    env.close()
-    simulation_app.close()
-
-
-if __name__ == "__main__":
-    # run the main function
-    main()
diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
index d0c40574..2c442919 100644
--- a/scripts/environments/state_machine/pick_orange.py
+++ b/scripts/environments/state_machine/pick_orange.py
@@ -263,17 +263,10 @@ def main() -> None:
         if sm.is_episode_done:
             # --- Check whether the current episode is considered successful ---
             try:
-                # IK cannot reliably converge to the correct joint configuration: the same
-                # EE position can be reached by many different joint solutions (IK is not
-                # unique). Directly teleport joints to rest pose before the success check,
-                # reusing _rest_joint_pos computed during FK calibration at startup.
                 _robot.write_joint_state_to_sim(
                     position=_rest_joint_pos,
                     velocity=torch.zeros_like(_rest_joint_pos),
                 )
-                # Do NOT call env.sim.step() here: that would run physics with stale
-                # actuator targets from the last env.step(), undoing the teleport.
-                # Just refresh the scene data cache so task_done() reads the new positions.
                 env.scene.update(dt=env.physics_dt)
 
                 print("Completed one cycle. Checking task success status...")
@@ -340,9 +333,6 @@ def main() -> None:
                     print("Start recording.")
                 start_record_state = True
             actions = sm.get_action(env)
-            # During the last orange's return home phase (orange 3, step 680-979): linearly
-            # interpolate joint positions from the configuration at step 680 to rest pose at
-            # step 979.
             if sm.orange_now == 3 and sm.step_count >= 680:
                 if sm.step_count == 680:
                     _home_start_pos = _robot.data.joint_pos.clone()
@@ -378,5 +368,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    # run the main function
     main()
diff --git a/scripts/rl/train_pick_orange.py b/scripts/rl/train_pick_orange.py
new file mode 100644
index 00000000..2a158184
--- /dev/null
+++ b/scripts/rl/train_pick_orange.py
@@ -0,0 +1,107 @@
+"""PPO training script for LeIsaac-SO101-PickOrange-RL-v0 using rsl_rl.
+
+Usage:
+    python scripts/rl/train_pick_orange.py --num_envs 64 --num_iters 1000 \\
+        --headless --log_dir ./logs/pick_orange_rl
+
+Launch Isaac Sim before running (or pass --headless).
+"""
+
+import argparse
+import os
+import time
+
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="Train pick-orange with rsl_rl PPO.")
+parser.add_argument("--num_envs", type=int, default=64)
+parser.add_argument("--num_iters", type=int, default=1000)
+parser.add_argument("--log_dir", type=str, default="./logs/pick_orange_rl")
+parser.add_argument("--seed", type=int, default=None)
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+app_launcher = AppLauncher(vars(args_cli))
+simulation_app = app_launcher.app
+
+import gymnasium as gym
+import torch
+
+from isaaclab_tasks.utils import parse_env_cfg
+from rsl_rl.runners import OnPolicyRunner
+
+import leisaac.tasks  # noqa: F401 — registers all leisaac gym envs
+from leisaac.rl import IsaaclabRslRlVecEnvWrapper
+
+# ------------------------------------------------------------------
+# PPO training configuration
+# ------------------------------------------------------------------
+TRAIN_CFG = {
+    "num_steps_per_env": 24,
+    "save_interval": 50,
+    "obs_groups": {
+        "policy": ["policy"],
+        "critic": ["policy"],
+    },
+    "empirical_normalization": True,
+    "policy": {
+        "class_name": "ActorCritic",
+        "hidden_dims": [256, 128, 64],
+        "activation": "elu",
+        "init_noise_std": 1.0,
+    },
+    "algorithm": {
+        "class_name": "PPO",
+        "value_loss_coef": 1.0,
+        "use_clipping": True,
+        "clip_param": 0.2,
+        "entropy_coef": 0.005,
+        "num_learning_epochs": 5,
+        "num_mini_batches": 4,
+        "learning_rate": 3e-4,
+        "schedule": "adaptive",
+        "gamma": 0.99,
+        "lam": 0.95,
+        "desired_kl": 0.01,
+        "max_grad_norm": 1.0,
+    },
+}
+
+
+def main():
+    os.makedirs(args_cli.log_dir, exist_ok=True)
+
+    env_cfg = parse_env_cfg(
+        "LeIsaac-SO101-PickOrange-RL-v0",
+        device=args_cli.device,
+        num_envs=args_cli.num_envs,
+    )
+    if args_cli.seed is not None:
+        env_cfg.seed = args_cli.seed
+    else:
+        env_cfg.seed = int(time.time())
+
+    env = gym.make("LeIsaac-SO101-PickOrange-RL-v0", cfg=env_cfg).unwrapped
+
+    wrapped_env = IsaaclabRslRlVecEnvWrapper(env)
+
+    print(f"[INFO] num_envs={wrapped_env.num_envs}  "
+          f"num_actions={wrapped_env.num_actions}  "
+          f"max_episode_length={wrapped_env.max_episode_length}")
+    obs = wrapped_env.get_observations()
+    print(f"[INFO] obs['policy'] shape: {obs['policy'].shape}")
+
+    runner = OnPolicyRunner(
+        wrapped_env,
+        TRAIN_CFG,
+        log_dir=args_cli.log_dir,
+        device=args_cli.device,
+    )
+    runner.learn(num_learning_iterations=args_cli.num_iters, init_at_random_ep_len=True)
+
+    env.close()
+    simulation_app.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index 1051c986..729199ac 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -8,7 +8,7 @@
 
 def init_action_cfg(action_cfg, device):
     """SO101 Follower action configuration: arm_action and gripper_action"""
-    if device in ["so101leader", "lekiwi-leader"]:
+    if device in ["so101_joint_pos", "so101leader", "lekiwi-leader"]:
         action_cfg.arm_action = mdp.JointPositionActionCfg(
             asset_name="robot",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
@@ -76,22 +76,12 @@ def init_action_cfg(action_cfg, device):
             joint_names=["gripper"],
             scale=1.0,
         )
-    elif device in ["so101_state_machine"]:
+    elif device in ["so101_state_machine"]:  # IK-based: action = EE pose (7D) + binary gripper, not raw joint angles
         action_cfg.arm_action = mdp.DifferentialInverseKinematicsActionCfg(
             asset_name="robot",
-            joint_names=[
-                "shoulder_pan",
-                "shoulder_lift",
-                "elbow_flex",
-                "wrist_flex",
-                "wrist_roll",
-            ],
+            joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
             body_name="gripper",
-            controller=mdp.DifferentialIKControllerCfg(
-                command_type="pose",
-                ik_method="dls",
-                ik_params={"lambda_val": 0.04},
-            ),
+            controller=mdp.DifferentialIKControllerCfg(command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}),
         )
         action_cfg.gripper_action = mdp.BinaryJointPositionActionCfg(
             asset_name="robot",
@@ -99,30 +89,31 @@ def init_action_cfg(action_cfg, device):
             open_command_expr={"gripper": 1.0},
             close_command_expr={"gripper": 0.4},
         )
-    elif device in ["bi_so101_state_machine"]:
-        # Bi-arm IK state machine: left arm + right arm, each with absolute-pose IK + binary gripper.
-        # Action tensor layout: [left_ik(7), left_gripper(1), right_ik(7), right_gripper(1)] = 16D
-        _ik_cfg = lambda asset: mdp.DifferentialInverseKinematicsActionCfg(
-            asset_name=asset,
+    elif device in ["bi_so101_state_machine"]:  # IK-based: action = EE pose (7D) + binary gripper, not raw joint angles
+        action_cfg.left_arm_action = mdp.DifferentialInverseKinematicsActionCfg(
+            asset_name="left_arm",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
             body_name="gripper",
-            controller=mdp.DifferentialIKControllerCfg(
-                command_type="pose",
-                ik_method="dls",
-                ik_params={"lambda_val": 0.04},
-            ),
-        )
-        _grip_cfg = lambda asset: mdp.BinaryJointPositionActionCfg(
-            asset_name=asset,
+            controller=mdp.DifferentialIKControllerCfg(command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}),
+        )
+        action_cfg.left_gripper_action = mdp.BinaryJointPositionActionCfg(
+            asset_name="left_arm",
+            joint_names=["gripper"],
+            open_command_expr={"gripper": 1.0},
+            close_command_expr={"gripper": 0.01},
+        )
+        action_cfg.right_arm_action = mdp.DifferentialInverseKinematicsActionCfg(
+            asset_name="right_arm",
+            joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
+            body_name="gripper",
+            controller=mdp.DifferentialIKControllerCfg(command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}),
+        )
+        action_cfg.right_gripper_action = mdp.BinaryJointPositionActionCfg(
+            asset_name="right_arm",
             joint_names=["gripper"],
             open_command_expr={"gripper": 1.0},
             close_command_expr={"gripper": 0.01},
         )
-        action_cfg.left_arm_action = _ik_cfg("left_arm")
-        action_cfg.left_gripper_action = _grip_cfg("left_arm")
-        action_cfg.right_arm_action = _ik_cfg("right_arm")
-        action_cfg.right_gripper_action = _grip_cfg("right_arm")
-
     """LeKiwi action configuration"""
     if device in ["lekiwi-leader", "lekiwi-keyboard", "lekiwi-gamepad"]:
         action_cfg.wheel_action = mdp.JointVelocityActionCfg(
diff --git a/source/leisaac/leisaac/rl/__init__.py b/source/leisaac/leisaac/rl/__init__.py
new file mode 100644
index 00000000..710f1dcd
--- /dev/null
+++ b/source/leisaac/leisaac/rl/__init__.py
@@ -0,0 +1,5 @@
+"""RL training utilities for leisaac environments."""
+
+from .rsl_rl_wrapper import IsaaclabRslRlVecEnvWrapper
+
+__all__ = ["IsaaclabRslRlVecEnvWrapper"]
diff --git a/source/leisaac/leisaac/rl/rsl_rl_wrapper.py b/source/leisaac/leisaac/rl/rsl_rl_wrapper.py
new file mode 100644
index 00000000..96986b5e
--- /dev/null
+++ b/source/leisaac/leisaac/rl/rsl_rl_wrapper.py
@@ -0,0 +1,108 @@
+"""Wrapper that bridges an IsaacLab ManagerBasedRLEnv to the rsl_rl VecEnv interface."""
+
+from __future__ import annotations
+
+import torch
+from tensordict import TensorDict
+
+from isaaclab.envs import ManagerBasedRLEnv
+
+from rsl_rl.env import VecEnv
+
+
+class IsaaclabRslRlVecEnvWrapper(VecEnv):
+    """Wraps a :class:`ManagerBasedRLEnv` so it satisfies rsl_rl's :class:`VecEnv` ABC.
+
+    The RL env cfg must configure its policy observation group with
+    ``concatenate_terms = True`` so that ``obs_dict["policy"]`` is already a
+    ``(num_envs, obs_dim)`` tensor rather than a dict of named terms.
+
+    Args:
+        env: An already-constructed :class:`ManagerBasedRLEnv` instance.
+    """
+
+    def __init__(self, env: ManagerBasedRLEnv) -> None:
+        self.env = env
+
+    # ------------------------------------------------------------------
+    # VecEnv required attributes
+    # ------------------------------------------------------------------
+
+    @property
+    def num_envs(self) -> int:
+        return self.env.num_envs
+
+    @property
+    def num_actions(self) -> int:
+        return self.env.action_manager.total_action_dim
+
+    @property
+    def max_episode_length(self) -> int:
+        return int(self.env.max_episode_length)
+
+    @property
+    def episode_length_buf(self) -> torch.Tensor:
+        return self.env.episode_length_buf
+
+    @episode_length_buf.setter
+    def episode_length_buf(self, value: torch.Tensor) -> None:
+        self.env.episode_length_buf = value
+
+    @property
+    def device(self) -> torch.device | str:
+        return self.env.device
+
+    @property
+    def cfg(self):
+        return self.env.cfg
+
+    # ------------------------------------------------------------------
+    # VecEnv interface
+    # ------------------------------------------------------------------
+
+    def get_observations(self) -> TensorDict:
+        """Return current observations as a TensorDict with key ``"policy"``."""
+        obs_dict = self.env.observation_manager.compute()
+        return self._to_tensordict(obs_dict)
+
+    def step(
+        self, actions: torch.Tensor
+    ) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]:
+        """Step the environment.
+
+        Returns:
+            obs: TensorDict with ``"policy"`` key.
+            rewards: Shape ``(num_envs,)``.
+            dones: Float tensor ``(num_envs,)``, 1 on episode end.
+            extras: Dict with at minimum ``"time_outs"`` key (bool tensor).
+        """
+        obs_dict, rew, terminated, timeouts, extras = self.env.step(actions)
+        obs = self._to_tensordict(obs_dict)
+        dones = (terminated | timeouts).float()
+        extras["time_outs"] = timeouts
+        return obs, rew, dones, extras
+
+    def reset(self) -> TensorDict:
+        obs_dict, _ = self.env.reset()
+        return self._to_tensordict(obs_dict)
+
+    # ------------------------------------------------------------------
+    # Helper
+    # ------------------------------------------------------------------
+
+    def _to_tensordict(self, obs_dict: dict) -> TensorDict:
+        """Convert the IsaacLab obs dict to a TensorDict.
+
+        When ``concatenate_terms=True`` (required for RL), ``obs_dict["policy"]``
+        is already a ``(num_envs, obs_dim)`` tensor.  We wrap it in a TensorDict
+        so rsl_rl can access it by group name.
+        """
+        policy_obs = obs_dict["policy"]
+        if isinstance(policy_obs, dict):
+            # Fallback: concatenate manually if cfg has concatenate_terms=False
+            tensors = []
+            for v in policy_obs.values():
+                t = v.float() if v.dtype == torch.bool else v
+                tensors.append(t.reshape(self.num_envs, -1))
+            policy_obs = torch.cat(tensors, dim=-1)
+        return TensorDict({"policy": policy_obs}, batch_size=[self.num_envs])
diff --git a/source/leisaac/leisaac/state_machine/fold_cloth.py b/source/leisaac/leisaac/state_machine/fold_cloth.py
deleted file mode 100644
index 3ae98499..00000000
--- a/source/leisaac/leisaac/state_machine/fold_cloth.py
+++ /dev/null
@@ -1,429 +0,0 @@
-"""State machine for the fold-cloth task (bi-arm SO-101)."""
-
-import torch
-from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
-
-from .base import StateMachineBase
-
-# ---------------------------------------------------------------------------
-# Module-level constants
-# ---------------------------------------------------------------------------
-
-_GRIPPER_OPEN = 1.0
-_GRIPPER_CLOSE = -1.0
-
-# Cloth particle indices for the 6 keypoints, matching cloth_folded() in terminations.py.
-# Order: left_sleeve(0), left_shoulder(1), left_hem(2),
-#        right_sleeve(3), right_shoulder(4), right_hem(5)
-_CLOTH_KEYPOINTS = [159789, 120788, 115370, 159716, 121443, 112382]
-
-_KP_LEFT_SLEEVE = 0
-_KP_LEFT_SHOULDER = 1
-_KP_LEFT_HEM = 2
-_KP_RIGHT_SLEEVE = 3
-_KP_RIGHT_SHOULDER = 4
-_KP_RIGHT_HEM = 5
-
-# Height offsets relative to cloth keypoints
-_HOVER_ABOVE = 0.12   # approach height above keypoint (m)
-_GRASP_ABOVE = 0.02   # grip height above cloth surface (cloth is thin) (m)
-_FOLD_HEIGHT = 0.18   # height above shoulder target while carrying cloth (m)
-_LIFT_HEIGHT = 0.20   # neutral lift height used between phases (m)
-
-# ---------------------------------------------------------------------------
-# Phase step boundaries (at 60 Hz)
-# ---------------------------------------------------------------------------
-#   0 – 119  hover_above_sleeves       (2 s) – both arms approach above sleeves
-# 120 – 179  lower_to_sleeves          (1 s) – descend to grasp height
-# 180 – 239  grasp_sleeves             (1 s) – close grippers
-# 240 – 359  cross_sleeves             (2 s) – left→right_shoulder, right→left_shoulder
-# 360 – 419  release_sleeves           (1 s) – open grippers at crossed position
-# 420 – 479  lift_grippers_1           (1 s) – lift clear of cloth
-# 480 – 599  hover_above_hems          (2 s) – move above bottom corners
-# 600 – 659  lower_to_hems             (1 s) – descend to grasp height
-# 660 – 719  grasp_hems                (1 s) – close grippers
-# 720 – 839  fold_hems_up              (2 s) – left_hem→left_shoulder, right_hem→right_shoulder
-# 840 – 899  release_hems              (1 s) – open grippers
-# 900 – 959  lift_grippers_2           (1 s) – lift clear of cloth
-# 960 –1259  return_home               (5 s) – both arms drive to rest-pose EE
-_P_HOVER_SLEEVES = 120
-_P_LOWER_SLEEVES = 180
-_P_GRASP_SLEEVES = 240
-_P_CROSS_SLEEVES = 360
-_P_RELEASE_SLEEVES = 420
-_P_LIFT1 = 480
-_P_HOVER_HEMS = 600
-_P_LOWER_HEMS = 660
-_P_GRASP_HEMS = 720
-_P_FOLD_HEMS = 840
-_P_RELEASE_HEMS = 900
-_P_LIFT2 = 960
-
-
-# ---------------------------------------------------------------------------
-# State machine
-# ---------------------------------------------------------------------------
-
-
-class FoldClothStateMachine(StateMachineBase):
-    """Bi-arm state machine for the fold-cloth manipulation task.
-
-    The robot uses two SO-101 arms to fold a garment lying on a surface:
-
-    1. Both arms reach above the sleeves, grasp them and cross them over to
-       opposite shoulders (left sleeve → right shoulder, right sleeve → left
-       shoulder).
-    2. Both arms then reach the hem corners, grasp them and fold them upward
-       to the corresponding shoulder positions.
-    3. Both arms return to their rest poses so that ``cloth_folded()``
-       (which also checks ``is_so101_at_rest_pose`` for both arms) can
-       evaluate the episode as successful.
-
-    Args:
-        rest_ee_left:  World-frame EE position of the left arm at rest pose,
-            shape ``(num_envs, 3)``.  Computed by FK calibration at startup.
-        rest_ee_right: World-frame EE position of the right arm at rest pose,
-            shape ``(num_envs, 3)``.  Computed by FK calibration at startup.
-
-    Attributes:
-        MAX_STEPS (int): Total simulation steps for one episode (1260 ≈ 21 s at 60 Hz).
-
-    Example::
-
-        sm = FoldClothStateMachine(rest_ee_left=left_ee, rest_ee_right=right_ee)
-        env.reset()
-        while not sm.is_episode_done:
-            actions = sm.get_action(env)
-            env.step(actions)
-            sm.advance()
-        success = cloth_folded(env, ...)
-        sm.reset()
-    """
-
-    MAX_STEPS: int = 1260
-
-    def __init__(
-        self,
-        rest_ee_left: torch.Tensor | None = None,
-        rest_ee_right: torch.Tensor | None = None,
-    ) -> None:
-        self._step_count: int = 0
-        self._episode_done: bool = False
-        self._rest_ee_left = rest_ee_left    # (num_envs, 3), calibrated at startup
-        self._rest_ee_right = rest_ee_right  # (num_envs, 3), calibrated at startup
-        # Shoulder positions captured at step 0 and used as stable fold targets.
-        self._left_shoulder_pos: torch.Tensor | None = None
-        self._right_shoulder_pos: torch.Tensor | None = None
-
-    # ------------------------------------------------------------------
-    # StateMachineBase interface
-    # ------------------------------------------------------------------
-
-    def get_action(self, env) -> torch.Tensor:
-        """Compute the 16-D action tensor for both arms at the current step.
-
-        The action layout is:
-        ``[left_pos(3), left_quat(4), left_gripper(1), right_pos(3), right_quat(4), right_gripper(1)]``
-
-        The scene must expose:
-        - ``"left_arm"`` / ``"right_arm"`` – SO-101 articulations
-        - ``env.scene.particle_objects["cloths"]`` – :class:`ClothObject`
-
-        Args:
-            env: The simulation environment.
-
-        Returns:
-            Action tensor of shape ``(num_envs, 16)``.
-        """
-        step = self._step_count
-        num_envs = env.num_envs
-        device = env.device
-
-        # Read cloth keypoints: (num_envs, 6, 3) in world frame
-        kp = self._get_cloth_keypoints(env)
-
-        # Capture shoulder positions once at episode start as stable fold targets
-        if step == 0:
-            self._left_shoulder_pos = kp[:, _KP_LEFT_SHOULDER, :].clone()
-            self._right_shoulder_pos = kp[:, _KP_RIGHT_SHOULDER, :].clone()
-
-        # --- Phase dispatch ---
-        if step < _P_HOVER_SLEEVES:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_hover_sleeves(kp, num_envs, device)
-        elif step < _P_LOWER_SLEEVES:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_lower_to_sleeves(kp, num_envs, device)
-        elif step < _P_GRASP_SLEEVES:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_grasp_sleeves(kp, num_envs, device)
-        elif step < _P_CROSS_SLEEVES:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_cross_sleeves(num_envs, device)
-        elif step < _P_RELEASE_SLEEVES:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_release_sleeves(num_envs, device)
-        elif step < _P_LIFT1:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_lift_grippers(num_envs, device)
-        elif step < _P_HOVER_HEMS:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_hover_hems(kp, num_envs, device)
-        elif step < _P_LOWER_HEMS:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_lower_to_hems(kp, num_envs, device)
-        elif step < _P_GRASP_HEMS:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_grasp_hems(kp, num_envs, device)
-        elif step < _P_FOLD_HEMS:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_fold_hems(num_envs, device)
-        elif step < _P_RELEASE_HEMS:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_release_hems(num_envs, device)
-        elif step < _P_LIFT2:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_lift_grippers(num_envs, device)
-        else:
-            l_tgt, r_tgt, l_grip, r_grip = self._phase_return_home(num_envs, device)
-
-        left_action = self._to_arm_action(l_tgt, l_grip, "left_arm", env)
-        right_action = self._to_arm_action(r_tgt, r_grip, "right_arm", env)
-
-        return torch.cat([left_action, right_action], dim=-1)  # (num_envs, 16)
-
-    def advance(self) -> None:
-        """Increment the step counter; mark episode done after MAX_STEPS."""
-        self._step_count += 1
-        if self._step_count >= self.MAX_STEPS:
-            self._episode_done = True
-
-    def reset(self) -> None:
-        """Reset to the initial state for a new episode."""
-        self._step_count = 0
-        self._episode_done = False
-        self._left_shoulder_pos = None
-        self._right_shoulder_pos = None
-
-    # ------------------------------------------------------------------
-    # Internal helpers
-    # ------------------------------------------------------------------
-
-    def _get_cloth_keypoints(self, env) -> torch.Tensor:
-        """Return cloth keypoints as ``(num_envs, 6, 3)`` in world frame."""
-        cloth = env.scene.particle_objects["cloths"]
-        return cloth.point_positions[:, _CLOTH_KEYPOINTS, :]
-
-    def _to_arm_action(
-        self,
-        target_pos_w: torch.Tensor,
-        gripper_cmd: torch.Tensor,
-        arm_name: str,
-        env,
-    ) -> torch.Tensor:
-        """Convert a world-frame EE position to the arm's local IK action (8-D).
-
-        Args:
-            target_pos_w: Target position in world frame, shape ``(num_envs, 3)``.
-            gripper_cmd:  Gripper command, shape ``(num_envs, 1)``.
-            arm_name:     ``"left_arm"`` or ``"right_arm"``.
-            env:          The simulation environment.
-
-        Returns:
-            Tensor of shape ``(num_envs, 8)`` = ``[pos(3), quat(4), gripper(1)]``
-            expressed in the arm's base frame.
-        """
-        device = env.device
-        num_envs = env.num_envs
-
-        base_pos_w = env.scene[arm_name].data.root_pos_w.clone()
-        base_quat_w = env.scene[arm_name].data.root_quat_w.clone()
-
-        # Identity EE orientation in world frame (gripper pointing down)
-        target_quat_w = quat_from_euler_xyz(
-            torch.tensor(0.0, device=device),
-            torch.tensor(0.0, device=device),
-            torch.tensor(0.0, device=device),
-        ).repeat(num_envs, 1)
-        target_quat = quat_mul(quat_inv(base_quat_w), target_quat_w)
-
-        diff_w = target_pos_w - base_pos_w
-        target_pos_local = quat_apply(quat_inv(base_quat_w), diff_w)
-
-        return torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
-
-    def _shoulder_center(self, num_envs: int, device: str) -> torch.Tensor:
-        """Mid-point between the two captured shoulder positions."""
-        if self._left_shoulder_pos is not None and self._right_shoulder_pos is not None:
-            return (self._left_shoulder_pos + self._right_shoulder_pos) / 2.0
-        return torch.zeros(num_envs, 3, device=device)
-
-    # ------------------------------------------------------------------
-    # Phase methods
-    # ------------------------------------------------------------------
-
-    def _phase_hover_sleeves(
-        self, kp: torch.Tensor, num_envs: int, device: str
-    ) -> tuple:
-        """Steps 0–119: both arms hover above respective sleeves."""
-        l_tgt = kp[:, _KP_LEFT_SLEEVE, :].clone()
-        l_tgt[:, 2] += _HOVER_ABOVE
-        r_tgt = kp[:, _KP_RIGHT_SLEEVE, :].clone()
-        r_tgt[:, 2] += _HOVER_ABOVE
-        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return l_tgt, r_tgt, open_g, open_g.clone()
-
-    def _phase_lower_to_sleeves(
-        self, kp: torch.Tensor, num_envs: int, device: str
-    ) -> tuple:
-        """Steps 120–179: descend to grasp height above sleeves."""
-        l_tgt = kp[:, _KP_LEFT_SLEEVE, :].clone()
-        l_tgt[:, 2] += _GRASP_ABOVE
-        r_tgt = kp[:, _KP_RIGHT_SLEEVE, :].clone()
-        r_tgt[:, 2] += _GRASP_ABOVE
-        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return l_tgt, r_tgt, open_g, open_g.clone()
-
-    def _phase_grasp_sleeves(
-        self, kp: torch.Tensor, num_envs: int, device: str
-    ) -> tuple:
-        """Steps 180–239: close grippers on sleeves."""
-        l_tgt = kp[:, _KP_LEFT_SLEEVE, :].clone()
-        l_tgt[:, 2] += _GRASP_ABOVE
-        r_tgt = kp[:, _KP_RIGHT_SLEEVE, :].clone()
-        r_tgt[:, 2] += _GRASP_ABOVE
-        close_g = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
-        return l_tgt, r_tgt, close_g, close_g.clone()
-
-    def _phase_cross_sleeves(self, num_envs: int, device: str) -> tuple:
-        """Steps 240–359: cross sleeves to opposite shoulders.
-
-        Left arm (holding left sleeve) moves to right shoulder.
-        Right arm (holding right sleeve) moves to left shoulder.
-        """
-        if self._right_shoulder_pos is not None:
-            l_tgt = self._right_shoulder_pos.clone()
-            l_tgt[:, 2] += _FOLD_HEIGHT
-        else:
-            l_tgt = torch.zeros(num_envs, 3, device=device)
-
-        if self._left_shoulder_pos is not None:
-            r_tgt = self._left_shoulder_pos.clone()
-            r_tgt[:, 2] += _FOLD_HEIGHT
-        else:
-            r_tgt = torch.zeros(num_envs, 3, device=device)
-
-        close_g = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
-        return l_tgt, r_tgt, close_g, close_g.clone()
-
-    def _phase_release_sleeves(self, num_envs: int, device: str) -> tuple:
-        """Steps 360–419: open grippers to deposit sleeves at shoulder positions."""
-        if self._right_shoulder_pos is not None:
-            l_tgt = self._right_shoulder_pos.clone()
-            l_tgt[:, 2] += _FOLD_HEIGHT
-        else:
-            l_tgt = torch.zeros(num_envs, 3, device=device)
-
-        if self._left_shoulder_pos is not None:
-            r_tgt = self._left_shoulder_pos.clone()
-            r_tgt[:, 2] += _FOLD_HEIGHT
-        else:
-            r_tgt = torch.zeros(num_envs, 3, device=device)
-
-        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return l_tgt, r_tgt, open_g, open_g.clone()
-
-    def _phase_lift_grippers(self, num_envs: int, device: str) -> tuple:
-        """Lift both grippers clear of the cloth to a neutral position."""
-        center = self._shoulder_center(num_envs, device)
-        l_tgt = center.clone()
-        l_tgt[:, 2] += _LIFT_HEIGHT + 0.10
-        r_tgt = center.clone()
-        r_tgt[:, 2] += _LIFT_HEIGHT + 0.10
-        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return l_tgt, r_tgt, open_g, open_g.clone()
-
-    def _phase_hover_hems(
-        self, kp: torch.Tensor, num_envs: int, device: str
-    ) -> tuple:
-        """Steps 480–599: hover above the bottom hem corners."""
-        l_tgt = kp[:, _KP_LEFT_HEM, :].clone()
-        l_tgt[:, 2] += _HOVER_ABOVE
-        r_tgt = kp[:, _KP_RIGHT_HEM, :].clone()
-        r_tgt[:, 2] += _HOVER_ABOVE
-        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return l_tgt, r_tgt, open_g, open_g.clone()
-
-    def _phase_lower_to_hems(
-        self, kp: torch.Tensor, num_envs: int, device: str
-    ) -> tuple:
-        """Steps 600–659: descend to grasp height above hems."""
-        l_tgt = kp[:, _KP_LEFT_HEM, :].clone()
-        l_tgt[:, 2] += _GRASP_ABOVE
-        r_tgt = kp[:, _KP_RIGHT_HEM, :].clone()
-        r_tgt[:, 2] += _GRASP_ABOVE
-        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return l_tgt, r_tgt, open_g, open_g.clone()
-
-    def _phase_grasp_hems(
-        self, kp: torch.Tensor, num_envs: int, device: str
-    ) -> tuple:
-        """Steps 660–719: close grippers on hem corners."""
-        l_tgt = kp[:, _KP_LEFT_HEM, :].clone()
-        l_tgt[:, 2] += _GRASP_ABOVE
-        r_tgt = kp[:, _KP_RIGHT_HEM, :].clone()
-        r_tgt[:, 2] += _GRASP_ABOVE
-        close_g = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
-        return l_tgt, r_tgt, close_g, close_g.clone()
-
-    def _phase_fold_hems(self, num_envs: int, device: str) -> tuple:
-        """Steps 720–839: fold hem corners up to shoulder level.
-
-        Left hem moves to left shoulder position; right hem to right shoulder.
-        """
-        if self._left_shoulder_pos is not None:
-            l_tgt = self._left_shoulder_pos.clone()
-            l_tgt[:, 2] += _FOLD_HEIGHT
-        else:
-            l_tgt = torch.zeros(num_envs, 3, device=device)
-
-        if self._right_shoulder_pos is not None:
-            r_tgt = self._right_shoulder_pos.clone()
-            r_tgt[:, 2] += _FOLD_HEIGHT
-        else:
-            r_tgt = torch.zeros(num_envs, 3, device=device)
-
-        close_g = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
-        return l_tgt, r_tgt, close_g, close_g.clone()
-
-    def _phase_release_hems(self, num_envs: int, device: str) -> tuple:
-        """Steps 840–899: open grippers to deposit hems at shoulder positions."""
-        if self._left_shoulder_pos is not None:
-            l_tgt = self._left_shoulder_pos.clone()
-            l_tgt[:, 2] += _FOLD_HEIGHT
-        else:
-            l_tgt = torch.zeros(num_envs, 3, device=device)
-
-        if self._right_shoulder_pos is not None:
-            r_tgt = self._right_shoulder_pos.clone()
-            r_tgt[:, 2] += _FOLD_HEIGHT
-        else:
-            r_tgt = torch.zeros(num_envs, 3, device=device)
-
-        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return l_tgt, r_tgt, open_g, open_g.clone()
-
-    def _phase_return_home(self, num_envs: int, device: str) -> tuple:
-        """Steps 960–1259: drive both arms to rest-pose EE (for task_done check).
-
-        Targets ``rest_ee_left`` / ``rest_ee_right`` computed at startup by FK
-        calibration.  The incremental IK drives joints toward the rest pose
-        configuration (shoulder_lift≈-100°, elbow_flex≈90°, wrist_flex≈50°)
-        within the ±30° tolerance required by ``is_so101_at_rest_pose``.
-        """
-        l_tgt = self._rest_ee_left.clone() if self._rest_ee_left is not None else torch.zeros(num_envs, 3, device=device)
-        r_tgt = self._rest_ee_right.clone() if self._rest_ee_right is not None else torch.zeros(num_envs, 3, device=device)
-        open_g = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return l_tgt, r_tgt, open_g, open_g.clone()
-
-    # ------------------------------------------------------------------
-    # Properties
-    # ------------------------------------------------------------------
-
-    @property
-    def is_episode_done(self) -> bool:
-        """``True`` once all ``MAX_STEPS`` have been executed."""
-        return self._episode_done
-
-    @property
-    def step_count(self) -> int:
-        """Number of steps elapsed in the current episode."""
-        return self._step_count
diff --git a/source/leisaac/leisaac/tasks/pick_orange/__init__.py b/source/leisaac/leisaac/tasks/pick_orange/__init__.py
index 7b00fe38..06c6adb1 100644
--- a/source/leisaac/leisaac/tasks/pick_orange/__init__.py
+++ b/source/leisaac/leisaac/tasks/pick_orange/__init__.py
@@ -26,3 +26,12 @@
         "env_cfg_entry_point": f"{__name__}.direct.pick_orange_env:PickOrangeEnvCfg",
     },
 )
+
+gym.register(
+    id="LeIsaac-SO101-PickOrange-RL-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.pick_orange_rl_env_cfg:PickOrangeRLEnvCfg",
+    },
+)
diff --git a/source/leisaac/leisaac/tasks/pick_orange/mdp/__init__.py b/source/leisaac/leisaac/tasks/pick_orange/mdp/__init__.py
index 38ebdb6c..1d602cf2 100644
--- a/source/leisaac/leisaac/tasks/pick_orange/mdp/__init__.py
+++ b/source/leisaac/leisaac/tasks/pick_orange/mdp/__init__.py
@@ -2,4 +2,5 @@
 from leisaac.enhance.envs.mdp import *
 
 from .observations import *
+from .rewards import *
 from .terminations import *
diff --git a/source/leisaac/leisaac/tasks/pick_orange/mdp/observations.py b/source/leisaac/leisaac/tasks/pick_orange/mdp/observations.py
index 8123d9b3..a49519ec 100644
--- a/source/leisaac/leisaac/tasks/pick_orange/mdp/observations.py
+++ b/source/leisaac/leisaac/tasks/pick_orange/mdp/observations.py
@@ -61,3 +61,30 @@ def put_orange_to_plate(
     placed = torch.logical_and(placed, gripper_open)
 
     return placed
+
+
+def oranges_pos_relative_to_ee(
+    env: ManagerBasedRLEnv | DirectRLEnv,
+    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
+    oranges_cfg: list[SceneEntityCfg] | None = None,
+) -> torch.Tensor:
+    """Return positions of all oranges relative to the end-effector, flattened.
+
+    Returns:
+        Tensor of shape ``(num_envs, 3 * num_oranges)`` in world frame offsets.
+    """
+    if oranges_cfg is None:
+        oranges_cfg = [
+            SceneEntityCfg("Orange001"),
+            SceneEntityCfg("Orange002"),
+            SceneEntityCfg("Orange003"),
+        ]
+    ee_frame: FrameTransformer = env.scene[ee_frame_cfg.name]
+    ee_pos = ee_frame.data.target_pos_w[:, 1, :]  # (num_envs, 3)
+
+    rel_positions = []
+    for orange_cfg in oranges_cfg:
+        orange: RigidObject = env.scene[orange_cfg.name]
+        rel_positions.append(orange.data.root_pos_w - ee_pos)  # (num_envs, 3)
+
+    return torch.cat(rel_positions, dim=-1)  # (num_envs, 9)
diff --git a/source/leisaac/leisaac/tasks/pick_orange/mdp/rewards.py b/source/leisaac/leisaac/tasks/pick_orange/mdp/rewards.py
new file mode 100644
index 00000000..ec0f4314
--- /dev/null
+++ b/source/leisaac/leisaac/tasks/pick_orange/mdp/rewards.py
@@ -0,0 +1,75 @@
+"""Reward functions for the pick-orange RL task."""
+
+import torch
+from isaaclab.assets import Articulation, RigidObject
+from isaaclab.envs import ManagerBasedRLEnv
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.sensors import FrameTransformer
+
+from .observations import orange_grasped, put_orange_to_plate
+from .terminations import task_done
+
+
+def ee_to_nearest_orange_reward(
+    env: ManagerBasedRLEnv,
+    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
+    oranges_cfg: list[SceneEntityCfg] | None = None,
+) -> torch.Tensor:
+    """Shaped reward: exp(-2 * dist) to the nearest orange not yet placed on the plate.
+
+    Encourages the arm to approach any orange. Falls back to all oranges if none
+    are unplaced (e.g. all on plate already).
+    """
+    if oranges_cfg is None:
+        oranges_cfg = [
+            SceneEntityCfg("Orange001"),
+            SceneEntityCfg("Orange002"),
+            SceneEntityCfg("Orange003"),
+        ]
+
+    ee_frame: FrameTransformer = env.scene[ee_frame_cfg.name]
+    ee_pos = ee_frame.data.target_pos_w[:, 1, :]  # (num_envs, 3)
+
+    min_dist = torch.full((env.num_envs,), fill_value=1e6, device=env.device)
+    for orange_cfg in oranges_cfg:
+        orange: RigidObject = env.scene[orange_cfg.name]
+        dist = torch.linalg.vector_norm(orange.data.root_pos_w - ee_pos, dim=-1)
+        min_dist = torch.minimum(min_dist, dist)
+
+    return torch.exp(-2.0 * min_dist)
+
+
+def orange_grasped_reward(
+    env: ManagerBasedRLEnv,
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
+    object_cfg: SceneEntityCfg = SceneEntityCfg("Orange001"),
+) -> torch.Tensor:
+    """Binary reward: +1.0 each step an orange is held in the gripper."""
+    return orange_grasped(env, robot_cfg, ee_frame_cfg, object_cfg).float()
+
+
+def orange_placed_reward(
+    env: ManagerBasedRLEnv,
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
+    object_cfg: SceneEntityCfg = SceneEntityCfg("Orange001"),
+    plate_cfg: SceneEntityCfg = SceneEntityCfg("Plate"),
+) -> torch.Tensor:
+    """Binary reward: +1.0 each step an orange is resting on the plate."""
+    return put_orange_to_plate(env, robot_cfg, ee_frame_cfg, object_cfg, plate_cfg).float()
+
+
+def task_complete_bonus(
+    env: ManagerBasedRLEnv,
+    oranges_cfg: list[SceneEntityCfg] | None = None,
+    plate_cfg: SceneEntityCfg = SceneEntityCfg("Plate"),
+) -> torch.Tensor:
+    """Sparse bonus: +1.0 when all 3 oranges are on the plate and arm at rest."""
+    if oranges_cfg is None:
+        oranges_cfg = [
+            SceneEntityCfg("Orange001"),
+            SceneEntityCfg("Orange002"),
+            SceneEntityCfg("Orange003"),
+        ]
+    return task_done(env, oranges_cfg=oranges_cfg, plate_cfg=plate_cfg).float()
diff --git a/source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py b/source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py
new file mode 100644
index 00000000..c6332727
--- /dev/null
+++ b/source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py
@@ -0,0 +1,173 @@
+"""RL environment configuration for the pick-orange task.
+
+Uses joint-position control (no IK) with proprioceptive-only observations
+so the policy can be trained with rsl_rl PPO.
+
+Action space (6D):  [shoulder_pan, shoulder_lift, elbow_flex, wrist_flex, wrist_roll, gripper]
+Observation space (28D):
+    joint_pos          (6)
+    joint_vel          (6)
+    ee_frame_state     (7)  — EE position (3) + quaternion (4) in robot base frame
+    oranges_pos_rel    (9)  — 3 oranges × 3D position relative to EE
+"""
+
+from isaaclab.managers import ObservationGroupCfg as ObsGroup
+from isaaclab.managers import ObservationTermCfg as ObsTerm
+from isaaclab.managers import RewardTermCfg as RewTerm
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.utils import configclass
+
+from .pick_orange_env_cfg import PickOrangeEnvCfg
+from . import mdp
+
+
+@configclass
+class PickOrangeRLObsCfg:
+    """Proprioceptive-only observation config for RL (28D, concatenated)."""
+
+    @configclass
+    class PolicyCfg(ObsGroup):
+        joint_pos = ObsTerm(func=mdp.joint_pos)
+        joint_vel = ObsTerm(func=mdp.joint_vel)
+        ee_frame_state = ObsTerm(
+            func=mdp.ee_frame_state,
+            params={
+                "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+                "robot_cfg": SceneEntityCfg("robot"),
+            },
+        )
+        oranges_pos_relative = ObsTerm(
+            func=mdp.oranges_pos_relative_to_ee,
+            params={
+                "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+                "oranges_cfg": [
+                    SceneEntityCfg("Orange001"),
+                    SceneEntityCfg("Orange002"),
+                    SceneEntityCfg("Orange003"),
+                ],
+            },
+        )
+
+        def __post_init__(self):
+            self.enable_corruption = False
+            self.concatenate_terms = True  # rsl_rl needs a single flat vector
+
+    policy: PolicyCfg = PolicyCfg()
+
+
+@configclass
+class PickOrangeRLRewardsCfg:
+    """Reward terms for RL training."""
+
+    # Shaped: guide EE toward the nearest orange
+    ee_guidance = RewTerm(
+        func=mdp.ee_to_nearest_orange_reward,
+        weight=0.5,
+        params={
+            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+            "oranges_cfg": [
+                SceneEntityCfg("Orange001"),
+                SceneEntityCfg("Orange002"),
+                SceneEntityCfg("Orange003"),
+            ],
+        },
+    )
+
+    # Per-orange grasp rewards
+    grasped_001 = RewTerm(
+        func=mdp.orange_grasped_reward,
+        weight=1.0,
+        params={
+            "robot_cfg": SceneEntityCfg("robot"),
+            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+            "object_cfg": SceneEntityCfg("Orange001"),
+        },
+    )
+    grasped_002 = RewTerm(
+        func=mdp.orange_grasped_reward,
+        weight=1.0,
+        params={
+            "robot_cfg": SceneEntityCfg("robot"),
+            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+            "object_cfg": SceneEntityCfg("Orange002"),
+        },
+    )
+    grasped_003 = RewTerm(
+        func=mdp.orange_grasped_reward,
+        weight=1.0,
+        params={
+            "robot_cfg": SceneEntityCfg("robot"),
+            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+            "object_cfg": SceneEntityCfg("Orange003"),
+        },
+    )
+
+    # Per-orange placement rewards
+    placed_001 = RewTerm(
+        func=mdp.orange_placed_reward,
+        weight=2.0,
+        params={
+            "robot_cfg": SceneEntityCfg("robot"),
+            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+            "object_cfg": SceneEntityCfg("Orange001"),
+            "plate_cfg": SceneEntityCfg("Plate"),
+        },
+    )
+    placed_002 = RewTerm(
+        func=mdp.orange_placed_reward,
+        weight=2.0,
+        params={
+            "robot_cfg": SceneEntityCfg("robot"),
+            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+            "object_cfg": SceneEntityCfg("Orange002"),
+            "plate_cfg": SceneEntityCfg("Plate"),
+        },
+    )
+    placed_003 = RewTerm(
+        func=mdp.orange_placed_reward,
+        weight=2.0,
+        params={
+            "robot_cfg": SceneEntityCfg("robot"),
+            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
+            "object_cfg": SceneEntityCfg("Orange003"),
+            "plate_cfg": SceneEntityCfg("Plate"),
+        },
+    )
+
+    # Sparse task-completion bonus
+    task_complete = RewTerm(
+        func=mdp.task_complete_bonus,
+        weight=10.0,
+        params={
+            "oranges_cfg": [
+                SceneEntityCfg("Orange001"),
+                SceneEntityCfg("Orange002"),
+                SceneEntityCfg("Orange003"),
+            ],
+            "plate_cfg": SceneEntityCfg("Plate"),
+        },
+    )
+
+
+@configclass
+class PickOrangeRLEnvCfg(PickOrangeEnvCfg):
+    """pick_orange environment configured for RL training with rsl_rl."""
+
+    observations: PickOrangeRLObsCfg = PickOrangeRLObsCfg()
+    rewards: PickOrangeRLRewardsCfg = PickOrangeRLRewardsCfg()
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+
+        # Joint position control — no IK, gravity disabled for arm stability
+        self.use_teleop_device("so101_joint_pos")
+
+        # Shorter episode is sufficient for RL exploration
+        self.episode_length_s = 30.0
+
+        # Cameras not needed for proprioceptive RL
+        self.scene.wrist = None
+        self.scene.front = None
+
+        # Disable recorder for training runs
+        self.recorders = None

From 5dc5b8d3ea1d4417fe838dfe04e43b61086874a0 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Mon, 2 Mar 2026 12:55:38 -0500
Subject: [PATCH 43/68] Change documents.

---
 STATE_MACHINE_README.md => POLICY_README.md   | 102 ++++++++++++++++--
 ...ACHINE_README_CN.md => POLICY_README_CN.md | 102 ++++++++++++++++--
 2 files changed, 186 insertions(+), 18 deletions(-)
 rename STATE_MACHINE_README.md => POLICY_README.md (63%)
 rename STATE_MACHINE_README_CN.md => POLICY_README_CN.md (63%)

diff --git a/STATE_MACHINE_README.md b/POLICY_README.md
similarity index 63%
rename from STATE_MACHINE_README.md
rename to POLICY_README.md
index 520a75dd..86682061 100644
--- a/STATE_MACHINE_README.md
+++ b/POLICY_README.md
@@ -1,24 +1,34 @@
-# State Machine: Recording & Replay Guide
+# Autonomous Policy Guide: State Machine & RL Training
 
 ## Overview
 
-The state machine module provides automated data collection for manipulation tasks without human teleoperation. It runs a scripted policy, records demonstrations to HDF5 datasets, and supports replaying those demonstrations.
+This document covers two approaches to running the robot without human teleoperation:
+
+| Approach | How it works | Primary use |
+|---|---|---|
+| **State Machine** | Scripted IK-based policy; action = EE pose (7D) + binary gripper | Data collection for imitation learning |
+| **RL Training** | Learned policy via PPO; action = raw joint angles (6D continuous) | End-to-end reinforcement learning |
 
 ```
 scripts/environments/state_machine/
 ├── pick_orange.py      # Runner script: recording (PickOrange task)
-├── fold_cloth.py       # Runner script: recording (FoldCloth task)
 └── replay.py           # Replay script for state-machine demonstrations
 
+scripts/rl/
+└── train_pick_orange.py    # PPO training script (PickOrange task)
+
 source/leisaac/leisaac/state_machine/
 ├── base.py             # StateMachineBase abstract class
-├── pick_orange.py      # PickOrangeStateMachine
-└── fold_cloth.py       # FoldClothStateMachine
+└── pick_orange.py      # PickOrangeStateMachine
+
+source/leisaac/leisaac/rl/
+├── __init__.py
+└── rsl_rl_wrapper.py   # IsaacLab → rsl_rl VecEnv bridge wrapper
 ```
 
 ---
 
-## Recording
+## State Machine
 
 ### Quick Start
 
@@ -126,6 +136,77 @@ For `so101_state_machine`, **only `action` mode is valid**. The IK action manage
 
 ---
 
+## RL Training
+
+### Quick Start
+
+```bash
+# Short smoke-test (4 envs, 2 iterations)
+python scripts/rl/train_pick_orange.py \
+    --num_envs 4 --num_iters 2 --headless
+
+# Full training run
+python scripts/rl/train_pick_orange.py \
+    --num_envs 64 --num_iters 1000 --headless \
+    --log_dir ./logs/pick_orange_rl
+```
+
+### How It Works
+
+```
+train_pick_orange.py
+  └── gym.make("LeIsaac-SO101-PickOrange-RL-v0", cfg=env_cfg)
+       └── env_cfg.use_teleop_device("so101_joint_pos")  # direct joint pos, no IK
+  └── IsaaclabRslRlVecEnvWrapper(env)     # bridge to rsl_rl VecEnv API
+  └── OnPolicyRunner(wrapped_env, TRAIN_CFG, log_dir, device)
+  └── runner.learn(num_learning_iterations)
+```
+
+The wrapper bridges IsaacLab's `ManagerBasedRLEnv` to the TensorDict-based `rsl_rl` VecEnv interface:
+- `get_observations()` → `TensorDict({"policy": obs_tensor})`
+- `step(actions)` → `(TensorDict, rew, dones, extras)` with `extras["time_outs"]`
+
+### Action & Observation Format
+
+**Action space — `so101_joint_pos` (6D continuous):**
+
+| Device | Dims | Layout |
+|---|---|---|
+| `so101_joint_pos` | 6D | `[shoulder_pan, shoulder_lift, elbow_flex, wrist_flex, wrist_roll, gripper]` — raw joint angles (rad) |
+
+> Unlike the state machine, the RL policy outputs joint angles directly with no IK in the loop.
+
+**Observation space — PickOrange RL (28D, flat vector):**
+
+| Term | Dims | Description |
+|---|---|---|
+| `joint_pos` | 6 | Current joint positions (rad) |
+| `joint_vel` | 6 | Current joint velocities (rad/s) |
+| `ee_frame_state` | 7 | EE position (3) + quaternion (4) in robot base frame |
+| `oranges_pos_relative_to_ee` | 9 | 3 oranges × 3D position relative to EE |
+| **Total** | **28** | Concatenated flat vector (`concatenate_terms=True`) |
+
+### Reward Design
+
+| Term | Weight | Description |
+|---|---|---|
+| `ee_to_nearest_orange` | 0.5 | Shaped: `exp(-2·dist)` to nearest ungrasped orange |
+| `orange_grasped` × 3 | 1.0 each | Per-orange: +1 every step while orange is held |
+| `orange_placed` × 3 | 2.0 each | Per-orange: +1 every step while orange is on plate |
+| `task_complete_bonus` | 10.0 | Sparse: +1 when all 3 oranges are on the plate |
+
+### Key Differences from State Machine
+
+| | State Machine | RL |
+|---|---|---|
+| Action space | 8D (EE pose + binary grip) | 6D (raw joint angles) |
+| Control | IK → joint angles | Direct joint position |
+| Gripper | Binary (open/close signal) | Continuous joint angle |
+| Device type | `so101_state_machine` | `so101_joint_pos` |
+| Cameras | Required for recording | Not used (proprioceptive only) |
+
+---
+
 ## Technical Details
 
 ### 1. Gravity Disable (Two Steps Required)
@@ -147,7 +228,7 @@ for _prim in _stage.Traverse():
 
 Both steps must be present. The same pattern applies to bi-arm tasks (`"Robot"` matches both `Left_Robot` and `Right_Robot`).
 
-### 2. Joint Damping
+### 2. Joint Damping (State Machine only)
 
 The IK controller requires higher damping than the robot's default (0.6 N·m·s/rad) for stable, smooth trajectories. Damping is set to **10.0 N·m·s/rad** every step via:
 
@@ -159,6 +240,8 @@ robot.write_joint_damping_to_sim(damping=10.0)
 
 This must also be applied during **replay** to match recording conditions. The state-machine replay script (`scripts/environments/state_machine/replay.py`) calls `apply_damping(env, task_type)` before every `env.step()`.
 
+> RL training does not require this — direct joint position control is inherently stable without extra damping.
+
 ### 3. Episode Numbering
 
 The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on the very first `env.reset()` call (before any steps). This becomes `demo_0`.
@@ -176,10 +259,11 @@ The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on t
 | File | Purpose |
 |---|---|
 | `scripts/environments/state_machine/pick_orange.py` | Recording runner for PickOrange |
-| `scripts/environments/state_machine/fold_cloth.py` | Recording runner for FoldCloth |
 | `scripts/environments/state_machine/replay.py` | State-machine replay (with damping) |
+| `scripts/rl/train_pick_orange.py` | PPO RL training for PickOrange |
 | `source/leisaac/leisaac/state_machine/base.py` | `StateMachineBase` abstract class |
 | `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
-| `source/leisaac/leisaac/state_machine/fold_cloth.py` | `FoldClothStateMachine` |
+| `source/leisaac/leisaac/rl/rsl_rl_wrapper.py` | IsaacLab → rsl_rl VecEnv wrapper |
+| `source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py` | RL env config (obs, rewards, action) |
 | `replay.sh` | Shell wrapper for replay.py |
 | `run_task.sh` | Shell wrapper for pick_orange recording |
diff --git a/STATE_MACHINE_README_CN.md b/POLICY_README_CN.md
similarity index 63%
rename from STATE_MACHINE_README_CN.md
rename to POLICY_README_CN.md
index eeaf32c1..b12dff61 100644
--- a/STATE_MACHINE_README_CN.md
+++ b/POLICY_README_CN.md
@@ -1,24 +1,34 @@
-# 状态机模块：录制与回放说明
+# 自主策略指南：状态机与 RL 训练
 
 ## 概述
 
-状态机模块提供无需人工遥操作的自动化数据采集能力。它运行脚本化的策略，将演示数据录制为 HDF5 数据集，并支持回放已录制的演示。
+本文档涵盖两种无需人工遥操作的机器人自主运行方式：
+
+| 方式 | 原理 | 主要用途 |
+|---|---|---|
+| **状态机** | 脚本化 IK 策略；动作 = 末端位姿 (7D) + 二值夹爪 | 采集模仿学习数据 |
+| **RL 训练** | 基于 PPO 的学习策略；动作 = 原始关节角 (6D 连续) | 端到端强化学习 |
 
 ```
 scripts/environments/state_machine/
 ├── pick_orange.py      # 录制 Runner 脚本（拾橙任务）
-├── fold_cloth.py       # 录制 Runner 脚本（叠衣任务）
 └── replay.py           # 状态机演示回放脚本
 
+scripts/rl/
+└── train_pick_orange.py    # PPO 训练脚本（拾橙任务）
+
 source/leisaac/leisaac/state_machine/
 ├── base.py             # StateMachineBase 抽象基类
-├── pick_orange.py      # PickOrangeStateMachine
-└── fold_cloth.py       # FoldClothStateMachine
+└── pick_orange.py      # PickOrangeStateMachine
+
+source/leisaac/leisaac/rl/
+├── __init__.py
+└── rsl_rl_wrapper.py   # IsaacLab → rsl_rl VecEnv 桥接包装器
 ```
 
 ---
 
-## 录制
+## 状态机
 
 ### 快速开始
 
@@ -127,6 +137,77 @@ python scripts/environments/state_machine/replay.py \
 
 ---
 
+## RL 训练
+
+### 快速开始
+
+```bash
+# 快速冒烟测试（4 个环境，2 次迭代）
+python scripts/rl/train_pick_orange.py \
+    --num_envs 4 --num_iters 2 --headless
+
+# 完整训练
+python scripts/rl/train_pick_orange.py \
+    --num_envs 64 --num_iters 1000 --headless \
+    --log_dir ./logs/pick_orange_rl
+```
+
+### 工作原理
+
+```
+train_pick_orange.py
+  └── gym.make("LeIsaac-SO101-PickOrange-RL-v0", cfg=env_cfg)
+       └── env_cfg.use_teleop_device("so101_joint_pos")  # 直接关节角，无 IK
+  └── IsaaclabRslRlVecEnvWrapper(env)     # 桥接至 rsl_rl VecEnv 接口
+  └── OnPolicyRunner(wrapped_env, TRAIN_CFG, log_dir, device)
+  └── runner.learn(num_learning_iterations)
+```
+
+包装器将 IsaacLab 的 `ManagerBasedRLEnv` 桥接到基于 TensorDict 的 `rsl_rl` VecEnv 接口：
+- `get_observations()` → `TensorDict({"policy": obs_tensor})`
+- `step(actions)` → `(TensorDict, rew, dones, extras)`，其中 `extras["time_outs"]` 必须存在
+
+### 动作与观测格式
+
+**动作空间 — `so101_joint_pos`（6D 连续）：**
+
+| 设备 | 维度 | 格式 |
+|---|---|---|
+| `so101_joint_pos` | 6D | `[shoulder_pan, shoulder_lift, elbow_flex, wrist_flex, wrist_roll, gripper]`，原始关节角（rad） |
+
+> 与状态机不同，RL policy 直接输出关节角，中间没有 IK 环节。
+
+**观测空间 — PickOrange RL（28D 平坦向量）：**
+
+| 项 | 维度 | 说明 |
+|---|---|---|
+| `joint_pos` | 6 | 当前关节位置（rad） |
+| `joint_vel` | 6 | 当前关节速度（rad/s） |
+| `ee_frame_state` | 7 | 末端位置 (3) + 四元数 (4)，机械臂 base 坐标系 |
+| `oranges_pos_relative_to_ee` | 9 | 3 个橙子 × 3D 位置，相对于末端 |
+| **合计** | **28** | 拼接为平坦向量（`concatenate_terms=True`） |
+
+### 奖励设计
+
+| 项 | 权重 | 说明 |
+|---|---|---|
+| `ee_to_nearest_orange` | 0.5 | 稠密：`exp(-2·dist)` 到最近未放置橙子 |
+| `orange_grasped` × 3 | 各 1.0 | 逐橙子：持握期间每步 +1 |
+| `orange_placed` × 3 | 各 2.0 | 逐橙子：在盘中期间每步 +1 |
+| `task_complete_bonus` | 10.0 | 稀疏：全部 3 个橙子放入盘中时 +1 |
+
+### 与状态机的关键区别
+
+| | 状态机 | RL |
+|---|---|---|
+| 动作空间 | 8D（末端位姿 + 二值夹爪） | 6D（原始关节角） |
+| 控制方式 | IK → 关节角 | 直接关节位置控制 |
+| 夹爪 | 二值信号（开/闭） | 连续关节角 |
+| 设备类型 | `so101_state_machine` | `so101_joint_pos` |
+| 摄像头 | 录制时必需 | 不使用（纯本体感知） |
+
+---
+
 ## 技术细节
 
 ### 1. 重力禁用（两步缺一不可）
@@ -148,7 +229,7 @@ for _prim in _stage.Traverse():
 
 两步必须同时存在。双臂任务同理（`"Robot"` 可同时匹配 `Left_Robot` 和 `Right_Robot`）。
 
-### 2. 关节阻尼
+### 2. 关节阻尼（仅状态机需要）
 
 IK 控制器需要比机械臂默认阻尼（0.6 N·m·s/rad）更高的阻尼值，才能获得稳定、平滑的轨迹。阻尼设置为 **10.0 N·m·s/rad**，在每步调用：
 
@@ -160,6 +241,8 @@ robot.write_joint_damping_to_sim(damping=10.0)
 
 **回放时也必须应用相同阻尼**，以匹配录制条件。状态机回放脚本（`scripts/environments/state_machine/replay.py`）在每次 `env.step()` 前调用 `apply_damping(env, task_type)`。
 
+> RL 训练无需此设置——直接关节位置控制本身不依赖高阻尼。
+
 ### 3. Episode 编号规则
 
 IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任何步骤）会保存一个仅含初始状态的 episode（`num_samples=0`），即 `demo_0`。
@@ -177,10 +260,11 @@ IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任
 | 文件 | 用途 |
 |---|---|
 | `scripts/environments/state_machine/pick_orange.py` | 拾橙任务录制 Runner |
-| `scripts/environments/state_machine/fold_cloth.py` | 叠衣任务录制 Runner |
 | `scripts/environments/state_machine/replay.py` | 状态机专用回放脚本（含阻尼设置） |
+| `scripts/rl/train_pick_orange.py` | 拾橙任务 PPO RL 训练 |
 | `source/leisaac/leisaac/state_machine/base.py` | `StateMachineBase` 抽象基类 |
 | `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
-| `source/leisaac/leisaac/state_machine/fold_cloth.py` | `FoldClothStateMachine` |
+| `source/leisaac/leisaac/rl/rsl_rl_wrapper.py` | IsaacLab → rsl_rl VecEnv 包装器 |
+| `source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py` | RL 环境配置（观测、奖励、动作） |
 | `replay.sh` | replay.py 的 Shell 封装脚本 |
 | `run_task.sh` | 拾橙录制的 Shell 封装脚本 |

From 8d1c8100acc2029abb9039b8be3d501fccd97d03 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Mon, 2 Mar 2026 13:00:07 -0500
Subject: [PATCH 44/68] Change bash.

---
 POLICY_README.md                     |  8 ++++----
 POLICY_README_CN.md                  |  8 ++++----
 run_task.sh => record_pick_orange.sh |  0
 replay.sh => replay_pick_orange.sh   |  0
 run_fold_cloth.sh                    | 12 ------------
 5 files changed, 8 insertions(+), 20 deletions(-)
 rename run_task.sh => record_pick_orange.sh (100%)
 rename replay.sh => replay_pick_orange.sh (100%)
 delete mode 100755 run_fold_cloth.sh

diff --git a/POLICY_README.md b/POLICY_README.md
index 86682061..d29d3d2b 100644
--- a/POLICY_README.md
+++ b/POLICY_README.md
@@ -111,8 +111,8 @@ When replaying, use `--select_episodes K` to load `demo_K`:
 ### Quick Start
 
 ```bash
-# replay.sh wrapper (default: demo_3 of dataset_test.hdf5)
-bash replay.sh
+# replay_pick_orange.sh wrapper (default: demo_3 of dataset_test.hdf5)
+bash replay_pick_orange.sh
 
 # explicit call
 python scripts/environments/state_machine/replay.py \
@@ -265,5 +265,5 @@ The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on t
 | `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
 | `source/leisaac/leisaac/rl/rsl_rl_wrapper.py` | IsaacLab → rsl_rl VecEnv wrapper |
 | `source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py` | RL env config (obs, rewards, action) |
-| `replay.sh` | Shell wrapper for replay.py |
-| `run_task.sh` | Shell wrapper for pick_orange recording |
+| `replay_pick_orange.sh` | Shell wrapper for replay.py |
+| `record_pick_orange.sh` | Shell wrapper for pick_orange recording |
diff --git a/POLICY_README_CN.md b/POLICY_README_CN.md
index b12dff61..c049e2f4 100644
--- a/POLICY_README_CN.md
+++ b/POLICY_README_CN.md
@@ -112,8 +112,8 @@ data/
 ### 快速开始
 
 ```bash
-# replay.sh 封装脚本（默认：dataset_test.hdf5 的 demo_3）
-bash replay.sh
+# replay_pick_orange.sh 封装脚本（默认：dataset_test.hdf5 的 demo_3）
+bash replay_pick_orange.sh
 
 # 显式调用
 python scripts/environments/state_machine/replay.py \
@@ -266,5 +266,5 @@ IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任
 | `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
 | `source/leisaac/leisaac/rl/rsl_rl_wrapper.py` | IsaacLab → rsl_rl VecEnv 包装器 |
 | `source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py` | RL 环境配置（观测、奖励、动作） |
-| `replay.sh` | replay.py 的 Shell 封装脚本 |
-| `run_task.sh` | 拾橙录制的 Shell 封装脚本 |
+| `replay_pick_orange.sh` | replay.py 的 Shell 封装脚本 |
+| `record_pick_orange.sh` | 拾橙录制的 Shell 封装脚本 |
diff --git a/run_task.sh b/record_pick_orange.sh
similarity index 100%
rename from run_task.sh
rename to record_pick_orange.sh
diff --git a/replay.sh b/replay_pick_orange.sh
similarity index 100%
rename from replay.sh
rename to replay_pick_orange.sh
diff --git a/run_fold_cloth.sh b/run_fold_cloth.sh
deleted file mode 100755
index 14f28c60..00000000
--- a/run_fold_cloth.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-TASK_NAME=${1:-LeIsaac-SO101-FoldCloth-BiArm-v0}
-DATASET=${2:-./datasets/dataset_cloth.hdf5}
-
-python scripts/environments/state_machine/fold_cloth.py \
-    --dataset_file=${DATASET} \
-    --task=${TASK_NAME} \
-    --num_envs=1 \
-    --device=cuda \
-    --enable_cameras \
-    --record \
-    --num_demos=1

From f34cb6eabb08739e111acb58b637f08f30c7b7c2 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 3 Mar 2026 03:31:21 -0500
Subject: [PATCH 45/68] Delete RL part.

---
 POLICY_README.md => STATE_MACHINE_README.md   |  96 +---------
 ...README_CN.md => STATE_MACHINE_README_CN.md |  96 +---------
 scripts/rl/train_pick_orange.py               | 107 -----------
 .../leisaac/leisaac/devices/action_process.py |   2 +-
 source/leisaac/leisaac/rl/__init__.py         |   5 -
 source/leisaac/leisaac/rl/rsl_rl_wrapper.py   | 108 -----------
 .../leisaac/tasks/pick_orange/__init__.py     |   9 -
 .../leisaac/tasks/pick_orange/mdp/__init__.py |   1 -
 .../tasks/pick_orange/mdp/observations.py     |  27 ---
 .../leisaac/tasks/pick_orange/mdp/rewards.py  |  75 --------
 .../pick_orange/pick_orange_rl_env_cfg.py     | 173 ------------------
 11 files changed, 9 insertions(+), 690 deletions(-)
 rename POLICY_README.md => STATE_MACHINE_README.md (63%)
 rename POLICY_README_CN.md => STATE_MACHINE_README_CN.md (64%)
 delete mode 100644 scripts/rl/train_pick_orange.py
 delete mode 100644 source/leisaac/leisaac/rl/__init__.py
 delete mode 100644 source/leisaac/leisaac/rl/rsl_rl_wrapper.py
 delete mode 100644 source/leisaac/leisaac/tasks/pick_orange/mdp/rewards.py
 delete mode 100644 source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py

diff --git a/POLICY_README.md b/STATE_MACHINE_README.md
similarity index 63%
rename from POLICY_README.md
rename to STATE_MACHINE_README.md
index d29d3d2b..8b3d0f46 100644
--- a/POLICY_README.md
+++ b/STATE_MACHINE_README.md
@@ -1,34 +1,22 @@
-# Autonomous Policy Guide: State Machine & RL Training
+# State Machine: Recording & Replay Guide
 
 ## Overview
 
-This document covers two approaches to running the robot without human teleoperation:
-
-| Approach | How it works | Primary use |
-|---|---|---|
-| **State Machine** | Scripted IK-based policy; action = EE pose (7D) + binary gripper | Data collection for imitation learning |
-| **RL Training** | Learned policy via PPO; action = raw joint angles (6D continuous) | End-to-end reinforcement learning |
+The state machine module provides automated data collection for manipulation tasks without human teleoperation. It runs a scripted policy, records demonstrations to HDF5 datasets, and supports replaying those demonstrations.
 
 ```
 scripts/environments/state_machine/
 ├── pick_orange.py      # Runner script: recording (PickOrange task)
 └── replay.py           # Replay script for state-machine demonstrations
 
-scripts/rl/
-└── train_pick_orange.py    # PPO training script (PickOrange task)
-
 source/leisaac/leisaac/state_machine/
 ├── base.py             # StateMachineBase abstract class
 └── pick_orange.py      # PickOrangeStateMachine
-
-source/leisaac/leisaac/rl/
-├── __init__.py
-└── rsl_rl_wrapper.py   # IsaacLab → rsl_rl VecEnv bridge wrapper
 ```
 
 ---
 
-## State Machine
+## Recording
 
 ### Quick Start
 
@@ -136,77 +124,6 @@ For `so101_state_machine`, **only `action` mode is valid**. The IK action manage
 
 ---
 
-## RL Training
-
-### Quick Start
-
-```bash
-# Short smoke-test (4 envs, 2 iterations)
-python scripts/rl/train_pick_orange.py \
-    --num_envs 4 --num_iters 2 --headless
-
-# Full training run
-python scripts/rl/train_pick_orange.py \
-    --num_envs 64 --num_iters 1000 --headless \
-    --log_dir ./logs/pick_orange_rl
-```
-
-### How It Works
-
-```
-train_pick_orange.py
-  └── gym.make("LeIsaac-SO101-PickOrange-RL-v0", cfg=env_cfg)
-       └── env_cfg.use_teleop_device("so101_joint_pos")  # direct joint pos, no IK
-  └── IsaaclabRslRlVecEnvWrapper(env)     # bridge to rsl_rl VecEnv API
-  └── OnPolicyRunner(wrapped_env, TRAIN_CFG, log_dir, device)
-  └── runner.learn(num_learning_iterations)
-```
-
-The wrapper bridges IsaacLab's `ManagerBasedRLEnv` to the TensorDict-based `rsl_rl` VecEnv interface:
-- `get_observations()` → `TensorDict({"policy": obs_tensor})`
-- `step(actions)` → `(TensorDict, rew, dones, extras)` with `extras["time_outs"]`
-
-### Action & Observation Format
-
-**Action space — `so101_joint_pos` (6D continuous):**
-
-| Device | Dims | Layout |
-|---|---|---|
-| `so101_joint_pos` | 6D | `[shoulder_pan, shoulder_lift, elbow_flex, wrist_flex, wrist_roll, gripper]` — raw joint angles (rad) |
-
-> Unlike the state machine, the RL policy outputs joint angles directly with no IK in the loop.
-
-**Observation space — PickOrange RL (28D, flat vector):**
-
-| Term | Dims | Description |
-|---|---|---|
-| `joint_pos` | 6 | Current joint positions (rad) |
-| `joint_vel` | 6 | Current joint velocities (rad/s) |
-| `ee_frame_state` | 7 | EE position (3) + quaternion (4) in robot base frame |
-| `oranges_pos_relative_to_ee` | 9 | 3 oranges × 3D position relative to EE |
-| **Total** | **28** | Concatenated flat vector (`concatenate_terms=True`) |
-
-### Reward Design
-
-| Term | Weight | Description |
-|---|---|---|
-| `ee_to_nearest_orange` | 0.5 | Shaped: `exp(-2·dist)` to nearest ungrasped orange |
-| `orange_grasped` × 3 | 1.0 each | Per-orange: +1 every step while orange is held |
-| `orange_placed` × 3 | 2.0 each | Per-orange: +1 every step while orange is on plate |
-| `task_complete_bonus` | 10.0 | Sparse: +1 when all 3 oranges are on the plate |
-
-### Key Differences from State Machine
-
-| | State Machine | RL |
-|---|---|---|
-| Action space | 8D (EE pose + binary grip) | 6D (raw joint angles) |
-| Control | IK → joint angles | Direct joint position |
-| Gripper | Binary (open/close signal) | Continuous joint angle |
-| Device type | `so101_state_machine` | `so101_joint_pos` |
-| Cameras | Required for recording | Not used (proprioceptive only) |
-
----
-
 ## Technical Details
 
 ### 1. Gravity Disable (Two Steps Required)
@@ -228,7 +145,7 @@ for _prim in _stage.Traverse():
 
 Both steps must be present. The same pattern applies to bi-arm tasks (`"Robot"` matches both `Left_Robot` and `Right_Robot`).
 
-### 2. Joint Damping (State Machine only)
+### 2. Joint Damping
 
 The IK controller requires higher damping than the robot's default (0.6 N·m·s/rad) for stable, smooth trajectories. Damping is set to **10.0 N·m·s/rad** every step via:
 
@@ -240,8 +157,6 @@ robot.write_joint_damping_to_sim(damping=10.0)
 
 This must also be applied during **replay** to match recording conditions. The state-machine replay script (`scripts/environments/state_machine/replay.py`) calls `apply_damping(env, task_type)` before every `env.step()`.
 
-> RL training does not require this — direct joint position control is inherently stable without extra damping.
-
 ### 3. Episode Numbering
 
 The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on the very first `env.reset()` call (before any steps). This becomes `demo_0`.
@@ -260,10 +175,7 @@ The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on t
 |---|---|
 | `scripts/environments/state_machine/pick_orange.py` | Recording runner for PickOrange |
 | `scripts/environments/state_machine/replay.py` | State-machine replay (with damping) |
-| `scripts/rl/train_pick_orange.py` | PPO RL training for PickOrange |
 | `source/leisaac/leisaac/state_machine/base.py` | `StateMachineBase` abstract class |
 | `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
-| `source/leisaac/leisaac/rl/rsl_rl_wrapper.py` | IsaacLab → rsl_rl VecEnv wrapper |
-| `source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py` | RL env config (obs, rewards, action) |
 | `replay_pick_orange.sh` | Shell wrapper for replay.py |
 | `record_pick_orange.sh` | Shell wrapper for pick_orange recording |
diff --git a/POLICY_README_CN.md b/STATE_MACHINE_README_CN.md
similarity index 64%
rename from POLICY_README_CN.md
rename to STATE_MACHINE_README_CN.md
index c049e2f4..b3ff0505 100644
--- a/POLICY_README_CN.md
+++ b/STATE_MACHINE_README_CN.md
@@ -1,34 +1,22 @@
-# 自主策略指南：状态机与 RL 训练
+# 状态机模块：录制与回放说明
 
 ## 概述
 
-本文档涵盖两种无需人工遥操作的机器人自主运行方式：
-
-| 方式 | 原理 | 主要用途 |
-|---|---|---|
-| **状态机** | 脚本化 IK 策略；动作 = 末端位姿 (7D) + 二值夹爪 | 采集模仿学习数据 |
-| **RL 训练** | 基于 PPO 的学习策略；动作 = 原始关节角 (6D 连续) | 端到端强化学习 |
+状态机模块提供无需人工遥操作的自动化数据采集能力。它运行脚本化的策略，将演示数据录制为 HDF5 数据集，并支持回放已录制的演示。
 
 ```
 scripts/environments/state_machine/
 ├── pick_orange.py      # 录制 Runner 脚本（拾橙任务）
 └── replay.py           # 状态机演示回放脚本
 
-scripts/rl/
-└── train_pick_orange.py    # PPO 训练脚本（拾橙任务）
-
 source/leisaac/leisaac/state_machine/
 ├── base.py             # StateMachineBase 抽象基类
 └── pick_orange.py      # PickOrangeStateMachine
-
-source/leisaac/leisaac/rl/
-├── __init__.py
-└── rsl_rl_wrapper.py   # IsaacLab → rsl_rl VecEnv 桥接包装器
 ```
 
 ---
 
-## 状态机
+## 录制
 
 ### 快速开始
 
@@ -137,77 +125,6 @@ python scripts/environments/state_machine/replay.py \
 
 ---
 
-## RL 训练
-
-### 快速开始
-
-```bash
-# 快速冒烟测试（4 个环境，2 次迭代）
-python scripts/rl/train_pick_orange.py \
-    --num_envs 4 --num_iters 2 --headless
-
-# 完整训练
-python scripts/rl/train_pick_orange.py \
-    --num_envs 64 --num_iters 1000 --headless \
-    --log_dir ./logs/pick_orange_rl
-```
-
-### 工作原理
-
-```
-train_pick_orange.py
-  └── gym.make("LeIsaac-SO101-PickOrange-RL-v0", cfg=env_cfg)
-       └── env_cfg.use_teleop_device("so101_joint_pos")  # 直接关节角，无 IK
-  └── IsaaclabRslRlVecEnvWrapper(env)     # 桥接至 rsl_rl VecEnv 接口
-  └── OnPolicyRunner(wrapped_env, TRAIN_CFG, log_dir, device)
-  └── runner.learn(num_learning_iterations)
-```
-
-包装器将 IsaacLab 的 `ManagerBasedRLEnv` 桥接到基于 TensorDict 的 `rsl_rl` VecEnv 接口：
-- `get_observations()` → `TensorDict({"policy": obs_tensor})`
-- `step(actions)` → `(TensorDict, rew, dones, extras)`，其中 `extras["time_outs"]` 必须存在
-
-### 动作与观测格式
-
-**动作空间 — `so101_joint_pos`（6D 连续）：**
-
-| 设备 | 维度 | 格式 |
-|---|---|---|
-| `so101_joint_pos` | 6D | `[shoulder_pan, shoulder_lift, elbow_flex, wrist_flex, wrist_roll, gripper]`，原始关节角（rad） |
-
-> 与状态机不同，RL policy 直接输出关节角，中间没有 IK 环节。
-
-**观测空间 — PickOrange RL（28D 平坦向量）：**
-
-| 项 | 维度 | 说明 |
-|---|---|---|
-| `joint_pos` | 6 | 当前关节位置（rad） |
-| `joint_vel` | 6 | 当前关节速度（rad/s） |
-| `ee_frame_state` | 7 | 末端位置 (3) + 四元数 (4)，机械臂 base 坐标系 |
-| `oranges_pos_relative_to_ee` | 9 | 3 个橙子 × 3D 位置，相对于末端 |
-| **合计** | **28** | 拼接为平坦向量（`concatenate_terms=True`） |
-
-### 奖励设计
-
-| 项 | 权重 | 说明 |
-|---|---|---|
-| `ee_to_nearest_orange` | 0.5 | 稠密：`exp(-2·dist)` 到最近未放置橙子 |
-| `orange_grasped` × 3 | 各 1.0 | 逐橙子：持握期间每步 +1 |
-| `orange_placed` × 3 | 各 2.0 | 逐橙子：在盘中期间每步 +1 |
-| `task_complete_bonus` | 10.0 | 稀疏：全部 3 个橙子放入盘中时 +1 |
-
-### 与状态机的关键区别
-
-| | 状态机 | RL |
-|---|---|---|
-| 动作空间 | 8D（末端位姿 + 二值夹爪） | 6D（原始关节角） |
-| 控制方式 | IK → 关节角 | 直接关节位置控制 |
-| 夹爪 | 二值信号（开/闭） | 连续关节角 |
-| 设备类型 | `so101_state_machine` | `so101_joint_pos` |
-| 摄像头 | 录制时必需 | 不使用（纯本体感知） |
-
----
-
 ## 技术细节
 
 ### 1. 重力禁用（两步缺一不可）
@@ -229,7 +146,7 @@ for _prim in _stage.Traverse():
 
 两步必须同时存在。双臂任务同理（`"Robot"` 可同时匹配 `Left_Robot` 和 `Right_Robot`）。
 
-### 2. 关节阻尼（仅状态机需要）
+### 2. 关节阻尼
 
 IK 控制器需要比机械臂默认阻尼（0.6 N·m·s/rad）更高的阻尼值，才能获得稳定、平滑的轨迹。阻尼设置为 **10.0 N·m·s/rad**，在每步调用：
 
@@ -241,8 +158,6 @@ robot.write_joint_damping_to_sim(damping=10.0)
 
 **回放时也必须应用相同阻尼**，以匹配录制条件。状态机回放脚本（`scripts/environments/state_machine/replay.py`）在每次 `env.step()` 前调用 `apply_damping(env, task_type)`。
 
-> RL 训练无需此设置——直接关节位置控制本身不依赖高阻尼。
-
 ### 3. Episode 编号规则
 
 IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任何步骤）会保存一个仅含初始状态的 episode（`num_samples=0`），即 `demo_0`。
@@ -261,10 +176,7 @@ IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任
 |---|---|
 | `scripts/environments/state_machine/pick_orange.py` | 拾橙任务录制 Runner |
 | `scripts/environments/state_machine/replay.py` | 状态机专用回放脚本（含阻尼设置） |
-| `scripts/rl/train_pick_orange.py` | 拾橙任务 PPO RL 训练 |
 | `source/leisaac/leisaac/state_machine/base.py` | `StateMachineBase` 抽象基类 |
 | `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
-| `source/leisaac/leisaac/rl/rsl_rl_wrapper.py` | IsaacLab → rsl_rl VecEnv 包装器 |
-| `source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py` | RL 环境配置（观测、奖励、动作） |
 | `replay_pick_orange.sh` | replay.py 的 Shell 封装脚本 |
 | `record_pick_orange.sh` | 拾橙录制的 Shell 封装脚本 |
diff --git a/scripts/rl/train_pick_orange.py b/scripts/rl/train_pick_orange.py
deleted file mode 100644
index 2a158184..00000000
--- a/scripts/rl/train_pick_orange.py
+++ /dev/null
@@ -1,107 +0,0 @@
-"""PPO training script for LeIsaac-SO101-PickOrange-RL-v0 using rsl_rl.
-
-Usage:
-    python scripts/rl/train_pick_orange.py --num_envs 64 --num_iters 1000 \\
-        --headless --log_dir ./logs/pick_orange_rl
-
-Launch Isaac Sim before running (or pass --headless).
-"""
-
-import argparse
-import os
-import time
-
-from isaaclab.app import AppLauncher
-
-parser = argparse.ArgumentParser(description="Train pick-orange with rsl_rl PPO.")
-parser.add_argument("--num_envs", type=int, default=64)
-parser.add_argument("--num_iters", type=int, default=1000)
-parser.add_argument("--log_dir", type=str, default="./logs/pick_orange_rl")
-parser.add_argument("--seed", type=int, default=None)
-AppLauncher.add_app_launcher_args(parser)
-args_cli = parser.parse_args()
-
-app_launcher = AppLauncher(vars(args_cli))
-simulation_app = app_launcher.app
-
-import gymnasium as gym
-import torch
-
-from isaaclab_tasks.utils import parse_env_cfg
-from rsl_rl.runners import OnPolicyRunner
-
-import leisaac.tasks  # noqa: F401 — registers all leisaac gym envs
-from leisaac.rl import IsaaclabRslRlVecEnvWrapper
-
-# ------------------------------------------------------------------
-# PPO training configuration
-# ------------------------------------------------------------------
-TRAIN_CFG = {
-    "num_steps_per_env": 24,
-    "save_interval": 50,
-    "obs_groups": {
-        "policy": ["policy"],
-        "critic": ["policy"],
-    },
-    "empirical_normalization": True,
-    "policy": {
-        "class_name": "ActorCritic",
-        "hidden_dims": [256, 128, 64],
-        "activation": "elu",
-        "init_noise_std": 1.0,
-    },
-    "algorithm": {
-        "class_name": "PPO",
-        "value_loss_coef": 1.0,
-        "use_clipping": True,
-        "clip_param": 0.2,
-        "entropy_coef": 0.005,
-        "num_learning_epochs": 5,
-        "num_mini_batches": 4,
-        "learning_rate": 3e-4,
-        "schedule": "adaptive",
-        "gamma": 0.99,
-        "lam": 0.95,
-        "desired_kl": 0.01,
-        "max_grad_norm": 1.0,
-    },
-}
-
-
-def main():
-    os.makedirs(args_cli.log_dir, exist_ok=True)
-
-    env_cfg = parse_env_cfg(
-        "LeIsaac-SO101-PickOrange-RL-v0",
-        device=args_cli.device,
-        num_envs=args_cli.num_envs,
-    )
-    if args_cli.seed is not None:
-        env_cfg.seed = args_cli.seed
-    else:
-        env_cfg.seed = int(time.time())
-
-    env = gym.make("LeIsaac-SO101-PickOrange-RL-v0", cfg=env_cfg).unwrapped
-
-    wrapped_env = IsaaclabRslRlVecEnvWrapper(env)
-
-    print(f"[INFO] num_envs={wrapped_env.num_envs}  "
-          f"num_actions={wrapped_env.num_actions}  "
-          f"max_episode_length={wrapped_env.max_episode_length}")
-    obs = wrapped_env.get_observations()
-    print(f"[INFO] obs['policy'] shape: {obs['policy'].shape}")
-
-    runner = OnPolicyRunner(
-        wrapped_env,
-        TRAIN_CFG,
-        log_dir=args_cli.log_dir,
-        device=args_cli.device,
-    )
-    runner.learn(num_learning_iterations=args_cli.num_iters, init_at_random_ep_len=True)
-
-    env.close()
-    simulation_app.close()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index 729199ac..0c038d8a 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -8,7 +8,7 @@
 
 def init_action_cfg(action_cfg, device):
     """SO101 Follower action configuration: arm_action and gripper_action"""
-    if device in ["so101_joint_pos", "so101leader", "lekiwi-leader"]:
+    if device in ["so101leader", "lekiwi-leader"]:
         action_cfg.arm_action = mdp.JointPositionActionCfg(
             asset_name="robot",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
diff --git a/source/leisaac/leisaac/rl/__init__.py b/source/leisaac/leisaac/rl/__init__.py
deleted file mode 100644
index 710f1dcd..00000000
--- a/source/leisaac/leisaac/rl/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""RL training utilities for leisaac environments."""
-
-from .rsl_rl_wrapper import IsaaclabRslRlVecEnvWrapper
-
-__all__ = ["IsaaclabRslRlVecEnvWrapper"]
diff --git a/source/leisaac/leisaac/rl/rsl_rl_wrapper.py b/source/leisaac/leisaac/rl/rsl_rl_wrapper.py
deleted file mode 100644
index 96986b5e..00000000
--- a/source/leisaac/leisaac/rl/rsl_rl_wrapper.py
+++ /dev/null
@@ -1,108 +0,0 @@
-"""Wrapper that bridges an IsaacLab ManagerBasedRLEnv to the rsl_rl VecEnv interface."""
-
-from __future__ import annotations
-
-import torch
-from tensordict import TensorDict
-
-from isaaclab.envs import ManagerBasedRLEnv
-
-from rsl_rl.env import VecEnv
-
-
-class IsaaclabRslRlVecEnvWrapper(VecEnv):
-    """Wraps a :class:`ManagerBasedRLEnv` so it satisfies rsl_rl's :class:`VecEnv` ABC.
-
-    The RL env cfg must configure its policy observation group with
-    ``concatenate_terms = True`` so that ``obs_dict["policy"]`` is already a
-    ``(num_envs, obs_dim)`` tensor rather than a dict of named terms.
-
-    Args:
-        env: An already-constructed :class:`ManagerBasedRLEnv` instance.
-    """
-
-    def __init__(self, env: ManagerBasedRLEnv) -> None:
-        self.env = env
-
-    # ------------------------------------------------------------------
-    # VecEnv required attributes
-    # ------------------------------------------------------------------
-
-    @property
-    def num_envs(self) -> int:
-        return self.env.num_envs
-
-    @property
-    def num_actions(self) -> int:
-        return self.env.action_manager.total_action_dim
-
-    @property
-    def max_episode_length(self) -> int:
-        return int(self.env.max_episode_length)
-
-    @property
-    def episode_length_buf(self) -> torch.Tensor:
-        return self.env.episode_length_buf
-
-    @episode_length_buf.setter
-    def episode_length_buf(self, value: torch.Tensor) -> None:
-        self.env.episode_length_buf = value
-
-    @property
-    def device(self) -> torch.device | str:
-        return self.env.device
-
-    @property
-    def cfg(self):
-        return self.env.cfg
-
-    # ------------------------------------------------------------------
-    # VecEnv interface
-    # ------------------------------------------------------------------
-
-    def get_observations(self) -> TensorDict:
-        """Return current observations as a TensorDict with key ``"policy"``."""
-        obs_dict = self.env.observation_manager.compute()
-        return self._to_tensordict(obs_dict)
-
-    def step(
-        self, actions: torch.Tensor
-    ) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]:
-        """Step the environment.
-
-        Returns:
-            obs: TensorDict with ``"policy"`` key.
-            rewards: Shape ``(num_envs,)``.
-            dones: Float tensor ``(num_envs,)``, 1 on episode end.
-            extras: Dict with at minimum ``"time_outs"`` key (bool tensor).
-        """
-        obs_dict, rew, terminated, timeouts, extras = self.env.step(actions)
-        obs = self._to_tensordict(obs_dict)
-        dones = (terminated | timeouts).float()
-        extras["time_outs"] = timeouts
-        return obs, rew, dones, extras
-
-    def reset(self) -> TensorDict:
-        obs_dict, _ = self.env.reset()
-        return self._to_tensordict(obs_dict)
-
-    # ------------------------------------------------------------------
-    # Helper
-    # ------------------------------------------------------------------
-
-    def _to_tensordict(self, obs_dict: dict) -> TensorDict:
-        """Convert the IsaacLab obs dict to a TensorDict.
-
-        When ``concatenate_terms=True`` (required for RL), ``obs_dict["policy"]``
-        is already a ``(num_envs, obs_dim)`` tensor.  We wrap it in a TensorDict
-        so rsl_rl can access it by group name.
-        """
-        policy_obs = obs_dict["policy"]
-        if isinstance(policy_obs, dict):
-            # Fallback: concatenate manually if cfg has concatenate_terms=False
-            tensors = []
-            for v in policy_obs.values():
-                t = v.float() if v.dtype == torch.bool else v
-                tensors.append(t.reshape(self.num_envs, -1))
-            policy_obs = torch.cat(tensors, dim=-1)
-        return TensorDict({"policy": policy_obs}, batch_size=[self.num_envs])
diff --git a/source/leisaac/leisaac/tasks/pick_orange/__init__.py b/source/leisaac/leisaac/tasks/pick_orange/__init__.py
index 06c6adb1..7b00fe38 100644
--- a/source/leisaac/leisaac/tasks/pick_orange/__init__.py
+++ b/source/leisaac/leisaac/tasks/pick_orange/__init__.py
@@ -26,12 +26,3 @@
         "env_cfg_entry_point": f"{__name__}.direct.pick_orange_env:PickOrangeEnvCfg",
     },
 )
-
-gym.register(
-    id="LeIsaac-SO101-PickOrange-RL-v0",
-    entry_point="isaaclab.envs:ManagerBasedRLEnv",
-    disable_env_checker=True,
-    kwargs={
-        "env_cfg_entry_point": f"{__name__}.pick_orange_rl_env_cfg:PickOrangeRLEnvCfg",
-    },
-)
diff --git a/source/leisaac/leisaac/tasks/pick_orange/mdp/__init__.py b/source/leisaac/leisaac/tasks/pick_orange/mdp/__init__.py
index 1d602cf2..38ebdb6c 100644
--- a/source/leisaac/leisaac/tasks/pick_orange/mdp/__init__.py
+++ b/source/leisaac/leisaac/tasks/pick_orange/mdp/__init__.py
@@ -2,5 +2,4 @@
 from leisaac.enhance.envs.mdp import *
 
 from .observations import *
-from .rewards import *
 from .terminations import *
diff --git a/source/leisaac/leisaac/tasks/pick_orange/mdp/observations.py b/source/leisaac/leisaac/tasks/pick_orange/mdp/observations.py
index a49519ec..8123d9b3 100644
--- a/source/leisaac/leisaac/tasks/pick_orange/mdp/observations.py
+++ b/source/leisaac/leisaac/tasks/pick_orange/mdp/observations.py
@@ -61,30 +61,3 @@ def put_orange_to_plate(
     placed = torch.logical_and(placed, gripper_open)
 
     return placed
-
-
-def oranges_pos_relative_to_ee(
-    env: ManagerBasedRLEnv | DirectRLEnv,
-    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
-    oranges_cfg: list[SceneEntityCfg] | None = None,
-) -> torch.Tensor:
-    """Return positions of all oranges relative to the end-effector, flattened.
-
-    Returns:
-        Tensor of shape ``(num_envs, 3 * num_oranges)`` in world frame offsets.
-    """
-    if oranges_cfg is None:
-        oranges_cfg = [
-            SceneEntityCfg("Orange001"),
-            SceneEntityCfg("Orange002"),
-            SceneEntityCfg("Orange003"),
-        ]
-    ee_frame: FrameTransformer = env.scene[ee_frame_cfg.name]
-    ee_pos = ee_frame.data.target_pos_w[:, 1, :]  # (num_envs, 3)
-
-    rel_positions = []
-    for orange_cfg in oranges_cfg:
-        orange: RigidObject = env.scene[orange_cfg.name]
-        rel_positions.append(orange.data.root_pos_w - ee_pos)  # (num_envs, 3)
-
-    return torch.cat(rel_positions, dim=-1)  # (num_envs, 9)
diff --git a/source/leisaac/leisaac/tasks/pick_orange/mdp/rewards.py b/source/leisaac/leisaac/tasks/pick_orange/mdp/rewards.py
deleted file mode 100644
index ec0f4314..00000000
--- a/source/leisaac/leisaac/tasks/pick_orange/mdp/rewards.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""Reward functions for the pick-orange RL task."""
-
-import torch
-from isaaclab.assets import Articulation, RigidObject
-from isaaclab.envs import ManagerBasedRLEnv
-from isaaclab.managers import SceneEntityCfg
-from isaaclab.sensors import FrameTransformer
-
-from .observations import orange_grasped, put_orange_to_plate
-from .terminations import task_done
-
-
-def ee_to_nearest_orange_reward(
-    env: ManagerBasedRLEnv,
-    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
-    oranges_cfg: list[SceneEntityCfg] | None = None,
-) -> torch.Tensor:
-    """Shaped reward: exp(-2 * dist) to the nearest orange not yet placed on the plate.
-
-    Encourages the arm to approach any orange. Falls back to all oranges if none
-    are unplaced (e.g. all on plate already).
-    """
-    if oranges_cfg is None:
-        oranges_cfg = [
-            SceneEntityCfg("Orange001"),
-            SceneEntityCfg("Orange002"),
-            SceneEntityCfg("Orange003"),
-        ]
-
-    ee_frame: FrameTransformer = env.scene[ee_frame_cfg.name]
-    ee_pos = ee_frame.data.target_pos_w[:, 1, :]  # (num_envs, 3)
-
-    min_dist = torch.full((env.num_envs,), fill_value=1e6, device=env.device)
-    for orange_cfg in oranges_cfg:
-        orange: RigidObject = env.scene[orange_cfg.name]
-        dist = torch.linalg.vector_norm(orange.data.root_pos_w - ee_pos, dim=-1)
-        min_dist = torch.minimum(min_dist, dist)
-
-    return torch.exp(-2.0 * min_dist)
-
-
-def orange_grasped_reward(
-    env: ManagerBasedRLEnv,
-    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
-    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
-    object_cfg: SceneEntityCfg = SceneEntityCfg("Orange001"),
-) -> torch.Tensor:
-    """Binary reward: +1.0 each step an orange is held in the gripper."""
-    return orange_grasped(env, robot_cfg, ee_frame_cfg, object_cfg).float()
-
-
-def orange_placed_reward(
-    env: ManagerBasedRLEnv,
-    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
-    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
-    object_cfg: SceneEntityCfg = SceneEntityCfg("Orange001"),
-    plate_cfg: SceneEntityCfg = SceneEntityCfg("Plate"),
-) -> torch.Tensor:
-    """Binary reward: +1.0 each step an orange is resting on the plate."""
-    return put_orange_to_plate(env, robot_cfg, ee_frame_cfg, object_cfg, plate_cfg).float()
-
-
-def task_complete_bonus(
-    env: ManagerBasedRLEnv,
-    oranges_cfg: list[SceneEntityCfg] | None = None,
-    plate_cfg: SceneEntityCfg = SceneEntityCfg("Plate"),
-) -> torch.Tensor:
-    """Sparse bonus: +1.0 when all 3 oranges are on the plate and arm at rest."""
-    if oranges_cfg is None:
-        oranges_cfg = [
-            SceneEntityCfg("Orange001"),
-            SceneEntityCfg("Orange002"),
-            SceneEntityCfg("Orange003"),
-        ]
-    return task_done(env, oranges_cfg=oranges_cfg, plate_cfg=plate_cfg).float()
diff --git a/source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py b/source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py
deleted file mode 100644
index c6332727..00000000
--- a/source/leisaac/leisaac/tasks/pick_orange/pick_orange_rl_env_cfg.py
+++ /dev/null
@@ -1,173 +0,0 @@
-"""RL environment configuration for the pick-orange task.
-
-Uses joint-position control (no IK) with proprioceptive-only observations
-so the policy can be trained with rsl_rl PPO.
-
-Action space (6D):  [shoulder_pan, shoulder_lift, elbow_flex, wrist_flex, wrist_roll, gripper]
-Observation space (28D):
-    joint_pos          (6)
-    joint_vel          (6)
-    ee_frame_state     (7)  — EE position (3) + quaternion (4) in robot base frame
-    oranges_pos_rel    (9)  — 3 oranges × 3D position relative to EE
-"""
-
-from isaaclab.managers import ObservationGroupCfg as ObsGroup
-from isaaclab.managers import ObservationTermCfg as ObsTerm
-from isaaclab.managers import RewardTermCfg as RewTerm
-from isaaclab.managers import SceneEntityCfg
-from isaaclab.utils import configclass
-
-from .pick_orange_env_cfg import PickOrangeEnvCfg
-from . import mdp
-
-
-@configclass
-class PickOrangeRLObsCfg:
-    """Proprioceptive-only observation config for RL (28D, concatenated)."""
-
-    @configclass
-    class PolicyCfg(ObsGroup):
-        joint_pos = ObsTerm(func=mdp.joint_pos)
-        joint_vel = ObsTerm(func=mdp.joint_vel)
-        ee_frame_state = ObsTerm(
-            func=mdp.ee_frame_state,
-            params={
-                "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-                "robot_cfg": SceneEntityCfg("robot"),
-            },
-        )
-        oranges_pos_relative = ObsTerm(
-            func=mdp.oranges_pos_relative_to_ee,
-            params={
-                "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-                "oranges_cfg": [
-                    SceneEntityCfg("Orange001"),
-                    SceneEntityCfg("Orange002"),
-                    SceneEntityCfg("Orange003"),
-                ],
-            },
-        )
-
-        def __post_init__(self):
-            self.enable_corruption = False
-            self.concatenate_terms = True  # rsl_rl needs a single flat vector
-
-    policy: PolicyCfg = PolicyCfg()
-
-
-@configclass
-class PickOrangeRLRewardsCfg:
-    """Reward terms for RL training."""
-
-    # Shaped: guide EE toward the nearest orange
-    ee_guidance = RewTerm(
-        func=mdp.ee_to_nearest_orange_reward,
-        weight=0.5,
-        params={
-            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-            "oranges_cfg": [
-                SceneEntityCfg("Orange001"),
-                SceneEntityCfg("Orange002"),
-                SceneEntityCfg("Orange003"),
-            ],
-        },
-    )
-
-    # Per-orange grasp rewards
-    grasped_001 = RewTerm(
-        func=mdp.orange_grasped_reward,
-        weight=1.0,
-        params={
-            "robot_cfg": SceneEntityCfg("robot"),
-            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-            "object_cfg": SceneEntityCfg("Orange001"),
-        },
-    )
-    grasped_002 = RewTerm(
-        func=mdp.orange_grasped_reward,
-        weight=1.0,
-        params={
-            "robot_cfg": SceneEntityCfg("robot"),
-            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-            "object_cfg": SceneEntityCfg("Orange002"),
-        },
-    )
-    grasped_003 = RewTerm(
-        func=mdp.orange_grasped_reward,
-        weight=1.0,
-        params={
-            "robot_cfg": SceneEntityCfg("robot"),
-            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-            "object_cfg": SceneEntityCfg("Orange003"),
-        },
-    )
-
-    # Per-orange placement rewards
-    placed_001 = RewTerm(
-        func=mdp.orange_placed_reward,
-        weight=2.0,
-        params={
-            "robot_cfg": SceneEntityCfg("robot"),
-            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-            "object_cfg": SceneEntityCfg("Orange001"),
-            "plate_cfg": SceneEntityCfg("Plate"),
-        },
-    )
-    placed_002 = RewTerm(
-        func=mdp.orange_placed_reward,
-        weight=2.0,
-        params={
-            "robot_cfg": SceneEntityCfg("robot"),
-            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-            "object_cfg": SceneEntityCfg("Orange002"),
-            "plate_cfg": SceneEntityCfg("Plate"),
-        },
-    )
-    placed_003 = RewTerm(
-        func=mdp.orange_placed_reward,
-        weight=2.0,
-        params={
-            "robot_cfg": SceneEntityCfg("robot"),
-            "ee_frame_cfg": SceneEntityCfg("ee_frame"),
-            "object_cfg": SceneEntityCfg("Orange003"),
-            "plate_cfg": SceneEntityCfg("Plate"),
-        },
-    )
-
-    # Sparse task-completion bonus
-    task_complete = RewTerm(
-        func=mdp.task_complete_bonus,
-        weight=10.0,
-        params={
-            "oranges_cfg": [
-                SceneEntityCfg("Orange001"),
-                SceneEntityCfg("Orange002"),
-                SceneEntityCfg("Orange003"),
-            ],
-            "plate_cfg": SceneEntityCfg("Plate"),
-        },
-    )
-
-
-@configclass
-class PickOrangeRLEnvCfg(PickOrangeEnvCfg):
-    """pick_orange environment configured for RL training with rsl_rl."""
-
-    observations: PickOrangeRLObsCfg = PickOrangeRLObsCfg()
-    rewards: PickOrangeRLRewardsCfg = PickOrangeRLRewardsCfg()
-
-    def __post_init__(self) -> None:
-        super().__post_init__()
-
-        # Joint position control — no IK, gravity disabled for arm stability
-        self.use_teleop_device("so101_joint_pos")
-
-        # Shorter episode is sufficient for RL exploration
-        self.episode_length_s = 30.0
-
-        # Cameras not needed for proprioceptive RL
-        self.scene.wrist = None
-        self.scene.front = None
-
-        # Disable recorder for training runs
-        self.recorders = None

From db99a6cf417837331b669ffec645785975b3e0ef Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 5 Mar 2026 20:50:01 -0500
Subject: [PATCH 46/68] Refactor

---
 STATE_MACHINE_README.md                       |  86 ++--
 STATE_MACHINE_README_CN.md                    |  85 ++--
 record_pick_orange.sh                         |  12 -
 replay_pick_orange.sh                         |  14 -
 scripts/datagen/state_machine/generate.py     | 270 +++++++++++
 .../state_machine/replay.py                   |   0
 .../environments/state_machine/pick_orange.py | 371 --------------
 source/leisaac/leisaac/datagen/__init__.py    |   0
 .../leisaac/datagen/state_machine/__init__.py |   3 +
 .../{ => datagen}/state_machine/base.py       |  34 +-
 .../datagen/state_machine/pick_orange.py      | 320 ++++++++++++
 .../leisaac/leisaac/state_machine/__init__.py |   5 -
 .../leisaac/state_machine/pick_orange.py      | 458 ------------------
 13 files changed, 728 insertions(+), 930 deletions(-)
 delete mode 100755 record_pick_orange.sh
 delete mode 100755 replay_pick_orange.sh
 create mode 100644 scripts/datagen/state_machine/generate.py
 rename scripts/{environments => datagen}/state_machine/replay.py (100%)
 delete mode 100644 scripts/environments/state_machine/pick_orange.py
 create mode 100644 source/leisaac/leisaac/datagen/__init__.py
 create mode 100644 source/leisaac/leisaac/datagen/state_machine/__init__.py
 rename source/leisaac/leisaac/{ => datagen}/state_machine/base.py (63%)
 create mode 100644 source/leisaac/leisaac/datagen/state_machine/pick_orange.py
 delete mode 100644 source/leisaac/leisaac/state_machine/__init__.py
 delete mode 100644 source/leisaac/leisaac/state_machine/pick_orange.py

diff --git a/STATE_MACHINE_README.md b/STATE_MACHINE_README.md
index 8b3d0f46..6ebbc9c4 100644
--- a/STATE_MACHINE_README.md
+++ b/STATE_MACHINE_README.md
@@ -5,13 +5,13 @@
 The state machine module provides automated data collection for manipulation tasks without human teleoperation. It runs a scripted policy, records demonstrations to HDF5 datasets, and supports replaying those demonstrations.
 
 ```
-scripts/environments/state_machine/
-├── pick_orange.py      # Runner script: recording (PickOrange task)
-└── replay.py           # Replay script for state-machine demonstrations
+scripts/datagen/state_machine/
+├── generate.py     # Unified runner script: select task via --task argument
+└── replay.py       # Replay script for state-machine demonstrations
 
-source/leisaac/leisaac/state_machine/
-├── base.py             # StateMachineBase abstract class
-└── pick_orange.py      # PickOrangeStateMachine
+source/leisaac/leisaac/datagen/state_machine/
+├── base.py         # StateMachineBase abstract class
+└── pick_orange.py  # PickOrangeStateMachine
 ```
 
 ---
@@ -22,7 +22,7 @@ source/leisaac/leisaac/state_machine/
 
 ```bash
 # Single-arm pick-orange (3 oranges → plate)
-python scripts/environments/state_machine/pick_orange.py \
+python scripts/datagen/state_machine/generate.py \
     --task LeIsaac-SO101-PickOrange-v0 \
     --num_envs 1 \
     --device cuda \
@@ -37,19 +37,29 @@ python scripts/environments/state_machine/pick_orange.py \
 ### How It Works
 
 ```
-Runner script
-  └── gym.make(task, cfg=env_cfg)           # create env
-       └── env_cfg.use_teleop_device(       # configure IK action manager
-               "so101_state_machine")
-  └── sm = PickOrangeStateMachine(...)
+generate.py
+  └── TASK_REGISTRY[task] → (SMClass, device)   # select SM from registry
+  └── gym.make(task, cfg=env_cfg)                # create env
+       └── env_cfg.use_teleop_device(device)     # configure IK action manager
+  └── sm = SMClass()
+  └── sm.setup(env)                              # FK calibration, etc.
   └── Main loop:
-       actions = sm.get_action(env)         # state machine computes 8D IK action
-       env.step(actions)                    # steps sim + recorder captures data
-       sm.advance()                         # advance state machine
+       sm.pre_step(env)                          # e.g. home-blend joint write
+       actions = sm.get_action(env)              # state machine computes 8D IK action
+       env.step(actions)                         # steps sim + recorder captures data
+       sm.advance()                              # advance state machine (incl. fast-forward)
 ```
 
-The runner calls `env.step(actions)` directly with the state machine's output tensor.
-`preprocess_device_action()` is **not** called (that is only used in the teleoperation pipeline).
+### Adding a New Task
+
+1. Implement a `StateMachineBase` subclass in `source/leisaac/leisaac/datagen/state_machine/`.
+2. Register it in `TASK_REGISTRY` inside `generate.py`:
+   ```python
+   TASK_REGISTRY = {
+       "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
+       "LeIsaac-MY-NewTask-v0":       (MyNewStateMachine,      "so101_state_machine"),
+   }
+   ```
 
 ### Action Format
 
@@ -99,11 +109,7 @@ When replaying, use `--select_episodes K` to load `demo_K`:
 ### Quick Start
 
 ```bash
-# replay_pick_orange.sh wrapper (default: demo_3 of dataset_test.hdf5)
-bash replay_pick_orange.sh
-
-# explicit call
-python scripts/environments/state_machine/replay.py \
+python scripts/datagen/state_machine/replay.py \
     --task LeIsaac-SO101-PickOrange-v0 \
     --dataset_file ./datasets/pick_orange.hdf5 \
     --task_type so101_state_machine \
@@ -126,7 +132,21 @@ For `so101_state_machine`, **only `action` mode is valid**. The IK action manage
 
 ## Technical Details
 
-### 1. Gravity Disable (Two Steps Required)
+### 1. StateMachineBase Interface
+
+Each state machine must implement:
+
+| Method | Description |
+|---|---|
+| `setup(env)` | One-time calibration after env creation (e.g. FK rest-pose computation) |
+| `check_success(env) → bool` | Episode-end success evaluation |
+| `pre_step(env)` | Optional per-step hook before `get_action` (default: no-op) |
+| `get_action(env) → Tensor` | Compute IK action for current step |
+| `advance()` | Increment step counter; handles state transitions and fast-forward |
+| `reset()` | Reset to initial state for a new episode |
+| `is_episode_done` | Property: True when all phases complete |
+
+### 2. Gravity Disable (Two Steps Required)
 
 IsaacLab's `disable_gravity` flag in `ArticulationCfg.spawn.rigid_props` only writes to the articulation root prim. Individual link prims each carry their own `PhysicsRigidBodyAPI` with gravity still enabled.
 
@@ -145,19 +165,17 @@ for _prim in _stage.Traverse():
 
 Both steps must be present. The same pattern applies to bi-arm tasks (`"Robot"` matches both `Left_Robot` and `Right_Robot`).
 
-### 2. Joint Damping
+### 3. Joint Damping
 
-The IK controller requires higher damping than the robot's default (0.6 N·m·s/rad) for stable, smooth trajectories. Damping is set to **10.0 N·m·s/rad** every step via:
+The IK controller requires higher damping than the robot's default (0.6 N·m·s/rad) for stable, smooth trajectories. Damping is set to **10.0 N·m·s/rad** inside `get_action()`:
 
 ```python
-# In PickOrangeStateMachine.get_action():
-robot = env.scene["robot"]
 robot.write_joint_damping_to_sim(damping=10.0)
 ```
 
-This must also be applied during **replay** to match recording conditions. The state-machine replay script (`scripts/environments/state_machine/replay.py`) calls `apply_damping(env, task_type)` before every `env.step()`.
+This must also be applied during **replay** to match recording conditions. The replay script calls `apply_damping(env, task_type)` before every `env.step()`.
 
-### 3. Episode Numbering
+### 4. Episode Numbering
 
 The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on the very first `env.reset()` call (before any steps). This becomes `demo_0`.
 
@@ -173,9 +191,7 @@ The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on t
 
 | File | Purpose |
 |---|---|
-| `scripts/environments/state_machine/pick_orange.py` | Recording runner for PickOrange |
-| `scripts/environments/state_machine/replay.py` | State-machine replay (with damping) |
-| `source/leisaac/leisaac/state_machine/base.py` | `StateMachineBase` abstract class |
-| `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
-| `replay_pick_orange.sh` | Shell wrapper for replay.py |
-| `record_pick_orange.sh` | Shell wrapper for pick_orange recording |
+| `scripts/datagen/state_machine/generate.py` | Unified recording runner (task selected via `--task`) |
+| `scripts/datagen/state_machine/replay.py` | State-machine replay (with damping) |
+| `source/leisaac/leisaac/datagen/state_machine/base.py` | `StateMachineBase` abstract class |
+| `source/leisaac/leisaac/datagen/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
diff --git a/STATE_MACHINE_README_CN.md b/STATE_MACHINE_README_CN.md
index b3ff0505..66f62501 100644
--- a/STATE_MACHINE_README_CN.md
+++ b/STATE_MACHINE_README_CN.md
@@ -5,13 +5,13 @@
 状态机模块提供无需人工遥操作的自动化数据采集能力。它运行脚本化的策略，将演示数据录制为 HDF5 数据集，并支持回放已录制的演示。
 
 ```
-scripts/environments/state_machine/
-├── pick_orange.py      # 录制 Runner 脚本（拾橙任务）
-└── replay.py           # 状态机演示回放脚本
+scripts/datagen/state_machine/
+├── generate.py     # 统一 Runner 脚本：通过 --task 参数选择任务
+└── replay.py       # 状态机演示回放脚本
 
-source/leisaac/leisaac/state_machine/
-├── base.py             # StateMachineBase 抽象基类
-└── pick_orange.py      # PickOrangeStateMachine
+source/leisaac/leisaac/datagen/state_machine/
+├── base.py         # StateMachineBase 抽象基类
+└── pick_orange.py  # PickOrangeStateMachine
 ```
 
 ---
@@ -22,7 +22,7 @@ source/leisaac/leisaac/state_machine/
 
 ```bash
 # 单臂拾橙（3 个橘子 → 盘子）
-python scripts/environments/state_machine/pick_orange.py \
+python scripts/datagen/state_machine/generate.py \
     --task LeIsaac-SO101-PickOrange-v0 \
     --num_envs 1 \
     --device cuda \
@@ -37,18 +37,29 @@ python scripts/environments/state_machine/pick_orange.py \
 ### 工作原理
 
 ```
-Runner 脚本
-  └── gym.make(task, cfg=env_cfg)           # 创建环境
-       └── env_cfg.use_teleop_device(       # 配置 IK action manager
-               "so101_state_machine")
-  └── sm = PickOrangeStateMachine(...)
+generate.py
+  └── TASK_REGISTRY[task] → (SMClass, device)   # 从注册表选择状态机
+  └── gym.make(task, cfg=env_cfg)                # 创建环境
+       └── env_cfg.use_teleop_device(device)     # 配置 IK action manager
+  └── sm = SMClass()
+  └── sm.setup(env)                              # FK 校准等一次性初始化
   └── 主循环:
-       actions = sm.get_action(env)         # 状态机计算 8D IK 动作
-       env.step(actions)                    # 推进仿真 + 录制器采集数据
-       sm.advance()                         # 推进状态机
+       sm.pre_step(env)                          # 例如 home-blend 关节写入
+       actions = sm.get_action(env)              # 状态机计算 8D IK 动作
+       env.step(actions)                         # 推进仿真 + 录制器采集数据
+       sm.advance()                              # 推进状态机（含快进逻辑）
 ```
 
-Runner 脚本直接将状态机输出的动作张量传给 `env.step(actions)`，**不经过** `preprocess_device_action()`（后者仅在遥操作流程中使用）。
+### 添加新任务
+
+1. 在 `source/leisaac/leisaac/datagen/state_machine/` 中实现一个 `StateMachineBase` 子类。
+2. 在 `generate.py` 的 `TASK_REGISTRY` 中注册：
+   ```python
+   TASK_REGISTRY = {
+       "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
+       "LeIsaac-MY-NewTask-v0":       (MyNewStateMachine,      "so101_state_machine"),
+   }
+   ```
 
 ### 动作格式
 
@@ -100,11 +111,7 @@ data/
 ### 快速开始
 
 ```bash
-# replay_pick_orange.sh 封装脚本（默认：dataset_test.hdf5 的 demo_3）
-bash replay_pick_orange.sh
-
-# 显式调用
-python scripts/environments/state_machine/replay.py \
+python scripts/datagen/state_machine/replay.py \
     --task LeIsaac-SO101-PickOrange-v0 \
     --dataset_file ./datasets/pick_orange.hdf5 \
     --task_type so101_state_machine \
@@ -127,7 +134,21 @@ python scripts/environments/state_machine/replay.py \
 
 ## 技术细节
 
-### 1. 重力禁用（两步缺一不可）
+### 1. StateMachineBase 接口
+
+每个状态机必须实现：
+
+| 方法 | 说明 |
+|---|---|
+| `setup(env)` | 环境创建后的一次性初始化（如 FK rest-pose 校准） |
+| `check_success(env) → bool` | episode 结束时的成功判断 |
+| `pre_step(env)` | 可选的每步前置钩子（默认空操作） |
+| `get_action(env) → Tensor` | 计算当前步的 IK 动作 |
+| `advance()` | 推进步计数器，处理状态转移和快进 |
+| `reset()` | 重置到初始状态，供新 episode 使用 |
+| `is_episode_done` | 属性：所有阶段完成时为 True |
+
+### 2. 重力禁用（两步缺一不可）
 
 IsaacLab 的 `disable_gravity` 标志只写入关节根 prim，各子 link prim 各自带有 `PhysicsRigidBodyAPI`，重力默认仍然开启。
 
@@ -146,19 +167,17 @@ for _prim in _stage.Traverse():
 
 两步必须同时存在。双臂任务同理（`"Robot"` 可同时匹配 `Left_Robot` 和 `Right_Robot`）。
 
-### 2. 关节阻尼
+### 3. 关节阻尼
 
-IK 控制器需要比机械臂默认阻尼（0.6 N·m·s/rad）更高的阻尼值，才能获得稳定、平滑的轨迹。阻尼设置为 **10.0 N·m·s/rad**，在每步调用：
+IK 控制器需要比机械臂默认阻尼（0.6 N·m·s/rad）更高的阻尼值，才能获得稳定、平滑的轨迹。在 `get_action()` 中每步调用：
 
 ```python
-# 在 PickOrangeStateMachine.get_action() 中：
-robot = env.scene["robot"]
 robot.write_joint_damping_to_sim(damping=10.0)
 ```
 
-**回放时也必须应用相同阻尼**，以匹配录制条件。状态机回放脚本（`scripts/environments/state_machine/replay.py`）在每次 `env.step()` 前调用 `apply_damping(env, task_type)`。
+**回放时也必须应用相同阻尼**，以匹配录制条件。回放脚本在每次 `env.step()` 前调用 `apply_damping(env, task_type)`。
 
-### 3. Episode 编号规则
+### 4. Episode 编号规则
 
 IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任何步骤）会保存一个仅含初始状态的 episode（`num_samples=0`），即 `demo_0`。
 
@@ -174,9 +193,7 @@ IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任
 
 | 文件 | 用途 |
 |---|---|
-| `scripts/environments/state_machine/pick_orange.py` | 拾橙任务录制 Runner |
-| `scripts/environments/state_machine/replay.py` | 状态机专用回放脚本（含阻尼设置） |
-| `source/leisaac/leisaac/state_machine/base.py` | `StateMachineBase` 抽象基类 |
-| `source/leisaac/leisaac/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
-| `replay_pick_orange.sh` | replay.py 的 Shell 封装脚本 |
-| `record_pick_orange.sh` | 拾橙录制的 Shell 封装脚本 |
+| `scripts/datagen/state_machine/generate.py` | 统一录制 Runner（通过 `--task` 选择任务） |
+| `scripts/datagen/state_machine/replay.py` | 状态机专用回放脚本（含阻尼设置） |
+| `source/leisaac/leisaac/datagen/state_machine/base.py` | `StateMachineBase` 抽象基类 |
+| `source/leisaac/leisaac/datagen/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
diff --git a/record_pick_orange.sh b/record_pick_orange.sh
deleted file mode 100755
index 5b71eb0c..00000000
--- a/record_pick_orange.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-TASK_NAME=${1:-LeIsaac-SO101-PickOrange-v0}
-DATASET=${2:-./datasets/dataset_test.hdf5}
-
-python scripts/environments/state_machine/pick_orange.py \
-    --dataset_file=${DATASET} \
-    --task=${TASK_NAME} \
-    --num_envs=1 \
-    --device=cuda \
-    --enable_cameras \
-    --record \
-    --num_demos=1
\ No newline at end of file
diff --git a/replay_pick_orange.sh b/replay_pick_orange.sh
deleted file mode 100755
index a3b536eb..00000000
--- a/replay_pick_orange.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-TASK_NAME=${1:-LeIsaac-SO101-PickOrange-v0}
-DATASET=${2:-./datasets/dataset_test.hdf5}
-EPISODE_ID=${3:-1}
-
-python scripts/environments/state_machine/replay.py \
-    --dataset_file=${DATASET} \
-    --task=${TASK_NAME} \
-    --num_envs=1 \
-    --device=cuda \
-    --enable_cameras \
-    --select_episodes ${EPISODE_ID} \
-    --replay_mode=action \
-    --task_type=so101_state_machine
\ No newline at end of file
diff --git a/scripts/datagen/state_machine/generate.py b/scripts/datagen/state_machine/generate.py
new file mode 100644
index 00000000..a7e4cf60
--- /dev/null
+++ b/scripts/datagen/state_machine/generate.py
@@ -0,0 +1,270 @@
+"""Unified data generation script using state machines.
+
+Selects the appropriate state machine based on --task and runs the recording loop.
+
+Usage:
+    python scripts/datagen/state_machine/generate.py \\
+        --task LeIsaac-SO101-PickOrange-v0 \\
+        --num_envs 1 --device cuda --enable_cameras \\
+        --record --dataset_file ./datasets/pick_orange.hdf5 --num_demos 50
+"""
+
+import argparse
+import multiprocessing
+import os
+import time
+
+if multiprocessing.get_start_method() != "spawn":
+    multiprocessing.set_start_method("spawn", force=True)
+
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="State machine data generation for LeIsaac tasks.")
+parser.add_argument("--num_envs", type=int, default=1)
+parser.add_argument("--task", type=str, required=True)
+parser.add_argument("--seed", type=int, default=None)
+parser.add_argument("--record", action="store_true")
+parser.add_argument("--step_hz", type=int, default=60)
+parser.add_argument("--dataset_file", type=str, default="./datasets/dataset.hdf5")
+parser.add_argument("--resume", action="store_true")
+parser.add_argument("--num_demos", type=int, default=1)
+parser.add_argument("--quality", action="store_true")
+parser.add_argument("--use_lerobot_recorder", action="store_true")
+parser.add_argument("--lerobot_dataset_repo_id", type=str, default=None)
+parser.add_argument("--lerobot_dataset_fps", type=int, default=30)
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+app_launcher = AppLauncher(vars(args_cli))
+simulation_app = app_launcher.app
+
+import gymnasium as gym
+import torch
+from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
+from isaaclab.managers import DatasetExportMode, TerminationTermCfg
+from isaaclab_tasks.utils import parse_env_cfg
+
+from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
+from leisaac.datagen.state_machine import PickOrangeStateMachine
+from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
+
+import leisaac.tasks  # noqa: F401
+
+# ---------------------------------------------------------------------------
+# Task registry: maps gym task id → (StateMachineClass, device_type)
+# ---------------------------------------------------------------------------
+TASK_REGISTRY = {
+    "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
+}
+
+
+class RateLimiter:
+    def __init__(self, hz):
+        self.hz = hz
+        self.last_time = time.time()
+        self.sleep_duration = 1.0 / hz
+        self.render_period = min(0.0166, self.sleep_duration)
+
+    def sleep(self, env):
+        next_wakeup_time = self.last_time + self.sleep_duration
+        while time.time() < next_wakeup_time:
+            time.sleep(self.render_period)
+            env.sim.render()
+        self.last_time = self.last_time + self.sleep_duration
+        if self.last_time < time.time():
+            while self.last_time < time.time():
+                self.last_time += self.sleep_duration
+
+
+def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
+    if hasattr(env, "termination_manager"):
+        if success:
+            env.termination_manager.set_term_cfg(
+                "success",
+                TerminationTermCfg(func=lambda env: torch.ones(env.num_envs, dtype=torch.bool, device=env.device)),
+            )
+        else:
+            env.termination_manager.set_term_cfg(
+                "success",
+                TerminationTermCfg(func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)),
+            )
+        env.termination_manager.compute()
+    elif hasattr(env, "_get_dones"):
+        env.cfg.return_success_status = success
+    return False
+
+
+def main():
+    task_name = args_cli.task
+    if task_name not in TASK_REGISTRY:
+        raise ValueError(
+            f"Task '{task_name}' is not registered in TASK_REGISTRY.\n"
+            f"Available tasks: {list(TASK_REGISTRY.keys())}"
+        )
+    SMClass, device = TASK_REGISTRY[task_name]
+
+    output_dir = os.path.dirname(args_cli.dataset_file)
+    output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
+    if output_dir and not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    env_cfg = parse_env_cfg(task_name, device=args_cli.device, num_envs=args_cli.num_envs)
+    env_cfg.use_teleop_device(device)
+    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
+
+    is_direct_env = "Direct" in task_name
+    if is_direct_env:
+        env_cfg.never_time_out = True
+        env_cfg.auto_terminate = True
+    else:
+        if hasattr(env_cfg.terminations, "time_out"):
+            env_cfg.terminations.time_out = None
+        if hasattr(env_cfg.terminations, "success"):
+            env_cfg.terminations.success = None
+
+    if args_cli.record:
+        if args_cli.use_lerobot_recorder:
+            if args_cli.resume:
+                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_SUCCEEDED_ONLY_RESUME
+            else:
+                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_SUCCEEDED_ONLY
+        else:
+            if args_cli.resume:
+                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
+                assert os.path.exists(args_cli.dataset_file), \
+                    "Dataset file does not exist. Do not use '--resume' when recording a new dataset."
+            else:
+                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
+                assert not os.path.exists(args_cli.dataset_file), \
+                    "Dataset file already exists. Use '--resume' to resume recording."
+        env_cfg.recorders.dataset_export_dir_path = output_dir
+        env_cfg.recorders.dataset_filename = output_file_name
+        if is_direct_env:
+            env_cfg.return_success_status = False
+        else:
+            if not hasattr(env_cfg.terminations, "success"):
+                setattr(env_cfg.terminations, "success", None)
+            env_cfg.terminations.success = TerminationTermCfg(
+                func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)
+            )
+    else:
+        env_cfg.recorders = None
+
+    env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
+
+    import omni.usd
+    from pxr import PhysxSchema, UsdPhysics
+
+    _stage = omni.usd.get_context().get_stage()
+    for _prim in _stage.Traverse():
+        if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
+            PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
+
+    if args_cli.record:
+        del env.recorder_manager
+        if args_cli.use_lerobot_recorder:
+            from leisaac.enhance.datasets.lerobot_dataset_handler import LeRobotDatasetCfg
+            from leisaac.enhance.managers.lerobot_recorder_manager import LeRobotRecorderManager
+
+            dataset_cfg = LeRobotDatasetCfg(
+                repo_id=args_cli.lerobot_dataset_repo_id,
+                fps=args_cli.lerobot_dataset_fps,
+            )
+            env.recorder_manager = LeRobotRecorderManager(env_cfg.recorders, dataset_cfg, env)
+        else:
+            env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
+            env.recorder_manager.flush_steps = 100
+            env.recorder_manager.compression = "lzf"
+
+    rate_limiter = RateLimiter(args_cli.step_hz)
+
+    if hasattr(env, "initialize"):
+        env.initialize()
+    env.reset()
+
+    # One-time state machine setup (e.g. FK calibration)
+    sm = SMClass()
+    sm.setup(env)
+    sm.reset()
+
+    resume_recorded_demo_count = 0
+    if args_cli.record and args_cli.resume:
+        resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
+        print(f"Resuming recording with {resume_recorded_demo_count} existing demonstrations.")
+    current_recorded_demo_count = resume_recorded_demo_count
+
+    start_record_state = False
+
+    while simulation_app.is_running() and not simulation_app.is_exiting():
+        if env.cfg.dynamic_reset_gripper_effort_limit:
+            dynamic_reset_gripper_effort_limit_sim(env, device)
+
+        if sm.is_episode_done:
+            try:
+                print("Episode complete. Checking task success...")
+                success = sm.check_success(env)
+                print("Task success:", success)
+            except Exception as e:
+                print("Success check failed:", e)
+                success = False
+
+            if start_record_state:
+                if args_cli.record:
+                    print("Stop recording.")
+                start_record_state = False
+
+            if args_cli.record and success:
+                print("✅ Task succeeded. Marking as SUCCESS.")
+                auto_terminate(env, True)
+                current_recorded_demo_count += 1
+            else:
+                print("❌ Task failed. Marking as FAILURE.")
+                auto_terminate(env, False)
+
+            if (
+                args_cli.record
+                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                > current_recorded_demo_count
+            ):
+                current_recorded_demo_count = (
+                    env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                )
+                print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
+
+            if (
+                args_cli.record
+                and args_cli.num_demos > 0
+                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                >= args_cli.num_demos
+            ):
+                print(f"All {args_cli.num_demos} demonstrations recorded. Exiting.")
+                break
+
+            env.reset()
+            sm.reset()
+
+            if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
+                print(f"All {args_cli.num_demos} demonstrations recorded. Exiting.")
+                break
+        else:
+            if not start_record_state:
+                if args_cli.record:
+                    print("Start recording.")
+                start_record_state = True
+
+            sm.pre_step(env)
+            actions = sm.get_action(env)
+            env.step(actions)
+            sm.advance()
+
+        rate_limiter.sleep(env)
+
+    if args_cli.record and hasattr(env.recorder_manager, "finalize"):
+        env.recorder_manager.finalize()
+
+    env.close()
+    simulation_app.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/environments/state_machine/replay.py b/scripts/datagen/state_machine/replay.py
similarity index 100%
rename from scripts/environments/state_machine/replay.py
rename to scripts/datagen/state_machine/replay.py
diff --git a/scripts/environments/state_machine/pick_orange.py b/scripts/environments/state_machine/pick_orange.py
deleted file mode 100644
index 2c442919..00000000
--- a/scripts/environments/state_machine/pick_orange.py
+++ /dev/null
@@ -1,371 +0,0 @@
-"""Script to generate data using state machine with leisaac manipulation environments.
-
-Launch Isaac Sim Simulator first.
-"""
-
-import argparse
-import multiprocessing
-import os
-import time
-
-if multiprocessing.get_start_method() != 'spawn':
-    multiprocessing.set_start_method('spawn', force=True)
-
-from isaaclab.app import AppLauncher
-
-# add argparse arguments
-parser = argparse.ArgumentParser(description='leisaac data generation script for pick_orange task.')
-parser.add_argument('--num_envs', type=int, default=1)
-parser.add_argument('--task', type=str, default=None)
-parser.add_argument('--seed', type=int, default=None)
-parser.add_argument('--record', action='store_true')
-parser.add_argument('--step_hz', type=int, default=60)
-parser.add_argument('--dataset_file', type=str, default='./datasets/dataset.hdf5')
-parser.add_argument('--resume', action='store_true')
-parser.add_argument('--num_demos', type=int, default=1)
-parser.add_argument('--quality', action='store_true')
-parser.add_argument('--use_lerobot_recorder', action='store_true', help='whether to use lerobot recorder.')
-parser.add_argument('--lerobot_dataset_repo_id', type=str, default=None, help='Lerobot Dataset repository ID.')
-parser.add_argument('--lerobot_dataset_fps', type=int, default=30, help='Lerobot Dataset frames per second.')
-
-AppLauncher.add_app_launcher_args(parser)
-args_cli = parser.parse_args()
-app_launcher_args = vars(args_cli)
-
-app_launcher = AppLauncher(app_launcher_args)
-simulation_app = app_launcher.app
-
-import gymnasium as gym
-import torch
-
-from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
-from isaaclab.managers import DatasetExportMode, SceneEntityCfg, TerminationTermCfg
-from isaaclab_tasks.utils import parse_env_cfg
-
-from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
-from leisaac.state_machine import PickOrangeStateMachine
-from leisaac.tasks.pick_orange.mdp import task_done
-from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
-
-class RateLimiter:
-    """Convenience class for enforcing rates in loops."""
-
-    def __init__(self, hz):
-        """Initialize a RateLimiter.
-
-        Args:
-            hz (int): frequency to enforce.
-        """
-        self.hz = hz
-        self.last_time = time.time()
-        self.sleep_duration = 1.0 / hz
-        self.render_period = min(0.0166, self.sleep_duration)
-
-    def sleep(self, env):
-        """Attempt to sleep at the specified rate in hz."""
-        next_wakeup_time = self.last_time + self.sleep_duration
-        while time.time() < next_wakeup_time:
-            time.sleep(self.render_period)
-            env.sim.render()
-
-        self.last_time = self.last_time + self.sleep_duration
-
-        # detect time jumping forwards (e.g. loop is too slow)
-        if self.last_time < time.time():
-            while self.last_time < time.time():
-                self.last_time += self.sleep_duration
-
-
-def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
-    """Programmatically mark the current episode as success or failure.
-
-    This is the same implementation used in the teleoperation script.
-    It does not require any human input.
-    """
-    if hasattr(env, "termination_manager"):
-        if success:
-            env.termination_manager.set_term_cfg(
-                "success",
-                TerminationTermCfg(func=lambda env: torch.ones(env.num_envs, dtype=torch.bool, device=env.device)),
-            )
-        else:
-            env.termination_manager.set_term_cfg(
-                "success",
-                TerminationTermCfg(func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)),
-            )
-        env.termination_manager.compute()
-    elif hasattr(env, "_get_dones"):
-        # fallback for some Direct envs
-        env.cfg.return_success_status = success
-    return False
-
-
-def main() -> None:
-    """Run a pick-orange state machine in an Isaac Lab manipulation environment.
-
-    Creates the environment, initializes the pick-and-place state machine for
-    picking an orange, and runs the main simulation loop until the application
-    is closed.
-    """
-    output_dir = os.path.dirname(args_cli.dataset_file)
-    output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
-    if not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-
-    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs)
-
-    # Temporarily use so101_state_machine as the teleoperation device
-    device = "so101_state_machine"
-    env_cfg.use_teleop_device(device)
-    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
-    task_name = args_cli.task
-
-    # Timeout and termination preprocessing
-    is_direct_env = "Direct" in task_name
-    if is_direct_env:
-        env_cfg.never_time_out = True
-        env_cfg.auto_terminate = True
-    else:
-        # Modify termination configuration
-        if hasattr(env_cfg.terminations, "time_out"):
-            env_cfg.terminations.time_out = None
-        if hasattr(env_cfg.terminations, "success"):
-            env_cfg.terminations.success = None
-
-    # Recorder preprocessing & manual success-termination preprocessing
-    if args_cli.record:
-        if args_cli.use_lerobot_recorder:
-            if args_cli.resume:
-                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_SUCCEEDED_ONLY_RESUME
-            else:
-                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_SUCCEEDED_ONLY
-        else:
-            if args_cli.resume:
-                env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
-                assert os.path.exists(
-                    args_cli.dataset_file
-                ), "The dataset file does not exist. Do not use '--resume' when recording a new dataset."
-            else:
-                env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
-                assert not os.path.exists(
-                    args_cli.dataset_file
-                ), "The dataset file already exists. Use '--resume' to resume recording."
-        env_cfg.recorders.dataset_export_dir_path = output_dir
-        env_cfg.recorders.dataset_filename = output_file_name
-        if is_direct_env:
-            env_cfg.return_success_status = False
-        else:
-            if not hasattr(env_cfg.terminations, "success"):
-                setattr(env_cfg.terminations, "success", None)
-            env_cfg.terminations.success = TerminationTermCfg(
-                func=lambda env: torch.zeros(env.num_envs, dtype=torch.bool, device=env.device)
-            )
-    else:
-        env_cfg.recorders = None
-
-    # Create environment
-    env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
-
-    # Disable gravity for every robot link prim.
-    # IsaacLab's disable_gravity flag in ArticulationCfg.spawn.rigid_props only writes
-    # to the articulation ROOT prim; individual link prims (shoulder_pan_link, elbow_flex_link,
-    # etc.) each carry their own PhysicsRigidBodyAPI with gravity still enabled.
-    # This block traverses the USD stage and explicitly sets disableGravity=True on every
-    # rigid-body prim that belongs to the robot, fixing the gravity-induced arm drop at reset.
-    import omni.usd
-    from pxr import PhysxSchema, UsdPhysics
-
-    _stage = omni.usd.get_context().get_stage()
-    for _prim in _stage.Traverse():
-        if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
-            PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
-
-    if args_cli.record:
-        del env.recorder_manager
-        if args_cli.use_lerobot_recorder:
-            from leisaac.enhance.datasets.lerobot_dataset_handler import (
-                LeRobotDatasetCfg,
-            )
-            from leisaac.enhance.managers.lerobot_recorder_manager import (
-                LeRobotRecorderManager,
-            )
-
-            dataset_cfg = LeRobotDatasetCfg(
-                repo_id=args_cli.lerobot_dataset_repo_id,
-                fps=args_cli.lerobot_dataset_fps,
-            )
-            env.recorder_manager = LeRobotRecorderManager(env_cfg.recorders, dataset_cfg, env)
-        else:
-            env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
-            env.recorder_manager.flush_steps = 100
-            env.recorder_manager.compression = "lzf"
-
-    # Rate limiter
-    rate_limiter = RateLimiter(args_cli.step_hz)
-
-    # Initialize / reset
-    if hasattr(env, "initialize"):
-        env.initialize()
-    env.reset()
-
-    resume_recorded_demo_count = 0
-    if args_cli.record and args_cli.resume:
-        resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
-        print(f"Resuming recording from existing dataset with {resume_recorded_demo_count} demonstrations.")
-    current_recorded_demo_count = resume_recorded_demo_count
-
-    # --- FK calibration: drive arm to rest pose and record the EE world position ---
-    # task_done() requires SO-101 joints within SO101_FOLLOWER_REST_POSE_RANGE
-    # (shoulder_lift≈-100°, elbow_flex≈90°, wrist_flex≈50°, ±30° each).
-    # The IK action controls EE position, not joints directly.  We find the EE
-    # position that corresponds to the rest pose by temporarily driving the arm
-    # there with high stiffness, then reading body_pos_w.  env.reset() afterwards
-    # returns every scene object to its default state.
-    _robot = env.scene["robot"]
-    _joint_names = list(_robot.data.joint_names)
-    _REST_POSE_DEG = {
-        "shoulder_pan": 0.0,
-        "shoulder_lift": -100.0,
-        "elbow_flex": 90.0,
-        "wrist_flex": 50.0,
-        "wrist_roll": 0.0,
-        "gripper": -10.0,
-    }
-    _rest_joint_pos = torch.zeros(env.num_envs, len(_joint_names), device=env.device)
-    for _idx, _name in enumerate(_joint_names):
-        if _name in _REST_POSE_DEG:
-            _rest_joint_pos[:, _idx] = _REST_POSE_DEG[_name] * torch.pi / 180.0
-
-    # write_joint_state_to_sim teleports joints to the exact rest pose instantly.
-    # This is simpler than target-based control: no stiffness tuning or multiple steps needed.
-    _robot.write_joint_state_to_sim(
-        position=_rest_joint_pos,
-        velocity=torch.zeros_like(_rest_joint_pos),
-    )
-    env.sim.step(render=False)
-    env.scene.update(dt=env.physics_dt)
-    _rest_ee_pos_world = _robot.data.body_pos_w[:, -1, :].clone()
-    print(f"[Calibration] Rest pose EE (world): {_rest_ee_pos_world[0].cpu().numpy()}")
-
-    # Reset scene to initial state (joints back to zero, objects to default positions)
-    env.reset()
-    # -------------------------------------------------------------------------
-
-    sm = PickOrangeStateMachine(num_oranges=3, rest_ee_pos_world=_rest_ee_pos_world)
-    sm.reset()
-    start_record_state = False
-    _home_start_pos: torch.Tensor | None = None  # joint positions captured at start of return home
-
-    while simulation_app.is_running() and not simulation_app.is_exiting():
-        if env.cfg.dynamic_reset_gripper_effort_limit:
-            dynamic_reset_gripper_effort_limit_sim(env, device)
-
-        if sm.is_episode_done:
-            # --- Check whether the current episode is considered successful ---
-            try:
-                _robot.write_joint_state_to_sim(
-                    position=_rest_joint_pos,
-                    velocity=torch.zeros_like(_rest_joint_pos),
-                )
-                env.scene.update(dt=env.physics_dt)
-
-                print("Completed one cycle. Checking task success status...")
-                success_tensor = task_done(
-                    env,
-                    oranges_cfg=[
-                        SceneEntityCfg("Orange001"),
-                        SceneEntityCfg("Orange002"),
-                        SceneEntityCfg("Orange003"),
-                    ],
-                    plate_cfg=SceneEntityCfg("Plate"),
-                )
-                # For multiple environments, consider success only if all envs succeed
-                success = bool(success_tensor.all().item())
-                print("Task success status:", success)
-            except Exception as e:
-                print("Task failed due to:", e)
-                success = False
-
-            if start_record_state:
-                if args_cli.record:
-                    print("Stop recording.")
-                start_record_state = False
-
-            # Only mark the episode as successful if the task succeeds
-            if args_cli.record and success:
-                print("✅ Task succeeded. Marking this demonstration as SUCCESS.")
-                auto_terminate(env, True)
-                print("SUCCESS.")
-                current_recorded_demo_count += 1
-            else:
-                print("❌ Task failed. Marking this demonstration as FAILURE.")
-                auto_terminate(env, False)
-
-            if (
-                args_cli.record
-                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                > current_recorded_demo_count
-            ):
-                current_recorded_demo_count = (
-                    env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                )
-                print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
-
-            if (
-                args_cli.record
-                and args_cli.num_demos > 0
-                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                >= args_cli.num_demos
-            ):
-                print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
-                break
-
-            # Reset environment and state machine for the next episode
-            env.reset()
-            sm.reset()
-
-            if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
-                print(f"All {args_cli.num_demos} demonstrations have been recorded. Exiting.")
-                break
-        else:
-            if not start_record_state:
-                if args_cli.record:
-                    print("Start recording.")
-                start_record_state = True
-            actions = sm.get_action(env)
-            if sm.orange_now == 3 and sm.step_count >= 680:
-                if sm.step_count == 680:
-                    _home_start_pos = _robot.data.joint_pos.clone()
-                _alpha = (sm.step_count - 680) / 299.0  # 0.0 at step 680, 1.0 at step 979
-                _blended_pos = _home_start_pos + (_rest_joint_pos - _home_start_pos) * _alpha
-                _robot.write_joint_state_to_sim(
-                    position=_blended_pos,
-                    velocity=torch.zeros_like(_blended_pos),
-                )
-            _orange_before_step = sm.orange_now
-            env.step(actions)
-            sm.advance()
-            # For oranges 1 and 2: skip the return home phase (steps 680-979) without
-            # simulation — there is no point returning home when the next orange follows
-            # immediately. Fast-forward advance() until the orange index increments.
-            if (
-                not sm.is_episode_done
-                and sm.orange_now == _orange_before_step
-                and sm.orange_now < 3
-                and sm.step_count >= 680
-            ):
-                while sm.orange_now == _orange_before_step and not sm.is_episode_done:
-                    sm.advance()
-
-        if rate_limiter:
-            rate_limiter.sleep(env)
-
-    if args_cli.record and hasattr(env.recorder_manager, "finalize"):
-        env.recorder_manager.finalize()
-
-    env.close()
-    simulation_app.close()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/source/leisaac/leisaac/datagen/__init__.py b/source/leisaac/leisaac/datagen/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/source/leisaac/leisaac/datagen/state_machine/__init__.py b/source/leisaac/leisaac/datagen/state_machine/__init__.py
new file mode 100644
index 00000000..98a314a2
--- /dev/null
+++ b/source/leisaac/leisaac/datagen/state_machine/__init__.py
@@ -0,0 +1,3 @@
+from .pick_orange import PickOrangeStateMachine
+
+__all__ = ["PickOrangeStateMachine"]
diff --git a/source/leisaac/leisaac/state_machine/base.py b/source/leisaac/leisaac/datagen/state_machine/base.py
similarity index 63%
rename from source/leisaac/leisaac/state_machine/base.py
rename to source/leisaac/leisaac/datagen/state_machine/base.py
index 3fc2ba86..8139635b 100644
--- a/source/leisaac/leisaac/state_machine/base.py
+++ b/source/leisaac/leisaac/datagen/state_machine/base.py
@@ -17,13 +17,44 @@ class StateMachineBase(ABC):
 
         sm = MyStateMachine()
         env.reset()
+        sm.setup(env)
         while not sm.is_episode_done:
+            sm.pre_step(env)
             actions = sm.get_action(env)
             env.step(actions)
             sm.advance()
+        success = sm.check_success(env)
         sm.reset()
     """
 
+    @abstractmethod
+    def setup(self, env) -> None:
+        """One-time setup after the environment is created.
+
+        Use this for any calibration that requires a live env (e.g. FK
+        calibration to compute rest-pose EE positions). Called once before
+        the main recording loop starts.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def check_success(self, env) -> bool:
+        """Evaluate whether the current episode was successful.
+
+        Called at episode end (when :attr:`is_episode_done` is ``True``)
+        before resetting. The implementation may temporarily teleport joints
+        to a canonical pose for evaluation.
+        """
+        raise NotImplementedError
+
+    def pre_step(self, env) -> None:
+        """Optional per-step hook called before :meth:`get_action` and ``env.step``.
+
+        Override to inject direct joint-state writes or other per-step
+        overrides that must happen before the action is applied (e.g. blended
+        home-pose control).  Default implementation is a no-op.
+        """
+
     @abstractmethod
     def get_action(self, env) -> torch.Tensor:
         """Compute and return the action tensor for the current step.
@@ -45,7 +76,8 @@ def advance(self) -> None:
         """Advance the internal step counter and manage state transitions.
 
         Should be called exactly once after each :meth:`env.step` call.
-        Internally handles multi-phase and multi-object transitions.
+        Internally handles multi-phase and multi-object transitions,
+        including any fast-forward optimisations.
         """
         raise NotImplementedError
 
diff --git a/source/leisaac/leisaac/datagen/state_machine/pick_orange.py b/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
new file mode 100644
index 00000000..fc27f848
--- /dev/null
+++ b/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
@@ -0,0 +1,320 @@
+"""State machine for the pick-orange task."""
+
+import math
+
+import torch
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
+
+from .base import StateMachineBase
+from leisaac.tasks.pick_orange.mdp import task_done
+
+# ---------------------------------------------------------------------------
+# Module-level helpers
+# ---------------------------------------------------------------------------
+
+_GRIPPER_OPEN = 1.0
+_GRIPPER_CLOSE = -1.0
+_GRIPPER_OFFSET = 0.1  # vertical clearance for the gripper tip
+_APPROACH_STEPS: int = 120  # steps to smoothly interpolate from init EE pos to hover (first orange only)
+
+_REST_POSE_DEG = {
+    "shoulder_pan": 0.0,
+    "shoulder_lift": -100.0,
+    "elbow_flex": 90.0,
+    "wrist_flex": 50.0,
+    "wrist_roll": 0.0,
+    "gripper": -10.0,
+}
+
+
+def _apply_triangle_offset(pos_tensor: torch.Tensor, orange_now: int, radius: float = 0.1) -> torch.Tensor:
+    """Apply an equilateral-triangle offset on the x-y plane."""
+    idx = (orange_now - 1) % 3
+    angle = idx * (2 * math.pi / 3)
+    pos_tensor[:, 0] += radius * math.cos(angle)
+    pos_tensor[:, 1] += radius * math.sin(angle)
+    return pos_tensor
+
+
+# ---------------------------------------------------------------------------
+# State machine
+# ---------------------------------------------------------------------------
+
+
+class PickOrangeStateMachine(StateMachineBase):
+    """State machine for the pick-orange manipulation task.
+
+    The robot cycles through *num_oranges* oranges.  For each orange it
+    executes a fixed sequence of steps that moves the gripper above the
+    orange, grasps it, lifts it, transports it to the plate and places it.
+
+    Args:
+        num_oranges: Total number of oranges to pick and place. Defaults to 3.
+    """
+
+    MAX_STEPS_PER_ORANGE: int = 980
+
+    def __init__(self, num_oranges: int = 3) -> None:
+        self._num_oranges = num_oranges
+        self._step_count: int = 0
+        self._orange_now: int = 1
+        self._episode_done: bool = False
+        self._initial_ee_pos: torch.Tensor | None = None
+        self._rest_ee_pos_world: torch.Tensor | None = None
+        self._rest_joint_pos: torch.Tensor | None = None
+        self._home_start_pos: torch.Tensor | None = None
+
+    # ------------------------------------------------------------------
+    # StateMachineBase interface
+    # ------------------------------------------------------------------
+
+    def setup(self, env) -> None:
+        """FK calibration: drive arm to rest pose and record the EE world position.
+
+        Teleports joints to the SO-101 rest pose, steps the simulation once
+        to propagate kinematics, then reads ``body_pos_w`` to get the EE world
+        position that corresponds to the rest pose.  This position is used as
+        the home target during the return-home phase so that ``task_done()``
+        can verify the arm is within the expected joint-angle tolerances.
+        """
+        robot = env.scene["robot"]
+        joint_names = list(robot.data.joint_names)
+
+        self._rest_joint_pos = torch.zeros(env.num_envs, len(joint_names), device=env.device)
+        for idx, name in enumerate(joint_names):
+            if name in _REST_POSE_DEG:
+                self._rest_joint_pos[:, idx] = _REST_POSE_DEG[name] * torch.pi / 180.0
+
+        robot.write_joint_state_to_sim(
+            position=self._rest_joint_pos,
+            velocity=torch.zeros_like(self._rest_joint_pos),
+        )
+        env.sim.step(render=False)
+        env.scene.update(dt=env.physics_dt)
+        self._rest_ee_pos_world = robot.data.body_pos_w[:, -1, :].clone()
+        print(f"[Calibration] Rest pose EE (world): {self._rest_ee_pos_world[0].cpu().numpy()}")
+
+        # Return scene to default initial state before the recording loop.
+        env.reset()
+
+    def check_success(self, env) -> bool:
+        """Return True if all oranges are on the plate and the arm is at rest."""
+        robot = env.scene["robot"]
+        if self._rest_joint_pos is not None:
+            robot.write_joint_state_to_sim(
+                position=self._rest_joint_pos,
+                velocity=torch.zeros_like(self._rest_joint_pos),
+            )
+            env.scene.update(dt=env.physics_dt)
+
+        success_tensor = task_done(
+            env,
+            oranges_cfg=[
+                SceneEntityCfg("Orange001"),
+                SceneEntityCfg("Orange002"),
+                SceneEntityCfg("Orange003"),
+            ],
+            plate_cfg=SceneEntityCfg("Plate"),
+        )
+        return bool(success_tensor.all().item())
+
+    def pre_step(self, env) -> None:
+        """Blend joint state toward rest pose during the final orange's home phase.
+
+        Only active when ``orange_now == num_oranges`` and ``step_count >= 680``.
+        Writes blended joint positions directly to the sim so the arm smoothly
+        returns home while IK still receives the rest-pose EE target.
+        """
+        if (
+            self._orange_now == self._num_oranges
+            and self._step_count >= 680
+            and self._rest_joint_pos is not None
+        ):
+            robot = env.scene["robot"]
+            if self._step_count == 680:
+                self._home_start_pos = robot.data.joint_pos.clone()
+            if self._home_start_pos is not None:
+                alpha = min((self._step_count - 680) / 299.0, 1.0)
+                blended = self._home_start_pos + (self._rest_joint_pos - self._home_start_pos) * alpha
+                robot.write_joint_state_to_sim(position=blended, velocity=torch.zeros_like(blended))
+
+    def get_action(self, env) -> torch.Tensor:
+        """Compute the action tensor for the current step (8D IK pose target)."""
+        robot = env.scene["robot"]
+        robot.write_joint_damping_to_sim(damping=10.0)
+
+        device = env.device
+        num_envs = env.num_envs
+        step = self._step_count
+
+        orange_pos_w = env.scene[f"Orange00{self._orange_now}"].data.root_pos_w.clone()
+        plate_pos_w = env.scene["Plate"].data.root_pos_w.clone()
+        robot_base_pos_w = robot.data.root_pos_w.clone()
+        robot_base_quat_w = robot.data.root_quat_w.clone()
+
+        target_quat_w = quat_from_euler_xyz(
+            torch.tensor(0.0, device=device),
+            torch.tensor(0.0, device=device),
+            torch.tensor(0.0, device=device),
+        ).repeat(num_envs, 1)
+        target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
+
+        if self._orange_now == 1 and step == 0:
+            self._initial_ee_pos = robot.data.body_pos_w[:, -1, :].clone()
+
+        if self._orange_now == 1 and step < _APPROACH_STEPS:
+            target_pos_w, gripper_cmd = self._phase_approach_hover(orange_pos_w, num_envs, device)
+        elif step < 180:
+            target_pos_w, gripper_cmd = self._phase_move_above_orange(orange_pos_w, num_envs, device)
+        elif step < 300:
+            target_pos_w, gripper_cmd = self._phase_hover_above_orange(orange_pos_w, num_envs, device)
+        elif step < 360:
+            target_pos_w, gripper_cmd = self._phase_lower_to_orange(orange_pos_w, num_envs, device)
+        elif step < 420:
+            target_pos_w, gripper_cmd = self._phase_grasp(orange_pos_w, num_envs, device)
+        elif step < 500:
+            target_pos_w, gripper_cmd = self._phase_lift_orange(orange_pos_w, num_envs, device)
+        elif step < 550:
+            target_pos_w, gripper_cmd = self._phase_move_above_plate(plate_pos_w, num_envs, device)
+        elif step < 600:
+            target_pos_w, gripper_cmd = self._phase_lower_to_plate(plate_pos_w, num_envs, device)
+        elif step < 640:
+            target_pos_w, gripper_cmd = self._phase_release(plate_pos_w, num_envs, device)
+        elif step < 680:
+            target_pos_w, gripper_cmd = self._phase_lift_gripper(plate_pos_w, num_envs, device)
+        else:
+            target_pos_w, gripper_cmd = self._phase_return_home(num_envs, device)
+
+        diff_w = target_pos_w - robot_base_pos_w
+        target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
+        return torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
+
+    def advance(self) -> None:
+        """Advance step counter, handle orange transitions, and fast-forward home phase.
+
+        For non-final oranges, the return-home phase (steps 680–979) is skipped
+        without simulation — the arm goes straight to the next orange.
+        """
+        self._step_count += 1
+        if self._step_count >= self.MAX_STEPS_PER_ORANGE:
+            if self._orange_now >= self._num_oranges:
+                self._episode_done = True
+            else:
+                self._orange_now += 1
+                self._step_count = 0
+        elif self._orange_now < self._num_oranges and self._step_count >= 680:
+            # Fast-forward: skip the home phase for intermediate oranges.
+            prev_orange = self._orange_now
+            while self._orange_now == prev_orange and not self._episode_done:
+                self._step_count += 1
+                if self._step_count >= self.MAX_STEPS_PER_ORANGE:
+                    self._orange_now += 1
+                    self._step_count = 0
+
+    def reset(self) -> None:
+        """Reset the state machine to its initial state for a new episode."""
+        self._step_count = 0
+        self._orange_now = 1
+        self._episode_done = False
+        self._initial_ee_pos = None
+        self._home_start_pos = None
+
+    # ------------------------------------------------------------------
+    # Phase methods
+    # ------------------------------------------------------------------
+
+    def _phase_approach_hover(self, orange_pos_w, num_envs, device):
+        hover_target = orange_pos_w.clone()
+        hover_target[:, 0] -= 0.03
+        hover_target[:, 1] -= 0.01
+        hover_target[:, 2] += 0.1 + _GRIPPER_OFFSET
+        alpha = self._step_count / _APPROACH_STEPS
+        if self._initial_ee_pos is not None:
+            target_pos_w = (1.0 - alpha) * self._initial_ee_pos + alpha * hover_target
+        else:
+            target_pos_w = hover_target
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+
+    def _phase_move_above_orange(self, orange_pos_w, num_envs, device):
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.01
+        target_pos_w[:, 2] += 0.15 + _GRIPPER_OFFSET
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+
+    def _phase_hover_above_orange(self, orange_pos_w, num_envs, device):
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.01
+        target_pos_w[:, 2] += 0.1 + _GRIPPER_OFFSET
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+
+    def _phase_lower_to_orange(self, orange_pos_w, num_envs, device):
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.01
+        target_pos_w[:, 2] += _GRIPPER_OFFSET
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+
+    def _phase_grasp(self, orange_pos_w, num_envs, device):
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.01
+        target_pos_w[:, 2] += _GRIPPER_OFFSET
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+
+    def _phase_lift_orange(self, orange_pos_w, num_envs, device):
+        target_pos_w = orange_pos_w.clone()
+        target_pos_w[:, 0] -= 0.03
+        target_pos_w[:, 1] -= 0.01
+        target_pos_w[:, 2] += 0.25
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+
+    def _phase_move_above_plate(self, plate_pos_w, num_envs, device):
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += 0.25
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+
+    def _phase_lower_to_plate(self, plate_pos_w, num_envs, device):
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
+        _apply_triangle_offset(target_pos_w, self._orange_now)
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
+
+    def _phase_release(self, plate_pos_w, num_envs, device):
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
+        _apply_triangle_offset(target_pos_w, self._orange_now)
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+
+    def _phase_lift_gripper(self, plate_pos_w, num_envs, device):
+        target_pos_w = plate_pos_w.clone()
+        target_pos_w[:, 2] += 0.2
+        _apply_triangle_offset(target_pos_w, self._orange_now)
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+
+    def _phase_return_home(self, num_envs, device):
+        if self._rest_ee_pos_world is not None:
+            target_pos_w = self._rest_ee_pos_world.clone()
+        elif self._initial_ee_pos is not None:
+            target_pos_w = self._initial_ee_pos.clone()
+        else:
+            target_pos_w = torch.zeros(num_envs, 3, device=device)
+        return target_pos_w, torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def is_episode_done(self) -> bool:
+        return self._episode_done
+
+    @property
+    def orange_now(self) -> int:
+        return self._orange_now
+
+    @property
+    def step_count(self) -> int:
+        return self._step_count
diff --git a/source/leisaac/leisaac/state_machine/__init__.py b/source/leisaac/leisaac/state_machine/__init__.py
deleted file mode 100644
index b53ddf16..00000000
--- a/source/leisaac/leisaac/state_machine/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""State machine implementations for LeIsaac tasks."""
-
-from .base import StateMachineBase
-from .fold_cloth import FoldClothStateMachine
-from .pick_orange import PickOrangeStateMachine
diff --git a/source/leisaac/leisaac/state_machine/pick_orange.py b/source/leisaac/leisaac/state_machine/pick_orange.py
deleted file mode 100644
index f5aea210..00000000
--- a/source/leisaac/leisaac/state_machine/pick_orange.py
+++ /dev/null
@@ -1,458 +0,0 @@
-"""State machine for the pick-orange task."""
-
-import math
-
-import torch
-from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
-
-from .base import StateMachineBase
-
-# ---------------------------------------------------------------------------
-# Module-level helpers
-# ---------------------------------------------------------------------------
-
-_GRIPPER_OPEN = 1.0
-_GRIPPER_CLOSE = -1.0
-_GRIPPER_OFFSET = 0.1  # vertical clearance for the gripper tip
-_APPROACH_STEPS: int = 120  # steps to smoothly interpolate from init EE pos to hover (first orange only)
-
-
-def _apply_triangle_offset(pos_tensor: torch.Tensor, orange_now: int, radius: float = 0.1) -> torch.Tensor:
-    """Apply an equilateral-triangle offset on the x-y plane.
-
-    Distributes up to three target positions evenly around a circle so that
-    oranges placed on the plate do not overlap.
-
-    Args:
-        pos_tensor: Position tensor of shape ``(num_envs, 3)`` in world coordinates.
-            Modified **in-place**.
-        orange_now: 1-based index of the current orange (1, 2, or 3).
-        radius: Distance from the plate centre to each triangle vertex in metres.
-
-    Returns:
-        The modified ``pos_tensor`` (same object, for convenience).
-    """
-    idx = (orange_now - 1) % 3
-    angle = idx * (2 * math.pi / 3)
-    pos_tensor[:, 0] += radius * math.cos(angle)
-    pos_tensor[:, 1] += radius * math.sin(angle)
-    return pos_tensor
-
-
-# ---------------------------------------------------------------------------
-# State machine
-# ---------------------------------------------------------------------------
-
-
-class PickOrangeStateMachine(StateMachineBase):
-    """State machine for the pick-orange manipulation task.
-
-    The robot cycles through *num_oranges* oranges.  For each orange it
-    executes a fixed sequence of 420 steps that moves the gripper above the
-    orange, grasps it, lifts it, transports it to the plate and places it at
-    one vertex of an equilateral triangle arrangement.
-
-    Args:
-        num_oranges: Total number of oranges to pick and place. Defaults to 3.
-
-    Attributes:
-        MAX_STEPS_PER_ORANGE (int): Number of simulation steps per orange cycle.
-
-    Example::
-
-        sm = PickOrangeStateMachine(num_oranges=3)
-        env.reset()
-        while not sm.is_episode_done:
-            actions = sm.get_action(env)
-            env.step(actions)
-            sm.advance()
-        success = task_done(env, ...)
-        sm.reset()
-    """
-
-    MAX_STEPS_PER_ORANGE: int = 980
-
-    def __init__(self, num_oranges: int = 3, rest_ee_pos_world: torch.Tensor | None = None) -> None:
-        self._num_oranges = num_oranges
-        self._step_count: int = 0
-        self._orange_now: int = 1
-        self._episode_done: bool = False
-        self._initial_ee_pos: torch.Tensor | None = None
-        self._rest_ee_pos_world = rest_ee_pos_world  # (num_envs, 3) world frame, computed by FK calibration
-
-    # ------------------------------------------------------------------
-    # StateMachineBase interface
-    # ------------------------------------------------------------------
-
-    def get_action(self, env) -> torch.Tensor:
-        """Compute the action tensor for the current step.
-
-        Reads object poses from ``env.scene``, dispatches to the appropriate
-        phase method, then converts the world-frame target position to the
-        robot base frame and assembles the full action tensor.
-
-        The scene is expected to contain:
-        - ``"Orange00{orange_now}"`` – the current orange rigid object
-        - ``"Plate"`` – the target plate rigid object
-        - ``"robot"`` – the manipulator
-
-        Args:
-            env: The simulation environment.  Must expose ``env.device``,
-                ``env.num_envs``, and ``env.scene``.
-
-        Returns:
-            Action tensor of shape ``(num_envs, 8)`` with layout
-            ``[pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w, gripper]``
-            expressed in the robot base frame.
-        """
-        robot = env.scene["robot"]
-        robot.write_joint_damping_to_sim(damping=10.0)
-
-        device = env.device
-        num_envs = env.num_envs
-        step = self._step_count
-
-        orange_pos_w = env.scene[f"Orange00{self._orange_now}"].data.root_pos_w.clone()
-        plate_pos_w = env.scene["Plate"].data.root_pos_w.clone()
-        robot_base_pos_w = env.scene["robot"].data.root_pos_w.clone()
-        robot_base_quat_w = env.scene["robot"].data.root_quat_w.clone()
-
-        # Fixed end-effector orientation (no pitch/roll)
-        target_quat_w = quat_from_euler_xyz(
-            torch.tensor(0.0, device=device),
-            torch.tensor(0.0, device=device),
-            torch.tensor(0.0, device=device),
-        ).repeat(num_envs, 1)
-        target_quat = quat_mul(quat_inv(robot_base_quat_w), target_quat_w)
-
-        # Capture initial EE position from robot body data at episode start (step 0, orange 1).
-        # body_pos_w is always valid after env.reset() and does not suffer from stale sensor data.
-        if self._orange_now == 1 and step == 0:
-            self._initial_ee_pos = env.scene["robot"].data.body_pos_w[:, -1, :].clone()
-            # 或者直接设置 drive params（依据你 robot API）
-        # --- Phase dispatch ---
-        if self._orange_now == 1 and step < _APPROACH_STEPS:
-            target_pos_w, gripper_cmd = self._phase_approach_hover(orange_pos_w, num_envs, device)
-        elif step < 180:
-            target_pos_w, gripper_cmd = self._phase_move_above_orange(orange_pos_w, num_envs, device)
-        elif step < 300:
-            target_pos_w, gripper_cmd = self._phase_hover_above_orange(orange_pos_w, num_envs, device)
-        elif step < 360:
-            target_pos_w, gripper_cmd = self._phase_lower_to_orange(orange_pos_w, num_envs, device)
-        elif step < 420:
-            target_pos_w, gripper_cmd = self._phase_grasp(orange_pos_w, num_envs, device)
-        elif step < 500:
-            target_pos_w, gripper_cmd = self._phase_lift_orange(orange_pos_w, num_envs, device)
-        elif step < 550:
-            target_pos_w, gripper_cmd = self._phase_move_above_plate(plate_pos_w, num_envs, device)
-        elif step < 600:
-            target_pos_w, gripper_cmd = self._phase_lower_to_plate(plate_pos_w, num_envs, device)
-        elif step < 640:
-            target_pos_w, gripper_cmd = self._phase_release(plate_pos_w, num_envs, device)
-        elif step < 680:
-            target_pos_w, gripper_cmd = self._phase_lift_gripper(plate_pos_w, num_envs, device)
-        else:
-            target_pos_w, gripper_cmd = self._phase_return_home(num_envs, device)
-
-        diff_w = target_pos_w - robot_base_pos_w
-        target_pos_local = quat_apply(quat_inv(robot_base_quat_w), diff_w)
-
-        return torch.cat([target_pos_local, target_quat, gripper_cmd], dim=-1)
-
-    # ------------------------------------------------------------------
-    # Phase methods  (steps 0-419, one method per phase)
-    # ------------------------------------------------------------------
-
-    def _phase_approach_hover(
-        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 0–_APPROACH_STEPS-1 of the first orange: smoothly approach hover position.
-
-        Linearly interpolates the IK target from the robot's initial end-effector
-        position (captured at step 0 from ``robot.data.body_pos_w``) to the hover
-        position above the orange.  This avoids the sudden large IK error that
-        occurs when jumping directly from the zero-joint configuration to the hover
-        target, which caused the arm to visually drop and oscillate.
-
-        Only active for the first orange; subsequent oranges proceed directly to the
-        hover phase because the arm is already in a reasonable working configuration.
-
-        Args:
-            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – interpolated world position and open gripper command.
-        """
-        hover_target = orange_pos_w.clone()
-        hover_target[:, 0] -= 0.03
-        hover_target[:, 1] -= 0.01
-        hover_target[:, 2] += 0.1 + _GRIPPER_OFFSET
-
-        alpha = self._step_count / _APPROACH_STEPS  # 0.0 at step 0, 1.0 at step _APPROACH_STEPS
-        if self._initial_ee_pos is not None:
-            target_pos_w = (1.0 - alpha) * self._initial_ee_pos + alpha * hover_target
-        else:
-            target_pos_w = hover_target
-
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_move_above_orange(
-        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 120–179 (orange 1) / 0–179 (oranges 2-3): move to a high position directly above the orange.
-
-        This phase brings the gripper to a clear altitude above the orange before
-        the finer hover and lower phases.  It is the first phase executed for
-        oranges 2 and 3 (where the robot transits from the plate side), and runs
-        immediately after ``_phase_approach_hover`` for the first orange.
-
-        Args:
-            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and open gripper command.
-        """
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.01
-        target_pos_w[:, 2] += 0.15 + _GRIPPER_OFFSET
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_hover_above_orange(
-        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 180–299: hold the gripper at a hover position above the orange.
-
-        Args:
-            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
-        """
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.01
-        target_pos_w[:, 2] += 0.1 + _GRIPPER_OFFSET
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_lower_to_orange(
-        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 300–359: lower the gripper down to grasp height over the orange.
-
-        Args:
-            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
-        """
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.01
-        target_pos_w[:, 2] += _GRIPPER_OFFSET
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_grasp(
-        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 360–419: close the gripper to grasp the orange.
-
-        Args:
-            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
-        """
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.01
-        target_pos_w[:, 2] += _GRIPPER_OFFSET
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_lift_orange(
-        self, orange_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 420–499: lift the grasped orange upward.
-
-        Args:
-            orange_pos_w: Orange position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
-        """
-        target_pos_w = orange_pos_w.clone()
-        target_pos_w[:, 0] -= 0.03
-        target_pos_w[:, 1] -= 0.01
-        target_pos_w[:, 2] += 0.25
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_move_above_plate(
-        self, plate_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 220–319: transport the orange to a hover position above the plate.
-
-        Args:
-            plate_pos_w: Plate position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
-        """
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += 0.25
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_lower_to_plate(
-        self, plate_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 320–349: lower the orange to the placement height on the plate.
-
-        The target position is offset to one vertex of an equilateral triangle
-        so that successive oranges do not overlap.
-
-        Args:
-            plate_pos_w: Plate position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
-        """
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
-        _apply_triangle_offset(target_pos_w, self._orange_now)
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_CLOSE, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_release(
-        self, plate_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 350–379: open the gripper to release the orange onto the plate.
-
-        Args:
-            plate_pos_w: Plate position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
-        """
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += _GRIPPER_OFFSET + 0.1
-        _apply_triangle_offset(target_pos_w, self._orange_now)
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_lift_gripper(
-        self, plate_pos_w: torch.Tensor, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 380–419: lift the gripper clear of the plate after releasing.
-
-        Args:
-            plate_pos_w: Plate position in world frame, shape ``(num_envs, 3)``.
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – target world position and gripper command.
-        """
-        target_pos_w = plate_pos_w.clone()
-        target_pos_w[:, 2] += 0.2
-        _apply_triangle_offset(target_pos_w, self._orange_now)
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return target_pos_w, gripper_cmd
-
-    def _phase_return_home(
-        self, num_envs: int, device: str
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Steps 620–919: return the gripper to the robot's rest pose end-effector position.
-
-        Targets the EE position that was computed by FK calibration at startup
-        (``rest_ee_pos_world``), which corresponds to the SO-101 rest pose joints
-        (shoulder_lift ≈ -100°, elbow_flex ≈ 90°, wrist_flex ≈ 50°).  The
-        incremental IK will drive the arm toward this position over the 300 steps
-        (~5 s at 60 Hz), giving it enough time to converge within the ±30° joint
-        tolerance required by ``task_done``.
-
-        Falls back to ``_initial_ee_pos`` (zero-joint EE) if calibration was
-        not performed.
-
-        Args:
-            num_envs: Number of parallel environments.
-            device: Torch device string.
-
-        Returns:
-            ``(target_pos_w, gripper_cmd)`` – rest-pose world position and open gripper command.
-        """
-        if self._rest_ee_pos_world is not None:
-            target_pos_w = self._rest_ee_pos_world.clone()
-        elif self._initial_ee_pos is not None:
-            target_pos_w = self._initial_ee_pos.clone()
-        else:
-            target_pos_w = torch.zeros(num_envs, 3, device=device)
-        gripper_cmd = torch.full((num_envs, 1), _GRIPPER_OPEN, device=device)
-        return target_pos_w, gripper_cmd
-
-    def advance(self) -> None:
-        """Advance the internal step counter and manage orange transitions.
-
-        When the current orange's cycle completes (``step_count`` reaches
-        :attr:`MAX_STEPS_PER_ORANGE`), the machine either:
-
-        - increments ``orange_now`` and resets ``step_count`` if more oranges
-          remain, or
-        - sets :attr:`is_episode_done` to ``True`` if all oranges are done.
-        """
-        self._step_count += 1
-        if self._step_count >= self.MAX_STEPS_PER_ORANGE:
-            if self._orange_now >= self._num_oranges:
-                self._episode_done = True
-            else:
-                self._orange_now += 1
-                self._step_count = 0
-
-    def reset(self) -> None:
-        """Reset the state machine to its initial state for a new episode."""
-        self._step_count = 0
-        self._orange_now = 1
-        self._episode_done = False
-        self._initial_ee_pos = None
-
-    # ------------------------------------------------------------------
-    # Properties
-    # ------------------------------------------------------------------
-
-    @property
-    def is_episode_done(self) -> bool:
-        """``True`` once all oranges have been picked and placed."""
-        return self._episode_done
-
-    @property
-    def orange_now(self) -> int:
-        """1-based index of the orange currently being handled."""
-        return self._orange_now
-
-    @property
-    def step_count(self) -> int:
-        """Number of steps elapsed within the current orange's cycle."""
-        return self._step_count

From f26debff558c71667602a4efde1161c31305831c Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 5 Mar 2026 21:17:56 -0500
Subject: [PATCH 47/68] Change format

---
 scripts/datagen/state_machine/generate.py | 210 +++++++++++++---------
 1 file changed, 125 insertions(+), 85 deletions(-)

diff --git a/scripts/datagen/state_machine/generate.py b/scripts/datagen/state_machine/generate.py
index a7e4cf60..f2eac32f 100644
--- a/scripts/datagen/state_machine/generate.py
+++ b/scripts/datagen/state_machine/generate.py
@@ -9,33 +9,46 @@
         --record --dataset_file ./datasets/pick_orange.hdf5 --num_demos 50
 """
 
-import argparse
+"""Launch Isaac Sim Simulator first."""
 import multiprocessing
-import os
-import time
 
 if multiprocessing.get_start_method() != "spawn":
     multiprocessing.set_start_method("spawn", force=True)
 
+import argparse
+import os
+import time
+
 from isaaclab.app import AppLauncher
 
+# add argparse arguments
 parser = argparse.ArgumentParser(description="State machine data generation for LeIsaac tasks.")
-parser.add_argument("--num_envs", type=int, default=1)
-parser.add_argument("--task", type=str, required=True)
-parser.add_argument("--seed", type=int, default=None)
-parser.add_argument("--record", action="store_true")
-parser.add_argument("--step_hz", type=int, default=60)
-parser.add_argument("--dataset_file", type=str, default="./datasets/dataset.hdf5")
-parser.add_argument("--resume", action="store_true")
-parser.add_argument("--num_demos", type=int, default=1)
-parser.add_argument("--quality", action="store_true")
-parser.add_argument("--use_lerobot_recorder", action="store_true")
-parser.add_argument("--lerobot_dataset_repo_id", type=str, default=None)
-parser.add_argument("--lerobot_dataset_fps", type=int, default=30)
+parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to simulate.")
+parser.add_argument("--task", type=str, required=True, help="Name of the task.")
+parser.add_argument("--seed", type=int, default=None, help="Seed for the environment.")
+parser.add_argument("--record", action="store_true", help="Whether to enable record function.")
+parser.add_argument("--step_hz", type=int, default=60, help="Environment stepping rate in Hz.")
+parser.add_argument(
+    "--dataset_file", type=str, default="./datasets/dataset.hdf5", help="File path to export recorded demos."
+)
+parser.add_argument("--resume", action="store_true", help="Whether to resume recording in the existing dataset file.")
+parser.add_argument(
+    "--num_demos", type=int, default=1, help="Number of demonstrations to record. Set to 0 for infinite."
+)
+parser.add_argument("--quality", action="store_true", help="Whether to enable quality render mode.")
+parser.add_argument("--use_lerobot_recorder", action="store_true", help="Whether to use lerobot recorder.")
+parser.add_argument("--lerobot_dataset_repo_id", type=str, default=None, help="Lerobot Dataset repository ID.")
+parser.add_argument("--lerobot_dataset_fps", type=int, default=30, help="Lerobot Dataset frames per second.")
+
+# append AppLauncher cli args
 AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
 args_cli = parser.parse_args()
 
-app_launcher = AppLauncher(vars(args_cli))
+app_launcher_args = vars(args_cli)
+
+# launch omniverse app
+app_launcher = AppLauncher(app_launcher_args)
 simulation_app = app_launcher.app
 
 import gymnasium as gym
@@ -59,18 +72,28 @@
 
 
 class RateLimiter:
+    """Convenience class for enforcing rates in loops."""
+
     def __init__(self, hz):
+        """
+        Args:
+            hz (int): frequency to enforce
+        """
         self.hz = hz
         self.last_time = time.time()
         self.sleep_duration = 1.0 / hz
         self.render_period = min(0.0166, self.sleep_duration)
 
     def sleep(self, env):
+        """Attempt to sleep at the specified rate in hz."""
         next_wakeup_time = self.last_time + self.sleep_duration
         while time.time() < next_wakeup_time:
             time.sleep(self.render_period)
             env.sim.render()
+
         self.last_time = self.last_time + self.sleep_duration
+
+        # detect time jumping forwards (e.g. loop is too slow)
         if self.last_time < time.time():
             while self.last_time < time.time():
                 self.last_time += self.sleep_duration
@@ -91,10 +114,10 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         env.termination_manager.compute()
     elif hasattr(env, "_get_dones"):
         env.cfg.return_success_status = success
-    return False
 
 
 def main():
+    """Run a state machine in a LeIsaac manipulation environment."""
     task_name = args_cli.task
     if task_name not in TASK_REGISTRY:
         raise ValueError(
@@ -103,8 +126,10 @@ def main():
         )
     SMClass, device = TASK_REGISTRY[task_name]
 
+    # get directory path and file name (without extension) from cli arguments
     output_dir = os.path.dirname(args_cli.dataset_file)
     output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
+    # create directory if it does not exist
     if output_dir and not os.path.exists(output_dir):
         os.makedirs(output_dir)
 
@@ -112,16 +137,19 @@ def main():
     env_cfg.use_teleop_device(device)
     env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
 
+    # timeout and terminate preprocess
     is_direct_env = "Direct" in task_name
     if is_direct_env:
         env_cfg.never_time_out = True
         env_cfg.auto_terminate = True
     else:
+        # modify configuration
         if hasattr(env_cfg.terminations, "time_out"):
             env_cfg.terminations.time_out = None
         if hasattr(env_cfg.terminations, "success"):
             env_cfg.terminations.success = None
 
+    # recorder preprocess & manual success terminate preprocess
     if args_cli.record:
         if args_cli.use_lerobot_recorder:
             if args_cli.resume:
@@ -131,12 +159,14 @@ def main():
         else:
             if args_cli.resume:
                 env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
-                assert os.path.exists(args_cli.dataset_file), \
-                    "Dataset file does not exist. Do not use '--resume' when recording a new dataset."
+                assert os.path.exists(
+                    args_cli.dataset_file
+                ), "the dataset file does not exist, please don't use '--resume' if you want to record a new dataset"
             else:
                 env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
-                assert not os.path.exists(args_cli.dataset_file), \
-                    "Dataset file already exists. Use '--resume' to resume recording."
+                assert not os.path.exists(
+                    args_cli.dataset_file
+                ), "the dataset file already exists, please use '--resume' to resume recording"
         env_cfg.recorders.dataset_export_dir_path = output_dir
         env_cfg.recorders.dataset_filename = output_file_name
         if is_direct_env:
@@ -150,8 +180,10 @@ def main():
     else:
         env_cfg.recorders = None
 
+    # create environment
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
 
+    # disable gravity for every robot link prim
     import omni.usd
     from pxr import PhysxSchema, UsdPhysics
 
@@ -160,11 +192,16 @@ def main():
         if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
             PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
 
+    # replace the original recorder manager with the streaming recorder manager or lerobot recorder manager
     if args_cli.record:
         del env.recorder_manager
         if args_cli.use_lerobot_recorder:
-            from leisaac.enhance.datasets.lerobot_dataset_handler import LeRobotDatasetCfg
-            from leisaac.enhance.managers.lerobot_recorder_manager import LeRobotRecorderManager
+            from leisaac.enhance.datasets.lerobot_dataset_handler import (
+                LeRobotDatasetCfg,
+            )
+            from leisaac.enhance.managers.lerobot_recorder_manager import (
+                LeRobotRecorderManager,
+            )
 
             dataset_cfg = LeRobotDatasetCfg(
                 repo_id=args_cli.lerobot_dataset_repo_id,
@@ -178,11 +215,12 @@ def main():
 
     rate_limiter = RateLimiter(args_cli.step_hz)
 
+    # reset environment
     if hasattr(env, "initialize"):
         env.initialize()
     env.reset()
 
-    # One-time state machine setup (e.g. FK calibration)
+    # one-time state machine setup (e.g. FK calibration)
     sm = SMClass()
     sm.setup(env)
     sm.reset()
@@ -190,81 +228,83 @@ def main():
     resume_recorded_demo_count = 0
     if args_cli.record and args_cli.resume:
         resume_recorded_demo_count = env.recorder_manager._dataset_file_handler.get_num_episodes()
-        print(f"Resuming recording with {resume_recorded_demo_count} existing demonstrations.")
+        print(f"Resume recording from existing dataset file with {resume_recorded_demo_count} demonstrations.")
     current_recorded_demo_count = resume_recorded_demo_count
 
     start_record_state = False
 
     while simulation_app.is_running() and not simulation_app.is_exiting():
-        if env.cfg.dynamic_reset_gripper_effort_limit:
-            dynamic_reset_gripper_effort_limit_sim(env, device)
-
-        if sm.is_episode_done:
-            try:
-                print("Episode complete. Checking task success...")
-                success = sm.check_success(env)
-                print("Task success:", success)
-            except Exception as e:
-                print("Success check failed:", e)
-                success = False
-
-            if start_record_state:
-                if args_cli.record:
-                    print("Stop recording.")
-                start_record_state = False
-
-            if args_cli.record and success:
-                print("✅ Task succeeded. Marking as SUCCESS.")
-                auto_terminate(env, True)
-                current_recorded_demo_count += 1
+        # run everything in inference mode
+        with torch.inference_mode():
+            if env.cfg.dynamic_reset_gripper_effort_limit:
+                dynamic_reset_gripper_effort_limit_sim(env, device)
+
+            if sm.is_episode_done:
+                try:
+                    success = sm.check_success(env)
+                except Exception as e:
+                    print("Success check failed:", e)
+                    success = False
+
+                if start_record_state:
+                    if args_cli.record:
+                        print("Stop Recording!!!")
+                    start_record_state = False
+
+                if args_cli.record and success:
+                    auto_terminate(env, True)
+                    current_recorded_demo_count += 1
+                else:
+                    auto_terminate(env, False)
+
+                # print out the current demo count if it has changed
+                if (
+                    args_cli.record
+                    and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                    > current_recorded_demo_count
+                ):
+                    current_recorded_demo_count = (
+                        env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                    )
+                    print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
+
+                if (
+                    args_cli.record
+                    and args_cli.num_demos > 0
+                    and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                    >= args_cli.num_demos
+                ):
+                    print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                    break
+
+                env.reset()
+                sm.reset()
+
+                if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
+                    print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                    break
             else:
-                print("❌ Task failed. Marking as FAILURE.")
-                auto_terminate(env, False)
-
-            if (
-                args_cli.record
-                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                > current_recorded_demo_count
-            ):
-                current_recorded_demo_count = (
-                    env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                )
-                print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
-
-            if (
-                args_cli.record
-                and args_cli.num_demos > 0
-                and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                >= args_cli.num_demos
-            ):
-                print(f"All {args_cli.num_demos} demonstrations recorded. Exiting.")
-                break
-
-            env.reset()
-            sm.reset()
-
-            if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
-                print(f"All {args_cli.num_demos} demonstrations recorded. Exiting.")
-                break
-        else:
-            if not start_record_state:
-                if args_cli.record:
-                    print("Start recording.")
-                start_record_state = True
+                if not start_record_state:
+                    if args_cli.record:
+                        print("Start Recording!!!")
+                    start_record_state = True
 
-            sm.pre_step(env)
-            actions = sm.get_action(env)
-            env.step(actions)
-            sm.advance()
+                sm.pre_step(env)
+                actions = sm.get_action(env)
+                env.step(actions)
+                sm.advance()
 
-        rate_limiter.sleep(env)
+            if rate_limiter:
+                rate_limiter.sleep(env)
 
+    # finalize the recorder manager
     if args_cli.record and hasattr(env.recorder_manager, "finalize"):
         env.recorder_manager.finalize()
-
+    # close the simulator
     env.close()
     simulation_app.close()
 
 
 if __name__ == "__main__":
+    # run the main function
     main()

From f4d40b763da054bfebe0867021fe1f918b0774de Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 5 Mar 2026 21:24:55 -0500
Subject: [PATCH 48/68] Refactor

---
 scripts/datagen/state_machine/generate.py | 27 +----------------------
 1 file changed, 1 insertion(+), 26 deletions(-)

diff --git a/scripts/datagen/state_machine/generate.py b/scripts/datagen/state_machine/generate.py
index f2eac32f..3c84e986 100644
--- a/scripts/datagen/state_machine/generate.py
+++ b/scripts/datagen/state_machine/generate.py
@@ -9,7 +9,6 @@
         --record --dataset_file ./datasets/pick_orange.hdf5 --num_demos 50
 """
 
-"""Launch Isaac Sim Simulator first."""
 import multiprocessing
 
 if multiprocessing.get_start_method() != "spawn":
@@ -21,7 +20,6 @@
 
 from isaaclab.app import AppLauncher
 
-# add argparse arguments
 parser = argparse.ArgumentParser(description="State machine data generation for LeIsaac tasks.")
 parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to simulate.")
 parser.add_argument("--task", type=str, required=True, help="Name of the task.")
@@ -40,14 +38,10 @@
 parser.add_argument("--lerobot_dataset_repo_id", type=str, default=None, help="Lerobot Dataset repository ID.")
 parser.add_argument("--lerobot_dataset_fps", type=int, default=30, help="Lerobot Dataset frames per second.")
 
-# append AppLauncher cli args
 AppLauncher.add_app_launcher_args(parser)
-# parse the arguments
 args_cli = parser.parse_args()
 
 app_launcher_args = vars(args_cli)
-
-# launch omniverse app
 app_launcher = AppLauncher(app_launcher_args)
 simulation_app = app_launcher.app
 
@@ -63,9 +57,7 @@
 
 import leisaac.tasks  # noqa: F401
 
-# ---------------------------------------------------------------------------
-# Task registry: maps gym task id → (StateMachineClass, device_type)
-# ---------------------------------------------------------------------------
+# Maps gym task id → (StateMachineClass, device_type)
 TASK_REGISTRY = {
     "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
 }
@@ -75,10 +67,6 @@ class RateLimiter:
     """Convenience class for enforcing rates in loops."""
 
     def __init__(self, hz):
-        """
-        Args:
-            hz (int): frequency to enforce
-        """
         self.hz = hz
         self.last_time = time.time()
         self.sleep_duration = 1.0 / hz
@@ -126,10 +114,8 @@ def main():
         )
     SMClass, device = TASK_REGISTRY[task_name]
 
-    # get directory path and file name (without extension) from cli arguments
     output_dir = os.path.dirname(args_cli.dataset_file)
     output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
-    # create directory if it does not exist
     if output_dir and not os.path.exists(output_dir):
         os.makedirs(output_dir)
 
@@ -137,19 +123,16 @@ def main():
     env_cfg.use_teleop_device(device)
     env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
 
-    # timeout and terminate preprocess
     is_direct_env = "Direct" in task_name
     if is_direct_env:
         env_cfg.never_time_out = True
         env_cfg.auto_terminate = True
     else:
-        # modify configuration
         if hasattr(env_cfg.terminations, "time_out"):
             env_cfg.terminations.time_out = None
         if hasattr(env_cfg.terminations, "success"):
             env_cfg.terminations.success = None
 
-    # recorder preprocess & manual success terminate preprocess
     if args_cli.record:
         if args_cli.use_lerobot_recorder:
             if args_cli.resume:
@@ -180,7 +163,6 @@ def main():
     else:
         env_cfg.recorders = None
 
-    # create environment
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
 
     # disable gravity for every robot link prim
@@ -192,7 +174,6 @@ def main():
         if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
             PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
 
-    # replace the original recorder manager with the streaming recorder manager or lerobot recorder manager
     if args_cli.record:
         del env.recorder_manager
         if args_cli.use_lerobot_recorder:
@@ -215,7 +196,6 @@ def main():
 
     rate_limiter = RateLimiter(args_cli.step_hz)
 
-    # reset environment
     if hasattr(env, "initialize"):
         env.initialize()
     env.reset()
@@ -234,7 +214,6 @@ def main():
     start_record_state = False
 
     while simulation_app.is_running() and not simulation_app.is_exiting():
-        # run everything in inference mode
         with torch.inference_mode():
             if env.cfg.dynamic_reset_gripper_effort_limit:
                 dynamic_reset_gripper_effort_limit_sim(env, device)
@@ -257,7 +236,6 @@ def main():
                 else:
                     auto_terminate(env, False)
 
-                # print out the current demo count if it has changed
                 if (
                     args_cli.record
                     and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
@@ -297,14 +275,11 @@ def main():
             if rate_limiter:
                 rate_limiter.sleep(env)
 
-    # finalize the recorder manager
     if args_cli.record and hasattr(env.recorder_manager, "finalize"):
         env.recorder_manager.finalize()
-    # close the simulator
     env.close()
     simulation_app.close()
 
 
 if __name__ == "__main__":
-    # run the main function
     main()

From 88d5c41346a2584d4cf2eb4eb4c174c8ab84014e Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 5 Mar 2026 21:50:08 -0500
Subject: [PATCH 49/68] Change Isaaclab version==2.3.2

---
 dependencies/IsaacLab | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dependencies/IsaacLab b/dependencies/IsaacLab
index 0f030aa6..37ddf626 160000
--- a/dependencies/IsaacLab
+++ b/dependencies/IsaacLab
@@ -1 +1 @@
-Subproject commit 0f030aa6bdd968ae387ac4b74289748437586618
+Subproject commit 37ddf626871758333d6ed89cf64ad702aef127d0

From 87ced234460beb788ec1fe0fc6a6e27dd761eb61 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 5 Mar 2026 21:57:59 -0500
Subject: [PATCH 50/68] Change Isaaclab version==2.3.0

---
 dependencies/IsaacLab | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dependencies/IsaacLab b/dependencies/IsaacLab
index 37ddf626..3c6e67bb 160000
--- a/dependencies/IsaacLab
+++ b/dependencies/IsaacLab
@@ -1 +1 @@
-Subproject commit 37ddf626871758333d6ed89cf64ad702aef127d0
+Subproject commit 3c6e67bb5c7ada942a6d1884ab69338f57596f77

From 65484c55563a65e239d286a92fce6fd4aaf45261 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 5 Mar 2026 23:17:32 -0500
Subject: [PATCH 51/68] Change documents.

---
 STATE_MACHINE_README.md                       | 197 -----------------
 STATE_MACHINE_README_CN.md                    | 199 ------------------
 .../docs/getting_started/state_machine.md     | 111 ++++++++++
 docs/docs/docs/introduction.md                |   1 +
 docs/sidebars.js                              |   6 +
 scripts/datagen/state_machine/generate.py     | 154 ++++++++------
 .../datagen/state_machine/pick_orange.py      |   1 -
 7 files changed, 207 insertions(+), 462 deletions(-)
 delete mode 100644 STATE_MACHINE_README.md
 delete mode 100644 STATE_MACHINE_README_CN.md
 create mode 100644 docs/docs/docs/getting_started/state_machine.md

diff --git a/STATE_MACHINE_README.md b/STATE_MACHINE_README.md
deleted file mode 100644
index 6ebbc9c4..00000000
--- a/STATE_MACHINE_README.md
+++ /dev/null
@@ -1,197 +0,0 @@
-# State Machine: Recording & Replay Guide
-
-## Overview
-
-The state machine module provides automated data collection for manipulation tasks without human teleoperation. It runs a scripted policy, records demonstrations to HDF5 datasets, and supports replaying those demonstrations.
-
-```
-scripts/datagen/state_machine/
-├── generate.py     # Unified runner script: select task via --task argument
-└── replay.py       # Replay script for state-machine demonstrations
-
-source/leisaac/leisaac/datagen/state_machine/
-├── base.py         # StateMachineBase abstract class
-└── pick_orange.py  # PickOrangeStateMachine
-```
-
----
-
-## Recording
-
-### Quick Start
-
-```bash
-# Single-arm pick-orange (3 oranges → plate)
-python scripts/datagen/state_machine/generate.py \
-    --task LeIsaac-SO101-PickOrange-v0 \
-    --num_envs 1 \
-    --device cuda \
-    --enable_cameras \
-    --record \
-    --dataset_file ./datasets/pick_orange.hdf5 \
-    --num_demos 1
-```
-
-> **Note — Grasp Success Rate:** In the default scene configuration the oranges are placed relatively far from the robot's end-effector, which results in a low grasp success rate. Adjusting the orange spawn positions in the task's environment config file (e.g. moving them closer to the robot base) significantly improves the success rate.
-
-### How It Works
-
-```
-generate.py
-  └── TASK_REGISTRY[task] → (SMClass, device)   # select SM from registry
-  └── gym.make(task, cfg=env_cfg)                # create env
-       └── env_cfg.use_teleop_device(device)     # configure IK action manager
-  └── sm = SMClass()
-  └── sm.setup(env)                              # FK calibration, etc.
-  └── Main loop:
-       sm.pre_step(env)                          # e.g. home-blend joint write
-       actions = sm.get_action(env)              # state machine computes 8D IK action
-       env.step(actions)                         # steps sim + recorder captures data
-       sm.advance()                              # advance state machine (incl. fast-forward)
-```
-
-### Adding a New Task
-
-1. Implement a `StateMachineBase` subclass in `source/leisaac/leisaac/datagen/state_machine/`.
-2. Register it in `TASK_REGISTRY` inside `generate.py`:
-   ```python
-   TASK_REGISTRY = {
-       "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
-       "LeIsaac-MY-NewTask-v0":       (MyNewStateMachine,      "so101_state_machine"),
-   }
-   ```
-
-### Action Format
-
-| Device | Dims | Layout |
-|---|---|---|
-| `so101_state_machine` | 8D | `[pos(3), quat(4), gripper(1)]` in robot base frame |
-| `bi_so101_state_machine` | 16D | `[left_pos(3), left_quat(4), left_grip(1), right_pos(3), right_quat(4), right_grip(1)]` |
-
-IK targets are expressed in the **robot base local frame**, not world frame:
-```python
-diff_w = target_pos_w - base_pos_w
-target_pos_local = quat_apply(quat_inv(base_quat_w), diff_w)
-```
-
----
-
-## Dataset Structure
-
-Episodes are stored in HDF5 format under the `data/` group:
-
-```
-data/
-├── demo_0      # EMPTY — artifact of the initial env.reset() at startup
-│   └── initial_state        # only field; num_samples=0; no actions
-├── demo_1      # First real demonstration
-│   ├── actions              # (T, 8) — IK pose targets passed to env.step()
-│   ├── processed_actions    # (T, 6) — joint targets computed by IK solver
-│   ├── initial_state        # scene state at episode start (for reset_to)
-│   ├── states/              # per-step articulation/sensor states
-│   └── obs/                 # per-step observations (images, joint_pos, etc.)
-├── demo_2      # Second real demonstration
-...
-```
-
-**Important:** `demo_0` is always empty. The **K-th recorded demonstration** is stored as `demo_K`.
-
-When replaying, use `--select_episodes K` to load `demo_K`:
-```bash
---select_episodes 1   # → demo_1, the first real episode
---select_episodes 2   # → demo_2, the second real episode
-```
-
----
-
-## Replay
-
-### Quick Start
-
-```bash
-python scripts/datagen/state_machine/replay.py \
-    --task LeIsaac-SO101-PickOrange-v0 \
-    --dataset_file ./datasets/pick_orange.hdf5 \
-    --task_type so101_state_machine \
-    --select_episodes 1 \
-    --device cuda \
-    --enable_cameras \
-    --replay_mode action
-```
-
-### Replay Modes
-
-| Mode | Data replayed | Use case |
-|---|---|---|
-| `action` | `HDF5["actions"]` (8D IK pose targets) | IK-based devices (`so101_state_machine`) |
-| `state` | `HDF5["states"]["articulation"]["robot"]["joint_position"]` | Joint-position devices (`so101leader`) |
-
-For `so101_state_machine`, **only `action` mode is valid**. The IK action manager expects an 8D pose target; passing raw 6D joint positions (state mode) would cause a dimension mismatch.
-
----
-
-## Technical Details
-
-### 1. StateMachineBase Interface
-
-Each state machine must implement:
-
-| Method | Description |
-|---|---|
-| `setup(env)` | One-time calibration after env creation (e.g. FK rest-pose computation) |
-| `check_success(env) → bool` | Episode-end success evaluation |
-| `pre_step(env)` | Optional per-step hook before `get_action` (default: no-op) |
-| `get_action(env) → Tensor` | Compute IK action for current step |
-| `advance()` | Increment step counter; handles state transitions and fast-forward |
-| `reset()` | Reset to initial state for a new episode |
-| `is_episode_done` | Property: True when all phases complete |
-
-### 2. Gravity Disable (Two Steps Required)
-
-IsaacLab's `disable_gravity` flag in `ArticulationCfg.spawn.rigid_props` only writes to the articulation root prim. Individual link prims each carry their own `PhysicsRigidBodyAPI` with gravity still enabled.
-
-**Step 1** — Config level (in `use_teleop_device()`):
-```python
-self.scene.robot.spawn.rigid_props.disable_gravity = True
-```
-
-**Step 2** — USD stage traversal (in runner script, after `gym.make()`):
-```python
-_stage = omni.usd.get_context().get_stage()
-for _prim in _stage.Traverse():
-    if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
-        PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
-```
-
-Both steps must be present. The same pattern applies to bi-arm tasks (`"Robot"` matches both `Left_Robot` and `Right_Robot`).
-
-### 3. Joint Damping
-
-The IK controller requires higher damping than the robot's default (0.6 N·m·s/rad) for stable, smooth trajectories. Damping is set to **10.0 N·m·s/rad** inside `get_action()`:
-
-```python
-robot.write_joint_damping_to_sim(damping=10.0)
-```
-
-This must also be applied during **replay** to match recording conditions. The replay script calls `apply_damping(env, task_type)` before every `env.step()`.
-
-### 4. Episode Numbering
-
-The IsaacLab recorder saves an initial-state-only episode (`num_samples=0`) on the very first `env.reset()` call (before any steps). This becomes `demo_0`.
-
-| `--select_episodes N` | Episode loaded | Content |
-|---|---|---|
-| 0 | `demo_0` | Empty (no actions) — causes `TypeError` |
-| 1 | `demo_1` | 1st real demonstration |
-| K | `demo_K` | K-th real demonstration |
-
----
-
-## File Reference
-
-| File | Purpose |
-|---|---|
-| `scripts/datagen/state_machine/generate.py` | Unified recording runner (task selected via `--task`) |
-| `scripts/datagen/state_machine/replay.py` | State-machine replay (with damping) |
-| `source/leisaac/leisaac/datagen/state_machine/base.py` | `StateMachineBase` abstract class |
-| `source/leisaac/leisaac/datagen/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
diff --git a/STATE_MACHINE_README_CN.md b/STATE_MACHINE_README_CN.md
deleted file mode 100644
index 66f62501..00000000
--- a/STATE_MACHINE_README_CN.md
+++ /dev/null
@@ -1,199 +0,0 @@
-# 状态机模块：录制与回放说明
-
-## 概述
-
-状态机模块提供无需人工遥操作的自动化数据采集能力。它运行脚本化的策略，将演示数据录制为 HDF5 数据集，并支持回放已录制的演示。
-
-```
-scripts/datagen/state_machine/
-├── generate.py     # 统一 Runner 脚本：通过 --task 参数选择任务
-└── replay.py       # 状态机演示回放脚本
-
-source/leisaac/leisaac/datagen/state_machine/
-├── base.py         # StateMachineBase 抽象基类
-└── pick_orange.py  # PickOrangeStateMachine
-```
-
----
-
-## 录制
-
-### 快速开始
-
-```bash
-# 单臂拾橙（3 个橘子 → 盘子）
-python scripts/datagen/state_machine/generate.py \
-    --task LeIsaac-SO101-PickOrange-v0 \
-    --num_envs 1 \
-    --device cuda \
-    --enable_cameras \
-    --record \
-    --dataset_file ./datasets/pick_orange.hdf5 \
-    --num_demos 1
-```
-
-> **注意 — 夹取成功率：** 默认场景配置中橙子离机械臂末端较远，导致夹取成功率较低。在任务的环境 cfg 文件中调整橙子的生成位置（例如将其移近机械臂底座），可以显著提升夹取成功率。
-
-### 工作原理
-
-```
-generate.py
-  └── TASK_REGISTRY[task] → (SMClass, device)   # 从注册表选择状态机
-  └── gym.make(task, cfg=env_cfg)                # 创建环境
-       └── env_cfg.use_teleop_device(device)     # 配置 IK action manager
-  └── sm = SMClass()
-  └── sm.setup(env)                              # FK 校准等一次性初始化
-  └── 主循环:
-       sm.pre_step(env)                          # 例如 home-blend 关节写入
-       actions = sm.get_action(env)              # 状态机计算 8D IK 动作
-       env.step(actions)                         # 推进仿真 + 录制器采集数据
-       sm.advance()                              # 推进状态机（含快进逻辑）
-```
-
-### 添加新任务
-
-1. 在 `source/leisaac/leisaac/datagen/state_machine/` 中实现一个 `StateMachineBase` 子类。
-2. 在 `generate.py` 的 `TASK_REGISTRY` 中注册：
-   ```python
-   TASK_REGISTRY = {
-       "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
-       "LeIsaac-MY-NewTask-v0":       (MyNewStateMachine,      "so101_state_machine"),
-   }
-   ```
-
-### 动作格式
-
-| 设备 | 维度 | 格式 |
-|---|---|---|
-| `so101_state_machine` | 8D | `[pos(3), quat(4), gripper(1)]`，机械臂 base 局部坐标系 |
-| `bi_so101_state_machine` | 16D | `[左_pos(3), 左_quat(4), 左_grip(1), 右_pos(3), 右_quat(4), 右_grip(1)]` |
-
-IK 目标位置需表示在**机械臂 base 局部坐标系**下，而非世界坐标系：
-
-```python
-diff_w = target_pos_w - base_pos_w
-target_pos_local = quat_apply(quat_inv(base_quat_w), diff_w)
-```
-
----
-
-## 数据集结构
-
-演示数据以 HDF5 格式存储在 `data/` 组中：
-
-```
-data/
-├── demo_0      # 空 episode —— 初始 env.reset() 产生的副产物
-│   └── initial_state        # 仅有此字段；num_samples=0；无 actions
-├── demo_1      # 第一条真实演示
-│   ├── actions              # (T, 8) —— 传入 env.step() 的 IK pose 目标
-│   ├── processed_actions    # (T, 6) —— IK 求解器算出的关节目标位置
-│   ├── initial_state        # episode 开始时的场景状态（用于 reset_to）
-│   ├── states/              # 每步的关节/传感器状态
-│   └── obs/                 # 每步的观测（图像、关节位置等）
-├── demo_2      # 第二条真实演示
-...
-```
-
-**重要：** `demo_0` 永远是空的。**第 K 条录制的演示**存储为 `demo_K`。
-
-回放时用 `--select_episodes K` 加载 `demo_K`：
-
-```bash
---select_episodes 1   # → demo_1，第一条真实演示
---select_episodes 2   # → demo_2，第二条真实演示
-```
-
----
-
-## 回放
-
-### 快速开始
-
-```bash
-python scripts/datagen/state_machine/replay.py \
-    --task LeIsaac-SO101-PickOrange-v0 \
-    --dataset_file ./datasets/pick_orange.hdf5 \
-    --task_type so101_state_machine \
-    --select_episodes 1 \
-    --device cuda \
-    --enable_cameras \
-    --replay_mode action
-```
-
-### 回放模式
-
-| 模式 | 回放的数据 | 适用场景 |
-|---|---|---|
-| `action` | `HDF5["actions"]`（8D IK pose 目标） | IK 控制设备（`so101_state_machine`） |
-| `state` | `HDF5["states"]["articulation"]["robot"]["joint_position"]` | 关节位置控制设备（`so101leader`） |
-
-对 `so101_state_machine` 而言，**只有 `action` 模式有效**。IK action manager 期望 8D pose 输入，而 `state` 模式传入的是 6D 关节位置，会导致维度不匹配。
-
----
-
-## 技术细节
-
-### 1. StateMachineBase 接口
-
-每个状态机必须实现：
-
-| 方法 | 说明 |
-|---|---|
-| `setup(env)` | 环境创建后的一次性初始化（如 FK rest-pose 校准） |
-| `check_success(env) → bool` | episode 结束时的成功判断 |
-| `pre_step(env)` | 可选的每步前置钩子（默认空操作） |
-| `get_action(env) → Tensor` | 计算当前步的 IK 动作 |
-| `advance()` | 推进步计数器，处理状态转移和快进 |
-| `reset()` | 重置到初始状态，供新 episode 使用 |
-| `is_episode_done` | 属性：所有阶段完成时为 True |
-
-### 2. 重力禁用（两步缺一不可）
-
-IsaacLab 的 `disable_gravity` 标志只写入关节根 prim，各子 link prim 各自带有 `PhysicsRigidBodyAPI`，重力默认仍然开启。
-
-**第一步** —— 配置层（在 `use_teleop_device()` 中）：
-```python
-self.scene.robot.spawn.rigid_props.disable_gravity = True
-```
-
-**第二步** —— USD Stage 遍历（在 runner 脚本 `gym.make()` 之后）：
-```python
-_stage = omni.usd.get_context().get_stage()
-for _prim in _stage.Traverse():
-    if "Robot" in str(_prim.GetPath()) and _prim.HasAPI(UsdPhysics.RigidBodyAPI):
-        PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
-```
-
-两步必须同时存在。双臂任务同理（`"Robot"` 可同时匹配 `Left_Robot` 和 `Right_Robot`）。
-
-### 3. 关节阻尼
-
-IK 控制器需要比机械臂默认阻尼（0.6 N·m·s/rad）更高的阻尼值，才能获得稳定、平滑的轨迹。在 `get_action()` 中每步调用：
-
-```python
-robot.write_joint_damping_to_sim(damping=10.0)
-```
-
-**回放时也必须应用相同阻尼**，以匹配录制条件。回放脚本在每次 `env.step()` 前调用 `apply_damping(env, task_type)`。
-
-### 4. Episode 编号规则
-
-IsaacLab 录制器在第一次 `env.reset()` 调用时（此时还未执行任何步骤）会保存一个仅含初始状态的 episode（`num_samples=0`），即 `demo_0`。
-
-| `--select_episodes N` | 加载的 episode | 内容 |
-|---|---|---|
-| 0 | `demo_0` | 空（无 actions）—— 会导致 `TypeError` |
-| 1 | `demo_1` | 第 1 条真实演示 |
-| K | `demo_K` | 第 K 条真实演示 |
-
----
-
-## 文件说明
-
-| 文件 | 用途 |
-|---|---|
-| `scripts/datagen/state_machine/generate.py` | 统一录制 Runner（通过 `--task` 选择任务） |
-| `scripts/datagen/state_machine/replay.py` | 状态机专用回放脚本（含阻尼设置） |
-| `source/leisaac/leisaac/datagen/state_machine/base.py` | `StateMachineBase` 抽象基类 |
-| `source/leisaac/leisaac/datagen/state_machine/pick_orange.py` | `PickOrangeStateMachine` |
diff --git a/docs/docs/docs/getting_started/state_machine.md b/docs/docs/docs/getting_started/state_machine.md
new file mode 100644
index 00000000..80e496e5
--- /dev/null
+++ b/docs/docs/docs/getting_started/state_machine.md
@@ -0,0 +1,111 @@
+# State Machine Data Generation
+
+The state machine module provides automated data collection for manipulation tasks without human teleoperation. It runs a scripted policy and records demonstrations to HDF5 datasets.
+
+## Recording
+
+```shell
+python scripts/datagen/state_machine/generate.py \
+    --task LeIsaac-SO101-PickOrange-v0 \
+    --num_envs 1 \
+    --device cuda \
+    --enable_cameras \
+    --record \
+    --dataset_file ./datasets/pick_orange.hdf5 \
+    --num_demos 50
+```
+
+<details>
+<summary><strong>Parameter descriptions for generate.py</strong></summary>
+
+- `--task`: Task environment name to run, e.g., `LeIsaac-SO101-PickOrange-v0`. See [here](/resources/available_env) for available tasks.
+
+- `--num_envs`: Number of parallel simulation environments, usually `1`.
+
+- `--device`: Computation device, such as `cpu` or `cuda` (GPU).
+
+- `--enable_cameras`: Enable camera sensors to collect visual data.
+
+- `--seed`: Seed for the environment. Defaults to current timestamp if not set.
+
+- `--record`: Enable data recording; saves demonstrations to an HDF5 file.
+
+- `--dataset_file`: Path to save the recorded dataset, e.g., `./datasets/pick_orange.hdf5`.
+
+- `--resume`: Resume recording from an existing dataset file.
+
+- `--num_demos`: Number of successful demonstrations to record. Set to `0` for infinite.
+
+- `--step_hz`: Environment stepping rate in Hz (default: `60`).
+
+- `--quality`: Enable quality render mode.
+
+- `--use_lerobot_recorder`: Record directly in LeRobot Dataset format instead of HDF5.
+
+- `--lerobot_dataset_repo_id`: HuggingFace dataset repository ID (format: `username/repository_name`). Required when `--use_lerobot_recorder` is set.
+
+- `--lerobot_dataset_fps`: Dataset frame rate when using LeRobot recorder (default: `30`).
+
+</details>
+
+::::tip
+Grasp success rate depends heavily on orange spawn positions. Adjusting the spawn positions in the task's environment config file (e.g. moving oranges closer to the robot base) can significantly improve success rate.
+::::
+
+## Replay
+
+After recording, you can replay the collected demonstrations in simulation:
+
+```shell
+python scripts/datagen/state_machine/replay.py \
+    --task LeIsaac-SO101-PickOrange-v0 \
+    --dataset_file ./datasets/pick_orange.hdf5 \
+    --task_type so101_state_machine \
+    --select_episodes 1 \
+    --device cuda \
+    --enable_cameras \
+    --replay_mode action
+```
+
+<details>
+<summary><strong>Parameter descriptions for replay.py</strong></summary>
+
+- `--task`: Task environment name to run, e.g., `LeIsaac-SO101-PickOrange-v0`.
+
+- `--num_envs`: Number of parallel simulation environments, usually `1`.
+
+- `--device`: Computation device, such as `cpu` or `cuda` (GPU).
+
+- `--enable_cameras`: Enable camera sensors to visualize during replay.
+
+- `--dataset_file`: Path to the recorded dataset, e.g., `./datasets/pick_orange.hdf5`.
+
+- `--replay_mode`: Replay mode — `action` replays IK pose targets, `state` replays joint positions.
+
+- `--task_type`: State machine device type used during recording, e.g., `so101_state_machine` or `bi_so101_state_machine`. Inferred from task name if not set.
+
+- `--select_episodes`: List of episode indices to replay. Leave empty to replay all episodes.
+
+- `--step_hz`: Environment stepping rate in Hz (default: `60`).
+
+</details>
+
+::::tip
+NOTE:
+
+`demo_0` is always empty (an artifact of the initial `env.reset()`). The first real demonstration is `demo_1`. Use `--select_episodes 1` to replay the first recorded demonstration.
+
+For `so101_state_machine`, only `--replay_mode action` is valid. The IK action manager expects an 8D pose target; passing raw joint positions would cause a dimension mismatch.
+::::
+
+## Adding a New Task
+
+1. Implement a `StateMachineBase` subclass in `source/leisaac/leisaac/datagen/state_machine/`.
+2. Register it in `TASK_REGISTRY` inside `scripts/datagen/state_machine/generate.py`:
+
+```python
+TASK_REGISTRY = {
+    "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
+    "LeIsaac-MY-NewTask-v0":       (MyNewStateMachine,      "so101_state_machine"),
+}
+```
diff --git a/docs/docs/docs/introduction.md b/docs/docs/docs/introduction.md
index 4be9365f..19149e2f 100644
--- a/docs/docs/docs/introduction.md
+++ b/docs/docs/docs/introduction.md
@@ -14,6 +14,7 @@ slug: /
 LeIsaac provides teleoperation functionality in [IsaacLab](https://isaac-sim.github.io/IsaacLab/main/index.html) using the SO101Leader ([LeRobot](https://github.com/huggingface/lerobot)), including data collection, data conversion, and subsequent policy training.
 
 - 🤖 We use the SO101 Follower robot (and other related robot) in IsaacLab and provide practical teleoperation methods.
+- 🦾 State machine scripted policies enable fully automated data collection without human teleoperation.
 - 🔄 Ready-to-use scripts convert HDF5 data into the LeRobot dataset format.
 - 🧠 Simulation data is used to fine-tune [GR00T N1.5](https://github.com/NVIDIA/Isaac-GR00T) and deploy the policy on real hardware. And more policies will be supported.
 
diff --git a/docs/sidebars.js b/docs/sidebars.js
index f3423d85..27521974 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -45,6 +45,12 @@ const sidebars = {
           link: { type: 'doc', id: 'docs/getting_started/dataset_replay' },
           items: [],
         },
+        {
+          type: 'category',
+          label: 'State Machine Data Generation',
+          link: { type: 'doc', id: 'docs/getting_started/state_machine' },
+          items: [],
+        },
         {
           type: 'category',
           label: 'Policy Training & Inference',
diff --git a/scripts/datagen/state_machine/generate.py b/scripts/datagen/state_machine/generate.py
index 3c84e986..aa450f22 100644
--- a/scripts/datagen/state_machine/generate.py
+++ b/scripts/datagen/state_machine/generate.py
@@ -16,6 +16,7 @@
 
 import argparse
 import os
+import signal
 import time
 
 from isaaclab.app import AppLauncher
@@ -213,72 +214,95 @@ def main():
 
     start_record_state = False
 
-    while simulation_app.is_running() and not simulation_app.is_exiting():
-        with torch.inference_mode():
-            if env.cfg.dynamic_reset_gripper_effort_limit:
-                dynamic_reset_gripper_effort_limit_sim(env, device)
-
-            if sm.is_episode_done:
-                try:
-                    success = sm.check_success(env)
-                except Exception as e:
-                    print("Success check failed:", e)
-                    success = False
-
-                if start_record_state:
-                    if args_cli.record:
-                        print("Stop Recording!!!")
-                    start_record_state = False
-
-                if args_cli.record and success:
-                    auto_terminate(env, True)
-                    current_recorded_demo_count += 1
+    interrupted = False
+
+    def signal_handler(signum, frame):
+        """Handle SIGINT (Ctrl+C) signal."""
+        nonlocal interrupted
+        interrupted = True
+        print("\n[INFO] KeyboardInterrupt (Ctrl+C) detected. Cleaning up resources...")
+
+    original_sigint_handler = signal.signal(signal.SIGINT, signal_handler)
+
+    try:
+        while simulation_app.is_running() and not simulation_app.is_exiting() and not interrupted:
+            with torch.inference_mode():
+                if env.cfg.dynamic_reset_gripper_effort_limit:
+                    dynamic_reset_gripper_effort_limit_sim(env, device)
+
+                if sm.is_episode_done:
+                    try:
+                        success = sm.check_success(env)
+                    except Exception as e:
+                        print("Success check failed:", e)
+                        success = False
+
+                    print("Episode success!" if success else "Episode failed!")
+
+                    if start_record_state:
+                        if args_cli.record:
+                            print("Stop Recording!!!")
+                        start_record_state = False
+
+                    if args_cli.record and success:
+                        auto_terminate(env, True)
+                        current_recorded_demo_count += 1
+                    else:
+                        auto_terminate(env, False)
+
+                    if (
+                        args_cli.record
+                        and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                        > current_recorded_demo_count
+                    ):
+                        current_recorded_demo_count = (
+                            env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                        )
+                        print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
+
+                    if (
+                        args_cli.record
+                        and args_cli.num_demos > 0
+                        and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+                        >= args_cli.num_demos
+                    ):
+                        print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                        break
+
+                    env.reset()
+                    sm.reset()
+
+                    if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
+                        print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                        break
                 else:
-                    auto_terminate(env, False)
-
-                if (
-                    args_cli.record
-                    and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                    > current_recorded_demo_count
-                ):
-                    current_recorded_demo_count = (
-                        env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                    )
-                    print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
-
-                if (
-                    args_cli.record
-                    and args_cli.num_demos > 0
-                    and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                    >= args_cli.num_demos
-                ):
-                    print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
-                    break
-
-                env.reset()
-                sm.reset()
-
-                if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
-                    print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
-                    break
-            else:
-                if not start_record_state:
-                    if args_cli.record:
-                        print("Start Recording!!!")
-                    start_record_state = True
-
-                sm.pre_step(env)
-                actions = sm.get_action(env)
-                env.step(actions)
-                sm.advance()
-
-            if rate_limiter:
-                rate_limiter.sleep(env)
-
-    if args_cli.record and hasattr(env.recorder_manager, "finalize"):
-        env.recorder_manager.finalize()
-    env.close()
-    simulation_app.close()
+                    if not start_record_state:
+                        if args_cli.record:
+                            print("Start Recording!!!")
+                        start_record_state = True
+
+                    sm.pre_step(env)
+                    actions = sm.get_action(env)
+                    env.step(actions)
+                    sm.advance()
+
+                if rate_limiter:
+                    rate_limiter.sleep(env)
+
+            if interrupted:
+                break
+    except Exception as e:
+        import traceback
+
+        print(f"\n[ERROR] An error occurred: {e}\n")
+        traceback.print_exc()
+        print("[INFO] Cleaning up resources...")
+    finally:
+        signal.signal(signal.SIGINT, original_sigint_handler)
+        if args_cli.record and hasattr(env.recorder_manager, "finalize"):
+            env.recorder_manager.finalize()
+        env.close()
+        simulation_app.close()
 
 
 if __name__ == "__main__":
diff --git a/source/leisaac/leisaac/datagen/state_machine/pick_orange.py b/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
index fc27f848..3fed4845 100644
--- a/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
@@ -93,7 +93,6 @@ def setup(self, env) -> None:
         env.sim.step(render=False)
         env.scene.update(dt=env.physics_dt)
         self._rest_ee_pos_world = robot.data.body_pos_w[:, -1, :].clone()
-        print(f"[Calibration] Rest pose EE (world): {self._rest_ee_pos_world[0].cpu().numpy()}")
 
         # Return scene to default initial state before the recording loop.
         env.reset()

From 5155f6a0489615dc729f5df8b9b7b3a7d2cfc27a Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Fri, 6 Mar 2026 01:52:49 -0500
Subject: [PATCH 52/68] Fix bugs.

---
 .../{getting_started => features}/state_machine.md   | 10 +---------
 docs/sidebars.js                                     | 12 ++++++------
 scripts/datagen/state_machine/generate.py            |  9 +++++----
 .../leisaac/datagen/state_machine/pick_orange.py     |  3 ---
 4 files changed, 12 insertions(+), 22 deletions(-)
 rename docs/docs/docs/{getting_started => features}/state_machine.md (90%)

diff --git a/docs/docs/docs/getting_started/state_machine.md b/docs/docs/docs/features/state_machine.md
similarity index 90%
rename from docs/docs/docs/getting_started/state_machine.md
rename to docs/docs/docs/features/state_machine.md
index 80e496e5..081b9def 100644
--- a/docs/docs/docs/getting_started/state_machine.md
+++ b/docs/docs/docs/features/state_machine.md
@@ -61,7 +61,7 @@ python scripts/datagen/state_machine/replay.py \
     --task LeIsaac-SO101-PickOrange-v0 \
     --dataset_file ./datasets/pick_orange.hdf5 \
     --task_type so101_state_machine \
-    --select_episodes 1 \
+    --select_episodes 0 \
     --device cuda \
     --enable_cameras \
     --replay_mode action
@@ -90,14 +90,6 @@ python scripts/datagen/state_machine/replay.py \
 
 </details>
 
-::::tip
-NOTE:
-
-`demo_0` is always empty (an artifact of the initial `env.reset()`). The first real demonstration is `demo_1`. Use `--select_episodes 1` to replay the first recorded demonstration.
-
-For `so101_state_machine`, only `--replay_mode action` is valid. The IK action manager expects an 8D pose target; passing raw joint positions would cause a dimension mismatch.
-::::
-
 ## Adding a New Task
 
 1. Implement a `StateMachineBase` subclass in `source/leisaac/leisaac/datagen/state_machine/`.
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 27521974..7f6402b3 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -45,12 +45,6 @@ const sidebars = {
           link: { type: 'doc', id: 'docs/getting_started/dataset_replay' },
           items: [],
         },
-        {
-          type: 'category',
-          label: 'State Machine Data Generation',
-          link: { type: 'doc', id: 'docs/getting_started/state_machine' },
-          items: [],
-        },
         {
           type: 'category',
           label: 'Policy Training & Inference',
@@ -123,6 +117,12 @@ const sidebars = {
           link: { type: 'doc', id: 'docs/features/lerobot_recorder' },
           items: [],
         },
+        {
+          type: 'category',
+          label: 'State Machine Data Generation',
+          link: { type: 'doc', id: 'docs/features/state_machine' },
+          items: [],
+        },
       ],
     },
     'docs/trouble_shooting',
diff --git a/scripts/datagen/state_machine/generate.py b/scripts/datagen/state_machine/generate.py
index aa450f22..3aa7eff1 100644
--- a/scripts/datagen/state_machine/generate.py
+++ b/scripts/datagen/state_machine/generate.py
@@ -3,9 +3,9 @@
 Selects the appropriate state machine based on --task and runs the recording loop.
 
 Usage:
-    python scripts/datagen/state_machine/generate.py \\
-        --task LeIsaac-SO101-PickOrange-v0 \\
-        --num_envs 1 --device cuda --enable_cameras \\
+    python scripts/datagen/state_machine/generate.py \
+        --task LeIsaac-SO101-PickOrange-v0 \
+        --num_envs 1 --device cuda --enable_cameras \
         --record --dataset_file ./datasets/pick_orange.hdf5 --num_demos 50
 """
 
@@ -199,11 +199,11 @@ def main():
 
     if hasattr(env, "initialize"):
         env.initialize()
-    env.reset()
 
     # one-time state machine setup (e.g. FK calibration)
     sm = SMClass()
     sm.setup(env)
+    env.reset()
     sm.reset()
 
     resume_recorded_demo_count = 0
@@ -271,6 +271,7 @@ def signal_handler(signum, frame):
 
                     env.reset()
                     sm.reset()
+                    auto_terminate(env, False)
 
                     if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
                         print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
diff --git a/source/leisaac/leisaac/datagen/state_machine/pick_orange.py b/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
index 3fed4845..ded8ae59 100644
--- a/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
@@ -94,9 +94,6 @@ def setup(self, env) -> None:
         env.scene.update(dt=env.physics_dt)
         self._rest_ee_pos_world = robot.data.body_pos_w[:, -1, :].clone()
 
-        # Return scene to default initial state before the recording loop.
-        env.reset()
-
     def check_success(self, env) -> bool:
         """Return True if all oranges are on the plate and the arm is at rest."""
         robot = env.scene["robot"]

From 1d3a9129ca68967137c72351040e83c567d7a84e Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Fri, 6 Mar 2026 17:44:19 -0500
Subject: [PATCH 53/68] Change format.

---
 scripts/datagen/state_machine/generate.py     | 188 ++++++++++--------
 .../datagen/state_machine/pick_orange.py      |   8 +-
 2 files changed, 103 insertions(+), 93 deletions(-)

diff --git a/scripts/datagen/state_machine/generate.py b/scripts/datagen/state_machine/generate.py
index 3aa7eff1..6d73407e 100644
--- a/scripts/datagen/state_machine/generate.py
+++ b/scripts/datagen/state_machine/generate.py
@@ -47,17 +47,15 @@
 simulation_app = app_launcher.app
 
 import gymnasium as gym
+import leisaac.tasks  # noqa: F401
 import torch
 from isaaclab.envs import DirectRLEnv, ManagerBasedRLEnv
 from isaaclab.managers import DatasetExportMode, TerminationTermCfg
 from isaaclab_tasks.utils import parse_env_cfg
-
-from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
 from leisaac.datagen.state_machine import PickOrangeStateMachine
+from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
 from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim
 
-import leisaac.tasks  # noqa: F401
-
 # Maps gym task id → (StateMachineClass, device_type)
 TASK_REGISTRY = {
     "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
@@ -105,26 +103,8 @@ def auto_terminate(env: ManagerBasedRLEnv | DirectRLEnv, success: bool):
         env.cfg.return_success_status = success
 
 
-def main():
-    """Run a state machine in a LeIsaac manipulation environment."""
-    task_name = args_cli.task
-    if task_name not in TASK_REGISTRY:
-        raise ValueError(
-            f"Task '{task_name}' is not registered in TASK_REGISTRY.\n"
-            f"Available tasks: {list(TASK_REGISTRY.keys())}"
-        )
-    SMClass, device = TASK_REGISTRY[task_name]
-
-    output_dir = os.path.dirname(args_cli.dataset_file)
-    output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
-    if output_dir and not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-
-    env_cfg = parse_env_cfg(task_name, device=args_cli.device, num_envs=args_cli.num_envs)
-    env_cfg.use_teleop_device(device)
-    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
-
-    is_direct_env = "Direct" in task_name
+def _configure_env_cfg(env_cfg, args_cli, is_direct_env, output_dir, output_file_name):
+    """Configure termination and recorder settings on env_cfg."""
     if is_direct_env:
         env_cfg.never_time_out = True
         env_cfg.auto_terminate = True
@@ -164,6 +144,98 @@ def main():
     else:
         env_cfg.recorders = None
 
+
+def _replace_recorder_manager(env, env_cfg, args_cli):
+    """Replace the default recorder manager with streaming or lerobot recorder."""
+    del env.recorder_manager
+    if args_cli.use_lerobot_recorder:
+        from leisaac.enhance.datasets.lerobot_dataset_handler import LeRobotDatasetCfg
+        from leisaac.enhance.managers.lerobot_recorder_manager import (
+            LeRobotRecorderManager,
+        )
+
+        dataset_cfg = LeRobotDatasetCfg(
+            repo_id=args_cli.lerobot_dataset_repo_id,
+            fps=args_cli.lerobot_dataset_fps,
+        )
+        env.recorder_manager = LeRobotRecorderManager(env_cfg.recorders, dataset_cfg, env)
+    else:
+        env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
+        env.recorder_manager.flush_steps = 100
+        env.recorder_manager.compression = "lzf"
+
+
+def _on_episode_done(env, sm, args_cli, resume_recorded_demo_count, current_recorded_demo_count, start_record_state):
+    """Handle end-of-episode logic. Returns (current_recorded_demo_count, start_record_state, should_break)."""
+    try:
+        success = sm.check_success(env)
+    except Exception as e:
+        print("Success check failed:", e)
+        success = False
+
+    print("Episode success!" if success else "Episode failed!")
+
+    if start_record_state:
+        if args_cli.record:
+            print("Stop Recording!!!")
+        start_record_state = False
+
+    if args_cli.record and success:
+        auto_terminate(env, True)
+        current_recorded_demo_count += 1
+    else:
+        auto_terminate(env, False)
+
+    if (
+        args_cli.record
+        and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+        > current_recorded_demo_count
+    ):
+        current_recorded_demo_count = (
+            env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
+        )
+        print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
+
+    if (
+        args_cli.record
+        and args_cli.num_demos > 0
+        and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count >= args_cli.num_demos
+    ):
+        print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+        return current_recorded_demo_count, start_record_state, True
+
+    env.reset()
+    sm.reset()
+    auto_terminate(env, False)
+
+    if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
+        print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+        return current_recorded_demo_count, start_record_state, True
+
+    return current_recorded_demo_count, start_record_state, False
+
+
+def main():
+    """Run a state machine in a LeIsaac manipulation environment."""
+    task_name = args_cli.task
+    if task_name not in TASK_REGISTRY:
+        raise ValueError(
+            f"Task '{task_name}' is not registered in TASK_REGISTRY.\nAvailable tasks: {list(TASK_REGISTRY.keys())}"
+        )
+    SMClass, device = TASK_REGISTRY[task_name]
+
+    output_dir = os.path.dirname(args_cli.dataset_file)
+    output_file_name = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
+    if output_dir and not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    env_cfg = parse_env_cfg(task_name, device=args_cli.device, num_envs=args_cli.num_envs)
+    env_cfg.use_teleop_device(device)
+    env_cfg.seed = args_cli.seed if args_cli.seed is not None else int(time.time())
+
+    is_direct_env = "Direct" in task_name
+    _configure_env_cfg(env_cfg, args_cli, is_direct_env, output_dir, output_file_name)
+
     env: ManagerBasedRLEnv | DirectRLEnv = gym.make(task_name, cfg=env_cfg).unwrapped
 
     # disable gravity for every robot link prim
@@ -176,24 +248,7 @@ def main():
             PhysxSchema.PhysxRigidBodyAPI.Apply(_prim).CreateDisableGravityAttr(True)
 
     if args_cli.record:
-        del env.recorder_manager
-        if args_cli.use_lerobot_recorder:
-            from leisaac.enhance.datasets.lerobot_dataset_handler import (
-                LeRobotDatasetCfg,
-            )
-            from leisaac.enhance.managers.lerobot_recorder_manager import (
-                LeRobotRecorderManager,
-            )
-
-            dataset_cfg = LeRobotDatasetCfg(
-                repo_id=args_cli.lerobot_dataset_repo_id,
-                fps=args_cli.lerobot_dataset_fps,
-            )
-            env.recorder_manager = LeRobotRecorderManager(env_cfg.recorders, dataset_cfg, env)
-        else:
-            env.recorder_manager = StreamingRecorderManager(env_cfg.recorders, env)
-            env.recorder_manager.flush_steps = 100
-            env.recorder_manager.compression = "lzf"
+        _replace_recorder_manager(env, env_cfg, args_cli)
 
     rate_limiter = RateLimiter(args_cli.step_hz)
 
@@ -213,7 +268,6 @@ def main():
     current_recorded_demo_count = resume_recorded_demo_count
 
     start_record_state = False
-
     interrupted = False
 
     def signal_handler(signum, frame):
@@ -231,50 +285,10 @@ def signal_handler(signum, frame):
                     dynamic_reset_gripper_effort_limit_sim(env, device)
 
                 if sm.is_episode_done:
-                    try:
-                        success = sm.check_success(env)
-                    except Exception as e:
-                        print("Success check failed:", e)
-                        success = False
-
-                    print("Episode success!" if success else "Episode failed!")
-
-                    if start_record_state:
-                        if args_cli.record:
-                            print("Stop Recording!!!")
-                        start_record_state = False
-
-                    if args_cli.record and success:
-                        auto_terminate(env, True)
-                        current_recorded_demo_count += 1
-                    else:
-                        auto_terminate(env, False)
-
-                    if (
-                        args_cli.record
-                        and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                        > current_recorded_demo_count
-                    ):
-                        current_recorded_demo_count = (
-                            env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                        )
-                        print(f"Recorded {current_recorded_demo_count} successful demonstrations.")
-
-                    if (
-                        args_cli.record
-                        and args_cli.num_demos > 0
-                        and env.recorder_manager.exported_successful_episode_count + resume_recorded_demo_count
-                        >= args_cli.num_demos
-                    ):
-                        print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
-                        break
-
-                    env.reset()
-                    sm.reset()
-                    auto_terminate(env, False)
-
-                    if args_cli.record and args_cli.num_demos > 0 and current_recorded_demo_count >= args_cli.num_demos:
-                        print(f"All {args_cli.num_demos} demonstrations recorded. Exiting the app.")
+                    current_recorded_demo_count, start_record_state, should_break = _on_episode_done(
+                        env, sm, args_cli, resume_recorded_demo_count, current_recorded_demo_count, start_record_state
+                    )
+                    if should_break:
                         break
                 else:
                     if not start_record_state:
diff --git a/source/leisaac/leisaac/datagen/state_machine/pick_orange.py b/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
index ded8ae59..bf63d4c9 100644
--- a/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
+++ b/source/leisaac/leisaac/datagen/state_machine/pick_orange.py
@@ -5,9 +5,9 @@
 import torch
 from isaaclab.managers import SceneEntityCfg
 from isaaclab.utils.math import quat_apply, quat_from_euler_xyz, quat_inv, quat_mul
+from leisaac.tasks.pick_orange.mdp import task_done
 
 from .base import StateMachineBase
-from leisaac.tasks.pick_orange.mdp import task_done
 
 # ---------------------------------------------------------------------------
 # Module-level helpers
@@ -122,11 +122,7 @@ def pre_step(self, env) -> None:
         Writes blended joint positions directly to the sim so the arm smoothly
         returns home while IK still receives the rest-pose EE target.
         """
-        if (
-            self._orange_now == self._num_oranges
-            and self._step_count >= 680
-            and self._rest_joint_pos is not None
-        ):
+        if self._orange_now == self._num_oranges and self._step_count >= 680 and self._rest_joint_pos is not None:
             robot = env.scene["robot"]
             if self._step_count == 680:
                 self._home_start_pos = robot.data.joint_pos.clone()

From 4faee0c8b77890566f8327e76d0468b4d9d20f37 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Fri, 6 Mar 2026 17:54:35 -0500
Subject: [PATCH 54/68] Change format.

---
 source/leisaac/leisaac/devices/action_process.py     | 12 +++++++++---
 .../leisaac/tasks/template/single_arm_env_cfg.py     |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index 0c038d8a..6abcc5dd 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -81,7 +81,9 @@ def init_action_cfg(action_cfg, device):
             asset_name="robot",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
             body_name="gripper",
-            controller=mdp.DifferentialIKControllerCfg(command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}),
+            controller=mdp.DifferentialIKControllerCfg(
+                command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}
+            ),
         )
         action_cfg.gripper_action = mdp.BinaryJointPositionActionCfg(
             asset_name="robot",
@@ -94,7 +96,9 @@ def init_action_cfg(action_cfg, device):
             asset_name="left_arm",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
             body_name="gripper",
-            controller=mdp.DifferentialIKControllerCfg(command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}),
+            controller=mdp.DifferentialIKControllerCfg(
+                command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}
+            ),
         )
         action_cfg.left_gripper_action = mdp.BinaryJointPositionActionCfg(
             asset_name="left_arm",
@@ -106,7 +110,9 @@ def init_action_cfg(action_cfg, device):
             asset_name="right_arm",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
             body_name="gripper",
-            controller=mdp.DifferentialIKControllerCfg(command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}),
+            controller=mdp.DifferentialIKControllerCfg(
+                command_type="pose", ik_method="dls", ik_params={"lambda_val": 0.04}
+            ),
         )
         action_cfg.right_gripper_action = mdp.BinaryJointPositionActionCfg(
             asset_name="right_arm",
diff --git a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
index 2cabeeb0..b11664a3 100644
--- a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
@@ -196,6 +196,7 @@ def use_teleop_device(self, teleop_device) -> None:
         self.actions = init_action_cfg(self.actions, device=teleop_device)
         if teleop_device in ["keyboard", "gamepad", "so101_state_machine"]:
             self.scene.robot.spawn.rigid_props.disable_gravity = True
+
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:
         return preprocess_device_action(action, teleop_device)
 

From b3796ab8dd7b1c096674ca9888d19400240a5af7 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sun, 8 Mar 2026 19:23:36 -0400
Subject: [PATCH 55/68] Add RL module

---
 docs/docs/docs/features/rl_training.md        | 130 ++++++++++++
 docs/sidebars.js                              |   6 +
 scripts/datagen/state_machine/generate.py     |   2 +-
 scripts/datagen/state_machine/replay.py       |  10 +-
 scripts/rl/play.py                            | 118 +++++++++++
 scripts/rl/train.py                           |  82 ++++++++
 .../leisaac/leisaac/devices/action_process.py |  54 ++++-
 .../leisaac/tasks/lift_cube/__init__.py       |  12 +-
 .../tasks/lift_cube/lift_cube_rl_env_cfg.py   | 195 ++++++++++++++++++
 .../leisaac/tasks/lift_cube/mdp/__init__.py   |   1 +
 .../tasks/lift_cube/mdp/observations.py       |  12 ++
 .../leisaac/tasks/lift_cube/mdp/rewards.py    | 127 ++++++++++++
 .../tasks/pick_orange/agents/__init__.py      |   0
 .../leisaac/tasks/template/bi_arm_env_cfg.py  |   2 +-
 .../tasks/template/direct/bi_arm_env.py       |   2 +-
 .../tasks/template/direct/single_arm_env.py   |   2 +-
 .../tasks/template/single_arm_env_cfg.py      |   2 +-
 17 files changed, 744 insertions(+), 13 deletions(-)
 create mode 100644 docs/docs/docs/features/rl_training.md
 create mode 100644 scripts/rl/play.py
 create mode 100644 scripts/rl/train.py
 create mode 100644 source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
 create mode 100644 source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
 create mode 100644 source/leisaac/leisaac/tasks/pick_orange/agents/__init__.py

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
new file mode 100644
index 00000000..ce24eaf8
--- /dev/null
+++ b/docs/docs/docs/features/rl_training.md
@@ -0,0 +1,130 @@
+# RL Training
+
+The RL training module enables training manipulation policies with reinforcement learning using [rsl_rl](https://github.com/leggedrobotics/rsl_rl) (PPO). It runs fully in simulation with parallel environments and no human teleoperation required.
+
+## Training
+
+```shell
+python scripts/rl/train.py \
+    --task LeIsaac-SO101-LiftCube-RL-v0 \
+    --num_envs 64 \
+    --max_iterations 1500 \
+    --headless
+```
+
+<details>
+<summary><strong>Parameter descriptions for train.py</strong></summary>
+
+- `--task`: Gym task ID to train. Required.
+
+- `--num_envs`: Number of parallel simulation environments. More environments = faster data collection. Default: `64`.
+
+- `--max_iterations`: Number of PPO update iterations. Default: `1500`.
+
+- `--log_dir`: Base directory for logs. Runs are saved to `<log_dir>/<task_slug>/<timestamp>/`. Default: `logs/rl`.
+
+- `--seed`: Random seed for reproducibility. Default: `42`.
+
+- `--headless`: Run without rendering window for faster training.
+
+- `--device`: Computation device, such as `cpu` or `cuda`.
+
+</details>
+
+::::tip
+Training logs (tensorboard) are written to `logs/rl/<task_slug>/<timestamp>/`. Monitor progress with:
+
+```shell
+tensorboard --logdir logs/rl
+```
+
+Key metrics to watch: `Train/mean_reward` (total episode reward) and individual reward terms such as `Episode/rew_cube_height`.
+::::
+
+## Evaluation
+
+```shell
+python scripts/rl/play.py \
+    --task LeIsaac-SO101-LiftCube-RL-v0 \
+    --checkpoint logs/rl/<run>/model_<iter>.pt \
+    --num_envs 4 \
+    --num_episodes 20
+```
+
+## Reward Design
+
+The LiftCube RL task uses four reward terms:
+
+| Term | Weight | Description |
+|------|--------|-------------|
+| `cube_success` | 100.0 | One-time bonus when cube height ≥ 20 cm above robot base. Episode ends immediately after (early termination). |
+| `ee_to_cube` | 2.5 | `1 - tanh(5 × dist)` — guides gripper body toward a point 10 cm above cube center. Always active. Range [0, 1]. |
+| `cube_grasped` | 7.0 | Soft grasp score in [0, 1]. Active when cube is properly grasped (see below). |
+| `cube_height` | 20.0 | `exp(-10 × |h - 0.20|)` peaking at 20 cm. Only active when grasped AND cube height > 5 cm (filters resting-on-table baseline and tipped-corner cases). |
+
+**Grasp detection** (`_is_grasped`) uses three conditions multiplied together (all soft via sigmoid):
+
+1. `jaw_contact` force > 0.5 N on cube (ContactSensor filtered to cube-only)
+2. `gripper_contact` force > 0.5 N on cube (ContactSensor filtered to cube-only)
+3. Gripper joint position < 0.5 rad (gripper is actually closed)
+
+**Termination**: episode ends on timeout (15 s) or when cube height ≥ 20 cm (success).
+
+## Action Space
+
+RL training uses the `rl_so101leader` device mode — delta end-effector control with a binary gripper:
+
+| Component | Dims | Description |
+|-----------|------|-------------|
+| `arm_action` | 6 | Delta EE pose (dx, dy, dz, droll, dpitch, dyaw), scale=(0.02, 0.02, 0.02, 0.1, 0.1, 0.1) → ±2 cm / ±0.1 rad per step |
+| `gripper_action` | 1 | Binary: action > 0 → open (1.0 rad), action < 0 → close (0.2 rad) |
+| **Total** | **7** | |
+
+## Observation Space
+
+22D flat vector (concatenated):
+
+| Term | Dims |
+|------|------|
+| `joint_pos` | 6 |
+| `joint_vel` | 6 |
+| `ee_frame_state` (pos + quat, robot frame) | 7 |
+| `cube_pos_relative_to_ee` | 3 |
+| **Total** | **22** |
+
+## Adding a New RL Task
+
+1. Create `<task>/mdp/rewards.py` with reward functions.
+2. Create `<task>/<task>_rl_env_cfg.py` with `TRAIN_CFG` dict and env config class:
+
+```python
+TRAIN_CFG = { ... }  # PPO hyperparameters
+
+@configclass
+class MyTaskRLEnvCfg(MyTaskEnvCfg):
+    observations: MyTaskRLObsCfg = MyTaskRLObsCfg()
+    rewards: MyTaskRLRewardsCfg = MyTaskRLRewardsCfg()
+    terminations: MyTaskRLTerminationsCfg = MyTaskRLTerminationsCfg()
+
+    def __post_init__(self):
+        super().__post_init__()
+        self.use_teleop_device("rl_so101leader")  # or "bi_rl_so101leader" for bi-arm
+        self.scene.front = None  # disable camera for faster training
+        self.episode_length_s = 15.0
+```
+
+3. Register the gym environment in `<task>/__init__.py` with both `env_cfg_entry_point` and `rsl_rl_cfg_entry_point`:
+
+```python
+gym.register(
+    id="LeIsaac-SO101-MyTask-RL-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.<task>_rl_env_cfg:MyTaskRLEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{__name__}.<task>_rl_env_cfg:TRAIN_CFG",
+    },
+)
+```
+
+4. Train with the generic script: `python scripts/rl/train.py --task LeIsaac-SO101-MyTask-RL-v0`.
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 7f6402b3..2b037604 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -123,6 +123,12 @@ const sidebars = {
           link: { type: 'doc', id: 'docs/features/state_machine' },
           items: [],
         },
+        {
+          type: 'category',
+          label: 'RL Training',
+          link: { type: 'doc', id: 'docs/features/rl_training' },
+          items: [],
+        },
       ],
     },
     'docs/trouble_shooting',
diff --git a/scripts/datagen/state_machine/generate.py b/scripts/datagen/state_machine/generate.py
index 6d73407e..cb402451 100644
--- a/scripts/datagen/state_machine/generate.py
+++ b/scripts/datagen/state_machine/generate.py
@@ -58,7 +58,7 @@
 
 # Maps gym task id → (StateMachineClass, device_type)
 TASK_REGISTRY = {
-    "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
+    "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "ik_so101leader"),
 }
 
 
diff --git a/scripts/datagen/state_machine/replay.py b/scripts/datagen/state_machine/replay.py
index e8744afc..7a2224aa 100644
--- a/scripts/datagen/state_machine/replay.py
+++ b/scripts/datagen/state_machine/replay.py
@@ -35,8 +35,8 @@
     type=str,
     default=None,
     help=(
-        "State machine device type used during recording, e.g. 'so101_state_machine' or "
-        "'bi_so101_state_machine'. If not set, inferred from the task name."
+        "State machine device type used during recording, e.g. 'ik_so101leader' or "
+        "'bi_ik_so101leader'. If not set, inferred from the task name."
     ),
 )
 
@@ -83,7 +83,7 @@ def get_next_action(episode_data: EpisodeData, return_state: bool = False, task_
         next_state = episode_data.get_next_state()
         if next_state is None:
             return None
-        if task_type == "bi_so101_state_machine":
+        if task_type == "bi_ik_so101leader":
             left_joint_pos = next_state["articulation"]["left_arm"]["joint_position"]
             right_joint_pos = next_state["articulation"]["right_arm"]["joint_position"]
             return torch.cat([left_joint_pos, right_joint_pos], dim=0)
@@ -95,9 +95,9 @@ def get_next_action(episode_data: EpisodeData, return_state: bool = False, task_
 
 def apply_damping(env, task_type: str):
     """Apply joint damping each step to match state-machine recording behavior."""
-    if task_type == "so101_state_machine":
+    if task_type == "ik_so101leader":
         env.scene["robot"].write_joint_damping_to_sim(damping=10.0)
-    elif task_type == "bi_so101_state_machine":
+    elif task_type == "bi_ik_so101leader":
         env.scene["left_arm"].write_joint_damping_to_sim(damping=10.0)
         env.scene["right_arm"].write_joint_damping_to_sim(damping=10.0)
 
diff --git a/scripts/rl/play.py b/scripts/rl/play.py
new file mode 100644
index 00000000..e0447b8d
--- /dev/null
+++ b/scripts/rl/play.py
@@ -0,0 +1,118 @@
+"""Generic script to play any leisaac RL task with a trained RSL-RL checkpoint."""
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="Play a leisaac RL task with RSL-RL.")
+parser.add_argument("--task", type=str, required=True, help="Gym task ID (e.g. LeIsaac-SO101-LiftCube-RL-v0).")
+parser.add_argument("--checkpoint", type=str, required=True, help="Path to model checkpoint (.pt).")
+parser.add_argument("--num_envs", type=int, default=4, help="Number of parallel environments.")
+parser.add_argument("--num_episodes", type=int, default=0, help="Episodes to run per env (0 = infinite).")
+parser.add_argument("--seed", type=int, default=42, help="Random seed.")
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+# Force rendering on
+args_cli.headless = False
+
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+import importlib
+
+import gymnasium as gym
+import leisaac.tasks  # noqa: F401 — registers all leisaac gym envs
+import torch
+from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
+from rsl_rl.runners import OnPolicyRunner
+
+
+def _load_from_entry_point(entry_point: str):
+    """Load an object from a 'module.path:attribute' entry point string."""
+    module_path, attr = entry_point.rsplit(":", 1)
+    module = importlib.import_module(module_path)
+    return getattr(module, attr)
+
+
+def main():
+    # --- Load configs from task registry ---
+    task_spec = gym.registry[args_cli.task]
+    kwargs = task_spec.kwargs
+
+    env_cfg_cls = _load_from_entry_point(kwargs["env_cfg_entry_point"])
+    train_cfg = dict(_load_from_entry_point(kwargs["rsl_rl_cfg_entry_point"]))
+
+    # --- Build env ---
+    env_cfg = env_cfg_cls()
+    env_cfg.scene.num_envs = args_cli.num_envs
+    env_cfg.seed = args_cli.seed
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+    env_cfg.recorders = None  # disable HDF5 recording during eval
+
+    device = env_cfg.sim.device
+
+    env = gym.make(args_cli.task, cfg=env_cfg)
+    env = RslRlVecEnvWrapper(env)
+
+    # --- Load checkpoint ---
+    runner = OnPolicyRunner(env, train_cfg, log_dir=None, device=device)
+    runner.load(args_cli.checkpoint)
+    print(f"[INFO] Task: {args_cli.task}")
+    print(f"[INFO] Loaded checkpoint: {args_cli.checkpoint}")
+
+    policy = runner.get_inference_policy(device=device)
+
+    # --- Episode tracking ---
+    episode_count = 0
+    success_count = 0
+
+    step = 0
+    obs = env.get_observations()
+    while simulation_app.is_running() and (args_cli.num_episodes <= 0 or episode_count < args_cli.num_episodes):
+        with torch.no_grad():
+            actions = policy(obs)
+        # Print actions, obs, and heights every 10 steps for env 0
+        if step % 10 == 0:
+            a = actions[0].cpu().numpy()
+            o = obs["policy"][0].cpu().numpy()
+            # obs layout: joint_pos(6) | joint_vel(6) | ee_state(7) | cube_rel_ee(3)
+            cube_rel = o[19:22]  # cube position relative to EE jaw (dx, dy, dz)
+
+            # Raw world heights for debugging
+            unwrapped = env.unwrapped
+            robot = unwrapped.scene["robot"]
+            cube = unwrapped.scene["cube"]
+            base_index = robot.data.body_names.index("base")
+            robot_base_z = robot.data.body_pos_w[0, base_index, 2].item()
+            cube_z = cube.data.root_pos_w[0, 2].item()
+            height_above_base = cube_z - robot_base_z
+
+            print(
+                f"[step {step:4d}] actions: {' '.join(f'{v:+.3f}' for v in a)}"
+                f"  cube_rel_ee(dx,dy,dz): {' '.join(f'{v:+.3f}' for v in cube_rel)}"
+                f"  cube_z={cube_z:.3f} base_z={robot_base_z:.3f} height_above_base={height_above_base:.3f}"
+            )
+        step += 1
+        obs, _, dones, extras = env.step(actions)
+
+        time_outs = extras.get("time_outs", torch.zeros_like(dones))
+        finished = dones.bool()
+        if finished.any():
+            successes = finished & ~time_outs.bool()
+            episode_count += int(finished.sum().item())
+            success_count += int(successes.sum().item())
+            success_rate = success_count / episode_count if episode_count > 0 else 0.0
+            print(
+                f"[Episode {episode_count}] "
+                f"success={int(successes.sum().item())} "
+                f"timeout={int((finished & time_outs.bool()).sum().item())} "
+                f"| total success rate: {success_rate:.1%} ({success_count}/{episode_count})"
+            )
+
+    env.close()
+
+
+if __name__ == "__main__":
+    main()
+    simulation_app.close()
diff --git a/scripts/rl/train.py b/scripts/rl/train.py
new file mode 100644
index 00000000..be347609
--- /dev/null
+++ b/scripts/rl/train.py
@@ -0,0 +1,82 @@
+"""Generic script to train any leisaac RL task with RSL-RL (PPO)."""
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="Train a leisaac RL task with RSL-RL.")
+parser.add_argument("--task", type=str, required=True, help="Gym task ID (e.g. LeIsaac-SO101-LiftCube-RL-v0).")
+parser.add_argument("--num_envs", type=int, default=64, help="Number of parallel environments.")
+parser.add_argument("--max_iterations", type=int, default=1500, help="Number of PPO iterations.")
+parser.add_argument("--log_dir", type=str, default="logs/rl", help="Base logging directory.")
+parser.add_argument("--seed", type=int, default=42, help="Random seed.")
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+import importlib
+import os
+from datetime import datetime
+
+import gymnasium as gym
+import leisaac.tasks  # noqa: F401 — registers all leisaac gym envs
+from isaaclab.utils.io import dump_yaml
+from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
+from rsl_rl.runners import OnPolicyRunner
+
+
+def _load_from_entry_point(entry_point: str):
+    """Load an object from a 'module.path:attribute' entry point string."""
+    module_path, attr = entry_point.rsplit(":", 1)
+    module = importlib.import_module(module_path)
+    return getattr(module, attr)
+
+
+def main():
+    # --- Load configs from task registry ---
+    task_spec = gym.registry[args_cli.task]
+    kwargs = task_spec.kwargs
+
+    env_cfg_cls = _load_from_entry_point(kwargs["env_cfg_entry_point"])
+    train_cfg = dict(_load_from_entry_point(kwargs["rsl_rl_cfg_entry_point"]))
+
+    # --- Build env ---
+    env_cfg = env_cfg_cls()
+    env_cfg.scene.num_envs = args_cli.num_envs
+    env_cfg.seed = args_cli.seed
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+
+    train_cfg["seed"] = args_cli.seed
+
+    device = env_cfg.sim.device
+
+    # --- Set up logging ---
+    task_slug = args_cli.task.lower().replace("-", "_").replace("leisaac_", "").replace("_v0", "")
+    log_dir = os.path.join(args_cli.log_dir, task_slug, datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
+    log_dir = os.path.abspath(log_dir)
+    print(f"[INFO] Task: {args_cli.task}")
+    print(f"[INFO] Logging to: {log_dir}")
+    env_cfg.log_dir = log_dir
+
+    # --- Create environment ---
+    env = gym.make(args_cli.task, cfg=env_cfg)
+    env = RslRlVecEnvWrapper(env)
+
+    # --- Create PPO runner ---
+    runner = OnPolicyRunner(env, train_cfg, log_dir=log_dir, device=device)
+
+    # --- Dump env config ---
+    os.makedirs(os.path.join(log_dir, "params"), exist_ok=True)
+    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
+
+    # --- Train ---
+    runner.learn(num_learning_iterations=args_cli.max_iterations, init_at_random_ep_len=True)
+
+    env.close()
+
+
+if __name__ == "__main__":
+    main()
+    simulation_app.close()
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index 6abcc5dd..33bc77ac 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -76,7 +76,7 @@ def init_action_cfg(action_cfg, device):
             joint_names=["gripper"],
             scale=1.0,
         )
-    elif device in ["so101_state_machine"]:  # IK-based: action = EE pose (7D) + binary gripper, not raw joint angles
+    elif device in ["ik_so101leader"]:  # IK-based: action = EE pose (7D) + binary gripper, not raw joint angles
         action_cfg.arm_action = mdp.DifferentialInverseKinematicsActionCfg(
             asset_name="robot",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
@@ -91,7 +91,25 @@ def init_action_cfg(action_cfg, device):
             open_command_expr={"gripper": 1.0},
             close_command_expr={"gripper": 0.4},
         )
-    elif device in ["bi_so101_state_machine"]:  # IK-based: action = EE pose (7D) + binary gripper, not raw joint angles
+    elif device in ["rl_so101leader"]:  # RL: delta EE pose (6D) + binary gripper (1D) = 7D total
+        # use_relative_mode=True: action = (dx, dy, dz, droll, dpitch, dyaw)
+        # translation scale=0.02 → ±2cm/step; rotation scale=0.1 → ±0.1rad/step (~5.7°) at max action
+        action_cfg.arm_action = mdp.DifferentialInverseKinematicsActionCfg(
+            asset_name="robot",
+            joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
+            body_name="gripper",
+            scale=(0.02, 0.02, 0.02, 0.1, 0.1, 0.1),
+            controller=mdp.DifferentialIKControllerCfg(
+                command_type="pose", ik_method="dls", use_relative_mode=True, ik_params={"lambda_val": 0.04}
+            ),
+        )
+        action_cfg.gripper_action = mdp.BinaryJointPositionActionCfg(
+            asset_name="robot",
+            joint_names=["gripper"],
+            open_command_expr={"gripper": 1.0},
+            close_command_expr={"gripper": 0.2},
+        )
+    elif device in ["bi_ik_so101leader"]:  # IK-based: action = EE pose (7D) + binary gripper, not raw joint angles
         action_cfg.left_arm_action = mdp.DifferentialInverseKinematicsActionCfg(
             asset_name="left_arm",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
@@ -120,6 +138,38 @@ def init_action_cfg(action_cfg, device):
             open_command_expr={"gripper": 1.0},
             close_command_expr={"gripper": 0.01},
         )
+    elif device in ["bi_rl_so101leader"]:  # RL: delta EE control for both arms, 6D×2 + binary gripper×2 = 14D total
+        # use_relative_mode=True: action = (dx, dy, dz, droll, dpitch, dyaw) per arm, scale=0.05 → ±5cm/step
+        action_cfg.left_arm_action = mdp.DifferentialInverseKinematicsActionCfg(
+            asset_name="left_arm",
+            joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
+            body_name="gripper",
+            scale=0.05,
+            controller=mdp.DifferentialIKControllerCfg(
+                command_type="pose", ik_method="dls", use_relative_mode=True, ik_params={"lambda_val": 0.04}
+            ),
+        )
+        action_cfg.left_gripper_action = mdp.BinaryJointPositionActionCfg(
+            asset_name="left_arm",
+            joint_names=["gripper"],
+            open_command_expr={"gripper": 1.0},
+            close_command_expr={"gripper": 0.01},
+        )
+        action_cfg.right_arm_action = mdp.DifferentialInverseKinematicsActionCfg(
+            asset_name="right_arm",
+            joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
+            body_name="gripper",
+            scale=0.05,
+            controller=mdp.DifferentialIKControllerCfg(
+                command_type="pose", ik_method="dls", use_relative_mode=True, ik_params={"lambda_val": 0.04}
+            ),
+        )
+        action_cfg.right_gripper_action = mdp.BinaryJointPositionActionCfg(
+            asset_name="right_arm",
+            joint_names=["gripper"],
+            open_command_expr={"gripper": 1.0},
+            close_command_expr={"gripper": 0.01},
+        )
     """LeKiwi action configuration"""
     if device in ["lekiwi-leader", "lekiwi-keyboard", "lekiwi-gamepad"]:
         action_cfg.wheel_action = mdp.JointVelocityActionCfg(
diff --git a/source/leisaac/leisaac/tasks/lift_cube/__init__.py b/source/leisaac/leisaac/tasks/lift_cube/__init__.py
index 54e2d807..c4896fef 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/__init__.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/__init__.py
@@ -9,6 +9,16 @@
     },
 )
 
+gym.register(
+    id="LeIsaac-SO101-LiftCube-RL-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.lift_cube_rl_env_cfg:LiftCubeRLEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{__name__}.lift_cube_rl_env_cfg:TRAIN_CFG",
+    },
+)
+
 gym.register(
     id="LeIsaac-SO101-LiftCube-DigitalTwin-v0",
     entry_point="leisaac.enhance.envs:ManagerBasedRLDigitalTwinEnv",
@@ -20,7 +30,7 @@
 
 gym.register(
     id="LeIsaac-SO101-LiftCube-Mimic-v0",
-    entry_point=f"leisaac.enhance.envs:ManagerBasedRLLeIsaacMimicEnv",
+    entry_point="leisaac.enhance.envs:ManagerBasedRLLeIsaacMimicEnv",
     disable_env_checker=True,
     kwargs={
         "env_cfg_entry_point": f"{__name__}.lift_cube_mimic_env_cfg:LiftCubeMimicEnvCfg",
diff --git a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
new file mode 100644
index 00000000..3adf0300
--- /dev/null
+++ b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
@@ -0,0 +1,195 @@
+import math
+
+from isaaclab.managers import ObservationGroupCfg as ObsGroup
+from isaaclab.managers import ObservationTermCfg as ObsTerm
+from isaaclab.managers import RewardTermCfg as RewTerm
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.managers import TerminationTermCfg as DoneTerm
+from isaaclab.sensors import ContactSensorCfg
+from isaaclab.utils import configclass
+
+from . import mdp
+from .lift_cube_env_cfg import LiftCubeEnvCfg, LiftCubeSceneCfg
+
+TRAIN_CFG = {
+    "actor": {
+        "class_name": "MLPModel",
+        "hidden_dims": [256, 128, 64],
+        "activation": "elu",
+        "obs_normalization": True,
+        "distribution_cfg": {
+            "class_name": "GaussianDistribution",
+            "init_std": 1.0,
+        },
+    },
+    "critic": {
+        "class_name": "MLPModel",
+        "hidden_dims": [256, 128, 64],
+        "activation": "elu",
+        "obs_normalization": True,
+    },
+    "algorithm": {
+        "class_name": "PPO",
+        "value_loss_coef": 1.0,
+        "use_clipped_value_loss": True,
+        "clip_param": 0.1,
+        "entropy_coef": 0.005,
+        "num_learning_epochs": 5,
+        "num_mini_batches": 4,
+        "learning_rate": 3.0e-4,
+        "schedule": "fixed",
+        "gamma": 0.95,
+        "lam": 0.95,
+        "desired_kl": 0.01,
+        "max_grad_norm": 0.5,
+    },
+    "obs_groups": {"actor": ["policy"], "critic": ["policy"]},
+    "num_steps_per_env": 256,
+    "save_interval": 50,
+    "experiment_name": "lift_cube_rl",
+    "seed": 42,
+}
+
+_CUBE_CFG = SceneEntityCfg("cube")
+_ROBOT_CFG = SceneEntityCfg("robot")
+
+
+@configclass
+class LiftCubeRLSceneCfg(LiftCubeSceneCfg):
+    """RL-specific scene: adds ContactSensors on both gripper sides for accurate grasp detection."""
+
+    jaw_contact: ContactSensorCfg = ContactSensorCfg(
+        prim_path="{ENV_REGEX_NS}/Robot/jaw",
+        filter_prim_paths_expr=["{ENV_REGEX_NS}/Scene/cube"],
+    )
+    gripper_contact: ContactSensorCfg = ContactSensorCfg(
+        prim_path="{ENV_REGEX_NS}/Robot/gripper",
+        filter_prim_paths_expr=["{ENV_REGEX_NS}/Scene/cube"],
+    )
+
+
+@configclass
+class LiftCubeRLObservationsCfg:
+    """Flat vector observations for RL (22D total).
+
+    joint_pos(6) + joint_vel(6) + ee_frame_state(7) + cube_rel_ee(3) = 22D
+    """
+
+    @configclass
+    class PolicyCfg(ObsGroup):
+        joint_pos = ObsTerm(func=mdp.joint_pos)
+        joint_vel = ObsTerm(func=mdp.joint_vel)
+        ee_frame_state = ObsTerm(
+            func=mdp.ee_frame_state,
+            params={"ee_frame_cfg": SceneEntityCfg("ee_frame"), "robot_cfg": _ROBOT_CFG},
+        )
+        # Cube position relative to EE jaw (3D): tells policy where to reach
+        cube_pos_relative_to_ee = ObsTerm(
+            func=mdp.cube_pos_relative_to_ee,
+            params={"cube_cfg": _CUBE_CFG, "ee_frame_cfg": SceneEntityCfg("ee_frame")},
+        )
+
+        def __post_init__(self):
+            self.enable_corruption = False
+            self.concatenate_terms = True
+
+    policy: PolicyCfg = PolicyCfg()
+
+
+@configclass
+class LiftCubeRLRewardsCfg:
+    """Reward terms for lift-cube RL training."""
+
+    # Success: large one-time bonus when cube reaches target height (episode ends immediately after)
+    cube_success = RewTerm(
+        func=mdp.cube_success_bonus,
+        weight=100.0,
+        params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "height_threshold": 0.20},
+    )
+    # Stage 1: tanh reaching reward [0, 1] — always has gradient toward cube
+    ee_to_cube = RewTerm(
+        func=mdp.ee_to_cube_reward,
+        weight=2.5,
+        params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG},
+    )
+    # Stage 2: grasped bonus — both gripper sides in contact with cube
+    cube_grasped = RewTerm(
+        func=mdp.cube_grasped_reward,
+        weight=7.0,
+        params={"robot_cfg": _ROBOT_CFG},
+    )
+    # Stage 3: height reward [0, 1], only when grasped — peaks at 20cm
+    cube_height = RewTerm(
+        func=mdp.cube_height_if_grasped,
+        weight=20.0,
+        params={
+            "cube_cfg": _CUBE_CFG,
+            "robot_cfg": _ROBOT_CFG,
+            "sharpness": 10.0,
+            "lift_threshold": 0.05,
+        },
+    )
+
+
+@configclass
+class LiftCubeRLTerminationsCfg:
+    """Terminations: timeout + early success."""
+
+    time_out = DoneTerm(func=mdp.time_out, time_out=True)
+    success = DoneTerm(
+        func=mdp.cube_height_above_base,
+        params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "height_threshold": 0.20},
+    )
+
+
+@configclass
+class LiftCubeRLEnvCfg(LiftCubeEnvCfg):
+    """RL-specific configuration for the lift-cube environment.
+
+    Observations: 22D flat vector.
+    Action space: 7D (rl_so101leader: 6D delta EE pose + 1D binary gripper).
+    Cameras disabled for faster training.
+    """
+
+    scene: LiftCubeRLSceneCfg = LiftCubeRLSceneCfg(env_spacing=8.0)
+    observations: LiftCubeRLObservationsCfg = LiftCubeRLObservationsCfg()
+    rewards: LiftCubeRLRewardsCfg = LiftCubeRLRewardsCfg()
+    terminations: LiftCubeRLTerminationsCfg = LiftCubeRLTerminationsCfg()
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+
+        self.use_teleop_device("rl_so101leader")
+
+        # Enable contact sensors on robot bodies (required for jaw_contact sensor)
+        self.scene.robot.spawn.activate_contact_sensors = True
+
+        # Disable camera for faster RL training
+        self.scene.front = None
+
+        # Remove camera randomization events that reference the disabled front camera
+        for attr_name in list(vars(self.events).keys()):
+            term = getattr(self.events, attr_name, None)
+            if (
+                hasattr(term, "params")
+                and isinstance(term.params.get("asset_cfg"), SceneEntityCfg)
+                and term.params["asset_cfg"].name == "front"
+            ):
+                delattr(self.events, attr_name)
+
+        self.episode_length_s = 15.0
+
+        # Increase joint damping for smoother RL movement (default=0.60 causes oscillation)
+        for actuator_cfg in self.scene.robot.actuators.values():
+            actuator_cfg.damping = 15.0
+
+        # Set robot initial joint positions to the rest pose used by the state machine
+        # (shoulder_lift=-100°, elbow_flex=90°, wrist_flex=50°) so the EE points toward the table.
+        self.scene.robot.init_state.joint_pos = {
+            "shoulder_pan": 0.0,
+            "shoulder_lift": math.radians(-100.0),
+            "elbow_flex": math.radians(90.0),
+            "wrist_flex": math.radians(50.0),
+            "wrist_roll": 0.0,
+            "gripper": 1.0,  # open
+        }
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/__init__.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/__init__.py
index 38ebdb6c..1d602cf2 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/__init__.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/__init__.py
@@ -2,4 +2,5 @@
 from leisaac.enhance.envs.mdp import *
 
 from .observations import *
+from .rewards import *
 from .terminations import *
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/observations.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/observations.py
index eb808e5b..0e3f86ad 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/observations.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/observations.py
@@ -25,3 +25,15 @@ def object_grasped(
     grasped = torch.logical_and(pos_diff < diff_threshold, robot.data.joint_pos[:, -1] < grasp_threshold)
 
     return grasped
+
+
+def cube_pos_relative_to_ee(
+    env: ManagerBasedRLEnv,
+    cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
+    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
+) -> torch.Tensor:
+    """Returns cube position relative to the EE jaw. (num_envs, 3)"""
+    ee_frame: FrameTransformer = env.scene[ee_frame_cfg.name]
+    ee_pos = ee_frame.data.target_pos_w[:, 1, :]  # jaw position
+    cube: RigidObject = env.scene[cube_cfg.name]
+    return cube.data.root_pos_w - ee_pos
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
new file mode 100644
index 00000000..b880ca43
--- /dev/null
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
@@ -0,0 +1,127 @@
+import torch
+from isaaclab.assets import Articulation, RigidObject
+from isaaclab.envs import ManagerBasedRLEnv
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.sensors import ContactSensor
+
+
+def _gripper_root_pos(env: ManagerBasedRLEnv, robot_cfg: SceneEntityCfg, body_name: str = "gripper") -> torch.Tensor:
+    robot: Articulation = env.scene[robot_cfg.name]
+    body_index = robot.data.body_names.index(body_name)
+    return robot.data.body_pos_w[:, body_index, :]  # (num_envs, 3)
+
+
+def _cube_pos(env: ManagerBasedRLEnv, cube_cfg: SceneEntityCfg) -> torch.Tensor:
+    cube: RigidObject = env.scene[cube_cfg.name]
+    return cube.data.root_pos_w  # (num_envs, 3)
+
+
+def _robot_base_height(env: ManagerBasedRLEnv, robot_cfg: SceneEntityCfg, base_name: str = "base") -> torch.Tensor:
+    robot: Articulation = env.scene[robot_cfg.name]
+    base_index = robot.data.body_names.index(base_name)
+    return robot.data.body_pos_w[:, base_index, 2]  # (num_envs,)
+
+
+def ee_to_cube_reward(
+    env: ManagerBasedRLEnv,
+    cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    k: float = 5.0,
+) -> torch.Tensor:
+    """Reaching reward: closer gripper body is to 10cm above cube center, the higher. Range [0, 1]."""
+    target_pos = _cube_pos(env, cube_cfg).clone()
+    target_pos[:, 2] += 0.10
+    dist = torch.linalg.vector_norm(target_pos - _gripper_root_pos(env, robot_cfg), dim=1)
+    return 1.0 - torch.tanh(k * dist)
+
+
+def _is_grasped(
+    env: ManagerBasedRLEnv,
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> torch.Tensor:
+    """Grasp detection: contact on both sides + gripper closed + upward support force.
+
+    Four conditions (all soft via sigmoid):
+      1. jaw_contact force > 0.5 N  (sensor filtered to cube-only)
+      2. gripper_contact force > 0.5 N  (sensor filtered to cube-only)
+      3. gripper joint < 0.5 rad (gripper actually closed)
+      4. contact force Z < 0 on both sensors: cube pulls gripper down via friction,
+         consistent with supporting cube weight. Rejects top-smash (Z > 0).
+    Shape: (num_envs,), range [0, 1].
+    """
+    jaw_sensor: ContactSensor = env.scene["jaw_contact"]
+    gripper_sensor: ContactSensor = env.scene["gripper_contact"]
+
+    jaw_force = torch.linalg.vector_norm(jaw_sensor.data.net_forces_w[:, 0, :], dim=1)
+    gripper_force = torch.linalg.vector_norm(gripper_sensor.data.net_forces_w[:, 0, :], dim=1)
+
+    jaw_contact = torch.sigmoid(5.0 * (jaw_force - 0.5))
+    gripper_contact = torch.sigmoid(5.0 * (gripper_force - 0.5))
+
+    # Gripper closed constraint: joint≈0.2 (closed) → 1, joint≈1.0 (open) → 0
+    robot: Articulation = env.scene[robot_cfg.name]
+    gripper_pos = robot.data.joint_pos[:, -1]
+    gripper_closed = torch.sigmoid(10.0 * (0.5 - gripper_pos))
+
+    # Upward support: sensor Z force < 0 means cube is pulling gripper down (friction grip).
+    # Top-smash produces Z > 0 (cube pushes gripper up). Scale 2.0 gives soft threshold near 0N.
+    jaw_z = jaw_sensor.data.net_forces_w[:, 0, 2]
+    grip_z = gripper_sensor.data.net_forces_w[:, 0, 2]
+    upward_support = torch.sigmoid(-2.0 * jaw_z) * torch.sigmoid(-2.0 * grip_z)
+
+    return jaw_contact * gripper_contact * gripper_closed * upward_support
+
+
+def cube_grasped_reward(
+    env: ManagerBasedRLEnv,
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> torch.Tensor:
+    """Grasp reward: contact + closed + upward friction support. Range [0, 1]."""
+    return _is_grasped(env, robot_cfg=robot_cfg)
+
+
+def cube_height_if_grasped(
+    env: ManagerBasedRLEnv,
+    cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    target_height: float = 0.20,
+    sharpness: float = 2.0,
+    lift_threshold: float = 0.08,
+) -> torch.Tensor:
+    """Exponential height reward, only when grasped AND cube lifted above lift_threshold.
+
+    lift_threshold filters out the resting-on-table baseline (~0.046m). Range [0, 1].
+    """
+    height_above_base = _cube_pos(env, cube_cfg)[:, 2] - _robot_base_height(env, robot_cfg)
+    height_rew = torch.exp(-sharpness * torch.abs(height_above_base - target_height))
+    lifted = (height_above_base > lift_threshold).float()
+    return height_rew * _is_grasped(env, robot_cfg=robot_cfg) * lifted
+
+
+def gripper_close_when_near(
+    env: ManagerBasedRLEnv,
+    cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    near_dist: float = 0.10,
+    k: float = 50.0,
+) -> torch.Tensor:
+    """Reward gripper closure when gripper body is within near_dist of cube. Range [0, 1]."""
+    dist = torch.linalg.vector_norm(_cube_pos(env, cube_cfg) - _gripper_root_pos(env, robot_cfg), dim=1)
+    near = torch.sigmoid(k * (near_dist - dist))
+
+    robot: Articulation = env.scene[robot_cfg.name]
+    gripper_pos = robot.data.joint_pos[:, -1]
+    closed_signal = torch.sigmoid(10.0 * (0.5 - gripper_pos))  # 1 when closed (joint≈0.2)
+
+    return near * closed_signal
+
+
+def cube_success_bonus(
+    env: ManagerBasedRLEnv,
+    cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
+    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    height_threshold: float = 0.20,
+) -> torch.Tensor:
+    """One-time large bonus when cube reaches the success height threshold."""
+    height_above_base = _cube_pos(env, cube_cfg)[:, 2] - _robot_base_height(env, robot_cfg)
+    return (height_above_base >= height_threshold).float()
diff --git a/source/leisaac/leisaac/tasks/pick_orange/agents/__init__.py b/source/leisaac/leisaac/tasks/pick_orange/agents/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/source/leisaac/leisaac/tasks/template/bi_arm_env_cfg.py b/source/leisaac/leisaac/tasks/template/bi_arm_env_cfg.py
index 6a07c6ff..ea2d2af9 100644
--- a/source/leisaac/leisaac/tasks/template/bi_arm_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/template/bi_arm_env_cfg.py
@@ -211,7 +211,7 @@ def __post_init__(self) -> None:
     def use_teleop_device(self, teleop_device) -> None:
         self.task_type = teleop_device
         self.actions = init_action_cfg(self.actions, device=teleop_device)
-        if teleop_device in ["bi_so101_state_machine"]:
+        if teleop_device in ["bi_ik_so101leader"]:
             self.scene.left_arm.spawn.rigid_props.disable_gravity = True
             self.scene.right_arm.spawn.rigid_props.disable_gravity = True
 
diff --git a/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py b/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
index 614cdd5f..f0a92960 100644
--- a/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
+++ b/source/leisaac/leisaac/tasks/template/direct/bi_arm_env.py
@@ -92,7 +92,7 @@ def __post_init__(self) -> None:
     def use_teleop_device(self, teleop_device) -> None:
         self.task_type = teleop_device
         # self.actions = init_action_cfg(self.actions, device=teleop_device)
-        if teleop_device in ["bi_so101_state_machine"]:
+        if teleop_device in ["bi_ik_so101leader"]:
             self.scene.left_arm.spawn.rigid_props.disable_gravity = True
             self.scene.right_arm.spawn.rigid_props.disable_gravity = True
 
diff --git a/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py b/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
index aa7ea7a6..147cd370 100644
--- a/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
+++ b/source/leisaac/leisaac/tasks/template/direct/single_arm_env.py
@@ -83,7 +83,7 @@ def __post_init__(self) -> None:
 
     def use_teleop_device(self, teleop_device) -> None:
         self.task_type = teleop_device
-        if teleop_device in ["keyboard", "gamepad", "so101_state_machine"]:
+        if teleop_device in ["keyboard", "gamepad", "ik_so101leader"]:
             self.scene.robot.spawn.rigid_props.disable_gravity = True
 
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:
diff --git a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
index b11664a3..a2acdeb1 100644
--- a/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/template/single_arm_env_cfg.py
@@ -194,7 +194,7 @@ def __post_init__(self) -> None:
     def use_teleop_device(self, teleop_device) -> None:
         self.task_type = teleop_device
         self.actions = init_action_cfg(self.actions, device=teleop_device)
-        if teleop_device in ["keyboard", "gamepad", "so101_state_machine"]:
+        if teleop_device in ["keyboard", "gamepad", "ik_so101leader", "rl_so101leader"]:
             self.scene.robot.spawn.rigid_props.disable_gravity = True
 
     def preprocess_device_action(self, action: dict[str, Any], teleop_device) -> torch.Tensor:

From c7013c5dc7c3de3a43a6128293fd6e77624f8724 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 10 Mar 2026 16:46:17 -0400
Subject: [PATCH 56/68] Change docstring format

---
 .../tasks/lift_cube/lift_cube_rl_env_cfg.py   |  3 +-
 .../leisaac/tasks/lift_cube/mdp/rewards.py    | 31 ++++++-------------
 2 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
index 3adf0300..acf7ed63 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
@@ -183,8 +183,7 @@ def __post_init__(self) -> None:
         for actuator_cfg in self.scene.robot.actuators.values():
             actuator_cfg.damping = 15.0
 
-        # Set robot initial joint positions to the rest pose used by the state machine
-        # (shoulder_lift=-100°, elbow_flex=90°, wrist_flex=50°) so the EE points toward the table.
+        # Initial pose: EE pointing toward table (shoulder_lift=-100°, elbow_flex=90°, wrist_flex=50°).
         self.scene.robot.init_state.joint_pos = {
             "shoulder_pan": 0.0,
             "shoulder_lift": math.radians(-100.0),
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
index b880ca43..b77d899a 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
@@ -39,15 +39,11 @@ def _is_grasped(
     env: ManagerBasedRLEnv,
     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
 ) -> torch.Tensor:
-    """Grasp detection: contact on both sides + gripper closed + upward support force.
-
-    Four conditions (all soft via sigmoid):
-      1. jaw_contact force > 0.5 N  (sensor filtered to cube-only)
-      2. gripper_contact force > 0.5 N  (sensor filtered to cube-only)
-      3. gripper joint < 0.5 rad (gripper actually closed)
-      4. contact force Z < 0 on both sensors: cube pulls gripper down via friction,
-         consistent with supporting cube weight. Rejects top-smash (Z > 0).
-    Shape: (num_envs,), range [0, 1].
+    """Grasp score in [0, 1]: product of three soft sigmoid conditions.
+
+    1. jaw_contact force > 0.5 N (cube-filtered sensor)
+    2. gripper_contact force > 0.5 N (cube-filtered sensor)
+    3. gripper joint < 0.5 rad (gripper closed)
     """
     jaw_sensor: ContactSensor = env.scene["jaw_contact"]
     gripper_sensor: ContactSensor = env.scene["gripper_contact"]
@@ -58,25 +54,19 @@ def _is_grasped(
     jaw_contact = torch.sigmoid(5.0 * (jaw_force - 0.5))
     gripper_contact = torch.sigmoid(5.0 * (gripper_force - 0.5))
 
-    # Gripper closed constraint: joint≈0.2 (closed) → 1, joint≈1.0 (open) → 0
+    # Gripper closed: joint≈0.2 (closed) → 1, joint≈1.0 (open) → 0
     robot: Articulation = env.scene[robot_cfg.name]
     gripper_pos = robot.data.joint_pos[:, -1]
     gripper_closed = torch.sigmoid(10.0 * (0.5 - gripper_pos))
 
-    # Upward support: sensor Z force < 0 means cube is pulling gripper down (friction grip).
-    # Top-smash produces Z > 0 (cube pushes gripper up). Scale 2.0 gives soft threshold near 0N.
-    jaw_z = jaw_sensor.data.net_forces_w[:, 0, 2]
-    grip_z = gripper_sensor.data.net_forces_w[:, 0, 2]
-    upward_support = torch.sigmoid(-2.0 * jaw_z) * torch.sigmoid(-2.0 * grip_z)
-
-    return jaw_contact * gripper_contact * gripper_closed * upward_support
+    return jaw_contact * gripper_contact * gripper_closed
 
 
 def cube_grasped_reward(
     env: ManagerBasedRLEnv,
     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
 ) -> torch.Tensor:
-    """Grasp reward: contact + closed + upward friction support. Range [0, 1]."""
+    """Grasp reward: bilateral contact + gripper closed. Range [0, 1]."""
     return _is_grasped(env, robot_cfg=robot_cfg)
 
 
@@ -88,10 +78,7 @@ def cube_height_if_grasped(
     sharpness: float = 2.0,
     lift_threshold: float = 0.08,
 ) -> torch.Tensor:
-    """Exponential height reward, only when grasped AND cube lifted above lift_threshold.
-
-    lift_threshold filters out the resting-on-table baseline (~0.046m). Range [0, 1].
-    """
+    """Exponential height reward, only when grasped AND cube above lift_threshold (~0.05m). Range [0, 1]."""
     height_above_base = _cube_pos(env, cube_cfg)[:, 2] - _robot_base_height(env, robot_cfg)
     height_rew = torch.exp(-sharpness * torch.abs(height_above_base - target_height))
     lifted = (height_above_base > lift_threshold).float()

From c8f3202e7455543610349b31195ad3a4cac8d129 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 10 Mar 2026 16:49:29 -0400
Subject: [PATCH 57/68] Move the RL directory into the datagen directory

---
 docs/docs/docs/features/rl_training.md | 6 +++---
 scripts/{ => datagen}/rl/play.py       | 0
 scripts/{ => datagen}/rl/train.py      | 0
 3 files changed, 3 insertions(+), 3 deletions(-)
 rename scripts/{ => datagen}/rl/play.py (100%)
 rename scripts/{ => datagen}/rl/train.py (100%)

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
index ce24eaf8..429f83e4 100644
--- a/docs/docs/docs/features/rl_training.md
+++ b/docs/docs/docs/features/rl_training.md
@@ -5,7 +5,7 @@ The RL training module enables training manipulation policies with reinforcement
 ## Training
 
 ```shell
-python scripts/rl/train.py \
+python scripts/datagen/rl/train.py \
     --task LeIsaac-SO101-LiftCube-RL-v0 \
     --num_envs 64 \
     --max_iterations 1500 \
@@ -44,7 +44,7 @@ Key metrics to watch: `Train/mean_reward` (total episode reward) and individual
 ## Evaluation
 
 ```shell
-python scripts/rl/play.py \
+python scripts/datagen/rl/play.py \
     --task LeIsaac-SO101-LiftCube-RL-v0 \
     --checkpoint logs/rl/<run>/model_<iter>.pt \
     --num_envs 4 \
@@ -127,4 +127,4 @@ gym.register(
 )
 ```
 
-4. Train with the generic script: `python scripts/rl/train.py --task LeIsaac-SO101-MyTask-RL-v0`.
+4. Train with the generic script: `python scripts/datagen/rl/train.py --task LeIsaac-SO101-MyTask-RL-v0`.
diff --git a/scripts/rl/play.py b/scripts/datagen/rl/play.py
similarity index 100%
rename from scripts/rl/play.py
rename to scripts/datagen/rl/play.py
diff --git a/scripts/rl/train.py b/scripts/datagen/rl/train.py
similarity index 100%
rename from scripts/rl/train.py
rename to scripts/datagen/rl/train.py

From ecf0153df91394ab3dc1943262759d977fb377c9 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 10 Mar 2026 16:55:31 -0400
Subject: [PATCH 58/68] Update state machine documentation to align with the
 latest changes

---
 docs/docs/docs/features/state_machine.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/docs/docs/features/state_machine.md b/docs/docs/docs/features/state_machine.md
index 081b9def..994e9d14 100644
--- a/docs/docs/docs/features/state_machine.md
+++ b/docs/docs/docs/features/state_machine.md
@@ -60,7 +60,7 @@ After recording, you can replay the collected demonstrations in simulation:
 python scripts/datagen/state_machine/replay.py \
     --task LeIsaac-SO101-PickOrange-v0 \
     --dataset_file ./datasets/pick_orange.hdf5 \
-    --task_type so101_state_machine \
+    --task_type ik_so101leader \
     --select_episodes 0 \
     --device cuda \
     --enable_cameras \
@@ -82,7 +82,7 @@ python scripts/datagen/state_machine/replay.py \
 
 - `--replay_mode`: Replay mode — `action` replays IK pose targets, `state` replays joint positions.
 
-- `--task_type`: State machine device type used during recording, e.g., `so101_state_machine` or `bi_so101_state_machine`. Inferred from task name if not set.
+- `--task_type`: State machine device type used during recording, e.g., `ik_so101leader` or `bi_ik_so101leader`. Inferred from task name if not set.
 
 - `--select_episodes`: List of episode indices to replay. Leave empty to replay all episodes.
 
@@ -97,7 +97,7 @@ python scripts/datagen/state_machine/replay.py \
 
 ```python
 TASK_REGISTRY = {
-    "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "so101_state_machine"),
-    "LeIsaac-MY-NewTask-v0":       (MyNewStateMachine,      "so101_state_machine"),
+    "LeIsaac-SO101-PickOrange-v0": (PickOrangeStateMachine, "ik_so101leader"),
+    "LeIsaac-MY-NewTask-v0":       (MyNewStateMachine,      "ik_so101leader"),
 }
 ```

From 4978c3fb73f1988e90bc8ae7f0b432a0c9fbf16e Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Tue, 10 Mar 2026 20:49:27 -0400
Subject: [PATCH 59/68] Hyperparameter tuning

---
 docs/docs/docs/features/rl_training.md        |  2 +-
 scripts/datagen/rl/train.py                   |  4 ++++
 .../leisaac/leisaac/devices/action_process.py |  4 ++--
 .../tasks/lift_cube/lift_cube_rl_env_cfg.py   |  2 +-
 .../leisaac/tasks/lift_cube/mdp/rewards.py    | 22 ++-----------------
 5 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
index 429f83e4..3dcd5d15 100644
--- a/docs/docs/docs/features/rl_training.md
+++ b/docs/docs/docs/features/rl_training.md
@@ -76,7 +76,7 @@ RL training uses the `rl_so101leader` device mode — delta end-effector control
 
 | Component | Dims | Description |
 |-----------|------|-------------|
-| `arm_action` | 6 | Delta EE pose (dx, dy, dz, droll, dpitch, dyaw), scale=(0.02, 0.02, 0.02, 0.1, 0.1, 0.1) → ±2 cm / ±0.1 rad per step |
+| `arm_action` | 6 | Delta EE pose (dx, dy, dz, droll, dpitch, dyaw), scale=(0.02, 0.02, 0.02, 0.5, 0.5, 0.5) → ±2 cm / ±0.5 rad per step |
 | `gripper_action` | 1 | Binary: action > 0 → open (1.0 rad), action < 0 → close (0.2 rad) |
 | **Total** | **7** | |
 
diff --git a/scripts/datagen/rl/train.py b/scripts/datagen/rl/train.py
index be347609..b9781816 100644
--- a/scripts/datagen/rl/train.py
+++ b/scripts/datagen/rl/train.py
@@ -10,6 +10,7 @@
 parser.add_argument("--max_iterations", type=int, default=1500, help="Number of PPO iterations.")
 parser.add_argument("--log_dir", type=str, default="logs/rl", help="Base logging directory.")
 parser.add_argument("--seed", type=int, default=42, help="Random seed.")
+parser.add_argument("--checkpoint", type=str, default=None, help="Path to checkpoint (.pt) to resume training from.")
 AppLauncher.add_app_launcher_args(parser)
 args_cli = parser.parse_args()
 
@@ -66,6 +67,9 @@ def main():
 
     # --- Create PPO runner ---
     runner = OnPolicyRunner(env, train_cfg, log_dir=log_dir, device=device)
+    if args_cli.checkpoint is not None:
+        runner.load(args_cli.checkpoint)
+        print(f"[INFO] Resumed from checkpoint: {args_cli.checkpoint}")
 
     # --- Dump env config ---
     os.makedirs(os.path.join(log_dir, "params"), exist_ok=True)
diff --git a/source/leisaac/leisaac/devices/action_process.py b/source/leisaac/leisaac/devices/action_process.py
index 33bc77ac..665446c9 100644
--- a/source/leisaac/leisaac/devices/action_process.py
+++ b/source/leisaac/leisaac/devices/action_process.py
@@ -93,12 +93,12 @@ def init_action_cfg(action_cfg, device):
         )
     elif device in ["rl_so101leader"]:  # RL: delta EE pose (6D) + binary gripper (1D) = 7D total
         # use_relative_mode=True: action = (dx, dy, dz, droll, dpitch, dyaw)
-        # translation scale=0.02 → ±2cm/step; rotation scale=0.1 → ±0.1rad/step (~5.7°) at max action
+        # translation scale=0.02 → ±2cm/step; rotation scale=0.5 → ±0.5rad/step (~28.6°) at max action
         action_cfg.arm_action = mdp.DifferentialInverseKinematicsActionCfg(
             asset_name="robot",
             joint_names=["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll"],
             body_name="gripper",
-            scale=(0.02, 0.02, 0.02, 0.1, 0.1, 0.1),
+            scale=(0.02, 0.02, 0.02, 0.5, 0.5, 0.5),
             controller=mdp.DifferentialIKControllerCfg(
                 command_type="pose", ik_method="dls", use_relative_mode=True, ik_params={"lambda_val": 0.04}
             ),
diff --git a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
index acf7ed63..3a19b710 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
@@ -33,7 +33,7 @@
         "value_loss_coef": 1.0,
         "use_clipped_value_loss": True,
         "clip_param": 0.1,
-        "entropy_coef": 0.005,
+        "entropy_coef": 0.02,
         "num_learning_epochs": 5,
         "num_mini_batches": 4,
         "learning_rate": 3.0e-4,
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
index b77d899a..60d75e29 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
@@ -75,8 +75,8 @@ def cube_height_if_grasped(
     cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
     target_height: float = 0.20,
-    sharpness: float = 2.0,
-    lift_threshold: float = 0.08,
+    sharpness: float = 10.0,
+    lift_threshold: float = 0.05,
 ) -> torch.Tensor:
     """Exponential height reward, only when grasped AND cube above lift_threshold (~0.05m). Range [0, 1]."""
     height_above_base = _cube_pos(env, cube_cfg)[:, 2] - _robot_base_height(env, robot_cfg)
@@ -85,24 +85,6 @@ def cube_height_if_grasped(
     return height_rew * _is_grasped(env, robot_cfg=robot_cfg) * lifted
 
 
-def gripper_close_when_near(
-    env: ManagerBasedRLEnv,
-    cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
-    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
-    near_dist: float = 0.10,
-    k: float = 50.0,
-) -> torch.Tensor:
-    """Reward gripper closure when gripper body is within near_dist of cube. Range [0, 1]."""
-    dist = torch.linalg.vector_norm(_cube_pos(env, cube_cfg) - _gripper_root_pos(env, robot_cfg), dim=1)
-    near = torch.sigmoid(k * (near_dist - dist))
-
-    robot: Articulation = env.scene[robot_cfg.name]
-    gripper_pos = robot.data.joint_pos[:, -1]
-    closed_signal = torch.sigmoid(10.0 * (0.5 - gripper_pos))  # 1 when closed (joint≈0.2)
-
-    return near * closed_signal
-
-
 def cube_success_bonus(
     env: ManagerBasedRLEnv,
     cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),

From 4a808fee2516c5aec530bc23716d374b0cf0dadd Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 12 Mar 2026 02:03:34 -0400
Subject: [PATCH 60/68] Modify hyperparameters and rewards

---
 docs/docs/docs/features/rl_training.md        | 39 ++++++---
 scripts/datagen/rl/play.py                    |  2 +-
 .../tasks/lift_cube/lift_cube_rl_env_cfg.py   | 71 +++++-----------
 .../tasks/lift_cube/mdp/observations.py       |  9 ++
 .../leisaac/tasks/lift_cube/mdp/rewards.py    | 82 +++++++------------
 5 files changed, 87 insertions(+), 116 deletions(-)

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
index 3dcd5d15..93bd49da 100644
--- a/docs/docs/docs/features/rl_training.md
+++ b/docs/docs/docs/features/rl_training.md
@@ -7,7 +7,7 @@ The RL training module enables training manipulation policies with reinforcement
 ```shell
 python scripts/datagen/rl/train.py \
     --task LeIsaac-SO101-LiftCube-RL-v0 \
-    --num_envs 64 \
+    --num_envs 512 \
     --max_iterations 1500 \
     --headless
 ```
@@ -17,7 +17,7 @@ python scripts/datagen/rl/train.py \
 
 - `--task`: Gym task ID to train. Required.
 
-- `--num_envs`: Number of parallel simulation environments. More environments = faster data collection. Default: `64`.
+- `--num_envs`: Number of parallel simulation environments. More environments = faster data collection. Default: `512`.
 
 - `--max_iterations`: Number of PPO update iterations. Default: `1500`.
 
@@ -53,23 +53,37 @@ python scripts/datagen/rl/play.py \
 
 ## Reward Design
 
-The LiftCube RL task uses four reward terms:
+The LiftCube RL task uses three reward terms:
 
 | Term | Weight | Description |
 |------|--------|-------------|
 | `cube_success` | 100.0 | One-time bonus when cube height ≥ 20 cm above robot base. Episode ends immediately after (early termination). |
-| `ee_to_cube` | 2.5 | `1 - tanh(5 × dist)` — guides gripper body toward a point 10 cm above cube center. Always active. Range [0, 1]. |
-| `cube_grasped` | 7.0 | Soft grasp score in [0, 1]. Active when cube is properly grasped (see below). |
-| `cube_height` | 20.0 | `exp(-10 × |h - 0.20|)` peaking at 20 cm. Only active when grasped AND cube height > 5 cm (filters resting-on-table baseline and tipped-corner cases). |
+| `ee_to_cube` | 1.5 | `1 - tanh(5 × dist(TCP, cube))` — guides TCP to cube center. Range [0, 1]. |
+| `cube_height` | 10.0 | `tanh(5 × max(h - 4.6 cm, 0))` — zero below 4.6 cm, monotonically increasing above. Range [0, 1]. |
 
-**Grasp detection** (`_is_grasped`) uses three conditions multiplied together (all soft via sigmoid):
+**TCP (Tool Center Point)** is computed as the midpoint between the two fingertip contact surfaces, derived from `body_pos_w` and `body_quat_w` with calibrated local offsets from the USD collision mesh:
 
-1. `jaw_contact` force > 0.5 N on cube (ContactSensor filtered to cube-only)
-2. `gripper_contact` force > 0.5 N on cube (ContactSensor filtered to cube-only)
-3. Gripper joint position < 0.5 rad (gripper is actually closed)
+- Jaw tip offset (jaw body local frame): `(0.0, -0.05, 0.02)`
+- Gripper tip offset (gripper body local frame): `(-0.012, 0.0, -0.08)`
 
 **Termination**: episode ends on timeout (15 s) or when cube height ≥ 20 cm (success).
 
+## PPO Hyperparameters
+
+| Parameter | Value | Notes |
+|-----------|-------|-------|
+| `gamma` | 0.95 | Discount factor |
+| `lam` | 0.95 | GAE lambda |
+| `clip_param` | 0.1 | PPO clip range |
+| `entropy_coef` | 0.005 | Entropy regularization to encourage exploration |
+| `learning_rate` | 1e-4 | Adam learning rate |
+| `schedule` | adaptive | Adjusts learning rate based on KL divergence |
+| `desired_kl` | 0.01 | Target KL divergence for adaptive schedule |
+| `num_learning_epochs` | 5 | PPO update epochs per rollout |
+| `num_mini_batches` | 4 | Mini-batches per epoch |
+| `num_steps_per_env` | 100 | Rollout steps per environment per update |
+| `num_envs` (recommended) | 512 | Parallel environments — more = faster sparse reward discovery |
+
 ## Action Space
 
 RL training uses the `rl_so101leader` device mode — delta end-effector control with a binary gripper:
@@ -82,7 +96,7 @@ RL training uses the `rl_so101leader` device mode — delta end-effector control
 
 ## Observation Space
 
-22D flat vector (concatenated):
+26D flat vector (concatenated):
 
 | Term | Dims |
 |------|------|
@@ -90,7 +104,8 @@ RL training uses the `rl_so101leader` device mode — delta end-effector control
 | `joint_vel` | 6 |
 | `ee_frame_state` (pos + quat, robot frame) | 7 |
 | `cube_pos_relative_to_ee` | 3 |
-| **Total** | **22** |
+| `cube_quat` (orientation in world frame) | 4 |
+| **Total** | **26** |
 
 ## Adding a New RL Task
 
diff --git a/scripts/datagen/rl/play.py b/scripts/datagen/rl/play.py
index e0447b8d..c433eeaa 100644
--- a/scripts/datagen/rl/play.py
+++ b/scripts/datagen/rl/play.py
@@ -76,7 +76,7 @@ def main():
         if step % 10 == 0:
             a = actions[0].cpu().numpy()
             o = obs["policy"][0].cpu().numpy()
-            # obs layout: joint_pos(6) | joint_vel(6) | ee_state(7) | cube_rel_ee(3)
+            # obs layout: joint_pos(6) | joint_vel(6) | ee_state(7) | cube_rel_ee(3) | cube_quat(4)
             cube_rel = o[19:22]  # cube position relative to EE jaw (dx, dy, dz)
 
             # Raw world heights for debugging
diff --git a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
index 3a19b710..64a667c1 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
@@ -5,7 +5,6 @@
 from isaaclab.managers import RewardTermCfg as RewTerm
 from isaaclab.managers import SceneEntityCfg
 from isaaclab.managers import TerminationTermCfg as DoneTerm
-from isaaclab.sensors import ContactSensorCfg
 from isaaclab.utils import configclass
 
 from . import mdp
@@ -19,7 +18,7 @@
         "obs_normalization": True,
         "distribution_cfg": {
             "class_name": "GaussianDistribution",
-            "init_std": 1.0,
+            "init_std": 0.3,
         },
     },
     "critic": {
@@ -33,18 +32,18 @@
         "value_loss_coef": 1.0,
         "use_clipped_value_loss": True,
         "clip_param": 0.1,
-        "entropy_coef": 0.02,
+        "entropy_coef": 0.005,
         "num_learning_epochs": 5,
         "num_mini_batches": 4,
-        "learning_rate": 3.0e-4,
-        "schedule": "fixed",
+        "learning_rate": 1.0e-4,
+        "schedule": "adaptive",
         "gamma": 0.95,
         "lam": 0.95,
         "desired_kl": 0.01,
         "max_grad_norm": 0.5,
     },
     "obs_groups": {"actor": ["policy"], "critic": ["policy"]},
-    "num_steps_per_env": 256,
+    "num_steps_per_env": 100,
     "save_interval": 50,
     "experiment_name": "lift_cube_rl",
     "seed": 42,
@@ -54,25 +53,11 @@
 _ROBOT_CFG = SceneEntityCfg("robot")
 
 
-@configclass
-class LiftCubeRLSceneCfg(LiftCubeSceneCfg):
-    """RL-specific scene: adds ContactSensors on both gripper sides for accurate grasp detection."""
-
-    jaw_contact: ContactSensorCfg = ContactSensorCfg(
-        prim_path="{ENV_REGEX_NS}/Robot/jaw",
-        filter_prim_paths_expr=["{ENV_REGEX_NS}/Scene/cube"],
-    )
-    gripper_contact: ContactSensorCfg = ContactSensorCfg(
-        prim_path="{ENV_REGEX_NS}/Robot/gripper",
-        filter_prim_paths_expr=["{ENV_REGEX_NS}/Scene/cube"],
-    )
-
-
 @configclass
 class LiftCubeRLObservationsCfg:
-    """Flat vector observations for RL (22D total).
+    """Flat vector observations for RL (26D total).
 
-    joint_pos(6) + joint_vel(6) + ee_frame_state(7) + cube_rel_ee(3) = 22D
+    joint_pos(6) + joint_vel(6) + ee_frame_state(7) + cube_rel_ee(3) + cube_quat(4) = 26D
     """
 
     @configclass
@@ -83,11 +68,14 @@ class PolicyCfg(ObsGroup):
             func=mdp.ee_frame_state,
             params={"ee_frame_cfg": SceneEntityCfg("ee_frame"), "robot_cfg": _ROBOT_CFG},
         )
-        # Cube position relative to EE jaw (3D): tells policy where to reach
         cube_pos_relative_to_ee = ObsTerm(
             func=mdp.cube_pos_relative_to_ee,
             params={"cube_cfg": _CUBE_CFG, "ee_frame_cfg": SceneEntityCfg("ee_frame")},
         )
+        cube_quat = ObsTerm(
+            func=mdp.cube_quat,
+            params={"cube_cfg": _CUBE_CFG},
+        )
 
         def __post_init__(self):
             self.enable_corruption = False
@@ -100,34 +88,20 @@ def __post_init__(self):
 class LiftCubeRLRewardsCfg:
     """Reward terms for lift-cube RL training."""
 
-    # Success: large one-time bonus when cube reaches target height (episode ends immediately after)
     cube_success = RewTerm(
         func=mdp.cube_success_bonus,
         weight=100.0,
         params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "height_threshold": 0.20},
     )
-    # Stage 1: tanh reaching reward [0, 1] — always has gradient toward cube
     ee_to_cube = RewTerm(
         func=mdp.ee_to_cube_reward,
-        weight=2.5,
+        weight=1.5,
         params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG},
     )
-    # Stage 2: grasped bonus — both gripper sides in contact with cube
-    cube_grasped = RewTerm(
-        func=mdp.cube_grasped_reward,
-        weight=7.0,
-        params={"robot_cfg": _ROBOT_CFG},
-    )
-    # Stage 3: height reward [0, 1], only when grasped — peaks at 20cm
     cube_height = RewTerm(
-        func=mdp.cube_height_if_grasped,
-        weight=20.0,
-        params={
-            "cube_cfg": _CUBE_CFG,
-            "robot_cfg": _ROBOT_CFG,
-            "sharpness": 10.0,
-            "lift_threshold": 0.05,
-        },
+        func=mdp.cube_height_reward,
+        weight=10.0,
+        params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "min_height": 0.046},
     )
 
 
@@ -146,12 +120,12 @@ class LiftCubeRLTerminationsCfg:
 class LiftCubeRLEnvCfg(LiftCubeEnvCfg):
     """RL-specific configuration for the lift-cube environment.
 
-    Observations: 22D flat vector.
+    Observations: 26D flat vector.
     Action space: 7D (rl_so101leader: 6D delta EE pose + 1D binary gripper).
     Cameras disabled for faster training.
     """
 
-    scene: LiftCubeRLSceneCfg = LiftCubeRLSceneCfg(env_spacing=8.0)
+    scene: LiftCubeSceneCfg = LiftCubeSceneCfg(env_spacing=8.0)
     observations: LiftCubeRLObservationsCfg = LiftCubeRLObservationsCfg()
     rewards: LiftCubeRLRewardsCfg = LiftCubeRLRewardsCfg()
     terminations: LiftCubeRLTerminationsCfg = LiftCubeRLTerminationsCfg()
@@ -161,13 +135,8 @@ def __post_init__(self) -> None:
 
         self.use_teleop_device("rl_so101leader")
 
-        # Enable contact sensors on robot bodies (required for jaw_contact sensor)
-        self.scene.robot.spawn.activate_contact_sensors = True
-
-        # Disable camera for faster RL training
         self.scene.front = None
 
-        # Remove camera randomization events that reference the disabled front camera
         for attr_name in list(vars(self.events).keys()):
             term = getattr(self.events, attr_name, None)
             if (
@@ -179,16 +148,14 @@ def __post_init__(self) -> None:
 
         self.episode_length_s = 15.0
 
-        # Increase joint damping for smoother RL movement (default=0.60 causes oscillation)
         for actuator_cfg in self.scene.robot.actuators.values():
-            actuator_cfg.damping = 15.0
+            actuator_cfg.damping = 8.0
 
-        # Initial pose: EE pointing toward table (shoulder_lift=-100°, elbow_flex=90°, wrist_flex=50°).
         self.scene.robot.init_state.joint_pos = {
             "shoulder_pan": 0.0,
             "shoulder_lift": math.radians(-100.0),
             "elbow_flex": math.radians(90.0),
             "wrist_flex": math.radians(50.0),
             "wrist_roll": 0.0,
-            "gripper": 1.0,  # open
+            "gripper": 1.0,
         }
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/observations.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/observations.py
index 0e3f86ad..985a4af1 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/observations.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/observations.py
@@ -27,6 +27,15 @@ def object_grasped(
     return grasped
 
 
+def cube_quat(
+    env: ManagerBasedRLEnv,
+    cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
+) -> torch.Tensor:
+    """Returns cube orientation as quaternion (w, x, y, z) in world frame. (num_envs, 4)"""
+    cube: RigidObject = env.scene[cube_cfg.name]
+    return cube.data.root_quat_w
+
+
 def cube_pos_relative_to_ee(
     env: ManagerBasedRLEnv,
     cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
index 60d75e29..9c214f43 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
@@ -2,13 +2,32 @@
 from isaaclab.assets import Articulation, RigidObject
 from isaaclab.envs import ManagerBasedRLEnv
 from isaaclab.managers import SceneEntityCfg
-from isaaclab.sensors import ContactSensor
+from isaaclab.utils.math import quat_apply
 
+# Fingertip offsets in each body's local frame, calibrated from USD collision mesh geometry
+_JAW_TIP_OFFSET = torch.tensor([0.0, -0.05, 0.02])
+_GRIPPER_TIP_OFFSET = torch.tensor([-0.012, 0.0, -0.08])
 
-def _gripper_root_pos(env: ManagerBasedRLEnv, robot_cfg: SceneEntityCfg, body_name: str = "gripper") -> torch.Tensor:
+
+def _tcp_pos(env: ManagerBasedRLEnv, robot_cfg: SceneEntityCfg) -> torch.Tensor:
+    """TCP position: midpoint between jaw tip and gripper tip in world frame. (num_envs, 3)"""
     robot: Articulation = env.scene[robot_cfg.name]
-    body_index = robot.data.body_names.index(body_name)
-    return robot.data.body_pos_w[:, body_index, :]  # (num_envs, 3)
+    jaw_idx = robot.data.body_names.index("jaw")
+    gripper_idx = robot.data.body_names.index("gripper")
+
+    jaw_pos = robot.data.body_pos_w[:, jaw_idx, :]
+    jaw_quat = robot.data.body_quat_w[:, jaw_idx, :]
+    gripper_pos = robot.data.body_pos_w[:, gripper_idx, :]
+    gripper_quat = robot.data.body_quat_w[:, gripper_idx, :]
+
+    device = jaw_pos.device
+    N = jaw_pos.shape[0]
+    jaw_offset = _JAW_TIP_OFFSET.to(device).unsqueeze(0).expand(N, -1)
+    gripper_offset = _GRIPPER_TIP_OFFSET.to(device).unsqueeze(0).expand(N, -1)
+
+    jaw_tip = jaw_pos + quat_apply(jaw_quat, jaw_offset)
+    gripper_tip = gripper_pos + quat_apply(gripper_quat, gripper_offset)
+    return (jaw_tip + gripper_tip) / 2.0
 
 
 def _cube_pos(env: ManagerBasedRLEnv, cube_cfg: SceneEntityCfg) -> torch.Tensor:
@@ -28,61 +47,22 @@ def ee_to_cube_reward(
     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
     k: float = 5.0,
 ) -> torch.Tensor:
-    """Reaching reward: closer gripper body is to 10cm above cube center, the higher. Range [0, 1]."""
-    target_pos = _cube_pos(env, cube_cfg).clone()
-    target_pos[:, 2] += 0.10
-    dist = torch.linalg.vector_norm(target_pos - _gripper_root_pos(env, robot_cfg), dim=1)
+    """Reaching reward: 1 - tanh(k * dist) from TCP (grasp midpoint) to cube center. Range [0, 1]."""
+    dist = torch.linalg.vector_norm(_cube_pos(env, cube_cfg) - _tcp_pos(env, robot_cfg), dim=1)
     return 1.0 - torch.tanh(k * dist)
 
 
-def _is_grasped(
-    env: ManagerBasedRLEnv,
-    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
-) -> torch.Tensor:
-    """Grasp score in [0, 1]: product of three soft sigmoid conditions.
-
-    1. jaw_contact force > 0.5 N (cube-filtered sensor)
-    2. gripper_contact force > 0.5 N (cube-filtered sensor)
-    3. gripper joint < 0.5 rad (gripper closed)
-    """
-    jaw_sensor: ContactSensor = env.scene["jaw_contact"]
-    gripper_sensor: ContactSensor = env.scene["gripper_contact"]
-
-    jaw_force = torch.linalg.vector_norm(jaw_sensor.data.net_forces_w[:, 0, :], dim=1)
-    gripper_force = torch.linalg.vector_norm(gripper_sensor.data.net_forces_w[:, 0, :], dim=1)
-
-    jaw_contact = torch.sigmoid(5.0 * (jaw_force - 0.5))
-    gripper_contact = torch.sigmoid(5.0 * (gripper_force - 0.5))
-
-    # Gripper closed: joint≈0.2 (closed) → 1, joint≈1.0 (open) → 0
-    robot: Articulation = env.scene[robot_cfg.name]
-    gripper_pos = robot.data.joint_pos[:, -1]
-    gripper_closed = torch.sigmoid(10.0 * (0.5 - gripper_pos))
-
-    return jaw_contact * gripper_contact * gripper_closed
-
-
-def cube_grasped_reward(
-    env: ManagerBasedRLEnv,
-    robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
-) -> torch.Tensor:
-    """Grasp reward: bilateral contact + gripper closed. Range [0, 1]."""
-    return _is_grasped(env, robot_cfg=robot_cfg)
-
-
-def cube_height_if_grasped(
+def cube_height_reward(
     env: ManagerBasedRLEnv,
     cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
-    target_height: float = 0.20,
-    sharpness: float = 10.0,
-    lift_threshold: float = 0.05,
+    min_height: float = 0.046,
+    k: float = 5.0,
 ) -> torch.Tensor:
-    """Exponential height reward, only when grasped AND cube above lift_threshold (~0.05m). Range [0, 1]."""
+    """Height reward: tanh(k * max(h - min_height, 0)). Zero below min_height, monotonically increasing above. Range [0, 1]."""
     height_above_base = _cube_pos(env, cube_cfg)[:, 2] - _robot_base_height(env, robot_cfg)
-    height_rew = torch.exp(-sharpness * torch.abs(height_above_base - target_height))
-    lifted = (height_above_base > lift_threshold).float()
-    return height_rew * _is_grasped(env, robot_cfg=robot_cfg) * lifted
+    h = torch.clamp(height_above_base - min_height, min=0.0)
+    return torch.tanh(k * h)
 
 
 def cube_success_bonus(

From d232a4000f987cf2990a0b2d4734dc12075a1c9c Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 12 Mar 2026 02:10:19 -0400
Subject: [PATCH 61/68] Update documents

---
 docs/docs/docs/features/rl_training.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
index 93bd49da..b4deb316 100644
--- a/docs/docs/docs/features/rl_training.md
+++ b/docs/docs/docs/features/rl_training.md
@@ -2,6 +2,10 @@
 
 The RL training module enables training manipulation policies with reinforcement learning using [rsl_rl](https://github.com/leggedrobotics/rsl_rl) (PPO). It runs fully in simulation with parallel environments and no human teleoperation required.
 
+:::note
+End-to-end RL for manipulation is challenging — reward design, exploration, and sim-to-real transfer all require significant task-specific tuning. Currently only the **LiftCube** task is supported. Support for additional tasks will be added in future updates.
+:::
+
 ## Training
 
 ```shell

From e01549cfaca8f41fa0ba771454310f7ccaeca4be Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 12 Mar 2026 03:02:06 -0400
Subject: [PATCH 62/68] Refactor the validation module into a record module and
 add signal handling to support graceful exit with Ctrl+C

---
 docs/docs/docs/features/rl_training.md |  30 +++++-
 scripts/datagen/rl/play.py             | 118 -----------------------
 scripts/datagen/rl/record.py           | 125 +++++++++++++++++++++++++
 3 files changed, 151 insertions(+), 122 deletions(-)
 delete mode 100644 scripts/datagen/rl/play.py
 create mode 100644 scripts/datagen/rl/record.py

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
index b4deb316..808fc478 100644
--- a/docs/docs/docs/features/rl_training.md
+++ b/docs/docs/docs/features/rl_training.md
@@ -45,16 +45,38 @@ tensorboard --logdir logs/rl
 Key metrics to watch: `Train/mean_reward` (total episode reward) and individual reward terms such as `Episode/rew_cube_height`.
 ::::
 
-## Evaluation
+## Evaluation & Recording
+
+Evaluate a checkpoint and save all episodes to HDF5 (both success and failure, tagged with `attrs["success"]`):
 
 ```shell
-python scripts/datagen/rl/play.py \
+python scripts/datagen/rl/record.py \
     --task LeIsaac-SO101-LiftCube-RL-v0 \
     --checkpoint logs/rl/<run>/model_<iter>.pt \
-    --num_envs 4 \
-    --num_episodes 20
+    --num_envs 1 \
+    --num_episodes 100 \
+    --record --dataset_file ./datasets/rl_eval.hdf5
 ```
 
+<details>
+<summary><strong>Parameter descriptions for record.py</strong></summary>
+
+- `--task`: Gym task ID. Required.
+
+- `--checkpoint`: Path to a saved model checkpoint (`.pt`). Required.
+
+- `--num_envs`: Number of parallel environments. Default: `1`.
+
+- `--num_episodes`: Total episodes to run across all envs. `0` = run indefinitely. Default: `0`.
+
+- `--seed`: Random seed. Default: `42`.
+
+- `--record`: Enable HDF5 recording. Both successful and failed episodes are saved.
+
+- `--dataset_file`: Output HDF5 file path. Default: `./datasets/rl_eval.hdf5`.
+
+</details>
+
 ## Reward Design
 
 The LiftCube RL task uses three reward terms:
diff --git a/scripts/datagen/rl/play.py b/scripts/datagen/rl/play.py
deleted file mode 100644
index c433eeaa..00000000
--- a/scripts/datagen/rl/play.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""Generic script to play any leisaac RL task with a trained RSL-RL checkpoint."""
-
-import argparse
-
-from isaaclab.app import AppLauncher
-
-parser = argparse.ArgumentParser(description="Play a leisaac RL task with RSL-RL.")
-parser.add_argument("--task", type=str, required=True, help="Gym task ID (e.g. LeIsaac-SO101-LiftCube-RL-v0).")
-parser.add_argument("--checkpoint", type=str, required=True, help="Path to model checkpoint (.pt).")
-parser.add_argument("--num_envs", type=int, default=4, help="Number of parallel environments.")
-parser.add_argument("--num_episodes", type=int, default=0, help="Episodes to run per env (0 = infinite).")
-parser.add_argument("--seed", type=int, default=42, help="Random seed.")
-AppLauncher.add_app_launcher_args(parser)
-args_cli = parser.parse_args()
-
-# Force rendering on
-args_cli.headless = False
-
-app_launcher = AppLauncher(args_cli)
-simulation_app = app_launcher.app
-
-import importlib
-
-import gymnasium as gym
-import leisaac.tasks  # noqa: F401 — registers all leisaac gym envs
-import torch
-from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
-from rsl_rl.runners import OnPolicyRunner
-
-
-def _load_from_entry_point(entry_point: str):
-    """Load an object from a 'module.path:attribute' entry point string."""
-    module_path, attr = entry_point.rsplit(":", 1)
-    module = importlib.import_module(module_path)
-    return getattr(module, attr)
-
-
-def main():
-    # --- Load configs from task registry ---
-    task_spec = gym.registry[args_cli.task]
-    kwargs = task_spec.kwargs
-
-    env_cfg_cls = _load_from_entry_point(kwargs["env_cfg_entry_point"])
-    train_cfg = dict(_load_from_entry_point(kwargs["rsl_rl_cfg_entry_point"]))
-
-    # --- Build env ---
-    env_cfg = env_cfg_cls()
-    env_cfg.scene.num_envs = args_cli.num_envs
-    env_cfg.seed = args_cli.seed
-    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-    env_cfg.recorders = None  # disable HDF5 recording during eval
-
-    device = env_cfg.sim.device
-
-    env = gym.make(args_cli.task, cfg=env_cfg)
-    env = RslRlVecEnvWrapper(env)
-
-    # --- Load checkpoint ---
-    runner = OnPolicyRunner(env, train_cfg, log_dir=None, device=device)
-    runner.load(args_cli.checkpoint)
-    print(f"[INFO] Task: {args_cli.task}")
-    print(f"[INFO] Loaded checkpoint: {args_cli.checkpoint}")
-
-    policy = runner.get_inference_policy(device=device)
-
-    # --- Episode tracking ---
-    episode_count = 0
-    success_count = 0
-
-    step = 0
-    obs = env.get_observations()
-    while simulation_app.is_running() and (args_cli.num_episodes <= 0 or episode_count < args_cli.num_episodes):
-        with torch.no_grad():
-            actions = policy(obs)
-        # Print actions, obs, and heights every 10 steps for env 0
-        if step % 10 == 0:
-            a = actions[0].cpu().numpy()
-            o = obs["policy"][0].cpu().numpy()
-            # obs layout: joint_pos(6) | joint_vel(6) | ee_state(7) | cube_rel_ee(3) | cube_quat(4)
-            cube_rel = o[19:22]  # cube position relative to EE jaw (dx, dy, dz)
-
-            # Raw world heights for debugging
-            unwrapped = env.unwrapped
-            robot = unwrapped.scene["robot"]
-            cube = unwrapped.scene["cube"]
-            base_index = robot.data.body_names.index("base")
-            robot_base_z = robot.data.body_pos_w[0, base_index, 2].item()
-            cube_z = cube.data.root_pos_w[0, 2].item()
-            height_above_base = cube_z - robot_base_z
-
-            print(
-                f"[step {step:4d}] actions: {' '.join(f'{v:+.3f}' for v in a)}"
-                f"  cube_rel_ee(dx,dy,dz): {' '.join(f'{v:+.3f}' for v in cube_rel)}"
-                f"  cube_z={cube_z:.3f} base_z={robot_base_z:.3f} height_above_base={height_above_base:.3f}"
-            )
-        step += 1
-        obs, _, dones, extras = env.step(actions)
-
-        time_outs = extras.get("time_outs", torch.zeros_like(dones))
-        finished = dones.bool()
-        if finished.any():
-            successes = finished & ~time_outs.bool()
-            episode_count += int(finished.sum().item())
-            success_count += int(successes.sum().item())
-            success_rate = success_count / episode_count if episode_count > 0 else 0.0
-            print(
-                f"[Episode {episode_count}] "
-                f"success={int(successes.sum().item())} "
-                f"timeout={int((finished & time_outs.bool()).sum().item())} "
-                f"| total success rate: {success_rate:.1%} ({success_count}/{episode_count})"
-            )
-
-    env.close()
-
-
-if __name__ == "__main__":
-    main()
-    simulation_app.close()
diff --git a/scripts/datagen/rl/record.py b/scripts/datagen/rl/record.py
new file mode 100644
index 00000000..cb95c9e2
--- /dev/null
+++ b/scripts/datagen/rl/record.py
@@ -0,0 +1,125 @@
+"""Evaluate a trained RSL-RL policy on a leisaac task, with optional HDF5 episode recording."""
+
+import argparse
+import importlib
+import os
+import signal
+
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="Evaluate an RSL-RL policy on a leisaac task.")
+parser.add_argument("--task", type=str, required=True, help="Gym task ID (e.g. LeIsaac-SO101-LiftCube-RL-v0).")
+parser.add_argument("--checkpoint", type=str, required=True, help="Path to model checkpoint (.pt).")
+parser.add_argument("--num_envs", type=int, default=1, help="Number of parallel environments.")
+parser.add_argument("--num_episodes", type=int, default=0, help="Total episodes to run (0 = infinite).")
+parser.add_argument("--seed", type=int, default=42, help="Random seed.")
+parser.add_argument("--record", action="store_true", help="Save all episodes to HDF5 with success/failure tags.")
+parser.add_argument("--dataset_file", type=str, default="./datasets/rl_eval.hdf5", help="Output HDF5 file path.")
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+args_cli.headless = False
+
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+import gymnasium as gym
+import leisaac.tasks  # noqa: F401
+import torch
+from isaaclab.managers import DatasetExportMode
+from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
+from leisaac.enhance.managers import StreamingRecorderManager
+from rsl_rl.runners import OnPolicyRunner
+
+
+def _load_from_entry_point(entry_point: str):
+    module_path, attr = entry_point.rsplit(":", 1)
+    return getattr(importlib.import_module(module_path), attr)
+
+
+def main():
+    kwargs = gym.registry[args_cli.task].kwargs
+    env_cfg = _load_from_entry_point(kwargs["env_cfg_entry_point"])()
+    train_cfg = dict(_load_from_entry_point(kwargs["rsl_rl_cfg_entry_point"]))
+
+    env_cfg.scene.num_envs = args_cli.num_envs
+    env_cfg.seed = args_cli.seed
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+
+    if args_cli.record:
+        output_dir = os.path.dirname(os.path.abspath(args_cli.dataset_file))
+        output_filename = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
+        env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
+        env_cfg.recorders.dataset_export_dir_path = output_dir
+        env_cfg.recorders.dataset_filename = output_filename
+    else:
+        env_cfg.recorders = None
+
+    env = gym.make(args_cli.task, cfg=env_cfg)
+
+    if args_cli.record:
+        unwrapped = env.unwrapped
+        del unwrapped.recorder_manager
+        unwrapped.recorder_manager = StreamingRecorderManager(env_cfg.recorders, unwrapped)
+        unwrapped.recorder_manager.flush_steps = 100
+        unwrapped.recorder_manager.compression = "lzf"
+        print(f"[INFO] Recording to: {args_cli.dataset_file}")
+
+    env = RslRlVecEnvWrapper(env)
+
+    runner = OnPolicyRunner(env, train_cfg, log_dir=None, device=env_cfg.sim.device)
+    runner.load(args_cli.checkpoint)
+    print(f"[INFO] Task: {args_cli.task}")
+    print(f"[INFO] Loaded checkpoint: {args_cli.checkpoint}")
+
+    policy = runner.get_inference_policy(device=env_cfg.sim.device)
+
+    episode_count = 0
+    success_count = 0
+    interrupted = False
+
+    def signal_handler(signum, frame):
+        nonlocal interrupted
+        interrupted = True
+        print("\n[INFO] KeyboardInterrupt detected. Cleaning up resources...")
+
+    original_sigint_handler = signal.signal(signal.SIGINT, signal_handler)
+
+    try:
+        step = 0
+        obs = env.get_observations()
+        while (
+            simulation_app.is_running()
+            and not interrupted
+            and (args_cli.num_episodes <= 0 or episode_count < args_cli.num_episodes)
+        ):
+            with torch.no_grad():
+                actions = policy(obs)
+
+            step += 1
+            obs, _, dones, extras = env.step(actions)
+
+            time_outs = extras.get("time_outs", torch.zeros_like(dones))
+            finished = dones.bool()
+            if finished.any():
+                successes = finished & ~time_outs.bool()
+                for i in range(finished.shape[0]):
+                    if finished[i]:
+                        print("Episode success!" if successes[i] else "Episode failed!")
+                episode_count += int(finished.sum().item())
+                success_count += int(successes.sum().item())
+                print(f"Total success rate: {success_count / episode_count:.1%} ({success_count}/{episode_count})")
+    except Exception as e:
+        import traceback
+
+        print(f"\n[ERROR] {e}\n")
+        traceback.print_exc()
+    finally:
+        signal.signal(signal.SIGINT, original_sigint_handler)
+        if args_cli.record and hasattr(env.unwrapped.recorder_manager, "finalize"):
+            env.unwrapped.recorder_manager.finalize()
+        env.close()
+
+
+if __name__ == "__main__":
+    main()
+    simulation_app.close()

From 3301e5acee4d54dc24b922faa91c9cd91263ea54 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 12 Mar 2026 03:08:17 -0400
Subject: [PATCH 63/68] Change docstrings

---
 scripts/datagen/rl/train.py | 42 +++++++++++--------------------------
 1 file changed, 12 insertions(+), 30 deletions(-)

diff --git a/scripts/datagen/rl/train.py b/scripts/datagen/rl/train.py
index b9781816..46570fe8 100644
--- a/scripts/datagen/rl/train.py
+++ b/scripts/datagen/rl/train.py
@@ -1,12 +1,15 @@
-"""Generic script to train any leisaac RL task with RSL-RL (PPO)."""
+"""Train a leisaac RL task with RSL-RL (PPO)."""
 
 import argparse
+import importlib
+import os
+from datetime import datetime
 
 from isaaclab.app import AppLauncher
 
 parser = argparse.ArgumentParser(description="Train a leisaac RL task with RSL-RL.")
 parser.add_argument("--task", type=str, required=True, help="Gym task ID (e.g. LeIsaac-SO101-LiftCube-RL-v0).")
-parser.add_argument("--num_envs", type=int, default=64, help="Number of parallel environments.")
+parser.add_argument("--num_envs", type=int, default=512, help="Number of parallel environments.")
 parser.add_argument("--max_iterations", type=int, default=1500, help="Number of PPO iterations.")
 parser.add_argument("--log_dir", type=str, default="logs/rl", help="Base logging directory.")
 parser.add_argument("--seed", type=int, default=42, help="Random seed.")
@@ -17,67 +20,46 @@
 app_launcher = AppLauncher(args_cli)
 simulation_app = app_launcher.app
 
-import importlib
-import os
-from datetime import datetime
-
 import gymnasium as gym
-import leisaac.tasks  # noqa: F401 — registers all leisaac gym envs
+import leisaac.tasks  # noqa: F401
 from isaaclab.utils.io import dump_yaml
 from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
 from rsl_rl.runners import OnPolicyRunner
 
 
 def _load_from_entry_point(entry_point: str):
-    """Load an object from a 'module.path:attribute' entry point string."""
     module_path, attr = entry_point.rsplit(":", 1)
-    module = importlib.import_module(module_path)
-    return getattr(module, attr)
+    return getattr(importlib.import_module(module_path), attr)
 
 
 def main():
-    # --- Load configs from task registry ---
-    task_spec = gym.registry[args_cli.task]
-    kwargs = task_spec.kwargs
-
-    env_cfg_cls = _load_from_entry_point(kwargs["env_cfg_entry_point"])
+    kwargs = gym.registry[args_cli.task].kwargs
+    env_cfg = _load_from_entry_point(kwargs["env_cfg_entry_point"])()
     train_cfg = dict(_load_from_entry_point(kwargs["rsl_rl_cfg_entry_point"]))
 
-    # --- Build env ---
-    env_cfg = env_cfg_cls()
     env_cfg.scene.num_envs = args_cli.num_envs
     env_cfg.seed = args_cli.seed
     env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-
     train_cfg["seed"] = args_cli.seed
 
-    device = env_cfg.sim.device
-
-    # --- Set up logging ---
     task_slug = args_cli.task.lower().replace("-", "_").replace("leisaac_", "").replace("_v0", "")
-    log_dir = os.path.join(args_cli.log_dir, task_slug, datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-    log_dir = os.path.abspath(log_dir)
+    log_dir = os.path.abspath(os.path.join(args_cli.log_dir, task_slug, datetime.now().strftime("%Y-%m-%d_%H-%M-%S")))
+    env_cfg.log_dir = log_dir
     print(f"[INFO] Task: {args_cli.task}")
     print(f"[INFO] Logging to: {log_dir}")
-    env_cfg.log_dir = log_dir
 
-    # --- Create environment ---
     env = gym.make(args_cli.task, cfg=env_cfg)
     env = RslRlVecEnvWrapper(env)
 
-    # --- Create PPO runner ---
-    runner = OnPolicyRunner(env, train_cfg, log_dir=log_dir, device=device)
+    runner = OnPolicyRunner(env, train_cfg, log_dir=log_dir, device=env_cfg.sim.device)
     if args_cli.checkpoint is not None:
         runner.load(args_cli.checkpoint)
         print(f"[INFO] Resumed from checkpoint: {args_cli.checkpoint}")
 
-    # --- Dump env config ---
     os.makedirs(os.path.join(log_dir, "params"), exist_ok=True)
     dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
 
-    # --- Train ---
     runner.learn(num_learning_iterations=args_cli.max_iterations, init_at_random_ep_len=True)
-
     env.close()
 
 

From fb90080012dd00d2aebdd6a1fff456f3d2223412 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 12 Mar 2026 13:00:50 -0400
Subject: [PATCH 64/68] Change reward weights

---
 docs/docs/docs/features/rl_training.md        |  2 +-
 .../tasks/lift_cube/lift_cube_rl_env_cfg.py   |  2 +-
 .../leisaac/tasks/lift_cube/mdp/rewards.py    | 47 +++++++++++++++++--
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
index 808fc478..9ec808e0 100644
--- a/docs/docs/docs/features/rl_training.md
+++ b/docs/docs/docs/features/rl_training.md
@@ -85,7 +85,7 @@ The LiftCube RL task uses three reward terms:
 |------|--------|-------------|
 | `cube_success` | 100.0 | One-time bonus when cube height ≥ 20 cm above robot base. Episode ends immediately after (early termination). |
 | `ee_to_cube` | 1.5 | `1 - tanh(5 × dist(TCP, cube))` — guides TCP to cube center. Range [0, 1]. |
-| `cube_height` | 10.0 | `tanh(5 × max(h - 4.6 cm, 0))` — zero below 4.6 cm, monotonically increasing above. Range [0, 1]. |
+| `cube_height` | 10.0 | `clamp((h - 4.6 cm) / (20 cm - 4.6 cm), 0, 1)` — linear ramp from 0 at 4.6 cm to 1 at 20 cm, constant gradient all the way to success. |
 
 **TCP (Tool Center Point)** is computed as the midpoint between the two fingertip contact surfaces, derived from `body_pos_w` and `body_quat_w` with calibrated local offsets from the USD collision mesh:
 
diff --git a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
index 64a667c1..0e40ddfb 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
@@ -101,7 +101,7 @@ class LiftCubeRLRewardsCfg:
     cube_height = RewTerm(
         func=mdp.cube_height_reward,
         weight=10.0,
-        params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "min_height": 0.046},
+        params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "min_height": 0.046, "max_height": 0.20},
     )
 
 
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
index 9c214f43..f5c38c7f 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
@@ -1,3 +1,43 @@
+"""Reward functions for the LiftCube RL task.
+
+Reward structure
+----------------
+The task uses three complementary reward terms to guide the policy through a
+natural lifting curriculum:
+
+1. ee_to_cube  (weight 1.5, shaped)
+   Guides the end-effector toward the cube before and during grasping.
+   Formula: 1 - tanh(k * dist(TCP, cube))
+   - Peaks at 1.0 when TCP is exactly at the cube center, decays smoothly with distance.
+   - k = 5.0 gives a reach radius of ~10 cm before the reward drops below 0.5.
+
+2. cube_height  (weight 10.0, shaped)
+   Encourages lifting the cube continuously from table height to the success
+   threshold with a constant gradient.
+   Formula: clamp((h - min_height) / (max_height - min_height), 0, 1)
+   - Zero below min_height (4.6 cm above robot base) — ignores ground-level noise.
+   - Linear ramp to 1.0 at max_height (20 cm), never saturates before success.
+   - Unlike a tanh formulation, the gradient does not vanish mid-trajectory, so
+     the policy always receives a clear signal to keep lifting.
+
+3. cube_success  (weight 100.0, sparse)
+   One-time terminal bonus when h >= 20 cm. Also triggers early episode
+   termination so the policy does not receive further shaped reward after success.
+
+TCP computation
+---------------
+The Tool Center Point (TCP) is the midpoint between the two fingertip contact
+surfaces, computed from body_pos_w / body_quat_w plus calibrated local offsets
+derived from the USD collision mesh geometry:
+  - jaw body local frame  : (0.0, -0.05, 0.02)
+  - gripper body local frame : (-0.012, 0.0, -0.08)
+
+Height reference
+----------------
+All heights are measured relative to the robot base link ("base") in the world
+frame, making the reward invariant to the absolute table / mount height.
+"""
+
 import torch
 from isaaclab.assets import Articulation, RigidObject
 from isaaclab.envs import ManagerBasedRLEnv
@@ -57,12 +97,11 @@ def cube_height_reward(
     cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
     min_height: float = 0.046,
-    k: float = 5.0,
+    max_height: float = 0.20,
 ) -> torch.Tensor:
-    """Height reward: tanh(k * max(h - min_height, 0)). Zero below min_height, monotonically increasing above. Range [0, 1]."""
+    """Height reward: linear ramp from 0 at min_height to 1 at max_height. Constant gradient encourages lifting all the way to success."""
     height_above_base = _cube_pos(env, cube_cfg)[:, 2] - _robot_base_height(env, robot_cfg)
-    h = torch.clamp(height_above_base - min_height, min=0.0)
-    return torch.tanh(k * h)
+    return torch.clamp((height_above_base - min_height) / (max_height - min_height), 0.0, 1.0)
 
 
 def cube_success_bonus(

From 723238ebe27959cd5ba10f21c7d8956b61ac1953 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 12 Mar 2026 13:16:58 -0400
Subject: [PATCH 65/68] Standardize dataset recording behavior

---
 scripts/datagen/rl/record.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/scripts/datagen/rl/record.py b/scripts/datagen/rl/record.py
index cb95c9e2..aa09fb33 100644
--- a/scripts/datagen/rl/record.py
+++ b/scripts/datagen/rl/record.py
@@ -14,6 +14,9 @@
 parser.add_argument("--num_episodes", type=int, default=0, help="Total episodes to run (0 = infinite).")
 parser.add_argument("--seed", type=int, default=42, help="Random seed.")
 parser.add_argument("--record", action="store_true", help="Save all episodes to HDF5 with success/failure tags.")
+parser.add_argument(
+    "--resume", action="store_true", help="Append to an existing dataset file instead of creating a new one."
+)
 parser.add_argument("--dataset_file", type=str, default="./datasets/rl_eval.hdf5", help="Output HDF5 file path.")
 AppLauncher.add_app_launcher_args(parser)
 args_cli = parser.parse_args()
@@ -27,7 +30,7 @@
 import torch
 from isaaclab.managers import DatasetExportMode
 from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
-from leisaac.enhance.managers import StreamingRecorderManager
+from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
 from rsl_rl.runners import OnPolicyRunner
 
 
@@ -48,7 +51,16 @@ def main():
     if args_cli.record:
         output_dir = os.path.dirname(os.path.abspath(args_cli.dataset_file))
         output_filename = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
-        env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
+        if args_cli.resume:
+            env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
+            assert os.path.exists(
+                args_cli.dataset_file
+            ), "the dataset file does not exist, please don't use '--resume' if you want to record a new dataset"
+        else:
+            env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
+            assert not os.path.exists(
+                args_cli.dataset_file
+            ), "the dataset file already exists, please use '--resume' to resume recording"
         env_cfg.recorders.dataset_export_dir_path = output_dir
         env_cfg.recorders.dataset_filename = output_filename
     else:

From 3f2a94030351c9b10bb4d00df36c081dd84d551b Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 12 Mar 2026 14:18:29 -0400
Subject: [PATCH 66/68] Modify the reward function, replacing the linear
 function with tanh to improve training speed

---
 docs/docs/docs/features/rl_training.md           |  4 ++--
 .../tasks/lift_cube/lift_cube_rl_env_cfg.py      |  4 ++--
 .../leisaac/tasks/lift_cube/mdp/rewards.py       | 16 +++++++---------
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
index 9ec808e0..2943fae9 100644
--- a/docs/docs/docs/features/rl_training.md
+++ b/docs/docs/docs/features/rl_training.md
@@ -83,9 +83,9 @@ The LiftCube RL task uses three reward terms:
 
 | Term | Weight | Description |
 |------|--------|-------------|
-| `cube_success` | 100.0 | One-time bonus when cube height ≥ 20 cm above robot base. Episode ends immediately after (early termination). |
+| `cube_success` | 200.0 | One-time bonus when cube height ≥ 20 cm above robot base. Episode ends immediately after (early termination). |
 | `ee_to_cube` | 1.5 | `1 - tanh(5 × dist(TCP, cube))` — guides TCP to cube center. Range [0, 1]. |
-| `cube_height` | 10.0 | `clamp((h - 4.6 cm) / (20 cm - 4.6 cm), 0, 1)` — linear ramp from 0 at 4.6 cm to 1 at 20 cm, constant gradient all the way to success. |
+| `cube_height` | 10.0 | `tanh(3 × max(h - 4.6 cm, 0))` — zero below 4.6 cm, monotonically increasing above. Range [0, 1]. |
 
 **TCP (Tool Center Point)** is computed as the midpoint between the two fingertip contact surfaces, derived from `body_pos_w` and `body_quat_w` with calibrated local offsets from the USD collision mesh:
 
diff --git a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
index 0e40ddfb..44f5882f 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
@@ -90,7 +90,7 @@ class LiftCubeRLRewardsCfg:
 
     cube_success = RewTerm(
         func=mdp.cube_success_bonus,
-        weight=100.0,
+        weight=200.0,
         params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "height_threshold": 0.20},
     )
     ee_to_cube = RewTerm(
@@ -101,7 +101,7 @@ class LiftCubeRLRewardsCfg:
     cube_height = RewTerm(
         func=mdp.cube_height_reward,
         weight=10.0,
-        params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "min_height": 0.046, "max_height": 0.20},
+        params={"cube_cfg": _CUBE_CFG, "robot_cfg": _ROBOT_CFG, "min_height": 0.046, "k": 3.0},
     )
 
 
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
index f5c38c7f..57295b4b 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
@@ -12,13 +12,10 @@
    - k = 5.0 gives a reach radius of ~10 cm before the reward drops below 0.5.
 
 2. cube_height  (weight 10.0, shaped)
-   Encourages lifting the cube continuously from table height to the success
-   threshold with a constant gradient.
-   Formula: clamp((h - min_height) / (max_height - min_height), 0, 1)
+   Encourages lifting the cube above the table surface.
+   Formula: tanh(k * max(h - min_height, 0))
    - Zero below min_height (4.6 cm above robot base) — ignores ground-level noise.
-   - Linear ramp to 1.0 at max_height (20 cm), never saturates before success.
-   - Unlike a tanh formulation, the gradient does not vanish mid-trajectory, so
-     the policy always receives a clear signal to keep lifting.
+   - Monotonically increasing above min_height. Range [0, 1].
 
 3. cube_success  (weight 100.0, sparse)
    One-time terminal bonus when h >= 20 cm. Also triggers early episode
@@ -97,11 +94,12 @@ def cube_height_reward(
     cube_cfg: SceneEntityCfg = SceneEntityCfg("cube"),
     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
     min_height: float = 0.046,
-    max_height: float = 0.20,
+    k: float = 5.0,
 ) -> torch.Tensor:
-    """Height reward: linear ramp from 0 at min_height to 1 at max_height. Constant gradient encourages lifting all the way to success."""
+    """Height reward: tanh(k * max(h - min_height, 0)). Zero below min_height, monotonically increasing above. Range [0, 1]."""
     height_above_base = _cube_pos(env, cube_cfg)[:, 2] - _robot_base_height(env, robot_cfg)
-    return torch.clamp((height_above_base - min_height) / (max_height - min_height), 0.0, 1.0)
+    h = torch.clamp(height_above_base - min_height, min=0.0)
+    return torch.tanh(k * h)
 
 
 def cube_success_bonus(

From d670f1bbb1e0024e732e164736d9186eb9d9ca0e Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Thu, 12 Mar 2026 14:53:18 -0400
Subject: [PATCH 67/68] Modify the reward function, replacing the linear
 function with tanh to improve training speed

---
 scripts/datagen/rl/record.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/scripts/datagen/rl/record.py b/scripts/datagen/rl/record.py
index aa09fb33..70899f5a 100644
--- a/scripts/datagen/rl/record.py
+++ b/scripts/datagen/rl/record.py
@@ -74,15 +74,11 @@ def main():
         unwrapped.recorder_manager = StreamingRecorderManager(env_cfg.recorders, unwrapped)
         unwrapped.recorder_manager.flush_steps = 100
         unwrapped.recorder_manager.compression = "lzf"
-        print(f"[INFO] Recording to: {args_cli.dataset_file}")
 
     env = RslRlVecEnvWrapper(env)
 
     runner = OnPolicyRunner(env, train_cfg, log_dir=None, device=env_cfg.sim.device)
     runner.load(args_cli.checkpoint)
-    print(f"[INFO] Task: {args_cli.task}")
-    print(f"[INFO] Loaded checkpoint: {args_cli.checkpoint}")
-
     policy = runner.get_inference_policy(device=env_cfg.sim.device)
 
     episode_count = 0

From 1080309c7512a5e48128b0ff461d642ccc113e00 Mon Sep 17 00:00:00 2001
From: Zihan Gao <zg137@duke.edu>
Date: Sat, 28 Mar 2026 14:13:06 -0400
Subject: [PATCH 68/68] Reconstruct the RL module to keep it consistent with
 Isaac Lab.

---
 docs/docs/docs/features/rl_training.md        | 111 +++++++----
 scripts/datagen/rl/play.py                    | 179 ++++++++++++++++++
 scripts/datagen/rl/record.py                  | 133 -------------
 scripts/datagen/rl/replay.py                  | 143 ++++++++++++++
 scripts/datagen/rl/train.py                   | 120 ++++++++----
 .../leisaac/tasks/lift_cube/__init__.py       |   4 +-
 .../tasks/lift_cube/lift_cube_rl_env_cfg.py   |  39 ----
 .../leisaac/tasks/lift_cube/mdp/rewards.py    |   2 +-
 .../tasks/lift_cube/rl_agents/__init__.py     |   0
 .../lift_cube/rl_agents/rsl_rl_ppo_cfg.py     |  39 ++++
 10 files changed, 527 insertions(+), 243 deletions(-)
 create mode 100644 scripts/datagen/rl/play.py
 delete mode 100644 scripts/datagen/rl/record.py
 create mode 100644 scripts/datagen/rl/replay.py
 create mode 100644 source/leisaac/leisaac/tasks/lift_cube/rl_agents/__init__.py
 create mode 100644 source/leisaac/leisaac/tasks/lift_cube/rl_agents/rsl_rl_ppo_cfg.py

diff --git a/docs/docs/docs/features/rl_training.md b/docs/docs/docs/features/rl_training.md
index 2943fae9..a254e901 100644
--- a/docs/docs/docs/features/rl_training.md
+++ b/docs/docs/docs/features/rl_training.md
@@ -21,13 +21,11 @@ python scripts/datagen/rl/train.py \
 
 - `--task`: Gym task ID to train. Required.
 
-- `--num_envs`: Number of parallel simulation environments. More environments = faster data collection. Default: `512`.
+- `--num_envs`: Number of parallel simulation environments. More environments = faster data collection. Default: value from task config.
 
-- `--max_iterations`: Number of PPO update iterations. Default: `1500`.
+- `--max_iterations`: Number of PPO update iterations. Default: value from agent config.
 
-- `--log_dir`: Base directory for logs. Runs are saved to `<log_dir>/<task_slug>/<timestamp>/`. Default: `logs/rl`.
-
-- `--seed`: Random seed for reproducibility. Default: `42`.
+- `--seed`: Random seed for reproducibility. Default: value from agent config.
 
 - `--headless`: Run without rendering window for faster training.
 
@@ -36,10 +34,10 @@ python scripts/datagen/rl/train.py \
 </details>
 
 ::::tip
-Training logs (tensorboard) are written to `logs/rl/<task_slug>/<timestamp>/`. Monitor progress with:
+Training logs (tensorboard) are written to `logs/rsl_rl/<experiment_name>/<timestamp>/`. Monitor progress with:
 
 ```shell
-tensorboard --logdir logs/rl
+tensorboard --logdir logs/rsl_rl
 ```
 
 Key metrics to watch: `Train/mean_reward` (total episode reward) and individual reward terms such as `Episode/rew_cube_height`.
@@ -47,34 +45,47 @@ Key metrics to watch: `Train/mean_reward` (total episode reward) and individual
 
 ## Evaluation & Recording
 
-Evaluate a checkpoint and save all episodes to HDF5 (both success and failure, tagged with `attrs["success"]`):
+Evaluate a checkpoint visually (no recording):
+
+```shell
+python scripts/datagen/rl/play.py \
+    --task LeIsaac-SO101-LiftCube-RL-v0 \
+    --checkpoint logs/rsl_rl/lift_cube_rl/<run>/model_<iter>.pt \
+    --num_envs 1
+```
+
+Save all episodes to HDF5 (both success and failure) by adding `--record`:
 
 ```shell
-python scripts/datagen/rl/record.py \
+python scripts/datagen/rl/play.py \
     --task LeIsaac-SO101-LiftCube-RL-v0 \
-    --checkpoint logs/rl/<run>/model_<iter>.pt \
+    --checkpoint logs/rsl_rl/lift_cube_rl/<run>/model_<iter>.pt \
     --num_envs 1 \
     --num_episodes 100 \
     --record --dataset_file ./datasets/rl_eval.hdf5
 ```
 
 <details>
-<summary><strong>Parameter descriptions for record.py</strong></summary>
+<summary><strong>Parameter descriptions for play.py</strong></summary>
 
 - `--task`: Gym task ID. Required.
 
 - `--checkpoint`: Path to a saved model checkpoint (`.pt`). Required.
 
-- `--num_envs`: Number of parallel environments. Default: `1`.
+- `--num_envs`: Number of parallel environments. Default: value from task config.
 
 - `--num_episodes`: Total episodes to run across all envs. `0` = run indefinitely. Default: `0`.
 
-- `--seed`: Random seed. Default: `42`.
+- `--seed`: Random seed. Default: value from agent config.
 
 - `--record`: Enable HDF5 recording. Both successful and failed episodes are saved.
 
+- `--resume_recording`: Append to an existing dataset file instead of creating a new one.
+
 - `--dataset_file`: Output HDF5 file path. Default: `./datasets/rl_eval.hdf5`.
 
+- `--real-time`: Slow down simulation to real-time speed.
+
 </details>
 
 ## Reward Design
@@ -94,22 +105,6 @@ The LiftCube RL task uses three reward terms:
 
 **Termination**: episode ends on timeout (15 s) or when cube height ≥ 20 cm (success).
 
-## PPO Hyperparameters
-
-| Parameter | Value | Notes |
-|-----------|-------|-------|
-| `gamma` | 0.95 | Discount factor |
-| `lam` | 0.95 | GAE lambda |
-| `clip_param` | 0.1 | PPO clip range |
-| `entropy_coef` | 0.005 | Entropy regularization to encourage exploration |
-| `learning_rate` | 1e-4 | Adam learning rate |
-| `schedule` | adaptive | Adjusts learning rate based on KL divergence |
-| `desired_kl` | 0.01 | Target KL divergence for adaptive schedule |
-| `num_learning_epochs` | 5 | PPO update epochs per rollout |
-| `num_mini_batches` | 4 | Mini-batches per epoch |
-| `num_steps_per_env` | 100 | Rollout steps per environment per update |
-| `num_envs` (recommended) | 512 | Parallel environments — more = faster sparse reward discovery |
-
 ## Action Space
 
 RL training uses the `rl_so101leader` device mode — delta end-effector control with a binary gripper:
@@ -136,11 +131,9 @@ RL training uses the `rl_so101leader` device mode — delta end-effector control
 ## Adding a New RL Task
 
 1. Create `<task>/mdp/rewards.py` with reward functions.
-2. Create `<task>/<task>_rl_env_cfg.py` with `TRAIN_CFG` dict and env config class:
+2. Create `<task>/<task>_rl_env_cfg.py` with the RL env config class:
 
 ```python
-TRAIN_CFG = { ... }  # PPO hyperparameters
-
 @configclass
 class MyTaskRLEnvCfg(MyTaskEnvCfg):
     observations: MyTaskRLObsCfg = MyTaskRLObsCfg()
@@ -154,18 +147,66 @@ class MyTaskRLEnvCfg(MyTaskEnvCfg):
         self.episode_length_s = 15.0
 ```
 
-3. Register the gym environment in `<task>/__init__.py` with both `env_cfg_entry_point` and `rsl_rl_cfg_entry_point`:
+3. Create `<task>/rl_agents/rsl_rl_ppo_cfg.py` with the PPO runner config:
+
+```python
+from isaaclab.utils import configclass
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+
+@configclass
+class MyTaskRLPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 100
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "my_task_rl"
+    obs_groups = {"actor": ["policy"], "critic": ["policy"]}
+
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=0.3,
+        actor_obs_normalization=True,
+        critic_obs_normalization=True,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    )
+
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
+```
+
+4. Register the gym environment in `<task>/__init__.py`:
 
 ```python
+from . import rl_agents
+
 gym.register(
     id="LeIsaac-SO101-MyTask-RL-v0",
     entry_point="isaaclab.envs:ManagerBasedRLEnv",
     disable_env_checker=True,
     kwargs={
         "env_cfg_entry_point": f"{__name__}.<task>_rl_env_cfg:MyTaskRLEnvCfg",
-        "rsl_rl_cfg_entry_point": f"{__name__}.<task>_rl_env_cfg:TRAIN_CFG",
+        "rsl_rl_cfg_entry_point": f"{rl_agents.__name__}.rsl_rl_ppo_cfg:MyTaskRLPPORunnerCfg",
     },
 )
 ```
 
-4. Train with the generic script: `python scripts/datagen/rl/train.py --task LeIsaac-SO101-MyTask-RL-v0`.
+5. Train:
+
+```bash
+python scripts/datagen/rl/train.py \
+    --task LeIsaac-SO101-MyTask-RL-v0 \
+    --num_envs 512 \
+    --headless
+```
diff --git a/scripts/datagen/rl/play.py b/scripts/datagen/rl/play.py
new file mode 100644
index 00000000..c17637a6
--- /dev/null
+++ b/scripts/datagen/rl/play.py
@@ -0,0 +1,179 @@
+"""Play or record a trained leisaac RL policy.
+
+Use --record to save episodes to HDF5. Without --record, runs in visualization mode only.
+"""
+
+import os
+import sys
+
+# Make cli_args importable (local module in IsaacLab's rsl_rl script directory)
+_ISAACLAB_RSL_RL_DIR = os.path.join(
+    os.path.dirname(__file__),
+    "..",
+    "..",
+    "..",
+    "dependencies",
+    "IsaacLab",
+    "scripts",
+    "reinforcement_learning",
+    "rsl_rl",
+)
+sys.path.insert(0, os.path.abspath(_ISAACLAB_RSL_RL_DIR))
+
+import argparse
+import signal
+
+from isaaclab.app import AppLauncher
+
+import cli_args  # isort: skip
+
+parser = argparse.ArgumentParser(description="Play or record a trained leisaac RL policy.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument(
+    "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
+)
+parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment.")
+parser.add_argument("--num_episodes", type=int, default=0, help="Total episodes to run (0 = infinite).")
+parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
+# HDF5 recording
+parser.add_argument("--record", action="store_true", help="Save episodes to HDF5 with success/failure tags.")
+parser.add_argument(
+    "--resume_recording", action="store_true", help="Append to an existing dataset file instead of creating a new one."
+)
+parser.add_argument("--dataset_file", type=str, default="./datasets/rl_eval.hdf5", help="Output HDF5 file path.")
+cli_args.add_rsl_rl_args(parser)
+AppLauncher.add_app_launcher_args(parser)
+args_cli, hydra_args = parser.parse_known_args()
+
+sys.argv = [sys.argv[0]] + hydra_args
+
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+import time
+
+import gymnasium as gym
+import isaaclab_tasks  # noqa: F401
+import leisaac.tasks  # noqa: F401
+import torch
+from isaaclab.envs import ManagerBasedRLEnvCfg
+from isaaclab.managers import DatasetExportMode
+from isaaclab.utils.assets import retrieve_file_path
+from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper
+from isaaclab_tasks.utils import get_checkpoint_path
+from isaaclab_tasks.utils.hydra import hydra_task_config
+from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
+from rsl_rl.runners import OnPolicyRunner
+
+
+@hydra_task_config(args_cli.task, args_cli.agent)
+def main(env_cfg: ManagerBasedRLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
+    """Play or record with RSL-RL agent."""
+    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
+    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
+    env_cfg.seed = agent_cfg.seed
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+
+    # resolve checkpoint path
+    log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
+    print(f"[INFO] Loading experiment from directory: {log_root_path}")
+    if args_cli.checkpoint:
+        resume_path = retrieve_file_path(args_cli.checkpoint)
+    else:
+        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
+
+    log_dir = os.path.dirname(resume_path)
+    env_cfg.log_dir = log_dir
+
+    # configure HDF5 recording
+    if args_cli.record:
+        output_dir = os.path.dirname(os.path.abspath(args_cli.dataset_file))
+        output_filename = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
+        if args_cli.resume_recording:
+            env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
+            assert os.path.exists(
+                args_cli.dataset_file
+            ), "Dataset file does not exist. Remove --resume_recording to create a new one."
+        else:
+            env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
+            assert not os.path.exists(
+                args_cli.dataset_file
+            ), "Dataset file already exists. Use --resume_recording to append, or choose a different path."
+        env_cfg.recorders.dataset_export_dir_path = output_dir
+        env_cfg.recorders.dataset_filename = output_filename
+    else:
+        env_cfg.recorders = None
+
+    env = gym.make(args_cli.task, cfg=env_cfg)
+
+    # replace recorder manager with streaming version for HDF5 recording
+    if args_cli.record:
+        unwrapped = env.unwrapped
+        del unwrapped.recorder_manager
+        unwrapped.recorder_manager = StreamingRecorderManager(env_cfg.recorders, unwrapped)
+        unwrapped.recorder_manager.flush_steps = 100
+        unwrapped.recorder_manager.compression = "lzf"
+
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
+
+    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
+    runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    runner.load(resume_path)
+
+    policy = runner.get_inference_policy(device=env.unwrapped.device)
+
+    dt = env.unwrapped.step_dt
+    episode_count = 0
+    success_count = 0
+    interrupted = False
+
+    def signal_handler(signum, frame):
+        nonlocal interrupted
+        interrupted = True
+        print("\n[INFO] Interrupted. Cleaning up...")
+
+    original_sigint_handler = signal.signal(signal.SIGINT, signal_handler)
+
+    try:
+        obs = env.get_observations()
+        while (
+            simulation_app.is_running()
+            and not interrupted
+            and (args_cli.num_episodes <= 0 or episode_count < args_cli.num_episodes)
+        ):
+            start_time = time.time()
+            with torch.inference_mode():
+                actions = policy(obs)
+                obs, _, dones, extras = env.step(actions)
+
+            time_outs = extras.get("time_outs", torch.zeros_like(dones))
+            finished = dones.bool()
+            if finished.any():
+                successes = finished & ~time_outs.bool()
+                for i in range(finished.shape[0]):
+                    if finished[i]:
+                        print("Episode success!" if successes[i] else "Episode failed!")
+                episode_count += int(finished.sum().item())
+                success_count += int(successes.sum().item())
+                print(f"Success rate: {success_count / episode_count:.1%} ({success_count}/{episode_count})")
+
+            sleep_time = dt - (time.time() - start_time)
+            if args_cli.real_time and sleep_time > 0:
+                time.sleep(sleep_time)
+
+    except Exception as e:
+        import traceback
+
+        print(f"\n[ERROR] {e}\n")
+        traceback.print_exc()
+    finally:
+        signal.signal(signal.SIGINT, original_sigint_handler)
+        if args_cli.record and hasattr(env.unwrapped.recorder_manager, "finalize"):
+            env.unwrapped.recorder_manager.finalize()
+        env.close()
+
+
+if __name__ == "__main__":
+    main()
+    simulation_app.close()
diff --git a/scripts/datagen/rl/record.py b/scripts/datagen/rl/record.py
deleted file mode 100644
index 70899f5a..00000000
--- a/scripts/datagen/rl/record.py
+++ /dev/null
@@ -1,133 +0,0 @@
-"""Evaluate a trained RSL-RL policy on a leisaac task, with optional HDF5 episode recording."""
-
-import argparse
-import importlib
-import os
-import signal
-
-from isaaclab.app import AppLauncher
-
-parser = argparse.ArgumentParser(description="Evaluate an RSL-RL policy on a leisaac task.")
-parser.add_argument("--task", type=str, required=True, help="Gym task ID (e.g. LeIsaac-SO101-LiftCube-RL-v0).")
-parser.add_argument("--checkpoint", type=str, required=True, help="Path to model checkpoint (.pt).")
-parser.add_argument("--num_envs", type=int, default=1, help="Number of parallel environments.")
-parser.add_argument("--num_episodes", type=int, default=0, help="Total episodes to run (0 = infinite).")
-parser.add_argument("--seed", type=int, default=42, help="Random seed.")
-parser.add_argument("--record", action="store_true", help="Save all episodes to HDF5 with success/failure tags.")
-parser.add_argument(
-    "--resume", action="store_true", help="Append to an existing dataset file instead of creating a new one."
-)
-parser.add_argument("--dataset_file", type=str, default="./datasets/rl_eval.hdf5", help="Output HDF5 file path.")
-AppLauncher.add_app_launcher_args(parser)
-args_cli = parser.parse_args()
-args_cli.headless = False
-
-app_launcher = AppLauncher(args_cli)
-simulation_app = app_launcher.app
-
-import gymnasium as gym
-import leisaac.tasks  # noqa: F401
-import torch
-from isaaclab.managers import DatasetExportMode
-from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
-from leisaac.enhance.managers import EnhanceDatasetExportMode, StreamingRecorderManager
-from rsl_rl.runners import OnPolicyRunner
-
-
-def _load_from_entry_point(entry_point: str):
-    module_path, attr = entry_point.rsplit(":", 1)
-    return getattr(importlib.import_module(module_path), attr)
-
-
-def main():
-    kwargs = gym.registry[args_cli.task].kwargs
-    env_cfg = _load_from_entry_point(kwargs["env_cfg_entry_point"])()
-    train_cfg = dict(_load_from_entry_point(kwargs["rsl_rl_cfg_entry_point"]))
-
-    env_cfg.scene.num_envs = args_cli.num_envs
-    env_cfg.seed = args_cli.seed
-    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-
-    if args_cli.record:
-        output_dir = os.path.dirname(os.path.abspath(args_cli.dataset_file))
-        output_filename = os.path.splitext(os.path.basename(args_cli.dataset_file))[0]
-        if args_cli.resume:
-            env_cfg.recorders.dataset_export_mode = EnhanceDatasetExportMode.EXPORT_ALL_RESUME
-            assert os.path.exists(
-                args_cli.dataset_file
-            ), "the dataset file does not exist, please don't use '--resume' if you want to record a new dataset"
-        else:
-            env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_ALL
-            assert not os.path.exists(
-                args_cli.dataset_file
-            ), "the dataset file already exists, please use '--resume' to resume recording"
-        env_cfg.recorders.dataset_export_dir_path = output_dir
-        env_cfg.recorders.dataset_filename = output_filename
-    else:
-        env_cfg.recorders = None
-
-    env = gym.make(args_cli.task, cfg=env_cfg)
-
-    if args_cli.record:
-        unwrapped = env.unwrapped
-        del unwrapped.recorder_manager
-        unwrapped.recorder_manager = StreamingRecorderManager(env_cfg.recorders, unwrapped)
-        unwrapped.recorder_manager.flush_steps = 100
-        unwrapped.recorder_manager.compression = "lzf"
-
-    env = RslRlVecEnvWrapper(env)
-
-    runner = OnPolicyRunner(env, train_cfg, log_dir=None, device=env_cfg.sim.device)
-    runner.load(args_cli.checkpoint)
-    policy = runner.get_inference_policy(device=env_cfg.sim.device)
-
-    episode_count = 0
-    success_count = 0
-    interrupted = False
-
-    def signal_handler(signum, frame):
-        nonlocal interrupted
-        interrupted = True
-        print("\n[INFO] KeyboardInterrupt detected. Cleaning up resources...")
-
-    original_sigint_handler = signal.signal(signal.SIGINT, signal_handler)
-
-    try:
-        step = 0
-        obs = env.get_observations()
-        while (
-            simulation_app.is_running()
-            and not interrupted
-            and (args_cli.num_episodes <= 0 or episode_count < args_cli.num_episodes)
-        ):
-            with torch.no_grad():
-                actions = policy(obs)
-
-            step += 1
-            obs, _, dones, extras = env.step(actions)
-
-            time_outs = extras.get("time_outs", torch.zeros_like(dones))
-            finished = dones.bool()
-            if finished.any():
-                successes = finished & ~time_outs.bool()
-                for i in range(finished.shape[0]):
-                    if finished[i]:
-                        print("Episode success!" if successes[i] else "Episode failed!")
-                episode_count += int(finished.sum().item())
-                success_count += int(successes.sum().item())
-                print(f"Total success rate: {success_count / episode_count:.1%} ({success_count}/{episode_count})")
-    except Exception as e:
-        import traceback
-
-        print(f"\n[ERROR] {e}\n")
-        traceback.print_exc()
-    finally:
-        signal.signal(signal.SIGINT, original_sigint_handler)
-        if args_cli.record and hasattr(env.unwrapped.recorder_manager, "finalize"):
-            env.unwrapped.recorder_manager.finalize()
-        env.close()
-
-
-if __name__ == "__main__":
-    main()
-    simulation_app.close()
diff --git a/scripts/datagen/rl/replay.py b/scripts/datagen/rl/replay.py
new file mode 100644
index 00000000..e5a5d115
--- /dev/null
+++ b/scripts/datagen/rl/replay.py
@@ -0,0 +1,143 @@
+"""Script to replay recorded RL episodes from HDF5 dataset."""
+
+import multiprocessing
+
+if multiprocessing.get_start_method() != "spawn":
+    multiprocessing.set_start_method("spawn", force=True)
+
+import argparse
+
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="Replay recorded RL episodes.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to simulate.")
+parser.add_argument(
+    "--dataset_file", type=str, default="./datasets/rl_eval.hdf5", help="File path to load recorded episodes."
+)
+parser.add_argument(
+    "--replay_mode",
+    type=str,
+    default="action",
+    choices=["action", "state"],
+    help="Replay mode: action replays actions, state replays joint states.",
+)
+parser.add_argument(
+    "--select_episodes",
+    type=int,
+    nargs="+",
+    default=[],
+    help="List of episode indices to replay. Empty = replay all.",
+)
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+app_launcher = AppLauncher(vars(args_cli))
+simulation_app = app_launcher.app
+
+import contextlib
+import os
+
+import gymnasium as gym
+import leisaac.tasks  # noqa: F401
+import torch
+from isaaclab.envs import ManagerBasedRLEnv
+from isaaclab.utils.datasets import EpisodeData, HDF5DatasetFileHandler
+from isaaclab_tasks.utils import parse_env_cfg
+
+
+def get_next_action(episode_data: EpisodeData, return_state: bool = False) -> torch.Tensor | None:
+    if return_state:
+        next_state = episode_data.get_next_state()
+        if next_state is None:
+            return None
+        return next_state["articulation"]["robot"]["joint_position"]
+    else:
+        return episode_data.get_next_action()
+
+
+def main():
+    if not os.path.exists(args_cli.dataset_file):
+        raise FileNotFoundError(f"Dataset file not found: {args_cli.dataset_file}")
+
+    dataset_file_handler = HDF5DatasetFileHandler()
+    dataset_file_handler.open(args_cli.dataset_file)
+    episode_count = dataset_file_handler.get_num_episodes()
+
+    if episode_count == 0:
+        print("No episodes found in the dataset.")
+        return
+
+    episode_indices_to_replay = args_cli.select_episodes or list(range(episode_count))
+    num_envs = args_cli.num_envs
+
+    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=num_envs)
+    env_cfg.recorders = {}
+    env_cfg.terminations = {}
+
+    env: ManagerBasedRLEnv = gym.make(args_cli.task, cfg=env_cfg).unwrapped
+
+    idle_action = torch.zeros(env.action_space.shape)
+
+    if hasattr(env, "initialize"):
+        env.initialize()
+    env.reset()
+
+    episode_names = list(dataset_file_handler.get_episode_names())
+    replayed_episode_count = 0
+
+    with contextlib.suppress(KeyboardInterrupt) and torch.inference_mode():
+        while simulation_app.is_running() and not simulation_app.is_exiting():
+            env_episode_data_map = {i: EpisodeData() for i in range(num_envs)}
+            has_next_action = True
+            while has_next_action:
+                actions = idle_action.clone()
+                has_next_action = False
+                for env_id in range(num_envs):
+                    env_next_action = get_next_action(
+                        env_episode_data_map[env_id],
+                        return_state=args_cli.replay_mode == "state",
+                    )
+                    if env_next_action is None:
+                        next_episode_index = None
+                        while episode_indices_to_replay:
+                            next_episode_index = episode_indices_to_replay.pop(0)
+                            if next_episode_index < episode_count:
+                                break
+                            next_episode_index = None
+
+                        if next_episode_index is not None:
+                            replayed_episode_count += 1
+                            print(f"{replayed_episode_count:4}: Loading #{next_episode_index} episode to env_{env_id}")
+                            episode_data = dataset_file_handler.load_episode(
+                                episode_names[next_episode_index], env.device
+                            )
+                            env_episode_data_map[env_id] = episode_data
+                            initial_state = episode_data.get_initial_state()
+                            env.reset_to(
+                                initial_state,
+                                torch.tensor([env_id], device=env.device),
+                                seed=int(episode_data.seed) if episode_data.seed is not None else None,
+                                is_relative=True,
+                            )
+                            env_next_action = get_next_action(
+                                env_episode_data_map[env_id],
+                                return_state=args_cli.replay_mode == "state",
+                            )
+                            has_next_action = True
+                        else:
+                            continue
+                    else:
+                        has_next_action = True
+                    actions[env_id] = env_next_action
+
+                env.step(actions)
+            break
+
+    print(f"Finished replaying {replayed_episode_count} episode{'s' if replayed_episode_count != 1 else ''}.")
+    env.close()
+
+
+if __name__ == "__main__":
+    main()
+    simulation_app.close()
diff --git a/scripts/datagen/rl/train.py b/scripts/datagen/rl/train.py
index 46570fe8..9db31b75 100644
--- a/scripts/datagen/rl/train.py
+++ b/scripts/datagen/rl/train.py
@@ -1,65 +1,117 @@
-"""Train a leisaac RL task with RSL-RL (PPO)."""
+"""Train a leisaac RL task with RSL-RL (PPO) via IsaacLab's training script."""
 
-import argparse
-import importlib
 import os
-from datetime import datetime
+import sys
+
+# Make cli_args importable (local module in IsaacLab's rsl_rl script directory)
+_ISAACLAB_RSL_RL_DIR = os.path.join(
+    os.path.dirname(__file__),
+    "..",
+    "..",
+    "..",
+    "dependencies",
+    "IsaacLab",
+    "scripts",
+    "reinforcement_learning",
+    "rsl_rl",
+)
+sys.path.insert(0, os.path.abspath(_ISAACLAB_RSL_RL_DIR))
+
+import argparse
 
 from isaaclab.app import AppLauncher
 
+import cli_args  # isort: skip
+
 parser = argparse.ArgumentParser(description="Train a leisaac RL task with RSL-RL.")
-parser.add_argument("--task", type=str, required=True, help="Gym task ID (e.g. LeIsaac-SO101-LiftCube-RL-v0).")
-parser.add_argument("--num_envs", type=int, default=512, help="Number of parallel environments.")
-parser.add_argument("--max_iterations", type=int, default=1500, help="Number of PPO iterations.")
-parser.add_argument("--log_dir", type=str, default="logs/rl", help="Base logging directory.")
-parser.add_argument("--seed", type=int, default=42, help="Random seed.")
-parser.add_argument("--checkpoint", type=str, default=None, help="Path to checkpoint (.pt) to resume training from.")
+parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument(
+    "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
+)
+parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment.")
+parser.add_argument("--max_iterations", type=int, default=None, help="RL policy training iterations.")
+parser.add_argument(
+    "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
+)
+cli_args.add_rsl_rl_args(parser)
 AppLauncher.add_app_launcher_args(parser)
-args_cli = parser.parse_args()
+args_cli, hydra_args = parser.parse_known_args()
+
+sys.argv = [sys.argv[0]] + hydra_args
 
 app_launcher = AppLauncher(args_cli)
 simulation_app = app_launcher.app
 
+from datetime import datetime
+
 import gymnasium as gym
+import isaaclab_tasks  # noqa: F401
 import leisaac.tasks  # noqa: F401
+import torch
+from isaaclab.envs import ManagerBasedRLEnvCfg
 from isaaclab.utils.io import dump_yaml
-from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
+from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper
+from isaaclab_tasks.utils import get_checkpoint_path
+from isaaclab_tasks.utils.hydra import hydra_task_config
 from rsl_rl.runners import OnPolicyRunner
 
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
+torch.backends.cudnn.deterministic = False
+torch.backends.cudnn.benchmark = False
 
-def _load_from_entry_point(entry_point: str):
-    module_path, attr = entry_point.rsplit(":", 1)
-    return getattr(importlib.import_module(module_path), attr)
 
+@hydra_task_config(args_cli.task, args_cli.agent)
+def main(env_cfg: ManagerBasedRLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
+    """Train with RSL-RL agent."""
+    agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
+    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
+    agent_cfg.max_iterations = (
+        args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations
+    )
 
-def main():
-    kwargs = gym.registry[args_cli.task].kwargs
-    env_cfg = _load_from_entry_point(kwargs["env_cfg_entry_point"])()
-    train_cfg = dict(_load_from_entry_point(kwargs["rsl_rl_cfg_entry_point"]))
-
-    env_cfg.scene.num_envs = args_cli.num_envs
-    env_cfg.seed = args_cli.seed
+    env_cfg.seed = agent_cfg.seed
     env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-    train_cfg["seed"] = args_cli.seed
 
-    task_slug = args_cli.task.lower().replace("-", "_").replace("leisaac_", "").replace("_v0", "")
-    log_dir = os.path.abspath(os.path.join(args_cli.log_dir, task_slug, datetime.now().strftime("%Y-%m-%d_%H-%M-%S")))
+    if args_cli.distributed and args_cli.device is not None and "cpu" in args_cli.device:
+        raise ValueError("Distributed training is not supported when using CPU device.")
+
+    if args_cli.distributed:
+        env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
+        agent_cfg.device = f"cuda:{app_launcher.local_rank}"
+        seed = agent_cfg.seed + app_launcher.local_rank
+        env_cfg.seed = seed
+        agent_cfg.seed = seed
+
+    log_root_path = os.path.abspath(os.path.join("logs", "rsl_rl", agent_cfg.experiment_name))
+    print(f"[INFO] Logging experiment in directory: {log_root_path}")
+    log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    if agent_cfg.run_name:
+        log_dir += f"_{agent_cfg.run_name}"
+    log_dir = os.path.join(log_root_path, log_dir)
+
     env_cfg.log_dir = log_dir
-    print(f"[INFO] Task: {args_cli.task}")
-    print(f"[INFO] Logging to: {log_dir}")
 
     env = gym.make(args_cli.task, cfg=env_cfg)
-    env = RslRlVecEnvWrapper(env)
 
-    runner = OnPolicyRunner(env, train_cfg, log_dir=log_dir, device=env_cfg.sim.device)
-    if args_cli.checkpoint is not None:
-        runner.load(args_cli.checkpoint)
-        print(f"[INFO] Resumed from checkpoint: {args_cli.checkpoint}")
+    if agent_cfg.resume:
+        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
+
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
+
+    runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
+    runner.add_git_repo_to_log(__file__)
+
+    if agent_cfg.resume:
+        print(f"[INFO]: Loading model checkpoint from: {resume_path}")
+        runner.load(resume_path)
 
-    os.makedirs(os.path.join(log_dir, "params"), exist_ok=True)
     dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
+    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
+
+    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
 
-    runner.learn(num_learning_iterations=args_cli.max_iterations, init_at_random_ep_len=True)
     env.close()
 
 
diff --git a/source/leisaac/leisaac/tasks/lift_cube/__init__.py b/source/leisaac/leisaac/tasks/lift_cube/__init__.py
index c4896fef..71f13b21 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/__init__.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/__init__.py
@@ -1,5 +1,7 @@
 import gymnasium as gym
 
+from . import rl_agents
+
 gym.register(
     id="LeIsaac-SO101-LiftCube-v0",
     entry_point="isaaclab.envs:ManagerBasedRLEnv",
@@ -15,7 +17,7 @@
     disable_env_checker=True,
     kwargs={
         "env_cfg_entry_point": f"{__name__}.lift_cube_rl_env_cfg:LiftCubeRLEnvCfg",
-        "rsl_rl_cfg_entry_point": f"{__name__}.lift_cube_rl_env_cfg:TRAIN_CFG",
+        "rsl_rl_cfg_entry_point": f"{rl_agents.__name__}.rsl_rl_ppo_cfg:LiftCubeRLPPORunnerCfg",
     },
 )
 
diff --git a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
index 44f5882f..cad82d03 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/lift_cube_rl_env_cfg.py
@@ -10,45 +10,6 @@
 from . import mdp
 from .lift_cube_env_cfg import LiftCubeEnvCfg, LiftCubeSceneCfg
 
-TRAIN_CFG = {
-    "actor": {
-        "class_name": "MLPModel",
-        "hidden_dims": [256, 128, 64],
-        "activation": "elu",
-        "obs_normalization": True,
-        "distribution_cfg": {
-            "class_name": "GaussianDistribution",
-            "init_std": 0.3,
-        },
-    },
-    "critic": {
-        "class_name": "MLPModel",
-        "hidden_dims": [256, 128, 64],
-        "activation": "elu",
-        "obs_normalization": True,
-    },
-    "algorithm": {
-        "class_name": "PPO",
-        "value_loss_coef": 1.0,
-        "use_clipped_value_loss": True,
-        "clip_param": 0.1,
-        "entropy_coef": 0.005,
-        "num_learning_epochs": 5,
-        "num_mini_batches": 4,
-        "learning_rate": 1.0e-4,
-        "schedule": "adaptive",
-        "gamma": 0.95,
-        "lam": 0.95,
-        "desired_kl": 0.01,
-        "max_grad_norm": 0.5,
-    },
-    "obs_groups": {"actor": ["policy"], "critic": ["policy"]},
-    "num_steps_per_env": 100,
-    "save_interval": 50,
-    "experiment_name": "lift_cube_rl",
-    "seed": 42,
-}
-
 _CUBE_CFG = SceneEntityCfg("cube")
 _ROBOT_CFG = SceneEntityCfg("robot")
 
diff --git a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
index 57295b4b..c164e275 100644
--- a/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
+++ b/source/leisaac/leisaac/tasks/lift_cube/mdp/rewards.py
@@ -17,7 +17,7 @@
    - Zero below min_height (4.6 cm above robot base) — ignores ground-level noise.
    - Monotonically increasing above min_height. Range [0, 1].
 
-3. cube_success  (weight 100.0, sparse)
+3. cube_success  (weight 200.0, sparse)
    One-time terminal bonus when h >= 20 cm. Also triggers early episode
    termination so the policy does not receive further shaped reward after success.
 
diff --git a/source/leisaac/leisaac/tasks/lift_cube/rl_agents/__init__.py b/source/leisaac/leisaac/tasks/lift_cube/rl_agents/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/source/leisaac/leisaac/tasks/lift_cube/rl_agents/rsl_rl_ppo_cfg.py b/source/leisaac/leisaac/tasks/lift_cube/rl_agents/rsl_rl_ppo_cfg.py
new file mode 100644
index 00000000..c8d68ad3
--- /dev/null
+++ b/source/leisaac/leisaac/tasks/lift_cube/rl_agents/rsl_rl_ppo_cfg.py
@@ -0,0 +1,39 @@
+from isaaclab.utils import configclass
+from isaaclab_rl.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+
+
+@configclass
+class LiftCubeRLPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 100
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "lift_cube_rl"
+    obs_groups = {"actor": ["policy"], "critic": ["policy"]}
+
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=0.3,
+        actor_obs_normalization=True,
+        critic_obs_normalization=True,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    )
+
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.1,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-4,
+        schedule="adaptive",
+        gamma=0.95,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=0.5,
+    )