From 2bb62577fc7cf8330c51f2241aae89f994f7c391 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Tue, 13 Jan 2026 18:43:03 +0000 Subject: [PATCH 01/22] Configure the RAID1 member disks in qemu, and initialize disk selection policy when starting RAID. --- OSDK.toml | 2 - .../comps/virtio/src/device/block/device.rs | 2 +- kernel/src/fs/mod.rs | 44 +++++++++++++++---- test/Makefile | 11 ++--- test/apps/test_common.mk | 2 +- tools/qemu_args.sh | 6 ++- 6 files changed, 45 insertions(+), 22 deletions(-) diff --git a/OSDK.toml b/OSDK.toml index 5caf2ff00..2687e3257 100644 --- a/OSDK.toml +++ b/OSDK.toml @@ -62,8 +62,6 @@ qemu.args = """\ -chardev stdio,id=mux,mux=on,signal=off,logfile=qemu.log \ -drive if=none,format=raw,id=x0,file=./test/build/ext2.img \ -drive if=none,format=raw,id=x1,file=./test/build/exfat.img \ - -drive if=none,format=raw,id=r0,file=./test/build/raid1_0.img \ - -drive if=none,format=raw,id=r1,file=./test/build/raid1_1.img \ -device virtio-blk-device,drive=x0 \ -device virtio-keyboard-device \ -device virtio-serial-device \ diff --git a/kernel/comps/virtio/src/device/block/device.rs b/kernel/comps/virtio/src/device/block/device.rs index 65303b7dd..455965e79 100644 --- a/kernel/comps/virtio/src/device/block/device.rs +++ b/kernel/comps/virtio/src/device/block/device.rs @@ -132,7 +132,7 @@ impl BlockDevice { /// processes the request. 
pub fn handle_requests(&self) { let request = self.queue.dequeue(); - info!("Handle Request: {:?}", request); + // info!("Handle Request: {:?}", request); match request.type_() { BioType::Read => self.device.read(request), BioType::Write => self.device.write(request), diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index 46e276d2c..e70b573fb 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -82,20 +82,24 @@ pub fn lazy_init() { } if let Some(raid) = aster_block::get_device(raid1_device_name) { - let raid_fs = Ext2::open(raid).unwrap(); - let target_path = FsPath::try_from("/raid1").unwrap(); - if let Err(err) = self::rootfs::mount_fs_at(raid_fs, &target_path) { - error!("[raid] failed to mount RAID-1 at /raid1: {:?}", err); + + match Ext2::open(raid) { + Ok(raid_fs) => { + let target_path = FsPath::try_from("/raid1").unwrap(); + self::rootfs::mount_fs_at(raid_fs, &target_path).unwrap(); + info!("[kernel] Mounted RAID-1 at {:?} ", target_path); + } + Err(err) => { + error!("[raid] failed to mount RAID-1 at /raid1: {:?}", err); + } } - info!("[kernel] Mounted RAID-1 at {:?} ", target_path); } else { error!("[raid] failed to get RAID-1 device: {:?}", Errno::ENOENT); } } fn setup_raid1_device(raid_device_name: &str) -> Result<()> { - const RAID_MEMBER_NAMES: &[&str] = &["raid0", "raid1"]; - // const RAID_MEMBER_NAMES: &[&str] = &["raid0"]; + const RAID_MEMBER_NAMES: &[&str] = &["raid0", "raid1", "raid2"]; info!( "[raid] initializing RAID-1 '{}' with members {:?}", raid_device_name, RAID_MEMBER_NAMES @@ -119,7 +123,6 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { } } } - #[cfg(not(baseline_asterinas))] info!("[raid] creating selection policy"); #[cfg(not(baseline_asterinas))] @@ -147,6 +150,31 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { } }; + // early stop for testing + // Ok(()); + + info!("[raid] creating selection policy"); + let selection_policy = RoundRobinPolicy::new(members.clone()).unwrap(); + + 
Raid1Device::init(raid_device_name, members, selection_policy).map_err(|err| match err { + Raid1DeviceError::NotEnoughMembers => { + Error::with_message(Errno::EINVAL, "RAID-1 device requires at least two members") + } + })?; + info!("[raid] RAID-1 device created"); + + let worker = aster_block::get_device(raid_device_name).unwrap(); + // The registry stores `Arc`. Use `downcast_ref` on the captured Arc each + // iteration to call the RAID-specific helper without needing ownership of `Raid1Device`. + // TODO(Yingqi): Merge the starting of the RAID-1 thread inside block device server. + let task_fn = move || { + info!("spawn the RAID-1 device thread"); + let raid = worker.downcast_ref::().unwrap(); + loop { + raid.handle_requests(); + } + }; + crate::ThreadOptions::new(task_fn).spawn(); info!( diff --git a/test/Makefile b/test/Makefile index cf0c9d15d..b667f1f4c 100644 --- a/test/Makefile +++ b/test/Makefile @@ -23,8 +23,6 @@ INITRAMFS_IMAGE := $(BUILD_DIR)/initramfs.cpio.gz endif EXT2_IMAGE := $(BUILD_DIR)/ext2.img EXFAT_IMAGE := $(BUILD_DIR)/exfat.img -RAID1_IMAGE0 := $(BUILD_DIR)/raid1_0.img -RAID1_IMAGE1 := $(BUILD_DIR)/raid1_1.img INITRAMFS_EMPTY_DIRS := \ $(INITRAMFS)/root \ @@ -39,6 +37,7 @@ INITRAMFS_EMPTY_DIRS := \ $(INITRAMFS)/.ssh \ $(INITRAMFS)/etc/dropbear + INITRAMFS_ALL_DIRS := \ $(INITRAMFS)/etc \ $(INITRAMFS)/lib/x86_64-linux-gnu \ @@ -243,20 +242,16 @@ $(INITRAMFS)/service: @cp $(CUR_DIR)/service/* $@ $(EXT2_IMAGE): - @dd if=/dev/zero of=$(EXT2_IMAGE) bs=2G count=1 + @dd if=/dev/zero of=$(EXT2_IMAGE) bs=1G count=2 @mke2fs $(EXT2_IMAGE) $(EXFAT_IMAGE): @dd if=/dev/zero of=$(EXFAT_IMAGE) bs=64M count=1 @mkfs.exfat $(EXFAT_IMAGE) -$(RAID1_IMAGE0) $(RAID1_IMAGE1): - @dd if=/dev/zero of=$(RAID1_IMAGE0) bs=128M count=1 - @mke2fs $(RAID1_IMAGE0) - @dd if=$(RAID1_IMAGE0) of=$(RAID1_IMAGE1) .PHONY: build -build: $(INITRAMFS_IMAGE) $(EXT2_IMAGE) $(EXFAT_IMAGE) $(RAID1_IMAGE0) $(RAID1_IMAGE1) +build: $(INITRAMFS_IMAGE) $(EXT2_IMAGE) $(EXFAT_IMAGE) .PHONY: 
format format: diff --git a/test/apps/test_common.mk b/test/apps/test_common.mk index 64afa40d8..f1b337233 100644 --- a/test/apps/test_common.mk +++ b/test/apps/test_common.mk @@ -13,7 +13,7 @@ C_DEPS := $(addprefix $(DEP_OUTPUT_DIR)/,$(C_SRCS:%.c=%.d)) ASM_SRCS := $(wildcard *.S) ASM_OBJS := $(addprefix $(OBJ_OUTPUT_DIR)/,$(ASM_SRCS:%.S=%)) CC := gcc -C_FLAGS := -Wall -Werror +# C_FLAGS := -Wall -Werror .PHONY: all all: $(C_OBJS) $(ASM_OBJS) diff --git a/tools/qemu_args.sh b/tools/qemu_args.sh index 3afa64af1..403589bcf 100755 --- a/tools/qemu_args.sh +++ b/tools/qemu_args.sh @@ -89,8 +89,9 @@ COMMON_QEMU_ARGS="\ -device isa-debug-exit,iobase=0xf4,iosize=0x04 \ -drive if=none,format=raw,id=x0,file=./test/build/ext2.img \ -drive if=none,format=raw,id=x1,file=./test/build/exfat.img \ - -drive if=none,format=raw,id=r0,file=./test/build/raid1_0.img,cache=directsync \ - -drive if=none,format=raw,id=r1,file=./test/build/raid1_1.img,cache=directsync \ + -drive if=none,format=raw,id=r0,file=/dev/nvme0n1p1 \ + -drive if=none,format=raw,id=r1,file=/dev/nvme1n1p1 \ + -drive if=none,format=raw,id=r2,file=/dev/nvme2n1p1 \ " if [ "$1" = "iommu" ]; then @@ -113,6 +114,7 @@ QEMU_ARGS="\ -device virtio-blk-pci,bus=pcie.0,addr=0x7,drive=x1,serial=vexfat,disable-legacy=on,disable-modern=off,queue-size=64,num-queues=1,request-merging=off,backend_defaults=off,discard=off,write-zeroes=off,event_idx=off,indirect_desc=off,queue_reset=off$IOMMU_DEV_EXTRA \ -device virtio-blk-pci,bus=pcie.0,addr=0x8,drive=r0,serial=raid0,disable-legacy=on,disable-modern=off,queue-size=64,num-queues=1,request-merging=off,backend_defaults=off,discard=off,write-zeroes=off,event_idx=off,indirect_desc=off,queue_reset=off$IOMMU_DEV_EXTRA \ -device virtio-blk-pci,bus=pcie.0,addr=0x9,drive=r1,serial=raid1,disable-legacy=on,disable-modern=off,queue-size=64,num-queues=1,request-merging=off,backend_defaults=off,discard=off,write-zeroes=off,event_idx=off,indirect_desc=off,queue_reset=off$IOMMU_DEV_EXTRA \ + -device 
virtio-blk-pci,bus=pcie.0,addr=0xa,drive=r2,serial=raid2,disable-legacy=on,disable-modern=off,queue-size=64,num-queues=1,request-merging=off,backend_defaults=off,discard=off,write-zeroes=off,event_idx=off,indirect_desc=off,queue_reset=off$IOMMU_DEV_EXTRA \ -device virtio-net-pci,netdev=net01,disable-legacy=on,disable-modern=off$VIRTIO_NET_FEATURES$IOMMU_DEV_EXTRA \ -device virtio-serial-pci,disable-legacy=on,disable-modern=off$IOMMU_DEV_EXTRA \ -device virtconsole,chardev=mux \ From ddcbdb5874f4a85796f5db027dd2d0b9a5e49074 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Mon, 9 Mar 2026 21:22:40 +0000 Subject: [PATCH 02/22] Change the RAID1 prcess_read to asynchronous implementation --- Cargo.lock | 1 + kernel/comps/raid/Cargo.toml | 1 + kernel/comps/raid/src/lib.rs | 7 ++++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 7e8841f56..7a3afa39a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -262,6 +262,7 @@ name = "aster-raid" version = "0.1.0" dependencies = [ "aster-block", + "aster-time", "aster-virtio", "log", "ostd", diff --git a/kernel/comps/raid/Cargo.toml b/kernel/comps/raid/Cargo.toml index c2fcd8554..766b4b60d 100644 --- a/kernel/comps/raid/Cargo.toml +++ b/kernel/comps/raid/Cargo.toml @@ -9,6 +9,7 @@ edition = "2024" ostd = { path = "../../../ostd" } aster-block = { path = "../block" } aster-virtio = { path = "../virtio" } +aster-time = { path = "../time" } log = "0.4" [lints] diff --git a/kernel/comps/raid/src/lib.rs b/kernel/comps/raid/src/lib.rs index 2c1aa40af..0ba584c48 100644 --- a/kernel/comps/raid/src/lib.rs +++ b/kernel/comps/raid/src/lib.rs @@ -28,6 +28,7 @@ pub mod selection_policies; pub mod server_traits; use alloc::{borrow::ToOwned, sync::Arc, vec::Vec}; +use ostd::task::scheduler::info; use core::{cmp, ops::Range}; use aster_block::{ @@ -148,6 +149,7 @@ impl Raid1Device { /// Dispatches a request by type. 
The RAID-1 device accepts the same BIOs as /// any `BlockDevice` and applies RAID semantics underneath. fn process_request(&self, request: BioRequest) { + // log::info!("Raid1Device process request, type: {:?}", request.type_()); match request.type_() { BioType::Read => self.process_read_async(request), BioType::Write => self.process_write(request), @@ -176,7 +178,7 @@ impl Raid1Device { #[cfg(baseline_asterinas)] fn process_read(&self, request: BioRequest) { for parent in request.bios() { - let member = self.members[0].clone(); + let member = self.selection_policy.select_block_device().unwrap(); let child = Bio::new( BioType::Read, parent.sid_range().start, @@ -236,6 +238,7 @@ impl Raid1Device { } } + /// Processes read requests asynchronously. /// /// Each `SubmittedBio` in the merged `BioRequest` is assigned to a read @@ -280,6 +283,8 @@ impl Raid1Device { let status = self.fanout_to_members(parent, BioType::Write, || Self::clone_segments(parent)); parent.complete(status); + // let status = BioStatus::Complete; + // parent.complete(status); } } From 669f9289ac7631e7316cb9800c067b27389e7696 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Mon, 9 Mar 2026 21:38:09 +0000 Subject: [PATCH 03/22] Resolve the long RAID1 tail latency issue by setting the RAID1 module's thread to use realtime scheduler --- kernel/src/fs/mod.rs | 43 +++++++++++++++++-------------------------- tools/qemu_args.sh | 8 ++++---- 2 files changed, 21 insertions(+), 30 deletions(-) diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index e70b573fb..5fcd396eb 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -76,6 +76,18 @@ pub fn lazy_init() { // info!("[kernel] Mount ExFat fs at {:?} ", target_path); // } + // single disk benchmark + // let nvme_device_name = "raid0"; + // if let Ok(block_device_nvme) = start_block_device(nvme_device_name) { + // let nvme_fs = Ext2::open(block_device_nvme).unwrap(); + // let target_path = FsPath::try_from("/raid1").unwrap(); + // 
self::rootfs::mount_fs_at(nvme_fs, &target_path).unwrap(); + // info!("[kernel] Mounted NVMe fs at {:?} ", target_path); + // } else { + // error!("[kernel] Failed to start NVMe block device '{}'", nvme_device_name); + // } + // return; + info!("[raid] initializing RAID-1 device: {:?}", raid1_device_name); if let Err(err) = setup_raid1_device(raid1_device_name) { error!("[raid] failed to setup RAID-1 device: {:?}", err); @@ -123,6 +135,7 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { } } } + #[cfg(not(baseline_asterinas))] info!("[raid] creating selection policy"); #[cfg(not(baseline_asterinas))] @@ -150,32 +163,10 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { } }; - // early stop for testing - // Ok(()); - - info!("[raid] creating selection policy"); - let selection_policy = RoundRobinPolicy::new(members.clone()).unwrap(); - - Raid1Device::init(raid_device_name, members, selection_policy).map_err(|err| match err { - Raid1DeviceError::NotEnoughMembers => { - Error::with_message(Errno::EINVAL, "RAID-1 device requires at least two members") - } - })?; - info!("[raid] RAID-1 device created"); - - let worker = aster_block::get_device(raid_device_name).unwrap(); - // The registry stores `Arc`. Use `downcast_ref` on the captured Arc each - // iteration to call the RAID-specific helper without needing ownership of `Raid1Device`. - // TODO(Yingqi): Merge the starting of the RAID-1 thread inside block device server. 
- let task_fn = move || { - info!("spawn the RAID-1 device thread"); - let raid = worker.downcast_ref::().unwrap(); - loop { - raid.handle_requests(); - } - }; - - crate::ThreadOptions::new(task_fn).spawn(); + crate::ThreadOptions::new(task_fn).sched_policy(crate::sched::SchedPolicy::RealTime { + rt_prio: 50.try_into().unwrap(), + rt_policy: crate::sched::RealTimePolicy::RoundRobin { base_slice_factor: None }, + }).spawn(); info!( "[raid] RAID-1 device '{}' registered and worker thread spawned", diff --git a/tools/qemu_args.sh b/tools/qemu_args.sh index 403589bcf..88e9d9100 100755 --- a/tools/qemu_args.sh +++ b/tools/qemu_args.sh @@ -20,7 +20,7 @@ VHOST=${VHOST:-"off"} VSOCK=${VSOCK:-"off"} NETDEV=${NETDEV:-"user"} -SSH_RAND_PORT=${SSH_PORT:-22} +SSH_RAND_PORT=${SSH_PORT:-61541} NGINX_RAND_PORT=${NGINX_PORT:-8080} REDIS_RAND_PORT=${REDIS_PORT:-6379} IPERF_RAND_PORT=${IPERF_PORT:-5201} @@ -89,9 +89,9 @@ COMMON_QEMU_ARGS="\ -device isa-debug-exit,iobase=0xf4,iosize=0x04 \ -drive if=none,format=raw,id=x0,file=./test/build/ext2.img \ -drive if=none,format=raw,id=x1,file=./test/build/exfat.img \ - -drive if=none,format=raw,id=r0,file=/dev/nvme0n1p1 \ - -drive if=none,format=raw,id=r1,file=/dev/nvme1n1p1 \ - -drive if=none,format=raw,id=r2,file=/dev/nvme2n1p1 \ + -drive if=none,format=raw,id=r0,file=/dev/nvme0n1p1,cache=directsync \ + -drive if=none,format=raw,id=r1,file=/dev/nvme1n1p1,cache=directsync \ + -drive if=none,format=raw,id=r2,file=/dev/nvme2n1p1,cache=directsync \ " if [ "$1" = "iommu" ]; then From e22a1a12dcc5ac6b5ec072cac608584bcaf50226 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Tue, 31 Mar 2026 03:39:06 +0000 Subject: [PATCH 04/22] Update the RAID1 and VirtIO module to use the new OQueue API --- .../comps/virtio/src/device/block/device.rs | 31 +++++++++++++------ .../virtio/src/device/block/server_traits.rs | 17 +++++----- kernel/comps/virtio/src/device/mod.rs | 6 ++-- kernel/src/error.rs | 2 +- 4 files changed, 33 insertions(+), 23 deletions(-) diff 
--git a/kernel/comps/virtio/src/device/block/device.rs b/kernel/comps/virtio/src/device/block/device.rs index 455965e79..a7bea5a1e 100644 --- a/kernel/comps/virtio/src/device/block/device.rs +++ b/kernel/comps/virtio/src/device/block/device.rs @@ -2,13 +2,18 @@ use alloc::{ boxed::Box, - collections::BTreeMap, + collections::{BTreeMap, VecDeque}, string::{String, ToString}, sync::Arc, vec, vec::Vec, }; -use core::{fmt::Debug, hint::spin_loop, mem::size_of}; +use core::{ + fmt::Debug, + hint::spin_loop, + mem::size_of, + sync::atomic::{AtomicU64, Ordering}, +}; use aster_block::{ BlockDeviceMeta, @@ -23,7 +28,7 @@ use log::{debug, info}; #[cfg(not(baseline_asterinas))] use ostd::orpc::framework::spawn_thread; #[cfg(not(baseline_asterinas))] -use ostd::orpc::legacy_oqueue::{OQueueRef, Producer}; +use ostd::orpc::oqueue::{ConsumableOQueue as _, ConsumableOQueueRef, OQueue as _, OQueueRef}; #[cfg(not(baseline_asterinas))] use ostd::orpc::{orpc_impl, orpc_server}; use ostd::{ @@ -65,7 +70,7 @@ pub struct BlockDevice { #[cfg(not(baseline_asterinas))] #[orpc_impl] impl server_traits::BlockIOObservable for BlockDevice { - fn bio_submission_oqueue(&self) -> OQueueRef; + fn bio_submission_oqueue(&self) -> ConsumableOQueueRef; fn bio_completion_oqueue(&self) -> OQueueRef; } @@ -151,6 +156,11 @@ impl BlockDevice { pub fn submit(&self, bio: Bio) -> Result { bio.submit(self) } + + /// Sets the logical index for this device, used to tag I/O completion stats. 
+ pub fn set_device_index(&self, index: u64) { + self.device.device_index.store(index, Ordering::Relaxed); + } } #[cfg(baseline_asterinas)] @@ -170,12 +180,13 @@ impl aster_block::BlockDevice for BlockDevice { #[cfg(not(baseline_asterinas))] impl aster_block::BlockDevice for BlockDevice { fn enqueue(&self, bio: SubmittedBio) -> Result<(), BioEnqueueError> { - let reply_handle: Box> = - self.bio_completion_oqueue().attach_producer()?; + let reply_handle = self.bio_completion_oqueue().attach_ref_producer()?; let mut bio = bio; - bio.prepare_enqueue(reply_handle, self.queue.clone()); - self.bio_submission_oqueue().produce(bio)?; + let device_index = self.device.device_index.load(Ordering::Relaxed); + bio.prepare_enqueue(reply_handle, self.queue.clone(), device_index); + let producer = self.bio_submission_oqueue().attach_value_producer()?; + producer.produce(bio); Ok(()) } @@ -197,6 +208,7 @@ struct DeviceInner { block_responses: DmaStream, id_allocator: SpinLock, submitted_requests: SpinLock>, + device_index: AtomicU64, } impl DeviceInner { @@ -245,6 +257,7 @@ impl DeviceInner { block_responses, id_allocator: SpinLock::new(IdAlloc::with_capacity(Self::QUEUE_SIZE as usize)), submitted_requests: SpinLock::new(BTreeMap::new()), + device_index: AtomicU64::new(u64::MAX), }); let cloned_device = device.clone(); @@ -273,7 +286,7 @@ impl DeviceInner { /// Handles the irq issued from the device fn handle_irq(&self) { - info!("Virtio block device handle irq"); + // info!("Virtio block device handle irq"); // When we enter the IRQs handling function, // IRQs have already been disabled, // so there is no need to call `disable_irq`. 
diff --git a/kernel/comps/virtio/src/device/block/server_traits.rs b/kernel/comps/virtio/src/device/block/server_traits.rs index c72d89eba..393366f9b 100644 --- a/kernel/comps/virtio/src/device/block/server_traits.rs +++ b/kernel/comps/virtio/src/device/block/server_traits.rs @@ -3,10 +3,7 @@ use aster_block::bio::{BlockDeviceCompletionStats, SubmittedBio}; use ostd::orpc::{ errors::RPCError, - legacy_oqueue::{ - OQueueAttachError, OQueueRef, - locking::{LockingQueue, ObservableLockingQueue}, - }, + oqueue::{ConsumableOQueue as _, ConsumableOQueueRef, OQueue as _, OQueueError, OQueueRef}, orpc_trait, }; @@ -18,9 +15,9 @@ impl From for VirtioDeviceError { } } -impl From for VirtioDeviceError { - fn from(value: OQueueAttachError) -> Self { - VirtioDeviceError::OQueueAttachError(value) +impl From for VirtioDeviceError { + fn from(value: OQueueError) -> Self { + VirtioDeviceError::OQueueError(value) } } @@ -28,14 +25,14 @@ impl From for VirtioDeviceError { pub trait BlockIOObservable { /// The OQueue containing every bio submission request. /// The submission queue doesn't needed to be observable. - fn bio_submission_oqueue(&self) -> OQueueRef { - LockingQueue::new(32) + fn bio_submission_oqueue(&self) -> ConsumableOQueueRef { + ConsumableOQueueRef::new_anonymous(32) } /// The OQueue containing every write request. 
This includes both sync and async writes and any /// other write operations on other traits fn bio_completion_oqueue(&self) -> OQueueRef { - ObservableLockingQueue::new(32, 1) + OQueueRef::new_anonymous(4096) } } diff --git a/kernel/comps/virtio/src/device/mod.rs b/kernel/comps/virtio/src/device/mod.rs index 990af57b9..6e32da023 100644 --- a/kernel/comps/virtio/src/device/mod.rs +++ b/kernel/comps/virtio/src/device/mod.rs @@ -2,7 +2,7 @@ use int_to_c_enum::TryFromInt; #[cfg(not(baseline_asterinas))] -use ostd::orpc::{errors::RPCError, legacy_oqueue::OQueueAttachError}; +use ostd::orpc::{errors::RPCError, oqueue::OQueueError}; use crate::queue::QueueError; @@ -52,9 +52,9 @@ pub enum VirtioDeviceError { /// The ORPC Errors #[cfg(not(baseline_asterinas))] RPCError(RPCError), - /// The OQueue attachment errors + /// The OQueue errors #[cfg(not(baseline_asterinas))] - OQueueAttachError(OQueueAttachError), + OQueueError(OQueueError), } impl From for VirtioDeviceError { diff --git a/kernel/src/error.rs b/kernel/src/error.rs index 89a19d919..54a9e3538 100644 --- a/kernel/src/error.rs +++ b/kernel/src/error.rs @@ -497,7 +497,7 @@ impl From for Error { Error::with_message(Errno::EINVAL, "Bio is too big") } #[cfg(not(baseline_asterinas))] - aster_block::bio::BioEnqueueError::OQueueAttachError(err) => err.into(), + aster_block::bio::BioEnqueueError::OQueueError(err) => err.into(), } } } From 0d69f437aa6f2c3b832acf09aae9616144b1bc47 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Tue, 31 Mar 2026 03:45:55 +0000 Subject: [PATCH 05/22] Setup ORPC data capture for RAID1 IO data --- Cargo.lock | 2 + kernel/comps/block/Cargo.toml | 1 + kernel/comps/block/src/bio.rs | 57 +++++--- kernel/comps/mariposa_data_capture/Cargo.toml | 1 + kernel/src/fs/mod.rs | 125 +++++++++++++++++- tools/qemu_args.sh | 2 + 6 files changed, 164 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a3afa39a..edb5fb30f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,7 @@ version = 
"0.1.0" dependencies = [ "align_ext", "aster-time", + "binary_serde", "bitvec", "component", "int-to-c-enum", @@ -1172,6 +1173,7 @@ version = "0.1.0" dependencies = [ "aster-block", "aster-logger", + "aster-time", "binary_serde", "component", "log", diff --git a/kernel/comps/block/Cargo.toml b/kernel/comps/block/Cargo.toml index a0357a340..98e5d90e2 100644 --- a/kernel/comps/block/Cargo.toml +++ b/kernel/comps/block/Cargo.toml @@ -12,6 +12,7 @@ align_ext = { path = "../../../ostd/libs/align_ext" } int-to-c-enum = { path = "../../libs/int-to-c-enum" } component = { path = "../../libs/comp-sys/component" } aster-time = { path = "../time" } +binary_serde = "1.0.25" log = "0.4" bitvec = { version = "1.0.1", default-features = false, features = ["alloc"] } diff --git a/kernel/comps/block/src/bio.rs b/kernel/comps/block/src/bio.rs index aa546049b..ed3466434 100644 --- a/kernel/comps/block/src/bio.rs +++ b/kernel/comps/block/src/bio.rs @@ -1,14 +1,15 @@ // SPDX-License-Identifier: MPL-2.0 use alloc::{boxed::Box, sync::Weak}; -use core::{fmt::Display, time::Duration}; +use binary_serde::BinarySerde; +use core::fmt::Display; use align_ext::AlignExt; use aster_time::read_monotonic_time; use bitvec::array::BitArray; use int_to_c_enum::TryFromInt; #[cfg(not(baseline_asterinas))] -use ostd::orpc::legacy_oqueue::{OQueueAttachError, Producer}; +use ostd::orpc::oqueue::{OQueueError, RefProducer}; use ostd::{ Error, mm::{ @@ -25,12 +26,15 @@ use crate::{BLOCK_SIZE, SECTOR_SIZE, prelude::*, request_queue::BioRequestSingle /// Trace data for block device I/O completion. /// /// This struct captures performance metrics when a block I/O request completes. -#[derive(Clone)] +#[derive(Clone, Copy, Default, BinarySerde)] +#[repr(C)] pub struct BlockDeviceCompletionStats { - /// The latency of the I/O request (time from submission to completion). - pub latency: Duration, + /// The latency of the I/O request in microseconds. 
+ pub latency_us: u64, /// The number of outstanding requests at completion time. - pub outstanding_requests: usize, + pub outstanding_requests: u64, + /// The index of the device that produced this stat. + pub device_index: u64, } /// The unit for block I/O. @@ -145,9 +149,11 @@ impl Bio { bio_inner: self.0.clone(), #[cfg(not(baseline_asterinas))] reply_handle: None, - submission_time: None, + submission_time_us: None, #[cfg(not(baseline_asterinas))] bio_request_single_queue: None, + #[cfg(not(baseline_asterinas))] + device_index: None, }) { // Fail to submit, revert the status. let result = self.0.status.compare_exchange( @@ -200,15 +206,15 @@ pub enum BioEnqueueError { Refused, /// Too big bio TooBig, - /// OQueue attachment failures + /// OQueue error #[cfg(not(baseline_asterinas))] - OQueueAttachError(OQueueAttachError), + OQueueError(OQueueError), } #[cfg(not(baseline_asterinas))] -impl From for BioEnqueueError { - fn from(err: OQueueAttachError) -> Self { - Self::OQueueAttachError(err) +impl From for BioEnqueueError { + fn from(err: OQueueError) -> Self { + Self::OQueueError(err) } } @@ -325,12 +331,15 @@ pub struct SubmittedBio { bio_inner: Arc, #[cfg(not(baseline_asterinas))] - reply_handle: Option>>, + reply_handle: Option>, - submission_time: Option, + submission_time_us: Option, #[cfg(not(baseline_asterinas))] bio_request_single_queue: Option>, + + #[cfg(not(baseline_asterinas))] + device_index: Option, } impl core::fmt::Debug for SubmittedBio { @@ -339,7 +348,7 @@ impl core::fmt::Debug for SubmittedBio { let d = d.field("bio_inner", &self.bio_inner); #[cfg(not(baseline_asterinas))] let d = d - .field("submission_time", &self.submission_time) + .field("submission_time_us", &self.submission_time_us) .field("bio_request_single_queue", &self.bio_request_single_queue) .field( "reply_handle", @@ -391,8 +400,8 @@ impl SubmittedBio { } } - pub fn submission_time(&self) -> Option { - self.submission_time + pub fn submission_time_us(&self) -> Option { + 
self.submission_time_us } #[cfg(not(baseline_asterinas))] @@ -406,12 +415,14 @@ impl SubmittedBio { #[cfg(not(baseline_asterinas))] pub fn prepare_enqueue( &mut self, - reply_handle: Box>, + reply_handle: RefProducer, bio_request_single_queue: Arc, + device_index: u64, ) { self.reply_handle = Some(reply_handle); self.bio_request_single_queue = Some(Arc::downgrade(&bio_request_single_queue)); - self.submission_time = Some(read_monotonic_time()); + self.submission_time_us = Some(read_monotonic_time().as_micros() as u64); + self.device_index = Some(device_index); } #[cfg(not(baseline_asterinas))] @@ -419,9 +430,11 @@ impl SubmittedBio { self.reply_handle .as_ref() .unwrap() - .produce(BlockDeviceCompletionStats { - latency: read_monotonic_time() - self.submission_time.unwrap(), - outstanding_requests: self.num_outstanding_requests().unwrap_or(0), + .try_produce_ref(&BlockDeviceCompletionStats { + latency_us: read_monotonic_time().as_micros() as u64 + - self.submission_time_us.unwrap(), + outstanding_requests: self.num_outstanding_requests().unwrap_or(0) as u64, + device_index: self.device_index.unwrap_or(u64::MAX), }); } } diff --git a/kernel/comps/mariposa_data_capture/Cargo.toml b/kernel/comps/mariposa_data_capture/Cargo.toml index 771e3152b..caa0a996f 100644 --- a/kernel/comps/mariposa_data_capture/Cargo.toml +++ b/kernel/comps/mariposa_data_capture/Cargo.toml @@ -9,6 +9,7 @@ edition = "2024" component = { path = "../../libs/comp-sys/component" } aster-logger = { path = "../logger" } aster-block = { path = "../block" } +aster-time = { path = "../time" } ostd = { path = "../../../ostd" } binary_serde = "1.0.25" log = "0.4" diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index 5fcd396eb..626ed373b 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -47,7 +47,13 @@ fn start_block_device(device_name: &str) -> Result> { virtio_block_device.handle_requests(); } }; - crate::ThreadOptions::new(task_fn).spawn(); + // Elevate to RealTime 50 so these I/O 
threads are not starved by other RealTime threads. + crate::ThreadOptions::new(task_fn) + .sched_policy(crate::sched::SchedPolicy::RealTime { + rt_prio: 50.try_into().unwrap(), + rt_policy: crate::sched::RealTimePolicy::RoundRobin { base_slice_factor: None }, + }) + .spawn(); Ok(device) } else { return_errno_with_message!(Errno::ENOENT, "Device does not exist") @@ -120,10 +126,13 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { let mut members = Vec::with_capacity(RAID_MEMBER_NAMES.len()); // Start the RAID-1's underlying member devices. - for &name in RAID_MEMBER_NAMES { + for (index, &name) in RAID_MEMBER_NAMES.iter().enumerate() { match start_block_device(name) { Ok(device) => { info!("[raid] member '{}' online", name); + if let Some(virtio_dev) = device.downcast_ref::() { + virtio_dev.set_device_index(index as u64); + } members.push(device); } Err(err) => { @@ -136,6 +145,9 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { } } + // #[cfg(not(baseline_asterinas))] + setup_data_capture(&members, RAID_MEMBER_NAMES); + #[cfg(not(baseline_asterinas))] info!("[raid] creating selection policy"); #[cfg(not(baseline_asterinas))] @@ -174,3 +186,112 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { ); Ok(()) } + +/// Set up data capture for the RAID-1 member devices' bio completion stats. +/// +/// This starts the capture block device and uses the legacy `DataCaptureDevice` / +/// `DataCaptureFile` server to observe each member's `bio_completion_oqueue` and write the +/// serialized data to disk. 
+#[cfg(not(baseline_asterinas))] +fn setup_data_capture( + members: &[Arc], + member_names: &[&str], +) { + use aster_block::{SECTOR_SIZE, bio::BlockDeviceCompletionStats}; + use aster_virtio::device::block::server_traits::BlockIOObservable as _; + use mariposa_data_capture::{ + DataCaptureDevice as _, DataCaptureDeviceServer, DataCaptureFile as _, FileDescriptor, + ObserverRegistration, + }; + use ostd::orpc::oqueue::{OQueueBase as _, ObservationQuery}; + + // Start the capture block device + // let capture_dev = match start_block_device("capture") { + // Ok(dev) => dev, + // Err(e) => { + // error!("[capture] failed to start capture device: {:?}", e); + // return; + // } + // }; + let device_name = "capture"; + let capture_dev = aster_block::get_device(device_name).unwrap_or_else(|| { + panic!("[capture] failed to get capture device '{}'", device_name); + }); + let cloned_device = capture_dev.clone(); + let task_fn = move || { + info!("[capture] spawn the virt-io-block thread for the capturing device"); + let virtio_block_device = cloned_device.downcast_ref::().unwrap(); + loop { + virtio_block_device.handle_requests(); + } + }; + crate::ThreadOptions::new(task_fn).sched_policy(crate::sched::SchedPolicy::RealTime { + rt_prio: 50.try_into().unwrap(), + rt_policy: crate::sched::RealTimePolicy::RoundRobin { base_slice_factor: None }, + }).spawn(); + + + // Display the capture device backend info + let capture_size = capture_dev.metadata().nr_sectors * SECTOR_SIZE; + info!( + "[capture] capture device online, size = {} bytes", + capture_size + ); + + // Create the data capture device and file + let capture_device = DataCaptureDeviceServer::new(capture_dev.clone()); + let capture_path = ostd::path!(data_capture.bio_completion); + let capture_file = match capture_device.new_file(FileDescriptor { length: 65536, path: capture_path.clone() }) { // 512MB * 1024 * 1024 / 2 / 4096 (using half of the space, and number of pages here) + Ok(builder) => builder.build::(), + 
Err(e) => { + error!("[capture] failed to create capture file: {:?}", e); + return; + } + }; + + // Attach a strong observer to each RAID member's bio_completion_oqueue + // and register it directly with the capture file. + for (member, &name) in members.iter().zip(member_names.iter()) { // (member, name) + let virtio_dev = member.downcast_ref::().unwrap(); + let oqueue = virtio_dev.bio_completion_oqueue(); + let observer_path = capture_path.append(&ostd::path!({name})); + match oqueue.attach_strong_observer(ObservationQuery::identity()) { + Ok(observer) => { + let registration = ObserverRegistration { path: observer_path, observer }; + if let Err(e) = capture_file.register_observer(registration) { + error!("[capture] failed to register observer for '{}': {:?}", name, e); + } else { + info!("[capture] attached observer to '{}'", name); + } + } + Err(e) => { + error!("[capture] failed to attach observer to '{}': {:?}", name, e); + } + } + } + + // Enable capturing + if let Err(e) = capture_file.start() { + error!("[capture] failed to enable capturing: {:?}", e); + } + + // Spawn a timer task that sends TimedFlush every 10 seconds to trigger + // a flush if data has been idle for that long. 
+ let capture_file_for_timer = capture_file.clone(); + crate::ThreadOptions::new(move || { + use core::time::Duration; + use ostd::timer::Jiffies; + loop { + let target = Jiffies::elapsed().as_duration() + Duration::from_secs(5); + while Jiffies::elapsed().as_duration() < target { + ostd::task::Task::yield_now(); + } + if let Err(e) = capture_file_for_timer.timed_flush() { + log::error!("[capture] timed_flush failed: {:?}", e); + } + } + }) + .spawn(); + + info!("[capture] data capture enabled for bio completion stats"); +} diff --git a/tools/qemu_args.sh b/tools/qemu_args.sh index 88e9d9100..b654a109f 100755 --- a/tools/qemu_args.sh +++ b/tools/qemu_args.sh @@ -92,6 +92,7 @@ COMMON_QEMU_ARGS="\ -drive if=none,format=raw,id=r0,file=/dev/nvme0n1p1,cache=directsync \ -drive if=none,format=raw,id=r1,file=/dev/nvme1n1p1,cache=directsync \ -drive if=none,format=raw,id=r2,file=/dev/nvme2n1p1,cache=directsync \ + -drive if=none,format=raw,id=cap0,file=./dataset/capture.raw,cache=writeback \ " if [ "$1" = "iommu" ]; then @@ -115,6 +116,7 @@ QEMU_ARGS="\ -device virtio-blk-pci,bus=pcie.0,addr=0x8,drive=r0,serial=raid0,disable-legacy=on,disable-modern=off,queue-size=64,num-queues=1,request-merging=off,backend_defaults=off,discard=off,write-zeroes=off,event_idx=off,indirect_desc=off,queue_reset=off$IOMMU_DEV_EXTRA \ -device virtio-blk-pci,bus=pcie.0,addr=0x9,drive=r1,serial=raid1,disable-legacy=on,disable-modern=off,queue-size=64,num-queues=1,request-merging=off,backend_defaults=off,discard=off,write-zeroes=off,event_idx=off,indirect_desc=off,queue_reset=off$IOMMU_DEV_EXTRA \ -device virtio-blk-pci,bus=pcie.0,addr=0xa,drive=r2,serial=raid2,disable-legacy=on,disable-modern=off,queue-size=64,num-queues=1,request-merging=off,backend_defaults=off,discard=off,write-zeroes=off,event_idx=off,indirect_desc=off,queue_reset=off$IOMMU_DEV_EXTRA \ + -device 
virtio-blk-pci,bus=pcie.0,addr=0xb,drive=cap0,serial=capture,disable-legacy=on,disable-modern=off,queue-size=64,num-queues=1,request-merging=off,backend_defaults=off,discard=off,write-zeroes=off,event_idx=off,indirect_desc=off,queue_reset=off$IOMMU_DEV_EXTRA \ -device virtio-net-pci,netdev=net01,disable-legacy=on,disable-modern=off$VIRTIO_NET_FEATURES$IOMMU_DEV_EXTRA \ -device virtio-serial-pci,disable-legacy=on,disable-modern=off$IOMMU_DEV_EXTRA \ -device virtconsole,chardev=mux \ From 84607a77af6f3bf25c8ea9394c70f1b59e178742 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Tue, 31 Mar 2026 03:47:07 +0000 Subject: [PATCH 06/22] Fix the data-capture hang by disabling IRQs during OQueue strong observation, and by converting on-the-fly data writeout into a flush-all on inactivity. --- .../src/data_buffering.rs | 16 ++++- .../src/data_capture_file.rs | 60 ++++++++++++++++++- ostd/src/orpc/oqueue/implementation.rs | 4 +- 3 files changed, 73 insertions(+), 7 deletions(-) diff --git a/kernel/comps/mariposa_data_capture/src/data_buffering.rs b/kernel/comps/mariposa_data_capture/src/data_buffering.rs index 3e26b8a69..d3651883c 100644 --- a/kernel/comps/mariposa_data_capture/src/data_buffering.rs +++ b/kernel/comps/mariposa_data_capture/src/data_buffering.rs @@ -12,8 +12,8 @@ use binary_serde::{BinarySerde, Endianness}; use ostd::orpc::path::Path; /// A buffer for managing data which will be written bit by bit, but the extracted in larger blocks. -struct DataBuf { - data: Vec, +pub(crate) struct DataBuf { + pub data: Vec, } impl DataBuf { @@ -63,7 +63,7 @@ impl DataBuf { /// Handles buffering and flushing data to a block device. 
pub(crate) struct ChunkingWriteWrapper { - data_buf: DataBuf, + pub data_buf: DataBuf, pub(crate) block_device: Arc, pub(crate) current_bid: Bid, } @@ -115,9 +115,19 @@ impl ChunkingWriteWrapper { let _ = self .block_device .write_blocks_async(self.current_bid, bio_segment)?; + waiter.wait(); Ok(n_written) } + /// Flushes all complete blocks from the buffer to storage. + /// Stops once BLOCK_SIZE or fewer bytes remain, to avoid writing a partial block. + pub fn flush_all(&mut self) -> Result<(), Box> { + while self.data_buf.len() > BLOCK_SIZE { + self.flush_if_needed()?; + } + Ok(()) + } + pub fn sync(&mut self) -> Result<(), Box> { self.block_device.sync()?; Ok(()) diff --git a/kernel/comps/mariposa_data_capture/src/data_capture_file.rs b/kernel/comps/mariposa_data_capture/src/data_capture_file.rs index 61c653882..1a0e03f83 100644 --- a/kernel/comps/mariposa_data_capture/src/data_capture_file.rs +++ b/kernel/comps/mariposa_data_capture/src/data_capture_file.rs @@ -21,6 +21,8 @@ use alloc::{boxed::Box, sync::Arc, vec::Vec}; use core::{any::Any, error::Error, sync::atomic::AtomicBool}; +use aster_time::read_monotonic_time; + use aster_block::{BLOCK_SIZE, BlockDevice, id::Bid}; use binary_serde::BinarySerde; use ostd::{ @@ -63,6 +65,10 @@ pub trait DataCaptureFile: Any { fn register_observer(&self, attachment: ObserverRegistration) -> Result<(), RPCError>; /// Flush any data remaining in the output buffers to disk. fn flush(&self) -> Result<(), RPCError>; + /// Flush All data in the output buffer to disk. + fn flush_all(&self) -> Result<(), RPCError>; + /// Flush if data has been observed but not flushed for at least 10 seconds. + fn timed_flush(&self) -> Result<(), RPCError>; /// Sync writes to disk. fn sync(&self) -> Result<(), RPCError>; /// Enable capturing to this file. 
@@ -75,6 +81,9 @@ pub trait DataCaptureFile: Any { enum DataCaptureFileCommand { RegisterObserver(ObserverRegistration), Flush, + FlushAll, + /// Flush only if data has been observed but not yet flushed for at least 10 seconds. + TimedFlush, Sync, Stop, } @@ -84,6 +93,8 @@ impl core::fmt::Debug for DataCaptureFil match self { Self::RegisterObserver(arg0) => f.debug_tuple("AttachOqueue").field(arg0).finish(), Self::Flush => write!(f, "Flush"), + Self::FlushAll => write!(f, "FlushAll"), + Self::TimedFlush => write!(f, "TimedFlush"), Self::Sync => write!(f, "Sync"), Self::Stop => write!(f, "Stop"), } @@ -109,12 +120,15 @@ pub struct DataCaptureFileServerThread { impl DataCaptureFileServerThread { fn run(&self) -> Result<(), Box> { let mut data_buf_handler = - ChunkingWriteWrapper::new(BLOCK_SIZE * 2, self.block_device.clone(), self.start_bid); + ChunkingWriteWrapper::new(BLOCK_SIZE * 65536, self.block_device.clone(), self.start_bid); let mut observers: Vec> = Default::default(); // The paths of the attached OQueues. Once the header is written this is set to None and // paths are no longer collected even if more OQueues are attached. let mut paths = Some(Vec::default()); let mut block_handler = BlockOnMany::new(); + // Tracks whether unflushed data exists and when the most recent value was observed. 
+ let mut need_flush = false; + let mut latest_data_observed_us: Option = None; loop { let blockers = [(&self.command_consumer) as &dyn Blocker] @@ -140,6 +154,22 @@ impl DataCaptureFileServerThread { DataCaptureFileCommand::Sync => { data_buf_handler.sync()?; } + DataCaptureFileCommand::FlushAll => { + data_buf_handler.flush_all()?; + } + DataCaptureFileCommand::TimedFlush => { + if need_flush { + if let Some(last_us) = latest_data_observed_us { + let now_us = read_monotonic_time().as_micros() as u64; + if now_us.saturating_sub(last_us) > 5000000 { + log::info!("[capture] Timed flush triggered after {} seconds of inactivity", (now_us - last_us) as f64 / 1_000_000.0); + data_buf_handler.flush_all()?; + need_flush = false; + log::info!("[capture] Timed flush completed"); + } + } + } + } DataCaptureFileCommand::Stop => { self.server .stopped @@ -157,7 +187,14 @@ impl DataCaptureFileServerThread { for o in &observers { // We can't skip the try_strong_observe calls when not `capturing` because that // would leave the values in the OQueues and block them. - while let Ok(Some(v)) = o.try_strong_observe() { + let mut drain_count = 0usize; + while let Ok(Some(v)) = { + // Disable IRQs while holding the OQueue's SpinLock inside + // try_strong_observe to prevent deadlock with the IRQ handler + // that produces to the same OQueue (bio completion stats). 
+ let _irq_guard = ostd::trap::irq::disable_local(); + o.try_strong_observe() + } { if started { if paths.is_some() { data_buf_handler.write_header::(paths.as_ref().unwrap())?; @@ -165,7 +202,14 @@ impl DataCaptureFileServerThread { } data_buf_handler.write_value(&v); - data_buf_handler.flush_if_needed()?; + latest_data_observed_us = Some(read_monotonic_time().as_micros() as u64); + need_flush = true; + // data_buf_handler.flush_if_needed()?; + if data_buf_handler.data_buf.len() % (32 * 1024) == 0 { // 32 * 1024 + log::info!("Captured Data from OQueue to Capture Buffer, size of buffer: {}, capacity: {}", + data_buf_handler.data_buf.len(), + data_buf_handler.data_buf.data.capacity()); + } if data_buf_handler.current_bid == self.end_bid { log::warn!("Data capture ran out of space."); } @@ -189,6 +233,16 @@ impl DataCaptureFile for DataCaptureFileServer< Ok(()) } + fn flush_all(&self) -> Result<(), RPCError> { + self.command_producer.produce(DataCaptureFileCommand::FlushAll); + Ok(()) + } + + fn timed_flush(&self) -> Result<(), RPCError> { + self.command_producer.produce(DataCaptureFileCommand::TimedFlush); + Ok(()) + } + fn sync(&self) -> Result<(), RPCError> { self.command_producer.produce(DataCaptureFileCommand::Sync); Ok(()) diff --git a/ostd/src/orpc/oqueue/implementation.rs b/ostd/src/orpc/oqueue/implementation.rs index eeffe9acf..c59f84178 100644 --- a/ostd/src/orpc/oqueue/implementation.rs +++ b/ostd/src/orpc/oqueue/implementation.rs @@ -774,7 +774,9 @@ impl UntypedOQueueImplementation for OQueueImplementation bool { - let mut inner = self.inner.lock(); + // Disable IRQs before acquiring the lock to prevent deadlock with IRQ handlers + // (e.g. handle_irq → try_produce_ref) that acquire the same lock on the same CPU. + let mut inner = self.inner.disable_irq().lock(); let ObservationRingBuffer { ring_buffer, .. 
} = inner .observer_ring_buffers .get_mut(observer_id) From 01c296fe21aa1cb33ea3fcacac2c376f14bfada9 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Wed, 1 Apr 2026 03:47:10 +0000 Subject: [PATCH 07/22] Change synchronous flushing to asynchronous flushing --- kernel/comps/mariposa_data_capture/src/data_buffering.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/comps/mariposa_data_capture/src/data_buffering.rs b/kernel/comps/mariposa_data_capture/src/data_buffering.rs index d3651883c..ee6e74229 100644 --- a/kernel/comps/mariposa_data_capture/src/data_buffering.rs +++ b/kernel/comps/mariposa_data_capture/src/data_buffering.rs @@ -112,7 +112,7 @@ impl ChunkingWriteWrapper { let raw_data = self.data_buf.written_data(); let bio_segment = BioSegment::alloc(1, BioDirection::ToDevice); let n_written = bio_segment.writer()?.write(&mut raw_data.into()); - let _ = self + let waiter = self .block_device .write_blocks_async(self.current_bid, bio_segment)?; waiter.wait(); From 4b947631ef6e76c49a9a2b8f92e066c5a584e5a9 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Wed, 1 Apr 2026 04:43:54 +0000 Subject: [PATCH 08/22] Update Selector Policy to use new OQueue API --- kernel/comps/raid/src/selection_policies.rs | 22 ++++++++++++++------- ostd/src/orpc/oqueue/implementation.rs | 4 +--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/kernel/comps/raid/src/selection_policies.rs b/kernel/comps/raid/src/selection_policies.rs index 3344fc8b0..1964e06aa 100644 --- a/kernel/comps/raid/src/selection_policies.rs +++ b/kernel/comps/raid/src/selection_policies.rs @@ -2,7 +2,7 @@ #![cfg(not(baseline_asterinas))] -use alloc::{boxed::Box, sync::Arc, vec::Vec}; +use alloc::{sync::Arc, vec::Vec}; use core::sync::atomic::{AtomicUsize, Ordering}; use aster_block::{ @@ -11,7 +11,10 @@ use aster_block::{ }; use ostd::{ Error, - orpc::{legacy_oqueue::WeakObserver, orpc_server}, + orpc::{ + oqueue::{OQueueBase, ObservationQuery}, + orpc_server, + }, sync::Mutex, 
}; @@ -85,7 +88,7 @@ impl SelectionPolicy for RoundRobinPolicy { pub struct LinnOSPolicy { read_cursor: AtomicUsize, members: Vec>, - observers: Vec>>>, + observers: Vec>>, hidden_layers: Vec<[[f32; 256]; 31]>, output_layers: Vec<[[f32; 2]; 256]>, } @@ -123,7 +126,7 @@ impl LinnOSPolicy { Mutex::new( device .bio_completion_oqueue() - .attach_weak_observer() + .attach_weak_observer(4, ObservationQuery::identity()) .expect("Failed to attach weak observer to bio_completion_oqueue"), ) }) @@ -151,7 +154,9 @@ impl SelectionPolicy for LinnOSPolicy { let idx = self.read_cursor.fetch_add(1, Ordering::Relaxed); let device_idx = idx % num_devices; let observer = self.observers[device_idx].lock(); - let completion_trace = observer.weak_observe_recent(4); // observe 4 steps in the history + let completion_trace = observer + .weak_observe_recent(4) + .expect("Failed to observe completion trace"); // Build the 31-element input feature vector: // [0..3]: current outstanding requests (3 digits, from most recent trace) @@ -169,8 +174,11 @@ impl SelectionPolicy for LinnOSPolicy { // Feature Engineering in LinnOS: Decompose numbers into digits. 
// Historical features: 4 steps, each with 3 digits outstanding + 4 digits latency for (i, trace_entry) in completion_trace.iter().enumerate().take(4) { - let outstanding = trace_entry.outstanding_requests; - let latency_us = trace_entry.latency.as_micros() as usize; + let Some(trace_entry) = trace_entry else { + continue; + }; + let outstanding = trace_entry.outstanding_requests as usize; + let latency_us = trace_entry.latency_us as usize; let base = 3 + i * 7; // Outstanding requests -> 3 digits (hundreds, tens, ones) diff --git a/ostd/src/orpc/oqueue/implementation.rs b/ostd/src/orpc/oqueue/implementation.rs index c59f84178..eeffe9acf 100644 --- a/ostd/src/orpc/oqueue/implementation.rs +++ b/ostd/src/orpc/oqueue/implementation.rs @@ -774,9 +774,7 @@ impl UntypedOQueueImplementation for OQueueImplementation bool { - // Disable IRQs before acquiring the lock to prevent deadlock with IRQ handlers - // (e.g. handle_irq → try_produce_ref) that acquire the same lock on the same CPU. - let mut inner = self.inner.disable_irq().lock(); + let mut inner = self.inner.lock(); let ObservationRingBuffer { ring_buffer, .. } = inner .observer_ring_buffers .get_mut(observer_id) From c53cd49ff14a623fd66661743506045af4ba11b5 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Thu, 2 Apr 2026 04:44:10 +0000 Subject: [PATCH 09/22] Change the number of outstanding requests from the num requests in the BioRequestSingleQueue to a manually tracked atomic number. Update LinnOS Policy. 
--- kernel/comps/block/src/bio.rs | 39 +++--- .../src/data_capture_file.rs | 24 +--- kernel/comps/raid/src/generate_weights.py | 125 ++++++++++++++++++ kernel/comps/raid/src/linnos_weights.rs.j2 | 39 ++++-- kernel/comps/raid/src/selection_policies.rs | 72 +++++----- .../comps/virtio/src/device/block/device.rs | 15 ++- kernel/src/fs/mod.rs | 74 ++++++++--- kernel/src/lib.rs | 4 + 8 files changed, 285 insertions(+), 107 deletions(-) create mode 100644 kernel/comps/raid/src/generate_weights.py diff --git a/kernel/comps/block/src/bio.rs b/kernel/comps/block/src/bio.rs index ed3466434..54722b244 100644 --- a/kernel/comps/block/src/bio.rs +++ b/kernel/comps/block/src/bio.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: MPL-2.0 -use alloc::{boxed::Box, sync::Weak}; +use alloc::{boxed::Box}; use binary_serde::BinarySerde; use core::fmt::Display; @@ -31,8 +31,8 @@ use crate::{BLOCK_SIZE, SECTOR_SIZE, prelude::*, request_queue::BioRequestSingle pub struct BlockDeviceCompletionStats { /// The latency of the I/O request in microseconds. pub latency_us: u64, - /// The number of outstanding requests at completion time. - pub outstanding_requests: u64, + /// The number of outstanding 4KB pages at completion time. + pub outstanding_pages: u64, /// The index of the device that produced this stat. pub device_index: u64, } @@ -149,11 +149,12 @@ impl Bio { bio_inner: self.0.clone(), #[cfg(not(baseline_asterinas))] reply_handle: None, - submission_time_us: None, #[cfg(not(baseline_asterinas))] - bio_request_single_queue: None, + submission_time_us: None, #[cfg(not(baseline_asterinas))] device_index: None, + #[cfg(not(baseline_asterinas))] + num_pages: None, }) { // Fail to submit, revert the status. 
let result = self.0.status.compare_exchange( @@ -333,13 +334,14 @@ pub struct SubmittedBio { #[cfg(not(baseline_asterinas))] reply_handle: Option>, + #[cfg(not(baseline_asterinas))] submission_time_us: Option, #[cfg(not(baseline_asterinas))] - bio_request_single_queue: Option>, + device_index: Option, #[cfg(not(baseline_asterinas))] - device_index: Option, + num_pages: Option, } impl core::fmt::Debug for SubmittedBio { @@ -349,7 +351,7 @@ impl core::fmt::Debug for SubmittedBio { #[cfg(not(baseline_asterinas))] let d = d .field("submission_time_us", &self.submission_time_us) - .field("bio_request_single_queue", &self.bio_request_single_queue) + .field("device_index", &self.device_index) .field( "reply_handle", &self.reply_handle.as_ref().map(|_| ""), @@ -369,6 +371,12 @@ impl SubmittedBio { self.bio_inner.sid_range() } + /// Returns the number of 4KB pages covered by this bio's sector range. + pub fn num_pages(&self) -> u64 { + let sectors = self.bio_inner.sid_range().end.to_raw() - self.bio_inner.sid_range().start.to_raw(); + (sectors + 7) / 8 + } + /// Returns the slice to the memory segments. 
pub fn segments(&self) -> &[BioSegment] { self.bio_inner.segments() @@ -404,36 +412,27 @@ impl SubmittedBio { self.submission_time_us } - #[cfg(not(baseline_asterinas))] - pub fn num_outstanding_requests(&self) -> Option { - self.bio_request_single_queue - .as_ref() - .and_then(|w| w.upgrade()) - .map(|q| q.num_requests()) - } - #[cfg(not(baseline_asterinas))] pub fn prepare_enqueue( &mut self, reply_handle: RefProducer, - bio_request_single_queue: Arc, device_index: u64, ) { self.reply_handle = Some(reply_handle); - self.bio_request_single_queue = Some(Arc::downgrade(&bio_request_single_queue)); self.submission_time_us = Some(read_monotonic_time().as_micros() as u64); self.device_index = Some(device_index); + self.num_pages = Some(self.num_pages()); } #[cfg(not(baseline_asterinas))] - pub fn report_statistics(&self) { + pub fn report_statistics(&self, outstanding_pages: u64) { self.reply_handle .as_ref() .unwrap() .try_produce_ref(&BlockDeviceCompletionStats { latency_us: read_monotonic_time().as_micros() as u64 - self.submission_time_us.unwrap(), - outstanding_requests: self.num_outstanding_requests().unwrap_or(0) as u64, + outstanding_pages, device_index: self.device_index.unwrap_or(u64::MAX), }); } diff --git a/kernel/comps/mariposa_data_capture/src/data_capture_file.rs b/kernel/comps/mariposa_data_capture/src/data_capture_file.rs index 1a0e03f83..e26deec05 100644 --- a/kernel/comps/mariposa_data_capture/src/data_capture_file.rs +++ b/kernel/comps/mariposa_data_capture/src/data_capture_file.rs @@ -67,8 +67,6 @@ pub trait DataCaptureFile: Any { fn flush(&self) -> Result<(), RPCError>; /// Flush All data in the output buffer to disk. fn flush_all(&self) -> Result<(), RPCError>; - /// Flush if data has been observed but not flushed for at least 10 seconds. - fn timed_flush(&self) -> Result<(), RPCError>; /// Sync writes to disk. fn sync(&self) -> Result<(), RPCError>; /// Enable capturing to this file. 
@@ -82,8 +80,6 @@ enum DataCaptureFileCommand { RegisterObserver(ObserverRegistration), Flush, FlushAll, - /// Flush only if data has been observed but not yet flushed for at least 10 seconds. - TimedFlush, Sync, Stop, } @@ -94,7 +90,6 @@ impl core::fmt::Debug for DataCaptureFil Self::RegisterObserver(arg0) => f.debug_tuple("AttachOqueue").field(arg0).finish(), Self::Flush => write!(f, "Flush"), Self::FlushAll => write!(f, "FlushAll"), - Self::TimedFlush => write!(f, "TimedFlush"), Self::Sync => write!(f, "Sync"), Self::Stop => write!(f, "Stop"), } @@ -156,19 +151,7 @@ impl DataCaptureFileServerThread { } DataCaptureFileCommand::FlushAll => { data_buf_handler.flush_all()?; - } - DataCaptureFileCommand::TimedFlush => { - if need_flush { - if let Some(last_us) = latest_data_observed_us { - let now_us = read_monotonic_time().as_micros() as u64; - if now_us.saturating_sub(last_us) > 5000000 { - log::info!("[capture] Timed flush triggered after {} seconds of inactivity", (now_us - last_us) as f64 / 1_000_000.0); - data_buf_handler.flush_all()?; - need_flush = false; - log::info!("[capture] Timed flush completed"); - } - } - } + log::info!("[capture internal] Flush all completed"); } DataCaptureFileCommand::Stop => { self.server @@ -238,11 +221,6 @@ impl DataCaptureFile for DataCaptureFileServer< Ok(()) } - fn timed_flush(&self) -> Result<(), RPCError> { - self.command_producer.produce(DataCaptureFileCommand::TimedFlush); - Ok(()) - } - fn sync(&self) -> Result<(), RPCError> { self.command_producer.produce(DataCaptureFileCommand::Sync); Ok(()) diff --git a/kernel/comps/raid/src/generate_weights.py b/kernel/comps/raid/src/generate_weights.py new file mode 100644 index 000000000..b633b26d4 --- /dev/null +++ b/kernel/comps/raid/src/generate_weights.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MPL-2.0 + +""" +Load trained PyTorch LinnOS models and generate the Rust weights file +using the Jinja2 template. 
+ +Usage: + python generate_weights.py \ + --models models/model_device0_lr0.001_bs32768_ep20.pt \ + models/model_device1_lr0.001_bs32768_ep20.pt \ + models/model_device2_lr0.001_bs32768_ep20.pt \ + --template kernel/comps/raid/src/linnos_weights.rs.j2 \ + --output kernel/comps/raid/src/linnos_weights.rs + +Run from the repository root. +""" + +import argparse +from pathlib import Path + +import torch +from jinja2 import Environment, FileSystemLoader + + +def load_model(path: str) -> dict: + """Load a model checkpoint and return its state dict.""" + state = torch.load(path, map_location="cpu", weights_only=False) + return state + + +def print_architecture(state: dict, device_idx: int) -> None: + """Print model architecture for sanity check.""" + print(f" Device {device_idx}:") + for name, tensor in state.items(): + print(f" {name:20s} shape={str(list(tensor.shape)):16s} dtype={tensor.dtype}") + + +def tensor_to_list(tensor: torch.Tensor) -> list: + """Convert a tensor to a nested Python list of floats.""" + return tensor.tolist() + + +def main(): + parser = argparse.ArgumentParser(description="Generate LinnOS Rust weight file from PyTorch models") + parser.add_argument( + "--models", nargs="+", required=True, + help="Paths to .pt model files, one per device in order", + ) + parser.add_argument( + "--template", required=True, + help="Path to the Jinja2 template (.rs.j2)", + ) + parser.add_argument( + "--output", required=True, + help="Path for the generated Rust file (.rs)", + ) + args = parser.parse_args() + + # Load all models + models = [] + for path in args.models: + models.append(load_model(path)) + + num_devices = len(models) + + # Sanity check: print architecture + print(f"Loaded {num_devices} model(s).\n") + print("Model architecture:") + for i, state in enumerate(models): + print_architecture(state, i) + print() + + # Extract dimensions from the first model + hidden_weight_shape = models[0]["net.0.weight"].shape # [hidden_size, 31] + hidden_size = 
hidden_weight_shape[0] + input_size = hidden_weight_shape[1] + output_size = models[0]["net.2.weight"].shape[0] # 2 + + print(f"Network: {input_size} -> {hidden_size} (ReLU) -> {output_size}") + print() + + # Extract weights and biases for each device + # Hidden weights: net.0.weight has shape [hidden_size, input_size]. + # In the Rust code, we index as hidden_weights[input][hidden], + # so we need to transpose: [input_size, hidden_size] = [31][hidden_size]. + hidden_weights = [] + hidden_biases = [] + output_weights = [] + output_biases = [] + + for i, state in enumerate(models): + # Transpose: [hidden_size, 31] -> [31, hidden_size] + hw = state["net.0.weight"].T # [31, hidden_size] + hidden_weights.append(tensor_to_list(hw)) + hidden_biases.append(tensor_to_list(state["net.0.bias"])) + + # Transpose: [2, hidden_size] -> [hidden_size, 2] + ow = state["net.2.weight"].T # [hidden_size, 2] + output_weights.append(tensor_to_list(ow)) + output_biases.append(tensor_to_list(state["net.2.bias"])) + + # Render template + template_path = Path(args.template) + env = Environment( + loader=FileSystemLoader(str(template_path.parent)), + keep_trailing_newline=True, + ) + template = env.get_template(template_path.name) + + rendered = template.render( + num_devices=num_devices, + hidden_size=hidden_size, + hidden_weights=hidden_weights, + hidden_biases=hidden_biases, + output_weights=output_weights, + output_biases=output_biases, + ) + + Path(args.output).write_text(rendered) + print(f"Generated {args.output} ({len(rendered)} bytes)") + + +if __name__ == "__main__": + main() diff --git a/kernel/comps/raid/src/linnos_weights.rs.j2 b/kernel/comps/raid/src/linnos_weights.rs.j2 index 3de554c10..7ed97f331 100644 --- a/kernel/comps/raid/src/linnos_weights.rs.j2 +++ b/kernel/comps/raid/src/linnos_weights.rs.j2 @@ -2,8 +2,8 @@ // LinnOS neural network weights hardcoded for {{ num_devices }} devices. 
// Each device has: -// - hidden layer: 31 x 256 matrix -// - output layer: 256 x 2 matrix +// - hidden layer: 31 x {{ hidden_size }} matrix + {{ hidden_size }} bias +// - output layer: {{ hidden_size }} x 2 matrix + 2 bias // // AUTO-GENERATED by generate_weights.py using Jinja2. // Do not edit this file manually. @@ -11,34 +11,57 @@ /// Number of devices with hardcoded weights. pub const NUM_DEVICES: usize = {{ num_devices }}; +/// Hidden layer size (number of neurons). +pub const HIDDEN_SIZE: usize = {{ hidden_size }}; + {% for dev in range(num_devices) %} -/// Hidden layer weights for device {{ dev }}: 31 inputs -> 256 neurons -pub static HIDDEN_WEIGHTS_{{ dev }}: [[f32; 256]; 31] = [ +/// Hidden layer weights for device {{ dev }}: 31 inputs -> {{ hidden_size }} neurons +pub static HIDDEN_WEIGHTS_{{ dev }}: [[f32; {{ hidden_size }}]; 31] = [ {% for row in hidden_weights[dev] %} [{{ row | join(', ') }}], {% endfor %} ]; +/// Hidden layer bias for device {{ dev }} +pub static HIDDEN_BIAS_{{ dev }}: [f32; {{ hidden_size }}] = [{{ hidden_biases[dev] | join(', ') }}]; + {% endfor %} {% for dev in range(num_devices) %} -/// Output layer weights for device {{ dev }}: 256 neurons -> 2 classes -pub static OUTPUT_WEIGHTS_{{ dev }}: [[f32; 2]; 256] = [ +/// Output layer weights for device {{ dev }}: {{ hidden_size }} neurons -> 2 classes +pub static OUTPUT_WEIGHTS_{{ dev }}: [[f32; 2]; {{ hidden_size }}] = [ {% for row in output_weights[dev] %} [{{ row | join(', ') }}], {% endfor %} ]; +/// Output layer bias for device {{ dev }} +pub static OUTPUT_BIAS_{{ dev }}: [f32; 2] = [{{ output_biases[dev] | join(', ') }}]; + {% endfor %} /// All hidden layer weights indexed by device. -pub static HIDDEN_WEIGHTS: [&[[f32; 256]; 31]; NUM_DEVICES] = [ +pub static HIDDEN_WEIGHTS: [&[[f32; {{ hidden_size }}]; 31]; NUM_DEVICES] = [ {% for dev in range(num_devices) %} &HIDDEN_WEIGHTS_{{ dev }}, {% endfor %} ]; +/// All hidden layer biases indexed by device. 
+pub static HIDDEN_BIASES: [&[f32; {{ hidden_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &HIDDEN_BIAS_{{ dev }}, +{% endfor %} +]; + /// All output layer weights indexed by device. -pub static OUTPUT_WEIGHTS: [&[[f32; 2]; 256]; NUM_DEVICES] = [ +pub static OUTPUT_WEIGHTS: [&[[f32; 2]; {{ hidden_size }}]; NUM_DEVICES] = [ {% for dev in range(num_devices) %} &OUTPUT_WEIGHTS_{{ dev }}, {% endfor %} ]; + +/// All output layer biases indexed by device. +pub static OUTPUT_BIASES: [&[f32; 2]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &OUTPUT_BIAS_{{ dev }}, +{% endfor %} +]; diff --git a/kernel/comps/raid/src/selection_policies.rs b/kernel/comps/raid/src/selection_policies.rs index 1964e06aa..277806396 100644 --- a/kernel/comps/raid/src/selection_policies.rs +++ b/kernel/comps/raid/src/selection_policies.rs @@ -11,14 +11,11 @@ use aster_block::{ }; use ostd::{ Error, - orpc::{ - oqueue::{OQueueBase, ObservationQuery}, - orpc_server, - }, + orpc::orpc_server, sync::Mutex, }; -use crate::server_traits::{ObservableBlockDevice, SelectionPolicy}; +use crate::server_traits::SelectionPolicy; #[derive(Debug)] #[orpc_server] @@ -73,11 +70,12 @@ impl SelectionPolicy for RoundRobinPolicy { } } -/// hidden_layers and output_layers: machine learning model weights. +/// hidden_layers, hidden_biases, output_layers, output_biases: machine learning model weights. /// There is one model per device. Each model contains three layers, an input layer, /// a hidden layer with 256 neurons, and an output layer with 2 neurons for the binary -/// classification (fast/slow). Thus, there are two matrices per device, a 31*256 matrix -/// for the hidden layer and a 256*2 matrix for the output layer. +/// classification (fast/slow). Thus, there are two weight matrices and two bias vectors +/// per device: a 31x256 matrix + 256 bias for the hidden layer and a 256x2 matrix + 2 +/// bias for the output layer. 
/// Each latency number is decomposed into 4 digits, and each number of outstanding /// request number is decomposed into 3 digits. Thus, the total number of input features /// is 3+4*(3+4) = 31. The number of history is R=4. @@ -87,10 +85,12 @@ impl SelectionPolicy for RoundRobinPolicy { #[orpc_server] pub struct LinnOSPolicy { read_cursor: AtomicUsize, - members: Vec>, + members: Vec>, observers: Vec>>, hidden_layers: Vec<[[f32; 256]; 31]>, + hidden_biases: Vec<[f32; 256]>, output_layers: Vec<[[f32; 2]; 256]>, + output_biases: Vec<[f32; 2]>, } impl core::fmt::Debug for LinnOSPolicy { @@ -107,30 +107,23 @@ impl core::fmt::Debug for LinnOSPolicy { } impl LinnOSPolicy { - pub fn new(members: Vec>) -> Result, Error> { - use crate::linnos_weights::{HIDDEN_WEIGHTS, OUTPUT_WEIGHTS}; + pub fn new( + members: Vec>, + observers: Vec>>, + ) -> Result, Error> { + use crate::linnos_weights::{HIDDEN_BIASES, HIDDEN_WEIGHTS, OUTPUT_BIASES, OUTPUT_WEIGHTS}; let num_devices = members.len(); // Copy hardcoded weights into Vecs, one entry per device let hidden_layers: Vec<[[f32; 256]; 31]> = (0..num_devices).map(|i| *HIDDEN_WEIGHTS[i]).collect(); + let hidden_biases: Vec<[f32; 256]> = + (0..num_devices).map(|i| *HIDDEN_BIASES[i]).collect(); let output_layers: Vec<[[f32; 2]; 256]> = (0..num_devices).map(|i| *OUTPUT_WEIGHTS[i]).collect(); - - // Attach one weak observer per device, each peeking 4 steps in the history. - // Wrapped in Mutex because WeakObserver is Send but not Sync. 
- let observers: Vec<_> = members - .iter() - .map(|device| { - Mutex::new( - device - .bio_completion_oqueue() - .attach_weak_observer(4, ObservationQuery::identity()) - .expect("Failed to attach weak observer to bio_completion_oqueue"), - ) - }) - .collect(); + let output_biases: Vec<[f32; 2]> = + (0..num_devices).map(|i| *OUTPUT_BIASES[i]).collect(); let server = Self::new_with(|orpc_internal, _| Self { orpc_internal, @@ -138,7 +131,9 @@ impl LinnOSPolicy { members, observers, hidden_layers, + hidden_biases, output_layers, + output_biases, }); Ok(server) @@ -159,14 +154,19 @@ impl SelectionPolicy for LinnOSPolicy { .expect("Failed to observe completion trace"); // Build the 31-element input feature vector: - // [0..3]: current outstanding requests (3 digits, from most recent trace) + // [0..3]: current outstanding pages (3 digits, from most recent trace) // For each history step i (0..4): - // [3+i*7 .. 3+i*7+3]: outstanding requests (3 digits) + // [3+i*7 .. 3+i*7+3]: outstanding pages (3 digits) // [3+i*7+3 .. 
3+i*7+7]: latency in microseconds (4 digits) let mut input = [0.0f32; 31]; - // Current outstanding requests: use most recent trace entry, decompose into 3 digits - let current_outstanding = submitted.num_outstanding_requests().unwrap_or(0); + // Current outstanding pages: use most recent trace entry, decompose into 3 digits + let current_outstanding = completion_trace + .iter() + .flatten() + .next() + .map(|t| t.outstanding_pages as usize) + .unwrap_or(0); input[0] = ((current_outstanding / 100) % 10) as f32; input[1] = ((current_outstanding / 10) % 10) as f32; input[2] = (current_outstanding % 10) as f32; @@ -177,11 +177,11 @@ impl SelectionPolicy for LinnOSPolicy { let Some(trace_entry) = trace_entry else { continue; }; - let outstanding = trace_entry.outstanding_requests as usize; + let outstanding = trace_entry.outstanding_pages as usize; let latency_us = trace_entry.latency_us as usize; let base = 3 + i * 7; - // Outstanding requests -> 3 digits (hundreds, tens, ones) + // Outstanding pages -> 3 digits (hundreds, tens, ones) input[base] = ((outstanding / 100) % 10) as f32; input[base + 1] = ((outstanding / 10) % 10) as f32; input[base + 2] = (outstanding % 10) as f32; @@ -193,11 +193,12 @@ impl SelectionPolicy for LinnOSPolicy { input[base + 6] = (latency_us % 10) as f32; } - // Hidden layer: input (31) x hidden_weights (31x256) -> hidden_out (256) + // Hidden layer: input (31) x hidden_weights (31x256) + bias (256) -> hidden_out (256) let hidden_weights = &self.hidden_layers[device_idx]; + let hidden_bias = &self.hidden_biases[device_idx]; let mut hidden_out = [0.0f32; 256]; for j in 0..256 { - let mut sum = 0.0f32; + let mut sum = hidden_bias[j]; for i in 0..31 { sum += input[i] * hidden_weights[i][j]; } @@ -205,9 +206,10 @@ impl SelectionPolicy for LinnOSPolicy { hidden_out[j] = if sum > 0.0 { sum } else { 0.0 }; } - // Output layer: hidden_out (256) x output_weights (256x2) -> output (2) + // Output layer: hidden_out (256) x output_weights (256x2) + bias 
(2) -> output (2) let output_weights = &self.output_layers[device_idx]; - let mut output = [0.0f32; 2]; + let output_bias = &self.output_biases[device_idx]; + let mut output = [output_bias[0], output_bias[1]]; for k in 0..2 { for j in 0..256 { output[k] += hidden_out[j] * output_weights[j][k]; diff --git a/kernel/comps/virtio/src/device/block/device.rs b/kernel/comps/virtio/src/device/block/device.rs index a7bea5a1e..610151008 100644 --- a/kernel/comps/virtio/src/device/block/device.rs +++ b/kernel/comps/virtio/src/device/block/device.rs @@ -184,7 +184,8 @@ impl aster_block::BlockDevice for BlockDevice { let mut bio = bio; let device_index = self.device.device_index.load(Ordering::Relaxed); - bio.prepare_enqueue(reply_handle, self.queue.clone(), device_index); + bio.prepare_enqueue(reply_handle, device_index); + self.device.inc_page_counter(bio.num_pages()); let producer = self.bio_submission_oqueue().attach_value_producer()?; producer.produce(bio); Ok(()) @@ -209,6 +210,7 @@ struct DeviceInner { id_allocator: SpinLock, submitted_requests: SpinLock>, device_index: AtomicU64, + num_outstanding_pages: AtomicU64 } impl DeviceInner { @@ -258,6 +260,7 @@ impl DeviceInner { id_allocator: SpinLock::new(IdAlloc::with_capacity(Self::QUEUE_SIZE as usize)), submitted_requests: SpinLock::new(BTreeMap::new()), device_index: AtomicU64::new(u64::MAX), + num_outstanding_pages: AtomicU64::new(0) }); let cloned_device = device.clone(); @@ -329,7 +332,11 @@ impl DeviceInner { complete_request.bio_request.bios().for_each(|bio| { bio.complete(BioStatus::Complete); #[cfg(not(baseline_asterinas))] - bio.report_statistics(); + { + let pages = bio.num_pages(); + let outstanding = self.num_outstanding_pages.fetch_sub(pages, Ordering::Relaxed) - pages; + bio.report_statistics(outstanding); + } }); } } @@ -582,6 +589,10 @@ impl DeviceInner { return; } } + + fn inc_page_counter(&self, n_pages: u64) { + self.num_outstanding_pages.fetch_add(n_pages, Ordering::Relaxed); + } } /// A submitted bio 
request for callback. diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index 626ed373b..6f90778c7 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -34,6 +34,38 @@ use crate::{ prelude::*, }; +#[cfg(not(baseline_asterinas))] +use spin::Once; + +/// Global handle to the data capture file, set during `setup_data_capture`. +#[cfg(not(baseline_asterinas))] +static DATA_CAPTURE_FILE: Once< + Arc>, +> = Once::new(); + +/// Flush all buffered capture data to disk. Call before kernel exit. +/// +/// Commands are enqueued into the server's OQueue and processed in FIFO order. +/// `stop()` spins until the server thread acknowledges, so by the time it returns, +/// the preceding `flush_all` and `sync` are guaranteed to have been processed. +#[cfg(not(baseline_asterinas))] +pub fn flush_data_capture() { + if let Some(capture_file) = DATA_CAPTURE_FILE.get() { + info!("[capture] Flushing all capture data before exit..."); + if let Err(e) = capture_file.flush_all() { + error!("[capture] flush_all failed: {:?}", e); + } + if let Err(e) = capture_file.sync() { + error!("[capture] sync failed: {:?}", e); + } + // stop() blocks until the server thread processes all preceding commands. + if let Err(e) = capture_file.stop() { + error!("[capture] stop failed: {:?}", e); + } + info!("[capture] Capture data flushed."); + } +} + /// Start a thread of the block device to pop requests from the block device's /// request queue and process them if there are any. If the request queue is empty, /// the thread will wait until there is a request in the queue. 
@@ -146,13 +178,32 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { } // #[cfg(not(baseline_asterinas))] - setup_data_capture(&members, RAID_MEMBER_NAMES); + // setup_data_capture(&members, RAID_MEMBER_NAMES); #[cfg(not(baseline_asterinas))] info!("[raid] creating selection policy"); - #[cfg(not(baseline_asterinas))] + // #[cfg(not(baseline_asterinas))] let selection_policy = RoundRobinPolicy::new(members.clone()).unwrap(); #[cfg(not(baseline_asterinas))] + let observers = members + .iter() + .map(|dev| { + use aster_virtio::device::block::server_traits::BlockIOObservable; + use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; + let virtio_dev = dev + .downcast_ref::() + .expect("RAID member must be a VirtIoBlockDevice for LinnOS"); + ostd::sync::Mutex::new( + virtio_dev + .bio_completion_oqueue() + .attach_weak_observer(4, ObservationQuery::identity()) + .expect("Failed to attach weak observer to bio_completion_oqueue"), + ) + }) + .collect(); + #[cfg(not(baseline_asterinas))] + let selection_policy = LinnOSPolicy::new(members.clone(), observers).unwrap(); + #[cfg(not(baseline_asterinas))] let raid1device = Raid1Device::init(raid_device_name, members, selection_policy); #[cfg(baseline_asterinas)] let raid1device = Raid1Device::init(raid_device_name, members); @@ -275,23 +326,8 @@ fn setup_data_capture( error!("[capture] failed to enable capturing: {:?}", e); } - // Spawn a timer task that sends TimedFlush every 10 seconds to trigger - // a flush if data has been idle for that long. 
- let capture_file_for_timer = capture_file.clone(); - crate::ThreadOptions::new(move || { - use core::time::Duration; - use ostd::timer::Jiffies; - loop { - let target = Jiffies::elapsed().as_duration() + Duration::from_secs(5); - while Jiffies::elapsed().as_duration() < target { - ostd::task::Task::yield_now(); - } - if let Err(e) = capture_file_for_timer.timed_flush() { - log::error!("[capture] timed_flush failed: {:?}", e); - } - } - }) - .spawn(); + // Store the capture file globally so it can be flushed on kernel exit. + DATA_CAPTURE_FILE.call_once(|| capture_file); info!("[capture] data capture enabled for bio completion stats"); } diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 85b14c3fb..77d4110f1 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -221,6 +221,10 @@ fn init_thread() { ostd::task::halt_cpu(); } + // Flush all capture data before exiting. + #[cfg(not(baseline_asterinas))] + fs::flush_data_capture(); + // TODO: exit via qemu isa debug device should not be the only way. let exit_code = if initproc.status().exit_code() == 0 { QemuExitCode::Success From 0f8bb74c22528d816a32f81e616c6ce17e51712e Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Mon, 6 Apr 2026 03:21:01 +0000 Subject: [PATCH 10/22] Convert RAID1 write to asynchronous. 
--- kernel/comps/raid/src/lib.rs | 65 ++++++++++++++++++- .../comps/virtio/src/device/block/device.rs | 3 + 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/kernel/comps/raid/src/lib.rs b/kernel/comps/raid/src/lib.rs index 0ba584c48..666127f51 100644 --- a/kernel/comps/raid/src/lib.rs +++ b/kernel/comps/raid/src/lib.rs @@ -152,7 +152,7 @@ impl Raid1Device { // log::info!("Raid1Device process request, type: {:?}", request.type_()); match request.type_() { BioType::Read => self.process_read_async(request), - BioType::Write => self.process_write(request), + BioType::Write => self.process_write_async(request), BioType::Flush => self.process_flush(request), BioType::Discard => self.process_discard(request), } @@ -277,6 +277,7 @@ impl Raid1Device { /// Processes write requests by fanning out to all mirrors and aggregating /// the results (all must succeed). + #[expect(dead_code)] fn process_write(&self, request: BioRequest) { for parent in request.bios() { // Submit the same write to all members. @@ -288,6 +289,68 @@ impl Raid1Device { } } + /// Processes write requests asynchronously by fanning out to all mirrors. + /// + /// Each child BIO carries a callback that atomically decrements a shared + /// counter. The last callback to fire (or the dispatch thread on submission + /// failure) completes the parent. Any failed member marks the write as + /// `IoError`; all members must succeed for `Complete` to be reported. + fn process_write_async(&self, request: BioRequest) { + use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + use ostd::sync::{LocalIrqDisabled, SpinLock}; + + for parent in request.into_bios() { + let n = self.members.len(); + let remaining = Arc::new(AtomicUsize::new(n)); + let had_error = Arc::new(AtomicBool::new(false)); + + // Extract before moving parent into the guard. 
+ let start_sid = parent.sid_range().start; + let segments = parent.segments().to_vec(); + let guard = Arc::new(SpinLock::<_, LocalIrqDisabled>::new(Some(ParentGuard::new(parent)))); + + for member in &self.members { + let remaining_cb = remaining.clone(); + let had_error_cb = had_error.clone(); + let guard_cb = guard.clone(); + let remaining_err = remaining.clone(); + let had_error_err = had_error.clone(); + let guard_err = guard.clone(); + let member = member.clone(); + + let child = Bio::new_with_closure( + BioType::Write, + start_sid, + segments.clone(), + move |child_bio: &SubmittedBio| { + if child_bio.status() != BioStatus::Complete { + had_error_cb.store(true, Ordering::Release); + } + if remaining_cb.fetch_sub(1, Ordering::AcqRel) == 1 { + let status = if had_error_cb.load(Ordering::Acquire) { + BioStatus::IoError + } else { + BioStatus::Complete + }; + if let Some(g) = guard_cb.lock().take() { + g.complete(status); + } + } + }, + ); + + if member.submit(child).is_err() { + had_error_err.store(true, Ordering::Release); + if remaining_err.fetch_sub(1, Ordering::AcqRel) == 1 { + if let Some(g) = guard_err.lock().take() { + g.complete(BioStatus::IoError); + } + } + } + } + } + } + /// Propagates a flush to all members and completes after they finish. 
fn process_flush(&self, request: BioRequest) { for parent in request.bios() { diff --git a/kernel/comps/virtio/src/device/block/device.rs b/kernel/comps/virtio/src/device/block/device.rs index 610151008..16f70268d 100644 --- a/kernel/comps/virtio/src/device/block/device.rs +++ b/kernel/comps/virtio/src/device/block/device.rs @@ -186,6 +186,7 @@ impl aster_block::BlockDevice for BlockDevice { let device_index = self.device.device_index.load(Ordering::Relaxed); bio.prepare_enqueue(reply_handle, device_index); self.device.inc_page_counter(bio.num_pages()); + // log::info!("\x1b[32mIncremented\x1b[0m Page Counter by {}, new value: {}, device_index: {}, type: {:?}", bio.num_pages(), self.device.num_outstanding_pages.load(Ordering::Relaxed), device_index, bio.type_()); let producer = self.bio_submission_oqueue().attach_value_producer()?; producer.produce(bio); Ok(()) @@ -329,12 +330,14 @@ impl DeviceInner { } // Completes the bio request + // let req_type = complete_request.bio_request.type_(); complete_request.bio_request.bios().for_each(|bio| { bio.complete(BioStatus::Complete); #[cfg(not(baseline_asterinas))] { let pages = bio.num_pages(); let outstanding = self.num_outstanding_pages.fetch_sub(pages, Ordering::Relaxed) - pages; + // log::info!("\x1b[31mDecremented\x1b[0m Page Counter by {}, new value: {}, device_index: {}, type: {:?}", pages, outstanding, self.device_index.load(Ordering::Relaxed), req_type); bio.report_statistics(outstanding); } }); From b13133151af251a2e096c88397d291c7c402317d Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Fri, 10 Apr 2026 23:33:48 +0000 Subject: [PATCH 11/22] IMPORTANT: fix oqueue panics by preventing the cursor underflow using wrapping_sub. 
--- ostd/src/orpc/oqueue/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ostd/src/orpc/oqueue/mod.rs b/ostd/src/orpc/oqueue/mod.rs index 0b298f3a4..9db2e038c 100644 --- a/ostd/src/orpc/oqueue/mod.rs +++ b/ostd/src/orpc/oqueue/mod.rs @@ -599,7 +599,7 @@ impl Sub for Cursor { type Output = Cursor; fn sub(self, rhs: usize) -> Self::Output { - Cursor(self.0 - rhs) + Cursor(self.0.wrapping_sub(rhs)) } } From 0bd648dd74c37f60987102bfb28e255af876462a Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Sat, 11 Apr 2026 01:39:59 +0000 Subject: [PATCH 12/22] Ignoring futex error for benchmark --- kernel/src/process/posix_thread/futex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/process/posix_thread/futex.rs b/kernel/src/process/posix_thread/futex.rs index 99015fb16..ddcadf4b2 100644 --- a/kernel/src/process/posix_thread/futex.rs +++ b/kernel/src/process/posix_thread/futex.rs @@ -478,7 +478,7 @@ impl FutexKey { pub fn load_val(&self, ctx: &Context) -> Result { // FIXME: how to implement a atomic load? 
- warn!("implement an atomic load"); + // warn!("implement an atomic load"); ctx.user_space().read_val(self.addr) } From c46005e5e43055242ce0d8486b96a1511825d6cc Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Sat, 11 Apr 2026 02:59:24 +0000 Subject: [PATCH 13/22] Updated the LinnOS Weight Placeholder --- kernel/comps/raid/src/linnos_weights.rs | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/kernel/comps/raid/src/linnos_weights.rs b/kernel/comps/raid/src/linnos_weights.rs index 43472f64b..d07ea21c2 100644 --- a/kernel/comps/raid/src/linnos_weights.rs +++ b/kernel/comps/raid/src/linnos_weights.rs @@ -17,25 +17,51 @@ pub const NUM_DEVICES: usize = 3; /// Hidden layer weights for device 0: 31 inputs -> 256 neurons pub static HIDDEN_WEIGHTS_0: [[f32; 256]; 31] = [[0.0; 256]; 31]; +/// Hidden layer bias for device 0 +pub static HIDDEN_BIAS_0: [f32; 256] = [0.0; 256]; + /// Hidden layer weights for device 1 pub static HIDDEN_WEIGHTS_1: [[f32; 256]; 31] = [[0.0; 256]; 31]; +/// Hidden layer bias for device 1 +pub static HIDDEN_BIAS_1: [f32; 256] = [0.0; 256]; + /// Hidden layer weights for device 2 pub static HIDDEN_WEIGHTS_2: [[f32; 256]; 31] = [[0.0; 256]; 31]; +/// Hidden layer bias for device 2 +pub static HIDDEN_BIAS_2: [f32; 256] = [0.0; 256]; + /// Output layer weights for device 0: 256 neurons -> 2 classes pub static OUTPUT_WEIGHTS_0: [[f32; 2]; 256] = [[0.0; 2]; 256]; +/// Output layer bias for device 0 +pub static OUTPUT_BIAS_0: [f32; 2] = [0.0; 2]; + /// Output layer weights for device 1 pub static OUTPUT_WEIGHTS_1: [[f32; 2]; 256] = [[0.0; 2]; 256]; +/// Output layer bias for device 1 +pub static OUTPUT_BIAS_1: [f32; 2] = [0.0; 2]; + /// Output layer weights for device 2 pub static OUTPUT_WEIGHTS_2: [[f32; 2]; 256] = [[0.0; 2]; 256]; +/// Output layer bias for device 2 +pub static OUTPUT_BIAS_2: [f32; 2] = [0.0; 2]; + /// All hidden layer weights indexed by device. 
pub static HIDDEN_WEIGHTS: [&[[f32; 256]; 31]; NUM_DEVICES] = [&HIDDEN_WEIGHTS_0, &HIDDEN_WEIGHTS_1, &HIDDEN_WEIGHTS_2]; +/// All hidden layer biases indexed by device. +pub static HIDDEN_BIASES: [&[f32; 256]; NUM_DEVICES] = + [&HIDDEN_BIAS_0, &HIDDEN_BIAS_1, &HIDDEN_BIAS_2]; + /// All output layer weights indexed by device. pub static OUTPUT_WEIGHTS: [&[[f32; 2]; 256]; NUM_DEVICES] = [&OUTPUT_WEIGHTS_0, &OUTPUT_WEIGHTS_1, &OUTPUT_WEIGHTS_2]; + +/// All output layer biases indexed by device. +pub static OUTPUT_BIASES: [&[f32; 2]; NUM_DEVICES] = + [&OUTPUT_BIAS_0, &OUTPUT_BIAS_1, &OUTPUT_BIAS_2]; From 53f47e014aeabb9688e33e215b9c67d4d504ac51 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Sat, 11 Apr 2026 03:44:40 +0000 Subject: [PATCH 14/22] Remove linnos weights from tracking --- kernel/comps/raid/src/linnos_weights.rs | 67 ------------------------- 1 file changed, 67 deletions(-) delete mode 100644 kernel/comps/raid/src/linnos_weights.rs diff --git a/kernel/comps/raid/src/linnos_weights.rs b/kernel/comps/raid/src/linnos_weights.rs deleted file mode 100644 index d07ea21c2..000000000 --- a/kernel/comps/raid/src/linnos_weights.rs +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 - -// LinnOS neural network weights hardcoded for 3 devices. -// Each device has: -// - hidden layer: 31 x 256 matrix -// - output layer: 256 x 2 matrix -// -// These weights will be filled in by a Python script with trained values. -// For now, all weights are initialized to 0.0 as placeholders. -// -// The actual weights numbers are expected to be filled with the jinja2 -// templates by the Python scripts that trains the model. - -/// Number of devices with hardcoded weights. 
-pub const NUM_DEVICES: usize = 3; - -/// Hidden layer weights for device 0: 31 inputs -> 256 neurons -pub static HIDDEN_WEIGHTS_0: [[f32; 256]; 31] = [[0.0; 256]; 31]; - -/// Hidden layer bias for device 0 -pub static HIDDEN_BIAS_0: [f32; 256] = [0.0; 256]; - -/// Hidden layer weights for device 1 -pub static HIDDEN_WEIGHTS_1: [[f32; 256]; 31] = [[0.0; 256]; 31]; - -/// Hidden layer bias for device 1 -pub static HIDDEN_BIAS_1: [f32; 256] = [0.0; 256]; - -/// Hidden layer weights for device 2 -pub static HIDDEN_WEIGHTS_2: [[f32; 256]; 31] = [[0.0; 256]; 31]; - -/// Hidden layer bias for device 2 -pub static HIDDEN_BIAS_2: [f32; 256] = [0.0; 256]; - -/// Output layer weights for device 0: 256 neurons -> 2 classes -pub static OUTPUT_WEIGHTS_0: [[f32; 2]; 256] = [[0.0; 2]; 256]; - -/// Output layer bias for device 0 -pub static OUTPUT_BIAS_0: [f32; 2] = [0.0; 2]; - -/// Output layer weights for device 1 -pub static OUTPUT_WEIGHTS_1: [[f32; 2]; 256] = [[0.0; 2]; 256]; - -/// Output layer bias for device 1 -pub static OUTPUT_BIAS_1: [f32; 2] = [0.0; 2]; - -/// Output layer weights for device 2 -pub static OUTPUT_WEIGHTS_2: [[f32; 2]; 256] = [[0.0; 2]; 256]; - -/// Output layer bias for device 2 -pub static OUTPUT_BIAS_2: [f32; 2] = [0.0; 2]; - -/// All hidden layer weights indexed by device. -pub static HIDDEN_WEIGHTS: [&[[f32; 256]; 31]; NUM_DEVICES] = - [&HIDDEN_WEIGHTS_0, &HIDDEN_WEIGHTS_1, &HIDDEN_WEIGHTS_2]; - -/// All hidden layer biases indexed by device. -pub static HIDDEN_BIASES: [&[f32; 256]; NUM_DEVICES] = - [&HIDDEN_BIAS_0, &HIDDEN_BIAS_1, &HIDDEN_BIAS_2]; - -/// All output layer weights indexed by device. -pub static OUTPUT_WEIGHTS: [&[[f32; 2]; 256]; NUM_DEVICES] = - [&OUTPUT_WEIGHTS_0, &OUTPUT_WEIGHTS_1, &OUTPUT_WEIGHTS_2]; - -/// All output layer biases indexed by device. 
-pub static OUTPUT_BIASES: [&[f32; 2]; NUM_DEVICES] = - [&OUTPUT_BIAS_0, &OUTPUT_BIAS_1, &OUTPUT_BIAS_2]; From fc00a08d3ded82160a832c73c8e1eb5156b5a45d Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Sat, 11 Apr 2026 06:58:35 +0000 Subject: [PATCH 15/22] Kept track of the number of outstanding pages in SubmittedBio, log this feature at the time the IO arrives, rather than completing, and corrected the current IO's pages feature. --- kernel/comps/block/src/bio.rs | 35 +++++++++++++++---- kernel/comps/block/src/lib.rs | 5 +++ kernel/comps/raid/src/lib.rs | 15 ++++---- kernel/comps/raid/src/selection_policies.rs | 30 ++++++++++------ kernel/comps/raid/src/server_traits.rs | 2 +- .../comps/virtio/src/device/block/device.rs | 12 ++++--- 6 files changed, 68 insertions(+), 31 deletions(-) diff --git a/kernel/comps/block/src/bio.rs b/kernel/comps/block/src/bio.rs index 54722b244..04d3ecef7 100644 --- a/kernel/comps/block/src/bio.rs +++ b/kernel/comps/block/src/bio.rs @@ -145,6 +145,7 @@ impl Bio { // enqueue to the block device // A SubmittedBio is created here from a Bio, and then pass down to the lower layers. + // Those empty fields will be set just before in the block_device.enqueue function in the prepare_enqueue function. if let Err(e) = block_device.enqueue(SubmittedBio { bio_inner: self.0.clone(), #[cfg(not(baseline_asterinas))] @@ -155,6 +156,8 @@ impl Bio { device_index: None, #[cfg(not(baseline_asterinas))] num_pages: None, + #[cfg(not(baseline_asterinas))] + outstanding_pages: None, }) { // Fail to submit, revert the status. 
let result = self.0.status.compare_exchange( @@ -342,6 +345,9 @@ pub struct SubmittedBio { #[cfg(not(baseline_asterinas))] num_pages: Option, + + #[cfg(not(baseline_asterinas))] + outstanding_pages: Option, } impl core::fmt::Debug for SubmittedBio { @@ -355,7 +361,8 @@ impl core::fmt::Debug for SubmittedBio { .field( "reply_handle", &self.reply_handle.as_ref().map(|_| ""), - ); + ) + .field("outstanding_pages", &self.outstanding_pages); d.finish() } } @@ -371,10 +378,18 @@ impl SubmittedBio { self.bio_inner.sid_range() } + /// an immutable version of the num_pages function. Panic if the num_pages field is not set yet. + pub fn get_num_pages(&self) -> u64 { + self.num_pages.expect("num_pages is not set yet") + } + /// Returns the number of 4KB pages covered by this bio's sector range. - pub fn num_pages(&self) -> u64 { - let sectors = self.bio_inner.sid_range().end.to_raw() - self.bio_inner.sid_range().start.to_raw(); - (sectors + 7) / 8 + /// Note the field num_pages is only available when calling this function, but accessing it directly is not available. + pub fn num_pages(&mut self) -> u64 { + *self.num_pages.get_or_insert_with(|| { + let sectors = self.bio_inner.sid_range().end.to_raw() - self.bio_inner.sid_range().start.to_raw(); + (sectors + 7) / 8 + }) } /// Returns the slice to the memory segments. @@ -412,27 +427,33 @@ impl SubmittedBio { self.submission_time_us } + /// Argument: + /// - `num_pages`: The number of pages covered by this bio's sector range. This is used to update the outstanding page counter in the block device, and also used for performance statistics reporting. + /// - `outstanding_pages`: The number of outstanding pages on the fly before enqueing this bio request. 
#[cfg(not(baseline_asterinas))] pub fn prepare_enqueue( &mut self, reply_handle: RefProducer, device_index: u64, + outstanding_pages: u64 ) { + self.reply_handle = Some(reply_handle); self.submission_time_us = Some(read_monotonic_time().as_micros() as u64); self.device_index = Some(device_index); - self.num_pages = Some(self.num_pages()); + self.num_pages(); // set the num_pages field + self.outstanding_pages = Some(outstanding_pages + self.num_pages.unwrap()); // accumulate the number of outstanding pages } #[cfg(not(baseline_asterinas))] - pub fn report_statistics(&self, outstanding_pages: u64) { + pub fn report_statistics(&self) { self.reply_handle .as_ref() .unwrap() .try_produce_ref(&BlockDeviceCompletionStats { latency_us: read_monotonic_time().as_micros() as u64 - self.submission_time_us.unwrap(), - outstanding_pages, + outstanding_pages: self.outstanding_pages.unwrap_or(u64::MAX), device_index: self.device_index.unwrap_or(u64::MAX), }); } diff --git a/kernel/comps/block/src/lib.rs b/kernel/comps/block/src/lib.rs index 9e89e4432..9d85f674f 100644 --- a/kernel/comps/block/src/lib.rs +++ b/kernel/comps/block/src/lib.rs @@ -58,6 +58,11 @@ pub trait BlockDevice: Send + Sync + Any + Debug { /// Returns the metadata of the block device. fn metadata(&self) -> BlockDeviceMeta; + + /// Returns the number of outstanding pages for this device. + fn num_outstanding_pages(&self) -> u64 { + 0 + } } /// Metadata for a block device. diff --git a/kernel/comps/raid/src/lib.rs b/kernel/comps/raid/src/lib.rs index 666127f51..4c2c80ab0 100644 --- a/kernel/comps/raid/src/lib.rs +++ b/kernel/comps/raid/src/lib.rs @@ -208,21 +208,20 @@ impl Raid1Device { #[cfg(not(baseline_asterinas))] fn process_read(&self, request: BioRequest) { // Submit all children first to overlap device I/O. 
- let mut pending: alloc::vec::Vec<(&SubmittedBio, BioWaiter)> = alloc::vec::Vec::new(); + let mut pending: alloc::vec::Vec<(SubmittedBio, BioWaiter)> = alloc::vec::Vec::new(); - for parent in request.bios() { - let member = self.selection_policy.select_block_device(parent).unwrap(); + for mut parent in request.into_bios() { + let member = self.selection_policy.select_block_device(&mut parent).unwrap(); let child = Bio::new( // Child BIO mirrors the parent’s type, range, and buffers. BioType::Read, parent.sid_range().start, - Self::clone_segments(parent), + Self::clone_segments(&parent), None, ); match child.submit(&*member) { Ok(waiter) => pending.push((parent, waiter)), - // Err(_) => parent.complete(BioStatus::IoError), - Err(_) => todo!("Failed to submit child BIO, Don't know what to do"), + Err(_) => todo!("Failed to submit child BIO, Don’t know what to do"), } } @@ -245,9 +244,9 @@ impl Raid1Device { /// member by the selection policy (device 0 if asterinas baseline) and submitted with `Bio::submit` to overlap device /// I/O. Completion of the parent is reported after the child finishes. 
fn process_read_async(&self, request: BioRequest) { - for parent in request.into_bios() { + for mut parent in request.into_bios() { #[cfg(not(baseline_asterinas))] - let member = self.selection_policy.select_block_device(&parent).unwrap(); + let member = self.selection_policy.select_block_device(&mut parent).unwrap(); #[cfg(baseline_asterinas)] let member = self.members[0].clone(); diff --git a/kernel/comps/raid/src/selection_policies.rs b/kernel/comps/raid/src/selection_policies.rs index 277806396..57b44053c 100644 --- a/kernel/comps/raid/src/selection_policies.rs +++ b/kernel/comps/raid/src/selection_policies.rs @@ -36,7 +36,7 @@ impl Dummy0Policy { impl SelectionPolicy for Dummy0Policy { fn select_block_device( &self, - _submitted: &SubmittedBio, + _submitted: &mut SubmittedBio, ) -> Result, Error> { Ok(self.members[0].clone()) } @@ -63,7 +63,7 @@ impl RoundRobinPolicy { impl SelectionPolicy for RoundRobinPolicy { fn select_block_device( &self, - _submitted: &SubmittedBio, + _submitted: &mut SubmittedBio, ) -> Result, Error> { let idx = self.read_cursor.fetch_add(1, Ordering::Relaxed); Ok(self.members[idx % self.members.len()].clone()) @@ -141,9 +141,10 @@ impl LinnOSPolicy { } impl SelectionPolicy for LinnOSPolicy { - fn select_block_device(&self, submitted: &SubmittedBio) -> Result, Error> { + fn select_block_device(&self, submitted: &mut SubmittedBio) -> Result, Error> { let num_devices = self.members.len(); let mut fail_cnt = 0; + let num_pages = submitted.num_pages(); loop { let idx = self.read_cursor.fetch_add(1, Ordering::Relaxed); @@ -161,18 +162,14 @@ impl SelectionPolicy for LinnOSPolicy { let mut input = [0.0f32; 31]; // Current outstanding pages: use most recent trace entry, decompose into 3 digits - let current_outstanding = completion_trace - .iter() - .flatten() - .next() - .map(|t| t.outstanding_pages as usize) - .unwrap_or(0); + let current_outstanding = num_pages as usize + self.members[device_idx].num_outstanding_pages() as usize; input[0] = 
((current_outstanding / 100) % 10) as f32; input[1] = ((current_outstanding / 10) % 10) as f32; input[2] = (current_outstanding % 10) as f32; // Feature Engineering in LinnOS: Decompose numbers into digits. // Historical features: 4 steps, each with 3 digits outstanding + 4 digits latency + let mut observed: [(usize, usize); 4] = [(0, 0); 4]; for (i, trace_entry) in completion_trace.iter().enumerate().take(4) { let Some(trace_entry) = trace_entry else { continue; @@ -181,6 +178,8 @@ impl SelectionPolicy for LinnOSPolicy { let latency_us = trace_entry.latency_us as usize; let base = 3 + i * 7; + observed[i] = (outstanding, latency_us); + // Outstanding pages -> 3 digits (hundreds, tens, ones) input[base] = ((outstanding / 100) % 10) as f32; input[base + 1] = ((outstanding / 10) % 10) as f32; @@ -193,6 +192,13 @@ impl SelectionPolicy for LinnOSPolicy { input[base + 6] = (latency_us % 10) as f32; } + // log::info!( + // "LinnOS dev={} cur_outstanding={} outstanding=[{},{},{},{}] latency_us=[{},{},{},{}]", + // device_idx, current_outstanding, + // observed[0].0, observed[1].0, observed[2].0, observed[3].0, + // observed[0].1, observed[1].1, observed[2].1, observed[3].1, + // ); + // Hidden layer: input (31) x hidden_weights (31x256) + bias (256) -> hidden_out (256) let hidden_weights = &self.hidden_layers[device_idx]; let hidden_bias = &self.hidden_biases[device_idx]; @@ -216,8 +222,9 @@ impl SelectionPolicy for LinnOSPolicy { } } - // Argmax: output[0] > output[1] means fast, otherwise slow - if output[0] > output[1] { + // Argmax: output[0] < output[1] means fast, otherwise slow + if output[0] < output[1] { + log::info!("Submitting to device {} predicted FAST. 
output=[{:.4},{:.4}]", device_idx, output[0], output[1]); return Ok(self.members[device_idx].clone()); } @@ -225,6 +232,7 @@ impl SelectionPolicy for LinnOSPolicy { // All devices predicted slow -- fall back to round-robin if fail_cnt >= num_devices { let fallback_idx = self.read_cursor.fetch_add(1, Ordering::Relaxed) % num_devices; + log::info!("Submitting to device {} as all devices are busy. output=[{:.4},{:.4}]", fallback_idx, output[0], output[1]); return Ok(self.members[fallback_idx].clone()); } } diff --git a/kernel/comps/raid/src/server_traits.rs b/kernel/comps/raid/src/server_traits.rs index f390cd927..7515d5b96 100644 --- a/kernel/comps/raid/src/server_traits.rs +++ b/kernel/comps/raid/src/server_traits.rs @@ -26,5 +26,5 @@ pub trait SelectionPolicy: Debug { /// Get the block device to read from. The policy cannot decide, for whatever reason, this should /// return an error. The caller will use some fallback. If the returned block device does not /// exist, then the caller will also fallback. 
- fn select_block_device(&self, submitted: &SubmittedBio) -> Result, Error>; + fn select_block_device(&self, submitted: &mut SubmittedBio) -> Result, Error>; } diff --git a/kernel/comps/virtio/src/device/block/device.rs b/kernel/comps/virtio/src/device/block/device.rs index 16f70268d..d420034ce 100644 --- a/kernel/comps/virtio/src/device/block/device.rs +++ b/kernel/comps/virtio/src/device/block/device.rs @@ -184,7 +184,7 @@ impl aster_block::BlockDevice for BlockDevice { let mut bio = bio; let device_index = self.device.device_index.load(Ordering::Relaxed); - bio.prepare_enqueue(reply_handle, device_index); + bio.prepare_enqueue(reply_handle, device_index, self.device.num_outstanding_pages.load(Ordering::Relaxed)); self.device.inc_page_counter(bio.num_pages()); // log::info!("\x1b[32mIncremented\x1b[0m Page Counter by {}, new value: {}, device_index: {}, type: {:?}", bio.num_pages(), self.device.num_outstanding_pages.load(Ordering::Relaxed), device_index, bio.type_()); let producer = self.bio_submission_oqueue().attach_value_producer()?; @@ -198,6 +198,10 @@ impl aster_block::BlockDevice for BlockDevice { nr_sectors: self.device.config_manager.capacity_sectors(), } } + + fn num_outstanding_pages(&self) -> u64 { + self.device.num_outstanding_pages.load(Ordering::Relaxed) + } } #[derive(Debug)] @@ -335,10 +339,10 @@ impl DeviceInner { bio.complete(BioStatus::Complete); #[cfg(not(baseline_asterinas))] { - let pages = bio.num_pages(); - let outstanding = self.num_outstanding_pages.fetch_sub(pages, Ordering::Relaxed) - pages; + let pages = bio.get_num_pages(); + let outstanding = self.num_outstanding_pages.fetch_sub(pages, Ordering::Relaxed); // log::info!("\x1b[31mDecremented\x1b[0m Page Counter by {}, new value: {}, device_index: {}, type: {:?}", pages, outstanding, self.device_index.load(Ordering::Relaxed), req_type); - bio.report_statistics(outstanding); + bio.report_statistics(); } }); } From c6ccfaa60af6d5d8f544a6767efc8baacfccdc9f Mon Sep 17 00:00:00 2001 From: 
Yingqi Cao Date: Sat, 11 Apr 2026 20:47:08 +0000 Subject: [PATCH 16/22] Added LinnOS Plus --- .../raid/src/generate_linnos_plus_weights.py | 147 ++++++++++++++++ kernel/comps/raid/src/lib.rs | 2 + .../comps/raid/src/linnos_plus_weights.rs.j2 | 97 ++++++++++ kernel/comps/raid/src/selection_policies.rs | 165 ++++++++++++++++++ 4 files changed, 411 insertions(+) create mode 100644 kernel/comps/raid/src/generate_linnos_plus_weights.py create mode 100644 kernel/comps/raid/src/linnos_plus_weights.rs.j2 diff --git a/kernel/comps/raid/src/generate_linnos_plus_weights.py b/kernel/comps/raid/src/generate_linnos_plus_weights.py new file mode 100644 index 000000000..2a6da1987 --- /dev/null +++ b/kernel/comps/raid/src/generate_linnos_plus_weights.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MPL-2.0 + +""" +Load trained PyTorch LinnOSPlus models and generate the Rust weights file +using the Jinja2 template. + +The LinnOSPlus architecture has three linear layers: + Linear(31, 8) -> ReLU -> Linear(8, 8) -> ReLU -> Linear(8, 2) + +PyTorch state dict keys: + net.0.weight [8, 31] net.0.bias [8] + net.2.weight [8, 8] net.2.bias [8] + net.4.weight [2, 8] net.4.bias [2] + +Usage: + python generate_linnos_plus_weights.py \ + --models models/linnos_plus_device0.pt \ + models/linnos_plus_device1.pt \ + models/linnos_plus_device2.pt \ + --template kernel/comps/raid/src/linnos_plus_weights.rs.j2 \ + --output kernel/comps/raid/src/linnos_plus_weights.rs + +Run from the repository root. 
+""" + +import argparse +from pathlib import Path + +import torch +from jinja2 import Environment, FileSystemLoader + + +def load_model(path: str) -> dict: + """Load a model checkpoint and return its state dict.""" + state = torch.load(path, map_location="cpu", weights_only=False) + return state + + +def print_architecture(state: dict, device_idx: int) -> None: + """Print model architecture for sanity check.""" + print(f" Device {device_idx}:") + for name, tensor in state.items(): + print(f" {name:20s} shape={str(list(tensor.shape)):16s} dtype={tensor.dtype}") + + +def tensor_to_list(tensor: torch.Tensor) -> list: + """Convert a tensor to a nested Python list of floats.""" + return tensor.tolist() + + +def main(): + parser = argparse.ArgumentParser( + description="Generate LinnOSPlus Rust weight file from PyTorch models" + ) + parser.add_argument( + "--models", nargs="+", required=True, + help="Paths to .pt model files, one per device in order", + ) + parser.add_argument( + "--template", required=True, + help="Path to the Jinja2 template (.rs.j2)", + ) + parser.add_argument( + "--output", required=True, + help="Path for the generated Rust file (.rs)", + ) + args = parser.parse_args() + + # Load all models + models = [] + for path in args.models: + models.append(load_model(path)) + + num_devices = len(models) + + # Sanity check: print architecture + print(f"Loaded {num_devices} model(s).\n") + print("Model architecture:") + for i, state in enumerate(models): + print_architecture(state, i) + print() + + # Extract dimensions from the first model + # net.0: Linear(31, hidden1_size) + # net.2: Linear(hidden1_size, hidden2_size) + # net.4: Linear(hidden2_size, 2) + hidden1_size = models[0]["net.0.weight"].shape[0] + input_size = models[0]["net.0.weight"].shape[1] + hidden2_size = models[0]["net.2.weight"].shape[0] + output_size = models[0]["net.4.weight"].shape[0] + + print(f"Network: {input_size} -> {hidden1_size} (ReLU) -> {hidden2_size} (ReLU) -> {output_size}") + 
print() + + # Extract weights and biases for each device + # PyTorch stores weights as [out_features, in_features]. + # In Rust we index as weights[input][output], so we transpose. + hidden1_weights = [] + hidden1_biases = [] + hidden2_weights = [] + hidden2_biases = [] + output_weights = [] + output_biases = [] + + for i, state in enumerate(models): + # Hidden layer 1: [hidden1_size, 31] -> [31, hidden1_size] + hw1 = state["net.0.weight"].T + hidden1_weights.append(tensor_to_list(hw1)) + hidden1_biases.append(tensor_to_list(state["net.0.bias"])) + + # Hidden layer 2: [hidden2_size, hidden1_size] -> [hidden1_size, hidden2_size] + hw2 = state["net.2.weight"].T + hidden2_weights.append(tensor_to_list(hw2)) + hidden2_biases.append(tensor_to_list(state["net.2.bias"])) + + # Output layer: [2, hidden2_size] -> [hidden2_size, 2] + ow = state["net.4.weight"].T + output_weights.append(tensor_to_list(ow)) + output_biases.append(tensor_to_list(state["net.4.bias"])) + + # Render template + template_path = Path(args.template) + env = Environment( + loader=FileSystemLoader(str(template_path.parent)), + keep_trailing_newline=True, + ) + template = env.get_template(template_path.name) + + rendered = template.render( + num_devices=num_devices, + hidden1_size=hidden1_size, + hidden2_size=hidden2_size, + hidden1_weights=hidden1_weights, + hidden1_biases=hidden1_biases, + hidden2_weights=hidden2_weights, + hidden2_biases=hidden2_biases, + output_weights=output_weights, + output_biases=output_biases, + ) + + Path(args.output).write_text(rendered) + print(f"Generated {args.output} ({len(rendered)} bytes)") + + +if __name__ == "__main__": + main() diff --git a/kernel/comps/raid/src/lib.rs b/kernel/comps/raid/src/lib.rs index 4c2c80ab0..9511cb5b3 100644 --- a/kernel/comps/raid/src/lib.rs +++ b/kernel/comps/raid/src/lib.rs @@ -23,6 +23,8 @@ extern crate alloc; #[cfg(not(baseline_asterinas))] pub mod linnos_weights; #[cfg(not(baseline_asterinas))] +pub mod linnos_plus_weights; 
+#[cfg(not(baseline_asterinas))] pub mod selection_policies; #[cfg(not(baseline_asterinas))] pub mod server_traits; diff --git a/kernel/comps/raid/src/linnos_plus_weights.rs.j2 b/kernel/comps/raid/src/linnos_plus_weights.rs.j2 new file mode 100644 index 000000000..ccde1b011 --- /dev/null +++ b/kernel/comps/raid/src/linnos_plus_weights.rs.j2 @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: MPL-2.0 + +// LinnOSPlus neural network weights hardcoded for {{ num_devices }} devices. +// Each device has: +// - hidden layer 1: 31 x {{ hidden1_size }} matrix + {{ hidden1_size }} bias +// - hidden layer 2: {{ hidden1_size }} x {{ hidden2_size }} matrix + {{ hidden2_size }} bias +// - output layer: {{ hidden2_size }} x 2 matrix + 2 bias +// +// AUTO-GENERATED by generate_linnos_plus_weights.py using Jinja2. +// Do not edit this file manually. + +/// Number of devices with hardcoded weights. +pub const NUM_DEVICES: usize = {{ num_devices }}; + +/// Hidden layer 1 size (number of neurons). +pub const HIDDEN1_SIZE: usize = {{ hidden1_size }}; + +/// Hidden layer 2 size (number of neurons). 
+pub const HIDDEN2_SIZE: usize = {{ hidden2_size }}; + +{% for dev in range(num_devices) %} +/// Hidden layer 1 weights for device {{ dev }}: 31 inputs -> {{ hidden1_size }} neurons +pub static HIDDEN1_WEIGHTS_{{ dev }}: [[f32; {{ hidden1_size }}]; 31] = [ +{% for row in hidden1_weights[dev] %} + [{{ row | join(', ') }}], +{% endfor %} +]; + +/// Hidden layer 1 bias for device {{ dev }} +pub static HIDDEN1_BIAS_{{ dev }}: [f32; {{ hidden1_size }}] = [{{ hidden1_biases[dev] | join(', ') }}]; + +{% endfor %} +{% for dev in range(num_devices) %} +/// Hidden layer 2 weights for device {{ dev }}: {{ hidden1_size }} -> {{ hidden2_size }} neurons +pub static HIDDEN2_WEIGHTS_{{ dev }}: [[f32; {{ hidden2_size }}]; {{ hidden1_size }}] = [ +{% for row in hidden2_weights[dev] %} + [{{ row | join(', ') }}], +{% endfor %} +]; + +/// Hidden layer 2 bias for device {{ dev }} +pub static HIDDEN2_BIAS_{{ dev }}: [f32; {{ hidden2_size }}] = [{{ hidden2_biases[dev] | join(', ') }}]; + +{% endfor %} +{% for dev in range(num_devices) %} +/// Output layer weights for device {{ dev }}: {{ hidden2_size }} neurons -> 2 classes +pub static OUTPUT_WEIGHTS_{{ dev }}: [[f32; 2]; {{ hidden2_size }}] = [ +{% for row in output_weights[dev] %} + [{{ row | join(', ') }}], +{% endfor %} +]; + +/// Output layer bias for device {{ dev }} +pub static OUTPUT_BIAS_{{ dev }}: [f32; 2] = [{{ output_biases[dev] | join(', ') }}]; + +{% endfor %} +/// All hidden layer 1 weights indexed by device. +pub static HIDDEN1_WEIGHTS: [&[[f32; {{ hidden1_size }}]; 31]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &HIDDEN1_WEIGHTS_{{ dev }}, +{% endfor %} +]; + +/// All hidden layer 1 biases indexed by device. +pub static HIDDEN1_BIASES: [&[f32; {{ hidden1_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &HIDDEN1_BIAS_{{ dev }}, +{% endfor %} +]; + +/// All hidden layer 2 weights indexed by device. 
+pub static HIDDEN2_WEIGHTS: [&[[f32; {{ hidden2_size }}]; {{ hidden1_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &HIDDEN2_WEIGHTS_{{ dev }}, +{% endfor %} +]; + +/// All hidden layer 2 biases indexed by device. +pub static HIDDEN2_BIASES: [&[f32; {{ hidden2_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &HIDDEN2_BIAS_{{ dev }}, +{% endfor %} +]; + +/// All output layer weights indexed by device. +pub static OUTPUT_WEIGHTS: [&[[f32; 2]; {{ hidden2_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &OUTPUT_WEIGHTS_{{ dev }}, +{% endfor %} +]; + +/// All output layer biases indexed by device. +pub static OUTPUT_BIASES: [&[f32; 2]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &OUTPUT_BIAS_{{ dev }}, +{% endfor %} +]; diff --git a/kernel/comps/raid/src/selection_policies.rs b/kernel/comps/raid/src/selection_policies.rs index 57b44053c..0b07bfc76 100644 --- a/kernel/comps/raid/src/selection_policies.rs +++ b/kernel/comps/raid/src/selection_policies.rs @@ -238,3 +238,168 @@ impl SelectionPolicy for LinnOSPolicy { } } } + +/// LinnOSPlus: a deeper variant of the LinnOS neural-network selection policy. +/// +/// Architecture (per device): +/// Linear(31, 8) -> ReLU -> Linear(8, 8) -> ReLU -> Linear(8, 2) +/// +/// The input feature vector is identical to LinnOS (31 elements). +/// Weights are loaded from `linnos_plus_weights`. 
+#[orpc_server] +pub struct LinnOSPlusPolicy { + read_cursor: AtomicUsize, + members: Vec>, + observers: Vec>>, + hidden1_weights: Vec<[[f32; 8]; 31]>, + hidden1_biases: Vec<[f32; 8]>, + hidden2_weights: Vec<[[f32; 8]; 8]>, + hidden2_biases: Vec<[f32; 8]>, + output_weights: Vec<[[f32; 2]; 8]>, + output_biases: Vec<[f32; 2]>, +} + +impl core::fmt::Debug for LinnOSPlusPolicy { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("LinnOSPlusPolicy") + .field("read_cursor", &self.read_cursor) + .field("members", &self.members) + .field( + "observers", + &format_args!("[{} observers]", self.observers.len()), + ) + .finish() + } +} + +impl LinnOSPlusPolicy { + pub fn new( + members: Vec>, + observers: Vec>>, + ) -> Result, Error> { + use crate::linnos_plus_weights::{ + HIDDEN1_BIASES, HIDDEN1_WEIGHTS, HIDDEN2_BIASES, HIDDEN2_WEIGHTS, OUTPUT_BIASES, + OUTPUT_WEIGHTS, + }; + + let num_devices = members.len(); + + let hidden1_weights: Vec<[[f32; 8]; 31]> = + (0..num_devices).map(|i| *HIDDEN1_WEIGHTS[i]).collect(); + let hidden1_biases: Vec<[f32; 8]> = + (0..num_devices).map(|i| *HIDDEN1_BIASES[i]).collect(); + let hidden2_weights: Vec<[[f32; 8]; 8]> = + (0..num_devices).map(|i| *HIDDEN2_WEIGHTS[i]).collect(); + let hidden2_biases: Vec<[f32; 8]> = + (0..num_devices).map(|i| *HIDDEN2_BIASES[i]).collect(); + let output_weights: Vec<[[f32; 2]; 8]> = + (0..num_devices).map(|i| *OUTPUT_WEIGHTS[i]).collect(); + let output_biases: Vec<[f32; 2]> = + (0..num_devices).map(|i| *OUTPUT_BIASES[i]).collect(); + + let server = Self::new_with(|orpc_internal, _| Self { + orpc_internal, + read_cursor: AtomicUsize::new(0), + members, + observers, + hidden1_weights, + hidden1_biases, + hidden2_weights, + hidden2_biases, + output_weights, + output_biases, + }); + + Ok(server) + } +} + +impl SelectionPolicy for LinnOSPlusPolicy { + fn select_block_device(&self, submitted: &mut SubmittedBio) -> Result, Error> { + let num_devices = self.members.len(); + let mut 
fail_cnt = 0; + let num_pages = submitted.num_pages(); + + loop { + let idx = self.read_cursor.fetch_add(1, Ordering::Relaxed); + let device_idx = idx % num_devices; + let observer = self.observers[device_idx].lock(); + let completion_trace = observer + .weak_observe_recent(4) + .expect("Failed to observe completion trace"); + + // Build the 31-element input feature vector (same as LinnOS) + let mut input = [0.0f32; 31]; + + let current_outstanding = num_pages as usize + self.members[device_idx].num_outstanding_pages() as usize; + input[0] = ((current_outstanding / 100) % 10) as f32; + input[1] = ((current_outstanding / 10) % 10) as f32; + input[2] = (current_outstanding % 10) as f32; + + for (i, trace_entry) in completion_trace.iter().enumerate().take(4) { + let Some(trace_entry) = trace_entry else { + continue; + }; + let outstanding = trace_entry.outstanding_pages as usize; + let latency_us = trace_entry.latency_us as usize; + let base = 3 + i * 7; + + input[base] = ((outstanding / 100) % 10) as f32; + input[base + 1] = ((outstanding / 10) % 10) as f32; + input[base + 2] = (outstanding % 10) as f32; + + input[base + 3] = ((latency_us / 1000) % 10) as f32; + input[base + 4] = ((latency_us / 100) % 10) as f32; + input[base + 5] = ((latency_us / 10) % 10) as f32; + input[base + 6] = (latency_us % 10) as f32; + } + + // Hidden layer 1: input (31) x hidden1_weights (31x8) + bias (8) -> hidden1_out (8) + let h1_weights = &self.hidden1_weights[device_idx]; + let h1_bias = &self.hidden1_biases[device_idx]; + let mut hidden1_out = [0.0f32; 8]; + for j in 0..8 { + let mut sum = h1_bias[j]; + for i in 0..31 { + sum += input[i] * h1_weights[i][j]; + } + hidden1_out[j] = if sum > 0.0 { sum } else { 0.0 }; + } + + // Hidden layer 2: hidden1_out (8) x hidden2_weights (8x8) + bias (8) -> hidden2_out (8) + let h2_weights = &self.hidden2_weights[device_idx]; + let h2_bias = &self.hidden2_biases[device_idx]; + let mut hidden2_out = [0.0f32; 8]; + for j in 0..8 { + let mut sum = 
h2_bias[j]; + for i in 0..8 { + sum += hidden1_out[i] * h2_weights[i][j]; + } + hidden2_out[j] = if sum > 0.0 { sum } else { 0.0 }; + } + + // Output layer: hidden2_out (8) x output_weights (8x2) + bias (2) -> output (2) + let out_weights = &self.output_weights[device_idx]; + let out_bias = &self.output_biases[device_idx]; + let mut output = [out_bias[0], out_bias[1]]; + for k in 0..2 { + for j in 0..8 { + output[k] += hidden2_out[j] * out_weights[j][k]; + } + } + + // Argmax: output[0] < output[1] means fast, otherwise slow + if output[0] < output[1] { + log::info!("LinnOSPlus: device {} predicted FAST. output=[{:.4},{:.4}]", device_idx, output[0], output[1]); + return Ok(self.members[device_idx].clone()); + } + + fail_cnt += 1; + if fail_cnt >= num_devices { + let fallback_idx = self.read_cursor.fetch_add(1, Ordering::Relaxed) % num_devices; + log::info!("LinnOSPlus: device {} fallback (all busy). output=[{:.4},{:.4}]", fallback_idx, output[0], output[1]); + return Ok(self.members[fallback_idx].clone()); + } + } + } +} From f0d3654b9c0806f9e725d157cd5671610e9561e3 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Mon, 13 Apr 2026 03:42:38 +0000 Subject: [PATCH 17/22] Decision Tree policy, and using kernel build parameters to select the policy to build into the kernel. 
--- Cargo.toml | 2 +- Makefile | 8 + .../raid/src/decision_tree_predictions.rs | 803 ++++++++++++++++++ kernel/comps/raid/src/lib.rs | 2 + kernel/comps/raid/src/selection_policies.rs | 115 ++- kernel/src/fs/mod.rs | 62 +- 6 files changed, 965 insertions(+), 27 deletions(-) create mode 100644 kernel/comps/raid/src/decision_tree_predictions.rs diff --git a/Cargo.toml b/Cargo.toml index 4750a0f90..a547c2c4e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,7 +54,7 @@ exclude = [ function_casts_as_integer = "allow" mismatched_lifetime_syntaxes = "allow" missing_crate_level_docs = "warn" -unexpected_cfgs = { level = "deny", check-cfg = ['cfg(baseline_asterinas)', 'cfg(ktest)'] } +unexpected_cfgs = { level = "deny", check-cfg = ['cfg(baseline_asterinas)', 'cfg(ktest)', 'cfg(capture_data)', 'cfg(raid_selection, values("roundrobin", "linnos", "linnos_plus", "decision_tree"))'] } unpredictable-function-pointer-comparisons = "allow" unsafe_op_in_unsafe_fn = "deny" unused_parens = "allow" diff --git a/Makefile b/Makefile index 1c8b23828..e439b4e6a 100644 --- a/Makefile +++ b/Makefile @@ -168,6 +168,14 @@ RUSTFLAGS += --cfg=baseline_asterinas CLIPPY_COMMON_ARGS += --cfg=baseline_asterinas -A unused-imports -A dead-code -A unfulfilled-lint-expectations endif +ifeq ($(CAPTURE_DATA), 1) +RUSTFLAGS += --cfg=capture_data +endif + +ifdef RAID_SELECTION +RUSTFLAGS += --cfg=raid_selection="$(RAID_SELECTION)" +endif + # To test the linux-efi-handover64 boot protocol, we need to use Debian's # GRUB release, which is installed in /usr/bin in our Docker image. ifeq ($(BOOT_PROTOCOL), linux-efi-handover64) diff --git a/kernel/comps/raid/src/decision_tree_predictions.rs b/kernel/comps/raid/src/decision_tree_predictions.rs new file mode 100644 index 000000000..0846d795c --- /dev/null +++ b/kernel/comps/raid/src/decision_tree_predictions.rs @@ -0,0 +1,803 @@ +// SPDX-License-Identifier: MPL-2.0 + +// Per-device decision tree prediction functions. 
+// +// Each function is generated by export_dt.py and pasted here: +// +// python export_dt.py \ +// --model results/.pkl \ +// --format rust \ +// --fn_name predict_device \ +// --out /tmp/dt_device.rs +// +// Input: &[u8; 31] — one byte per feature digit (0–9), same layout as the LinnOS +// feature vector: +// x[0..3] current outstanding pages (3 digits: hundreds, tens, ones) +// x[3..10] history step 0 (3 outstanding digits + 4 latency-us digits) +// x[10..17] history step 1 +// x[17..24] history step 2 +// x[24..31] history step 3 +// +// Returns: 0 (slow) or 1 (fast) +// +// ── DEVICE 0 ───────────────────────────────────────────────────────────────── +// PASTE the output of `export_dt.py --fn_name predict_device0` below, +// replacing this placeholder function. + +/// Predict fast (1) or slow (0) for device 0. +#[inline] +pub fn predict_device0(x: &[u8; 31]) -> u8 { + if x[30] <= 1 { // cur_out_2 <= 1.5000 + if x[29] <= 0 { // cur_out_1 <= 0.5000 + if x[27] <= 1 { // prev1_out_2 <= 1.5000 + if x[26] <= 0 { // prev1_out_1 <= 0.5000 + if x[18] <= 5 { // prev4_out_2 <= 5.5000 + if x[6] <= 0 { // prev3_lat_2 <= 0.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[1] <= 0 { // prev4_lat_1 <= 0.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } else { + if x[21] <= 2 { // prev3_out_2 <= 2.5000 + if x[3] <= 8 { // prev4_lat_3 <= 8.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[17] <= 0 { // prev4_out_1 <= 0.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } + } else { + if x[0] <= 3 { // prev4_lat_0 <= 3.5000 + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[13] <= 1 { // prev1_lat_1 <= 1.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } else { 
+ if x[8] <= 3 { // prev2_lat_0 <= 3.5000 + if x[18] <= 0 { // prev4_out_2 <= 0.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[4] <= 3 { // prev3_lat_0 <= 3.5000 + 1 // fast (counts: [0, 1]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + if x[20] <= 0 { // prev3_out_1 <= 0.5000 + if x[5] <= 3 { // prev3_lat_1 <= 3.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[7] <= 5 { // prev3_lat_3 <= 5.5000 + 0 // slow (counts: [1, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + if x[9] <= 4 { // prev2_lat_1 <= 4.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[18] <= 1 { // prev4_out_2 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[27] <= 1 { // prev1_out_2 <= 1.5000 + if x[4] <= 0 { // prev3_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[15] <= 0 { // prev1_lat_3 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[24] <= 2 { // prev2_out_2 <= 2.5000 + if x[1] <= 1 { // prev4_lat_1 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[15] <= 7 { // prev1_lat_3 <= 7.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } + } else { + if x[30] <= 5 { // cur_out_2 <= 5.5000 + if x[30] <= 4 { // cur_out_2 <= 4.5000 + if x[29] <= 0 { // cur_out_1 <= 0.5000 + if x[30] <= 3 { // cur_out_2 <= 3.5000 + if x[8] <= 1 { // prev2_lat_0 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + 0 // slow 
(counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[12] <= 1 { // prev1_lat_0 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } else { + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + if x[23] <= 0 { // prev2_out_1 <= 0.5000 + if x[8] <= 1 { // prev2_lat_0 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[12] <= 1 { // prev1_lat_0 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[12] <= 1 { // prev1_lat_0 <= 1.5000 + if x[27] <= 2 { // prev1_out_2 <= 2.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + if x[20] <= 0 { // prev3_out_1 <= 0.5000 + if x[13] <= 2 { // prev1_lat_1 <= 2.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[24] <= 3 { // prev2_out_2 <= 3.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + if x[30] <= 7 { // cur_out_2 <= 7.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[0] <= 1 { // prev4_lat_0 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[27] <= 2 { // prev1_out_2 <= 2.5000 + if x[30] <= 6 { // cur_out_2 <= 6.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[13] <= 4 { // prev1_lat_1 <= 4.5000 + 0 // slow (counts: [0, 
0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[30] <= 6 { // cur_out_2 <= 6.5000 + if x[5] <= 0 { // prev3_lat_1 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[30] <= 7 { // cur_out_2 <= 7.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } + } +} + +// ── DEVICE 1 ───────────────────────────────────────────────────────────────── +// PASTE the output of `export_dt.py --fn_name predict_device1` below, +// replacing this placeholder function. + +/// Predict fast (1) or slow (0) for device 1. +#[inline] +pub fn predict_device1(x: &[u8; 31]) -> u8 { + if x[30] <= 1 { // cur_out_2 <= 1.5000 + if x[29] <= 0 { // cur_out_1 <= 0.5000 + if x[0] <= 2 { // prev4_lat_0 <= 2.5000 + if x[27] <= 1 { // prev1_out_2 <= 1.5000 + if x[26] <= 0 { // prev1_out_1 <= 0.5000 + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 1]) + } + } else { + if x[9] <= 3 { // prev2_lat_1 <= 3.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[9] <= 8 { // prev2_lat_1 <= 8.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } + } else { + if x[8] <= 2 { // prev2_lat_0 <= 2.5000 + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[0] <= 4 { // prev4_lat_0 <= 4.5000 + 0 // slow (counts: [1, 0]) + } else { + 1 // fast (counts: [0, 1]) + } + } else { + if x[6] <= 0 { // prev3_lat_2 <= 0.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } else { + if x[4] <= 1 { // prev3_lat_0 <= 1.5000 + 1 // fast (counts: [0, 1]) + } else { + if x[12] <= 2 { // prev1_lat_0 <= 2.5000 + 1 // fast (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } 
else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + if x[21] <= 2 { // prev3_out_2 <= 2.5000 + if x[15] <= 0 { // prev1_lat_3 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[3] <= 8 { // prev4_lat_3 <= 8.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + if x[20] <= 0 { // prev3_out_1 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [1, 0]) + } + } else { + if x[18] <= 2 { // prev4_out_2 <= 2.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[27] <= 5 { // prev1_out_2 <= 5.5000 + if x[4] <= 0 { // prev3_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[13] <= 3 { // prev1_lat_1 <= 3.5000 + 1 // fast (counts: [0, 1]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[11] <= 4 { // prev2_lat_3 <= 4.5000 + if x[26] <= 0 { // prev1_out_1 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[9] <= 8 { // prev2_lat_1 <= 8.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } + } else { + if x[30] <= 5 { // cur_out_2 <= 5.5000 + if x[30] <= 4 { // cur_out_2 <= 4.5000 + if x[29] <= 0 { // cur_out_1 <= 0.5000 + if x[30] <= 3 { // cur_out_2 <= 3.5000 + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + 0 // slow 
(counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[24] <= 3 { // prev2_out_2 <= 3.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[27] <= 2 { // prev1_out_2 <= 2.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[13] <= 8 { // prev1_lat_1 <= 8.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + if x[20] <= 0 { // prev3_out_1 <= 0.5000 + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[5] <= 4 { // prev3_lat_1 <= 4.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + if x[30] <= 6 { // cur_out_2 <= 6.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[0] <= 0 { // prev4_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[27] <= 3 { // prev1_out_2 <= 3.5000 + if x[4] <= 0 { // prev3_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[13] <= 4 { // prev1_lat_1 <= 4.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[30] <= 6 { // cur_out_2 <= 6.5000 + if x[12] <= 2 { // prev1_lat_0 <= 2.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[30] <= 7 { // cur_out_2 <= 7.5000 + 0 // slow (counts: [0, 0]) 
+ } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } + } +} + +// ── DEVICE 2 ───────────────────────────────────────────────────────────────── +// PASTE the output of `export_dt.py --fn_name predict_device2` below, +// replacing this placeholder function. + +/// Predict fast (1) or slow (0) for device 2. +#[inline] +pub fn predict_device2(x: &[u8; 31]) -> u8 { + if x[30] <= 1 { // cur_out_2 <= 1.5000 + if x[29] <= 0 { // cur_out_1 <= 0.5000 + if x[4] <= 2 { // prev3_lat_0 <= 2.5000 + if x[27] <= 1 { // prev1_out_2 <= 1.5000 + if x[26] <= 0 { // prev1_out_1 <= 0.5000 + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 1]) + } + } else { + if x[12] <= 3 { // prev1_lat_0 <= 3.0000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[1] <= 0 { // prev4_lat_1 <= 0.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[13] <= 7 { // prev1_lat_1 <= 7.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } + } else { + if x[0] <= 1 { // prev4_lat_0 <= 1.5000 + if x[24] <= 8 { // prev2_out_2 <= 8.5000 + if x[4] <= 3 { // prev3_lat_0 <= 3.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[1] <= 3 { // prev4_lat_1 <= 3.5000 + 0 // slow (counts: [1, 0]) + } else { + 1 // fast (counts: [0, 1]) + } + } + } else { + if x[8] <= 1 { // prev2_lat_0 <= 1.5000 + if x[11] <= 2 { // prev2_lat_3 <= 2.5000 + 1 // fast (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } else { + if x[12] <= 2 { // prev1_lat_0 <= 2.5000 + 1 // fast (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } else { + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + if x[23] <= 0 { // prev2_out_1 <= 0.5000 + if x[9] <= 4 { // prev2_lat_1 <= 4.5000 + if x[9] <= 2 { // prev2_lat_1 <= 2.5000 + 0 // slow (counts: [0, 0]) + } else 
{ + 0 // slow (counts: [0, 0]) + } + } else { + if x[6] <= 4 { // prev3_lat_2 <= 4.5000 + 0 // slow (counts: [1, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[2] <= 6 { // prev4_lat_2 <= 6.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[15] <= 8 { // prev1_lat_3 <= 8.5000 + 0 // slow (counts: [0, 0]) + } else { + 1 // fast (counts: [0, 0]) + } + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[27] <= 1 { // prev1_out_2 <= 1.5000 + if x[5] <= 4 { // prev3_lat_1 <= 4.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[8] <= 3 { // prev2_lat_0 <= 3.0000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[4] <= 0 { // prev3_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[27] <= 6 { // prev1_out_2 <= 6.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } + } else { + if x[30] <= 5 { // cur_out_2 <= 5.5000 + if x[30] <= 4 { // cur_out_2 <= 4.5000 + if x[29] <= 0 { // cur_out_1 <= 0.5000 + if x[30] <= 3 { // cur_out_2 <= 3.5000 + if x[24] <= 2 { // prev2_out_2 <= 2.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + if x[20] <= 0 { // prev3_out_1 <= 0.5000 + 0 // slow 
(counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[24] <= 2 { // prev2_out_2 <= 2.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[27] <= 3 { // prev1_out_2 <= 3.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[29] <= 0 { // cur_out_1 <= 0.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } else { + if x[8] <= 0 { // prev2_lat_0 <= 0.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + if x[20] <= 0 { // prev3_out_1 <= 0.5000 + if x[30] <= 7 { // cur_out_2 <= 7.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[24] <= 1 { // prev2_out_2 <= 1.5000 + if x[30] <= 6 { // cur_out_2 <= 6.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[18] <= 1 { // prev4_out_2 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } else { + if x[12] <= 0 { // prev1_lat_0 <= 0.5000 + if x[27] <= 2 { // prev1_out_2 <= 2.5000 + if x[21] <= 1 { // prev3_out_2 <= 1.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[13] <= 4 { // prev1_lat_1 <= 4.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } else { + if x[30] <= 6 { // cur_out_2 <= 6.5000 + if x[14] <= 3 { // prev1_lat_2 <= 3.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } else { + if x[30] <= 7 { // cur_out_2 <= 7.5000 + 0 // slow (counts: [0, 0]) + } else { + 0 // slow (counts: [0, 0]) + } + } + } + } + } + } +} diff --git a/kernel/comps/raid/src/lib.rs b/kernel/comps/raid/src/lib.rs index 9511cb5b3..25e603dfc 100644 --- a/kernel/comps/raid/src/lib.rs +++ 
b/kernel/comps/raid/src/lib.rs @@ -25,6 +25,8 @@ pub mod linnos_weights; #[cfg(not(baseline_asterinas))] pub mod linnos_plus_weights; #[cfg(not(baseline_asterinas))] +pub mod decision_tree_predictions; +#[cfg(not(baseline_asterinas))] pub mod selection_policies; #[cfg(not(baseline_asterinas))] pub mod server_traits; diff --git a/kernel/comps/raid/src/selection_policies.rs b/kernel/comps/raid/src/selection_policies.rs index 0b07bfc76..264b7a43f 100644 --- a/kernel/comps/raid/src/selection_policies.rs +++ b/kernel/comps/raid/src/selection_policies.rs @@ -224,7 +224,7 @@ impl SelectionPolicy for LinnOSPolicy { // Argmax: output[0] < output[1] means fast, otherwise slow if output[0] < output[1] { - log::info!("Submitting to device {} predicted FAST. output=[{:.4},{:.4}]", device_idx, output[0], output[1]); + // log::info!("Submitting to device {} predicted FAST. output=[{:.4},{:.4}]", device_idx, output[0], output[1]); return Ok(self.members[device_idx].clone()); } @@ -232,7 +232,114 @@ impl SelectionPolicy for LinnOSPolicy { // All devices predicted slow -- fall back to round-robin if fail_cnt >= num_devices { let fallback_idx = self.read_cursor.fetch_add(1, Ordering::Relaxed) % num_devices; - log::info!("Submitting to device {} as all devices are busy. output=[{:.4},{:.4}]", fallback_idx, output[0], output[1]); + // log::info!("Submitting to device {} as all devices are busy. output=[{:.4},{:.4}]", fallback_idx, output[0], output[1]); + return Ok(self.members[fallback_idx].clone()); + } + } + } +} + +/// Decision tree selection policy. +/// +/// Uses a per-device binary decision tree trained on the same 31-element LinnOS +/// feature vector (3 digits current outstanding + 4 history steps × 7 digits). +/// The prediction functions are generated by `export_dt.py --format rust` and +/// live in `decision_tree_predictions`. Each function takes `&[u8; 31]` (one +/// digit per feature, 0–9) and returns 0 (slow) or 1 (fast). 
+/// +/// Looping and fallback logic mirrors LinnOS exactly. +#[orpc_server] +pub struct DecisionTreePolicy { + read_cursor: AtomicUsize, + members: Vec>, + observers: Vec>>, +} + +impl core::fmt::Debug for DecisionTreePolicy { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("DecisionTreePolicy") + .field("read_cursor", &self.read_cursor) + .field("members", &self.members) + .field( + "observers", + &format_args!("[{} observers]", self.observers.len()), + ) + .finish() + } +} + +impl DecisionTreePolicy { + pub fn new( + members: Vec>, + observers: Vec>>, + ) -> Result, Error> { + let server = Self::new_with(|orpc_internal, _| Self { + orpc_internal, + read_cursor: AtomicUsize::new(0), + members, + observers, + }); + Ok(server) + } +} + +impl SelectionPolicy for DecisionTreePolicy { + fn select_block_device(&self, submitted: &mut SubmittedBio) -> Result, Error> { + use crate::decision_tree_predictions::{predict_device0, predict_device1, predict_device2}; + + let num_devices = self.members.len(); + let mut fail_cnt = 0; + let num_pages = submitted.num_pages(); + + loop { + let idx = self.read_cursor.fetch_add(1, Ordering::Relaxed); + let device_idx = idx % num_devices; + let observer = self.observers[device_idx].lock(); + let completion_trace = observer + .weak_observe_recent(4) + .expect("Failed to observe completion trace"); + + // Build the 31-element input feature vector as u8 digits (0–9) + let mut input = [0u8; 31]; + + let current_outstanding = num_pages as usize + + self.members[device_idx].num_outstanding_pages() as usize; + input[0] = ((current_outstanding / 100) % 10) as u8; + input[1] = ((current_outstanding / 10) % 10) as u8; + input[2] = (current_outstanding % 10) as u8; + + for (i, trace_entry) in completion_trace.iter().enumerate().take(4) { + let Some(trace_entry) = trace_entry else { + continue; + }; + let outstanding = trace_entry.outstanding_pages as usize; + let latency_us = trace_entry.latency_us as 
usize; + let base = 3 + i * 7; + + input[base] = ((outstanding / 100) % 10) as u8; + input[base + 1] = ((outstanding / 10) % 10) as u8; + input[base + 2] = (outstanding % 10) as u8; + + input[base + 3] = ((latency_us / 1000) % 10) as u8; + input[base + 4] = ((latency_us / 100) % 10) as u8; + input[base + 5] = ((latency_us / 10) % 10) as u8; + input[base + 6] = (latency_us % 10) as u8; + } + + let prediction = match device_idx { + 0 => predict_device0(&input), + 1 => predict_device1(&input), + 2 => predict_device2(&input), + _ => 1, // unknown device: predict fast + }; + + if prediction == 1 { + return Ok(self.members[device_idx].clone()); + } + + fail_cnt += 1; + if fail_cnt >= num_devices { + let fallback_idx = self.read_cursor.fetch_add(1, Ordering::Relaxed) % num_devices; return Ok(self.members[fallback_idx].clone()); } } @@ -390,14 +497,14 @@ impl SelectionPolicy for LinnOSPlusPolicy { // Argmax: output[0] < output[1] means fast, otherwise slow if output[0] < output[1] { - log::info!("LinnOSPlus: device {} predicted FAST. output=[{:.4},{:.4}]", device_idx, output[0], output[1]); + // log::info!("LinnOSPlus: device {} predicted FAST. output=[{:.4},{:.4}]", device_idx, output[0], output[1]); return Ok(self.members[device_idx].clone()); } fail_cnt += 1; if fail_cnt >= num_devices { let fallback_idx = self.read_cursor.fetch_add(1, Ordering::Relaxed) % num_devices; - log::info!("LinnOSPlus: device {} fallback (all busy). output=[{:.4},{:.4}]", fallback_idx, output[0], output[1]); + // log::info!("LinnOSPlus: device {} fallback (all busy). 
output=[{:.4},{:.4}]", fallback_idx, output[0], output[1]); return Ok(self.members[fallback_idx].clone()); } } diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index 6f90778c7..188066b36 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -25,7 +25,7 @@ pub mod utils; use aster_block::BlockDevice; #[cfg(not(baseline_asterinas))] #[expect(unused_imports)] -use aster_raid::selection_policies::{Dummy0Policy, LinnOSPolicy, RoundRobinPolicy}; +use aster_raid::selection_policies::{DecisionTreePolicy, Dummy0Policy, LinnOSPolicy, LinnOSPlusPolicy, RoundRobinPolicy}; use aster_raid::{Raid1Device, Raid1DeviceError}; use aster_virtio::device::block::device::BlockDevice as VirtIoBlockDevice; @@ -177,32 +177,50 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { } } - // #[cfg(not(baseline_asterinas))] - // setup_data_capture(&members, RAID_MEMBER_NAMES); + #[cfg(all(not(baseline_asterinas), capture_data))] + setup_data_capture(&members, RAID_MEMBER_NAMES); #[cfg(not(baseline_asterinas))] info!("[raid] creating selection policy"); - // #[cfg(not(baseline_asterinas))] + + // Round Robin Policy + #[cfg(all(not(baseline_asterinas), raid_selection = "roundrobin"))] let selection_policy = RoundRobinPolicy::new(members.clone()).unwrap(); - #[cfg(not(baseline_asterinas))] - let observers = members - .iter() - .map(|dev| { - use aster_virtio::device::block::server_traits::BlockIOObservable; - use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; - let virtio_dev = dev - .downcast_ref::() - .expect("RAID member must be a VirtIoBlockDevice for LinnOS"); - ostd::sync::Mutex::new( - virtio_dev - .bio_completion_oqueue() - .attach_weak_observer(4, ObservationQuery::identity()) - .expect("Failed to attach weak observer to bio_completion_oqueue"), - ) - }) - .collect(); - #[cfg(not(baseline_asterinas))] + + // Shared weak observer setup for all observer-based policies (LinnOS, LinnOS Plus, Decision Tree) + #[cfg(all(not(baseline_asterinas), 
any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree")))] + let observers = { + use aster_virtio::device::block::server_traits::BlockIOObservable; + use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; + members + .iter() + .map(|dev| { + let virtio_dev = dev + .downcast_ref::() + .expect("RAID member must be a VirtIoBlockDevice"); + ostd::sync::Mutex::new( + virtio_dev + .bio_completion_oqueue() + .attach_weak_observer(4, ObservationQuery::identity()) + .expect("Failed to attach weak observer to bio_completion_oqueue"), + ) + }) + .collect() + }; + + // LinnOS Policy + #[cfg(all(not(baseline_asterinas), raid_selection = "linnos"))] let selection_policy = LinnOSPolicy::new(members.clone(), observers).unwrap(); + + // LinnOS Plus Policy + #[cfg(all(not(baseline_asterinas), raid_selection = "linnos_plus"))] + let selection_policy = LinnOSPlusPolicy::new(members.clone(), observers).unwrap(); + + // Decision Tree Policy + #[cfg(all(not(baseline_asterinas), raid_selection = "decision_tree"))] + let selection_policy = DecisionTreePolicy::new(members.clone(), observers).unwrap(); + + // Initialize and Register RAID-1 device #[cfg(not(baseline_asterinas))] let raid1device = Raid1Device::init(raid_device_name, members, selection_policy); #[cfg(baseline_asterinas)] From 2fec8671a0a0499ada2d7bf5a50086adc1577f36 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Wed, 15 Apr 2026 04:23:18 +0000 Subject: [PATCH 18/22] Created Heimdall Module (Not wired with Submission Policy Yet) --- kernel/comps/block/src/bio.rs | 37 +- kernel/comps/block/src/lib.rs | 7 +- .../raid/src/generate_heimdall_weights.py | 150 ++++++++ kernel/comps/raid/src/heimdall.rs | 320 ++++++++++++++++++ kernel/comps/raid/src/heimdall_weights.rs.j2 | 96 ++++++ kernel/comps/raid/src/lib.rs | 4 + .../comps/virtio/src/device/block/device.rs | 30 +- kernel/src/fs/mod.rs | 56 ++- 8 files changed, 673 insertions(+), 27 deletions(-) create mode 100644 
kernel/comps/raid/src/generate_heimdall_weights.py create mode 100644 kernel/comps/raid/src/heimdall.rs create mode 100644 kernel/comps/raid/src/heimdall_weights.rs.j2 diff --git a/kernel/comps/block/src/bio.rs b/kernel/comps/block/src/bio.rs index 04d3ecef7..72b5c7ad3 100644 --- a/kernel/comps/block/src/bio.rs +++ b/kernel/comps/block/src/bio.rs @@ -32,9 +32,13 @@ pub struct BlockDeviceCompletionStats { /// The latency of the I/O request in microseconds. pub latency_us: u64, /// The number of outstanding 4KB pages at completion time. - pub outstanding_pages: u64, + pub outstanding_pages: u32, + /// Length of the IO queue at the time the IO arrives, which is num_outstanding_request of a block device. + pub queue_len: u32, + /// Size of the IO request, which is num_pages of a bio request. + pub request_size_pages: u32, /// The index of the device that produced this stat. - pub device_index: u64, + pub device_index: u32, } /// The unit for block I/O. @@ -158,6 +162,8 @@ impl Bio { num_pages: None, #[cfg(not(baseline_asterinas))] outstanding_pages: None, + #[cfg(not(baseline_asterinas))] + outstanding_requests: None, }) { // Fail to submit, revert the status. let result = self.0.status.compare_exchange( @@ -341,13 +347,16 @@ pub struct SubmittedBio { submission_time_us: Option, #[cfg(not(baseline_asterinas))] - device_index: Option, + device_index: Option, + + #[cfg(not(baseline_asterinas))] + num_pages: Option, #[cfg(not(baseline_asterinas))] - num_pages: Option, + outstanding_pages: Option, #[cfg(not(baseline_asterinas))] - outstanding_pages: Option, + outstanding_requests: Option, } impl core::fmt::Debug for SubmittedBio { @@ -379,16 +388,16 @@ impl SubmittedBio { } /// an immutable version of the num_pages function. Panic if the num_pages field is not set yet. - pub fn get_num_pages(&self) -> u64 { + pub fn get_num_pages(&self) -> u32 { self.num_pages.expect("num_pages is not set yet") } /// Returns the number of 4KB pages covered by this bio's sector range. 
/// Note the field num_pages is only available when calling this function, but accessing it directly is not available. - pub fn num_pages(&mut self) -> u64 { + pub fn num_pages(&mut self) -> u32 { *self.num_pages.get_or_insert_with(|| { let sectors = self.bio_inner.sid_range().end.to_raw() - self.bio_inner.sid_range().start.to_raw(); - (sectors + 7) / 8 + ((sectors + 7) / 8) as u32 // each page has 8 sectors }) } @@ -434,8 +443,9 @@ impl SubmittedBio { pub fn prepare_enqueue( &mut self, reply_handle: RefProducer, - device_index: u64, - outstanding_pages: u64 + device_index: u32, + outstanding_pages: u32, + outstanding_requests: u32, ) { self.reply_handle = Some(reply_handle); @@ -443,6 +453,7 @@ impl SubmittedBio { self.device_index = Some(device_index); self.num_pages(); // set the num_pages field self.outstanding_pages = Some(outstanding_pages + self.num_pages.unwrap()); // accumulate the number of outstanding pages + self.outstanding_requests = Some(outstanding_requests); } #[cfg(not(baseline_asterinas))] @@ -453,8 +464,10 @@ impl SubmittedBio { .try_produce_ref(&BlockDeviceCompletionStats { latency_us: read_monotonic_time().as_micros() as u64 - self.submission_time_us.unwrap(), - outstanding_pages: self.outstanding_pages.unwrap_or(u64::MAX), - device_index: self.device_index.unwrap_or(u64::MAX), + outstanding_pages: self.outstanding_pages.unwrap_or(u32::MAX), + queue_len: self.outstanding_requests.unwrap_or(u32::MAX), + request_size_pages: self.num_pages.unwrap_or(u32::MAX), + device_index: self.device_index.unwrap_or(u32::MAX), }); } } diff --git a/kernel/comps/block/src/lib.rs b/kernel/comps/block/src/lib.rs index 9d85f674f..810f34fbc 100644 --- a/kernel/comps/block/src/lib.rs +++ b/kernel/comps/block/src/lib.rs @@ -60,7 +60,12 @@ pub trait BlockDevice: Send + Sync + Any + Debug { fn metadata(&self) -> BlockDeviceMeta; /// Returns the number of outstanding pages for this device. 
- fn num_outstanding_pages(&self) -> u64 { + fn num_outstanding_pages(&self) -> u32 { + 0 + } + + /// Returns the number of outstanding requests for this device. + fn num_outstanding_requests(&self) -> u32 { 0 } } diff --git a/kernel/comps/raid/src/generate_heimdall_weights.py b/kernel/comps/raid/src/generate_heimdall_weights.py new file mode 100644 index 000000000..5098bd4ad --- /dev/null +++ b/kernel/comps/raid/src/generate_heimdall_weights.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MPL-2.0 + +""" +Load trained PyTorch Heimdall models and generate the Rust weights file +using the Jinja2 template. + +The HeimdallNet architecture has three linear layers (one model per device): + Linear(input_dim, 128) -> ReLU -> Linear(128, 16) -> ReLU -> Linear(16, 1) -> Sigmoid + +PyTorch state dict keys: + fc1.weight [128, input_dim] fc1.bias [128] + fc2.weight [16, 128] fc2.bias [16] + fc3.weight [1, 16] fc3.bias [1] + +Usage: + python generate_heimdall_weights.py \\ + --models models/heimdall_device0.pt \\ + models/heimdall_device1.pt \\ + models/heimdall_device2.pt \\ + --template kernel/comps/raid/src/heimdall_weights.rs.j2 \\ + --output kernel/comps/raid/src/heimdall_weights.rs + +Run from the repository root. 
+""" + +import argparse +from pathlib import Path + +import torch +from jinja2 import Environment, FileSystemLoader + + +def load_model(path: str) -> dict: + """Load a model checkpoint and return its state dict.""" + state = torch.load(path, map_location="cpu", weights_only=False) + return state + + +def print_architecture(state: dict, device_idx: int) -> None: + """Print model architecture for sanity check.""" + print(f" Device {device_idx}:") + for name, tensor in state.items(): + print(f" {name:20s} shape={str(list(tensor.shape)):16s} dtype={tensor.dtype}") + + +def tensor_to_list(tensor: torch.Tensor) -> list: + """Convert a tensor to a nested Python list of floats.""" + return tensor.tolist() + + +def main(): + parser = argparse.ArgumentParser( + description="Generate Heimdall Rust weight file from PyTorch models" + ) + parser.add_argument( + "--models", nargs="+", required=True, + help="Paths to .pt model files, one per device in order", + ) + parser.add_argument( + "--template", required=True, + help="Path to the Jinja2 template (.rs.j2)", + ) + parser.add_argument( + "--output", required=True, + help="Path for the generated Rust file (.rs)", + ) + args = parser.parse_args() + + # Load all models + models = [] + for path in args.models: + models.append(load_model(path)) + + num_devices = len(models) + + # Sanity check: print architecture + print(f"Loaded {num_devices} model(s).\n") + print("Model architecture:") + for i, state in enumerate(models): + print_architecture(state, i) + print() + + # Extract dimensions from the first model + # fc1: Linear(input_dim, hidden1_size) + # fc2: Linear(hidden1_size, hidden2_size) + # fc3: Linear(hidden2_size, 1) + input_dim = models[0]["fc1.weight"].shape[1] + hidden1_size = models[0]["fc1.weight"].shape[0] + hidden2_size = models[0]["fc2.weight"].shape[0] + output_size = models[0]["fc3.weight"].shape[0] + + assert output_size == 1, f"Expected output size 1 (sigmoid), got {output_size}" + + print(f"Network: {input_dim} 
-> {hidden1_size} (ReLU) -> {hidden2_size} (ReLU) -> {output_size} (Sigmoid)") + print() + + # Extract weights and biases for each device + # PyTorch stores weights as [out_features, in_features]. + # In Rust we index as weights[input][output], so we transpose. + fc1_weights = [] + fc1_biases = [] + fc2_weights = [] + fc2_biases = [] + fc3_weights = [] + fc3_biases = [] + + for i, state in enumerate(models): + # fc1: [hidden1_size, input_dim] -> [input_dim, hidden1_size] + w1 = state["fc1.weight"].T + fc1_weights.append(tensor_to_list(w1)) + fc1_biases.append(tensor_to_list(state["fc1.bias"])) + + # fc2: [hidden2_size, hidden1_size] -> [hidden1_size, hidden2_size] + w2 = state["fc2.weight"].T + fc2_weights.append(tensor_to_list(w2)) + fc2_biases.append(tensor_to_list(state["fc2.bias"])) + + # fc3: [1, hidden2_size] -> [hidden2_size] (squeeze since output is scalar) + w3 = state["fc3.weight"].squeeze(0) + fc3_weights.append(tensor_to_list(w3)) + fc3_biases.append(state["fc3.bias"].item()) + + # Render template + template_path = Path(args.template) + env = Environment( + loader=FileSystemLoader(str(template_path.parent)), + keep_trailing_newline=True, + ) + template = env.get_template(template_path.name) + + rendered = template.render( + num_devices=num_devices, + input_dim=input_dim, + hidden1_size=hidden1_size, + hidden2_size=hidden2_size, + fc1_weights=fc1_weights, + fc1_biases=fc1_biases, + fc2_weights=fc2_weights, + fc2_biases=fc2_biases, + fc3_weights=fc3_weights, + fc3_biases=fc3_biases, + ) + + Path(args.output).write_text(rendered) + print(f"Generated {args.output} ({len(rendered)} bytes)") + + +if __name__ == "__main__": + main() diff --git a/kernel/comps/raid/src/heimdall.rs b/kernel/comps/raid/src/heimdall.rs new file mode 100644 index 000000000..14f72a8fe --- /dev/null +++ b/kernel/comps/raid/src/heimdall.rs @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: MPL-2.0 + +#![cfg(not(baseline_asterinas))] + +use alloc::{sync::Arc, vec::Vec}; +use 
core::sync::atomic::{AtomicBool, Ordering}; + +use aster_block::{ + BlockDevice, + bio::BlockDeviceCompletionStats, +}; +use ostd::{ + Error, + orpc::oqueue::{OQueueError, StrongObserver}, + sync::Mutex, +}; + +/// Heimdall: an asynchronous device performance monitor for RAID-1 arrays. +/// +/// Heimdall maintains one ML model and one strong observer per member device. +/// A dedicated background thread continuously drains completion stats from each +/// device's OQueue. Every `BATCH_SIZE` (16) completions, it runs an ML inference +/// to update that device's fast/slow indicator. +/// +/// Model architecture (per device): +/// Linear(INPUT_DIM, 128) -> ReLU -> Linear(128, 16) -> ReLU -> Linear(16, 1) -> Sigmoid +/// +/// Selection policies can query `is_device_fast(idx)` to incorporate Heimdall's +/// classification into their scheduling decisions. +pub struct Heimdall { + members: Vec>, + observers: Vec>>, + /// Per-device fast/slow indicator. `true` means the device is predicted fast. 
+ fast_indicators: Vec, + /// Per-device fc1 weights: [INPUT_DIM][HIDDEN1_SIZE] + fc1_weights: Vec<[[f32; HIDDEN1_SIZE]; INPUT_DIM]>, + /// Per-device fc1 biases: [HIDDEN1_SIZE] + fc1_biases: Vec<[f32; HIDDEN1_SIZE]>, + /// Per-device fc2 weights: [HIDDEN1_SIZE][HIDDEN2_SIZE] + fc2_weights: Vec<[[f32; HIDDEN2_SIZE]; HIDDEN1_SIZE]>, + /// Per-device fc2 biases: [HIDDEN2_SIZE] + fc2_biases: Vec<[f32; HIDDEN2_SIZE]>, + /// Per-device fc3 weights: [HIDDEN2_SIZE] + fc3_weights: Vec<[f32; HIDDEN2_SIZE]>, + /// Per-device fc3 biases: scalar + fc3_biases: Vec, +} + +impl core::fmt::Debug for Heimdall { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Heimdall") + .field("members", &self.members) + .field( + "observers", + &format_args!("[{} observers]", self.observers.len()), + ) + .field("fast_indicators", &self.fast_indicators) + .finish() + } +} + +use crate::heimdall_weights::{HIDDEN1_SIZE, HIDDEN2_SIZE, INPUT_DIM}; + +/// Number of completion records to drain before running an inference. +const BATCH_SIZE: usize = 16; + +/// Inference timeout in milliseconds. If this duration elapses since the last +/// inference for a device, inference is triggered even if fewer than `BATCH_SIZE` +/// records have been observed. +const INFERENCE_TIMEOUT_MS: u64 = 5; + +impl Heimdall { + /// Creates a new Heimdall monitor. + /// + /// `members` — the RAID-1 member devices to monitor. + /// `observers` — one strong observer per member, attached to its bio completion OQueue. 
+ pub fn new( + members: Vec>, + observers: Vec>, + ) -> Result, Error> { + use crate::heimdall_weights::{ + FC1_BIASES, FC1_WEIGHTS, FC2_BIASES, FC2_WEIGHTS, FC3_BIASES, FC3_WEIGHTS, + }; + + let num_devices = members.len(); + + let fast_indicators: Vec = (0..num_devices) + .map(|_| AtomicBool::new(true)) // optimistic: assume fast initially + .collect(); + + let observers: Vec>> = observers + .into_iter() + .map(Mutex::new) + .collect(); + + let fc1_weights: Vec<_> = (0..num_devices).map(|i| *FC1_WEIGHTS[i]).collect(); + let fc1_biases: Vec<_> = (0..num_devices).map(|i| *FC1_BIASES[i]).collect(); + let fc2_weights: Vec<_> = (0..num_devices).map(|i| *FC2_WEIGHTS[i]).collect(); + let fc2_biases: Vec<_> = (0..num_devices).map(|i| *FC2_BIASES[i]).collect(); + let fc3_weights: Vec<_> = (0..num_devices).map(|i| *FC3_WEIGHTS[i]).collect(); + let fc3_biases: Vec<_> = (0..num_devices).map(|i| FC3_BIASES[i]).collect(); + + log::info!( + "Heimdall created with {} devices", + fast_indicators.len() + ); + + Ok(Arc::new(Self { + members, + observers, + fast_indicators, + fc1_weights, + fc1_biases, + fc2_weights, + fc2_biases, + fc3_weights, + fc3_biases, + })) + } + + /// Returns whether device `idx` is currently classified as fast. + pub fn is_device_fast(&self, idx: usize) -> bool { + self.fast_indicators[idx].load(Ordering::Relaxed) + } + + /// The number of member devices being monitored. + pub fn num_devices(&self) -> usize { + self.members.len() + } + + /// Main monitoring loop. This should be spawned on a dedicated thread. + /// + /// For each device, drains completion records from its strong observer. + /// Inference is triggered for a device when either condition is met first: + /// 1. `BATCH_SIZE` (16) records have been drained, or + /// 2. `INFERENCE_TIMEOUT_MS` (5 ms) have elapsed since the last inference. + pub fn run(&self) { + use ostd::timer::Jiffies; + + let num_devices = self.members.len(); + // TIMER_FREQ is 1000 Hz, so 1 jiffy = 1 ms. 5 ms = 5 jiffies. 
+ let timeout_jiffies = INFERENCE_TIMEOUT_MS * ostd::arch::timer::TIMER_FREQ / 1000; + + // Per-device batch buffers for accumulating stats between inferences. + let mut batch_buffers: Vec> = (0..num_devices) + .map(|_| Vec::with_capacity(BATCH_SIZE)) + .collect(); + + // Per-device jiffies timestamp of the last inference (or loop start). + let now = Jiffies::elapsed().as_u64(); + let mut last_inference_jiffies = alloc::vec![now; num_devices]; + + loop { + for device_idx in 0..num_devices { + let observer = self.observers[device_idx].lock(); + + // Drain all available records (non-blocking). + loop { + match observer.try_strong_observe() { + Ok(Some(stats)) => { + batch_buffers[device_idx].push(stats); + + // Condition 1: batch is full. + // Do device inference, then break to give other devices a turn. + if batch_buffers[device_idx].len() >= BATCH_SIZE { + self.run_inference(device_idx, &mut batch_buffers[device_idx]); + last_inference_jiffies[device_idx] = Jiffies::elapsed().as_u64(); + break; + } + } + Ok(None) => break, // queue is empty, move on to timeout check + Err(OQueueError::Detached { .. }) => { + log::warn!( + "Heimdall: observer for device {} detached", + device_idx + ); + break; + } + Err(e) => { + log::warn!( + "Heimdall: error observing device {}: {:?}", + device_idx, e + ); + break; + } + } + } + + drop(observer); + + // Condition 2: timeout elapsed and there is at least some data + // (or even no data — we still re-evaluate so the device can + // transition back to fast when IO pressure drops). + let elapsed = Jiffies::elapsed().as_u64().wrapping_sub(last_inference_jiffies[device_idx]); + if elapsed >= timeout_jiffies && !batch_buffers[device_idx].is_empty() { + self.run_inference(device_idx, &mut batch_buffers[device_idx]); + last_inference_jiffies[device_idx] = Jiffies::elapsed().as_u64(); + } + } + + // Yield to avoid busy-spinning when all queues are empty. 
+ ostd::task::Task::yield_now(); + } + } + + /// Run inference for a single device and update its fast indicator. + fn run_inference( + &self, + device_idx: usize, + batch: &mut Vec, + ) { + // Model output: 1 → slow (reject IO), 0 → fast (accept IO). + let is_slow = self.infer_device_speed(device_idx, batch); + self.fast_indicators[device_idx].store(!is_slow, Ordering::Relaxed); + log::info!( + "Heimdall: labeling device {} to {} (by {} records)", + device_idx, + if is_slow { "slow" } else { "fast" }, + batch.len() + ); + batch.clear(); + } + + /// Build the 11-element input feature vector from a batch of completion stats. + /// + /// Features (matching HeimdallNet training input): + /// [0] queue_len_now — queue_len of the most recent record + /// [1] size_now — request_size_pages of the most recent record + /// [2] hist_que_len_t-1 — queue_len of the 2nd-most-recent record + /// [3] hist_que_len_t-2 — queue_len of the 3rd-most-recent record + /// [4] hist_que_len_t-3 — queue_len of the 4th-most-recent record + /// [5] hist_lat_t-1 — latency_us of the 2nd-most-recent record + /// [6] hist_lat_t-2 — latency_us of the 3rd-most-recent record + /// [7] hist_lat_t-3 — latency_us of the 4th-most-recent record + /// [8] hist_thpt_t-1 — request_size_pages / latency_us (2nd-most-recent) + /// [9] hist_thpt_t-2 — request_size_pages / latency_us (3rd-most-recent) + /// [10] hist_thpt_t-3 — request_size_pages / latency_us (4th-most-recent) + fn build_features(&self, batch: &[BlockDeviceCompletionStats]) -> [f32; INPUT_DIM] { + let mut input = [0.0f32; INPUT_DIM]; + let n = batch.len(); + if n == 0 { + return input; + } + + // Most recent record: "now" features. + let now = &batch[n - 1]; + input[0] = now.queue_len as f32; + input[1] = now.request_size_pages as f32; + + // Historical records: t-1 = batch[n-2], t-2 = batch[n-3], t-3 = batch[n-4]. + // Missing history (batch too small) stays 0.0. 
+ for hist in 0..3usize { + let idx = n.wrapping_sub(hist + 2); + if idx < n { + let rec = &batch[idx]; + input[2 + hist] = rec.queue_len as f32; + input[5 + hist] = rec.latency_us as f32; + input[8 + hist] = if rec.latency_us > 0 { + rec.request_size_pages as f32 / rec.latency_us as f32 + } else { + 0.0 + }; + } + } + + input + } + + /// Run the HeimdallNet forward pass for `device_idx` on the given batch. + /// + /// Each device has its own model weights. + /// Architecture: + /// Linear(INPUT_DIM, 128) -> ReLU -> Linear(128, 16) -> ReLU -> Linear(16, 1) -> Sigmoid + /// + /// Returns `true` if the device is predicted fast (sigmoid output >= 0.5). + fn infer_device_speed( + &self, + device_idx: usize, + batch: &[BlockDeviceCompletionStats], + ) -> bool { + let input = self.build_features(batch); + + // fc1: input (INPUT_DIM) x fc1_weights (INPUT_DIM x HIDDEN1_SIZE) + bias -> ReLU + let w1 = &self.fc1_weights[device_idx]; + let b1 = &self.fc1_biases[device_idx]; + let mut h1 = [0.0f32; HIDDEN1_SIZE]; + for j in 0..HIDDEN1_SIZE { + let mut sum = b1[j]; + for i in 0..INPUT_DIM { + sum += input[i] * w1[i][j]; + } + h1[j] = if sum > 0.0 { sum } else { 0.0 }; // ReLU + } + + // fc2: h1 (HIDDEN1_SIZE) x fc2_weights (HIDDEN1_SIZE x HIDDEN2_SIZE) + bias -> ReLU + let w2 = &self.fc2_weights[device_idx]; + let b2 = &self.fc2_biases[device_idx]; + let mut h2 = [0.0f32; HIDDEN2_SIZE]; + for j in 0..HIDDEN2_SIZE { + let mut sum = b2[j]; + for i in 0..HIDDEN1_SIZE { + sum += h1[i] * w2[i][j]; + } + h2[j] = if sum > 0.0 { sum } else { 0.0 }; // ReLU + } + + // fc3: h2 (HIDDEN2_SIZE) x fc3_weights (HIDDEN2_SIZE) + bias -> Sigmoid + let w3 = &self.fc3_weights[device_idx]; + let b3 = self.fc3_biases[device_idx]; + let mut logit = b3; + for j in 0..HIDDEN2_SIZE { + logit += h2[j] * w3[j]; + } + + // Sigmoid: 1 / (1 + exp(-x)). Equivalent to: logit >= 0. + // We skip the actual sigmoid computation since we only need the + // threshold comparison at 0.5. 
+ logit >= 0.0 + } +} diff --git a/kernel/comps/raid/src/heimdall_weights.rs.j2 b/kernel/comps/raid/src/heimdall_weights.rs.j2 new file mode 100644 index 000000000..51f13aea3 --- /dev/null +++ b/kernel/comps/raid/src/heimdall_weights.rs.j2 @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: MPL-2.0 + +// Heimdall neural network weights hardcoded for {{ num_devices }} devices. +// Each device has: +// - fc1: {{ input_dim }} x {{ hidden1_size }} matrix + {{ hidden1_size }} bias (ReLU) +// - fc2: {{ hidden1_size }} x {{ hidden2_size }} matrix + {{ hidden2_size }} bias (ReLU) +// - fc3: {{ hidden2_size }} x 1 matrix + 1 bias (Sigmoid) +// +// AUTO-GENERATED by generate_heimdall_weights.py using Jinja2. +// Do not edit this file manually. + +/// Number of devices with hardcoded weights. +pub const NUM_DEVICES: usize = {{ num_devices }}; + +/// Input dimension. +pub const INPUT_DIM: usize = {{ input_dim }}; + +/// First hidden layer size. +pub const HIDDEN1_SIZE: usize = {{ hidden1_size }}; + +/// Second hidden layer size. 
+pub const HIDDEN2_SIZE: usize = {{ hidden2_size }}; + +{% for dev in range(num_devices) %} +/// fc1 weights for device {{ dev }}: {{ input_dim }} inputs -> {{ hidden1_size }} neurons +pub static FC1_WEIGHTS_{{ dev }}: [[f32; {{ hidden1_size }}]; {{ input_dim }}] = [ +{% for row in fc1_weights[dev] %} + [{{ row | join(', ') }}], +{% endfor %} +]; + +/// fc1 bias for device {{ dev }} +pub static FC1_BIAS_{{ dev }}: [f32; {{ hidden1_size }}] = [{{ fc1_biases[dev] | join(', ') }}]; + +{% endfor %} +{% for dev in range(num_devices) %} +/// fc2 weights for device {{ dev }}: {{ hidden1_size }} -> {{ hidden2_size }} neurons +pub static FC2_WEIGHTS_{{ dev }}: [[f32; {{ hidden2_size }}]; {{ hidden1_size }}] = [ +{% for row in fc2_weights[dev] %} + [{{ row | join(', ') }}], +{% endfor %} +]; + +/// fc2 bias for device {{ dev }} +pub static FC2_BIAS_{{ dev }}: [f32; {{ hidden2_size }}] = [{{ fc2_biases[dev] | join(', ') }}]; + +{% endfor %} +{% for dev in range(num_devices) %} +/// fc3 weights for device {{ dev }}: {{ hidden2_size }} -> 1 output +pub static FC3_WEIGHTS_{{ dev }}: [f32; {{ hidden2_size }}] = [{{ fc3_weights[dev] | join(', ') }}]; + +/// fc3 bias for device {{ dev }} +pub static FC3_BIAS_{{ dev }}: f32 = {{ fc3_biases[dev] }}; + +{% endfor %} +/// All fc1 weights indexed by device. +pub static FC1_WEIGHTS: [&[[f32; {{ hidden1_size }}]; {{ input_dim }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &FC1_WEIGHTS_{{ dev }}, +{% endfor %} +]; + +/// All fc1 biases indexed by device. +pub static FC1_BIASES: [&[f32; {{ hidden1_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &FC1_BIAS_{{ dev }}, +{% endfor %} +]; + +/// All fc2 weights indexed by device. +pub static FC2_WEIGHTS: [&[[f32; {{ hidden2_size }}]; {{ hidden1_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &FC2_WEIGHTS_{{ dev }}, +{% endfor %} +]; + +/// All fc2 biases indexed by device. 
+pub static FC2_BIASES: [&[f32; {{ hidden2_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &FC2_BIAS_{{ dev }}, +{% endfor %} +]; + +/// All fc3 weights indexed by device. +pub static FC3_WEIGHTS: [&[f32; {{ hidden2_size }}]; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + &FC3_WEIGHTS_{{ dev }}, +{% endfor %} +]; + +/// All fc3 biases indexed by device. +pub static FC3_BIASES: [f32; NUM_DEVICES] = [ +{% for dev in range(num_devices) %} + FC3_BIAS_{{ dev }}, +{% endfor %} +]; diff --git a/kernel/comps/raid/src/lib.rs b/kernel/comps/raid/src/lib.rs index 25e603dfc..dbb56ac4c 100644 --- a/kernel/comps/raid/src/lib.rs +++ b/kernel/comps/raid/src/lib.rs @@ -27,6 +27,10 @@ pub mod linnos_plus_weights; #[cfg(not(baseline_asterinas))] pub mod decision_tree_predictions; #[cfg(not(baseline_asterinas))] +pub mod heimdall; +#[cfg(not(baseline_asterinas))] +pub mod heimdall_weights; +#[cfg(not(baseline_asterinas))] pub mod selection_policies; #[cfg(not(baseline_asterinas))] pub mod server_traits; diff --git a/kernel/comps/virtio/src/device/block/device.rs b/kernel/comps/virtio/src/device/block/device.rs index d420034ce..fb9ed8721 100644 --- a/kernel/comps/virtio/src/device/block/device.rs +++ b/kernel/comps/virtio/src/device/block/device.rs @@ -12,7 +12,7 @@ use core::{ fmt::Debug, hint::spin_loop, mem::size_of, - sync::atomic::{AtomicU64, Ordering}, + sync::atomic::{AtomicU32, AtomicU64, Ordering}, }; use aster_block::{ @@ -158,7 +158,7 @@ impl BlockDevice { } /// Sets the logical index for this device, used to tag I/O completion stats. 
- pub fn set_device_index(&self, index: u64) { + pub fn set_device_index(&self, index: u32) { self.device.device_index.store(index, Ordering::Relaxed); } } @@ -184,8 +184,9 @@ impl aster_block::BlockDevice for BlockDevice { let mut bio = bio; let device_index = self.device.device_index.load(Ordering::Relaxed); - bio.prepare_enqueue(reply_handle, device_index, self.device.num_outstanding_pages.load(Ordering::Relaxed)); + bio.prepare_enqueue(reply_handle, device_index as u32, self.device.num_outstanding_pages.load(Ordering::Relaxed) as u32, self.device.num_outstanding_requests.load(Ordering::Relaxed) as u32); self.device.inc_page_counter(bio.num_pages()); + self.device.inc_request_counter(); // log::info!("\x1b[32mIncremented\x1b[0m Page Counter by {}, new value: {}, device_index: {}, type: {:?}", bio.num_pages(), self.device.num_outstanding_pages.load(Ordering::Relaxed), device_index, bio.type_()); let producer = self.bio_submission_oqueue().attach_value_producer()?; producer.produce(bio); @@ -199,9 +200,13 @@ impl aster_block::BlockDevice for BlockDevice { } } - fn num_outstanding_pages(&self) -> u64 { + fn num_outstanding_pages(&self) -> u32 { self.device.num_outstanding_pages.load(Ordering::Relaxed) } + + fn num_outstanding_requests(&self) -> u32 { + self.device.num_outstanding_requests.load(Ordering::Relaxed) + } } #[derive(Debug)] @@ -214,8 +219,9 @@ struct DeviceInner { block_responses: DmaStream, id_allocator: SpinLock, submitted_requests: SpinLock>, - device_index: AtomicU64, - num_outstanding_pages: AtomicU64 + device_index: AtomicU32, + num_outstanding_pages: AtomicU32, + num_outstanding_requests: AtomicU32, } impl DeviceInner { @@ -264,8 +270,9 @@ impl DeviceInner { block_responses, id_allocator: SpinLock::new(IdAlloc::with_capacity(Self::QUEUE_SIZE as usize)), submitted_requests: SpinLock::new(BTreeMap::new()), - device_index: AtomicU64::new(u64::MAX), - num_outstanding_pages: AtomicU64::new(0) + num_outstanding_pages: AtomicU32::new(0), + 
num_outstanding_requests: AtomicU32::new(0), + device_index: AtomicU32::new(u32::MAX-1), }); let cloned_device = device.clone(); @@ -341,6 +348,7 @@ impl DeviceInner { { let pages = bio.get_num_pages(); let outstanding = self.num_outstanding_pages.fetch_sub(pages, Ordering::Relaxed); + self.num_outstanding_requests.fetch_sub(1, Ordering::Relaxed); // log::info!("\x1b[31mDecremented\x1b[0m Page Counter by {}, new value: {}, device_index: {}, type: {:?}", pages, outstanding, self.device_index.load(Ordering::Relaxed), req_type); bio.report_statistics(); } @@ -597,9 +605,13 @@ impl DeviceInner { } } - fn inc_page_counter(&self, n_pages: u64) { + fn inc_page_counter(&self, n_pages: u32) { self.num_outstanding_pages.fetch_add(n_pages, Ordering::Relaxed); } + + fn inc_request_counter(&self) { + self.num_outstanding_requests.fetch_add(1, Ordering::Relaxed); + } } /// A submitted bio request for callback. diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index 188066b36..fc570a04f 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -163,7 +163,7 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { Ok(device) => { info!("[raid] member '{}' online", name); if let Some(virtio_dev) = device.downcast_ref::() { - virtio_dev.set_device_index(index as u64); + virtio_dev.set_device_index((index) as u32); } members.push(device); } @@ -180,12 +180,12 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { #[cfg(all(not(baseline_asterinas), capture_data))] setup_data_capture(&members, RAID_MEMBER_NAMES); + // Clone members for Heimdall before they are consumed by the selection policy / RAID init. 
#[cfg(not(baseline_asterinas))] - info!("[raid] creating selection policy"); + let members_for_heimdall = members.clone(); - // Round Robin Policy - #[cfg(all(not(baseline_asterinas), raid_selection = "roundrobin"))] - let selection_policy = RoundRobinPolicy::new(members.clone()).unwrap(); + #[cfg(not(baseline_asterinas))] + info!("[raid] creating selection policy"); // Shared weak observer setup for all observer-based policies (LinnOS, LinnOS Plus, Decision Tree) #[cfg(all(not(baseline_asterinas), any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree")))] @@ -220,6 +220,10 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { #[cfg(all(not(baseline_asterinas), raid_selection = "decision_tree"))] let selection_policy = DecisionTreePolicy::new(members.clone(), observers).unwrap(); + // Round Robin Policy (explicit or default when no raid_selection is specified) + #[cfg(all(not(baseline_asterinas), any(raid_selection = "roundrobin", not(any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree")))))] + let selection_policy = RoundRobinPolicy::new(members.clone()).unwrap(); + // Initialize and Register RAID-1 device #[cfg(not(baseline_asterinas))] let raid1device = Raid1Device::init(raid_device_name, members, selection_policy); @@ -232,6 +236,48 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { })?; info!("[raid] RAID-1 device created"); + // Initialize Heimdall device performance monitor + #[cfg(not(baseline_asterinas))] + { + use aster_virtio::device::block::server_traits::BlockIOObservable; + use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; + + let heimdall_observers: Vec<_> = members_for_heimdall + .iter() + .map(|dev| { + let virtio_dev = dev + .downcast_ref::() + .expect("RAID member must be a VirtIoBlockDevice"); + virtio_dev + .bio_completion_oqueue() + .attach_strong_observer(ObservationQuery::identity()) + .expect("Failed to attach strong observer 
for Heimdall") + }) + .collect(); + + let heimdall = aster_raid::heimdall::Heimdall::new( + members_for_heimdall, + heimdall_observers, + ) + .expect("Failed to create Heimdall monitor"); + + let heimdall_task = move || { + info!("[heimdall] Heimdall monitor thread started"); + heimdall.run(); + }; + + crate::ThreadOptions::new(heimdall_task) + .sched_policy(crate::sched::SchedPolicy::RealTime { + rt_prio: 50.try_into().unwrap(), + rt_policy: crate::sched::RealTimePolicy::RoundRobin { + base_slice_factor: None, + }, + }) + .spawn(); + + info!("[heimdall] Heimdall monitor initialized and thread spawned"); + } + let worker = aster_block::get_device(raid_device_name).unwrap(); // The registry stores `Arc`. Use `downcast_ref` on the captured Arc each // iteration to call the RAID-specific helper without needing ownership of `Raid1Device`. From 07190f6f42f7d36a6ab22a7a6173990923f9ebc7 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Wed, 15 Apr 2026 05:17:08 +0000 Subject: [PATCH 19/22] Changed Admission and Submission Policy's initialization order --- kernel/src/fs/mod.rs | 86 ++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index fc570a04f..52fb6e8a6 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -184,6 +184,50 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { #[cfg(not(baseline_asterinas))] let members_for_heimdall = members.clone(); + // Initialize Heimdall device performance monitor + #[cfg(not(baseline_asterinas))] + { + use aster_virtio::device::block::server_traits::BlockIOObservable; + use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; + + let heimdall_observers: Vec<_> = members_for_heimdall + .iter() + .map(|dev| { + let virtio_dev = dev + .downcast_ref::() + .expect("RAID member must be a VirtIoBlockDevice"); + virtio_dev + .bio_completion_oqueue() + .attach_strong_observer(ObservationQuery::identity()) + .expect("Failed 
to attach strong observer for Heimdall") + }) + .collect(); + + let heimdall = aster_raid::heimdall::Heimdall::new( + members_for_heimdall, + heimdall_observers, + ) + .expect("Failed to create Heimdall monitor"); + + let heimdall_task = move || { + info!("[heimdall] Heimdall monitor thread started"); + heimdall.run(); + }; + + crate::ThreadOptions::new(heimdall_task) + .sched_policy(crate::sched::SchedPolicy::RealTime { + rt_prio: 50.try_into().unwrap(), + rt_policy: crate::sched::RealTimePolicy::RoundRobin { + base_slice_factor: None, + }, + }) + .spawn(); + + info!("[heimdall] Heimdall monitor initialized and thread spawned"); + } + + + #[cfg(not(baseline_asterinas))] info!("[raid] creating selection policy"); @@ -236,48 +280,6 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { })?; info!("[raid] RAID-1 device created"); - // Initialize Heimdall device performance monitor - #[cfg(not(baseline_asterinas))] - { - use aster_virtio::device::block::server_traits::BlockIOObservable; - use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; - - let heimdall_observers: Vec<_> = members_for_heimdall - .iter() - .map(|dev| { - let virtio_dev = dev - .downcast_ref::() - .expect("RAID member must be a VirtIoBlockDevice"); - virtio_dev - .bio_completion_oqueue() - .attach_strong_observer(ObservationQuery::identity()) - .expect("Failed to attach strong observer for Heimdall") - }) - .collect(); - - let heimdall = aster_raid::heimdall::Heimdall::new( - members_for_heimdall, - heimdall_observers, - ) - .expect("Failed to create Heimdall monitor"); - - let heimdall_task = move || { - info!("[heimdall] Heimdall monitor thread started"); - heimdall.run(); - }; - - crate::ThreadOptions::new(heimdall_task) - .sched_policy(crate::sched::SchedPolicy::RealTime { - rt_prio: 50.try_into().unwrap(), - rt_policy: crate::sched::RealTimePolicy::RoundRobin { - base_slice_factor: None, - }, - }) - .spawn(); - - info!("[heimdall] Heimdall monitor initialized and thread spawned"); 
- } - let worker = aster_block::get_device(raid_device_name).unwrap(); // The registry stores `Arc`. Use `downcast_ref` on the captured Arc each // iteration to call the RAID-specific helper without needing ownership of `Raid1Device`. From d65fb13664a8f165d2ef6fbb5c97fa4147d1aa97 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Wed, 15 Apr 2026 05:17:25 +0000 Subject: [PATCH 20/22] Tuned Heimdall's parameter and added extra functionalities. --- kernel/comps/raid/src/heimdall.rs | 35 ++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/kernel/comps/raid/src/heimdall.rs b/kernel/comps/raid/src/heimdall.rs index 14f72a8fe..422c0bf72 100644 --- a/kernel/comps/raid/src/heimdall.rs +++ b/kernel/comps/raid/src/heimdall.rs @@ -62,12 +62,12 @@ impl core::fmt::Debug for Heimdall { use crate::heimdall_weights::{HIDDEN1_SIZE, HIDDEN2_SIZE, INPUT_DIM}; /// Number of completion records to drain before running an inference. -const BATCH_SIZE: usize = 16; +const BATCH_SIZE: usize = 8; /// Inference timeout in milliseconds. If this duration elapses since the last /// inference for a device, inference is triggered even if fewer than `BATCH_SIZE` /// records have been observed. -const INFERENCE_TIMEOUT_MS: u64 = 5; +const INFERENCE_TIMEOUT_MS: u64 = 28; impl Heimdall { /// Creates a new Heimdall monitor. @@ -123,6 +123,13 @@ impl Heimdall { self.fast_indicators[idx].load(Ordering::Relaxed) } + /// Returns the fast indicator for device `idx`. + /// + /// `true` means the device is currently predicted fast; `false` means slow. + pub fn check_device(&self, idx: usize) -> bool { + self.fast_indicators[idx].load(Ordering::Relaxed) + } + /// The number of member devices being monitored. pub fn num_devices(&self) -> usize { self.members.len() @@ -163,6 +170,11 @@ impl Heimdall { // Condition 1: batch is full. // Do device inference, then break to give other devices a turn. 
if batch_buffers[device_idx].len() >= BATCH_SIZE { + // log::info!( + // "Heimdall: triggered by batch for device {} ({} records)", + // device_idx, + // batch_buffers[device_idx].len() + // ); self.run_inference(device_idx, &mut batch_buffers[device_idx]); last_inference_jiffies[device_idx] = Jiffies::elapsed().as_u64(); break; @@ -193,6 +205,11 @@ impl Heimdall { // transition back to fast when IO pressure drops). let elapsed = Jiffies::elapsed().as_u64().wrapping_sub(last_inference_jiffies[device_idx]); if elapsed >= timeout_jiffies && !batch_buffers[device_idx].is_empty() { + // log::info!( + // "Heimdall: triggered by timeout for device {} ({} ms since last inference)", + // device_idx, + // elapsed * 1000 / ostd::arch::timer::TIMER_FREQ + // ); self.run_inference(device_idx, &mut batch_buffers[device_idx]); last_inference_jiffies[device_idx] = Jiffies::elapsed().as_u64(); } @@ -212,12 +229,12 @@ impl Heimdall { // Model output: 1 → slow (reject IO), 0 → fast (accept IO). let is_slow = self.infer_device_speed(device_idx, batch); self.fast_indicators[device_idx].store(!is_slow, Ordering::Relaxed); - log::info!( - "Heimdall: labeling device {} to {} (by {} records)", - device_idx, - if is_slow { "slow" } else { "fast" }, - batch.len() - ); + // log::info!( + // "Heimdall: labeling device {} to {} (by {} records)", + // device_idx, + // if is_slow { "slow" } else { "fast" }, + // batch.len() + // ); batch.clear(); } @@ -317,4 +334,6 @@ impl Heimdall { // threshold comparison at 0.5. 
logit >= 0.0 } + + } From ab22d7758e3643aba56c5b669080139d14a10bbc Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Wed, 15 Apr 2026 18:09:11 +0000 Subject: [PATCH 21/22] Heimdall Round Robin Policy --- Cargo.toml | 2 +- .../raid/src/generate_heimdall_weights.py | 33 ++++------- kernel/comps/raid/src/heimdall.rs | 56 ++++++------------- kernel/comps/raid/src/heimdall_weights.rs.j2 | 54 ++++-------------- kernel/comps/raid/src/selection_policies.rs | 52 +++++++++++++++++ kernel/src/fs/mod.rs | 18 ++++-- 6 files changed, 105 insertions(+), 110 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a547c2c4e..80dd2731e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,7 +54,7 @@ exclude = [ function_casts_as_integer = "allow" mismatched_lifetime_syntaxes = "allow" missing_crate_level_docs = "warn" -unexpected_cfgs = { level = "deny", check-cfg = ['cfg(baseline_asterinas)', 'cfg(ktest)', 'cfg(capture_data)', 'cfg(raid_selection, values("roundrobin", "linnos", "linnos_plus", "decision_tree"))'] } +unexpected_cfgs = { level = "deny", check-cfg = ['cfg(baseline_asterinas)', 'cfg(ktest)', 'cfg(capture_data)', 'cfg(raid_selection, values("roundrobin", "linnos", "linnos_plus", "decision_tree", "heimdall"))'] } unpredictable-function-pointer-comparisons = "allow" unsafe_op_in_unsafe_fn = "deny" unused_parens = "allow" diff --git a/kernel/comps/raid/src/generate_heimdall_weights.py b/kernel/comps/raid/src/generate_heimdall_weights.py index 5098bd4ad..857c1a241 100644 --- a/kernel/comps/raid/src/generate_heimdall_weights.py +++ b/kernel/comps/raid/src/generate_heimdall_weights.py @@ -5,12 +5,11 @@ Load trained PyTorch Heimdall models and generate the Rust weights file using the Jinja2 template. 
-The HeimdallNet architecture has three linear layers (one model per device): - Linear(input_dim, 128) -> ReLU -> Linear(128, 16) -> ReLU -> Linear(16, 1) -> Sigmoid +The HeimdallNet architecture has two linear layers (one model per device): + Linear(input_dim, 16) -> ReLU -> Linear(16, 1) -> Sigmoid PyTorch state dict keys: - fc1.weight [128, input_dim] fc1.bias [128] - fc2.weight [16, 128] fc2.bias [16] + fc1.weight [16, input_dim] fc1.bias [16] fc3.weight [1, 16] fc3.bias [1] Usage: @@ -82,17 +81,15 @@ def main(): print() # Extract dimensions from the first model - # fc1: Linear(input_dim, hidden1_size) - # fc2: Linear(hidden1_size, hidden2_size) - # fc3: Linear(hidden2_size, 1) + # fc1: Linear(input_dim, hidden_size) + # fc3: Linear(hidden_size, 1) input_dim = models[0]["fc1.weight"].shape[1] - hidden1_size = models[0]["fc1.weight"].shape[0] - hidden2_size = models[0]["fc2.weight"].shape[0] + hidden_size = models[0]["fc1.weight"].shape[0] output_size = models[0]["fc3.weight"].shape[0] assert output_size == 1, f"Expected output size 1 (sigmoid), got {output_size}" - print(f"Network: {input_dim} -> {hidden1_size} (ReLU) -> {hidden2_size} (ReLU) -> {output_size} (Sigmoid)") + print(f"Network: {input_dim} -> {hidden_size} (ReLU) -> {output_size} (Sigmoid)") print() # Extract weights and biases for each device @@ -100,23 +97,16 @@ def main(): # In Rust we index as weights[input][output], so we transpose. 
fc1_weights = [] fc1_biases = [] - fc2_weights = [] - fc2_biases = [] fc3_weights = [] fc3_biases = [] for i, state in enumerate(models): - # fc1: [hidden1_size, input_dim] -> [input_dim, hidden1_size] + # fc1: [hidden_size, input_dim] -> [input_dim, hidden_size] w1 = state["fc1.weight"].T fc1_weights.append(tensor_to_list(w1)) fc1_biases.append(tensor_to_list(state["fc1.bias"])) - # fc2: [hidden2_size, hidden1_size] -> [hidden1_size, hidden2_size] - w2 = state["fc2.weight"].T - fc2_weights.append(tensor_to_list(w2)) - fc2_biases.append(tensor_to_list(state["fc2.bias"])) - - # fc3: [1, hidden2_size] -> [hidden2_size] (squeeze since output is scalar) + # fc3: [1, hidden_size] -> [hidden_size] (squeeze since output is scalar) w3 = state["fc3.weight"].squeeze(0) fc3_weights.append(tensor_to_list(w3)) fc3_biases.append(state["fc3.bias"].item()) @@ -132,12 +122,9 @@ def main(): rendered = template.render( num_devices=num_devices, input_dim=input_dim, - hidden1_size=hidden1_size, - hidden2_size=hidden2_size, + hidden_size=hidden_size, fc1_weights=fc1_weights, fc1_biases=fc1_biases, - fc2_weights=fc2_weights, - fc2_biases=fc2_biases, fc3_weights=fc3_weights, fc3_biases=fc3_biases, ) diff --git a/kernel/comps/raid/src/heimdall.rs b/kernel/comps/raid/src/heimdall.rs index 422c0bf72..2096b440d 100644 --- a/kernel/comps/raid/src/heimdall.rs +++ b/kernel/comps/raid/src/heimdall.rs @@ -23,7 +23,7 @@ use ostd::{ /// to update that device's fast/slow indicator. /// /// Model architecture (per device): -/// Linear(INPUT_DIM, 128) -> ReLU -> Linear(128, 16) -> ReLU -> Linear(16, 1) -> Sigmoid +/// Linear(INPUT_DIM, 16) -> ReLU -> Linear(16, 1) -> Sigmoid /// /// Selection policies can query `is_device_fast(idx)` to incorporate Heimdall's /// classification into their scheduling decisions. @@ -32,16 +32,12 @@ pub struct Heimdall { observers: Vec>>, /// Per-device fast/slow indicator. `true` means the device is predicted fast. 
fast_indicators: Vec, - /// Per-device fc1 weights: [INPUT_DIM][HIDDEN1_SIZE] - fc1_weights: Vec<[[f32; HIDDEN1_SIZE]; INPUT_DIM]>, - /// Per-device fc1 biases: [HIDDEN1_SIZE] - fc1_biases: Vec<[f32; HIDDEN1_SIZE]>, - /// Per-device fc2 weights: [HIDDEN1_SIZE][HIDDEN2_SIZE] - fc2_weights: Vec<[[f32; HIDDEN2_SIZE]; HIDDEN1_SIZE]>, - /// Per-device fc2 biases: [HIDDEN2_SIZE] - fc2_biases: Vec<[f32; HIDDEN2_SIZE]>, - /// Per-device fc3 weights: [HIDDEN2_SIZE] - fc3_weights: Vec<[f32; HIDDEN2_SIZE]>, + /// Per-device fc1 weights: [INPUT_DIM][HIDDEN_SIZE] + fc1_weights: Vec<[[f32; HIDDEN_SIZE]; INPUT_DIM]>, + /// Per-device fc1 biases: [HIDDEN_SIZE] + fc1_biases: Vec<[f32; HIDDEN_SIZE]>, + /// Per-device fc3 weights: [HIDDEN_SIZE] + fc3_weights: Vec<[f32; HIDDEN_SIZE]>, /// Per-device fc3 biases: scalar fc3_biases: Vec, } @@ -59,10 +55,10 @@ impl core::fmt::Debug for Heimdall { } } -use crate::heimdall_weights::{HIDDEN1_SIZE, HIDDEN2_SIZE, INPUT_DIM}; +use crate::heimdall_weights::{HIDDEN_SIZE, INPUT_DIM}; /// Number of completion records to drain before running an inference. -const BATCH_SIZE: usize = 8; +const BATCH_SIZE: usize = 6; /// Inference timeout in milliseconds. 
If this duration elapses since the last /// inference for a device, inference is triggered even if fewer than `BATCH_SIZE` @@ -79,7 +75,7 @@ impl Heimdall { observers: Vec>, ) -> Result, Error> { use crate::heimdall_weights::{ - FC1_BIASES, FC1_WEIGHTS, FC2_BIASES, FC2_WEIGHTS, FC3_BIASES, FC3_WEIGHTS, + FC1_BIASES, FC1_WEIGHTS, FC3_BIASES, FC3_WEIGHTS, }; let num_devices = members.len(); @@ -95,8 +91,6 @@ impl Heimdall { let fc1_weights: Vec<_> = (0..num_devices).map(|i| *FC1_WEIGHTS[i]).collect(); let fc1_biases: Vec<_> = (0..num_devices).map(|i| *FC1_BIASES[i]).collect(); - let fc2_weights: Vec<_> = (0..num_devices).map(|i| *FC2_WEIGHTS[i]).collect(); - let fc2_biases: Vec<_> = (0..num_devices).map(|i| *FC2_BIASES[i]).collect(); let fc3_weights: Vec<_> = (0..num_devices).map(|i| *FC3_WEIGHTS[i]).collect(); let fc3_biases: Vec<_> = (0..num_devices).map(|i| FC3_BIASES[i]).collect(); @@ -111,8 +105,6 @@ impl Heimdall { fast_indicators, fc1_weights, fc1_biases, - fc2_weights, - fc2_biases, fc3_weights, fc3_biases, })) @@ -287,9 +279,9 @@ impl Heimdall { /// /// Each device has its own model weights. /// Architecture: - /// Linear(INPUT_DIM, 128) -> ReLU -> Linear(128, 16) -> ReLU -> Linear(16, 1) -> Sigmoid + /// Linear(INPUT_DIM, 16) -> ReLU -> Linear(16, 1) -> Sigmoid /// - /// Returns `true` if the device is predicted fast (sigmoid output >= 0.5). + /// Returns `true` if the model output >= 0.5 (sigmoid(logit) >= 0.5 ⟺ logit >= 0). 
fn infer_device_speed( &self, device_idx: usize, @@ -297,11 +289,11 @@ impl Heimdall { ) -> bool { let input = self.build_features(batch); - // fc1: input (INPUT_DIM) x fc1_weights (INPUT_DIM x HIDDEN1_SIZE) + bias -> ReLU + // fc1: input (INPUT_DIM) x fc1_weights (INPUT_DIM x HIDDEN_SIZE) + bias -> ReLU let w1 = &self.fc1_weights[device_idx]; let b1 = &self.fc1_biases[device_idx]; - let mut h1 = [0.0f32; HIDDEN1_SIZE]; - for j in 0..HIDDEN1_SIZE { + let mut h1 = [0.0f32; HIDDEN_SIZE]; + for j in 0..HIDDEN_SIZE { let mut sum = b1[j]; for i in 0..INPUT_DIM { sum += input[i] * w1[i][j]; @@ -309,24 +301,12 @@ impl Heimdall { h1[j] = if sum > 0.0 { sum } else { 0.0 }; // ReLU } - // fc2: h1 (HIDDEN1_SIZE) x fc2_weights (HIDDEN1_SIZE x HIDDEN2_SIZE) + bias -> ReLU - let w2 = &self.fc2_weights[device_idx]; - let b2 = &self.fc2_biases[device_idx]; - let mut h2 = [0.0f32; HIDDEN2_SIZE]; - for j in 0..HIDDEN2_SIZE { - let mut sum = b2[j]; - for i in 0..HIDDEN1_SIZE { - sum += h1[i] * w2[i][j]; - } - h2[j] = if sum > 0.0 { sum } else { 0.0 }; // ReLU - } - - // fc3: h2 (HIDDEN2_SIZE) x fc3_weights (HIDDEN2_SIZE) + bias -> Sigmoid + // fc3: h1 (HIDDEN_SIZE) x fc3_weights (HIDDEN_SIZE) + bias -> Sigmoid let w3 = &self.fc3_weights[device_idx]; let b3 = self.fc3_biases[device_idx]; let mut logit = b3; - for j in 0..HIDDEN2_SIZE { - logit += h2[j] * w3[j]; + for j in 0..HIDDEN_SIZE { + logit += h1[j] * w3[j]; } // Sigmoid: 1 / (1 + exp(-x)). Equivalent to: logit >= 0. diff --git a/kernel/comps/raid/src/heimdall_weights.rs.j2 b/kernel/comps/raid/src/heimdall_weights.rs.j2 index 51f13aea3..80114bf9b 100644 --- a/kernel/comps/raid/src/heimdall_weights.rs.j2 +++ b/kernel/comps/raid/src/heimdall_weights.rs.j2 @@ -2,9 +2,8 @@ // Heimdall neural network weights hardcoded for {{ num_devices }} devices. 
// Each device has: -// - fc1: {{ input_dim }} x {{ hidden1_size }} matrix + {{ hidden1_size }} bias (ReLU) -// - fc2: {{ hidden1_size }} x {{ hidden2_size }} matrix + {{ hidden2_size }} bias (ReLU) -// - fc3: {{ hidden2_size }} x 1 matrix + 1 bias (Sigmoid) +// - fc1: {{ input_dim }} x {{ hidden_size }} matrix + {{ hidden_size }} bias (ReLU) +// - fc3: {{ hidden_size }} x 1 matrix + 1 bias (Sigmoid) // // AUTO-GENERATED by generate_heimdall_weights.py using Jinja2. // Do not edit this file manually. @@ -15,74 +14,45 @@ pub const NUM_DEVICES: usize = {{ num_devices }}; /// Input dimension. pub const INPUT_DIM: usize = {{ input_dim }}; -/// First hidden layer size. -pub const HIDDEN1_SIZE: usize = {{ hidden1_size }}; - -/// Second hidden layer size. -pub const HIDDEN2_SIZE: usize = {{ hidden2_size }}; +/// Hidden layer size. +pub const HIDDEN_SIZE: usize = {{ hidden_size }}; {% for dev in range(num_devices) %} -/// fc1 weights for device {{ dev }}: {{ input_dim }} inputs -> {{ hidden1_size }} neurons -pub static FC1_WEIGHTS_{{ dev }}: [[f32; {{ hidden1_size }}]; {{ input_dim }}] = [ +/// fc1 weights for device {{ dev }}: {{ input_dim }} inputs -> {{ hidden_size }} neurons +pub static FC1_WEIGHTS_{{ dev }}: [[f32; {{ hidden_size }}]; {{ input_dim }}] = [ {% for row in fc1_weights[dev] %} [{{ row | join(', ') }}], {% endfor %} ]; /// fc1 bias for device {{ dev }} -pub static FC1_BIAS_{{ dev }}: [f32; {{ hidden1_size }}] = [{{ fc1_biases[dev] | join(', ') }}]; - -{% endfor %} -{% for dev in range(num_devices) %} -/// fc2 weights for device {{ dev }}: {{ hidden1_size }} -> {{ hidden2_size }} neurons -pub static FC2_WEIGHTS_{{ dev }}: [[f32; {{ hidden2_size }}]; {{ hidden1_size }}] = [ -{% for row in fc2_weights[dev] %} - [{{ row | join(', ') }}], -{% endfor %} -]; - -/// fc2 bias for device {{ dev }} -pub static FC2_BIAS_{{ dev }}: [f32; {{ hidden2_size }}] = [{{ fc2_biases[dev] | join(', ') }}]; +pub static FC1_BIAS_{{ dev }}: [f32; {{ hidden_size }}] = [{{ 
fc1_biases[dev] | join(', ') }}]; {% endfor %} {% for dev in range(num_devices) %} -/// fc3 weights for device {{ dev }}: {{ hidden2_size }} -> 1 output -pub static FC3_WEIGHTS_{{ dev }}: [f32; {{ hidden2_size }}] = [{{ fc3_weights[dev] | join(', ') }}]; +/// fc3 weights for device {{ dev }}: {{ hidden_size }} -> 1 output +pub static FC3_WEIGHTS_{{ dev }}: [f32; {{ hidden_size }}] = [{{ fc3_weights[dev] | join(', ') }}]; /// fc3 bias for device {{ dev }} pub static FC3_BIAS_{{ dev }}: f32 = {{ fc3_biases[dev] }}; {% endfor %} /// All fc1 weights indexed by device. -pub static FC1_WEIGHTS: [&[[f32; {{ hidden1_size }}]; {{ input_dim }}]; NUM_DEVICES] = [ +pub static FC1_WEIGHTS: [&[[f32; {{ hidden_size }}]; {{ input_dim }}]; NUM_DEVICES] = [ {% for dev in range(num_devices) %} &FC1_WEIGHTS_{{ dev }}, {% endfor %} ]; /// All fc1 biases indexed by device. -pub static FC1_BIASES: [&[f32; {{ hidden1_size }}]; NUM_DEVICES] = [ +pub static FC1_BIASES: [&[f32; {{ hidden_size }}]; NUM_DEVICES] = [ {% for dev in range(num_devices) %} &FC1_BIAS_{{ dev }}, {% endfor %} ]; -/// All fc2 weights indexed by device. -pub static FC2_WEIGHTS: [&[[f32; {{ hidden2_size }}]; {{ hidden1_size }}]; NUM_DEVICES] = [ -{% for dev in range(num_devices) %} - &FC2_WEIGHTS_{{ dev }}, -{% endfor %} -]; - -/// All fc2 biases indexed by device. -pub static FC2_BIASES: [&[f32; {{ hidden2_size }}]; NUM_DEVICES] = [ -{% for dev in range(num_devices) %} - &FC2_BIAS_{{ dev }}, -{% endfor %} -]; - /// All fc3 weights indexed by device. 
-pub static FC3_WEIGHTS: [&[f32; {{ hidden2_size }}]; NUM_DEVICES] = [ +pub static FC3_WEIGHTS: [&[f32; {{ hidden_size }}]; NUM_DEVICES] = [ {% for dev in range(num_devices) %} &FC3_WEIGHTS_{{ dev }}, {% endfor %} diff --git a/kernel/comps/raid/src/selection_policies.rs b/kernel/comps/raid/src/selection_policies.rs index 264b7a43f..f23a46722 100644 --- a/kernel/comps/raid/src/selection_policies.rs +++ b/kernel/comps/raid/src/selection_policies.rs @@ -15,6 +15,7 @@ use ostd::{ sync::Mutex, }; +use crate::heimdall::Heimdall; use crate::server_traits::SelectionPolicy; #[derive(Debug)] @@ -346,6 +347,57 @@ impl SelectionPolicy for DecisionTreePolicy { } } +/// Heimdall-guided round-robin selection policy. +/// +/// Uses the Heimdall asynchronous monitor to skip devices predicted slow. +/// If all devices are slow, falls back to plain round-robin so IO is never +/// stalled. The Heimdall monitor must be spawned on a separate thread via +/// `Heimdall::run()` before any IO arrives. +#[derive(Debug)] +#[orpc_server] +pub struct HeimdallRoundRobinPolicy { + read_cursor: AtomicUsize, + members: Vec>, + heimdall: Arc, +} + +impl HeimdallRoundRobinPolicy { + pub fn new( + members: Vec>, + heimdall: Arc, + ) -> Result, Error> { + let server = Self::new_with(|orpc_internal, _| Self { + orpc_internal, + read_cursor: AtomicUsize::new(0), + members, + heimdall, + }); + Ok(server) + } +} + +impl SelectionPolicy for HeimdallRoundRobinPolicy { + fn select_block_device( + &self, + _submitted: &mut SubmittedBio, + ) -> Result, Error> { + let num_devices = self.members.len(); + let start_idx = self.read_cursor.fetch_add(1, Ordering::Relaxed); + + // Try each device once, starting from the round-robin cursor. + for offset in 0..num_devices { + let device_idx = (start_idx + offset) % num_devices; + if self.heimdall.is_device_fast(device_idx) { + return Ok(self.members[device_idx].clone()); + } + } + + // All devices are slow — fall back to round-robin. 
+ let fallback_idx = start_idx % num_devices; + Ok(self.members[fallback_idx].clone()) + } +} + /// LinnOSPlus: a deeper variant of the LinnOS neural-network selection policy. /// /// Architecture (per device): diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index 52fb6e8a6..45a2e0491 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -25,7 +25,7 @@ pub mod utils; use aster_block::BlockDevice; #[cfg(not(baseline_asterinas))] #[expect(unused_imports)] -use aster_raid::selection_policies::{DecisionTreePolicy, Dummy0Policy, LinnOSPolicy, LinnOSPlusPolicy, RoundRobinPolicy}; +use aster_raid::selection_policies::{DecisionTreePolicy, Dummy0Policy, HeimdallRoundRobinPolicy, LinnOSPolicy, LinnOSPlusPolicy, RoundRobinPolicy}; use aster_raid::{Raid1Device, Raid1DeviceError}; use aster_virtio::device::block::device::BlockDevice as VirtIoBlockDevice; @@ -186,7 +186,7 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { // Initialize Heimdall device performance monitor #[cfg(not(baseline_asterinas))] - { + let heimdall = { use aster_virtio::device::block::server_traits::BlockIOObservable; use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; @@ -209,9 +209,10 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { ) .expect("Failed to create Heimdall monitor"); + let heimdall_clone = heimdall.clone(); let heimdall_task = move || { info!("[heimdall] Heimdall monitor thread started"); - heimdall.run(); + heimdall_clone.run(); }; crate::ThreadOptions::new(heimdall_task) @@ -223,8 +224,9 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { }) .spawn(); - info!("[heimdall] Heimdall monitor initialized and thread spawned"); - } + info!("[heimdall] is Online"); + heimdall + }; @@ -264,8 +266,12 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { #[cfg(all(not(baseline_asterinas), raid_selection = "decision_tree"))] let selection_policy = DecisionTreePolicy::new(members.clone(), observers).unwrap(); + // Heimdall 
Round Robin Policy + #[cfg(all(not(baseline_asterinas), raid_selection = "heimdall"))] + let selection_policy = HeimdallRoundRobinPolicy::new(members.clone(), heimdall).unwrap(); + // Round Robin Policy (explicit or default when no raid_selection is specified) - #[cfg(all(not(baseline_asterinas), any(raid_selection = "roundrobin", not(any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree")))))] + #[cfg(all(not(baseline_asterinas), any(raid_selection = "roundrobin", not(any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree", raid_selection = "heimdall")))))] let selection_policy = RoundRobinPolicy::new(members.clone()).unwrap(); // Initialize and Register RAID-1 device From 8395cdc5892be2e63d3f7558a50b6574d64a375c Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Mon, 4 May 2026 18:05:34 -0500 Subject: [PATCH 22/22] Heimdall LinnOS Plus Policy --- Cargo.toml | 2 +- kernel/comps/raid/src/selection_policies.rs | 171 ++++++++++++++++++++ kernel/src/fs/mod.rs | 14 +- 3 files changed, 181 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 80dd2731e..faa9a892a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,7 +54,7 @@ exclude = [ function_casts_as_integer = "allow" mismatched_lifetime_syntaxes = "allow" missing_crate_level_docs = "warn" -unexpected_cfgs = { level = "deny", check-cfg = ['cfg(baseline_asterinas)', 'cfg(ktest)', 'cfg(capture_data)', 'cfg(raid_selection, values("roundrobin", "linnos", "linnos_plus", "decision_tree", "heimdall"))'] } +unexpected_cfgs = { level = "deny", check-cfg = ['cfg(baseline_asterinas)', 'cfg(ktest)', 'cfg(capture_data)', 'cfg(raid_selection, values("roundrobin", "linnos", "linnos_plus", "decision_tree", "heimdall", "heimdalllinnosplus"))'] } unpredictable-function-pointer-comparisons = "allow" unsafe_op_in_unsafe_fn = "deny" unused_parens = "allow" diff --git a/kernel/comps/raid/src/selection_policies.rs 
b/kernel/comps/raid/src/selection_policies.rs index f23a46722..869855cf0 100644 --- a/kernel/comps/raid/src/selection_policies.rs +++ b/kernel/comps/raid/src/selection_policies.rs @@ -562,3 +562,174 @@ impl SelectionPolicy for LinnOSPlusPolicy { } } } + +/// Heimdall + LinnOS Plus combined selection policy. +/// +/// For each candidate device (round-robin order), first checks Heimdall's +/// asynchronous prediction: a "fast" verdict selects the device outright. +/// Only when Heimdall says "slow" is a LinnOS Plus inference consulted as +/// a second opinion, and a "fast" verdict there also selects the device. +/// If no device passes either check, falls back to round-robin. +#[orpc_server] +pub struct HeimdallLinnOSPlusPolicy { + read_cursor: AtomicUsize, + members: Vec>, + heimdall: Arc, + observers: Vec>>, + hidden1_weights: Vec<[[f32; 8]; 31]>, + hidden1_biases: Vec<[f32; 8]>, + hidden2_weights: Vec<[[f32; 8]; 8]>, + hidden2_biases: Vec<[f32; 8]>, + output_weights: Vec<[[f32; 2]; 8]>, + output_biases: Vec<[f32; 2]>, +} + +impl core::fmt::Debug for HeimdallLinnOSPlusPolicy { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("HeimdallLinnOSPlusPolicy") + .field("read_cursor", &self.read_cursor) + .field("members", &self.members) + .field( + "observers", + &format_args!("[{} observers]", self.observers.len()), + ) + .finish() + } +} + +impl HeimdallLinnOSPlusPolicy { + pub fn new( + members: Vec>, + heimdall: Arc, + observers: Vec>>, + ) -> Result, Error> { + use crate::linnos_plus_weights::{ + HIDDEN1_BIASES, HIDDEN1_WEIGHTS, HIDDEN2_BIASES, HIDDEN2_WEIGHTS, OUTPUT_BIASES, + OUTPUT_WEIGHTS, + }; + + let num_devices = members.len(); + + let hidden1_weights: Vec<[[f32; 8]; 31]> = + (0..num_devices).map(|i| *HIDDEN1_WEIGHTS[i]).collect(); + let hidden1_biases: Vec<[f32; 8]> = + (0..num_devices).map(|i| *HIDDEN1_BIASES[i]).collect(); + let hidden2_weights: Vec<[[f32; 8]; 8]> = + (0..num_devices).map(|i| *HIDDEN2_WEIGHTS[i]).collect(); + let hidden2_biases: Vec<[f32; 
8]> = + (0..num_devices).map(|i| *HIDDEN2_BIASES[i]).collect(); + let output_weights: Vec<[[f32; 2]; 8]> = + (0..num_devices).map(|i| *OUTPUT_WEIGHTS[i]).collect(); + let output_biases: Vec<[f32; 2]> = + (0..num_devices).map(|i| *OUTPUT_BIASES[i]).collect(); + + let server = Self::new_with(|orpc_internal, _| Self { + orpc_internal, + read_cursor: AtomicUsize::new(0), + members, + heimdall, + observers, + hidden1_weights, + hidden1_biases, + hidden2_weights, + hidden2_biases, + output_weights, + output_biases, + }); + + Ok(server) + } +} + +impl SelectionPolicy for HeimdallLinnOSPlusPolicy { + fn select_block_device(&self, submitted: &mut SubmittedBio) -> Result, Error> { + let num_devices = self.members.len(); + let start_idx = self.read_cursor.fetch_add(1, Ordering::Relaxed); + let num_pages = submitted.num_pages(); + + // Try each device once, starting from the round-robin cursor. + for offset in 0..num_devices { + let device_idx = (start_idx + offset) % num_devices; + + // Heimdall prediction: a "fast" verdict selects this device immediately. + if self.heimdall.is_device_fast(device_idx) { + return Ok(self.members[device_idx].clone()); + } + + // Heimdall said "slow": fall through to a LinnOS Plus inference as a second opinion.
+ let observer = self.observers[device_idx].lock(); + let completion_trace = observer + .weak_observe_recent(4) + .expect("Failed to observe completion trace"); + + // Build the 31-element input feature vector (same as LinnOS Plus) + let mut input = [0.0f32; 31]; + + let current_outstanding = num_pages as usize + self.members[device_idx].num_outstanding_pages() as usize; + input[0] = ((current_outstanding / 100) % 10) as f32; + input[1] = ((current_outstanding / 10) % 10) as f32; + input[2] = (current_outstanding % 10) as f32; + + for (i, trace_entry) in completion_trace.iter().enumerate().take(4) { + let Some(trace_entry) = trace_entry else { + continue; + }; + let outstanding = trace_entry.outstanding_pages as usize; + let latency_us = trace_entry.latency_us as usize; + let base = 3 + i * 7; + + input[base] = ((outstanding / 100) % 10) as f32; + input[base + 1] = ((outstanding / 10) % 10) as f32; + input[base + 2] = (outstanding % 10) as f32; + + input[base + 3] = ((latency_us / 1000) % 10) as f32; + input[base + 4] = ((latency_us / 100) % 10) as f32; + input[base + 5] = ((latency_us / 10) % 10) as f32; + input[base + 6] = (latency_us % 10) as f32; + } + + // Hidden layer 1: input (31) x hidden1_weights (31x8) + bias (8) -> hidden1_out (8) + let h1_weights = &self.hidden1_weights[device_idx]; + let h1_bias = &self.hidden1_biases[device_idx]; + let mut hidden1_out = [0.0f32; 8]; + for j in 0..8 { + let mut sum = h1_bias[j]; + for i in 0..31 { + sum += input[i] * h1_weights[i][j]; + } + hidden1_out[j] = if sum > 0.0 { sum } else { 0.0 }; + } + + // Hidden layer 2: hidden1_out (8) x hidden2_weights (8x8) + bias (8) -> hidden2_out (8) + let h2_weights = &self.hidden2_weights[device_idx]; + let h2_bias = &self.hidden2_biases[device_idx]; + let mut hidden2_out = [0.0f32; 8]; + for j in 0..8 { + let mut sum = h2_bias[j]; + for i in 0..8 { + sum += hidden1_out[i] * h2_weights[i][j]; + } + hidden2_out[j] = if sum > 0.0 { sum } else { 0.0 }; + } + + // Output layer: 
hidden2_out (8) x output_weights (8x2) + bias (2) -> output (2) + let out_weights = &self.output_weights[device_idx]; + let out_bias = &self.output_biases[device_idx]; + let mut output = [out_bias[0], out_bias[1]]; + for k in 0..2 { + for j in 0..8 { + output[k] += hidden2_out[j] * out_weights[j][k]; + } + } + + // Argmax: output[0] < output[1] means fast, otherwise slow + if output[0] < output[1] { + return Ok(self.members[device_idx].clone()); + } + } + + // All devices failed both checks — fall back to round-robin. + let fallback_idx = start_idx % num_devices; + Ok(self.members[fallback_idx].clone()) + } +} diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs index 45a2e0491..81bc903b8 100644 --- a/kernel/src/fs/mod.rs +++ b/kernel/src/fs/mod.rs @@ -25,7 +25,7 @@ pub mod utils; use aster_block::BlockDevice; #[cfg(not(baseline_asterinas))] #[expect(unused_imports)] -use aster_raid::selection_policies::{DecisionTreePolicy, Dummy0Policy, HeimdallRoundRobinPolicy, LinnOSPolicy, LinnOSPlusPolicy, RoundRobinPolicy}; +use aster_raid::selection_policies::{DecisionTreePolicy, Dummy0Policy, HeimdallLinnOSPlusPolicy, HeimdallRoundRobinPolicy, LinnOSPolicy, LinnOSPlusPolicy, RoundRobinPolicy}; use aster_raid::{Raid1Device, Raid1DeviceError}; use aster_virtio::device::block::device::BlockDevice as VirtIoBlockDevice; @@ -181,11 +181,11 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { setup_data_capture(&members, RAID_MEMBER_NAMES); // Clone members for Heimdall before they are consumed by the selection policy / RAID init. 
- #[cfg(not(baseline_asterinas))] + #[cfg(all(not(baseline_asterinas), any(raid_selection = "heimdall", raid_selection = "heimdalllinnosplus")))] let members_for_heimdall = members.clone(); // Initialize Heimdall device performance monitor - #[cfg(not(baseline_asterinas))] + #[cfg(all(not(baseline_asterinas), any(raid_selection = "heimdall", raid_selection = "heimdalllinnosplus")))] let heimdall = { use aster_virtio::device::block::server_traits::BlockIOObservable; use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; @@ -234,7 +234,7 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { info!("[raid] creating selection policy"); // Shared weak observer setup for all observer-based policies (LinnOS, LinnOS Plus, Decision Tree) - #[cfg(all(not(baseline_asterinas), any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree")))] + #[cfg(all(not(baseline_asterinas), any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree", raid_selection = "heimdalllinnosplus")))] let observers = { use aster_virtio::device::block::server_traits::BlockIOObservable; use ostd::orpc::oqueue::{OQueueBase, ObservationQuery}; @@ -270,8 +270,12 @@ fn setup_raid1_device(raid_device_name: &str) -> Result<()> { #[cfg(all(not(baseline_asterinas), raid_selection = "heimdall"))] let selection_policy = HeimdallRoundRobinPolicy::new(members.clone(), heimdall).unwrap(); + // Heimdall + LinnOS Plus Policy + #[cfg(all(not(baseline_asterinas), raid_selection = "heimdalllinnosplus"))] + let selection_policy = HeimdallLinnOSPlusPolicy::new(members.clone(), heimdall, observers).unwrap(); + // Round Robin Policy (explicit or default when no raid_selection is specified) - #[cfg(all(not(baseline_asterinas), any(raid_selection = "roundrobin", not(any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree", raid_selection = "heimdall")))))] + #[cfg(all(not(baseline_asterinas), 
any(raid_selection = "roundrobin", not(any(raid_selection = "linnos", raid_selection = "linnos_plus", raid_selection = "decision_tree", raid_selection = "heimdall", raid_selection = "heimdalllinnosplus")))))] let selection_policy = RoundRobinPolicy::new(members.clone()).unwrap(); // Initialize and Register RAID-1 device