From 1e6173ec3e31badb94c0e87d18cba72592a5790b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 15 Apr 2026 18:08:30 +0800 Subject: [PATCH] netlink: virtualize NETLINK_ROUTE as loopback-only view Replaces the unconditional AF_NETLINK seccomp block with a userspace virtualization that lets sandboxed processes open NETLINK_ROUTE sockets and see a synthetic one-interface (`lo`) view. Other netlink protocols (AUDIT, GENERIC, etc.) remain blocked via EAFNOSUPPORT in the handler. Signed-off-by: Cong Wang --- crates/sandlock-core/src/context.rs | 37 ++- crates/sandlock-core/src/lib.rs | 1 + crates/sandlock-core/src/netlink/handlers.rs | 232 ++++++++++++++++++ crates/sandlock-core/src/netlink/mod.rs | 13 + crates/sandlock-core/src/netlink/proto.rs | 157 ++++++++++++ crates/sandlock-core/src/netlink/proxy.rs | 101 ++++++++ crates/sandlock-core/src/netlink/state.rs | 38 +++ crates/sandlock-core/src/netlink/synth.rs | 174 +++++++++++++ crates/sandlock-core/src/sandbox.rs | 1 + crates/sandlock-core/src/seccomp/ctx.rs | 2 + crates/sandlock-core/src/seccomp/dispatch.rs | 52 ++++ crates/sandlock-core/src/seccomp/notif.rs | 38 +++ crates/sandlock-core/src/sys/structs.rs | 1 - crates/sandlock-core/tests/integration.rs | 3 + .../tests/integration/test_netlink_virt.rs | 122 +++++++++ .../tests/integration/test_seccomp_enforce.rs | 35 --- 16 files changed, 950 insertions(+), 57 deletions(-) create mode 100644 crates/sandlock-core/src/netlink/handlers.rs create mode 100644 crates/sandlock-core/src/netlink/mod.rs create mode 100644 crates/sandlock-core/src/netlink/proto.rs create mode 100644 crates/sandlock-core/src/netlink/proxy.rs create mode 100644 crates/sandlock-core/src/netlink/state.rs create mode 100644 crates/sandlock-core/src/netlink/synth.rs create mode 100644 crates/sandlock-core/tests/integration/test_netlink_virt.rs diff --git a/crates/sandlock-core/src/context.rs b/crates/sandlock-core/src/context.rs index 39b826f..a89f2a4 100644 --- 
a/crates/sandlock-core/src/context.rs +++ b/crates/sandlock-core/src/context.rs @@ -8,7 +8,7 @@ use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}; use crate::policy::{FsIsolation, Policy}; use crate::seccomp::bpf::{self, stmt, jump}; use crate::sys::structs::{ - AF_INET, AF_INET6, AF_NETLINK, + AF_INET, AF_INET6, BPF_ABS, BPF_ALU, BPF_AND, BPF_JEQ, BPF_JSET, BPF_JMP, BPF_K, BPF_LD, BPF_RET, BPF_W, CLONE_NS_FLAGS, DEFAULT_DENY_SYSCALLS, EPERM, SECCOMP_RET_ALLOW, SECCOMP_RET_ERRNO, @@ -280,6 +280,21 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { libc::SYS_getdents64 as u32, libc::SYS_getdents as u32, ]); + + // Netlink virtualization (always on): + // socket, bind, getsockname — swap in a unix socketpair for AF_NETLINK + // recvfrom, recvmsg — zero msg_name so glibc accepts the reply + // (kernel only writes sun_family on unix + // recvmsg, leaving nl_pid uninitialized) + // close — unregister (pid, fd) so reuse doesn't + // collide with the cookie set + // Send traffic flows through the real socketpair untouched. 
+ nrs.push(libc::SYS_socket as u32); + nrs.push(libc::SYS_bind as u32); + nrs.push(libc::SYS_getsockname as u32); + nrs.push(libc::SYS_recvfrom as u32); + nrs.push(libc::SYS_recvmsg as u32); + nrs.push(libc::SYS_close as u32); // Virtualize sched_getaffinity so nproc/sysconf agree with /proc/cpuinfo if policy.num_cpus.is_some() { nrs.push(libc::SYS_sched_getaffinity as u32); @@ -447,7 +462,6 @@ pub fn deny_syscall_numbers(policy: &Policy) -> Vec { /// - clone: block namespace creation flags /// - ioctl: block TIOCSTI, TIOCLINUX /// - prctl: block PR_SET_DUMPABLE, PR_SET_SECUREBITS, PR_SET_PTRACER -/// - socket: block all AF_NETLINK sockets (network topology enumeration) /// - socket: block SOCK_RAW/SOCK_DGRAM on AF_INET/AF_INET6 (with type mask) pub fn arg_filters(policy: &Policy) -> Vec { let ret_errno = SECCOMP_RET_ERRNO | EPERM as u32; @@ -497,22 +511,6 @@ pub fn arg_filters(policy: &Policy) -> Vec { insns.push(stmt(BPF_RET | BPF_K, ret_errno)); } - // --- socket: block all AF_NETLINK sockets --- - // Netlink sockets allow network topology enumeration (interfaces, routes, - // ARP, etc.) which leaks host network configuration. Block the entire - // AF_NETLINK family, not just NETLINK_SOCK_DIAG. 
- // 5 instructions: - // LD NR - // JEQ socket → +0, skip 3 - // LD arg0 (domain) - // JEQ AF_NETLINK → +0, skip 1 - // RET ERRNO - insns.push(stmt(BPF_LD | BPF_W | BPF_ABS, OFFSET_NR)); - insns.push(jump(BPF_JMP | BPF_JEQ | BPF_K, nr_socket, 0, 3)); - insns.push(stmt(BPF_LD | BPF_W | BPF_ABS, OFFSET_ARGS0_LO)); - insns.push(jump(BPF_JMP | BPF_JEQ | BPF_K, AF_NETLINK, 0, 1)); - insns.push(stmt(BPF_RET | BPF_K, ret_errno)); - // --- socket: block SOCK_RAW and/or SOCK_DGRAM on AF_INET/AF_INET6 --- let mut blocked_types: Vec = Vec::new(); if policy.no_raw_sockets { @@ -1114,9 +1112,6 @@ mod tests { // Should contain JEQ for PR_SET_DUMPABLE assert!(filters.iter().any(|f| f.code == (BPF_JMP | BPF_JEQ | BPF_K) && f.k == PR_SET_DUMPABLE)); - // Should contain JEQ for socket + AF_NETLINK (all netlink blocked) - assert!(filters.iter().any(|f| f.code == (BPF_JMP | BPF_JEQ | BPF_K) - && f.k == AF_NETLINK)); } #[test] diff --git a/crates/sandlock-core/src/lib.rs b/crates/sandlock-core/src/lib.rs index 36b7eb5..caf5654 100644 --- a/crates/sandlock-core/src/lib.rs +++ b/crates/sandlock-core/src/lib.rs @@ -14,6 +14,7 @@ pub(crate) mod random; pub(crate) mod time; pub(crate) mod cow; pub(crate) mod checkpoint; +pub mod netlink; pub(crate) mod procfs; pub(crate) mod port_remap; pub mod pipeline; diff --git a/crates/sandlock-core/src/netlink/handlers.rs b/crates/sandlock-core/src/netlink/handlers.rs new file mode 100644 index 0000000..701c107 --- /dev/null +++ b/crates/sandlock-core/src/netlink/handlers.rs @@ -0,0 +1,232 @@ +use std::os::unix::io::{FromRawFd, OwnedFd, RawFd}; +use std::sync::Arc; + +use crate::netlink::{proxy, state::NetlinkState}; +use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction, OnInjectSuccess}; +use crate::sys::structs::SeccompNotif; + +const AF_NETLINK: u64 = 16; +const NETLINK_ROUTE: u64 = 0; + +/// Resolve `notif.pid` (which is a TID per the kernel's `task_pid_vnr`) to +/// the enclosing thread group id. 
fds are shared across all threads of a +/// process, so cookie entries must be keyed by TGID — otherwise a cookie +/// created by thread A is invisible to thread B in the same process. +fn tgid_of(tid: i32) -> i32 { + let path = format!("/proc/{}/status", tid); + if let Ok(s) = std::fs::read_to_string(&path) { + for line in s.lines() { + if let Some(rest) = line.strip_prefix("Tgid:") { + if let Ok(v) = rest.trim().parse::() { + return v; + } + } + } + } + // Fallback: if we can't read status, treat the tid as the tgid. + tid +} + +/// Read a POD struct `T` from child memory via `process_vm_readv`, with the +/// shared `notif::read_child_mem` helper that ID-validates the notification +/// before and after the read. +fn read_struct( + notif_fd: RawFd, + id: u64, + pid: u32, + addr: usize, +) -> Option { + let bytes = read_child_mem(notif_fd, id, pid, addr as u64, std::mem::size_of::()).ok()?; + Some(unsafe { std::ptr::read_unaligned(bytes.as_ptr() as *const T) }) +} + +/// Intercept `socket(AF_NETLINK, *, NETLINK_ROUTE)` and substitute one end +/// of a `socketpair(AF_UNIX, SOCK_SEQPACKET)`. A tokio task takes the +/// supervisor-side end and speaks synthesized NETLINK_ROUTE replies. +/// Other domains pass through; other netlink protocols are denied. +pub async fn handle_socket( + notif: &SeccompNotif, + state: &Arc, +) -> NotifAction { + let domain = notif.data.args[0]; + let protocol = notif.data.args[2]; + + if domain != AF_NETLINK { + return NotifAction::Continue; + } + if protocol != NETLINK_ROUTE { + return NotifAction::Errno(libc::EAFNOSUPPORT); + } + + let mut fds = [0i32; 2]; + let rc = unsafe { + libc::socketpair( + libc::AF_UNIX, + libc::SOCK_SEQPACKET | libc::SOCK_CLOEXEC, + 0, + fds.as_mut_ptr(), + ) + }; + if rc != 0 { + return NotifAction::Errno(libc::ENOMEM); + } + // fds[0] → supervisor side (responder owns) + // fds[1] → child side (injected) + // + // The supervisor end is driven by a tokio task via AsyncFd, so it + // must be non-blocking. 
The child end stays blocking (glibc's + // netlink code expects blocking semantics). + let flags = unsafe { libc::fcntl(fds[0], libc::F_GETFL) }; + if flags < 0 + || unsafe { libc::fcntl(fds[0], libc::F_SETFL, flags | libc::O_NONBLOCK) } < 0 + { + unsafe { + libc::close(fds[0]); + libc::close(fds[1]); + } + return NotifAction::Errno(libc::ENOMEM); + } + let responder_fd = unsafe { OwnedFd::from_raw_fd(fds[0]) }; + let child_fd = unsafe { OwnedFd::from_raw_fd(fds[1]) }; + + // tgid, not tid: fds are process-scoped, so the cookie set must be + // keyed per-process to be visible across threads of the same app. + // The responder also uses tgid as `nlmsg_pid` in its replies so the + // value is consistent with what `handle_getsockname` writes for the + // same process (glibc compares incoming nlmsg_pid against the value + // it read back from getsockname — they must agree). + let tgid = tgid_of(notif.pid as i32); + proxy::spawn_responder(responder_fd, tgid as u32); + + // Record the (tgid, fd) once the kernel's ADDFD ioctl returns the + // child-side fd number. Doing it from the on-success callback + // (rather than guessing via inode matching afterwards) closes the + // TOCTOU gap: the entry lands in the state map *before* the child's + // syscall unblocks, and the key is the exact fd slot the kernel + // allocated — not derivable by racing the child. + let state = Arc::clone(state); + NotifAction::InjectFdSendTracked { + srcfd: child_fd, + newfd_flags: libc::O_CLOEXEC as u32, + on_success: OnInjectSuccess::new(move |child_fd_num| { + state.register(tgid, child_fd_num); + }), + } +} + +/// Zero out the `msg_name` region of a recvmsg/recvfrom before the kernel +/// runs the syscall, so that the source address glibc sees has +/// `nl_pid == 0` (the kernel only writes `sun_family` = AF_UNIX = 2 bytes +/// into a unix-socketpair recvmsg's source address; bytes 2..end remain as +/// whatever we pre-filled). 
+/// +/// glibc's netlink receive loop rejects messages where +/// `source_addr.nl_pid != 0` with a silent `continue`, interpreting them as +/// coming from a non-kernel peer. Without this zeroing the `nl_pid` bits +/// are uninitialized stack and the check is flaky. +pub async fn handle_netlink_recvmsg( + notif: &SeccompNotif, + state: &Arc, + notif_fd: RawFd, +) -> NotifAction { + let fd = notif.data.args[0] as i32; + let tgid = tgid_of(notif.pid as i32); + if !state.is_cookie(tgid, fd) { + return NotifAction::Continue; + } + + let nr = notif.data.nr as i64; + let sockaddr_nl_len: usize = 12; + let zeros = [0u8; 12]; + let pid = notif.pid; + let id = notif.id; + + if nr == libc::SYS_recvmsg { + // args: (fd, msghdr*, flags) + let msghdr_ptr = notif.data.args[1] as usize; + if let Some(hdr) = read_struct::(notif_fd, id, pid, msghdr_ptr) { + if !hdr.msg_name.is_null() && (hdr.msg_namelen as usize) >= sockaddr_nl_len { + let _ = write_child_mem(notif_fd, id, pid, hdr.msg_name as u64, &zeros); + } + } + } else if nr == libc::SYS_recvfrom { + // args: (fd, buf, len, flags, src_addr*, addrlen_ptr) + let src_addr = notif.data.args[4] as u64; + let addrlen_ptr = notif.data.args[5] as u64; + if src_addr != 0 && addrlen_ptr != 0 { + if let Ok(b) = read_child_mem(notif_fd, id, pid, addrlen_ptr, 4) { + let cap = u32::from_ne_bytes(b.try_into().unwrap_or([0; 4])) as usize; + if cap >= sockaddr_nl_len { + let _ = write_child_mem(notif_fd, id, pid, src_addr, &zeros); + } + } + } + } + + NotifAction::Continue +} + +pub async fn handle_bind( + notif: &SeccompNotif, + state: &Arc, +) -> NotifAction { + let fd = notif.data.args[0] as i32; + let tgid = tgid_of(notif.pid as i32); + if state.is_cookie(tgid, fd) { + return NotifAction::ReturnValue(0); + } + NotifAction::Continue +} + +/// Remove `(tgid, fd)` from the cookie set when the child closes a +/// tracked netlink socket. Lets the kernel actually close the fd too. 
+pub async fn handle_close( + notif: &SeccompNotif, + state: &Arc, +) -> NotifAction { + let fd = notif.data.args[0] as i32; + let tgid = tgid_of(notif.pid as i32); + if state.is_cookie(tgid, fd) { + state.unregister(tgid, fd); + } + NotifAction::Continue +} + +pub async fn handle_getsockname( + notif: &SeccompNotif, + state: &Arc, + notif_fd: RawFd, +) -> NotifAction { + let fd = notif.data.args[0] as i32; + let tgid = tgid_of(notif.pid as i32); + if !state.is_cookie(tgid, fd) { + return NotifAction::Continue; + } + + // struct sockaddr_nl { u16 nl_family; u16 _pad; u32 nl_pid; u32 nl_groups; } + // + // We use the tgid as the synthesized nl_pid so it's stable across + // threads of the same process — matching the real kernel's netlink + // auto-bind behavior which assigns one nl_pid per netlink socket. + let mut addr = [0u8; 12]; + let nl_family = libc::AF_NETLINK as u16; + addr[0..2].copy_from_slice(&nl_family.to_ne_bytes()); + addr[4..8].copy_from_slice(&(tgid as u32).to_ne_bytes()); + + let addr_ptr = notif.data.args[1] as u64; + let addrlen_ptr = notif.data.args[2] as u64; + let pid = notif.pid; + let id = notif.id; + + let cur = match read_child_mem(notif_fd, id, pid, addrlen_ptr, 4) { + Ok(b) => u32::from_ne_bytes(b.try_into().unwrap_or([0; 4])) as usize, + Err(_) => return NotifAction::Errno(libc::EFAULT), + }; + let to_write = cur.min(addr.len()); + if write_child_mem(notif_fd, id, pid, addr_ptr, &addr[..to_write]).is_err() { + return NotifAction::Errno(libc::EFAULT); + } + let actual = (addr.len() as u32).to_ne_bytes(); + let _ = write_child_mem(notif_fd, id, pid, addrlen_ptr, &actual); + NotifAction::ReturnValue(0) +} diff --git a/crates/sandlock-core/src/netlink/mod.rs b/crates/sandlock-core/src/netlink/mod.rs new file mode 100644 index 0000000..36b7e12 --- /dev/null +++ b/crates/sandlock-core/src/netlink/mod.rs @@ -0,0 +1,13 @@ +//! NETLINK_ROUTE virtualization for sandboxed processes. +//! +//! 
Presents a synthetic network view (one loopback interface) without +//! exposing real host netlink. See `state.rs` for the fd registry and +//! `handlers.rs` for seccomp-notify integration. + +pub mod handlers; +pub mod proto; +pub mod proxy; +pub mod state; +pub mod synth; + +pub use state::NetlinkState; diff --git a/crates/sandlock-core/src/netlink/proto.rs b/crates/sandlock-core/src/netlink/proto.rs new file mode 100644 index 0000000..2a157ae --- /dev/null +++ b/crates/sandlock-core/src/netlink/proto.rs @@ -0,0 +1,157 @@ +use std::mem::size_of; + +pub const NLMSG_ALIGN_TO: usize = 4; +pub const fn nlmsg_align(len: usize) -> usize { + (len + NLMSG_ALIGN_TO - 1) & !(NLMSG_ALIGN_TO - 1) +} + +pub const NLMSG_ERROR: u16 = 0x0002; +pub const NLMSG_DONE: u16 = 0x0003; +pub const RTM_GETLINK: u16 = 18; +pub const RTM_NEWLINK: u16 = 16; +pub const RTM_GETADDR: u16 = 22; +pub const RTM_NEWADDR: u16 = 20; + +pub const NLM_F_REQUEST: u16 = 0x001; +pub const NLM_F_MULTI: u16 = 0x002; +pub const NLM_F_DUMP: u16 = 0x300; + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct NlMsgHdr { + pub nlmsg_len: u32, + pub nlmsg_type: u16, + pub nlmsg_flags: u16, + pub nlmsg_seq: u32, + pub nlmsg_pid: u32, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct IfInfoMsg { + pub ifi_family: u8, + pub _pad: u8, + pub ifi_type: u16, + pub ifi_index: i32, + pub ifi_flags: u32, + pub ifi_change: u32, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct IfAddrMsg { + pub ifa_family: u8, + pub ifa_prefixlen: u8, + pub ifa_flags: u8, + pub ifa_scope: u8, + pub ifa_index: u32, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct RtAttr { + pub rta_len: u16, + pub rta_type: u16, +} + +pub const NLMSG_HDRLEN: usize = size_of::(); +pub const RTA_HDRLEN: usize = size_of::(); + +pub struct Writer { buf: Vec } + +impl Writer { + pub fn new() -> Self { Self { buf: Vec::new() } } + pub fn into_vec(self) -> Vec { self.buf } + + pub fn write_aligned(&mut self, bytes: &[u8]) { + 
self.buf.extend_from_slice(bytes); + let pad = nlmsg_align(bytes.len()) - bytes.len(); + self.buf.resize(self.buf.len() + pad, 0); + } + + pub fn write_attr(&mut self, rta_type: u16, payload: &[u8]) { + let total = RTA_HDRLEN + payload.len(); + let hdr = RtAttr { rta_len: total as u16, rta_type }; + let hdr_bytes = unsafe { + std::slice::from_raw_parts(&hdr as *const _ as *const u8, RTA_HDRLEN) + }; + self.buf.extend_from_slice(hdr_bytes); + self.buf.extend_from_slice(payload); + let pad = nlmsg_align(total) - total; + self.buf.resize(self.buf.len() + pad, 0); + } + + pub fn begin_msg(&mut self, nlmsg_type: u16, flags: u16, seq: u32, pid: u32) -> usize { + let start = self.buf.len(); + let hdr = NlMsgHdr { + nlmsg_len: 0, + nlmsg_type, nlmsg_flags: flags, nlmsg_seq: seq, nlmsg_pid: pid, + }; + let hdr_bytes = unsafe { + std::slice::from_raw_parts(&hdr as *const _ as *const u8, NLMSG_HDRLEN) + }; + self.buf.extend_from_slice(hdr_bytes); + start + } + + pub fn finish_msg(&mut self, start: usize) { + let total = self.buf.len() - start; + let len_bytes = (total as u32).to_ne_bytes(); + self.buf[start..start + 4].copy_from_slice(&len_bytes); + let pad = nlmsg_align(total) - total; + self.buf.resize(self.buf.len() + pad, 0); + } +} + +#[derive(Debug, Clone, Copy)] +pub struct ParsedRequest { + pub nlmsg_type: u16, + pub nlmsg_flags: u16, + pub nlmsg_seq: u32, + pub nlmsg_pid: u32, +} + +pub fn parse_request(buf: &[u8]) -> Option { + if buf.len() < NLMSG_HDRLEN { return None; } + let hdr: NlMsgHdr = unsafe { std::ptr::read_unaligned(buf.as_ptr() as *const _) }; + if (hdr.nlmsg_len as usize) > buf.len() { return None; } + Some(ParsedRequest { + nlmsg_type: hdr.nlmsg_type, + nlmsg_flags: hdr.nlmsg_flags, + nlmsg_seq: hdr.nlmsg_seq, + nlmsg_pid: hdr.nlmsg_pid, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn align_rounds_to_4() { + assert_eq!(nlmsg_align(0), 0); + assert_eq!(nlmsg_align(1), 4); + assert_eq!(nlmsg_align(4), 4); + 
assert_eq!(nlmsg_align(5), 8); + assert_eq!(nlmsg_align(16), 16); + } + + #[test] + fn writer_msg_round_trip() { + let mut w = Writer::new(); + let start = w.begin_msg(RTM_NEWLINK, NLM_F_MULTI, 42, 0); + w.write_attr(3 /* IFLA_IFNAME */, b"lo\0"); + w.finish_msg(start); + let buf = w.into_vec(); + let parsed = parse_request(&buf).unwrap(); + assert_eq!(parsed.nlmsg_type, RTM_NEWLINK); + assert_eq!(parsed.nlmsg_seq, 42); + let total = u32::from_ne_bytes(buf[0..4].try_into().unwrap()) as usize; + assert!(total >= NLMSG_HDRLEN + RTA_HDRLEN + 3); + } + + #[test] + fn parse_request_rejects_short_buffer() { + assert!(parse_request(&[0u8; 4]).is_none()); + } +} diff --git a/crates/sandlock-core/src/netlink/proxy.rs b/crates/sandlock-core/src/netlink/proxy.rs new file mode 100644 index 0000000..13142b3 --- /dev/null +++ b/crates/sandlock-core/src/netlink/proxy.rs @@ -0,0 +1,101 @@ +//! Async responder that speaks synthesized NETLINK_ROUTE over a unix +//! `SOCK_SEQPACKET` socketpair. The child process holds one end; this +//! task owns the other and runs on the supervisor's tokio runtime — +//! same runtime as the seccomp-notify dispatcher and the HTTP ACL proxy. +//! +//! Loop: +//! await readable on the supervisor-side fd +//! recv one datagram (request) +//! parse → synthesize reply datagrams → concatenate → send +//! on EOF (child closed), exit +//! +//! Task lifetime is bounded by the supervisor's tokio runtime: when the +//! sandbox shuts down and the runtime is dropped, every in-flight +//! responder task is cancelled. No OS threads and no explicit shutdown +//! handle are needed. + +use std::os::unix::io::{AsRawFd, OwnedFd}; + +use tokio::io::unix::AsyncFd; +use tokio::io::Interest; + +use crate::netlink::{proto, synth}; + +const RECV_BUF: usize = 8192; + +/// Spawn the responder task for a newly-created cookie fd. The task +/// takes ownership of `fd`; the caller must not use it further. 
+/// +/// `reply_pid` is the Linux pid of the sandboxed process, used as the +/// `nlmsg_pid` field in reply messages so glibc accepts them. +/// +/// Must be called from within the supervisor's tokio runtime (all +/// seccomp-notify handlers satisfy this). The supervisor-side fd must +/// be non-blocking; see `handle_socket` for the `F_SETFL` call. +pub fn spawn_responder(fd: OwnedFd, reply_pid: u32) { + tokio::spawn(async move { + if let Err(e) = responder_loop(fd, reply_pid).await { + eprintln!("sandlock netlink responder error: {e}"); + } + }); +} + +async fn responder_loop(fd: OwnedFd, reply_pid: u32) -> std::io::Result<()> { + let async_fd = AsyncFd::with_interest(fd, Interest::READABLE)?; + let mut buf = vec![0u8; RECV_BUF]; + + loop { + let mut guard = async_fd.readable().await?; + let raw = guard.get_inner().as_raw_fd(); + let n = match guard.try_io(|_| { + let ret = unsafe { + libc::recv(raw, buf.as_mut_ptr() as *mut _, buf.len(), 0) + }; + if ret < 0 { + Err(std::io::Error::last_os_error()) + } else { + Ok(ret as usize) + } + }) { + Ok(Ok(n)) => n, + Ok(Err(e)) if e.kind() == std::io::ErrorKind::WouldBlock => continue, + Ok(Err(e)) => return Err(e), + Err(_would_block) => continue, + }; + if n == 0 { + return Ok(()); + } + + let req = match proto::parse_request(&buf[..n]) { + Some(r) => r, + None => continue, + }; + + // Pack all reply messages for this request into a single + // datagram. glibc's dump reader walks the recvmsg buffer + // looking for NLMSG_DONE, so keeping everything in one send + // is simpler and matches the common kernel behavior for + // small dumps. + let reply: Vec = synth::synthesize_reply(&req, reply_pid) + .into_iter() + .flatten() + .collect(); + + // The supervisor-side fd is non-blocking, but the kernel + // socket buffer is large enough for any reply we produce + // (< 64 KB for the biggest synthetic dump we'd ever make), + // so send() never returns EAGAIN in practice. 
If it ever + // does, we'd need to wait for writability too. + let sent = unsafe { + libc::send( + raw, + reply.as_ptr() as *const _, + reply.len(), + libc::MSG_NOSIGNAL | libc::MSG_DONTWAIT, + ) + }; + if sent < 0 { + return Err(std::io::Error::last_os_error()); + } + } +} diff --git a/crates/sandlock-core/src/netlink/state.rs b/crates/sandlock-core/src/netlink/state.rs new file mode 100644 index 0000000..687ead5 --- /dev/null +++ b/crates/sandlock-core/src/netlink/state.rs @@ -0,0 +1,38 @@ +use std::collections::HashSet; +use std::sync::Mutex; + +/// Per-sandbox registry of virtualized netlink cookie fds. +/// +/// Keyed by `(pid, fd)` — the exact fd number allocated in the child +/// when our `socket(AF_NETLINK, ..., NETLINK_ROUTE)` handler returned +/// `InjectFdSendTracked`. Using the fd number directly (instead of +/// comparing `/proc/PID/fd/N` inodes against a set of injected +/// inodes) avoids TOCTOU: once we record `(pid, fd)`, no other thread +/// can redirect that fd slot without our `close` handler observing it +/// and removing the entry first. +#[derive(Default)] +pub struct NetlinkState { + cookies: Mutex>, +} + +impl NetlinkState { + pub fn new() -> Self { + Self { cookies: Mutex::new(HashSet::new()) } + } + + /// Register a new cookie fd injected into the child. + pub fn register(&self, pid: i32, fd: i32) { + self.cookies.lock().unwrap().insert((pid, fd)); + } + + /// Remove a cookie entry. Called from the close handler when the + /// child closes a tracked fd. + pub fn unregister(&self, pid: i32, fd: i32) { + self.cookies.lock().unwrap().remove(&(pid, fd)); + } + + /// Is this (pid, fd) one of our injected netlink cookies? 
+ pub fn is_cookie(&self, pid: i32, fd: i32) -> bool { + self.cookies.lock().unwrap().contains(&(pid, fd)) + } +} diff --git a/crates/sandlock-core/src/netlink/synth.rs b/crates/sandlock-core/src/netlink/synth.rs new file mode 100644 index 0000000..61ac3f4 --- /dev/null +++ b/crates/sandlock-core/src/netlink/synth.rs @@ -0,0 +1,174 @@ +use super::proto::*; + +const IFI_LO_INDEX: i32 = 1; +const IFI_LO_TYPE: u16 = 772; // ARPHRD_LOOPBACK +const IFF_UP: u32 = 0x1; +const IFF_LOOPBACK: u32 = 0x8; +const IFF_RUNNING: u32 = 0x40; +const LO_FLAGS: u32 = IFF_UP | IFF_LOOPBACK | IFF_RUNNING; +const LO_MTU: u32 = 65536; + +const IFLA_ADDRESS: u16 = 1; +const IFLA_BROADCAST: u16 = 2; +const IFLA_IFNAME: u16 = 3; +const IFLA_MTU: u16 = 4; +const IFLA_TXQLEN: u16 = 13; + +const IFA_ADDRESS: u16 = 1; +const IFA_LOCAL: u16 = 2; +const IFA_LABEL: u16 = 3; + +/// Synthesize a kernel-side reply as a sequence of datagrams. Each Vec +/// in the returned list is one netlink datagram that should be delivered via +/// a single recvmsg call (netlink is datagram-oriented). +/// +/// `reply_pid` is the Linux pid of the sandboxed process (used as the +/// `nlmsg_pid` field so glibc's pid-matching check on replies accepts them). +pub fn synthesize_reply(req: &ParsedRequest, reply_pid: u32) -> Vec> { + match req.nlmsg_type { + RTM_GETLINK if req.nlmsg_flags & NLM_F_DUMP != 0 => + build_link_dump(req.nlmsg_seq, reply_pid), + RTM_GETADDR if req.nlmsg_flags & NLM_F_DUMP != 0 => + build_addr_dump(req.nlmsg_seq, reply_pid), + _ => vec![build_error(req, -libc::EOPNOTSUPP)], + } +} + +/// Encode a single nlmsghdr + payload closure into one datagram. 
+fn encode_one( + nlmsg_type: u16, + flags: u16, + seq: u32, + pid: u32, + body: F, +) -> Vec { + let mut w = Writer::new(); + let start = w.begin_msg(nlmsg_type, flags, seq, pid); + body(&mut w); + w.finish_msg(start); + w.into_vec() +} + +fn done_datagram(seq: u32, pid: u32) -> Vec { + encode_one(NLMSG_DONE, NLM_F_MULTI, seq, pid, |w| { + w.write_aligned(&0i32.to_ne_bytes()); + }) +} + +fn build_link_dump(seq: u32, pid: u32) -> Vec> { + let link = encode_one(RTM_NEWLINK, NLM_F_MULTI, seq, pid, |w| { + let ifi = IfInfoMsg { + ifi_family: libc::AF_UNSPEC as u8, _pad: 0, + ifi_type: IFI_LO_TYPE, ifi_index: IFI_LO_INDEX, + ifi_flags: LO_FLAGS, ifi_change: 0, + }; + let ifi_bytes = unsafe { + std::slice::from_raw_parts(&ifi as *const _ as *const u8, std::mem::size_of::()) + }; + w.write_aligned(ifi_bytes); + w.write_attr(IFLA_IFNAME, b"lo\0"); + w.write_attr(IFLA_MTU, &LO_MTU.to_ne_bytes()); + w.write_attr(IFLA_TXQLEN, &1000u32.to_ne_bytes()); + w.write_attr(IFLA_ADDRESS, &[0u8; 6]); + w.write_attr(IFLA_BROADCAST, &[0u8; 6]); + }); + vec![link, done_datagram(seq, pid)] +} + +fn build_addr_dump(seq: u32, pid: u32) -> Vec> { + let v4 = encode_one(RTM_NEWADDR, NLM_F_MULTI, seq, pid, |w| { + let ifa = IfAddrMsg { + ifa_family: libc::AF_INET as u8, ifa_prefixlen: 8, + ifa_flags: 0, ifa_scope: 254, + ifa_index: IFI_LO_INDEX as u32, + }; + let ifa_bytes = unsafe { + std::slice::from_raw_parts(&ifa as *const _ as *const u8, std::mem::size_of::()) + }; + w.write_aligned(ifa_bytes); + w.write_attr(IFA_ADDRESS, &[127, 0, 0, 1]); + w.write_attr(IFA_LOCAL, &[127, 0, 0, 1]); + w.write_attr(IFA_LABEL, b"lo\0"); + }); + let v6 = encode_one(RTM_NEWADDR, NLM_F_MULTI, seq, pid, |w| { + let ifa = IfAddrMsg { + ifa_family: libc::AF_INET6 as u8, ifa_prefixlen: 128, + ifa_flags: 0, ifa_scope: 254, + ifa_index: IFI_LO_INDEX as u32, + }; + let ifa_bytes = unsafe { + std::slice::from_raw_parts(&ifa as *const _ as *const u8, std::mem::size_of::()) + }; + w.write_aligned(ifa_bytes); + let mut 
v6addr = [0u8; 16]; v6addr[15] = 1; + w.write_attr(IFA_ADDRESS, &v6addr); + w.write_attr(IFA_LOCAL, &v6addr); + }); + vec![v4, v6, done_datagram(seq, pid)] +} + +fn build_error(req: &ParsedRequest, err: i32) -> Vec { + encode_one(NLMSG_ERROR, 0, req.nlmsg_seq, req.nlmsg_pid, |w| { + w.write_aligned(&err.to_ne_bytes()); + let orig = NlMsgHdr { + nlmsg_len: NLMSG_HDRLEN as u32, + nlmsg_type: req.nlmsg_type, + nlmsg_flags: req.nlmsg_flags, + nlmsg_seq: req.nlmsg_seq, + nlmsg_pid: req.nlmsg_pid, + }; + let bytes = unsafe { + std::slice::from_raw_parts(&orig as *const _ as *const u8, NLMSG_HDRLEN) + }; + w.write_aligned(bytes); + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn link_dump_is_two_datagrams_newlink_then_done() { + let req = ParsedRequest { + nlmsg_type: RTM_GETLINK, nlmsg_flags: NLM_F_REQUEST | NLM_F_DUMP, + nlmsg_seq: 1, nlmsg_pid: 0, + }; + let reply = synthesize_reply(&req, 1234); + assert_eq!(reply.len(), 2, "expected 2 datagrams (NEWLINK, DONE)"); + let t0 = u16::from_ne_bytes(reply[0][4..6].try_into().unwrap()); + assert_eq!(t0, RTM_NEWLINK); + assert!(reply[0].windows(3).any(|w| w == b"lo\0")); + let t1 = u16::from_ne_bytes(reply[1][4..6].try_into().unwrap()); + assert_eq!(t1, NLMSG_DONE); + } + + #[test] + fn addr_dump_is_three_datagrams_v4_v6_done() { + let req = ParsedRequest { + nlmsg_type: RTM_GETADDR, nlmsg_flags: NLM_F_REQUEST | NLM_F_DUMP, + nlmsg_seq: 1, nlmsg_pid: 0, + }; + let reply = synthesize_reply(&req, 1234); + assert_eq!(reply.len(), 3, "expected 3 datagrams (v4 addr, v6 addr, DONE)"); + assert!(reply[0].windows(4).any(|w| w == [127, 0, 0, 1])); + let mut v6 = [0u8; 16]; v6[15] = 1; + assert!(reply[1].windows(16).any(|w| w == v6)); + let t2 = u16::from_ne_bytes(reply[2][4..6].try_into().unwrap()); + assert_eq!(t2, NLMSG_DONE); + } + + #[test] + fn unknown_type_returns_eopnotsupp() { + let req = ParsedRequest { + nlmsg_type: 999, nlmsg_flags: NLM_F_REQUEST, + nlmsg_seq: 7, nlmsg_pid: 0, + }; + let reply = 
synthesize_reply(&req, 1234); + assert_eq!(reply.len(), 1); + let t = u16::from_ne_bytes(reply[0][4..6].try_into().unwrap()); + assert_eq!(t, NLMSG_ERROR); + let err = i32::from_ne_bytes(reply[0][16..20].try_into().unwrap()); + assert_eq!(err, -libc::EOPNOTSUPP); + } +} diff --git a/crates/sandlock-core/src/sandbox.rs b/crates/sandlock-core/src/sandbox.rs index 8f8e214..81dead1 100644 --- a/crates/sandlock-core/src/sandbox.rs +++ b/crates/sandlock-core/src/sandbox.rs @@ -1038,6 +1038,7 @@ impl Sandbox { time_random: Arc::clone(&time_random_state), policy_fn: Arc::clone(&policy_fn_state), chroot: Arc::clone(&chroot_state), + netlink: Arc::new(crate::netlink::NetlinkState::new()), policy: Arc::new(notif_policy), child_pidfd: child_pidfd_raw, notif_fd: notif_raw_fd, diff --git a/crates/sandlock-core/src/seccomp/ctx.rs b/crates/sandlock-core/src/seccomp/ctx.rs index d21ed77..e415ae9 100644 --- a/crates/sandlock-core/src/seccomp/ctx.rs +++ b/crates/sandlock-core/src/seccomp/ctx.rs @@ -21,6 +21,8 @@ pub struct SupervisorCtx { pub policy_fn: Arc>, /// Chroot-specific runtime state. pub chroot: Arc>, + /// NETLINK_ROUTE virtualization state. + pub netlink: Arc, /// Immutable policy — no lock needed. pub policy: Arc, /// pidfd for the child process (immutable after spawn). diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index 969dfd6..558492f 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -312,6 +312,58 @@ pub fn build_dispatch_table( } } + // ------------------------------------------------------------------ + // NETLINK_ROUTE virtualization (always on). + // + // Send/recv traffic flows through a `socketpair(AF_UNIX, + // SOCK_SEQPACKET)` whose supervisor-side end is driven by a tokio + // task spawned in `handle_socket`. 
Only `socket`, `bind`, + // `getsockname`, `recvmsg`/`recvfrom`, and `close` need supervisor + // intercepts; send uses the kernel directly. + // + // Must register before `port_remap` so the netlink `bind` handler + // runs first and returns `Continue` for non-cookie fds. + // ------------------------------------------------------------------ + { + table.register(libc::SYS_socket, Box::new(|notif, ctx, _fd| { + let state = Arc::clone(&ctx.netlink); + Box::pin(async move { + crate::netlink::handlers::handle_socket(&notif, &state).await + }) + })); + table.register(libc::SYS_bind, Box::new(|notif, ctx, _fd| { + let state = Arc::clone(&ctx.netlink); + Box::pin(async move { + crate::netlink::handlers::handle_bind(&notif, &state).await + }) + })); + table.register(libc::SYS_getsockname, Box::new(|notif, ctx, notif_fd| { + let state = Arc::clone(&ctx.netlink); + Box::pin(async move { + crate::netlink::handlers::handle_getsockname(&notif, &state, notif_fd).await + }) + })); + // Zero the msg_name region on recv so glibc sees nl_pid=0 + // (the kernel only writes sun_family on unix socketpair recvmsg, + // leaving the rest of the buffer as stack garbage otherwise). + for &nr in &[libc::SYS_recvfrom, libc::SYS_recvmsg] { + table.register(nr, Box::new(|notif, ctx, notif_fd| { + let state = Arc::clone(&ctx.netlink); + Box::pin(async move { + crate::netlink::handlers::handle_netlink_recvmsg(&notif, &state, notif_fd).await + }) + })); + } + // Unregister on close so the (pid, fd) slot isn't left in the + // cookie set once the child reuses the fd for something else. 
+ table.register(libc::SYS_close, Box::new(|notif, ctx, _fd| { + let state = Arc::clone(&ctx.netlink); + Box::pin(async move { + crate::netlink::handlers::handle_close(&notif, &state).await + }) + })); + } + // ------------------------------------------------------------------ // Bind — on-behalf // ------------------------------------------------------------------ diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs index f211112..1d47a21 100644 --- a/crates/sandlock-core/src/seccomp/notif.rs +++ b/crates/sandlock-core/src/seccomp/notif.rs @@ -21,6 +21,26 @@ use crate::sys::structs::{ // NotifAction — how the supervisor should respond // ============================================================ +/// A one-shot callback invoked with the child-side fd number returned by +/// `SECCOMP_IOCTL_NOTIF_ADDFD` after a successful `InjectFdSendTracked`. +/// Wraps a boxed closure with a manual `Debug` impl so that `NotifAction` +/// can keep deriving `Debug`. The closure is both `Send` and `Sync` so +/// that `&NotifAction` remains `Send` (required because `NotifAction` is +/// borrowed across `.await` points in the notifier loop). +pub struct OnInjectSuccess(pub Box); + +impl std::fmt::Debug for OnInjectSuccess { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("OnInjectSuccess()") + } +} + +impl OnInjectSuccess { + pub fn new(f: F) -> Self { + Self(Box::new(f)) + } +} + /// How the supervisor should respond to a notification. #[derive(Debug)] pub enum NotifAction { @@ -35,6 +55,15 @@ pub enum NotifAction { /// The `OwnedFd` is closed automatically after the ioctl completes. /// `newfd_flags` controls flags on the injected fd (e.g. O_CLOEXEC). InjectFdSend { srcfd: OwnedFd, newfd_flags: u32 }, + /// Like `InjectFdSend`, but also invokes `on_success` with the + /// child-side fd number that `SECCOMP_IOCTL_NOTIF_ADDFD` returned.
+ /// Used when the caller needs to track the exact fd number allocated + /// in the child (e.g. to key per-fd state without TOCTOU). + InjectFdSendTracked { + srcfd: OwnedFd, + newfd_flags: u32, + on_success: OnInjectSuccess, + }, /// Synthetic return value (the child sees this as the syscall result). ReturnValue(i64), /// Don't respond — used for checkpoint/freeze. @@ -398,6 +427,15 @@ fn send_response(fd: RawFd, id: u64, action: NotifAction) -> io::Result<()> { Err(_) => respond_continue(fd, id), } } + NotifAction::InjectFdSendTracked { srcfd, newfd_flags, on_success } => { + match inject_fd_and_send(fd, id, srcfd.as_raw_fd(), newfd_flags) { + Ok(new_fd) => { + (on_success.0)(new_fd); + Ok(()) + } + Err(_) => respond_continue(fd, id), + } + } NotifAction::ReturnValue(val) => respond_value(fd, id, val), NotifAction::Hold => Ok(()), // Don't send a response. NotifAction::Kill { sig, pgid } => { diff --git a/crates/sandlock-core/src/sys/structs.rs b/crates/sandlock-core/src/sys/structs.rs index 35e9746..4fd89d0 100644 --- a/crates/sandlock-core/src/sys/structs.rs +++ b/crates/sandlock-core/src/sys/structs.rs @@ -240,7 +240,6 @@ pub const PR_SET_PTRACER: u32 = 0x5961_6d61; pub const AF_INET: u32 = 2; pub const AF_INET6: u32 = 10; -pub const AF_NETLINK: u32 = 16; pub const SOCK_RAW: u32 = 3; pub const SOCK_DGRAM: u32 = 2; pub const SOCK_TYPE_MASK: u32 = 0xFF; diff --git a/crates/sandlock-core/tests/integration.rs b/crates/sandlock-core/tests/integration.rs index f531e9e..40829cf 100644 --- a/crates/sandlock-core/tests/integration.rs +++ b/crates/sandlock-core/tests/integration.rs @@ -34,6 +34,9 @@ mod test_pipeline; #[path = "integration/test_network.rs"] mod test_network; +#[path = "integration/test_netlink_virt.rs"] +mod test_netlink_virt; + #[path = "integration/test_policy_fn.rs"] mod test_policy_fn; diff --git a/crates/sandlock-core/tests/integration/test_netlink_virt.rs b/crates/sandlock-core/tests/integration/test_netlink_virt.rs new file mode 100644 index 
0000000..a162780 --- /dev/null +++ b/crates/sandlock-core/tests/integration/test_netlink_virt.rs @@ -0,0 +1,122 @@ +use std::path::PathBuf; +use sandlock_core::{Policy, Sandbox}; + +fn base_policy() -> sandlock_core::PolicyBuilder { + Policy::builder() + .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/bin").fs_read("/etc").fs_read("/proc") + .fs_read("/dev").fs_write("/tmp") +} + +fn temp_out(name: &str) -> PathBuf { + std::env::temp_dir().join(format!( + "sandlock-test-nlvirt-{}-{}", name, std::process::id() + )) +} + +#[tokio::test] +async fn if_nameindex_returns_only_lo() { + let out = temp_out("if-nameindex"); + let script = format!(concat!( + "import socket\n", + "ifs = socket.if_nameindex()\n", + "open('{out}', 'w').write(repr(ifs))\n", + ), out = out.display()); + + let policy = base_policy().build().unwrap(); + let result = Sandbox::run_interactive(&policy, &["python3", "-c", &script]) + .await.unwrap(); + + let contents = std::fs::read_to_string(&out).unwrap_or_default(); + let _ = std::fs::remove_file(&out); + assert!( + contents.contains("'lo'") && !contents.contains("'eth"), + "expected only lo, got: {}", contents + ); + assert!(result.success()); +} + +#[tokio::test] +async fn loopback_bind_succeeds() { + let out = temp_out("loopback-bind"); + let script = format!(concat!( + "import socket\n", + "s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n", + "try:\n", + " s.bind(('127.0.0.1', 0))\n", + " result = 'OK'\n", + "except OSError as e:\n", + " result = f'FAIL:{{e}}'\n", + "finally:\n", + " s.close()\n", + "open('{out}', 'w').write(result)\n", + ), out = out.display()); + + // port 0 in Landlock net rules means "allow any port" + let policy = base_policy().net_bind_port(0).build().unwrap(); + let result = Sandbox::run_interactive(&policy, &["python3", "-c", &script]) + .await.unwrap(); + + let contents = std::fs::read_to_string(&out).unwrap_or_default(); + let _ = std::fs::remove_file(&out); + assert_eq!(contents.trim(), "OK", 
"loopback bind failed: {}", contents); + assert!(result.success()); +} + +/// Exercises `RTM_GETADDR` via glibc's `__check_pf`. With `AI_ADDRCONFIG`, +/// glibc opens a NETLINK_ROUTE socket and dumps addresses to decide which +/// families (v4/v6) the host supports. Our synthesized dump advertises +/// both 127.0.0.1 and ::1, so getaddrinfo must return entries for both +/// families for `localhost`. +#[tokio::test] +async fn getaddrinfo_ai_addrconfig_returns_v4_and_v6() { + let out = temp_out("getaddrinfo"); + let script = format!(concat!( + "import socket\n", + "fams = sorted({{i[0].name for i in socket.getaddrinfo(", + "'localhost', 443, type=socket.SOCK_STREAM, flags=socket.AI_ADDRCONFIG)}})\n", + "open('{out}', 'w').write(','.join(fams))\n", + ), out = out.display()); + + let policy = base_policy().build().unwrap(); + let result = Sandbox::run_interactive(&policy, &["python3", "-c", &script]) + .await.unwrap(); + + let contents = std::fs::read_to_string(&out).unwrap_or_default(); + let _ = std::fs::remove_file(&out); + assert_eq!( + contents.trim(), + "AF_INET,AF_INET6", + "AI_ADDRCONFIG should return both families for localhost, got: {}", + contents + ); + assert!(result.success()); +} + +#[tokio::test] +async fn non_route_netlink_still_blocked() { + let out = temp_out("netlink-audit-blocked"); + let script = format!(concat!( + "import socket\n", + "NETLINK_AUDIT = 9\n", + "try:\n", + " s = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, NETLINK_AUDIT)\n", + " s.close()\n", + " result = 'ALLOWED'\n", + "except OSError as e:\n", + " result = f'BLOCKED:{{e.errno}}'\n", + "open('{out}', 'w').write(result)\n", + ), out = out.display()); + + let policy = base_policy().build().unwrap(); + let result = Sandbox::run_interactive(&policy, &["python3", "-c", &script]) + .await.unwrap(); + + let contents = std::fs::read_to_string(&out).unwrap_or_default(); + let _ = std::fs::remove_file(&out); + assert!( + contents.starts_with("BLOCKED:"), + "NETLINK_AUDIT should be 
blocked, got: {}", contents + ); + assert!(result.success()); +} diff --git a/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs b/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs index d516082..30f970c 100644 --- a/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs +++ b/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs @@ -255,41 +255,6 @@ async fn test_udp_allowed_by_default() { assert!(result.success()); } -// ------------------------------------------------------------------ -// 7. All AF_NETLINK sockets blocked (network topology leak) -// ------------------------------------------------------------------ -#[tokio::test] -async fn test_netlink_socket_blocked() { - let out = temp_out("netlink-blocked"); - let script = format!(concat!( - "import socket\n", - "try:\n", - " s = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, 0)\n", - " s.close()\n", - " result = 'ALLOWED'\n", - "except PermissionError:\n", - " result = 'BLOCKED'\n", - "except OSError as e:\n", - " result = f'ERROR:{{e.errno}}'\n", - "open('{out}', 'w').write(result)\n", - ), out = out.display()); - - let policy = base_policy().build().unwrap(); - let result = Sandbox::run_interactive(&policy, &["python3", "-c", &script]) - .await - .unwrap(); - - let contents = std::fs::read_to_string(&out).unwrap_or_default(); - let _ = std::fs::remove_file(&out); - assert_eq!( - contents.trim(), - "BLOCKED", - "AF_NETLINK socket should be blocked, got: {}", - contents.trim() - ); - assert!(result.success()); -} - // ------------------------------------------------------------------ // 8. TCP always allowed even with no_raw_sockets + no_udp // ------------------------------------------------------------------