diff --git a/Cargo.lock b/Cargo.lock index 0c1c903e..bcdf7dca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2379,6 +2379,17 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "dstack-port-forward" +version = "0.5.6" +dependencies = [ + "anyhow", + "nix 0.29.0", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "dstack-sdk" version = "0.1.2" @@ -2516,6 +2527,7 @@ dependencies = [ "clap", "dirs", "dstack-kms-rpc", + "dstack-port-forward", "dstack-types", "dstack-vmm-rpc", "fatfs", @@ -4551,6 +4563,7 @@ dependencies = [ "cfg-if", "cfg_aliases", "libc", + "memoffset", ] [[package]] @@ -7553,6 +7566,7 @@ dependencies = [ "bytes", "futures-core", "futures-sink", + "futures-util", "pin-project-lite", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 218c01a7..6c3c5f0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ members = [ "verifier", "no_std_check", "size-parser", + "port-forward", ] resolver = "2" @@ -64,6 +65,7 @@ dstack-gateway-rpc = { path = "gateway/rpc" } dstack-kms-rpc = { path = "kms/rpc" } dstack-guest-agent-rpc = { path = "guest-agent/rpc" } dstack-vmm-rpc = { path = "vmm/rpc" } +dstack-port-forward = { path = "port-forward" } cc-eventlog = { path = "cc-eventlog" } supervisor = { path = "supervisor" } supervisor-client = { path = "supervisor/client" } diff --git a/docs/bridge-networking.md b/docs/bridge-networking.md new file mode 100644 index 00000000..4dac27bd --- /dev/null +++ b/docs/bridge-networking.md @@ -0,0 +1,217 @@ +# Bridge Networking for VMM + +By default, dstack-vmm uses **user** networking (QEMU's built-in SLIRP stack, no host setup required). Bridge networking is an alternative that provides better performance for high-connection workloads by using kernel-level bridging with TAP devices. + +## When to use bridge networking + +- High connection concurrency (passt becomes CPU-bound at ~25K+ concurrent connections) +- Workloads that need full L2 network access +- Environments where VMs need to be directly reachable on the LAN + +## Configuration + +### VMM global config (`vmm.toml`) + +```toml +[cvm.networking] +mode = "bridge" +bridge = "virbr0" +``` + +### Per-VM override + +Individual VMs can override the global networking mode via: +- **CLI**: `vmm-cli.py deploy --net bridge` or `--net passt` +- **Web UI**: Networking dropdown in the deploy dialog +- **API**: `networking: { mode: "bridge" }` in `VmConfiguration` + +Only the mode is per-VM; the bridge interface name always comes from the global config. + +## Host setup + +### Option A: Using libvirt default network + +libvirt's default network provides a bridge (`virbr0`) with DHCP (dnsmasq) and NAT out of the box. + +```bash +# Install libvirt (if not already present) +sudo apt install -y libvirt-daemon-system + +# Ensure default network is active +sudo virsh net-start default 2>/dev/null +sudo virsh net-autostart default +``` + +Verify: +```bash +ip addr show virbr0 +# Should show 192.168.122.1/24 + +virsh net-dhcp-leases default +# Lists DHCP leases for connected VMs +``` + +### Option B: Manual bridge without libvirt + +Create a bridge with systemd-networkd and run a standalone DHCP server. + +**1. Create the bridge:** + +```bash +# /etc/systemd/network/10-dstack-br.netdev +[NetDev] +Name=dstack-br0 +Kind=bridge + +# /etc/systemd/network/11-dstack-br.network +[Match] +Name=dstack-br0 + +[Network] +Address=10.0.100.1/24 +ConfigureWithoutCarrier=yes +IPMasquerade=both +``` + +```bash +sudo systemctl restart systemd-networkd +``` + +**2. Enable IP forwarding:** + +```bash +echo "net.ipv4.ip_forward=1" | sudo tee /etc/sysctl.d/99-dstack-bridge.conf +sudo sysctl -p /etc/sysctl.d/99-dstack-bridge.conf +``` + +**3. Run a DHCP server (dnsmasq):** + +```bash +sudo apt install -y dnsmasq +``` + +Install the DHCP notification script (notifies VMM when a VM gets an IP so port forwarding can be established): + +```bash +sudo cp scripts/dhcp-notify.sh /usr/local/bin/dhcp-notify.sh +sudo chmod +x /usr/local/bin/dhcp-notify.sh +``` + +Create dnsmasq config: + +```ini +# /etc/dnsmasq.d/dstack-br0.conf +interface=dstack-br0 +bind-interfaces +dhcp-range=10.0.100.10,10.0.100.254,255.255.255.0,12h +dhcp-option=option:router,10.0.100.1 +dhcp-option=option:dns-server,8.8.8.8,1.1.1.1 +dhcp-script=/usr/local/bin/dhcp-notify.sh +``` + +The `dhcp-script` option tells dnsmasq to call the notification script on every lease event. The script sends the MAC and IP to VMM's `ReportDhcpLease` RPC, which triggers automatic port forwarding for the VM. + +```bash +sudo systemctl restart dnsmasq +``` + +**4. Firewall rules (nftables):** + +When the host firewall has a restrictive INPUT policy (e.g. `drop`), the bridge's DHCP and DNS traffic will be silently blocked. libvirt handles this automatically for virbr0, but a standalone bridge needs explicit rules. + +```bash +BRIDGE=dstack-br0 +SUBNET=10.0.100.0/24 + +# Allow DHCP and DNS from VMs (INPUT/OUTPUT) +sudo nft add rule ip filter INPUT iifname "$BRIDGE" udp dport 67 counter accept +sudo nft add rule ip filter INPUT iifname "$BRIDGE" udp dport 53 counter accept +sudo nft add rule ip filter INPUT iifname "$BRIDGE" tcp dport 53 counter accept +sudo nft add rule ip filter OUTPUT oifname "$BRIDGE" udp dport 68 counter accept +sudo nft add rule ip filter OUTPUT oifname "$BRIDGE" udp dport 53 counter accept + +# Allow forwarding for VM traffic +sudo nft add rule ip filter FORWARD ip saddr "$SUBNET" iifname "$BRIDGE" counter accept +sudo nft add rule ip filter FORWARD ip daddr "$SUBNET" oifname "$BRIDGE" ct state related,established counter accept +sudo nft add rule ip filter FORWARD iifname "$BRIDGE" oifname "$BRIDGE" counter accept + +# NAT masquerade for outbound traffic +sudo nft add rule ip nat POSTROUTING ip saddr "$SUBNET" ip daddr 224.0.0.0/24 counter return +sudo nft add rule ip nat POSTROUTING ip saddr "$SUBNET" ip daddr 255.255.255.255 counter return +sudo nft add rule ip nat POSTROUTING ip saddr "$SUBNET" ip daddr != "$SUBNET" counter masquerade +``` + +If the host uses libvirt, nftables rules may be in custom chains (`LIBVIRT_INP`, `LIBVIRT_FWO`, etc.) instead of the default `INPUT`/`FORWARD` chains. Adjust the chain names accordingly. + +To make these rules persistent across reboots, save them with `nft list ruleset > /etc/nftables.conf` or add them to a systemd service. + +**5. Update vmm.toml:** + +```toml +[cvm.networking] +mode = "bridge" +bridge = "dstack-br0" +``` + +### QEMU bridge helper setup (required for both options) + +The bridge helper allows QEMU to create and attach TAP devices without VMM needing root privileges. + +```bash +# Allow QEMU to use the bridge +sudo mkdir -p /etc/qemu +echo "allow virbr0" | sudo tee /etc/qemu/bridge.conf +# Or for manual bridge: echo "allow dstack-br0" | sudo tee /etc/qemu/bridge.conf + +# Set setuid on bridge helper +sudo chmod u+s /usr/lib/qemu/qemu-bridge-helper +``` + +## How it works + +- VMM passes `-netdev bridge,id=net0,br=` to QEMU +- QEMU's bridge helper (setuid) creates a TAP device and attaches it to the bridge +- Guest MAC address is derived from SHA256 of the VM ID, with an optional configurable prefix (stable across restarts for DHCP IP consistency) +- The host DHCP server (dnsmasq) assigns an IP and calls `dhcp-notify.sh`, which notifies VMM via the `ReportDhcpLease` RPC +- VMM matches the MAC address to identify the VM and establishes port forwarding rules +- When QEMU exits, the TAP device is automatically destroyed +- VMM does not need root or `CAP_NET_ADMIN` + +### MAC address prefix + +You can configure a fixed MAC address prefix (0–3 bytes) in vmm.toml: + +```toml +[cvm.networking] +mode = "bridge" +bridge = "dstack-br0" +mac_prefix = "52:54:00" +``` + +The remaining bytes are derived from the VM ID hash. The prefix applies to all networking modes, not just bridge. The locally-administered bit is always set on the first byte. + +## Operational notes + +### Do not restart the bridge while VMs are running + +`virsh net-destroy`/`net-start` (or removing/recreating the bridge) will detach all TAP interfaces from the bridge, breaking VM networking. If this happens, affected VMs must be restarted. + +### Firewall considerations + +- libvirt automatically injects nftables rules for INPUT (DHCP/DNS), FORWARD, and NAT masquerade into its own chains (`LIBVIRT_INP`, `LIBVIRT_FWO`, `LIBVIRT_FWI`, `LIBVIRT_PRT`) +- A standalone bridge requires **all** of these rules to be added manually (see Option B step 4 above). The most common failure mode is a restrictive INPUT policy silently dropping DHCP requests from VMs — if VMs on a custom bridge don't get an IP, check `sudo nft list chain ip filter INPUT` first +- Docker's nftables chains (`DOCKER-FORWARD`) run before libvirt's but do not block virbr0 traffic +- Use `setup-bridge.sh check --bridge ` to diagnose missing rules + +### Mixing networking modes + +Bridge and passt VMs can coexist. Set the global default in `vmm.toml` and override per-VM as needed: + +```bash +# Global default is bridge, but deploy this VM with passt +vmm-cli.py deploy --name my-vm --image dstack-0.5.6 --compose app.yaml --net passt +``` + +### vhost-net and TDX + +vhost-net (kernel data plane offload for virtio-net) is **not enabled** for bridge mode. TDX encrypts guest memory, which prevents the host kernel from performing DMA-based packet offload. The default QEMU userspace virtio backend is used instead. diff --git a/gateway/dstack-app/deploy-to-vmm.sh b/gateway/dstack-app/deploy-to-vmm.sh index 47da3fcf..a44bc66e 100755 --- a/gateway/dstack-app/deploy-to-vmm.sh +++ b/gateway/dstack-app/deploy-to-vmm.sh @@ -59,6 +59,9 @@ PUBLIC_IP=$(curl -s4 ifconfig.me) # Whether to use ACME staging (yes/no) ACME_STAGING=no +# Networking mode: bridge or user (default: user) +# NET_MODE=bridge + # Subnet index. 0~15 SUBNET_INDEX=0 @@ -219,17 +222,27 @@ fi echo "Deploying dstack-gateway to dstack-vmm..." -$CLI deploy \ - --name dstack-gateway \ - --app-id "$GATEWAY_APP_ID" \ - --compose .app-compose.json \ - --env-file .app_env \ - --image $OS_IMAGE \ - --port tcp:$GATEWAY_RPC_ADDR:8000 \ - --port tcp:$GATEWAY_ADMIN_RPC_ADDR:8001 \ - --port tcp:$GATEWAY_SERVING_ADDR:443 \ - --port tcp:$GUEST_AGENT_ADDR:8090 \ - --port udp:$WG_ADDR:51820 \ - --vcpu 32 \ - --memory 32G \ +DEPLOY_ARGS=( + --name dstack-gateway + --app-id "$GATEWAY_APP_ID" + --compose .app-compose.json + --env-file .app_env + --image "$OS_IMAGE" + --vcpu 32 + --memory 32G +) + +if [ "${NET_MODE:-}" = "bridge" ]; then + DEPLOY_ARGS+=(--net bridge) +else + DEPLOY_ARGS+=( + --port "tcp:$GATEWAY_RPC_ADDR:8000" + --port "tcp:$GATEWAY_ADMIN_RPC_ADDR:8001" + --port "tcp:$GATEWAY_SERVING_ADDR:443" + --port "tcp:$GUEST_AGENT_ADDR:8090" + --port "udp:$WG_ADDR:51820" + ) +fi + +$CLI deploy "${DEPLOY_ARGS[@]}" diff --git a/guest-agent/src/guest_api_service.rs b/guest-agent/src/guest_api_service.rs index 6af35136..8306a104 100644 --- a/guest-agent/src/guest_api_service.rs +++ b/guest-agent/src/guest_api_service.rs @@ -145,8 +145,11 @@ fn get_interfaces() -> Vec { sysinfo::Networks::new_with_refreshed_list() .into_iter() .filter_map(|(interface_name, network)| { - if !(interface_name == "dstack-wg0" || interface_name.starts_with("enp")) { - // We only get dstack-wg0 and enp interfaces. + if !(interface_name == "dstack-wg0" + || interface_name.starts_with("enp") + || interface_name.starts_with("eth")) + { + // We only get dstack-wg0, enp and eth interfaces. // Docker bridge is not included due to privacy concerns. return None; } diff --git a/port-forward/Cargo.toml b/port-forward/Cargo.toml new file mode 100644 index 00000000..5422080d --- /dev/null +++ b/port-forward/Cargo.toml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: © 2024-2025 Phala Network +# +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "dstack-port-forward" +version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +tracing.workspace = true +tokio = { workspace = true, features = ["net", "rt", "io-util", "macros", "sync", "time"] } +tokio-util = { version = "0.7", features = ["rt"] } +nix = { workspace = true, features = ["fs", "net", "zerocopy"] } + +[dev-dependencies] +tokio = { workspace = true, features = ["full"] } diff --git a/port-forward/src/lib.rs b/port-forward/src/lib.rs new file mode 100644 index 00000000..beeb87d9 --- /dev/null +++ b/port-forward/src/lib.rs @@ -0,0 +1,137 @@ +// SPDX-FileCopyrightText: © 2024-2025 Phala Network +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::net::{IpAddr, SocketAddr}; + +use anyhow::{bail, Result}; +use tokio::task::JoinHandle; +use tokio_util::sync::CancellationToken; + +mod tcp; +mod udp; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Protocol { + Tcp, + Udp, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ForwardRule { + pub protocol: Protocol, + pub listen_addr: IpAddr, + pub listen_port: u16, + pub target_ip: IpAddr, + pub target_port: u16, +} + +impl ForwardRule { + fn listen_sock(&self) -> SocketAddr { + SocketAddr::new(self.listen_addr, self.listen_port) + } + + fn target_sock(&self) -> SocketAddr { + SocketAddr::new(self.target_ip, self.target_port) + } +} + +struct RunningRule { + cancel: CancellationToken, + task: JoinHandle<()>, +} + +/// Manages a dynamic set of port forwarding rules. +/// +/// Rules can be added and removed at runtime. Dropping the service cancels all +/// forwarding tasks. +pub struct ForwardService { + cancel: CancellationToken, + rules: HashMap, +} + +impl ForwardService { + pub fn new() -> Self { + Self { + cancel: CancellationToken::new(), + rules: HashMap::new(), + } + } + + /// Add a forwarding rule. Returns error if the rule already exists. + pub fn add_rule(&mut self, rule: ForwardRule) -> Result<()> { + if self.rules.contains_key(&rule) { + bail!("rule already exists: {:?}", rule); + } + + let token = self.cancel.child_token(); + let listen = rule.listen_sock(); + let target = rule.target_sock(); + + let task = match rule.protocol { + Protocol::Tcp => tokio::spawn(tcp::run_tcp_forwarder(listen, target, token.clone())), + Protocol::Udp => tokio::spawn(udp::run_udp_forwarder(listen, target, token.clone())), + }; + + tracing::info!( + "added forwarding rule: {listen} -> {target} ({:?})", + rule.protocol + ); + self.rules.insert( + rule, + RunningRule { + cancel: token, + task, + }, + ); + Ok(()) + } + + /// Remove a forwarding rule and stop its task. + pub async fn remove_rule(&mut self, rule: &ForwardRule) -> Result<()> { + match self.rules.remove(rule) { + Some(running) => { + running.cancel.cancel(); + let _ = running.task.await; + tracing::info!( + "removed forwarding rule: {} -> {} ({:?})", + rule.listen_sock(), + rule.target_sock(), + rule.protocol, + ); + Ok(()) + } + None => bail!("rule not found: {:?}", rule), + } + } + + /// Number of active rules. + pub fn len(&self) -> usize { + self.rules.len() + } + + pub fn is_empty(&self) -> bool { + self.rules.is_empty() + } + + /// Gracefully stop all forwarding and wait for tasks to finish. + pub async fn shutdown(mut self) { + self.cancel.cancel(); + for (_, running) in std::mem::take(&mut self.rules) { + let _ = running.task.await; + } + } +} + +impl Default for ForwardService { + fn default() -> Self { + Self::new() + } +} + +impl Drop for ForwardService { + fn drop(&mut self) { + self.cancel.cancel(); + } +} diff --git a/port-forward/src/tcp.rs b/port-forward/src/tcp.rs new file mode 100644 index 00000000..50878552 --- /dev/null +++ b/port-forward/src/tcp.rs @@ -0,0 +1,207 @@ +// SPDX-FileCopyrightText: © 2024-2025 Phala Network +// +// SPDX-License-Identifier: Apache-2.0 + +use std::net::SocketAddr; +use std::os::unix::io::{AsFd, AsRawFd, BorrowedFd, OwnedFd}; + +use tokio::io; +use tokio::net::{TcpListener, TcpStream}; +use tokio_util::sync::CancellationToken; + +/// Run a TCP port forwarder: listen on `listen_addr`, forward to `target`. +pub async fn run_tcp_forwarder( + listen_addr: SocketAddr, + target: SocketAddr, + cancel: CancellationToken, +) { + let listener = match TcpListener::bind(listen_addr).await { + Ok(l) => l, + Err(e) => { + tracing::error!("tcp bind {listen_addr} failed: {e}"); + return; + } + }; + tracing::info!("tcp forwarding {listen_addr} -> {target}"); + + loop { + tokio::select! { + _ = cancel.cancelled() => break, + result = listener.accept() => { + let (client, peer) = match result { + Ok(v) => v, + Err(e) => { + tracing::warn!("tcp accept on {listen_addr}: {e}"); + continue; + } + }; + let cancel = cancel.child_token(); + tokio::spawn(handle_tcp_connection(client, peer, target, cancel)); + } + } + } +} + +async fn handle_tcp_connection( + mut client: TcpStream, + peer: SocketAddr, + target: SocketAddr, + cancel: CancellationToken, +) { + let mut server = match TcpStream::connect(target).await { + Ok(s) => s, + Err(e) => { + tracing::debug!("tcp connect to {target} for {peer}: {e}"); + return; + } + }; + + let _ = client.set_nodelay(true); + let _ = server.set_nodelay(true); + + let result = tokio::select! { + _ = cancel.cancelled() => return, + r = relay(&mut client, &mut server) => r, + }; + + if let Err(e) = result { + tracing::debug!("tcp relay {peer} <-> {target}: {e}"); + } +} + +async fn relay(client: &mut TcpStream, server: &mut TcpStream) -> io::Result<()> { + // Try splice(2) zero-copy first, fall back to userspace copy. + match splice_bidirectional(client, server).await { + Ok(()) => Ok(()), + Err(e) if e.kind() == io::ErrorKind::Unsupported => { + tracing::debug!("splice not supported, falling back to copy_bidirectional"); + io::copy_bidirectional(client, server).await?; + Ok(()) + } + Err(e) => Err(e), + } +} + +/// Zero-copy bidirectional TCP relay using Linux splice(2). +/// +/// When one direction hits EOF, select! drops the other direction. +async fn splice_bidirectional(a: &TcpStream, b: &TcpStream) -> io::Result<()> { + let a_fd = a.as_raw_fd(); + let b_fd = b.as_raw_fd(); + + tokio::select! { + r = splice_one_direction(a, a_fd, b, b_fd) => r, + r = splice_one_direction(b, b_fd, a, a_fd) => r, + } +} + +/// Splice data from src fd to dst fd via an intermediate pipe. +/// +/// Uses `TcpStream::try_io` for proper readiness handling: when splice returns +/// EAGAIN, try_io automatically clears the readiness flag so the next +/// `readable().await` / `writable().await` blocks until the fd is truly ready. +async fn splice_one_direction( + src: &TcpStream, + src_fd: i32, + dst: &TcpStream, + dst_fd: i32, +) -> io::Result<()> { + use nix::fcntl::{splice, SpliceFFlags}; + use nix::unistd::pipe; + + let (pipe_r, pipe_w): (OwnedFd, OwnedFd) = pipe().map_err(io::Error::other)?; + + let flags = SpliceFFlags::SPLICE_F_NONBLOCK | SpliceFFlags::SPLICE_F_MOVE; + let chunk: usize = 65536; + + let src_bfd = unsafe { BorrowedFd::borrow_raw(src_fd) }; + let dst_bfd = unsafe { BorrowedFd::borrow_raw(dst_fd) }; + + loop { + src.readable().await?; + + let n = match src.try_io(io::Interest::READABLE, || { + match splice(src_bfd, None, pipe_w.as_fd(), None, chunk, flags) { + Ok(n) => Ok(n), + Err(nix::errno::Errno::EAGAIN) => Err(io::ErrorKind::WouldBlock.into()), + Err(nix::errno::Errno::EINVAL) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "splice not supported for this fd type", + )), + Err(e) => Err(io::Error::other(e)), + } + }) { + Ok(0) => return Ok(()), + Ok(n) => n, + Err(e) if e.kind() == io::ErrorKind::WouldBlock => continue, + Err(e) => return Err(e), + }; + + let mut written = 0; + while written < n { + dst.writable().await?; + match dst.try_io(io::Interest::WRITABLE, || { + match splice(pipe_r.as_fd(), None, dst_bfd, None, n - written, flags) { + Ok(w) => Ok(w), + Err(nix::errno::Errno::EAGAIN) => Err(io::ErrorKind::WouldBlock.into()), + Err(e) => Err(io::Error::other(e)), + } + }) { + Ok(w) => written += w, + Err(e) if e.kind() == io::ErrorKind::WouldBlock => continue, + Err(e) => return Err(e), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + + #[tokio::test] + async fn test_tcp_forward_roundtrip() { + // Echo server + let echo = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let echo_addr = echo.local_addr().unwrap(); + tokio::spawn(async move { + loop { + let (mut conn, _) = echo.accept().await.unwrap(); + tokio::spawn(async move { + let mut buf = vec![0u8; 4096]; + loop { + let n = match conn.read(&mut buf).await { + Ok(0) | Err(_) => break, + Ok(n) => n, + }; + if conn.write_all(&buf[..n]).await.is_err() { + break; + } + } + }); + } + }); + + // Start forwarder on a free port + let tmp = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let fwd_addr = tmp.local_addr().unwrap(); + drop(tmp); + + let cancel = CancellationToken::new(); + tokio::spawn(run_tcp_forwarder(fwd_addr, echo_addr, cancel.child_token())); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let mut conn = TcpStream::connect(fwd_addr).await.unwrap(); + conn.write_all(b"hello splice").await.unwrap(); + + let mut buf = vec![0u8; 64]; + let n = tokio::time::timeout(std::time::Duration::from_secs(2), conn.read(&mut buf)) + .await + .unwrap() + .unwrap(); + assert_eq!(&buf[..n], b"hello splice"); + + cancel.cancel(); + } +} diff --git a/port-forward/src/udp.rs b/port-forward/src/udp.rs new file mode 100644 index 00000000..78b7af5b --- /dev/null +++ b/port-forward/src/udp.rs @@ -0,0 +1,201 @@ +// SPDX-FileCopyrightText: © 2024-2025 Phala Network +// +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::net::SocketAddr; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use tokio::net::UdpSocket; +use tokio::sync::Mutex; +use tokio_util::sync::CancellationToken; + +const UDP_IDLE_TIMEOUT: Duration = Duration::from_secs(60); +const UDP_BUF_SIZE: usize = 65535; +const CLEANUP_INTERVAL: Duration = Duration::from_secs(10); + +struct ClientState { + /// Ephemeral socket for communicating with the target on behalf of this client. + socket: Arc, + last_active: Instant, + /// Cancel token for the return-path task. + _cancel: CancellationToken, +} + +/// Run a UDP port forwarder: listen on `listen_addr`, forward to `target`. +pub async fn run_udp_forwarder( + listen_addr: SocketAddr, + target: SocketAddr, + cancel: CancellationToken, +) { + let host_socket = match UdpSocket::bind(listen_addr).await { + Ok(s) => s, + Err(e) => { + tracing::error!("udp bind {listen_addr} failed: {e}"); + return; + } + }; + tracing::info!("udp forwarding {listen_addr} -> {target}"); + + let host_socket = Arc::new(host_socket); + let clients: Arc>> = + Arc::new(Mutex::new(HashMap::new())); + + // Periodic cleanup of idle client entries + let cleanup_clients = clients.clone(); + let cleanup_cancel = cancel.child_token(); + tokio::spawn(async move { + loop { + tokio::select! { + _ = cleanup_cancel.cancelled() => break, + _ = tokio::time::sleep(CLEANUP_INTERVAL) => { + let mut map = cleanup_clients.lock().await; + let now = Instant::now(); + map.retain(|addr, entry| { + let alive = now.duration_since(entry.last_active) < UDP_IDLE_TIMEOUT; + if !alive { + tracing::debug!("udp client {addr} idle timeout"); + } + alive + // Dropping ClientState cancels the return-path task via _cancel + }); + } + } + } + }); + + let mut buf = vec![0u8; UDP_BUF_SIZE]; + + loop { + tokio::select! { + _ = cancel.cancelled() => break, + result = host_socket.recv_from(&mut buf) => { + let (n, client_addr) = match result { + Ok(v) => v, + Err(e) => { + tracing::warn!("udp recv on {listen_addr}: {e}"); + continue; + } + }; + + let data = &buf[..n]; + let mut map = clients.lock().await; + + // Get or create per-client ephemeral socket + let entry = match map.entry(client_addr) { + std::collections::hash_map::Entry::Occupied(e) => e.into_mut(), + std::collections::hash_map::Entry::Vacant(e) => { + let socket = match (|| -> anyhow::Result> { + let std_sock = std::net::UdpSocket::bind("0.0.0.0:0")?; + std_sock.set_nonblocking(true)?; + Ok(Arc::new(UdpSocket::from_std(std_sock)?)) + })() { + Ok(s) => s, + Err(e) => { + tracing::warn!("udp ephemeral socket for {client_addr}: {e}"); + continue; + } + }; + + let return_cancel = cancel.child_token(); + tokio::spawn(udp_return_path( + host_socket.clone(), + socket.clone(), + client_addr, + return_cancel.child_token(), + )); + + e.insert(ClientState { + socket, + last_active: Instant::now(), + _cancel: return_cancel, + }) + } + }; + entry.last_active = Instant::now(); + + // Forward client data to target + if let Err(e) = entry.socket.send_to(data, target).await { + tracing::debug!("udp send to {target} for {client_addr}: {e}"); + } + } + } + } +} + +/// Return path: read from the per-client ephemeral socket, send back to the +/// original client via the host socket. +async fn udp_return_path( + host_socket: Arc, + client_socket: Arc, + client_addr: SocketAddr, + cancel: CancellationToken, +) { + let mut buf = vec![0u8; UDP_BUF_SIZE]; + + loop { + tokio::select! { + _ = cancel.cancelled() => break, + result = client_socket.recv_from(&mut buf) => { + let (n, _from) = match result { + Ok(v) => v, + Err(e) => { + tracing::debug!("udp return recv for {client_addr}: {e}"); + break; + } + }; + if let Err(e) = host_socket.send_to(&buf[..n], client_addr).await { + tracing::debug!("udp return send to {client_addr}: {e}"); + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_udp_forward_roundtrip() { + // UDP echo server + let echo = UdpSocket::bind("127.0.0.1:0").await.unwrap(); + let echo_addr = echo.local_addr().unwrap(); + tokio::spawn(async move { + let mut buf = vec![0u8; UDP_BUF_SIZE]; + loop { + match echo.recv_from(&mut buf).await { + Ok((n, from)) => { + let _ = echo.send_to(&buf[..n], from).await; + } + Err(_) => break, + } + } + }); + + // Start forwarder + let cancel = CancellationToken::new(); + // Bind to get a free port, then release and let forwarder bind + let tmp = UdpSocket::bind("127.0.0.1:0").await.unwrap(); + let fwd_addr = tmp.local_addr().unwrap(); + drop(tmp); + + let token = cancel.child_token(); + tokio::spawn(run_udp_forwarder(fwd_addr, echo_addr, token)); + tokio::time::sleep(Duration::from_millis(50)).await; + + // Send through forwarder + let client = UdpSocket::bind("127.0.0.1:0").await.unwrap(); + client.send_to(b"hello udp", fwd_addr).await.unwrap(); + + let mut buf = vec![0u8; 64]; + let (n, _) = tokio::time::timeout(Duration::from_secs(2), client.recv_from(&mut buf)) + .await + .unwrap() + .unwrap(); + assert_eq!(&buf[..n], b"hello udp"); + + cancel.cancel(); + } +} diff --git a/scripts/dhcp-notify.sh b/scripts/dhcp-notify.sh new file mode 100755 index 00000000..697b3dd1 --- /dev/null +++ b/scripts/dhcp-notify.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-FileCopyrightText: © 2025 Phala Network +# SPDX-License-Identifier: Apache-2.0 +# +# DHCP lease notification script for dnsmasq. +# +# Called by dnsmasq via --dhcp-script on lease events. +# Notifies dstack-vmm of the guest's MAC and IP so that port +# forwarding can be established automatically. +# +# Arguments (set by dnsmasq): +# $1 action add | del | old +# $2 mac MAC address of the guest NIC +# $3 ip IPv4 address assigned by DHCP +# $4 hostname (optional) +# +# Configuration: +# VMM_URL Base URL of dstack-vmm (default: http://127.0.0.1:9080) + +ACTION="$1" +MAC="$2" +IP="$3" + +VMM_URL="${VMM_URL:-http://127.0.0.1:9080}" + +logger -t dhcp-notify "action=$ACTION mac=$MAC ip=$IP" + +case "$ACTION" in + add|old) + curl -s -X POST "${VMM_URL}/prpc/ReportDhcpLease" \ + -H 'Content-Type: application/json' \ + -d "{\"mac\":\"$MAC\",\"ip\":\"$IP\"}" \ + || logger -t dhcp-notify "failed to notify VMM" + ;; + del) + # Could clear forwarding on lease expiry; not implemented yet. + ;; +esac diff --git a/scripts/setup-bridge.sh b/scripts/setup-bridge.sh new file mode 100755 index 00000000..f6fbf224 --- /dev/null +++ b/scripts/setup-bridge.sh @@ -0,0 +1,893 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: © 2025 Phala Network +# SPDX-License-Identifier: Apache-2.0 +# +# Bridge networking setup and diagnostics for dstack-vmm. +# +# Setup modes: +# libvirt Use libvirt default network (virbr0, dnsmasq, NAT included) +# standalone Create bridge + dnsmasq + NAT without libvirt +# +# Usage: +# setup-bridge.sh check [--bridge NAME] +# setup-bridge.sh setup --mode libvirt [--bridge virbr0] +# setup-bridge.sh setup --mode standalone [--bridge dstack-br0] +# setup-bridge.sh destroy [--bridge NAME] + +set -euo pipefail + +BRIDGE="virbr0" +SETUP_MODE="" # libvirt | standalone +DRY_RUN=false +COMMAND="" +PASS=0 +FAIL=0 +WARN=0 + +# --- Output helpers --- + +red() { printf '\033[31m%s\033[0m' "$*"; } +green() { printf '\033[32m%s\033[0m' "$*"; } +yellow() { printf '\033[33m%s\033[0m' "$*"; } +bold() { printf '\033[1m%s\033[0m' "$*"; } + +check_pass() { PASS=$((PASS+1)); echo " $(green '[PASS]') $*"; } +check_fail() { FAIL=$((FAIL+1)); echo " $(red '[FAIL]') $*"; } +check_warn() { WARN=$((WARN+1)); echo " $(yellow '[WARN]') $*"; } +check_info() { echo " [INFO] $*"; } + +run_cmd() { + if $DRY_RUN; then + echo " [DRY-RUN] $*" + else + "$@" + fi +} + +# --- Detect qemu-bridge-helper path --- + +find_bridge_helper() { + local paths=( + /usr/lib/qemu/qemu-bridge-helper + /usr/libexec/qemu-bridge-helper + /usr/local/lib/qemu/qemu-bridge-helper + /usr/local/libexec/qemu-bridge-helper + ) + for p in "${paths[@]}"; do + if [[ -f "$p" ]]; then + echo "$p" + return 0 + fi + done + return 1 +} + +# --- Detect current bridge provider --- + +# Returns "libvirt:" if bridge is managed by a libvirt network, +# "standalone" otherwise. +detect_bridge_provider() { + if command -v virsh &>/dev/null; then + local name br + while read -r name; do + [[ -z "$name" ]] && continue + br=$(virsh net-dumpxml "$name" 2>/dev/null | grep -oP "/dev/null) + fi + echo "standalone" +} + +# --- Check functions --- + +check_bridge_helper() { + echo + bold "qemu-bridge-helper" + local helper + if ! helper=$(find_bridge_helper); then + check_fail "qemu-bridge-helper not found" + check_info "Install QEMU: sudo apt install qemu-system-x86" + return + fi + check_pass "found at $helper" + + if [[ -u "$helper" ]]; then + check_pass "setuid bit is set" + else + check_fail "setuid bit not set" + check_info "Fix: sudo chmod u+s $helper" + fi +} + +check_bridge_conf() { + echo + bold "/etc/qemu/bridge.conf" + local conf="/etc/qemu/bridge.conf" + if [[ ! -f "$conf" ]]; then + check_fail "$conf does not exist" + check_info "Fix: sudo mkdir -p /etc/qemu && echo 'allow $BRIDGE' | sudo tee $conf" + return + fi + check_pass "$conf exists" + + if grep -qE "^allow[[:space:]]+($BRIDGE|all)[[:space:]]*$" "$conf" 2>/dev/null; then + check_pass "bridge '$BRIDGE' is allowed" + else + check_fail "bridge '$BRIDGE' not found in $conf" + check_info "Fix: echo 'allow $BRIDGE' | sudo tee -a $conf" + fi +} + +check_bridge_interface() { + echo + bold "bridge interface: $BRIDGE" + if ! ip link show "$BRIDGE" &>/dev/null; then + check_fail "bridge '$BRIDGE' does not exist" + check_info "Run: $(basename "$0") setup --mode libvirt" + check_info " or: $(basename "$0") setup --mode standalone --bridge $BRIDGE" + return + fi + + local state + state=$(ip -j link show "$BRIDGE" 2>/dev/null | python3 -c "import sys,json;print(json.load(sys.stdin)[0].get('operstate',''))" 2>/dev/null || echo "UNKNOWN") + if [[ "$state" == "UP" ]]; then + check_pass "interface is UP" + else + check_warn "interface state: $state (expected UP)" + fi + + local addr + addr=$(ip -4 -o addr show "$BRIDGE" 2>/dev/null | awk '{print $4}' | head -1) + if [[ -n "$addr" ]]; then + check_pass "has IP address: $addr" + else + check_fail "no IPv4 address assigned" + fi +} + +check_dhcp() { + echo + bold "DHCP service" + + local provider + provider=$(detect_bridge_provider) + + if [[ "$provider" == libvirt:* ]]; then + local net_name="${provider#libvirt:}" + if virsh net-info "$net_name" 2>/dev/null | grep "Active:.*yes" >/dev/null; then + check_pass "libvirt network '$net_name' active on $BRIDGE (includes DHCP)" + return + else + check_fail "libvirt network '$net_name' exists but not active" + check_info "Fix: sudo virsh net-start $net_name" + return + fi + fi + + # Standalone: check dnsmasq + if pgrep -f "dnsmasq.*$BRIDGE" &>/dev/null; then + local pid + pid=$(pgrep -f "dnsmasq.*$BRIDGE" | head -1) + check_pass "dnsmasq running for $BRIDGE (pid $pid)" + return + fi + + if [[ -d /etc/dnsmasq.d ]]; then + if [[ -f "/etc/dnsmasq.d/dstack-${BRIDGE}.conf" ]] || grep -rl "interface=$BRIDGE" /etc/dnsmasq.d/ &>/dev/null 2>&1; then + if systemctl is-active dnsmasq &>/dev/null; then + check_pass "dnsmasq service active with $BRIDGE config" + return + else + check_fail "dnsmasq config exists but service not running" + check_info "Fix: sudo systemctl start dnsmasq" + return + fi + fi + fi + + check_fail "no DHCP server found for $BRIDGE" + check_info "Run: $(basename "$0") setup --mode standalone --bridge $BRIDGE" +} + +check_dhcp_notify() { + echo + bold "DHCP lease notification" + + local provider + provider=$(detect_bridge_provider) + + if [[ "$provider" == libvirt:* ]]; then + check_warn "libvirt DHCP does not support dhcp-script callback" + check_info "port forwarding requires manual PRPC call or alternative notification" + return + fi + + # Check dnsmasq config for dhcp-script + local conf_files=(/etc/dnsmasq.d/*"$BRIDGE"* /etc/dnsmasq.d/*.conf) + local found_script="" + for f in "${conf_files[@]}"; do + [[ -f "$f" ]] || continue + local script_path + script_path=$(grep -oP '^dhcp-script=\K.*' "$f" 2>/dev/null || true) + if [[ -n "$script_path" ]]; then + found_script="$script_path" + break + fi + done + + if [[ -z "$found_script" ]]; then + check_warn "no dhcp-script configured in dnsmasq" + check_info "port forwarding will not be set up automatically" + check_info "add 'dhcp-script=/usr/local/bin/dhcp-notify.sh' to dnsmasq config" + return + fi + + if [[ -x "$found_script" ]]; then + check_pass "dhcp-script configured: $found_script" + else + check_fail "dhcp-script $found_script is not executable or missing" + check_info "Fix: sudo chmod +x $found_script" + fi +} + +check_ip_forward() { + echo + bold "IP forwarding" + local val + val=$(sysctl -n net.ipv4.ip_forward 2>/dev/null || echo "0") + if [[ "$val" == "1" ]]; then + check_pass "net.ipv4.ip_forward = 1" + else + check_fail "net.ipv4.ip_forward = $val" + check_info "Fix: sudo sysctl -w net.ipv4.ip_forward=1" + fi +} + +check_nat_rules() { + echo + bold "NAT / masquerade rules" + + local subnet + subnet=$(ip -4 -o addr show "$BRIDGE" 2>/dev/null | awk '{print $4}' | head -1) + if [[ -z "$subnet" ]]; then + check_warn "cannot determine subnet (bridge has no IP)" + return + fi + + local net_cidr + net_cidr=$(python3 -c " +import ipaddress +iface = ipaddress.ip_interface('$subnet') +print(iface.network) +" 2>/dev/null || echo "") + + if [[ -z "$net_cidr" ]]; then + check_warn "cannot parse subnet $subnet" + return + fi + + local nft_rules="" + if command -v nft &>/dev/null; then + nft_rules=$(sudo nft list ruleset 2>/dev/null || true) + fi + + if [[ -n "$nft_rules" ]]; then + if echo "$nft_rules" | grep -q "masquerade" && \ + echo "$nft_rules" | grep -q "${net_cidr%/*}"; then + check_pass "nftables masquerade rules found for $net_cidr" + return + fi + fi + + if command -v iptables &>/dev/null; then + if sudo iptables -t nat -L -n 2>/dev/null | grep -i "masquerade" >/dev/null && \ + sudo iptables -t nat -L -n 2>/dev/null | grep "${net_cidr%/*}" >/dev/null; then + check_pass "iptables masquerade rules found for $net_cidr" + return + fi + fi + + check_fail "no NAT masquerade rules found for $net_cidr" + check_info "Libvirt adds these automatically." + check_info "For standalone: systemd-networkd IPMasquerade=both or manual nftables rules." +} + +check_dhcp_firewall() { + echo + bold "DHCP/DNS firewall rules" + + local nft_rules="" + if command -v nft &>/dev/null; then + nft_rules=$(sudo nft list ruleset 2>/dev/null || true) + fi + + if [[ -z "$nft_rules" ]]; then + check_warn "nft not available, cannot check firewall rules" + return + fi + + # Check INPUT policy + local input_policy + input_policy=$(echo "$nft_rules" | grep -A1 'chain INPUT' | grep -oP 'policy \K\w+' | head -1 || echo "") + + if [[ "$input_policy" == "accept" ]]; then + check_pass "INPUT policy is accept (DHCP/DNS allowed by default)" + return + fi + + # Restrictive INPUT policy — need explicit rules + if echo "$nft_rules" | grep -q "iifname \"$BRIDGE\".*udp dport 67.*accept"; then + check_pass "DHCP input rule for $BRIDGE" + else + check_fail "no DHCP input rule for $BRIDGE (INPUT policy: ${input_policy:-unknown})" + check_info "VMs will not get DHCP leases without this rule" + check_info "Fix: sudo nft add rule ip filter INPUT iifname \"$BRIDGE\" udp dport 67 counter accept" + fi + + if echo "$nft_rules" | grep -q "iifname \"$BRIDGE\".*udp dport 53.*accept"; then + check_pass "DNS input rule for $BRIDGE" + else + check_warn "no DNS input rule for $BRIDGE" + fi +} + +check_forward_rules() { + echo + bold "forwarding rules" + + local nft_rules="" + if command -v nft &>/dev/null; then + nft_rules=$(sudo nft list ruleset 2>/dev/null || true) + fi + + if [[ -n "$nft_rules" ]] && echo "$nft_rules" | grep -q "iifname \"$BRIDGE\".*accept"; then + check_pass "nftables forward accept rule for $BRIDGE" + return + fi + + if command -v iptables &>/dev/null; then + if sudo iptables -L FORWARD -n 2>/dev/null | grep -i "$BRIDGE.*ACCEPT" >/dev/null; then + check_pass "iptables forward accept rule for $BRIDGE" + return + fi + fi + + local policy + policy=$(sudo iptables -L FORWARD 2>/dev/null | head -1 | grep -oP '\(policy \K[^)]+' || echo "") + if [[ "$policy" == "ACCEPT" ]]; then + check_pass "forward policy is ACCEPT" + return + fi + + check_warn "no explicit forward rules found for $BRIDGE (may still work via libvirt chains)" +} + +# --- Setup: common --- + +setup_bridge_conf() { + echo + bold "Setting up /etc/qemu/bridge.conf" + run_cmd sudo mkdir -p /etc/qemu + if [[ -f /etc/qemu/bridge.conf ]] && grep -qE "^allow[[:space:]]+($BRIDGE|all)" /etc/qemu/bridge.conf 2>/dev/null; then + echo " already configured" + else + run_cmd bash -c "echo 'allow $BRIDGE' | sudo tee -a /etc/qemu/bridge.conf" + fi +} + +setup_bridge_helper() { + echo + bold "Setting up qemu-bridge-helper" + local helper + if ! helper=$(find_bridge_helper); then + echo " $(red 'ERROR'): qemu-bridge-helper not found. Install QEMU first." + return 1 + fi + if [[ -u "$helper" ]]; then + echo " setuid already set on $helper" + else + run_cmd sudo chmod u+s "$helper" + echo " setuid set on $helper" + fi +} + +setup_ip_forward() { + echo + bold "Enabling IP forwarding" + local val + val=$(sysctl -n net.ipv4.ip_forward 2>/dev/null || echo "0") + if [[ "$val" == "1" ]]; then + echo " already enabled" + else + run_cmd sudo sysctl -w net.ipv4.ip_forward=1 + run_cmd bash -c "echo 'net.ipv4.ip_forward=1' | sudo tee /etc/sysctl.d/99-dstack-bridge.conf" + fi +} + +# --- Setup: libvirt mode --- + +setup_libvirt() { + echo + bold "Setting up libvirt network" + + if ! command -v virsh &>/dev/null; then + echo " $(red 'ERROR'): virsh not found. Install libvirt first:" + echo " sudo apt install -y libvirt-daemon-system" + return 1 + fi + + # Find existing libvirt network for this bridge + local existing_net="" + local name br + while read -r name; do + [[ -z "$name" ]] && continue + br=$(virsh net-dumpxml "$name" 2>/dev/null | grep -oP "/dev/null) + + if [[ -n "$existing_net" ]]; then + echo " found libvirt network '$existing_net' for bridge $BRIDGE" + if virsh net-info "$existing_net" 2>/dev/null | grep "Active:.*yes" >/dev/null; then + echo " already active" + else + run_cmd sudo virsh net-start "$existing_net" + echo " started network '$existing_net'" + fi + if ! virsh net-info "$existing_net" 2>/dev/null | grep "Autostart:.*yes" >/dev/null; then + run_cmd sudo virsh net-autostart "$existing_net" + fi + elif [[ "$BRIDGE" == "virbr0" ]]; then + if virsh net-info default &>/dev/null; then + if ! virsh net-info default 2>/dev/null | grep "Active:.*yes" >/dev/null; then + run_cmd sudo virsh net-start default + fi + run_cmd sudo virsh net-autostart default + echo " libvirt default network active on virbr0" + else + echo " $(red 'ERROR'): no libvirt default network found" + echo " Recreate it or use --mode standalone" + return 1 + fi + else + echo " $(red 'ERROR'): no libvirt network found for bridge '$BRIDGE'" + echo " Create a libvirt network XML or use --mode standalone" + return 1 + fi + + echo + echo " libvirt provides: bridge, DHCP (dnsmasq), NAT, forwarding rules" +} + +# --- Setup: standalone mode --- + +setup_standalone() { + echo + bold "Setting up standalone bridge: $BRIDGE" + + # 1. Create bridge via systemd-networkd + if ip link show "$BRIDGE" &>/dev/null; then + echo " bridge $BRIDGE already exists" + else + echo " creating bridge $BRIDGE via systemd-networkd" + + local netdev="/etc/systemd/network/50-dstack-${BRIDGE}.netdev" + local network="/etc/systemd/network/51-dstack-${BRIDGE}.network" + + if [[ ! -f "$netdev" ]]; then + run_cmd bash -c "cat > /tmp/.dstack-br-netdev <<'HEREDOC' +[NetDev] +Name=$BRIDGE +Kind=bridge +HEREDOC +sudo mv /tmp/.dstack-br-netdev $netdev" + echo " created $netdev" + fi + + if [[ ! -f "$network" ]]; then + run_cmd bash -c "cat > /tmp/.dstack-br-network <<'HEREDOC' +[Match] +Name=$BRIDGE + +[Network] +Address=10.0.100.1/24 +ConfigureWithoutCarrier=yes +IPMasquerade=both +HEREDOC +sudo mv /tmp/.dstack-br-network $network" + echo " created $network" + fi + + run_cmd sudo systemctl restart systemd-networkd + echo " restarted systemd-networkd" + + echo " waiting for bridge to come up..." + local i + for i in $(seq 1 10); do + if ip link show "$BRIDGE" &>/dev/null; then + break + fi + sleep 0.5 + done + fi + + # 2. DHCP server (dnsmasq) + echo + bold "Setting up dnsmasq for $BRIDGE" + + if ! command -v dnsmasq &>/dev/null; then + echo " installing dnsmasq..." + run_cmd sudo apt install -y dnsmasq + fi + + local conf="/etc/dnsmasq.d/dstack-${BRIDGE}.conf" + if [[ -f "$conf" ]]; then + echo " dnsmasq config already exists at $conf" + else + # Derive DHCP range from bridge IP + local bridge_ip="10.0.100.1" + local dhcp_start="10.0.100.10" + local dhcp_end="10.0.100.254" + local addr + addr=$(ip -4 -o addr show "$BRIDGE" 2>/dev/null | awk '{print $4}' | head -1 || true) + if [[ -n "$addr" ]]; then + bridge_ip="${addr%/*}" + local prefix="${bridge_ip%.*}" + dhcp_start="${prefix}.10" + dhcp_end="${prefix}.254" + fi + + # Install dhcp-notify.sh if present + local notify_script="/usr/local/bin/dhcp-notify.sh" + local dhcp_script_line="" + local src_notify + src_notify="$(cd "$(dirname "$0")" && pwd)/dhcp-notify.sh" + if [[ -f "$src_notify" ]]; then + run_cmd sudo cp "$src_notify" "$notify_script" + run_cmd sudo chmod +x "$notify_script" + dhcp_script_line="dhcp-script=${notify_script}" + echo " installed $notify_script" + else + echo " $(yellow '[WARN]') dhcp-notify.sh not found at $src_notify" + echo " VM port forwarding will not be set up automatically" + fi + + run_cmd bash -c "cat > /tmp/.dstack-dnsmasq </dev/null | awk '{print $4}' | head -1) + if [[ -z "$subnet" ]]; then + echo " $(red 'ERROR'): bridge $BRIDGE has no IP, cannot configure firewall" + return 1 + fi + + local net_cidr + net_cidr=$(python3 -c " +import ipaddress +iface = ipaddress.ip_interface('$subnet') +print(iface.network) +" 2>/dev/null || echo "") + + if [[ -z "$net_cidr" ]]; then + echo " $(red 'ERROR'): cannot parse subnet $subnet" + return 1 + fi + + if ! command -v nft &>/dev/null; then + echo " $(yellow '[WARN]') nft not found, skipping firewall setup" + echo " you may need to configure iptables manually" + return 0 + fi + + # Detect whether to use libvirt chains or default chains + local inp_chain="INPUT" + local out_chain="OUTPUT" + local fwd_chain="FORWARD" + local nat_chain="POSTROUTING" + + if sudo nft list chain ip filter LIBVIRT_INP &>/dev/null 2>&1; then + inp_chain="LIBVIRT_INP" + out_chain="LIBVIRT_OUT" + echo " using libvirt filter chains (LIBVIRT_INP/LIBVIRT_OUT)" + fi + if sudo nft list chain ip filter LIBVIRT_FWO &>/dev/null 2>&1; then + fwd_chain="" # use individual libvirt chains + echo " using libvirt forward chains (LIBVIRT_FWO/FWI/FWX)" + fi + if sudo nft list chain ip nat LIBVIRT_PRT &>/dev/null 2>&1; then + nat_chain="LIBVIRT_PRT" + echo " using libvirt NAT chain (LIBVIRT_PRT)" + fi + + # Check if rules already exist (simple heuristic: look for bridge name in ruleset) + local existing + existing=$(sudo nft list ruleset 2>/dev/null || true) + if echo "$existing" | grep -q "iifname \"$BRIDGE\".*udp dport 67.*accept"; then + echo " firewall rules for $BRIDGE already present, skipping" + return 0 + fi + + echo " adding INPUT/OUTPUT rules for DHCP and DNS" + run_cmd sudo nft add rule ip filter "$inp_chain" iifname "$BRIDGE" udp dport 67 counter accept + run_cmd sudo nft add rule ip filter "$inp_chain" iifname "$BRIDGE" udp dport 53 counter accept + run_cmd sudo nft add rule ip filter "$inp_chain" iifname "$BRIDGE" tcp dport 53 counter accept + run_cmd sudo nft add rule ip filter "$out_chain" oifname "$BRIDGE" udp dport 68 counter accept + run_cmd sudo nft add rule ip filter "$out_chain" oifname "$BRIDGE" udp dport 53 counter accept + + echo " adding FORWARD rules for $net_cidr" + if [[ -z "$fwd_chain" ]]; then + # libvirt forward chains + run_cmd sudo nft add rule ip filter LIBVIRT_FWO ip saddr "$net_cidr" iifname "$BRIDGE" counter accept + run_cmd sudo nft add rule ip filter LIBVIRT_FWI ip daddr "$net_cidr" oifname "$BRIDGE" ct state related,established counter accept + run_cmd sudo nft add rule ip filter LIBVIRT_FWX iifname "$BRIDGE" oifname "$BRIDGE" counter accept + else + run_cmd sudo nft add rule ip filter FORWARD ip saddr "$net_cidr" iifname "$BRIDGE" counter accept + run_cmd sudo nft add rule ip filter FORWARD ip daddr "$net_cidr" oifname "$BRIDGE" ct state related,established counter accept + run_cmd sudo nft add rule ip filter FORWARD iifname "$BRIDGE" oifname "$BRIDGE" counter accept + fi + + echo " adding NAT masquerade for $net_cidr" + run_cmd sudo nft add rule ip nat "$nat_chain" ip saddr "$net_cidr" ip daddr 224.0.0.0/24 counter return + run_cmd sudo nft add rule ip nat "$nat_chain" ip saddr "$net_cidr" ip daddr 255.255.255.255 counter return + run_cmd sudo nft add rule ip nat "$nat_chain" ip saddr "$net_cidr" ip daddr != "$net_cidr" counter masquerade + + echo " firewall rules configured" +} + +# --- Main --- + +usage() { + cat <<'USAGE' +Usage: setup-bridge.sh [options] + +Commands: + check Check bridge networking prerequisites + setup Configure bridge networking prerequisites + destroy Tear down bridge networking configuration + +Options: + --bridge NAME Bridge interface name (default: virbr0) + --mode MODE Setup mode: libvirt or standalone (required for setup) + --dry-run Show what would be done without executing (setup only) + -h, --help Show this help + +Setup modes: + libvirt Use libvirt network. Provides bridge, DHCP, NAT, and + firewall rules automatically. Recommended if libvirt is + already installed. Default bridge: virbr0. + + standalone Create bridge with systemd-networkd and dnsmasq. Does + not require libvirt. Use a custom bridge name to avoid + conflicts with libvirt. + +Examples: + setup-bridge.sh check + setup-bridge.sh check --bridge dstack-br0 + setup-bridge.sh setup --mode libvirt + setup-bridge.sh setup --mode standalone --bridge dstack-br0 + setup-bridge.sh setup --mode standalone --bridge dstack-br0 --dry-run + setup-bridge.sh destroy + setup-bridge.sh destroy --bridge dstack-br0 --dry-run +USAGE +} + +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + check|setup|destroy) + COMMAND="$1"; shift ;; + --bridge) + BRIDGE="${2:?--bridge requires a value}"; shift 2 ;; + --mode) + SETUP_MODE="${2:?--mode requires a value}"; shift 2 ;; + --dry-run) + DRY_RUN=true; shift ;; + -h|--help) + usage; exit 0 ;; + *) + echo "unknown argument: $1"; usage; exit 1 ;; + esac + done + + if [[ -z "$COMMAND" ]]; then + usage + exit 1 + fi +} + +cmd_check() { + echo + echo "$(bold 'dstack bridge networking diagnostics')" + echo "target bridge: $(bold "$BRIDGE")" + + local provider + provider=$(detect_bridge_provider) + if [[ "$provider" == libvirt:* ]]; then + echo "provider: $(bold 'libvirt') (network: ${provider#libvirt:})" + else + echo "provider: $(bold 'standalone')" + fi + + check_bridge_helper + check_bridge_conf + check_bridge_interface + check_dhcp + check_dhcp_notify + check_dhcp_firewall + check_ip_forward + check_nat_rules + check_forward_rules + + echo + echo "---" + echo "$(green "PASS: $PASS") $(red "FAIL: $FAIL") $(yellow "WARN: $WARN")" + + if [[ $FAIL -gt 0 ]]; then + echo + echo "Run '$(basename "$0") setup --mode --bridge $BRIDGE' to fix issues." + return 1 + fi +} + +cmd_setup() { + if [[ -z "$SETUP_MODE" ]]; then + echo + echo "$(red 'ERROR'): --mode is required for setup." + echo + echo "Choose a mode:" + echo " --mode libvirt Use libvirt (provides bridge + DHCP + NAT)" + echo " --mode standalone No libvirt (systemd-networkd + dnsmasq)" + echo + echo "Example:" + echo " $(basename "$0") setup --mode libvirt" + echo " $(basename "$0") setup --mode standalone --bridge dstack-br0" + exit 1 + fi + + if [[ "$SETUP_MODE" != "libvirt" && "$SETUP_MODE" != "standalone" ]]; then + echo "$(red 'ERROR'): unknown mode '$SETUP_MODE'. Use 'libvirt' or 'standalone'." + exit 1 + fi + + echo + echo "$(bold 'dstack bridge networking setup')" + echo "target bridge: $(bold "$BRIDGE")" + echo "mode: $(bold "$SETUP_MODE")" + $DRY_RUN && echo "dry-run: $(yellow 'yes')" + + # Common setup + setup_bridge_conf + setup_bridge_helper + setup_ip_forward + + # Mode-specific setup + case "$SETUP_MODE" in + libvirt) setup_libvirt ;; + standalone) setup_standalone ;; + esac + + echo + echo "---" + echo "$(bold 'Setup complete.')" + echo + echo "Add to vmm.toml:" + echo + echo " [cvm.networking]" + echo " mode = \"bridge\"" + echo " bridge = \"$BRIDGE\"" + echo + echo "Verify: $(basename "$0") check --bridge $BRIDGE" +} + +cmd_destroy() { + local provider + provider=$(detect_bridge_provider) + + echo + echo "$(bold 'dstack bridge networking destroy')" + echo "target bridge: $(bold "$BRIDGE")" + if [[ "$provider" == libvirt:* ]]; then + echo "provider: $(bold 'libvirt') (network: ${provider#libvirt:})" + else + echo "provider: $(bold 'standalone')" + fi + $DRY_RUN && echo "dry-run: $(yellow 'yes')" + + if [[ "$provider" == libvirt:* ]]; then + local net_name="${provider#libvirt:}" + echo + bold "Stopping libvirt network '$net_name'" + if virsh net-info "$net_name" 2>/dev/null | grep "Active:.*yes" >/dev/null; then + run_cmd sudo virsh net-destroy "$net_name" + echo " stopped" + else + echo " already inactive" + fi + if virsh net-info "$net_name" 2>/dev/null | grep "Autostart:.*yes" >/dev/null; then + run_cmd sudo virsh net-autostart --disable "$net_name" + echo " autostart disabled" + fi + else + # Standalone: remove dnsmasq config and systemd-networkd units + local dnsmasq_conf="/etc/dnsmasq.d/dstack-${BRIDGE}.conf" + if [[ -f "$dnsmasq_conf" ]]; then + echo + bold "Removing dnsmasq config" + run_cmd sudo rm -f "$dnsmasq_conf" + echo " removed $dnsmasq_conf" + run_cmd sudo systemctl restart dnsmasq 2>/dev/null || true + echo " restarted dnsmasq" + fi + + local netdev="/etc/systemd/network/50-dstack-${BRIDGE}.netdev" + local network="/etc/systemd/network/51-dstack-${BRIDGE}.network" + if [[ -f "$netdev" ]] || [[ -f "$network" ]]; then + echo + bold "Removing systemd-networkd units" + [[ -f "$netdev" ]] && { run_cmd sudo rm -f "$netdev"; echo " removed $netdev"; } + [[ -f "$network" ]] && { run_cmd sudo rm -f "$network"; echo " removed $network"; } + run_cmd sudo systemctl restart systemd-networkd + echo " restarted systemd-networkd" + fi + + # Delete bridge interface if it still exists + if ip link show "$BRIDGE" &>/dev/null; then + echo + bold "Deleting bridge interface $BRIDGE" + run_cmd sudo ip link set "$BRIDGE" down + run_cmd sudo ip link delete "$BRIDGE" type bridge + echo " deleted" + fi + fi + + # Remove bridge.conf entry + local conf="/etc/qemu/bridge.conf" + if [[ -f "$conf" ]] && grep -qE "^allow[[:space:]]+${BRIDGE}[[:space:]]*$" "$conf" 2>/dev/null; then + echo + bold "Removing '$BRIDGE' from $conf" + run_cmd sudo sed -i "/^allow[[:space:]]\+${BRIDGE}[[:space:]]*$/d" "$conf" + echo " removed" + fi + + echo + echo "---" + echo "$(bold 'Destroy complete.')" +} + +parse_args "$@" + +case "$COMMAND" in + check) cmd_check ;; + setup) cmd_setup ;; + destroy) cmd_destroy ;; +esac diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 95893373..4d166c6d 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -44,6 +44,7 @@ safe-write.workspace = true guest-api = { workspace = true, features = ["client"] } load_config.workspace = true key-provider-client.workspace = true +dstack-port-forward.workspace = true dstack-types.workspace = true hex_fmt.workspace = true lspci.workspace = true diff --git a/vmm/rpc/proto/vmm_rpc.proto b/vmm/rpc/proto/vmm_rpc.proto index d6604ace..fe4c95a9 100644 --- a/vmm/rpc/proto/vmm_rpc.proto +++ b/vmm/rpc/proto/vmm_rpc.proto @@ -99,6 +99,14 @@ message VmConfiguration { bool stopped = 16; // Disable confidential computing (fallback to non-TEE VM). bool no_tee = 17; + // Per-VM networking mode override (if unset, uses global cvm.networking). + optional NetworkingConfig networking = 18; +} + +// Per-VM networking configuration. +message NetworkingConfig { + // Networking mode: "passt", "bridge", "user" + string mode = 1; } // Requested GPU layout for a CVM. @@ -331,4 +339,16 @@ service Vmm { // Reload VMs directory and sync with memory state rpc ReloadVms(google.protobuf.Empty) returns (ReloadVmsResponse); + + // Report a DHCP lease event (called by the DHCP server, e.g. dnsmasq --dhcp-script). + // The VMM resolves the MAC address to a VM and reconfigures port forwarding. + rpc ReportDhcpLease(DhcpLeaseRequest) returns (google.protobuf.Empty); +} + +// DHCP lease event reported by the host DHCP server. +message DhcpLeaseRequest { + // MAC address of the guest NIC (e.g. "02:ab:cd:ef:01:23") + string mac = 1; + // IPv4 address assigned by DHCP (e.g. "192.168.122.100") + string ip = 2; } diff --git a/vmm/src/app.rs b/vmm/src/app.rs index c176ce94..c20ba60a 100644 --- a/vmm/src/app.rs +++ b/vmm/src/app.rs @@ -2,7 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 -use crate::config::{Config, ProcessAnnotation, Protocol}; +use crate::config::{Config, Networking, ProcessAnnotation, Protocol}; +use dstack_port_forward::{ForwardRule, ForwardService, Protocol as FwdProtocol}; use anyhow::{bail, Context, Result}; use bon::Builder; @@ -26,7 +27,7 @@ use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex, MutexGuard}; use std::time::SystemTime; use supervisor_client::SupervisorClient; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; pub use image::{Image, ImageInfo}; pub use qemu::{VmConfig, VmWorkDir}; @@ -66,6 +67,8 @@ pub struct Manifest { pub gateway_urls: Vec, #[serde(default)] pub no_tee: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub networking: Option, } #[derive(Debug, Clone, Serialize, Deserialize, Default)] @@ -120,6 +123,7 @@ pub struct App { pub config: Arc, pub supervisor: SupervisorClient, state: Arc>, + forward_service: Arc>, } impl App { @@ -144,8 +148,10 @@ impl App { state: Arc::new(Mutex::new(AppState { cid_pool, vms: HashMap::new(), + active_forwards: HashMap::new(), })), config: Arc::new(config), + forward_service: Arc::new(tokio::sync::Mutex::new(ForwardService::new())), } } @@ -223,7 +229,12 @@ impl App { }; if !is_running { // Try to stop passt if already running - if self.config.cvm.networking.is_passt() { + let networking = vm_config + .manifest + .networking + .as_ref() + .unwrap_or(&self.config.cvm.networking); + if networking.is_passt() { self.supervisor.stop(&format!("passt-{}", id)).await.ok(); } @@ -259,6 +270,7 @@ impl App { pub async fn stop_vm(&self, id: &str) -> Result<()> { self.set_started(id, false)?; + self.cleanup_port_forward(id).await; self.supervisor.stop(id).await?; Ok(()) } @@ -275,15 +287,13 @@ impl App { self.supervisor.stop(id).await?; } self.supervisor.remove(id).await?; - if self.config.cvm.networking.is_passt() { - let passt_id = format!("passt-{}", id); - let info = self.supervisor.info(&passt_id).await.ok().flatten(); - if let Some(info) = info { - if info.state.status.is_running() { - self.supervisor.stop(&passt_id).await?; - } - self.supervisor.remove(&passt_id).await?; + // Try to clean up passt process if it exists (safe no-op if not passt mode) + let passt_id = format!("passt-{}", id); + if let Some(info) = self.supervisor.info(&passt_id).await.ok().flatten() { + if info.state.status.is_running() { + self.supervisor.stop(&passt_id).await?; } + self.supervisor.remove(&passt_id).await?; } } @@ -294,11 +304,137 @@ impl App { } } + self.cleanup_port_forward(id).await; + let vm_path = self.work_dir(id); fs::remove_dir_all(&vm_path).context("Failed to remove VM directory")?; Ok(()) } + /// Handle a DHCP lease notification: look up VM by MAC address, persist + /// the guest IP, and reconfigure port forwarding. + pub async fn report_dhcp_lease(&self, mac: &str, ip: &str) { + use crate::app::qemu::mac_address_for_vm; + + let vm_id = { + let mut state = self.lock(); + let prefix = self.config.cvm.networking.mac_prefix_bytes(); + let found = state + .vms + .iter_mut() + .find(|(id, _)| mac_address_for_vm(id, &prefix) == mac); + let Some((id, vm)) = found else { + debug!(mac, ip, "DHCP lease for unknown MAC, ignoring"); + return; + }; + let vm_id = id.clone(); + let workdir = VmWorkDir::new(vm.config.workdir.clone()); + if let Err(e) = workdir.set_guest_ip(ip) { + error!(mac, ip, "failed to persist guest IP: {e}"); + } + vm.state.guest_ip = ip.to_string(); + info!(mac, ip, id = %vm_id, "DHCP lease updated"); + vm_id + }; + self.reconfigure_port_forward(&vm_id).await; + } + + /// Reconfigure port forwarding for a bridge-mode VM. + /// + /// Computes desired rules from the VM's port_map and guest_ip, then diffs + /// against currently active rules. Only changed rules are added/removed so + /// existing connections on unchanged rules are not interrupted. + pub async fn reconfigure_port_forward(&self, id: &str) { + let info = { + let state = self.lock(); + let Some(vm) = state.get(id) else { + return; + }; + let networking = vm + .config + .manifest + .networking + .as_ref() + .unwrap_or(&self.config.cvm.networking); + if !networking.is_bridge() || !networking.forward_service_enabled { + return; + } + let guest_ip = vm.state.guest_ip.clone(); + let port_map = vm.config.manifest.port_map.clone(); + (guest_ip, port_map) + }; + + let (guest_ip_str, port_map) = info; + if guest_ip_str.is_empty() { + return; + } + let Ok(guest_ip) = guest_ip_str.parse::() else { + warn!(id, ip = %guest_ip_str, "invalid guest IP, skipping port forward"); + return; + }; + + let new_rules: Vec = port_map + .iter() + .map(|pm| ForwardRule { + protocol: match pm.protocol { + Protocol::Tcp => FwdProtocol::Tcp, + Protocol::Udp => FwdProtocol::Udp, + }, + listen_addr: pm.address, + listen_port: pm.from, + target_ip: guest_ip, + target_port: pm.to, + }) + .collect(); + + let old_rules = self + .lock() + .active_forwards + .get(id) + .cloned() + .unwrap_or_default(); + + let old_set: HashSet<_> = old_rules.iter().collect(); + let new_set: HashSet<_> = new_rules.iter().collect(); + + let mut fwd = self.forward_service.lock().await; + + // Remove rules no longer needed + for rule in old_rules.iter().filter(|r| !new_set.contains(r)) { + if let Err(e) = fwd.remove_rule(rule).await { + warn!(id, ?rule, "failed to remove forwarding rule: {e}"); + } + } + + // Add new rules + for rule in new_rules.iter().filter(|r| !old_set.contains(r)) { + if let Err(e) = fwd.add_rule(rule.clone()) { + warn!(id, ?rule, "failed to add forwarding rule: {e}"); + } + } + + drop(fwd); + self.lock() + .active_forwards + .insert(id.to_string(), new_rules); + info!(id, "port forwarding reconfigured"); + } + + /// Remove all port forwarding rules for a VM. + pub async fn cleanup_port_forward(&self, id: &str) { + let old_rules = self.lock().active_forwards.remove(id).unwrap_or_default(); + if old_rules.is_empty() { + return; + } + let mut fwd = self.forward_service.lock().await; + for rule in &old_rules { + if let Err(e) = fwd.remove_rule(rule).await { + warn!(id, ?rule, "failed to remove forwarding rule: {e}"); + } + } + info!(id, count = old_rules.len(), "port forwarding cleaned up"); + } + pub async fn reload_vms(&self) -> Result<()> { let vm_path = self.vm_dir(); let running_vms = self.supervisor.list().await.context("Failed to list VMs")?; @@ -328,6 +464,22 @@ impl App { } } } + + // Restore port forwarding for running bridge-mode VMs with persisted guest IPs + let vm_ids: Vec = self.lock().vms.keys().cloned().collect(); + for id in vm_ids { + let workdir = self.work_dir(&id); + if let Some(ip) = workdir.guest_ip() { + { + let mut state = self.lock(); + if let Some(vm) = state.get_mut(&id) { + vm.state.guest_ip = ip; + } + } + self.reconfigure_port_forward(&id).await; + } + } + Ok(()) } @@ -865,6 +1017,7 @@ struct VmStateMut { boot_progress: String, boot_error: String, shutdown_progress: String, + guest_ip: String, devices: GpuConfig, events: VecDeque, } @@ -899,6 +1052,8 @@ impl VmState { pub(crate) struct AppState { cid_pool: IdPool, vms: HashMap, + /// Tracks active port forwarding rules per VM ID (bridge mode only). + active_forwards: HashMap>, } impl AppState { diff --git a/vmm/src/app/qemu.rs b/vmm/src/app/qemu.rs index c4ce2bbb..25085424 100644 --- a/vmm/src/app/qemu.rs +++ b/vmm/src/app/qemu.rs @@ -5,7 +5,7 @@ //! QEMU related code use crate::{ app::Manifest, - config::{CvmConfig, GatewayConfig, Networking, PasstNetworking, ProcessAnnotation, Protocol}, + config::{CvmConfig, GatewayConfig, Networking, NetworkingMode, ProcessAnnotation, Protocol}, }; use std::{collections::HashMap, os::unix::fs::PermissionsExt}; use std::{ @@ -28,11 +28,47 @@ use dstack_types::{ AppCompose, KeyProviderKind, }; use dstack_vmm_rpc as pb; +use sha2::{Digest, Sha256}; + +/// Derive a deterministic MAC address from a VM ID using SHA256. +/// Sets locally-administered + unicast bits (0x02) per IEEE 802. +/// Derive a deterministic MAC address from a VM ID. +/// +/// `prefix` may contain 0-3 fixed bytes. The first byte always has the +/// locally-administered + unicast bits set (0x02). Remaining bytes are +/// filled from SHA256(vm_id). +pub fn mac_address_for_vm(vm_id: &str, prefix: &[u8]) -> String { + let hash = Sha256::digest(vm_id.as_bytes()); + let prefix_len = prefix.len().min(3); + let mut bytes = [0u8; 6]; + // Fill prefix bytes + bytes[..prefix_len].copy_from_slice(&prefix[..prefix_len]); + // Fill remaining bytes from hash + for i in prefix_len..6 { + bytes[i] = hash[i - prefix_len]; + } + // Ensure locally-administered + unicast on first byte + bytes[0] = (bytes[0] & 0xfe) | 0x02; + format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5] + ) +} use fs_err as fs; use serde::{Deserialize, Serialize}; use serde_human_bytes as hex_bytes; use supervisor_client::supervisor::{ProcessConfig, ProcessInfo}; +fn networking_to_proto(n: &Networking) -> pb::NetworkingConfig { + let mode = match n.mode { + NetworkingMode::Bridge => "bridge", + NetworkingMode::User => "user", + NetworkingMode::Passt => "passt", + NetworkingMode::Custom => "custom", + }; + pb::NetworkingConfig { mode: mode.into() } +} + #[derive(Debug, Deserialize)] pub struct InstanceInfo { #[serde(default)] @@ -230,6 +266,7 @@ impl VmInfo { gateway_urls: custom_gateway_urls.clone(), stopped, no_tee, + networking: self.manifest.networking.as_ref().map(networking_to_proto), }) }, app_url: self @@ -314,8 +351,8 @@ impl VmState { } impl VmConfig { - fn config_passt(&self, workdir: &VmWorkDir, netcfg: &PasstNetworking) -> Result { - let PasstNetworking { + fn config_passt(&self, workdir: &VmWorkDir, netcfg: &Networking) -> Result { + let Networking { passt_exec, interface, address, @@ -326,6 +363,7 @@ impl VmConfig { map_guest_addr, no_map_gw, ipv4_only, + .. } = netcfg; let passt_socket = workdir.passt_socket(); @@ -382,13 +420,14 @@ impl VmConfig { Protocol::Udp => udp_ports.push(port_spec), } } - // Add TCP port forwarding if any - if !tcp_ports.is_empty() { - passt_cmd.arg("--tcp-ports").arg(tcp_ports.join(",")); + // Add TCP port forwarding — one --tcp-ports per spec to avoid + // exceeding passt's single-argument parser limit. + for spec in &tcp_ports { + passt_cmd.arg("--tcp-ports").arg(spec); } - // Add UDP port forwarding if any - if !udp_ports.is_empty() { - passt_cmd.arg("--udp-ports").arg(udp_ports.join(",")); + // Add UDP port forwarding + for spec in &udp_ports { + passt_cmd.arg("--udp-ports").arg(spec); } passt_cmd.arg("-f").arg("-1"); @@ -510,13 +549,37 @@ impl VmConfig { .arg(format!("file={},if=none,id=hd1", hda_path.display())) .arg("-device") .arg("virtio-blk-pci,drive=hd1"); - let netdev = match &cfg.networking { - Networking::User(netcfg) => { + // Resolve per-VM networking override against global config. + // Per-VM only sets mode; shared fields (bridge name, mac_prefix, etc.) + // are merged from global config. + let resolved_networking; + let networking = match self.manifest.networking.as_ref() { + Some(vm_net) => { + // Per-VM override: take mode from VM, fill other fields from global + resolved_networking = Networking { + mode: vm_net.mode, + bridge: if vm_net.bridge.is_empty() { + cfg.networking.bridge.clone() + } else { + vm_net.bridge.clone() + }, + ..cfg.networking.clone() + }; + &resolved_networking + } + None => &cfg.networking, + }; + // Generate deterministic MAC for all networking modes + let prefix = networking.mac_prefix_bytes(); + let mac = mac_address_for_vm(&self.manifest.id, &prefix); + let net_device = format!("virtio-net-pci,netdev=net0,mac={mac}"); + let netdev = match networking.mode { + NetworkingMode::User => { let mut netdev = format!( "user,id=net0,net={},dhcpstart={},restrict={}", - netcfg.net, - netcfg.dhcp_start, - if netcfg.restrict { "yes" } else { "no" } + networking.net, + networking.dhcp_start, + if networking.restrict { "yes" } else { "no" } ); for pm in &self.manifest.port_map { netdev.push_str(&format!( @@ -529,9 +592,9 @@ impl VmConfig { } netdev } - Networking::Passt(netcfg) => { + NetworkingMode::Passt => { processes.push( - self.config_passt(&workdir, netcfg) + self.config_passt(&workdir, networking) .context("Failed to configure passt")?, ); format!( @@ -539,10 +602,14 @@ impl VmConfig { workdir.passt_socket().display() ) } - Networking::Custom(netcfg) => netcfg.netdev.clone(), + NetworkingMode::Bridge => { + tracing::info!("bridge networking: mac={mac} bridge={}", networking.bridge); + format!("bridge,id=net0,br={}", networking.bridge) + } + NetworkingMode::Custom => networking.netdev.clone(), }; command.arg("-netdev").arg(netdev); - command.arg("-device").arg("virtio-net-pci,netdev=net0"); + command.arg("-device").arg(net_device); self.configure_machine(&mut command, &workdir, cfg, &app_compose)?; self.configure_smbios(&mut command, cfg); @@ -1069,6 +1136,21 @@ impl VmWorkDir { self.shared_dir().join(INSTANCE_INFO) } + pub fn guest_ip_path(&self) -> PathBuf { + self.workdir.join("guest-ip") + } + + pub fn guest_ip(&self) -> Option { + fs::read_to_string(self.guest_ip_path()) + .ok() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + } + + pub fn set_guest_ip(&self, ip: &str) -> Result<()> { + fs::write(self.guest_ip_path(), ip).context("failed to write guest IP") + } + pub fn serial_file(&self) -> PathBuf { self.workdir.join("serial.log") } diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 302a7bc9..d0e2ad70 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -363,46 +363,93 @@ impl Config { } } -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(tag = "mode", rename_all = "lowercase")] -pub enum Networking { - User(UserNetworking), - Passt(PasstNetworking), - Custom(CustomNetworking), -} - -impl Networking { - pub fn is_passt(&self) -> bool { - matches!(self, Networking::Passt(_)) - } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum NetworkingMode { + User, + Passt, + Bridge, + Custom, } +/// Flat networking configuration. The `mode` field selects which backend is +/// active; the remaining fields are only relevant for their respective mode +/// and carry serde defaults so they can be omitted in the config file. #[derive(Debug, Clone, Deserialize, Serialize)] -pub struct UserNetworking { +pub struct Networking { + pub mode: NetworkingMode, + + // ── Bridge fields ────────────────────────────────────────────── + /// Bridge interface to attach TAP device to (e.g., "virbr0") + #[serde(default)] + pub bridge: String, + + /// Enable userspace port forwarding for bridge-mode VMs. + pub forward_service_enabled: bool, + + // ── MAC prefix ───────────────────────────────────────────────── + /// Fixed MAC address prefix (0-3 colon-separated hex bytes, e.g. "02:ab:cd"). + /// Remaining bytes are derived from the VM ID hash. + #[serde(default)] + pub mac_prefix: String, + + // ── User-mode fields ─────────────────────────────────────────── + #[serde(default)] pub net: String, + #[serde(default)] pub dhcp_start: String, + #[serde(default)] pub restrict: bool, -} -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct PasstNetworking { + // ── Passt fields ─────────────────────────────────────────────── + #[serde(default)] pub passt_exec: String, + #[serde(default)] pub interface: String, + #[serde(default)] pub address: String, + #[serde(default)] pub netmask: String, + #[serde(default)] pub gateway: String, + #[serde(default)] pub dns: Vec, + #[serde(default)] pub map_host_loopback: String, + #[serde(default)] pub map_guest_addr: String, + #[serde(default)] pub no_map_gw: bool, + #[serde(default)] pub ipv4_only: bool, -} -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct CustomNetworking { + // ── Custom fields ────────────────────────────────────────────── + #[serde(default)] pub netdev: String, } +impl Networking { + pub fn is_passt(&self) -> bool { + self.mode == NetworkingMode::Passt + } + + pub fn is_bridge(&self) -> bool { + self.mode == NetworkingMode::Bridge + } + + /// Parse the mac_prefix into bytes. Returns 0-3 bytes. + pub fn mac_prefix_bytes(&self) -> Vec { + if self.mac_prefix.is_empty() { + return vec![]; + } + self.mac_prefix + .split(':') + .filter_map(|s| u8::from_str_radix(s, 16).ok()) + .take(3) + .collect() + } +} + #[derive(Debug, Clone, Deserialize, Serialize)] pub struct HostApiConfig { pub address: String, diff --git a/vmm/src/console_v1.html b/vmm/src/console_v1.html index 139f6ae7..66f9370b 100644 --- a/vmm/src/console_v1.html +++ b/vmm/src/console_v1.html @@ -2097,6 +2097,16 @@

Deploy a new instance

+
+ + +
+
@@ -2384,6 +2394,7 @@

Derive VM

no_tee: false, pin_numa: false, hugepages: false, + net_mode: '', user_config: '', kms_urls: [], gateway_urls: [], @@ -2602,6 +2613,7 @@

Derive VM

hugepages: !!source.hugepages, pin_numa: !!source.pin_numa, no_tee: (_a = source.no_tee) !== null && _a !== void 0 ? _a : false, + networking: source.net_mode ? { mode: source.net_mode } : undefined, gpus: source.gpus, kms_urls: (_c = (_b = source.kms_urls) === null || _b === void 0 ? void 0 : _b.filter((url) => url && url.trim().length)) !== null && _c !== void 0 ? _c : [], gateway_urls: (_e = (_d = source.gateway_urls) === null || _d === void 0 ? void 0 : _d.filter((url) => url && url.trim().length)) !== null && _e !== void 0 ? _e : [], @@ -3064,6 +3076,7 @@

Derive VM

hugepages: vmForm.value.hugepages, pin_numa: vmForm.value.pin_numa, no_tee: vmForm.value.no_tee, + net_mode: vmForm.value.net_mode, gpus: configGpu(vmForm.value) || undefined, kms_urls: vmForm.value.kms_urls, gateway_urls: vmForm.value.gateway_urls, @@ -3169,7 +3182,7 @@

Derive VM

return 'none'; } async function showCloneConfig(vm) { - var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o; + var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p; const theVm = await ensureVmDetails(vm); if (!((_a = theVm === null || theVm === void 0 ? void 0 : theVm.configuration) === null || _a === void 0 ? void 0 : _a.compose_file)) { alert('Compose file not available for this VM. Please open its details first.'); @@ -3208,6 +3221,7 @@

Derive VM

pin_numa: !!config.pin_numa, hugepages: !!config.hugepages, no_tee: !!config.no_tee, + net_mode: ((_p = config.networking) === null || _p === void 0 ? void 0 : _p.mode) || '', user_config: config.user_config || '', stopped: !!config.stopped, }; @@ -6587,6 +6601,7 @@

Derive VM

* @property {Array.|null} [gateway_urls] VmConfiguration gateway_urls * @property {boolean|null} [stopped] VmConfiguration stopped * @property {boolean|null} [no_tee] VmConfiguration no_tee + * @property {vmm.INetworkingConfig|null} [networking] VmConfiguration networking */ /** * Constructs a new VmConfiguration. @@ -6724,6 +6739,13 @@

Derive VM

* @instance */ VmConfiguration.prototype.no_tee = false; + /** + * VmConfiguration networking. + * @member {vmm.INetworkingConfig|null|undefined} networking + * @memberof vmm.VmConfiguration + * @instance + */ + VmConfiguration.prototype.networking = null; // OneOf field names bound to virtual getters and setters var $oneOfFields; /** @@ -6736,6 +6758,16 @@

Derive VM

get: $util.oneOfGetter($oneOfFields = ["app_id"]), set: $util.oneOfSetter($oneOfFields) }); + /** + * VmConfiguration _networking. + * @member {"networking"|undefined} _networking + * @memberof vmm.VmConfiguration + * @instance + */ + Object.defineProperty(VmConfiguration.prototype, "_networking", { + get: $util.oneOfGetter($oneOfFields = ["networking"]), + set: $util.oneOfSetter($oneOfFields) + }); /** * Creates a new VmConfiguration instance using the specified properties. * @function create @@ -6796,6 +6828,8 @@

Derive VM

writer.uint32(/* id 16, wireType 0 =*/ 128).bool(message.stopped); if (message.no_tee != null && Object.hasOwnProperty.call(message, "no_tee")) writer.uint32(/* id 17, wireType 0 =*/ 136).bool(message.no_tee); + if (message.networking != null && Object.hasOwnProperty.call(message, "networking")) + $root.vmm.NetworkingConfig.encode(message.networking, writer.uint32(/* id 18, wireType 2 =*/ 146).fork()).ldelim(); return writer; }; /** @@ -6904,6 +6938,10 @@

Derive VM

message.no_tee = reader.bool(); break; } + case 18: { + message.networking = $root.vmm.NetworkingConfig.decode(reader, reader.uint32()); + break; + } default: reader.skipType(tag & 7); break; @@ -7007,6 +7045,14 @@

Derive VM

if (message.no_tee != null && message.hasOwnProperty("no_tee")) if (typeof message.no_tee !== "boolean") return "no_tee: boolean expected"; + if (message.networking != null && message.hasOwnProperty("networking")) { + properties._networking = 1; + { + var error = $root.vmm.NetworkingConfig.verify(message.networking); + if (error) + return "networking." + error; + } + } return null; }; /** @@ -7079,6 +7125,11 @@

Derive VM

message.stopped = Boolean(object.stopped); if (object.no_tee != null) message.no_tee = Boolean(object.no_tee); + if (object.networking != null) { + if (typeof object.networking !== "object") + throw TypeError(".vmm.VmConfiguration.networking: object expected"); + message.networking = $root.vmm.NetworkingConfig.fromObject(object.networking); + } return message; }; /** @@ -7166,6 +7217,11 @@

Derive VM

object.stopped = message.stopped; if (message.no_tee != null && message.hasOwnProperty("no_tee")) object.no_tee = message.no_tee; + if (message.networking != null && message.hasOwnProperty("networking")) { + object.networking = $root.vmm.NetworkingConfig.toObject(message.networking, options); + if (options.oneofs) + object._networking = "networking"; + } return object; }; /** @@ -7194,6 +7250,196 @@

Derive VM

}; return VmConfiguration; })(); + vmm.NetworkingConfig = (function () { + /** + * Properties of a NetworkingConfig. + * @memberof vmm + * @interface INetworkingConfig + * @property {string|null} [mode] NetworkingConfig mode + */ + /** + * Constructs a new NetworkingConfig. + * @memberof vmm + * @classdesc Represents a NetworkingConfig. + * @implements INetworkingConfig + * @constructor + * @param {vmm.INetworkingConfig=} [properties] Properties to set + */ + function NetworkingConfig(properties) { + if (properties) + for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i) + if (properties[keys[i]] != null) + this[keys[i]] = properties[keys[i]]; + } + /** + * NetworkingConfig mode. + * @member {string} mode + * @memberof vmm.NetworkingConfig + * @instance + */ + NetworkingConfig.prototype.mode = ""; + /** + * Creates a new NetworkingConfig instance using the specified properties. + * @function create + * @memberof vmm.NetworkingConfig + * @static + * @param {vmm.INetworkingConfig=} [properties] Properties to set + * @returns {vmm.NetworkingConfig} NetworkingConfig instance + */ + NetworkingConfig.create = function create(properties) { + return new NetworkingConfig(properties); + }; + /** + * Encodes the specified NetworkingConfig message. Does not implicitly {@link vmm.NetworkingConfig.verify|verify} messages. + * @function encode + * @memberof vmm.NetworkingConfig + * @static + * @param {vmm.INetworkingConfig} message NetworkingConfig message or plain object to encode + * @param {$protobuf.Writer} [writer] Writer to encode to + * @returns {$protobuf.Writer} Writer + */ + NetworkingConfig.encode = function encode(message, writer) { + if (!writer) + writer = $Writer.create(); + if (message.mode != null && Object.hasOwnProperty.call(message, "mode")) + writer.uint32(/* id 1, wireType 2 =*/ 10).string(message.mode); + return writer; + }; + /** + * Encodes the specified NetworkingConfig message, length delimited. Does not implicitly {@link vmm.NetworkingConfig.verify|verify} messages. + * @function encodeDelimited + * @memberof vmm.NetworkingConfig + * @static + * @param {vmm.INetworkingConfig} message NetworkingConfig message or plain object to encode + * @param {$protobuf.Writer} [writer] Writer to encode to + * @returns {$protobuf.Writer} Writer + */ + NetworkingConfig.encodeDelimited = function encodeDelimited(message, writer) { + return this.encode(message, writer).ldelim(); + }; + /** + * Decodes a NetworkingConfig message from the specified reader or buffer. + * @function decode + * @memberof vmm.NetworkingConfig + * @static + * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from + * @param {number} [length] Message length if known beforehand + * @returns {vmm.NetworkingConfig} NetworkingConfig + * @throws {Error} If the payload is not a reader or valid buffer + * @throws {$protobuf.util.ProtocolError} If required fields are missing + */ + NetworkingConfig.decode = function decode(reader, length, error) { + if (!(reader instanceof $Reader)) + reader = $Reader.create(reader); + var end = length === undefined ? reader.len : reader.pos + length, message = new $root.vmm.NetworkingConfig(); + while (reader.pos < end) { + var tag = reader.uint32(); + if (tag === error) + break; + switch (tag >>> 3) { + case 1: { + message.mode = reader.string(); + break; + } + default: + reader.skipType(tag & 7); + break; + } + } + return message; + }; + /** + * Decodes a NetworkingConfig message from the specified reader or buffer, length delimited. + * @function decodeDelimited + * @memberof vmm.NetworkingConfig + * @static + * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from + * @returns {vmm.NetworkingConfig} NetworkingConfig + * @throws {Error} If the payload is not a reader or valid buffer + * @throws {$protobuf.util.ProtocolError} If required fields are missing + */ + NetworkingConfig.decodeDelimited = function decodeDelimited(reader) { + if (!(reader instanceof $Reader)) + reader = new $Reader(reader); + return this.decode(reader, reader.uint32()); + }; + /** + * Verifies a NetworkingConfig message. + * @function verify + * @memberof vmm.NetworkingConfig + * @static + * @param {Object.} message Plain object to verify + * @returns {string|null} `null` if valid, otherwise the reason why it is not + */ + NetworkingConfig.verify = function verify(message) { + if (typeof message !== "object" || message === null) + return "object expected"; + if (message.mode != null && message.hasOwnProperty("mode")) + if (!$util.isString(message.mode)) + return "mode: string expected"; + return null; + }; + /** + * Creates a NetworkingConfig message from a plain object. Also converts values to their respective internal types. + * @function fromObject + * @memberof vmm.NetworkingConfig + * @static + * @param {Object.} object Plain object + * @returns {vmm.NetworkingConfig} NetworkingConfig + */ + NetworkingConfig.fromObject = function fromObject(object) { + if (object instanceof $root.vmm.NetworkingConfig) + return object; + var message = new $root.vmm.NetworkingConfig(); + if (object.mode != null) + message.mode = String(object.mode); + return message; + }; + /** + * Creates a plain object from a NetworkingConfig message. Also converts values to other types if specified. + * @function toObject + * @memberof vmm.NetworkingConfig + * @static + * @param {vmm.NetworkingConfig} message NetworkingConfig + * @param {$protobuf.IConversionOptions} [options] Conversion options + * @returns {Object.} Plain object + */ + NetworkingConfig.toObject = function toObject(message, options) { + if (!options) + options = {}; + var object = {}; + if (options.defaults) + object.mode = ""; + if (message.mode != null && message.hasOwnProperty("mode")) + object.mode = message.mode; + return object; + }; + /** + * Converts this NetworkingConfig to JSON. + * @function toJSON + * @memberof vmm.NetworkingConfig + * @instance + * @returns {Object.} JSON object + */ + NetworkingConfig.prototype.toJSON = function toJSON() { + return this.constructor.toObject(this, $protobuf.util.toJSONOptions); + }; + /** + * Gets the default type url for NetworkingConfig + * @function getTypeUrl + * @memberof vmm.NetworkingConfig + * @static + * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com") + * @returns {string} The default type url + */ + NetworkingConfig.getTypeUrl = function getTypeUrl(typeUrlPrefix) { + if (typeUrlPrefix === undefined) { + typeUrlPrefix = "type.googleapis.com"; + } + return typeUrlPrefix + "/vmm.NetworkingConfig"; + }; + return NetworkingConfig; + })(); vmm.GpuConfig = (function () { /** * Properties of a GpuConfig. @@ -13116,8 +13362,251 @@

Derive VM

* @returns {Promise} Promise * @variation 2 */ + /** + * Callback as used by {@link vmm.Vmm#reportDhcpLease}. + * @memberof vmm.Vmm + * @typedef ReportDhcpLeaseCallback + * @type {function} + * @param {Error|null} error Error, if any + * @param {google.protobuf.Empty} [response] Empty + */ + /** + * Calls ReportDhcpLease. + * @function reportDhcpLease + * @memberof vmm.Vmm + * @instance + * @param {vmm.IDhcpLeaseRequest} request DhcpLeaseRequest message or plain object + * @param {vmm.Vmm.ReportDhcpLeaseCallback} callback Node-style callback called with the error, if any, and Empty + * @returns {undefined} + * @variation 1 + */ + Object.defineProperty(Vmm.prototype.reportDhcpLease = function reportDhcpLease(request, callback) { + return this.rpcCall(reportDhcpLease, $root.vmm.DhcpLeaseRequest, $root.google.protobuf.Empty, request, callback); + }, "name", { value: "ReportDhcpLease" }); + /** + * Calls ReportDhcpLease. + * @function reportDhcpLease + * @memberof vmm.Vmm + * @instance + * @param {vmm.IDhcpLeaseRequest} request DhcpLeaseRequest message or plain object + * @returns {Promise} Promise + * @variation 2 + */ return Vmm; })(); + vmm.DhcpLeaseRequest = (function () { + /** + * Properties of a DhcpLeaseRequest. + * @memberof vmm + * @interface IDhcpLeaseRequest + * @property {string|null} [mac] DhcpLeaseRequest mac + * @property {string|null} [ip] DhcpLeaseRequest ip + */ + /** + * Constructs a new DhcpLeaseRequest. + * @memberof vmm + * @classdesc Represents a DhcpLeaseRequest. + * @implements IDhcpLeaseRequest + * @constructor + * @param {vmm.IDhcpLeaseRequest=} [properties] Properties to set + */ + function DhcpLeaseRequest(properties) { + if (properties) + for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i) + if (properties[keys[i]] != null) + this[keys[i]] = properties[keys[i]]; + } + /** + * DhcpLeaseRequest mac. + * @member {string} mac + * @memberof vmm.DhcpLeaseRequest + * @instance + */ + DhcpLeaseRequest.prototype.mac = ""; + /** + * DhcpLeaseRequest ip. + * @member {string} ip + * @memberof vmm.DhcpLeaseRequest + * @instance + */ + DhcpLeaseRequest.prototype.ip = ""; + /** + * Creates a new DhcpLeaseRequest instance using the specified properties. + * @function create + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {vmm.IDhcpLeaseRequest=} [properties] Properties to set + * @returns {vmm.DhcpLeaseRequest} DhcpLeaseRequest instance + */ + DhcpLeaseRequest.create = function create(properties) { + return new DhcpLeaseRequest(properties); + }; + /** + * Encodes the specified DhcpLeaseRequest message. Does not implicitly {@link vmm.DhcpLeaseRequest.verify|verify} messages. + * @function encode + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {vmm.IDhcpLeaseRequest} message DhcpLeaseRequest message or plain object to encode + * @param {$protobuf.Writer} [writer] Writer to encode to + * @returns {$protobuf.Writer} Writer + */ + DhcpLeaseRequest.encode = function encode(message, writer) { + if (!writer) + writer = $Writer.create(); + if (message.mac != null && Object.hasOwnProperty.call(message, "mac")) + writer.uint32(/* id 1, wireType 2 =*/ 10).string(message.mac); + if (message.ip != null && Object.hasOwnProperty.call(message, "ip")) + writer.uint32(/* id 2, wireType 2 =*/ 18).string(message.ip); + return writer; + }; + /** + * Encodes the specified DhcpLeaseRequest message, length delimited. Does not implicitly {@link vmm.DhcpLeaseRequest.verify|verify} messages. + * @function encodeDelimited + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {vmm.IDhcpLeaseRequest} message DhcpLeaseRequest message or plain object to encode + * @param {$protobuf.Writer} [writer] Writer to encode to + * @returns {$protobuf.Writer} Writer + */ + DhcpLeaseRequest.encodeDelimited = function encodeDelimited(message, writer) { + return this.encode(message, writer).ldelim(); + }; + /** + * Decodes a DhcpLeaseRequest message from the specified reader or buffer. + * @function decode + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from + * @param {number} [length] Message length if known beforehand + * @returns {vmm.DhcpLeaseRequest} DhcpLeaseRequest + * @throws {Error} If the payload is not a reader or valid buffer + * @throws {$protobuf.util.ProtocolError} If required fields are missing + */ + DhcpLeaseRequest.decode = function decode(reader, length, error) { + if (!(reader instanceof $Reader)) + reader = $Reader.create(reader); + var end = length === undefined ? reader.len : reader.pos + length, message = new $root.vmm.DhcpLeaseRequest(); + while (reader.pos < end) { + var tag = reader.uint32(); + if (tag === error) + break; + switch (tag >>> 3) { + case 1: { + message.mac = reader.string(); + break; + } + case 2: { + message.ip = reader.string(); + break; + } + default: + reader.skipType(tag & 7); + break; + } + } + return message; + }; + /** + * Decodes a DhcpLeaseRequest message from the specified reader or buffer, length delimited. + * @function decodeDelimited + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from + * @returns {vmm.DhcpLeaseRequest} DhcpLeaseRequest + * @throws {Error} If the payload is not a reader or valid buffer + * @throws {$protobuf.util.ProtocolError} If required fields are missing + */ + DhcpLeaseRequest.decodeDelimited = function decodeDelimited(reader) { + if (!(reader instanceof $Reader)) + reader = new $Reader(reader); + return this.decode(reader, reader.uint32()); + }; + /** + * Verifies a DhcpLeaseRequest message. + * @function verify + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {Object.} message Plain object to verify + * @returns {string|null} `null` if valid, otherwise the reason why it is not + */ + DhcpLeaseRequest.verify = function verify(message) { + if (typeof message !== "object" || message === null) + return "object expected"; + if (message.mac != null && message.hasOwnProperty("mac")) + if (!$util.isString(message.mac)) + return "mac: string expected"; + if (message.ip != null && message.hasOwnProperty("ip")) + if (!$util.isString(message.ip)) + return "ip: string expected"; + return null; + }; + /** + * Creates a DhcpLeaseRequest message from a plain object. Also converts values to their respective internal types. + * @function fromObject + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {Object.} object Plain object + * @returns {vmm.DhcpLeaseRequest} DhcpLeaseRequest + */ + DhcpLeaseRequest.fromObject = function fromObject(object) { + if (object instanceof $root.vmm.DhcpLeaseRequest) + return object; + var message = new $root.vmm.DhcpLeaseRequest(); + if (object.mac != null) + message.mac = String(object.mac); + if (object.ip != null) + message.ip = String(object.ip); + return message; + }; + /** + * Creates a plain object from a DhcpLeaseRequest message. Also converts values to other types if specified. + * @function toObject + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {vmm.DhcpLeaseRequest} message DhcpLeaseRequest + * @param {$protobuf.IConversionOptions} [options] Conversion options + * @returns {Object.} Plain object + */ + DhcpLeaseRequest.toObject = function toObject(message, options) { + if (!options) + options = {}; + var object = {}; + if (options.defaults) { + object.mac = ""; + object.ip = ""; + } + if (message.mac != null && message.hasOwnProperty("mac")) + object.mac = message.mac; + if (message.ip != null && message.hasOwnProperty("ip")) + object.ip = message.ip; + return object; + }; + /** + * Converts this DhcpLeaseRequest to JSON. + * @function toJSON + * @memberof vmm.DhcpLeaseRequest + * @instance + * @returns {Object.} JSON object + */ + DhcpLeaseRequest.prototype.toJSON = function toJSON() { + return this.constructor.toObject(this, $protobuf.util.toJSONOptions); + }; + /** + * Gets the default type url for DhcpLeaseRequest + * @function getTypeUrl + * @memberof vmm.DhcpLeaseRequest + * @static + * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com") + * @returns {string} The default type url + */ + DhcpLeaseRequest.getTypeUrl = function getTypeUrl(typeUrlPrefix) { + if (typeUrlPrefix === undefined) { + typeUrlPrefix = "type.googleapis.com"; + } + return typeUrlPrefix + "/vmm.DhcpLeaseRequest"; + }; + return DhcpLeaseRequest; + })(); return vmm; })(); $root.google = (function () { diff --git a/vmm/src/main_service.rs b/vmm/src/main_service.rs index 5a42f57a..1b68f328 100644 --- a/vmm/src/main_service.rs +++ b/vmm/src/main_service.rs @@ -10,10 +10,10 @@ use dstack_types::AppCompose; use dstack_vmm_rpc as rpc; use dstack_vmm_rpc::vmm_server::{VmmRpc, VmmServer}; use dstack_vmm_rpc::{ - AppId, ComposeHash as RpcComposeHash, GatewaySettings, GetInfoResponse, GetMetaResponse, Id, - ImageInfo as RpcImageInfo, ImageListResponse, KmsSettings, ListGpusResponse, PublicKeyResponse, - ReloadVmsResponse, ResizeVmRequest, ResourcesSettings, StatusRequest, StatusResponse, - UpdateVmRequest, VersionResponse, VmConfiguration, + AppId, ComposeHash as RpcComposeHash, DhcpLeaseRequest, GatewaySettings, GetInfoResponse, + GetMetaResponse, Id, ImageInfo as RpcImageInfo, ImageListResponse, KmsSettings, + ListGpusResponse, PublicKeyResponse, ReloadVmsResponse, ResizeVmRequest, ResourcesSettings, + StatusRequest, StatusResponse, UpdateVmRequest, VersionResponse, VmConfiguration, }; use fs_err as fs; use ra_rpc::{CallContext, RpcCall}; @@ -193,6 +193,43 @@ pub fn create_manifest_from_vm_config( kms_urls: request.kms_urls.clone(), gateway_urls: request.gateway_urls.clone(), no_tee: request.no_tee, + networking: request.networking.as_ref().and_then(networking_from_proto), + }) +} + +fn networking_from_proto(proto: &rpc::NetworkingConfig) -> Option { + use crate::config::NetworkingMode; + let mode = match proto.mode.as_str() { + "bridge" => NetworkingMode::Bridge, + "passt" => NetworkingMode::Passt, + "user" => NetworkingMode::User, + "custom" => NetworkingMode::Custom, + "" => return None, // not set, use global default + other => { + tracing::warn!("unsupported per-VM networking mode '{other}', using global default"); + return None; + } + }; + // Only set mode; other fields will be merged from global config at runtime + Some(crate::config::Networking { + mode, + bridge: String::new(), + mac_prefix: String::new(), + net: String::new(), + dhcp_start: String::new(), + restrict: false, + passt_exec: String::new(), + interface: String::new(), + address: String::new(), + netmask: String::new(), + gateway: String::new(), + dns: vec![], + map_host_loopback: String::new(), + map_guest_addr: String::new(), + no_map_gw: false, + ipv4_only: false, + netdev: String::new(), + forward_service_enabled: false, }) } @@ -416,6 +453,9 @@ impl VmmRpc for RpcHandler { .load_vm(&vm_work_dir, &Default::default(), false) .await .context("Failed to load VM")?; + if request.update_ports { + self.app.reconfigure_port_forward(&new_id).await; + } Ok(Id { id: new_id }) } @@ -542,6 +582,11 @@ impl VmmRpc for RpcHandler { info!("Reloading VMs directory and syncing with memory state"); self.app.reload_vms_sync().await } + + async fn report_dhcp_lease(self, request: DhcpLeaseRequest) -> Result<()> { + self.app.report_dhcp_lease(&request.mac, &request.ip).await; + Ok(()) + } } impl RpcCall for RpcHandler { diff --git a/vmm/src/vmm-cli.py b/vmm/src/vmm-cli.py index aff504ff..7bb40806 100755 --- a/vmm/src/vmm-cli.py +++ b/vmm/src/vmm-cli.py @@ -287,7 +287,7 @@ def list_vms(self, verbose: bool = False, json_output: bool = False) -> None: headers = ['VM ID', 'App ID', 'Name', 'Status', 'Uptime'] if verbose: - headers.extend(['vCPU', 'Memory', 'Disk', 'Image', 'GPUs']) + headers.extend(['Instance ID', 'vCPU', 'Memory', 'Disk', 'Image', 'GPUs']) rows = [] for vm in vms: @@ -303,6 +303,7 @@ def list_vms(self, verbose: bool = False, json_output: bool = False) -> None: config = vm.get('configuration', {}) gpu_info = self._format_gpu_info(config.get('gpus')) row.extend([ + vm.get('instance_id', '-') or '-', config.get('vcpu', '-'), f"{config.get('memory', '-')}MB", f"{config.get('disk_size', '-')}GB", @@ -638,6 +639,8 @@ def create_vm(self, args) -> None: params["kms_urls"] = args.kms_url if args.gateway_url: params["gateway_urls"] = args.gateway_url + if args.net: + params["networking"] = {"mode": args.net} app_id = args.app_id or self.calc_app_id(compose_content) print(f"App ID: {app_id}") @@ -911,6 +914,50 @@ def update_vm( else: print(f"No updates specified for VM {vm_id}") + def show_info(self, vm_id: str, json_output: bool = False) -> None: + """Show detailed information about a VM""" + response = self.rpc_call('GetInfo', {'id': vm_id}) + + if not response.get('found', False) or 'info' not in response: + print(f"VM with ID {vm_id} not found") + return + + info = response['info'] + + if json_output: + print(json.dumps(info, indent=2)) + return + + config = info.get('configuration', {}) + + print(f"VM ID: {info.get('id', '-')}") + print(f"Name: {info.get('name', '-')}") + print(f"Status: {info.get('status', '-')}") + print(f"Uptime: {info.get('uptime', '-')}") + print(f"App ID: {info.get('app_id', '-')}") + print(f"Instance ID: {info.get('instance_id', '-') or '-'}") + print(f"App URL: {info.get('app_url', '-') or '-'}") + print(f"Image: {config.get('image', '-')}") + print(f"Image Version: {info.get('image_version', '-')}") + print(f"vCPU: {config.get('vcpu', '-')}") + print(f"Memory: {config.get('memory', '-')}MB") + print(f"Disk: {config.get('disk_size', '-')}GB") + print(f"GPUs: {self._format_gpu_info(config.get('gpus'))}") + print(f"Boot Progress: {info.get('boot_progress', '-')}") + if info.get('boot_error'): + print(f"Boot Error: {info['boot_error']}") + if info.get('exited_at'): + print(f"Exited At: {info['exited_at']}") + if info.get('shutdown_progress'): + print(f"Shutdown: {info['shutdown_progress']}") + + events = info.get('events', []) + if events: + print(f"\nRecent Events:") + for event in events[-10:]: + ts = event.get('timestamp', 0) + print(f" [{event.get('event', '')}] {event.get('body', '')} (ts: {ts})") + def list_gpus(self, json_output: bool = False) -> None: """List all available GPUs""" response = self.rpc_call('ListGpus') @@ -1227,6 +1274,12 @@ def main(): lsvm_parser.add_argument( '--json', action='store_true', help='Output in JSON format for automation') + # Info command + info_parser = subparsers.add_parser('info', help='Show detailed VM information') + info_parser.add_argument('vm_id', help='VM ID to show info for') + info_parser.add_argument( + '--json', action='store_true', help='Output in JSON format for automation') + # Start command start_parser = subparsers.add_parser('start', help='Start a VM') start_parser.add_argument('vm_id', help='VM ID to start') @@ -1337,6 +1390,9 @@ def main(): deploy_parser.add_argument('--tee', dest='no_tee', action='store_false', help='Force-enable Intel TDX (default)') deploy_parser.set_defaults(no_tee=False) + deploy_parser.add_argument('--net', choices=['bridge', 'passt', 'user'], + help='Networking mode (default: use global config)') + # Images command lsimage_parser = subparsers.add_parser( @@ -1502,6 +1558,8 @@ def main(): if args.command == 'lsvm': cli.list_vms(args.verbose, args.json) + elif args.command == 'info': + cli.show_info(args.vm_id, args.json) elif args.command == 'start': cli.start_vm(args.vm_id) elif args.command == 'stop': diff --git a/vmm/ui/src/components/CreateVmDialog.ts b/vmm/ui/src/components/CreateVmDialog.ts index fc79e0b3..90ab797d 100644 --- a/vmm/ui/src/components/CreateVmDialog.ts +++ b/vmm/ui/src/components/CreateVmDialog.ts @@ -146,6 +146,16 @@ const CreateVmDialogComponent = {
+
+ + +
+
diff --git a/vmm/ui/src/composables/useVmManager.ts b/vmm/ui/src/composables/useVmManager.ts index 3c8f72e1..a0d7b935 100644 --- a/vmm/ui/src/composables/useVmManager.ts +++ b/vmm/ui/src/composables/useVmManager.ts @@ -106,6 +106,7 @@ type VmFormState = { no_tee: boolean; pin_numa: boolean; hugepages: boolean; + net_mode: string; user_config: string; kms_urls: string[]; gateway_urls: string[]; @@ -188,6 +189,7 @@ function createVmFormState(preLaunchScript: string): VmFormState { no_tee: false, pin_numa: false, hugepages: false, + net_mode: '', user_config: '', kms_urls: [], gateway_urls: [], @@ -430,6 +432,7 @@ type CreateVmPayloadSource = { hugepages?: boolean; pin_numa?: boolean; no_tee?: boolean; + net_mode?: string; gpus?: VmmTypes.IGpuConfig; kms_urls?: string[]; gateway_urls?: string[]; @@ -452,6 +455,7 @@ type CreateVmPayloadSource = { hugepages: !!source.hugepages, pin_numa: !!source.pin_numa, no_tee: source.no_tee ?? false, + networking: source.net_mode ? { mode: source.net_mode } : undefined, gpus: source.gpus, kms_urls: source.kms_urls?.filter((url) => url && url.trim().length) ?? [], gateway_urls: source.gateway_urls?.filter((url) => url && url.trim().length) ?? [], @@ -976,6 +980,7 @@ type CreateVmPayloadSource = { hugepages: vmForm.value.hugepages, pin_numa: vmForm.value.pin_numa, no_tee: vmForm.value.no_tee, + net_mode: vmForm.value.net_mode, gpus: configGpu(vmForm.value) || undefined, kms_urls: vmForm.value.kms_urls, gateway_urls: vmForm.value.gateway_urls, @@ -1126,6 +1131,7 @@ type CreateVmPayloadSource = { pin_numa: !!config.pin_numa, hugepages: !!config.hugepages, no_tee: !!config.no_tee, + net_mode: config.networking?.mode || '', user_config: config.user_config || '', stopped: !!config.stopped, }; diff --git a/vmm/vmm.toml b/vmm/vmm.toml index b64bf113..c26c84dd 100644 --- a/vmm/vmm.toml +++ b/vmm/vmm.toml @@ -97,6 +97,10 @@ map_guest_addr = "none" no_map_gw = true ipv4_only = true +# for mode = "bridge" +# bridge = "virbr0" +forward_service_enabled = false + [cvm.port_mapping] enabled = false address = "127.0.0.1"