diff --git a/checks/executable-by-bubblewrap.nix b/checks/executable-by-bubblewrap.nix new file mode 100644 index 0000000..c6bb896 --- /dev/null +++ b/checks/executable-by-bubblewrap.nix @@ -0,0 +1,39 @@ +{ + perSystem = + { self', pkgs, ... }: + let + containerName = "http-test"; + in + { + checks.executable-by-bubblewrap = pkgs.testers.runNixOSTest { + name = "executable-by-bubblewrap"; + nodes.bubblewrapRunner = _: { + # Enable unprivileged user namespaces for bubblewrap + boot.kernel.sysctl."kernel.unprivileged_userns_clone" = 1; + + environment.systemPackages = [ + self'.packages.${containerName}.runInBubblewrap + pkgs.curl + ]; + + system.stateVersion = "25.11"; + }; + + testScript = '' + machine.wait_for_unit("default.target") + + machine.execute("nohup nimi-sandbox > /tmp/nimi-sandbox.log 2>&1 &") + + machine.wait_for_open_port(8080, timeout=30) + + output = machine.succeed("curl -s http://localhost:8080") + print(f"Server output: {output}") + assert "Index of" in output, f"Expected 'Index of' in response, got: {output}" + + machine.wait_for_open_port(2222, timeout=30) + + machine.execute("cat /tmp/nimi-sandbox.log") + ''; + }; + }; +} diff --git a/checks/executable-by-container-runtime/default.nix b/checks/executable-by-container-runtime.nix similarity index 77% rename from checks/executable-by-container-runtime/default.nix rename to checks/executable-by-container-runtime.nix index 9e27963..c7f1db3 100644 --- a/checks/executable-by-container-runtime/default.nix +++ b/checks/executable-by-container-runtime.nix @@ -22,10 +22,11 @@ machine.wait_for_unit("default.target") machine.succeed("copy-to-container-runtime") - machine.succeed("podman run -d -p 8080:8080 ${containerName}:latest") - machine.sleep(2) + machine.succeed("podman run -d -p 8080:8080 -p 22:22 ${containerName}:latest") - machine.wait_for_open_port(8080, timeout=30) + machine.wait_for_open_port(8080, timeout=60) + + machine.wait_for_open_port(22, timeout=60) ''; }; }; diff --git 
a/checks/executable-by-container-runtime/container.nix b/checks/test-instance.nix similarity index 84% rename from checks/executable-by-container-runtime/container.nix rename to checks/test-instance.nix index 62c13ae..bb5ed58 100644 --- a/checks/executable-by-container-runtime/container.nix +++ b/checks/test-instance.nix @@ -11,6 +11,8 @@ exposedPorts = { "8080/tcp" = { }; }; + + nimiSettings.restart.mode = "up-to-count"; }; }; } diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 40fe009..1e7c61d 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -7,3 +7,4 @@ - [Services & Runtime](services.md) - [Defining Custom Services](custom-service.md) - [Integrations](integrations.md) +- [Bubblewrap Mode](bubblewrap.md) diff --git a/docs/bubblewrap.md b/docs/bubblewrap.md new file mode 100644 index 0000000..5570c81 --- /dev/null +++ b/docs/bubblewrap.md @@ -0,0 +1,249 @@ +# // bubblewrap mode // + +run `nix2gpu` containers without docker, podman, or any container runtime. just bubblewrap + nix. + +______________________________________________________________________ + +## // why bubblewrap // + +**bubblewrap** (`bwrap`) is a lightweight sandboxing tool that uses Linux namespaces to create isolated environments. Unlike docker/podman: + +- **no daemon** required +- **startup in milliseconds** instead of seconds +- **works on systems without container runtimes** +- **simpler architecture** - just a binary that execs into your process +- **no copy out of the nix store** - nix-built container images must be copied out of the nix store into the container runtime before they can run. bubblewrap mounts the nix store directly. + +Useful when you want to run GPU workloads on a host that has nix but no container infrastructure, or for iterating faster. + +______________________________________________________________________ + +## // how it works // + +`nix2gpu` leverages Nimi's built-in bubblewrap support. 
When you build a container with bubblewrap enabled, Nimi generates a wrapper script that: + +1. **bind mounts** the nix store and container filesystem into a new namespace +1. **binds GPU devices** (`/dev/nvidia*`, `/dev/dri`) from the host +1. **sets up a minimal `/proc`** with NVIDIA driver visibility +1. **executes the startup script** in the sandboxed environment +1. **runs Nimi** to manage your services + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ host system │ +│ ┌──────────────┐ ┌─────────────────────────────────────────────┐ │ +│ │ nix store │────│ bubblewrap sandbox (new user namespace) │ │ +│ │ /nix/store │ │ │ │ +│ └──────────────┘ │ ┌──────────┐ ┌─────────┐ ┌───────────┐ │ │ +│ │ │/bin, /lib│ │/dev/nv* │ │ /proc │ │ │ +│ ┌──────────────┐ │ │(ro bind) │ │(dev bind│ │(ro bind) │ │ │ +│ │ GPU devs │────│ └──────────┘ └─────────┘ └───────────┘ │ │ +│ │ /dev/nvidia*│ │ │ │ +│ └──────────────┘ │ Nimi + your services │ │ +│ │ │ │ +│ ┌──────────────┐ │ ┌─────────────────────────────────────┐ │ │ +│ │ NVIDIA libs │────│ │ nix2gpu startup.sh │ │ │ +│ │/lib/x86_64.. │ │ │ (sets up /etc, GPU libs, SSH...) 
│ │ │ +│ └──────────────┘ │ └─────────────────────────────────────┘ │ │ +│ └─────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +______________________________________________________________________ + +## // key differences from containers // + +| aspect | container mode | bubblewrap mode | +|--------|---------------|-----------------| +| **runtime** | docker/podman | bubblewrap binary | +| **isolation** | full container | user namespace only | +| **startup time** | ~1-5 seconds | ~50-200ms | +| **image format** | OCI tarball | nix store paths directly | +| **GPU access** | `--gpus all` flag | bind mounts from host | +| **networking** | container bridge | host network (by default) | + +______________________________________________________________________ + +## // filesystem setup // + +Unlike container layers which overlay on top of each other, bubblewrap uses bind mounts. This requires different handling: + +### **read-only binds** + +Each subdirectory from your container's `copyToRoot` is individually bound: + +``` +/nix/store/xxx-base-system/bin → /bin +/nix/store/xxx-base-system/lib → /lib +/nix/store/xxx-base-system/usr → /usr +``` + +This gives the same view as a container, but via bind mounts instead of overlayfs. + +### **GPU library binds** + +Host NVIDIA libraries are bound into the sandbox: + +``` +/lib/x86_64-linux-gnu → /lib/x86_64-linux-gnu +/usr/lib/x86_64-linux-gnu → /usr/lib/x86_64-linux-gnu +/usr/bin/nvidia-smi → /usr/bin/nvidia-smi +``` + +### **device binds** + +GPU devices from the host are made available: + +``` +/dev/nvidiactl +/dev/nvidia-modeset +/dev/nvidia-uvm +/dev/nvidia0 through /dev/nvidia7 +/dev/dri +``` + +### **procfs handling** + +The container gets the **host's `/proc`** instead of a private one. This is required because NVIDIA drivers expose GPU state through `/proc/driver/nvidia`, which only exists in the host's procfs. 
+ +______________________________________________________________________ + +## // runtime directories // + +Directories that need to be mutable are set up as tmpfs mounts: + +- `/tmp` - temporary files +- `/run` - runtime state +- `/var` - variable data +- `/root` - root's home directory +- `/home` - user home directories + +The startup script (`startup.sh`) populates `/etc` and `/root` from the nix store on first run since these start empty in bubblewrap mode. + +______________________________________________________________________ + +## // usage // + +### **basic setup** + +Bubblewrap mode is already configured through Nimi settings. No additional flake inputs needed: + +```nix +perSystem.nix2gpu."my-gpu-app" = { + # Your normal nix2gpu config + services.myapp = { + process.argv = [ (lib.getExe pkgs.myapp) ]; + }; +}; +``` + +### **running** + +Build the bubblewrap wrapper instead of the OCI image: + +```bash +# Run your `nix2gpu` instance in bubblewrap +nix run .#my-gpu-app.runInBubblewrap +``` + +______________________________________________________________________ + +## // configuration options // + +`nix2gpu` automatically translates your container config to bubblewrap equivalents: + +### **environment variables** + +```nix +nix2gpu."my-app" = { + env = { + MY_VAR = "value"; + CUDA_PATH = "${pkgs.cudaPackages_12_8.cudatoolkit}"; + }; +}; +``` + +These are passed to bubblewrap's `--setenv` flags. + +### **user/uid** + +```nix +nix2gpu."my-app" = { + user = "root"; # Must exist in nix2gpuUsers +}; +``` + +The UID is resolved from `nix2gpuUsers` and passed to bubblewrap's `--uid`. + +### **working directory** + +```nix +nix2gpu."my-app" = { + workingDir = "/workspace"; +}; +``` + +Translated to bubblewrap's `--chdir`. 
+ +### **custom bubblewrap flags** + +You can add additional bubblewrap options through Nimi: + +```nix +nix2gpu."my-app".nimiSettings.bubblewrap = { + # Additional read-only binds + tryRoBinds = [ + { src = "/host/data"; dest = "/data"; } + ]; + + # Additional device binds + tryDevBinds = [ + { src = "/dev/custom"; dest = "/dev/custom"; } + ]; + + # Share a network namespace + shareNet = true; +}; +``` + +______________________________________________________________________ + +## // when to use bubblewrap // + +**good for:** + +- Development environments where you want fast iteration +- Systems without docker/podman (e.g., some HPC clusters) +- CI/CD pipelines where container runtimes aren't available +- Debugging - easier to inspect the sandbox from outside + +**not ideal for:** + +- Production multi-tenant isolation (user namespaces are weaker than containers) +- Scenarios requiring complex network setups (no built-in container networking) +- When you need to distribute the runtime to machines without nix + +______________________________________________________________________ + +## // debugging // + +Since bubblewrap doesn't hide the process in a container runtime, debugging is easier: + +```bash +# Monitor from host with standard tools +ps aux | grep bwrap +``` + +The startup script logs everything via `gum`, so you can see exactly what initialization steps are running. + +______________________________________________________________________ + +## // security notes // + +Bubblewrap uses **user namespaces**, which provide less isolation than containers: + +- **Root in the sandbox is not real root** - it's mapped to your host UID +- **Kernel attack surface** is larger than containers (no seccomp/apparmor by default) +- **Host filesystem** is still accessible outside bind mounts (though protected by permissions) + +For untrusted workloads, prefer container runtimes with stronger isolation. 
diff --git a/docs/getting-started.md b/docs/getting-started.md index 2cbf41a..637af3b 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -69,7 +69,7 @@ Take a look in the [examples folder](https://github.com/weyl-ai/nix2gpu/tree/bai Going forward, we will use the `comfyui.nix` example. -We can run this in `nix2gpu` like (replacing the `perSystem.nix2gpu` from earlier: +We can run this in `nix2gpu` like (replacing the `perSystem.nix2gpu` from earlier): ```nix { diff --git a/docs/index.md b/docs/index.md index 7eb7c63..d0257e1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,6 +14,7 @@ ______________________________________________________________________ modules - **[secrets & agenix](secrets.md)** — keys never touch the nix store - **[integrations](integrations.md)** — integrations with the nix ecosystem +- **[bubblewrap mode](bubblewrap.md)** — run without docker/podman ______________________________________________________________________ diff --git a/flake.lock b/flake.lock index 069595e..bd53456 100644 --- a/flake.lock +++ b/flake.lock @@ -5,11 +5,11 @@ "nixpkgs-lib": "nixpkgs-lib" }, "locked": { - "lastModified": 1768135262, - "narHash": "sha256-PVvu7OqHBGWN16zSi6tEmPwwHQ4rLPU9Plvs8/1TUBY=", + "lastModified": 1772408722, + "narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=", "owner": "hercules-ci", "repo": "flake-parts", - "rev": "80daad04eddbbf5a4d883996a73f3f542fa437ac", + "rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3", "type": "github" }, "original": { @@ -27,11 +27,11 @@ ] }, "locked": { - "lastModified": 1765495779, - "narHash": "sha256-MhA7wmo/7uogLxiewwRRmIax70g6q1U/YemqTGoFHlM=", + "lastModified": 1768135262, + "narHash": "sha256-PVvu7OqHBGWN16zSi6tEmPwwHQ4rLPU9Plvs8/1TUBY=", "owner": "hercules-ci", "repo": "flake-parts", - "rev": "5635c32d666a59ec9a55cab87e898889869f7b71", + "rev": "80daad04eddbbf5a4d883996a73f3f542fa437ac", "type": "github" }, "original": { @@ -48,11 +48,11 @@ ] }, "locked": { - "lastModified": 
1765774562, - "narHash": "sha256-UQhfCggNGDc7eam+EittlYmeW89CZVT1KkFIHZWBH7k=", + "lastModified": 1768476106, + "narHash": "sha256-V0YOJRum50gtKgwavsAfwXc9+XAsJCC7386YZx1sWGQ=", "owner": "hercules-ci", "repo": "hercules-ci-effects", - "rev": "edcbb19948b6caf1700434e369fde6ff9e6a3c93", + "rev": "c19e263e6e22ec7379d972f19e6a322f943c73fb", "type": "github" }, "original": { @@ -68,11 +68,11 @@ ] }, "locked": { - "lastModified": 1768240557, - "narHash": "sha256-bVqJ34yMiiUQwYhjliiiN5LBH1Y+UldbIjNTCUtDdwE=", + "lastModified": 1772633327, + "narHash": "sha256-jl+DJB2DUx7EbWLRng+6HNWW/1/VQOnf0NsQB4PlA7I=", "owner": "nix-community", "repo": "home-manager", - "rev": "b3f737e70fb9eef1d2308ea6738ffed5ae080f9b", + "rev": "5a75730e6f21ee624cbf86f4915c6e7489c74acc", "type": "github" }, "original": { @@ -83,11 +83,11 @@ }, "import-tree": { "locked": { - "lastModified": 1763762820, - "narHash": "sha256-ZvYKbFib3AEwiNMLsejb/CWs/OL/srFQ8AogkebEPF0=", + "lastModified": 1772344373, + "narHash": "sha256-OQQ1MhB9t1J71b2wxRRTdH/Qd8UGG0p+dGspfCf5U1c=", "owner": "vic", "repo": "import-tree", - "rev": "3c23749d8013ec6daa1d7255057590e9ca726646", + "rev": "10fda59eee7d7970ec443b925f32a1bc7526648c", "type": "github" }, "original": { @@ -104,16 +104,15 @@ ] }, "locked": { - "lastModified": 1768238066, - "narHash": "sha256-prTeXJ5KHTlYJU42e8NEw5frYER8NeGcWU2EqlZE1kA=", + "lastModified": 1771178957, + "narHash": "sha256-vfwz/D1ggRJdny/XaL62Mx2t8VIyaA1YtLuRZ7wjnrg=", "owner": "weyl-ai", "repo": "nimi", - "rev": "c1135f769558c7d3ae092cae9c1ee2812aa849ae", + "rev": "08a12bcddbca92b770f239da195188edd4d2f45a", "type": "github" }, "original": { "owner": "weyl-ai", - "ref": "baileylu/minimize-flake", "repo": "nimi", "type": "github" } @@ -150,11 +149,11 @@ ] }, "locked": { - "lastModified": 1767843980, - "narHash": "sha256-skl1BnMj8BkpFTFG0Ix+ejOU3dRAvHHBUhd/l7PhYVU=", + "lastModified": 1772646415, + "narHash": "sha256-2WS56K14LCx/SGuTgN6Fnh8ZFqJ6ZBoiPLBVT6IYdjA=", "owner": "nixified-ai", "repo": 
"flake", - "rev": "4a0e94067f863de3a2711d48651e499cb51f337e", + "rev": "4470be42194763ebd99748d2006d6c373c79ec49", "type": "github" }, "original": { @@ -165,11 +164,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1768240724, - "narHash": "sha256-ZGG8XGelh3eEPLkm9+77Tl4PtzC+avBNsK3dtapLMEA=", + "lastModified": 1772721746, + "narHash": "sha256-GBuNOTwrTEDkkCxZIt31/4vOOrk6EN9WJRX5Iw6rSgo=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "f83bf5187bfbcd5d751672433b66ddc5a3336480", + "rev": "954e9a9127b88c528b232c48142345c8d845951a", "type": "github" }, "original": { @@ -180,11 +179,11 @@ }, "nixpkgs-lib": { "locked": { - "lastModified": 1765674936, - "narHash": "sha256-k00uTP4JNfmejrCLJOwdObYC9jHRrr/5M/a/8L2EIdo=", + "lastModified": 1772328832, + "narHash": "sha256-e+/T/pmEkLP6BHhYjx6GmwP5ivonQQn0bJdH9YrRB+Q=", "owner": "nix-community", "repo": "nixpkgs.lib", - "rev": "2075416fcb47225d9b68ac469a5c4801a9c4dd85", + "rev": "c185c7a5e5dd8f9add5b2f8ebeff00888b070742", "type": "github" }, "original": { @@ -231,11 +230,11 @@ ] }, "locked": { - "lastModified": 1768158989, - "narHash": "sha256-67vyT1+xClLldnumAzCTBvU0jLZ1YBcf4vANRWP3+Ak=", + "lastModified": 1772660329, + "narHash": "sha256-IjU1FxYqm+VDe5qIOxoW+pISBlGvVApRjiw/Y/ttJzY=", "owner": "numtide", "repo": "treefmt-nix", - "rev": "e96d59dff5c0d7fddb9d113ba108f03c3ef99eca", + "rev": "3710e0e1218041bbad640352a0440114b1e10428", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index c58d09d..6b31f2f 100644 --- a/flake.nix +++ b/flake.nix @@ -25,7 +25,7 @@ }; nimi = { - url = "github:weyl-ai/nimi/baileylu/minimize-flake"; + url = "github:weyl-ai/nimi"; inputs.nixpkgs.follows = "nixpkgs"; }; diff --git a/modules/mk-nix2gpu-container.nix b/modules/mk-nix2gpu-container.nix index 1f42568..a7fe349 100644 --- a/modules/mk-nix2gpu-container.nix +++ b/modules/mk-nix2gpu-container.nix @@ -23,20 +23,24 @@ in nix2gpuCfg = (config.evalNix2GpuModule name module).config; - image = nimi.mkContainerImage { + nimiCfg = 
nimi.evalNimiModule { inherit (nix2gpuCfg) services meta; imports = [ # TODO[baileylu] Find a way to do this transformation less manually + (lib.mkAliasOptionModule [ "bubblewrap" ] [ "settings" "bubblewrap" ]) (lib.mkAliasOptionModule [ "container" ] [ "settings" "container" ]) - (lib.mkAliasOptionModule [ "startup" ] [ "settings" "startup" ]) (lib.mkAliasOptionModule [ "logging" ] [ "settings" "logging" ]) (lib.mkAliasOptionModule [ "restart" ] [ "settings" "restart" ]) + (lib.mkAliasOptionModule [ "startup" ] [ "settings" "startup" ]) nix2gpuCfg.nimiSettings ]; }; + + image = nimi.mkContainerImageWithConfig nimiCfg; + bubblewrap = nimi.mkBwrapWithConfig nimiCfg; in image.overrideAttrs (old: { - passthru = (old.passthru or { }) // nix2gpuCfg.passthru; + passthru = (old.passthru or { }) // nix2gpuCfg.passthru // { runInBubblewrap = bubblewrap; }; }); }; } diff --git a/modules/templates.nix b/modules/templates.nix new file mode 100644 index 0000000..4340ff4 --- /dev/null +++ b/modules/templates.nix @@ -0,0 +1,16 @@ +{ self, ... }: +{ + flake.templates.default = { + path = "${self}/templates/default"; + welcomeText = '' + welcome to `nix2gpu`, providing `nixos` containers for cost-effective and capable gpu compute. 
+ + run your first container with + + - `nix run .#basic.copyToDockerDaemon` + + - `nix run .#basic.shell` + ''; + description = "default nix2gpu template"; + }; +} diff --git a/nix2gpu/base-system/default.nix b/nix2gpu/base-system/default.nix index dece5ca..1dae823 100644 --- a/nix2gpu/base-system/default.nix +++ b/nix2gpu/base-system/default.nix @@ -63,7 +63,5 @@ let } (builtins.readFile ./create-base-system.sh); in { - config.nimiSettings.container.copyToRoot = pkgs.runCommandLocal "base-system" { } ( - lib.getExe script - ); + config.copyToRoot = pkgs.runCommandLocal "base-system" { } (lib.getExe script); } diff --git a/nix2gpu/bubblewrap.nix b/nix2gpu/bubblewrap.nix new file mode 100644 index 0000000..ce1762b --- /dev/null +++ b/nix2gpu/bubblewrap.nix @@ -0,0 +1,60 @@ +{ lib, ... }: +{ + options.bubblewrapTmpfsDirs = lib.mkOption { + type = lib.types.listOf lib.types.str; + description = '' + Directories that should be tmpfs (not bind mounted from copyToRootEnv) + ''; + default = [ + "tmp" + "run" + "var" + "etc" + "root" + "home" + "proc" + "dev" + "nix" + "sys" + ]; + internal = true; + }; + + config.nimiSettings.bubblewrap = { + extraTmpfs = [ + "/tmp" + "/run" + "/var" + "/root" + "/home" + "/etc/ssh" + "/etc/ld.so.conf.d" + ]; + + environment = { + NIX_REMOTE = "daemon"; + NIX2GPU_BUBBLEWRAP_MODE = "1"; + }; + + # Use host's /proc for GPU driver access + # NVIDIA driver requires /proc/driver/nvidia which only exists in host's procfs + bind.proc = false; + prependFlags = [ + "--ro-bind" + "/proc" + "/proc" + ]; + + tryDevBinds = lib.mkAfter [ + { + src = "/dev/net/tun"; + dest = "/dev/net/tun"; + } + { + src = "/nix/var/nix/daemon-socket"; + dest = "/nix/var/nix/daemon-socket"; + } + ]; + }; + +} diff --git a/nix2gpu/container/nix-store-profile.nix b/nix2gpu/container/nix-store-profile.nix index 3c42746..1c3fe47 100644 --- a/nix2gpu/container/nix-store-profile.nix +++ b/nix2gpu/container/nix-store-profile.nix @@ -1,6 +1,6 @@ { pkgs, ... 
}: { - config.nimiSettings.container.copyToRoot = pkgs.runCommand "nix-store-profile" { } '' + config.copyToRoot = pkgs.runCommand "nix-store-profile" { } '' mkdir -p $out/root mkdir -p $out/root/.nix-defexpr touch $out/root/.nix-channels diff --git a/nix2gpu/copy-to-root.nix b/nix2gpu/copy-to-root.nix index 8191758..cc12464 100644 --- a/nix2gpu/copy-to-root.nix +++ b/nix2gpu/copy-to-root.nix @@ -1,4 +1,9 @@ -{ config, lib, ... }: +{ + config, + pkgs, + lib, + ... +}: let inherit (lib) types @@ -6,6 +11,20 @@ let literalExpression literalMD ; + + copyToRootEnv = pkgs.buildEnv { + name = "nix2gpu-copy-to-root"; + paths = config.copyToRoot; + }; + + copyToRootBinds = lib.pipe (builtins.readDir copyToRootEnv) [ + builtins.attrNames + (lib.filter (e: !builtins.elem e config.bubblewrapTmpfsDirs)) + (map (entry: { + src = "${copyToRootEnv}/${entry}"; + dest = "/${entry}"; + })) + ]; in { _class = "nix2gpu"; @@ -35,12 +54,19 @@ in git ]; ''; - type = types.listOf types.package; + type = types.coercedTo types.pathInStore (p: [ p ]) (types.listOf types.pathInStore); default = [ ]; defaultText = literalMD '' The generated base system from the other config options ''; }; - config.nimiSettings.container.copyToRoot = config.copyToRoot; + config = { + inherit copyToRootEnv; + + nimiSettings = { + container.copyToRoot = config.copyToRoot; + bubblewrap.tryRoBinds = lib.mkAfter copyToRootBinds; + }; + }; } diff --git a/nix2gpu/cuda.nix b/nix2gpu/cuda.nix index 8c918cb..b4fd0d7 100644 --- a/nix2gpu/cuda.nix +++ b/nix2gpu/cuda.nix @@ -74,5 +74,82 @@ in "com.nvidia.volumes.needed" = "nvidia_driver"; "com.nvidia.cuda.version" = cfg.packages.cudatoolkit.version; }; + + nimiSettings.bubblewrap.tryRoBinds = lib.mkAfter [ + # NVIDIA libraries from host + { + src = "/lib/x86_64-linux-gnu"; + dest = "/lib/x86_64-linux-gnu"; + } + { + src = "/usr/lib/x86_64-linux-gnu"; + dest = "/usr/lib/x86_64-linux-gnu"; + } + { + src = "/usr/bin/nvidia-smi"; + dest = "/usr/bin/nvidia-smi"; + } + ]; + + 
nimiSettings.bubblewrap.tryDevBinds = lib.mkAfter [ + # NVIDIA GPU devices + { + src = "/dev/nvidiactl"; + dest = "/dev/nvidiactl"; + } + { + src = "/dev/nvidia-modeset"; + dest = "/dev/nvidia-modeset"; + } + { + src = "/dev/nvidia-uvm"; + dest = "/dev/nvidia-uvm"; + } + { + src = "/dev/nvidia-uvm-tools"; + dest = "/dev/nvidia-uvm-tools"; + } + { + src = "/dev/nvidia0"; + dest = "/dev/nvidia0"; + } + { + src = "/dev/nvidia1"; + dest = "/dev/nvidia1"; + } + { + src = "/dev/nvidia2"; + dest = "/dev/nvidia2"; + } + { + src = "/dev/nvidia3"; + dest = "/dev/nvidia3"; + } + { + src = "/dev/nvidia4"; + dest = "/dev/nvidia4"; + } + { + src = "/dev/nvidia5"; + dest = "/dev/nvidia5"; + } + { + src = "/dev/nvidia6"; + dest = "/dev/nvidia6"; + } + { + src = "/dev/nvidia7"; + dest = "/dev/nvidia7"; + } + { + src = "/dev/nvidia-caps"; + dest = "/dev/nvidia-caps"; + } + # Direct Rendering Infrastructure (DRI) devices + { + src = "/dev/dri"; + dest = "/dev/dri"; + } + ]; }; } diff --git a/nix2gpu/env.nix b/nix2gpu/env.nix index eb88fa5..3903f6b 100644 --- a/nix2gpu/env.nix +++ b/nix2gpu/env.nix @@ -66,21 +66,24 @@ in ''; }; - config.nimiSettings.container.imageConfig.Env = - let - translateToGoEnvString = - var: value: + config.nimiSettings = { + container.imageConfig.Env = + let + translateToGoEnvString = + var: value: - assert lib.assertMsg (lib.toUpper var == var) '' - `nix2gpu` env var names should be uppercase - in order to be properly recognized. + assert lib.assertMsg (lib.toUpper var == var) '' + `nix2gpu` env var names should be uppercase + in order to be properly recognized. - The failing attribute name is `${var}`. - ''; + The failing attribute name is `${var}`. 
+ ''; - "${var}=${value}"; + "${var}=${value}"; - totalEnv = config.env // config.extraEnv; - in - lib.mapAttrsToList translateToGoEnvString totalEnv; + totalEnv = config.env // config.extraEnv; + in + lib.mapAttrsToList translateToGoEnvString totalEnv; + bubblewrap.environment = config.env // config.extraEnv; + }; } diff --git a/nix2gpu/environment/core.nix b/nix2gpu/environment/core.nix index 017e54f..b2df0f4 100644 --- a/nix2gpu/environment/core.nix +++ b/nix2gpu/environment/core.nix @@ -22,7 +22,6 @@ procps shadow sudo - tini unzip util-linux neovim diff --git a/nix2gpu/profile.nix b/nix2gpu/profile.nix index 45131f7..084c5bb 100644 --- a/nix2gpu/profile.nix +++ b/nix2gpu/profile.nix @@ -28,5 +28,5 @@ in ]; }; - config.nimiSettings.container.copyToRoot = config.profile; + config.copyToRoot = config.profile; } diff --git a/nix2gpu/startup-script.nix b/nix2gpu/startup-script.nix index 1ac0033..0686434 100644 --- a/nix2gpu/startup-script.nix +++ b/nix2gpu/startup-script.nix @@ -15,12 +15,23 @@ let }; in { - options.startupScript = mkOption { - description = '' - nix2gpu container ${name} startup script. - ''; - type = types.package; - internal = true; + options = { + startupScript = mkOption { + description = '' + nix2gpu container ${name} startup script. + ''; + type = types.package; + internal = true; + }; + + copyToRootEnv = mkOption { + description = '' + Path to the merged copyToRoot environment used for populating + /etc and /root in bubblewrap mode. 
+ ''; + type = types.path; + default = ""; + }; }; config = { @@ -44,8 +55,9 @@ in external = [ "passwd" ]; }; prologue = - (pkgs.writeText "setup-passwd" '' + (pkgs.writeText "setup-env" '' export PATH="${pkgs.shadow}/bin:$PATH" + export NIX2GPU_COPY_TO_ROOT="${config.copyToRootEnv}" '').outPath; } '' diff --git a/nix2gpu/startup-script/startup.sh b/nix2gpu/startup-script/startup.sh index 3f98003..75a5b08 100644 --- a/nix2gpu/startup-script/startup.sh +++ b/nix2gpu/startup-script/startup.sh @@ -4,18 +4,26 @@ set -euo pipefail gum log --level debug "Container initialization starting..." +export HOME="/root" + +# section 1 // runtime directories + gum log --level debug "Writing runtime directories" -# // critical // runtime directories -mkdir -p /tmp /var/tmp /run /run/sshd /var/log /var/empty -chmod 1777 /tmp /var/tmp -chmod 755 /run/sshd +mkdir -p /tmp /var/tmp /run /run/sshd /var/log /var/empty /var/empty/sshd +chmod a+rwx,+t /tmp /var/tmp +chmod u=rwx,g=rx,o=rx /run/sshd +chmod u=rwx,g=rx,o=rx /var/empty +chmod u=rwx,g=,o= /var/empty/sshd + +# section 2 // environment variables gum log --level debug "Setting up environment" export TMPDIR=/tmp export NIX_BUILD_TOP=/tmp +# section 3 // network device setup + gum log --level debug "Enabling userspace networking" -# // devices // userspace networking mkdir -p /dev/net if [ -c /dev/net/tun ]; then @@ -26,8 +34,9 @@ else gum log --level warn "/dev/net/tun not present; TUN-based networking will be unavailable. Try running with --cap-add=MKNOD." fi +# section 4 // nvidia gpu support + gum log --level debug "Generating LD cache..." -# // ldconfig // regenerate cache with NVIDIA libs if [ -d /lib/x86_64-linux-gnu ] && [ "$(ls -A /lib/x86_64-linux-gnu/*.so* 2>/dev/null)" ]; then gum log --level debug "Found NVIDIA libraries, updating ld cache..." 
@@ -40,48 +49,22 @@ if [ -d /lib/x86_64-linux-gnu ] && [ "$(ls -A /lib/x86_64-linux-gnu/*.so* 2>/dev fi done - # Add Nix CUDA paths too for cuda_path in /nix/store/*-cuda*/lib; do [ -d "$cuda_path" ] && echo "$cuda_path" >>/etc/ld.so.conf.d/nix-cuda.conf done - # Regenerate cache ldconfig 2>/dev/null || true - - # Update LD_LIBRARY_PATH for immediate use export LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:/usr/lib64:/usr/lib:${LD_LIBRARY_PATH:-}" fi -# // dynamic // shadow file -if [ ! -f /etc/shadow ]; then - cp /nix/store/*/etc/shadow /etc/shadow - chmod 0640 /etc/shadow -fi - -# // root // password -if [ -n "${ROOT_PASSWORD:-}" ]; then - gum log --level debug "Setting root password..." - echo "root:$ROOT_PASSWORD" | chpasswd -else - gum log --level debug "Enabling passwordless root..." - passwd -d root -fi - -export HOME="/root" - -# // nvidia-smi // validation +# // nvidia-smi // validation and patching +gum log --level debug "Testing nvidia-smi..." if [ -e /usr/bin/nvidia-smi ]; then - gum log --level debug "Testing nvidia-smi..." - - # First check if it needs patching if ! /usr/bin/nvidia-smi --version &>/dev/null; then gum log --level debug "Patching nvidia-smi..." 
- # Find the correct interpreter INTERP=$(find /nix/store -name "ld-linux-x86-64.so.2" -type f | head -1) ([ -n "$INTERP" ] && patchelf --set-interpreter "$INTERP" /usr/bin/nvidia-smi 2>/dev/null) || true - - # Set rpath to include the ACTUAL library locations patchelf --set-rpath "/lib/x86_64-linux-gnu:/usr/lib64:/usr/lib" /usr/bin/nvidia-smi 2>/dev/null || true fi @@ -89,7 +72,6 @@ if [ -e /usr/bin/nvidia-smi ]; then gum log --level debug "GPU ready: $(/usr/bin/nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1)" else gum log --level warn "nvidia-smi not functional" - # Debug info gum log --level debug "Library dependencies:" ldd /usr/bin/nvidia-smi 2>&1 | head -10 || true gum log --level debug "Available NVIDIA libraries:" @@ -97,8 +79,39 @@ if [ -e /usr/bin/nvidia-smi ]; then fi fi +# section 5 // authentication setup + +if [ -n "${NIX2GPU_COPY_TO_ROOT:-}" ] && [ -d "$NIX2GPU_COPY_TO_ROOT/etc" ]; then + gum log --level debug "Setting up /etc from nix store..." + + cp -r --reflink=auto --no-preserve=mode,ownership "$NIX2GPU_COPY_TO_ROOT/etc/"* /etc/ 2>/dev/null || true +fi + +# // root // password +gum log --level debug "Configuring root authentication..." +if [ -n "${ROOT_PASSWORD:-}" ]; then + echo "root:$ROOT_PASSWORD" | chpasswd +else + passwd -d root +fi + +# section 6 // ssh setup + +gum log --level debug "Configuring SSH..." + +# Generate host keys if missing +for type in rsa ed25519; do + key="/etc/ssh/ssh_host_${type}_key" + [ ! -f "$key" ] && ssh-keygen -t "$type" -f "$key" -N "" >/dev/null 2>&1 +done + +# In bubblewrap mode, /etc/ssh may be a tmpfs overlay, so only touch sshd_config if it exists. +# Configure sshd to use unprivileged port 2222 instead of 22 when in bubblewrap mode +if [ -f /etc/ssh/sshd_config ] && [ "${NIX2GPU_BUBBLEWRAP_MODE:-}" = "1" ]; then + sed -i 's/^Port 22$/Port 2222/' /etc/ssh/sshd_config +fi + gum log --level debug "Adding SSH keys..." 
-# // ssh // keys mkdir -p "$HOME/.ssh" chmod 700 "$HOME/.ssh" if [ -n "${SSH_PUBLIC_KEYS:-}" ]; then @@ -106,10 +119,7 @@ if [ -n "${SSH_PUBLIC_KEYS:-}" ]; then chmod 600 "$HOME/.ssh/authorized_keys" fi -for type in rsa ed25519; do - key="/etc/ssh/ssh_host_${type}_key" - [ ! -f "$key" ] && ssh-keygen -t "$type" -f "$key" -N "" >/dev/null 2>&1 -done +# section 7 // xdg directories gum log --level debug "Setting XDG dirs" export XDG_DATA_HOME="$HOME/.local/share" @@ -121,5 +131,6 @@ export XDG_CACHE_HOME="$HOME/.cache" export XDG_RUNTIME_DIR="/run/user/$UID" export XDG_BIN_HOME="$HOME/.local/bin" -# // config // extra startup script +# section 8 // finalization + gum log --level debug "Running extra startup script..." diff --git a/nix2gpu/user.nix b/nix2gpu/user.nix index 8017a6c..f428f25 100644 --- a/nix2gpu/user.nix +++ b/nix2gpu/user.nix @@ -31,5 +31,19 @@ in defaultText = literalMD "root"; }; - config.nimiSettings.container.imageConfig.User = config.user; + config.nimiSettings = { + container.imageConfig.User = config.user; + bubblewrap.uid = + assert lib.assertMsg (config.nix2gpuUsers ? ${config.user}) '' + `${config.user}` is not a valid user name as per your + `nix2gpu` config. 
+ + The current set of valid users (config.nix2gpuUsers) is: + ```nix + ${lib.generators.toPretty { } config.nix2gpuUsers} + ``` + ''; + + config.nix2gpuUsers.${config.user}.uid; + }; } diff --git a/nix2gpu/working-dir.nix b/nix2gpu/working-dir.nix index 3a498b2..6b681f5 100644 --- a/nix2gpu/working-dir.nix +++ b/nix2gpu/working-dir.nix @@ -30,5 +30,8 @@ in defaultText = literalMD "`/root`"; }; - config.nimiSettings.container.imageConfig.WorkingDir = config.workingDir; + config.nimiSettings = { + container.imageConfig.WorkingDir = config.workingDir; + bubblewrap.chdir = config.workingDir; + }; } diff --git a/templates/basic/README.md b/templates/default/README.md similarity index 100% rename from templates/basic/README.md rename to templates/default/README.md diff --git a/templates/basic/default.nix b/templates/default/default.nix similarity index 100% rename from templates/basic/default.nix rename to templates/default/default.nix diff --git a/templates/basic/flake.lock b/templates/default/flake.lock similarity index 100% rename from templates/basic/flake.lock rename to templates/default/flake.lock diff --git a/templates/basic/flake.nix b/templates/default/flake.nix similarity index 100% rename from templates/basic/flake.nix rename to templates/default/flake.nix