-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinstance.nix
More file actions
354 lines (317 loc) · 12.1 KB
/
Copy pathinstance.nix
File metadata and controls
354 lines (317 loc) · 12.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# Seed instance module — tenant-facing options for Kata VM workloads
#
# This module lives in a separate NixOS evaluation from module.nix (the node module).
# Node-level: seed.enable, seed.hypervisor, seed.k3s.*
# Instance-level (this file): seed.size, seed.expose, seed.storage, seed.connect,
# seed.rollout, seed.acme, seed.dns, seed.shoot (plus internal seed.namespace, seed.meta)
{ config, lib, pkgs, ... }:
let
cfg = config.seed;
# Size tiers: every seed.size name maps to a { vcpus, memory } pair.
# The memory figures line up with the MB/GB amounts quoted in the seed.size
# option description (512 -> "512MB", 1024 -> "1GB", ...).
sizeTiers =
  let
    mkTier = vcpus: memory: { inherit vcpus memory; };
  in {
    xs = mkTier 1 512;
    s  = mkTier 1 1024;
    m  = mkTier 2 2048;
    l  = mkTier 4 4096;
    xl = mkTier 8 8192;
  };

# Resource pair for the tier selected by this instance's seed.size.
tier = builtins.getAttr cfg.size sizeTiers;
# Table of well-known services. An entry supplies the default port and seed
# protocol for a seed.expose entry of the same name, following /etc/services
# conventions. Written as a literal table so flake evaluation stays pure.
knownServices =
  let
    entry = protocol: port: { inherit port protocol; };
    tcpAt  = entry "tcp";
    dnsAt  = entry "dns";   # TCP+UDP
    httpAt = entry "http";  # ACME-enabled
    grpcAt = entry "grpc";  # ACME-enabled
  in {
    http       = tcpAt 80;
    https      = httpAt 443;
    ssh        = tcpAt 22;
    dns        = dnsAt 53;
    domain     = dnsAt 53;
    smtp       = tcpAt 25;
    smtps      = tcpAt 465;
    imaps      = tcpAt 993;
    postgresql = tcpAt 5432;
    mysql      = tcpAt 3306;
    redis      = tcpAt 6379;
    grpc       = grpcAt 443;
  };
# Submodule type for seed.expose.<name>. When <name> is a key of
# knownServices, that entry supplies the port and protocol defaults.
exposeSubmodule = lib.types.submodule ({ name, ... }:
  let
    inherit (lib) mkOption types;
    isWellKnown = builtins.hasAttr name knownServices;
    # Lazy: only forced on the isWellKnown branches below.
    wellKnown = knownServices.${name};
  in {
    options.enable = lib.mkEnableOption "Expose this port" // { default = true; };

    options.port = mkOption {
      type = types.port;
      # No table entry means no sensible default: evaluation aborts with a
      # message naming the offending entry unless a port is set explicitly.
      default =
        if isWellKnown
        then wellKnown.port
        else throw "seed.expose.${name}: port required — '${name}' is not a well-known service. Set port explicitly.";
      description = "Port number. Defaults to well-known port for the entry name.";
    };

    options.protocol = mkOption {
      type = types.enum [ "tcp" "udp" "dns" "http" "grpc" ];
      default = if isWellKnown then wellKnown.protocol else "tcp";
      description = ''
        Protocol hint for the controller. Defaults to well-known protocol.
        "dns" exposes on both TCP and UDP. "http"/"grpc" enable ACME.
      '';
    };
  });
# Submodule type for seed.storage.<name>: a required size plus mount point,
# ownership and permission settings that all carry defaults.
storageSubmodule = lib.types.submodule ({ name, ... }:
  let
    # String-typed option with the given default and description.
    strOption = default: description: lib.mkOption {
      type = lib.types.str;
      inherit default description;
    };
  in {
    options = {
      # The only option without a default.
      size = lib.mkOption {
        type = lib.types.str;
        description = "Storage size (e.g. \"1Gi\", \"500Mi\").";
      };
      mountPoint = strOption "/seed/storage/${name}" "Mount point inside the instance.";
      user = strOption "root" "Owner user for the mount point directory.";
      group = strOption "root" "Owner group for the mount point directory.";
      mode = strOption "0755" "Permissions for the mount point directory.";
    };
  });
# Submodule type for seed.connect.<name>: the target service name and an
# optional port override (null when no override is given).
connectSubmodule =
  let
    inherit (lib) mkOption;
    inherit (lib.types) nullOr port str submodule;
  in submodule {
    options.service = mkOption {
      type = str;
      description = "Service name to connect to.";
    };

    options.port = mkOption {
      type = nullOr port;
      default = null;
      description = "Port override (defaults to service's default port).";
    };
  };
in {
# Tenant-facing option declarations. The values are folded into seed.meta
# and into system configuration by the `config` section of this module.
options.seed = {
  # Platform-assigned namespace, marked internal.
  # NOTE(review): nothing in this file rejects a tenant-supplied definition;
  # the "cannot override" guarantee presumably lives in mkInstance — confirm.
  namespace = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    internal = true;
    description = ''
      The k8s namespace this instance runs in. Platform-assigned, derived
      from the flake's .seed-identity (IPNS CID). Set by mkInstance only —
      instances cannot override this (prevents namespace spoofing).
    '';
  };

  # Must stay in sync with the keys of sizeTiers.
  size = lib.mkOption {
    type = lib.types.enum [ "xs" "s" "m" "l" "xl" ];
    default = "xs";
    description = ''
      Instance size tier. Maps to vCPU/memory:
      xs: 1 vCPU, 512MB — s: 1 vCPU, 1GB — m: 2 vCPU, 2GB — l: 4 vCPU, 4GB — xl: 8 vCPU, 8GB
    '';
  };

  # Shorthand: a bare port number is coerced to `{ port = <n>; }`.
  expose = lib.mkOption {
    type = lib.types.attrsOf (lib.types.coercedTo
      lib.types.port
      (port: { inherit port; })
      exposeSubmodule
    );
    default = {};
    example = { http = 8080; grpc = { port = 9090; protocol = "grpc"; }; };
    description = "Ports to expose via ingress.";
  };

  # Shorthand: a bare string is coerced to `{ size = <str>; }`.
  storage = lib.mkOption {
    type = lib.types.attrsOf (lib.types.coercedTo
      lib.types.str
      (size: { inherit size; })
      storageSubmodule
    );
    default = {};
    example = { data = "1Gi"; cache = { size = "500Mi"; mountPoint = "/tmp/cache"; }; };
    description = "Persistent volumes for the instance.";
  };

  # Shorthand: a bare string is coerced to `{ service = <str>; }`.
  connect = lib.mkOption {
    type = lib.types.attrsOf (lib.types.coercedTo
      lib.types.str
      (service: { inherit service; })
      connectSubmodule
    );
    default = {};
    example = { redis = "my-redis"; db = { service = "postgres"; port = 5432; }; };
    description = "Service connections available inside the instance.";
  };

  rollout = lib.mkOption {
    type = lib.types.enum [ "recreate" "rolling" ];
    default = "recreate";
    description = ''
      Deployment rollout strategy.
      "recreate" stops the old pod before starting the new one (safe for stateful).
      "rolling" starts the new pod before stopping the old (zero-downtime for stateless).
    '';
  };

  acme = lib.mkOption {
    type = lib.types.bool;
    # True as soon as one enabled expose entry uses the "http" or "grpc"
    # protocol.
    default = builtins.any (e: e.enable && (e.protocol == "http" || e.protocol == "grpc"))
      (builtins.attrValues cfg.expose);
    # literalExpression is rendered verbatim as Nix code, so it carries the
    # real default expression rather than an English sentence.
    defaultText = lib.literalExpression ''
      builtins.any
        (e: e.enable && (e.protocol == "http" || e.protocol == "grpc"))
        (builtins.attrValues config.seed.expose)
    '';
    description = ''
      Enable platform ACME for TLS certificates. When true, the controller
      injects SEED_ACME_URL pointing to its embedded ACME endpoint. Instances
      use their web server's built-in ACME client (e.g. Caddy's ca directive)
      to obtain Let's Encrypt-signed certificates automatically.
      Defaults to true when any expose entry uses "http" or "grpc" protocol.
      Set explicitly for services not on the whitelist, or false to opt out.
    '';
  };

  dns = {
    names = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [];
      example = [ "id.loom.farm" "loom.farm" ];
      description = ''
        Custom DNS names for this instance. Each name gets an AAAA record
        pointing at the instance's ingress IPv6 address (from MetalLB).
        Zone apex names (e.g. "loom.farm") automatically generate a wildcard
        record (*.loom.farm) too.
      '';
    };
  };

  shoot = {
    enable = lib.mkOption {
      type = lib.types.bool;
      default = false;
      description = ''
        Enable fork-style ephemeral VM execution via the pool manager.
        When enabled, the instance gets a seed-shoot command and SEED_SHOOT_URL
        env var pointing to the node-local pool manager.
      '';
    };
  };

  # Read-only output: its single definition is the seed.meta assignment in
  # the `config` section of this module.
  meta = lib.mkOption {
    type = lib.types.attrs;
    readOnly = true;
    internal = true;
    description = "Controller-consumable metadata computed from seed options.";
  };
};
config = let
  # Expose entries that have not been switched off with `enable = false`.
  enabledExpose = lib.filterAttrs (_: e: e.enable) cfg.expose;

  # Directory URL of the platform's embedded ACME endpoint; shared by
  # security.acme and Caddy so the two settings cannot drift apart.
  acmeDirectoryUrl =
    "https://seed-controller.seed-system.svc.cluster.local:9876/acme/directory";
in {
  # Denormalized metadata for the controller
  seed.meta = {
    namespace = cfg.namespace;
    size = cfg.size;
    resources = tier;
    # Only enabled entries are reported; the `enable` flag itself is dropped.
    expose = lib.mapAttrs (_: e: {
      inherit (e) port protocol;
    }) enabledExpose;
    # user/group/mode are consumed by the tmpfiles rules below and are not
    # part of the metadata.
    storage = lib.mapAttrs (_: s: {
      inherit (s) size mountPoint;
    }) cfg.storage;
    connect = lib.mapAttrs (_: c: {
      inherit (c) service port;
    }) cfg.connect;
    rollout = cfg.rollout;
    acme = cfg.acme;
    # Empty attrsets when the feature is unused.
    shoot = lib.optionalAttrs cfg.shoot.enable { enable = true; };
    dns = lib.optionalAttrs (cfg.dns.names != []) { names = cfg.dns.names; };
  };

  # seed-shoot wrapper script (installed only when seed.shoot.enable is set)
  environment.systemPackages = lib.optionals cfg.shoot.enable [
    (pkgs.writeShellScriptBin "seed-shoot" ''
      # seed-shoot — fork an ephemeral VM via the pool manager
      # Usage: seed-shoot [--timeout MS] command [args...]

      # Source SEED_* env vars (systemd strips them in Kata VMs)
      [ -f /run/seed/env ] && . /run/seed/env

      usage() {
        echo "Usage: seed-shoot [--timeout MS] command [args...]" >&2
        exit 1
      }

      timeout=""
      while [ $# -gt 0 ]; do
        case "$1" in
          --timeout)
            # A trailing --timeout with no value would make `shift 2` fail
            # without shifting, so the loop would never terminate.
            [ $# -ge 2 ] || usage
            timeout="$2"
            shift 2
            ;;
          *) break ;;
        esac
      done

      [ $# -gt 0 ] || usage

      url="''${SEED_SHOOT_URL:-}"
      if [ -z "$url" ]; then
        echo "error: SEED_SHOOT_URL not set" >&2
        exit 1
      fi

      # Build JSON command array. Every argument is encoded by its own jq
      # call, so an argument containing newlines stays one array element.
      cmd_json=$(
        for arg in "$@"; do
          ${pkgs.jq}/bin/jq -n --arg v "$arg" '$v'
        done | ${pkgs.jq}/bin/jq -s .
      )

      # Build request. The command array is valid JSON by construction, so
      # a failure here means the timeout value is not valid JSON.
      request=$(${pkgs.jq}/bin/jq -nc \
        --argjson command "$cmd_json" \
        --argjson timeout "''${timeout:-120000}" \
        '{command: $command, timeout: $timeout}') || {
        echo "error: invalid --timeout value: $timeout" >&2
        exit 1
      }

      # POST to pool manager
      response=$(${pkgs.curl}/bin/curl -s -X POST \
        -H "Content-Type: application/json" \
        -d "$request" \
        "$url/shoot" 2>/dev/null) || {
        echo "error: request to $url/shoot failed" >&2
        exit 1
      }

      # Parse response
      exit_code=$(printf '%s\n' "$response" | ${pkgs.jq}/bin/jq -r '.exitCode // 1')
      stdout=$(printf '%s\n' "$response" | ${pkgs.jq}/bin/jq -r '.stdout // ""')
      stderr=$(printf '%s\n' "$response" | ${pkgs.jq}/bin/jq -r '.stderr // ""')
      error=$(printf '%s\n' "$response" | ${pkgs.jq}/bin/jq -r '.error // ""')

      if [ -n "$error" ]; then
        echo "shoot error: $error" >&2
        exit 1
      fi

      # An empty or non-JSON body leaves exit_code empty; report that
      # rather than handing an empty string to `exit`.
      if [ -z "$exit_code" ]; then
        echo "shoot error: unparseable response from pool manager" >&2
        exit 1
      fi

      [ -n "$stdout" ] && printf '%s\n' "$stdout"
      [ -n "$stderr" ] && printf '%s\n' "$stderr" >&2
      exit "$exit_code"
    '')
  ];

  # Create mount point directories for storage volumes
  systemd.tmpfiles.rules = lib.mapAttrsToList
    (_: s: "d ${s.mountPoint} ${s.mode} ${s.user} ${s.group} -")
    cfg.storage;

  # Open firewall for exposed ports. Everything except pure "udp" entries
  # is opened on TCP; "dns" entries are opened on UDP as well.
  networking.firewall.allowedTCPPorts =
    lib.pipe enabledExpose [
      (lib.filterAttrs (_: e: e.protocol != "udp"))
      (lib.mapAttrsToList (_: e: e.port))
    ];
  networking.firewall.allowedUDPPorts =
    lib.pipe enabledExpose [
      (lib.filterAttrs (_: e: e.protocol == "udp" || e.protocol == "dns"))
      (lib.mapAttrsToList (_: e: e.port))
    ];

  # When ACME is enabled, configure security.acme and Caddy to use the
  # platform's embedded ACME endpoint. NixOS requires acceptTerms + email
  # even though our internal server doesn't use them (it proxies to LE).
  security.acme = lib.mkIf cfg.acme {
    acceptTerms = true;
    defaults.server = acmeDirectoryUrl;
    defaults.email = lib.mkDefault "acme@seed.loom.farm";
  };

  # Caddy has its own ACME client (doesn't use security.acme). Point it
  # at the platform endpoint so TLS works without manual Caddyfile config.
  services.caddy.acmeCA = lib.mkIf cfg.acme acmeDirectoryUrl;

  # Service discovery: environment variables. The connect name is
  # upper-cased and "-" becomes "_", giving SEED_<NAME>_HOST.
  environment.sessionVariables = lib.mapAttrs'
    (name: c: lib.nameValuePair
      "SEED_${lib.toUpper (builtins.replaceStrings ["-"] ["_"] name)}_HOST"
      c.service
    )
    cfg.connect;

  # Service discovery: files at /seed/connect/<name>, holding "service" or
  # "service:port" followed by a newline.
  environment.etc = lib.mapAttrs'
    (name: c: lib.nameValuePair
      "seed/connect/${name}"
      { text = c.service + (lib.optionalString (c.port != null) ":${toString c.port}") + "\n"; }
    )
    cfg.connect;
};
}