diff --git a/swap-orchestrator/README.md b/swap-orchestrator/README.md index eebd9841da..7d48d3862f 100644 --- a/swap-orchestrator/README.md +++ b/swap-orchestrator/README.md @@ -42,6 +42,8 @@ Run the command below to start the wizard. It’ll guide you through a bunch of To also ship the `asb` tracing logs and the `bitcoind`/`monerod`/`electrs` container logs to a Loki endpoint, set `PROMTAIL_LOKI_PUSH_URL`, `PROMTAIL_LOKI_PUSH_TOKEN`, and `PROMTAIL_INSTANCE` before running the orchestrator — this adds `promtail` and `docker-socket-proxy` services to the generated `docker-compose.yml`. All streams are labelled with `host=`; the daemon logs additionally carry `job=node` and `container=`. +To additionally ship per-container resource metrics (cpu, memory, pids, network, fs) to a Prometheus `remote_write` endpoint, set `METRICS_REMOTE_WRITE_URL` — this adds `cadvisor` and `prometheus-agent` services to the generated `docker-compose.yml`. Metrics reuse the Promtail bearer token and `host=` label so that metrics and logs share one Grafana selector; the `PROMTAIL_*` variables must therefore be set as well, otherwise the orchestrator aborts with an error. The central collector (`scripts/logging`) gates the metrics endpoint with the same token as the Loki push, behind the same Cloudflare tunnel. + If the `asb`/`asb-controller`/`rendezvous-node` images are built from a **private** GitHub repository, set `GH_TOKEN` to a token with read access before running the orchestrator. The token is inlined into the build-context URL in the generated `docker-compose.yml` so `docker compose build` can clone the repository.
/// Settings for shipping container resource metrics to a Prometheus
/// `remote_write` endpoint.
#[derive(Clone)]
pub struct MetricsConfig {
    /// Endpoint the Prometheus agent pushes samples to.
    pub remote_write_url: String,
    /// Bearer token written into the `remote_write` section.
    pub token: String,
    /// Value of the `host` external label attached to everything the agent ships.
    pub instance: String,
}

/// Renders the `prometheus.yml` for the Prometheus agent.
///
/// The generated config scrapes `cadvisor:8080` every 30 seconds, tags all
/// series with `host = cfg.instance`, and pushes them to
/// `cfg.remote_write_url` using `cfg.token` as the bearer token.
///
/// All three values are emitted as single-quoted YAML scalars, so characters
/// such as `:` or `#` inside them cannot change the document structure.
pub fn build_prometheus_agent_yml(cfg: &MetricsConfig) -> String {
    // Inside a single-quoted YAML scalar the only character that needs
    // escaping is the quote itself, which is written twice.
    let quote = |raw: &str| -> String {
        let mut quoted = String::with_capacity(raw.len() + 2);
        quoted.push('\'');
        for ch in raw.chars() {
            if ch == '\'' {
                quoted.push('\'');
            }
            quoted.push(ch);
        }
        quoted.push('\'');
        quoted
    };

    let host_label = quote(&cfg.instance);
    let push_url = quote(&cfg.remote_write_url);
    let bearer = quote(&cfg.token);

    let mut yml = String::new();
    yml.push_str("global:\n");
    yml.push_str("  scrape_interval: 30s\n");
    yml.push_str("  external_labels:\n");
    yml.push_str(&format!("    host: {}\n", host_label));
    yml.push('\n');
    yml.push_str("scrape_configs:\n");
    yml.push_str("  - job_name: cadvisor\n");
    yml.push_str("    static_configs:\n");
    yml.push_str("      - targets: ['cadvisor:8080']\n");
    yml.push('\n');
    yml.push_str("remote_write:\n");
    yml.push_str(&format!("  - url: {}\n", push_url));
    yml.push_str(&format!("    bearer_token: {}\n", bearer));
    yml
}
(https://hub.docker.com/r/tecnativa/docker-socket-proxy) pub static DOCKER_SOCKET_PROXY_IMAGE: &str = "tecnativa/docker-socket-proxy@sha256:9e4b9e7517a6b660f2cc903a19b257b1852d5b3344794e3ea334ff00ae677ac2"; +/// cadvisor v0.52.1 (https://github.com/google/cadvisor/pkgs/container/cadvisor) +pub static CADVISOR_IMAGE: &str = "gcr.io/cadvisor/cadvisor@sha256:f40e65878e25c2e78ea037f73a449527a0fb994e303dc3e34cb6b187b4b91435"; + +/// prometheus v3.1.0 (https://hub.docker.com/r/prom/prometheus) +pub static PROMETHEUS_IMAGE: &str = "prom/prometheus@sha256:6559acbd5d770b15bb3c954629ce190ac3cbbdb2b7f1c30f0385c4e05104e218"; + /// Build-context URL for the source-built images. A `gh_token` is inlined into /// the URL userinfo so Docker can fetch a private repository — note this writes /// the token into `docker-compose.yml` in plaintext. diff --git a/swap-orchestrator/src/main.rs b/swap-orchestrator/src/main.rs index c8c9ae31e6..d8ead92696 100644 --- a/swap-orchestrator/src/main.rs +++ b/swap-orchestrator/src/main.rs @@ -6,9 +6,10 @@ mod prompt; use swap_orchestrator as _; use crate::compose::{ - ASB_DATA_DIR, CloudflaredConfig, DOCKER_COMPOSE_FILE, IntoSpec, OrchestratorDirectories, - OrchestratorImage, OrchestratorImages, OrchestratorInput, OrchestratorNetworks, - PROMTAIL_CONFIG_FILE, PromtailConfig, build_promtail_yml, + ASB_DATA_DIR, CloudflaredConfig, DOCKER_COMPOSE_FILE, IntoSpec, MetricsConfig, + OrchestratorDirectories, OrchestratorImage, OrchestratorImages, OrchestratorInput, + OrchestratorNetworks, PROMETHEUS_CONFIG_FILE, PROMTAIL_CONFIG_FILE, PromtailConfig, + build_prometheus_agent_yml, build_promtail_yml, }; use libp2p::Multiaddr; use libp2p::multiaddr::Protocol; @@ -141,6 +142,26 @@ fn read_promtail_config_from_env() -> Option { }) } +fn read_metrics_config_from_env(promtail: Option<&PromtailConfig>) -> Option { + let remote_write_url = std::env::var("METRICS_REMOTE_WRITE_URL").ok()?; + + if remote_write_url.trim().is_empty() { + panic!("METRICS_REMOTE_WRITE_URL 
must not be empty."); + } + + let promtail = promtail.unwrap_or_else(|| { + panic!( + "METRICS_REMOTE_WRITE_URL is set but Promtail is not configured. Metrics reuse the Promtail bearer token and instance label, so set the PROMTAIL_* variables as well." + ) + }); + + Some(MetricsConfig { + remote_write_url, + token: promtail.loki_push_token.clone(), + instance: promtail.instance.clone(), + }) +} + /// `GH_TOKEN` for fetching a private build-context repository; `None` if unset /// or empty. See [`images::source_build_context`]. fn read_gh_token_from_env() -> Option { @@ -159,6 +180,7 @@ fn main() { // Promtail log shipping is opt-in via env vars; same rationale as the // Cloudflare integration above. let promtail_config = read_promtail_config_from_env(); + let metrics_config = read_metrics_config_from_env(promtail_config.as_ref()); // Opt-in: inlined into the build-context URL so Docker can fetch a private repo. let gh_token = read_gh_token_from_env(); let source_build_context = images::source_build_context(gh_token.as_deref()); @@ -210,6 +232,8 @@ fn main() { docker_socket_proxy: OrchestratorImage::Registry( images::DOCKER_SOCKET_PROXY_IMAGE.to_string(), ), + cadvisor: OrchestratorImage::Registry(images::CADVISOR_IMAGE.to_string()), + prometheus_agent: OrchestratorImage::Registry(images::PROMETHEUS_IMAGE.to_string()), }, directories: OrchestratorDirectories { asb_data_dir: PathBuf::from(ASB_DATA_DIR), @@ -217,6 +241,7 @@ fn main() { want_tor, cloudflared: cloudflared_config.clone(), promtail: promtail_config.clone(), + metrics: metrics_config.clone(), }; // If the config file already exists and be de-serialized, @@ -393,6 +418,11 @@ fn main() { .expect("Failed to write promtail.yml"); } + if let Some(metrics) = metrics_config.as_ref() { + std::fs::write(PROMETHEUS_CONFIG_FILE, build_prometheus_agent_yml(metrics)) + .expect("Failed to write prometheus.yml"); + } + // Write the compose to ./docker-compose.yml let compose = recipe.to_spec(); 
std::fs::write(DOCKER_COMPOSE_FILE, compose).expect("Failed to write docker-compose.yml"); @@ -407,6 +437,10 @@ fn main() { if let Some(promtail) = promtail_config.as_ref() { print_promtail_instructions(promtail); } + + if let Some(metrics) = metrics_config.as_ref() { + print_metrics_instructions(metrics); + } } /// Reads the ASB config from disk, inserts the WebSocket listen address and @@ -512,6 +546,17 @@ fn print_promtail_instructions(promtail: &PromtailConfig) { ); } +fn print_metrics_instructions(metrics: &MetricsConfig) { + println!(); + println!("Prometheus metrics shipping is enabled."); + println!(" - Instance label (host): {}", metrics.instance); + println!(" - Remote write URL: {}", metrics.remote_write_url); + println!(" - Config written to: {}", PROMETHEUS_CONFIG_FILE); + println!(" - Ships: per-container cpu/memory/pids/network/fs via cadvisor"); + println!(" - Verify after `docker compose up -d`:"); + println!(" docker compose logs --tail 50 prometheus-agent"); +} + fn unix_epoch_secs() -> u64 { std::time::UNIX_EPOCH .elapsed() diff --git a/swap-orchestrator/tests/spec.rs b/swap-orchestrator/tests/spec.rs index b20a66bda5..42e84c1174 100644 --- a/swap-orchestrator/tests/spec.rs +++ b/swap-orchestrator/tests/spec.rs @@ -1,9 +1,9 @@ #![allow(unused_crate_dependencies)] use swap_orchestrator::compose::{ - CloudflaredConfig, IntoSpec, OrchestratorDirectories, OrchestratorImage, OrchestratorImages, - OrchestratorInput, OrchestratorNetworks, OrchestratorPorts, PromtailConfig, - build_promtail_yml, + CloudflaredConfig, IntoSpec, MetricsConfig, OrchestratorDirectories, OrchestratorImage, + OrchestratorImages, OrchestratorInput, OrchestratorNetworks, OrchestratorPorts, PromtailConfig, + build_prometheus_agent_yml, build_promtail_yml, }; use swap_orchestrator::images; @@ -11,6 +11,7 @@ fn make_input( want_tor: bool, cloudflared: Option, promtail: Option, + metrics: Option, ) -> OrchestratorInput { let source_build_context = images::source_build_context(None); 
OrchestratorInput { @@ -48,6 +49,8 @@ fn make_input( docker_socket_proxy: OrchestratorImage::Registry( images::DOCKER_SOCKET_PROXY_IMAGE.to_string(), ), + cadvisor: OrchestratorImage::Registry(images::CADVISOR_IMAGE.to_string()), + prometheus_agent: OrchestratorImage::Registry(images::PROMETHEUS_IMAGE.to_string()), }, directories: OrchestratorDirectories { asb_data_dir: std::path::PathBuf::from(swap_orchestrator::compose::ASB_DATA_DIR), @@ -55,6 +58,7 @@ fn make_input( want_tor, cloudflared, promtail, + metrics, } } @@ -75,17 +79,25 @@ fn sample_promtail_config() -> PromtailConfig { } } +fn sample_metrics_config() -> MetricsConfig { + MetricsConfig { + remote_write_url: "https://loki-asb-logs.example.com/api/v1/write".to_string(), + token: "test-token".to_string(), + instance: "asb-test-1".to_string(), + } +} + #[test] fn test_orchestrator_spec_generation() { // `to_spec` runs `validate_compose` internally, so generating each // variant is enough to catch indentation regressions in the optional // tor / cloudflared / promtail segments. 
- let _ = make_input(false, None, None).to_spec(); - let _ = make_input(true, None, None).to_spec(); - let _ = make_input(false, Some(sample_cloudflared_config()), None).to_spec(); - let _ = make_input(true, Some(sample_cloudflared_config()), None).to_spec(); - let compose = make_input(false, None, Some(sample_promtail_config())).to_spec(); - let _ = make_input(true, None, Some(sample_promtail_config())).to_spec(); + let _ = make_input(false, None, None, None).to_spec(); + let _ = make_input(true, None, None, None).to_spec(); + let _ = make_input(false, Some(sample_cloudflared_config()), None, None).to_spec(); + let _ = make_input(true, Some(sample_cloudflared_config()), None, None).to_spec(); + let compose = make_input(false, None, Some(sample_promtail_config()), None).to_spec(); + let _ = make_input(true, None, Some(sample_promtail_config()), None).to_spec(); // promtail's docker SD needs the networks API, not just containers, or // discovery 403s on /networks and no node logs ship. @@ -94,8 +106,26 @@ fn test_orchestrator_spec_generation() { true, Some(sample_cloudflared_config()), Some(sample_promtail_config()), + None, + ) + .to_spec(); + + // With metrics enabled, both cadvisor and the prometheus agent must appear. + let metrics_compose = make_input( + true, + Some(sample_cloudflared_config()), + Some(sample_promtail_config()), + Some(sample_metrics_config()), ) .to_spec(); + assert!(metrics_compose.contains("container_name: cadvisor")); + assert!(metrics_compose.contains("container_name: prometheus-agent")); + assert!(metrics_compose.contains("prometheus-agent-data:")); + + // Without metrics, neither service is generated. 
/// The rendered agent config must parse as YAML and carry the host label,
/// the cadvisor scrape target, and the remote write endpoint with its token.
#[test]
fn test_prometheus_agent_yml_is_valid_and_wired() {
    let cfg = sample_metrics_config();
    let rendered = build_prometheus_agent_yml(&cfg);
    let doc: serde_yaml::Value =
        serde_yaml::from_str(&rendered).expect("prometheus.yml must be valid YAML");

    // Same host value as the Promtail instance, so one Grafana selector
    // covers both metrics and logs.
    let host = doc["global"]["external_labels"]["host"].as_str();
    assert_eq!(host, Some("asb-test-1"));

    // Scrape side: the local cadvisor container.
    let target = doc["scrape_configs"][0]["static_configs"][0]["targets"][0].as_str();
    assert_eq!(target, Some("cadvisor:8080"));

    // Push side: remote endpoint authenticated with the shared bearer token.
    let push = &doc["remote_write"][0];
    let url = push["url"].as_str();
    let token = push["bearer_token"].as_str();
    assert_eq!(url, Some("https://loki-asb-logs.example.com/api/v1/write"));
    assert_eq!(token, Some("test-token"));
}