diff --git a/.config/nextest.toml b/.config/nextest.toml index a549d4068f2..15e247f7bec 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -1,5 +1,5 @@ [test-groups] -run-in-isolation = { max-threads = 32 } +run-in-isolation = { max-threads = 32 } # these are tests that must not run with other tests concurrently. All tests in # this group can take up at most 32 threads among them, but each one requiring # 16 threads also. The effect should be that tests run isolated. @@ -15,4 +15,12 @@ test-group = 'run-in-isolation' threads-required = 32 [profile.default] +default-filter = 'not binary(patchbay)' slow-timeout = { period = "10s", terminate-after = 3 } + +[profile.patchbay] +fail-fast = false +retries = 1 +test-threads = 4 +default-filter = 'binary(patchbay)' +slow-timeout = { period = "30s", terminate-after = 4 } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 98b56505259..210a7cd7f9d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,8 @@ jobs: if: "github.event_name != 'pull_request' || ! 
contains(github.event.pull_request.labels.*.name, 'flaky-test')" timeout-minutes: 30 runs-on: [self-hosted, linux, X64] + env: + RUSTFLAGS: "-Dwarnings --cfg skip_patchbay" strategy: fail-fast: false matrix: @@ -135,6 +137,8 @@ jobs: matrix: target: - i686-unknown-linux-gnu + env: + RUSTFLAGS: "-Dwarnings --cfg skip_patchbay" steps: - name: Checkout uses: actions/checkout@v6 diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml new file mode 100644 index 00000000000..74b93d85b91 --- /dev/null +++ b/.github/workflows/patchbay.yml @@ -0,0 +1,56 @@ +name: Patchbay Tests + +on: + pull_request: + push: + branches: + - main + +concurrency: + group: patchbay-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + RUST_BACKTRACE: 1 + SCCACHE_CACHE_SIZE: "10G" + IROH_FORCE_STAGING_RELAYS: "1" + NEXTEST_VERSION: "0.9.80" + +jobs: + patchbay_tests: + name: Patchbay Tests + timeout-minutes: 15 + runs-on: [self-hosted, linux, X64] + env: + RUSTC_WRAPPER: "sccache" + steps: + - name: Enable unprivileged user namespaces + run: sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 + continue-on-error: true + + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + - uses: mozilla-actions/sccache-action@v0.0.9 + - name: Install cargo-make and cargo-nextest + uses: taiki-e/install-action@v2 + with: + tool: nextest@${{ env.NEXTEST_VERSION }},cargo-make + + - name: Build patchbay tests + run: cargo make patchbay --no-run + + - name: Run patchbay tests + id: tests + run: cargo make patchbay + env: + RUST_LOG: ${{ runner.debug && 'trace' || 'debug' }} + PATCHBAY_LOG: trace + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: patchbay-testdir-${{ github.sha }} + path: target/testdir-current/ + retention-days: 7 + if-no-files-found: ignore diff --git a/.gitignore b/.gitignore index 1594301eadb..e7b9572c2f9 100644 --- a/.gitignore +++ 
b/.gitignore @@ -1,3 +1,4 @@ /target /logs iroh.config.toml +/.patchbay diff --git a/Cargo.lock b/Cargo.lock index 61a5eb1766e..b33c3e130b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "addr2line" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "aead" version = "0.5.2" @@ -131,7 +146,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -142,7 +157,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -398,6 +413,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "backtrace" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link", +] + [[package]] name = "base32" version = "0.5.1" @@ -496,6 +526,39 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "camino" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" +dependencies = [ + 
"serde_core", +] + +[[package]] +name = "cargo-platform" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87a0c0e6148f11f01f32650a2ea02d532b2ad4e81d8bd41e6e565b5adc5e6082" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "cargo_metadata" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef987d17b0a113becdd19d3d0022d04d7ef41f9efe4f3fb63ac44ba61df3ade9" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror 2.0.18", +] + [[package]] name = "cast" version = "0.3.0" @@ -885,6 +948,22 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "ctor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" +dependencies = [ + "ctor-proc-macro", + "dtor", +] + +[[package]] +name = "ctor-proc-macro" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" + [[package]] name = "ctr" version = "0.9.2" @@ -1197,6 +1276,21 @@ dependencies = [ "litrs", ] +[[package]] +name = "dtor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" +dependencies = [ + "dtor-proc-macro", +] + +[[package]] +name = "dtor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" + [[package]] name = "dunce" version = "1.0.5" @@ -1305,7 +1399,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -1402,6 +1496,16 @@ 
version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "fslock" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04412b8935272e3a9bae6f48c7bfff74c2911f60525404edfdd28e49884c3bfb" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "futures" version = "0.3.32" @@ -1556,7 +1660,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -1600,6 +1704,12 @@ dependencies = [ "polyval", ] +[[package]] +name = "gimli" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" + [[package]] name = "gloo-timers" version = "0.3.0" @@ -2017,7 +2127,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -2266,6 +2376,7 @@ dependencies = [ "clap", "console", "console_error_panic_hook", + "ctor", "ctutils", "data-encoding", "derive_more", @@ -2289,6 +2400,7 @@ dependencies = [ "noq-udp", "papaya", "parse-size", + "patchbay", "pin-project", "pkarr", "pkcs8", @@ -2309,6 +2421,7 @@ dependencies = [ "strum 0.28.0", "swarm-discovery", "sync_wrapper", + "testdir", "time", "tokio", "tokio-stream", @@ -2803,6 +2916,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "1.1.1" @@ -2810,7 +2932,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -2837,6 +2959,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af4782b4baf92d686d161c15460c83d16ebcfd215918763903e9619842665cae" dependencies = [ + "anyhow", "n0-error-macros", "spez", ] @@ -2937,7 +3060,7 @@ dependencies = [ "libc", "mac-addr", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.29.0", "netlink-sys", "objc2-core-foundation", "objc2-system-configuration", @@ -2955,6 +3078,18 @@ dependencies = [ "paste", ] +[[package]] +name = "netlink-packet-route" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ce3636fa715e988114552619582b530481fd5ef176a1e5c1bf024077c2c9445" +dependencies = [ + "bitflags", + "libc", + "log", + "netlink-packet-core", +] + [[package]] name = "netlink-packet-route" version = "0.29.0" @@ -3011,7 +3146,7 @@ dependencies = [ "n0-watcher", "netdev", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.29.0", "netlink-proto", "netlink-sys", "noq-udp", @@ -3046,7 +3181,7 @@ dependencies = [ "n0-watcher", "netdev", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.29.0", "netlink-proto", "netlink-sys", "noq-udp", @@ -3065,6 +3200,18 @@ dependencies = [ "wmi", ] +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -3099,7 +3246,7 @@ dependencies = [ "pin-project-lite", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tokio-stream", @@ -3143,11 +3290,20 @@ source = 
"git+https://github.com/n0-computer/noq?branch=main#b212bbcaccaa82089cc dependencies = [ "cfg_aliases", "libc", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.61.2", ] +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + [[package]] name = "ntimestamp" version = "1.0.0" @@ -3169,7 +3325,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3266,6 +3422,16 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + [[package]] name = "objc2-security" version = "0.3.2" @@ -3291,6 +3457,15 @@ dependencies = [ "objc2-security", ] +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + [[package]] name = "oid-registry" version = "0.8.1" @@ -3385,6 +3560,32 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "patchbay" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d93ad32b57e2d0185284b2e73817b3668feb3013ee70e963d1cb01dda53a8a" +dependencies = [ + "anyhow", + "chrono", + 
"derive_more", + "futures", + "ipnet", + "iroh-metrics", + "libc", + "nix", + "rtnetlink", + "serde", + "serde_json", + "strum 0.28.0", + "tokio", + "tokio-util", + "toml", + "tracing", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "pem" version = "3.0.6" @@ -3722,7 +3923,7 @@ dependencies = [ "libc", "once_cell", "raw-cpuid", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "web-sys", "winapi", ] @@ -3755,7 +3956,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -3792,9 +3993,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -4099,6 +4300,30 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rtnetlink" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b960d5d873a75b5be9761b1e73b146f52dddcd27bac75263f40fba686d4d7b5" +dependencies = [ + "futures-channel", + "futures-util", + "log", + "netlink-packet-core", + "netlink-packet-route 0.28.0", + "netlink-proto", + "netlink-sys", + "nix", + "thiserror 1.0.69", + "tokio", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -4133,7 +4358,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4226,7 +4451,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4331,7 +4556,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" dependencies = [ "libc", - "windows-sys 
0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4345,6 +4570,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "send_wrapper" @@ -4607,7 +4836,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4796,6 +5025,20 @@ dependencies = [ "syn", ] +[[package]] +name = "sysinfo" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows", +] + [[package]] name = "system-configuration" version = "0.7.0" @@ -4833,7 +5076,22 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "testdir" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d53c48916d4a8bb476f45e3699d9d904477dcd3569117d446f1b870d1e5a576" +dependencies = [ + "anyhow", + "backtrace", + "cargo-platform", + "cargo_metadata", + "fslock", + "sysinfo", + "whoami", ] [[package]] @@ -5451,6 +5709,15 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -5469,6 +5736,15 @@ 
dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] + [[package]] name = "wasm-bindgen" version = "0.2.115" @@ -5659,6 +5935,19 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whoami" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6a5b12f9df4f978d2cfdb1bd3bac52433f44393342d7ee9c25f5a1c14c0f45d" +dependencies = [ + "libc", + "libredox", + "objc2-system-configuration", + "wasite", + "web-sys", +] + [[package]] name = "widestring" version = "1.2.1" @@ -5687,7 +5976,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -5833,15 +6122,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -5890,30 +6170,13 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", 
- "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - [[package]] name = "windows-threading" version = "0.2.1" @@ -5941,12 +6204,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -5965,12 +6222,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -5989,24 +6240,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - [[package]] name = 
"windows_i686_msvc" version = "0.42.2" @@ -6025,12 +6264,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -6049,12 +6282,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -6073,12 +6300,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -6097,12 +6318,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - [[package]] name = "winnow" version = "0.7.15" diff --git a/Cargo.toml b/Cargo.toml index c56367f8acb..f1aaad7b521 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ 
missing_debug_implementations = "warn" # do. To enable for a crate set `#![cfg_attr(iroh_docsrs, # feature(doc_cfg))]` in the crate. # We also have our own `iroh_loom` cfg to enable tokio-rs/loom testing. -unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_loom)"] } +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_loom)", "cfg(skip_patchbay)"] } [workspace.lints.clippy] unused-async = "warn" diff --git a/Makefile.toml b/Makefile.toml index 5dde2e70dd8..0e62deb53c8 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -26,3 +26,8 @@ args = [ "--config", "imports_granularity=Crate,group_imports=StdExternalCrate,reorder_imports=true,format_code_in_doc_comments=true", ] + +[tasks.patchbay] +workspace = false +command = "cargo" +args = ["nextest", "run", "-p", "iroh", "--features", "qlog", "--test", "patchbay", "--profile", "patchbay", "${@}"] diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 0bb758e9e83..19b6fce5f14 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -101,6 +101,7 @@ getrandom = { version = "0.4", features = ["wasm_js"] } # target-common test/dev dependencies [dev-dependencies] console_error_panic_hook = "0.1" +n0-error = { version = "0.1", features = ["anyhow"] } postcard = { version = "1.1.1", features = ["use-std"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } rand_chacha = "0.10" @@ -138,6 +139,12 @@ console = { version = "0.16" } wasm-tracing = "2.1.0" wasm-bindgen-test = "0.3.62" +# patchbay netsim test dependencies (linux only) +[target.'cfg(target_os = "linux")'.dev-dependencies] +ctor = "0.6" +patchbay = { version = "0.3", features = ["iroh-metrics"] } +testdir = "0.10" + [build-dependencies] cfg_aliases = { version = "0.2.1" } diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs new file mode 100644 index 00000000000..d3e9b39d476 --- /dev/null +++ b/iroh/tests/patchbay.rs @@ -0,0 +1,611 @@ +//! Patchbay network simulation tests. +//! +//! 
These tests use the [`patchbay`] crate to create virtual network topologies +//! in Linux user namespaces, testing iroh's NAT traversal, holepunching, +//! and connectivity under various network conditions. +//! +//! These tests require Linux with user namespace support. On non-Linux systems, you can use +//! the `patchbay` CLI to get a Linux container or VM with the required capabilities. +//! See patchbay docs for details. +//! +//! To run: +//! +//! ```sh +//! # On Linux (with user namespace support): +//! cargo nextest run -p iroh --features qlog --test patchbay --profile patchbay +//! # or use the equivalent `cargo make` alias: +//! cargo make patchbay +//! # can also pass additional args: +//! cargo make patchbay holepunch_simple --no-capture +//! +//! # On macOS (runs in container via patchbay CLI): +//! patchbay test --release -p iroh --test patchbay +//! ``` + +// patchbay only runs on Linux, and is compiled out when `--cfg skip_patchbay` is set +// (CI passes it via RUSTFLAGS in cross-compile environments) +#![cfg(all(target_os = "linux", not(skip_patchbay)))] + +use std::time::Duration; + +use iroh::{TransportAddr, endpoint::Side}; +use n0_error::{Result, StackResultExt, StdResultExt}; +use n0_tracing_test::traced_test; +use patchbay::{LinkCondition, LinkDirection, LinkLimits, Nat, RouterPreset, TestGuard}; +use testdir::testdir; +use tracing::info; + +use self::util::{Pair, PathWatcherExt, lab_with_relay, ping_accept, ping_open}; + +#[path = "patchbay/util.rs"] +mod util; + +/// Initializes the user namespace before any threads are spawned. +/// +/// This gives us all permissions we need for the patchbay tests. +#[ctor::ctor] +fn userns_ctor() { + patchbay::init_userns().expect("failed to init userns"); +} + +// --- +// Holepunch tests +// --- + +/// Two devices behind destination-independent NATs holepunch a direct connection. +/// +/// Both devices connect through a relay first, then upgrade to a direct path. +/// The client asserts that the connection starts as relayed, then waits for +/// a direct (IP) path to be selected.
+#[tokio::test] +#[traced_test] +async fn holepunch_simple() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let server = lab.add_device("server").uplink(nat1.id()).build().await?; + let client = lab.add_device("client").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(relay_map) + .server(server, async |_dev, _ep, conn| { + conn.closed().await; + Ok(()) + }) + .client(client, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + paths.wait_ip(timeout).await?; + info!("connection became direct"); + Ok(()) + }) + .run() + .await?; + guard.ok(); + Ok(()) +} + +/// Switches the client's IPv4 uplink to a different NAT mid-connection. +/// +/// The client starts behind `nat2`, holepunches a direct path, then replugs +/// its interface to `nat3`. The server waits until a direct path with a new +/// remote address is selected. We verify with a ping that the new path works. +/// +/// Currently ignored because iroh does not yet recover reliably from an +/// uplink switch. 
+#[tokio::test] +#[traced_test] +#[ignore = "known to still fail"] +async fn switch_uplink_v4() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let nat3 = lab.add_router("nat3").nat(Nat::Home).build().await?; + let server = lab.add_device("server").uplink(nat1.id()).build().await?; + let client = lab.add_device("client").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(relay_map) + .server(server, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + + // Wait until a first direct path is established. + let first = paths.wait_ip(timeout).await?; + info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); + + // Now wait until the direct path changes, which happens after the other endpoint + // changes its uplink. We check is_ip() explicitly to avoid triggering on a + // transient relay fallback during the network switch. + let second = paths + .wait_selected(timeout, |p| { + p.is_ip() && p.remote_addr() != first.remote_addr() + }) + .await + .context("did not switch paths")?; + info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); + + ping_accept(&conn, timeout).await?; + info!("ping done"); + conn.closed().await; + Ok(()) + }) + .client(client, async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + + // Wait for the connection to become direct. + paths.wait_ip(timeout).await.context("become direct")?; + + // Wait a little more and then switch the uplink to nat3.
+ tokio::time::sleep(Duration::from_secs(1)).await; + info!("switch IP uplink"); + dev.replug_iface("eth0", nat3.id()).await?; + + // We don't assert any path changes here, because the remote stays identical, + // and PathInfo does not contain info on local addrs. Instead, the remote + // only accepts our ping after the path changed. + info!("send ping"); + ping_open(&conn, timeout) + .await + .context("failed at ping_open")?; + info!("ping done"); + Ok(()) + }) + .run() + .await?; + guard.ok(); + Ok(()) +} + +/// Switches the client's uplink from an IPv4 NAT to an IPv6-only ISP network. +/// +/// Similar to [`switch_uplink_v4`], but the client replugs from a Home NAT +/// to an IPv6-only ISP router. The server waits for the selected path to +/// switch from an IPv4 to an IPv6 remote address. +/// +/// Currently ignored because this fails in roughly half of runs. +#[tokio::test] +#[traced_test] +#[ignore = "known to still be flaky"] +async fn switch_uplink_v6() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let public = lab + .add_router("public") + .preset(RouterPreset::Public) + .build() + .await?; + let home = lab + .add_router("nat2") + .preset(RouterPreset::Home) + .build() + .await?; + let mobile = lab + .add_router("nat3") + .preset(RouterPreset::IspV6) + .build() + .await?; + let server = lab.add_device("server").uplink(public.id()).build().await?; + let client = lab.add_device("client").uplink(home.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(relay_map) + .server(server, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + + // Wait until a first direct path over IPv4 is established.
+ let first = paths + .wait_selected( + timeout, + |p| matches!(p.remote_addr(), TransportAddr::Ip(addr) if addr.ip().is_ipv4()), + ) + .await + .context("did not become direct")?; + info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); + + ping_accept(&conn, timeout).await.context("ping_accept 1")?; + + // Now wait until the direct path changes, which happens after the other endpoint + // changes its uplink. We require an IPv6 remote address, so neither the old IPv4 + // path nor a transient relay fallback during the network switch satisfies the wait. + let second = paths + .wait_selected( + timeout, + |p| matches!(p.remote_addr(), TransportAddr::Ip(addr) if addr.ip().is_ipv6()), + ) + .await + .context("did not switch paths to v6")?; + info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); + + ping_accept(&conn, timeout).await.context("ping_accept 2")?; + info!("ping done"); + conn.closed().await; + Ok(()) + }) + .client(client, async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + + // Wait for conn to become direct. + paths.wait_ip(timeout).await.context("become direct")?; + + ping_open(&conn, timeout).await.context("ping_open 1")?; + + info!("switch IP uplink"); + dev.replug_iface("eth0", mobile.id()).await?; + + // We don't assert any path changes here, because the remote stays identical, + // and PathInfo does not contain info on local addrs. Instead, the remote + // only accepts our ping after the path changed. + ping_open(&conn, timeout).await.context("ping_open 2")?; + Ok(()) + }) + .run() + .await?; + guard.ok(); + Ok(()) +} + +/// Adds a faster LAN interface and verifies the path becomes selected. +/// +/// The server sits on `nat1`. The client starts on `nat2` with a 4G-impaired +/// link and has a second interface `eth1` connected to `nat1` (a LAN path), +/// but `eth1` starts down.
After holepunching over the impaired link, the test +/// brings `eth1` up and waits for the selected path to change to the new, +/// faster LAN address. A ping verifies the new path works. +#[tokio::test] +#[traced_test] +async fn change_ifaces() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + + // Client has two uplinks (eth0=4G via nat2, eth1=LAN via nat1). eth1 starts down. + let server = lab + .add_device("server") + .iface("eth0", nat1.id()) + .build() + .await?; + let client = lab + .add_device("client") + .iface("eth0", nat2.id()) + .iface("eth1", nat1.id()) + .build() + .await?; + client + .set_link_condition("eth0", Some(LinkCondition::Mobile4G), LinkDirection::Both) + .await?; + client.link_down("eth1").await?; + + let timeout = Duration::from_secs(10); + Pair::new(relay_map) + .server(server, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await.context("ping_accept")?; + conn.closed().await; + Ok(()) + }) + .client(client, async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + let first = paths + .wait_ip(timeout) + .await + .context("did not become direct")?; + info!(addr=?first.remote_addr(), "connection became direct"); + + tokio::time::sleep(Duration::from_secs(1)).await; + + // Bring up the LAN interface to the other ep. + info!("bring up eth1"); + dev.link_up("eth1").await?; + + // Wait for a new direct path to be established. We check is_ip() explicitly + // to avoid triggering on a transient relay fallback during the switch. 
+ let next = paths + .wait_selected(timeout, |p| { + p.is_ip() && p.remote_addr() != first.remote_addr() + }) + .await + .context("did not switch paths")?; + info!(addr=?next.remote_addr(), "new direct path established"); + + ping_open(&conn, timeout).await.context("ping_open")?; + Ok(()) + }) + .run() + .await?; + guard.ok(); + Ok(()) +} + +/// Takes the client's link down for five seconds after holepunching, then brings it back. +/// +/// After recovery, the test verifies that we can ping (via relay fallback or +/// a re-established direct path), and then waits for a direct path to be +/// selected again. +#[tokio::test] +#[traced_test] +async fn link_outage_recovery() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let server = lab.add_device("server").uplink(nat1.id()).build().await?; + let client = lab.add_device("client").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(15); + Pair::new(relay_map) + .server(server, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await.context("ping_accept 1")?; + ping_accept(&conn, timeout).await.context("ping_accept 2")?; + conn.closed().await; + Ok(()) + }) + .client(client, async move |dev, _ep, conn| { + let mut paths = conn.paths(); + paths.wait_ip(timeout).await.context("initial holepunch")?; + let downtime = Duration::from_secs(5); + info!("holepunched, now killing link for {downtime:?}"); + // Take the link down. + dev.link_down("eth0").await?; + tokio::time::sleep(downtime).await; + dev.link_up("eth0").await?; + info!("link restored, waiting for recovery"); + + // After link recovery, we should be able to ping, either via relay + // fallback or re-established direct path. 
+ ping_open(&conn, Duration::from_secs(30)) + .await + .context("ping_open after link_up")?; + info!("connection recovered after link outage"); + + // Eventually the direct path should come back. + paths + .wait_ip(Duration::from_secs(30)) + .await + .context("did not re-establish direct path")?; + ping_open(&conn, timeout) + .await + .context("ping_open after direct")?; + Ok(()) + }) + .run() + .await?; + guard.ok(); + Ok(()) +} + +// --- +// Degradation ladder: find where holepunching breaks under worsening conditions +// --- + +/// Increasingly degraded link conditions applied to one side of the connection. +/// +/// Each level adds more latency, loss, and reordering. The levels are tested +/// individually for both server-side and client-side impairment. +const DEGRADE_LEVELS: &[LinkLimits] = &[ + // 0: mild - good wifi + LinkLimits { + latency_ms: 10, + jitter_ms: 5, + loss_pct: 0.5, + reorder_pct: 0.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 1: poor - bad wifi or 3G + LinkLimits { + latency_ms: 100, + jitter_ms: 30, + loss_pct: 3.0, + reorder_pct: 3.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 2: bad - congested 3G + LinkLimits { + latency_ms: 200, + jitter_ms: 60, + loss_pct: 5.0, + reorder_pct: 5.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 3: terrible - barely usable + LinkLimits { + latency_ms: 300, + jitter_ms: 80, + loss_pct: 8.0, + reorder_pct: 8.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 4: extreme - GEO satellite with heavy loss + LinkLimits { + latency_ms: 500, + jitter_ms: 100, + loss_pct: 12.0, + reorder_pct: 12.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 5: absurd - stress test + LinkLimits { + latency_ms: 800, + jitter_ms: 200, + loss_pct: 20.0, + reorder_pct: 20.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, +]; + +/// Runs a single degradation level. 
+/// +/// Creates two devices behind Home NATs, applies the given [`LinkLimits`] to +/// `impaired_side`, then attempts to holepunch and ping. Returns the +/// [`TestGuard`] on success so the caller can mark it as passed. +async fn run_degrade_level(impaired_side: Side, level: usize) -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let timeout = Duration::from_secs(30); + + let limits = DEGRADE_LEVELS[level]; + let link_condition = Some(LinkCondition::Manual(limits)); + + let server = lab + .add_device("server") + .iface("eth0", nat1.id()) + .build() + .await?; + let client = lab + .add_device("client") + .iface("eth0", nat2.id()) + .build() + .await?; + let impaired_device = match impaired_side { + Side::Client => &client, + Side::Server => &server, + }; + impaired_device + .set_link_condition("eth0", link_condition, LinkDirection::Both) + .await?; + + let result = tokio::time::timeout( + timeout * 2, + Pair::new(relay_map) + .server(server, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + conn.closed().await; + Ok(()) + }) + .client(client, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths.wait_ip(timeout).await?; + ping_open(&conn, timeout).await?; + Ok(()) + }) + .run(), + ) + .await + .std_context("pair timed out") + .flatten(); + + match &result { + Ok(()) => tracing::event!( + target: "test::_events::ladder_pass", + tracing::Level::INFO, + level, + latency_ms = limits.latency_ms, + loss_pct = limits.loss_pct, + reorder_pct = limits.reorder_pct, + impaired_side = ?impaired_side, + "PASSED", + ), + Err(err) => tracing::event!( + target: "test::_events::ladder_fail", + tracing::Level::WARN, + level, + latency_ms = limits.latency_ms, + loss_pct = limits.loss_pct, + reorder_pct = limits.reorder_pct, + impaired_side = ?impaired_side, + error = 
format!("{err:#}"), + "FAILED", + ), + } + + result?; + Ok(guard) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_0_mild() -> Result { + run_degrade_level(Side::Server, 0).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_1_poor() -> Result { + run_degrade_level(Side::Server, 1).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_2_bad() -> Result { + run_degrade_level(Side::Server, 2).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_3_terrible() -> Result { + run_degrade_level(Side::Server, 3).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +#[ignore = "not yet passing reliably"] +async fn degrade_server_4_extreme() -> Result { + run_degrade_level(Side::Server, 4).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +#[ignore = "not yet passing reliably"] +async fn degrade_server_5_absurd() -> Result { + run_degrade_level(Side::Server, 5).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_0_mild() -> Result { + run_degrade_level(Side::Client, 0).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_1_poor() -> Result { + run_degrade_level(Side::Client, 1).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_2_bad() -> Result { + run_degrade_level(Side::Client, 2).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_3_terrible() -> Result { + run_degrade_level(Side::Client, 3).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +#[ignore = "not yet passing reliably"] +async fn degrade_client_4_extreme() -> Result { + run_degrade_level(Side::Client, 4).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +#[ignore = "not yet passing reliably"] +async fn degrade_client_5_absurd() -> Result { + run_degrade_level(Side::Client, 5).await?.ok(); + Ok(()) +} diff --git a/iroh/tests/patchbay/util.rs 
b/iroh/tests/patchbay/util.rs new file mode 100644 index 00000000000..50edd015198 --- /dev/null +++ b/iroh/tests/patchbay/util.rs @@ -0,0 +1,433 @@ +use std::{future::Future, path::PathBuf, sync::Arc, time::Duration}; + +use iroh::{ + Endpoint, EndpointAddr, RelayMap, RelayMode, Watcher, + endpoint::{Connection, PathInfo, PathWatcher, presets}, + tls::CaRootsConfig, +}; +use iroh_metrics::MetricsGroupSet; +use n0_error::{Result, StackResultExt, StdResultExt, anyerr, ensure_any}; +use n0_future::{boxed::BoxFuture, task::AbortOnDropHandle}; +use patchbay::{Device, IpSupport, Lab, LabOpts, OutDir, TestGuard}; +use tokio::sync::{Barrier, oneshot}; +use tracing::{Instrument, debug, error, error_span, event, info}; + +use self::relay::run_relay_server; + +const TEST_ALPN: &[u8] = b"test"; + +/// Creates a lab with a relay server. +/// +/// Returns the lab, relay map, a drop guard that keeps the relay alive, +/// and a [`TestGuard`] that records pass/fail. +/// +/// The relay binds on `[::]` and is reachable via `https://relay.test` +/// (resolved through lab-wide DNS entries for both IPv4 and IPv6). +pub async fn lab_with_relay( + path: PathBuf, +) -> Result<(Lab, RelayMap, AbortOnDropHandle<()>, TestGuard)> { + let mut opts = LabOpts::default().outdir(OutDir::Exact(path)); + if let Some(name) = std::thread::current().name() { + opts = opts.label(name); + } + let lab = Lab::with_opts(opts).await?; + let guard = lab.test_guard(); + let (relay_map, relay_guard) = spawn_relay(&lab).await?; + Ok((lab, relay_map, relay_guard, guard)) +} + +/// Creates a router `dc` and device `relay` and spawns a relay server on the device. +/// +/// Also creates a lab-wide DNS entry `relay.test` that resolves to the relay server's +/// IPv4 and IPv6 addresses. +/// +/// Returns a [`RelayMap`] with an entry for the relay, and a drop handle that will +/// stop the relay server once dropped. 
+async fn spawn_relay(lab: &Lab) -> Result<(RelayMap, AbortOnDropHandle<()>)> {
+    let dc = lab
+        .add_router("dc")
+        .ip_support(IpSupport::DualStack)
+        .build()
+        .await?;
+    let dev_relay = lab.add_device("relay").uplink(dc.id()).build().await?;
+
+    // Register both v4 and v6 addresses under "relay.test" lab-wide.
+    // Devices created after this will resolve "relay.test" to both addresses.
+    let relay_v4 = dev_relay.ip().expect("relay has IPv4");
+    let relay_v6 = dev_relay.ip6().expect("relay has IPv6");
+    lab.dns_entry("relay.test", relay_v4.into())?;
+    lab.dns_entry("relay.test", relay_v6.into())?;
+
+    let (relay_map_tx, relay_map_rx) = oneshot::channel();
+    let task_relay = dev_relay.spawn(async move |_ctx| {
+        // The task's output is `()`, so a failed start-up panics here; the caller
+        // then observes it below as a closed channel.
+        let (relay_map, _server) = run_relay_server()
+            .await
+            .expect("failed to start relay server");
+        relay_map_tx
+            .send(relay_map)
+            .expect("relay map receiver dropped");
+        // Never complete, so the `_server` binding stays alive until the task is aborted.
+        std::future::pending::<()>().await;
+    })?;
+    // Report a dead relay task as an error instead of panicking in the caller.
+    let relay_map = relay_map_rx
+        .await
+        .std_context("relay task exited before sending its relay map")?;
+    Ok((relay_map, AbortOnDropHandle::new(task_relay)))
+}
+
+/// Type alias for boxed run functions used in [`Pair`].
+// NOTE(review): the generic arguments of this alias were missing in this copy of the
+// file and are reconstructed from how `Pair::server` builds a `RunFn`. Confirm against
+// upstream, in particular whether the trait object also carries a `Send` bound.
+type RunFn = Box<dyn FnOnce(Device, Endpoint, Connection) -> BoxFuture<Result>>;
+
+/// Builder for two connected endpoints in a lab.
+///
+/// Use this to quickly create two endpoints on two different devices and create a
+/// connection between them that starts as relay-only.
+pub struct Pair {
+    /// Relay map handed to both endpoints.
+    relay_map: RelayMap,
+    /// Server device and its run function; `None` until [`Pair::server`] is called.
+    server: Option<(Device, RunFn)>,
+    /// Client device and its run function; `None` until [`Pair::client`] is called.
+    client: Option<(Device, RunFn)>,
+}
+
+impl Pair {
+    /// Creates a new pair builder with a shared [`RelayMap`].
+    pub fn new(relay_map: RelayMap) -> Self {
+        Self {
+            relay_map,
+            server: None,
+            client: None,
+        }
+    }
+
+    /// Sets the server device and run function.
+    ///
+    /// `run_fn` receives the device, the bound endpoint and the accepted connection.
+    pub fn server<F, Fut>(mut self, device: Device, run_fn: F) -> Self
+    where
+        F: FnOnce(Device, Endpoint, Connection) -> Fut + Send + 'static,
+        Fut: Future<Output = Result> + Send + 'static,
+    {
+        // Erase the concrete closure and future types so both sides share `RunFn`.
+        let run_fn: RunFn =
+            Box::new(move |device, endpoint, conn| Box::pin(run_fn(device, endpoint, conn)));
+        self.server = Some((device, run_fn));
+        self
+    }
+
+    /// Sets the client device and run function.
+    ///
+    /// `run_fn` receives the device, the bound endpoint and the established connection.
+    pub fn client<F, Fut>(mut self, device: Device, run_fn: F) -> Self
+    where
+        F: FnOnce(Device, Endpoint, Connection) -> Fut + Send + 'static,
+        Fut: Future<Output = Result> + Send + 'static,
+    {
+        // Erase the concrete closure and future types so both sides share `RunFn`.
+        let run_fn: RunFn =
+            Box::new(move |device, endpoint, conn| Box::pin(run_fn(device, endpoint, conn)));
+        self.client = Some((device, run_fn));
+        self
+    }
+
+    /// Runs the pair to completion.
+    ///
+    /// This will bind an endpoint on each device, wait for the server endpoint to be online,
+    /// then send a relay-only [`EndpointAddr`] to the client task.
+    /// The client task will connect to the server, and the server will accept a connection.
+    /// Once a connection is established on either side, its run function is invoked.
+    /// Once both run functions completed, the endpoints are dropped without awaiting
+    /// [`Endpoint::close`], so the corresponding ERROR logs are expected.
+    ///
+    /// After completion, this will:
+    /// - log the result of the run functions
+    /// - record the endpoint metrics as a `patchbay::_metrics` tracing event
+    /// - emit a `test::_events::pass` or `test::_events::fail` event for each device
+    ///
+    /// Returns an error if any step or run function failed.
+    // NOTE(review): a task that returns early through `?` before its run function starts
+    // never reaches the barrier. Confirm whether the peer task can then wait forever
+    // (e.g. in `accept`), and whether callers rely on an outer timeout for that case.
+    pub async fn run(mut self) -> Result {
+        let (server_device, server_run) = self
+            .server
+            .take()
+            .context("Missing server initialization")?;
+        let (client_device, client_run) = self
+            .client
+            .take()
+            .context("Missing client initialization")?;
+
+        let (addr_tx, addr_rx) = oneshot::channel();
+        let relay_map2 = self.relay_map.clone();
+        // Both tasks meet at this barrier after their run functions returned.
+        let barrier = Arc::new(Barrier::new(2));
+        let barrier2 = barrier.clone();
+        let server_task = server_device.spawn(|dev| {
+            let barrier = barrier2;
+            async move {
+                let endpoint = endpoint_builder(&dev, relay_map2).bind().await?;
+                info!(id=%endpoint.id().fmt_short(), bound_sockets=?endpoint.bound_sockets(), "server endpoint bound");
+                endpoint.online().await;
+                info!("endpoint online");
+                // Send address to client task. Make it a relay-only address, like in the
+                // default address lookup services.
+                addr_tx.send(addr_relay_only(endpoint.addr())).unwrap();
+                // A closed endpoint is reported as an error rather than a panic.
+                let conn = endpoint
+                    .accept()
+                    .await
+                    .context("server endpoint closed before a connection arrived")?
+                    .accept()
+                    .anyerr()?
+                    .await?;
+                info!(remote=%conn.remote_id().fmt_short(), "accepted, executing run function");
+                watch_selected_path(&conn);
+                let res = server_run(dev.clone(), endpoint.clone(), conn).await;
+                match &res {
+                    Ok(()) => info!("run function completed successfully"),
+                    Err(err) => error!("run function failed: {err:#}"),
+                }
+                // Wait until the client run function completed before dropping the endpoint.
+                barrier.wait().await;
+                for group in endpoint.metrics().groups() {
+                    dev.record_iroh_metrics(group);
+                }
+                res
+            }
+            .instrument(error_span!("ep-server"))
+        })?;
+        let client_task = client_device.spawn(move |dev| {
+            async move {
+                let endpoint = endpoint_builder(&dev, self.relay_map).bind().await?;
+                info!(id=%endpoint.id().fmt_short(), bound_sockets=?endpoint.bound_sockets(), "client endpoint bound");
+                let addr = addr_rx.await.std_context("server did not send its address")?;
+                info!(?addr, "connecting to server");
+                let conn = endpoint.connect(addr, TEST_ALPN).await?;
+                watch_selected_path(&conn);
+                info!(remote=%conn.remote_id().fmt_short(), "connected, executing run function");
+                let res = client_run(dev.clone(), endpoint.clone(), conn).await;
+                match &res {
+                    Ok(()) => info!("run function completed successfully"),
+                    Err(err) => error!("run function failed: {err:#}"),
+                }
+                // Wait until the server run function completed before dropping the endpoint.
+                barrier.wait().await;
+                // The endpoint is dropped here without awaiting `Endpoint::close`.
+                for group in endpoint.metrics().groups() {
+                    dev.record_iroh_metrics(group);
+                }
+                res
+            }
+            .instrument(error_span!("ep-client"))
+        })?;
+
+        let (server_res, client_res) = tokio::join!(server_task, client_task);
+
+        // Map the results to include the device name, and emit a tracing event within the device context.
+        let [server_res, client_res] = [(&server_device, server_res), (&client_device, client_res)]
+            .map(|(dev, res)| {
+                // The outer `Err` comes from the spawned task itself, the inner one
+                // from the run function or endpoint setup.
+                let res = match res {
+                    Err(err) => Err(anyerr!(err, "device {} panicked", dev.name())),
+                    Ok(Err(err)) => Err(anyerr!(err, "device {} failed", dev.name())),
+                    Ok(Ok(())) => Ok(()),
+                };
+                // Only the rendered error text is handed to the logging closure.
+                let res_str = res.as_ref().map_err(|err| format!("{err:#}")).cloned();
+                log_result_on_device(dev, res_str);
+                res
+            });
+        server_res?;
+        client_res?;
+        Ok(())
+    }
+}
+
+/// Emits a `test::_events::pass` or `test::_events::fail` tracing event for `dev`.
+///
+/// The event is emitted from inside `dev.run_sync`; the result of `run_sync` is ignored.
+// NOTE(review): the bound on `E` was missing in this copy of the file. `Display` is
+// required by `%error`; `Send + 'static` is a conservative guess for `run_sync`.
+fn log_result_on_device<E: std::fmt::Display + Send + 'static>(
+    dev: &Device,
+    res: Result<(), E>,
+) {
+    let _ = dev.run_sync(move || {
+        match res {
+            Ok(_) => event!(
+                target: "test::_events::pass",
+                tracing::Level::INFO,
+                msg = %"device passed"
+            ),
+            Err(error) => event!(
+                target: "test::_events::fail",
+                tracing::Level::ERROR,
+                %error,
+                msg = %"device failed"
+            ),
+        }
+        Ok(())
+    });
+}
+
+/// Extension methods on [`PathWatcher`] for common waiting patterns in tests.
+#[allow(unused)]
+pub trait PathWatcherExt {
+    /// Waits until the selected path fulfills a condition.
+    ///
+    /// Calls `f` with the currently-selected path, and again after each path update,
+    /// until `f` returns true or `timeout` elapses.
+    ///
+    /// Returns the path that satisfied `f`, or an error if the timeout elapses before
+    /// `f` returned true.
+    async fn wait_selected(
+        &mut self,
+        timeout: Duration,
+        f: impl Fn(&PathInfo) -> bool,
+    ) -> Result<PathInfo>;
+
+    /// Returns the currently selected path.
+    ///
+    /// Panics if no path is marked as selected.
+    fn selected(&mut self) -> PathInfo;
+
+    /// Waits until the selected path is a direct (IP) path.
+    async fn wait_ip(&mut self, timeout: Duration) -> Result<PathInfo> {
+        self.wait_selected(timeout, PathInfo::is_ip).await
+    }
+
+    /// Waits until the selected path is a relay path.
+    async fn wait_relay(&mut self, timeout: Duration) -> Result<PathInfo> {
+        self.wait_selected(timeout, PathInfo::is_relay).await
+    }
+}
+
+impl PathWatcherExt for PathWatcher {
+    fn selected(&mut self) -> PathInfo {
+        let p = self.get();
+        p.iter()
+            .find(|p| p.is_selected())
+            .cloned()
+            .expect("no selected path")
+    }
+
+    async fn wait_selected(
+        &mut self,
+        timeout: Duration,
+        f: impl Fn(&PathInfo) -> bool,
+    ) -> Result<PathInfo> {
+        tokio::time::timeout(timeout, async {
+            loop {
+                // Check the current selection first, so an already-satisfied
+                // condition returns without waiting for an update.
+                let selected = self.selected();
+                if f(&selected) {
+                    return n0_error::Ok(selected);
+                }
+                self.updated().await?;
+            }
+        })
+        .await
+        .anyerr()?
+    }
+}
+
+/// Opens a bidi stream, sends 8 bytes of data, and waits to receive the same data back.
+///
+/// Returns an error if any stream operation fails, if the reply differs from the
+/// data sent, or if the exchange does not finish within `timeout`.
+pub async fn ping_open(conn: &Connection, timeout: Duration) -> Result {
+    tokio::time::timeout(timeout, async {
+        let data: [u8; 8] = rand::random();
+        let (mut send, mut recv) = conn.open_bi().await.anyerr()?;
+        send.write_all(&data).await.anyerr()?;
+        send.finish().anyerr()?;
+        let r = recv.read_to_end(8).await.anyerr()?;
+        // The message is what gets reported when the check fails.
+        ensure_any!(r == data, "ping reply does not match the data sent");
+        Ok(())
+    })
+    .await
+    .anyerr()?
+}
+
+/// Accepts a bidi stream, reads up to 8 bytes of data, and sends the same data back.
+///
+/// Returns an error if any stream operation fails or if the exchange does not
+/// finish within `timeout`.
+pub async fn ping_accept(conn: &Connection, timeout: Duration) -> Result {
+    tokio::time::timeout(timeout, async {
+        let (mut send, mut recv) = conn.accept_bi().await.anyerr()?;
+        let data = recv.read_to_end(8).await.anyerr()?;
+        send.write_all(&data).await.anyerr()?;
+        send.finish().anyerr()?;
+        Ok(())
+    })
+    .await
+    .anyerr()?
+}
+
+/// Spawns a background task that logs the selected path of `conn` whenever it changes.
+///
+/// The task ends once the path watcher reports an error from `updated()`.
+// NOTE(review): both `unwrap` calls below panic inside the spawned task if there is no
+// selected path or no RTT value. Confirm that neither can happen while the watcher is live.
+fn watch_selected_path(conn: &Connection) {
+    let mut watcher = conn.paths();
+    tokio::spawn(
+        async move {
+            let mut prev = None;
+            loop {
+                let paths = watcher.get();
+                let selected = paths.iter().find(|p| p.is_selected()).unwrap();
+                // Log only when the selected path differs from the last one logged.
+                if Some(selected) != prev.as_ref() {
+                    debug!(
+                        "selected path: [{}] {:?} rtt {:?}",
+                        selected.id(),
+                        selected.remote_addr(),
+                        selected.rtt().unwrap()
+                    );
+                    prev = Some(selected.clone());
+                }
+                if watcher.updated().await.is_err() {
+                    break;
+                }
+            }
+        }
+        .instrument(tracing::Span::current()),
+    );
+}
+
+/// Builds the endpoint configuration shared by the server and client of a [`Pair`].
+///
+/// The endpoint uses the given relay map, skips TLS certificate verification and
+/// accepts [`TEST_ALPN`]. With the `qlog` feature enabled, qlog output is configured
+/// from the device's `qlog` file path when the device provides one.
+fn endpoint_builder(device: &Device, relay_map: RelayMap) -> iroh::endpoint::Builder {
+    // `mut` is only needed when the `qlog` feature is enabled.
+    #[allow(unused_mut)]
+    let mut builder = Endpoint::builder(presets::Minimal)
+        .relay_mode(RelayMode::Custom(relay_map))
+        .ca_roots_config(CaRootsConfig::insecure_skip_verify())
+        .alpns(vec![TEST_ALPN.to_vec()]);
+
+    // Avoid an unused-variable warning when `qlog` is disabled.
+    #[cfg(not(feature = "qlog"))]
+    let _ = device;
+
+    #[cfg(feature = "qlog")]
+    {
+        if let Some(path) = device.filepath("qlog") {
+            let prefix = path.file_name().unwrap().to_str().unwrap();
+            let directory = path.parent().unwrap();
+            let transport_config = iroh::endpoint::QuicTransportConfig::builder()
+                .qlog_from_path(directory, prefix)
+                .build();
+            builder = builder.transport_config(transport_config);
+        }
+    }
+
+    builder
+}
+
+/// Returns a copy of `addr` that keeps the endpoint id but only its relay addresses.
+fn addr_relay_only(addr: EndpointAddr) -> EndpointAddr {
+    EndpointAddr::from_parts(addr.id, addr.addrs.into_iter().filter(|a| a.is_relay()))
+}
+
+mod relay {
+    use std::net::{IpAddr, Ipv6Addr};
+
+    use iroh_base::RelayUrl;
+    use iroh_relay::{
+        RelayConfig, RelayMap, RelayQuicConfig,
+        server::{
+            AccessConfig, CertConfig, QuicConfig, RelayConfig as RelayServerConfig, Server,
+            ServerConfig, SpawnError, TlsConfig,
+        },
+    };
+
+    /// Spawn a relay server bound on `[::]` that accepts both IPv4 and IPv6.
+    ///
+    /// The returned [`RelayMap`] uses `https://relay.test` as the relay URL.
+ /// Callers are responsible for ensuring that a DNS entry for `relay.test` + /// exists and points to the relay's IP addresses. + pub async fn run_relay_server() -> Result<(RelayMap, Server), SpawnError> { + let bind_ip: IpAddr = Ipv6Addr::UNSPECIFIED.into(); + + let (certs, server_config) = + iroh_relay::server::testing::self_signed_tls_certs_and_config(); + + let tls = TlsConfig { + cert: CertConfig::<(), ()>::Manual { certs }, + https_bind_addr: (bind_ip, 443).into(), + quic_bind_addr: (bind_ip, 7842).into(), + server_config, + }; + let quic = Some(QuicConfig { + server_config: tls.server_config.clone(), + bind_addr: tls.quic_bind_addr, + }); + let config = ServerConfig { + relay: Some(RelayServerConfig { + http_bind_addr: (bind_ip, 80).into(), + tls: Some(tls), + limits: Default::default(), + key_cache_capacity: Some(1024), + access: AccessConfig::Everyone, + }), + quic, + ..Default::default() + }; + let server = Server::spawn(config).await?; + + let url: RelayUrl = "https://relay.test".parse().expect("valid relay url"); + let quic = server + .quic_addr() + .map(|addr| RelayQuicConfig { port: addr.port() }); + let relay_map: RelayMap = RelayConfig { url, quic }.into(); + + Ok((relay_map, server)) + } +}