From d38b3a774c9d7f5f3ef6bcb5aed862c1b9eb1665 Mon Sep 17 00:00:00 2001 From: Frando Date: Thu, 5 Mar 2026 12:49:20 +0100 Subject: [PATCH 01/35] tests: add patchbay netsim tests --- .github/workflows/patchbay.yml | 121 ++++ Cargo.lock | 362 +++++++++- Cargo.toml | 2 +- iroh/Cargo.toml | 9 + iroh/tests/patchbay.rs | 1053 ++++++++++++++++++++++++++++++ iroh/tests/patchbay/netreport.rs | 341 ++++++++++ iroh/tests/patchbay/util.rs | 321 +++++++++ 7 files changed, 2189 insertions(+), 20 deletions(-) create mode 100644 .github/workflows/patchbay.yml create mode 100644 iroh/tests/patchbay.rs create mode 100644 iroh/tests/patchbay/netreport.rs create mode 100644 iroh/tests/patchbay/util.rs diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml new file mode 100644 index 0000000000..2d0c6a09e8 --- /dev/null +++ b/.github/workflows/patchbay.yml @@ -0,0 +1,121 @@ +name: Patchbay Tests + +on: + pull_request: + push: + branches: + - main + - Frando/netsim + +concurrency: + group: patchbay-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + RUST_BACKTRACE: 1 + RUSTFLAGS: "-Dwarnings --cfg patchbay_tests" + SCCACHE_CACHE_SIZE: "10G" + IROH_FORCE_STAGING_RELAYS: "1" + +jobs: + patchbay_tests: + name: Patchbay Tests + timeout-minutes: 45 + runs-on: [self-hosted, linux, X64] + env: + RUSTC_WRAPPER: "sccache" + steps: + - name: Enable unprivileged user namespaces + run: sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 + continue-on-error: true + + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + - uses: mozilla-actions/sccache-action@v0.0.9 + + - name: Run patchbay tests + id: tests + run: cargo test --release -p iroh --test patchbay -- --test-threads=1 + env: + RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG' }} + + # ── Push results to patchbay-serve ── + - name: Push results + if: always() + env: + PATCHBAY_URL: https://frando.gateway.lol + PATCHBAY_API_KEY: ${{ 
secrets.PATCHBAY_API_KEY }} + TEST_STATUS: ${{ steps.tests.outcome }} + run: | + set -euo pipefail + PROJECT="${{ github.event.repository.name }}" + TESTDIR="$(cargo metadata --format-version=1 --no-deps | jq -r .target_directory)/testdir-current" + [ ! -d "$TESTDIR" ] && echo "No testdir output, skipping" && exit 0 + + cat > "$TESTDIR/run.json" <> "$GITHUB_ENV" + echo "PATCHBAY_TEST_STATUS=$TEST_STATUS" >> "$GITHUB_ENV" + echo "Results: $PATCHBAY_URL/#/inv/$INVOCATION" + + # ── Post or update PR comment ── + - name: Comment on PR + if: always() && env.PATCHBAY_VIEW_URL + uses: actions/github-script@v7 + with: + script: | + let prNumber = context.issue?.number; + if (!prNumber) { + const { data: prs } = await github.rest.pulls.list({ + owner: context.repo.owner, repo: context.repo.repo, + head: `${context.repo.owner}:${{ github.ref_name }}`, + state: 'open', + }); + if (!prs.length) return; + prNumber = prs[0].number; + } + + const status = process.env.PATCHBAY_TEST_STATUS; + const icon = status === 'success' ? 
'✅' : '❌'; + const sha = '${{ github.sha }}'; + const shortSha = sha.slice(0, 7); + const commitUrl = `${{ github.server_url }}/${{ github.repository }}/commit/${sha}`; + const date = new Date().toISOString().replace('T', ' ').slice(0, 19) + ' UTC'; + const marker = ''; + const body = [ + marker, + `${icon} **patchbay:** ${status} | ${process.env.PATCHBAY_VIEW_URL}`, + `${date} · [\`${shortSha}\`](${commitUrl})`, + ].join('\n'); + + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, repo: context.repo.repo, issue_number: prNumber, + }); + const existing = comments.find(c => c.body.includes(marker)); + const params = { owner: context.repo.owner, repo: context.repo.repo }; + if (existing) { + await github.rest.issues.updateComment({ ...params, comment_id: existing.id, body }); + } else { + await github.rest.issues.createComment({ ...params, issue_number: prNumber, body }); + } diff --git a/Cargo.lock b/Cargo.lock index 61a5eb1766..d1581ac62a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "addr2line" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "aead" version = "0.5.2" @@ -131,7 +146,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -142,7 +157,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + 
"windows-sys 0.61.2", ] [[package]] @@ -398,6 +413,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "backtrace" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link", +] + [[package]] name = "base32" version = "0.5.1" @@ -496,6 +526,37 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "camino" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" +dependencies = [ + "serde_core", +] + +[[package]] +name = "cargo-platform" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", +] + [[package]] name = "cast" version = "0.3.0" @@ -885,6 +946,22 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "ctor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" +dependencies = [ + "ctor-proc-macro", + "dtor", +] + +[[package]] +name = "ctor-proc-macro" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" + [[package]] name = "ctr" version = "0.9.2" 
@@ -1197,6 +1274,21 @@ dependencies = [ "litrs", ] +[[package]] +name = "dtor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" +dependencies = [ + "dtor-proc-macro", +] + +[[package]] +name = "dtor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" + [[package]] name = "dunce" version = "1.0.5" @@ -1305,7 +1397,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -1600,6 +1692,12 @@ dependencies = [ "polyval", ] +[[package]] +name = "gimli" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" + [[package]] name = "gloo-timers" version = "0.3.0" @@ -2017,7 +2115,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -2266,6 +2364,7 @@ dependencies = [ "clap", "console", "console_error_panic_hook", + "ctor", "ctutils", "data-encoding", "derive_more", @@ -2289,6 +2388,7 @@ dependencies = [ "noq-udp", "papaya", "parse-size", + "patchbay", "pin-project", "pkarr", "pkcs8", @@ -2305,10 +2405,12 @@ dependencies = [ "rustls-webpki", "serde", "serde_json", + "serial_test", "smallvec", "strum 0.28.0", "swarm-discovery", "sync_wrapper", + "testdir", "time", "tokio", "tokio-stream", @@ -2662,7 +2764,10 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ + "bitflags", "libc", + "plain", + "redox_syscall 0.7.3", ] [[package]] @@ 
-2803,6 +2908,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "1.1.1" @@ -2837,6 +2951,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af4782b4baf92d686d161c15460c83d16ebcfd215918763903e9619842665cae" dependencies = [ + "anyhow", "n0-error-macros", "spez", ] @@ -2937,7 +3052,7 @@ dependencies = [ "libc", "mac-addr", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.29.0", "netlink-sys", "objc2-core-foundation", "objc2-system-configuration", @@ -2955,6 +3070,18 @@ dependencies = [ "paste", ] +[[package]] +name = "netlink-packet-route" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ce3636fa715e988114552619582b530481fd5ef176a1e5c1bf024077c2c9445" +dependencies = [ + "bitflags", + "libc", + "log", + "netlink-packet-core", +] + [[package]] name = "netlink-packet-route" version = "0.29.0" @@ -3011,7 +3138,7 @@ dependencies = [ "n0-watcher", "netdev", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.29.0", "netlink-proto", "netlink-sys", "noq-udp", @@ -3046,7 +3173,7 @@ dependencies = [ "n0-watcher", "netdev", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.29.0", "netlink-proto", "netlink-sys", "noq-udp", @@ -3065,6 +3192,18 @@ dependencies = [ "wmi", ] +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", 
+] + [[package]] name = "nom" version = "7.1.3" @@ -3099,7 +3238,7 @@ dependencies = [ "pin-project-lite", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tokio-stream", @@ -3143,11 +3282,20 @@ source = "git+https://github.com/n0-computer/noq?branch=main#b212bbcaccaa82089cc dependencies = [ "cfg_aliases", "libc", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.61.2", ] +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + [[package]] name = "ntimestamp" version = "1.0.0" @@ -3169,7 +3317,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3291,6 +3439,15 @@ dependencies = [ "objc2-security", ] +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + [[package]] name = "oid-registry" version = "0.8.1" @@ -3368,7 +3525,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link", ] @@ -3385,6 +3542,30 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "patchbay" +version = "0.1.0" +source = "git+https://github.com/n0-computer/patchbay.git?branch=feat%2Fserver-push#729bc67bbb9df8a95c0e690c4476eb32c3cd5203" +dependencies = [ + "anyhow", + "chrono", + "derive_more", + "futures", + "ipnet", + "libc", + "nix", + 
"rtnetlink", + "serde", + "serde_json", + "strum 0.28.0", + "tokio", + "tokio-util", + "toml", + "tracing", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "pem" version = "3.0.6" @@ -3494,6 +3675,12 @@ dependencies = [ "spki", ] +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "plist" version = "1.8.0" @@ -3755,7 +3942,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -3792,7 +3979,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] @@ -3944,6 +4131,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.4.6" @@ -4099,6 +4295,30 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rtnetlink" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b960d5d873a75b5be9761b1e73b146f52dddcd27bac75263f40fba686d4d7b5" +dependencies = [ + "futures-channel", + "futures-util", + "log", + "netlink-packet-core", + "netlink-packet-route 0.28.0", + "netlink-proto", + "netlink-sys", + "nix", + "thiserror 1.0.69", + "tokio", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -4133,7 +4353,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ 
-4226,7 +4446,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4280,6 +4500,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scc" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" +dependencies = [ + "sdd", +] + [[package]] name = "schannel" version = "0.1.29" @@ -4301,6 +4530,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sdd" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" + [[package]] name = "security-framework" version = "3.7.0" @@ -4331,7 +4566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4345,6 +4580,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "send_wrapper" @@ -4479,6 +4718,32 @@ dependencies = [ "syn", ] +[[package]] +name = "serial_test" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f" +dependencies = [ + "futures-executor", + "futures-util", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sha1" version = "0.11.0-rc.5" @@ -4607,7 +4872,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4796,6 +5061,20 @@ dependencies = [ "syn", ] +[[package]] +name = "sysinfo" +version = "0.26.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c18a6156d1f27a9592ee18c1a846ca8dd5c258b7179fc193ae87c74ebb666f5" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "winapi", +] + [[package]] name = "system-configuration" version = "0.7.0" @@ -4833,7 +5112,22 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "testdir" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9ffa013be124f7e8e648876190de818e3a87088ed97ccd414a398b403aec8c8" +dependencies = [ + "anyhow", + "backtrace", + "cargo-platform", + "cargo_metadata", + "once_cell", + "sysinfo", + "whoami", ] [[package]] @@ -5229,6 +5523,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.22" @@ -5239,6 +5543,8 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex-automata", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", @@ -5246,6 +5552,7 @@ dependencies = [ "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -5469,6 +5776,12 @@ dependencies = [ 
"wit-bindgen", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.115" @@ -5659,6 +5972,17 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", + "web-sys", +] + [[package]] name = "widestring" version = "1.2.1" @@ -5687,7 +6011,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c56367f8ac..795ef46481 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ missing_debug_implementations = "warn" # do. To enable for a crate set `#![cfg_attr(iroh_docsrs, # feature(doc_cfg))]` in the crate. # We also have our own `iroh_loom` cfg to enable tokio-rs/loom testing. 
-unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_loom)"] } +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_loom)", "cfg(patchbay_tests)"] } [workspace.lints.clippy] unused-async = "warn" diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 0bb758e9e8..3a0bbf64ac 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -101,10 +101,12 @@ getrandom = { version = "0.4", features = ["wasm_js"] } # target-common test/dev dependencies [dev-dependencies] console_error_panic_hook = "0.1" +n0-error = { version = "0.1", features = ["anyhow"] } postcard = { version = "1.1.1", features = ["use-std"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } rand_chacha = "0.10" chrono = "0.4.43" +serial_test = "3.4.0" # *non*-wasm-in-browser test/dev dependencies [target.'cfg(not(all(target_family = "wasm", target_os = "unknown")))'.dev-dependencies] @@ -127,6 +129,7 @@ n0-tracing-test = "0.3" clap = { version = "4", features = ["derive"] } tracing-subscriber = { version = "0.3", features = [ "env-filter", + "json", ] } indicatif = { version = "0.18", features = ["tokio"] } parse-size = { version = "1.1.0", features = ['std'] } @@ -138,6 +141,12 @@ console = { version = "0.16" } wasm-tracing = "2.1.0" wasm-bindgen-test = "0.3.62" +# patchbay netsim test dependencies (linux only) +[target.'cfg(target_os = "linux")'.dev-dependencies] +ctor = "0.6" +patchbay = { git = "https://github.com/n0-computer/patchbay.git", branch = "feat/server-push" } +testdir = "0.9" + [build-dependencies] cfg_aliases = { version = "0.2.1" } diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs new file mode 100644 index 0000000000..2ca187c4e4 --- /dev/null +++ b/iroh/tests/patchbay.rs @@ -0,0 +1,1053 @@ +//! Patchbay network simulation tests. +//! +//! These tests use the [`patchbay`] crate to create virtual network topologies +//! in Linux user namespaces, testing iroh's NAT traversal, holepunching, +//! 
and connectivity under various network conditions. +//! +//! These tests are disabled by default and only run when the `patchbay_tests` cfg is enabled. +//! They require Linux with user namespace support. On non-Linux systems, you can use +//! `patchbay-vm` to get a Linux VM with the required capabilities. See patchbay docs +//! for details. +//! +//! To run: +//! +//! ```sh +//! # On Linux (with user namespace support): +//! RUSTFLAGS="--cfg patchbay_tests" cargo test --release -p iroh --test patchbay -- --test-threads=1 +//! +//! # On macOS (via patchbay-vm): +//! RUSTFLAGS="--cfg patchbay_tests" patchbay-vm test --release -p iroh --test patchbay -- --test-threads=1 +//! ``` + +// patchbay only runs on linux +#![cfg(target_os = "linux")] +// Only compile these tests when the patchbay_tests cfg is enabled. +#![cfg(patchbay_tests)] + +use std::time::Duration; + +use iroh::TransportAddr; +use n0_error::{Result, StackResultExt}; +use n0_tracing_test::traced_test; +use patchbay::{Firewall, LinkCondition, LinkLimits, Nat, RouterPreset, TestGuard}; +use testdir::testdir; +use tracing::{debug, info, warn}; + +use self::util::{Pair, PathWatcherExt, lab_with_relay, ping_accept, ping_open}; + +#[path = "patchbay/util.rs"] +mod util; + +/// Init the user namespace before any threads are spawned. +/// +/// This gives us all permissions we need for the patchbay tests. +#[ctor::ctor] +fn userns_ctor() { + patchbay::init_userns().expect("failed to init userns"); +} + +// --- +// Holepunch tests +// --- + +/// Simple holepunch: Two devices behind destination-independent NATs, +/// establish via relay, upgrade to direct. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn holepunch_simple() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, _conn| Ok(()), + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "connection started relayed"); + paths.wait_ip(timeout).await?; + info!("connection became direct"); + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Tests that changing the uplink of an interface works (i.e. switching wifis). +/// +/// For this we observe a change in the selected path's remote addr on the *other* side. +/// Whether the side that changes interfaces opens a new path or does an RFC9000-style migration +/// is an implementation detail which we won't test for. +/// +/// The test currently fails, but should pass. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +#[ignore = "known to still fail"] +async fn switch_uplink() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let nat3 = lab.add_router("nat3").nat(Nat::Home).build().await?; + let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "connection started relayed"); + + // Wait until a first direct path is established. + let first = paths.wait_ip(timeout).await?; + info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); + + // Now wait until the direct path changes, which happens after the other endpoint + // changes its uplink. We check is_ip() explicitly to avoid triggering on a + // transient relay fallback during the network switch. + let second = paths + .wait_selected(timeout, |p| { + p.is_ip() && p.remote_addr() != first.remote_addr() + }) + .await + .context("did not switch paths")?; + info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); + + ping_accept(&conn, timeout).await?; + info!("ping done"); + Ok(()) + }, + async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "connection started relayed"); + + // Wait for conn to become direct. + paths + .wait_ip(timeout) + .await + .context("become direct")?; + + // Wait a little more and then switch wifis. 
+ tokio::time::sleep(Duration::from_secs(1)).await; + info!("switch IP uplink"); + dev.replug_iface("eth0", nat3.id()).await?; + + // We don't assert any path changes here, because the remote stays identical, + // and PathInfo does not contain info on local addrs. Instead, the remote + // only accepts our ping after the path changed. + info!("send ping"); + ping_open(&conn, timeout) + .await + .context("failed at ping_open")?; + info!("ping done"); + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Tests that changing the uplink from IPv4 to IPv6 works. +/// +/// Similar to `switch_uplink` but switches to an IPv6 only network. +/// +/// The test currently fails, but should pass. +#[tokio::test] +#[traced_test] +#[serial_test::serial] +#[ignore = "known to still fail"] +async fn switch_uplink_ipv6() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let public = lab + .add_router("public") + .preset(RouterPreset::Public) + .build() + .await?; + let home = lab + .add_router("nat2") + .preset(RouterPreset::Home) + .build() + .await?; + let mobile = lab + .add_router("nat3") + .preset(RouterPreset::IspV6) + .build() + .await?; + let dev1 = lab.add_device("dev1").uplink(public.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(home.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "connection started relayed"); + + // Wait until a first direct path is established. + let first = paths + .wait_selected(timeout, |p| { + matches!(p.remote_addr(), TransportAddr::Ip(addr) if addr.ip().is_ipv4()) + }) + .await + .context("did not become direct")?; + info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); + + // Now wait until the direct path changes, which happens after the other endpoint + // changes its uplink. 
We check is_ip() explicitly to avoid triggering on a + // transient relay fallback during the network switch. + let second = paths + .wait_selected(timeout, |p| { + matches!(p.remote_addr(), TransportAddr::Ip(addr) if addr.ip().is_ipv6()) + }) + .await + .context("did not switch paths to v6")?; + info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); + + ping_accept(&conn, timeout).await?; + info!("ping done"); + Ok(()) + }, + async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "connection started relayed"); + + // Wait for conn to become direct. + paths + .wait_ip(timeout) + .await + .context("become direct")?; + + // Wait a little more and then switch wifis. + tokio::time::sleep(Duration::from_secs(1)).await; + info!("switch IP uplink"); + dev.replug_iface("eth0", mobile.id()).await?; + + // We don't assert any path changes here, because the remote stays identical, + // and PathInfo does not contain info on local addrs. Instead, the remote + // only accepts our ping after the path changed. + info!("send ping"); + ping_open(&conn, timeout) + .await + .context("failed at ping_open")?; + info!("ping done"); + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Test that switching to a faster link works. +/// +/// Two devices, connected initially over holepunched NAT. Then mid connection +/// device 2 plugs a cable into device 1's router, i.e. they now have a LAN +/// connection. +/// +/// Verify we switch to the LAN connection. +#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn change_ifaces() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + + // dev2 has two uplinks (wifi=Mobile3G on eth0, LAN on eth1). eth1 starts down. 
+ let dev1 = lab + .add_device("dev1") + .iface("eth0", nat1.id(), None) + .build() + .await?; + let dev2 = lab + .add_device("dev2") + .iface("eth0", nat2.id(), Some(LinkCondition::Mobile3G)) + .iface("eth1", nat1.id(), None) + .build() + .await?; + dev2.link_down("eth1").await?; + + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout) + .await + .context("failed at ping_accept")?; + Ok(()) + }, + async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "connection started relayed"); + let first = paths + .wait_ip(timeout) + .await + .context("did not become direct")?; + info!(addr=?first.remote_addr(), "connection became direct"); + + tokio::time::sleep(Duration::from_secs(1)).await; + + // Bring up the LAN interface to the other ep. + info!("bring up eth1"); + dev.link_up("eth1").await?; + + // Wait for a new direct path to be established. We check is_ip() explicitly + // to avoid triggering on a transient relay fallback during the switch. + let next = paths + .wait_selected(timeout, |p| { + p.is_ip() && p.remote_addr() != first.remote_addr() + }) + .await + .context("did not switch paths")?; + info!(addr=?next.remote_addr(), "new direct path established"); + + ping_open(&conn, timeout) + .await + .context("failed at ping_open")?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +// --- +// NAT type matrix: verify holepunching across different NAT combinations +// --- + +/// One peer behind Home NAT, the other on a public network. +/// Holepunching should succeed: EIM mapping means the public peer can reach +/// the NATted peer's mapped port once it learns the address via relay. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +#[ignore = "stays relayed, holepunch times out (deadline elapsed)"] +async fn holepunch_home_nat_one_side() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat = lab.add_router("nat").nat(Nat::Home).build().await?; + let public = lab.add_router("public").build().await?; + let dev1 = lab.add_device("dev1").uplink(nat.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths.wait_ip(timeout).await.context("did not holepunch")?; + ping_open(&conn, timeout).await?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Both peers behind CGNAT (EIM+EIF). The most permissive real-world NAT. +/// Holepunching should succeed easily since filtering is endpoint-independent. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +#[ignore = "stays relayed, holepunch times out (deadline elapsed)"] +async fn holepunch_cgnat_both() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Cgnat).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Cgnat).build().await?; + let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch through CGNAT")?; + ping_open(&conn, timeout).await?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Both peers behind FullCone NAT (EIM+EIF with hairpin). The most permissive +/// NAT type — any external host can send to the mapped port. Holepunching +/// always succeeds on the first try. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn holepunch_full_cone_both() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::FullCone).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::FullCone).build().await?; + let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch through full cone")?; + ping_open(&conn, timeout).await?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Both peers behind Corporate (symmetric/EDM) NAT. Each destination gets a +/// different external port, making holepunching impossible. The connection +/// must stay on the relay. +#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn symmetric_nat_stays_relayed() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Corporate).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Corporate).build().await?; + let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "should start on relay"); + // Ping to verify the relay path works. + ping_open(&conn, timeout).await?; + // Give holepunching time to attempt and fail. 
+ tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.is_relay(), + "should still be relayed — symmetric NAT blocks holepunching" + ); + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// One peer behind Home NAT (EIM), the other behind Corporate/symmetric NAT +/// (EDM). Holepunching fails because the symmetric side allocates a different +/// port for each destination, so the Home peer's probes never reach the right +/// port. Connection stays relayed. +#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn mixed_home_vs_symmetric_stays_relayed() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let home = lab.add_router("home").nat(Nat::Home).build().await?; + let corp = lab.add_router("corp").nat(Nat::Corporate).build().await?; + let dev1 = lab.add_device("dev1").uplink(home.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(corp.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "should start on relay"); + ping_open(&conn, timeout).await?; + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.is_relay(), + "should still be relayed — symmetric NAT on one side blocks holepunching" + ); + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Both peers behind CloudNat (EDM+APDF), the symmetric NAT used by cloud +/// providers (AWS NAT Gateway, GCP Cloud NAT). Same as Corporate: holepunching +/// is impossible, connection stays relayed. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn cloud_nat_stays_relayed() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::CloudNat).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::CloudNat).build().await?; + let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "should start on relay"); + ping_open(&conn, timeout).await?; + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.is_relay(), + "should still be relayed — cloud symmetric NAT blocks holepunching" + ); + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Double NAT: device behind a Home router, which itself sits behind an ISP +/// CGNAT router. This is a common real-world scenario (carrier-grade NAT + +/// consumer router). Both NATs use endpoint-independent mapping, so +/// holepunching should succeed. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +#[ignore = "stays relayed, holepunch times out (deadline elapsed)"] +async fn holepunch_double_nat() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + // ISP-level CGNAT routers + let isp1 = lab.add_router("isp1").nat(Nat::Cgnat).build().await?; + let isp2 = lab.add_router("isp2").nat(Nat::Cgnat).build().await?; + // Home routers behind ISPs + let home1 = lab + .add_router("home1") + .nat(Nat::Home) + .upstream(isp1.id()) + .build() + .await?; + let home2 = lab + .add_router("home2") + .nat(Nat::Home) + .upstream(isp2.id()) + .build() + .await?; + let dev1 = lab.add_device("dev1").uplink(home1.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(home2.id()).build().await?; + let timeout = Duration::from_secs(15); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch through double NAT")?; + ping_open(&conn, timeout).await?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +// --- +// Firewall and adverse conditions +// --- + +/// Corporate firewall blocks all UDP except DNS (port 53) and only allows TCP +/// on ports 80 and 443. Holepunching is impossible, but the relay connection +/// via HTTPS (TCP 443) must still work. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn corporate_firewall_relay_only() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let fw = lab + .add_router("fw") + .firewall(Firewall::Corporate) + .build() + .await?; + let public = lab.add_router("public").build().await?; + let dev1 = lab.add_device("dev1").uplink(fw.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "should start on relay"); + ping_open(&conn, timeout).await?; + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.is_relay(), + "should still be relayed — corporate firewall blocks UDP" + ); + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Holepunch through Home NATs with a degraded mobile link (100ms latency, +/// 30ms jitter, 2% loss). Connection should still upgrade to direct despite +/// the poor link quality. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn holepunch_mobile_3g() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let dev1 = lab + .add_device("dev1") + .iface("eth0", nat1.id(), Some(LinkCondition::Mobile3G)) + .build() + .await?; + let dev2 = lab + .add_device("dev2") + .iface("eth0", nat2.id(), Some(LinkCondition::Mobile3G)) + .build() + .await?; + let timeout = Duration::from_secs(20); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch over 3G link")?; + ping_open(&conn, timeout).await?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Holepunch through Home NATs on a satellite link (high latency, moderate +/// jitter). Tests that iroh handles high-RTT environments without timing out +/// during NAT traversal. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn holepunch_satellite() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let dev1 = lab + .add_device("dev1") + .iface("eth0", nat1.id(), Some(LinkCondition::Satellite)) + .build() + .await?; + let dev2 = lab + .add_device("dev2") + .iface("eth0", nat2.id(), Some(LinkCondition::Satellite)) + .build() + .await?; + let timeout = Duration::from_secs(20); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch over satellite link")?; + ping_open(&conn, timeout).await?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Brief link outage: after holepunching succeeds, the link goes down for 2 +/// seconds and comes back up. The connection should recover — either by +/// falling back to relay during the outage or by re-establishing the direct +/// path after recovery. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn link_outage_recovery() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let timeout = Duration::from_secs(15); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await.context("ping 1")?; + ping_accept(&conn, timeout).await.context("ping 2")?; + Ok(()) + }, + async move |dev, _ep, conn| { + let mut paths = conn.paths(); + paths.wait_ip(timeout).await.context("initial holepunch")?; + info!("holepunched, now killing link for 2s"); + + // Take the link down. + dev.link_down("eth0").await?; + tokio::time::sleep(Duration::from_secs(2)).await; + dev.link_up("eth0").await?; + info!("link restored, waiting for recovery"); + + // After link recovery, we should be able to ping — via relay + // fallback or re-established direct path. + ping_open(&conn, Duration::from_secs(20)) + .await + .context("ping after link recovery")?; + info!("connection recovered after link outage"); + + // Eventually the direct path should come back. + paths + .wait_ip(Duration::from_secs(20)) + .await + .context("did not re-establish direct path")?; + ping_open(&conn, timeout).await.context("ping on direct")?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Hotel WiFi: captive-portal firewall allows all outbound TCP but only UDP +/// port 53 (DNS). Similar to corporate firewall but less restrictive on TCP. +/// Relay via HTTPS should work, holepunching should not. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn hotel_wifi_relay_only() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let hotel = lab + .add_router("hotel") + .preset(RouterPreset::Hotel) + .build() + .await?; + let public = lab.add_router("public").build().await?; + let dev1 = lab.add_device("dev1").uplink(hotel.id()).build().await?; + let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; + let timeout = Duration::from_secs(10); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.is_relay(), "should start on relay"); + ping_open(&conn, timeout).await?; + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.is_relay(), + "should still be relayed — hotel firewall blocks UDP" + ); + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +/// Asymmetric link conditions: one peer on a fast LAN, the other on degraded +/// WiFi. Holepunching should still succeed, and the connection should use +/// the direct path despite the asymmetric quality. 
+#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn holepunch_asymmetric_links() -> Result { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let dev1 = lab + .add_device("dev1") + .iface("eth0", nat1.id(), Some(LinkCondition::Lan)) + .build() + .await?; + let dev2 = lab + .add_device("dev2") + .iface("eth0", nat2.id(), Some(LinkCondition::WifiBad)) + .build() + .await?; + let timeout = Duration::from_secs(15); + Pair::new(dev1, dev2, relay_map) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch with asymmetric links")?; + ping_open(&conn, timeout).await?; + Ok(()) + }, + ) + .await?; + guard.ok(); + Ok(()) +} + +// --- +// Degradation ladder: find where holepunching breaks under worsening conditions +// --- + +/// Increasingly degraded link on one side, clean link on the other. +/// Each level adds more latency, loss, and reordering. The test runs each level +/// twice: once with the impaired side accepting, once connecting. +/// +/// Bump these thresholds as iroh's holepunching improves. 
+const DEGRADE_PASS_THRESHOLD_IMPAIRED_SERVER: usize = 7; +const DEGRADE_PASS_THRESHOLD_IMPAIRED_CLIENT: usize = 7; + +const DEGRADE_LEVELS: &[LinkLimits] = &[ + // 0: mild — good wifi + LinkLimits { + latency_ms: 10, + jitter_ms: 5, + loss_pct: 0.5, + reorder_pct: 0.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 1: moderate — mediocre 4G + LinkLimits { + latency_ms: 40, + jitter_ms: 15, + loss_pct: 1.0, + reorder_pct: 1.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 2: poor — bad wifi or 3G + LinkLimits { + latency_ms: 100, + jitter_ms: 30, + loss_pct: 3.0, + reorder_pct: 3.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 3: bad — congested 3G + LinkLimits { + latency_ms: 200, + jitter_ms: 60, + loss_pct: 5.0, + reorder_pct: 5.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 4: terrible — barely usable + LinkLimits { + latency_ms: 300, + jitter_ms: 80, + loss_pct: 8.0, + reorder_pct: 8.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 5: extreme — GEO satellite with heavy loss + LinkLimits { + latency_ms: 500, + jitter_ms: 100, + loss_pct: 12.0, + reorder_pct: 12.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, + // 6: absurd — stress test + LinkLimits { + latency_ms: 800, + jitter_ms: 200, + loss_pct: 20.0, + reorder_pct: 20.0, + rate_kbit: 0, + duplicate_pct: 0.0, + corrupt_pct: 0.0, + }, +]; + +/// Run the degradation ladder: iterate through levels, creating fresh devices +/// each round but reusing the lab and relay. Returns the number of levels passed. 
+async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuard)> { + let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; + let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; + let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; + let timeout = Duration::from_secs(15); + + let mut last_pass = 0; + for (level, limits) in DEGRADE_LEVELS.iter().enumerate() { + let impaired = Some(LinkCondition::Manual(*limits)); + let (server_cond, client_cond) = if impaired_is_server { + (impaired, None) + } else { + (None, impaired) + }; + let server_name = format!("{level}-server"); + let client_name = format!("{level}-client"); + debug!( + level, + latency_ms = limits.latency_ms, + loss_pct = limits.loss_pct, + reorder_pct = limits.reorder_pct, + impaired_is_server, + "starting level", + ); + let server = lab + .add_device(&server_name) + .iface("eth0", nat1.id(), server_cond) + .build() + .await?; + let client = lab + .add_device(&client_name) + .iface("eth0", nat2.id(), client_cond) + .build() + .await?; + + let server_id = server.id(); + let client_id = client.id(); + let result = Pair::new(server, client, relay_map.clone()) + .run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + if paths.wait_ip(timeout).await.is_err() { + n0_error::bail_any!("holepunch_timeout"); + } + ping_open(&conn, timeout).await?; + Ok(()) + }, + ) + .await; + + lab.remove_device(server_id)?; + lab.remove_device(client_id)?; + + let ok = match result { + Ok(()) => true, + Err(e) if e.to_string().contains("holepunch_timeout") => false, + Err(e) => return Err(e), + }; + + if ok { + info!( + level, + latency_ms = limits.latency_ms, + loss_pct = limits.loss_pct, + reorder_pct = limits.reorder_pct, + "PASSED", + ); + } else { + warn!( + level, + latency_ms = limits.latency_ms, + loss_pct = limits.loss_pct, + reorder_pct = 
limits.reorder_pct, + "FAILED", + ); + } + + if ok { + last_pass = level + 1; + } else { + break; + } + } + Ok((last_pass, guard)) +} + +/// Impaired side is the accepting (server) peer. +#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn degrade_ladder_impaired_server() -> Result { + let (passed, guard) = run_degrade_ladder(true).await?; + assert!( + passed >= DEGRADE_PASS_THRESHOLD_IMPAIRED_SERVER, + "holepunch should pass at least {DEGRADE_PASS_THRESHOLD_IMPAIRED_SERVER} levels \ + with impaired server, but only passed {passed}" + ); + guard.ok(); + Ok(()) +} + +/// Impaired side is the connecting (client) peer. +#[tokio::test] +#[traced_test] +#[serial_test::serial] +async fn degrade_ladder_impaired_client() -> Result { + let (passed, guard) = run_degrade_ladder(false).await?; + assert!( + passed >= DEGRADE_PASS_THRESHOLD_IMPAIRED_CLIENT, + "holepunch should pass at least {DEGRADE_PASS_THRESHOLD_IMPAIRED_CLIENT} levels \ + with impaired client, but only passed {passed}" + ); + guard.ok(); + Ok(()) +} diff --git a/iroh/tests/patchbay/netreport.rs b/iroh/tests/patchbay/netreport.rs new file mode 100644 index 0000000000..80037c7f9a --- /dev/null +++ b/iroh/tests/patchbay/netreport.rs @@ -0,0 +1,341 @@ +// --- +// NetReport tests +// --- + +/// Home NAT (EIM+APDF): the most common consumer router. +/// Expect UDP v4, a NATted public address (different from the device's private IP), +/// relay reachability with measured latency, and no captive portal. 
+#[tokio::test] +#[traced_test] +async fn netreport_home_nat() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let nat = lab.add_router("nat").nat(Nat::Home).build().await?; + let dev = lab.add_device("dev").uplink(nat.id()).build().await?; + let dev_ip = dev.ip().expect("device has IPv4"); + let report = run_net_report(dev, relay_map).await?; + assert!(report.udp_v4, "expected UDP v4 through home NAT"); + let global_v4 = report.global_v4.expect("expected global IPv4 address"); + assert_ne!( + *global_v4.ip(), + dev_ip, + "global IP should differ from device private IP behind NAT" + ); + let relay = report + .preferred_relay + .expect("expected relay to be reachable"); + assert!( + report.relay_latency.iter().any(|(_, url, _)| *url == relay), + "expected latency data for preferred relay" + ); + Ok(()) +} + +/// Corporate (symmetric) NAT: produces a different external port +/// per destination. Holepunching requires relay, but relay should be reachable. +#[tokio::test] +#[traced_test] +async fn netreport_corporate_nat() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let nat = lab.add_router("nat").nat(Nat::Corporate).build().await?; + let dev = lab.add_device("dev").uplink(nat.id()).build().await?; + let dev_ip = dev.ip().expect("device has IPv4"); + let report = run_net_report(dev, relay_map).await?; + assert!(report.udp_v4, "expected UDP v4 through corporate NAT"); + let global_v4 = report.global_v4.expect("expected global IPv4 address"); + assert_ne!( + *global_v4.ip(), + dev_ip, + "global IP should differ from device private IP behind symmetric NAT" + ); + let relay = report + .preferred_relay + .expect("expected relay to be reachable"); + assert!( + report.relay_latency.iter().any(|(_, url, _)| *url == relay), + "expected latency data for preferred relay" + ); + Ok(()) +} + +/// Direct connection (no NAT). 
The reported global_v4 should equal the +/// device's own IP since there is no address translation. +#[tokio::test] +#[traced_test] +async fn netreport_direct() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let router = lab.add_router("direct").build().await?; // Nat::None by default + let dev = lab.add_device("dev").uplink(router.id()).build().await?; + let dev_ip = dev.ip().expect("device has IPv4"); + let report = run_net_report(dev, relay_map).await?; + assert!(report.udp_v4, "expected UDP v4 on direct connection"); + let global_v4 = report.global_v4.expect("expected global IPv4 address"); + assert_eq!( + *global_v4.ip(), + dev_ip, + "without NAT, global IP should equal device's own IP" + ); + let relay = report + .preferred_relay + .expect("expected relay to be reachable"); + assert!( + report.relay_latency.iter().any(|(_, url, _)| *url == relay), + "expected latency data for preferred relay" + ); + Ok(()) +} + +// --- +// NetReport: additional NAT topologies +// --- + +/// Full cone NAT (EIM+EIF): most permissive NAT. Port-preserving, hairpin enabled. +/// Holepunching always succeeds. Same expectations as Home NAT for net_report. 
+#[tokio::test] +#[traced_test] +async fn netreport_full_cone() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let nat = lab.add_router("nat").nat(Nat::FullCone).build().await?; + let dev = lab.add_device("dev").uplink(nat.id()).build().await?; + let dev_ip = dev.ip().expect("device has IPv4"); + let report = run_net_report(dev, relay_map).await?; + assert!(report.udp_v4, "expected UDP v4 through full cone NAT"); + let global_v4 = report.global_v4.expect("expected global IPv4 address"); + assert_ne!( + *global_v4.ip(), + dev_ip, + "global IP should differ from device private IP behind NAT" + ); + let relay = report + .preferred_relay + .expect("expected relay to be reachable"); + assert!( + report.relay_latency.iter().any(|(_, url, _)| *url == relay), + "expected latency data for preferred relay" + ); + assert_ne!( + report.captive_portal, + Some(true), + "no captive portal expected" + ); + Ok(()) +} + +/// Cloud NAT (EDM+APDF): symmetric NAT with randomized ports, similar to corporate +/// but with longer UDP timeout (350s). Common in cloud providers (GCP, AWS). 
+#[tokio::test] +#[traced_test] +async fn netreport_cloud_nat() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let nat = lab.add_router("nat").nat(Nat::CloudNat).build().await?; + let dev = lab.add_device("dev").uplink(nat.id()).build().await?; + let dev_ip = dev.ip().expect("device has IPv4"); + let report = run_net_report(dev, relay_map).await?; + assert!(report.udp_v4, "expected UDP v4 through cloud NAT"); + let global_v4 = report.global_v4.expect("expected global IPv4 address"); + assert_ne!( + *global_v4.ip(), + dev_ip, + "global IP should differ from device private IP behind cloud NAT" + ); + let relay = report + .preferred_relay + .expect("expected relay to be reachable"); + assert!( + report.relay_latency.iter().any(|(_, url, _)| *url == relay), + "expected latency data for preferred relay" + ); + assert_ne!( + report.captive_portal, + Some(true), + "no captive portal expected" + ); + Ok(()) +} + +/// Standalone CGNAT (EIM+EIF): carrier-grade NAT without a home router in front. +/// Common for mobile carriers. More permissive filtering than Home NAT. 
+#[tokio::test] +#[traced_test] +async fn netreport_cgnat() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let nat = lab.add_router("nat").nat(Nat::Cgnat).build().await?; + let dev = lab.add_device("dev").uplink(nat.id()).build().await?; + let dev_ip = dev.ip().expect("device has IPv4"); + let report = run_net_report(dev, relay_map).await?; + assert!(report.udp_v4, "expected UDP v4 through CGNAT"); + let global_v4 = report.global_v4.expect("expected global IPv4 address"); + assert_ne!( + *global_v4.ip(), + dev_ip, + "global IP should differ from device private IP behind CGNAT" + ); + let relay = report + .preferred_relay + .expect("expected relay to be reachable"); + assert!( + report.relay_latency.iter().any(|(_, url, _)| *url == relay), + "expected latency data for preferred relay" + ); + assert_ne!( + report.captive_portal, + Some(true), + "no captive portal expected" + ); + Ok(()) +} + +// --- +// NetReport: firewall scenarios +// --- + +/// Corporate firewall blocks all UDP except DNS (port 53). QAD probes fail, +/// but the relay is still reachable via HTTPS on port 443. 
+#[tokio::test] +#[traced_test] +async fn netreport_corporate_firewall() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let fw = lab + .add_router("fw") + .firewall(Firewall::Corporate) + .build() + .await?; + let dev = lab.add_device("dev").uplink(fw.id()).build().await?; + let report = run_net_report(dev, relay_map).await?; + assert!( + !report.udp_v4, + "UDP should be blocked by corporate firewall" + ); + assert!( + report.global_v4.is_none(), + "no global IPv4 without successful QAD probes" + ); + assert!( + report.preferred_relay.is_some(), + "relay should still be reachable via HTTPS (TCP 443)" + ); + assert_ne!( + report.captive_portal, + Some(true), + "no captive portal expected" + ); + Ok(()) +} + +// --- +// NetReport: dual-stack / IPv6 +// --- + +/// Dual-stack device on a direct (no NAT) connection with a dual-stack relay. +/// Both IPv4 and IPv6 QAD probes should succeed. Without NAT, the reported +/// global addresses should match the device's own addresses. 
+#[tokio::test] +#[traced_test] +async fn netreport_dual_stack_direct() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let router = lab + .add_router("direct") + .ip_support(IpSupport::DualStack) + .build() + .await?; + let dev = lab.add_device("dev").uplink(router.id()).build().await?; + let dev_ip = dev.ip().expect("device has IPv4"); + let dev_ip6 = dev.ip6().expect("device has IPv6 on dual-stack router"); + info!(%dev_ip, %dev_ip6, "dual-stack device"); + let report = run_net_report(dev, relay_map).await?; + // v4 + assert!(report.udp_v4, "expected UDP v4 on direct dual-stack"); + let global_v4 = report.global_v4.expect("expected global IPv4 address"); + assert_eq!( + *global_v4.ip(), + dev_ip, + "without NAT, global IPv4 should equal device's own IP" + ); + // v6 + assert!(report.udp_v6, "expected UDP v6 on direct dual-stack"); + let global_v6 = report.global_v6.expect("expected global IPv6 address"); + assert_eq!( + *global_v6.ip(), + dev_ip6, + "without NAT, global IPv6 should equal device's own IP" + ); + assert!( + report.preferred_relay.is_some(), + "expected relay to be reachable" + ); + assert_ne!( + report.captive_portal, + Some(true), + "no captive portal expected" + ); + Ok(()) +} + +/// Dual-stack device behind a home NAT with no IPv6 NAT (NatV6Mode::None). +/// IPv4 is NATted (global differs from device IP). IPv6 uses global unicast +/// directly, so the reported global IPv6 should match the device's own address. 
+#[tokio::test] +#[traced_test] +#[ignore = "currently broken due to bug in patchbay"] +async fn netreport_dual_stack_home_nat() -> Result { + let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; + let nat = lab + .add_router("nat") + .nat(Nat::Home) + .ip_support(IpSupport::DualStack) + .build() + .await?; + let dev = lab.add_device("dev").uplink(nat.id()).build().await?; + let dev_ip = dev.ip().expect("device has IPv4"); + let dev_ip6 = dev.ip6().expect("device has IPv6 on dual-stack router"); + info!(%dev_ip, %dev_ip6, "dual-stack device behind home NAT"); + let report = run_net_report(dev, relay_map).await?; + println!("{report:#?}"); + // v4 is NATted. + assert!(report.udp_v4, "expected UDP v4 through home NAT"); + let global_v4 = report.global_v4.expect("expected global IPv4 address"); + assert_ne!( + *global_v4.ip(), + dev_ip, + "global IPv4 should differ from private IP behind NAT" + ); + // v6 passes through without translation (NatV6Mode::None = global unicast). + assert!(report.udp_v6, "expected UDP v6 with global unicast IPv6"); + let global_v6 = report.global_v6.expect("expected global IPv6 address"); + assert_eq!( + *global_v6.ip(), + dev_ip6, + "IPv6 has no NAT, global should equal device's own IP" + ); + assert!( + report.preferred_relay.is_some(), + "expected relay to be reachable" + ); + assert_ne!( + report.captive_portal, + Some(true), + "no captive portal expected" + ); + Ok(()) +} + +// --- +// NetReport helper +// --- + +/// Bind an endpoint in `dev`'s namespace, wait for the first net report, return it. 
+pub async fn run_net_report(dev: Device, relay_map: RelayMap) -> Result { + dev.spawn(move |dev| { + async move { + let endpoint = endpoint_builder(&dev, relay_map).bind().await?; + let mut watcher = endpoint.net_report(); + let report = tokio::time::timeout(Duration::from_secs(10), watcher.initialized()) + .await + .anyerr()?; + endpoint.close().await; + n0_error::Ok(report) + } + .instrument(error_span!("net_report")) + })? + .await + .anyerr()? +} diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs new file mode 100644 index 0000000000..402965a0a7 --- /dev/null +++ b/iroh/tests/patchbay/util.rs @@ -0,0 +1,321 @@ +use std::{future::Future, path::PathBuf, time::Duration}; + +use iroh::{ + Endpoint, EndpointAddr, RelayMap, RelayMode, Watcher, + endpoint::{Connection, PathInfo, PathWatcher}, + tls::CaRootsConfig, +}; +use n0_error::{Result, StdResultExt, ensure_any}; +use n0_future::task::AbortOnDropHandle; +use patchbay::{Device, IpSupport, Lab, LabOpts, OutDir, TestGuard}; +use tokio::sync::oneshot; +use tracing::{Instrument, debug, error_span}; + +use self::relay::run_relay_server; + +const TEST_ALPN: &[u8] = b"test"; + +/// Create a lab with a dual-stack relay server. Returns the lab, relay map, a drop guard +/// that keeps the relay alive, and a [`TestGuard`] that records pass/fail. +/// +/// The relay binds on `[::]` and is reachable via `https://relay.test` (resolved +/// through lab-wide DNS entries for both IPv4 and IPv6). 
+pub async fn lab_with_relay( + path: PathBuf, +) -> Result<(Lab, RelayMap, AbortOnDropHandle<()>, TestGuard)> { + let mut opts = LabOpts::default().outdir(OutDir::Exact(path)); + if let Some(name) = std::thread::current().name() { + opts = opts.label(name); + } + let lab = Lab::with_opts(opts).await?; + let guard = lab.test_guard(); + let (relay_map, relay_guard) = spawn_relay(&lab).await?; + Ok((lab, relay_map, relay_guard, guard)) +} + +async fn spawn_relay(lab: &Lab) -> Result<(RelayMap, AbortOnDropHandle<()>)> { + let dc = lab + .add_router("dc") + .ip_support(IpSupport::DualStack) + .build() + .await?; + let dev_relay = lab.add_device("relay").uplink(dc.id()).build().await?; + + // Register both v4 and v6 addresses under "relay.test" lab-wide. + // Devices created after this will resolve "relay.test" to both addresses. + let relay_v4 = dev_relay.ip().expect("relay has IPv4"); + let relay_v6 = dev_relay.ip6().expect("relay has IPv6"); + lab.dns_entry("relay.test", relay_v4.into())?; + lab.dns_entry("relay.test", relay_v6.into())?; + + let (relay_map_tx, relay_map_rx) = oneshot::channel(); + let task_relay = dev_relay.spawn(async move |_ctx| { + let (relay_map, _server) = run_relay_server().await.unwrap(); + relay_map_tx.send(relay_map).unwrap(); + std::future::pending::<()>().await; + })?; + let relay_map = relay_map_rx.await.unwrap(); + Ok((relay_map, AbortOnDropHandle::new(task_relay))) +} + +// --- +// Pair: run two connected endpoints +// --- + +/// Two connected endpoints in the test lab, ready to run. +/// +/// `peer1` runs in `dev1`'s namespace as the accepting side. +/// `peer2` runs in `dev2`'s namespace as the connecting side. +/// +/// `peer1` awaits the connection to be closed afterwards, whereas `peer2` closes +/// the connection. 
+pub struct Pair { + dev1: Device, + dev2: Device, + relay_map: RelayMap, +} + +impl Pair { + pub fn new(dev1: Device, dev2: Device, relay_map: RelayMap) -> Self { + Self { + dev1, + dev2, + relay_map, + } + } + + pub async fn run(self, peer1: F1, peer2: F2) -> Result + where + F1: FnOnce(Device, Endpoint, Connection) -> Fut1 + Send + 'static, + Fut1: Future + Send, + F2: FnOnce(Device, Endpoint, Connection) -> Fut2 + Send + 'static, + Fut2: Future + Send, + { + let (addr_tx, addr_rx) = oneshot::channel(); + let relay_map2 = self.relay_map.clone(); + let task1 = self.dev1.spawn(move |dev| { + async move { + let endpoint = endpoint_builder(&dev, relay_map2).bind().await?; + endpoint.online().await; + addr_tx.send(addr_relay_only(endpoint.addr())).unwrap(); + let conn = endpoint.accept().await.unwrap().accept().anyerr()?.await?; + watch_selected_path(&conn); + peer1(dev, endpoint.clone(), conn.clone()).await?; + conn.closed().await; + endpoint.close().await; + n0_error::Ok(()) + } + .instrument(error_span!("ep-acpt")) + })?; + let task2 = self.dev2.spawn(move |dev| { + async move { + let endpoint = endpoint_builder(&dev, self.relay_map).bind().await?; + let addr = addr_rx.await.unwrap(); + let conn = endpoint.connect(addr, TEST_ALPN).await?; + watch_selected_path(&conn); + peer2(dev, endpoint.clone(), conn).await?; + endpoint.close().await; + n0_error::Ok(()) + } + .instrument(error_span!("ep-cnct")) + })?; + task2.await.anyerr()??; + task1.await.anyerr()??; + Ok(()) + } +} + +/// Extension methods on [`PathWatcher`] for common waiting patterns in tests. 
+#[allow(unused)] +pub trait PathWatcherExt { + async fn wait_selected( + &mut self, + timeout: Duration, + f: impl Fn(&PathInfo) -> bool, + ) -> Result; + + fn selected(&mut self) -> PathInfo; + + fn match_selected(&mut self, f: impl FnOnce(&PathInfo) -> bool) -> bool { + f(&self.selected()) + } + + fn is_ip(&mut self) -> bool { + self.match_selected(PathInfo::is_ip) + } + + fn is_relay(&mut self) -> bool { + self.match_selected(PathInfo::is_relay) + } + /// Wait until the selected path is a direct (IP) path. + async fn wait_ip(&mut self, timeout: Duration) -> Result { + self.wait_selected(timeout, PathInfo::is_ip).await + } + + /// Wait until the selected path is a relay path. + async fn wait_relay(&mut self, timeout: Duration) -> Result { + self.wait_selected(timeout, PathInfo::is_relay).await + } +} + +impl PathWatcherExt for PathWatcher { + fn selected(&mut self) -> PathInfo { + let p = self.get(); + p.iter() + .find(|p| p.is_selected()) + .cloned() + .expect("no selected path") + } + + async fn wait_selected( + &mut self, + timeout: Duration, + f: impl Fn(&PathInfo) -> bool, + ) -> Result { + tokio::time::timeout(timeout, async { + loop { + let selected = self.selected(); + if f(&selected) { + return n0_error::Ok(selected); + } + self.updated().await?; + } + }) + .await + .anyerr()? + } +} + +pub async fn ping_open(conn: &Connection, timeout: Duration) -> Result { + tokio::time::timeout(timeout, async { + let data: [u8; 8] = rand::random(); + let (mut send, mut recv) = conn.open_bi().await.anyerr()?; + send.write_all(&data).await.anyerr()?; + send.finish().anyerr()?; + let r = recv.read_to_end(8).await.anyerr()?; + ensure_any!(r == data, "reply matches"); + Ok(()) + }) + .await + .anyerr()? 
+} + +pub async fn ping_accept(conn: &Connection, timeout: Duration) -> Result { + tokio::time::timeout(timeout, async { + let (mut send, mut recv) = conn.accept_bi().await.anyerr()?; + let data = recv.read_to_end(8).await.anyerr()?; + send.write_all(&data).await.anyerr()?; + send.finish().anyerr()?; + Ok(()) + }) + .await + .anyerr()? +} + +fn watch_selected_path(conn: &Connection) { + let mut watcher = conn.paths(); + tokio::spawn( + async move { + let mut prev = None; + loop { + let paths = watcher.get(); + let selected = paths.iter().find(|p| p.is_selected()).unwrap(); + if Some(selected) != prev.as_ref() { + debug!( + "selected path: [{}] {:?} rtt {:?}", + selected.id(), + selected.remote_addr(), + selected.rtt().unwrap() + ); + prev = Some(selected.clone()); + } + if watcher.updated().await.is_err() { + break; + } + } + } + .instrument(tracing::Span::current()), + ); +} + +fn endpoint_builder(device: &Device, relay_map: RelayMap) -> iroh::endpoint::Builder { + #[allow(unused_mut)] + let mut builder = Endpoint::empty_builder(RelayMode::Custom(relay_map)) + .ca_roots_config(CaRootsConfig::insecure_skip_verify()) + .alpns(vec![TEST_ALPN.to_vec()]); + + #[cfg(not(feature = "qlog"))] + let _ = device; + + #[cfg(feature = "qlog")] + { + if let Some(path) = device.filepath("qlog") { + let prefix = path.file_name().unwrap().to_str().unwrap(); + let directory = path.parent().unwrap(); + let transport_config = iroh::endpoint::QuicTransportConfig::builder() + .qlog_from_path(directory, prefix) + .build(); + builder = builder.transport_config(transport_config); + } + } + + builder +} + +fn addr_relay_only(addr: EndpointAddr) -> EndpointAddr { + EndpointAddr::from_parts(addr.id, addr.addrs.into_iter().filter(|a| a.is_relay())) +} + +mod relay { + use std::net::{IpAddr, Ipv6Addr}; + + use iroh_base::RelayUrl; + use iroh_relay::{ + RelayConfig, RelayMap, RelayQuicConfig, + server::{ + AccessConfig, CertConfig, QuicConfig, RelayConfig as RelayServerConfig, Server, + 
ServerConfig, SpawnError, TlsConfig, + }, + }; + + /// Spawn a relay server bound on `[::]` that accepts both IPv4 and IPv6. + /// Uses `https://relay.test` as the URL — callers must set up lab-wide DNS + /// entries for `relay.test` pointing to the relay's v4 and v6 addresses. + pub async fn run_relay_server() -> Result<(RelayMap, Server), SpawnError> { + let bind_ip: IpAddr = Ipv6Addr::UNSPECIFIED.into(); + + let (certs, server_config) = + iroh_relay::server::testing::self_signed_tls_certs_and_config(); + + let tls = TlsConfig { + cert: CertConfig::<(), ()>::Manual { certs }, + https_bind_addr: (bind_ip, 443).into(), + quic_bind_addr: (bind_ip, 7842).into(), + server_config, + }; + let quic = Some(QuicConfig { + server_config: tls.server_config.clone(), + bind_addr: tls.quic_bind_addr, + }); + let config = ServerConfig { + relay: Some(RelayServerConfig { + http_bind_addr: (bind_ip, 80).into(), + tls: Some(tls), + limits: Default::default(), + key_cache_capacity: Some(1024), + access: AccessConfig::Everyone, + }), + quic, + ..Default::default() + }; + let server = Server::spawn(config).await?; + + let url: RelayUrl = "https://relay.test".parse().expect("valid relay url"); + let quic = server + .quic_addr() + .map(|addr| RelayQuicConfig { port: addr.port() }); + let relay_map: RelayMap = RelayConfig { url, quic }.into(); + + Ok((relay_map, server)) + } +} From 722f174f27fabed2e2d6fc12c8c27decbaf62e10 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 24 Mar 2026 13:26:29 +0100 Subject: [PATCH 02/35] tests: improve patchbay test error reporting --- iroh/tests/patchbay.rs | 65 +++++++++++++++++++------------------ iroh/tests/patchbay/util.rs | 36 ++++++++++++++++++-- 2 files changed, 68 insertions(+), 33 deletions(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 2ca187c4e4..05f697369e 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -936,20 +936,22 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, 
TestGuar let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; - let timeout = Duration::from_secs(15); + let timeout = Duration::from_secs(10); let mut last_pass = 0; - for (level, limits) in DEGRADE_LEVELS.iter().enumerate() { + for (impairment_level, limits) in DEGRADE_LEVELS.iter().enumerate() { let impaired = Some(LinkCondition::Manual(*limits)); let (server_cond, client_cond) = if impaired_is_server { (impaired, None) } else { (None, impaired) }; - let server_name = format!("{level}-server"); - let client_name = format!("{level}-client"); - debug!( - level, + let server_name = format!("server-{impairment_level}"); + let client_name = format!("client-{impairment_level}"); + tracing::event!( + target: "test::_events::ladder_start", + tracing::Level::INFO, + impairment_level, latency_ms = limits.latency_ms, loss_pct = limits.loss_pct, reorder_pct = limits.reorder_pct, @@ -977,9 +979,7 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar }, async move |_dev, _ep, conn| { let mut paths = conn.paths(); - if paths.wait_ip(timeout).await.is_err() { - n0_error::bail_any!("holepunch_timeout"); - } + paths.wait_ip(timeout).await?; ping_open(&conn, timeout).await?; Ok(()) }, @@ -989,32 +989,35 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar lab.remove_device(server_id)?; lab.remove_device(client_id)?; - let ok = match result { - Ok(()) => true, - Err(e) if e.to_string().contains("holepunch_timeout") => false, - Err(e) => return Err(e), - }; + let ok = result.is_ok(); - if ok { - info!( - level, - latency_ms = limits.latency_ms, - loss_pct = limits.loss_pct, - reorder_pct = limits.reorder_pct, - "PASSED", - ); - } else { - warn!( - level, - latency_ms = limits.latency_ms, - loss_pct = limits.loss_pct, - reorder_pct = limits.reorder_pct, - "FAILED", - ); 
+ match result.as_ref() { + Ok(()) => { + tracing::event!( + target: "test::_events::ladder_pass", + tracing::Level::INFO, + impairment_level, + latency_ms = limits.latency_ms, + loss_pct = limits.loss_pct, + reorder_pct = limits.reorder_pct, + "PASSED", + ); + } + Err(err) => { + tracing::event!( + target: "test::_events::ladder_fail", + tracing::Level::WARN, + latency_ms = limits.latency_ms, + loss_pct = limits.loss_pct, + reorder_pct = limits.reorder_pct, + error = format!("{err:#}"), + "FAILED", + ); + } } if ok { - last_pass = level + 1; + last_pass = impairment_level + 1; } else { break; } diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs index 402965a0a7..c8d1ad9b88 100644 --- a/iroh/tests/patchbay/util.rs +++ b/iroh/tests/patchbay/util.rs @@ -119,8 +119,40 @@ impl Pair { } .instrument(error_span!("ep-cnct")) })?; - task2.await.anyerr()??; - task1.await.anyerr()??; + + let (res1, res2) = tokio::join!(task1, task2); + + let res1: Result<()> = res1 + .std_context("device1 panicked") + .map(|res| res.context("device1 failed")) + .flatten(); + let res2: Result<()> = res2 + .std_context("device2 panicked") + .map(|res| res.context("device2 failed")) + .flatten(); + + if let Err(err) = res1.as_ref() { + self.dev1.run_sync(|| { + tracing::event!( + target: "test::_event::failed", + tracing::Level::ERROR, + error: format!("{err:#}"), + ); + Ok(()) + }); + } + if let Err(err) = res2.as_ref() { + self.dev2.run_sync(|| { + tracing::event!( + target: "test::_event::failed", + tracing::Level::ERROR, + error: format!("{err:#}"), + ); + Ok(()) + }); + } + res1?; + res2?; Ok(()) } } From 141cce54e299e9e02c08fb5e727bad7d0b615a45 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 24 Mar 2026 14:29:50 +0100 Subject: [PATCH 03/35] improve patchbay test error reporting --- .github/workflows/patchbay.yml | 1 - Cargo.lock | 10 +++--- iroh/tests/patchbay.rs | 10 +++--- iroh/tests/patchbay/util.rs | 65 ++++++++++++++++++---------------- 4 files changed, 44 
insertions(+), 42 deletions(-) diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index 2d0c6a09e8..2d99746f37 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -5,7 +5,6 @@ on: push: branches: - main - - Frando/netsim concurrency: group: patchbay-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/Cargo.lock b/Cargo.lock index d1581ac62a..695e1b889e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2115,7 +2115,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -3238,7 +3238,7 @@ dependencies = [ "pin-project-lite", "rustc-hash", "rustls", - "socket2 0.6.3", + "socket2 0.5.10", "thiserror 2.0.18", "tokio", "tokio-stream", @@ -3282,7 +3282,7 @@ source = "git+https://github.com/n0-computer/noq?branch=main#b212bbcaccaa82089cc dependencies = [ "cfg_aliases", "libc", - "socket2 0.6.3", + "socket2 0.5.10", "tracing", "windows-sys 0.61.2", ] @@ -3942,7 +3942,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.3", + "socket2 0.5.10", "thiserror 2.0.18", "tokio", "tracing", @@ -3979,7 +3979,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2 0.5.10", "tracing", "windows-sys 0.60.2", ] diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 05f697369e..8c1bc3ffad 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -31,7 +31,7 @@ use n0_error::{Result, StackResultExt}; use n0_tracing_test::traced_test; use patchbay::{Firewall, LinkCondition, LinkLimits, Nat, RouterPreset, TestGuard}; use testdir::testdir; -use tracing::{debug, info, warn}; +use tracing::info; use self::util::{Pair, PathWatcherExt, lab_with_relay, ping_accept, ping_open}; @@ -946,8 +946,8 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar } else { (None, impaired) }; - let server_name = 
format!("server-{impairment_level}"); - let client_name = format!("client-{impairment_level}"); + let server_name = format!("{impairment_level}-server"); + let client_name = format!("{impairment_level}-client"); tracing::event!( target: "test::_events::ladder_start", tracing::Level::INFO, @@ -994,7 +994,7 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar match result.as_ref() { Ok(()) => { tracing::event!( - target: "test::_events::ladder_pass", + target: "iroh::_events::test_ladder_pass", tracing::Level::INFO, impairment_level, latency_ms = limits.latency_ms, @@ -1005,7 +1005,7 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar } Err(err) => { tracing::event!( - target: "test::_events::ladder_fail", + target: "iroh::_events::test_ladder_fail", tracing::Level::WARN, latency_ms = limits.latency_ms, loss_pct = limits.loss_pct, diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs index c8d1ad9b88..cb7ffd8857 100644 --- a/iroh/tests/patchbay/util.rs +++ b/iroh/tests/patchbay/util.rs @@ -2,10 +2,10 @@ use std::{future::Future, path::PathBuf, time::Duration}; use iroh::{ Endpoint, EndpointAddr, RelayMap, RelayMode, Watcher, - endpoint::{Connection, PathInfo, PathWatcher}, + endpoint::{Connection, PathInfo, PathWatcher, presets}, tls::CaRootsConfig, }; -use n0_error::{Result, StdResultExt, ensure_any}; +use n0_error::{Result, StdResultExt, anyerr, ensure_any}; use n0_future::task::AbortOnDropHandle; use patchbay::{Device, IpSupport, Lab, LabOpts, OutDir, TestGuard}; use tokio::sync::oneshot; @@ -122,35 +122,37 @@ impl Pair { let (res1, res2) = tokio::join!(task1, task2); - let res1: Result<()> = res1 - .std_context("device1 panicked") - .map(|res| res.context("device1 failed")) - .flatten(); - let res2: Result<()> = res2 - .std_context("device2 panicked") - .map(|res| res.context("device2 failed")) - .flatten(); - - if let Err(err) = res1.as_ref() { - self.dev1.run_sync(|| { - 
tracing::event!( - target: "test::_event::failed", - tracing::Level::ERROR, - error: format!("{err:#}"), - ); - Ok(()) - }); - } - if let Err(err) = res2.as_ref() { - self.dev2.run_sync(|| { - tracing::event!( - target: "test::_event::failed", - tracing::Level::ERROR, - error: format!("{err:#}"), - ); + // Map the results to include the device name, and emit a tracing event within the device context. + let [res1, res2] = [(&self.dev1, res1), (&self.dev2, res2)].map(|(dev, res)| { + let res = match res { + Err(err) => Err(anyerr!(err, "device {} panicked", dev.name())), + Ok(Err(err)) => Err(anyerr!(err, "device {} failed", dev.name())), + Ok(Ok(())) => Ok(()), + }; + let res_str = res.as_ref().map_err(|err| format!("{err:#}")).cloned(); + dev.run_sync(move || { + match res_str { + Ok(()) => { + tracing::event!( + target: "iroh::_events::test_ok", + tracing::Level::INFO, + msg = "device ok" + ); + } + Err(error) => { + tracing::event!( + target: "iroh::_events::test_failed", + tracing::Level::ERROR, + error, + msg = "device failed" + ); + } + } Ok(()) - }); - } + }) + .ok(); + res + }); res1?; res2?; Ok(()) @@ -272,7 +274,8 @@ fn watch_selected_path(conn: &Connection) { fn endpoint_builder(device: &Device, relay_map: RelayMap) -> iroh::endpoint::Builder { #[allow(unused_mut)] - let mut builder = Endpoint::empty_builder(RelayMode::Custom(relay_map)) + let mut builder = Endpoint::builder(presets::Minimal) + .relay_mode(RelayMode::Custom(relay_map)) .ca_roots_config(CaRootsConfig::insecure_skip_verify()) .alpns(vec![TEST_ALPN.to_vec()]); From c5d592881ff64ccc251e7c552821c4c3a38b9368 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 24 Mar 2026 15:45:03 +0100 Subject: [PATCH 04/35] fixup timeouts --- .github/workflows/patchbay.yml | 2 +- iroh/tests/patchbay.rs | 44 +++++++++++++--------------------- iroh/tests/patchbay/util.rs | 4 ++-- 3 files changed, 20 insertions(+), 30 deletions(-) diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index 
2d99746f37..b6aa88fe9e 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -34,7 +34,7 @@ jobs: - name: Run patchbay tests id: tests - run: cargo test --release -p iroh --test patchbay -- --test-threads=1 + run: cargo test --release -p iroh --test patchbay -- --test-threads=1 --nocapture env: RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG' }} diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 8c1bc3ffad..142234fc02 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -27,7 +27,7 @@ use std::time::Duration; use iroh::TransportAddr; -use n0_error::{Result, StackResultExt}; +use n0_error::{Result, StackResultExt, StdResultExt}; use n0_tracing_test::traced_test; use patchbay::{Firewall, LinkCondition, LinkLimits, Nat, RouterPreset, TestGuard}; use testdir::testdir; @@ -854,8 +854,8 @@ async fn holepunch_asymmetric_links() -> Result { /// twice: once with the impaired side accepting, once connecting. /// /// Bump these thresholds as iroh's holepunching improves. 
-const DEGRADE_PASS_THRESHOLD_IMPAIRED_SERVER: usize = 7; -const DEGRADE_PASS_THRESHOLD_IMPAIRED_CLIENT: usize = 7; +const DEGRADE_PASS_THRESHOLD_IMPAIRED_SERVER: usize = 3; +const DEGRADE_PASS_THRESHOLD_IMPAIRED_CLIENT: usize = 3; const DEGRADE_LEVELS: &[LinkLimits] = &[ // 0: mild — good wifi @@ -868,17 +868,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 1: moderate — mediocre 4G - LinkLimits { - latency_ms: 40, - jitter_ms: 15, - loss_pct: 1.0, - reorder_pct: 1.0, - rate_kbit: 0, - duplicate_pct: 0.0, - corrupt_pct: 0.0, - }, - // 2: poor — bad wifi or 3G + // 1: poor — bad wifi or 3G LinkLimits { latency_ms: 100, jitter_ms: 30, @@ -888,7 +878,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 3: bad — congested 3G + // 2: bad — congested 3G LinkLimits { latency_ms: 200, jitter_ms: 60, @@ -898,7 +888,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 4: terrible — barely usable + // 3: terrible — barely usable LinkLimits { latency_ms: 300, jitter_ms: 80, @@ -908,7 +898,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 5: extreme — GEO satellite with heavy loss + // 4: extreme — GEO satellite with heavy loss LinkLimits { latency_ms: 500, jitter_ms: 100, @@ -936,7 +926,7 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; - let timeout = Duration::from_secs(10); + let timeout = Duration::from_secs(20); let mut last_pass = 0; for (impairment_level, limits) in DEGRADE_LEVELS.iter().enumerate() { @@ -948,9 +938,7 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar }; let server_name = format!("{impairment_level}-server"); let 
client_name = format!("{impairment_level}-client"); - tracing::event!( - target: "test::_events::ladder_start", - tracing::Level::INFO, + tracing::info!( impairment_level, latency_ms = limits.latency_ms, loss_pct = limits.loss_pct, @@ -971,8 +959,9 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar let server_id = server.id(); let client_id = client.id(); - let result = Pair::new(server, client, relay_map.clone()) - .run( + let result = tokio::time::timeout( + timeout * 2, + Pair::new(server, client, relay_map.clone()).run( async move |_dev, _ep, conn| { ping_accept(&conn, timeout).await?; Ok(()) @@ -983,8 +972,11 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar ping_open(&conn, timeout).await?; Ok(()) }, - ) - .await; + ), + ) + .await + .std_context("pair timed timeout") + .flatten(); lab.remove_device(server_id)?; lab.remove_device(client_id)?; @@ -1018,8 +1010,6 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar if ok { last_pass = impairment_level + 1; - } else { - break; } } Ok((last_pass, guard)) diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs index cb7ffd8857..eebb4eb65e 100644 --- a/iroh/tests/patchbay/util.rs +++ b/iroh/tests/patchbay/util.rs @@ -136,7 +136,7 @@ impl Pair { tracing::event!( target: "iroh::_events::test_ok", tracing::Level::INFO, - msg = "device ok" + msg = %"device ok" ); } Err(error) => { @@ -144,7 +144,7 @@ impl Pair { target: "iroh::_events::test_failed", tracing::Level::ERROR, error, - msg = "device failed" + msg = %"device failed" ); } } From ff384dfb9066ee985169950a9633033593c7f24e Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 24 Mar 2026 15:55:58 +0100 Subject: [PATCH 05/35] chore: bump patchbay --- Cargo.lock | 2 +- iroh/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 695e1b889e..4a4cb66dcd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3545,7 
+3545,7 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "patchbay" version = "0.1.0" -source = "git+https://github.com/n0-computer/patchbay.git?branch=feat%2Fserver-push#729bc67bbb9df8a95c0e690c4476eb32c3cd5203" +source = "git+https://github.com/n0-computer/patchbay.git?branch=main#9f41d41e3030e38eba405e7052c4566f8f7831f0" dependencies = [ "anyhow", "chrono", diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 3a0bbf64ac..69265b7333 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -144,7 +144,7 @@ wasm-bindgen-test = "0.3.62" # patchbay netsim test dependencies (linux only) [target.'cfg(target_os = "linux")'.dev-dependencies] ctor = "0.6" -patchbay = { git = "https://github.com/n0-computer/patchbay.git", branch = "feat/server-push" } +patchbay = { git = "https://github.com/n0-computer/patchbay.git", branch = "main" } testdir = "0.9" [build-dependencies] From 42c600149b11d35aab102bdd72ac2fa1b1aa0584 Mon Sep 17 00:00:00 2001 From: Frando Date: Thu, 26 Mar 2026 13:27:51 +0100 Subject: [PATCH 06/35] update patchbay, enable parallelism, export metrics --- .gitignore | 1 + Cargo.lock | 3 ++- iroh/Cargo.toml | 2 +- iroh/tests/patchbay.rs | 19 ------------------- iroh/tests/patchbay/util.rs | 11 +++++++++-- 5 files changed, 13 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 1594301ead..e7b9572c2f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /logs iroh.config.toml +/.patchbay diff --git a/Cargo.lock b/Cargo.lock index 4a4cb66dcd..ae14c90248 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3545,13 +3545,14 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "patchbay" version = "0.1.0" -source = "git+https://github.com/n0-computer/patchbay.git?branch=main#9f41d41e3030e38eba405e7052c4566f8f7831f0" +source = "git+https://github.com/n0-computer/patchbay.git?branch=feat%2Fcompare#8b855e710eb517bfcd05ad4054b10369ef661b7b" dependencies 
= [ "anyhow", "chrono", "derive_more", "futures", "ipnet", + "iroh-metrics", "libc", "nix", "rtnetlink", diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 69265b7333..3fbfdf6cb0 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -144,7 +144,7 @@ wasm-bindgen-test = "0.3.62" # patchbay netsim test dependencies (linux only) [target.'cfg(target_os = "linux")'.dev-dependencies] ctor = "0.6" -patchbay = { git = "https://github.com/n0-computer/patchbay.git", branch = "main" } +patchbay = { git = "https://github.com/n0-computer/patchbay.git", branch = "feat/compare", features = ["iroh-metrics"] } testdir = "0.9" [build-dependencies] diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 142234fc02..afb8d97830 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -54,7 +54,6 @@ fn userns_ctor() { /// establish via relay, upgrade to direct. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn holepunch_simple() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; @@ -87,7 +86,6 @@ async fn holepunch_simple() -> Result { /// The test currently fails, but should pass. #[tokio::test] #[traced_test] -#[serial_test::serial] #[ignore = "known to still fail"] async fn switch_uplink() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; @@ -160,7 +158,6 @@ async fn switch_uplink() -> Result { /// The test currently fails, but should pass. #[tokio::test] #[traced_test] -#[serial_test::serial] #[ignore = "known to still fail"] async fn switch_uplink_ipv6() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; @@ -252,7 +249,6 @@ async fn switch_uplink_ipv6() -> Result { /// Verify we switch to the LAN connection. 
#[tokio::test] #[traced_test] -#[serial_test::serial] async fn change_ifaces() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; @@ -326,7 +322,6 @@ async fn change_ifaces() -> Result { /// the NATted peer's mapped port once it learns the address via relay. #[tokio::test] #[traced_test] -#[serial_test::serial] #[ignore = "stays relayed, holepunch times out (deadline elapsed)"] async fn holepunch_home_nat_one_side() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; @@ -357,7 +352,6 @@ async fn holepunch_home_nat_one_side() -> Result { /// Holepunching should succeed easily since filtering is endpoint-independent. #[tokio::test] #[traced_test] -#[serial_test::serial] #[ignore = "stays relayed, holepunch times out (deadline elapsed)"] async fn holepunch_cgnat_both() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; @@ -392,7 +386,6 @@ async fn holepunch_cgnat_both() -> Result { /// always succeeds on the first try. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn holepunch_full_cone_both() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::FullCone).build().await?; @@ -426,7 +419,6 @@ async fn holepunch_full_cone_both() -> Result { /// must stay on the relay. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn symmetric_nat_stays_relayed() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Corporate).build().await?; @@ -465,7 +457,6 @@ async fn symmetric_nat_stays_relayed() -> Result { /// port. Connection stays relayed. 
#[tokio::test] #[traced_test] -#[serial_test::serial] async fn mixed_home_vs_symmetric_stays_relayed() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let home = lab.add_router("home").nat(Nat::Home).build().await?; @@ -501,7 +492,6 @@ async fn mixed_home_vs_symmetric_stays_relayed() -> Result { /// is impossible, connection stays relayed. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn cloud_nat_stays_relayed() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::CloudNat).build().await?; @@ -538,7 +528,6 @@ async fn cloud_nat_stays_relayed() -> Result { /// holepunching should succeed. #[tokio::test] #[traced_test] -#[serial_test::serial] #[ignore = "stays relayed, holepunch times out (deadline elapsed)"] async fn holepunch_double_nat() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; @@ -591,7 +580,6 @@ async fn holepunch_double_nat() -> Result { /// via HTTPS (TCP 443) must still work. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn corporate_firewall_relay_only() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let fw = lab @@ -631,7 +619,6 @@ async fn corporate_firewall_relay_only() -> Result { /// the poor link quality. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn holepunch_mobile_3g() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; @@ -673,7 +660,6 @@ async fn holepunch_mobile_3g() -> Result { /// during NAT traversal. 
#[tokio::test] #[traced_test] -#[serial_test::serial] async fn holepunch_satellite() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; @@ -716,7 +702,6 @@ async fn holepunch_satellite() -> Result { /// path after recovery. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn link_outage_recovery() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; @@ -768,7 +753,6 @@ async fn link_outage_recovery() -> Result { /// Relay via HTTPS should work, holepunching should not. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn hotel_wifi_relay_only() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let hotel = lab @@ -808,7 +792,6 @@ async fn hotel_wifi_relay_only() -> Result { /// the direct path despite the asymmetric quality. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn holepunch_asymmetric_links() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; @@ -1018,7 +1001,6 @@ async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuar /// Impaired side is the accepting (server) peer. #[tokio::test] #[traced_test] -#[serial_test::serial] async fn degrade_ladder_impaired_server() -> Result { let (passed, guard) = run_degrade_ladder(true).await?; assert!( @@ -1033,7 +1015,6 @@ async fn degrade_ladder_impaired_server() -> Result { /// Impaired side is the connecting (client) peer. 
#[tokio::test] #[traced_test] -#[serial_test::serial] async fn degrade_ladder_impaired_client() -> Result { let (passed, guard) = run_degrade_ladder(false).await?; assert!( diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs index eebb4eb65e..c236d7c9f8 100644 --- a/iroh/tests/patchbay/util.rs +++ b/iroh/tests/patchbay/util.rs @@ -5,6 +5,7 @@ use iroh::{ endpoint::{Connection, PathInfo, PathWatcher, presets}, tls::CaRootsConfig, }; +use iroh_metrics::MetricsGroupSet; use n0_error::{Result, StdResultExt, anyerr, ensure_any}; use n0_future::task::AbortOnDropHandle; use patchbay::{Device, IpSupport, Lab, LabOpts, OutDir, TestGuard}; @@ -100,9 +101,12 @@ impl Pair { addr_tx.send(addr_relay_only(endpoint.addr())).unwrap(); let conn = endpoint.accept().await.unwrap().accept().anyerr()?.await?; watch_selected_path(&conn); - peer1(dev, endpoint.clone(), conn.clone()).await?; + peer1(dev.clone(), endpoint.clone(), conn.clone()).await?; conn.closed().await; endpoint.close().await; + for group in endpoint.metrics().groups() { + dev.record_iroh_metrics(group); + } n0_error::Ok(()) } .instrument(error_span!("ep-acpt")) @@ -113,8 +117,11 @@ impl Pair { let addr = addr_rx.await.unwrap(); let conn = endpoint.connect(addr, TEST_ALPN).await?; watch_selected_path(&conn); - peer2(dev, endpoint.clone(), conn).await?; + peer2(dev.clone(), endpoint.clone(), conn).await?; endpoint.close().await; + for group in endpoint.metrics().groups() { + dev.record_iroh_metrics(group); + } n0_error::Ok(()) } .instrument(error_span!("ep-cnct")) From 26f4b835cb0be4424c6516e4084c4606c1fac6e1 Mon Sep 17 00:00:00 2001 From: Frando Date: Thu, 26 Mar 2026 13:40:02 +0100 Subject: [PATCH 07/35] split out degradation tests into individual tests --- iroh/tests/patchbay.rs | 196 ++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 112 deletions(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index afb8d97830..c216ea6481 100644 --- a/iroh/tests/patchbay.rs 
+++ b/iroh/tests/patchbay.rs @@ -836,10 +836,6 @@ async fn holepunch_asymmetric_links() -> Result { /// Each level adds more latency, loss, and reordering. The test runs each level /// twice: once with the impaired side accepting, once connecting. /// -/// Bump these thresholds as iroh's holepunching improves. -const DEGRADE_PASS_THRESHOLD_IMPAIRED_SERVER: usize = 3; -const DEGRADE_PASS_THRESHOLD_IMPAIRED_CLIENT: usize = 3; - const DEGRADE_LEVELS: &[LinkLimits] = &[ // 0: mild — good wifi LinkLimits { @@ -903,125 +899,101 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ }, ]; -/// Run the degradation ladder: iterate through levels, creating fresh devices -/// each round but reusing the lab and relay. Returns the number of levels passed. -async fn run_degrade_ladder(impaired_is_server: bool) -> Result<(usize, TestGuard)> { +/// Run a single degradation level: create devices with the given impairment, +/// try to holepunch and ping, return Ok if successful. +async fn run_degrade_level(impaired_is_server: bool, level: usize) -> Result { + let limits = DEGRADE_LEVELS[level]; let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; let timeout = Duration::from_secs(20); - let mut last_pass = 0; - for (impairment_level, limits) in DEGRADE_LEVELS.iter().enumerate() { - let impaired = Some(LinkCondition::Manual(*limits)); - let (server_cond, client_cond) = if impaired_is_server { - (impaired, None) - } else { - (None, impaired) - }; - let server_name = format!("{impairment_level}-server"); - let client_name = format!("{impairment_level}-client"); - tracing::info!( - impairment_level, + let impaired = Some(LinkCondition::Manual(limits)); + let (server_cond, client_cond) = if impaired_is_server { + (impaired, None) + } else { + (None, impaired) + }; + let server = lab + .add_device("server") + .iface("eth0", nat1.id(), 
server_cond) + .build() + .await?; + let client = lab + .add_device("client") + .iface("eth0", nat2.id(), client_cond) + .build() + .await?; + + let result = tokio::time::timeout( + timeout * 2, + Pair::new(server, client, relay_map).run( + async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }, + async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths.wait_ip(timeout).await?; + ping_open(&conn, timeout).await?; + Ok(()) + }, + ), + ) + .await + .std_context("pair timed out") + .flatten(); + + match &result { + Ok(()) => tracing::event!( + target: "iroh::_events::test_ladder_pass", + tracing::Level::INFO, + level, latency_ms = limits.latency_ms, loss_pct = limits.loss_pct, reorder_pct = limits.reorder_pct, impaired_is_server, - "starting level", - ); - let server = lab - .add_device(&server_name) - .iface("eth0", nat1.id(), server_cond) - .build() - .await?; - let client = lab - .add_device(&client_name) - .iface("eth0", nat2.id(), client_cond) - .build() - .await?; - - let server_id = server.id(); - let client_id = client.id(); - let result = tokio::time::timeout( - timeout * 2, - Pair::new(server, client, relay_map.clone()).run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths.wait_ip(timeout).await?; - ping_open(&conn, timeout).await?; - Ok(()) - }, - ), - ) - .await - .std_context("pair timed timeout") - .flatten(); - - lab.remove_device(server_id)?; - lab.remove_device(client_id)?; - - let ok = result.is_ok(); - - match result.as_ref() { - Ok(()) => { - tracing::event!( - target: "iroh::_events::test_ladder_pass", - tracing::Level::INFO, - impairment_level, - latency_ms = limits.latency_ms, - loss_pct = limits.loss_pct, - reorder_pct = limits.reorder_pct, - "PASSED", - ); - } - Err(err) => { - tracing::event!( - target: "iroh::_events::test_ladder_fail", - tracing::Level::WARN, - latency_ms = limits.latency_ms, 
- loss_pct = limits.loss_pct, - reorder_pct = limits.reorder_pct, - error = format!("{err:#}"), - "FAILED", - ); - } - } - - if ok { - last_pass = impairment_level + 1; - } + "PASSED", + ), + Err(err) => tracing::event!( + target: "iroh::_events::test_ladder_fail", + tracing::Level::WARN, + level, + latency_ms = limits.latency_ms, + loss_pct = limits.loss_pct, + reorder_pct = limits.reorder_pct, + impaired_is_server, + error = format!("{err:#}"), + "FAILED", + ), } - Ok((last_pass, guard)) -} -/// Impaired side is the accepting (server) peer. -#[tokio::test] -#[traced_test] -async fn degrade_ladder_impaired_server() -> Result { - let (passed, guard) = run_degrade_ladder(true).await?; - assert!( - passed >= DEGRADE_PASS_THRESHOLD_IMPAIRED_SERVER, - "holepunch should pass at least {DEGRADE_PASS_THRESHOLD_IMPAIRED_SERVER} levels \ - with impaired server, but only passed {passed}" - ); - guard.ok(); - Ok(()) + result?; + Ok(guard) } -/// Impaired side is the connecting (client) peer. -#[tokio::test] -#[traced_test] -async fn degrade_ladder_impaired_client() -> Result { - let (passed, guard) = run_degrade_ladder(false).await?; - assert!( - passed >= DEGRADE_PASS_THRESHOLD_IMPAIRED_CLIENT, - "holepunch should pass at least {DEGRADE_PASS_THRESHOLD_IMPAIRED_CLIENT} levels \ - with impaired client, but only passed {passed}" - ); - guard.ok(); - Ok(()) +macro_rules! 
degrade_test { + ($name:ident, $impaired_is_server:expr, $level:expr) => { + #[tokio::test] + #[traced_test] + async fn $name() -> Result { + let guard = run_degrade_level($impaired_is_server, $level).await?; + guard.ok(); + Ok(()) + } + }; } + +degrade_test!(degrade_server_0_mild, true, 0); +degrade_test!(degrade_server_1_poor, true, 1); +degrade_test!(degrade_server_2_bad, true, 2); +degrade_test!(degrade_server_3_terrible, true, 3); +degrade_test!(degrade_server_4_extreme, true, 4); +degrade_test!(degrade_server_5_absurd, true, 5); + +degrade_test!(degrade_client_0_mild, false, 0); +degrade_test!(degrade_client_1_poor, false, 1); +degrade_test!(degrade_client_2_bad, false, 2); +degrade_test!(degrade_client_3_terrible, false, 3); +degrade_test!(degrade_client_4_extreme, false, 4); +degrade_test!(degrade_client_5_absurd, false, 5); From e9b2dcb826254ed15128cbad1be89119fb62ef70 Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 12:52:58 +0200 Subject: [PATCH 08/35] fix: improve CI workflow --- .config/nextest.toml | 9 +- .github/workflows/patchbay.yml | 108 ++++++++++---------- .github/workflows/tests.yaml | 2 +- Cargo.lock | 176 +++++++++++---------------------- Makefile.toml | 7 ++ iroh/Cargo.toml | 4 +- 6 files changed, 128 insertions(+), 178 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index a549d4068f..f1c1bbdca2 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -1,5 +1,5 @@ [test-groups] -run-in-isolation = { max-threads = 32 } +run-in-isolation = { max-threads = 32 } # these are tests that must not run with other tests concurrently. All tests in # this group can take up at most 32 threads among them, but each one requiring # 16 threads also. The effect should be that tests run isolated. 
@@ -16,3 +16,10 @@ threads-required = 32 [profile.default] slow-timeout = { period = "10s", terminate-after = 3 } + +[profile.patchbay] +fail-fast = false +retries = 1 +test-threads = 4 +slow-timeout = { period = "20s", terminate-after = 3 } +default-filter = 'binary(patchbay)' diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index b6aa88fe9e..abc45b8413 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -12,13 +12,16 @@ concurrency: env: RUST_BACKTRACE: 1 - RUSTFLAGS: "-Dwarnings --cfg patchbay_tests" SCCACHE_CACHE_SIZE: "10G" IROH_FORCE_STAGING_RELAYS: "1" + NEXTEST_VERSION: "0.9.132" jobs: patchbay_tests: name: Patchbay Tests + permissions: + contents: read + pull-requests: write timeout-minutes: 45 runs-on: [self-hosted, linux, X64] env: @@ -32,89 +35,78 @@ jobs: - uses: dtolnay/rust-toolchain@stable - uses: mozilla-actions/sccache-action@v0.0.9 + - name: Install cargo-nextest + uses: taiki-e/install-action@v2 + with: + tool: nextest@${{ env.NEXTEST_VERSION }} + - name: Run patchbay tests id: tests - run: cargo test --release -p iroh --test patchbay -- --test-threads=1 --nocapture + run: cargo nextest run --profile patchbay --release --message-format libtest-json-plus > test-results.jsonl env: RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG' }} + NEXTEST_EXPERIMENTAL_LIBTEST_JSON: "1" + RUSTFLAGS: "--cfg patchbay_tests" - # ── Push results to patchbay-serve ── - name: Push results if: always() env: PATCHBAY_URL: https://frando.gateway.lol PATCHBAY_API_KEY: ${{ secrets.PATCHBAY_API_KEY }} - TEST_STATUS: ${{ steps.tests.outcome }} run: | set -euo pipefail - PROJECT="${{ github.event.repository.name }}" TESTDIR="$(cargo metadata --format-version=1 --no-deps | jq -r .target_directory)/testdir-current" [ ! -d "$TESTDIR" ] && echo "No testdir output, skipping" && exit 0 - cat > "$TESTDIR/run.json" < "$TESTDIR/run.json" RESPONSE=$(tar -czf - -C "$TESTDIR" . 
| \ curl -s -w "\n%{http_code}" -X POST \ -H "Authorization: Bearer $PATCHBAY_API_KEY" \ -H "Content-Type: application/gzip" \ - --data-binary @- "$PATCHBAY_URL/api/push/$PROJECT") + --data-binary @- "$PATCHBAY_URL/api/push/${{ github.event.repository.name }}") HTTP_CODE=$(echo "$RESPONSE" | tail -1) BODY=$(echo "$RESPONSE" | head -n -1) [ "$HTTP_CODE" != "200" ] && echo "Push failed ($HTTP_CODE): $BODY" && exit 1 - INVOCATION=$(echo "$BODY" | jq -r .invocation) - echo "PATCHBAY_VIEW_URL=$PATCHBAY_URL/#/inv/$INVOCATION" >> "$GITHUB_ENV" - echo "PATCHBAY_TEST_STATUS=$TEST_STATUS" >> "$GITHUB_ENV" - echo "Results: $PATCHBAY_URL/#/inv/$INVOCATION" + VIEW_URL=$(echo "$BODY" | jq -r '.view_url // empty') + echo "PATCHBAY_VIEW_URL=$VIEW_URL" >> "$GITHUB_ENV" - # ── Post or update PR comment ── - name: Comment on PR if: always() && env.PATCHBAY_VIEW_URL - uses: actions/github-script@v7 - with: - script: | - let prNumber = context.issue?.number; - if (!prNumber) { - const { data: prs } = await github.rest.pulls.list({ - owner: context.repo.owner, repo: context.repo.repo, - head: `${context.repo.owner}:${{ github.ref_name }}`, - state: 'open', - }); - if (!prs.length) return; - prNumber = prs[0].number; - } + env: + GH_TOKEN: ${{ github.token }} + GH_REPO: ${{ github.repository }} + run: | + set -euo pipefail + SHA="${{ github.sha }}" + STATUS="${{ steps.tests.outcome }}" + ICON=$([[ "$STATUS" == "success" ]] && echo "✅" || echo "❌") + MARKER="" + printf -v BODY '%s\n%s\n%s' \ + "$MARKER" \ + "$ICON **patchbay:** $STATUS | $PATCHBAY_VIEW_URL" \ + "$(date -u '+%Y-%m-%d %H:%M:%S UTC') · [\`${SHA:0:7}\`](${{ github.server_url }}/$GH_REPO/commit/$SHA)" - const status = process.env.PATCHBAY_TEST_STATUS; - const icon = status === 'success' ? 
'✅' : '❌'; - const sha = '${{ github.sha }}'; - const shortSha = sha.slice(0, 7); - const commitUrl = `${{ github.server_url }}/${{ github.repository }}/commit/${sha}`; - const date = new Date().toISOString().replace('T', ' ').slice(0, 19) + ' UTC'; - const marker = ''; - const body = [ - marker, - `${icon} **patchbay:** ${status} | ${process.env.PATCHBAY_VIEW_URL}`, - `${date} · [\`${shortSha}\`](${commitUrl})`, - ].join('\n'); + PR_NUMBER="${{ github.event.pull_request.number }}" + if [ -z "$PR_NUMBER" ]; then + PR_NUMBER=$(gh pr list --head "${{ github.ref_name }}" --state open --json number -q '.[0].number') + fi + [ -z "$PR_NUMBER" ] && echo "No PR found, skipping comment" && exit 0 - const { data: comments } = await github.rest.issues.listComments({ - owner: context.repo.owner, repo: context.repo.repo, issue_number: prNumber, - }); - const existing = comments.find(c => c.body.includes(marker)); - const params = { owner: context.repo.owner, repo: context.repo.repo }; - if (existing) { - await github.rest.issues.updateComment({ ...params, comment_id: existing.id, body }); - } else { - await github.rest.issues.createComment({ ...params, issue_number: prNumber, body }); - } + EXISTING=$(gh api --paginate "repos/$GH_REPO/issues/$PR_NUMBER/comments" --jq ".[] | select(.body | contains(\"$MARKER\")) | .id" | head -1) + if [ -n "$EXISTING" ]; then + gh api "repos/$GH_REPO/issues/comments/$EXISTING" -X PATCH -f body="$BODY" + else + gh pr comment "$PR_NUMBER" --body "$BODY" + fi diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 0b77611b1c..449bd5ca3a 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -25,7 +25,7 @@ env: SCCACHE_CACHE_SIZE: "10G" CRATES_LIST: "iroh,iroh-bench,iroh-dns-server,iroh-relay" IROH_FORCE_STAGING_RELAYS: "1" - NEXTEST_VERSION: "0.9.80" + NEXTEST_VERSION: "0.9.132" jobs: build_and_test_nix: diff --git a/Cargo.lock b/Cargo.lock index ae14c90248..fc44ae6fad 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -537,24 +537,26 @@ dependencies = [ [[package]] name = "cargo-platform" -version = "0.1.8" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" +checksum = "87a0c0e6148f11f01f32650a2ea02d532b2ad4e81d8bd41e6e565b5adc5e6082" dependencies = [ "serde", + "serde_core", ] [[package]] name = "cargo_metadata" -version = "0.14.2" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" +checksum = "ef987d17b0a113becdd19d3d0022d04d7ef41f9efe4f3fb63ac44ba61df3ade9" dependencies = [ "camino", "cargo-platform", "semver", "serde", "serde_json", + "thiserror 2.0.18", ] [[package]] @@ -1494,6 +1496,16 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "fslock" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04412b8935272e3a9bae6f48c7bfff74c2911f60525404edfdd28e49884c3bfb" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "futures" version = "0.3.32" @@ -1648,7 +1660,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2764,10 +2776,7 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ - "bitflags", "libc", - "plain", - "redox_syscall 0.7.3", ] [[package]] @@ -2924,7 +2933,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -3414,6 +3423,16 @@ version = "4.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + [[package]] name = "objc2-security" version = "0.3.2" @@ -3525,7 +3544,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.18", + "redox_syscall", "smallvec", "windows-link", ] @@ -3545,7 +3564,7 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "patchbay" version = "0.1.0" -source = "git+https://github.com/n0-computer/patchbay.git?branch=feat%2Fcompare#8b855e710eb517bfcd05ad4054b10369ef661b7b" +source = "git+https://github.com/n0-computer/patchbay.git?branch=main#52890e15fbf55bd462a7e1317df5a672e1c61059" dependencies = [ "anyhow", "chrono", @@ -3676,12 +3695,6 @@ dependencies = [ "spki", ] -[[package]] -name = "plain" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" - [[package]] name = "plist" version = "1.8.0" @@ -3910,7 +3923,7 @@ dependencies = [ "libc", "once_cell", "raw-cpuid", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "web-sys", "winapi", ] @@ -3982,7 +3995,7 @@ dependencies = [ "once_cell", "socket2 0.5.10", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -4132,15 +4145,6 @@ dependencies = [ "bitflags", ] -[[package]] -name = "redox_syscall" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" -dependencies = [ - "bitflags", -] - [[package]] name = "redox_users" version = 
"0.4.6" @@ -5064,16 +5068,16 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.26.9" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c18a6156d1f27a9592ee18c1a846ca8dd5c258b7179fc193ae87c74ebb666f5" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" dependencies = [ - "cfg-if", - "core-foundation-sys", "libc", + "memchr", "ntapi", - "once_cell", - "winapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows", ] [[package]] @@ -5118,15 +5122,15 @@ dependencies = [ [[package]] name = "testdir" -version = "0.9.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9ffa013be124f7e8e648876190de818e3a87088ed97ccd414a398b403aec8c8" +checksum = "4d53c48916d4a8bb476f45e3699d9d904477dcd3569117d446f1b870d1e5a576" dependencies = [ "anyhow", "backtrace", "cargo-platform", "cargo_metadata", - "once_cell", + "fslock", "sysinfo", "whoami", ] @@ -5759,6 +5763,15 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -5779,9 +5792,12 @@ dependencies = [ [[package]] name = "wasite" -version = "0.1.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] [[package]] name = "wasm-bindgen" @@ -5975,11 +5991,13 @@ dependencies = [ [[package]] name = "whoami" -version = "1.6.1" +version = 
"2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +checksum = "d6a5b12f9df4f978d2cfdb1bd3bac52433f44393342d7ee9c25f5a1c14c0f45d" dependencies = [ + "libc", "libredox", + "objc2-system-configuration", "wasite", "web-sys", ] @@ -6158,15 +6176,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -6215,30 +6224,13 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - [[package]] name = "windows-threading" version = "0.2.1" @@ -6266,12 +6258,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - [[package]] name = 
"windows_aarch64_msvc" version = "0.42.2" @@ -6290,12 +6276,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -6314,24 +6294,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -6350,12 +6318,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -6374,12 +6336,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 
-[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -6398,12 +6354,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -6422,12 +6372,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - [[package]] name = "winnow" version = "0.7.15" diff --git a/Makefile.toml b/Makefile.toml index 5dde2e70dd..2ec0c5bc25 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -26,3 +26,10 @@ args = [ "--config", "imports_granularity=Crate,group_imports=StdExternalCrate,reorder_imports=true,format_code_in_doc_comments=true", ] + +[tasks.patchbay] +workspace = false +command = "cargo" +args = ["nextest", "run", "--profile", "patchbay"] +[tasks.patchbay.env] +RUSTFLAGS = "--cfg patchbay_tests" diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 3fbfdf6cb0..4a1535c514 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -144,8 +144,8 @@ wasm-bindgen-test = "0.3.62" # patchbay netsim test dependencies (linux only) [target.'cfg(target_os = "linux")'.dev-dependencies] ctor = "0.6" -patchbay = { git = "https://github.com/n0-computer/patchbay.git", branch = "feat/compare", 
features = ["iroh-metrics"] } -testdir = "0.9" +patchbay = { git = "https://github.com/n0-computer/patchbay.git", branch = "main", features = ["iroh-metrics"] } +testdir = "0.10" [build-dependencies] cfg_aliases = { version = "0.2.1" } From ef8a3f4529d1e6946c34bdb5875b256cb74a491f Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 13:04:12 +0200 Subject: [PATCH 09/35] fix: use nextest profiles instead of cfg directive via @matheus23 in https://github.com/n0-computer/iroh/pull/4059 --- .config/nextest.toml | 3 ++- .github/workflows/patchbay.yml | 1 - Cargo.lock | 42 ---------------------------------- Cargo.toml | 2 +- iroh/Cargo.toml | 1 - iroh/tests/patchbay.rs | 15 +++++------- 6 files changed, 9 insertions(+), 55 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index f1c1bbdca2..609df7f7fd 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -15,11 +15,12 @@ test-group = 'run-in-isolation' threads-required = 32 [profile.default] +default-filter = 'not binary(patchbay)' slow-timeout = { period = "10s", terminate-after = 3 } [profile.patchbay] fail-fast = false retries = 1 test-threads = 4 -slow-timeout = { period = "20s", terminate-after = 3 } default-filter = 'binary(patchbay)' +slow-timeout = { period = "60s", terminate-after = 4 } diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index abc45b8413..e0b25ec1d2 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -46,7 +46,6 @@ jobs: env: RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG' }} NEXTEST_EXPERIMENTAL_LIBTEST_JSON: "1" - RUSTFLAGS: "--cfg patchbay_tests" - name: Push results if: always() diff --git a/Cargo.lock b/Cargo.lock index fc44ae6fad..98ae854b9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2417,7 +2417,6 @@ dependencies = [ "rustls-webpki", "serde", "serde_json", - "serial_test", "smallvec", "strum 0.28.0", "swarm-discovery", @@ -4505,15 +4504,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name 
= "scc" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" -dependencies = [ - "sdd", -] - [[package]] name = "schannel" version = "0.1.29" @@ -4535,12 +4525,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sdd" -version = "3.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" - [[package]] name = "security-framework" version = "3.7.0" @@ -4723,32 +4707,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serial_test" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f" -dependencies = [ - "futures-executor", - "futures-util", - "log", - "once_cell", - "parking_lot", - "scc", - "serial_test_derive", -] - -[[package]] -name = "serial_test_derive" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "sha1" version = "0.11.0-rc.5" diff --git a/Cargo.toml b/Cargo.toml index 795ef46481..c56367f8ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ missing_debug_implementations = "warn" # do. To enable for a crate set `#![cfg_attr(iroh_docsrs, # feature(doc_cfg))]` in the crate. # We also have our own `iroh_loom` cfg to enable tokio-rs/loom testing. 
-unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_loom)", "cfg(patchbay_tests)"] } +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_loom)"] } [workspace.lints.clippy] unused-async = "warn" diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 4a1535c514..b91a978ca0 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -106,7 +106,6 @@ postcard = { version = "1.1.1", features = ["use-std"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } rand_chacha = "0.10" chrono = "0.4.43" -serial_test = "3.4.0" # *non*-wasm-in-browser test/dev dependencies [target.'cfg(not(all(target_family = "wasm", target_os = "unknown")))'.dev-dependencies] diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index c216ea6481..6a9ffa4a60 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -4,25 +4,22 @@ //! in Linux user namespaces, testing iroh's NAT traversal, holepunching, //! and connectivity under various network conditions. //! -//! These tests are disabled by default and only run when the `patchbay_tests` cfg is enabled. -//! They require Linux with user namespace support. On non-Linux systems, you can use -//! `patchbay-vm` to get a Linux VM with the required capabilities. See patchbay docs -//! for details. +//! These tests require Linux with user namespace support. On non-Linux systems, you can use +//! the `patchbay` CLI to get a Linux container or VM with the required capabilities. +//! See patchbay docs for details. //! //! To run: //! //! ```sh //! # On Linux (with user namespace support): -//! RUSTFLAGS="--cfg patchbay_tests" cargo test --release -p iroh --test patchbay -- --test-threads=1 +//! cargo nextest run -p iroh --test patchbay -P patchbay //! -//! # On macOS (via patchbay-vm): -//! RUSTFLAGS="--cfg patchbay_tests" patchbay-vm test --release -p iroh --test patchbay -- --test-threads=1 +//! # On macOS (runs in container via patchbay CLI): +//! 
patchbay test --release -p iroh --test patchbay //! ``` // patchbay only runs on linux #![cfg(target_os = "linux")] -// Only compile these tests when the patchbay_tests cfg is enabled. -#![cfg(patchbay_tests)] use std::time::Duration; From 1849bbf153a5661b176a73afa8b03b0ebae09118 Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 13:10:37 +0200 Subject: [PATCH 10/35] address review: remove obsolete helper fns --- iroh/tests/patchbay.rs | 32 ++++++++++++++++---------------- iroh/tests/patchbay/util.rs | 11 ----------- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 6a9ffa4a60..4b7644a3e2 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -63,7 +63,7 @@ async fn holepunch_simple() -> Result { async move |_dev, _ep, _conn| Ok(()), async move |_dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "connection started relayed"); + assert!(paths.selected().is_relay(), "connection started relayed"); paths.wait_ip(timeout).await?; info!("connection became direct"); Ok(()) @@ -96,7 +96,7 @@ async fn switch_uplink() -> Result { .run( async move |_dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "connection started relayed"); + assert!(paths.selected().is_relay(), "connection started relayed"); // Wait until a first direct path is established. let first = paths.wait_ip(timeout).await?; @@ -119,7 +119,7 @@ async fn switch_uplink() -> Result { }, async move |dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "connection started relayed"); + assert!(paths.selected().is_relay(), "connection started relayed"); // Wait for conn to become direct. 
paths @@ -180,7 +180,7 @@ async fn switch_uplink_ipv6() -> Result { .run( async move |_dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "connection started relayed"); + assert!(paths.selected().is_relay(), "connection started relayed"); // Wait until a first direct path is established. let first = paths @@ -208,7 +208,7 @@ async fn switch_uplink_ipv6() -> Result { }, async move |dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "connection started relayed"); + assert!(paths.selected().is_relay(), "connection started relayed"); // Wait for conn to become direct. paths @@ -276,7 +276,7 @@ async fn change_ifaces() -> Result { }, async move |dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "connection started relayed"); + assert!(paths.selected().is_relay(), "connection started relayed"); let first = paths .wait_ip(timeout) .await @@ -431,13 +431,13 @@ async fn symmetric_nat_stays_relayed() -> Result { }, async move |_dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "should start on relay"); + assert!(paths.selected().is_relay(), "should start on relay"); // Ping to verify the relay path works. ping_open(&conn, timeout).await?; // Give holepunching time to attempt and fail. 
tokio::time::sleep(Duration::from_secs(8)).await; assert!( - paths.is_relay(), + paths.selected().is_relay(), "should still be relayed — symmetric NAT blocks holepunching" ); Ok(()) @@ -469,11 +469,11 @@ async fn mixed_home_vs_symmetric_stays_relayed() -> Result { }, async move |_dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "should start on relay"); + assert!(paths.selected().is_relay(), "should start on relay"); ping_open(&conn, timeout).await?; tokio::time::sleep(Duration::from_secs(8)).await; assert!( - paths.is_relay(), + paths.selected().is_relay(), "should still be relayed — symmetric NAT on one side blocks holepunching" ); Ok(()) @@ -504,11 +504,11 @@ async fn cloud_nat_stays_relayed() -> Result { }, async move |_dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "should start on relay"); + assert!(paths.selected().is_relay(), "should start on relay"); ping_open(&conn, timeout).await?; tokio::time::sleep(Duration::from_secs(8)).await; assert!( - paths.is_relay(), + paths.selected().is_relay(), "should still be relayed — cloud symmetric NAT blocks holepunching" ); Ok(()) @@ -596,11 +596,11 @@ async fn corporate_firewall_relay_only() -> Result { }, async move |_dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "should start on relay"); + assert!(paths.selected().is_relay(), "should start on relay"); ping_open(&conn, timeout).await?; tokio::time::sleep(Duration::from_secs(8)).await; assert!( - paths.is_relay(), + paths.selected().is_relay(), "should still be relayed — corporate firewall blocks UDP" ); Ok(()) @@ -769,11 +769,11 @@ async fn hotel_wifi_relay_only() -> Result { }, async move |_dev, _ep, conn| { let mut paths = conn.paths(); - assert!(paths.is_relay(), "should start on relay"); + assert!(paths.selected().is_relay(), "should start on relay"); ping_open(&conn, timeout).await?; tokio::time::sleep(Duration::from_secs(8)).await; assert!( - paths.is_relay(), + 
paths.selected().is_relay(), "should still be relayed — hotel firewall blocks UDP" ); Ok(()) diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs index c236d7c9f8..846ed4cff0 100644 --- a/iroh/tests/patchbay/util.rs +++ b/iroh/tests/patchbay/util.rs @@ -177,17 +177,6 @@ pub trait PathWatcherExt { fn selected(&mut self) -> PathInfo; - fn match_selected(&mut self, f: impl FnOnce(&PathInfo) -> bool) -> bool { - f(&self.selected()) - } - - fn is_ip(&mut self) -> bool { - self.match_selected(PathInfo::is_ip) - } - - fn is_relay(&mut self) -> bool { - self.match_selected(PathInfo::is_relay) - } /// Wait until the selected path is a direct (IP) path. async fn wait_ip(&mut self, timeout: Duration) -> Result { self.wait_selected(timeout, PathInfo::is_ip).await From 44a7ac5b55a05d46b0bb6a552f355725f3dc8f2b Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 13:11:20 +0200 Subject: [PATCH 11/35] fixup after rebase --- Cargo.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 98ae854b9c..6f6e513d9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2127,7 +2127,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -3246,7 +3246,7 @@ dependencies = [ "pin-project-lite", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tokio-stream", @@ -3290,7 +3290,7 @@ source = "git+https://github.com/n0-computer/noq?branch=main#b212bbcaccaa82089cc dependencies = [ "cfg_aliases", "libc", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.61.2", ] @@ -3955,7 +3955,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -3992,7 +3992,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.59.0", ] From 
ea4aa51eb5455f8e0e86912940aa1350ae7d6faa Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 13:13:19 +0200 Subject: [PATCH 12/35] chore: fmt --- iroh/tests/patchbay.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 4b7644a3e2..05bf69edca 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -832,7 +832,6 @@ async fn holepunch_asymmetric_links() -> Result { /// Increasingly degraded link on one side, clean link on the other. /// Each level adds more latency, loss, and reordering. The test runs each level /// twice: once with the impaired side accepting, once connecting. -/// const DEGRADE_LEVELS: &[LinkLimits] = &[ // 0: mild — good wifi LinkLimits { From bfd933f6f9cfaa39083f3836110656a0952408da Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 13:15:27 +0200 Subject: [PATCH 13/35] remove netreport test draft (was not used yet), will be added back later --- iroh/tests/patchbay/netreport.rs | 341 ------------------------------- 1 file changed, 341 deletions(-) delete mode 100644 iroh/tests/patchbay/netreport.rs diff --git a/iroh/tests/patchbay/netreport.rs b/iroh/tests/patchbay/netreport.rs deleted file mode 100644 index 80037c7f9a..0000000000 --- a/iroh/tests/patchbay/netreport.rs +++ /dev/null @@ -1,341 +0,0 @@ -// --- -// NetReport tests -// --- - -/// Home NAT (EIM+APDF): the most common consumer router. -/// Expect UDP v4, a NATted public address (different from the device's private IP), -/// relay reachability with measured latency, and no captive portal. 
-#[tokio::test] -#[traced_test] -async fn netreport_home_nat() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let nat = lab.add_router("nat").nat(Nat::Home).build().await?; - let dev = lab.add_device("dev").uplink(nat.id()).build().await?; - let dev_ip = dev.ip().expect("device has IPv4"); - let report = run_net_report(dev, relay_map).await?; - assert!(report.udp_v4, "expected UDP v4 through home NAT"); - let global_v4 = report.global_v4.expect("expected global IPv4 address"); - assert_ne!( - *global_v4.ip(), - dev_ip, - "global IP should differ from device private IP behind NAT" - ); - let relay = report - .preferred_relay - .expect("expected relay to be reachable"); - assert!( - report.relay_latency.iter().any(|(_, url, _)| *url == relay), - "expected latency data for preferred relay" - ); - Ok(()) -} - -/// Corporate (symmetric) NAT: produces a different external port -/// per destination. Holepunching requires relay, but relay should be reachable. -#[tokio::test] -#[traced_test] -async fn netreport_corporate_nat() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let nat = lab.add_router("nat").nat(Nat::Corporate).build().await?; - let dev = lab.add_device("dev").uplink(nat.id()).build().await?; - let dev_ip = dev.ip().expect("device has IPv4"); - let report = run_net_report(dev, relay_map).await?; - assert!(report.udp_v4, "expected UDP v4 through corporate NAT"); - let global_v4 = report.global_v4.expect("expected global IPv4 address"); - assert_ne!( - *global_v4.ip(), - dev_ip, - "global IP should differ from device private IP behind symmetric NAT" - ); - let relay = report - .preferred_relay - .expect("expected relay to be reachable"); - assert!( - report.relay_latency.iter().any(|(_, url, _)| *url == relay), - "expected latency data for preferred relay" - ); - Ok(()) -} - -/// Direct connection (no NAT). 
The reported global_v4 should equal the -/// device's own IP since there is no address translation. -#[tokio::test] -#[traced_test] -async fn netreport_direct() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let router = lab.add_router("direct").build().await?; // Nat::None by default - let dev = lab.add_device("dev").uplink(router.id()).build().await?; - let dev_ip = dev.ip().expect("device has IPv4"); - let report = run_net_report(dev, relay_map).await?; - assert!(report.udp_v4, "expected UDP v4 on direct connection"); - let global_v4 = report.global_v4.expect("expected global IPv4 address"); - assert_eq!( - *global_v4.ip(), - dev_ip, - "without NAT, global IP should equal device's own IP" - ); - let relay = report - .preferred_relay - .expect("expected relay to be reachable"); - assert!( - report.relay_latency.iter().any(|(_, url, _)| *url == relay), - "expected latency data for preferred relay" - ); - Ok(()) -} - -// --- -// NetReport: additional NAT topologies -// --- - -/// Full cone NAT (EIM+EIF): most permissive NAT. Port-preserving, hairpin enabled. -/// Holepunching always succeeds. Same expectations as Home NAT for net_report. 
-#[tokio::test] -#[traced_test] -async fn netreport_full_cone() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let nat = lab.add_router("nat").nat(Nat::FullCone).build().await?; - let dev = lab.add_device("dev").uplink(nat.id()).build().await?; - let dev_ip = dev.ip().expect("device has IPv4"); - let report = run_net_report(dev, relay_map).await?; - assert!(report.udp_v4, "expected UDP v4 through full cone NAT"); - let global_v4 = report.global_v4.expect("expected global IPv4 address"); - assert_ne!( - *global_v4.ip(), - dev_ip, - "global IP should differ from device private IP behind NAT" - ); - let relay = report - .preferred_relay - .expect("expected relay to be reachable"); - assert!( - report.relay_latency.iter().any(|(_, url, _)| *url == relay), - "expected latency data for preferred relay" - ); - assert_ne!( - report.captive_portal, - Some(true), - "no captive portal expected" - ); - Ok(()) -} - -/// Cloud NAT (EDM+APDF): symmetric NAT with randomized ports, similar to corporate -/// but with longer UDP timeout (350s). Common in cloud providers (GCP, AWS). 
-#[tokio::test] -#[traced_test] -async fn netreport_cloud_nat() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let nat = lab.add_router("nat").nat(Nat::CloudNat).build().await?; - let dev = lab.add_device("dev").uplink(nat.id()).build().await?; - let dev_ip = dev.ip().expect("device has IPv4"); - let report = run_net_report(dev, relay_map).await?; - assert!(report.udp_v4, "expected UDP v4 through cloud NAT"); - let global_v4 = report.global_v4.expect("expected global IPv4 address"); - assert_ne!( - *global_v4.ip(), - dev_ip, - "global IP should differ from device private IP behind cloud NAT" - ); - let relay = report - .preferred_relay - .expect("expected relay to be reachable"); - assert!( - report.relay_latency.iter().any(|(_, url, _)| *url == relay), - "expected latency data for preferred relay" - ); - assert_ne!( - report.captive_portal, - Some(true), - "no captive portal expected" - ); - Ok(()) -} - -/// Standalone CGNAT (EIM+EIF): carrier-grade NAT without a home router in front. -/// Common for mobile carriers. More permissive filtering than Home NAT. 
-#[tokio::test] -#[traced_test] -async fn netreport_cgnat() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let nat = lab.add_router("nat").nat(Nat::Cgnat).build().await?; - let dev = lab.add_device("dev").uplink(nat.id()).build().await?; - let dev_ip = dev.ip().expect("device has IPv4"); - let report = run_net_report(dev, relay_map).await?; - assert!(report.udp_v4, "expected UDP v4 through CGNAT"); - let global_v4 = report.global_v4.expect("expected global IPv4 address"); - assert_ne!( - *global_v4.ip(), - dev_ip, - "global IP should differ from device private IP behind CGNAT" - ); - let relay = report - .preferred_relay - .expect("expected relay to be reachable"); - assert!( - report.relay_latency.iter().any(|(_, url, _)| *url == relay), - "expected latency data for preferred relay" - ); - assert_ne!( - report.captive_portal, - Some(true), - "no captive portal expected" - ); - Ok(()) -} - -// --- -// NetReport: firewall scenarios -// --- - -/// Corporate firewall blocks all UDP except DNS (port 53). QAD probes fail, -/// but the relay is still reachable via HTTPS on port 443. 
-#[tokio::test] -#[traced_test] -async fn netreport_corporate_firewall() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let fw = lab - .add_router("fw") - .firewall(Firewall::Corporate) - .build() - .await?; - let dev = lab.add_device("dev").uplink(fw.id()).build().await?; - let report = run_net_report(dev, relay_map).await?; - assert!( - !report.udp_v4, - "UDP should be blocked by corporate firewall" - ); - assert!( - report.global_v4.is_none(), - "no global IPv4 without successful QAD probes" - ); - assert!( - report.preferred_relay.is_some(), - "relay should still be reachable via HTTPS (TCP 443)" - ); - assert_ne!( - report.captive_portal, - Some(true), - "no captive portal expected" - ); - Ok(()) -} - -// --- -// NetReport: dual-stack / IPv6 -// --- - -/// Dual-stack device on a direct (no NAT) connection with a dual-stack relay. -/// Both IPv4 and IPv6 QAD probes should succeed. Without NAT, the reported -/// global addresses should match the device's own addresses. 
-#[tokio::test] -#[traced_test] -async fn netreport_dual_stack_direct() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let router = lab - .add_router("direct") - .ip_support(IpSupport::DualStack) - .build() - .await?; - let dev = lab.add_device("dev").uplink(router.id()).build().await?; - let dev_ip = dev.ip().expect("device has IPv4"); - let dev_ip6 = dev.ip6().expect("device has IPv6 on dual-stack router"); - info!(%dev_ip, %dev_ip6, "dual-stack device"); - let report = run_net_report(dev, relay_map).await?; - // v4 - assert!(report.udp_v4, "expected UDP v4 on direct dual-stack"); - let global_v4 = report.global_v4.expect("expected global IPv4 address"); - assert_eq!( - *global_v4.ip(), - dev_ip, - "without NAT, global IPv4 should equal device's own IP" - ); - // v6 - assert!(report.udp_v6, "expected UDP v6 on direct dual-stack"); - let global_v6 = report.global_v6.expect("expected global IPv6 address"); - assert_eq!( - *global_v6.ip(), - dev_ip6, - "without NAT, global IPv6 should equal device's own IP" - ); - assert!( - report.preferred_relay.is_some(), - "expected relay to be reachable" - ); - assert_ne!( - report.captive_portal, - Some(true), - "no captive portal expected" - ); - Ok(()) -} - -/// Dual-stack device behind a home NAT with no IPv6 NAT (NatV6Mode::None). -/// IPv4 is NATted (global differs from device IP). IPv6 uses global unicast -/// directly, so the reported global IPv6 should match the device's own address. 
-#[tokio::test] -#[traced_test] -#[ignore = "currently broken due to bug in patchbay"] -async fn netreport_dual_stack_home_nat() -> Result { - let (lab, relay_map, _relay_guard) = lab_with_relay(testdir!()).await?; - let nat = lab - .add_router("nat") - .nat(Nat::Home) - .ip_support(IpSupport::DualStack) - .build() - .await?; - let dev = lab.add_device("dev").uplink(nat.id()).build().await?; - let dev_ip = dev.ip().expect("device has IPv4"); - let dev_ip6 = dev.ip6().expect("device has IPv6 on dual-stack router"); - info!(%dev_ip, %dev_ip6, "dual-stack device behind home NAT"); - let report = run_net_report(dev, relay_map).await?; - println!("{report:#?}"); - // v4 is NATted. - assert!(report.udp_v4, "expected UDP v4 through home NAT"); - let global_v4 = report.global_v4.expect("expected global IPv4 address"); - assert_ne!( - *global_v4.ip(), - dev_ip, - "global IPv4 should differ from private IP behind NAT" - ); - // v6 passes through without translation (NatV6Mode::None = global unicast). - assert!(report.udp_v6, "expected UDP v6 with global unicast IPv6"); - let global_v6 = report.global_v6.expect("expected global IPv6 address"); - assert_eq!( - *global_v6.ip(), - dev_ip6, - "IPv6 has no NAT, global should equal device's own IP" - ); - assert!( - report.preferred_relay.is_some(), - "expected relay to be reachable" - ); - assert_ne!( - report.captive_portal, - Some(true), - "no captive portal expected" - ); - Ok(()) -} - -// --- -// NetReport helper -// --- - -/// Bind an endpoint in `dev`'s namespace, wait for the first net report, return it. 
-pub async fn run_net_report(dev: Device, relay_map: RelayMap) -> Result { - dev.spawn(move |dev| { - async move { - let endpoint = endpoint_builder(&dev, relay_map).bind().await?; - let mut watcher = endpoint.net_report(); - let report = tokio::time::timeout(Duration::from_secs(10), watcher.initialized()) - .await - .anyerr()?; - endpoint.close().await; - n0_error::Ok(report) - } - .instrument(error_span!("net_report")) - })? - .await - .anyerr()? -} From 4778d376320b61b036f07a6441d63d36f589032f Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 13:32:05 +0200 Subject: [PATCH 14/35] docs fixes --- iroh/tests/patchbay/util.rs | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs index 846ed4cff0..fe35a9ef3a 100644 --- a/iroh/tests/patchbay/util.rs +++ b/iroh/tests/patchbay/util.rs @@ -16,11 +16,13 @@ use self::relay::run_relay_server; const TEST_ALPN: &[u8] = b"test"; -/// Create a lab with a dual-stack relay server. Returns the lab, relay map, a drop guard -/// that keeps the relay alive, and a [`TestGuard`] that records pass/fail. +/// Creates a lab with a dual-stack relay server. /// -/// The relay binds on `[::]` and is reachable via `https://relay.test` (resolved -/// through lab-wide DNS entries for both IPv4 and IPv6). +/// Returns the lab, relay map, a drop guard that keeps the relay alive, +/// and a [`TestGuard`] that records pass/fail. +/// +/// The relay binds on `[::]` and is reachable via `https://relay.test` +/// (resolved through lab-wide DNS entries for both IPv4 and IPv6). 
pub async fn lab_with_relay(
     path: PathBuf,
 ) -> Result<(Lab, RelayMap, AbortOnDropHandle<()>, TestGuard)> {
@@ -59,17 +61,7 @@ async fn spawn_relay(lab: &Lab) -> Result<(RelayMap, AbortOnDropHandle<()>)> {
     Ok((relay_map, AbortOnDropHandle::new(task_relay)))
 }
 
-// ---
-// Pair: run two connected endpoints
-// ---
-
-/// Two connected endpoints in the test lab, ready to run.
-///
-/// `peer1` runs in `dev1`'s namespace as the accepting side.
-/// `peer2` runs in `dev2`'s namespace as the connecting side.
-///
-/// `peer1` awaits the connection to be closed afterwards, whereas `peer2` closes
-/// the connection.
+/// Manages two connected endpoints in the test lab.
 pub struct Pair {
     dev1: Device,
     dev2: Device,
@@ -85,6 +77,21 @@ impl Pair {
         }
     }
 
+    /// Binds an endpoint on each device and establishes a connection between them.
+    ///
+    /// `peer1` runs in `dev1`'s namespace as the accepting side.
+    /// `peer2` runs in `dev2`'s namespace as the connecting side.
+    ///
+    /// A connection is made from `peer1` to `peer2` with a relay-only
+    /// [`EndpointAddr`], and then the supplied functions are invoked, passing
+    /// the device, endpoint, and connection to user code.
+    ///
+    /// After a future completes, `peer1` awaits the connection to be closed,
+    /// whereas `peer2` closes the connection.
+    ///
+    /// Afterwards, both endpoints are closed and metrics are recorded through
+    /// [`Device::record_iroh_metrics`]. It also emits a debug log with target
+    /// `patchbay::_events` with the result of the user-supplied work functions.
pub async fn run(self, peer1: F1, peer2: F2) -> Result where F1: FnOnce(Device, Endpoint, Connection) -> Fut1 + Send + 'static, From 400ecf203b81ba22cd0c2cc94da2724903a2e178 Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 13:39:40 +0200 Subject: [PATCH 15/35] fixup workflow --- .config/nextest.toml | 4 +-- .github/workflows/patchbay.yml | 48 ++++++++++++++-------------------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index 609df7f7fd..9622e892bb 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -21,6 +21,6 @@ slow-timeout = { period = "10s", terminate-after = 3 } [profile.patchbay] fail-fast = false retries = 1 -test-threads = 4 +test-threads = 8 default-filter = 'binary(patchbay)' -slow-timeout = { period = "60s", terminate-after = 4 } +slow-timeout = { period = "30s", terminate-after = 4 } diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index e0b25ec1d2..8e447da7f1 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -66,7 +66,7 @@ jobs: --arg pr_url "${{ github.event.pull_request.html_url || '' }}" \ --arg title "${{ github.event.pull_request.title || github.event.head_commit.message || '' }}" \ --arg outcome "${{ steps.tests.outcome }}" \ - '{$project, $branch, $commit, $pr, $pr_url, $title, test_outcome: $outcome, created_at: (now | todate)}' \ + '{kind: "test", $project, $branch, $commit, $pr, $pr_url, $title, test_outcome: $outcome, created_at: (now | todate)}' \ > "$TESTDIR/run.json" RESPONSE=$(tar -czf - -C "$TESTDIR" . 
| \ @@ -81,31 +81,23 @@ jobs: VIEW_URL=$(echo "$BODY" | jq -r '.view_url // empty') echo "PATCHBAY_VIEW_URL=$VIEW_URL" >> "$GITHUB_ENV" - - name: Comment on PR - if: always() && env.PATCHBAY_VIEW_URL - env: - GH_TOKEN: ${{ github.token }} - GH_REPO: ${{ github.repository }} - run: | - set -euo pipefail - SHA="${{ github.sha }}" - STATUS="${{ steps.tests.outcome }}" - ICON=$([[ "$STATUS" == "success" ]] && echo "✅" || echo "❌") - MARKER="" - printf -v BODY '%s\n%s\n%s' \ - "$MARKER" \ - "$ICON **patchbay:** $STATUS | $PATCHBAY_VIEW_URL" \ - "$(date -u '+%Y-%m-%d %H:%M:%S UTC') · [\`${SHA:0:7}\`](${{ github.server_url }}/$GH_REPO/commit/$SHA)" - - PR_NUMBER="${{ github.event.pull_request.number }}" - if [ -z "$PR_NUMBER" ]; then - PR_NUMBER=$(gh pr list --head "${{ github.ref_name }}" --state open --json number -q '.[0].number') - fi - [ -z "$PR_NUMBER" ] && echo "No PR found, skipping comment" && exit 0 + - name: Find existing comment + if: always() && env.PATCHBAY_VIEW_URL && github.event.pull_request.number + uses: peter-evans/find-comment@v4 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: '' - EXISTING=$(gh api --paginate "repos/$GH_REPO/issues/$PR_NUMBER/comments" --jq ".[] | select(.body | contains(\"$MARKER\")) | .id" | head -1) - if [ -n "$EXISTING" ]; then - gh api "repos/$GH_REPO/issues/comments/$EXISTING" -X PATCH -f body="$BODY" - else - gh pr comment "$PR_NUMBER" --body "$BODY" - fi + - name: Comment on PR + if: always() && env.PATCHBAY_VIEW_URL && github.event.pull_request.number + uses: peter-evans/create-or-update-comment@v5 + with: + issue-number: ${{ github.event.pull_request.number }} + comment-id: ${{ steps.fc.outputs.comment-id }} + body: | + + ${{ steps.tests.outcome == 'success' && '✅' || '❌' }} **patchbay:** ${{ steps.tests.outcome }} | ${{ env.PATCHBAY_VIEW_URL }} + ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }} + edit-mode: replace 
From 17cf5df4cc5c1de0505fd21b7babf4d2855d47c1 Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 14:24:22 +0200 Subject: [PATCH 16/35] fix: ignore patchbay tests in cross ci --- .github/workflows/ci.yml | 4 ++++ .github/workflows/patchbay.yml | 4 ++-- Cargo.toml | 2 +- Makefile.toml | 2 -- iroh/tests/patchbay.rs | 5 +++-- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 98b5650525..210a7cd7f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,8 @@ jobs: if: "github.event_name != 'pull_request' || ! contains(github.event.pull_request.labels.*.name, 'flaky-test')" timeout-minutes: 30 runs-on: [self-hosted, linux, X64] + env: + RUSTFLAGS: "-Dwarnings --cfg skip_patchbay" strategy: fail-fast: false matrix: @@ -135,6 +137,8 @@ jobs: matrix: target: - i686-unknown-linux-gnu + env: + RUSTFLAGS: "-Dwarnings --cfg skip_patchbay" steps: - name: Checkout uses: actions/checkout@v6 diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index 8e447da7f1..15b0d6b437 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -98,6 +98,6 @@ jobs: comment-id: ${{ steps.fc.outputs.comment-id }} body: | - ${{ steps.tests.outcome == 'success' && '✅' || '❌' }} **patchbay:** ${{ steps.tests.outcome }} | ${{ env.PATCHBAY_VIEW_URL }} - ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }} + ${{ steps.tests.outcome == 'success' && '✅' || '❌' }} **patchbay:** ${{ steps.tests.outcome }} | [results](${{ env.PATCHBAY_VIEW_URL }}) + Last updated: ${{ github.event.pull_request.updated_at }} · [${{ github.sha }}](${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}) edit-mode: replace diff --git a/Cargo.toml b/Cargo.toml index c56367f8ac..f1aaad7b52 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ missing_debug_implementations = "warn" # do. 
To enable for a crate set `#![cfg_attr(iroh_docsrs, # feature(doc_cfg))]` in the crate. # We also have our own `iroh_loom` cfg to enable tokio-rs/loom testing. -unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_loom)"] } +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_loom)", "cfg(skip_patchbay)"] } [workspace.lints.clippy] unused-async = "warn" diff --git a/Makefile.toml b/Makefile.toml index 2ec0c5bc25..15a3d7df1e 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -31,5 +31,3 @@ args = [ workspace = false command = "cargo" args = ["nextest", "run", "--profile", "patchbay"] -[tasks.patchbay.env] -RUSTFLAGS = "--cfg patchbay_tests" diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 05bf69edca..5a8063008a 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -18,8 +18,9 @@ //! patchbay test --release -p iroh --test patchbay //! ``` -// patchbay only runs on linux -#![cfg(target_os = "linux")] +// patchbay only runs on linux, and is skipped in cross-compile environments +// via a cfg directive +#![cfg(all(target_os = "linux", not(skip_patchbay)))] use std::time::Duration; From 91539779142529d7c2eef2eb49a11181a1ea0422 Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 15:08:15 +0200 Subject: [PATCH 17/35] fix: reduce concurrency --- .config/nextest.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index 9622e892bb..15e247f7be 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -21,6 +21,6 @@ slow-timeout = { period = "10s", terminate-after = 3 } [profile.patchbay] fail-fast = false retries = 1 -test-threads = 8 +test-threads = 4 default-filter = 'binary(patchbay)' slow-timeout = { period = "30s", terminate-after = 4 } From 6be3c7f2fc27400e71cdc782bc3b5148a4065473 Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 16:15:31 +0200 Subject: [PATCH 18/35] chore: update to patchbay 0.2.0 
--- Cargo.lock | 5 +++-- iroh/Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6f6e513d9e..4761814343 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3562,8 +3562,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "patchbay" -version = "0.1.0" -source = "git+https://github.com/n0-computer/patchbay.git?branch=main#52890e15fbf55bd462a7e1317df5a672e1c61059" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b50a3ddaf6b496f5ff309bb02f253a308e59b6e4fc48a9bb0ba9c19c2530d5e" dependencies = [ "anyhow", "chrono", diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index b91a978ca0..f1ceb407ea 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -143,7 +143,7 @@ wasm-bindgen-test = "0.3.62" # patchbay netsim test dependencies (linux only) [target.'cfg(target_os = "linux")'.dev-dependencies] ctor = "0.6" -patchbay = { git = "https://github.com/n0-computer/patchbay.git", branch = "main", features = ["iroh-metrics"] } +patchbay = { version = "0.2", features = ["iroh-metrics"] } testdir = "0.10" [build-dependencies] From 3b8a11a952200136e0b9f9ef5d650a2c49b3972f Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 21:05:00 +0200 Subject: [PATCH 19/35] refactor: remove macro for degrade ladder --- iroh/tests/patchbay.rs | 104 ++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 23 deletions(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 5a8063008a..856f2b76ac 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -969,28 +969,86 @@ async fn run_degrade_level(impaired_is_server: bool, level: usize) -> Result { - #[tokio::test] - #[traced_test] - async fn $name() -> Result { - let guard = run_degrade_level($impaired_is_server, $level).await?; - guard.ok(); - Ok(()) - } - }; +#[tokio::test] +#[traced_test] +async fn degrade_server_0_mild() -> Result { + 
run_degrade_level(true, 0).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_1_poor() -> Result { + run_degrade_level(true, 1).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_2_bad() -> Result { + run_degrade_level(true, 2).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_3_terrible() -> Result { + run_degrade_level(true, 3).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_4_extreme() -> Result { + run_degrade_level(true, 4).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_server_5_absurd() -> Result { + run_degrade_level(true, 5).await?.ok(); + Ok(()) } -degrade_test!(degrade_server_0_mild, true, 0); -degrade_test!(degrade_server_1_poor, true, 1); -degrade_test!(degrade_server_2_bad, true, 2); -degrade_test!(degrade_server_3_terrible, true, 3); -degrade_test!(degrade_server_4_extreme, true, 4); -degrade_test!(degrade_server_5_absurd, true, 5); - -degrade_test!(degrade_client_0_mild, false, 0); -degrade_test!(degrade_client_1_poor, false, 1); -degrade_test!(degrade_client_2_bad, false, 2); -degrade_test!(degrade_client_3_terrible, false, 3); -degrade_test!(degrade_client_4_extreme, false, 4); -degrade_test!(degrade_client_5_absurd, false, 5); +#[tokio::test] +#[traced_test] +async fn degrade_client_0_mild() -> Result { + run_degrade_level(false, 0).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_1_poor() -> Result { + run_degrade_level(false, 1).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_2_bad() -> Result { + run_degrade_level(false, 2).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_3_terrible() -> Result { + run_degrade_level(false, 3).await?.ok(); + Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_4_extreme() -> Result { + run_degrade_level(false, 4).await?.ok(); + 
Ok(()) +} + +#[tokio::test] +#[traced_test] +async fn degrade_client_5_absurd() -> Result { + run_degrade_level(false, 5).await?.ok(); + Ok(()) +} From d8c8fc4f99259cf2c93333a77a8eda1f99f1ee69 Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 30 Mar 2026 22:30:13 +0200 Subject: [PATCH 20/35] refactor: use enum for server/client side --- iroh/tests/patchbay.rs | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 856f2b76ac..9eb8332f72 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -24,7 +24,7 @@ use std::time::Duration; -use iroh::TransportAddr; +use iroh::{TransportAddr, endpoint::Side}; use n0_error::{Result, StackResultExt, StdResultExt}; use n0_tracing_test::traced_test; use patchbay::{Firewall, LinkCondition, LinkLimits, Nat, RouterPreset, TestGuard}; @@ -898,7 +898,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ /// Run a single degradation level: create devices with the given impairment, /// try to holepunch and ping, return Ok if successful. 
-async fn run_degrade_level(impaired_is_server: bool, level: usize) -> Result { +async fn run_degrade_level(impaired_side: Side, level: usize) -> Result { let limits = DEGRADE_LEVELS[level]; let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; @@ -906,10 +906,9 @@ async fn run_degrade_level(impaired_is_server: bool, level: usize) -> Result (impaired, None), + Side::Client => (None, impaired), }; let server = lab .add_device("server") @@ -949,7 +948,7 @@ async fn run_degrade_level(impaired_is_server: bool, level: usize) -> Result tracing::event!( @@ -959,7 +958,7 @@ async fn run_degrade_level(impaired_is_server: bool, level: usize) -> Result Result Result { - run_degrade_level(true, 0).await?.ok(); + run_degrade_level(Side::Server,0).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_1_poor() -> Result { - run_degrade_level(true, 1).await?.ok(); + run_degrade_level(Side::Server,1).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_2_bad() -> Result { - run_degrade_level(true, 2).await?.ok(); + run_degrade_level(Side::Server,2).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_3_terrible() -> Result { - run_degrade_level(true, 3).await?.ok(); + run_degrade_level(Side::Server,3).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_4_extreme() -> Result { - run_degrade_level(true, 4).await?.ok(); + run_degrade_level(Side::Server,4).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_5_absurd() -> Result { - run_degrade_level(true, 5).await?.ok(); + run_degrade_level(Side::Server,5).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_0_mild() -> Result { - run_degrade_level(false, 0).await?.ok(); + run_degrade_level(Side::Client,0).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_1_poor() -> Result { - 
run_degrade_level(false, 1).await?.ok(); + run_degrade_level(Side::Client,1).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_2_bad() -> Result { - run_degrade_level(false, 2).await?.ok(); + run_degrade_level(Side::Client,2).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_3_terrible() -> Result { - run_degrade_level(false, 3).await?.ok(); + run_degrade_level(Side::Client,3).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_4_extreme() -> Result { - run_degrade_level(false, 4).await?.ok(); + run_degrade_level(Side::Client,4).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_5_absurd() -> Result { - run_degrade_level(false, 5).await?.ok(); + run_degrade_level(Side::Client,5).await?.ok(); Ok(()) } From 9f7f09cfc94c25fe532291e035dc58cd7d2f330f Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 31 Mar 2026 10:03:20 +0200 Subject: [PATCH 21/35] fix: use patchbay with fix for bidi link conditioning --- Cargo.lock | 3 +- iroh/Cargo.toml | 2 +- iroh/tests/patchbay.rs | 78 ++++++++++++++++++++++++++---------------- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4761814343..cae47cc2c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3563,8 +3563,7 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "patchbay" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b50a3ddaf6b496f5ff309bb02f253a308e59b6e4fc48a9bb0ba9c19c2530d5e" +source = "git+https://github.com/n0-computer/patchbay?branch=fix%2Flink-condition#ccb3e4894654bb531c18e18e158acfe8a90752d8" dependencies = [ "anyhow", "chrono", diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index f1ceb407ea..27fa60d943 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -143,7 +143,7 @@ wasm-bindgen-test = "0.3.62" # patchbay netsim test dependencies (linux only) [target.'cfg(target_os = 
"linux")'.dev-dependencies] ctor = "0.6" -patchbay = { version = "0.2", features = ["iroh-metrics"] } +patchbay = { git = "https://github.com/n0-computer/patchbay", branch = "fix/link-condition", features = ["iroh-metrics"] } testdir = "0.10" [build-dependencies] diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 9eb8332f72..110be1f386 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -27,7 +27,7 @@ use std::time::Duration; use iroh::{TransportAddr, endpoint::Side}; use n0_error::{Result, StackResultExt, StdResultExt}; use n0_tracing_test::traced_test; -use patchbay::{Firewall, LinkCondition, LinkLimits, Nat, RouterPreset, TestGuard}; +use patchbay::{Firewall, LinkCondition, LinkDirection, LinkLimits, Nat, RouterPreset, TestGuard}; use testdir::testdir; use tracing::info; @@ -255,15 +255,17 @@ async fn change_ifaces() -> Result { // dev2 has two uplinks (wifi=Mobile3G on eth0, LAN on eth1). eth1 starts down. let dev1 = lab .add_device("dev1") - .iface("eth0", nat1.id(), None) + .iface("eth0", nat1.id()) .build() .await?; let dev2 = lab .add_device("dev2") - .iface("eth0", nat2.id(), Some(LinkCondition::Mobile3G)) - .iface("eth1", nat1.id(), None) + .iface("eth0", nat2.id()) + .iface("eth1", nat1.id()) .build() .await?; + dev2.set_link_condition("eth0", Some(LinkCondition::Mobile3G), LinkDirection::Both) + .await?; dev2.link_down("eth1").await?; let timeout = Duration::from_secs(10); @@ -623,14 +625,18 @@ async fn holepunch_mobile_3g() -> Result { let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; let dev1 = lab .add_device("dev1") - .iface("eth0", nat1.id(), Some(LinkCondition::Mobile3G)) + .iface("eth0", nat1.id()) .build() .await?; let dev2 = lab .add_device("dev2") - .iface("eth0", nat2.id(), Some(LinkCondition::Mobile3G)) + .iface("eth0", nat2.id()) .build() .await?; + dev1.set_link_condition("eth0", Some(LinkCondition::Mobile3G), LinkDirection::Both) + .await?; + dev2.set_link_condition("eth0", 
Some(LinkCondition::Mobile3G), LinkDirection::Both) + .await?; let timeout = Duration::from_secs(20); Pair::new(dev1, dev2, relay_map) .run( @@ -664,14 +670,18 @@ async fn holepunch_satellite() -> Result { let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; let dev1 = lab .add_device("dev1") - .iface("eth0", nat1.id(), Some(LinkCondition::Satellite)) + .iface("eth0", nat1.id()) .build() .await?; let dev2 = lab .add_device("dev2") - .iface("eth0", nat2.id(), Some(LinkCondition::Satellite)) + .iface("eth0", nat2.id()) .build() .await?; + dev1.set_link_condition("eth0", Some(LinkCondition::Satellite), LinkDirection::Both) + .await?; + dev2.set_link_condition("eth0", Some(LinkCondition::Satellite), LinkDirection::Both) + .await?; let timeout = Duration::from_secs(20); Pair::new(dev1, dev2, relay_map) .run( @@ -796,14 +806,18 @@ async fn holepunch_asymmetric_links() -> Result { let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; let dev1 = lab .add_device("dev1") - .iface("eth0", nat1.id(), Some(LinkCondition::Lan)) + .iface("eth0", nat1.id()) .build() .await?; let dev2 = lab .add_device("dev2") - .iface("eth0", nat2.id(), Some(LinkCondition::WifiBad)) + .iface("eth0", nat2.id()) .build() .await?; + dev1.set_link_condition("eth0", Some(LinkCondition::Lan), LinkDirection::Both) + .await?; + dev2.set_link_condition("eth0", Some(LinkCondition::WifiBad), LinkDirection::Both) + .await?; let timeout = Duration::from_secs(15); Pair::new(dev1, dev2, relay_map) .run( @@ -899,27 +913,31 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ /// Run a single degradation level: create devices with the given impairment, /// try to holepunch and ping, return Ok if successful. 
async fn run_degrade_level(impaired_side: Side, level: usize) -> Result { - let limits = DEGRADE_LEVELS[level]; let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; let timeout = Duration::from_secs(20); - let impaired = Some(LinkCondition::Manual(limits)); - let (server_cond, client_cond) = match impaired_side { - Side::Server => (impaired, None), - Side::Client => (None, impaired), - }; + let limits = DEGRADE_LEVELS[level]; + let link_condition = Some(LinkCondition::Manual(limits)); + let server = lab .add_device("server") - .iface("eth0", nat1.id(), server_cond) + .iface("eth0", nat1.id()) .build() .await?; let client = lab .add_device("client") - .iface("eth0", nat2.id(), client_cond) + .iface("eth0", nat2.id()) .build() .await?; + let impaired_device = match impaired_side { + Side::Client => &client, + Side::Server => &server, + }; + impaired_device + .set_link_condition("eth0", link_condition, LinkDirection::Both) + .await?; let result = tokio::time::timeout( timeout * 2, @@ -971,83 +989,83 @@ async fn run_degrade_level(impaired_side: Side, level: usize) -> Result Result { - run_degrade_level(Side::Server,0).await?.ok(); + run_degrade_level(Side::Server, 0).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_1_poor() -> Result { - run_degrade_level(Side::Server,1).await?.ok(); + run_degrade_level(Side::Server, 1).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_2_bad() -> Result { - run_degrade_level(Side::Server,2).await?.ok(); + run_degrade_level(Side::Server, 2).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_3_terrible() -> Result { - run_degrade_level(Side::Server,3).await?.ok(); + run_degrade_level(Side::Server, 3).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_4_extreme() -> Result { - 
run_degrade_level(Side::Server,4).await?.ok(); + run_degrade_level(Side::Server, 4).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_server_5_absurd() -> Result { - run_degrade_level(Side::Server,5).await?.ok(); + run_degrade_level(Side::Server, 5).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_0_mild() -> Result { - run_degrade_level(Side::Client,0).await?.ok(); + run_degrade_level(Side::Client, 0).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_1_poor() -> Result { - run_degrade_level(Side::Client,1).await?.ok(); + run_degrade_level(Side::Client, 1).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_2_bad() -> Result { - run_degrade_level(Side::Client,2).await?.ok(); + run_degrade_level(Side::Client, 2).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_3_terrible() -> Result { - run_degrade_level(Side::Client,3).await?.ok(); + run_degrade_level(Side::Client, 3).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_4_extreme() -> Result { - run_degrade_level(Side::Client,4).await?.ok(); + run_degrade_level(Side::Client, 4).await?.ok(); Ok(()) } #[tokio::test] #[traced_test] async fn degrade_client_5_absurd() -> Result { - run_degrade_level(Side::Client,5).await?.ok(); + run_degrade_level(Side::Client, 5).await?.ok(); Ok(()) } From b8f20828a8be1b413006355c2b49d8d575e1d64f Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 31 Mar 2026 13:25:20 +0200 Subject: [PATCH 22/35] ignore extreme and absurd degradations for now --- iroh/tests/patchbay.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 110be1f386..6ed7c9127f 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -1016,6 +1016,7 @@ async fn degrade_server_3_terrible() -> Result { #[tokio::test] #[traced_test] +#[ignore = "not yet passing reliably"] async fn degrade_server_4_extreme() -> Result 
{ run_degrade_level(Side::Server, 4).await?.ok(); Ok(()) @@ -1023,6 +1024,7 @@ async fn degrade_server_4_extreme() -> Result { #[tokio::test] #[traced_test] +#[ignore = "not yet passing reliably"] async fn degrade_server_5_absurd() -> Result { run_degrade_level(Side::Server, 5).await?.ok(); Ok(()) @@ -1058,6 +1060,7 @@ async fn degrade_client_3_terrible() -> Result { #[tokio::test] #[traced_test] +#[ignore = "not yet passing reliably"] async fn degrade_client_4_extreme() -> Result { run_degrade_level(Side::Client, 4).await?.ok(); Ok(()) @@ -1065,6 +1068,7 @@ async fn degrade_client_4_extreme() -> Result { #[tokio::test] #[traced_test] +#[ignore = "not yet passing reliably"] async fn degrade_client_5_absurd() -> Result { run_degrade_level(Side::Client, 5).await?.ok(); Ok(()) From 82f9d7aef95d18736018bf3f86379470f0eb8d86 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 31 Mar 2026 14:30:05 +0200 Subject: [PATCH 23/35] fix: make test less flaky --- iroh/tests/patchbay.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 6ed7c9127f..e04b865e6a 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -264,7 +264,7 @@ async fn change_ifaces() -> Result { .iface("eth1", nat1.id()) .build() .await?; - dev2.set_link_condition("eth0", Some(LinkCondition::Mobile3G), LinkDirection::Both) + dev2.set_link_condition("eth0", Some(LinkCondition::Mobile4G), LinkDirection::Both) .await?; dev2.link_down("eth1").await?; From 8c243d8856dc8fca00a59700a3f165702ea21bd5 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 31 Mar 2026 15:07:56 +0200 Subject: [PATCH 24/35] deps: bump patchbay to 0.3 --- Cargo.lock | 5 +++-- iroh/Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cae47cc2c1..fd4ca13595 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3562,8 +3562,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = 
"patchbay" -version = "0.2.0" -source = "git+https://github.com/n0-computer/patchbay?branch=fix%2Flink-condition#ccb3e4894654bb531c18e18e158acfe8a90752d8" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d93ad32b57e2d0185284b2e73817b3668feb3013ee70e963d1cb01dda53a8a" dependencies = [ "anyhow", "chrono", diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 27fa60d943..96dfa17fe8 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -143,7 +143,7 @@ wasm-bindgen-test = "0.3.62" # patchbay netsim test dependencies (linux only) [target.'cfg(target_os = "linux")'.dev-dependencies] ctor = "0.6" -patchbay = { git = "https://github.com/n0-computer/patchbay", branch = "fix/link-condition", features = ["iroh-metrics"] } +patchbay = { version = "0.3", features = ["iroh-metrics"] } testdir = "0.10" [build-dependencies] From 044ed4a5c474e5c00d8d75bec1d59aab0c507ed3 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 31 Mar 2026 15:59:16 +0200 Subject: [PATCH 25/35] chore: remove patchbay publishing for now --- .github/workflows/patchbay.yml | 54 ---------------------------------- 1 file changed, 54 deletions(-) diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index 15b0d6b437..e560a3692a 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -47,57 +47,3 @@ jobs: RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG' }} NEXTEST_EXPERIMENTAL_LIBTEST_JSON: "1" - - name: Push results - if: always() - env: - PATCHBAY_URL: https://frando.gateway.lol - PATCHBAY_API_KEY: ${{ secrets.PATCHBAY_API_KEY }} - run: | - set -euo pipefail - TESTDIR="$(cargo metadata --format-version=1 --no-deps | jq -r .target_directory)/testdir-current" - [ ! 
-d "$TESTDIR" ] && echo "No testdir output, skipping" && exit 0 - - cp test-results.jsonl "$TESTDIR/test-results.jsonl" - jq -n \ - --arg project "${{ github.event.repository.name }}" \ - --arg branch "${{ github.head_ref || github.ref_name }}" \ - --arg commit "${{ github.sha }}" \ - --argjson pr ${{ github.event.pull_request.number || 'null' }} \ - --arg pr_url "${{ github.event.pull_request.html_url || '' }}" \ - --arg title "${{ github.event.pull_request.title || github.event.head_commit.message || '' }}" \ - --arg outcome "${{ steps.tests.outcome }}" \ - '{kind: "test", $project, $branch, $commit, $pr, $pr_url, $title, test_outcome: $outcome, created_at: (now | todate)}' \ - > "$TESTDIR/run.json" - - RESPONSE=$(tar -czf - -C "$TESTDIR" . | \ - curl -s -w "\n%{http_code}" -X POST \ - -H "Authorization: Bearer $PATCHBAY_API_KEY" \ - -H "Content-Type: application/gzip" \ - --data-binary @- "$PATCHBAY_URL/api/push/${{ github.event.repository.name }}") - HTTP_CODE=$(echo "$RESPONSE" | tail -1) - BODY=$(echo "$RESPONSE" | head -n -1) - [ "$HTTP_CODE" != "200" ] && echo "Push failed ($HTTP_CODE): $BODY" && exit 1 - - VIEW_URL=$(echo "$BODY" | jq -r '.view_url // empty') - echo "PATCHBAY_VIEW_URL=$VIEW_URL" >> "$GITHUB_ENV" - - - name: Find existing comment - if: always() && env.PATCHBAY_VIEW_URL && github.event.pull_request.number - uses: peter-evans/find-comment@v4 - id: fc - with: - issue-number: ${{ github.event.pull_request.number }} - comment-author: 'github-actions[bot]' - body-includes: '' - - - name: Comment on PR - if: always() && env.PATCHBAY_VIEW_URL && github.event.pull_request.number - uses: peter-evans/create-or-update-comment@v5 - with: - issue-number: ${{ github.event.pull_request.number }} - comment-id: ${{ steps.fc.outputs.comment-id }} - body: | - - ${{ steps.tests.outcome == 'success' && '✅' || '❌' }} **patchbay:** ${{ steps.tests.outcome }} | [results](${{ env.PATCHBAY_VIEW_URL }}) - Last updated: ${{ github.event.pull_request.updated_at }} · 
[${{ github.sha }}](${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}) - edit-mode: replace From 7dbb85b4d2670424c8aa0bef5950b4d28eb0f103 Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Apr 2026 11:58:19 +0200 Subject: [PATCH 26/35] refactor: use server/client fns for pair constructions - make it more obvious who is client and who is server - drop endpoint dead on the floor after both run functions completed --- Makefile.toml | 2 +- iroh/tests/patchbay.rs | 781 ++++++++++++++++++------------------ iroh/tests/patchbay/util.rs | 234 +++++++---- 3 files changed, 539 insertions(+), 478 deletions(-) diff --git a/Makefile.toml b/Makefile.toml index 15a3d7df1e..5d00fc498d 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -30,4 +30,4 @@ args = [ [tasks.patchbay] workspace = false command = "cargo" -args = ["nextest", "run", "--profile", "patchbay"] +args = ["nextest", "run", "-p", "iroh", "--test", "patchbay", "--profile", "patchbay", "${@}"] diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index e04b865e6a..f7e6760a8b 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -12,7 +12,11 @@ //! //! ```sh //! # On Linux (with user namespace support): -//! cargo nextest run -p iroh --test patchbay -P patchbay +//! cargo nextest run -p iroh --test patchbay --profile patchbay +//! # or use the `cargo make` alias: +//! cargo make patchbay +//! # can also pass additional args: +//! cargo make patchbay holepunch_simple --no-capture //! //! # On macOS (runs in container via patchbay CLI): //! 
patchbay test --release -p iroh --test patchbay @@ -59,17 +63,16 @@ async fn holepunch_simple() -> Result { let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, _conn| Ok(()), - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "connection started relayed"); - paths.wait_ip(timeout).await?; - info!("connection became direct"); - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async |_dev, _ep, _conn| Ok(())) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + paths.wait_ip(timeout).await?; + info!("connection became direct"); + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -93,57 +96,53 @@ async fn switch_uplink() -> Result { let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "connection started relayed"); - - // Wait until a first direct path is established. - let first = paths.wait_ip(timeout).await?; - info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); - - // Now wait until the direct path changes, which happens after the other endpoint - // changes its uplink. We check is_ip() explicitly to avoid triggering on a - // transient relay fallback during the network switch. 
- let second = paths - .wait_selected(timeout, |p| { - p.is_ip() && p.remote_addr() != first.remote_addr() - }) - .await - .context("did not switch paths")?; - info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); - - ping_accept(&conn, timeout).await?; - info!("ping done"); - Ok(()) - }, - async move |dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "connection started relayed"); - - // Wait for conn to become direct. - paths - .wait_ip(timeout) - .await - .context("become direct")?; - - // Wait a little more and then switch wifis. - tokio::time::sleep(Duration::from_secs(1)).await; - info!("switch IP uplink"); - dev.replug_iface("eth0", nat3.id()).await?; - - // We don't assert any path changes here, because the remote stays identical, - // and PathInfo does not contain info on local addrs. Instead, the remote - // only accepts our ping after the path changed. - info!("send ping"); - ping_open(&conn, timeout) - .await - .context("failed at ping_open")?; - info!("ping done"); - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + + // Wait until a first direct path is established. + let first = paths.wait_ip(timeout).await?; + info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); + + // Now wait until the direct path changes, which happens after the other endpoint + // changes its uplink. We check is_ip() explicitly to avoid triggering on a + // transient relay fallback during the network switch. 
+ let second = paths + .wait_selected(timeout, |p| { + p.is_ip() && p.remote_addr() != first.remote_addr() + }) + .await + .context("did not switch paths")?; + info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); + + ping_accept(&conn, timeout).await?; + info!("ping done"); + Ok(()) + }) + .client(dev2, async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + + // Wait for conn to become direct. + paths.wait_ip(timeout).await.context("become direct")?; + + // Wait a little more and then switch wifis. + tokio::time::sleep(Duration::from_secs(1)).await; + info!("switch IP uplink"); + dev.replug_iface("eth0", nat3.id()).await?; + + // We don't assert any path changes here, because the remote stays identical, + // and PathInfo does not contain info on local addrs. Instead, the remote + // only accepts our ping after the path changed. + info!("send ping"); + ping_open(&conn, timeout) + .await + .context("failed at ping_open")?; + info!("ping done"); + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -177,62 +176,60 @@ async fn switch_uplink_ipv6() -> Result { let dev1 = lab.add_device("dev1").uplink(public.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(home.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "connection started relayed"); - - // Wait until a first direct path is established. - let first = paths - .wait_selected(timeout, |p| { - matches!(p.remote_addr(), TransportAddr::Ip(addr) if addr.ip().is_ipv4()) - }) - .await - .context("did not become direct")?; - info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); - - // Now wait until the direct path changes, which happens after the other endpoint - // changes its uplink. 
We check is_ip() explicitly to avoid triggering on a - // transient relay fallback during the network switch. - let second = paths - .wait_selected(timeout, |p| { - matches!(p.remote_addr(), TransportAddr::Ip(addr) if addr.ip().is_ipv6()) - }) - .await - .context("did not switch paths to v6")?; - info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); - - ping_accept(&conn, timeout).await?; - info!("ping done"); - Ok(()) - }, - async move |dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "connection started relayed"); - - // Wait for conn to become direct. - paths - .wait_ip(timeout) - .await - .context("become direct")?; - - // Wait a little more and then switch wifis. - tokio::time::sleep(Duration::from_secs(1)).await; - info!("switch IP uplink"); - dev.replug_iface("eth0", mobile.id()).await?; - - // We don't assert any path changes here, because the remote stays identical, - // and PathInfo does not contain info on local addrs. Instead, the remote - // only accepts our ping after the path changed. - info!("send ping"); - ping_open(&conn, timeout) - .await - .context("failed at ping_open")?; - info!("ping done"); - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + + // Wait until a first direct path is established. + let first = paths + .wait_selected( + timeout, + |p| matches!(p.remote_addr(), TransportAddr::Ip(addr) if addr.ip().is_ipv4()), + ) + .await + .context("did not become direct")?; + info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); + + // Now wait until the direct path changes, which happens after the other endpoint + // changes its uplink. We check is_ip() explicitly to avoid triggering on a + // transient relay fallback during the network switch. 
+ let second = paths + .wait_selected( + timeout, + |p| matches!(p.remote_addr(), TransportAddr::Ip(addr) if addr.ip().is_ipv6()), + ) + .await + .context("did not switch paths to v6")?; + info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); + + ping_accept(&conn, timeout).await?; + info!("ping done"); + Ok(()) + }) + .client(dev2, async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + + // Wait for conn to become direct. + paths.wait_ip(timeout).await.context("become direct")?; + + // Wait a little more and then switch wifis. + tokio::time::sleep(Duration::from_secs(1)).await; + info!("switch IP uplink"); + dev.replug_iface("eth0", mobile.id()).await?; + + // We don't assert any path changes here, because the remote stays identical, + // and PathInfo does not contain info on local addrs. Instead, the remote + // only accepts our ping after the path changed. + info!("send ping"); + ping_open(&conn, timeout) + .await + .context("failed at ping_open")?; + info!("ping done"); + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -269,45 +266,44 @@ async fn change_ifaces() -> Result { dev2.link_down("eth1").await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout) - .await - .context("failed at ping_accept")?; - Ok(()) - }, - async move |dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "connection started relayed"); - let first = paths - .wait_ip(timeout) - .await - .context("did not become direct")?; - info!(addr=?first.remote_addr(), "connection became direct"); - - tokio::time::sleep(Duration::from_secs(1)).await; - - // Bring up the LAN interface to the other ep. - info!("bring up eth1"); - dev.link_up("eth1").await?; - - // Wait for a new direct path to be established. 
We check is_ip() explicitly - // to avoid triggering on a transient relay fallback during the switch. - let next = paths - .wait_selected(timeout, |p| { - p.is_ip() && p.remote_addr() != first.remote_addr() - }) - .await - .context("did not switch paths")?; - info!(addr=?next.remote_addr(), "new direct path established"); - - ping_open(&conn, timeout) - .await - .context("failed at ping_open")?; - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout) + .await + .context("failed at ping_accept")?; + Ok(()) + }) + .client(dev2, async move |dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "connection started relayed"); + let first = paths + .wait_ip(timeout) + .await + .context("did not become direct")?; + info!(addr=?first.remote_addr(), "connection became direct"); + + tokio::time::sleep(Duration::from_secs(1)).await; + + // Bring up the LAN interface to the other ep. + info!("bring up eth1"); + dev.link_up("eth1").await?; + + // Wait for a new direct path to be established. We check is_ip() explicitly + // to avoid triggering on a transient relay fallback during the switch. 
+ let next = paths + .wait_selected(timeout, |p| { + p.is_ip() && p.remote_addr() != first.remote_addr() + }) + .await + .context("did not switch paths")?; + info!(addr=?next.remote_addr(), "new direct path established"); + + ping_open(&conn, timeout) + .await + .context("failed at ping_open")?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -330,19 +326,18 @@ async fn holepunch_home_nat_one_side() -> Result { let dev1 = lab.add_device("dev1").uplink(nat.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths.wait_ip(timeout).await.context("did not holepunch")?; - ping_open(&conn, timeout).await?; - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths.wait_ip(timeout).await.context("did not holepunch")?; + ping_open(&conn, timeout).await?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -360,22 +355,21 @@ async fn holepunch_cgnat_both() -> Result { let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch through CGNAT")?; - ping_open(&conn, timeout).await?; - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + 
let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch through CGNAT")?; + ping_open(&conn, timeout).await?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -393,22 +387,21 @@ async fn holepunch_full_cone_both() -> Result { let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch through full cone")?; - ping_open(&conn, timeout).await?; - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch through full cone")?; + ping_open(&conn, timeout).await?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -426,26 +419,25 @@ async fn symmetric_nat_stays_relayed() -> Result { let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - // Ping to verify the relay path works. - ping_open(&conn, timeout).await?; - // Give holepunching time to attempt and fail. 
- tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — symmetric NAT blocks holepunching" - ); - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "should start on relay"); + // Ping to verify the relay path works. + ping_open(&conn, timeout).await?; + // Give holepunching time to attempt and fail. + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.selected().is_relay(), + "should still be relayed — symmetric NAT blocks holepunching" + ); + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -464,24 +456,23 @@ async fn mixed_home_vs_symmetric_stays_relayed() -> Result { let dev1 = lab.add_device("dev1").uplink(home.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(corp.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - ping_open(&conn, timeout).await?; - tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — symmetric NAT on one side blocks holepunching" - ); - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "should start on relay"); + ping_open(&conn, timeout).await?; + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.selected().is_relay(), + "should still be relayed — symmetric NAT on one side blocks 
holepunching" + ); + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -499,24 +490,23 @@ async fn cloud_nat_stays_relayed() -> Result { let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - ping_open(&conn, timeout).await?; - tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — cloud symmetric NAT blocks holepunching" - ); - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "should start on relay"); + ping_open(&conn, timeout).await?; + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.selected().is_relay(), + "should still be relayed — cloud symmetric NAT blocks holepunching" + ); + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -550,22 +540,21 @@ async fn holepunch_double_nat() -> Result { let dev1 = lab.add_device("dev1").uplink(home1.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(home2.id()).build().await?; let timeout = Duration::from_secs(15); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch through double NAT")?; - ping_open(&conn, timeout).await?; - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, 
timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch through double NAT")?; + ping_open(&conn, timeout).await?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -591,24 +580,23 @@ async fn corporate_firewall_relay_only() -> Result { let dev1 = lab.add_device("dev1").uplink(fw.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - ping_open(&conn, timeout).await?; - tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — corporate firewall blocks UDP" - ); - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "should start on relay"); + ping_open(&conn, timeout).await?; + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.selected().is_relay(), + "should still be relayed — corporate firewall blocks UDP" + ); + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -638,22 +626,21 @@ async fn holepunch_mobile_3g() -> Result { dev2.set_link_condition("eth0", Some(LinkCondition::Mobile3G), LinkDirection::Both) .await?; let timeout = Duration::from_secs(20); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch over 3G link")?; - 
ping_open(&conn, timeout).await?; - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch over 3G link")?; + ping_open(&conn, timeout).await?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -683,22 +670,21 @@ async fn holepunch_satellite() -> Result { dev2.set_link_condition("eth0", Some(LinkCondition::Satellite), LinkDirection::Both) .await?; let timeout = Duration::from_secs(20); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch over satellite link")?; - ping_open(&conn, timeout).await?; - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch over satellite link")?; + ping_open(&conn, timeout).await?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -717,40 +703,39 @@ async fn link_outage_recovery() -> Result { let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(15); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await.context("ping 1")?; - ping_accept(&conn, timeout).await.context("ping 2")?; - Ok(()) - }, - async move |dev, _ep, conn| { - let mut paths = conn.paths(); - paths.wait_ip(timeout).await.context("initial holepunch")?; - info!("holepunched, now killing link for 2s"); - - // Take the link down. 
- dev.link_down("eth0").await?; - tokio::time::sleep(Duration::from_secs(2)).await; - dev.link_up("eth0").await?; - info!("link restored, waiting for recovery"); - - // After link recovery, we should be able to ping — via relay - // fallback or re-established direct path. - ping_open(&conn, Duration::from_secs(20)) - .await - .context("ping after link recovery")?; - info!("connection recovered after link outage"); - - // Eventually the direct path should come back. - paths - .wait_ip(Duration::from_secs(20)) - .await - .context("did not re-establish direct path")?; - ping_open(&conn, timeout).await.context("ping on direct")?; - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await.context("ping 1")?; + ping_accept(&conn, timeout).await.context("ping 2")?; + Ok(()) + }) + .client(dev2, async move |dev, _ep, conn| { + let mut paths = conn.paths(); + paths.wait_ip(timeout).await.context("initial holepunch")?; + info!("holepunched, now killing link for 2s"); + + // Take the link down. + dev.link_down("eth0").await?; + tokio::time::sleep(Duration::from_secs(5)).await; + dev.link_up("eth0").await?; + info!("link restored, waiting for recovery"); + + // After link recovery, we should be able to ping — via relay + // fallback or re-established direct path. + ping_open(&conn, Duration::from_secs(20)) + .await + .context("ping after link recovery")?; + info!("connection recovered after link outage"); + + // Eventually the direct path should come back. 
+ paths + .wait_ip(Duration::from_secs(20)) + .await + .context("did not re-establish direct path")?; + ping_open(&conn, timeout).await.context("ping on direct")?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -772,24 +757,23 @@ async fn hotel_wifi_relay_only() -> Result { let dev1 = lab.add_device("dev1").uplink(hotel.id()).build().await?; let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; let timeout = Duration::from_secs(10); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - ping_open(&conn, timeout).await?; - tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — hotel firewall blocks UDP" - ); - Ok(()) - }, - ) + Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + assert!(paths.selected().is_relay(), "should start on relay"); + ping_open(&conn, timeout).await?; + tokio::time::sleep(Duration::from_secs(8)).await; + assert!( + paths.selected().is_relay(), + "should still be relayed — hotel firewall blocks UDP" + ); + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -819,22 +803,21 @@ async fn holepunch_asymmetric_links() -> Result { dev2.set_link_condition("eth0", Some(LinkCondition::WifiBad), LinkDirection::Both) .await?; let timeout = Duration::from_secs(15); - Pair::new(dev1, dev2, relay_map) - .run( - async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }, - async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch with asymmetric links")?; - ping_open(&conn, timeout).await?; - Ok(()) - }, - ) + 
Pair::new(relay_map) + .server(dev1, async move |_dev, _ep, conn| { + ping_accept(&conn, timeout).await?; + Ok(()) + }) + .client(dev2, async move |_dev, _ep, conn| { + let mut paths = conn.paths(); + paths + .wait_ip(timeout) + .await + .context("did not holepunch with asymmetric links")?; + ping_open(&conn, timeout).await?; + Ok(()) + }) + .run() .await?; guard.ok(); Ok(()) @@ -941,18 +924,18 @@ async fn run_degrade_level(impaired_side: Side, level: usize) -> Result Result<(RelayMap, AbortOnDropHandle<()>)> { let dc = lab .add_router("dc") @@ -61,127 +68,194 @@ async fn spawn_relay(lab: &Lab) -> Result<(RelayMap, AbortOnDropHandle<()>)> { Ok((relay_map, AbortOnDropHandle::new(task_relay))) } -/// Manages two connected endpoints in the test lab. +/// Type alias for boxed run functions used in [`Pair`]. +type RunFn = Box BoxFuture>; + +/// Builder for two connected endpoints in a lab. +/// +/// Use this to quickly create two endpoints on two different devices and create a +/// connection between them that starts as relay-only. pub struct Pair { - dev1: Device, - dev2: Device, relay_map: RelayMap, + server: Option<(Device, RunFn)>, + client: Option<(Device, RunFn)>, } impl Pair { - pub fn new(dev1: Device, dev2: Device, relay_map: RelayMap) -> Self { + /// Creates a new pair builder with a shared [`RelayMap`]. + pub fn new(relay_map: RelayMap) -> Self { Self { - dev1, - dev2, relay_map, + server: None, + client: None, } } - /// Bind an endpoint on each device and establish a connection between them. - /// - /// `peer1` runs in `dev1`'s namespace as the accepting side. - /// `peer2` runs in `dev2`'s namespace as the connecting side. + /// Sets the server device and run function. 
+ pub fn server(mut self, device: Device, run_fn: F) -> Self + where + F: FnOnce(Device, Endpoint, Connection) -> Fut + Send + 'static, + Fut: Future + Send + 'static, + { + let run_fn: RunFn = + Box::new(move |device, endpoint, conn| Box::pin(run_fn(device, endpoint, conn))); + self.server = Some((device, run_fn)); + self + } + + /// Sets the client device and run function. + pub fn client(mut self, device: Device, run_fn: F) -> Self + where + F: FnOnce(Device, Endpoint, Connection) -> Fut + Send + 'static, + Fut: Future + Send + 'static, + { + let run_fn: RunFn = + Box::new(move |device, endpoint, conn| Box::pin(run_fn(device, endpoint, conn))); + self.client = Some((device, run_fn)); + self + } + + /// Runs the pair to completion. /// - /// A connection is made from `peer1` to `peer2` with a relay-only - /// [`EndpointAddr`], and then the supplied functions are invoked, passing - /// the device, endpoint, and connection to user code. + /// This will bind an endpoint on each device, wait for the server endpoint to be online, + /// then send a relay-only [`EndpointAddr`] to the client task. + /// The client task will connect to the server, and the server will accept a connection. + /// Once a connection is established on either side, its run function is invoked. + /// Once both run functions completed, the endpoints are dropped without awaiting + /// [`Endpoint::close`], so the corresponding ERROR logs are exepcted. /// - /// After a future complete, `peer1` awaits the connection to be closed, - /// whereas `peer2` closes the connection. + /// After completion, this will: + /// - log the result of the run functions + /// - record the endpoint metrics as a `patchbay::_metrics` tracing event + /// - emit an `iroh::_events::test::ok` or `::failed` event for each device /// - /// Afterwards, both endpoints are closed and metrics are recorded through - /// [`Device::record_iroh_metrics`]. 
Will also emit a debug log with target - /// `patchbay::_events` with the result of the user-supplied work functions. - pub async fn run(self, peer1: F1, peer2: F2) -> Result - where - F1: FnOnce(Device, Endpoint, Connection) -> Fut1 + Send + 'static, - Fut1: Future + Send, - F2: FnOnce(Device, Endpoint, Connection) -> Fut2 + Send + 'static, - Fut2: Future + Send, - { + /// Returns an error if any step or run function failed. + pub async fn run(mut self) -> Result { + let (server_device, server_run) = self + .server + .take() + .context("Missing server initialization")?; + let (client_device, client_run) = self + .client + .take() + .context("Missing client initialization")?; + let (addr_tx, addr_rx) = oneshot::channel(); let relay_map2 = self.relay_map.clone(); - let task1 = self.dev1.spawn(move |dev| { + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + let server_task = server_device.spawn(|dev| { + let barrier = barrier2; async move { let endpoint = endpoint_builder(&dev, relay_map2).bind().await?; + info!(id=%endpoint.id().fmt_short(), bound_sockets=?endpoint.bound_sockets(), "server endpoint bound"); endpoint.online().await; + info!("endpoint online"); + // Send address to client task. Make it a relay-only address, like in the default address lookup services. addr_tx.send(addr_relay_only(endpoint.addr())).unwrap(); let conn = endpoint.accept().await.unwrap().accept().anyerr()?.await?; + info!(remote=%conn.remote_id().fmt_short(), "accepted, executing run function"); watch_selected_path(&conn); - peer1(dev.clone(), endpoint.clone(), conn.clone()).await?; - conn.closed().await; - endpoint.close().await; + let res = server_run(dev.clone(), endpoint.clone(), conn).await; + match &res { + Ok(()) => info!("run function completed successfully"), + Err(err)=> error!("run function failed: {err:#}"), + } + // Wait until the client run function completed before dropping the endpoint. 
+ barrier.wait().await; for group in endpoint.metrics().groups() { dev.record_iroh_metrics(group); } - n0_error::Ok(()) + res } - .instrument(error_span!("ep-acpt")) + .instrument(error_span!("ep-server")) })?; - let task2 = self.dev2.spawn(move |dev| { + let client_task = client_device.spawn(move |dev| { async move { let endpoint = endpoint_builder(&dev, self.relay_map).bind().await?; - let addr = addr_rx.await.unwrap(); + info!(id=%endpoint.id().fmt_short(), bound_sockets=?endpoint.bound_sockets(), "client endpoint bound"); + let addr = addr_rx.await.std_context("server did not send its address")?; + info!(?addr, "connecting to server"); let conn = endpoint.connect(addr, TEST_ALPN).await?; watch_selected_path(&conn); - peer2(dev.clone(), endpoint.clone(), conn).await?; - endpoint.close().await; + info!(remote=%conn.remote_id().fmt_short(), "connected, executing run function"); + let res = client_run(dev.clone(), endpoint.clone(), conn).await; + match &res { + Ok(()) => info!("run function completed successfully"), + Err(err)=> error!("run function failed: {err:#}"), + } + // Wait until the server run function completed before dropping the endpoint. + barrier.wait().await; + // endpoint.close().await; for group in endpoint.metrics().groups() { dev.record_iroh_metrics(group); } - n0_error::Ok(()) + res } - .instrument(error_span!("ep-cnct")) + .instrument(error_span!("ep-client")) })?; - let (res1, res2) = tokio::join!(task1, task2); + let (server_res, client_res) = tokio::join!(server_task, client_task); // Map the results to include the device name, and emit a tracing event within the device context. 
- let [res1, res2] = [(&self.dev1, res1), (&self.dev2, res2)].map(|(dev, res)| { - let res = match res { - Err(err) => Err(anyerr!(err, "device {} panicked", dev.name())), - Ok(Err(err)) => Err(anyerr!(err, "device {} failed", dev.name())), - Ok(Ok(())) => Ok(()), - }; - let res_str = res.as_ref().map_err(|err| format!("{err:#}")).cloned(); - dev.run_sync(move || { - match res_str { - Ok(()) => { - tracing::event!( - target: "iroh::_events::test_ok", - tracing::Level::INFO, - msg = %"device ok" - ); - } - Err(error) => { - tracing::event!( - target: "iroh::_events::test_failed", - tracing::Level::ERROR, - error, - msg = %"device failed" - ); - } - } - Ok(()) - }) - .ok(); - res - }); - res1?; - res2?; + let [server_res, client_res] = [(&server_device, server_res), (&client_device, client_res)] + .map(|(dev, res)| { + let res = match res { + Err(err) => Err(anyerr!(err, "device {} panicked", dev.name())), + Ok(Err(err)) => Err(anyerr!(err, "device {} failed", dev.name())), + Ok(Ok(())) => Ok(()), + }; + let res_str = res.as_ref().map_err(|err| format!("{err:#}")).cloned(); + log_result_on_device(dev, res_str); + res + }); + server_res?; + client_res?; Ok(()) } } +fn log_result_on_device(dev: &Device, res: Result<(), E>) { + let _ = dev.run_sync(move || { + match res { + Ok(_) => { + tracing::event!( + target: "iroh::_events::test::ok", + tracing::Level::INFO, + msg = %"device ok" + ); + } + Err(error) => { + tracing::event!( + target: "iroh::_events::test::failed", + tracing::Level::ERROR, + %error, + msg = %"device failed" + ); + } + } + Ok(()) + }); +} + /// Extension methods on [`PathWatcher`] for common waiting patterns in tests. #[allow(unused)] pub trait PathWatcherExt { + /// Waits until the selected path fulfills a condition. + /// + /// Calls `f` with the currently-selected path, and again after each path update, + /// until `f` returns true or `timeout` elapses. + /// + /// Returns an error if the timeout elapses before `f` returned true. 
async fn wait_selected( &mut self, timeout: Duration, f: impl Fn(&PathInfo) -> bool, ) -> Result; + /// Returns the currently selected path. + /// + /// Panics if no patch is marked as selected. fn selected(&mut self) -> PathInfo; /// Wait until the selected path is a direct (IP) path. @@ -223,6 +297,7 @@ impl PathWatcherExt for PathWatcher { } } +/// Opens a bidi stream, sends 8 bytes of data, and waits to receive the same data back. pub async fn ping_open(conn: &Connection, timeout: Duration) -> Result { tokio::time::timeout(timeout, async { let data: [u8; 8] = rand::random(); @@ -237,6 +312,7 @@ pub async fn ping_open(conn: &Connection, timeout: Duration) -> Result { .anyerr()? } +/// Accepts a bidi stream, reads 8 bytes of data, and sends the same data back. pub async fn ping_accept(conn: &Connection, timeout: Duration) -> Result { tokio::time::timeout(timeout, async { let (mut send, mut recv) = conn.accept_bi().await.anyerr()?; @@ -317,8 +393,10 @@ mod relay { }; /// Spawn a relay server bound on `[::]` that accepts both IPv4 and IPv6. - /// Uses `https://relay.test` as the URL — callers must set up lab-wide DNS - /// entries for `relay.test` pointing to the relay's v4 and v6 addresses. + /// + /// The returned [`RelayMap`] uses `https://relay.test` as the relay URL. + /// Callers are responsible for ensuring that a DNS entry for `relay.test` + /// exists and points to the relay's IP addresses. 
pub async fn run_relay_server() -> Result<(RelayMap, Server), SpawnError> { let bind_ip: IpAddr = Ipv6Addr::UNSPECIFIED.into(); From 3ebee111454984a7e96dcfea1a4d4a58d5fb8cba Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Apr 2026 12:26:38 +0200 Subject: [PATCH 27/35] refactor: remove some patchbay tests --- iroh/tests/patchbay.rs | 465 +---------------------------------------- 1 file changed, 1 insertion(+), 464 deletions(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index f7e6760a8b..a68729af39 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -31,7 +31,7 @@ use std::time::Duration; use iroh::{TransportAddr, endpoint::Side}; use n0_error::{Result, StackResultExt, StdResultExt}; use n0_tracing_test::traced_test; -use patchbay::{Firewall, LinkCondition, LinkDirection, LinkLimits, Nat, RouterPreset, TestGuard}; +use patchbay::{LinkCondition, LinkDirection, LinkLimits, Nat, RouterPreset, TestGuard}; use testdir::testdir; use tracing::info; @@ -309,387 +309,6 @@ async fn change_ifaces() -> Result { Ok(()) } -// --- -// NAT type matrix: verify holepunching across different NAT combinations -// --- - -/// One peer behind Home NAT, the other on a public network. -/// Holepunching should succeed: EIM mapping means the public peer can reach -/// the NATted peer's mapped port once it learns the address via relay. 
-#[tokio::test] -#[traced_test] -#[ignore = "stays relayed, holepunch times out (deadline elapsed)"] -async fn holepunch_home_nat_one_side() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let nat = lab.add_router("nat").nat(Nat::Home).build().await?; - let public = lab.add_router("public").build().await?; - let dev1 = lab.add_device("dev1").uplink(nat.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; - let timeout = Duration::from_secs(10); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths.wait_ip(timeout).await.context("did not holepunch")?; - ping_open(&conn, timeout).await?; - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// Both peers behind CGNAT (EIM+EIF). The most permissive real-world NAT. -/// Holepunching should succeed easily since filtering is endpoint-independent. 
-#[tokio::test] -#[traced_test] -#[ignore = "stays relayed, holepunch times out (deadline elapsed)"] -async fn holepunch_cgnat_both() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let nat1 = lab.add_router("nat1").nat(Nat::Cgnat).build().await?; - let nat2 = lab.add_router("nat2").nat(Nat::Cgnat).build().await?; - let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; - let timeout = Duration::from_secs(10); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch through CGNAT")?; - ping_open(&conn, timeout).await?; - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// Both peers behind FullCone NAT (EIM+EIF with hairpin). The most permissive -/// NAT type — any external host can send to the mapped port. Holepunching -/// always succeeds on the first try. 
-#[tokio::test] -#[traced_test] -async fn holepunch_full_cone_both() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let nat1 = lab.add_router("nat1").nat(Nat::FullCone).build().await?; - let nat2 = lab.add_router("nat2").nat(Nat::FullCone).build().await?; - let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; - let timeout = Duration::from_secs(10); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch through full cone")?; - ping_open(&conn, timeout).await?; - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// Both peers behind Corporate (symmetric/EDM) NAT. Each destination gets a -/// different external port, making holepunching impossible. The connection -/// must stay on the relay. -#[tokio::test] -#[traced_test] -async fn symmetric_nat_stays_relayed() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let nat1 = lab.add_router("nat1").nat(Nat::Corporate).build().await?; - let nat2 = lab.add_router("nat2").nat(Nat::Corporate).build().await?; - let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; - let timeout = Duration::from_secs(10); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - // Ping to verify the relay path works. - ping_open(&conn, timeout).await?; - // Give holepunching time to attempt and fail. 
- tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — symmetric NAT blocks holepunching" - ); - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// One peer behind Home NAT (EIM), the other behind Corporate/symmetric NAT -/// (EDM). Holepunching fails because the symmetric side allocates a different -/// port for each destination, so the Home peer's probes never reach the right -/// port. Connection stays relayed. -#[tokio::test] -#[traced_test] -async fn mixed_home_vs_symmetric_stays_relayed() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let home = lab.add_router("home").nat(Nat::Home).build().await?; - let corp = lab.add_router("corp").nat(Nat::Corporate).build().await?; - let dev1 = lab.add_device("dev1").uplink(home.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(corp.id()).build().await?; - let timeout = Duration::from_secs(10); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - ping_open(&conn, timeout).await?; - tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — symmetric NAT on one side blocks holepunching" - ); - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// Both peers behind CloudNat (EDM+APDF), the symmetric NAT used by cloud -/// providers (AWS NAT Gateway, GCP Cloud NAT). Same as Corporate: holepunching -/// is impossible, connection stays relayed. 
-#[tokio::test] -#[traced_test] -async fn cloud_nat_stays_relayed() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let nat1 = lab.add_router("nat1").nat(Nat::CloudNat).build().await?; - let nat2 = lab.add_router("nat2").nat(Nat::CloudNat).build().await?; - let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; - let timeout = Duration::from_secs(10); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - ping_open(&conn, timeout).await?; - tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — cloud symmetric NAT blocks holepunching" - ); - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// Double NAT: device behind a Home router, which itself sits behind an ISP -/// CGNAT router. This is a common real-world scenario (carrier-grade NAT + -/// consumer router). Both NATs use endpoint-independent mapping, so -/// holepunching should succeed. 
-#[tokio::test] -#[traced_test] -#[ignore = "stays relayed, holepunch times out (deadline elapsed)"] -async fn holepunch_double_nat() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - // ISP-level CGNAT routers - let isp1 = lab.add_router("isp1").nat(Nat::Cgnat).build().await?; - let isp2 = lab.add_router("isp2").nat(Nat::Cgnat).build().await?; - // Home routers behind ISPs - let home1 = lab - .add_router("home1") - .nat(Nat::Home) - .upstream(isp1.id()) - .build() - .await?; - let home2 = lab - .add_router("home2") - .nat(Nat::Home) - .upstream(isp2.id()) - .build() - .await?; - let dev1 = lab.add_device("dev1").uplink(home1.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(home2.id()).build().await?; - let timeout = Duration::from_secs(15); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch through double NAT")?; - ping_open(&conn, timeout).await?; - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -// --- -// Firewall and adverse conditions -// --- - -/// Corporate firewall blocks all UDP except DNS (port 53) and only allows TCP -/// on ports 80 and 443. Holepunching is impossible, but the relay connection -/// via HTTPS (TCP 443) must still work. 
-#[tokio::test] -#[traced_test] -async fn corporate_firewall_relay_only() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let fw = lab - .add_router("fw") - .firewall(Firewall::Corporate) - .build() - .await?; - let public = lab.add_router("public").build().await?; - let dev1 = lab.add_device("dev1").uplink(fw.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; - let timeout = Duration::from_secs(10); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - ping_open(&conn, timeout).await?; - tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — corporate firewall blocks UDP" - ); - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// Holepunch through Home NATs with a degraded mobile link (100ms latency, -/// 30ms jitter, 2% loss). Connection should still upgrade to direct despite -/// the poor link quality. 
-#[tokio::test] -#[traced_test] -async fn holepunch_mobile_3g() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; - let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; - let dev1 = lab - .add_device("dev1") - .iface("eth0", nat1.id()) - .build() - .await?; - let dev2 = lab - .add_device("dev2") - .iface("eth0", nat2.id()) - .build() - .await?; - dev1.set_link_condition("eth0", Some(LinkCondition::Mobile3G), LinkDirection::Both) - .await?; - dev2.set_link_condition("eth0", Some(LinkCondition::Mobile3G), LinkDirection::Both) - .await?; - let timeout = Duration::from_secs(20); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch over 3G link")?; - ping_open(&conn, timeout).await?; - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// Holepunch through Home NATs on a satellite link (high latency, moderate -/// jitter). Tests that iroh handles high-RTT environments without timing out -/// during NAT traversal. 
-#[tokio::test] -#[traced_test] -async fn holepunch_satellite() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; - let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; - let dev1 = lab - .add_device("dev1") - .iface("eth0", nat1.id()) - .build() - .await?; - let dev2 = lab - .add_device("dev2") - .iface("eth0", nat2.id()) - .build() - .await?; - dev1.set_link_condition("eth0", Some(LinkCondition::Satellite), LinkDirection::Both) - .await?; - dev2.set_link_condition("eth0", Some(LinkCondition::Satellite), LinkDirection::Both) - .await?; - let timeout = Duration::from_secs(20); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch over satellite link")?; - ping_open(&conn, timeout).await?; - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - /// Brief link outage: after holepunching succeeds, the link goes down for 2 /// seconds and comes back up. The connection should recover — either by /// falling back to relay during the outage or by re-establishing the direct @@ -741,88 +360,6 @@ async fn link_outage_recovery() -> Result { Ok(()) } -/// Hotel WiFi: captive-portal firewall allows all outbound TCP but only UDP -/// port 53 (DNS). Similar to corporate firewall but less restrictive on TCP. -/// Relay via HTTPS should work, holepunching should not. 
-#[tokio::test] -#[traced_test] -async fn hotel_wifi_relay_only() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let hotel = lab - .add_router("hotel") - .preset(RouterPreset::Hotel) - .build() - .await?; - let public = lab.add_router("public").build().await?; - let dev1 = lab.add_device("dev1").uplink(hotel.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(public.id()).build().await?; - let timeout = Duration::from_secs(10); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - assert!(paths.selected().is_relay(), "should start on relay"); - ping_open(&conn, timeout).await?; - tokio::time::sleep(Duration::from_secs(8)).await; - assert!( - paths.selected().is_relay(), - "should still be relayed — hotel firewall blocks UDP" - ); - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - -/// Asymmetric link conditions: one peer on a fast LAN, the other on degraded -/// WiFi. Holepunching should still succeed, and the connection should use -/// the direct path despite the asymmetric quality. 
-#[tokio::test] -#[traced_test] -async fn holepunch_asymmetric_links() -> Result { - let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; - let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; - let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; - let dev1 = lab - .add_device("dev1") - .iface("eth0", nat1.id()) - .build() - .await?; - let dev2 = lab - .add_device("dev2") - .iface("eth0", nat2.id()) - .build() - .await?; - dev1.set_link_condition("eth0", Some(LinkCondition::Lan), LinkDirection::Both) - .await?; - dev2.set_link_condition("eth0", Some(LinkCondition::WifiBad), LinkDirection::Both) - .await?; - let timeout = Duration::from_secs(15); - Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await?; - Ok(()) - }) - .client(dev2, async move |_dev, _ep, conn| { - let mut paths = conn.paths(); - paths - .wait_ip(timeout) - .await - .context("did not holepunch with asymmetric links")?; - ping_open(&conn, timeout).await?; - Ok(()) - }) - .run() - .await?; - guard.ok(); - Ok(()) -} - // --- // Degradation ladder: find where holepunching breaks under worsening conditions // --- From 207fa3f43fb53288eac3c8fc17c2f4e7aab92a54 Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Apr 2026 12:27:18 +0200 Subject: [PATCH 28/35] refactor: improve and harden patchbay tests --- iroh/tests/patchbay.rs | 69 ++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index a68729af39..161319e21c 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -60,12 +60,15 @@ async fn holepunch_simple() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; - let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; 
- let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let server = lab.add_device("server").uplink(nat1.id()).build().await?; + let client = lab.add_device("client").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(10); Pair::new(relay_map) - .server(dev1, async |_dev, _ep, _conn| Ok(())) - .client(dev2, async move |_dev, _ep, conn| { + .server(server, async |_dev, _ep, conn| { + conn.closed().await; + Ok(()) + }) + .client(client, async move |_dev, _ep, conn| { let mut paths = conn.paths(); assert!(paths.selected().is_relay(), "connection started relayed"); paths.wait_ip(timeout).await?; @@ -88,16 +91,16 @@ async fn holepunch_simple() -> Result { #[tokio::test] #[traced_test] #[ignore = "known to still fail"] -async fn switch_uplink() -> Result { +async fn switch_uplink_v4() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; let nat3 = lab.add_router("nat3").nat(Nat::Home).build().await?; - let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let server = lab.add_device("server").uplink(nat1.id()).build().await?; + let client = lab.add_device("client").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(10); Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { + .server(server, async move |_dev, _ep, conn| { let mut paths = conn.paths(); assert!(paths.selected().is_relay(), "connection started relayed"); @@ -118,9 +121,10 @@ async fn switch_uplink() -> Result { ping_accept(&conn, timeout).await?; info!("ping done"); + conn.closed().await; Ok(()) }) - .client(dev2, async move |dev, _ep, conn| { + .client(client, async move |dev, _ep, conn| { let mut paths = conn.paths(); assert!(paths.selected().is_relay(), "connection started relayed"); @@ 
-155,8 +159,8 @@ async fn switch_uplink() -> Result { /// The test currently fails, but should pass. #[tokio::test] #[traced_test] -#[ignore = "known to still fail"] -async fn switch_uplink_ipv6() -> Result { +#[ignore = "known to still be flaky"] +async fn switch_uplink_v6() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let public = lab .add_router("public") @@ -173,11 +177,11 @@ async fn switch_uplink_ipv6() -> Result { .preset(RouterPreset::IspV6) .build() .await?; - let dev1 = lab.add_device("dev1").uplink(public.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(home.id()).build().await?; + let server = lab.add_device("server").uplink(public.id()).build().await?; + let client = lab.add_device("client").uplink(home.id()).build().await?; let timeout = Duration::from_secs(10); Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { + .server(server, async move |_dev, _ep, conn| { let mut paths = conn.paths(); assert!(paths.selected().is_relay(), "connection started relayed"); @@ -205,9 +209,10 @@ async fn switch_uplink_ipv6() -> Result { ping_accept(&conn, timeout).await?; info!("ping done"); + conn.closed().await; Ok(()) }) - .client(dev2, async move |dev, _ep, conn| { + .client(client, async move |dev, _ep, conn| { let mut paths = conn.paths(); assert!(paths.selected().is_relay(), "connection started relayed"); @@ -250,30 +255,32 @@ async fn change_ifaces() -> Result { let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; // dev2 has two uplinks (wifi=Mobile3G on eth0, LAN on eth1). eth1 starts down. 
- let dev1 = lab - .add_device("dev1") + let server = lab + .add_device("server") .iface("eth0", nat1.id()) .build() .await?; - let dev2 = lab - .add_device("dev2") + let client = lab + .add_device("client") .iface("eth0", nat2.id()) .iface("eth1", nat1.id()) .build() .await?; - dev2.set_link_condition("eth0", Some(LinkCondition::Mobile4G), LinkDirection::Both) + client + .set_link_condition("eth0", Some(LinkCondition::Mobile4G), LinkDirection::Both) .await?; - dev2.link_down("eth1").await?; + client.link_down("eth1").await?; let timeout = Duration::from_secs(10); Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { + .server(server, async move |_dev, _ep, conn| { ping_accept(&conn, timeout) .await .context("failed at ping_accept")?; + conn.closed().await; Ok(()) }) - .client(dev2, async move |dev, _ep, conn| { + .client(client, async move |dev, _ep, conn| { let mut paths = conn.paths(); assert!(paths.selected().is_relay(), "connection started relayed"); let first = paths @@ -319,16 +326,17 @@ async fn link_outage_recovery() -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; - let dev1 = lab.add_device("dev1").uplink(nat1.id()).build().await?; - let dev2 = lab.add_device("dev2").uplink(nat2.id()).build().await?; + let server = lab.add_device("server").uplink(nat1.id()).build().await?; + let client = lab.add_device("client").uplink(nat2.id()).build().await?; let timeout = Duration::from_secs(15); Pair::new(relay_map) - .server(dev1, async move |_dev, _ep, conn| { + .server(server, async move |_dev, _ep, conn| { ping_accept(&conn, timeout).await.context("ping 1")?; ping_accept(&conn, timeout).await.context("ping 2")?; + conn.closed().await; Ok(()) }) - .client(dev2, async move |dev, _ep, conn| { + .client(client, async move |dev, _ep, conn| { let mut paths = conn.paths(); 
paths.wait_ip(timeout).await.context("initial holepunch")?; info!("holepunched, now killing link for 2s"); @@ -341,14 +349,14 @@ async fn link_outage_recovery() -> Result { // After link recovery, we should be able to ping — via relay // fallback or re-established direct path. - ping_open(&conn, Duration::from_secs(20)) + ping_open(&conn, Duration::from_secs(30)) .await .context("ping after link recovery")?; info!("connection recovered after link outage"); // Eventually the direct path should come back. paths - .wait_ip(Duration::from_secs(20)) + .wait_ip(Duration::from_secs(30)) .await .context("did not re-establish direct path")?; ping_open(&conn, timeout).await.context("ping on direct")?; @@ -436,7 +444,7 @@ async fn run_degrade_level(impaired_side: Side, level: usize) -> Result Result Date: Wed, 1 Apr 2026 12:32:18 +0200 Subject: [PATCH 29/35] fix: incongruency btw log and test --- iroh/tests/patchbay.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 161319e21c..340720d768 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -339,11 +339,11 @@ async fn link_outage_recovery() -> Result { .client(client, async move |dev, _ep, conn| { let mut paths = conn.paths(); paths.wait_ip(timeout).await.context("initial holepunch")?; - info!("holepunched, now killing link for 2s"); - + let downtime = Duration::from_secs(5); + info!("holepunched, now killing link for {downtime:?}"); // Take the link down. 
dev.link_down("eth0").await?; - tokio::time::sleep(Duration::from_secs(5)).await; + tokio::time::sleep(downtime).await; dev.link_up("eth0").await?; info!("link restored, waiting for recovery"); From a29eb7f96a80819eaf7663d07d8102e98fac8954 Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Apr 2026 12:40:10 +0200 Subject: [PATCH 30/35] improve docs --- .github/workflows/tests.yaml | 2 +- iroh/tests/patchbay.rs | 12 +++++++----- iroh/tests/patchbay/util.rs | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 449bd5ca3a..0b77611b1c 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -25,7 +25,7 @@ env: SCCACHE_CACHE_SIZE: "10G" CRATES_LIST: "iroh,iroh-bench,iroh-dns-server,iroh-relay" IROH_FORCE_STAGING_RELAYS: "1" - NEXTEST_VERSION: "0.9.132" + NEXTEST_VERSION: "0.9.80" jobs: build_and_test_nix: diff --git a/iroh/tests/patchbay.rs b/iroh/tests/patchbay.rs index 340720d768..f0f0e56f02 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -83,9 +83,11 @@ async fn holepunch_simple() -> Result { /// Tests that changing the uplink of an interface works (i.e. switching wifis). /// -/// For this we observe a change in the selected path's remote addr on the *other* side. -/// Whether the side that changes interfaces opens a new path or does an RFC9000-style migration -/// is an implementation detail which we won't test for. +/// In this test, the client device switches the uplink of its network interface. +/// We then observe a change in the selected path's remote addr on the server side. +/// How this change carries through in iroh is an implementation detail we don't care for here +/// (whether it's a new path or the same path) but the server must observe the client's +/// new address as the selected path. /// /// The test currently fails, but should pass. 
#[tokio::test] @@ -154,9 +156,9 @@ async fn switch_uplink_v4() -> Result { /// Tests that changing the uplink from IPv4 to IPv6 works. /// -/// Similar to `switch_uplink` but switches to an IPv6 only network. +/// Similar to [`switch_uplink_v4`] but switches to an IPv6 only network. /// -/// The test currently fails, but should pass. +/// The test currently fails in ~50% of runs, but should pass reliably. #[tokio::test] #[traced_test] #[ignore = "known to still be flaky"] diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs index 10e4276993..33dda9b90e 100644 --- a/iroh/tests/patchbay/util.rs +++ b/iroh/tests/patchbay/util.rs @@ -122,7 +122,7 @@ impl Pair { /// The client task will connect to the server, and the server will accept a connection. /// Once a connection is established on either side, its run function is invoked. /// Once both run functions completed, the endpoints are dropped without awaiting - /// [`Endpoint::close`], so the corresponding ERROR logs are exepcted. + /// [`Endpoint::close`], so the corresponding ERROR logs are expected. 
/// /// After completion, this will: /// - log the result of the run functions From 91cfc40d6ded9997955103fb8bc2508ec627460b Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Apr 2026 12:48:51 +0200 Subject: [PATCH 31/35] fixup workflow --- .github/workflows/patchbay.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index e560a3692a..e7769c6fab 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -19,10 +19,7 @@ env: jobs: patchbay_tests: name: Patchbay Tests - permissions: - contents: read - pull-requests: write - timeout-minutes: 45 + timeout-minutes: 15 runs-on: [self-hosted, linux, X64] env: RUSTC_WRAPPER: "sccache" @@ -40,10 +37,20 @@ jobs: with: tool: nextest@${{ env.NEXTEST_VERSION }} + - name: Build patchbay tests + run: cargo nextest run -p iroh --test patchbay --profile patchbay --release --no-run + - name: Run patchbay tests id: tests - run: cargo nextest run --profile patchbay --release --message-format libtest-json-plus > test-results.jsonl + run: cargo nextest run -p iroh --test patchbay --profile patchbay --release env: RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG' }} - NEXTEST_EXPERIMENTAL_LIBTEST_JSON: "1" + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: patchbay-testdir-${{ github.sha }} + path: target/testdir-current/ + retention-days: 7 + if-no-files-found: ignore From 14ebbfe57b310b511497634ffdb1fc4184c786ba Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Apr 2026 12:56:55 +0200 Subject: [PATCH 32/35] fix: use makefile in ci --- .github/workflows/patchbay.yml | 9 ++++----- Makefile.toml | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index e7769c6fab..3b840b8f9f 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -31,18 +31,17 @@ jobs: - uses: 
actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: mozilla-actions/sccache-action@v0.0.9 - - - name: Install cargo-nextest + - name: Install cargo-make and cargo-nextest uses: taiki-e/install-action@v2 with: - tool: nextest@${{ env.NEXTEST_VERSION }} + tool: nextest@${{ env.NEXTEST_VERSION }},cargo-make - name: Build patchbay tests - run: cargo nextest run -p iroh --test patchbay --profile patchbay --release --no-run + run: cargo make patchbay --no-run - name: Run patchbay tests id: tests - run: cargo nextest run -p iroh --test patchbay --profile patchbay --release + run: cargo make patchbay env: RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG' }} diff --git a/Makefile.toml b/Makefile.toml index 5d00fc498d..0e62deb53c 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -30,4 +30,4 @@ args = [ [tasks.patchbay] workspace = false command = "cargo" -args = ["nextest", "run", "-p", "iroh", "--test", "patchbay", "--profile", "patchbay", "${@}"] +args = ["nextest", "run", "-p", "iroh", "--features", "qlog", "--test", "patchbay", "--profile", "patchbay", "${@}"] From 83a8d2acb70f24145d353a36509006a2e072a978 Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Apr 2026 13:06:44 +0200 Subject: [PATCH 33/35] fixup nits --- .github/workflows/patchbay.yml | 3 ++- Cargo.lock | 13 ------------- iroh/Cargo.toml | 1 - 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index 3b840b8f9f..48745a647f 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -43,7 +43,8 @@ jobs: id: tests run: cargo make patchbay env: - RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG' }} + RUST_LOG: ${{ runner.debug && 'trace' || 'debug' }} + PATCHBAY_LOG: trace - name: Upload test results if: always() diff --git a/Cargo.lock b/Cargo.lock index fd4ca13595..b33c3e130b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5487,16 +5487,6 @@ dependencies = [ "tracing-core", ] -[[package]] -name = 
"tracing-serde" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" -dependencies = [ - "serde", - "tracing-core", -] - [[package]] name = "tracing-subscriber" version = "0.3.22" @@ -5507,8 +5497,6 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex-automata", - "serde", - "serde_json", "sharded-slab", "smallvec", "thread_local", @@ -5516,7 +5504,6 @@ dependencies = [ "tracing", "tracing-core", "tracing-log", - "tracing-serde", ] [[package]] diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 96dfa17fe8..19b6fce5f1 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -128,7 +128,6 @@ n0-tracing-test = "0.3" clap = { version = "4", features = ["derive"] } tracing-subscriber = { version = "0.3", features = [ "env-filter", - "json", ] } indicatif = { version = "0.18", features = ["tokio"] } parse-size = { version = "1.1.0", features = ['std'] } From 09c545f1b3c792d7b55b139da7d0686b6074a0bb Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Apr 2026 13:12:39 +0200 Subject: [PATCH 34/35] chore: change nextest version --- .github/workflows/patchbay.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/patchbay.yml b/.github/workflows/patchbay.yml index 48745a647f..74b93d85b9 100644 --- a/.github/workflows/patchbay.yml +++ b/.github/workflows/patchbay.yml @@ -14,7 +14,7 @@ env: RUST_BACKTRACE: 1 SCCACHE_CACHE_SIZE: "10G" IROH_FORCE_STAGING_RELAYS: "1" - NEXTEST_VERSION: "0.9.132" + NEXTEST_VERSION: "0.9.80" jobs: patchbay_tests: From 8d91df17e6d79cc655da3fe4026b12eb9a996ba3 Mon Sep 17 00:00:00 2001 From: Frando Date: Thu, 2 Apr 2026 10:14:12 +0200 Subject: [PATCH 35/35] fix: final cleanup, better docs, better errors --- iroh/tests/patchbay.rs | 115 +++++++++++++++++++----------------- iroh/tests/patchbay/util.rs | 32 +++++----- 2 files changed, 74 insertions(+), 73 deletions(-) diff --git a/iroh/tests/patchbay.rs 
b/iroh/tests/patchbay.rs index f0f0e56f02..d3e9b39d47 100644 --- a/iroh/tests/patchbay.rs +++ b/iroh/tests/patchbay.rs @@ -52,8 +52,11 @@ fn userns_ctor() { // Holepunch tests // --- -/// Simple holepunch: Two devices behind destination-independent NATs, -/// establish via relay, upgrade to direct. +/// Two devices behind destination-independent NATs holepunch a direct connection. +/// +/// Both devices connect through a relay first, then upgrade to a direct path. +/// The client asserts that the connection starts as relayed, then waits for +/// a direct (IP) path to be selected. #[tokio::test] #[traced_test] async fn holepunch_simple() -> Result { @@ -81,15 +84,14 @@ async fn holepunch_simple() -> Result { Ok(()) } -/// Tests that changing the uplink of an interface works (i.e. switching wifis). +/// Switches the client's IPv4 uplink to a different NAT mid-connection. /// -/// In this test, the client device switches the uplink of its network interface. -/// We then observe a change in the selected path's remote addr on the server side. -/// How this change carries through in iroh is an implementation detail we don't care for here -/// (whether it's a new path or the same path) but the server must observe the client's -/// new address as the selected path. +/// The client starts behind `nat2`, holepunches a direct path, then replugs +/// its interface to `nat3`. The server waits until a direct path with a new +/// remote address is selected. We verify with a ping that the new path works. /// -/// The test currently fails, but should pass. +/// Currently ignored because iroh does not yet recover reliably from an +/// uplink switch. #[tokio::test] #[traced_test] #[ignore = "known to still fail"] @@ -154,11 +156,13 @@ async fn switch_uplink_v4() -> Result { Ok(()) } -/// Tests that changing the uplink from IPv4 to IPv6 works. +/// Switches the client's uplink from an IPv4 NAT to an IPv6-only ISP network. 
/// -/// Similar to [`switch_uplink_v4`] but switches to an IPv6 only network. +/// Similar to [`switch_uplink_v4`], but the client replugs from a Home NAT +/// to an IPv6-only ISP router. The server waits for the selected path to +/// switch from an IPv4 to an IPv6 remote address. /// -/// The test currently fails in ~50% of runs, but should pass reliably. +/// Currently ignored because this fails in roughly half of runs. #[tokio::test] #[traced_test] #[ignore = "known to still be flaky"] @@ -197,6 +201,8 @@ async fn switch_uplink_v6() -> Result { .context("did not become direct")?; info!(addr=?first.remote_addr(), "connection became direct, waiting for path change"); + ping_accept(&conn, timeout).await.context("ping_accept 1")?; + // Now wait until the direct path changes, which happens after the other endpoint // changes its uplink. We check is_ip() explicitly to avoid triggering on a // transient relay fallback during the network switch. @@ -209,7 +215,7 @@ async fn switch_uplink_v6() -> Result { .context("did not switch paths to v6")?; info!(addr=?second.remote_addr(), "connection changed path, wait for ping"); - ping_accept(&conn, timeout).await?; + ping_accept(&conn, timeout).await.context("ping_accept 2")?; info!("ping done"); conn.closed().await; Ok(()) @@ -221,19 +227,15 @@ async fn switch_uplink_v6() -> Result { // Wait for conn to become direct. paths.wait_ip(timeout).await.context("become direct")?; - // Wait a little more and then switch wifis. - tokio::time::sleep(Duration::from_secs(1)).await; + ping_open(&conn, timeout).await.context("ping_open 1")?; + info!("switch IP uplink"); dev.replug_iface("eth0", mobile.id()).await?; // We don't assert any path changes here, because the remote stays identical, // and PathInfo does not contain info on local addrs. Instead, the remote // only accepts our ping after the path changed. 
- info!("send ping"); - ping_open(&conn, timeout) - .await - .context("failed at ping_open")?; - info!("ping done"); + ping_open(&conn, timeout).await.context("ping_open 2")?; Ok(()) }) .run() @@ -242,13 +244,13 @@ async fn switch_uplink_v6() -> Result { Ok(()) } -/// Test that switching to a faster link works. +/// Adds a faster LAN interface and verifies the path becomes selected. /// -/// Two devices, connected initially over holepunched NAT. Then mid connection -/// device 2 plugs a cable into device 1's router, i.e. they now have a LAN -/// connection. -/// -/// Verify we switch to the LAN connection. +/// The server sits on `nat1`. The client starts on `nat2` with a 4G-impaired +/// link and has a second interface `eth1` connected to `nat1` (a LAN path), +/// but `eth1` starts down. After holepunching over the impaired link, the test +/// brings `eth1` up and waits for the selected path to change to the new, +/// faster LAN address. A ping verifies the new path works. #[tokio::test] #[traced_test] async fn change_ifaces() -> Result { @@ -256,7 +258,7 @@ async fn change_ifaces() -> Result { let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; let nat2 = lab.add_router("nat2").nat(Nat::Home).build().await?; - // dev2 has two uplinks (wifi=Mobile3G on eth0, LAN on eth1). eth1 starts down. + // Client has two uplinks (eth0=4G via nat2, eth1=LAN via nat1). eth1 starts down. 
let server = lab .add_device("server") .iface("eth0", nat1.id()) @@ -276,9 +278,7 @@ async fn change_ifaces() -> Result { let timeout = Duration::from_secs(10); Pair::new(relay_map) .server(server, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout) - .await - .context("failed at ping_accept")?; + ping_accept(&conn, timeout).await.context("ping_accept")?; conn.closed().await; Ok(()) }) @@ -307,9 +307,7 @@ async fn change_ifaces() -> Result { .context("did not switch paths")?; info!(addr=?next.remote_addr(), "new direct path established"); - ping_open(&conn, timeout) - .await - .context("failed at ping_open")?; + ping_open(&conn, timeout).await.context("ping_open")?; Ok(()) }) .run() @@ -318,10 +316,11 @@ async fn change_ifaces() -> Result { Ok(()) } -/// Brief link outage: after holepunching succeeds, the link goes down for 2 -/// seconds and comes back up. The connection should recover — either by -/// falling back to relay during the outage or by re-establishing the direct -/// path after recovery. +/// Takes the client's link down for five seconds after holepunching, then brings it back. +/// +/// After recovery, the test verifies that we can ping (via relay fallback or +/// a re-established direct path), and then waits for a direct path to be +/// selected again. 
#[tokio::test] #[traced_test] async fn link_outage_recovery() -> Result { @@ -333,8 +332,8 @@ async fn link_outage_recovery() -> Result { let timeout = Duration::from_secs(15); Pair::new(relay_map) .server(server, async move |_dev, _ep, conn| { - ping_accept(&conn, timeout).await.context("ping 1")?; - ping_accept(&conn, timeout).await.context("ping 2")?; + ping_accept(&conn, timeout).await.context("ping_accept 1")?; + ping_accept(&conn, timeout).await.context("ping_accept 2")?; conn.closed().await; Ok(()) }) @@ -349,11 +348,11 @@ async fn link_outage_recovery() -> Result { dev.link_up("eth0").await?; info!("link restored, waiting for recovery"); - // After link recovery, we should be able to ping — via relay + // After link recovery, we should be able to ping, either via relay // fallback or re-established direct path. ping_open(&conn, Duration::from_secs(30)) .await - .context("ping after link recovery")?; + .context("ping_open after link_up")?; info!("connection recovered after link outage"); // Eventually the direct path should come back. @@ -361,7 +360,9 @@ async fn link_outage_recovery() -> Result { .wait_ip(Duration::from_secs(30)) .await .context("did not re-establish direct path")?; - ping_open(&conn, timeout).await.context("ping on direct")?; + ping_open(&conn, timeout) + .await + .context("ping_open after direct")?; Ok(()) }) .run() @@ -374,11 +375,12 @@ async fn link_outage_recovery() -> Result { // Degradation ladder: find where holepunching breaks under worsening conditions // --- -/// Increasingly degraded link on one side, clean link on the other. -/// Each level adds more latency, loss, and reordering. The test runs each level -/// twice: once with the impaired side accepting, once connecting. +/// Increasingly degraded link conditions applied to one side of the connection. +/// +/// Each level adds more latency, loss, and reordering. The levels are tested +/// individually for both server-side and client-side impairment. 
const DEGRADE_LEVELS: &[LinkLimits] = &[ - // 0: mild — good wifi + // 0: mild - good wifi LinkLimits { latency_ms: 10, jitter_ms: 5, @@ -388,7 +390,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 1: poor — bad wifi or 3G + // 1: poor - bad wifi or 3G LinkLimits { latency_ms: 100, jitter_ms: 30, @@ -398,7 +400,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 2: bad — congested 3G + // 2: bad - congested 3G LinkLimits { latency_ms: 200, jitter_ms: 60, @@ -408,7 +410,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 3: terrible — barely usable + // 3: terrible - barely usable LinkLimits { latency_ms: 300, jitter_ms: 80, @@ -418,7 +420,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 4: extreme — GEO satellite with heavy loss + // 4: extreme - GEO satellite with heavy loss LinkLimits { latency_ms: 500, jitter_ms: 100, @@ -428,7 +430,7 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ duplicate_pct: 0.0, corrupt_pct: 0.0, }, - // 6: absurd — stress test + // 5: absurd - stress test LinkLimits { latency_ms: 800, jitter_ms: 200, @@ -440,8 +442,11 @@ const DEGRADE_LEVELS: &[LinkLimits] = &[ }, ]; -/// Run a single degradation level: create devices with the given impairment, -/// try to holepunch and ping, return Ok if successful. +/// Runs a single degradation level. +/// +/// Creates two devices behind Home NATs, applies the given [`LinkLimits`] to +/// `impaired_side`, then attempts to holepunch and ping. Returns the +/// [`TestGuard`] on success so the caller can mark it as passed. 
async fn run_degrade_level(impaired_side: Side, level: usize) -> Result { let (lab, relay_map, _relay_guard, guard) = lab_with_relay(testdir!()).await?; let nat1 = lab.add_router("nat1").nat(Nat::Home).build().await?; @@ -491,7 +496,7 @@ async fn run_degrade_level(impaired_side: Side, level: usize) -> Result tracing::event!( - target: "iroh::_events::test_ladder_pass", + target: "test::_events::ladder_pass", tracing::Level::INFO, level, latency_ms = limits.latency_ms, @@ -501,7 +506,7 @@ async fn run_degrade_level(impaired_side: Side, level: usize) -> Result tracing::event!( - target: "iroh::_events::test_ladder_fail", + target: "test::_events::ladder_fail", tracing::Level::WARN, level, latency_ms = limits.latency_ms, diff --git a/iroh/tests/patchbay/util.rs b/iroh/tests/patchbay/util.rs index 33dda9b90e..50edd01519 100644 --- a/iroh/tests/patchbay/util.rs +++ b/iroh/tests/patchbay/util.rs @@ -10,7 +10,7 @@ use n0_error::{Result, StackResultExt, StdResultExt, anyerr, ensure_any}; use n0_future::{boxed::BoxFuture, task::AbortOnDropHandle}; use patchbay::{Device, IpSupport, Lab, LabOpts, OutDir, TestGuard}; use tokio::sync::{Barrier, oneshot}; -use tracing::{Instrument, debug, error, error_span, info}; +use tracing::{Instrument, debug, error, error_span, event, info}; use self::relay::run_relay_server; @@ -127,7 +127,7 @@ impl Pair { /// After completion, this will: /// - log the result of the run functions /// - record the endpoint metrics as a `patchbay::_metrics` tracing event - /// - emit an `iroh::_events::test::ok` or `::failed` event for each device + /// - emit a `test::_events::pass` or `test::_events::fail` event for each device /// /// Returns an error if any step or run function failed. 
pub async fn run(mut self) -> Result { @@ -218,21 +218,17 @@ impl Pair { fn log_result_on_device(dev: &Device, res: Result<(), E>) { let _ = dev.run_sync(move || { match res { - Ok(_) => { - tracing::event!( - target: "iroh::_events::test::ok", - tracing::Level::INFO, - msg = %"device ok" - ); - } - Err(error) => { - tracing::event!( - target: "iroh::_events::test::failed", - tracing::Level::ERROR, - %error, - msg = %"device failed" - ); - } + Ok(_) => event!( + target: "test::_events::pass", + tracing::Level::INFO, + msg = %"device passed" + ), + Err(error) => event!( + target: "test::_events::fail", + tracing::Level::ERROR, + %error, + msg = %"device failed" + ), } Ok(()) }); @@ -255,7 +251,7 @@ pub trait PathWatcherExt { /// Returns the currently selected path. /// - /// Panics if no patch is marked as selected. + /// Panics if no path is marked as selected. fn selected(&mut self) -> PathInfo; /// Wait until the selected path is a direct (IP) path.