From 46cc17c7861d8af98f5a16c17ebc49666ef2bbc9 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 28 Mar 2025 11:50:49 +0100 Subject: [PATCH 01/29] New test using tiny topo. --- pkg/snet/multihomed/multihomed_test.go | 144 +++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 pkg/snet/multihomed/multihomed_test.go diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go new file mode 100644 index 0000000000..cd3fc4d94b --- /dev/null +++ b/pkg/snet/multihomed/multihomed_test.go @@ -0,0 +1,144 @@ +package multihomed + +import ( + "context" + "fmt" + "net" + "testing" + "time" + + "github.com/scionproto/scion/pkg/addr" + "github.com/scionproto/scion/pkg/daemon" + "github.com/scionproto/scion/pkg/snet" + "github.com/stretchr/testify/require" +) + +func TestMultihomed(t *testing.T) { + serverAddress := "127.0.0.1:12345" + clientAddress := "127.0.0.1:0" + serverDaemon := "127.0.0.19:30255" // 111 + clientDaemon := "127.0.0.27:30255" // 112 + + serverAddr, err := net.ResolveUDPAddr("udp", serverAddress) + require.NoError(t, err) + clientAddr, err := net.ResolveUDPAddr("udp", clientAddress) + require.NoError(t, err) + + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelF() + + { // Server + sd, err := daemon.NewService(serverDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + info, err := sd.ASInfo(ctx, 0) + require.NoError(t, err) + t.Logf("local IA: %s", info.IA.String()) + + interfaces, err := sd.Interfaces(ctx) + require.NoError(t, err) + for k, v := range interfaces { + t.Logf("iface %3d: %s", k, v.String()) + } + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + conn, err := sn.Listen(ctx, "udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) + defer conn.Close() + + go func() { + handlePing(t, conn) + }() + } + + { // Client + sd, err := daemon.NewService(clientDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + info, err := sd.ASInfo(ctx, 0) + require.NoError(t, err) + t.Logf("client local IA: %s", info.IA.String()) + + interfaces, err := sd.Interfaces(ctx) + require.NoError(t, err) + for k, v := range interfaces { + t.Logf("iface %3d: %s", k, v.String()) + } + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + completeServerAddrStr := fmt.Sprintf("1-ff00:0:111,[%s]:%d", + serverAddr.IP.String(), + serverAddr.Port) + completeServerAddr, err := snet.ParseUDPAddr(completeServerAddrStr) + require.NoError(t, err) + + paths := getRemote(ctx, t, sd, completeServerAddr.IA, info.IA) + require.Greater(t, len(paths), 0) + completeServerAddr.Path = paths[0].Dataplane() + completeServerAddr.NextHop = paths[0].UnderlayNextHop() + + conn, err := sn.Dial(ctx, "udp", clientAddr, completeServerAddr) + require.NoError(t, err) + _, err = conn.Write([]byte("ping")) + require.NoError(t, err) + + // Read answer. + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + require.NoError(t, err) + require.Equal(t, completeServerAddr.String(), remoteAddr.String()) + buff = buff[:n] + require.Equal(t, "pong", string(buff)) + + err = conn.Close() + require.NoError(t, err) + } +} + +func handlePing(t *testing.T, conn *snet.Conn) { + t.Logf("handlePing conn = %p", conn) + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + t.Logf("handlePing, remote: %s, read %d bytes", remoteAddr, n) + require.NoError(t, err) + buff = buff[:n] + t.Logf("read from %s", remoteAddr) + // Check ping. + require.Equal(t, "ping", string(buff)) + + // Pong. + _, err = conn.WriteTo([]byte("pong"), remoteAddr) + require.NoError(t, err) +} + +func getRemote( + ctx context.Context, + t *testing.T, + sd daemon.Connector, + remote, local addr.IA, +) []snet.Path { + paths, err := sd.Paths(ctx, remote, local, daemon.PathReqFlags{}) + require.NoError(t, err) + return paths +} From 300b45d6593e98cea5bed771e4b09910d70edc13 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 28 Mar 2025 12:08:55 +0100 Subject: [PATCH 02/29] Refactor test. --- pkg/snet/multihomed/multihomed_test.go | 214 ++++++++++++++----------- 1 file changed, 122 insertions(+), 92 deletions(-) diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index cd3fc4d94b..f2a8783b9f 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -9,111 +9,138 @@ import ( "github.com/scionproto/scion/pkg/addr" "github.com/scionproto/scion/pkg/daemon" + "github.com/scionproto/scion/pkg/private/xtest" "github.com/scionproto/scion/pkg/snet" "github.com/stretchr/testify/require" ) +var ( + serverIA = "1-ff00:0:111" + serverAddress = "127.0.0.1:12345" + clientAddress = "127.0.0.1:0" + serverDaemon = "127.0.0.19:30255" // 111 + clientDaemon = "127.0.0.27:30255" // 112 +) + +func TestBasic(t *testing.T) { + serverAddr := xtest.MustParseUDPAddr(t, serverAddress) + clientAddr := xtest.MustParseUDPAddr(t, clientAddress) + + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelF() + + runServerAt(ctx, t, serverAddr) + runClientWith(ctx, t, serverAddr, clientAddr) +} + func TestMultihomed(t *testing.T) { - serverAddress := "127.0.0.1:12345" - clientAddress := "127.0.0.1:0" - serverDaemon := "127.0.0.19:30255" // 111 - clientDaemon := "127.0.0.27:30255" // 112 + serverAddr := xtest.MustParseUDPAddr(t, serverAddress) + clientAddr := xtest.MustParseUDPAddr(t, clientAddress) + + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelF() + + runServerAt(ctx, t, serverAddr) + runClientWith(ctx, t, serverAddr, clientAddr) +} - serverAddr, err := net.ResolveUDPAddr("udp", serverAddress) +func runServerAt( + ctx context.Context, + t *testing.T, + serverAddr *net.UDPAddr, +) { + sd, err := daemon.NewService(serverDaemon).Connect(ctx) require.NoError(t, err) - clientAddr, err := net.ResolveUDPAddr("udp", clientAddress) + defer sd.Close() + + info, err := sd.ASInfo(ctx, 0) require.NoError(t, err) + t.Logf("local IA: %s", info.IA.String()) - ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) - defer cancelF() + interfaces, err := sd.Interfaces(ctx) + require.NoError(t, err) + for k, v := range interfaces { + t.Logf("iface %3d: %s", k, v.String()) + } + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + conn, err := sn.Listen(ctx, "udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) - { // Server - sd, err := daemon.NewService(serverDaemon).Connect(ctx) - require.NoError(t, err) - defer sd.Close() - - info, err := sd.ASInfo(ctx, 0) - require.NoError(t, err) - t.Logf("local IA: %s", info.IA.String()) - - interfaces, err := sd.Interfaces(ctx) - require.NoError(t, err) - for k, v := range interfaces { - t.Logf("iface %3d: %s", k, v.String()) - } - - topo, err := daemon.LoadTopology(ctx, sd) - require.NoError(t, err) - - sn := &snet.SCIONNetwork{ - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - Topology: topo, - } - - conn, err := sn.Listen(ctx, "udp", serverAddr) - require.NoError(t, err) - require.NotNil(t, conn) - defer conn.Close() - - go func() { - handlePing(t, conn) - }() + go func() { + handlePing(t, conn) + }() + t.Log("runServerAt: done") +} + +func runClientWith( + ctx context.Context, + t *testing.T, + serverAddr *net.UDPAddr, + clientAddr *net.UDPAddr, +) { + sd, err := daemon.NewService(clientDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + info, err := sd.ASInfo(ctx, 0) + require.NoError(t, err) + t.Logf("client local IA: %s", info.IA.String()) + + interfaces, err := sd.Interfaces(ctx) + require.NoError(t, err) + for k, v := range interfaces { + t.Logf("iface %3d: %s", k, v.String()) } - { // Client - sd, err := daemon.NewService(clientDaemon).Connect(ctx) - require.NoError(t, err) - defer sd.Close() - - info, err := sd.ASInfo(ctx, 0) - require.NoError(t, err) - t.Logf("client local IA: %s", info.IA.String()) - - interfaces, err := sd.Interfaces(ctx) - require.NoError(t, err) - for k, v := range interfaces { - t.Logf("iface %3d: %s", k, v.String()) - } - - topo, err := daemon.LoadTopology(ctx, sd) - require.NoError(t, err) - - sn := &snet.SCIONNetwork{ - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - Topology: topo, - } - - completeServerAddrStr := fmt.Sprintf("1-ff00:0:111,[%s]:%d", - serverAddr.IP.String(), - serverAddr.Port) - completeServerAddr, err := snet.ParseUDPAddr(completeServerAddrStr) - require.NoError(t, err) - - paths := getRemote(ctx, t, sd, completeServerAddr.IA, info.IA) - require.Greater(t, len(paths), 0) - completeServerAddr.Path = paths[0].Dataplane() - completeServerAddr.NextHop = paths[0].UnderlayNextHop() - - conn, err := sn.Dial(ctx, "udp", clientAddr, completeServerAddr) - require.NoError(t, err) - _, err = conn.Write([]byte("ping")) - require.NoError(t, err) - - // Read answer. - buff := make([]byte, 2048) - n, remoteAddr, err := conn.ReadFrom(buff) - require.NoError(t, err) - require.Equal(t, completeServerAddr.String(), remoteAddr.String()) - buff = buff[:n] - require.Equal(t, "pong", string(buff)) - - err = conn.Close() - require.NoError(t, err) + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, } + + completeServerAddrStr := fmt.Sprintf("%s,[%s]:%d", + serverIA, + serverAddr.IP.String(), + serverAddr.Port) + completeServerAddr, err := snet.ParseUDPAddr(completeServerAddrStr) + require.NoError(t, err) + + paths := getRemote(ctx, t, sd, completeServerAddr.IA, info.IA) + require.Greater(t, len(paths), 0) + completeServerAddr.Path = paths[0].Dataplane() + completeServerAddr.NextHop = paths[0].UnderlayNextHop() + + conn, err := sn.Dial(ctx, "udp", clientAddr, completeServerAddr) + require.NoError(t, err) + _, err = conn.Write([]byte("ping")) + require.NoError(t, err) + t.Log("runClientWith: ping sent") + + // Read answer. + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + require.NoError(t, err) + require.Equal(t, completeServerAddr.String(), remoteAddr.String()) + buff = buff[:n] + require.Equal(t, "pong", string(buff)) + + err = conn.Close() + require.NoError(t, err) } func handlePing(t *testing.T, conn *snet.Conn) { @@ -130,6 +157,9 @@ func handlePing(t *testing.T, conn *snet.Conn) { // Pong. _, err = conn.WriteTo([]byte("pong"), remoteAddr) require.NoError(t, err) + + err = conn.Close() + require.NoError(t, err) } func getRemote( From c4c1a3a0b7dbe8935ae43e06bce136b31e0b4221 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 28 Mar 2025 12:14:43 +0100 Subject: [PATCH 03/29] First phase of multihoming test. --- pkg/snet/multihomed/multihomed_test.go | 40 +++++++++++++++++++++----- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index f2a8783b9f..4def931cf4 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -40,7 +40,7 @@ func TestMultihomed(t *testing.T) { ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) defer cancelF() - runServerAt(ctx, t, serverAddr) + runMultihomedServer(ctx, t, serverAddr.Port) runClientWith(ctx, t, serverAddr, clientAddr) } @@ -53,16 +53,41 @@ func runServerAt( require.NoError(t, err) defer sd.Close() - info, err := sd.ASInfo(ctx, 0) + topo, err := daemon.LoadTopology(ctx, sd) require.NoError(t, err) - t.Logf("local IA: %s", info.IA.String()) - interfaces, err := sd.Interfaces(ctx) - require.NoError(t, err) - for k, v := range interfaces { - t.Logf("iface %3d: %s", k, v.String()) + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, } + conn, err := sn.Listen(ctx, "udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) + + go func() { + handlePing(t, conn) + }() + t.Log("runServerAt: done") +} + +func runMultihomedServer( + ctx context.Context, + t *testing.T, + port int, +) { + sd, err := daemon.NewService(serverDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + // interfaces, err := sd.Interfaces(ctx) + // require.NoError(t, err) + // for k, v := range interfaces { + // t.Logf("iface %3d: %s", k, v.String()) + // } + topo, err := daemon.LoadTopology(ctx, sd) require.NoError(t, err) @@ -73,6 +98,7 @@ func runServerAt( Topology: topo, } + serverAddr := xtest.MustParseUDPAddr(t, fmt.Sprintf("0.0.0.0:%d", port)) conn, err := sn.Listen(ctx, "udp", serverAddr) require.NoError(t, err) require.NotNil(t, conn) From 04cad6bcd685c27611b2beef0840a35cbe6c2678 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 28 Mar 2025 12:44:04 +0100 Subject: [PATCH 04/29] Temporary changes. --- pkg/snet/conn.go | 8 ++++---- pkg/snet/multihomed/multihomed_test.go | 3 ++- pkg/snet/snet.go | 7 ++++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/snet/conn.go b/pkg/snet/conn.go index ad94cb6199..c7ba0cb9b5 100644 --- a/pkg/snet/conn.go +++ b/pkg/snet/conn.go @@ -22,7 +22,6 @@ import ( "github.com/scionproto/scion/pkg/private/common" "github.com/scionproto/scion/pkg/private/ctrl/path_mgmt" - "github.com/scionproto/scion/pkg/private/serrors" "github.com/scionproto/scion/pkg/slayers" ) @@ -72,9 +71,10 @@ func NewCookedConn( IA: topo.LocalIA, Host: pconn.LocalAddr().(*net.UDPAddr), } - if local.Host == nil || local.Host.IP.IsUnspecified() { - return nil, serrors.New("nil or unspecified address is not supported.") - } + // deleteme + // if local.Host == nil || local.Host.IP.IsUnspecified() { + // return nil, serrors.New("nil or unspecified address is not supported.") + // } hasSTUN := hasSTUNConn(pconn) return &Conn{ conn: pconn, diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index 4def931cf4..9a3cb2eb51 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -37,7 +37,8 @@ func TestMultihomed(t *testing.T) { serverAddr := xtest.MustParseUDPAddr(t, serverAddress) clientAddr := xtest.MustParseUDPAddr(t, clientAddress) - ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + // ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Hour) // deleteme defer cancelF() runMultihomedServer(ctx, t, serverAddr.Port) diff --git a/pkg/snet/snet.go b/pkg/snet/snet.go index 386d946096..59a80d624a 100644 --- a/pkg/snet/snet.go +++ b/pkg/snet/snet.go @@ -101,9 +101,10 @@ type SCIONNetwork struct { func (n *SCIONNetwork) OpenRaw(ctx context.Context, addr *net.UDPAddr) (PacketConn, error) { var pconn *net.UDPConn var err error - if addr == nil || addr.IP.IsUnspecified() { - return nil, serrors.New("nil or unspecified address is not supported") - } + // deleteme + // if addr == nil || addr.IP.IsUnspecified() { + // return nil, serrors.New("nil or unspecified address is not supported") + // } start, end := n.Topology.PortRange.Start, n.Topology.PortRange.End if addr.Port == 0 { pconn, err = listenUDPRange(addr, start, end) From 7b2db98059eda9fc17ed2fecc01966b882be895c Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Wed, 3 Sep 2025 10:45:12 +0200 Subject: [PATCH 05/29] Multihomed behavior through UDP socket creation. Via the creation of a UDP socket with destination the next hop, those snet sockets that were created without binding them to a local address, can now be used regularly. There is a additional cache that allows skipping the UDP socket creation if the address to dial to has been previously seen. There is a recurrent refresh function running in the background that checks that the host interfaces' local addresses didn't change, and if they did, clears the cache. --- pkg/snet/BUILD.bazel | 1 + pkg/snet/conn.go | 20 +- pkg/snet/multihomed/BUILD.bazel | 31 +++ pkg/snet/multihomed/export_test.go | 49 ++++ pkg/snet/multihomed/integrated_test.go | 318 +++++++++++++++++++++++++ pkg/snet/multihomed/interfaces.go | 143 +++++++++++ pkg/snet/multihomed/multihomed_test.go | 252 +++++++------------- pkg/snet/multihomed/outbound_addr.go | 67 ++++++ pkg/snet/reader.go | 32 --- pkg/snet/snet.go | 25 +- pkg/snet/writer.go | 13 + 11 files changed, 733 insertions(+), 218 deletions(-) create mode 100644 pkg/snet/multihomed/BUILD.bazel create mode 100644 pkg/snet/multihomed/export_test.go create mode 100644 pkg/snet/multihomed/integrated_test.go create mode 100644 pkg/snet/multihomed/interfaces.go create mode 100644 pkg/snet/multihomed/outbound_addr.go diff --git a/pkg/snet/BUILD.bazel b/pkg/snet/BUILD.bazel index 4307d2b1b3..a3eaee135b 100644 --- a/pkg/snet/BUILD.bazel +++ b/pkg/snet/BUILD.bazel @@ -38,6 +38,7 @@ go_library( "//pkg/slayers/path/epic:go_default_library", "//pkg/slayers/path/onehop:go_default_library", "//pkg/slayers/path/scion:go_default_library", + "//pkg/snet/multihomed:go_default_library", "//pkg/stun:go_default_library", "//private/topology:go_default_library", "//private/topology/underlay:go_default_library", diff --git a/pkg/snet/conn.go b/pkg/snet/conn.go index c7ba0cb9b5..0634b9e34b 100644 --- a/pkg/snet/conn.go +++ b/pkg/snet/conn.go @@ -22,7 +22,9 @@ import ( "github.com/scionproto/scion/pkg/private/common" "github.com/scionproto/scion/pkg/private/ctrl/path_mgmt" + "github.com/scionproto/scion/pkg/private/serrors" "github.com/scionproto/scion/pkg/slayers" + "github.com/scionproto/scion/pkg/snet/multihomed" ) type OpError struct { @@ -71,11 +73,21 @@ func NewCookedConn( IA: topo.LocalIA, Host: pconn.LocalAddr().(*net.UDPAddr), } - // deleteme - // if local.Host == nil || local.Host.IP.IsUnspecified() { - // return nil, serrors.New("nil or unspecified address is not supported.") - // } hasSTUN := hasSTUNConn(pconn) + + // If acting as a client, find the local address of the interface used to reach + // the NextHop toward the remote. + if local.Host.IP.IsUnspecified() && o.remote != nil { + localIP, err := multihomed.OutboundIP(o.remote.NextHop) + if err != nil { + return nil, serrors.Wrap( + "Cannot find the local address", err, + "local", local, + "remote", o.remote) + } + local.Host.IP = localIP + } + return &Conn{ conn: pconn, local: local, diff --git a/pkg/snet/multihomed/BUILD.bazel b/pkg/snet/multihomed/BUILD.bazel new file mode 100644 index 0000000000..05e9f9d12b --- /dev/null +++ b/pkg/snet/multihomed/BUILD.bazel @@ -0,0 +1,31 @@ +load("@rules_go//go:def.bzl", "go_library") +load("//tools:go.bzl", "go_test") + +go_library( + name = "go_default_library", + srcs = [ + "interfaces.go", + "outbound_addr.go", + ], + importpath = "github.com/scionproto/scion/pkg/snet/multihomed", + visibility = ["//visibility:public"], + deps = ["//pkg/private/serrors:go_default_library"], +) + +go_test( + name = "go_default_test", + srcs = [ + "export_test.go", + "integrated_test.go", + "multihomed_test.go", + ], + embed = [":go_default_library"], + deps = [ + "//pkg/addr:go_default_library", + "//pkg/daemon:go_default_library", + "//pkg/private/xtest:go_default_library", + "//pkg/snet:go_default_library", + "@com_github_stretchr_testify//require:go_default_library", + "@com_github_stretchr_testify//suite:go_default_library", + ], +) diff --git a/pkg/snet/multihomed/export_test.go b/pkg/snet/multihomed/export_test.go new file mode 100644 index 0000000000..530f6bb8a7 --- /dev/null +++ b/pkg/snet/multihomed/export_test.go @@ -0,0 +1,49 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed + +import ( + "net/netip" + "sync" + "testing" + + "github.com/stretchr/testify/require" +) + +func MustGetEgressIpAddresses(t *testing.T) []netip.Addr { + addrs, err := egressIpAddresses() + require.NoError(t, err) + return addrs +} + +func GetInternalMutex() *sync.RWMutex { + return &muRemoteToEgress +} + +func StopTicker() { + ticker.Stop() +} + +func GetRemoteToEgressMap() map[netip.Addr]netip.Addr { + return remoteToEgress +} + +func ReplaceRemoteToEgressMap(newMap map[netip.Addr]netip.Addr) { + remoteToEgress = newMap +} + +func GetEgressesLastState() *[]netip.Addr { + return &egressesLocalAddresses +} diff --git a/pkg/snet/multihomed/integrated_test.go b/pkg/snet/multihomed/integrated_test.go new file mode 100644 index 0000000000..95a661b605 --- /dev/null +++ b/pkg/snet/multihomed/integrated_test.go @@ -0,0 +1,318 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed_test + +import ( + "context" + "fmt" + "net" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/scionproto/scion/pkg/addr" + "github.com/scionproto/scion/pkg/daemon" + "github.com/scionproto/scion/pkg/private/xtest" + "github.com/scionproto/scion/pkg/snet" +) + +// Using test suite declared at multihomed_test.go + +// XXX(juagargi) This is not a valid unit test, should be moved to some kind of +// integration test run within a docker container. For now: +// +// Generate the tiny topology with IPv4 only: +// ./scion.sh stop ; rm -r gen/ +// ./scion.sh topology -c topology/tiny4.topo +// ./scion.sh start +// Then run the tests: +// go test ./pkg/snet/multihomed -count=1 -v -run TestUDP +// go test ./pkg/snet/multihomed -count=1 -v -run TestBasic +// go test ./pkg/snet/multihomed -count=1 -v -run TestMultihomed +var ( + serverIA = "1-ff00:0:111" + serverIPAddress = "127.0.0.1" + serverPort = "12345" + serverAddress = serverIPAddress + ":" + serverPort + clientAddress = "127.0.0.1:0" + serverDaemon = "127.0.0.19:30255" // 111 + clientDaemon = "127.0.0.27:30255" // 112 +) + +func (s *MultihomedTestSuite) TestUDP() { + t := s.T() + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelF() + + // Bind server to any interface. + serverAddr := xtest.MustParseUDPAddr(t, "0.0.0.0:"+serverPort) + + runUDPServerAt(ctx, t, serverAddr) + runUDPClientWith(ctx, t, serverAddr, nil) +} + +// TestNoRegressionCheck checks that using bound sockets (like before "multihomed" changes) +// works as expected. +func (s *MultihomedTestSuite) TestNoRegressionCheck() { + t := s.T() + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelF() + + serverAddr := xtest.MustParseUDPAddr(t, serverAddress) + clientAddr := xtest.MustParseUDPAddr(t, clientAddress) + + runServerAt(ctx, t, serverAddr) + runClientWith(ctx, t, serverAddr, clientAddr) +} + +// TestMultihomedServer temporary documentation. +// This is a test intended to debug the multihomed scion socket, remove it and make it +// an integration test. +// A multihomed socket is bound to several interfaces, or conversely to an address that spans +// multiple interfaces. +// Multihomed sockets are necessary to get connectivity via several interfaces, +// 1. At the RX side, listening to both cellular and ethernet WAN interfaces: necessary for handling +// failover one another. +// 2. At the TX side, the ability to use several paths that start on different local interfaces +// relies on a multihomed socket +func (s *MultihomedTestSuite) TestMultihomedServer() { + t := s.T() + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelF() + + serverAddr := xtest.MustParseUDPAddr(t, serverAddress) + + runMultihomedServer(ctx, t, serverAddr.Port) + runClientWith(ctx, t, serverAddr, nil) +} + +func runUDPServerAt( + ctx context.Context, + t *testing.T, + serverAddr *net.UDPAddr, +) { + conn, err := net.ListenUDP("udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) + + // Run in a different thread so that the creation of the server finishes without blocking. + go func() { + handleUDPPing(t, conn) + }() + + t.Log("runUDPServerAt: done") +} + +func runServerAt( + ctx context.Context, + t *testing.T, + serverAddr *net.UDPAddr, +) { + sd, err := daemon.NewService(serverDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + conn, err := sn.Listen(ctx, "udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) + + // Run in a different thread so that the creation of the server finishes without blocking. + go func() { + handlePing(t, conn) + }() + t.Log("runServerAt: done") +} + +func runMultihomedServer( + ctx context.Context, + t *testing.T, + port int, +) { + sd, err := daemon.NewService(serverDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + serverAddr := xtest.MustParseUDPAddr(t, fmt.Sprintf("0.0.0.0:%d", port)) + conn, err := sn.Listen(ctx, "udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) + + go func() { + handlePing(t, conn) + }() + t.Log("runServerAt: done") +} + +func runUDPClientWith( + _ context.Context, + t *testing.T, + serverAddr *net.UDPAddr, + clientAddr *net.UDPAddr, +) { + conn, err := net.DialUDP("udp", clientAddr, serverAddr) + require.NoError(t, err) + + _, err = conn.Write([]byte("ping")) + require.NoError(t, err) + t.Logf(" ---> runUDPClientWith: ping sent") + + // Read answer + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + t.Logf(" <--- runUDPClientWith: pong received. Err? %v, remote: %s read %d bytes", + err != nil, remoteAddr, n) + require.NoError(t, err) + + buff = buff[:n] + require.Equal(t, "pong", string(buff)) + + err = conn.Close() + require.NoError(t, err) +} + +func runClientWith( + ctx context.Context, + t *testing.T, + serverAddr *net.UDPAddr, + clientAddr *net.UDPAddr, +) { + sd, err := daemon.NewService(clientDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + info, err := sd.ASInfo(ctx, 0) + require.NoError(t, err) + t.Logf("client local IA: %s", info.IA.String()) + + interfaces, err := sd.Interfaces(ctx) + require.NoError(t, err) + for k, v := range interfaces { + t.Logf("iface %3d: %s", k, v.String()) + } + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + completeServerAddrStr := fmt.Sprintf("%s,[%s]:%d", + serverIA, + serverAddr.IP.String(), + serverAddr.Port) + completeServerAddr, err := snet.ParseUDPAddr(completeServerAddrStr) + require.NoError(t, err) + + paths := getRemote(ctx, t, sd, completeServerAddr.IA, info.IA) + require.Greater(t, len(paths), 0) + completeServerAddr.Path = paths[0].Dataplane() + completeServerAddr.NextHop = paths[0].UnderlayNextHop() + + conn, err := sn.Dial(ctx, "udp", clientAddr, completeServerAddr) + require.NoError(t, err) + _, err = conn.Write([]byte("ping")) + require.NoError(t, err) + t.Logf(" ---> runUDPClientWith: ping sent") + + // Read answer. + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + t.Logf(" <--- runClientWith: pong received. Err? %v, remote: %s read %d bytes", + err != nil, remoteAddr, n) + require.NoError(t, err) + require.Equal(t, completeServerAddr.String(), remoteAddr.String()) + buff = buff[:n] + require.Equal(t, "pong", string(buff)) + + err = conn.Close() + require.NoError(t, err) +} + +func handlePing(t *testing.T, conn *snet.Conn) { + t.Logf("handlePing conn = %p", conn) + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + t.Logf(" ---> handlePing. Err? %v, remote: %s, read %d bytes", + err != nil, remoteAddr, n) + require.NoError(t, err) + buff = buff[:n] + t.Logf("read from %s", remoteAddr) + // Check ping. + require.Equal(t, "ping", string(buff)) + + // Pong. + _, err = conn.WriteTo([]byte("pong"), remoteAddr) + require.NoError(t, err) + + err = conn.Close() + require.NoError(t, err) +} + +func handleUDPPing(t *testing.T, conn *net.UDPConn) { + t.Logf("handleUDPPing conn = %p", conn) + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFromUDP(buff) + t.Logf(" <--- handleUDPPing. Err? %v, remote: %s, read %d bytes", + err != nil, remoteAddr, n) + require.NoError(t, err) + + buff = buff[:n] + t.Logf("read from %s", remoteAddr) + // Check ping. + require.Equal(t, "ping", string(buff)) + + // Pong. + _, err = conn.WriteTo([]byte("pong"), remoteAddr) + require.NoError(t, err) + + err = conn.Close() + require.NoError(t, err) +} + +func getRemote( + ctx context.Context, + t *testing.T, + sd daemon.Connector, + remote, local addr.IA, +) []snet.Path { + paths, err := sd.Paths(ctx, remote, local, daemon.PathReqFlags{}) + require.NoError(t, err) + return paths +} diff --git a/pkg/snet/multihomed/interfaces.go b/pkg/snet/multihomed/interfaces.go new file mode 100644 index 0000000000..5fcf2a090c --- /dev/null +++ b/pkg/snet/multihomed/interfaces.go @@ -0,0 +1,143 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed + +// The reasons to keep track of the current local addresses are that these two things can happen: +// 1. The interface is brought down and can't be used anymore. This requires the new +// packet to be sent using a different interface, if any is available. +// 2. The interface changed address, which needs us to update the tables and record the +// new address in use. +// The second event deals with the address only, without touching the routing table, while the +// first one modifies the routing table. For our purposes, both events modify the local address +// of the interface, and for both events the solution is to query the kernel again. +// This is why on the event of any address change, we completely clear the table, forcing +// the caller to perform a syscall to find the appropriate route. + +// XXX(juagargi): The right way to keep this routing information updated is to use netlink. +// We however just keep a cache of the last used remote addresses mapped to our interfaces' +// local addresses. Additionally, if the current interfaces' local addresses change, we +// completely clear the cache. + +import ( + "fmt" + "net" + "net/netip" + "os" + "sort" + "sync" + "time" + + "github.com/scionproto/scion/pkg/private/serrors" +) + +const ( + CheckInterfacesPeriod = time.Second +) + +var ( + remoteToEgress map[netip.Addr]netip.Addr = make(map[netip.Addr]netip.Addr) + muRemoteToEgress sync.RWMutex = sync.RWMutex{} + ticker = time.NewTicker(CheckInterfacesPeriod) + egressesLocalAddresses = make([]netip.Addr, 0) +) + +func init() { + go continuousCheckInterfaces() +} + +func continuousCheckInterfaces() { + for ; ; <-ticker.C { + clearCacheIfLocalChanges(&egressesLocalAddresses) + } +} + +func clearCacheIfLocalChanges(lastState *[]netip.Addr) { + addrs := getInterfacesLocalAddresses() + if addrs == nil { + // Internal error, bail. + return + } + + // Compare with previous result. + if equalAddressList(addrs, *lastState) { + // They are the same, bail. + return + } + + // Not equal, invalidate every entry. + invalidateAll() + // And store previous state. + *lastState = addrs +} + +func getInterfacesLocalAddresses() []netip.Addr { + // We only look at the local addresses. If they are not identical to the last call, + // remove all entries from the map, forcing the callers to obtain a new routed egress. + addrs, err := egressIpAddresses() + if err != nil { + // What do we do in this case? + // We should at least log the error and erase all entries in the table. + fmt.Fprintf(os.Stderr, "cannot list the network interfaces and their addresses: %s", err) + invalidateAll() + return nil + } + // Sort the result. + sort.Slice(addrs, func(i, j int) bool { + return addrs[i].Compare(addrs[j]) < 0 + }) + return addrs +} + +func equalAddressList(a, b []netip.Addr) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i].Compare(b[i]) != 0 { + return false + } + } + return true +} + +func invalidateAll() { + muRemoteToEgress.Lock() + defer muRemoteToEgress.Unlock() + remoteToEgress = make(map[netip.Addr]netip.Addr) +} + +func egressIpAddresses() ([]netip.Addr, error) { + interfaces, err := net.Interfaces() + if err != nil { + return nil, serrors.Wrap("listing interfaces", err) + } + ipAddrs := make([]netip.Addr, 0, len(interfaces)) + + for _, iface := range interfaces { + addrs, err := iface.Addrs() + if err != nil { + return nil, serrors.Wrap("getting interface addresses", err, "interface", iface.Name) + } + for _, addr := range addrs { + ipAddr, ok := addr.(*net.IPNet) + if ok { + a, _ := netip.AddrFromSlice(ipAddr.IP) + ipAddrs = append(ipAddrs, a) + } + } + } + + return ipAddrs, nil +} diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index 9a3cb2eb51..97f64d28a1 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -1,201 +1,109 @@ -package multihomed +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed_test import ( - "context" - "fmt" - "net" + "net/netip" + "sync" "testing" "time" - "github.com/scionproto/scion/pkg/addr" - "github.com/scionproto/scion/pkg/daemon" - "github.com/scionproto/scion/pkg/private/xtest" - "github.com/scionproto/scion/pkg/snet" "github.com/stretchr/testify/require" -) + "github.com/stretchr/testify/suite" -var ( - serverIA = "1-ff00:0:111" - serverAddress = "127.0.0.1:12345" - clientAddress = "127.0.0.1:0" - serverDaemon = "127.0.0.19:30255" // 111 - clientDaemon = "127.0.0.27:30255" // 112 + "github.com/scionproto/scion/pkg/private/xtest" + "github.com/scionproto/scion/pkg/snet/multihomed" ) -func TestBasic(t *testing.T) { - serverAddr := xtest.MustParseUDPAddr(t, serverAddress) - clientAddr := xtest.MustParseUDPAddr(t, clientAddress) - - ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) - defer cancelF() - - runServerAt(ctx, t, serverAddr) - runClientWith(ctx, t, serverAddr, clientAddr) -} - func TestMultihomed(t *testing.T) { - serverAddr := xtest.MustParseUDPAddr(t, serverAddress) - clientAddr := xtest.MustParseUDPAddr(t, clientAddress) - - // ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) - ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Hour) // deleteme - defer cancelF() - - runMultihomedServer(ctx, t, serverAddr.Port) - runClientWith(ctx, t, serverAddr, clientAddr) + suite.Run(t, NewMultihomedTestSuite()) } -func runServerAt( - ctx context.Context, - t *testing.T, - serverAddr *net.UDPAddr, -) { - sd, err := daemon.NewService(serverDaemon).Connect(ctx) - require.NoError(t, err) - defer sd.Close() - - topo, err := daemon.LoadTopology(ctx, sd) - require.NoError(t, err) - - sn := &snet.SCIONNetwork{ - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - Topology: topo, - } - - conn, err := sn.Listen(ctx, "udp", serverAddr) - require.NoError(t, err) - require.NotNil(t, conn) - - go func() { - handlePing(t, conn) - }() - t.Log("runServerAt: done") +// MultihomedTestSuite ensures that each test in this package is run correctly, even +// in the presence of test functions that alter the internal behaviour of mutexes or +// critical data structures. This is done by protecting the execution of each test function +// with a RWMutex, allowing "regular" tests to obtain a read lock, and "special" tests +// to get a write lock, forcing them run in isolation. +type MultihomedTestSuite struct { + suite.Suite + muInternalsIsolated sync.RWMutex } -func runMultihomedServer( - ctx context.Context, - t *testing.T, - port int, -) { - sd, err := daemon.NewService(serverDaemon).Connect(ctx) - require.NoError(t, err) - defer sd.Close() - - // interfaces, err := sd.Interfaces(ctx) - // require.NoError(t, err) - // for k, v := range interfaces { - // t.Logf("iface %3d: %s", k, v.String()) - // } - - topo, err := daemon.LoadTopology(ctx, sd) - require.NoError(t, err) - - sn := &snet.SCIONNetwork{ - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - Topology: topo, +func NewMultihomedTestSuite() *MultihomedTestSuite { + return &MultihomedTestSuite{ + muInternalsIsolated: sync.RWMutex{}, } - - serverAddr := xtest.MustParseUDPAddr(t, fmt.Sprintf("0.0.0.0:%d", port)) - conn, err := sn.Listen(ctx, "udp", serverAddr) - require.NoError(t, err) - require.NotNil(t, conn) - - go func() { - handlePing(t, conn) - }() - t.Log("runServerAt: done") } -func runClientWith( - ctx context.Context, - t *testing.T, - serverAddr *net.UDPAddr, - clientAddr *net.UDPAddr, -) { - sd, err := daemon.NewService(clientDaemon).Connect(ctx) - require.NoError(t, err) - defer sd.Close() - - info, err := sd.ASInfo(ctx, 0) - require.NoError(t, err) - t.Logf("client local IA: %s", info.IA.String()) - - interfaces, err := sd.Interfaces(ctx) - require.NoError(t, err) - for k, v := range interfaces { - t.Logf("iface %3d: %s", k, v.String()) - } - - topo, err := daemon.LoadTopology(ctx, sd) - require.NoError(t, err) +func (s *MultihomedTestSuite) SetupTest() { + s.T().Log("--> setting up test") + s.muInternalsIsolated.RLock() +} - sn := &snet.SCIONNetwork{ - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - Topology: topo, - } +func (s *MultihomedTestSuite) TearDownTest() { + s.T().Log("<-- tearing down test") + s.muInternalsIsolated.RUnlock() +} - completeServerAddrStr := fmt.Sprintf("%s,[%s]:%d", - serverIA, - serverAddr.IP.String(), - serverAddr.Port) - completeServerAddr, err := snet.ParseUDPAddr(completeServerAddrStr) - require.NoError(t, err) +func (s *MultihomedTestSuite) TestListInterfaces() { + t := s.T() + addrs := multihomed.MustGetEgressIpAddresses(t) + require.NotEmpty(t, addrs) +} - paths := getRemote(ctx, t, sd, completeServerAddr.IA, info.IA) - require.Greater(t, len(paths), 0) - completeServerAddr.Path = paths[0].Dataplane() - completeServerAddr.NextHop = paths[0].UnderlayNextHop() +func (s *MultihomedTestSuite) TestInternalEgressCache() { + t := s.T() + // We require this function to run in isolation: lock every other test. + s.muInternalsIsolated.RUnlock() + s.muInternalsIsolated.Lock() + defer func() { + // Because we hold the write lock, unlock it. + s.muInternalsIsolated.Unlock() + // Because the test suite will expect a read lock, get it. + s.muInternalsIsolated.RLock() + }() - conn, err := sn.Dial(ctx, "udp", clientAddr, completeServerAddr) - require.NoError(t, err) - _, err = conn.Write([]byte("ping")) - require.NoError(t, err) - t.Log("runClientWith: ping sent") + // Wait for 100ms allow the ticker to run first. + time.Sleep(100 * time.Millisecond) - // Read answer. - buff := make([]byte, 2048) - n, remoteAddr, err := conn.ReadFrom(buff) - require.NoError(t, err) - require.Equal(t, completeServerAddr.String(), remoteAddr.String()) - buff = buff[:n] - require.Equal(t, "pong", string(buff)) + // Synchronize with the internal ticker routine to ensure it finished the update. + multihomed.GetInternalMutex().RLock() + // Check that the egress table is not empty. + require.NotEmpty(t, *multihomed.GetEgressesLastState()) + multihomed.GetInternalMutex().RUnlock() - err = conn.Close() - require.NoError(t, err) -} + // Stop internal refresh method. + multihomed.StopTicker() -func handlePing(t *testing.T, conn *snet.Conn) { - t.Logf("handlePing conn = %p", conn) - buff := make([]byte, 2048) - n, remoteAddr, err := conn.ReadFrom(buff) - t.Logf("handlePing, remote: %s, read %d bytes", remoteAddr, n) - require.NoError(t, err) - buff = buff[:n] - t.Logf("read from %s", remoteAddr) - // Check ping. - require.Equal(t, "ping", string(buff)) + // Clear map. + multihomed.ReplaceRemoteToEgressMap(make(map[netip.Addr]netip.Addr)) + require.Empty(t, multihomed.GetRemoteToEgressMap()) - // Pong. - _, err = conn.WriteTo([]byte("pong"), remoteAddr) - require.NoError(t, err) + // Create a pretend remote endpoint. + const mockRemoteAddress = "127.1.2.3" + const mockEgressAddress = "127.1.2.100" + mockRemote := xtest.MustParseUDPAddr(t, mockRemoteAddress+":22") - err = conn.Close() - require.NoError(t, err) -} + // Add mock remote entry to map. + multihomed.ReplaceRemoteToEgressMap(map[netip.Addr]netip.Addr{ + netip.MustParseAddr(mockRemoteAddress): netip.MustParseAddr(mockEgressAddress), + }) -func getRemote( - ctx context.Context, - t *testing.T, - sd daemon.Connector, - remote, local addr.IA, -) []snet.Path { - paths, err := sd.Paths(ctx, remote, local, daemon.PathReqFlags{}) + // Actual test, get the egress address for the remote. + expected := xtest.MustParseIP(t, mockEgressAddress).To4() + got, err := multihomed.OutboundIP(mockRemote) require.NoError(t, err) - return paths + require.Equal(t, expected, got) } diff --git a/pkg/snet/multihomed/outbound_addr.go b/pkg/snet/multihomed/outbound_addr.go new file mode 100644 index 0000000000..b1e6360bd0 --- /dev/null +++ b/pkg/snet/multihomed/outbound_addr.go @@ -0,0 +1,67 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed + +import ( + "net" + "net/netip" +) + +// OutboundIP returns the IP address used by this host to dial to the specified remote host. +// The port value in the remote udp address is irrelevant. +// It relies on a previously populated table that maps remote addresses to egress addresses. +// If the remote is not present, it is added. +func OutboundIP(raddr *net.UDPAddr) (net.IP, error) { + // Check if the table contains an entry. + muRemoteToEgress.RLock() + defer muRemoteToEgress.RLocker().Unlock() + + remote, _ := netip.AddrFromSlice(raddr.IP) + egress, ok := remoteToEgress[remote] + if ok { + return net.IP(egress.AsSlice()), nil + } + + // Not found, find it and add it. The dialing involves a syscall, but no network traffic. + eg, err := dialRemote(raddr) + if err != nil { + return nil, err + } + egress, _ = netip.AddrFromSlice(eg) + + // Beware of the RWMutex, it's read-locked already. + muRemoteToEgress.RUnlock() + muRemoteToEgress.Lock() + remoteToEgress[remote] = egress + muRemoteToEgress.Unlock() + + muRemoteToEgress.RLock() // because we always read-unlock + return eg, nil +} + +// dialRemote creates a socket used to send UDP packets to the remote endpoint. +// Note that while a syscall is performed (two including Close), there will be no network traffic. +// Anyhow, this is somewhat expensive, so try to reduce its usage. +func dialRemote(raddr *net.UDPAddr) (net.IP, error) { + conn, err := net.DialUDP("udp", nil, raddr) + if err != nil { + return nil, err + } + defer conn.Close() + + // The conn object is always a net.UDPConn, with LocalAddr statically returning + // always a *net.UDPAddr. + return conn.LocalAddr().(*net.UDPAddr).IP, nil +} diff --git a/pkg/snet/reader.go b/pkg/snet/reader.go index 88f8e6ef2d..715de4803b 100644 --- a/pkg/snet/reader.go +++ b/pkg/snet/reader.go @@ -16,7 +16,6 @@ package snet import ( "net" - "net/netip" "sync" "time" @@ -91,37 +90,6 @@ func (c *scionConnReader) read(b []byte) (int, *UDPAddr, error) { return 0, nil, serrors.New("unexpected payload", "type", common.TypeOf(pkt.Payload)) } - pktAddrPort := netip.AddrPortFrom(pkt.Destination.Host.IP(), udp.DstPort) - if c.local.IA != pkt.Destination.IA { - return 0, nil, serrors.New("packet is destined to a different IA", - "local_isd_as", c.local.IA, - "local_host", c.local.Host, - "pkt_destination_isd_as", pkt.Destination.IA, - "pkt_destination_host", pktAddrPort, - ) - } - - // XXX(JordiSubira): We explicitly forbid nil or unspecified address in the current constructor - // for Conn. - // If this were ever to change, we would always fall into the following if statement, then - // we would like to replace this logic (e.g., using IP_PKTINFO, with its caveats). - if c.local.Host.AddrPort() != pktAddrPort { - - // If the client is behind a NAT, the SCION packet will hold the mapped external address, - // which is expected to be different from the local address. To handle this case, we check - // whether the underlying connection is a stunConn, which indicates that NAT traversal - // is in use. - // TODO: Is it necessary to check that the address matches one of the mapped addresses? - if !c.hasSTUN { - return 0, nil, serrors.New("packet is destined to a different host", - "local_isd_as", c.local.IA, - "local_host", c.local.Host, - "pkt_destination_isd_as", pkt.Destination.IA, - "pkt_destination_host", pktAddrPort, - ) - } - } - // Extract remote address. // Copy the address data to prevent races. See // https://github.com/scionproto/scion/issues/1659. diff --git a/pkg/snet/snet.go b/pkg/snet/snet.go index 59a80d624a..d402452315 100644 --- a/pkg/snet/snet.go +++ b/pkg/snet/snet.go @@ -101,12 +101,8 @@ type SCIONNetwork struct { func (n *SCIONNetwork) OpenRaw(ctx context.Context, addr *net.UDPAddr) (PacketConn, error) { var pconn *net.UDPConn var err error - // deleteme - // if addr == nil || addr.IP.IsUnspecified() { - // return nil, serrors.New("nil or unspecified address is not supported") - // } start, end := n.Topology.PortRange.Start, n.Topology.PortRange.End - if addr.Port == 0 { + if addr == nil || addr.Port == 0 { pconn, err = listenUDPRange(addr, start, end) } else { if addr.Port < int(start) || addr.Port > int(end) { @@ -200,7 +196,9 @@ func (n *SCIONNetwork) Listen( return NewCookedConn(packetConn, n.Topology, WithReplyPather(n.ReplyPather)) } -func listenUDPRange(addr *net.UDPAddr, start, end uint16) (*net.UDPConn, error) { +// listenUDPRange creates a new net.UDPConn using a suitable port between the specified range. +// If laddr is not nil, the returned connection is bound to its IP address. +func listenUDPRange(laddr *net.UDPAddr, start, end uint16) (*net.UDPConn, error) { // XXX(JordiSubira): For now, we iterate on the complete SCION/UDP // range, in decreasing order, taking the first unused port. // @@ -221,11 +219,18 @@ func listenUDPRange(addr *net.UDPAddr, start, end uint16) (*net.UDPConn, error) if start < 1024 { restrictedStart = 1024 } + + // Local address used later in a loop to check if the different ports are available. + tryLocalAddr := &net.UDPAddr{} + if laddr != nil { + tryLocalAddr.IP = laddr.IP + tryLocalAddr.Zone = laddr.Zone + } + for port := end; port >= restrictedStart; port-- { - pconn, err := net.ListenUDP(addr.Network(), &net.UDPAddr{ - IP: addr.IP, - Port: int(port), - }) + tryLocalAddr.Port = int(port) + pconn, err := net.ListenUDP("udp", tryLocalAddr) + if err == nil { return pconn, nil } diff --git a/pkg/snet/writer.go b/pkg/snet/writer.go index dcf6d5d11a..301becef85 100644 --- a/pkg/snet/writer.go +++ b/pkg/snet/writer.go @@ -24,6 +24,7 @@ import ( "github.com/scionproto/scion/pkg/addr" "github.com/scionproto/scion/pkg/private/serrors" + "github.com/scionproto/scion/pkg/snet/multihomed" "github.com/scionproto/scion/private/topology" ) @@ -85,6 +86,7 @@ func (c *scionConnWriter) WriteTo(b []byte, raddr net.Addr) (int, error) { if !ok { return 0, serrors.New("invalid listen host IP", "ip", c.local.Host.IP) } + listenHostPort := uint16(c.local.Host.Port) // Rewrite source address if STUN is in use @@ -99,6 +101,17 @@ func (c *scionConnWriter) WriteTo(b []byte, raddr net.Addr) (int, error) { return 0, err } + if listenHostIP.IsUnspecified() { + // Sending data to an unbound socket, we need to find the appropriate local address + // to write it as host address in the SCION packet. The interface to use in the local + // host will be that one routed to reach the next hop. + localIP, err := multihomed.OutboundIP(nextHop) + if err != nil { + return 0, serrors.Wrap("interface IP not bound and cannot find one", err) + } + listenHostIP, _ = netip.AddrFromSlice(localIP) + } + pkt := &Packet{ Bytes: Bytes(c.buffer), PacketInfo: PacketInfo{ From 1854257e72fd39b5b39fbd09207c3143f22e38c1 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Wed, 3 Sep 2025 11:58:04 +0200 Subject: [PATCH 06/29] Limit cache size. --- pkg/snet/multihomed/interfaces.go | 1 + pkg/snet/multihomed/outbound_addr.go | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pkg/snet/multihomed/interfaces.go b/pkg/snet/multihomed/interfaces.go index 5fcf2a090c..6aba09e16d 100644 --- a/pkg/snet/multihomed/interfaces.go +++ b/pkg/snet/multihomed/interfaces.go @@ -44,6 +44,7 @@ import ( const ( CheckInterfacesPeriod = time.Second + MaxAllowedCacheSize = 65536 // Maximum number of entries present in `remoteToEgress`. ) var ( diff --git a/pkg/snet/multihomed/outbound_addr.go b/pkg/snet/multihomed/outbound_addr.go index b1e6360bd0..c472b4ded7 100644 --- a/pkg/snet/multihomed/outbound_addr.go +++ b/pkg/snet/multihomed/outbound_addr.go @@ -41,13 +41,18 @@ func OutboundIP(raddr *net.UDPAddr) (net.IP, error) { } egress, _ = netip.AddrFromSlice(eg) - // Beware of the RWMutex, it's read-locked already. - muRemoteToEgress.RUnlock() - muRemoteToEgress.Lock() - remoteToEgress[remote] = egress - muRemoteToEgress.Unlock() - - muRemoteToEgress.RLock() // because we always read-unlock + // Check if our cache is not too big already. + if len(remoteToEgress) < MaxAllowedCacheSize { + // Beware of the RWMutex, it's read-locked already. + muRemoteToEgress.RUnlock() + muRemoteToEgress.Lock() + // Check again to avoid race conditions. Could skipped it if exact size is not important. + if len(remoteToEgress) < MaxAllowedCacheSize { + remoteToEgress[remote] = egress + } + muRemoteToEgress.Unlock() + muRemoteToEgress.RLock() // because we always read-unlock in this function. + } return eg, nil } From f5f8e43a0846389e040b5feb2171cfa2f060317b Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Wed, 3 Sep 2025 11:59:15 +0200 Subject: [PATCH 07/29] Make multihomed tests more robust to concurrent running. --- pkg/snet/multihomed/integrated_test.go | 32 ++++++++++++++++++-------- pkg/snet/multihomed/multihomed_test.go | 8 +++---- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/pkg/snet/multihomed/integrated_test.go b/pkg/snet/multihomed/integrated_test.go index 95a661b605..00670f873b 100644 --- a/pkg/snet/multihomed/integrated_test.go +++ b/pkg/snet/multihomed/integrated_test.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "net" + "sync/atomic" "testing" "time" @@ -42,11 +43,12 @@ import ( // go test ./pkg/snet/multihomed -count=1 -v -run TestUDP // go test ./pkg/snet/multihomed -count=1 -v -run TestBasic // go test ./pkg/snet/multihomed -count=1 -v -run TestMultihomed + +const serverPortInitial = 12345 + var ( serverIA = "1-ff00:0:111" serverIPAddress = "127.0.0.1" - serverPort = "12345" - serverAddress = serverIPAddress + ":" + serverPort clientAddress = "127.0.0.1:0" serverDaemon = "127.0.0.19:30255" // 111 clientDaemon = "127.0.0.27:30255" // 112 @@ -54,23 +56,26 @@ var ( func (s *MultihomedTestSuite) TestUDP() { t := s.T() - ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) - defer cancelF() + t.Parallel() // Bind server to any interface. - serverAddr := xtest.MustParseUDPAddr(t, "0.0.0.0:"+serverPort) + serverAddress := fmt.Sprintf("0.0.0.0:%d", s.getServerPort()) + serverAddr := xtest.MustParseUDPAddr(t, serverAddress) - runUDPServerAt(ctx, t, serverAddr) - runUDPClientWith(ctx, t, serverAddr, nil) + runUDPServerAt(t, serverAddr) + runUDPClientWith(t, serverAddr, nil) } // TestNoRegressionCheck checks that using bound sockets (like before "multihomed" changes) // works as expected. func (s *MultihomedTestSuite) TestNoRegressionCheck() { t := s.T() + t.Parallel() + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) defer cancelF() + serverAddress := fmt.Sprintf("%s:%d", serverIPAddress, s.getServerPort()) serverAddr := xtest.MustParseUDPAddr(t, serverAddress) clientAddr := xtest.MustParseUDPAddr(t, clientAddress) @@ -90,9 +95,12 @@ func (s *MultihomedTestSuite) TestNoRegressionCheck() { // relies on a multihomed socket func (s *MultihomedTestSuite) TestMultihomedServer() { t := s.T() + t.Parallel() + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) defer cancelF() + serverAddress := fmt.Sprintf("%s:%d", serverIPAddress, s.getServerPort()) serverAddr := xtest.MustParseUDPAddr(t, serverAddress) runMultihomedServer(ctx, t, serverAddr.Port) @@ -100,7 +108,6 @@ func (s *MultihomedTestSuite) TestMultihomedServer() { } func runUDPServerAt( - ctx context.Context, t *testing.T, serverAddr *net.UDPAddr, ) { @@ -177,7 +184,6 @@ func runMultihomedServer( } func runUDPClientWith( - _ context.Context, t *testing.T, serverAddr *net.UDPAddr, clientAddr *net.UDPAddr, @@ -316,3 +322,11 @@ func getRemote( require.NoError(t, err) return paths } + +var lastPortUsed atomic.Int32 + +func (s *MultihomedTestSuite) getServerPort() int { + // Initialize to 12344 if it's uninitialized. + lastPortUsed.CompareAndSwap(0, serverPortInitial-1) + return int(lastPortUsed.Add(1)) +} diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index 97f64d28a1..a18eeb96a4 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -36,29 +36,27 @@ func TestMultihomed(t *testing.T) { // critical data structures. This is done by protecting the execution of each test function // with a RWMutex, allowing "regular" tests to obtain a read lock, and "special" tests // to get a write lock, forcing them run in isolation. +// It allows to call t.Parallel() in any and all test functions. type MultihomedTestSuite struct { suite.Suite muInternalsIsolated sync.RWMutex } func NewMultihomedTestSuite() *MultihomedTestSuite { - return &MultihomedTestSuite{ - muInternalsIsolated: sync.RWMutex{}, - } + return &MultihomedTestSuite{} } func (s *MultihomedTestSuite) SetupTest() { - s.T().Log("--> setting up test") s.muInternalsIsolated.RLock() } func (s *MultihomedTestSuite) TearDownTest() { - s.T().Log("<-- tearing down test") s.muInternalsIsolated.RUnlock() } func (s *MultihomedTestSuite) TestListInterfaces() { t := s.T() + t.Parallel() addrs := multihomed.MustGetEgressIpAddresses(t) require.NotEmpty(t, addrs) } From cd14a4d6867abb338a50cdaa715df62c8c4fa16d Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Thu, 4 Sep 2025 11:27:22 +0200 Subject: [PATCH 08/29] Fix gateway/dataplane TestNoLeak by allowing to stop the continous state getter. --- gateway/dataplane/BUILD.bazel | 1 + gateway/dataplane/session_test.go | 2 + pkg/snet/multihomed/BUILD.bazel | 5 ++- pkg/snet/multihomed/export_test.go | 4 +- pkg/snet/multihomed/interfaces.go | 54 ++++++++++++++++++++------ pkg/snet/multihomed/multihomed_test.go | 17 +++++--- 6 files changed, 63 insertions(+), 20 deletions(-) diff --git a/gateway/dataplane/BUILD.bazel b/gateway/dataplane/BUILD.bazel index c65bd346c1..5ab0e9c9d3 100644 --- a/gateway/dataplane/BUILD.bazel +++ b/gateway/dataplane/BUILD.bazel @@ -67,6 +67,7 @@ go_test( "//pkg/private/xtest:go_default_library", "//pkg/snet:go_default_library", "//pkg/snet/mock_snet:go_default_library", + "//pkg/snet/multihomed:go_default_library", "//pkg/snet/path:go_default_library", "//private/ringbuf:go_default_library", "@com_github_golang_mock//gomock:go_default_library", diff --git a/gateway/dataplane/session_test.go b/gateway/dataplane/session_test.go index b315d81fd4..b685b5e941 100644 --- a/gateway/dataplane/session_test.go +++ b/gateway/dataplane/session_test.go @@ -31,6 +31,7 @@ import ( "github.com/scionproto/scion/pkg/private/mocks/net/mock_net" "github.com/scionproto/scion/pkg/snet" "github.com/scionproto/scion/pkg/snet/mock_snet" + "github.com/scionproto/scion/pkg/snet/multihomed" snetpath "github.com/scionproto/scion/pkg/snet/path" ) @@ -83,6 +84,7 @@ func TestTwoPaths(t *testing.T) { } func TestNoLeak(t *testing.T) { + multihomed.StopContinuousCheckInterfaces(t) defer goleak.VerifyNone(t) ctrl := gomock.NewController(t) diff --git a/pkg/snet/multihomed/BUILD.bazel b/pkg/snet/multihomed/BUILD.bazel index 05e9f9d12b..a19cdde893 100644 --- a/pkg/snet/multihomed/BUILD.bazel +++ b/pkg/snet/multihomed/BUILD.bazel @@ -9,7 +9,10 @@ go_library( ], importpath = "github.com/scionproto/scion/pkg/snet/multihomed", visibility = ["//visibility:public"], - deps = ["//pkg/private/serrors:go_default_library"], + deps = [ + "//pkg/log:go_default_library", + "//pkg/private/serrors:go_default_library", + ], ) go_test( diff --git a/pkg/snet/multihomed/export_test.go b/pkg/snet/multihomed/export_test.go index 530f6bb8a7..95ad8f9f18 100644 --- a/pkg/snet/multihomed/export_test.go +++ b/pkg/snet/multihomed/export_test.go @@ -33,7 +33,7 @@ func GetInternalMutex() *sync.RWMutex { } func StopTicker() { - ticker.Stop() + stopContinuousCheckInterfaces() } func GetRemoteToEgressMap() map[netip.Addr]netip.Addr { @@ -45,5 +45,5 @@ func ReplaceRemoteToEgressMap(newMap map[netip.Addr]netip.Addr) { } func GetEgressesLastState() *[]netip.Addr { - return &egressesLocalAddresses + return localAddresses.Load() } diff --git a/pkg/snet/multihomed/interfaces.go b/pkg/snet/multihomed/interfaces.go index 6aba09e16d..7f174c1b04 100644 --- a/pkg/snet/multihomed/interfaces.go +++ b/pkg/snet/multihomed/interfaces.go @@ -37,8 +37,11 @@ import ( "os" "sort" "sync" + "sync/atomic" + "testing" "time" + "github.com/scionproto/scion/pkg/log" "github.com/scionproto/scion/pkg/private/serrors" ) @@ -48,23 +51,48 @@ const ( ) var ( - remoteToEgress map[netip.Addr]netip.Addr = make(map[netip.Addr]netip.Addr) - muRemoteToEgress sync.RWMutex = sync.RWMutex{} - ticker = time.NewTicker(CheckInterfacesPeriod) - egressesLocalAddresses = make([]netip.Addr, 0) + remoteToEgress map[netip.Addr]netip.Addr = make(map[netip.Addr]netip.Addr) + muRemoteToEgress sync.RWMutex = sync.RWMutex{} + + // The local addresses are stored in an atomic pointer to allow tests to inspect the + // internal value of it without data races. + localAddresses = atomic.Pointer[[]netip.Addr]{} + ticker = time.NewTicker(CheckInterfacesPeriod) + stopTicker = make(chan struct{}) ) func init() { - go continuousCheckInterfaces() + localAddrs := make([]netip.Addr, 0) + localAddresses.Store(&localAddrs) + go func() { + defer log.HandlePanic() + continuousCheckInterfaces() + }() +} + +// StopContinuousCheckInterfaces is used in tests where they need to stop the running +// goroutine that checks the state of the local interfaces. +func StopContinuousCheckInterfaces(*testing.T) { + if testing.Testing() { + stopContinuousCheckInterfaces() + } } func continuousCheckInterfaces() { - for ; ; <-ticker.C { - clearCacheIfLocalChanges(&egressesLocalAddresses) + clearCacheIfLocalChanges() +loop: + for { + select { + case <-ticker.C: + clearCacheIfLocalChanges() + case <-stopTicker: + ticker.Stop() + break loop + } } } -func clearCacheIfLocalChanges(lastState *[]netip.Addr) { +func clearCacheIfLocalChanges() { addrs := getInterfacesLocalAddresses() if addrs == nil { // Internal error, bail. @@ -72,7 +100,7 @@ func clearCacheIfLocalChanges(lastState *[]netip.Addr) { } // Compare with previous result. - if equalAddressList(addrs, *lastState) { + if equalAddressList(addrs, *localAddresses.Load()) { // They are the same, bail. return } @@ -80,7 +108,7 @@ func clearCacheIfLocalChanges(lastState *[]netip.Addr) { // Not equal, invalidate every entry. invalidateAll() // And store previous state. - *lastState = addrs + localAddresses.Store(&addrs) } func getInterfacesLocalAddresses() []netip.Addr { @@ -115,8 +143,8 @@ func equalAddressList(a, b []netip.Addr) bool { func invalidateAll() { muRemoteToEgress.Lock() - defer muRemoteToEgress.Unlock() remoteToEgress = make(map[netip.Addr]netip.Addr) + muRemoteToEgress.Unlock() } func egressIpAddresses() ([]netip.Addr, error) { @@ -142,3 +170,7 @@ func egressIpAddresses() ([]netip.Addr, error) { return ipAddrs, nil } + +func stopContinuousCheckInterfaces() { + stopTicker <- struct{}{} +} diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index a18eeb96a4..400d4920ed 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -73,14 +73,19 @@ func (s *MultihomedTestSuite) TestInternalEgressCache() { s.muInternalsIsolated.RLock() }() - // Wait for 100ms allow the ticker to run first. - time.Sleep(100 * time.Millisecond) - // Synchronize with the internal ticker routine to ensure it finished the update. - multihomed.GetInternalMutex().RLock() - // Check that the egress table is not empty. + checkTicker := time.NewTicker(10 * time.Millisecond) + for i, _ := 0, <-checkTicker.C; i < 10; i, _ = i+1, <-checkTicker.C { + multihomed.GetInternalMutex().RLock() + state := *multihomed.GetEgressesLastState() + multihomed.GetInternalMutex().RUnlock() + // Check that the egress table is not empty. + if len(state) > 0 { + break + } + } + checkTicker.Stop() require.NotEmpty(t, *multihomed.GetEgressesLastState()) - multihomed.GetInternalMutex().RUnlock() // Stop internal refresh method. multihomed.StopTicker() From 708e599bdf48d586ee0fba5274d43131b3162f7c Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Wed, 10 Sep 2025 12:04:00 +0200 Subject: [PATCH 09/29] Do not obtain local address at Conn creation, do it at WriteTo. --- pkg/snet/conn.go | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/pkg/snet/conn.go b/pkg/snet/conn.go index 0634b9e34b..f72400dea1 100644 --- a/pkg/snet/conn.go +++ b/pkg/snet/conn.go @@ -22,9 +22,7 @@ import ( "github.com/scionproto/scion/pkg/private/common" "github.com/scionproto/scion/pkg/private/ctrl/path_mgmt" - "github.com/scionproto/scion/pkg/private/serrors" "github.com/scionproto/scion/pkg/slayers" - "github.com/scionproto/scion/pkg/snet/multihomed" ) type OpError struct { @@ -59,7 +57,12 @@ type Conn struct { // NewCookedConn returns a "cooked" Conn. The Conn object can be used to // send/receive SCION traffic with the usual methods. // It takes as arguments a non-nil PacketConn and a non-nil Topology parameter. -// Nil or unspecified addresses for the PacketConn object are not supported. +// The local address of the PacketConn can be nil or unspecified, leaving the socket not bound +// to any particular interface; however it has its limitations, namely a created Conn not +// being able to properly react to a routing change in the OS unless the routing change is +// accompanied by a change in the local network interfaces, in the form of changing their +// local IP addresses, or adding/removing one or more local network interfaces. +// // This is an advanced API, that allows fine-tuning of the Conn underlay functionality. // The general methods for obtaining a Conn object are still SCIONNetwork.Listen and // SCIONNetwork.Dial. @@ -75,19 +78,6 @@ func NewCookedConn( } hasSTUN := hasSTUNConn(pconn) - // If acting as a client, find the local address of the interface used to reach - // the NextHop toward the remote. - if local.Host.IP.IsUnspecified() && o.remote != nil { - localIP, err := multihomed.OutboundIP(o.remote.NextHop) - if err != nil { - return nil, serrors.Wrap( - "Cannot find the local address", err, - "local", local, - "remote", o.remote) - } - local.Host.IP = localIP - } - return &Conn{ conn: pconn, local: local, From 2a286d42a9f35380381d76d5029acb04eab910a1 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Wed, 10 Sep 2025 12:06:31 +0200 Subject: [PATCH 10/29] More efficient & clear mutex locking in OutboundIP. --- pkg/snet/multihomed/outbound_addr.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/snet/multihomed/outbound_addr.go b/pkg/snet/multihomed/outbound_addr.go index c472b4ded7..8c19c6b9ed 100644 --- a/pkg/snet/multihomed/outbound_addr.go +++ b/pkg/snet/multihomed/outbound_addr.go @@ -17,25 +17,30 @@ package multihomed import ( "net" "net/netip" + + "github.com/scionproto/scion/pkg/private/serrors" ) // OutboundIP returns the IP address used by this host to dial to the specified remote host. // The port value in the remote udp address is irrelevant. // It relies on a previously populated table that maps remote addresses to egress addresses. // If the remote is not present, it is added. -func OutboundIP(raddr *net.UDPAddr) (net.IP, error) { +func OutboundIP(nextHop *net.UDPAddr) (net.IP, error) { + remote, ok := netip.AddrFromSlice(nextHop.IP) + if !ok { + return nil, serrors.New("invalid IP address", "address", nextHop.IP) + } + // Check if the table contains an entry. muRemoteToEgress.RLock() - defer muRemoteToEgress.RLocker().Unlock() - - remote, _ := netip.AddrFromSlice(raddr.IP) egress, ok := remoteToEgress[remote] + muRemoteToEgress.RLocker().Unlock() if ok { return net.IP(egress.AsSlice()), nil } // Not found, find it and add it. The dialing involves a syscall, but no network traffic. - eg, err := dialRemote(raddr) + eg, err := dialRemote(nextHop) if err != nil { return nil, err } @@ -44,14 +49,12 @@ func OutboundIP(raddr *net.UDPAddr) (net.IP, error) { // Check if our cache is not too big already. if len(remoteToEgress) < MaxAllowedCacheSize { // Beware of the RWMutex, it's read-locked already. - muRemoteToEgress.RUnlock() muRemoteToEgress.Lock() // Check again to avoid race conditions. Could skipped it if exact size is not important. if len(remoteToEgress) < MaxAllowedCacheSize { remoteToEgress[remote] = egress } muRemoteToEgress.Unlock() - muRemoteToEgress.RLock() // because we always read-unlock in this function. } return eg, nil } From d2b1582dfefd6c401671a7877bf1e84d4610a4db Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Wed, 10 Sep 2025 16:25:57 +0200 Subject: [PATCH 11/29] Remove no longer valid comment. --- pkg/snet/snet.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/snet/snet.go b/pkg/snet/snet.go index d402452315..f747c04435 100644 --- a/pkg/snet/snet.go +++ b/pkg/snet/snet.go @@ -95,7 +95,6 @@ type SCIONNetwork struct { } // OpenRaw returns a PacketConn which listens on the specified address. -// Nil or unspecified addresses are not supported. // If the address port is 0 a valid and free SCION/UDP port is automatically chosen. // Otherwise, the specified port must be a valid SCION/UDP port. func (n *SCIONNetwork) OpenRaw(ctx context.Context, addr *net.UDPAddr) (PacketConn, error) { From 8da7d473a8fa4943531c68585eb44679f47941c0 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Wed, 10 Sep 2025 16:32:37 +0200 Subject: [PATCH 12/29] Remove not needed double locking of the cache in OutboundIP. --- pkg/snet/multihomed/outbound_addr.go | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pkg/snet/multihomed/outbound_addr.go b/pkg/snet/multihomed/outbound_addr.go index 8c19c6b9ed..1146f1ee85 100644 --- a/pkg/snet/multihomed/outbound_addr.go +++ b/pkg/snet/multihomed/outbound_addr.go @@ -46,16 +46,13 @@ func OutboundIP(nextHop *net.UDPAddr) (net.IP, error) { } egress, _ = netip.AddrFromSlice(eg) + muRemoteToEgress.Lock() // Check if our cache is not too big already. if len(remoteToEgress) < MaxAllowedCacheSize { - // Beware of the RWMutex, it's read-locked already. - muRemoteToEgress.Lock() - // Check again to avoid race conditions. Could skipped it if exact size is not important. - if len(remoteToEgress) < MaxAllowedCacheSize { - remoteToEgress[remote] = egress - } - muRemoteToEgress.Unlock() + remoteToEgress[remote] = egress } + muRemoteToEgress.Unlock() + return eg, nil } From 90228c033ed81b984bfdc292cf663114d82e8d57 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Thu, 25 Sep 2025 09:42:03 +0200 Subject: [PATCH 13/29] The ticker object is local to continuousCheckInterfaces. --- pkg/snet/multihomed/interfaces.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/snet/multihomed/interfaces.go b/pkg/snet/multihomed/interfaces.go index 7f174c1b04..1414d5463d 100644 --- a/pkg/snet/multihomed/interfaces.go +++ b/pkg/snet/multihomed/interfaces.go @@ -57,7 +57,6 @@ var ( // The local addresses are stored in an atomic pointer to allow tests to inspect the // internal value of it without data races. localAddresses = atomic.Pointer[[]netip.Addr]{} - ticker = time.NewTicker(CheckInterfacesPeriod) stopTicker = make(chan struct{}) ) @@ -80,6 +79,7 @@ func StopContinuousCheckInterfaces(*testing.T) { func continuousCheckInterfaces() { clearCacheIfLocalChanges() + ticker := time.NewTicker(CheckInterfacesPeriod) loop: for { select { From 4c31ce398363750ad4d628454cc35fadc682440d Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Thu, 25 Sep 2025 10:55:02 +0200 Subject: [PATCH 14/29] Add benchmarks measuring sync.Map and RWMutex+map. --- pkg/snet/multihomed/multihomed_test.go | 85 ++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index 400d4920ed..d993f3c743 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -15,6 +15,7 @@ package multihomed_test import ( + "crypto/rand" "net/netip" "sync" "testing" @@ -110,3 +111,87 @@ func (s *MultihomedTestSuite) TestInternalEgressCache() { require.NoError(t, err) require.Equal(t, expected, got) } + +// BenchmarkSyncMapWrites (and the other 3 analogous benchmarks) are used to check the performance +// of a sync.Map (any->any) and a regular map (IP->IP) with a RWMutex. +func BenchmarkSyncMapWrites(b *testing.B) { + // Create a set of `size` IP addresses. + addrs := generateIpAddrs(b.N) + + m := sync.Map{} + b.ResetTimer() + storeInSyncMap(&m, addrs) +} + +func BenchmarkSyncMapReads(b *testing.B) { + addrs := generateIpAddrs(b.N) + m := sync.Map{} + storeInSyncMap(&m, addrs) + + // Refrain optimizer from removing code by adding the values to a discard buffer. + discardBuff := make([]netip.Addr, b.N) + b.ResetTimer() + for i, addr := range addrs { + a, ok := m.Load(addr) + addr = a.(netip.Addr) + _ = ok + discardBuff[i] = addr + } + b.StopTimer() + require.NotEmpty(b, discardBuff) + require.Len(b, discardBuff, b.N) +} + +func BenchmarkMuMapWrites(b *testing.B) { + addrs := generateIpAddrs(b.N) + + m := make(map[netip.Addr]netip.Addr) + mu := sync.RWMutex{} + b.ResetTimer() + storeInMuMap(m, &mu, addrs) +} + +func BenchmarkMuMapReads(b *testing.B) { + addrs := generateIpAddrs(b.N) + m := make(map[netip.Addr]netip.Addr) + mu := sync.RWMutex{} + storeInMuMap(m, &mu, addrs) + + // Refrain optimizer from removing code by adding the values to a discard buffer. + discardBuff := make([]netip.Addr, b.N) + b.ResetTimer() + for i, addr := range addrs { + mu.RLock() + addr, ok := m[addr] + mu.RUnlock() + _ = ok + discardBuff[i] = addr + } + b.StopTimer() + require.NotEmpty(b, discardBuff) + require.Len(b, discardBuff, b.N) +} + +func generateIpAddrs(size int) []netip.Addr { + addrs := make([]netip.Addr, size) + raw := [4]byte{} + for i := range size { + rand.Read(raw[:]) + addrs[i] = netip.AddrFrom4(raw) + } + return addrs +} + +func storeInSyncMap(m *sync.Map, addrs []netip.Addr) { + for _, addr := range addrs { + m.Store(addr, addr) + } +} + +func storeInMuMap(m map[netip.Addr]netip.Addr, mu *sync.RWMutex, addrs []netip.Addr) { + for _, addr := range addrs { + mu.Lock() + m[addr] = addr + mu.Unlock() + } +} From 9e32756f4880b15350296291a6cc3e77162277f2 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Thu, 25 Sep 2025 10:57:49 +0200 Subject: [PATCH 15/29] Use slices.Equal instead of custom function. --- pkg/snet/multihomed/interfaces.go | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/pkg/snet/multihomed/interfaces.go b/pkg/snet/multihomed/interfaces.go index 1414d5463d..2ff84845e9 100644 --- a/pkg/snet/multihomed/interfaces.go +++ b/pkg/snet/multihomed/interfaces.go @@ -35,6 +35,7 @@ import ( "net" "net/netip" "os" + "slices" "sort" "sync" "sync/atomic" @@ -100,7 +101,7 @@ func clearCacheIfLocalChanges() { } // Compare with previous result. - if equalAddressList(addrs, *localAddresses.Load()) { + if slices.Equal(addrs, *localAddresses.Load()) { // They are the same, bail. return } @@ -129,18 +130,6 @@ func getInterfacesLocalAddresses() []netip.Addr { return addrs } -func equalAddressList(a, b []netip.Addr) bool { - if len(a) != len(b) { - return false - } - for i := range a { - if a[i].Compare(b[i]) != 0 { - return false - } - } - return true -} - func invalidateAll() { muRemoteToEgress.Lock() remoteToEgress = make(map[netip.Addr]netip.Addr) From 15477a361a5744c9016c85e0195e2cf85baf6896 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Thu, 25 Sep 2025 10:58:54 +0200 Subject: [PATCH 16/29] Use RUnlock() instead of RLocker().Unlock(). --- pkg/snet/multihomed/outbound_addr.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/snet/multihomed/outbound_addr.go b/pkg/snet/multihomed/outbound_addr.go index 1146f1ee85..76e0dced43 100644 --- a/pkg/snet/multihomed/outbound_addr.go +++ b/pkg/snet/multihomed/outbound_addr.go @@ -34,7 +34,7 @@ func OutboundIP(nextHop *net.UDPAddr) (net.IP, error) { // Check if the table contains an entry. muRemoteToEgress.RLock() egress, ok := remoteToEgress[remote] - muRemoteToEgress.RLocker().Unlock() + muRemoteToEgress.RUnlock() if ok { return net.IP(egress.AsSlice()), nil } From 58aaa1aaf6956ad94484f39dbd89231730c540f6 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Thu, 25 Sep 2025 11:54:54 +0200 Subject: [PATCH 17/29] Fix comment. --- pkg/snet/multihomed/interfaces.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/snet/multihomed/interfaces.go b/pkg/snet/multihomed/interfaces.go index 2ff84845e9..7817c00dd3 100644 --- a/pkg/snet/multihomed/interfaces.go +++ b/pkg/snet/multihomed/interfaces.go @@ -108,7 +108,7 @@ func clearCacheIfLocalChanges() { // Not equal, invalidate every entry. invalidateAll() - // And store previous state. + // And store current state. localAddresses.Store(&addrs) } From 46fd4436604d6820cb5e0925ac39b1f51483ec70 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Thu, 25 Sep 2025 12:23:10 +0200 Subject: [PATCH 18/29] Simplify storage of localAddresses to just a slice. The atomic.Pointer seems not needed anymore, tests with -race work alright without warnings. --- pkg/snet/multihomed/export_test.go | 2 +- pkg/snet/multihomed/interfaces.go | 14 ++++---------- pkg/snet/multihomed/multihomed_test.go | 5 +---- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/pkg/snet/multihomed/export_test.go b/pkg/snet/multihomed/export_test.go index 95ad8f9f18..d1f4290cff 100644 --- a/pkg/snet/multihomed/export_test.go +++ b/pkg/snet/multihomed/export_test.go @@ -45,5 +45,5 @@ func ReplaceRemoteToEgressMap(newMap map[netip.Addr]netip.Addr) { } func GetEgressesLastState() *[]netip.Addr { - return localAddresses.Load() + return &localAddresses } diff --git a/pkg/snet/multihomed/interfaces.go b/pkg/snet/multihomed/interfaces.go index 7817c00dd3..3344c4aba8 100644 --- a/pkg/snet/multihomed/interfaces.go +++ b/pkg/snet/multihomed/interfaces.go @@ -38,7 +38,6 @@ import ( "slices" "sort" "sync" - "sync/atomic" "testing" "time" @@ -54,16 +53,11 @@ const ( var ( remoteToEgress map[netip.Addr]netip.Addr = make(map[netip.Addr]netip.Addr) muRemoteToEgress sync.RWMutex = sync.RWMutex{} - - // The local addresses are stored in an atomic pointer to allow tests to inspect the - // internal value of it without data races. - localAddresses = atomic.Pointer[[]netip.Addr]{} - stopTicker = make(chan struct{}) + localAddresses = make([]netip.Addr, 0) + stopTicker = make(chan struct{}) ) func init() { - localAddrs := make([]netip.Addr, 0) - localAddresses.Store(&localAddrs) go func() { defer log.HandlePanic() continuousCheckInterfaces() @@ -101,7 +95,7 @@ func clearCacheIfLocalChanges() { } // Compare with previous result. - if slices.Equal(addrs, *localAddresses.Load()) { + if slices.Equal(addrs, localAddresses) { // They are the same, bail. return } @@ -109,7 +103,7 @@ func clearCacheIfLocalChanges() { // Not equal, invalidate every entry. invalidateAll() // And store current state. - localAddresses.Store(&addrs) + localAddresses = addrs } func getInterfacesLocalAddresses() []netip.Addr { diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index d993f3c743..3519a72c7d 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -77,11 +77,8 @@ func (s *MultihomedTestSuite) TestInternalEgressCache() { // Synchronize with the internal ticker routine to ensure it finished the update. checkTicker := time.NewTicker(10 * time.Millisecond) for i, _ := 0, <-checkTicker.C; i < 10; i, _ = i+1, <-checkTicker.C { - multihomed.GetInternalMutex().RLock() - state := *multihomed.GetEgressesLastState() - multihomed.GetInternalMutex().RUnlock() // Check that the egress table is not empty. - if len(state) > 0 { + if len(*multihomed.GetEgressesLastState()) > 0 { break } } From 0e31eedb7e1b8748fd2b7771a7810b5cb7335c70 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Thu, 25 Sep 2025 12:33:02 +0200 Subject: [PATCH 19/29] Add comment clarifying interactions with NAT+STUN. --- pkg/snet/multihomed/outbound_addr.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/snet/multihomed/outbound_addr.go b/pkg/snet/multihomed/outbound_addr.go index 76e0dced43..88bb6c10eb 100644 --- a/pkg/snet/multihomed/outbound_addr.go +++ b/pkg/snet/multihomed/outbound_addr.go @@ -25,6 +25,9 @@ import ( // The port value in the remote udp address is irrelevant. // It relies on a previously populated table that maps remote addresses to egress addresses. // If the remote is not present, it is added. +// Note that NAT address discovery support in scionproto via STUN will also dial periodically +// connections to the STUN server, which should be enough to find the local address used +// to route to the next hop. func OutboundIP(nextHop *net.UDPAddr) (net.IP, error) { remote, ok := netip.AddrFromSlice(nextHop.IP) if !ok { From ce9224b3d5f8b165da3e006bae46f3457cbe0338 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 22 May 2026 13:26:39 +0200 Subject: [PATCH 20/29] Fix build after rebasing on last master. --- pkg/snet/multihomed/BUILD.bazel | 1 + pkg/snet/multihomed/integrated_test.go | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/snet/multihomed/BUILD.bazel b/pkg/snet/multihomed/BUILD.bazel index a19cdde893..caf43e707c 100644 --- a/pkg/snet/multihomed/BUILD.bazel +++ b/pkg/snet/multihomed/BUILD.bazel @@ -26,6 +26,7 @@ go_test( deps = [ "//pkg/addr:go_default_library", "//pkg/daemon:go_default_library", + "//pkg/daemon/types:go_default_library", "//pkg/private/xtest:go_default_library", "//pkg/snet:go_default_library", "@com_github_stretchr_testify//require:go_default_library", diff --git a/pkg/snet/multihomed/integrated_test.go b/pkg/snet/multihomed/integrated_test.go index 00670f873b..503d353687 100644 --- a/pkg/snet/multihomed/integrated_test.go +++ b/pkg/snet/multihomed/integrated_test.go @@ -26,6 +26,7 @@ import ( "github.com/scionproto/scion/pkg/addr" "github.com/scionproto/scion/pkg/daemon" + daemontypes "github.com/scionproto/scion/pkg/daemon/types" "github.com/scionproto/scion/pkg/private/xtest" "github.com/scionproto/scion/pkg/snet" ) @@ -318,7 +319,7 @@ func getRemote( sd daemon.Connector, remote, local addr.IA, ) []snet.Path { - paths, err := sd.Paths(ctx, remote, local, daemon.PathReqFlags{}) + paths, err := sd.Paths(ctx, remote, local, daemontypes.PathReqFlags{}) require.NoError(t, err) return paths } From 9d5b5d50bda936dbc185a75aa50e29256432b083 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 22 May 2026 14:49:08 +0200 Subject: [PATCH 21/29] Move the unit test to an acceptance test. --- acceptance/multihomed/BUILD.bazel | 15 ++ acceptance/multihomed/test-client/BUILD.bazel | 19 ++ acceptance/multihomed/test-client/main.go | 127 +++++++++++++ acceptance/multihomed/test-server/BUILD.bazel | 18 ++ acceptance/multihomed/test-server/main.go | 105 +++++++++++ acceptance/multihomed/test.py | 176 ++++++++++++++++++ pkg/snet/multihomed/BUILD.bazel | 1 - 7 files changed, 460 insertions(+), 1 deletion(-) create mode 100644 acceptance/multihomed/BUILD.bazel create mode 100644 acceptance/multihomed/test-client/BUILD.bazel create mode 100644 acceptance/multihomed/test-client/main.go create mode 100644 acceptance/multihomed/test-server/BUILD.bazel create mode 100644 acceptance/multihomed/test-server/main.go create mode 100755 acceptance/multihomed/test.py diff --git a/acceptance/multihomed/BUILD.bazel b/acceptance/multihomed/BUILD.bazel new file mode 100644 index 0000000000..78ad13cc02 --- /dev/null +++ b/acceptance/multihomed/BUILD.bazel @@ -0,0 +1,15 @@ +load("//acceptance/common:topogen.bzl", "topogen_test") + +topogen_test( + name = "test", + src = "test.py", + args = [ + "--executable=test-client:$(location //acceptance/multihomed/test-client)", + "--executable=test-server:$(location //acceptance/multihomed/test-server)", + ], + data = [ + "//acceptance/multihomed/test-client", + "//acceptance/multihomed/test-server", + ], + topo = "//topology:tiny.topo", +) diff --git a/acceptance/multihomed/test-client/BUILD.bazel b/acceptance/multihomed/test-client/BUILD.bazel new file mode 100644 index 0000000000..8d83cf7245 --- /dev/null +++ b/acceptance/multihomed/test-client/BUILD.bazel @@ -0,0 +1,19 @@ +load("@rules_go//go:def.bzl", "go_binary", "go_library") + +go_library( + name = "go_default_library", + srcs = ["main.go"], + importpath = "github.com/scionproto/scion/acceptance/multihomed/test-client", + visibility = ["//visibility:private"], + deps = [ + "//pkg/daemon:go_default_library", + "//pkg/daemon/types:go_default_library", + "//pkg/snet:go_default_library", + ], +) + +go_binary( + name = "test-client", + embed = [":go_default_library"], + visibility = ["//visibility:public"], +) diff --git a/acceptance/multihomed/test-client/main.go b/acceptance/multihomed/test-client/main.go new file mode 100644 index 0000000000..6cf20c5df3 --- /dev/null +++ b/acceptance/multihomed/test-client/main.go @@ -0,0 +1,127 @@ +// Copyright 2026 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "flag" + "log" + "os" + + "github.com/scionproto/scion/pkg/daemon" + daemontypes "github.com/scionproto/scion/pkg/daemon/types" + "github.com/scionproto/scion/pkg/snet" +) + +func main() { + log.SetOutput(os.Stdout) + + // Parse test inputs. The remote is provided as a full SCION UDP address so the same + // client binary can probe server primary and secondary IPs without code changes. + var daemonAddr string + var localAddr snet.UDPAddr + var remoteAddr snet.UDPAddr + var expect string + var expectAddr *snet.UDPAddr + + flag.StringVar(&daemonAddr, "daemon", "", "SCION daemon address") + flag.Var(&localAddr, "local", "Local SCION address") + flag.Var(&remoteAddr, "remote", "Remote SCION address") + flag.StringVar(&expect, "expect", "", "Expected remote SCION address") + flag.Parse() + + if expect != "" { + parsed, err := snet.ParseUDPAddr(expect) + if err != nil { + log.Fatalf("parse expected remote address: %v", err) + } + expectAddr = parsed + } + + if daemonAddr == "" { + daemonAddr = os.Getenv("SCION_DAEMON_ADDRESS") + } + if daemonAddr == "" { + daemonAddr = os.Getenv("SCION_DAEMON") + } + if daemonAddr == "" { + log.Fatal("daemon address missing: pass -daemon or set SCION_DAEMON_ADDRESS/SCION_DAEMON") + } + + // Resolve a path from local IA to remote IA. + ctx := context.Background() + sd, err := daemon.NewService(daemonAddr).Connect(ctx) + if err != nil { + log.Fatalf("connect daemon: %v", err) + } + defer sd.Close() + + paths, err := sd.Paths(ctx, remoteAddr.IA, localAddr.IA, daemontypes.PathReqFlags{Refresh: true}) + if err != nil { + log.Fatalf("path lookup: %v", err) + } + if len(paths) == 0 { + log.Fatalf("no path from %s to %s", localAddr.IA, remoteAddr.IA) + } + sp := paths[0] + + // Build a SCION connection pinned to the selected path. + topo, err := daemon.LoadTopology(ctx, sd) + if err != nil { + log.Fatalf("load topology: %v", err) + } + remoteAddr.Path = sp.Dataplane() + remoteAddr.NextHop = sp.UnderlayNextHop() + + sn := snet.SCIONNetwork{ + Topology: topo, + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + } + + conn, err := sn.Dial(ctx, "udp", localAddr.Host, &remoteAddr) + if err != nil { + log.Fatalf("dial: %v", err) + } + defer conn.Close() + + // Exchange ping/pong payloads and assert reply endpoint if requested by the caller. + _, err = conn.Write([]byte("ping")) + if err != nil { + log.Fatalf("write ping: %v", err) + } + + buf := make([]byte, 2048) + n, from, err := conn.ReadFrom(buf) + if err != nil { + log.Fatalf("read pong: %v", err) + } + if string(buf[:n]) != "pong" { + log.Fatalf("unexpected payload: %q", string(buf[:n])) + } + if expectAddr != nil { + got, ok := from.(*snet.UDPAddr) + if !ok { + log.Fatalf("unexpected remote type %T", from) + } + if got.IA != expectAddr.IA || got.Host.Port != expectAddr.Host.Port || + !got.Host.IP.Equal(expectAddr.Host.IP) { + log.Fatalf("unexpected remote. got=%s want=%s", got, expectAddr) + } + } + + log.Printf("client success remote=%s", from) +} diff --git a/acceptance/multihomed/test-server/BUILD.bazel b/acceptance/multihomed/test-server/BUILD.bazel new file mode 100644 index 0000000000..3fca8e87f1 --- /dev/null +++ b/acceptance/multihomed/test-server/BUILD.bazel @@ -0,0 +1,18 @@ +load("@rules_go//go:def.bzl", "go_binary", "go_library") + +go_library( + name = "go_default_library", + srcs = ["main.go"], + importpath = "github.com/scionproto/scion/acceptance/multihomed/test-server", + visibility = ["//visibility:private"], + deps = [ + "//pkg/daemon:go_default_library", + "//pkg/snet:go_default_library", + ], +) + +go_binary( + name = "test-server", + embed = [":go_default_library"], + visibility = ["//visibility:public"], +) diff --git a/acceptance/multihomed/test-server/main.go b/acceptance/multihomed/test-server/main.go new file mode 100644 index 0000000000..62dd342861 --- /dev/null +++ b/acceptance/multihomed/test-server/main.go @@ -0,0 +1,105 @@ +// Copyright 2026 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "flag" + "fmt" + "log" + "net" + "os" + + "github.com/scionproto/scion/pkg/daemon" + "github.com/scionproto/scion/pkg/snet" +) + +func main() { + log.SetOutput(os.Stdout) + + // Parse test inputs. The same server binary is used for: + // - bound mode: bind to a specific host IP + // - multihomed mode: bind to 0.0.0.0 and accept traffic via multiple local IPs + var daemonAddr string + var bindAddr string + var port int + var mode string + + flag.StringVar(&daemonAddr, "daemon", "", "SCION daemon address") + flag.StringVar(&bindAddr, "bind", "0.0.0.0", "Bind host") + flag.IntVar(&port, "port", 31000, "Bind UDP port") + flag.StringVar(&mode, "mode", "multihomed", "Server mode") + flag.Parse() + + if daemonAddr == "" { + daemonAddr = os.Getenv("SCION_DAEMON_ADDRESS") + } + if daemonAddr == "" { + daemonAddr = os.Getenv("SCION_DAEMON") + } + if daemonAddr == "" { + log.Fatal("daemon address missing: pass -daemon or set SCION_DAEMON_ADDRESS/SCION_DAEMON") + } + + // Initialize SCION networking stack from daemon topology. + ctx := context.Background() + sd, err := daemon.NewService(daemonAddr).Connect(ctx) + if err != nil { + log.Fatalf("connect daemon: %v", err) + } + defer sd.Close() + + topo, err := daemon.LoadTopology(ctx, sd) + if err != nil { + log.Fatalf("load topology: %v", err) + } + + sn := snet.SCIONNetwork{ + Topology: topo, + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + } + + // Bind listening socket according to requested mode/host. + local, err := net.ResolveUDPAddr("udp", net.JoinHostPort(bindAddr, fmt.Sprintf("%d", port))) + if err != nil { + log.Fatalf("parse bind address: %v", err) + } + conn, err := sn.Listen(ctx, "udp", local) + if err != nil { + log.Fatalf("listen: %v", err) + } + defer conn.Close() + + log.Printf("server running mode=%s daemon=%s bind=%s:%d", mode, daemonAddr, bindAddr, port) + + // Single ping/pong exchange; process exits afterwards so the test can restart with a fresh bind. + buf := make([]byte, 2048) + n, remote, err := conn.ReadFrom(buf) + if err != nil { + log.Fatalf("read ping: %v", err) + } + if string(buf[:n]) != "ping" { + log.Fatalf("unexpected payload: %q", string(buf[:n])) + } + + _, err = conn.WriteTo([]byte("pong"), remote) + if err != nil { + log.Fatalf("write pong: %v", err) + } + + log.Printf("served ping from %s", remote) +} diff --git a/acceptance/multihomed/test.py b/acceptance/multihomed/test.py new file mode 100755 index 0000000000..d2b8526486 --- /dev/null +++ b/acceptance/multihomed/test.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright 2025 ETH Zurich +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Acceptance test for SCION multihomed socket behavior. + +Manual run (single command): + bazel test --config=integration //acceptance/multihomed:test --test_output=streamed + +Manual run in phases (useful for debugging): + bazel run //acceptance/multihomed:test_setup + bazel run //acceptance/multihomed:test_run + bazel run //acceptance/multihomed:test_teardown + +Validation steps: +1. Setup phase adds a second docker network and a second IP address to the server tester + container. The client tester and the AS111 border router are attached to that same extra + network so the secondary destination IP is actually reachable inside the remote AS. +2. Regression check (bound server): server binds to the primary server IP only, and client + verifies ping/pong works via that bound endpoint. +3. Multihomed check A (unbound server): server binds to 0.0.0.0, and client reaches it via + server primary IP. +4. Multihomed check B (unbound server): server binds to 0.0.0.0, and client reaches it via + server secondary IP. The reply may still use the server's primary IP as source address, so + this step checks successful ping/pong delivery rather than exact reply source selection. +""" + +import time +import yaml +from plumbum import local + +from acceptance.common import base + + +class Test(base.TestTopogen): + def setup_prepare(self): + super().setup_prepare() + + # Patch generated docker-compose topology to give the tester network namespaces a second + # IP address on a dedicated local network. Tester containers inherit networking from their + # disp_tester sidecars, so the extra network must be attached there rather than on the + # tester services themselves. The remote AS border router also needs connectivity to that + # subnet, otherwise packets addressed to the secondary IP would time out at delivery. + compose_path = self.artifacts / "gen/scion-dc.yml" + with open(compose_path, "r") as file: + scion_dc = yaml.safe_load(file) + + scion_dc["networks"]["local_002"] = { + "driver": "bridge", + "ipam": {"config": [{"subnet": "192.168.200.0/24"}]}, + } + + server_disp = scion_dc["services"]["disp_tester_1-ff00_0_111"] + client_disp = scion_dc["services"]["disp_tester_1-ff00_0_112"] + server_router = scion_dc["services"]["br1-ff00_0_111-1"] + + server_disp.setdefault("networks", {})["local_002"] = {"ipv4_address": "192.168.200.11"} + client_disp.setdefault("networks", {})["local_002"] = {"ipv4_address": "192.168.200.12"} + server_router.setdefault("networks", {})["local_002"] = {"ipv4_address": "192.168.200.21"} + + with open(compose_path, "w") as file: + yaml.dump(scion_dc, file) + + def _server_primary_ip(self): + # Read the original topology IP from the tester dispatcher so we can test both original + # and secondary IPs of the shared tester network namespace. + compose_path = self.artifacts / "gen/scion-dc.yml" + with open(compose_path, "r") as file: + scion_dc = yaml.safe_load(file) + + nets = scion_dc["services"]["disp_tester_1-ff00_0_111"]["networks"] + if "scn_002" in nets and "ipv4_address" in nets["scn_002"]: + return nets["scn_002"]["ipv4_address"] + for data in nets.values(): + ip = data.get("ipv4_address") + if ip and ip != "192.168.200.11": + return ip + raise RuntimeError("could not determine server primary IP") + + def _run(self): + # Wait until SCION control-plane/path connectivity is established. + self.await_connectivity() + time.sleep(5) + + # Copy test binaries into the two tester containers. + test_client = local["realpath"](self.get_executable("test-client").executable).strip() + test_server = local["realpath"](self.get_executable("test-server").executable).strip() + + self.dc("cp", test_server, "tester_1-ff00_0_111:/bin/") + self.dc("cp", test_client, "tester_1-ff00_0_112:/bin/") + + primary_ip = self._server_primary_ip() + secondary_ip = "192.168.200.11" + bound_port = 31001 + multihomed_port = 31000 + + print(f"server IPs configured: primary={primary_ip}, secondary={secondary_ip}") + + # 1) Regression check: bound server must still work with a specific IP binding. + self.dc.execute_detached( + "tester_1-ff00_0_111", + "bash", + "-c", + f"test-server -bind {primary_ip} -port {bound_port} -mode bound", + ) + time.sleep(2) + + local_addr = "1-ff00:0:112,0.0.0.0:0" + remote_bound = f"1-ff00:0:111,{primary_ip}:{bound_port}" + print(f"running bound-address regression scenario: {remote_bound}") + result_bound = self.dc.execute( + "tester_1-ff00_0_112", + "bash", + "-c", + f'test-client -local "{local_addr}" -remote "{remote_bound}" ' + f'-expect "{remote_bound}"', + ) + print(result_bound) + + # 2) Multihomed check using server primary IP while server is unbound (0.0.0.0). + self.dc.execute_detached( + "tester_1-ff00_0_111", + "bash", + "-c", + f"test-server -bind 0.0.0.0 -port {multihomed_port} -mode multihomed", + ) + time.sleep(2) + + remote_primary = f"1-ff00:0:111,{primary_ip}:{multihomed_port}" + print(f"running client against primary IP: {remote_primary}") + result_primary = self.dc.execute( + "tester_1-ff00_0_112", + "bash", + "-c", + f'test-client -local "{local_addr}" -remote "{remote_primary}" ' + f'-expect "{remote_primary}"', + ) + print(result_primary) + + # 3) Multihomed check using server secondary IP while server is unbound (0.0.0.0). + # Linux may still select the primary IP as reply source, so this validates connectivity + # through the secondary destination without enforcing the response source address. + self.dc.execute_detached( + "tester_1-ff00_0_111", + "bash", + "-c", + f"test-server -bind 0.0.0.0 -port {multihomed_port} -mode multihomed", + ) + time.sleep(2) + + remote_secondary = f"1-ff00:0:111,{secondary_ip}:{multihomed_port}" + print(f"running client against secondary IP: {remote_secondary}") + result_secondary = self.dc.execute( + "tester_1-ff00_0_112", + "bash", + "-c", + f'test-client -local "{local_addr}" -remote "{remote_secondary}"', + ) + print(result_secondary) + + +if __name__ == "__main__": + base.main(Test) diff --git a/pkg/snet/multihomed/BUILD.bazel b/pkg/snet/multihomed/BUILD.bazel index caf43e707c..98487655ea 100644 --- a/pkg/snet/multihomed/BUILD.bazel +++ b/pkg/snet/multihomed/BUILD.bazel @@ -19,7 +19,6 @@ go_test( name = "go_default_test", srcs = [ "export_test.go", - "integrated_test.go", "multihomed_test.go", ], embed = [":go_default_library"], From 3162dd7565583caa92554d66beb8f4bf1401bf90 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 22 May 2026 16:18:15 +0200 Subject: [PATCH 22/29] Simplify test: server at 110, clients at 111 and 112. Have one server sitting at 110, with two distinct IP addresses, each suitable for one BR in 110. Server listens in 0.0.0.0 Clients in 111 and 112 check the reply address corresponds to that of the particular interface of the particular BR used to talk to the server. --- acceptance/multihomed/test-server/BUILD.bazel | 5 +- acceptance/multihomed/test-server/main.go | 95 ++++--- acceptance/multihomed/test.py | 234 +++++++++--------- 3 files changed, 161 insertions(+), 173 deletions(-) mode change 100755 => 100644 acceptance/multihomed/test.py diff --git a/acceptance/multihomed/test-server/BUILD.bazel b/acceptance/multihomed/test-server/BUILD.bazel index 3fca8e87f1..08e5d3a1d9 100644 --- a/acceptance/multihomed/test-server/BUILD.bazel +++ b/acceptance/multihomed/test-server/BUILD.bazel @@ -5,10 +5,7 @@ go_library( srcs = ["main.go"], importpath = "github.com/scionproto/scion/acceptance/multihomed/test-server", visibility = ["//visibility:private"], - deps = [ - "//pkg/daemon:go_default_library", - "//pkg/snet:go_default_library", - ], + deps = ["//pkg/snet:go_default_library"], ) go_binary( diff --git a/acceptance/multihomed/test-server/main.go b/acceptance/multihomed/test-server/main.go index 62dd342861..302563a46c 100644 --- a/acceptance/multihomed/test-server/main.go +++ b/acceptance/multihomed/test-server/main.go @@ -15,14 +15,12 @@ package main import ( - "context" "flag" - "fmt" "log" "net" "os" + "strconv" - "github.com/scionproto/scion/pkg/daemon" "github.com/scionproto/scion/pkg/snet" ) @@ -30,76 +28,75 @@ func main() { log.SetOutput(os.Stdout) // Parse test inputs. The same server binary is used for: - // - bound mode: bind to a specific host IP - // - multihomed mode: bind to 0.0.0.0 and accept traffic via multiple local IPs - var daemonAddr string + // - IPv4 unbound mode: bind to 0.0.0.0 + // - IPv6 unbound mode: bind to :: var bindAddr string var port int - var mode string - flag.StringVar(&daemonAddr, "daemon", "", "SCION daemon address") flag.StringVar(&bindAddr, "bind", "0.0.0.0", "Bind host") flag.IntVar(&port, "port", 31000, "Bind UDP port") - flag.StringVar(&mode, "mode", "multihomed", "Server mode") flag.Parse() - if daemonAddr == "" { - daemonAddr = os.Getenv("SCION_DAEMON_ADDRESS") - } - if daemonAddr == "" { - daemonAddr = os.Getenv("SCION_DAEMON") - } - if daemonAddr == "" { - log.Fatal("daemon address missing: pass -daemon or set SCION_DAEMON_ADDRESS/SCION_DAEMON") - } - - // Initialize SCION networking stack from daemon topology. - ctx := context.Background() - sd, err := daemon.NewService(daemonAddr).Connect(ctx) - if err != nil { - log.Fatalf("connect daemon: %v", err) - } - defer sd.Close() - - topo, err := daemon.LoadTopology(ctx, sd) - if err != nil { - log.Fatalf("load topology: %v", err) - } - - sn := snet.SCIONNetwork{ - Topology: topo, - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - } - - // Bind listening socket according to requested mode/host. - local, err := net.ResolveUDPAddr("udp", net.JoinHostPort(bindAddr, fmt.Sprintf("%d", port))) + // Bind a raw UDP socket in the tester namespace. Replies are created by reversing the + // received SCION packet, which preserves the destination address the client originally used. + local, err := net.ResolveUDPAddr("udp", net.JoinHostPort(bindAddr, portString(port))) if err != nil { log.Fatalf("parse bind address: %v", err) } - conn, err := sn.Listen(ctx, "udp", local) + conn, err := net.ListenUDP("udp", local) if err != nil { log.Fatalf("listen: %v", err) } defer conn.Close() - log.Printf("server running mode=%s daemon=%s bind=%s:%d", mode, daemonAddr, bindAddr, port) + log.Printf("server running bind=%s:%d", bindAddr, port) // Single ping/pong exchange; process exits afterwards so the test can restart with a fresh bind. - buf := make([]byte, 2048) - n, remote, err := conn.ReadFrom(buf) + var pkt snet.Packet + pkt.Prepare() + n, lastHop, err := conn.ReadFrom(pkt.Bytes) if err != nil { log.Fatalf("read ping: %v", err) } - if string(buf[:n]) != "ping" { - log.Fatalf("unexpected payload: %q", string(buf[:n])) + pkt.Bytes = pkt.Bytes[:n] + + if err := pkt.Decode(); err != nil { + log.Fatalf("decode packet: %v", err) + } + pld, ok := pkt.Payload.(snet.UDPPayload) + if !ok { + log.Fatalf("unexpected payload type %T", pkt.Payload) + } + if string(pld.Payload) != "ping" { + log.Fatalf("unexpected payload: %q", string(pld.Payload)) } - _, err = conn.WriteTo([]byte("pong"), remote) + rawPath, ok := pkt.Path.(snet.RawPath) + if !ok { + log.Fatalf("unexpected path type %T", pkt.Path) + } + replyPath, err := snet.DefaultReplyPather{}.ReplyPath(rawPath) if err != nil { + log.Fatalf("reverse path: %v", err) + } + + pkt.Destination, pkt.Source = pkt.Source, pkt.Destination + pkt.Path = replyPath + pkt.Payload = snet.UDPPayload{ + SrcPort: pld.DstPort, + DstPort: pld.SrcPort, + Payload: []byte("pong"), + } + if err := pkt.Serialize(); err != nil { + log.Fatalf("serialize reply: %v", err) + } + if _, err := conn.WriteTo(pkt.Bytes, lastHop); err != nil { log.Fatalf("write pong: %v", err) } - log.Printf("served ping from %s", remote) + log.Printf("served ping from %s", pkt.Destination) +} + +func portString(port int) string { + return strconv.Itoa(port) } diff --git a/acceptance/multihomed/test.py b/acceptance/multihomed/test.py old mode 100755 new mode 100644 index d2b8526486..671330f717 --- a/acceptance/multihomed/test.py +++ b/acceptance/multihomed/test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2025 ETH Zurich +# Copyright 2026 ETH Zurich # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,163 +14,157 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Acceptance test for SCION multihomed socket behavior. - -Manual run (single command): - bazel test --config=integration //acceptance/multihomed:test --test_output=streamed - -Manual run in phases (useful for debugging): - bazel run //acceptance/multihomed:test_setup - bazel run //acceptance/multihomed:test_run - bazel run //acceptance/multihomed:test_teardown - -Validation steps: -1. Setup phase adds a second docker network and a second IP address to the server tester - container. The client tester and the AS111 border router are attached to that same extra - network so the secondary destination IP is actually reachable inside the remote AS. -2. Regression check (bound server): server binds to the primary server IP only, and client - verifies ping/pong works via that bound endpoint. -3. Multihomed check A (unbound server): server binds to 0.0.0.0, and client reaches it via - server primary IP. -4. Multihomed check B (unbound server): server binds to 0.0.0.0, and client reaches it via - server secondary IP. The reply may still use the server's primary IP as source address, so - this step checks successful ping/pong delivery rather than exact reply source selection. +"""Acceptance test for multihomed endhost source-address selection. + +Manual runs: + bazel test --config=integration //acceptance/multihomed:test --test_output=streamed + +The setup/run/teardown stages can also be exercised independently: + bazel run //acceptance/multihomed:test_setup + bazel run //acceptance/multihomed:test_run + bazel run //acceptance/multihomed:test_teardown + +Test flow: +1. Generate the tiny topology and keep the generated control-plane addressing unchanged. +2. Attach one extra compose-managed subnet to `br1-ff00_0_110-2` and to the AS110 tester + namespace so the server gets a second address behind the second border router without + rewriting the topology generator's internal BR addresses. +3. Start fresh unbound server instances in AS110 and probe them from AS111 and AS112, + targeting the address that is reachable through each border router. +4. Start fresh bound server instances in AS110, one bound to the AS111-facing address + and one bound to the AS112-facing address. +5. Require the client in all four cases to observe the reply coming back from the exact + SCION source address it targeted. """ import time + import yaml from plumbum import local from acceptance.common import base +SERVER_PORT = 31000 +SERVER_PRIMARY_IP = "172.20.0.22" +SERVER_SECONDARY_IP = "192.168.201.3" +SERVER_SECONDARY_BR_IP = "192.168.201.2" +SERVER_SECONDARY_SUBNET = "192.168.201.0/24" +SERVER_SECONDARY_NETWORK = "local_110_br2" + +SERVER_IA = "1-ff00:0:110" +CLIENT_111_IA = "1-ff00:0:111" +CLIENT_112_IA = "1-ff00:0:112" + +SERVER_CONTAINER = "tester_1-ff00_0_110" +CLIENT_111_CONTAINER = "tester_1-ff00_0_111" +CLIENT_112_CONTAINER = "tester_1-ff00_0_112" +SERVER_DISPATCHER = "disp_tester_1-ff00_0_110" +SERVER_BR2 = "br1-ff00_0_110-2" + + class Test(base.TestTopogen): def setup_prepare(self): super().setup_prepare() - # Patch generated docker-compose topology to give the tester network namespaces a second - # IP address on a dedicated local network. Tester containers inherit networking from their - # disp_tester sidecars, so the extra network must be attached there rather than on the - # tester services themselves. The remote AS border router also needs connectivity to that - # subnet, otherwise packets addressed to the secondary IP would time out at delivery. + # Add a second endhost-facing subnet behind br1-ff00_0_110-2 while leaving the + # generated control-plane topology untouched. This is intentionally a compose-only + # network change: the BR keeps its original generated internal address for SCION + # control-plane traffic, while the server gains a second reachable host address for + # the multihoming assertion. The raw packet-reversal server keeps the reply source + # equal to the destination address that each client targeted. compose_path = self.artifacts / "gen/scion-dc.yml" - with open(compose_path, "r") as file: + with open(compose_path, "r", encoding="utf-8") as file: scion_dc = yaml.safe_load(file) - scion_dc["networks"]["local_002"] = { + scion_dc["networks"][SERVER_SECONDARY_NETWORK] = { "driver": "bridge", - "ipam": {"config": [{"subnet": "192.168.200.0/24"}]}, + "ipam": {"config": [{"subnet": SERVER_SECONDARY_SUBNET}]}, + } + scion_dc["services"][SERVER_BR2]["networks"][SERVER_SECONDARY_NETWORK] = { + "ipv4_address": SERVER_SECONDARY_BR_IP, + } + scion_dc["services"][SERVER_DISPATCHER]["networks"][SERVER_SECONDARY_NETWORK] = { + "ipv4_address": SERVER_SECONDARY_IP, } - server_disp = scion_dc["services"]["disp_tester_1-ff00_0_111"] - client_disp = scion_dc["services"]["disp_tester_1-ff00_0_112"] - server_router = scion_dc["services"]["br1-ff00_0_111-1"] - - server_disp.setdefault("networks", {})["local_002"] = {"ipv4_address": "192.168.200.11"} - client_disp.setdefault("networks", {})["local_002"] = {"ipv4_address": "192.168.200.12"} - server_router.setdefault("networks", {})["local_002"] = {"ipv4_address": "192.168.200.21"} - - with open(compose_path, "w") as file: - yaml.dump(scion_dc, file) - - def _server_primary_ip(self): - # Read the original topology IP from the tester dispatcher so we can test both original - # and secondary IPs of the shared tester network namespace. - compose_path = self.artifacts / "gen/scion-dc.yml" - with open(compose_path, "r") as file: - scion_dc = yaml.safe_load(file) - - nets = scion_dc["services"]["disp_tester_1-ff00_0_111"]["networks"] - if "scn_002" in nets and "ipv4_address" in nets["scn_002"]: - return nets["scn_002"]["ipv4_address"] - for data in nets.values(): - ip = data.get("ipv4_address") - if ip and ip != "192.168.200.11": - return ip - raise RuntimeError("could not determine server primary IP") + with open(compose_path, "w", encoding="utf-8") as file: + yaml.safe_dump(scion_dc, file, sort_keys=False) def _run(self): - # Wait until SCION control-plane/path connectivity is established. self.await_connectivity() - time.sleep(5) + time.sleep(10) - # Copy test binaries into the two tester containers. test_client = local["realpath"](self.get_executable("test-client").executable).strip() test_server = local["realpath"](self.get_executable("test-server").executable).strip() + self.dc("cp", test_server, f"{SERVER_CONTAINER}:/bin/") + self.dc("cp", test_client, f"{CLIENT_111_CONTAINER}:/bin/") + self.dc("cp", test_client, f"{CLIENT_112_CONTAINER}:/bin/") - self.dc("cp", test_server, "tester_1-ff00_0_111:/bin/") - self.dc("cp", test_client, "tester_1-ff00_0_112:/bin/") - - primary_ip = self._server_primary_ip() - secondary_ip = "192.168.200.11" - bound_port = 31001 - multihomed_port = 31000 - - print(f"server IPs configured: primary={primary_ip}, secondary={secondary_ip}") - - # 1) Regression check: bound server must still work with a specific IP binding. - self.dc.execute_detached( - "tester_1-ff00_0_111", - "bash", - "-c", - f"test-server -bind {primary_ip} -port {bound_port} -mode bound", + print( + "server IPs configured: primary=%s, secondary=%s" + % (SERVER_PRIMARY_IP, SERVER_SECONDARY_IP) ) - time.sleep(2) - - local_addr = "1-ff00:0:112,0.0.0.0:0" - remote_bound = f"1-ff00:0:111,{primary_ip}:{bound_port}" - print(f"running bound-address regression scenario: {remote_bound}") - result_bound = self.dc.execute( - "tester_1-ff00_0_112", - "bash", - "-c", - f'test-client -local "{local_addr}" -remote "{remote_bound}" ' - f'-expect "{remote_bound}"', + self._run_scenario( + client_container=CLIENT_111_CONTAINER, + client_ia=CLIENT_111_IA, + remote_ip=SERVER_PRIMARY_IP, + bind_ip="0.0.0.0", + label="AS111 -> AS110 via br1-ff00_0_110-1 with unbound server", ) - print(result_bound) - - # 2) Multihomed check using server primary IP while server is unbound (0.0.0.0). - self.dc.execute_detached( - "tester_1-ff00_0_111", - "bash", - "-c", - f"test-server -bind 0.0.0.0 -port {multihomed_port} -mode multihomed", + self._run_scenario( + client_container=CLIENT_112_CONTAINER, + client_ia=CLIENT_112_IA, + remote_ip=SERVER_SECONDARY_IP, + bind_ip="0.0.0.0", + # The extra compose-managed subnet sits behind br1-ff00_0_110-2, so the + # reply should be sourced from the second server address on that subnet. + label="AS112 -> AS110 via br1-ff00_0_110-2 with unbound server", ) - time.sleep(2) - - remote_primary = f"1-ff00:0:111,{primary_ip}:{multihomed_port}" - print(f"running client against primary IP: {remote_primary}") - result_primary = self.dc.execute( - "tester_1-ff00_0_112", - "bash", - "-c", - f'test-client -local "{local_addr}" -remote "{remote_primary}" ' - f'-expect "{remote_primary}"', + self._run_scenario( + client_container=CLIENT_111_CONTAINER, + client_ia=CLIENT_111_IA, + remote_ip=SERVER_PRIMARY_IP, + bind_ip=SERVER_PRIMARY_IP, + label="AS111 -> AS110 via br1-ff00_0_110-1 with server bound to primary IP", + ) + self._run_scenario( + client_container=CLIENT_112_CONTAINER, + client_ia=CLIENT_112_IA, + remote_ip=SERVER_SECONDARY_IP, + bind_ip=SERVER_SECONDARY_IP, + # The extra compose-managed subnet sits behind br1-ff00_0_110-2, so the + # reply should be sourced from the second server address on that subnet. + label="AS112 -> AS110 via br1-ff00_0_110-2 with server bound to secondary IP", ) - print(result_primary) - # 3) Multihomed check using server secondary IP while server is unbound (0.0.0.0). - # Linux may still select the primary IP as reply source, so this validates connectivity - # through the secondary destination without enforcing the response source address. + def _run_scenario( + self, + client_container: str, + client_ia: str, + remote_ip: str, + bind_ip: str, + label: str, + ): + remote = f"{SERVER_IA},{remote_ip}:{SERVER_PORT}" + print(f"running {label}: {remote}") self.dc.execute_detached( - "tester_1-ff00_0_111", + SERVER_CONTAINER, "bash", "-c", - f"test-server -bind 0.0.0.0 -port {multihomed_port} -mode multihomed", + f'test-server -bind "{bind_ip}" -port {SERVER_PORT}', ) - time.sleep(2) - - remote_secondary = f"1-ff00:0:111,{secondary_ip}:{multihomed_port}" - print(f"running client against secondary IP: {remote_secondary}") - result_secondary = self.dc.execute( - "tester_1-ff00_0_112", + time.sleep(3) + result = self.dc.execute( + client_container, "bash", "-c", - f'test-client -local "{local_addr}" -remote "{remote_secondary}"', + ( + f'test-client -local "{client_ia},0.0.0.0:0" ' + f'-remote "{remote}" -expect "{remote}"' + ), ) - print(result_secondary) - + print(result) if __name__ == "__main__": base.main(Test) From 25f14837500fb350aa33691d3edf9024d40682ca Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 22 May 2026 16:26:09 +0200 Subject: [PATCH 23/29] Remove deprecated unit test. --- pkg/snet/multihomed/BUILD.bazel | 4 - pkg/snet/multihomed/integrated_test.go | 333 ------------------------- 2 files changed, 337 deletions(-) delete mode 100644 pkg/snet/multihomed/integrated_test.go diff --git a/pkg/snet/multihomed/BUILD.bazel b/pkg/snet/multihomed/BUILD.bazel index 98487655ea..4cf73c47d4 100644 --- a/pkg/snet/multihomed/BUILD.bazel +++ b/pkg/snet/multihomed/BUILD.bazel @@ -23,11 +23,7 @@ go_test( ], embed = [":go_default_library"], deps = [ - "//pkg/addr:go_default_library", - "//pkg/daemon:go_default_library", - "//pkg/daemon/types:go_default_library", "//pkg/private/xtest:go_default_library", - "//pkg/snet:go_default_library", "@com_github_stretchr_testify//require:go_default_library", "@com_github_stretchr_testify//suite:go_default_library", ], diff --git a/pkg/snet/multihomed/integrated_test.go b/pkg/snet/multihomed/integrated_test.go deleted file mode 100644 index 503d353687..0000000000 --- a/pkg/snet/multihomed/integrated_test.go +++ /dev/null @@ -1,333 +0,0 @@ -// Copyright 2025 ETH Zurich -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package multihomed_test - -import ( - "context" - "fmt" - "net" - "sync/atomic" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/scionproto/scion/pkg/addr" - "github.com/scionproto/scion/pkg/daemon" - daemontypes "github.com/scionproto/scion/pkg/daemon/types" - "github.com/scionproto/scion/pkg/private/xtest" - "github.com/scionproto/scion/pkg/snet" -) - -// Using test suite declared at multihomed_test.go - -// XXX(juagargi) This is not a valid unit test, should be moved to some kind of -// integration test run within a docker container. For now: -// -// Generate the tiny topology with IPv4 only: -// ./scion.sh stop ; rm -r gen/ -// ./scion.sh topology -c topology/tiny4.topo -// ./scion.sh start -// Then run the tests: -// go test ./pkg/snet/multihomed -count=1 -v -run TestUDP -// go test ./pkg/snet/multihomed -count=1 -v -run TestBasic -// go test ./pkg/snet/multihomed -count=1 -v -run TestMultihomed - -const serverPortInitial = 12345 - -var ( - serverIA = "1-ff00:0:111" - serverIPAddress = "127.0.0.1" - clientAddress = "127.0.0.1:0" - serverDaemon = "127.0.0.19:30255" // 111 - clientDaemon = "127.0.0.27:30255" // 112 -) - -func (s *MultihomedTestSuite) TestUDP() { - t := s.T() - t.Parallel() - - // Bind server to any interface. - serverAddress := fmt.Sprintf("0.0.0.0:%d", s.getServerPort()) - serverAddr := xtest.MustParseUDPAddr(t, serverAddress) - - runUDPServerAt(t, serverAddr) - runUDPClientWith(t, serverAddr, nil) -} - -// TestNoRegressionCheck checks that using bound sockets (like before "multihomed" changes) -// works as expected. -func (s *MultihomedTestSuite) TestNoRegressionCheck() { - t := s.T() - t.Parallel() - - ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) - defer cancelF() - - serverAddress := fmt.Sprintf("%s:%d", serverIPAddress, s.getServerPort()) - serverAddr := xtest.MustParseUDPAddr(t, serverAddress) - clientAddr := xtest.MustParseUDPAddr(t, clientAddress) - - runServerAt(ctx, t, serverAddr) - runClientWith(ctx, t, serverAddr, clientAddr) -} - -// TestMultihomedServer temporary documentation. -// This is a test intended to debug the multihomed scion socket, remove it and make it -// an integration test. -// A multihomed socket is bound to several interfaces, or conversely to an address that spans -// multiple interfaces. -// Multihomed sockets are necessary to get connectivity via several interfaces, -// 1. At the RX side, listening to both cellular and ethernet WAN interfaces: necessary for handling -// failover one another. -// 2. At the TX side, the ability to use several paths that start on different local interfaces -// relies on a multihomed socket -func (s *MultihomedTestSuite) TestMultihomedServer() { - t := s.T() - t.Parallel() - - ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) - defer cancelF() - - serverAddress := fmt.Sprintf("%s:%d", serverIPAddress, s.getServerPort()) - serverAddr := xtest.MustParseUDPAddr(t, serverAddress) - - runMultihomedServer(ctx, t, serverAddr.Port) - runClientWith(ctx, t, serverAddr, nil) -} - -func runUDPServerAt( - t *testing.T, - serverAddr *net.UDPAddr, -) { - conn, err := net.ListenUDP("udp", serverAddr) - require.NoError(t, err) - require.NotNil(t, conn) - - // Run in a different thread so that the creation of the server finishes without blocking. - go func() { - handleUDPPing(t, conn) - }() - - t.Log("runUDPServerAt: done") -} - -func runServerAt( - ctx context.Context, - t *testing.T, - serverAddr *net.UDPAddr, -) { - sd, err := daemon.NewService(serverDaemon).Connect(ctx) - require.NoError(t, err) - defer sd.Close() - - topo, err := daemon.LoadTopology(ctx, sd) - require.NoError(t, err) - - sn := &snet.SCIONNetwork{ - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - Topology: topo, - } - - conn, err := sn.Listen(ctx, "udp", serverAddr) - require.NoError(t, err) - require.NotNil(t, conn) - - // Run in a different thread so that the creation of the server finishes without blocking. - go func() { - handlePing(t, conn) - }() - t.Log("runServerAt: done") -} - -func runMultihomedServer( - ctx context.Context, - t *testing.T, - port int, -) { - sd, err := daemon.NewService(serverDaemon).Connect(ctx) - require.NoError(t, err) - defer sd.Close() - - topo, err := daemon.LoadTopology(ctx, sd) - require.NoError(t, err) - - sn := &snet.SCIONNetwork{ - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - Topology: topo, - } - - serverAddr := xtest.MustParseUDPAddr(t, fmt.Sprintf("0.0.0.0:%d", port)) - conn, err := sn.Listen(ctx, "udp", serverAddr) - require.NoError(t, err) - require.NotNil(t, conn) - - go func() { - handlePing(t, conn) - }() - t.Log("runServerAt: done") -} - -func runUDPClientWith( - t *testing.T, - serverAddr *net.UDPAddr, - clientAddr *net.UDPAddr, -) { - conn, err := net.DialUDP("udp", clientAddr, serverAddr) - require.NoError(t, err) - - _, err = conn.Write([]byte("ping")) - require.NoError(t, err) - t.Logf(" ---> runUDPClientWith: ping sent") - - // Read answer - buff := make([]byte, 2048) - n, remoteAddr, err := conn.ReadFrom(buff) - t.Logf(" <--- runUDPClientWith: pong received. Err? %v, remote: %s read %d bytes", - err != nil, remoteAddr, n) - require.NoError(t, err) - - buff = buff[:n] - require.Equal(t, "pong", string(buff)) - - err = conn.Close() - require.NoError(t, err) -} - -func runClientWith( - ctx context.Context, - t *testing.T, - serverAddr *net.UDPAddr, - clientAddr *net.UDPAddr, -) { - sd, err := daemon.NewService(clientDaemon).Connect(ctx) - require.NoError(t, err) - defer sd.Close() - - info, err := sd.ASInfo(ctx, 0) - require.NoError(t, err) - t.Logf("client local IA: %s", info.IA.String()) - - interfaces, err := sd.Interfaces(ctx) - require.NoError(t, err) - for k, v := range interfaces { - t.Logf("iface %3d: %s", k, v.String()) - } - - topo, err := daemon.LoadTopology(ctx, sd) - require.NoError(t, err) - - sn := &snet.SCIONNetwork{ - SCMPHandler: snet.DefaultSCMPHandler{ - RevocationHandler: daemon.RevHandler{Connector: sd}, - }, - Topology: topo, - } - - completeServerAddrStr := fmt.Sprintf("%s,[%s]:%d", - serverIA, - serverAddr.IP.String(), - serverAddr.Port) - completeServerAddr, err := snet.ParseUDPAddr(completeServerAddrStr) - require.NoError(t, err) - - paths := getRemote(ctx, t, sd, completeServerAddr.IA, info.IA) - require.Greater(t, len(paths), 0) - completeServerAddr.Path = paths[0].Dataplane() - completeServerAddr.NextHop = paths[0].UnderlayNextHop() - - conn, err := sn.Dial(ctx, "udp", clientAddr, completeServerAddr) - require.NoError(t, err) - _, err = conn.Write([]byte("ping")) - require.NoError(t, err) - t.Logf(" ---> runUDPClientWith: ping sent") - - // Read answer. - buff := make([]byte, 2048) - n, remoteAddr, err := conn.ReadFrom(buff) - t.Logf(" <--- runClientWith: pong received. Err? %v, remote: %s read %d bytes", - err != nil, remoteAddr, n) - require.NoError(t, err) - require.Equal(t, completeServerAddr.String(), remoteAddr.String()) - buff = buff[:n] - require.Equal(t, "pong", string(buff)) - - err = conn.Close() - require.NoError(t, err) -} - -func handlePing(t *testing.T, conn *snet.Conn) { - t.Logf("handlePing conn = %p", conn) - buff := make([]byte, 2048) - n, remoteAddr, err := conn.ReadFrom(buff) - t.Logf(" ---> handlePing. Err? %v, remote: %s, read %d bytes", - err != nil, remoteAddr, n) - require.NoError(t, err) - buff = buff[:n] - t.Logf("read from %s", remoteAddr) - // Check ping. - require.Equal(t, "ping", string(buff)) - - // Pong. - _, err = conn.WriteTo([]byte("pong"), remoteAddr) - require.NoError(t, err) - - err = conn.Close() - require.NoError(t, err) -} - -func handleUDPPing(t *testing.T, conn *net.UDPConn) { - t.Logf("handleUDPPing conn = %p", conn) - buff := make([]byte, 2048) - n, remoteAddr, err := conn.ReadFromUDP(buff) - t.Logf(" <--- handleUDPPing. Err? %v, remote: %s, read %d bytes", - err != nil, remoteAddr, n) - require.NoError(t, err) - - buff = buff[:n] - t.Logf("read from %s", remoteAddr) - // Check ping. - require.Equal(t, "ping", string(buff)) - - // Pong. - _, err = conn.WriteTo([]byte("pong"), remoteAddr) - require.NoError(t, err) - - err = conn.Close() - require.NoError(t, err) -} - -func getRemote( - ctx context.Context, - t *testing.T, - sd daemon.Connector, - remote, local addr.IA, -) []snet.Path { - paths, err := sd.Paths(ctx, remote, local, daemontypes.PathReqFlags{}) - require.NoError(t, err) - return paths -} - -var lastPortUsed atomic.Int32 - -func (s *MultihomedTestSuite) getServerPort() int { - // Initialize to 12344 if it's uninitialized. - lastPortUsed.CompareAndSwap(0, serverPortInitial-1) - return int(lastPortUsed.Add(1)) -} From 1573fb5313ff680b186293e36a32097d5313ecc9 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 29 May 2026 14:59:51 +0200 Subject: [PATCH 24/29] Linter findings: long lines and rand.Read in test without err check. --- acceptance/multihomed/test-client/main.go | 3 ++- acceptance/multihomed/test-server/main.go | 3 ++- pkg/snet/multihomed/multihomed_test.go | 14 ++++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/acceptance/multihomed/test-client/main.go b/acceptance/multihomed/test-client/main.go index 6cf20c5df3..27131fcd09 100644 --- a/acceptance/multihomed/test-client/main.go +++ b/acceptance/multihomed/test-client/main.go @@ -68,7 +68,8 @@ func main() { } defer sd.Close() - paths, err := sd.Paths(ctx, remoteAddr.IA, localAddr.IA, daemontypes.PathReqFlags{Refresh: true}) + paths, err := sd.Paths(ctx, remoteAddr.IA, localAddr.IA, + daemontypes.PathReqFlags{Refresh: true}) if err != nil { log.Fatalf("path lookup: %v", err) } diff --git a/acceptance/multihomed/test-server/main.go b/acceptance/multihomed/test-server/main.go index 302563a46c..8c21c5f633 100644 --- a/acceptance/multihomed/test-server/main.go +++ b/acceptance/multihomed/test-server/main.go @@ -51,7 +51,8 @@ func main() { log.Printf("server running bind=%s:%d", bindAddr, port) - // Single ping/pong exchange; process exits afterwards so the test can restart with a fresh bind. + // One-time ping/pong exchange; process exits afterwards, + // so a new server can be started with a fresh bind to the same port. var pkt snet.Packet pkt.Prepare() n, lastHop, err := conn.ReadFrom(pkt.Bytes) diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go index 3519a72c7d..06e1ecca04 100644 --- a/pkg/snet/multihomed/multihomed_test.go +++ b/pkg/snet/multihomed/multihomed_test.go @@ -113,7 +113,7 @@ func (s *MultihomedTestSuite) TestInternalEgressCache() { // of a sync.Map (any->any) and a regular map (IP->IP) with a RWMutex. func BenchmarkSyncMapWrites(b *testing.B) { // Create a set of `size` IP addresses. - addrs := generateIpAddrs(b.N) + addrs := generateIpAddrs(b, b.N) m := sync.Map{} b.ResetTimer() @@ -121,7 +121,7 @@ func BenchmarkSyncMapWrites(b *testing.B) { } func BenchmarkSyncMapReads(b *testing.B) { - addrs := generateIpAddrs(b.N) + addrs := generateIpAddrs(b, b.N) m := sync.Map{} storeInSyncMap(&m, addrs) @@ -140,7 +140,7 @@ func BenchmarkSyncMapReads(b *testing.B) { } func BenchmarkMuMapWrites(b *testing.B) { - addrs := generateIpAddrs(b.N) + addrs := generateIpAddrs(b, b.N) m := make(map[netip.Addr]netip.Addr) mu := sync.RWMutex{} @@ -149,7 +149,7 @@ func BenchmarkMuMapWrites(b *testing.B) { } func BenchmarkMuMapReads(b *testing.B) { - addrs := generateIpAddrs(b.N) + addrs := generateIpAddrs(b, b.N) m := make(map[netip.Addr]netip.Addr) mu := sync.RWMutex{} storeInMuMap(m, &mu, addrs) @@ -169,11 +169,13 @@ func BenchmarkMuMapReads(b *testing.B) { require.Len(b, discardBuff, b.N) } -func generateIpAddrs(size int) []netip.Addr { +func generateIpAddrs(tb testing.TB, size int) []netip.Addr { + tb.Helper() addrs := make([]netip.Addr, size) raw := [4]byte{} for i := range size { - rand.Read(raw[:]) + _, err := rand.Read(raw[:]) + require.NoError(tb, err) addrs[i] = netip.AddrFrom4(raw) } return addrs From 4f59eea3860c14218a5a4c8dfbd8bc285f018159 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 29 May 2026 15:29:51 +0200 Subject: [PATCH 25/29] Linter: flake8 new line required. --- acceptance/multihomed/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/acceptance/multihomed/test.py b/acceptance/multihomed/test.py index 671330f717..b3ba46495c 100644 --- a/acceptance/multihomed/test.py +++ b/acceptance/multihomed/test.py @@ -166,5 +166,6 @@ def _run_scenario( ) print(result) + if __name__ == "__main__": base.main(Test) From f987d0ce55b4d06be4b6e5966d605b5e70531cb9 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Fri, 29 May 2026 15:39:33 +0200 Subject: [PATCH 26/29] More robust network attachment to server container. --- acceptance/multihomed/test.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/acceptance/multihomed/test.py b/acceptance/multihomed/test.py index b3ba46495c..fd67540b5f 100644 --- a/acceptance/multihomed/test.py +++ b/acceptance/multihomed/test.py @@ -84,7 +84,17 @@ def setup_prepare(self): scion_dc["services"][SERVER_BR2]["networks"][SERVER_SECONDARY_NETWORK] = { "ipv4_address": SERVER_SECONDARY_BR_IP, } - scion_dc["services"][SERVER_DISPATCHER]["networks"][SERVER_SECONDARY_NETWORK] = { + + # Attach the secondary subnet to the container namespace that hosts the + # test-server process. In some compose variants `tester_*` shares the + # dispatcher namespace via `network_mode`, in others it has its own. + server_service = scion_dc["services"][SERVER_CONTAINER] + server_ns_service = ( + SERVER_DISPATCHER + if "network_mode" in server_service and SERVER_DISPATCHER in scion_dc["services"] + else SERVER_CONTAINER + ) + scion_dc["services"][server_ns_service]["networks"][SERVER_SECONDARY_NETWORK] = { "ipv4_address": SERVER_SECONDARY_IP, } From 023787b2c039e8286bd01dd3215247f1df732df1 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Mon, 1 Jun 2026 11:16:12 +0200 Subject: [PATCH 27/29] Kill previous server processes. --- acceptance/multihomed/test.py | 51 +++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/acceptance/multihomed/test.py b/acceptance/multihomed/test.py index fd67540b5f..aecb5b72b8 100644 --- a/acceptance/multihomed/test.py +++ b/acceptance/multihomed/test.py @@ -158,23 +158,40 @@ def _run_scenario( ): remote = f"{SERVER_IA},{remote_ip}:{SERVER_PORT}" print(f"running {label}: {remote}") - self.dc.execute_detached( - SERVER_CONTAINER, - "bash", - "-c", - f'test-server -bind "{bind_ip}" -port {SERVER_PORT}', - ) - time.sleep(3) - result = self.dc.execute( - client_container, - "bash", - "-c", - ( - f'test-client -local "{client_ia},0.0.0.0:0" ' - f'-remote "{remote}" -expect "{remote}"' - ), - ) - print(result) + for attempt in range(2): + # Make sure no stale server process from a previous scenario/attempt keeps + # the port busy and causes a false negative timeout. + self.dc.execute( + SERVER_CONTAINER, + "bash", + "-c", + "killall test-server >/dev/null 2>&1 || true", + ) + self.dc.execute_detached( + SERVER_CONTAINER, + "bash", + "-c", + f'test-server -bind "{bind_ip}" -port {SERVER_PORT}', + ) + time.sleep(3) + try: + result = self.dc.execute( + client_container, + "bash", + "-c", + ( + f'test-client -local "{client_ia},0.0.0.0:0" ' + f'-remote "{remote}" -expect "{remote}"' + ), + ) + print(result) + return + except Exception: + if attempt == 0: + print(f"scenario retry after first failure: {label}") + time.sleep(2) + continue + raise if __name__ == "__main__": From f4e4d188982f6d143bfa8b7841aaceb3f22e2fc6 Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Mon, 1 Jun 2026 11:50:33 +0200 Subject: [PATCH 28/29] Debug output added. --- acceptance/multihomed/test-client/main.go | 11 +++++++++ acceptance/multihomed/test-server/main.go | 6 +++++ acceptance/multihomed/test.py | 28 +++++++++++++++++++++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/acceptance/multihomed/test-client/main.go b/acceptance/multihomed/test-client/main.go index 27131fcd09..ef9b0cd125 100644 --- a/acceptance/multihomed/test-client/main.go +++ b/acceptance/multihomed/test-client/main.go @@ -17,6 +17,7 @@ package main import ( "context" "flag" + "fmt" "log" "os" @@ -27,6 +28,7 @@ import ( func main() { log.SetOutput(os.Stdout) + log.Printf("test-client starting") // Parse test inputs. The remote is provided as a full SCION UDP address so the same // client binary can probe server primary and secondary IPs without code changes. @@ -41,6 +43,8 @@ func main() { flag.Var(&remoteAddr, "remote", "Remote SCION address") flag.StringVar(&expect, "expect", "", "Expected remote SCION address") flag.Parse() + log.Printf("parsed args daemon=%q local=%q remote=%q expect=%q", + daemonAddr, localAddr.String(), remoteAddr.String(), expect) if expect != "" { parsed, err := snet.ParseUDPAddr(expect) @@ -59,6 +63,7 @@ func main() { if daemonAddr == "" { log.Fatal("daemon address missing: pass -daemon or set SCION_DAEMON_ADDRESS/SCION_DAEMON") } + log.Printf("using daemon address %s", daemonAddr) // Resolve a path from local IA to remote IA. ctx := context.Background() @@ -67,6 +72,7 @@ func main() { log.Fatalf("connect daemon: %v", err) } defer sd.Close() + log.Printf("connected to daemon") paths, err := sd.Paths(ctx, remoteAddr.IA, localAddr.IA, daemontypes.PathReqFlags{Refresh: true}) @@ -77,6 +83,7 @@ func main() { log.Fatalf("no path from %s to %s", localAddr.IA, remoteAddr.IA) } sp := paths[0] + log.Printf("path lookup returned %d paths; using first path", len(paths)) // Build a SCION connection pinned to the selected path. topo, err := daemon.LoadTopology(ctx, sd) @@ -98,18 +105,21 @@ func main() { log.Fatalf("dial: %v", err) } defer conn.Close() + log.Printf("dial successful: local_host=%s remote=%s", localAddr.Host, remoteAddr.String()) // Exchange ping/pong payloads and assert reply endpoint if requested by the caller. _, err = conn.Write([]byte("ping")) if err != nil { log.Fatalf("write ping: %v", err) } + log.Printf("ping sent") buf := make([]byte, 2048) n, from, err := conn.ReadFrom(buf) if err != nil { log.Fatalf("read pong: %v", err) } + log.Printf("received response from %s (%d bytes)", from, n) if string(buf[:n]) != "pong" { log.Fatalf("unexpected payload: %q", string(buf[:n])) } @@ -125,4 +135,5 @@ func main() { } log.Printf("client success remote=%s", from) + fmt.Printf("test-client done\n") } diff --git a/acceptance/multihomed/test-server/main.go b/acceptance/multihomed/test-server/main.go index 8c21c5f633..d45b9b2e2d 100644 --- a/acceptance/multihomed/test-server/main.go +++ b/acceptance/multihomed/test-server/main.go @@ -16,6 +16,7 @@ package main import ( "flag" + "fmt" "log" "net" "os" @@ -26,6 +27,7 @@ import ( func main() { log.SetOutput(os.Stdout) + log.Printf("test-server starting") // Parse test inputs. The same server binary is used for: // - IPv4 unbound mode: bind to 0.0.0.0 @@ -36,6 +38,7 @@ func main() { flag.StringVar(&bindAddr, "bind", "0.0.0.0", "Bind host") flag.IntVar(&port, "port", 31000, "Bind UDP port") flag.Parse() + log.Printf("parsed args bind=%q port=%d", bindAddr, port) // Bind a raw UDP socket in the tester namespace. Replies are created by reversing the // received SCION packet, which preserves the destination address the client originally used. @@ -50,6 +53,7 @@ func main() { defer conn.Close() log.Printf("server running bind=%s:%d", bindAddr, port) + fmt.Printf("test-server listening\n") // One-time ping/pong exchange; process exits afterwards, // so a new server can be started with a fresh bind to the same port. @@ -59,6 +63,7 @@ func main() { if err != nil { log.Fatalf("read ping: %v", err) } + log.Printf("received packet from lastHop=%v bytes=%d", lastHop, n) pkt.Bytes = pkt.Bytes[:n] if err := pkt.Decode(); err != nil { @@ -96,6 +101,7 @@ func main() { } log.Printf("served ping from %s", pkt.Destination) + fmt.Printf("test-server done\n") } func portString(port int) string { diff --git a/acceptance/multihomed/test.py b/acceptance/multihomed/test.py index aecb5b72b8..2509e72d0f 100644 --- a/acceptance/multihomed/test.py +++ b/acceptance/multihomed/test.py @@ -102,11 +102,16 @@ def setup_prepare(self): yaml.safe_dump(scion_dc, file, sort_keys=False) def _run(self): + print("[multihomed] waiting for control/data-plane connectivity") self.await_connectivity() + print("[multihomed] connectivity reported ready; waiting extra 10s for stabilization") time.sleep(10) test_client = local["realpath"](self.get_executable("test-client").executable).strip() test_server = local["realpath"](self.get_executable("test-server").executable).strip() + print(f"[multihomed] test-client binary={test_client}") + print(f"[multihomed] test-server binary={test_server}") + print("[multihomed] copying test binaries into containers") self.dc("cp", test_server, f"{SERVER_CONTAINER}:/bin/") self.dc("cp", test_client, f"{CLIENT_111_CONTAINER}:/bin/") self.dc("cp", test_client, f"{CLIENT_112_CONTAINER}:/bin/") @@ -157,16 +162,24 @@ def _run_scenario( label: str, ): remote = f"{SERVER_IA},{remote_ip}:{SERVER_PORT}" - print(f"running {label}: {remote}") + print(f"[multihomed] running scenario: {label}") + print( + "[multihomed] scenario config: " + f"client_container={client_container} client_ia={client_ia} " + f"server_bind={bind_ip}:{SERVER_PORT} remote={remote}" + ) for attempt in range(2): + print(f"[multihomed] attempt {attempt + 1}/2 for scenario: {label}") # Make sure no stale server process from a previous scenario/attempt keeps # the port busy and causes a false negative timeout. + print(f"[multihomed] killing stale test-server processes in {SERVER_CONTAINER}") self.dc.execute( SERVER_CONTAINER, "bash", "-c", "killall test-server >/dev/null 2>&1 || true", ) + print(f"[multihomed] starting test-server (detached) bind={bind_ip}:{SERVER_PORT}") self.dc.execute_detached( SERVER_CONTAINER, "bash", @@ -175,6 +188,7 @@ def _run_scenario( ) time.sleep(3) try: + print(f"[multihomed] executing test-client in {client_container}") result = self.dc.execute( client_container, "bash", @@ -184,9 +198,19 @@ def _run_scenario( f'-remote "{remote}" -expect "{remote}"' ), ) + print("[multihomed] test-client output begin") print(result) + print("[multihomed] test-client output end") return - except Exception: + except Exception as err: + print(f"[multihomed] scenario attempt failed: {label}") + print(str(err)) + print("[multihomed] server logs (tail=80)") + print(self.dc("logs", "--tail", "80", SERVER_CONTAINER)) + print("[multihomed] server dispatcher logs (tail=80)") + print(self.dc("logs", "--tail", "80", SERVER_DISPATCHER)) + print(f"[multihomed] client logs (tail=80) from {client_container}") + print(self.dc("logs", "--tail", "80", client_container)) if attempt == 0: print(f"scenario retry after first failure: {label}") time.sleep(2) From 49cb926232be4f91e03c67147fc776210df6980e Mon Sep 17 00:00:00 2001 From: "Juan A. Garcia Pardo" Date: Mon, 1 Jun 2026 11:58:56 +0200 Subject: [PATCH 29/29] Fix: local IP depends on IPv4 vs IPv6. --- acceptance/multihomed/test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/acceptance/multihomed/test.py b/acceptance/multihomed/test.py index 2509e72d0f..971367517f 100644 --- a/acceptance/multihomed/test.py +++ b/acceptance/multihomed/test.py @@ -55,6 +55,8 @@ SERVER_IA = "1-ff00:0:110" CLIENT_111_IA = "1-ff00:0:111" CLIENT_112_IA = "1-ff00:0:112" +CLIENT_111_LOCAL_BIND = "0.0.0.0" +CLIENT_112_LOCAL_BIND = "[::]" SERVER_CONTAINER = "tester_1-ff00_0_110" CLIENT_111_CONTAINER = "tester_1-ff00_0_111" @@ -123,6 +125,7 @@ def _run(self): self._run_scenario( client_container=CLIENT_111_CONTAINER, client_ia=CLIENT_111_IA, + client_local_bind=CLIENT_111_LOCAL_BIND, remote_ip=SERVER_PRIMARY_IP, bind_ip="0.0.0.0", label="AS111 -> AS110 via br1-ff00_0_110-1 with unbound server", @@ -130,6 +133,7 @@ def _run(self): self._run_scenario( client_container=CLIENT_112_CONTAINER, client_ia=CLIENT_112_IA, + client_local_bind=CLIENT_112_LOCAL_BIND, remote_ip=SERVER_SECONDARY_IP, bind_ip="0.0.0.0", # The extra compose-managed subnet sits behind br1-ff00_0_110-2, so the @@ -139,6 +143,7 @@ def _run(self): self._run_scenario( client_container=CLIENT_111_CONTAINER, client_ia=CLIENT_111_IA, + client_local_bind=CLIENT_111_LOCAL_BIND, remote_ip=SERVER_PRIMARY_IP, bind_ip=SERVER_PRIMARY_IP, label="AS111 -> AS110 via br1-ff00_0_110-1 with server bound to primary IP", @@ -146,6 +151,7 @@ def _run(self): self._run_scenario( client_container=CLIENT_112_CONTAINER, client_ia=CLIENT_112_IA, + client_local_bind=CLIENT_112_LOCAL_BIND, remote_ip=SERVER_SECONDARY_IP, bind_ip=SERVER_SECONDARY_IP, # The extra compose-managed subnet sits behind br1-ff00_0_110-2, so the @@ -157,6 +163,7 @@ def _run_scenario( self, client_container: str, client_ia: str, + client_local_bind: str, remote_ip: str, bind_ip: str, label: str, @@ -166,6 +173,7 @@ def _run_scenario( print( "[multihomed] scenario config: " f"client_container={client_container} client_ia={client_ia} " + f"client_local_bind={client_local_bind}:0 " f"server_bind={bind_ip}:{SERVER_PORT} remote={remote}" ) for attempt in range(2): @@ -194,7 +202,7 @@ def _run_scenario( "bash", "-c", ( - f'test-client -local "{client_ia},0.0.0.0:0" ' + f'test-client -local "{client_ia},{client_local_bind}:0" ' f'-remote "{remote}" -expect "{remote}"' ), )