diff --git a/acceptance/router_priority/test.py b/acceptance/router_priority/test.py index a7eaa83d75..921000ea57 100755 --- a/acceptance/router_priority/test.py +++ b/acceptance/router_priority/test.py @@ -51,6 +51,10 @@ def measure_br(url: str): "total": 0, "interface": defaultdict(int), }, + "router_priority_forwarded_pkts":{ + "total": 0, + "interface": defaultdict(int), + }, } text = requests.get(url).text for family in text_string_to_metric_families(text): @@ -135,6 +139,19 @@ def _run(self): if busy_fwd == 0: print(f"Insufficient load: no packet drop occurred.") sys.exit(1) + bfd_sent_delta = metrics_after["router_bfd_sent_packets"]["total"] -\ + metrics_before["router_bfd_sent_packets"]["total"] + print(f"BFD sent packets delta: {bfd_sent_delta}") + prio_fwd = metrics_after["router_priority_forwarded_pkts"]["total"] -\ + metrics_before["router_priority_forwarded_pkts"]["total"] + print(f"Priority-forwarded packets delta: {prio_fwd}") + if prio_fwd <= 0: + print("Expected priority-forwarded packets to increase, but it did not.") + sys.exit(1) + if prio_fwd < bfd_sent_delta: + print("Priority-forwarded packets delta is lower than BFD sent packets delta.") + print(f"prio_fwd={prio_fwd}, bfd_sent_delta={bfd_sent_delta}") + sys.exit(1) print(f"router metrics follow.\n" f"Before:\n-----8<-----\n{metrics_before}\n-----8<-----\n" f"After: \n-----8<-----\n{metrics_after}\n-----8<-----") diff --git a/doc/manuals/router/metrics.rst b/doc/manuals/router/metrics.rst index af0491607c..f109aaf78c 100644 --- a/doc/manuals/router/metrics.rst +++ b/doc/manuals/router/metrics.rst @@ -57,6 +57,7 @@ local system (if any) are not counted in this number. **Labels**: ``interface``, ``isd_as`` and ``neighbor_isd_as``. + Dropped packets total --------------------- @@ -69,6 +70,19 @@ This metric reports the number of packets that were dropped because of errors. **Labels**: ``interface``, ``isd_as`` and ``neighbor_isd_as``. 
+Priority forwarded packets total +-------------------------------- + +**Name**: ``router_priority_forwarded_pkts_total`` + +**Type**: Counter + +**Description**: Total number of priority packets successfully forwarded by the +router. + +**Labels**: ``interface``, ``isd_as``, ``neighbor_isd_as`` and ``sizeclass``. + + BFD state changes (inter-AS) ---------------------------- @@ -117,6 +131,66 @@ router in the local AS. **Labels**: ``sibling`` and ``isd_as``. +Hummingbird packets processed total +----------------------------------- + +**Name**: ``router_humm_processed_pkts_total`` + +**Type**: Counter + +**Description**: Total number of Hummingbird packets received by the router +processor. + +**Labels**: ``interface``, ``isd_as``, ``neighbor_isd_as`` and ``sizeclass``. + +Hummingbird flyover packets total +--------------------------------- + +**Name**: ``router_humm_flyover_pkts_total`` + +**Type**: Counter + +**Description**: Total number of parsed Hummingbird packets with flyover hop +fields. + +**Labels**: ``interface``, ``isd_as``, ``neighbor_isd_as`` and ``sizeclass``. + +Hummingbird freshness demotions total +------------------------------------- + +**Name**: ``router_humm_demoted_freshness_total`` + +**Type**: Counter + +**Description**: Total number of Hummingbird packets demoted to best-effort due +to freshness checks. + +**Labels**: ``interface``, ``isd_as``, ``neighbor_isd_as`` and ``sizeclass``. + +Hummingbird expiration demotions total +-------------------------------------- + +**Name**: ``router_humm_demoted_expired_total`` + +**Type**: Counter + +**Description**: Total number of Hummingbird packets demoted to best-effort due +to expired reservations. + +**Labels**: ``interface``, ``isd_as``, ``neighbor_isd_as`` and ``sizeclass``. 
+ +Hummingbird token bucket demotions total +---------------------------------------- + +**Name**: ``router_humm_demoted_tokenbucket_total`` + +**Type**: Counter + +**Description**: Total number of Hummingbird packets demoted to best-effort due +to token bucket checks. + +**Labels**: ``interface``, ``isd_as``, ``neighbor_isd_as`` and ``sizeclass``. + Service instance count ---------------------- diff --git a/pkg/snet/squic/hummingbird_live_test.go b/pkg/snet/squic/hummingbird_live_test.go index 409b9eeb5d..992ea9e57d 100644 --- a/pkg/snet/squic/hummingbird_live_test.go +++ b/pkg/snet/squic/hummingbird_live_test.go @@ -15,10 +15,15 @@ package squic_test import ( + "bufio" "context" + "fmt" + "net/http" "os" "path/filepath" "runtime" + "strconv" + "strings" "testing" "time" @@ -35,6 +40,13 @@ const ( tinyServerRemoteAddr = "1-ff00:0:111,127.0.0.20:12345" ) +var tinyBRMetricsEndpoints = []string{ + "http://127.0.0.9:30442/metrics", + "http://127.0.0.10:30442/metrics", + "http://127.0.0.17:30442/metrics", + "http://[fd00:f00d:cafe::7f00:9]:30442/metrics", +} + // TestQUICOverHummingbirdTinyTopology verifies that a QUIC handshake plus a // stream exchange succeeds when the client sends over a Hummingbird reservation // path in the running tiny topology. @@ -83,6 +95,73 @@ func TestQUICOverHummingbirdTinyTopology(t *testing.T) { require.NoError(t, <-serverErr) } +// TestQUICOverHummingbirdTinyTopologyTokenBucketDemotion verifies that QUIC +// still succeeds when the reservation bandwidth is intentionally tiny, causing +// routers to demote packets to best-effort via token bucket checks. 
+func TestQUICOverHummingbirdTinyTopologyTokenBucketDemotion(t *testing.T) { + if testing.Short() { + t.Skip("skipping live tiny-topology test in short mode") + } + if os.Getenv("SCION_RUN_LIVE_TESTS") == "" { + t.Skip("set SCION_RUN_LIVE_TESTS=1 to run live tiny-topology tests") + } + + before, err := scrapeHummCounterTotals(tinyBRMetricsEndpoints) + require.NoError(t, err) + + keysRoot := requireTinyTopologyAssets(t) + serverLocal, err := hummingbirdtest.MustParseUDPAddr(tinyServerListenAddr) + require.NoError(t, err) + clientLocal, err := hummingbirdtest.MustParseUDPAddr(tinyClientListenAddr) + require.NoError(t, err) + serverRemote, err := hummingbirdtest.MustParseUDPAddr(tinyServerRemoteAddr) + require.NoError(t, err) + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + serverErr := make(chan error, 1) + go func() { + serverErr <- hummingbirdtest.RunServer( + ctx, + tinyServerDaemonAddr, + serverLocal, + clientLocal.IA, + t.Logf, + ) + }() + + params := hummingbirdtest.DefaultReservationParams() + // Intentionally tiny so payload quickly exceeds token bucket and gets demoted. 
+ params.Bandwidth = 1 + err = hummingbirdtest.RunClientWithParams( + ctx, + tinyClientDaemonAddr, + clientLocal, + serverRemote, + keysRoot, + params, + t.Logf, + ) + require.NoErrorf(t, err, + "QUIC dial timed out or failed; demoted packets should still complete over best-effort") + + require.NoError(t, <-serverErr) + + after, err := scrapeHummCounterTotals(tinyBRMetricsEndpoints) + require.NoError(t, err) + + hummCounter := deltaCounter(after, before, "router_humm_processed_pkts_total") + flyoverCounter := deltaCounter(after, before, "router_humm_flyover_pkts_total") + demotedCounter := deltaCounter(after, before, "router_humm_demoted_tokenbucket_total") + t.Logf("total hummingbird packets: %f", hummCounter) + t.Logf("total flyover packets: %f", flyoverCounter) + t.Logf("total demotions: %f", demotedCounter) + require.Greater(t, hummCounter, float64(0)) + require.Greater(t, flyoverCounter, float64(0)) + require.Greater(t, demotedCounter, float64(0)) +} + func requireTinyTopologyAssets(t *testing.T) string { t.Helper() @@ -100,3 +179,60 @@ func requireRepoRoot(t *testing.T) string { require.True(t, ok, "resolve current file path") return filepath.Clean(filepath.Join(filepath.Dir(file), "..", "..", "..")) } + +func scrapeHummCounterTotals(endpoints []string) (map[string]float64, error) { + totals := map[string]float64{ + "router_humm_processed_pkts_total": 0, + "router_humm_flyover_pkts_total": 0, + "router_humm_demoted_freshness_total": 0, + "router_humm_demoted_expired_total": 0, + "router_humm_demoted_tokenbucket_total": 0, + } + client := &http.Client{Timeout: 3 * time.Second} + + for _, endpoint := range endpoints { + resp, err := client.Get(endpoint) + if err != nil { + return nil, fmt.Errorf("fetching %s: %w", endpoint, err) + } + if resp.StatusCode != http.StatusOK { + _ = resp.Body.Close() + return nil, fmt.Errorf("fetching %s: status %s", endpoint, resp.Status) + } + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() 
+ if !strings.HasPrefix(line, "router_humm_") || strings.HasPrefix(line, "#") { + continue + } + space := strings.LastIndexByte(line, ' ') + if space <= 0 || space == len(line)-1 { + continue + } + namePart := line[:space] + metricName := namePart + if brace := strings.IndexByte(namePart, '{'); brace >= 0 { + metricName = namePart[:brace] + } + if _, ok := totals[metricName]; !ok { + continue + } + v, err := strconv.ParseFloat(strings.TrimSpace(line[space+1:]), 64) + if err != nil { + continue + } + totals[metricName] += v + } + if err := scanner.Err(); err != nil { + _ = resp.Body.Close() + return nil, fmt.Errorf("reading %s: %w", endpoint, err) + } + _ = resp.Body.Close() + } + return totals, nil +} + +func deltaCounter(after, before map[string]float64, metric string) float64 { + return after[metric] - before[metric] +} diff --git a/pkg/snet/squic/hummingbirdtest/helpers.go b/pkg/snet/squic/hummingbirdtest/helpers.go index 2887325964..eb4e286ccc 100644 --- a/pkg/snet/squic/hummingbirdtest/helpers.go +++ b/pkg/snet/squic/hummingbirdtest/helpers.go @@ -50,7 +50,7 @@ const ( // HbirdTestResID is the synthetic reservation ID used by the tests. HbirdTestResID = uint32(1) // HbirdTestBandwidth is the synthetic bandwidth class used by the tests. - HbirdTestBandwidth = uint16(2) + HbirdTestBandwidth = uint16(1024 - 1) // HbirdTestDuration is the reservation lifetime in seconds. HbirdTestDuration = uint16(9) // HbirdTestStartOffset backdates the reservation slightly so it is already @@ -63,6 +63,26 @@ const ( QUICTestMessageReply = "pong over scion" ) +// ReservationParams configures synthetic Hummingbird reservation values used by +// live tests. +type ReservationParams struct { + ResID uint32 + Bandwidth uint16 + Duration uint16 + StartOffset time.Duration +} + +// DefaultReservationParams returns the baseline reservation used by existing +// Hummingbird live tests. 
+func DefaultReservationParams() ReservationParams { + return ReservationParams{ + ResID: HbirdTestResID, + Bandwidth: HbirdTestBandwidth, + Duration: HbirdTestDuration, + StartOffset: HbirdTestStartOffset, + } +} + // QUICTestMessageClient is the fixed client payload used by the round-trip // tests. It is kept at or above 20 KiB so the exchange exercises multiple // packets instead of succeeding on only a few packets. @@ -218,12 +238,35 @@ func BuildHummingbirdRemote( serverRemote *snet.UDPAddr, keysRoot string, log Logger, +) (*snet.UDPAddr, error) { + return BuildHummingbirdRemoteWithParams( + ctx, + conn, + clientLocal, + serverRemote, + keysRoot, + DefaultReservationParams(), + log, + ) +} + +// BuildHummingbirdRemoteWithParams turns a plain remote address into one that +// carries a Hummingbird reservation path and the matching next hop with +// caller-provided reservation parameters. +func BuildHummingbirdRemoteWithParams( + ctx context.Context, + conn daemon.Connector, + clientLocal *snet.UDPAddr, + serverRemote *snet.UDPAddr, + keysRoot string, + params ReservationParams, + log Logger, ) (*snet.UDPAddr, error) { basePath, err := BasePath(ctx, conn, clientLocal.IA, serverRemote.IA, log) if err != nil { return nil, err } - reservation, err := NewHummingbirdReservation(basePath, keysRoot, time.Now(), log) + reservation, err := NewHummingbirdReservationWithParams(basePath, keysRoot, time.Now(), params, log) if err != nil { return nil, err } @@ -252,13 +295,26 @@ func NewHummingbirdReservation( keysRoot string, now time.Time, log Logger, +) (snet.DataplanePath, error) { + return NewHummingbirdReservationWithParams(basePath, keysRoot, now, DefaultReservationParams(), log) +} + +// NewHummingbirdReservationWithParams derives one flyover reservation per hop +// using caller-provided reservation parameters and wraps them into a reservation +// dataplane path. 
+func NewHummingbirdReservationWithParams( + basePath snet.Path, + keysRoot string, + now time.Time, + params ReservationParams, + log Logger, ) (snet.DataplanePath, error) { baseHops := snetpath.InterfacesToBaseHops(basePath.Metadata().Interfaces) if len(baseHops) == 0 { return nil, serrors.New("base path does not contain any hops") } - startTime := uint32(now.Add(HbirdTestStartOffset).Unix()) + startTime := uint32(now.Add(params.StartOffset).Unix()) aesByIA := make(map[addr.IA]cipher.Block) buffer := make([]byte, hummlib.AkBufferSize) flyovers := make([]*snetpath.Hop, 0, len(baseHops)) @@ -279,29 +335,29 @@ func NewHummingbirdReservation( // reservation so the routers can validate every flyover hop. akRaw := hummlib.DeriveAuthKey( block, - HbirdTestResID, - HbirdTestBandwidth, + params.ResID, + params.Bandwidth, baseHop.Ingress, baseHop.Egress, startTime, - HbirdTestDuration, + params.Duration, buffer, ) var ak [hummlib.AkBufferSize]byte copy(ak[:], akRaw) if log != nil { log("reservation inputs ia=%s in=%d eg=%d res_id=%d bw=%d start=%d dur=%d ak=%s", - baseHop.IA, baseHop.Ingress, baseHop.Egress, HbirdTestResID, - HbirdTestBandwidth, startTime, HbirdTestDuration, hex.EncodeToString(ak[:])) + baseHop.IA, baseHop.Ingress, baseHop.Egress, params.ResID, + params.Bandwidth, startTime, params.Duration, hex.EncodeToString(ak[:])) } flyovers = append(flyovers, &snetpath.Hop{ BaseHop: baseHop, Flyover: &snetpath.FlyoverData{ - ResID: HbirdTestResID, + ResID: params.ResID, Ak: ak, - Bw: HbirdTestBandwidth, + Bw: params.Bandwidth, StartTime: startTime, - Duration: HbirdTestDuration, + Duration: params.Duration, }, }) } @@ -479,6 +535,28 @@ func RunClient( remoteAddr *snet.UDPAddr, keysRoot string, log Logger, +) error { + return RunClientWithParams( + ctx, + daemonAddr, + localAddr, + remoteAddr, + keysRoot, + DefaultReservationParams(), + log, + ) +} + +// RunClientWithParams runs the client side with caller-provided reservation +// parameters for Hummingbird path 
construction. +func RunClientWithParams( + ctx context.Context, + daemonAddr string, + localAddr *snet.UDPAddr, + remoteAddr *snet.UDPAddr, + keysRoot string, + params ReservationParams, + log Logger, ) error { clientDaemon, err := ConnectDaemon(ctx, daemonAddr) if err != nil { @@ -496,7 +574,8 @@ func RunClient( } defer clientConn.Close() - remote, err := BuildHummingbirdRemote(ctx, clientDaemon, localAddr, remoteAddr, keysRoot, log) + remote, err := BuildHummingbirdRemoteWithParams( + ctx, clientDaemon, localAddr, remoteAddr, keysRoot, params, log) if err != nil { return err } diff --git a/router/dataplane_hbird.go b/router/dataplane_hbird.go index f78629f761..60ecf96614 100644 --- a/router/dataplane_hbird.go +++ b/router/dataplane_hbird.go @@ -34,6 +34,12 @@ import ( "github.com/scionproto/scion/router/tokenbucket" ) +// MaxFreshnessTolerance is the allowed drift from current time for a "fresh" packet. +// If the packet exceeds this limit (under or over), it will be best-effort. +// This parameter heavily correlates with the allowed clock drift, and maximum path latency. +// TODO: make a configurable value instead of using a flat 5 seconds +const MaxFreshnessTolerance = 5 * time.Second + // SetHbirdKey sets the key for the PRF function used to compute the Hummingbird Auth Key. 
func (d *dataPlane) SetHbirdKey(key []byte) error { d.mtx.Lock() @@ -58,7 +64,7 @@ func (d *dataPlane) SetHbirdKey(key []byte) error { return nil } -func (p *scionPacketProcessor) parseHbirdPath() disposition { +func (p *scionPacketProcessor) parseHbirdPath(sc sizeClass) disposition { var err error if !p.hbirdPath.CurrHFIsHopStart() || !p.hbirdPath.CurrINFMatchesCurrHF() { return errorDiscard("error", errMalformedPath) @@ -75,6 +81,7 @@ func (p *scionPacketProcessor) parseHbirdPath() disposition { } if p.flyoverField.Flyover { p.pkt.PriorityLabel = pr.WithPriority + p.pkt.Link.Metrics()[sc].HummFlyoverPackets.Inc() } return pForward @@ -134,7 +141,7 @@ func (p *scionPacketProcessor) validateHopExpiryHbird() disposition { return pSlowPath } -func (p *scionPacketProcessor) validateReservationExpiry() disposition { +func (p *scionPacketProcessor) validateReservationExpiry(sc sizeClass) disposition { startTime := util.SecsToTime(p.hbirdPath.PathMeta.BaseTS - uint32(p.flyoverField.ResStartTime)) endTime := startTime.Add(time.Duration(p.flyoverField.Duration) * time.Second) now := time.Now() @@ -145,6 +152,7 @@ func (p *scionPacketProcessor) validateReservationExpiry() disposition { "reservation start", startTime, "reservation end", endTime, "now", now) p.pkt.PriorityLabel = pr.WithBestEffort + p.pkt.Link.Metrics()[sc].HummDemotedExpiredPkts.Inc() return pForward } @@ -356,19 +364,21 @@ func (p *scionPacketProcessor) validateHbirdTransitUnderlaySrc() disposition { return pForward } -// Verifies the PathMetaHeader timestamp is recent -// Current implementation works with a nanosecond granularity HighResTS -func (p *scionPacketProcessor) validatePathMetaTimestamp() { +// Verifies the PathMetaHeader timestamp is recent. +// Current implementation works with a millisecond granularity HighResTS. +// If it deviates from now by more than MaxFreshnessTolerance, the packet becomes best-effort. 
+func (p *scionPacketProcessor) validatePathMetaTimestamp(sc sizeClass) { timestamp := util.SecsToTime(p.hbirdPath.PathMeta.BaseTS).Add( time.Duration(p.hbirdPath.PathMeta.HighResTS>>22) * time.Millisecond) - // TODO: make a configurable value instead of using a flat 1 seconds - if time.Until(timestamp).Abs() > time.Duration(1)*time.Second { - // Forward with best-effort is timestamp is too old. + + if time.Until(timestamp).Abs() > MaxFreshnessTolerance { + // Forward with best-effort if timestamp is too old or too far in the future. p.pkt.PriorityLabel = pr.WithBestEffort + p.pkt.Link.Metrics()[sc].HummDemotedFreshnessPkts.Inc() } } -func (p *scionPacketProcessor) checkReservationBandwidth() disposition { +func (p *scionPacketProcessor) checkReservationBandwidth(sc sizeClass) disposition { // Only check bandwidth if packet is given priority. // Bandwidth check is NOT performed for late packets that have flyover but no priority. if p.pkt.PriorityLabel != pr.WithPriority { @@ -419,6 +429,7 @@ func (p *scionPacketProcessor) checkReservationBandwidth() disposition { log.Debug("hummingbird packet exceeding allowed bandwidth token bucket", "resID", fmt.Sprintf("%x", p.flyoverField.ResID)) p.pkt.PriorityLabel = pr.WithBestEffort + p.pkt.Link.Metrics()[sc].HummDemotedTokenBucketPkts.Inc() } else { log.Debug("hummingbird checking BW: packet fits into bucket") } @@ -633,21 +644,26 @@ func (p *scionPacketProcessor) processHbirdEgress() disposition { return pForward } -// func (p *scionPacketProcessor) processHummingbird() (processResult, error) { func (p *scionPacketProcessor) processHummingbird() disposition { + // Increment the counter of received Hummingbird packets. + sc := ClassOfSize(len(p.pkt.RawPacket)) + p.pkt.Link.Metrics()[sc].HummProcessedPackets.Inc() + var ok bool p.hbirdPath, ok = p.scionLayer.Path.(*hummingbird.Raw) if !ok { // TODO(lukedirtwalker) parameter problem invalid path? 
return errorDiscard("error", errMalformedPath) } - if disp := p.parseHbirdPath(); disp != pForward { + + if disp := p.parseHbirdPath(sc); disp != pForward { return disp } + if disp := p.determinePeerHbird(); disp != pForward { return disp } - // deleteme uncomment + if disp := p.validateHopExpiryHbird(); disp != pForward { return disp } @@ -667,21 +683,21 @@ func (p *scionPacketProcessor) processHummingbird() disposition { return disp } if p.flyoverField.Flyover { - return p.processHBIRDFlyover() + return p.processHBIRDFlyover(sc) } return p.processHBIRDBestEffort() } -func (p *scionPacketProcessor) processHBIRDFlyover() disposition { - // deleteme uncomment - if disp := p.validateReservationExpiry(); disp != pForward { +func (p *scionPacketProcessor) processHBIRDFlyover(sc sizeClass) disposition { + + if disp := p.validateReservationExpiry(sc); disp != pForward { return disp } if disp := p.verifyHbirdFlyoverMac(); disp != pForward { return disp } - p.validatePathMetaTimestamp() - if disp := p.checkReservationBandwidth(); disp != pForward { + p.validatePathMetaTimestamp(sc) + if disp := p.checkReservationBandwidth(sc); disp != pForward { return disp } if disp := p.handleHbirdIngressRouterAlert(); disp != pForward { diff --git a/router/export_test.go b/router/export_test.go index 93f36d33fc..3ce1b02d1c 100644 --- a/router/export_test.go +++ b/router/export_test.go @@ -59,12 +59,13 @@ type SlowPathRequestView struct { // Implements the link interface minimally type MockLink struct { - ifID uint16 + ifID uint16 + metrics *InterfaceMetrics } func (l *MockLink) IsUp() bool { return true } func (l *MockLink) IfID() uint16 { return l.ifID } -func (l *MockLink) Metrics() *InterfaceMetrics { return nil } +func (l *MockLink) Metrics() *InterfaceMetrics { return l.metrics } func (l *MockLink) Scope() LinkScope { return Internal } func (l *MockLink) BFDSession() *bfd.Session { return nil } func (l *MockLink) Resolve(p *Packet, host addr.Host, port uint16) error { return nil } 
@@ -73,7 +74,14 @@ func (l *MockLink) SendBlocking(p *Packet) {} var _ Link = new(MockLink) -func newMockLink(ingress uint16) Link { return &MockLink{ifID: ingress} } +func newMockLink(ingress uint16) Link { + local := addr.MustParseIA("1-ff00:0:1") + neighbor := addr.MustParseIA("1-ff00:0:2") + return &MockLink{ + ifID: ingress, + metrics: newInterfaceMetrics(metrics, ingress, local, "", neighbor), + } +} // NewPacket makes a mock packet. It has shortcomings which makes it unsuited for some tests: it // refers to a mock link that has the scope Internal in all cases, and a blank remote address. diff --git a/router/metrics.go b/router/metrics.go index 426954ba8c..9c7a2a7627 100644 --- a/router/metrics.go +++ b/router/metrics.go @@ -29,22 +29,28 @@ import ( // Metrics defines the data-plane metrics for the BR. type Metrics struct { - InputBytesTotal *prometheus.CounterVec - OutputBytesTotal *prometheus.CounterVec - InputPacketsTotal *prometheus.CounterVec - OutputPacketsTotal *prometheus.CounterVec - ProcessedPackets *prometheus.CounterVec - DroppedPacketsTotal *prometheus.CounterVec - InterfaceUp *prometheus.GaugeVec - BFDInterfaceStateChanges *prometheus.CounterVec - BFDPacketsSent *prometheus.CounterVec - BFDPacketsReceived *prometheus.CounterVec - ServiceInstanceCount *prometheus.GaugeVec - ServiceInstanceChanges *prometheus.CounterVec - SiblingReachable *prometheus.GaugeVec - SiblingBFDPacketsSent *prometheus.CounterVec - SiblingBFDPacketsReceived *prometheus.CounterVec - SiblingBFDStateChanges *prometheus.CounterVec + InputBytesTotal *prometheus.CounterVec + OutputBytesTotal *prometheus.CounterVec + InputPacketsTotal *prometheus.CounterVec + OutputPacketsTotal *prometheus.CounterVec + ProcessedPackets *prometheus.CounterVec + PriorityForwardedPackets *prometheus.CounterVec + DroppedPacketsTotal *prometheus.CounterVec + HummProcessedPackets *prometheus.CounterVec + HummFlyoverPackets *prometheus.CounterVec + HummDemotedFreshnessPkts *prometheus.CounterVec + 
HummDemotedExpiredPkts *prometheus.CounterVec + HummDemotedTokenBucketPkts *prometheus.CounterVec + InterfaceUp *prometheus.GaugeVec + BFDInterfaceStateChanges *prometheus.CounterVec + BFDPacketsSent *prometheus.CounterVec + BFDPacketsReceived *prometheus.CounterVec + ServiceInstanceCount *prometheus.GaugeVec + ServiceInstanceChanges *prometheus.CounterVec + SiblingReachable *prometheus.GaugeVec + SiblingBFDPacketsSent *prometheus.CounterVec + SiblingBFDPacketsReceived *prometheus.CounterVec + SiblingBFDStateChanges *prometheus.CounterVec } // NewMetrics initializes the metrics for the Border Router, and registers them with the default @@ -58,6 +64,13 @@ func NewMetrics() *Metrics { }, []string{"interface", "isd_as", "neighbor_isd_as", "sizeclass"}, ), + PriorityForwardedPackets: promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "router_priority_forwarded_pkts_total", + Help: "Total number of priority packets successfully forwarded by the router", + }, + []string{"interface", "isd_as", "neighbor_isd_as", "sizeclass"}, + ), InputBytesTotal: promauto.NewCounterVec( prometheus.CounterOpts{ Name: "router_input_bytes_total", @@ -93,6 +106,41 @@ func NewMetrics() *Metrics { }, []string{"interface", "isd_as", "neighbor_isd_as", "sizeclass", "reason"}, ), + HummProcessedPackets: promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "router_humm_processed_pkts_total", + Help: "Total number of Hummingbird packets received by the router processor", + }, + []string{"interface", "isd_as", "neighbor_isd_as", "sizeclass"}, + ), + HummFlyoverPackets: promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "router_humm_flyover_pkts_total", + Help: "Total number of parsed Hummingbird packets with a flyover", + }, + []string{"interface", "isd_as", "neighbor_isd_as", "sizeclass"}, + ), + HummDemotedFreshnessPkts: promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "router_humm_demoted_freshness_total", + Help: "Total number of Hummingbird packets demoted to 
best-effort due to freshness checks", + }, + []string{"interface", "isd_as", "neighbor_isd_as", "sizeclass"}, + ), + HummDemotedExpiredPkts: promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "router_humm_demoted_expired_total", + Help: "Total number of Hummingbird packets demoted to best-effort due to expired reservations", + }, + []string{"interface", "isd_as", "neighbor_isd_as", "sizeclass"}, + ), + HummDemotedTokenBucketPkts: promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "router_humm_demoted_tokenbucket_total", + Help: "Total number of Hummingbird packets demoted to best-effort due to token bucket checks", + }, + []string{"interface", "isd_as", "neighbor_isd_as", "sizeclass"}, + ), InterfaceUp: promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "router_interface_up", @@ -269,6 +317,12 @@ type trafficMetrics struct { DroppedPacketsBusyForwarder prometheus.Counter DroppedPacketsBusySlowPath prometheus.Counter ProcessedPackets prometheus.Counter + PriorityForwardedPackets prometheus.Counter + HummProcessedPackets prometheus.Counter + HummFlyoverPackets prometheus.Counter + HummDemotedFreshnessPkts prometheus.Counter + HummDemotedExpiredPkts prometheus.Counter + HummDemotedTokenBucketPkts prometheus.Counter Output [ttMax]outputMetrics } @@ -305,6 +359,15 @@ func newTrafficMetrics( InputBytesTotal: metrics.InputBytesTotal.MustCurryWith(ifLabels).With(scLabels), InputPacketsTotal: metrics.InputPacketsTotal.MustCurryWith(ifLabels).With(scLabels), ProcessedPackets: metrics.ProcessedPackets.MustCurryWith(ifLabels).With(scLabels), + PriorityForwardedPackets: metrics.PriorityForwardedPackets.MustCurryWith(ifLabels). + With(scLabels), + HummProcessedPackets: metrics.HummProcessedPackets.MustCurryWith(ifLabels).With(scLabels), + HummFlyoverPackets: metrics.HummFlyoverPackets.MustCurryWith(ifLabels).With(scLabels), + HummDemotedFreshnessPkts: metrics.HummDemotedFreshnessPkts.MustCurryWith(ifLabels). 
+ With(scLabels), + HummDemotedExpiredPkts: metrics.HummDemotedExpiredPkts.MustCurryWith(ifLabels).With(scLabels), + HummDemotedTokenBucketPkts: metrics.HummDemotedTokenBucketPkts.MustCurryWith(ifLabels). + With(scLabels), } // Output metrics have the extra "trafficType" label. @@ -339,6 +402,12 @@ func newTrafficMetrics( c.DroppedPacketsBusyForwarder.Add(0) c.DroppedPacketsBusySlowPath.Add(0) c.ProcessedPackets.Add(0) + c.PriorityForwardedPackets.Add(0) + c.HummProcessedPackets.Add(0) + c.HummFlyoverPackets.Add(0) + c.HummDemotedFreshnessPkts.Add(0) + c.HummDemotedExpiredPkts.Add(0) + c.HummDemotedTokenBucketPkts.Add(0) return c } diff --git a/router/underlayproviders/udpip/udpip.go b/router/underlayproviders/udpip/udpip.go index e5cef40ec3..24e68e630e 100644 --- a/router/underlayproviders/udpip/udpip.go +++ b/router/underlayproviders/udpip/udpip.go @@ -443,6 +443,10 @@ func (u *udpConnection) send(batchSize int, pool router.PacketPool) { router.UpdateOutputMetrics(u.metrics, iterator) // Return storage for all the written packets. for p := range iterator { + if p.PriorityLabel == pr.WithPriority { + sc := router.ClassOfSize(len(p.RawPacket)) + u.metrics[sc].PriorityForwardedPackets.Inc() + } pool.Put(p) } // The next packet to write is now the first one not written. 
diff --git a/router/underlayproviders/udpip/udpip_test.go b/router/underlayproviders/udpip/udpip_test.go index 72e8c85f05..e3fcfa5273 100644 --- a/router/underlayproviders/udpip/udpip_test.go +++ b/router/underlayproviders/udpip/udpip_test.go @@ -18,6 +18,7 @@ package udpip import ( "crypto/rand" "encoding/binary" + "fmt" "hash/fnv" "net/netip" "testing" @@ -33,6 +34,7 @@ import ( "github.com/scionproto/scion/pkg/slayers" "github.com/scionproto/scion/pkg/slayers/path" "github.com/scionproto/scion/pkg/slayers/path/scion" + "github.com/scionproto/scion/router" ) var ( @@ -318,3 +320,22 @@ func TestComputeProcIdErrorCases(t *testing.T) { }) } } + +// BenchmarkClassOfSize measures the overhead of calling ClassOfSize per packet. +// Since our udpConnection.send function calls it for every packet, this overhead should be small. +func BenchmarkClassOfSize(b *testing.B) { + sizes := []int{1, 50, 100, 200, 2000, 9000} + + var sink int + for _, sz := range sizes { + sz := sz + b.Run(fmt.Sprintf("size=%d", sz), func(b *testing.B) { + var sc int + for i := 0; i < b.N; i++ { + sc = int(router.ClassOfSize(sz)) + } + sink = sc + }) + } + _ = sink +}