diff --git a/docs/graph.svg b/docs/graph.svg index 59d85c29c..81a0301f8 100644 --- a/docs/graph.svg +++ b/docs/graph.svg @@ -4,885 +4,939 @@ - - - + + + bond_output - -bond_output + +bond_output port_output - -port_output + +port_output bond_output->port_output - - + + iface_input - -iface_input + +iface_input xconnect - -xconnect + +xconnect iface_input->xconnect - - + + eth_input - -eth_input + +eth_input iface_input->eth_input - - + + bridge_input - -bridge_input + +bridge_input iface_input->bridge_input - - + + iface_output - -iface_output + +iface_output iface_output->bond_output - - + + iface_output->port_output - - + + iface_output->bridge_input - - + + + + + +vxlan_output + +vxlan_output + + + +iface_output->vxlan_output + + port_tx - -port_tx + +port_tx - + port_output->port_tx - - + + port_rx - -port_rx + +port_rx - + port_rx->iface_input - - + + - + xconnect->port_output - - + + lacp_input - -lacp_input + +lacp_input eth_input->lacp_input - - + + snap_input - -snap_input + +snap_input eth_input->snap_input - - + + arp_input - -arp_input + +arp_input eth_input->arp_input - - + + - + ip_input - -ip_input + +ip_input eth_input->ip_input - - + + - + ip6_input - -ip6_input + +ip6_input eth_input->ip6_input - - + + eth_output - -eth_output + +eth_output eth_output->iface_output - - + + l2_redirect - -l2_redirect + +l2_redirect lacp_output - -lacp_output + +lacp_output - + lacp_output->eth_output - - + + - + snap_input->l2_redirect - - + + arp_input_reply - -arp_input_reply + +arp_input_reply - + arp_input->arp_input_reply - - + + arp_input_request - -arp_input_request + +arp_input_request - + arp_input->arp_input_request - - + + arp_output_reply - -arp_output_reply + +arp_output_reply - + arp_output_reply->eth_output - - + + arp_output_request - -arp_output_request + +arp_output_request - + arp_output_request->eth_output - - + + bridge_flood - -bridge_flood + +bridge_flood - + bridge_flood->iface_input - - + + - + bridge_flood->iface_output - - + + - + + 
+vxlan_flood + +vxlan_flood + + +bridge_flood->vxlan_flood + + + + + bridge_input->iface_input - - + + - + bridge_input->iface_output - - + + - + bridge_input->bridge_flood - - + + + + + +vxlan_flood->iface_output + + - + ospf_redirect - -ospf_redirect + +ospf_redirect - + ospf_redirect->l2_redirect - - + + - + loopback_input - -loopback_input + +loopback_input - + loopback_input->ip_input - - + + - + loopback_input->ip6_input - - + + - + loopback_output - -loopback_output + +loopback_output - + xvrf - -xvrf + +xvrf - + xvrf->ip_input - - + + - + xvrf->ip6_input - - + + - + ip_forward - -ip_forward + +ip_forward - + ip_output - -ip_output + +ip_output - + ip_forward->ip_output - - + + - + ip_fragment - -ip_fragment + +ip_fragment - + ip_fragment->ip_output - - + + - + ip_hold - -ip_hold + +ip_hold - + ip_input->ip_forward - - + + - + ip_input_local - -ip_input_local + +ip_input_local - + ip_input->ip_input_local - - + + - + ip_input->ip_output - - + + - + dnat44_dynamic - -dnat44_dynamic + +dnat44_dynamic - + ip_input->dnat44_dynamic - - + + - + dnat44_static - -dnat44_static + +dnat44_static - + ip_input->dnat44_static - - + + - + ip_loadbalance - -ip_loadbalance + +ip_loadbalance - + ip_loadbalance->ip_output - - + + - + ip_input_local->ospf_redirect - - + + - + ipip_input - -ipip_input + +ipip_input - + ip_input_local->ipip_input - - + + - + icmp_input - -icmp_input + +icmp_input - + ip_input_local->icmp_input - - + + - + l4_input_local - -l4_input_local + +l4_input_local - + ip_input_local->l4_input_local - - + + - + ip_output->eth_output - - + + - + ip_output->xvrf - - + + - + ip_output->ip_fragment - - + + - + ip_output->ip_hold - - + + - + ip_output->ip_loadbalance - - + + - + ipip_output - -ipip_output + +ipip_output - + ip_output->ipip_output - - + + - + sr6_output - -sr6_output + +sr6_output - + ip_output->sr6_output - - + + - + ip6_forward - -ip6_forward + +ip6_forward - + ip6_output - -ip6_output + +ip6_output - + ip6_forward->ip6_output - - + + - + 
ip6_hold - -ip6_hold + +ip6_hold - + ip6_input->ip6_forward - - + + - + ip6_input_local - -ip6_input_local + +ip6_input_local - + ip6_input->ip6_input_local - - + + - + ip6_input->ip6_output - - + + - + sr6_local - -sr6_local + +sr6_local - + ip6_input->sr6_local - - + + - + ip6_loadbalance - -ip6_loadbalance + +ip6_loadbalance - + ip6_loadbalance->ip6_output - - + + - + ip6_input_local->ospf_redirect - - + + - + icmp6_input - -icmp6_input + +icmp6_input - + ip6_input_local->icmp6_input - - + + - + ip6_input_local->l4_input_local - - + + - + ip6_output->eth_output - - + + - + ip6_output->xvrf - - + + - + ip6_output->ip6_hold - - + + - + ip6_output->ip6_loadbalance - - + + - + ip6_output->sr6_output - - + + - + ipip_input->ip_input - - + + - + ipip_output->ip_output - - + + - + + +vxlan_input + +vxlan_input + + + +vxlan_input->iface_input + + + + +vxlan_output->ip_output + + + + + dnat44_dynamic->ip_forward - - + + - + dnat44_dynamic->ip_input_local - - + + - + dnat44_static->ip_forward - - + + - + dnat44_static->ip_input_local - - + + - + sr6_local->ip_input - - + + - + sr6_local->ip6_input - - + + - + sr6_local->ip6_input_local - - + + - + sr6_output->ip6_output - - + + - + icmp_output - -icmp_output + +icmp_output - + icmp_input->icmp_output - - + + - + icmp_local_send - -icmp_local_send + +icmp_local_send - + icmp_local_send->icmp_output - - + + - + icmp_output->ip_output - - + + - + icmp6_output - -icmp6_output + +icmp6_output - + icmp6_input->icmp6_output - - + + - + ndp_na_input - -ndp_na_input + +ndp_na_input - + icmp6_input->ndp_na_input - - + + - + ndp_ns_input - -ndp_ns_input + +ndp_ns_input - + icmp6_input->ndp_ns_input - - + + - + ndp_rs_input - -ndp_rs_input + +ndp_rs_input - + icmp6_input->ndp_rs_input - - + + - + icmp6_local_send - -icmp6_local_send + +icmp6_local_send - + icmp6_local_send->icmp6_output - - + + - + icmp6_output->ip6_output - - + + - + ndp_na_output - -ndp_na_output + +ndp_na_output - + ndp_na_output->icmp6_output - - + + - + 
ndp_ns_output - -ndp_ns_output + +ndp_ns_output - + ndp_ns_output->icmp6_output - - + + + + + +l4_input_local->vxlan_input + + - + l4_loopback_output - -l4_loopback_output + +l4_loopback_output - + l4_input_local->l4_loopback_output - - + + - + dhcp_input - -dhcp_input + +dhcp_input - + l4_input_local->dhcp_input - - + + - + l4_loopback_output->loopback_output - - + + diff --git a/docs/meson.build b/docs/meson.build index 478f5ef41..1ec5975ed 100644 --- a/docs/meson.build +++ b/docs/meson.build @@ -74,8 +74,8 @@ custom_target( # Individual command man pages # The list is hardcoded since we can't run grcli during meson configuration. grcli_commands = [ - 'address', 'affinity', 'conntrack', 'dnat44', 'events', 'fdb', 'graph', - 'interface', 'logging', 'nexthop', 'ping', 'ping6', 'route', + 'address', 'affinity', 'conntrack', 'dnat44', 'events', 'fdb', 'flood', + 'graph', 'interface', 'logging', 'nexthop', 'ping', 'ping6', 'route', 'router-advert', 'snat44', 'stats', 'trace', 'traceroute', 'traceroute6', 'tunsrc', ] diff --git a/frr/if_grout.c b/frr/if_grout.c index fb3a6ca3e..12c0b3bff 100644 --- a/frr/if_grout.c +++ b/frr/if_grout.c @@ -7,15 +7,15 @@ #include #include +#include #include #include #include #include +#include #include -#define GROUT_NS NS_DEFAULT - static uint64_t gr_if_flags_to_netlink(struct gr_iface *gr_if, enum zebra_link_type link_type) { uint64_t frr_if_flags = 0; @@ -43,9 +43,12 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { enum zebra_slave_iftype slave_type = ZEBRA_IF_SLAVE_NONE; enum zebra_link_type link_type = ZEBRA_LLT_UNKNOWN; enum zebra_iftype zif_type = ZEBRA_IF_OTHER; + const struct gr_iface_info_bridge *gr_bridge = NULL; + const struct gr_iface_info_vxlan *gr_vxlan = NULL; const struct gr_iface_info_vlan *gr_vlan = NULL; const struct gr_iface_info_port *gr_port = NULL; const struct gr_iface_info_bond *gr_bond = NULL; + ifindex_t bridge_ifindex = IFINDEX_INTERNAL; ifindex_t link_ifindex = IFINDEX_INTERNAL; 
ifindex_t bond_ifindex = IFINDEX_INTERNAL; const struct rte_ether_addr *mac = NULL; @@ -83,6 +86,18 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { link_type = ZEBRA_LLT_ETHER; zif_type = ZEBRA_IF_VRF; break; + case GR_IFACE_TYPE_BRIDGE: + gr_bridge = (const struct gr_iface_info_bridge *)&gr_if->info; + link_type = ZEBRA_LLT_ETHER; + zif_type = ZEBRA_IF_BRIDGE; + mac = &gr_bridge->mac; + break; + case GR_IFACE_TYPE_VXLAN: + gr_vxlan = (const struct gr_iface_info_vxlan *)&gr_if->info; + link_type = ZEBRA_LLT_ETHER; + zif_type = ZEBRA_IF_VXLAN; + mac = &gr_vxlan->mac; + break; case GR_IFACE_TYPE_UNDEF: default: gr_log_err( @@ -123,12 +138,15 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { bond_ifindex = ifindex_grout_to_frr(gr_if->domain_id); slave_type = ZEBRA_IF_SLAVE_BOND; break; + case GR_IFACE_MODE_BRIDGE: + bridge_ifindex = ifindex_grout_to_frr(gr_if->domain_id); + slave_type = ZEBRA_IF_SLAVE_BRIDGE; + break; default: break; } - // no bridge support in grout - dplane_ctx_set_ifp_bridge_ifindex(ctx, IFINDEX_INTERNAL); + dplane_ctx_set_ifp_bridge_ifindex(ctx, bridge_ifindex); dplane_ctx_set_ifp_master_ifindex(ctx, IFINDEX_INTERNAL); dplane_ctx_set_ifp_bond_ifindex(ctx, bond_ifindex); dplane_ctx_set_ifp_zif_slave_type(ctx, slave_type); @@ -150,6 +168,14 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { vlan_info.vid = gr_vlan->vlan_id; dplane_ctx_set_ifp_vlan_info(ctx, &vlan_info); } + if (gr_vxlan) { + struct zebra_l2info_vxlan vi = {0}; + vi.vni_info.iftype = ZEBRA_VXLAN_IF_VNI; + vi.vni_info.vni.vni = gr_vxlan->vni; + vi.ifindex_link = ifindex_grout_to_frr(gr_vxlan->encap_vrf_id); + vi.vtep_ip.s_addr = gr_vxlan->local; + dplane_ctx_set_ifp_vxlan_info(ctx, &vi); + } } else { dplane_ctx_set_op(ctx, DPLANE_OP_INTF_DELETE); dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_QUEUED); diff --git a/frr/rt_grout.c b/frr/rt_grout.c index 6ae2b637e..57b16975a 100644 --- a/frr/rt_grout.c +++ 
b/frr/rt_grout.c @@ -5,9 +5,11 @@ #include "log_grout.h" #include "rt_grout.h" +#include #include #include +#include #include #include #include @@ -844,3 +846,154 @@ void grout_nexthop_change(bool new, struct gr_nexthop *gr_nh, bool startup) { // nexthop_free() must *NOT* be used to preserve the nh_srv6 context. free(nh); } + +void grout_macfdb_change(const struct gr_fdb_entry *fdb, bool new) { + struct zebra_dplane_ctx *ctx = dplane_ctx_alloc(); + struct ethaddr mac; + + gr_log_debug( + "%s bridge=%u iface=%u mac=%pEA vlan=%u vtep=%pI4", + new ? "add" : "del", + fdb->bridge_id, + fdb->iface_id, + &fdb->mac, + fdb->vlan_id, + &fdb->vtep + ); + + memcpy(&mac, &fdb->mac, sizeof(mac)); + + // Zebra's dplane API is asymmetric for FDB entries: + // + // - DPLANE_OP_MAC_INSTALL/DELETE is the downward path (zebra pushing + // MACs to dplane providers). The result handler is a no-op. + // - DPLANE_OP_NEIGH_INSTALL/DELETE is the upward path (dplane providers + // notifying zebra of learned MACs). This goes through + // zebra_neigh_macfdb_update() which triggers EVPN type-2 routes. + // + // It is NOT a bug to use dplane_ctx_mac_set_*() with DPLANE_OP_NEIGH_* + // ops. The macinfo and neigh fields are separate union members in the + // dplane context, and zebra's own netlink provider does the same thing + // (see rt_netlink.c netlink_macfdb_change()). 
+ dplane_ctx_set_ns_id(ctx, GROUT_NS); + dplane_ctx_set_ifindex(ctx, ifindex_grout_to_frr(fdb->iface_id)); + dplane_ctx_mac_set_addr(ctx, &mac); + dplane_ctx_mac_set_nhg_id(ctx, 0); + dplane_ctx_mac_set_ndm_state(ctx, NUD_REACHABLE); + dplane_ctx_mac_set_ndm_flags(ctx, NTF_MASTER); + dplane_ctx_mac_set_dst_present(ctx, fdb->vtep != 0); + dplane_ctx_mac_set_vtep_ip(ctx, &(struct in_addr) {fdb->vtep}); + dplane_ctx_mac_set_vid(ctx, fdb->vlan_id); + dplane_ctx_mac_set_dp_static(ctx, fdb->flags & GR_FDB_F_STATIC); + dplane_ctx_mac_set_local_inactive(ctx, false); + dplane_ctx_mac_set_is_sticky(ctx, false); + dplane_ctx_set_op(ctx, new ? DPLANE_OP_NEIGH_INSTALL : DPLANE_OP_NEIGH_DELETE); + + dplane_provider_enqueue_to_zebra(ctx); +} + +enum zebra_dplane_result grout_macfdb_update_ctx(struct zebra_dplane_ctx *ctx) { + bool add = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL; + uint32_t req_type; + size_t len; + void *req; + int ret; + + gr_log_debug( + "%s bridge=%u iface=%u mac=%pEA vlan=%u vtep=%pI4", + add ? "add" : "del", + ifindex_frr_to_grout(dplane_ctx_mac_get_br_ifindex(ctx)), // bridge + ifindex_frr_to_grout(dplane_ctx_get_ifindex(ctx)), // iface + dplane_ctx_mac_get_addr(ctx), + dplane_ctx_mac_get_vlan(ctx), + dplane_ctx_mac_get_vtep_ip(ctx) + ); + + len = add ? 
sizeof(struct gr_fdb_add_req) : sizeof(struct gr_fdb_del_req); + req = calloc(1, len); + if (req == NULL) { + gr_log_err("failed to allocate memory"); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + if (add) { + struct gr_fdb_add_req *a = req; + a->exist_ok = true; + a->fdb.iface_id = ifindex_frr_to_grout(dplane_ctx_get_ifindex(ctx)); + a->fdb.bridge_id = ifindex_frr_to_grout(dplane_ctx_mac_get_br_ifindex(ctx)); + a->fdb.vlan_id = dplane_ctx_mac_get_vlan(ctx); + a->fdb.flags = GR_FDB_F_EXTERN; + if (dplane_ctx_mac_get_dp_static(ctx)) + a->fdb.flags |= GR_FDB_F_STATIC; + memcpy(&a->fdb.mac, dplane_ctx_mac_get_addr(ctx), sizeof(a->fdb.mac)); + a->fdb.vtep = dplane_ctx_mac_get_vtep_ip(ctx)->s_addr; + req_type = GR_FDB_ADD; + } else { + struct gr_fdb_del_req *d = req; + d->missing_ok = true; + d->bridge_id = ifindex_frr_to_grout(dplane_ctx_mac_get_br_ifindex(ctx)); + d->vlan_id = dplane_ctx_mac_get_vlan(ctx); + memcpy(&d->mac, dplane_ctx_mac_get_addr(ctx), sizeof(d->mac)); + req_type = GR_FDB_DEL; + } + + ret = grout_client_send_recv(req_type, len, req, NULL); + + free(req); + + return ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE; +} + +enum zebra_dplane_result grout_vxlan_flood_update_ctx(struct zebra_dplane_ctx *ctx) { + const struct ipaddr *addr = dplane_ctx_neigh_get_ipaddr(ctx); + bool add = dplane_ctx_get_op(ctx) == DPLANE_OP_VTEP_ADD; + struct gr_flood_entry *entry; + uint32_t req_type; + size_t len; + void *req; + int ret; + + gr_log_debug( + "%s %pIA vni=%u vrf=%u", + add ? "add" : "del", + addr, + dplane_ctx_neigh_get_vni(ctx), + vrf_frr_to_grout(dplane_ctx_get_vrf(ctx)) + ); + + if (addr->ipa_type != IPADDR_V4) { + gr_log_err("IPv6 flood list entries are not supported"); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + len = add ? 
sizeof(struct gr_flood_add_req) : sizeof(struct gr_flood_del_req); + + req = calloc(1, len); + if (req == NULL) { + gr_log_err("failed to allocate memory"); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + if (add) { + struct gr_flood_add_req *a = req; + entry = &a->entry; + a->exist_ok = true; + req_type = GR_FLOOD_ADD; + } else { + struct gr_flood_del_req *d = req; + entry = &d->entry; + d->missing_ok = true; + req_type = GR_FLOOD_DEL; + } + + entry->type = GR_FLOOD_T_VTEP; + entry->vrf_id = vrf_frr_to_grout(dplane_ctx_get_vrf(ctx)); + entry->vtep.vni = dplane_ctx_neigh_get_vni(ctx); + entry->vtep.addr = addr->ipaddr_v4.s_addr; + + ret = grout_client_send_recv(req_type, len, req, NULL); + + free(req); + + return ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE; +} diff --git a/frr/rt_grout.h b/frr/rt_grout.h index 3da557e52..942c2f06d 100644 --- a/frr/rt_grout.h +++ b/frr/rt_grout.h @@ -5,6 +5,7 @@ #include #include +#include #include @@ -13,3 +14,7 @@ void grout_route6_change(bool new, struct gr_ip6_route *gr_r6); enum zebra_dplane_result grout_add_del_route(struct zebra_dplane_ctx *ctx); enum zebra_dplane_result grout_add_del_nexthop(struct zebra_dplane_ctx *ctx); void grout_nexthop_change(bool new, struct gr_nexthop *gr_nh, bool startup); + +void grout_macfdb_change(const struct gr_fdb_entry *fdb, bool new); +enum zebra_dplane_result grout_macfdb_update_ctx(struct zebra_dplane_ctx *ctx); +enum zebra_dplane_result grout_vxlan_flood_update_ctx(struct zebra_dplane_ctx *ctx); diff --git a/frr/zebra_dplane_grout.c b/frr/zebra_dplane_grout.c index 9ecbb01df..4a0a6e949 100644 --- a/frr/zebra_dplane_grout.c +++ b/frr/zebra_dplane_grout.c @@ -8,6 +8,7 @@ #include "rt_grout.h" #include +#include #include #include @@ -85,6 +86,29 @@ static int grout_client_ensure_connect(void) { return 0; } +static void grout_sync_fdb(struct event *) { + struct gr_fdb_list_req req = {.bridge_id = GR_IFACE_ID_UNDEF}; + struct gr_fdb_entry *fdb; + int ret; + + 
gr_log_debug("sync FDB entries"); + + if (grout_client_ensure_connect() < 0) + return; + + gr_api_client_stream_foreach (fdb, ret, grout_ctx.client, GR_FDB_LIST, sizeof(req), &req) { + gr_log_debug( + "sync fdb bridge %u iface %u mac %pEA", + fdb->bridge_id, + fdb->iface_id, + &fdb->mac + ); + grout_macfdb_change(fdb, true); + } + if (ret < 0) + gr_log_err("GR_FDB_LIST: %s", strerror(errno)); +} + static void grout_sync_routes(struct event *e) { struct gr_ip4_route_list_req r4_req = {.vrf_id = EVENT_VAL(e)}; struct gr_ip4_route *r4; @@ -198,15 +222,17 @@ static void grout_sync_ifaces_addresses(struct event *e) { } static void grout_sync_ifaces(struct event *) { - // Sync interfaces in dependency order: VRF first (no deps), then bond - // and ipip (need VRF only), port (needs VRF, may be bond member), vlan - // (needs parent port or bond). + // Sync interfaces in dependency order: VRF first (no deps), then bridge, bond + // and ipip (need VRF only), port (needs VRF, may be bond or bridge member), vlan + // (needs parent port or bond) and vxlan (needs VRF and bridge). 
static const gr_iface_type_t types[] = { GR_IFACE_TYPE_VRF, + GR_IFACE_TYPE_BRIDGE, GR_IFACE_TYPE_BOND, GR_IFACE_TYPE_IPIP, GR_IFACE_TYPE_PORT, GR_IFACE_TYPE_VLAN, + GR_IFACE_TYPE_VXLAN, }; struct gr_infra_iface_list_req if_req; bool sync_vrf[GR_MAX_IFACES] = {false}; @@ -239,6 +265,8 @@ static void grout_sync_ifaces(struct event *) { if (sync_vrf[i]) event_add_event(zrouter.master, grout_sync_ifaces_addresses, NULL, i, NULL); } + + event_add_event(zrouter.master, grout_sync_fdb, NULL, 0, NULL); } static void dplane_grout_connect(struct event *) { @@ -254,6 +282,9 @@ static void dplane_grout_connect(struct event *) { {.type = GR_EVENT_IP6_ADDR_ADD, .suppress_self_events = false}, {.type = GR_EVENT_IP_ADDR_DEL, .suppress_self_events = false}, {.type = GR_EVENT_IP6_ADDR_DEL, .suppress_self_events = false}, + {.type = GR_EVENT_FDB_ADD, .suppress_self_events = true}, + {.type = GR_EVENT_FDB_DEL, .suppress_self_events = true}, + {.type = GR_EVENT_FDB_UPDATE, .suppress_self_events = true}, }; if (grout_notif_subscribe(&grout_ctx.dplane_notifs, gr_evts, ARRAY_DIM(gr_evts)) < 0) @@ -343,6 +374,16 @@ static const char *gr_req_type_to_str(uint32_t e) { return TOSTRING(GR_IP6_ROUTE_LIST); case GR_SRV6_TUNSRC_SET: return TOSTRING(GR_SRV6_TUNSRC_SET); + case GR_FDB_ADD: + return TOSTRING(GR_FDB_ADD); + case GR_FDB_DEL: + return TOSTRING(GR_FDB_DEL); + case GR_FDB_LIST: + return TOSTRING(GR_FDB_LIST); + case GR_FLOOD_ADD: + return TOSTRING(GR_FLOOD_ADD); + case GR_FLOOD_DEL: + return TOSTRING(GR_FLOOD_DEL); default: snprintf(buf, sizeof(buf), "0x%x", e); return buf; @@ -421,6 +462,16 @@ static const char *gr_evt_to_str(uint32_t e) { return TOSTRING(GR_EVENT_NEXTHOP_UPDATE); case GR_EVENT_NEXTHOP_DELETE: return TOSTRING(GR_EVENT_NEXTHOP_DELETE); + case GR_EVENT_FDB_ADD: + return TOSTRING(GR_EVENT_FDB_ADD); + case GR_EVENT_FDB_UPDATE: + return TOSTRING(GR_EVENT_FDB_UPDATE); + case GR_EVENT_FDB_DEL: + return TOSTRING(GR_EVENT_FDB_DEL); + case GR_EVENT_FLOOD_ADD: + return 
TOSTRING(GR_EVENT_FLOOD_ADD); + case GR_EVENT_FLOOD_DEL: + return TOSTRING(GR_EVENT_FLOOD_DEL); default: snprintf(buf, sizeof(buf), "event 0x%x", e); return buf; @@ -467,6 +518,14 @@ static void dplane_read_notifications(struct event *event) { case GR_EVENT_IP6_ADDR_DEL: grout_interface_addr6_change(new, PAYLOAD(gr_e)); break; + + case GR_EVENT_FDB_ADD: + case GR_EVENT_FDB_UPDATE: + new = true; + // fallthrough + case GR_EVENT_FDB_DEL: + grout_macfdb_change(PAYLOAD(gr_e), new); + break; } free(gr_e); @@ -546,6 +605,14 @@ static enum zebra_dplane_result zd_grout_process_update(struct zebra_dplane_ctx case DPLANE_OP_NH_DELETE: return grout_add_del_nexthop(ctx); + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + return grout_macfdb_update_ctx(ctx); + + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + return grout_vxlan_flood_update_ctx(ctx); + case DPLANE_OP_SRV6_ENCAP_SRCADDR_SET: return grout_set_sr_tunsrc(ctx); diff --git a/frr/zebra_dplane_grout.h b/frr/zebra_dplane_grout.h index 9b03f3640..f8af1ed79 100644 --- a/frr/zebra_dplane_grout.h +++ b/frr/zebra_dplane_grout.h @@ -3,7 +3,10 @@ #pragma once +#include #include #include +#define GROUT_NS NS_DEFAULT + int grout_client_send_recv(uint32_t req_type, size_t tx_len, const void *tx_data, void **rx_data); diff --git a/modules/infra/api/gr_infra.h b/modules/infra/api/gr_infra.h index f6f4bed0a..4d7d7f0e2 100644 --- a/modules/infra/api/gr_infra.h +++ b/modules/infra/api/gr_infra.h @@ -22,6 +22,7 @@ typedef enum : uint8_t { GR_IFACE_TYPE_IPIP, GR_IFACE_TYPE_BOND, GR_IFACE_TYPE_BRIDGE, + GR_IFACE_TYPE_VXLAN, GR_IFACE_TYPE_COUNT } gr_iface_type_t; @@ -448,6 +449,8 @@ static inline const char *gr_iface_type_name(gr_iface_type_t type) { return "bond"; case GR_IFACE_TYPE_BRIDGE: return "bridge"; + case GR_IFACE_TYPE_VXLAN: + return "vxlan"; case GR_IFACE_TYPE_UNDEF: case GR_IFACE_TYPE_COUNT: break; diff --git a/modules/infra/control/ctlplane.c b/modules/infra/control/ctlplane.c index 2d9bcf41c..8a17e42c5 
100644 --- a/modules/infra/control/ctlplane.c +++ b/modules/infra/control/ctlplane.c @@ -398,6 +398,7 @@ static void iface_event(uint32_t event, const void *obj) { case GR_IFACE_TYPE_VLAN: case GR_IFACE_TYPE_BOND: case GR_IFACE_TYPE_BRIDGE: + case GR_IFACE_TYPE_VXLAN: break; default: return; diff --git a/modules/infra/control/iface.c b/modules/infra/control/iface.c index 206745463..d208eb7df 100644 --- a/modules/infra/control/iface.c +++ b/modules/infra/control/iface.c @@ -35,6 +35,7 @@ static bool iface_type_valid(gr_iface_type_t type) { case GR_IFACE_TYPE_IPIP: case GR_IFACE_TYPE_BOND: case GR_IFACE_TYPE_BRIDGE: + case GR_IFACE_TYPE_VXLAN: return true; case GR_IFACE_TYPE_UNDEF: case GR_IFACE_TYPE_COUNT: diff --git a/modules/infra/datapath/gr_rxtx.h b/modules/infra/datapath/gr_rxtx.h index add6adf0e..3828c633b 100644 --- a/modules/infra/datapath/gr_rxtx.h +++ b/modules/infra/datapath/gr_rxtx.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -40,7 +41,10 @@ struct port_output_edges { rte_edge_t edges[RTE_MAX_ETHPORTS]; }; -GR_MBUF_PRIV_DATA_TYPE(iface_mbuf_data, { uint16_t vlan_id; }); +GR_MBUF_PRIV_DATA_TYPE(iface_mbuf_data, { + uint16_t vlan_id; + ip4_addr_t vtep; +}); int rxtx_trace_format(char *buf, size_t len, const void *data, size_t /*data_len*/); diff --git a/modules/l2/api/gr_l2.h b/modules/l2/api/gr_l2.h index 84f0c6dba..6fbe7052a 100644 --- a/modules/l2/api/gr_l2.h +++ b/modules/l2/api/gr_l2.h @@ -41,12 +41,29 @@ struct gr_iface_info_bridge { uint16_t members[GR_BRIDGE_MAX_MEMBERS]; // Interface IDs of bridge members. }; +// VXLAN reconfiguration attribute flags. +#define GR_VXLAN_SET_VNI GR_BIT64(32) +#define GR_VXLAN_SET_ENCAP_VRF GR_BIT64(33) +#define GR_VXLAN_SET_DST_PORT GR_BIT64(34) +#define GR_VXLAN_SET_LOCAL GR_BIT64(35) +#define GR_VXLAN_SET_MAC GR_BIT64(37) + +// Info structure for GR_IFACE_TYPE_VXLAN interfaces. +struct gr_iface_info_vxlan { + uint32_t vni; // VXLAN Network Identifier (24-bit). 
+ uint16_t encap_vrf_id; // L3 domain for underlay routing. + uint16_t dst_port; // UDP destination port (default 4789). + ip4_addr_t local; // Local VTEP IP address (must be a configured address in encap_vrf_id). + struct rte_ether_addr mac; // Default to random address. +}; + // FDB (L2 Forwarding Database) management ///////////////////////////////////// // FDB entry flags. typedef enum : uint8_t { GR_FDB_F_STATIC = GR_BIT8(0), // User-configured, never aged out. GR_FDB_F_LEARN = GR_BIT8(1), // Learned via local bridge. + GR_FDB_F_EXTERN = GR_BIT8(2), // Programmed by external control plane. } gr_fdb_flags_t; // Forwarding database entry associating a MAC+VLAN to a bridge member interface. @@ -55,6 +72,7 @@ struct gr_fdb_entry { struct rte_ether_addr mac; uint16_t vlan_id; uint16_t iface_id; // Updated automatically when a MAC moves between members. + ip4_addr_t vtep; // Remote VTEP for VXLAN-learned entries, 0 for local. gr_fdb_flags_t flags; clock_t last_seen; // Refreshed on each datapath hit for learned entries. 
}; @@ -128,3 +146,83 @@ struct gr_fdb_config_set_req { }; // struct gr_fdb_config_set_resp { }; + +// Flood list management for BUM (Broadcast, Unknown unicast, Multicast) ////// + +typedef enum : uint8_t { + GR_FLOOD_T_VTEP = 1, // VXLAN remote VTEP +} gr_flood_type_t; + +static inline const char *gr_flood_type_name(gr_flood_type_t type) { + switch (type) { + case GR_FLOOD_T_VTEP: + return "vtep"; + } + return "?"; +} + +struct gr_flood_vtep { + uint32_t vni; + ip4_addr_t addr; +}; + +struct gr_flood_entry { + gr_flood_type_t type; + uint16_t vrf_id; + union { + struct gr_flood_vtep vtep; + }; +}; + +enum { + GR_EVENT_FLOOD_ADD = EVENT_TYPE(GR_L2_MODULE, 0x0011), + GR_EVENT_FLOOD_DEL = EVENT_TYPE(GR_L2_MODULE, 0x0012), +}; + +#define GR_FLOOD_ADD REQUEST_TYPE(GR_L2_MODULE, 0x0011) + +struct gr_flood_add_req { + struct gr_flood_entry entry; + bool exist_ok; +}; + +// struct gr_flood_add_resp { }; + +#define GR_FLOOD_DEL REQUEST_TYPE(GR_L2_MODULE, 0x0012) + +struct gr_flood_del_req { + struct gr_flood_entry entry; + bool missing_ok; +}; + +// struct gr_flood_del_resp { }; + +#define GR_FLOOD_LIST REQUEST_TYPE(GR_L2_MODULE, 0x0013) + +struct gr_flood_list_req { + gr_flood_type_t type; // 0 for all types + uint16_t vrf_id; // GR_VRF_ID_UNDEF for all +}; + +STREAM_RESP(struct gr_flood_entry); + +// FDB statistics /////////////////////////////////////////////////////////////// + +#define GR_L2_FDB_STATS_GET REQUEST_TYPE(GR_L2_MODULE, 0x0020) + +struct gr_l2_fdb_stats_get_req { + uint16_t bridge_id; +}; + +struct gr_l2_fdb_stats { + uint16_t bridge_id; + uint64_t hit; // unicast forwarded via FDB lookup + uint64_t miss; // unknown unicast, sent to flood + uint64_t flood; // broadcast/multicast, sent to flood +}; + +#define GR_L2_FDB_STATS_RESET REQUEST_TYPE(GR_L2_MODULE, 0x0021) + +struct gr_l2_fdb_stats_reset_req { + uint16_t bridge_id; +}; diff --git a/modules/l2/cli/fdb.c b/modules/l2/cli/fdb.c index 4ab652086..2677a3741 100644 --- a/modules/l2/cli/fdb.c +++ 
b/modules/l2/cli/fdb.c @@ -93,7 +93,7 @@ static cmd_status_t fdb_flush(struct gr_api_client *c, const struct ec_pnode *p) return CMD_ERROR; if (arg_str(p, "all") != NULL) - req.flags |= GR_FDB_F_STATIC; + req.flags |= (GR_FDB_F_STATIC | GR_FDB_F_EXTERN); if (gr_api_client_send_recv(c, GR_FDB_FLUSH, sizeof(req), &req, NULL) < 0) return CMD_ERROR; @@ -108,6 +108,8 @@ static size_t fdb_format_flags(char *buf, size_t len, gr_fdb_flags_t flags) { SAFE_BUF(snprintf, len, "%slearn", n ? " " : ""); if (flags & GR_FDB_F_STATIC) SAFE_BUF(snprintf, len, "%sstatic", n ? " " : ""); + if (flags & GR_FDB_F_EXTERN) + SAFE_BUF(snprintf, len, "%sextern", n ? " " : ""); err: return n; } @@ -134,12 +136,15 @@ static cmd_status_t fdb_show(struct gr_api_client *c, const struct ec_pnode *p) req.flags |= GR_FDB_F_STATIC; if (arg_str(p, "learn") != NULL) req.flags |= GR_FDB_F_LEARN; + if (arg_str(p, "extern") != NULL) + req.flags |= GR_FDB_F_EXTERN; struct libscols_table *table = scols_new_table(); scols_table_new_column(table, "BRIDGE", 0, 0); scols_table_new_column(table, "MAC", 0, 0); scols_table_new_column(table, "VLAN", 0, 0); scols_table_new_column(table, "IFACE", 0, 0); + scols_table_new_column(table, "VTEP", 0, 0); scols_table_new_column(table, "FLAGS", 0, 0); scols_table_new_column(table, "AGE", 0, SCOLS_FL_RIGHT); scols_table_set_column_separator(table, " "); @@ -160,11 +165,14 @@ static cmd_status_t fdb_show(struct gr_api_client *c, const struct ec_pnode *p) scols_line_sprintf(line, 3, "%s", iface ? 
iface->name : "[deleted]"); free(iface); + if (fdb->vtep != 0) + scols_line_sprintf(line, 4, IP4_F, &fdb->vtep); + if (fdb_format_flags(flags, sizeof(flags), fdb->flags)) - scols_line_set_data(line, 4, flags); + scols_line_set_data(line, 5, flags); scols_line_sprintf( - line, 5, "%lds", (gr_clock_us() - fdb->last_seen) / CLOCKS_PER_SEC + line, 6, "%lds", (gr_clock_us() - fdb->last_seen) / CLOCKS_PER_SEC ); } @@ -256,7 +264,9 @@ static int ctx_init(struct ec_node *root) { "Flush only entries matching this MAC address.", ec_node_re("MAC", ETH_ADDR_RE) ), - with_help("Flush all entries including static.", ec_node_str("all", "all")) + with_help( + "Flush all entries including static and extern.", ec_node_str("all", "all") + ) ); if (ret < 0) return ret; @@ -282,7 +292,7 @@ static int ctx_init(struct ec_node *root) { ret = CLI_COMMAND( FDB_CTX(root), - "[show] [(bridge BRIDGE),(iface IFACE),(static|learn)]", + "[show] [(bridge BRIDGE),(iface IFACE),(static|learn|extern)]", fdb_show, "Show FDB entries.", with_help( @@ -294,7 +304,8 @@ static int ctx_init(struct ec_node *root) { ec_node_dyn("IFACE", complete_iface_names, INT2PTR(GR_IFACE_TYPE_UNDEF)) ), with_help("Show only static entries.", ec_node_str("static", "static")), - with_help("Show only learned entries.", ec_node_str("learn", "learn")) + with_help("Show only learned entries.", ec_node_str("learn", "learn")), + with_help("Show only extern entries.", ec_node_str("extern", "extern")) ); if (ret < 0) return ret; @@ -331,6 +342,8 @@ static void fdb_event_print(uint32_t event, const void *obj) { if (fdb->vlan_id != 0) printf(" vlan=%u", fdb->vlan_id); printf(" iface=%u", fdb->iface_id); + if (fdb->vtep != 0) + printf(" vtep=" IP4_F, &fdb->vtep); if (fdb_format_flags(flags, sizeof(flags), fdb->flags)) printf(" %s", flags); printf("\n"); diff --git a/modules/l2/cli/flood.c b/modules/l2/cli/flood.c new file mode 100644 index 000000000..760665ac9 --- /dev/null +++ b/modules/l2/cli/flood.c @@ -0,0 +1,178 @@ +// 
SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static cmd_status_t vtep_add(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_flood_add_req req = { + .entry.type = GR_FLOOD_T_VTEP, + .exist_ok = true, + }; + + if (arg_ip4(p, "ADDR", &req.entry.vtep.addr) < 0) + return CMD_ERROR; + if (arg_u32(p, "VNI", &req.entry.vtep.vni) < 0) + return CMD_ERROR; + if (arg_vrf(c, p, "VRF", &req.entry.vrf_id) < 0) + return CMD_ERROR; + + if (gr_api_client_send_recv(c, GR_FLOOD_ADD, sizeof(req), &req, NULL) < 0) + return CMD_ERROR; + + return CMD_SUCCESS; +} + +static cmd_status_t vtep_del(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_flood_del_req req = { + .entry.type = GR_FLOOD_T_VTEP, + .missing_ok = true, + }; + + if (arg_ip4(p, "ADDR", &req.entry.vtep.addr) < 0) + return CMD_ERROR; + if (arg_u32(p, "VNI", &req.entry.vtep.vni) < 0) + return CMD_ERROR; + if (arg_vrf(c, p, "VRF", &req.entry.vrf_id) < 0) + return CMD_ERROR; + + if (gr_api_client_send_recv(c, GR_FLOOD_DEL, sizeof(req), &req, NULL) < 0) + return CMD_ERROR; + + return CMD_SUCCESS; +} + +static cmd_status_t vtep_show(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_flood_list_req req = { + .type = GR_FLOOD_T_VTEP, + .vrf_id = GR_VRF_ID_UNDEF, + }; + const struct gr_flood_entry *entry; + int ret; + + if (arg_str(p, "VRF") != NULL && arg_vrf(c, p, "VRF", &req.vrf_id) < 0) + return CMD_ERROR; + + struct libscols_table *table = scols_new_table(); + scols_table_new_column(table, "VNI", 0, SCOLS_FL_RIGHT); + scols_table_new_column(table, "VRF", 0, 0); + scols_table_new_column(table, "ADDR", 0, 0); + scols_table_set_column_separator(table, " "); + + gr_api_client_stream_foreach (entry, ret, c, GR_FLOOD_LIST, sizeof(req), &req) { + struct libscols_line *line = scols_table_new_line(table, NULL); + + scols_line_sprintf(line, 0, "%u", entry->vtep.vni); + + 
struct gr_iface *vrf = iface_from_id(c, entry->vrf_id); + scols_line_sprintf(line, 1, "%s", vrf ? vrf->name : "[deleted]"); + free(vrf); + + scols_line_sprintf(line, 2, IP4_F, &entry->vtep.addr); + } + + scols_print_table(table); + scols_unref_table(table); + + return ret < 0 ? CMD_ERROR : CMD_SUCCESS; +} + +#define FLOOD_CTX(root) CLI_CONTEXT(root, CTX_ARG("flood", "Flood list management.")) +#define VTEP_CTX(root) CLI_CONTEXT(FLOOD_CTX(root), CTX_ARG("vtep", "VXLAN Tunnel End-Points.")) + +static int ctx_init(struct ec_node *root) { + int ret; + + ret = CLI_COMMAND( + VTEP_CTX(root), + "add ADDR vni VNI [vrf VRF]", + vtep_add, + "Add a VXLAN flood VTEP.", + with_help("Remote VTEP IP address.", ec_node_re("ADDR", IPV4_RE)), + with_help( + "VXLAN Network Identifier (1-16777215).", + ec_node_uint("VNI", 1, 16777215, 10) + ), + with_help("L3 routing domain name.", ec_node_dyn("VRF", complete_vrf_names, NULL)) + ); + if (ret < 0) + return ret; + + ret = CLI_COMMAND( + VTEP_CTX(root), + "del ADDR vni VNI [vrf VRF]", + vtep_del, + "Delete a VXLAN flood VTEP.", + with_help("Remote VTEP IP address.", ec_node_re("ADDR", IPV4_RE)), + with_help( + "VXLAN Network Identifier (1-16777215).", + ec_node_uint("VNI", 1, 16777215, 10) + ), + with_help("L3 routing domain name.", ec_node_dyn("VRF", complete_vrf_names, NULL)) + ); + if (ret < 0) + return ret; + + ret = CLI_COMMAND( + VTEP_CTX(root), + "[show] [vrf VRF]", + vtep_show, + "List VXLAN flood VTEPs.", + with_help("L3 routing domain name.", ec_node_dyn("VRF", complete_vrf_names, NULL)) + ); + if (ret < 0) + return ret; + + return 0; +} + +static struct cli_context ctx = { + .name = "flood", + .init = ctx_init, +}; + +static void flood_event_print(uint32_t event, const void *obj) { + const struct gr_flood_entry *entry = obj; + const char *action; + + switch (event) { + case GR_EVENT_FLOOD_ADD: + action = "add"; + break; + case GR_EVENT_FLOOD_DEL: + action = "del"; + break; + default: + action = "?"; + break; + } + + 
printf("flood %s: %s vrf=%u", action, gr_flood_type_name(entry->type), entry->vrf_id); + switch (entry->type) { + case GR_FLOOD_T_VTEP: + printf(" " IP4_F " vni=%u", &entry->vtep.addr, entry->vtep.vni); + } + printf("\n"); +} + +static struct cli_event_printer printer = { + .print = flood_event_print, + .ev_count = 2, + .ev_types = { + GR_EVENT_FLOOD_ADD, + GR_EVENT_FLOOD_DEL, + }, +}; + +static void __attribute__((constructor, used)) init(void) { + cli_context_register(&ctx); + cli_event_printer_register(&printer); +} diff --git a/modules/l2/cli/l2_stats.c b/modules/l2/cli/l2_stats.c new file mode 100644 index 000000000..939985f6f --- /dev/null +++ b/modules/l2/cli/l2_stats.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Fabien Dupont + +#include +#include +#include +#include + +#include + +#include + +static cmd_status_t fdb_stats_show(struct gr_api_client *c, const struct ec_pnode *p) { + const struct gr_l2_fdb_stats *stats; + struct gr_l2_fdb_stats_get_req req; + void *resp_ptr = NULL; + struct gr_iface *iface; + + iface = iface_from_name(c, arg_str(p, "NAME")); + if (iface == NULL) + return CMD_ERROR; + + req.bridge_id = iface->id; + free(iface); + + if (gr_api_client_send_recv(c, GR_L2_FDB_STATS_GET, sizeof(req), &req, &resp_ptr) < 0) + return CMD_ERROR; + + stats = resp_ptr; + + printf("fdb_hit: %lu\n", stats->hit); + printf("fdb_miss: %lu\n", stats->miss); + printf("bcast: %lu\n", stats->flood); + + free(resp_ptr); + return CMD_SUCCESS; +} + +static cmd_status_t fdb_stats_reset(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_l2_fdb_stats_reset_req req; + struct gr_iface *iface; + + iface = iface_from_name(c, arg_str(p, "NAME")); + if (iface == NULL) + return CMD_ERROR; + + req.bridge_id = iface->id; + free(iface); + + if (gr_api_client_send_recv(c, GR_L2_FDB_STATS_RESET, sizeof(req), &req, NULL) < 0) + return CMD_ERROR; + + return CMD_SUCCESS; +} + +#define FDB_STATS_CTX(root) \ + CLI_CONTEXT( \ + root, 
\ + CTX_ARG("stats", "Statistics."), \ + CTX_ARG("fdb", "FDB forwarding statistics.") \ + ) + +static int ctx_init(struct ec_node *root) { + int ret; + + ret = CLI_COMMAND( + FDB_STATS_CTX(root), + "show NAME", + fdb_stats_show, + "Show FDB forwarding statistics for a bridge.", + with_help( + "Bridge interface name.", + ec_node_dyn("NAME", complete_iface_names, INT2PTR(GR_IFACE_TYPE_BRIDGE)) + ) + ); + if (ret < 0) + return ret; + + ret = CLI_COMMAND( + FDB_STATS_CTX(root), + "reset NAME", + fdb_stats_reset, + "Reset FDB forwarding statistics for a bridge.", + with_help( + "Bridge interface name.", + ec_node_dyn("NAME", complete_iface_names, INT2PTR(GR_IFACE_TYPE_BRIDGE)) + ) + ); + if (ret < 0) + return ret; + + return 0; +} + +static struct cli_context ctx = { + .name = "fdb stats", + .init = ctx_init, +}; + +static void __attribute__((constructor, used)) init(void) { + cli_context_register(&ctx); +} diff --git a/modules/l2/cli/meson.build b/modules/l2/cli/meson.build index 53b9e5699..0d5b39879 100644 --- a/modules/l2/cli/meson.build +++ b/modules/l2/cli/meson.build @@ -3,5 +3,8 @@ cli_src += files( 'bridge.c', + 'flood.c', 'fdb.c', + 'l2_stats.c', + 'vxlan.c', ) diff --git a/modules/l2/cli/vxlan.c b/modules/l2/cli/vxlan.c new file mode 100644 index 000000000..92655fb35 --- /dev/null +++ b/modules/l2/cli/vxlan.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include + +#include + +#include + +static void vxlan_show(struct gr_api_client *c, const struct gr_iface *iface) { + const struct gr_iface_info_vxlan *vxlan = (const struct gr_iface_info_vxlan *)iface->info; + struct gr_iface *vrf = iface_from_id(c, vxlan->encap_vrf_id); + printf("vni: %u\n", vxlan->vni); + printf("local: " IP4_F "\n", &vxlan->local); + printf("encap_vrf: %s\n", vrf ? 
vrf->name : "[deleted]"); + printf("dst_port: %u\n", vxlan->dst_port); + printf("mac: " ETH_F "\n", &vxlan->mac); + free(vrf); +} + +static void +vxlan_list_info(struct gr_api_client *c, const struct gr_iface *iface, char *buf, size_t len) { + const struct gr_iface_info_vxlan *vxlan = (const struct gr_iface_info_vxlan *)iface->info; + struct gr_iface *vrf = iface_from_id(c, vxlan->encap_vrf_id); + snprintf( + buf, + len, + "vni=%u local=" IP4_F " encap_vrf=%s", + vxlan->vni, + &vxlan->local, + vrf ? vrf->name : "[deleted]" + ); + free(vrf); +} + +static struct cli_iface_type vxlan_type = { + .type_id = GR_IFACE_TYPE_VXLAN, + .show = vxlan_show, + .list_info = vxlan_list_info, +}; + +static uint64_t parse_vxlan_args( + struct gr_api_client *c, + const struct ec_pnode *p, + struct gr_iface *iface, + bool update +) { + struct gr_iface_info_vxlan *vxlan; + uint64_t set_attrs; + + set_attrs = parse_iface_args(c, p, iface, sizeof(*vxlan), update); + + vxlan = (struct gr_iface_info_vxlan *)iface->info; + + if (arg_u32(p, "VNI", &vxlan->vni) < 0) { + if (errno != ENOENT) + return 0; + } else { + set_attrs |= GR_VXLAN_SET_VNI; + } + + if (arg_ip4(p, "LOCAL", &vxlan->local) < 0) { + if (errno != ENOENT) + return 0; + } else { + set_attrs |= GR_VXLAN_SET_LOCAL; + } + + if (arg_str(p, "ENCAP_VRF") != NULL) { + if (arg_vrf(c, p, "ENCAP_VRF", &vxlan->encap_vrf_id) < 0) + return 0; + else + set_attrs |= GR_VXLAN_SET_ENCAP_VRF; + } + + if (arg_u16(p, "DST_PORT", &vxlan->dst_port) < 0) { + if (errno != ENOENT) + return 0; + } else { + set_attrs |= GR_VXLAN_SET_DST_PORT; + } + + if (arg_eth_addr(p, "MAC", &vxlan->mac) < 0) { + if (errno != ENOENT) + return 0; + } else { + set_attrs |= GR_VXLAN_SET_MAC; + } + + if (set_attrs == 0) + errno = EINVAL; + return set_attrs; +} + +static cmd_status_t vxlan_add(struct gr_api_client *c, const struct ec_pnode *p) { + const struct gr_infra_iface_add_resp *resp; + struct gr_infra_iface_add_req *req = NULL; + void *resp_ptr = NULL; + size_t len; 
+ + len = sizeof(*req) + sizeof(struct gr_iface_info_vxlan); + if ((req = calloc(1, len)) == NULL) + goto err; + + req->iface.type = GR_IFACE_TYPE_VXLAN; + req->iface.flags = GR_IFACE_F_UP; + + if (parse_vxlan_args(c, p, &req->iface, false) == 0) + goto err; + + if (gr_api_client_send_recv(c, GR_INFRA_IFACE_ADD, len, req, &resp_ptr) < 0) + goto err; + + free(req); + resp = resp_ptr; + printf("Created interface %u\n", resp->iface_id); + free(resp_ptr); + return CMD_SUCCESS; +err: + free(req); + return CMD_ERROR; +} + +static cmd_status_t vxlan_set(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_infra_iface_set_req *req = NULL; + cmd_status_t ret = CMD_ERROR; + size_t len; + + len = sizeof(*req) + sizeof(struct gr_iface_info_vxlan); + if ((req = calloc(1, len)) == NULL) + goto out; + + if ((req->set_attrs = parse_vxlan_args(c, p, &req->iface, true)) == 0) + goto out; + + if (gr_api_client_send_recv(c, GR_INFRA_IFACE_SET, len, req, NULL) < 0) + goto out; + + ret = CMD_SUCCESS; +out: + free(req); + return ret; +} + +#define VXLAN_ATTRS_CMD "(encap_vrf ENCAP_VRF),(mac MAC),(dst_port DST_PORT)" + +#define VXLAN_ATTRS_ARGS \ + IFACE_ATTRS_ARGS, \ + with_help( \ + "VXLAN Network Identifier (1-16777215).", \ + ec_node_uint("VNI", 1, 16777215, 10) \ + ), \ + with_help("Local VTEP IP address.", ec_node_re("LOCAL", IPV4_RE)), \ + with_help( \ + "L3 routing domain name for encap addresses.", \ + ec_node_dyn("ENCAP_VRF", complete_vrf_names, NULL) \ + ), \ + with_help("Ethernet address (default random).", ec_node_re("MAC", ETH_ADDR_RE)), \ + with_help( \ + "UDP destination port (default 4789).", \ + ec_node_uint("DST_PORT", 1, 65535, 10) \ + ) + +static int ctx_init(struct ec_node *root) { + int ret; + + ret = CLI_COMMAND( + INTERFACE_ADD_CTX(root), + "vxlan NAME vni VNI local LOCAL [" VXLAN_ATTRS_CMD "," IFACE_ATTRS_CMD "]", + vxlan_add, + "Create a new VXLAN tunnel interface.", + with_help("Interface name.", ec_node("any", "NAME")), + VXLAN_ATTRS_ARGS + ); + if 
(ret < 0) + return ret; + ret = CLI_COMMAND( + INTERFACE_SET_CTX(root), + "vxlan NAME (name NEW_NAME),(vni VNI),(local LOCAL), " VXLAN_ATTRS_CMD + "," IFACE_ATTRS_CMD, + vxlan_set, + "Modify VXLAN parameters.", + with_help( + "Interface name.", + ec_node_dyn("NAME", complete_iface_names, INT2PTR(GR_IFACE_TYPE_VXLAN)) + ), + with_help("New interface name.", ec_node("any", "NEW_NAME")), + VXLAN_ATTRS_ARGS + ); + if (ret < 0) + return ret; + + return 0; +} + +static struct cli_context ctx = { + .name = "vxlan", + .init = ctx_init, +}; + +static void __attribute__((constructor, used)) init(void) { + cli_context_register(&ctx); + register_iface_type(&vxlan_type); +} diff --git a/modules/l2/control/bridge.c b/modules/l2/control/bridge.c index 208f37112..d4edfec51 100644 --- a/modules/l2/control/bridge.c +++ b/modules/l2/control/bridge.c @@ -35,6 +35,7 @@ static int bridge_attach_member(struct iface *bridge, struct iface *member) { case GR_IFACE_TYPE_PORT: case GR_IFACE_TYPE_VLAN: case GR_IFACE_TYPE_BOND: + case GR_IFACE_TYPE_VXLAN: break; default: return errno_set(EMEDIUMTYPE); @@ -88,6 +89,10 @@ static int bridge_fini(struct iface *iface) { gr_event_push(GR_EVENT_IFACE_POST_RECONFIG, member); } + // Clear FDB forwarding stats. 
+ if (iface->id < L2_MAX_BRIDGES) + memset(l2_fdb_stats[iface->id], 0, sizeof(l2_fdb_stats[0])); + fdb_purge_bridge(iface->id); return 0; diff --git a/modules/l2/control/fdb.c b/modules/l2/control/fdb.c index dfea4339c..0b10b0a77 100644 --- a/modules/l2/control/fdb.c +++ b/modules/l2/control/fdb.c @@ -105,7 +105,8 @@ void fdb_learn( uint16_t bridge_id, uint16_t iface_id, const struct rte_ether_addr *mac, - uint16_t vlan_id + uint16_t vlan_id, + ip4_addr_t vtep ) { const struct fdb_key key = {bridge_id, vlan_id, *mac}; struct gr_fdb_entry *fdb; @@ -121,6 +122,7 @@ void fdb_learn( fdb->mac = *mac; fdb->flags = GR_FDB_F_LEARN; fdb->iface_id = iface_id; + fdb->vtep = vtep; if (rte_hash_add_key_data(fdb_hash, &key, fdb) < 0) { // no space left in hash @@ -135,9 +137,10 @@ void fdb_learn( fdb->last_seen = gr_clock_us(); - if ((fdb->flags & GR_FDB_F_LEARN) && fdb->iface_id != iface_id) { + if ((fdb->flags & GR_FDB_F_LEARN) && (fdb->iface_id != iface_id || fdb->vtep != vtep)) { // update in case the mac address has moved fdb->iface_id = iface_id; + fdb->vtep = vtep; gr_event_push(GR_EVENT_FDB_UPDATE, fdb); } } @@ -177,7 +180,7 @@ static struct api_out fdb_add(const void *request, struct api_ctx *) { void *data; int ret; - if (req->fdb.flags & ~GR_FDB_F_STATIC) + if (req->fdb.flags & ~(GR_FDB_F_STATIC | GR_FDB_F_EXTERN)) return api_out(EINVAL, 0, NULL); iface = iface_from_id(req->fdb.iface_id); @@ -259,6 +262,8 @@ static inline bool fdb_match( return false; if ((flags & GR_FDB_F_LEARN) && !(e->flags & GR_FDB_F_LEARN)) return false; + if ((flags & GR_FDB_F_EXTERN) && !(e->flags & GR_FDB_F_EXTERN)) + return false; if (bridge_id != GR_IFACE_ID_UNDEF && e->bridge_id != bridge_id) return false; if (iface_id != GR_IFACE_ID_UNDEF && e->iface_id != iface_id) diff --git a/modules/l2/control/flood.c b/modules/l2/control/flood.c new file mode 100644 index 000000000..9d04ac7bb --- /dev/null +++ b/modules/l2/control/flood.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: BSD-3-Clause +// 
Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include + +#include + +static const struct flood_type_ops *flood_types[UINT_NUM_VALUES(gr_flood_type_t)]; + +static bool flood_type_valid(gr_flood_type_t type) { + switch (type) { + case GR_FLOOD_T_VTEP: + return true; + } + return false; +} + +void flood_type_register(const struct flood_type_ops *ops) { + if (!flood_type_valid(ops->type)) + ABORT("invalid flood type %u", ops->type); + if (flood_types[ops->type] != NULL) + ABORT("flood type %u already registered", ops->type); + flood_types[ops->type] = ops; +} + +static struct api_out flood_add(const void *request, struct api_ctx *) { + const struct gr_flood_add_req *req = request; + const struct flood_type_ops *ops; + int ret; + + ops = flood_types[req->entry.type]; + if (ops == NULL || ops->add == NULL) + return api_out(EAFNOSUPPORT, 0, NULL); + + ret = ops->add(&req->entry, req->exist_ok); + + return api_out(-ret, 0, NULL); +} + +static struct gr_api_handler flood_add_handler = { + .name = "flood add", + .request_type = GR_FLOOD_ADD, + .callback = flood_add, +}; + +static struct api_out flood_del(const void *request, struct api_ctx *) { + const struct gr_flood_del_req *req = request; + const struct flood_type_ops *ops; + int ret; + + ops = flood_types[req->entry.type]; + if (ops == NULL || ops->del == NULL) + return api_out(EAFNOSUPPORT, 0, NULL); + + ret = ops->del(&req->entry, req->missing_ok); + + return api_out(-ret, 0, NULL); +} + +static struct gr_api_handler flood_del_handler = { + .name = "flood del", + .request_type = GR_FLOOD_DEL, + .callback = flood_del, +}; + +static struct api_out flood_list(const void *request, struct api_ctx *ctx) { + const struct gr_flood_list_req *req = request; + const struct flood_type_ops *ops; + + for (unsigned t = 0; t < ARRAY_DIM(flood_types); t++) { + if (req->type != 0 && req->type != t) + continue; + ops = flood_types[t]; + if (ops == NULL || ops->list == NULL) + continue; + if 
(ops->list(req->vrf_id, ctx) < 0) + return api_out(errno, 0, NULL); + } + + return api_out(0, 0, NULL); +} + +static struct gr_api_handler flood_list_handler = { + .name = "flood list", + .request_type = GR_FLOOD_LIST, + .callback = flood_list, +}; + +static struct gr_event_serializer serializer = { + .size = sizeof(struct gr_flood_entry), + .ev_count = 2, + .ev_types = { + GR_EVENT_FLOOD_ADD, + GR_EVENT_FLOOD_DEL, + }, +}; + +RTE_INIT(flood_init) { + gr_register_api_handler(&flood_add_handler); + gr_register_api_handler(&flood_del_handler); + gr_register_api_handler(&flood_list_handler); + gr_event_register_serializer(&serializer); +} diff --git a/modules/l2/control/gr_l2_control.h b/modules/l2/control/gr_l2_control.h index 89284ee87..6aefb4eaa 100644 --- a/modules/l2/control/gr_l2_control.h +++ b/modules/l2/control/gr_l2_control.h @@ -5,9 +5,36 @@ #include #include +#include +#include + +#include +#include +#include +#include +#include #include +// Per-core FDB forwarding statistics, indexed by [bridge_slot][lcore_id]. +// Track forwarding decisions that generic per-interface iface_stats and +// drop node software stats cannot distinguish. +struct fdb_stats { + uint64_t hit; // unicast forwarded via FDB lookup + uint64_t miss; // unknown unicast, sent to flood + uint64_t flood; // broadcast/multicast, sent to flood +} __rte_cache_aligned; + +#define L2_MAX_BRIDGES 256 + +extern struct fdb_stats l2_fdb_stats[L2_MAX_BRIDGES][RTE_MAX_LCORE]; + +static inline struct fdb_stats *fdb_get_stats(uint16_t bridge_id, unsigned lcore_id) { + if (bridge_id >= L2_MAX_BRIDGES) + return NULL; + return &l2_fdb_stats[bridge_id][lcore_id]; +} + // Internal bridge info structure. 
GR_IFACE_INFO(GR_IFACE_TYPE_BRIDGE, iface_info_bridge, { BASE(__gr_iface_info_bridge_base); @@ -24,7 +51,8 @@ void fdb_learn( uint16_t bridge_id, uint16_t iface_id, const struct rte_ether_addr *, - uint16_t vlan_id + uint16_t vlan_id, + ip4_addr_t vtep ); // Delete all FDB entries referencing the provided interface. @@ -32,3 +60,48 @@ void fdb_purge_iface(uint16_t iface_id); // Delete all FDB entries referencing the provided bridge. void fdb_purge_bridge(uint16_t bridge_id); + +struct vxlan_template { + struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; + struct rte_vxlan_hdr vxlan; +}; + +GR_IFACE_INFO(GR_IFACE_TYPE_VXLAN, iface_info_vxlan, { + BASE(gr_iface_info_vxlan); + + struct vxlan_template template; + + uint16_t n_flood_vteps; + ip4_addr_t *flood_vteps; +}); + +struct iface *vxlan_get_iface(rte_be32_t vni, uint16_t encap_vrf_id); + +// Flood list type callbacks, registered per gr_flood_t. +struct flood_type_ops { + gr_flood_type_t type; + int (*add)(const struct gr_flood_entry *, bool exist_ok); + int (*del)(const struct gr_flood_entry *, bool missing_ok); + int (*list)(uint16_t vrf_id, struct api_ctx *); +}; + +void flood_type_register(const struct flood_type_ops *); + +#define VXLAN_FLAGS_VNI RTE_BE32(GR_BIT32(27)) + +static inline rte_be32_t vxlan_decode_vni(rte_be32_t vx_vni) { +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + return (rte_be32_t)((uint32_t)vx_vni >> 8); +#else + return (rte_be32_t)((uint32_t)(vx_vni & RTE_BE32(0xffffff00)) << 8); +#endif +} + +static inline rte_be32_t vxlan_encode_vni(uint32_t vni) { +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + return (rte_be32_t)((uint32_t)vni << 8); +#else + return (rte_be32_t)((uint32_t)rte_cpu_to_be_32(vni) >> 8); +#endif +} diff --git a/modules/l2/control/l2_stats.c b/modules/l2/control/l2_stats.c new file mode 100644 index 000000000..cef93c172 --- /dev/null +++ b/modules/l2/control/l2_stats.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Fabien Dupont + +#include +#include 
+#include +#include +#include + +#include + +#include +#include + +struct fdb_stats l2_fdb_stats[L2_MAX_BRIDGES][RTE_MAX_LCORE]; + +static struct api_out fdb_stats_get(const void *request, struct api_ctx *) { + const struct gr_l2_fdb_stats_get_req *req = request; + struct gr_l2_fdb_stats *resp; + + const struct iface *iface = iface_from_id(req->bridge_id); + if (iface == NULL || iface->type != GR_IFACE_TYPE_BRIDGE) + return api_out(ENOENT, 0, NULL); + + resp = calloc(1, sizeof(*resp)); + if (resp == NULL) + return api_out(ENOMEM, 0, NULL); + + resp->bridge_id = req->bridge_id; + + for (unsigned lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + struct fdb_stats *fs = fdb_get_stats(req->bridge_id, lcore_id); + if (fs == NULL) + continue; + resp->hit += fs->hit; + resp->miss += fs->miss; + resp->flood += fs->flood; + } + + return api_out(0, sizeof(*resp), resp); +} + +static struct api_out fdb_stats_reset(const void *request, struct api_ctx *) { + const struct gr_l2_fdb_stats_reset_req *req = request; + + const struct iface *iface = iface_from_id(req->bridge_id); + if (iface == NULL || iface->type != GR_IFACE_TYPE_BRIDGE) + return api_out(ENOENT, 0, NULL); + + if (req->bridge_id < L2_MAX_BRIDGES) + memset(l2_fdb_stats[req->bridge_id], 0, sizeof(l2_fdb_stats[0])); + + return api_out(0, 0, NULL); +} + +static struct gr_api_handler fdb_stats_get_handler = { + .name = "fdb stats get", + .request_type = GR_L2_FDB_STATS_GET, + .callback = fdb_stats_get, +}; + +static struct gr_api_handler fdb_stats_reset_handler = { + .name = "fdb stats reset", + .request_type = GR_L2_FDB_STATS_RESET, + .callback = fdb_stats_reset, +}; + +RTE_INIT(fdb_stats_constructor) { + gr_register_api_handler(&fdb_stats_get_handler); + gr_register_api_handler(&fdb_stats_reset_handler); +} diff --git a/modules/l2/control/meson.build b/modules/l2/control/meson.build index e98d2892b..a6801dab7 100644 --- a/modules/l2/control/meson.build +++ b/modules/l2/control/meson.build @@ -4,6 +4,9 @@ src += 
files( 'bridge.c', 'fdb.c', + 'flood.c', + 'l2_stats.c', + 'vxlan.c', ) inc += include_directories('.') diff --git a/modules/l2/control/vxlan.c b/modules/l2/control/vxlan.c new file mode 100644 index 000000000..40b8f42ab --- /dev/null +++ b/modules/l2/control/vxlan.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +struct vxlan_key { + rte_be32_t vni; + // Use uint32_t to avoid padding issues. See ipip_key in ipip/control.c. + uint32_t vrf_id; +}; + +static struct rte_hash *vxlan_hash; + +struct iface *vxlan_get_iface(rte_be32_t vni, uint16_t encap_vrf_id) { + const struct vxlan_key key = {vni, encap_vrf_id}; + void *data; + + if (rte_hash_lookup_data(vxlan_hash, &key, &data) < 0) + return NULL; + + return data; +} + +static int iface_vxlan_reconfig( + struct iface *iface, + uint64_t set_attrs, + const struct gr_iface *, + const void *api_info +) { + struct iface_info_vxlan *cur = iface_info_vxlan(iface); + const struct vxlan_key cur_key = {rte_cpu_to_be_32(cur->vni), cur->encap_vrf_id}; + const struct gr_iface_info_vxlan *next = api_info; + int ret; + + if (set_attrs & GR_VXLAN_SET_ENCAP_VRF) { + uint16_t vrf = next->encap_vrf_id; + uint16_t old = cur->encap_vrf_id; + + if (vrf == GR_VRF_ID_UNDEF) + vrf = vrf_default_get_or_create(); + + if (vrf != old && vrf_incref(vrf) < 0) + return -errno; + + if (old != GR_VRF_ID_UNDEF) + vrf_decref(old); + + cur->encap_vrf_id = vrf; + } + + if (set_attrs & (GR_VXLAN_SET_VNI | GR_VXLAN_SET_ENCAP_VRF)) { + const struct vxlan_key next_key = {rte_cpu_to_be_32(next->vni), cur->encap_vrf_id}; + + if (rte_hash_lookup(vxlan_hash, &next_key) >= 0) + return errno_set(EADDRINUSE); + + if (next->vni == 0 || next->vni > 0xffffff) + return errno_set(ERANGE); + + rte_hash_del_key(vxlan_hash, &cur_key); + + ret = 
rte_hash_add_key_data(vxlan_hash, &next_key, iface); + if (ret < 0) + return errno_log(-ret, "rte_hash_add_key_data"); + + cur->vni = next->vni; + } + + if (set_attrs & GR_VXLAN_SET_DST_PORT) { + uint16_t port = next->dst_port ?: RTE_VXLAN_DEFAULT_PORT; + if (cur->dst_port != 0 && cur->dst_port != RTE_VXLAN_DEFAULT_PORT + && port != cur->dst_port) { + l4_input_unalias_port(IPPROTO_UDP, rte_cpu_to_be_16(cur->dst_port)); + } + if (port != RTE_VXLAN_DEFAULT_PORT && port != cur->dst_port) { + l4_input_alias_port( + IPPROTO_UDP, + RTE_BE16(RTE_VXLAN_DEFAULT_PORT), + rte_cpu_to_be_16(port) + ); + } + cur->dst_port = port; + } + + if (set_attrs & (GR_VXLAN_SET_LOCAL | GR_VXLAN_SET_ENCAP_VRF)) { + ip4_addr_t local = (set_attrs & GR_VXLAN_SET_LOCAL) ? next->local : cur->local; + const struct nexthop *nh = rib4_lookup(cur->encap_vrf_id, local); + if (nh == NULL) + return -errno; + if (nh->type != GR_NH_T_L3) + return errno_set(EPROTOTYPE); + + const struct nexthop_info_l3 *l3 = nexthop_info_l3(nh); + if (!(l3->flags & GR_NH_F_LOCAL)) + return errno_set(EPROTOTYPE); + + cur->local = local; + } + + if (set_attrs & GR_VXLAN_SET_MAC) { + if (iface_set_eth_addr(iface, &next->mac) < 0) + return -errno; + } + + // Update the datapath template from the current config. 
+ cur->template.ip.version_ihl = IPV4_VERSION_IHL; + cur->template.ip.time_to_live = IPV4_DEFAULT_TTL; + cur->template.ip.next_proto_id = IPPROTO_UDP; + cur->template.ip.src_addr = cur->local; + cur->template.udp.dst_port = rte_cpu_to_be_16(cur->dst_port); + cur->template.vxlan.vx_flags = VXLAN_FLAGS_VNI; + cur->template.vxlan.vx_vni = vxlan_encode_vni(cur->vni); + + return 0; +} + +static int iface_vxlan_fini(struct iface *iface) { + struct iface_info_vxlan *vxlan = iface_info_vxlan(iface); + struct gr_flood_entry entry = { + .type = GR_FLOOD_T_VTEP, + .vrf_id = vxlan->encap_vrf_id, + .vtep.vni = vxlan->vni, + }; + + for (uint16_t i = 0; i < vxlan->n_flood_vteps; i++) { + entry.vtep.addr = vxlan->flood_vteps[i]; + gr_event_push(GR_EVENT_FLOOD_DEL, &entry); + } + + if (vxlan->encap_vrf_id != GR_VRF_ID_UNDEF) + vrf_decref(vxlan->encap_vrf_id); + + if (vxlan->dst_port != RTE_VXLAN_DEFAULT_PORT) + l4_input_unalias_port(IPPROTO_UDP, rte_cpu_to_be_16(vxlan->dst_port)); + + rte_free(vxlan->flood_vteps); + + return 0; +} + +static int iface_vxlan_init(struct iface *iface, const void *api_info) { + struct gr_iface conf; + int ret; + + iface->speed = RTE_ETH_SPEED_NUM_10G; + if (iface->mtu == 0) + iface->mtu = 1450; + + conf.base = iface->base; + + ret = iface_vxlan_reconfig(iface, IFACE_SET_ALL, &conf, api_info); + if (ret < 0) { + iface_vxlan_fini(iface); + errno = -ret; + } + + return ret; +} + +static int iface_vxlan_get_eth_addr(const struct iface *iface, struct rte_ether_addr *mac) { + const struct iface_info_vxlan *vxlan = iface_info_vxlan(iface); + + *mac = vxlan->mac; + + return 0; +} + +static int iface_vxlan_set_eth_addr(struct iface *iface, const struct rte_ether_addr *mac) { + struct iface_info_vxlan *vxlan = iface_info_vxlan(iface); + + if (rte_is_zero_ether_addr(mac)) + rte_eth_random_addr(vxlan->mac.addr_bytes); + else + vxlan->mac = *mac; + + return 0; +} + +static void vxlan_to_api(void *info, const struct iface *iface) { + const struct iface_info_vxlan 
*vxlan = iface_info_vxlan(iface); + struct gr_iface_info_vxlan *api = info; + *api = vxlan->base; +} + +static const struct iface_type iface_type_vxlan = { + .id = GR_IFACE_TYPE_VXLAN, + .pub_size = sizeof(struct gr_iface_info_vxlan), + .priv_size = sizeof(struct iface_info_vxlan), + .init = iface_vxlan_init, + .reconfig = iface_vxlan_reconfig, + .fini = iface_vxlan_fini, + .get_eth_addr = iface_vxlan_get_eth_addr, + .set_eth_addr = iface_vxlan_set_eth_addr, + .to_api = vxlan_to_api, +}; + +static void vxlan_pre_remove_cb(uint32_t /*ev_type*/, const void *obj) { + const struct iface_info_vxlan *vxlan; + const struct iface *iface = obj; + + if (iface->type != GR_IFACE_TYPE_VXLAN) + return; + + vxlan = iface_info_vxlan(iface); + struct vxlan_key key = {rte_cpu_to_be_32(vxlan->vni), vxlan->encap_vrf_id}; + rte_hash_del_key(vxlan_hash, &key); +} + +static struct gr_event_subscription vxlan_subscription = { + .callback = vxlan_pre_remove_cb, + .ev_count = 1, + .ev_types = {GR_EVENT_IFACE_PRE_REMOVE}, +}; + +static int vtep_flood_add(const struct gr_flood_entry *entry, bool exist_ok) { + struct iface_info_vxlan *vxlan; + ip4_addr_t *vteps, *old_vteps; + struct iface *iface; + + iface = vxlan_get_iface(rte_cpu_to_be_32(entry->vtep.vni), entry->vrf_id); + if (iface == NULL) + return errno_set(ENODEV); + + vxlan = iface_info_vxlan(iface); + + for (uint16_t i = 0; i < vxlan->n_flood_vteps; i++) { + if (vxlan->flood_vteps[i] == entry->vtep.addr) { + if (exist_ok) + return 0; + return errno_set(EEXIST); + } + } + + vteps = rte_calloc(__func__, vxlan->n_flood_vteps + 1, sizeof(*vteps), 0); + if (vteps == NULL) + return errno_set(ENOMEM); + + memcpy(vteps, vxlan->flood_vteps, vxlan->n_flood_vteps * sizeof(*vteps)); + vteps[vxlan->n_flood_vteps] = entry->vtep.addr; + old_vteps = vxlan->flood_vteps; + vxlan->flood_vteps = vteps; + // ensure n_flood_vteps is incremented *after* flood_vteps is updated + atomic_thread_fence(memory_order_release); + vxlan->n_flood_vteps++; + + 
rte_rcu_qsbr_synchronize(gr_datapath_rcu(), rte_lcore_id()); + rte_free(old_vteps); + + gr_event_push(GR_EVENT_FLOOD_ADD, entry); + + return 0; +} + +static int vtep_flood_del(const struct gr_flood_entry *entry, bool missing_ok) { + struct iface_info_vxlan *vxlan; + struct iface *iface; + + iface = vxlan_get_iface(rte_cpu_to_be_32(entry->vtep.vni), entry->vrf_id); + if (iface == NULL) { + if (missing_ok) + return 0; + return errno_set(ENOENT); + } + + vxlan = iface_info_vxlan(iface); + + for (uint16_t i = 0; i < vxlan->n_flood_vteps; i++) { + if (vxlan->flood_vteps[i] == entry->vtep.addr) { + vxlan->flood_vteps[i] = vxlan->flood_vteps[vxlan->n_flood_vteps - 1]; + vxlan->n_flood_vteps--; + gr_event_push(GR_EVENT_FLOOD_DEL, entry); + return 0; + } + } + + if (missing_ok) + return 0; + + return errno_set(ENOENT); +} + +static int vtep_flood_list(uint16_t vrf_id, struct api_ctx *ctx) { + struct gr_flood_entry entry = {.type = GR_FLOOD_T_VTEP}; + const struct iface_info_vxlan *vxlan; + uint32_t next = 0; + const void *key; + void *data; + + while (rte_hash_iterate(vxlan_hash, &key, &data, &next) >= 0) { + struct iface *iface = data; + vxlan = iface_info_vxlan(iface); + + if (vrf_id != GR_VRF_ID_UNDEF && vxlan->encap_vrf_id != vrf_id) + continue; + + for (uint16_t i = 0; i < vxlan->n_flood_vteps; i++) { + entry.vrf_id = vxlan->encap_vrf_id; + entry.vtep.vni = vxlan->vni; + entry.vtep.addr = vxlan->flood_vteps[i]; + api_send(ctx, sizeof(entry), &entry); + } + } + + return 0; +} + +static const struct flood_type_ops vtep_flood_ops = { + .type = GR_FLOOD_T_VTEP, + .add = vtep_flood_add, + .del = vtep_flood_del, + .list = vtep_flood_list, +}; + +static void vxlan_init(struct event_base *) { + struct rte_hash_parameters params = { + .name = "vxlan", + .entries = GR_MAX_IFACES, + .key_len = sizeof(struct vxlan_key), + .socket_id = SOCKET_ID_ANY, + .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF + | RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT, + }; + vxlan_hash = 
rte_hash_create(&params); + if (vxlan_hash == NULL) + ABORT("rte_hash_create(vxlan)"); + + struct rte_hash_rcu_config rcu_config = { + .v = gr_datapath_rcu(), .mode = RTE_HASH_QSBR_MODE_SYNC + }; + rte_hash_rcu_qsbr_add(vxlan_hash, &rcu_config); +} + +static void vxlan_fini(struct event_base *) { + rte_hash_free(vxlan_hash); + vxlan_hash = NULL; +} + +static struct gr_module vxlan_module = { + .name = "vxlan", + .depends_on = "rcu", + .init = vxlan_init, + .fini = vxlan_fini, +}; + +RTE_INIT(vxlan_constructor) { + gr_register_module(&vxlan_module); + iface_type_register(&iface_type_vxlan); + gr_event_subscribe(&vxlan_subscription); + flood_type_register(&vtep_flood_ops); +} diff --git a/modules/l2/datapath/bridge_flood.c b/modules/l2/datapath/bridge_flood.c index 50ae8370b..01762b498 100644 --- a/modules/l2/datapath/bridge_flood.c +++ b/modules/l2/datapath/bridge_flood.c @@ -18,6 +18,7 @@ enum edges { OUTPUT = 0, INPUT, + VXLAN_FLOOD, DROP, EDGE_COUNT }; @@ -83,7 +84,11 @@ static uint16_t bridge_flood_process( if (clone == NULL) continue; - rte_node_enqueue_x1(graph, node, OUTPUT, clone); + if (member->type == GR_IFACE_TYPE_VXLAN) + rte_node_enqueue_x1(graph, node, VXLAN_FLOOD, clone); + else + rte_node_enqueue_x1(graph, node, OUTPUT, clone); + flood_count++; } if (iface != br && (br->flags & GR_IFACE_F_UP)) { @@ -112,6 +117,7 @@ static struct rte_node_register node = { .next_nodes = { [OUTPUT] = "iface_output", [INPUT] = "iface_input", + [VXLAN_FLOOD] = "vxlan_flood", [DROP] = "bridge_flood_drop", }, }; diff --git a/modules/l2/datapath/bridge_input.c b/modules/l2/datapath/bridge_input.c index a54f7116c..af90731be 100644 --- a/modules/l2/datapath/bridge_input.c +++ b/modules/l2/datapath/bridge_input.c @@ -10,6 +10,7 @@ #include #include +#include enum edges { OUTPUT = 0, @@ -38,8 +39,11 @@ static uint16_t bridge_input_process( const struct gr_fdb_entry *fdb; struct iface_mbuf_data *d; struct rte_ether_hdr *eth; + struct fdb_stats *stats; struct rte_mbuf *m; + 
ip4_addr_t vtep; rte_edge_t edge; + unsigned lcore_id = rte_lcore_id(); for (uint16_t i = 0; i < nb_objs; i++) { m = objs[i]; @@ -59,15 +63,20 @@ static uint16_t bridge_input_process( goto next; } br = iface_info_bridge(bridge); + stats = fdb_get_stats(bridge->id, lcore_id); if (rte_is_unicast_ether_addr(&eth->src_addr) - && !(br->flags & GR_BRIDGE_F_NO_LEARN)) - fdb_learn(bridge->id, d->iface->id, &eth->src_addr, d->vlan_id); + && !(br->flags & GR_BRIDGE_F_NO_LEARN)) { + vtep = (d->iface->type == GR_IFACE_TYPE_VXLAN) ? d->vtep : 0; + fdb_learn(bridge->id, d->iface->id, &eth->src_addr, d->vlan_id, vtep); + } if (rte_is_unicast_ether_addr(&eth->dst_addr)) { fdb = fdb_lookup(bridge->id, &eth->dst_addr, d->vlan_id); if (fdb == NULL) { // Unknown unicast + if (stats) + stats->miss++; edge = FLOOD; goto next; } @@ -82,7 +91,11 @@ static uint16_t bridge_input_process( goto next; } // Direct output to learned interface + if (stats) + stats->hit++; d->iface = iface; + d->vtep = fdb->vtep; + if (iface->type == GR_IFACE_TYPE_BRIDGE) { edge = INPUT; } else { @@ -90,6 +103,8 @@ static uint16_t bridge_input_process( } } else { // Broadcast, multicast + if (stats) + stats->flood++; edge = FLOOD; } next: diff --git a/modules/l2/datapath/l2_datapath.h b/modules/l2/datapath/l2_datapath.h new file mode 100644 index 000000000..7c9ce2150 --- /dev/null +++ b/modules/l2/datapath/l2_datapath.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#pragma once + +#include + +#include + +struct trace_vxlan_data { + rte_be32_t vni; + ip4_addr_t vtep; +}; + +int trace_vxlan_format(char *buf, size_t len, const void *data, size_t data_len); diff --git a/modules/l2/datapath/meson.build b/modules/l2/datapath/meson.build index d61132060..b6dc45fc5 100644 --- a/modules/l2/datapath/meson.build +++ b/modules/l2/datapath/meson.build @@ -4,4 +4,7 @@ src += files( 'bridge_flood.c', 'bridge_input.c', + 'vxlan_flood.c', + 'vxlan_input.c', + 'vxlan_output.c', ) diff --git 
a/modules/l2/datapath/vxlan_flood.c b/modules/l2/datapath/vxlan_flood.c new file mode 100644 index 000000000..68d91c338 --- /dev/null +++ b/modules/l2/datapath/vxlan_flood.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include + +enum edges { + OUTPUT = 0, + DROP, + EDGE_COUNT +}; + +static uint16_t +vxlan_flood_process(struct rte_graph *graph, struct rte_node *node, void **objs, uint16_t nb_objs) { + const struct iface_info_vxlan *vxlan; + struct rte_mbuf *m, *clone; + uint16_t flood_count; + uint16_t sent = 0; + + for (uint16_t i = 0; i < nb_objs; i++) { + m = objs[i]; + flood_count = 0; + + if (gr_mbuf_is_traced(m)) + gr_mbuf_trace_add(m, node, 0); + + vxlan = iface_info_vxlan(mbuf_data(m)->iface); + + for (uint16_t j = 0; j < vxlan->n_flood_vteps; j++) { + if (flood_count == 0) { + clone = m; + } else { + clone = gr_mbuf_copy(m, UINT32_MAX, sizeof(struct mbuf_data)); + if (clone == NULL) + continue; + } + + iface_mbuf_data(clone)->vtep = vxlan->flood_vteps[j]; + + rte_node_enqueue_x1(graph, node, OUTPUT, clone); + + flood_count++; + } + + if (flood_count == 0) + rte_node_enqueue_x1(graph, node, DROP, m); + sent += flood_count; + } + + return sent; +} + +static struct rte_node_register node = { + .name = "vxlan_flood", + .process = vxlan_flood_process, + .nb_edges = EDGE_COUNT, + .next_nodes = { + [OUTPUT] = "iface_output", + [DROP] = "vxlan_flood_drop", + }, +}; + +static struct gr_node_info info = { + .node = &node, + .type = GR_NODE_T_L2, +}; + +GR_NODE_REGISTER(info); + +GR_DROP_REGISTER(vxlan_flood_drop); diff --git a/modules/l2/datapath/vxlan_input.c b/modules/l2/datapath/vxlan_input.c new file mode 100644 index 000000000..7edc8d949 --- /dev/null +++ b/modules/l2/datapath/vxlan_input.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include "l2_datapath.h" + +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +enum { + IFACE_INPUT = 0, + NO_TUNNEL, + BAD_FLAGS, + EDGE_COUNT, +}; + +int trace_vxlan_format(char *buf, size_t len, const void *data, size_t /*data_len*/) { + const struct trace_vxlan_data *t = data; + int n = snprintf(buf, len, "vni=%u", rte_be_to_cpu_32(t->vni)); + if (t->vtep != 0) + n += snprintf(buf + n, len - n, " vtep=" IP4_F, &t->vtep); + return n; +} + +static uint16_t +vxlan_input_process(struct rte_graph *graph, struct rte_node *node, void **objs, uint16_t nb_objs) { + uint16_t last_vrf_id, vrf_id; + struct ip_local_mbuf_data *l; + struct iface_mbuf_data *d; + struct rte_vxlan_hdr *vh; + rte_be32_t vni, last_vni; + ip4_addr_t src_vtep; + struct iface *iface; + struct rte_mbuf *m; + rte_edge_t edge; + + last_vrf_id = GR_VRF_ID_UNDEF; + last_vni = 0; + iface = NULL; + vni = 0; + + for (uint16_t i = 0; i < nb_objs; i++) { + m = objs[i]; + l = ip_local_mbuf_data(m); + vrf_id = l->vrf_id; + src_vtep = l->src; + + vh = rte_pktmbuf_mtod_offset(m, struct rte_vxlan_hdr *, sizeof(struct rte_udp_hdr)); + if (!(vh->vx_flags & VXLAN_FLAGS_VNI)) { + edge = BAD_FLAGS; + goto next; + } + + vni = vxlan_decode_vni(vh->vx_vni); + if (vni != last_vni || vrf_id != last_vrf_id) { + iface = vxlan_get_iface(vni, vrf_id); + last_vrf_id = vrf_id; + last_vni = vni; + } + if (iface == NULL) { + edge = NO_TUNNEL; + goto next; + } + + rte_pktmbuf_adj(m, sizeof(struct rte_udp_hdr) + sizeof(*vh)); + + d = iface_mbuf_data(m); + d->iface = iface; + d->vlan_id = 0; + d->vtep = src_vtep; + edge = IFACE_INPUT; +next: + if (gr_mbuf_is_traced(m) || (iface && iface->flags & GR_IFACE_F_PACKET_TRACE)) { + struct trace_vxlan_data *t = gr_mbuf_trace_add(m, node, sizeof(*t)); + t->vni = vni; + t->vtep = src_vtep; + } + rte_node_enqueue_x1(graph, node, edge, m); + } + + return nb_objs; +} + +static void vxlan_input_register(void) { + l4_input_register_port(IPPROTO_UDP, 
RTE_BE16(RTE_VXLAN_DEFAULT_PORT), "vxlan_input"); +} + +static struct rte_node_register vxlan_input_node = { + .name = "vxlan_input", + + .process = vxlan_input_process, + + .nb_edges = EDGE_COUNT, + .next_nodes = { + [IFACE_INPUT] = "iface_input", + [NO_TUNNEL] = "vxlan_input_no_tunnel", + [BAD_FLAGS] = "vxlan_input_bad_flags", + }, +}; + +static struct gr_node_info vxlan_input_info = { + .node = &vxlan_input_node, + .type = GR_NODE_T_L3, + .register_callback = vxlan_input_register, + .trace_format = trace_vxlan_format, +}; + +GR_NODE_REGISTER(vxlan_input_info); + +GR_DROP_REGISTER(vxlan_input_no_tunnel); +GR_DROP_REGISTER(vxlan_input_bad_flags); diff --git a/modules/l2/datapath/vxlan_output.c b/modules/l2/datapath/vxlan_output.c new file mode 100644 index 000000000..3eb706b00 --- /dev/null +++ b/modules/l2/datapath/vxlan_output.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include "l2_datapath.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +enum { + IP_OUTPUT = 0, + NO_ROUTE, + NO_HEADROOM, + EDGE_COUNT, +}; + +#define EPHEMERAL_PORT_START 49152 +#define EPHEMERAL_PORT_MASK (UINT16_MAX - EPHEMERAL_PORT_START) + +static inline rte_be16_t vxlan_src_port(uint32_t hash) { + // RFC 7348 Section 5, recommends using source port hashing to enable + // ECMP load balancing in the underlay network. 
+ return rte_cpu_to_be_16(EPHEMERAL_PORT_START + (hash & EPHEMERAL_PORT_MASK)); +} + +static uint16_t vxlan_output_process( + struct rte_graph *graph, + struct rte_node *node, + void **objs, + uint16_t nb_objs +) { + const struct iface_info_vxlan *vxlan; + struct iface_mbuf_data *d; + struct vxlan_template *vh; + const struct nexthop *nh; + struct rte_mbuf *m; + rte_edge_t edge; + uint16_t len; + + for (uint16_t i = 0; i < nb_objs; i++) { + m = objs[i]; + d = iface_mbuf_data(m); + vxlan = iface_info_vxlan(d->iface); + + if (gr_mbuf_is_traced(m)) { + struct trace_vxlan_data *t = gr_mbuf_trace_add(m, node, sizeof(*t)); + t->vni = rte_cpu_to_be_32(vxlan->vni); + t->vtep = d->vtep; + } + + nh = fib4_lookup(vxlan->encap_vrf_id, d->vtep); + if (nh == NULL) { + edge = NO_ROUTE; + goto next; + } + + len = rte_pktmbuf_pkt_len(m); + + vh = gr_mbuf_prepend(m, vh); + if (unlikely(vh == NULL)) { + edge = NO_HEADROOM; + goto next; + } + + *vh = vxlan->template; + vh->udp.src_port = vxlan_src_port(m->hash.rss); + vh->udp.dgram_len = rte_cpu_to_be_16(len + sizeof(vh->udp) + sizeof(vh->vxlan)); + vh->ip.dst_addr = d->vtep; + vh->ip.total_length = rte_cpu_to_be_16(len + sizeof(*vh)); + vh->ip.hdr_checksum = rte_ipv4_cksum(&vh->ip); + + ip_output_mbuf_data(m)->nh = nh; + + edge = IP_OUTPUT; +next: + rte_node_enqueue_x1(graph, node, edge, m); + } + + return nb_objs; +} + +static void vxlan_output_register(void) { + iface_output_type_register(GR_IFACE_TYPE_VXLAN, "vxlan_output"); +} + +static struct rte_node_register vxlan_output_node = { + .name = "vxlan_output", + + .process = vxlan_output_process, + + .nb_edges = EDGE_COUNT, + .next_nodes = { + [IP_OUTPUT] = "ip_output", + [NO_ROUTE] = "vxlan_output_no_route", + [NO_HEADROOM] = "error_no_headroom", + }, +}; + +static struct gr_node_info vxlan_output_info = { + .node = &vxlan_output_node, + .type = GR_NODE_T_L3, + .register_callback = vxlan_output_register, + .trace_format = trace_vxlan_format, +}; + 
+GR_NODE_REGISTER(vxlan_output_info); + +GR_DROP_REGISTER(vxlan_output_no_route); diff --git a/modules/l4/gr_l4.h b/modules/l4/gr_l4.h index ff875adb9..99254bfaa 100644 --- a/modules/l4/gr_l4.h +++ b/modules/l4/gr_l4.h @@ -8,3 +8,7 @@ #include void l4_input_register_port(uint8_t proto, rte_be16_t port, const char *next_node); + +int l4_input_alias_port(uint8_t proto, rte_be16_t port, rte_be16_t alias); + +int l4_input_unalias_port(uint8_t proto, rte_be16_t alias); diff --git a/modules/l4/l4_input_local.c b/modules/l4/l4_input_local.c index e8fe50a3d..777e862e2 100644 --- a/modules/l4/l4_input_local.c +++ b/modules/l4/l4_input_local.c @@ -1,12 +1,11 @@ // SPDX-License-Identifier: BSD-3-Clause // Copyright (c) 2024 Christophe Fontaine -#include "gr_l4.h" - #include #include #include #include +#include #include #include #include @@ -20,6 +19,7 @@ enum edges { EDGE_COUNT, }; +static unsigned udp_refcounts[UINT_NUM_VALUES(rte_be16_t)] = {0}; static rte_edge_t udp_edges[UINT_NUM_VALUES(rte_be16_t)] = {MANAGEMENT}; void l4_input_register_port(uint8_t proto, rte_be16_t port, const char *next_node) { @@ -30,12 +30,40 @@ void l4_input_register_port(uint8_t proto, rte_be16_t port, const char *next_nod if (udp_edges[port] != MANAGEMENT) ABORT("next node already registered for udp port=%hu", p); udp_edges[port] = gr_node_attach_parent("l4_input_local", next_node); + udp_refcounts[port]++; break; default: ABORT("proto not supported %hhu", proto); } } +int l4_input_alias_port(uint8_t proto, rte_be16_t port, rte_be16_t alias) { + assert(proto == IPPROTO_UDP); + + if (udp_edges[port] == MANAGEMENT) + return errno_set(EADDRNOTAVAIL); + if (udp_edges[alias] != MANAGEMENT && udp_edges[alias] != udp_edges[port]) + return errno_set(EADDRINUSE); + + udp_edges[alias] = udp_edges[port]; + udp_refcounts[alias]++; + + return 0; +} + +int l4_input_unalias_port(uint8_t proto, rte_be16_t alias) { + assert(proto == IPPROTO_UDP); + + if (udp_edges[alias] == MANAGEMENT || udp_refcounts[alias] == 
#!/bin/bash
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2026 Robin Jarry

# This test verifies EVPN/VXLAN type-2 (MAC/IP) and type-3 (flood VTEP) route
# exchange between FRR+Grout and a standalone FRR+Linux peer. Each side has
# a bridge with a VXLAN member (VNI 100) and a host connected to a local port.
# BGP EVPN advertises locally learned MACs and flood VTEPs to the remote peer.
#
# Success criteria:
# - Both sides exchange EVPN type-3 routes (flood VTEPs installed).
# - Host-A and Host-B can ping each other through the VXLAN overlay.
# - Both sides learn the remote MAC via EVPN type-2 routes.
#
# - - - - - - - - - - - - -     - - - - - - - - - - - - -
# |       evpn-peer       |     |         grout         |
#
# | +----------+          |     |          +----------+ |
#   | vxlan100 |                           | vxlan100 |
# | +----+-----+          |     |          +-----+----+ |
#        |                                       |
# |  +---+---+            |     |            +---+---+  |
#    | br100 |                               | br100 |
# |  +---+---+            |     |            +---+---+  |
#        |         .1                .2          |
# |  +---+---+  +-------+ |     | +------+   +---+---+  |
#    |  p1   |  | x-p0  |         |  p0  |   |  p1   |
# |  +---+---+  +---+---+ |     | +---+--+   +---+---+  |
# - - - -|- - - - - | - - -     - - - | - - - - -|- - - -
#        |          |                 |          |
# - - - -|- - - .   | <---- BGP ----> |   - - - -|- - - -
# |      |      |   |                 |   |      |      |
#    +---+----+     `-----------------'      +---+----+
# |  |  x-p1  | |        underlay         |  |  x-p1  | |
#    +--------+        172.16.0.0/24         +--------+
# |      .2     |                         |      .3     |
#                <= = = = = = = = = = = =>
# |   host-a    |         overlay         |   host-b    |
# - - - - - - - -       10.0.0.0/24       - - - - - - - '

# Quoted so that a checkout path containing spaces or glob characters works.
. "$(dirname "$0")/_init_frr.sh"

# Run the given command until it succeeds, sleeping one second between tries.
# Returns 1 when it still fails after the initial try plus 10 retries.
retry() {
	local attempts=0
	until "$@"; do
		if [ "$attempts" -ge 10 ]; then
			return 1
		fi
		sleep 1
		attempts=$((attempts + 1))
	done
}

# output_has <grep-flags> <pattern> <command...>
# Succeeds when the output of the command matches the pattern.
output_has() {
	local flags=$1 pattern=$2
	shift 2
	"$@" | grep "$flags" -- "$pattern"
}

# right side -------------------------------------------------------------------
create_interface p0
set_ip_address p0 172.16.0.2/24

grcli interface add bridge br100
create_interface p1 domain br100
grcli interface add vxlan vxlan100 vni 100 local 172.16.0.2 domain br100

netns_add host-b
move_to_netns x-p1 host-b
ip -n host-b addr add 10.0.0.3/24 dev x-p1

# left side --------------------------------------------------------------------
start_frr evpn-peer 0

ip netns exec evpn-peer sysctl -qw net.ipv4.conf.all.forwarding=1
ip netns exec evpn-peer sysctl -qw net.ipv4.conf.all.rp_filter=0
ip netns exec evpn-peer sysctl -qw net.ipv4.conf.default.rp_filter=0

move_to_netns x-p0 evpn-peer
ip -n evpn-peer addr add 172.16.0.1/24 dev x-p0

ip -n evpn-peer link add br100 type bridge
ip -n evpn-peer link set br100 up

ip -n evpn-peer link add vxlan100 type vxlan id 100 local 172.16.0.1 dstport 4789 nolearning
ip -n evpn-peer link set vxlan100 master br100
ip -n evpn-peer link set vxlan100 up

# Host-A: veth pair, one end in host-a, other end in evpn-peer bridge
ip -n evpn-peer link add p1 type veth peer name x-p1
ip -n evpn-peer link set p1 master br100
ip -n evpn-peer link set p1 up

netns_add host-a
ip -n evpn-peer link set x-p1 netns host-a
ip -n host-a link set x-p1 up
ip -n host-a addr add 10.0.0.2/24 dev x-p1

# BGP EVPN on peer
vtysh -N evpn-peer <<-EOF
configure terminal

router bgp 65000
 bgp router-id 172.16.0.1
 no bgp default ipv4-unicast

 neighbor 172.16.0.2 remote-as 65000

 address-family l2vpn evpn
  neighbor 172.16.0.2 activate
  advertise-all-vni
 exit-address-family
exit
EOF

# BGP EVPN on Grout
vtysh <<-EOF
configure terminal

router bgp 65000
 bgp router-id 172.16.0.2
 no bgp default ipv4-unicast

 neighbor 172.16.0.1 remote-as 65000

 address-family l2vpn evpn
  neighbor 172.16.0.1 activate
  advertise-all-vni
 exit-address-family
exit
EOF

# -- Wait for EVPN type-3 (flood VTEP) exchange -------------------------------
if ! retry output_has -qF 172.16.0.2 bridge -n evpn-peer fdb show dev vxlan100; then
	vtysh -N evpn-peer -c "show evpn vni 100"
	fail "Linux peer did not learn remote VTEP 172.16.0.2"
fi

if ! retry output_has -qF 172.16.0.1 grcli flood vtep show; then
	grcli flood vtep show
	fail "Grout did not learn remote VTEP 172.16.0.1"
fi

bridge -n evpn-peer fdb show dev vxlan100
grcli fdb show
grcli flood vtep show

# -- Verify L2 connectivity through VXLAN overlay -----------------------------

# Ping triggers ARP which triggers MAC learning + EVPN type-2 advertisement.
ip netns exec host-a ping -i0.1 -c3 -W1 10.0.0.3
ip netns exec host-b ping -i0.1 -c3 -W1 10.0.0.2

grcli fdb show iface vxlan100
bridge -n evpn-peer fdb show dev vxlan100

# -- Verify EVPN type-2 (MAC/IP) learned on both sides
mac_a=$(ip netns exec host-a cat /sys/class/net/x-p1/address)
if ! retry output_has -qF "$mac_a" vtysh -c "show bgp l2vpn evpn route type 2"; then
	vtysh -c "show bgp l2vpn evpn route type 2"
	fail "FRR did not learn type 2 route"
fi
if ! retry output_has -qF "$mac_a" grcli fdb show iface vxlan100 extern; then
	grcli fdb show iface vxlan100
	fail "FRR did not program FDB entry"
fi

mac_b=$(ip netns exec host-b cat /sys/class/net/x-p1/address)
if ! retry output_has -qF "$mac_b" vtysh -N evpn-peer -c "show bgp l2vpn evpn route type 2"; then
	vtysh -N evpn-peer -c "show bgp l2vpn evpn route type 2"
	fail "EVPN peer did not learn type 2 route"
fi
if ! retry output_has -q "$mac_b.*extern" bridge -n evpn-peer fdb show dev vxlan100; then
	bridge -n evpn-peer fdb show dev vxlan100
	fail "EVPN peer did not program FDB entry in bridge"
fi
#!/bin/bash
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2026 Robin Jarry

# Quoted so that a checkout path containing spaces or glob characters works.
. "$(dirname "$0")/_init.sh"

port_add p0

# grout side: underlay address on p0, bridge br100 with a VXLAN member
# (VNI 100) and one statically configured flood VTEP.
grcli address add 10.0.0.1/24 iface p0
grcli interface add bridge br100
grcli interface add vxlan vxlan100 vni 100 local 10.0.0.1 domain br100
grcli flood vtep add 10.0.0.2 vni 100

grcli address add 192.168.100.1/24 iface br100

# Linux side: same layout in the n1 namespace, with an all-zero FDB entry
# pointing at the grout VTEP.
netns_add n1
move_to_netns x-p0 n1
ip -n n1 addr add 10.0.0.2/24 dev x-p0
ip -n n1 link add br100 type bridge
ip -n n1 link set br100 up
ip -n n1 link add vxlan100 type vxlan id 100 local 10.0.0.2 dstport 4789 dev x-p0
ip -n n1 link set vxlan100 master br100
ip -n n1 link set vxlan100 up
ip -n n1 addr add 192.168.100.2/24 dev br100
bridge -n n1 fdb add 00:00:00:00:00:00 dev vxlan100 self vni 100 dst 10.0.0.1

# Test L3 connectivity over VXLAN tunnel
# The Linux side initiates the ping which will cause grout to learn the MAC
ip netns exec n1 ping -i0.01 -c3 -W1 192.168.100.1

grcli fdb show