[pve-devel] [PATCH frr 1/2] add frr zebra buffering patches

Alexandre Derumier aderumier at odiso.com
Wed Apr 27 17:04:55 CEST 2022


https://github.com/FRRouting/frr/pull/10482

This fixes a bug hit by two Proxmox users: on hosts where vlan-aware bridges
with a lot of VLANs exist, the kernel sends netlink messages that are too big
for zebra's fixed-size receive buffer.

Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
---
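Note for reviewers: the core of the upstream series (patch 0003) is to ask the
kernel for the size of the pending netlink message and grow the per-socket
receive buffer before the real read, instead of reading into a fixed-size
buffer. A minimal sketch of that technique follows; this is not FRR code, the
struct and function names here are illustrative only:

    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/socket.h>
    #include <sys/types.h>

    struct nl_rx_buf {
            uint8_t *buf;
            size_t buflen;
    };

    /* Peek the real size of the next datagram (MSG_PEEK | MSG_TRUNC does
     * not consume it), grow the buffer if needed, then do the real read
     * so it can no longer be truncated by a too-small buffer. */
    static ssize_t nl_recv_grow(int sock, struct nl_rx_buf *rx)
    {
            ssize_t pending = recv(sock, NULL, 0, MSG_PEEK | MSG_TRUNC);

            if (pending > 0 && (size_t)pending > rx->buflen) {
                    uint8_t *nbuf = realloc(rx->buf, (size_t)pending);
                    if (!nbuf)
                            return -1;
                    rx->buf = nbuf;
                    rx->buflen = (size_t)pending;
            }

            return recv(sock, rx->buf, rx->buflen, 0);
    }

Patches 0001 and 0002 are preparatory: they stop copying `struct nlsock` into
the dplane info (keeping only the fd and sequence number, with a hash to look
the nlsock back up), so that the per-socket buffer above can be grown in place.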
 debian/patches/frr/0001-zebra-buffering.patch |  92 ++++
 debian/patches/frr/0002-zebra-buffering.patch | 420 ++++++++++++++++++
 debian/patches/frr/0003-zebra-buffering.patch | 283 ++++++++++++
 debian/patches/series                         |   3 +
 4 files changed, 798 insertions(+)
 create mode 100644 debian/patches/frr/0001-zebra-buffering.patch
 create mode 100644 debian/patches/frr/0002-zebra-buffering.patch
 create mode 100644 debian/patches/frr/0003-zebra-buffering.patch

diff --git a/debian/patches/frr/0001-zebra-buffering.patch b/debian/patches/frr/0001-zebra-buffering.patch
new file mode 100644
index 0000000..7f45eda
--- /dev/null
+++ b/debian/patches/frr/0001-zebra-buffering.patch
@@ -0,0 +1,92 @@
+From 3670f5047cb00865c15b2f3ebdfcfe634443db61 Mon Sep 17 00:00:00 2001
+From: Donald Sharp <sharpd at nvidia.com>
+Date: Tue, 8 Feb 2022 09:47:24 -0500
+Subject: [PATCH] zebra: Store the sequence number to use as part of the
+ dp_info
+
+Store and use the sequence number instead of using what is in
+the `struct nlsock`.  Future commits are going away from storing
+the `struct nlsock` and the copy of the nlsock was guaranteeing
+unique sequence numbers per message.  So let's store the
+sequence number to use instead.
+
+Signed-off-by: Donald Sharp <sharpd at nvidia.com>
+---
+ zebra/kernel_netlink.c | 14 +++++++-------
+ zebra/zebra_dplane.h   |  3 +++
+ 2 files changed, 10 insertions(+), 7 deletions(-)
+
+diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
+index e3b2f9cb665..59911945481 100644
+--- a/zebra/kernel_netlink.c
++++ b/zebra/kernel_netlink.c
+@@ -1038,7 +1038,7 @@ netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
+ 	const struct nlsock *nl;
+ 
+ 	nl = &(dp_info->nls);
+-	n->nlmsg_seq = nl->seq;
++	n->nlmsg_seq = dp_info->seq;
+ 	n->nlmsg_pid = nl->snl.nl_pid;
+ 
+ 	if (IS_ZEBRA_DEBUG_KERNEL)
+@@ -1172,8 +1172,8 @@ static int nl_batch_read_resp(struct nl_batch *bth)
+ 			 * 'update' context objects take two consecutive
+ 			 * sequence numbers.
+ 			 */
+-			if (dplane_ctx_is_update(ctx)
+-			    && dplane_ctx_get_ns(ctx)->nls.seq + 1 == seq) {
++			if (dplane_ctx_is_update(ctx) &&
++			    dplane_ctx_get_ns(ctx)->seq + 1 == seq) {
+ 				/*
+ 				 * This is the situation where we get a response
+ 				 * to a message that should be ignored.
+@@ -1186,14 +1186,14 @@ static int nl_batch_read_resp(struct nl_batch *bth)
+ 			dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);
+ 
+ 			/* We have found corresponding context object. */
+-			if (dplane_ctx_get_ns(ctx)->nls.seq == seq)
++			if (dplane_ctx_get_ns(ctx)->seq == seq)
+ 				break;
+ 
+-			if (dplane_ctx_get_ns(ctx)->nls.seq > seq)
++			if (dplane_ctx_get_ns(ctx)->seq > seq)
+ 				zlog_warn(
+ 					"%s:WARNING Recieved %u is less than any context on the queue ctx->seq %u",
+ 					__func__, seq,
+-					dplane_ctx_get_ns(ctx)->nls.seq);
++					dplane_ctx_get_ns(ctx)->seq);
+ 		}
+ 
+ 		if (ignore_msg) {
+@@ -1360,7 +1360,7 @@ enum netlink_msg_status netlink_batch_add_msg(
+ 			return FRR_NETLINK_ERROR;
+ 	}
+ 
+-	seq = dplane_ctx_get_ns(ctx)->nls.seq;
++	seq = dplane_ctx_get_ns(ctx)->seq;
+ 	if (ignore_res)
+ 		seq++;
+ 
+diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
+index 1d55181388e..69ea9c7fd95 100644
+--- a/zebra/zebra_dplane.h
++++ b/zebra/zebra_dplane.h
+@@ -42,6 +42,7 @@ struct zebra_dplane_info {
+ 
+ #if defined(HAVE_NETLINK)
+ 	struct nlsock nls;
++	int seq;
+ 	bool is_cmd;
+ #endif
+ };
+@@ -57,8 +58,10 @@ zebra_dplane_info_from_zns(struct zebra_dplane_info *zns_info,
+ 	zns_info->is_cmd = is_cmd;
+ 	if (is_cmd) {
+ 		zns_info->nls = zns->netlink_cmd;
++		zns_info->seq = zns->netlink_cmd.seq;
+ 	} else {
+ 		zns_info->nls = zns->netlink;
++		zns_info->seq = zns->netlink.seq;
+ 	}
+ #endif /* NETLINK */
+ }
diff --git a/debian/patches/frr/0002-zebra-buffering.patch b/debian/patches/frr/0002-zebra-buffering.patch
new file mode 100644
index 0000000..4f401b3
--- /dev/null
+++ b/debian/patches/frr/0002-zebra-buffering.patch
@@ -0,0 +1,420 @@
+From d4000d7ba3242c0e6adfa56544a34312b0f566a9 Mon Sep 17 00:00:00 2001
+From: Donald Sharp <sharpd at nvidia.com>
+Date: Wed, 2 Feb 2022 13:21:52 -0500
+Subject: [PATCH] zebra: Remove `struct nlsock` from dataplane information and
+ use `int fd`
+
+Store the fd that corresponds to the appropriate `struct nlsock` and pass
+that around in the dplane context instead of the pointer to the nlsock.
+Modify the kernel_netlink.c code to store in a hash the `struct nlsock`
+with the socket fd as the key.
+
+Why do this?  The dataplane context is used to pass around the `struct nlsock`
+but the zebra code has a bug where the received buffer for kernel netlink
+messages from the kernel is not big enough.  So we need to dynamically
+grow the receive buffer per socket, instead of having a non-dynamic buffer
+that we read into.  By passing around the fd we can look up the `struct nlsock`
+that will soon have the associated buffer and not have to worry about `const`
+issues that will arise.
+
+Signed-off-by: Donald Sharp <sharpd at nvidia.com>
+---
+ zebra/kernel_netlink.c | 72 ++++++++++++++++++++++++++++++++++++------
+ zebra/kernel_netlink.h |  1 +
+ zebra/rt_netlink.c     | 13 ++++++--
+ zebra/zebra_dplane.c   | 19 +++++++----
+ zebra/zebra_dplane.h   |  9 +++---
+ 5 files changed, 91 insertions(+), 23 deletions(-)
+
+diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
+index 59911945481..3650d87e0fb 100644
+--- a/zebra/kernel_netlink.c
++++ b/zebra/kernel_netlink.c
+@@ -36,6 +36,7 @@
+ #include "vrf.h"
+ #include "mpls.h"
+ #include "lib_errors.h"
++#include "hash.h"
+ 
+ //#include "zebra/zserv.h"
+ #include "zebra/zebra_router.h"
+@@ -160,6 +161,7 @@ extern struct zebra_privs_t zserv_privs;
+ 
+ DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers");
+ 
++struct hash *nlsock_hash;
+ size_t nl_batch_tx_bufsize;
+ char *nl_batch_tx_buf;
+ 
+@@ -429,8 +431,10 @@ static int kernel_read(struct thread *thread)
+  */
+ int kernel_dplane_read(struct zebra_dplane_info *info)
+ {
+-	netlink_parse_info(dplane_netlink_information_fetch, &info->nls, info,
+-			   5, false);
++	struct nlsock *nl = kernel_netlink_nlsock_lookup(info->sock);
++
++	netlink_parse_info(dplane_netlink_information_fetch, nl, info, 5,
++			   false);
+ 
+ 	return 0;
+ }
+@@ -1035,9 +1039,9 @@ netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
+ 		  struct nlmsghdr *n, const struct zebra_dplane_info *dp_info,
+ 		  bool startup)
+ {
+-	const struct nlsock *nl;
++	struct nlsock *nl;
+ 
+-	nl = &(dp_info->nls);
++	nl = kernel_netlink_nlsock_lookup(dp_info->sock);
+ 	n->nlmsg_seq = dp_info->seq;
+ 	n->nlmsg_pid = nl->snl.nl_pid;
+ 
+@@ -1109,11 +1113,11 @@ static int nl_batch_read_resp(struct nl_batch *bth)
+ 	struct sockaddr_nl snl;
+ 	struct msghdr msg = {};
+ 	int status, seq;
+-	const struct nlsock *nl;
++	struct nlsock *nl;
+ 	struct zebra_dplane_ctx *ctx;
+ 	bool ignore_msg;
+ 
+-	nl = &(bth->zns->nls);
++	nl = kernel_netlink_nlsock_lookup(bth->zns->sock);
+ 
+ 	msg.msg_name = (void *)&snl;
+ 	msg.msg_namelen = sizeof(snl);
+@@ -1295,13 +1299,15 @@ static void nl_batch_send(struct nl_batch *bth)
+ 	bool err = false;
+ 
+ 	if (bth->curlen != 0 && bth->zns != NULL) {
++		struct nlsock *nl =
++			kernel_netlink_nlsock_lookup(bth->zns->sock);
++
+ 		if (IS_ZEBRA_DEBUG_KERNEL)
+ 			zlog_debug("%s: %s, batch size=%zu, msg cnt=%zu",
+-				   __func__, bth->zns->nls.name, bth->curlen,
++				   __func__, nl->name, bth->curlen,
+ 				   bth->msgcnt);
+ 
+-		if (netlink_send_msg(&(bth->zns->nls), bth->buf, bth->curlen)
+-		    == -1)
++		if (netlink_send_msg(nl, bth->buf, bth->curlen) == -1)
+ 			err = true;
+ 
+ 		if (!err) {
+@@ -1334,6 +1340,7 @@ enum netlink_msg_status netlink_batch_add_msg(
+ 	int seq;
+ 	ssize_t size;
+ 	struct nlmsghdr *msgh;
++	struct nlsock *nl;
+ 
+ 	size = (*msg_encoder)(ctx, bth->buf_head, bth->bufsiz - bth->curlen);
+ 
+@@ -1361,12 +1368,14 @@ enum netlink_msg_status netlink_batch_add_msg(
+ 	}
+ 
+ 	seq = dplane_ctx_get_ns(ctx)->seq;
++	nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
++
+ 	if (ignore_res)
+ 		seq++;
+ 
+ 	msgh = (struct nlmsghdr *)bth->buf_head;
+ 	msgh->nlmsg_seq = seq;
+-	msgh->nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
++	msgh->nlmsg_pid = nl->snl.nl_pid;
+ 
+ 	bth->zns = dplane_ctx_get_ns(ctx);
+ 	bth->buf_head = ((char *)bth->buf_head) + size;
+@@ -1496,6 +1505,33 @@ void kernel_update_multi(struct dplane_ctx_q *ctx_list)
+ 	dplane_ctx_list_append(ctx_list, &handled_list);
+ }
+ 
++struct nlsock *kernel_netlink_nlsock_lookup(int sock)
++{
++	struct nlsock lookup;
++
++	lookup.sock = sock;
++
++	return hash_lookup(nlsock_hash, &lookup);
++}
++
++static uint32_t kernel_netlink_nlsock_key(const void *arg)
++{
++	const struct nlsock *nl = arg;
++
++	return nl->sock;
++}
++
++static bool kernel_netlink_nlsock_hash_equal(const void *arg1, const void *arg2)
++{
++	const struct nlsock *nl1 = arg1;
++	const struct nlsock *nl2 = arg2;
++
++	if (nl1->sock == nl2->sock)
++		return true;
++
++	return false;
++}
++
+ /* Exported interface function.  This function simply calls
+    netlink_socket (). */
+ void kernel_init(struct zebra_ns *zns)
+@@ -1505,6 +1541,11 @@ void kernel_init(struct zebra_ns *zns)
+ 	int one, ret;
+ #endif
+ 
++	if (!nlsock_hash)
++		nlsock_hash = hash_create_size(8, kernel_netlink_nlsock_key,
++					       kernel_netlink_nlsock_hash_equal,
++					       "Netlink Socket Hash");
++
+ 	/*
+ 	 * Initialize netlink sockets
+ 	 *
+@@ -1537,6 +1578,7 @@ void kernel_init(struct zebra_ns *zns)
+ 			 zns->netlink.name);
+ 		exit(-1);
+ 	}
++	(void)hash_get(nlsock_hash, &zns->netlink, hash_alloc_intern);
+ 
+ 	snprintf(zns->netlink_cmd.name, sizeof(zns->netlink_cmd.name),
+ 		 "netlink-cmd (NS %u)", zns->ns_id);
+@@ -1546,6 +1588,7 @@ void kernel_init(struct zebra_ns *zns)
+ 			 zns->netlink_cmd.name);
+ 		exit(-1);
+ 	}
++	(void)hash_get(nlsock_hash, &zns->netlink_cmd, hash_alloc_intern);
+ 
+ 	/* Outbound socket for dplane programming of the host OS. */
+ 	snprintf(zns->netlink_dplane_out.name,
+@@ -1557,6 +1600,8 @@ void kernel_init(struct zebra_ns *zns)
+ 			 zns->netlink_dplane_out.name);
+ 		exit(-1);
+ 	}
++	(void)hash_get(nlsock_hash, &zns->netlink_dplane_out,
++		       hash_alloc_intern);
+ 
+ 	/* Inbound socket for OS events coming to the dplane. */
+ 	snprintf(zns->netlink_dplane_in.name,
+@@ -1569,6 +1614,7 @@ void kernel_init(struct zebra_ns *zns)
+ 			 zns->netlink_dplane_in.name);
+ 		exit(-1);
+ 	}
++	(void)hash_get(nlsock_hash, &zns->netlink_dplane_in, hash_alloc_intern);
+ 
+ 	/*
+ 	 * SOL_NETLINK is not available on all platforms yet
+@@ -1659,16 +1705,19 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
+ 	thread_cancel(&zns->t_netlink);
+ 
+ 	if (zns->netlink.sock >= 0) {
++		hash_release(nlsock_hash, &zns->netlink);
+ 		close(zns->netlink.sock);
+ 		zns->netlink.sock = -1;
+ 	}
+ 
+ 	if (zns->netlink_cmd.sock >= 0) {
++		hash_release(nlsock_hash, &zns->netlink_cmd);
+ 		close(zns->netlink_cmd.sock);
+ 		zns->netlink_cmd.sock = -1;
+ 	}
+ 
+ 	if (zns->netlink_dplane_in.sock >= 0) {
++		hash_release(nlsock_hash, &zns->netlink_dplane_in);
+ 		close(zns->netlink_dplane_in.sock);
+ 		zns->netlink_dplane_in.sock = -1;
+ 	}
+@@ -1678,9 +1727,12 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
+ 	 */
+ 	if (complete) {
+ 		if (zns->netlink_dplane_out.sock >= 0) {
++			hash_release(nlsock_hash, &zns->netlink_dplane_out);
+ 			close(zns->netlink_dplane_out.sock);
+ 			zns->netlink_dplane_out.sock = -1;
+ 		}
++
++		hash_free(nlsock_hash);
+ 	}
+ }
+ #endif /* HAVE_NETLINK */
+diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h
+index cf8b8c785e6..ae88f3372b1 100644
+--- a/zebra/kernel_netlink.h
++++ b/zebra/kernel_netlink.h
+@@ -146,6 +146,7 @@ extern int netlink_config_write_helper(struct vty *vty);
+ extern void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold,
+ 					  bool set);
+ 
++extern struct nlsock *kernel_netlink_nlsock_lookup(int sock);
+ #endif /* HAVE_NETLINK */
+ 
+ #ifdef __cplusplus
+diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
+index 9ead1a768e7..c6423dce99c 100644
+--- a/zebra/rt_netlink.c
++++ b/zebra/rt_netlink.c
+@@ -1898,6 +1898,7 @@ ssize_t netlink_route_multipath_msg_encode(int cmd,
+ 	union g_addr src;
+ 	const struct prefix *p, *src_p;
+ 	uint32_t table_id;
++	struct nlsock *nl;
+ 
+ 	struct {
+ 		struct nlmsghdr n;
+@@ -1911,6 +1912,8 @@ ssize_t netlink_route_multipath_msg_encode(int cmd,
+ 	if (datalen < sizeof(*req))
+ 		return 0;
+ 
++	nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
++
+ 	memset(req, 0, sizeof(*req));
+ 
+ 	bytelen = (p->family == AF_INET ? 4 : 16);
+@@ -1924,7 +1927,7 @@ ssize_t netlink_route_multipath_msg_encode(int cmd,
+ 
+ 	req->n.nlmsg_type = cmd;
+ 
+-	req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
++	req->n.nlmsg_pid = nl->snl.nl_pid;
+ 
+ 	req->r.rtm_family = p->family;
+ 	req->r.rtm_dst_len = p->prefixlen;
+@@ -2360,6 +2363,8 @@ ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
+ 	int type = dplane_ctx_get_nhe_type(ctx);
+ 	struct rtattr *nest;
+ 	uint16_t encap;
++	struct nlsock *nl =
++		kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+ 
+ 	if (!id) {
+ 		flog_err(
+@@ -2402,7 +2407,7 @@ ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
+ 		req->n.nlmsg_flags |= NLM_F_REPLACE;
+ 
+ 	req->n.nlmsg_type = cmd;
+-	req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
++	req->n.nlmsg_pid = nl->snl.nl_pid;
+ 
+ 	req->nhm.nh_family = AF_UNSPEC;
+ 	/* TODO: Scope? */
+@@ -4283,6 +4288,8 @@ ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+ 	const char *routedesc;
+ 	int route_type;
+ 	struct prefix p = {0};
++	struct nlsock *nl =
++		kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+ 
+ 	struct {
+ 		struct nlmsghdr n;
+@@ -4325,7 +4332,7 @@ ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+ 	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+ 	req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+ 	req->n.nlmsg_type = cmd;
+-	req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
++	req->n.nlmsg_pid = nl->snl.nl_pid;
+ 
+ 	req->r.rtm_family = AF_MPLS;
+ 	req->r.rtm_table = RT_TABLE_MAIN;
+diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
+index 656ebcf3b7f..05297e143b5 100644
+--- a/zebra/zebra_dplane.c
++++ b/zebra/zebra_dplane.c
+@@ -1465,6 +1465,13 @@ const struct zebra_dplane_info *dplane_ctx_get_ns(
+ 	return &(ctx->zd_ns_info);
+ }
+ 
++int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx)
++{
++	DPLANE_CTX_VALID(ctx);
++
++	return ctx->zd_ns_info.sock;
++}
++
+ /* Accessors for nexthop information */
+ uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx)
+ {
+@@ -4844,7 +4851,7 @@ static void dplane_info_from_zns(struct zebra_dplane_info *ns_info,
+ 
+ #if defined(HAVE_NETLINK)
+ 	ns_info->is_cmd = true;
+-	ns_info->nls = zns->netlink_dplane_out;
++	ns_info->sock = zns->netlink_dplane_out.sock;
+ #endif /* NETLINK */
+ }
+ 
+@@ -4861,7 +4868,7 @@ static int dplane_incoming_read(struct thread *event)
+ 
+ 	/* Re-start read task */
+ 	thread_add_read(zdplane_info.dg_master, dplane_incoming_read, zi,
+-			zi->info.nls.sock, &zi->t_read);
++			zi->info.sock, &zi->t_read);
+ 
+ 	return 0;
+ }
+@@ -4906,13 +4913,13 @@ void zebra_dplane_ns_enable(struct zebra_ns *zns, bool enabled)
+ 		/* Make sure we're up-to-date with the zns object */
+ #if defined(HAVE_NETLINK)
+ 		zi->info.is_cmd = false;
+-		zi->info.nls = zns->netlink_dplane_in;
++		zi->info.sock = zns->netlink_dplane_in.sock;
+ 
+ 		/* Start read task for the dplane pthread. */
+ 		if (zdplane_info.dg_master)
+ 			thread_add_read(zdplane_info.dg_master,
+-					dplane_incoming_read, zi,
+-					zi->info.nls.sock, &zi->t_read);
++					dplane_incoming_read, zi, zi->info.sock,
++					&zi->t_read);
+ #endif
+ 	} else if (zi) {
+ 		if (IS_ZEBRA_DEBUG_DPLANE)
+@@ -5935,7 +5942,7 @@ void zebra_dplane_start(void)
+ 	frr_each (zns_info_list, &zdplane_info.dg_zns_list, zi) {
+ #if defined(HAVE_NETLINK)
+ 		thread_add_read(zdplane_info.dg_master, dplane_incoming_read,
+-				zi, zi->info.nls.sock, &zi->t_read);
++				zi, zi->info.sock, &zi->t_read);
+ #endif
+ 	}
+ 
+diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
+index 69ea9c7fd95..a7a5f99e451 100644
+--- a/zebra/zebra_dplane.h
++++ b/zebra/zebra_dplane.h
+@@ -41,7 +41,7 @@ struct zebra_dplane_info {
+ 	ns_id_t ns_id;
+ 
+ #if defined(HAVE_NETLINK)
+-	struct nlsock nls;
++	int sock;
+ 	int seq;
+ 	bool is_cmd;
+ #endif
+@@ -57,10 +57,10 @@ zebra_dplane_info_from_zns(struct zebra_dplane_info *zns_info,
+ #if defined(HAVE_NETLINK)
+ 	zns_info->is_cmd = is_cmd;
+ 	if (is_cmd) {
+-		zns_info->nls = zns->netlink_cmd;
++		zns_info->sock = zns->netlink_cmd.sock;
+ 		zns_info->seq = zns->netlink_cmd.seq;
+ 	} else {
+-		zns_info->nls = zns->netlink;
++		zns_info->sock = zns->netlink.sock;
+ 		zns_info->seq = zns->netlink.seq;
+ 	}
+ #endif /* NETLINK */
+@@ -564,9 +564,10 @@ dplane_ctx_gre_get_mtu(const struct zebra_dplane_ctx *ctx);
+ const struct zebra_l2info_gre *
+ dplane_ctx_gre_get_info(const struct zebra_dplane_ctx *ctx);
+ 
+-/* Namespace info - esp. for netlink communication */
++/* Namespace fd info - esp. for netlink communication */
+ const struct zebra_dplane_info *dplane_ctx_get_ns(
+ 	const struct zebra_dplane_ctx *ctx);
++int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx);
+ 
+ /* Indicates zebra shutdown/exit is in progress. Some operations may be
+  * simplified or skipped during shutdown processing.
diff --git a/debian/patches/frr/0003-zebra-buffering.patch b/debian/patches/frr/0003-zebra-buffering.patch
new file mode 100644
index 0000000..0a7c634
--- /dev/null
+++ b/debian/patches/frr/0003-zebra-buffering.patch
@@ -0,0 +1,283 @@
+From 2cf7651f0b1b0123dc5568ebad00ac84a9b3c348 Mon Sep 17 00:00:00 2001
+From: Donald Sharp <sharpd at nvidia.com>
+Date: Wed, 2 Feb 2022 13:28:42 -0500
+Subject: [PATCH] zebra: Make netlink buffer reads resizeable when needed
+
+Currently when the kernel sends netlink messages to FRR
+the buffers to receive this data is of fixed length.
+The kernel, with certain configurations, will send
+netlink messages that are larger than this fixed length.
+This leads to situations where, on startup, zebra gets
+really confused about the state of the kernel.  Effectively
+the current algorithm is this:
+
+read up to buffer in size
+while (data to parse)
+     get netlink message header, look at size
+        parse if you can
+
+The problem is that there is a 32k buffer we read.
+We get the first message that is say 1k in size,
+subtract that 1k to 31k left to parse.  We then
+get the next header and notice that the length
+of the message is 33k.  Which is obviously larger
+than what we read in.  FRR has no recover mechanism
+nor is there a way to know, a priori, what the maximum
+size the kernel will send us.
+
+Modify FRR to look at the kernel message and see if the
+buffer is large enough, if not, make it large enough to
+read in the message.
+
+This code has to be per netlink socket because of the usage
+of pthreads.  So add to `struct nlsock` the buffer and current
+buffer length.  Growing it as necessary.
+
+Fixes: #10404
+Signed-off-by: Donald Sharp <sharpd at nvidia.com>
+---
+ zebra/kernel_netlink.c | 68 +++++++++++++++++++++++++-----------------
+ zebra/kernel_netlink.h |  2 +-
+ zebra/zebra_dplane.c   |  4 +++
+ zebra/zebra_ns.h       |  3 ++
+ 4 files changed, 49 insertions(+), 28 deletions(-)
+
+diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
+index 3650d87e0fb..d0c86a6bb0e 100644
+--- a/zebra/kernel_netlink.c
++++ b/zebra/kernel_netlink.c
+@@ -90,8 +90,6 @@
+  */
+ #define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
+ 
+-#define NL_BATCH_RX_BUFSIZE NL_RCV_PKT_BUF_SIZE
+-
+ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
+ 					   {RTM_DELROUTE, "RTM_DELROUTE"},
+ 					   {RTM_GETROUTE, "RTM_GETROUTE"},
+@@ -165,8 +163,6 @@ struct hash *nlsock_hash;
+ size_t nl_batch_tx_bufsize;
+ char *nl_batch_tx_buf;
+ 
+-char nl_batch_rx_buf[NL_BATCH_RX_BUFSIZE];
+-
+ _Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
+ _Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
+ 
+@@ -320,6 +316,9 @@ static int netlink_socket(struct nlsock *nl, unsigned long groups,
+ 
+ 	nl->snl = snl;
+ 	nl->sock = sock;
++	nl->buflen = NL_RCV_PKT_BUF_SIZE;
++	nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
++
+ 	return ret;
+ }
+ 
+@@ -785,19 +784,29 @@ static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
+  *
+  * Returns -1 on error, 0 if read would block or the number of bytes received.
+  */
+-static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
+-			    void *buf, size_t buflen)
++static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
+ {
+ 	struct iovec iov;
+ 	int status;
+ 
+-	iov.iov_base = buf;
+-	iov.iov_len = buflen;
+-	msg.msg_iov = &iov;
+-	msg.msg_iovlen = 1;
++	iov.iov_base = nl->buf;
++	iov.iov_len = nl->buflen;
++	msg->msg_iov = &iov;
++	msg->msg_iovlen = 1;
+ 
+ 	do {
+-		status = recvmsg(nl->sock, &msg, 0);
++		int bytes;
++
++		bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
++
++		if (bytes >= 0 && (size_t)bytes > nl->buflen) {
++			nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
++			nl->buflen = bytes;
++			iov.iov_base = nl->buf;
++			iov.iov_len = nl->buflen;
++		}
++
++		status = recvmsg(nl->sock, msg, 0);
+ 	} while (status == -1 && errno == EINTR);
+ 
+ 	if (status == -1) {
+@@ -817,19 +826,19 @@ static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
+ 		return -1;
+ 	}
+ 
+-	if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
++	if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
+ 		flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
+ 			 "%s sender address length error: length %d", nl->name,
+-			 msg.msg_namelen);
++			 msg->msg_namelen);
+ 		return -1;
+ 	}
+ 
+ 	if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
+ 		zlog_debug("%s: << netlink message dump [recv]", __func__);
+ #ifdef NETLINK_DEBUG
+-		nl_dump(buf, status);
++		nl_dump(nl->buf, status);
+ #else
+-		zlog_hexdump(buf, status);
++		zlog_hexdump(nl->buf, status);
+ #endif /* NETLINK_DEBUG */
+ 	}
+ 
+@@ -932,8 +941,7 @@ static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
+  *            the filter.
+  */
+ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
+-		       const struct nlsock *nl,
+-		       const struct zebra_dplane_info *zns,
++		       struct nlsock *nl, const struct zebra_dplane_info *zns,
+ 		       int count, bool startup)
+ {
+ 	int status;
+@@ -942,7 +950,6 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
+ 	int read_in = 0;
+ 
+ 	while (1) {
+-		char buf[NL_RCV_PKT_BUF_SIZE];
+ 		struct sockaddr_nl snl;
+ 		struct msghdr msg = {.msg_name = (void *)&snl,
+ 				     .msg_namelen = sizeof(snl)};
+@@ -951,14 +958,14 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
+ 		if (count && read_in >= count)
+ 			return 0;
+ 
+-		status = netlink_recv_msg(nl, msg, buf, sizeof(buf));
++		status = netlink_recv_msg(nl, &msg);
+ 		if (status == -1)
+ 			return -1;
+ 		else if (status == 0)
+ 			break;
+ 
+ 		read_in++;
+-		for (h = (struct nlmsghdr *)buf;
++		for (h = (struct nlmsghdr *)nl->buf;
+ 		     (status >= 0 && NLMSG_OK(h, (unsigned int)status));
+ 		     h = NLMSG_NEXT(h, status)) {
+ 			/* Finish of reading. */
+@@ -1034,10 +1041,10 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
+  * startup  -> Are we reading in under startup conditions
+  *             This is passed through eventually to filter.
+  */
+-static int
+-netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
+-		  struct nlmsghdr *n, const struct zebra_dplane_info *dp_info,
+-		  bool startup)
++static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t,
++					   int startup),
++			     struct nlmsghdr *n,
++			     struct zebra_dplane_info *dp_info, bool startup)
+ {
+ 	struct nlsock *nl;
+ 
+@@ -1127,8 +1134,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
+ 	 * message at a time.
+ 	 */
+ 	while (true) {
+-		status = netlink_recv_msg(nl, msg, nl_batch_rx_buf,
+-					  sizeof(nl_batch_rx_buf));
++		status = netlink_recv_msg(nl, &msg);
+ 		/*
+ 		 * status == -1 is a full on failure somewhere
+ 		 * since we don't know where the problem happened
+@@ -1149,7 +1155,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
+ 			return status;
+ 		}
+ 
+-		h = (struct nlmsghdr *)nl_batch_rx_buf;
++		h = (struct nlmsghdr *)nl->buf;
+ 		ignore_msg = false;
+ 		seq = h->nlmsg_seq;
+ 		/*
+@@ -1708,18 +1714,24 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
+ 		hash_release(nlsock_hash, &zns->netlink);
+ 		close(zns->netlink.sock);
+ 		zns->netlink.sock = -1;
++		XFREE(MTYPE_NL_BUF, zns->netlink.buf);
++		zns->netlink.buflen = 0;
+ 	}
+ 
+ 	if (zns->netlink_cmd.sock >= 0) {
+ 		hash_release(nlsock_hash, &zns->netlink_cmd);
+ 		close(zns->netlink_cmd.sock);
+ 		zns->netlink_cmd.sock = -1;
++		XFREE(MTYPE_NL_BUF, zns->netlink_cmd.buf);
++		zns->netlink_cmd.buflen = 0;
+ 	}
+ 
+ 	if (zns->netlink_dplane_in.sock >= 0) {
+ 		hash_release(nlsock_hash, &zns->netlink_dplane_in);
+ 		close(zns->netlink_dplane_in.sock);
+ 		zns->netlink_dplane_in.sock = -1;
++		XFREE(MTYPE_NL_BUF, zns->netlink_dplane_in.buf);
++		zns->netlink_dplane_in.buflen = 0;
+ 	}
+ 
+ 	/* During zebra shutdown, we need to leave the dataplane socket
+@@ -1730,6 +1742,8 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
+ 			hash_release(nlsock_hash, &zns->netlink_dplane_out);
+ 			close(zns->netlink_dplane_out.sock);
+ 			zns->netlink_dplane_out.sock = -1;
++			XFREE(MTYPE_NL_BUF, zns->netlink_dplane_out.buf);
++			zns->netlink_dplane_out.buflen = 0;
+ 		}
+ 
+ 		hash_free(nlsock_hash);
+diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h
+index ae88f3372b1..9421ea1c611 100644
+--- a/zebra/kernel_netlink.h
++++ b/zebra/kernel_netlink.h
+@@ -96,7 +96,7 @@ extern const char *nl_family_to_str(uint8_t family);
+ extern const char *nl_rttype_to_str(uint8_t rttype);
+ 
+ extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
+-			      const struct nlsock *nl,
++			      struct nlsock *nl,
+ 			      const struct zebra_dplane_info *dp_info,
+ 			      int count, bool startup);
+ extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup);
+diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
+index 05297e143b5..4d32e54d1fb 100644
+--- a/zebra/zebra_dplane.c
++++ b/zebra/zebra_dplane.c
+@@ -1469,7 +1469,11 @@ int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx)
+ {
+ 	DPLANE_CTX_VALID(ctx);
+ 
++#ifdef HAVE_NETLINK
+ 	return ctx->zd_ns_info.sock;
++#else
++	return -1;
++#endif
+ }
+ 
+ /* Accessors for nexthop information */
+diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h
+index 0519e1d5b33..7a0ffbc1ee6 100644
+--- a/zebra/zebra_ns.h
++++ b/zebra/zebra_ns.h
+@@ -39,6 +39,9 @@ struct nlsock {
+ 	int seq;
+ 	struct sockaddr_nl snl;
+ 	char name[64];
++
++	uint8_t *buf;
++	size_t buflen;
+ };
+ #endif
+ 
diff --git a/debian/patches/series b/debian/patches/series
index 50b22cc..e6b9359 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,2 +1,5 @@
 pve/0001-enable-bgp-daemon.patch
 pve/0002-bgpd-add-an-option-for-RT-auto-derivation-to-force-A.patch
+frr/0001-zebra-buffering.patch
+frr/0002-zebra-buffering.patch
+frr/0003-zebra-buffering.patch
\ No newline at end of file
-- 
2.30.2




