[pve-devel] [PATCH] vhost-net: extend device allocation to vmalloc

Alexandre Derumier aderumier at odiso.com
Fri Sep 5 08:06:14 CEST 2014


backported from
https://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/commit/drivers/vhost/net.c?id=23cc5a991c7a9fb7e6d6550e65cee4f4173111c5

Michael Mueller provided a patch to reduce the size of the
vhost-net structure, as some allocations could fail under
memory pressure/fragmentation. We are still left with
high-order allocations, though.

This patch handles the problem at the core level, allowing
vhost structures to fall back to vmalloc() if kmalloc() fails.

As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT
to the kmalloc() flags so that this fallback happens only when really needed.

People are still looking at cleaner ways to handle the problem
at the API level, probably passing in multiple iovecs.
This hack (the vmalloc() fallback) seems consistent with approaches
taken since then by drivers/vhost/scsi.c and net/core/dev.c.

Based on patch by Romain Francoise.

Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
---
 Makefile          |    1 +
 vhost-alloc.patch |   64 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)
 create mode 100644 vhost-alloc.patch

diff --git a/Makefile b/Makefile
index ef0cf2c..207b77e 100644
--- a/Makefile
+++ b/Makefile
@@ -162,6 +162,7 @@ ${KERNEL_SRC}/README: ${KERNEL_SRC}.org/README
 	#cd ${KERNEL_SRC}; patch -p1 <../fix-idr-header-for-drbd-compilation.patch
 	cd ${KERNEL_SRC}; patch -p1 <../add-empty-ndo_poll_controller-to-veth.patch
 	cd ${KERNEL_SRC}; patch -p1 <../override_for_missing_acs_capabilities.patch
+	cd ${KERNEL_SRC}; patch -p1 <../vhost-alloc.patch
 	sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
 	touch $@
 
diff --git a/vhost-alloc.patch b/vhost-alloc.patch
new file mode 100644
index 0000000..d7ec538
--- /dev/null
+++ b/vhost-alloc.patch
@@ -0,0 +1,64 @@
+diff -r -U 3 -p a/drivers/vhost/net.c b/drivers/vhost/net.c
+--- a/drivers/vhost/net.c	2014-07-16 22:25:31.000000000 +0400
++++ b/drivers/vhost/net.c	2014-09-04 13:56:20.101952635 +0400
+@@ -18,6 +18,7 @@
+ #include <linux/rcupdate.h>
+ #include <linux/file.h>
+ #include <linux/slab.h>
++#include <linux/vmalloc.h>
+ 
+ #include <linux/net.h>
+ #include <linux/if_packet.h>
+@@ -706,18 +707,30 @@ static void handle_rx_net(struct vhost_w
+ 	handle_rx(net);
+ }
+ 
++static void vhost_net_free(void *addr)
++{
++   if (is_vmalloc_addr(addr))
++       vfree(addr);
++   else
++       kfree(addr);
++}
++
+ static int vhost_net_open(struct inode *inode, struct file *f)
+ {
+-	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
++	struct vhost_net *n;
+ 	struct vhost_dev *dev;
+ 	struct vhost_virtqueue **vqs;
+ 	int r, i;
+ 
+-	if (!n)
+-		return -ENOMEM;
++    n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
++    if (!n) {
++        n = vmalloc(sizeof *n);
++        if (!n)
++            return -ENOMEM;
++   }
+ 	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
+ 	if (!vqs) {
+-		kfree(n);
++		vhost_net_free(n);
+ 		return -ENOMEM;
+ 	}
+ 
+@@ -736,7 +749,7 @@ static int vhost_net_open(struct inode *
+ 	}
+ 	r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
+ 	if (r < 0) {
+-		kfree(n);
++		vhost_net_free(n);
+ 		kfree(vqs);
+ 		return r;
+ 	}
+@@ -841,7 +854,7 @@ static int vhost_net_release(struct inod
+ 	 * since jobs can re-queue themselves. */
+ 	vhost_net_flush(n);
+ 	kfree(n->dev.vqs);
+-	kfree(n);
++	vhost_net_free(n);
+ 	return 0;
+ }
+ 
-- 
1.7.10.4




More information about the pve-devel mailing list