[pve-devel] [PATCH] add virtio-vhost-user nic support

Wolfgang Bumiller w.bumiller at proxmox.com
Wed May 4 16:09:27 CEST 2016


On Tue, May 03, 2016 at 05:05:38PM +0200, Alexandre Derumier wrote:
> vm config:
> 
> net0: virtio-vhost-user=66:32:37:61:66:64,bridge=vmbr0,tag=10
> numa: 1
> 
> virtio-vhost-user is userland only, so no tap interface
> Numa need to be enabled, and all vm memory is mapped to hugepages
> 
> The qemu netdev script/downscript don't work with vhost-user
> 
> System configuration:
> 
> hugepages need to be mounted. (can be 2MB or 1GB Hupages)
> 
> /etc/fstab
> ----------
> hugetlbfs  /dev/hugepages  hugetlbfs       pagesize=2048k        0 0
> 
> sysctl
> ------
> echo 2048 > /proc/sys/vm/nr_hugepages
> 
> Openvswitch need to have dpdk enabled
> 
> /etc/default/openvswitch-switch
> --------------------------------
> DPDK_OPTS='--dpdk -c 0x1 -n 4'
> 
> ovs switch need to have datapath_type=netdev
> 
> /etc/network/interfaces
> -----------------------
> auto vmbr0
> iface vmbr10 inet manual
>         ovs_type OVSBridge
>         ovs_ports eth0
>         ovs_extra set bridge vmbr10 datapath_type=netdev
> 
> Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
> ---
>  PVE/QemuServer.pm | 108 +++++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 83 insertions(+), 25 deletions(-)
> 
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index 86c2f32..9070dec 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -564,7 +564,7 @@ for (my $i = 0; $i < $MAX_NUMA; $i++)  {
>  
>  my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000',  'pcnet',  'virtio',
>  		      'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3',
> -		      'e1000-82540em', 'e1000-82544gc', 'e1000-82545em'];
> +		      'e1000-82540em', 'e1000-82544gc', 'e1000-82545em', 'virtio-vhost-user'];
>  my $nic_model_list_txt = join(' ', sort @$nic_model_list);
>  
>  my $net_fmt = {
> @@ -1565,9 +1565,9 @@ sub print_netdevice_full {
>      my $bootorder = $conf->{boot} || $confdesc->{boot}->{default};
>  
>      my $device = $net->{model};
> -    if ($net->{model} eq 'virtio') {
> +    if ($net->{model} =~ m/^virtio/) {
>           $device = 'virtio-net-pci';
> -     };
> +    };

You fixed the indentation but forgot about the semicolon :p

>      my $pciaddr = print_pci_addr("$netid", $bridges);
>      my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";
> @@ -1622,7 +1622,12 @@ sub print_netdev_full {
>      my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";
>  
>      if ($net->{bridge}) {
> -        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script,downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
> +	if ($net->{model} eq 'virtio-vhost-user') {
> +	    $netdev = "type=vhost-user,id=$netid,chardev=charvhostuser$i,vhostforce";
> +	} else {
> +	    $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script,downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
> +	}
> +
>      } else {
>          $netdev = "type=user,id=$netid,hostname=$vmname";
>      }
> @@ -3122,6 +3127,36 @@ sub config_to_command {
>  
>      push @$cmd, '-cpu', $cpu;
>  
> +    my $use_vhost_user = undef;
> +
> +    for (my $i = 0; $i < $MAX_NETS; $i++) {
> +         next if !$conf->{"net$i"};
> +         my $d = parse_net($conf->{"net$i"});
> +         next if !$d;
> +
> +         $use_virtio = 1 if $d->{model} eq 'virtio';
> +         $use_vhost_user = 1 if $d->{model} eq 'virtio-vhost-user';

I was confused for a minute by this. This modifies the variable outside
the loop to be used afterwards to decide on the $numa_object.
What bugs me is that it's very close to an `if()` containing the exact
same condition and with the real use of this variable being way down in
the next hunk this relation might be missed in a future patch which
could potentially break this.

I'm mentioning this because of ease of maintenance:

This code already contains the dead $use_virtio variable taken over from
its previous hunk (found somewhere below) which as far as I can tell is
only ever written and never actually used, so it should probably be
removed (including its declaration somewhere further up in the
function).

So my suggestion is this:
 -) rename the outer variable to $have_vhost_user
 -) remove $use_virtio (unless I'm wrong and it has a purpose?)
 -) change this piece to:
      my $vhost_user = ($d->{model} eq 'virtio-vhost-user');
      $have_vhost_user ||= $vhost_user;
 -) change the if() marked below to use $vhost_user

This also prevents people from accidentally "optimizing" the if() below
to use $use_vhost_user ;-).

> +
> +         if ($bootindex_hash->{n}) {
> +            $d->{bootindex} = $bootindex_hash->{n};
> +            $bootindex_hash->{n} += 1;
> +         }
> +
> +	 if ($d->{model} eq 'virtio-vhost-user') {

see above comment

Also you're now switching indentation from from 9 spaces to 1 tab.
(The for-loop body should begin at 1-tab instead.)

> +	    my $vhostuser = "vhost-user".$vmid."i".$i;
> +	    push @$devices, '-chardev', "socket,id=charvhostuser$i,path=/var/run/openvswitch/$vhostuser";
> +
> +         }
> +         my $netdevfull = print_netdev_full($vmid,$conf,$d,"net$i");
> +         push @$devices, '-netdev', $netdevfull;
> +
> +         my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files);
> +         push @$devices, '-device', $netdevicefull;
> +    }
> +
> +    die "Numa need to be enabled ot use vhost-user nics" if $use_vhost_user && !$conf->{numa};

"needs"; "to" vs "ot".

> +
> +
>      my $memory = $conf->{memory} || $defaults->{memory};
>      my $static_memory = 0;
>      my $dimm_memory = 0;
> @@ -3151,7 +3186,14 @@ sub config_to_command {
>  	    die "missing numa node$i memory value\n" if !$numa->{memory};
>  	    my $numa_memory = $numa->{memory};
>  	    $numa_totalmemory += $numa_memory;
> -	    my $numa_object = "memory-backend-ram,id=ram-node$i,size=${numa_memory}M";
> +
> +	    my $numa_object = '';
> +
> +	    if ($use_vhost_user) {
> +		$numa_object = "memory-backend-file,id=ram-node$i,size=${numa_memory}M,mem-path=/dev/hugepages,share=on";
> +	    } else {
> +		$numa_object = "memory-backend-ram,id=ram-node$i,size=${numa_memory}M";
> +	    }
>  
>  	    # cpus
>  	    my $cpulists = $numa->{cpus};
> @@ -3201,10 +3243,19 @@ sub config_to_command {
>  		my $cpus = $cpustart;
>  		$cpus .= "-$cpuend" if $cpuend;
>  
> -		push @$cmd, '-object', "memory-backend-ram,size=$numa_memory,id=ram-node$i";
> +		my $numa_object = '';
> +		if ($use_vhost_user) {
> +		    $numa_object = "memory-backend-file,size=$numa_memory,id=ram-node$i,mem-path=/dev/hugepages,share=on";
> +		} else {
> +		    $numa_object = "memory-backend-ram,size=$numa_memory,id=ram-node$i";
> +		}
> +
> +		push @$cmd, '-object', $numa_object;
>  		push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
>  	    }
>  	}
> +
> +	push @$devices, '-mem-prealloc' if $use_vhost_user;
>      }
>  
>      if ($hotplug_features->{memory}) {
> @@ -3352,25 +3403,6 @@ sub config_to_command {
>  	push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges);
>      });
>  
> -    for (my $i = 0; $i < $MAX_NETS; $i++) {
> -         next if !$conf->{"net$i"};
> -         my $d = parse_net($conf->{"net$i"});
> -         next if !$d;
> -
> -         $use_virtio = 1 if $d->{model} eq 'virtio';
> -
> -         if ($bootindex_hash->{n}) {
> -            $d->{bootindex} = $bootindex_hash->{n};
> -            $bootindex_hash->{n} += 1;
> -         }
> -
> -         my $netdevfull = print_netdev_full($vmid,$conf,$d,"net$i");
> -         push @$devices, '-netdev', $netdevfull;
> -
> -         my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files);
> -         push @$devices, '-device', $netdevicefull;
> -    }
> -
>      if (!$q35) {
>  	# add pci bridges
>          if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) {
> @@ -4571,6 +4603,21 @@ sub vm_start {
>  	  }
>          }
>  
> +	#ovs vhost-user
> +	for (my $i = 0; $i < $MAX_NETS; $i++) {
> +	    next if !$conf->{"net$i"};
> +	    my $d = parse_net($conf->{"net$i"});
> +	    next if !$d;
> +	    if ($d->{model} eq 'virtio-vhost-user') {
> +		my $interface = "vhost-user".$vmid."i".$i;
> +		#remove old uncleaned port
> +		if(-e "/var/run/openvswitch/$interface") {
> +		    run_command("/usr/bin/ovs-vsctl del-port $interface", outfunc => sub {}, errfunc => sub {});
> +		}
> +		PVE::Network::ovs_bridge_add_port($d->{bridge}, $interface, $d->{vlan}, 'dpdkvhostuser', $d->{trunks});
> +	    }
> +	}
> +
>  	PVE::Storage::activate_volumes($storecfg, $vollist);
>  
>  	if (!check_running($vmid, 1) && -d "/sys/fs/cgroup/systemd/qemu.slice/$vmid.scope") {
> @@ -4746,6 +4793,17 @@ sub vm_stop_cleanup {
>  	    PVE::Storage::deactivate_volumes($storecfg, $vollist);
>  	}
>  
> +        #ovs vhost-user
> +        for (my $i = 0; $i < $MAX_NETS; $i++) {
> +            next if !$conf->{"net$i"};
> +            my $d = parse_net($conf->{"net$i"});
> +            next if !$d;
> +            if ($d->{model} eq 'virtio-vhost-user') {
> +                my $ovsintport = "vhost-user".$vmid."i".$i;
> +		run_command("/usr/bin/ovs-vsctl del-port $ovsintport", outfunc => sub {}, errfunc => sub {});
> +            }
> +        }
> +
>  	foreach my $ext (qw(mon qmp pid vnc qga)) {
>  	    unlink "/var/run/qemu-server/${vmid}.$ext";
>  	}
> -- 
> 2.1.4




More information about the pve-devel mailing list