[pve-devel] [PATCH qemu-server] memory: hugepages: map numa node IDs to host and guest correctly

Wolfgang Bumiller w.bumiller at proxmox.com
Mon Aug 22 10:58:09 CEST 2016


On Fri, Aug 19, 2016 at 04:26:25PM +0200, Alexandre DERUMIER wrote:
> Hi Wolfgang,
> 
> I have tested your patch, and it's working fine for me!
> 
> I think we can apply the 3 patches to master.

merged

> 
> Thanks.
> 
> Alexandre
> ----- Mail original -----
> De: "aderumier" <aderumier at odiso.com>
> À: "Wolfgang Bumiller" <w.bumiller at proxmox.com>
> Cc: "pve-devel" <pve-devel at pve.proxmox.com>
> Envoyé: Mardi 16 Août 2016 08:01:45
> Objet: Re: [pve-devel] [PATCH qemu-server] memory: hugepages: map numa node IDs to host and guest correctly
> 
> Hi Wolfgang, sorry for the late reply; I was offline last week (some more holidays).
> 
> I'll try to test it this week. 
> 
> 
> ----- Mail original ----- 
> De: "Wolfgang Bumiller" <w.bumiller at proxmox.com> 
> À: "pve-devel" <pve-devel at pve.proxmox.com> 
> Cc: "aderumier" <aderumier at odiso.com> 
> Envoyé: Vendredi 29 Juillet 2016 13:40:54 
> Objet: [PATCH qemu-server] memory: hugepages: map numa node IDs to host and guest correctly 
> 
> foreach_dimm() provides a guest numa node index. When it is used
> in conjunction with the guest-to-host numa node topology
> mapping, one has to make sure that the correct host-side
> indices are used.
> 
> This covers situations where the user defines a numaX with 
> hostnodes=Y with Y != X. 
> 
> For instance: 
> cores: 2 
> hotplug: disk,network,cpu,memory 
> hugepages: 2 
> memory: 2048 
> numa: 1 
> numa1: memory=512,hostnodes=0,cpus=0-1,policy=bind 
> numa2: memory=512,hostnodes=0,cpus=2-3,policy=bind 
> 
> Both numa IDs 1 and 2 passed by foreach_dimm() have to be 
> mapped to host node 0. 
> 
> Note that this also reverses the foreach_reverse_dimm() numa
> node numbering, which makes more sense: the current code,
> while walking sizes backwards, walked the numa IDs inside
> each size forward. (Memory hot-unplug is still working
> with this.)
> --- 
> Note: this is to be applied on top of Alexandre's patch #2: 
> hugepages: use hostnodes value as numanode for topology 
> 
> @Alexandre: Please review if you get the chance. 
> 
> PVE/QemuServer/Memory.pm | 44 +++++++++++++++++++++++++++++++++++++++++--- 
> 1 file changed, 41 insertions(+), 3 deletions(-) 
> 
> diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm 
> index 37f285a..087584b 100644 
> --- a/PVE/QemuServer/Memory.pm 
> +++ b/PVE/QemuServer/Memory.pm 
> @@ -10,6 +10,33 @@ my $MAX_NUMA = 8; 
> my $MAX_MEM = 4194304; 
> my $STATICMEM = 1024; 
> 
> +sub get_numa_node_list { 
> + my ($conf) = @_; 
> + my @numa_map; 
> + for (my $i = 0; $i < $MAX_NUMA; $i++) { 
> + my $entry = $conf->{"numa$i"} or next; 
> + my $numa = PVE::QemuServer::parse_numa($entry) or next; 
> + push @numa_map, $i; 
> + } 
> + return @numa_map if @numa_map; 
> + my $sockets = $conf->{sockets} || 1; 
> + return (0..($sockets-1)); 
> +} 
> + 
> +# only valid when numa nodes map to a single host node 
> +sub get_numa_guest_to_host_map { 
> + my ($conf) = @_; 
> + my $map = {}; 
> + for (my $i = 0; $i < $MAX_NUMA; $i++) { 
> + my $entry = $conf->{"numa$i"} or next; 
> + my $numa = PVE::QemuServer::parse_numa($entry) or next; 
> + $map->{$i} = print_numa_hostnodes($numa->{hostnodes}); 
> + } 
> + return $map if %$map; 
> + my $sockets = $conf->{sockets} || 1; 
> + return map { $_ => $_ } (0..($sockets-1)); 
> +} 
> + 
> sub foreach_dimm{ 
> my ($conf, $vmid, $memory, $sockets, $func) = @_; 
> 
> @@ -27,11 +54,13 @@ sub foreach_dimm{ 
> 
> return if $current_size == $memory; 
> 
> + my @numa_map = get_numa_node_list($conf); 
> + 
> for (my $j = 0; $j < 8; $j++) { 
> for (my $i = 0; $i < 32; $i++) { 
> my $name = "dimm${dimm_id}"; 
> $dimm_id++; 
> - my $numanode = $i % $sockets; 
> + my $numanode = $numa_map[$i % @numa_map]; 
> $current_size += $dimm_size; 
> &$func($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory); 
> return $current_size if $current_size >= $memory; 
> @@ -57,11 +86,13 @@ sub foreach_reverse_dimm { 
> 
> return if $current_size == $memory; 
> 
> + my @numa_map = get_numa_node_list($conf); 
> + 
> for (my $j = 0; $j < 8; $j++) { 
> for (my $i = 0; $i < 32; $i++) { 
> my $name = "dimm${dimm_id}"; 
> $dimm_id--; 
> - my $numanode = $i % $sockets; 
> + my $numanode = $numa_map[(31-$i) % @numa_map]; 
> $current_size -= $dimm_size; 
> &$func($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory); 
> return $current_size if $current_size <= $memory; 
> @@ -90,6 +121,8 @@ sub qemu_memory_hotplug { 
> 
> if($value > $memory) { 
> 
> + my $numa_hostmap = get_numa_guest_to_host_map($conf) if $conf->{hugepages}; 
> + 
> foreach_dimm($conf, $vmid, $value, $sockets, sub { 
> my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; 
> 
> @@ -99,7 +132,8 @@ sub qemu_memory_hotplug { 
> 
> my $hugepages_size = hugepages_size($conf, $dimm_size); 
> my $path = hugepages_mount_path($hugepages_size); 
> - my $hugepages_topology->{$hugepages_size}->{$numanode} = hugepages_nr($dimm_size, $hugepages_size); 
> + my $host_numanode = $numa_hostmap->{$numanode}; 
> + my $hugepages_topology->{$hugepages_size}->{$host_numanode} = hugepages_nr($dimm_size, $hugepages_size); 
> 
> my $code = sub { 
> my $hugepages_host_topology = hugepages_host_topology(); 
> @@ -436,9 +470,13 @@ sub hugepages_topology { 
> } 
> 
> if ($hotplug_features->{memory}) { 
> + my $numa_hostmap = get_numa_guest_to_host_map($conf); 
> + 
> foreach_dimm($conf, undef, $memory, $sockets, sub { 
> my ($conf, undef, $name, $dimm_size, $numanode, $current_size, $memory) = @_; 
> 
> + $numanode = $numa_hostmap->{$numanode}; 
> + 
> my $hugepages_size = hugepages_size($conf, $dimm_size); 
> $hugepages_topology->{$hugepages_size}->{$numanode} += hugepages_nr($dimm_size, $hugepages_size); 
> }); 
> -- 
> 2.1.4 
> 
> _______________________________________________ 
> pve-devel mailing list 
> pve-devel at pve.proxmox.com 
> http://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel 
> 
> 




More information about the pve-devel mailing list