[pve-devel] [PATCH] add numa options

Alexandre DERUMIER aderumier at odiso.com
Tue Dec 2 14:12:48 CET 2014


Note that if we want something simpler but less flexible,

we could have a single option

numa: [policy=<default|preferred|bind|interleave>]

and then generate the numa nodes from the number of sockets, all with the same policy, and split the memory across the nodes.




----- Mail original ----- 

De: "Alexandre Derumier" <aderumier at odiso.com> 
À: pve-devel at pve.proxmox.com 
Cc: "Alexandre Derumier" <aderumier at odiso.com> 
Envoyé: Mardi 2 Décembre 2014 13:43:11 
Objet: [PATCH] add numa options 

This adds NUMA topology support.

numa[0-7]: memory=<mb>[,policy=<default|preferred|bind|interleave>]

example: 
------- 
sockets:4 
cores:2 
memory:4096 

numa0: memory=1024,policy=bind 
numa1: memory=1024,policy=bind 
numa2: memory=1024,policy=bind 
numa3: memory=1024,policy=bind 

- total numa memory should be equal to vm memory 
- we assign 1 numa node for each socket 

qemu command line: 
------------------ 
-object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0 

-object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1

-object memory-backend-ram,size=1024M,policy=bind,host-nodes=2,id=ram-node2
-numa node,nodeid=2,cpus=4-5,memdev=ram-node2

-object memory-backend-ram,size=1024M,policy=bind,host-nodes=3,id=ram-node3
-numa node,nodeid=3,cpus=6-7,memdev=ram-node3

Signed-off-by: Alexandre Derumier <aderumier at odiso.com> 
--- 
PVE/QemuServer.pm | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 
1 file changed, 73 insertions(+), 2 deletions(-) 

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm 
index 75ddcdd..51d9045 100644 
--- a/PVE/QemuServer.pm 
+++ b/PVE/QemuServer.pm 
@@ -483,6 +483,19 @@ my $MAX_UNUSED_DISKS = 8; 
my $MAX_HOSTPCI_DEVICES = 4; 
my $MAX_SERIAL_PORTS = 4; 
my $MAX_PARALLEL_PORTS = 3; 
+my $MAX_NUMA = 8; 
+ 
+my $numadesc = { 
+ optional => 1, 
+ type => 'string', format => 'pve-qm-numa', 
+ typetext => "memory=<mb>,[policy=<default|preferred|bind|interleave>]", 
+ description => "numa topology", 
+}; 
+PVE::JSONSchema::register_standard_option("pve-qm-numa", $numadesc); 
+ 
+for (my $i = 0; $i < $MAX_NUMA; $i++) { 
+ $confdesc->{"numa$i"} = $numadesc; 
+} 

my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio', 
'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3']; 
@@ -1272,6 +1285,25 @@ sub drive_is_cdrom { 

} 

+sub parse_numa { 
+ my ($data) = @_; 
+ 
+ my $res = {}; 
+ 
+ foreach my $kvp (split(/,/, $data)) { 
+ 
+ if ($kvp =~ m/^memory=(\S+)$/) { 
+ $res->{memory} = $1; 
+ } elsif ($kvp =~ m/^policy=(default|preferred|bind|interleave)$/) { 
+ $res->{policy} = $1; 
+ } else { 
+ return undef; 
+ } 
+ } 
+ 
+ return $res; 
+} 
+ 
sub parse_hostpci { 
my ($value) = @_; 

@@ -1452,6 +1484,17 @@ sub verify_bootdisk { 
die "invalid boot disk '$value'\n"; 
} 

+PVE::JSONSchema::register_format('pve-qm-numa', \&verify_numa); 
+sub verify_numa { 
+ my ($value, $noerr) = @_; 
+ 
+ return $value if parse_numa($value); 
+ 
+ return undef if $noerr; 
+ 
+ die "unable to parse numa options\n"; 
+} 
+ 
PVE::JSONSchema::register_format('pve-qm-net', \&verify_net); 
sub verify_net { 
my ($value, $noerr) = @_; 
@@ -2686,6 +2729,36 @@ sub config_to_command { 
# push @$cmd, '-cpu', "$cpu,enforce"; 
push @$cmd, '-cpu', $cpu; 

+ my $memory = $conf->{memory} || $defaults->{memory}; 
+ push @$cmd, '-m', $memory; 
+ 
+ my $numa_totalmemory = undef; 
+ for (my $i = 0; $i < $MAX_NUMA; $i++) { 
+ next if !$conf->{"numa$i"}; 
+ my $numa = parse_numa($conf->{"numa$i"}); 
+ next if !$numa; 
+ 
+ die "host numa node$i don't exist" if !(-d "/sys/devices/system/node/node$i/"); 
+ die "vm socket don't exist for numa node$i" if $i > ($sockets-1); 
+ die "missing numa node$i memory value" if !$numa->{memory}; 
+ 
+ my $numa_memory = $numa->{memory}; 
+ $numa_totalmemory += $numa_memory; 
+ die "numa node$i memory $memory M can't be bigger than vm memory" if $numa_memory > $memory; 
+ 
+ my $cpustart = ($cores * $i); 
+ my $cpuend = ($cpustart + $cores - 1) if $cores && $cores > 1; 
+ my $cpus = $cpustart; 
+ $cpus .= "-$cpuend" if $cpuend; 
+ 
+ my $policy = $numa->{policy} ? $numa->{policy} : "default"; 
+ $numa_memory = $numa_memory."M"; 
+ 
+ push @$cmd, '-object', "memory-backend-ram,size=$numa_memory,policy=$policy,host-nodes=$i,id=ram-node$i"; 
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i"; 
+ } 
+ die "total memory for NUMA nodes must be equal to vm memory" if $numa_totalmemory && $numa_totalmemory != $memory; 
+ 
push @$cmd, '-S' if $conf->{freeze}; 

# set keyboard layout 
@@ -2798,8 +2871,6 @@ sub config_to_command { 
push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges); 
}); 

- push @$cmd, '-m', $conf->{memory} || $defaults->{memory}; 
- 
for (my $i = 0; $i < $MAX_NETS; $i++) { 
next if !$conf->{"net$i"}; 
my $d = parse_net($conf->{"net$i"}); 
-- 
1.7.10.4 



More information about the pve-devel mailing list