[pve-devel] [PATCH qemu-server 08/10] memory: add virtio-mem support
Alexandre Derumier
aderumier at odiso.com
Fri Dec 9 20:27:24 CET 2022
A 4GB static memory is needed for DMA+boot memory, as this memory
can almost never be unplugged.
One virtio-mem PCI device is set up for each NUMA node on the pci.4 bridge.
virtio-mem uses a fixed block size with 32000 blocks.
The block size is computed as (maxmemory - 4096) / 32000, rounded up to a
power of 2, with a minimum of 2MB to stay aligned with THP.
(lower blocksize = more chance to unplug memory).
fixes:
https://bugzilla.proxmox.com/show_bug.cgi?id=931
https://bugzilla.proxmox.com/show_bug.cgi?id=2949
Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
---
PVE/QemuServer.pm | 8 +++-
PVE/QemuServer/Memory.pm | 98 +++++++++++++++++++++++++++++++++++++---
PVE/QemuServer/PCI.pm | 8 ++++
3 files changed, 106 insertions(+), 8 deletions(-)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 0d5b550..43fab29 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -285,6 +285,12 @@ my $memory_fmt = {
optional => 1,
enum => [@max_memory_list],
},
+ virtio => {
+ description => "enable virtio-mem memory",
+ type => 'boolean',
+ optional => 1,
+ default => 0,
+ },
};
my $meta_info_fmt = {
@@ -3898,7 +3904,7 @@ sub config_to_command {
push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
}
- PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
+ PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type);
push @$cmd, '-S' if $conf->{freeze};
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index 8bbbf07..70ab65a 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -8,6 +8,8 @@ use PVE::Exception qw(raise raise_param_exc);
use PVE::QemuServer;
use PVE::QemuServer::Monitor qw(mon_cmd);
+use PVE::QemuServer::PCI qw(print_pci_addr);
+
use base qw(Exporter);
our @EXPORT_OK = qw(
@@ -27,7 +29,9 @@ my sub get_static_mem {
my $static_memory = 0;
my $memory = PVE::QemuServer::parse_memory($conf->{memory});
- if($memory->{max}) {
+ if ($memory->{virtio}) {
+ $static_memory = 4096;
+ } elsif ($memory->{max}) {
my $dimm_size = $memory->{max} / 64;
#static mem can't be lower than 4G and lower than 1 dimmsize by socket
$static_memory = $dimm_size * $sockets;
@@ -102,6 +106,24 @@ my sub get_max_mem {
return $cpu_max_mem;
}
+my sub get_virtiomem_block_size { #compute the virtio-mem block size (in MB) for this VM config
+ my ($conf) = @_;
+
+ my $MAX_MEM = get_max_mem($conf);
+ my $static_memory = get_static_mem($conf);
+ my $memory = get_current_memory($conf);
+ #virtio-mem can map 32000 blocks: try to use the lowest block size, as smaller blocks have more chance to be unplugged.
+ my $blocksize = ($MAX_MEM - $static_memory) / 32000;
+ #round up to the next power of 2 (NOTE(review): a value that is already a power of 2 also appears to be bumped to the next one - confirm)
+ $blocksize = 2**(int(log($blocksize)/log(2))+1);
+ #2MB is the minimum, to stay aligned with THP
+ $blocksize = 2 if $blocksize < 2;
+
+ die "memory size need to be multiple of $blocksize MB when virtio-mem is enabled" if ($memory % $blocksize != 0);
+
+ return $blocksize;
+}
+
sub get_current_memory{
my ($conf) = @_;
@@ -224,7 +246,41 @@ sub qemu_memory_hotplug {
my $MAX_MEM = get_max_mem($conf);
die "you cannot add more memory than max mem $MAX_MEM MB!\n" if $value > $MAX_MEM;
- if ($value > $memory) {
+ my $confmem = PVE::QemuServer::parse_memory($conf->{memory});
+
+ if ($confmem->{virtio}) {
+ my $blocksize = get_virtiomem_block_size($conf);
+ my $requested_size = ($value - $static_memory) / $sockets * 1024 * 1024;
+ my $totalsize = $static_memory;
+ my $err = undef;
+
+ for (my $i = 0; $i < $sockets; $i++) {
+
+ my $id = "virtiomem$i";
+ my $retry = 0;
+ mon_cmd($vmid, 'qom-set', path => "/machine/peripheral/$id", property => "requested-size", value => int($requested_size));
+
+ my $size = 0;
+ while (1) {
+ sleep 1;
+ $size = mon_cmd($vmid, 'qom-get', path => "/machine/peripheral/$id", property => "size");
+ $err = 1 if $retry > 5;
+ last if $size eq $requested_size || $retry > 5;
+ $retry++;
+ }
+ $totalsize += ($size / 1024 / 1024 );
+ }
+ #update conf after each successful change
+ if($err) {
+ my $mem = { max => $MAX_MEM, virtio => 1};
+ $mem->{current} = $totalsize;
+ $conf->{memory} = PVE::QemuServer::print_memory($mem);
+ PVE::QemuConfig->write_config($vmid, $conf);
+ raise_param_exc({ 'memory' => "error modify virtio memory" }) if $err;
+ }
+ return $totalsize;
+
+ } elsif ($value > $memory) {
my $numa_hostmap;
@@ -324,14 +380,15 @@ sub qemu_dimm_list {
}
sub config {
- my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd) = @_;
+ my ($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd, $devices, $bridges, $arch, $machine_type) = @_;
my $memory = get_current_memory($conf);
my $static_memory = get_static_mem($conf);
+
my $confmem = PVE::QemuServer::parse_memory($conf->{memory});
- if ($hotplug_features->{memory} || defined($confmem->{max})) {
+ if ($hotplug_features->{memory} || defined($confmem->{max}) || defined($confmem->{virtio})) {
die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa};
my $MAX_MEM = get_max_mem($conf);
die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM;
@@ -342,8 +399,12 @@ sub config {
}
die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory);
+
+ my $cmdstr = "size=${static_memory}";
my $slots = $confmem->{max} ? 64 : 255;
- push @$cmd, '-m', "size=${static_memory},slots=$slots,maxmem=${MAX_MEM}M";
+ $cmdstr .= ",slots=$slots" if !$confmem->{'virtio'};
+ $cmdstr .= ",maxmem=${MAX_MEM}M";
+ push @$cmd, '-m', $cmdstr;
} else {
push @$cmd, '-m', $static_memory;
@@ -412,7 +473,26 @@ sub config {
}
}
- if ($hotplug_features->{memory} || $confmem->{max}) {
+ if ($confmem->{'virtio'}) {
+ my $MAX_MEM = get_max_mem($conf);
+ my $node_maxmem = ($MAX_MEM - $static_memory) / $sockets;
+ my $node_mem = ($memory - $static_memory) / $sockets;
+ my $blocksize = get_virtiomem_block_size($conf);
+
+ for (my $i = 0; $i < $sockets; $i++) {
+
+ my $id = "virtiomem$i";
+ my $mem_object = print_mem_object($conf, "mem-$id", $node_maxmem);
+ push @$cmd, "-object" , "$mem_object,reserve=off";
+
+ my $pciaddr = print_pci_addr($id, $bridges, $arch, $machine_type);
+ my $mem_device = "virtio-mem-pci,block-size=${blocksize}M,requested-size=${node_mem}M,id=$id,memdev=mem-$id,node=$i$pciaddr";
+ $mem_device .= ",prealloc=on" if $conf->{hugepages};
+ push @$devices, "-device", $mem_device;
+ }
+
+ } elsif ($hotplug_features->{memory} || $confmem->{max}) {
+
foreach_dimm($conf, $vmid, $memory, $sockets, sub {
my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
@@ -430,12 +510,16 @@ sub config {
sub print_mem_object {
my ($conf, $id, $size) = @_;
+ my $confmem = PVE::QemuServer::parse_memory($conf->{memory});
+
if ($conf->{hugepages}) {
my $hugepages_size = hugepages_size($conf, $size);
my $path = hugepages_mount_path($hugepages_size);
- return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
+ my $object = "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on";
+ $object .= ",prealloc=yes" if !$confmem->{virtio};
+ return $object;
} else {
return "memory-backend-ram,id=$id,size=${size}M";
}
diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm
index a18b974..0187c74 100644
--- a/PVE/QemuServer/PCI.pm
+++ b/PVE/QemuServer/PCI.pm
@@ -249,6 +249,14 @@ sub get_pci_addr_map {
'scsihw2' => { bus => 4, addr => 1 },
'scsihw3' => { bus => 4, addr => 2 },
'scsihw4' => { bus => 4, addr => 3 },
+ 'virtiomem0' => { bus => 4, addr => 4 },
+ 'virtiomem1' => { bus => 4, addr => 5 },
+ 'virtiomem2' => { bus => 4, addr => 6 },
+ 'virtiomem3' => { bus => 4, addr => 7 },
+ 'virtiomem4' => { bus => 4, addr => 8 },
+ 'virtiomem5' => { bus => 4, addr => 9 },
+ 'virtiomem6' => { bus => 4, addr => 10 },
+ 'virtiomem7' => { bus => 4, addr => 11 },
} if !defined($pci_addr_map);
return $pci_addr_map;
}
--
2.30.2
More information about the pve-devel
mailing list