[pve-devel] [PATCH v4 qemu-server 14/16] memory: add virtio-mem support
Fiona Ebner
f.ebner at proxmox.com
Wed Feb 22 16:19:49 CET 2023
Am 13.02.23 um 13:00 schrieb Alexandre Derumier:
> diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
> index 1b1c99d..bf4e92a 100644
> --- a/PVE/QemuServer/Memory.pm
> +++ b/PVE/QemuServer/Memory.pm
> @@ -3,6 +3,8 @@ package PVE::QemuServer::Memory;
> use strict;
> use warnings;
>
> +use POSIX qw(ceil);
> +
> use PVE::JSONSchema;
> use PVE::Tools qw(run_command lock_file lock_file_full file_read_firstline dir_glob_foreach);
> use PVE::Exception qw(raise raise_param_exc);
> @@ -16,6 +18,7 @@ our @EXPORT_OK = qw(
> get_current_memory
> parse_memory
> get_host_max_mem
> +get_virtiomem_block_size
> );
>
> my $MAX_NUMA = 8;
> @@ -37,6 +40,12 @@ our $memory_fmt = {
> maximum => 4194304,
> format => 'pve-qm-memory-max',
> },
> + virtio => {
> + description => "Enable virtio-mem memory (Experimental: Only works with Linux guest with kernel >= 5.10)",
Nit: How about "Use virtio-mem devices for hotplug (Experimental: ...)",
then people immediately know it's for hotplug.
> + type => 'boolean',
> + optional => 1,
> + default => 0,
> + },
> };
>
> PVE::JSONSchema::register_format('pve-qm-memory-max', \&verify_qm_memory_max);
> @@ -72,7 +81,9 @@ my sub get_static_mem {
> my $static_memory = 0;
> my $memory = parse_memory($conf->{memory});
>
> - if ($memory->{max}) {
> + if ($memory->{virtio}) {
> + $static_memory = 4096;
> + } elsif ($memory->{max}) {
> my $dimm_size = $memory->{max} / $MAX_SLOTS;
> #static mem can't be lower than 4G and lower than 1 dimmsize by socket
> $static_memory = $dimm_size * $sockets;
> @@ -161,6 +172,117 @@ sub get_current_memory {
> return $memory->{current};
> }
>
> +sub get_virtiomem_block_size {
> + my ($conf) = @_;
> +
> + my $sockets = $conf->{sockets} || 1;
> + my $MAX_MEM = get_max_mem($conf);
> + my $static_memory = get_static_mem($conf, $sockets);
Nit: Not making a difference with the current implementation, but this
should pass 1 for hotplug (we only use the virtio-mem devices for hotplug).
> + my $memory = get_current_memory($conf->{memory});
> +
> + #virtiomem can map 32000 block size.
> + #try to use lowest blocksize, lower = more chance to unplug memory.
> + my $blocksize = ($MAX_MEM - $static_memory) / 32000;
> + #2MB is the minimum to be aligned with THP
> + $blocksize = 2 if $blocksize < 2;
> + $blocksize = 2**(ceil(log($blocksize)/log(2)));
> + #Linux guest kernel only support 4MiB block currently (kernel <= 6.2)
> + $blocksize = 4 if $blocksize < 4;
> +
> + return $blocksize;
> +}
> +
> +my sub get_virtiomem_total_current_size {
> + my ($mems) = @_;
> + my $size = 0;
> + for my $mem (values %$mems) {
> + $size += $mem->{current};
> + }
> + return $size;
> +}
> +
> +my sub balance_virtiomem {
> + my ($vmid, $virtiomems, $blocksize, $target_total) = @_;
> +
> + my $nb_virtiomem = scalar(keys %$virtiomems);
> +
> + print"try to balance memory on $nb_virtiomem virtiomems\n";
> +
> + #if we can't share exactly the same amount, we add the remainder on last node
> + my $target_aligned = int( $target_total / $nb_virtiomem / $blocksize) * $blocksize;
> + my $target_remaining = $target_total - ($target_aligned * ($nb_virtiomem-1));
> +
> + my $i = 0;
> + foreach my $id (sort keys %$virtiomems) {
> + my $virtiomem = $virtiomems->{$id};
> + $i++;
> + my $virtiomem_target = $i == $nb_virtiomem ? $target_remaining : $target_aligned;
> + $virtiomem->{completed} = 0;
> + $virtiomem->{retry} = 0;
> + $virtiomem->{target} = $virtiomem_target;
> +
> + print "virtiomem$id: set-requested-size : $virtiomem_target\n";
> + mon_cmd($vmid, 'qom-set',
> + path => "/machine/peripheral/virtiomem$id",
> + property => "requested-size",
> + value => $virtiomem_target * 1024 * 1024);
Style nit: there are trailing spaces, and each argument should really be put
on its own line, with mon_cmd( and the final ) on their own lines too.
> + }
> +
> + my $total_finished = 0;
> + my $error = undef;
> +
> + while ($total_finished != $nb_virtiomem) {
> +
> + sleep 1;
> +
> + $total_finished = 0;
> +
> + foreach my $id (keys %$virtiomems) {
> +
> + my $virtiomem = $virtiomems->{$id};
> +
> + if ($virtiomem->{error} || $virtiomem->{completed}) {
> + $total_finished++;
> + next;
> + }
> +
> + my $size = mon_cmd($vmid, 'qom-get', path => "/machine/peripheral/virtiomem$id", property => "size");
> + $virtiomem->{current} = $size / 1024 / 1024;
> + print"virtiomem$id: last: $virtiomem->{last} current: $virtiomem->{current} target: $virtiomem->{target}\n";
[0] marker so I can reference this message below :)
> +
> + if($virtiomem->{current} == $virtiomem->{target}) {
> + print"virtiomem$id: completed\n";
> + $virtiomem->{completed} = 1;
> + next;
> + }
> +
> + if($virtiomem->{current} != $virtiomem->{last}) {
> + #if value has changed, but not yet completed
> + print "virtiomem$id: changed but don't not reach target yet\n";
"don't not" is wrong. But do we really need this print? I feel like the
above[0] is already enough. It already contains the information about
last and current.
> + $virtiomem->{retry} = 0;
> + $virtiomem->{last} = $virtiomem->{current};
> + next;
> + }
> +
> + if($virtiomem->{retry} >= 5) {
> + print "virtiomem$id: too many retry. set error\n";
s/retry/retries/
But I'd also change the message to be a bit more telling to users, "set
error" could mean anything. Maybe something like: "virtiomem$id: target
memory still not reached, ignoring device from now on"?
> + $virtiomem->{error} = 1;
> + $error = 1;
> + #as change is async, we don't want that value change after the api call
> + eval {
> + mon_cmd($vmid, 'qom-set',
> + path => "/machine/peripheral/virtiomem$id",
> + property => "requested-size",
> + value => $virtiomem->{current} * 1024 *1024);
> + };
> + }
> + print"virtiomem$id: increase retry: $virtiomem->{retry}\n";
Maybe output the retry counter in the message [0] instead, to avoid output bloat?
> + $virtiomem->{retry}++;
> + }
> + }
> + die "No more available blocks in virtiomem to balance all requested memory\n" if $error;
> +}
> +
> sub get_numa_node_list {
> my ($conf) = @_;
> my @numa_map;
> @@ -247,7 +369,39 @@ sub qemu_memory_hotplug {
> my $MAX_MEM = get_max_mem($conf);
> die "you cannot add more memory than max mem $MAX_MEM MB!\n" if $value > $MAX_MEM;
>
> - if ($value > $memory) {
> + my $confmem = parse_memory($conf->{memory});
This is already $oldmem, no need for this second variable.
More information about the pve-devel
mailing list