[pve-devel] [PATCH v2 container 18/18] add vmconfig_hotplug_pending and vmconfig_apply_pending

Fabian Grünbichler f.gruenbichler at proxmox.com
Wed Oct 2 13:54:51 CEST 2019


On September 30, 2019 2:44 pm, Oguz Bektas wrote:
> vmconfig_hotplug_pending is responsible for checking if a key/value pair in the
> pending section can be hotplugged, if yes; perform a generic replace,
> or perform specific actions for hotplugging the special cases.
> 
> vmconfig_apply_pending is only supposed to be called when ct isn't live.
> 
> Signed-off-by: Oguz Bektas <o.bektas at proxmox.com>
> ---
>  src/PVE/LXC.pm        |  14 +--
>  src/PVE/LXC/Config.pm | 199 ++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 203 insertions(+), 10 deletions(-)
> 
> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
> index 65c41f5..f91e27d 100644
> --- a/src/PVE/LXC.pm
> +++ b/src/PVE/LXC.pm
> @@ -1632,7 +1632,7 @@ sub alloc_disk {
>  
>  our $NEW_DISK_RE = qr/^([^:\s]+):(\d+(\.\d+)?)$/;
>  sub create_disks {
> -    my ($storecfg, $vmid, $settings, $conf) = @_;
> +    my ($storecfg, $vmid, $settings, $conf, $pending) = @_;
>  
>      my $vollist = [];
>  
> @@ -1659,10 +1659,14 @@ sub create_disks {
>  		push @$vollist, $volid;
>  		$mountpoint->{volume} = $volid;
>  		$mountpoint->{size} = $size_kb * 1024;
> -		$conf->{$ms} = PVE::LXC::Config->print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
> +		if ($pending) {
> +		    $conf->{pending}->{$ms} = PVE::LXC::Config->print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
> +		} else {
> +		    $conf->{$ms} = PVE::LXC::Config->print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
> +		}
>  	    } else {
> -                # use specified/existing volid/dir/device
> -                $conf->{$ms} = PVE::LXC::Config->print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
> +		# use specified/existing volid/dir/device
> +		$conf->{$ms} = PVE::LXC::Config->print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
>  	    }
>  	});
>  
> @@ -1676,7 +1680,7 @@ sub create_disks {
>      # free allocated images on error
>      if (my $err = $@) {
>  	destroy_disks($storecfg, $vollist);
> -        die $err;
> +	die $err;
>      }
>      return $vollist;
>  }
> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
> index 14c26bc..10dfc75 100644
> --- a/src/PVE/LXC/Config.pm
> +++ b/src/PVE/LXC/Config.pm
> @@ -1177,6 +1177,194 @@ sub option_exists {
>  }
>  # END JSON config code
>  
> +my $LXC_FASTPLUG_OPTIONS= {
> +    'description' => 1,
> +    'onboot' => 1,
> +    'startup' => 1,
> +    'protection' => 1,
> +    'hostname' => 1,
> +    'hookscript' => 1,
> +    'cores' => 1,
> +    'tags' => 1,
> +};
> +
> +sub vmconfig_hotplug_pending {
> +    my ($class, $vmid, $conf, $storecfg, $selection, $errors) = @_;
> +
> +    my $pid = PVE::LXC::find_lxc_pid($vmid);
> +    my $rootdir = "/proc/$pid/root";
> +
> +    my $add_error = sub {
> +	my ($opt, $msg) = @_;
> +	$errors->{$opt} = "hotplug problem - $msg";
> +    };
> +
> +    my $changes;
> +    foreach my $opt (keys %{$conf->{pending}}) { # add/change
> +	next if $selection && !$selection->{$opt};
> +	if ($LXC_FASTPLUG_OPTIONS->{$opt}) {
> +	    $conf->{$opt} = delete $conf->{pending}->{$opt};
> +	    $changes = 1;
> +	}
> +    }
> +
> +    if ($changes) {
> +	$class->write_config($vmid, $conf);
> +    }
> +
> +    # There's no separate swap size to configure, there's memory and "total"
> +    # memory (iow. memory+swap). This means we have to change them together.
> +    my $hotplug_memory_done;
> +    my $hotplug_memory = sub {
> +	my ($wanted_memory, $wanted_swap) = @_;
> +	my $old_memory = ($conf->{memory} || $confdesc->{memory}->{default});
> +	my $old_swap = ($conf->{swap} || $confdesc->{swap}->{default});
> +
> +	$wanted_memory //= $old_memory;
> +	$wanted_swap //= $old_swap;
> +
> +	my $total = $wanted_memory + $wanted_swap;
> +	my $old_total = $old_memory + $old_swap;
> +
> +	if ($total > $old_total) {
> +	    PVE::LXC::write_cgroup_value("memory", $vmid,
> +					 "memory.memsw.limit_in_bytes",
> +					 int($total*1024*1024));
> +	    PVE::LXC::write_cgroup_value("memory", $vmid,
> +					 "memory.limit_in_bytes",
> +					 int($wanted_memory*1024*1024));
> +	} else {
> +	    PVE::LXC::write_cgroup_value("memory", $vmid,
> +					 "memory.limit_in_bytes",
> +					 int($wanted_memory*1024*1024));
> +	    PVE::LXC::write_cgroup_value("memory", $vmid,
> +					 "memory.memsw.limit_in_bytes",
> +					 int($total*1024*1024));
> +	}
> +	$hotplug_memory_done = 1;
> +    };
> +
> +    my $pending_delete_hash = $class->parse_pending_delete($conf->{pending}->{delete});
> +    # FIXME: $force deletion is not implemented for CTs
> +    while (my ($opt, undef) = each %$pending_delete_hash) {
> +	next if $selection && !$selection->{$opt};
> +	eval {
> +	    if ($LXC_FASTPLUG_OPTIONS->{$opt}) {
> +		# pass
> +	    } elsif ($opt =~ m/^unused(\d+)$/) {
> +		PVE::LXC::delete_mountpoint_volume($storecfg, $vmid, $conf->{$opt})

does not modify $conf or the config file

> +		    if !$class->is_volume_in_use($conf, $conf->{$opt}, 1, 1);
> +	    } elsif ($opt eq 'swap') {
> +		$hotplug_memory->(undef, 0);

does not modify $conf or the config file

> +	    } elsif ($opt eq 'cpulimit') {
> +		PVE::LXC::write_cgroup_value("cpu", $vmid, "cpu.cfs_period_us", -1);
> +		PVE::LXC::write_cgroup_value("cpu", $vmid, "cpu.cfs_quota_us", -1);

does not even know about $conf, does not modify config file

> +	    } elsif ($opt eq 'cpuunits') {
> +		PVE::LXC::write_cgroup_value("cpu", $vmid, "cpu.shared", $confdesc->{cpuunits}->{default});

does not even know about $conf, does not modify config file

> +	    } elsif ($opt =~ m/^net(\d)$/) {
> +		my $netid = $1;
> +		PVE::Network::veth_delete("veth${vmid}i$netid");

does not even know about $conf, does not modify config file

> +	    } else {
> +		die "skip\n"; # skip non-hotpluggable opts
> +	    }
> +	};
> +	if (my $err = $@) {
> +	    $add_error->($opt, $err) if $err ne "skip\n";
> +	} else {
> +	    delete $conf->{$opt};
> +	    $class->remove_from_pending_delete($conf, $opt);
> +	    $class->write_config($vmid, $conf);

nothing here has the potential to replace $conf with a new reference, 
but writing out makes sense since we have now hotplugged some stuff. 
OTOH, this whole code path should not die anyway ;)

> +	}
> +    }
> +
> +    foreach my $opt (keys %{$conf->{pending}}) {
> +	next if $opt eq 'delete'; # just to be sure
> +	next if $selection && !$selection->{$opt};
> +	my $value = $conf->{pending}->{$opt};
> +	eval {
> +	    if ($opt eq 'cpulimit') {
> +		PVE::LXC::write_cgroup_value("cpu", $vmid, "cpu.cfs_period_us", 100000);
> +		PVE::LXC::write_cgroup_value("cpu", $vmid, "cpu.cfs_quota_us", int(100000*$value));

does not even know about $conf, does not modify config file

> +	    } elsif ($opt eq 'cpuunits') {
> +		PVE::LXC::write_cgroup_value("cpu", $vmid, "cpu.shares", $value);

does not even know about $conf, does not modify config file

> +	    } elsif ($opt =~ m/^net(\d+)$/) {
> +		my $netid = $1;
> +		my $net = $class->parse_lxc_network($value);
> +		PVE::LXC::update_net($vmid, $conf, $opt, $net, $netid, $rootdir);

does modify $conf, but always immediately followed by write_config

> +	    } elsif ($opt eq 'memory' || $opt eq 'swap') {
> +		if (!$hotplug_memory_done) { # don't call twice if both opts are passed
> +		    $hotplug_memory->($conf->{pending}->{memory}, $conf->{pending}->{swap});

does not modify $conf, does not modify config file

> +		}
> +	    } else {
> +		die "skip\n"; # skip non-hotpluggable
> +	    }
> +	};
> +	if (my $err = $@) {
> +	    $add_error->($opt, $err) if $err ne "skip\n";
> +	    $conf = $class->load_config($vmid);

no need for this given the above - $conf and the config file must still 
represent the same state, unless I missed something?

> +	} else {
> +	    $conf->{$opt} = $value;
> +	    delete $conf->{pending}->{$opt};
> +	    $class->write_config($vmid, $conf);

no need to persist here?

> +	}
> +    }

a final write_config here might be an option, or at the call site in 
update_pct_config, or even just in the API path..

> +}
> +
> +sub vmconfig_apply_pending {

high level: compared to hotplug_pending, this has two major semantic 
differences:
- it always applies all pending changes (fine for start/reboot, but 
  maybe not for update_pct_config?)
- it does not attempt to apply, record error, keep pending change - once 
  it hits an option it can't apply, it's game-over..

the latter is quite bad, since this is in the vm_start codepath, and a 
problematic option in pending can block the start forever, with a 
non-privileged user not being able to remove it.. sorry for not noticing 
this with v1 already. my suggestion would be to wrap eval {} around the 
stuff that can fail, and use a similar add_error mechanism like in 
hotplug.

> +    my ($class, $vmid, $conf, $storecfg) = @_;
> +
> +    my $rescan_volume = sub {
> +	my ($mp) = @_;
> +	eval {
> +	    $mp->{size} = PVE::Storage::volume_size_info($storecfg, $mp->{volume}, 5)
> +		if !defined($mp->{size});
> +	};
> +	warn "Could not rescan volume size - $@\n" if $@;
> +    };
> +
> +    my $pending_delete_hash = $class->parse_pending_delete($conf->{pending}->{delete});
> +    # FIXME: $force deletion is not implemented for CTs
> +    while (my ($opt, undef) = each %$pending_delete_hash) {
> +	$class->cleanup_pending($conf);
> +	if ($opt =~ m/^mp(\d+)$/) {
> +	    my $mp = $class->parse_ct_mountpoint($conf->{$opt});
> +	    if ($mp->{type} eq 'volume') {
> +		$class->add_unused_volume($conf, $mp->{volume}); 

missing if !$class->is_volume_in_use() ?

> +	    }
> +	} elsif ($opt =~ m/^unused(\d+)$/) {
> +	    PVE::LXC::delete_mountpoint_volume($storecfg, $vmid, $conf->{$opt})
> +		if !$class->is_volume_in_use($conf, $conf->{$opt}, 1, 1);
> +	}
> +	delete $conf->{$opt};
> +	$class->remove_from_pending_delete($conf, $opt);
> +    }
> +
> +    foreach my $opt (keys %{$conf->{pending}}) { # add/change
> +	if ($opt =~ m/^mp(\d+)$/) {
> +	    my $mp = $class->parse_ct_mountpoint($conf->{pending}->{$opt});
> +	    my $old = $conf->{$opt};
> +	    if ($mp->{type} eq 'volume') {
> +		if ($mp->{volume} =~ $PVE::LXC::NEW_DISK_RE) {
> +		    PVE::LXC::create_disks($storecfg, $vmid, { $opt => $conf->{pending}->{$opt} }, $conf, 1);
> +		} else {
> +		    $rescan_volume->($mp);
> +		    $conf->{pending}->{$opt} = $class->print_ct_mountpoint($mp);
> +		}
> +	    }
> +	    if (defined($old)) {
> +		my $mp = $class->parse_ct_mountpoint($old);
> +		if ($mp->{type} eq 'volume') {
> +		    $class->add_unused_volume($conf, $mp->{volume});

missing if !$class->is_volume_in_use() ?

> +		}
> +	    }
> +	}
> +	$class->cleanup_pending($conf);
> +	$conf->{$opt} = delete $conf->{pending}->{$opt};
> +    }
> +
> +    $class->write_config($vmid, $conf);

we can only delay this until here iff the above code cannot die at any 
point, otherwise we will have partially applied stuff that is not 
persisted in the config.. e.g., create_disks and 
delete_mountpoint_volume have side-effects, so if we call them we need 
to persist the changes afterwards - maybe even ASAP? 

delete_mountpoint_volume might not matter much, since having an unused, 
not-existing volume should not matter (it's not possible to re-attach 
it, but deletion should work fine), but disk creation might fail for the 
second, third, ... pending disk, leaving orphan disks around which 
should be avoided if possible..

> +}
> +
>  sub classify_mountpoint {
>      my ($class, $vol) = @_;
>      if ($vol =~ m!^/!) {
> @@ -1186,7 +1374,7 @@ sub classify_mountpoint {
>      return 'volume';
>  }
>  

everything below could be its own patch that comes before this one.. are 
there other callers that might want to handle pending volumes specially 
as well?

> -my $is_volume_in_use = sub {
> +my $__is_volume_in_use = sub {
>      my ($class, $config, $volid) = @_;
>      my $used = 0;
>  
> @@ -1204,17 +1392,18 @@ sub is_volume_in_use_by_snapshots {
>  
>      if (my $snapshots = $config->{snapshots}) {
>  	foreach my $snap (keys %$snapshots) {
> -	    return 1 if $is_volume_in_use->($class, $snapshots->{$snap}, $volid);
> +	    return 1 if $__is_volume_in_use->($class, $snapshots->{$snap}, $volid);
>  	}
>      }
>  
>      return 0;
> -};
> +}
>  
>  sub is_volume_in_use {
> -    my ($class, $config, $volid, $include_snapshots) = @_;
> -    return 1 if $is_volume_in_use->($class, $config, $volid);
> +    my ($class, $config, $volid, $include_snapshots, $include_pending) = @_;
> +    return 1 if $__is_volume_in_use->($class, $config, $volid);
>      return 1 if $include_snapshots && $class->is_volume_in_use_by_snapshots($config, $volid);
> +    return 1 if $include_pending && $__is_volume_in_use->($class, $config->{pending}, $volid);
>      return 0;
>  }
>  
> -- 
> 2.20.1
> 
> _______________________________________________
> pve-devel mailing list
> pve-devel at pve.proxmox.com
> https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
> 
> 




More information about the pve-devel mailing list