[pve-devel] [PATCH pve-manager] POC: migrate_all: add auto best target selection

Thomas Lamprecht t.lamprecht at proxmox.com
Mon Oct 7 08:32:29 CEST 2019


On 10/6/19 6:14 AM, Alexandre Derumier wrote:
> This is a POC, trying to implement basic loadbalancing
> with best fist algorithm using dotproduct heuristic
> 
> some docs about dotproduct heuristic:
> https://www.thinkmind.org/download.php?articleid=icn_2014_11_10_30065
> https://hal.archives-ouvertes.fr/hal-00868016v2/document
> 
> The main idea is to order nodes by a weight, computed with a
> multidimensional vector from node + host (currently cpu, mem; but we could add network usage).
> 
> I have implemented it in migrate_all for the test, but it could
> also be used in the HA manager to select the best node.
> (priority and groups can be added too easily)

Great, no java ;P
On a serious note, this looks much more like something we can use.

IIUC, for now this works like a "re-distribute my VMs/CTs on the
remaining nodes"

What happens if I trigger this call on a few or all nodes at the same time?
Does it move everything around a bit (as it has no info about incoming
migrations), or does it reach equilibrium after a few runs on all nodes?

As a test this is totally fine here, but placing it in a central service with
a cluster-wide view, like the HA CRM, really seems like a nicer fit: it can
then easily know which nodes await incoming migrations and adjust their load
accordingly, as otherwise a totally empty node will get overloaded because
all others want to move to it.

> 
> and maybe implement a loadbalancer feature. (Maybe something
> simple like migrate vm when memory/cpu are bigger than an defined threshold)

I'm currently working on the maintenance mode, at least for HA, ideally for
non-HA too. I'd like to help/take a look at this afterwards.

> 
> Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
> ---
>  PVE/API2/Nodes.pm | 66 ++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 59 insertions(+), 7 deletions(-)
> 
> diff --git a/PVE/API2/Nodes.pm b/PVE/API2/Nodes.pm
> index 9e731e05..303a7ffb 100644
> --- a/PVE/API2/Nodes.pm
> +++ b/PVE/API2/Nodes.pm
> @@ -1,5 +1,4 @@
>  package PVE::API2::Nodes::Nodeinfo;
> -
>  use strict;
>  use warnings;
>  use POSIX qw(LONG_MAX);
> @@ -1921,7 +1920,11 @@ __PACKAGE__->register_method ({
>  	additionalProperties => 0,
>  	properties => {
>  	    node => get_standard_option('pve-node'),
> -            target => get_standard_option('pve-node', { description => "Target node." }),
> +	    target => {
> +		description => "Target node.",
> +		type => 'string',  format => 'pve-node',
> +		optional => 1,
> +	    },
>              maxworkers => {
>                  description => "Maximal number of parallel migration job." .
>  		    " If not set use 'max_workers' from datacenter.cfg," .
> @@ -1950,27 +1953,68 @@ __PACKAGE__->register_method ({
>  	$nodename = PVE::INotify::nodename() if $nodename eq 'localhost';
>  
>  	my $target = $param->{target};
> -	raise_param_exc({ target => "target is local node."}) if $target eq $nodename;
> +	raise_param_exc({ target => "target is local node."}) if $target && $target eq $nodename;
>  
>  	PVE::Cluster::check_cfs_quorum();
>  
> -	PVE::Cluster::check_node_exists($target);
> +	PVE::Cluster::check_node_exists($target) if $target;
>  
>  	my $datacenterconfig = cfs_read_file('datacenter.cfg');
>  	# prefer parameter over datacenter cfg settings
> -	my $maxWorkers = $param->{maxworkers} || $datacenterconfig->{max_workers} ||
> -	    die "either 'maxworkers' parameter or max_workers in datacenter.cfg must be set!\n";
> +	my $maxWorkers = 1;
> +	$maxWorkers = $param->{maxworkers} || $datacenterconfig->{max_workers} ||
> +	    die "either 'maxworkers' parameter or max_workers in datacenter.cfg must be set!\n" if $target;
>  
>  	my $code = sub {
>  	    $rpcenv->{type} = 'priv'; # to start tasks in background
>  
>  	    my $vmlist = &$get_filtered_vmlist($nodename, $param->{vms}, 1, 1);
> -
>  	    my $workers = {};
>  	    foreach my $vmid (sort keys %$vmlist) {
>  		my $d = $vmlist->{$vmid};
> +
> +		if(!$target) {
> +		
> +		    my $members = PVE::Cluster::get_members();
> +		    my $rrd = PVE::Cluster::rrd_dump();
> +		    my $nodelist = PVE::Cluster::get_nodelist();
> +
> +		    my $vm_stats = PVE::API2Tools::extract_vm_stats($vmid, $d, $rrd);
> +		    my $vm_cpu = $vm_stats->{cpu} * $vm_stats->{maxcpu};
> +		    my $vm_mem = $vm_stats->{mem};
> +		    my @vec_vm = ($vm_cpu, $vm_mem);  #? add network usage dimension ?
> +
> +		    my $nodes_weight = {};
> +		    my $highest_weight = 0;
> +		    foreach my $node (@$nodelist) {
> +			next if $node eq $nodename;
> +
> +			my $node_stats = PVE::API2Tools::extract_node_stats($node, $members, $rrd);
> +			my $node_freemem = $node_stats->{maxmem} - $node_stats->{mem};
> +			my $node_freecpu = (100 - $node_stats->{cpu}) * $node_stats->{maxcpu};  #how to handle different cpu model power ? bogomips ?
> +			next if $node_stats->{status} ne 'online';
> +			next if $node_freecpu < $vm_cpu;
> +			next if $node_freemem < $vm_mem;
> +			next if $node_stats->{maxcpu} < $vm_stats->{maxcpu};
> +			# fixme: check storage available
> +			# fixme: check vmbr available
> +
> +			my @vec_node = ($node_freecpu, $node_freemem); #? add network usage dimension ?
> +			my $weight = dotprod(\@vec_vm,\@vec_node);
> +			$nodes_weight->{$weight} = $node;
> +			$highest_weight = $weight if $weight > $highest_weight;
> +		    }	
> +		    $target = $nodes_weight->{$highest_weight};	
> +		    if(!$target) {
> +			warn "couldn't find a target for vmid $vmid\n";
> +			next;
> +		    }
> +		    print "vm:$vmid best target:$target\n";
> +		}
> +
>  		my $pid;
>  		eval { $pid = &$create_migrate_worker($nodename, $d->{type}, $vmid, $target); };
> +		$target = $param->{target};
>  		warn $@ if $@;
>  		next if !$pid;
>  
> @@ -2086,6 +2130,14 @@ sub complete_templet_repo {
>      return $res;
>  }
>  
> +sub dotprod {
> +    my ($vec_a, $vec_b) = @_;
> +    die "they must have the same size\n" unless @$vec_a == @$vec_b;

I know this is 1:1 from Rosetta Code, but changing the error to something
a bit less general would be nice, for example:

"dot product: vectors need to have the same size ". @$vec_a ." != ". @$vec_b ."\n"
   if @$vec_a != @$vec_b;

> +    my $sum = 0;
> +    $sum += $vec_a->[$_] * $vec_b->[$_] for 0..$#$vec_a;
> +    return $sum;
> +}
> +
>  package PVE::API2::Nodes;
>  
>  use strict;
> 





More information about the pve-devel mailing list