[pve-devel] [PATCH V3 pve-ha-manager 1/2] add resource awareness manager

Tue Dec 21 16:13:30 CET 2021

This new implementation uses a best-fit heuristic for vector packing, with constraint support.

- We compute node memory/cpu and vm memory/cpu average stats over the last 20 minutes.

For each resource:
- First, we order the services pending recovery by memory usage, then by cpu usage.
  Memory is more important here, because a vm can't start if the target node doesn't have enough memory.

- Then, we check the constraints of the possible target nodes (storage available, node has enough cpu/ram, node has enough cores, ...).
  (This could be extended with other constraints like vm affinity/anti-affinity, cpu compatibility, ...)

- We classify nodes using low/medium/high cpu/mem thresholds.

- Then we compute a node weight as the euclidean distance between the vm usage vector and the node's available-resources vector (both cpu/ram).

- We order the node list by:
    - group prio,
    - soft constraint prio (anti-affinity),
    - threshold prio (try to recover to low-threshold nodes first, then medium, then high),
    - distance weight: if a vm uses 1GB ram/1% cpu, node1 has 2GB ram/2% cpu available and node2 has 4GB ram/4% cpu available, node1 will be chosen because it is the nearest to the vm usage,
    - number of HA vm/CT,
  and choose the first node of the list.

- We add the recovered vm's cpu/ram to the target node stats. (This is only a best-effort estimation, as the vm start is asynchronous on the target lrm and could fail, ...)

I have kept the HA group node prio and the other existing ordering criteria,
so this doesn't break current tests, and we can easily add a datacenter option to enable/disable it.

It could be easy to implement some kind of vm auto-migration later, for when a node uses too much cpu/ram,
reusing the same node selection algorithm.

I have added a basic test; I'll add more tests later if this patch series is ok for you.

Some good literature about heuristics:

microsoft hyper-v implementation:
 - http://kunaltalwar.org/papers/VBPacking.pdf
 - https://www.microsoft.com/en-us/research/wp-content/uploads/2011/01/virtualization.pdf
Variable size vector bin packing heuristics:
 - https://hal.archives-ouvertes.fr/hal-00868016v2/document

Algorithm comparison (first-fit, worst-fit, best-fit, ...):
http://www.diva-portal.org/smash/get/diva2:1261137/FULLTEXT02.pdf
---
 src/PVE/HA/Env.pm          |  33 +++++
 src/PVE/HA/Env/PVE2.pm     | 177 ++++++++++++++++++++++++
 src/PVE/HA/Manager.pm      | 274 +++++++++++++++++++++++++++++++++++--
 src/PVE/HA/Sim/Hardware.pm |  61 +++++++++
 src/PVE/HA/Sim/TestEnv.pm  |  50 ++++++-
 5 files changed, 586 insertions(+), 9 deletions(-)

diff --git a/src/PVE/HA/Env.pm b/src/PVE/HA/Env.pm
index ac569a9..d957aa9 100644
--- a/src/PVE/HA/Env.pm
+++ b/src/PVE/HA/Env.pm
@@ -269,4 +269,37 @@ sub get_ha_settings {
     return $self->{plug}->get_ha_settings();
 }
 
+sub get_node_rrd_stats {
+    my ($self, $node) = @_;
+    # thin wrapper: forward to the backend object stored in $self->{plug}
+    return $self->{plug}->get_node_rrd_stats($node);
+}
+
+sub get_vm_rrd_stats {
+    my ($self, $vmid, $percentile) = @_;
+    # thin wrapper: $vmid and $percentile are passed through unchanged to $self->{plug}
+    return $self->{plug}->get_vm_rrd_stats($vmid, $percentile);
+}
+
+# Load the config of guest $vmid through the backend, selected by $type ('vm' or 'ct').
+sub read_vm_ct_config {
+    my ($self, $vmid, $type) = @_;
+
+    return $self->{plug}->read_vm_config($vmid) if $type eq 'vm';
+    return $self->{plug}->read_ct_config($vmid) if $type eq 'ct';
+
+    return; # unknown type: explicit "no config" instead of falling off the end of the sub
+}
+
+sub read_storecfg {
+    my ($self) = @_;
+    # thin wrapper: returns whatever read_storecfg() of $self->{plug} returns
+    return $self->{plug}->read_storecfg();
+}
+
+sub check_storage_availability {
+    my ($self, $vmconf, $type, $node, $storecfg) = @_;
+    # thin wrapper: all four arguments are forwarded in the same order to $self->{plug}
+    return $self->{plug}->check_storage_availability($vmconf, $type, $node, $storecfg);
+}
 1;
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 5e0a683..1b1b4b0 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -12,6 +12,12 @@ use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file
 use PVE::DataCenterConfig;
 use PVE::INotify;
 use PVE::RPCEnvironment;
+use PVE::API2Tools;
+use PVE::QemuConfig;
+use PVE::QemuServer;
+use PVE::LXC::Config;
+use PVE::Storage;
+use RRDs;
 
 use PVE::HA::Tools ':exit_codes';
 use PVE::HA::Env;
@@ -459,4 +465,175 @@ sub get_ha_settings {
     return $datacenterconfig->{ha};
 }
 
+sub get_node_rrd_stats {
+    my ($self, $node) = @_;
+    # inputs for the extraction below: the cluster RRD dump and the member list
+    my $rrd = PVE::Cluster::rrd_dump();
+    my $members = PVE::Cluster::get_members();
+    # the result is whatever PVE::API2Tools::extract_node_stats() yields for $node
+    my $stats = PVE::API2Tools::extract_node_stats($node, $members, $rrd);
+
+    return $stats;
+}
+
+# Usage statistics of guest $vmid over the last 20 minutes, taken from its RRD
+# file: the $percentile-th percentile of the 1-minute AVERAGE samples.
+# Returns { cpu, maxcpu, mem, maxmem, totalcpu }; a value is 0 if no non-zero sample exists.
+sub get_vm_rrd_stats {
+    my ($self, $vmid, $percentile) = @_;
+
+    my $rrdname = "pve2-vm/$vmid";
+    my $rrddir = "/var/lib/rrdcached/db";
+    my $rrd = "$rrddir/$rrdname";
+    my $cf = "AVERAGE";
+
+    my $reso = 60;
+    my $ctime  = $reso*int(time()/$reso);
+
+    #last 20minutes
+    my $req_start = $ctime - $reso*20;
+    my $req_end = $ctime - $reso*1;
+
+    my @args = (
+        "-s" => $req_start,
+        "-e" => $req_end,
+        "-r" => $reso,
+        );
+
+    my $socket = "/var/run/rrdcached.sock";
+    push @args, "--daemon" => "unix:$socket" if -S $socket;
+
+    my ($start, $step, $names, $data) = RRDs::fetch($rrd, $cf, @args);
+    my @cpu = ();
+    my @mem = ();
+    my @maxmem = ();
+    my @maxcpu = ();
+
+    # NOTE(review): assumes each row starts with maxcpu, cpu, maxmem, mem - confirm
+    foreach my $rec (@$data) {
+        my $maxcpu = $rec->[0] || 0;
+        my $cpu = $rec->[1] || 0;
+        my $maxmem = $rec->[2] || 0;
+        my $mem = $rec->[3] || 0;
+        #skip zeros values if vm is down
+        #cpu stays unscaled: it is multiplied by maxcpu once, for totalcpu below
+        push @cpu, $cpu if $cpu > 0;
+        push @mem, $mem if $mem > 0;
+        push @maxcpu, $maxcpu if $maxcpu > 0;
+        push @maxmem, $maxmem if $maxmem > 0;
+    }
+
+    my $stats = {};
+    $stats->{cpu} = percentile($percentile, \@cpu) || 0;
+    $stats->{mem} = percentile($percentile, \@mem) || 0;
+    $stats->{maxmem} = percentile($percentile, \@maxmem) || 0;
+    $stats->{maxcpu} = percentile($percentile, \@maxcpu) || 0;
+    $stats->{totalcpu} = $stats->{cpu} * $stats->{maxcpu} * 100;
+    return $stats;
+}
+
+sub percentile {
+    my ($p, $aref) = @_;
+    # $p-th percentile (0-100) of @$aref; numeric sort, since a plain sort() compares as strings
+    return (sort { $a <=> $b } @$aref)[int($p * $#{$aref}/100)];
+}
+
+sub read_vm_config {
+    my ($self, $vmid) = @_;
+    # Returns a reduced copy of the VM config (ostype plus volume keys), or nothing if it cannot be loaded.
+    my $conf = undef;
+    my $finalconf = {};
+    # the config is loaded from the node that the cluster VM list records for $vmid
+    my $vmlist = PVE::Cluster::get_vmlist();
+    my $node = $vmlist->{ids}->{$vmid}->{node};
+    # a failing load_config() is swallowed by eval and leaves $conf undefined
+    eval { $conf = PVE::QemuConfig->load_config($vmid, $node)};
+    return if !$conf;
+    # any ostype for which windows_version() returns true is collapsed to the literal 'windows'
+    if ( PVE::QemuServer::windows_version($conf->{ostype}) ) {
+	$finalconf->{ostype} = 'windows';
+    } else {
+	$finalconf->{ostype} = $conf->{ostype};
+    }
+    # copy every config key that foreach_volume() visits, verbatim
+    PVE::QemuConfig->foreach_volume($conf, sub {
+	my ($ds, $drive) = @_;
+
+	$finalconf->{$ds} = $conf->{$ds};
+    });
+
+    return $finalconf;
+}
+
+sub read_ct_config {
+    my ($self, $vmid) = @_;
+    # Returns a hash holding only the volume keys of the container config, or nothing if it cannot be loaded.
+    my $conf = undef;
+    my $finalconf = {};
+    # the config is loaded from the node that the cluster VM list records for $vmid
+    my $vmlist = PVE::Cluster::get_vmlist();
+    my $node = $vmlist->{ids}->{$vmid}->{node};
+    # a failing load_config() is swallowed by eval and leaves $conf undefined
+    eval { $conf = PVE::LXC::Config->load_config($vmid, $node)};
+    return if !$conf;
+    # copy every config key that foreach_volume() visits, verbatim
+    PVE::LXC::Config->foreach_volume($conf, sub {
+        my ($ms, $mountpoint) = @_;
+        $finalconf->{$ms} = $conf->{$ms};
+    });
+
+    return $finalconf;
+}
+
+sub read_storecfg {
+    my ($self) = @_;
+    # returns the result of PVE::Storage::config() unchanged
+    return PVE::Storage::config();
+}
+
+sub check_storage_availability {
+    my ($self, $vmconf, $type, $node, $storecfg) = @_;
+    # returns 1 unless the check selected by $type dies; any other $type also yields 1
+    if ($type eq 'vm') {
+	eval { PVE::QemuServer::check_storage_availability($storecfg, $vmconf, $node) };
+	return if $@;
+    } elsif ($type eq 'ct') {
+	eval { check_lxc_storage_availability($storecfg, $vmconf, $node) };
+	return if $@;
+    }
+    return 1;
+}
+
+
+
+##copy/paste from PVE::LXC::Migrate. TODO: add it as PVE::LXC::check_storage_availability, like in QemuServer
+# Dies if a mount point of container config $conf cannot be used on $node; relies
+# on storage_check_enabled() to die for storages that are not enabled.
+sub check_lxc_storage_availability {
+    my ($storecfg, $conf, $node) = @_;
+
+    PVE::LXC::Config->foreach_volume_full($conf, { include_unused => 1 }, sub {
+	my ($ms, $mountpoint) = @_;
+	my $volid = $mountpoint->{volume};
+	my $type = $mountpoint->{type};
+
+	# skip dev/bind mps when shared
+	if ($type ne 'volume') {
+	    return if $mountpoint->{shared};
+	    die "cannot migrate local $type mount point '$ms'\n";
+	}
+
+	# declare first, assign conditionally: 'my ... if COND' is undefined behaviour
+	my $storage;
+	($storage) = PVE::Storage::parse_volume_id($volid, 1) if $volid;
+	die "can't determine assigned storage for mount point '$ms'\n" if !$storage;
+
+	# check if storage is available on both nodes
+	my $scfg = PVE::Storage::storage_check_enabled($storecfg, $storage);
+	PVE::Storage::storage_check_enabled($storecfg, $storage, $node);
+
+	die "content type 'rootdir' is not available on storage '$storage'\n"
+	    if !$scfg->{content}->{rootdir};
+    });
+}
 1;
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 1c66b43..80624f4 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -1,3 +1,4 @@
+
 package PVE::HA::Manager;
 
 use strict;
@@ -394,8 +395,20 @@ sub manage {
 	my $repeat = 0;
 
 	$self->recompute_online_node_usage();
-
-	foreach my $sid (sort keys %$ss) {
+	$self->recompute_online_node_stats();
+
+	$self->get_service_stats($ss);
+	$self->{storecfg} = $haenv->read_storecfg();
+
+	foreach my $sid (
+		#ordering vm by size, bigger mem first then bigger cpu
+		#could be improved with bubblesearch heuristic
+		#https://www.cs.tufts.edu/~nr/cs257/archive/michael-mitzenmacher/bubblesearch.pdf
+		sort { 
+			$ss->{$a}->{stats}->{memg} <=> $ss->{$b}->{stats}->{memg} || 
+			$ss->{$a}->{stats}->{totalcpuround} <=> $ss->{$b}->{stats}->{totalcpuround} || 
+			$ss->{$a}->{type} cmp $ss->{$b}->{type}}
+		keys %$ss) {
 	    my $sd = $ss->{$sid};
 	    my $cd = $sc->{$sid} || { state => 'disabled' };
 
@@ -802,12 +815,7 @@ sub next_state_recovery {
 
     $self->recompute_online_node_usage(); # we want the most current node state
 
-    my $recovery_node = select_service_node(
-	$self->{groups},
-	$self->{online_node_usage},
-	$cd,
-	$sd->{node},
-    );
+    my $recovery_node = $self->find_bestfit_node_target($cd , $sd);
 
     if ($recovery_node) {
 	my $msg = "recover service '$sid' from fenced node '$fenced_node' to node '$recovery_node'";
@@ -822,6 +830,14 @@ sub next_state_recovery {
 	$haenv->steal_service($sid, $sd->{node}, $recovery_node);
 	$self->{online_node_usage}->{$recovery_node}++;
 
+	#add vm cpu/mem to current node stats (this is an estimation based on last 20min vm stats)
+	my $node_stats = $self->{online_node_stats}->{$recovery_node}->{stats};
+	$node_stats->{totalcpu} += $sd->{stats}->{totalcpu};
+	$node_stats->{mem} += $sd->{stats}->{mem};
+	$node_stats->{totalfreecpu} = (100 * $node_stats->{maxcpu}) - $node_stats->{totalcpu};
+	$node_stats->{freemem} =  $node_stats->{maxmem} - $node_stats->{mem};
+
+
 	# NOTE: $sd *is normally read-only*, fencing is the exception
 	$cd->{node} = $sd->{node} = $recovery_node;
 	my $new_state = ($cd->{state} eq 'started') ? 'started' : 'request_stop';
@@ -839,4 +855,246 @@ sub next_state_recovery {
     }
 }
 
+sub find_bestfit_node_target {
+    my($self, $cd, $sd) = @_;
+    # Pick the recovery target for service $sd among the online nodes; returns a node name, or undef if none qualifies.
+    my $online_nodes = $self->{online_node_stats};
+    my $groups = $self->{groups};
+    my $hagroup = get_service_group($groups, $online_nodes, $cd);
+    my ($pri_groups, $group_members_prio) = get_node_priority_groups($hagroup, $online_nodes);
+
+    my $target_nodes = {};
+    foreach my $node (keys %$online_nodes) {
+
+        #### FILTERING NODES WITH HARD CONSTRAINTS (vm can't be started)
+	next if !$self->check_hard_constraints($cd, $sd, $node, $group_members_prio);
+
+	#### compute the different priorities used for the ordering below
+	$target_nodes->{$node} = $self->add_node_prio($sd, $node, 'distance', $group_members_prio);
+    }
+
+    #order by hagroup prio (higher first), soft_constraint_prio (lower first), threshold_prio (higher first), weight (best fit: lower distance first), number of services (fewer first), and nodename
+    my @target_array = sort { 
+				$target_nodes->{$b}->{prio} <=> $target_nodes->{$a}->{prio} || 
+				$target_nodes->{$a}->{soft_constraint_prio} <=> $target_nodes->{$b}->{soft_constraint_prio} || 
+				$target_nodes->{$b}->{threshold_prio} <=> $target_nodes->{$a}->{threshold_prio} || 
+				$target_nodes->{$a}->{weight} <=> $target_nodes->{$b}->{weight} || 
+				$target_nodes->{$a}->{online_node_usage} <=> $target_nodes->{$b}->{online_node_usage} || 
+				$target_nodes->{$a}->{name} cmp $target_nodes->{$b}->{name}
+			} keys %$target_nodes;
+
+    my $target = $target_array[0];
+    # $target is undef when no node passed the hard constraints
+    return $target;
+}
+
+
+# Returns 1 if $node is able to run the service described by $sd, nothing otherwise.
+sub check_hard_constraints {
+    my ($self, $cd, $sd, $node, $group_members_prio) = @_;
+
+    my $haenv = $self->{haenv};
+    my $vm_stats = $sd->{stats};
+    my $node_stats = $self->{online_node_stats}->{$node}->{stats};
+
+    #node need to have a prio(restricted group)
+    return if !defined($group_members_prio->{$node});
+
+    #vm can't start if host have less core
+    return if $node_stats->{maxcpu} < $vm_stats->{maxcpu};
+    #vm can't start if node don't have enough mem to handle vm max mem
+    return if $node_stats->{freemem} < $vm_stats->{maxmem};
+
+    #check if target node have enough mem ressources under threshold
+    my $mem_critical_threshold = 95;
+    my $target_mem_percent = $node_stats->{maxmem} > 0 ? (($node_stats->{mem} + $vm_stats->{mem}) * 100 / $node_stats->{maxmem}) : 0;
+    return if $target_mem_percent  > $mem_critical_threshold;
+
+    #check if target node have enough cpu ressources under threshold
+    #totalcpu counts percent of one core (up to 100 * maxcpu): scale by the core count to get 0-100
+    my $cpu_critical_threshold = 95;
+    my $target_cpu_percent = $node_stats->{maxcpu} > 0 ? (($node_stats->{totalcpu} + $vm_stats->{totalcpu}) / $node_stats->{maxcpu}) : 0;
+    return if $target_cpu_percent > $cpu_critical_threshold;
+
+    return if !$haenv->check_storage_availability($sd->{vmconf}, $sd->{type}, $node, $self->{storecfg});
+    # fixme: check bridge availability
+    # fixme: vm: add a check for cpumodel compatibility ?
+    return 1;
+}
+
+sub compute_soft_constraints {
+    # placeholder: reads no arguments and currently always returns 0
+    my $prio = 0;
+    #fixme : add antiaffinity
+
+    return $prio;
+}
+
+# Rank a node by the load it would have after taking the vm: 3 = both cpu and
+# mem stay within the 'low' limit, 2 = within 'medium', 1 = within 'high', else 0.
+sub compute_node_threshold_prio {
+    my ($node_stats, $vm_stats) = @_;
+
+    #ordering nodes by low,medium,high usage.
+    #theses thresholds could be configured by user
+
+    #for high threshold to reach 80% max cpu/ram
+    #with ksm, node memory try to be max 80%
+    #cpu at 80% should be a good indicator, checking load and pressure could be implemented too
+    my @thresholds = ('low','medium','high');
+    my $mem_thresholds = { low => 20 , medium => 50, high => 80 };
+    my $cpu_thresholds = { low => 20 , medium => 50, high => 80 };
+
+    my $target_mem_percent = $node_stats->{maxmem} > 0 ? (($node_stats->{mem} + $vm_stats->{mem}) * 100 / $node_stats->{maxmem}) : 0;
+    #totalcpu counts percent of one core, so divide by the core count to get 0-100
+    my $target_cpu_percent = $node_stats->{maxcpu} > 0 ? (($node_stats->{totalcpu} + $vm_stats->{totalcpu}) / $node_stats->{maxcpu}) : 0;
+
+    my $prio = 0;
+    foreach my $key (@thresholds) {
+	$prio++ if $target_mem_percent <= $mem_thresholds->{$key} && $target_cpu_percent <= $cpu_thresholds->{$key};
+    }
+    return $prio;
+}
+
+sub add_node_prio {
+    my ($self, $sd, $nodename, $method, $group_members_prio) = @_;
+    # Build the ranking record of $nodename for service $sd: weight, soft_constraint_prio, threshold_prio, prio, online_node_usage, name.
+    my $vm_stats = $sd->{stats};
+    my $node_stats = $self->{online_node_stats}->{$nodename}->{stats};
+
+    #rounded values for vectors
+    my $vm_totalcpu = $vm_stats->{totalcpuround};
+    my $vm_mem = $vm_stats->{memg};
+    my $node_freecpu = roundup($node_stats->{totalfreecpu}, 10);
+    my $node_freemem = roundup(($node_stats->{freemem}/1024/1024/1024),1);
+    # both vectors are (cpu, mem): usage of the vm vs. free capacity of the node
+    my @vec_vm = ($vm_totalcpu, $vm_mem);  #? add network usage dimension ?
+    my @vec_node = ($node_freecpu, $node_freemem); #? add network usage dimension ?
+    my $weight = 0;
+    if ($method eq 'distance') {
+	$weight = euclidean_distance(\@vec_vm,\@vec_node);
+    } elsif ($method eq 'dotprod') {
+	$weight = dotprod(\@vec_vm,\@vec_node);
+    }
+    # any other $method leaves the weight at 0
+    my $node = {};
+    $node->{weight} = $weight;
+    $node->{soft_constraint_prio} = compute_soft_constraints();
+    $node->{threshold_prio} = compute_node_threshold_prio($node_stats, $vm_stats);
+    $node->{prio} = $group_members_prio->{$nodename};
+    $node->{online_node_usage} = $self->{online_node_usage}->{$nodename};
+    $node->{name} = $nodename;
+
+    return $node;
+}
+
+sub get_service_stats {
+   my ($self, $ss) = @_;
+    # Annotate each entry of $ss in place with its type, vmid and a stats hash; there is no explicit return value.
+    my $haenv = $self->{haenv};
+
+    foreach my $sid (sort keys %$ss) {
+	# split the service id; the first value returned by parse_sid() is not needed
+	my (undef, $type, $vmid) = $haenv->parse_sid($sid);
+	$ss->{$sid}->{type} = $type;
+	$ss->{$sid}->{vmid} = $vmid;
+	# default: all-zero stats, replaced below only for services in 'recovery' state
+	my $stats = { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0, totalcpu => 0, totalcpuround => 0, memg => 0 };
+	$ss->{$sid}->{stats} = $stats;
+
+	#avoid to compute all stats, as currently we only support recovery
+	next if $ss->{$sid}->{state} ne 'recovery';
+	# $vmconf may be undef, which is why it is tested before use below
+	my $vmconf = $haenv->read_vm_ct_config($vmid, $type); 
+	$ss->{$sid}->{vmconf} = $vmconf;
+
+	#get vm/ct stats on last 20min (95percentile)
+	$stats = $haenv->get_vm_rrd_stats($vmid, 95);
+
+	#windows vm fill memory with zero at boot, so mem = maxmem
+	$stats->{mem} = $stats->{maxmem} if $vmconf && defined($vmconf->{ostype}) && $vmconf->{ostype} eq 'windows';
+
+	#totalcpu = relative cpu for 1core. 50% of 4 cores = 200% of 1 core
+	$stats->{totalcpu} = $stats->{cpu} * 100 * $stats->{maxcpu};
+
+	#rounded by 10% step for ordering
+	$stats->{totalcpuround} = roundup($stats->{totalcpu}, 10);
+	#rounded by 1G step for ordering
+	$stats->{memg} = roundup(($stats->{mem} /1024 /1024 /1024), 1);
+	# publish the measured stats in place of the zero defaults
+	$ss->{$sid}->{stats} = $stats;
+    }
+}
+
+sub recompute_online_node_stats {
+    my ($self) = @_;
+    # Rebuild $self->{online_node_stats} ({ node => { stats => {...} } }) for every node listed as online.
+    my $online_node_stats = {};
+    my $online_nodes = $self->{ns}->list_online_nodes();
+    # missing values default to 0 so that the arithmetic below never sees undef
+    foreach my $node (@$online_nodes) {
+	my $stats = $self->{haenv}->get_node_rrd_stats($node);
+        $stats->{cpu} = 0 if !defined($stats->{cpu});
+        $stats->{maxcpu} = 0 if !defined($stats->{maxcpu});
+        $stats->{mem} = 0 if !defined($stats->{mem});
+        $stats->{maxmem} = 0 if !defined($stats->{maxmem});
+        $stats->{totalcpu} = $stats->{cpu} * 100 * $stats->{maxcpu}; #how to handle different cpu model power ? bogomips ?
+        $stats->{totalfreecpu} = (100 * $stats->{maxcpu}) - $stats->{totalcpu};
+        $stats->{freemem} = $stats->{maxmem} - $stats->{mem};
+        $online_node_stats->{$node}->{stats} = $stats;
+    }
+
+    $self->{online_node_stats} = $online_node_stats;
+}
+
+## math helpers
+# Dot product of two equally sized numeric vectors (array refs).
+# $mode: 'normR' => cosine similarity, 'normC' => divided by |b|^2, else raw sum.
+sub dotprod {
+    my($vec_a, $vec_b, $mode) = @_;
+    die "they must have the same size\n" unless @$vec_a == @$vec_b;
+    $mode = "" if !$mode;
+    my ($sum, $norm_a, $norm_b) = (0, 0, 0);
+    for my $i (0 .. $#{$vec_a}) {
+	my ($x, $y) = ($vec_a->[$i], $vec_b->[$i]);
+	$sum += $x * $y;
+	$norm_a += $x * $x;
+	$norm_b += $y * $y;
+    }
+
+    if ($mode eq 'normR') {
+	# a zero vector (e.g. a service without stats) has no direction
+	return 0 if !$norm_a || !$norm_b;
+	return $sum / (sqrt($norm_a) * sqrt($norm_b));
+    } elsif ($mode eq 'normC') {
+	return 0 if !$norm_b; # avoid "Illegal division by zero"
+	return $sum / $norm_b;
+    }
+    return $sum;
+}
+
+# Euclidean distance between two equally sized numeric vectors (array refs).
+sub euclidean_distance {
+    my($vec_a, $vec_b) = @_;
+    # same guard as dotprod(): a shorter $vec_b would otherwise count as zeros
+    die "they must have the same size\n" unless @$vec_a == @$vec_b;
+
+    my $sum = 0;
+    for my $i (0 .. $#{$vec_a}) {
+	my $delta = $vec_b->[$i] - $vec_a->[$i];
+	$sum += $delta ** 2;
+    }
+    return sqrt($sum);
+}
+
+# Round $number up to the next multiple of $round (e.g. 10.5 by 10 => 20).
+sub roundup {
+   my ($number, $round) = @_;
+   # Don't use '%' here: it truncates its operands to integers, so fractional
+   # values such as 1.5 (GiB, step 1) were returned unrounded.
+   my $steps = int($number/$round);
+   $steps++ if $steps * $round < $number;
+   return $steps * $round;
+}
+
 1;
diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index ba731e5..396e8bd 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -109,6 +109,22 @@ sub read_service_config {
     return $conf;
 }
 
+sub read_service_stats {
+    my ($self) = @_;
+    # load the 'service_stats' file of the status directory via read_json_from_file()
+    my $filename = "$self->{statusdir}/service_stats";
+    my $conf = PVE::HA::Tools::read_json_from_file($filename);
+    return $conf;
+}
+
+sub read_node_stats {
+    my ($self) = @_;
+    # load the 'node_stats' file of the status directory via read_json_from_file()
+    my $filename = "$self->{statusdir}/node_stats";
+    my $conf = PVE::HA::Tools::read_json_from_file($filename);
+    return $conf;
+}
+
 sub update_service_config {
     my ($self, $sid, $param) = @_;
 
@@ -132,6 +148,24 @@ sub write_service_config {
     return PVE::HA::Tools::write_json_to_file($filename, $conf);
 }
 
+sub write_service_stats {
+    my ($self, $conf) = @_;
+    # keep $conf as the in-memory copy, then persist it to 'service_stats' in the status directory
+    $self->{service_stats} = $conf;
+
+    my $filename = "$self->{statusdir}/service_stats";
+    return PVE::HA::Tools::write_json_to_file($filename, $conf);
+}
+
+sub write_node_stats {
+    my ($self, $conf) = @_;
+    # keep $conf as the in-memory copy, then persist it to 'node_stats' in the status directory
+    $self->{node_stats} = $conf;
+
+    my $filename = "$self->{statusdir}/node_stats";
+    return PVE::HA::Tools::write_json_to_file($filename, $conf);
+}
+
 sub read_fence_config {
     my ($self) = @_;
 
@@ -382,6 +416,31 @@ sub new {
 	$self->write_service_config($conf);
     }
 
+    if (-f "$testdir/service_stats") {
+	copy("$testdir/service_stats", "$statusdir/service_stats");
+    } else {
+	my $conf = {
+	    '101' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	    '102' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	    '103' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	    '104' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	    '105' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	    '106' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	};
+	$self->write_service_stats($conf);
+    }
+
+    if (-f "$testdir/node_stats") {
+	copy("$testdir/node_stats", "$statusdir/node_stats");
+    } else {
+	my $conf = {
+	    'node1' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	    'node2' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	    'node3' => { cpu => 0, maxcpu => 0, mem => 0, maxmem => 0 },
+	};
+	$self->write_node_stats($conf);
+    }
+
     if (-f "$testdir/hardware_status") {
 	copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
 	    die "Copy failed: $!\n";
@@ -415,6 +474,8 @@ sub new {
     }
 
     $self->{service_config} = $self->read_service_config();
+    $self->{service_stats} = $self->read_service_stats();
+    $self->{node_stats} = $self->read_node_stats();
 
     return $self;
 }
diff --git a/src/PVE/HA/Sim/TestEnv.pm b/src/PVE/HA/Sim/TestEnv.pm
index 6718d8c..308478d 100644
--- a/src/PVE/HA/Sim/TestEnv.pm
+++ b/src/PVE/HA/Sim/TestEnv.pm
@@ -118,4 +118,52 @@ sub get_max_workers {
     return 0;
 }
 
-1;
+sub get_node_rrd_stats {
+    my ($self, $node) = @_;
+    # simulator variant: look $node up in the hardware's node_stats table (undef if absent)
+    my $nodestats = $self->{hardware}->{node_stats};
+    my $stats = $nodestats->{$node};
+
+    return $stats;
+}
+
+sub get_vm_rrd_stats {
+    my ($self, $vmid, $percentile) = @_;
+    # simulator variant: stats come from the hardware's service_stats table; $percentile is not used
+    my $vmstats = $self->{hardware}->{service_stats};
+    my $stats = $vmstats->{$vmid};
+    # fill in 0 for missing values, then derive totalcpu from cpu and maxcpu
+    $stats->{cpu} = $stats->{cpu} || 0;
+    $stats->{mem} = $stats->{mem} || 0;
+    $stats->{maxmem} = $stats->{maxmem} || 0;
+    $stats->{maxcpu} = $stats->{maxcpu} || 0;
+    $stats->{totalcpu} = $stats->{cpu} * $stats->{maxcpu} * 100;
+
+    return $stats;
+}
+
+sub read_vm_config {
+    my ($self, $vmid) = @_;
+    # always returns an empty config hash; $vmid is not looked at
+    return {};
+}
+
+sub read_ct_config {
+    my ($self, $vmid) = @_;
+    # always returns an empty config hash; $vmid is not looked at
+    return {};
+}
+
+sub read_storecfg {
+    my ($self) = @_;
+    # always returns an empty storage config hash
+    return {};
+}
+
+sub check_storage_availability {
+    my ($self, $vmconf, $type, $node, $storecfg) = @_;
+    # always succeeds; 2nd argument named $vmconf to match PVE::HA::Env, which passes the guest config
+    return 1;
+}
+
+1;
\ No newline at end of file
-- 
2.30.2