[PATCH pve-manager 1/1] pvestatd: lvmqcow2 : extend disk on io-error

Alexandre Derumier alexandre.derumier at groupe-cyllene.com
Mon Aug 26 13:00:21 CEST 2024


If writes are really fast and the auto-extend triggered by the
QMP event is too slow, the VM could try to write to a qcow2 offset
higher than the size of the underlying LVM volume.

In this case, the vm will be paused in "io-error" mode.

To fix it, try to extend the drive and resume the VM.

Signed-off-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
---
 PVE/Service/pvestatd.pm | 62 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/PVE/Service/pvestatd.pm b/PVE/Service/pvestatd.pm
index 8661f774..72244451 100755
--- a/PVE/Service/pvestatd.pm
+++ b/PVE/Service/pvestatd.pm
@@ -230,12 +230,74 @@ sub auto_balloning {
     }
 }
 
+# Handle VMs paused in 'io-error', as happens when a qcow2 image outgrows its LVM
+# volume: extend each lvmqcow2 volume whose highest written offset reached its size,
+# then resume the VM. $vmstatus is the per-VMID hash from PVE::QemuServer::vmstatus().
+sub auto_extend_vm_disk_on_error {
+    my ($vmstatus) = @_;
+    my $storecfg = PVE::Storage::config();
+
+    foreach my $vmid (keys %$vmstatus) {
+	my $d = $vmstatus->{$vmid};
+	my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
+	next if $status ne 'io-error';
+	my $resume = undef;
+	# Per-VM eval: a failed monitor call, config load or resize must only skip
+	# this VM instead of aborting the handling of all remaining VMs.
+	eval {
+	    my $blockstats = PVE::QemuServer::mon_cmd($vmid, "query-blockstats");
+	    $blockstats = { map { $_->{device} => $_ } $blockstats->@* };
+	    my $conf = PVE::QemuConfig->load_config($vmid);
+
+	    PVE::QemuConfig->foreach_volume($conf, sub {
+		my ($ds, $drive) = @_;
+		my $volid = $drive->{file};
+		return if !$volid;
+		my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+		return if !$sid;
+		my $scfg = PVE::Storage::storage_config($storecfg, $sid);
+		return if $scfg->{type} ne 'lvmqcow2';
+
+		my $blockinfo = PVE::QemuServer::get_block_info($vmid, $ds, $blockstats);
+		my $wr_highest_offset = $blockinfo->{wr_highest_offset};
+		my $size = PVE::Storage::volume_size_info($storecfg, $volid, 5);
+
+		# Offset at or beyond the size: grow the LVM volume to offset + chunksize.
+		if (defined($wr_highest_offset) && $wr_highest_offset >= $size) {
+		    my $chunksize = $scfg->{chunksize} // 1024 * 1024 * 1024;
+		    my $newsize = $wr_highest_offset + $chunksize;
+		    syslog('info', "auto extend disk underlay storage of $blockinfo->{deviceid} to $newsize");
+		    PVE::Storage::volume_resize($storecfg, $volid, $newsize, 1, 1);
+		    # NOTE(review): unqualified, unlike the PVE::* calls above - confirm these resolve here.
+		    my $threshold = compute_write_threshold($newsize);
+		    qemu_block_set_write_threshold($vmid, $blockinfo->{blocknodeid}, $threshold);
+		}
+		# A lower offset means the volume was already extended asynchronously,
+		# just not fast enough, so resuming is all that is left to do.
+		$resume = 1;
+	    });
+	};
+	if (my $err = $@) {
+	    warn $err;
+	    next;
+	}
+	if ($resume) {
+	    syslog('info', "resume $vmid");
+	    eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
+	    warn $@ if $@;
+	}
+    }
+}
+
 sub update_qemu_status {
     my ($status_cfg) = @_;
 
     my $ctime = time();
     my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);
 
+    eval { auto_extend_vm_disk_on_error($vmstatus); };
+    syslog('err', "auto extend disk error: $@") if $@;
+
     eval { auto_balloning($vmstatus); };
     syslog('err', "auto ballooning error: $@") if $@;
 
-- 
2.39.2




More information about the pve-devel mailing list