[pve-devel] [PATCH qemu-server v4 16/27] backup: implement backup for external providers

Fiona Ebner f.ebner at proxmox.com
Thu Nov 14 16:07:43 CET 2024


The state of the VM's disk images at the time the backup is started is
preserved via a snapshot-access block node. Old data is moved to the
fleecing image when new guest writes come in. The snapshot-access
block node, as well as the associated bitmap in case of incremental
backup, will be made available to the external provider. They are
exported via NBD. For the 'nbd' mechanism, the NBD socket path is
passed to the provider. For the 'block-device' mechanism, the NBD
export is first made accessible as a regular block device and the
bitmap information is made available via a $next_dirty_region->()
function. For 'block-device', the 'nbdinfo' binary is required.

The provider can indicate that it wants to do an incremental backup by
returning the bitmap ID that was used for a previous backup and it
will then be told if the bitmap was newly created (either first backup
or old bitmap was invalid) or if the bitmap can be reused.

The provider then reads the parts of the NBD or block device it needs,
either the full disk for full backup, or the dirty parts according to
the bitmap for incremental backup. The bitmap has to be respected:
reads to other parts of the image will return an error. After backing
up each part of the disk, it should be discarded in the export to
avoid unnecessary space usage in the fleecing image (requires the
storage underlying the fleecing image to support discard too).

Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---

Changes in v4:
* adapt to context change by patch 13/27
* only wait for known child pids, not any
* rename $devicename -> $device_name
* move static map outside of loop
* issue guest FS freeze right before 'backup-access-setup' QMP command
* move parameter variable closer to usage
* get rid of manually running modprobe - rely on earlier patch with
  modules-load config, but die if 'nbd' module not loaded for
  'block-device' mechanism
* prefix target ID with 'snapshot-access:' to avoid potential clash
  with legacy 'Proxmox' value
* re-order recording cleanup step for teardown to just before setup
  QMP call
* improve /dev/nbdX handling by adding reservation and checks
* fix cleanup of child pids and NBD block devices (by saving information in $task->{cleanup})

 PVE/VZDump/QemuServer.pm | 386 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 385 insertions(+), 1 deletion(-)

diff --git a/PVE/VZDump/QemuServer.pm b/PVE/VZDump/QemuServer.pm
index 7882b74b..4e27f786 100644
--- a/PVE/VZDump/QemuServer.pm
+++ b/PVE/VZDump/QemuServer.pm
@@ -3,8 +3,10 @@ package PVE::VZDump::QemuServer;
 use strict;
 use warnings;
 
+use Fcntl qw(:mode);
 use File::Basename;
 use File::Path;
+use File::stat qw();
 use IO::File;
 use IPC::Open3;
 use JSON;
@@ -20,7 +22,7 @@ use PVE::QMPClient;
 use PVE::Storage::Plugin;
 use PVE::Storage::PBSPlugin;
 use PVE::Storage;
-use PVE::Tools;
+use PVE::Tools qw(run_command);
 use PVE::VZDump;
 use PVE::Format qw(render_duration render_bytes);
 
@@ -277,6 +279,8 @@ sub archive {
 
     if ($self->{vzdump}->{opts}->{pbs}) {
 	$self->archive_pbs($task, $vmid);
+    } elsif ($self->{vzdump}->{'backup-provider'}) {
+	$self->archive_external($task, $vmid);
     } else {
 	$self->archive_vma($task, $vmid, $filename, $comp);
     }
@@ -1144,11 +1148,61 @@ sub snapshot {
     # nothing to do
 }
 
+my sub cleanup_nbd_block_devices {
+    my ($self, $paths) = @_;
+    for my $nbd_device ($paths->@*) { # best effort - a failure is logged, the rest is still tried
+	my $disconnect_cmd = ["qemu-nbd", "-d", $nbd_device];
+	eval { run_command($disconnect_cmd); };
+	$self->log('warn', "unable to disconnect NBD backup source '$nbd_device' - $@") if $@;
+    }
+}
+
+my sub cleanup_child_processes {
+    my ($self, $cpids) = @_;
+    # Reap the children whose PIDs are the keys of $cpids: SIGTERM first, SIGKILL as last resort.
+    my $waited;
+    my $wait_limit = 5; # number of one-second polling rounds before escalating
+    for ($waited = 0; $waited < $wait_limit && scalar(keys $cpids->%*); $waited++) {
+	for my $cpid (keys $cpids->%*) {
+	    delete($cpids->{$cpid}) if waitpid($cpid, POSIX::WNOHANG) > 0; # reaped, forget it
+	}
+	if ($waited == 0) {
+	    kill 15, $_ for keys $cpids->%*; # SIGTERM, only sent in the first round
+	}
+	sleep 1;
+    }
+    if ($waited == $wait_limit && scalar(keys $cpids->%*)) { # all rounds used, children remain
+	kill 9, $_ for keys $cpids->%*; # SIGKILL
+	sleep 1;
+	for my $cpid (keys $cpids->%*) {
+	    delete($cpids->{$cpid}) if waitpid($cpid, POSIX::WNOHANG) > 0;
+	}
+	$self->log('warn', "unable to collect child process '$_'") for keys $cpids->%*; # give up
+    }
+}
+
 sub cleanup {
     my ($self, $task, $vmid) = @_;
 
     # If VM was started only for backup, it is already stopped now.
     if (PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+	if ($task->{cleanup}->{'nbd-stop'}) {
+	    eval { PVE::QemuServer::QMPHelpers::nbd_stop($vmid); };
+	    $self->logerr($@) if $@;
+	}
+
+	if (my $info = $task->{cleanup}->{'backup-access-teardown'}) {
+	    my $params = {
+		'target-id' => $info->{'target-id'},
+		timeout => 60,
+		success => $info->{success} ? JSON::true : JSON::false,
+	    };
+
+	    $self->loginfo("tearing down backup-access");
+	    eval { mon_cmd($vmid, "backup-access-teardown", $params->%*) };
+	    $self->logerr($@) if $@;
+	}
+
 	$detach_tpmstate_drive->($task, $vmid);
 	detach_fleecing_images($task->{disks}, $vmid) if $task->{'use-fleecing'};
     }
@@ -1158,6 +1212,336 @@ sub cleanup {
     if ($self->{qmeventd_fh}) {
 	close($self->{qmeventd_fh});
     }
+
+    cleanup_nbd_block_devices($self, $task->{cleanup}->{'nbd-block-devices'})
+	if $task->{cleanup}->{'nbd-block-devices'};
+
+    cleanup_child_processes($self, $task->{cleanup}->{'child-pids'})
+	if $task->{cleanup}->{'child-pids'};
+}
+
+my sub bind_next_free_dev_nbd_node {
+    my ($options) = @_;
+
+    # Binds $options (passed to 'qemu-nbd -c <node>') to a free /dev/nbdX node and returns its path.
+    # /dev/nbdX devices are reserved in a file. Those reservations expire after $expiretime.
+    # This avoids race conditions between allocation and use.
+    die "file '/sys/module/nbd' does not exist - 'nbd' kernel module not loaded?"
+	if !-e "/sys/module/nbd";
+
+    my $line = PVE::Tools::file_read_firstline("/sys/module/nbd/parameters/nbds_max")
+	or die "could not read 'nbds_max' parameter file for 'nbd' kernel module\n";
+    my ($nbds_max) = ($line =~ m/(\d+)/)
+	or die "could not determine 'nbds_max' parameter for 'nbd' kernel module\n";
+
+    my $filename = "/run/qemu-server/reserved-dev-nbd-nodes";
+
+    my $code = sub {
+	my $expiretime = 60;
+	my $ctime = time();
+
+	my $used = {};
+	my $latest = [0, 0]; # [node number, timestamp] of the newest entry in the file, expired or not
+
+	if (my $fh = IO::File->new ($filename, "r")) {
+	    while (my $line = <$fh>) {
+		if ($line =~ m/^(\d+)\s(\d+)$/) { # "<node number> <timestamp>", other lines are skipped
+		    my ($n, $timestamp) = ($1, $2);
+
+		    $latest = [$n, $timestamp] if $latest->[1] <= $timestamp;
+
+		    if (($timestamp + $expiretime) > $ctime) {
+			$used->{$n} = $timestamp; # not expired
+		    }
+		}
+	    }
+	}
+
+	my $new_n;
+	for (my $count = 0; $count < $nbds_max; $count++) {
+	    my $n = ($latest->[0] + $count) % $nbds_max; # start at the newest entry, wrap around
+	    my $block_device = "/dev/nbd${n}";
+	    next if $used->{$n}; # reserved
+	    next if !-e $block_device;
+
+	    my $st = File::stat::stat("/run/lock/qemu-nbd-nbd${n}");
+	    next if defined($st) && S_ISSOCK($st->mode) && $st->uid == 0; # in use
+
+	    # Used to avoid looping if there are other issues than the NBD node being in use
+	    my $socket_error = 0;
+	    eval {
+		my $errfunc = sub {
+		    my ($line) = @_;
+		    $socket_error = 1 if $line =~ m/^qemu-nbd: Failed to set NBD socket$/;
+		    log_warn($line);
+		};
+		run_command(["qemu-nbd", "-c", $block_device, $options->@*], errfunc => $errfunc);
+	    };
+	    if (my $err = $@) {
+		die $err if !$socket_error; # any other failure is fatal
+		log_warn("unable to bind $block_device - trying next one");
+		next;
+	    }
+	    $used->{$n} = $ctime;
+	    $new_n = $n;
+	    last;
+	}
+	# Persist the unexpired reservations plus the new one, if any. Expired entries are dropped.
+	my $data = "";
+	$data .= "$_ $used->{$_}\n" for keys $used->%*;
+
+	PVE::Tools::file_set_contents($filename, $data);
+
+	return defined($new_n) ? "/dev/nbd${new_n}" : undef; # undef if no node could be bound
+    };
+
+    my $block_device =
+	PVE::Tools::lock_file('/run/lock/qemu-server/reserved-dev-nbd-nodes.lock', 10, $code);
+    die $@ if $@;
+
+    die "unable to find free /dev/nbdX block device node\n" if !$block_device;
+
+    return $block_device;
+}
+
+my sub block_device_backup_prepare {
+    my ($self, $cleanup, $device_name, $size, $nbd_path, $bitmap_name, $count) = @_;
+    # Returns the bound /dev/nbdX path and an iterator over dirty regions; $count is unused.
+    my $nbd_info_uri = "nbd+unix:///${device_name}?socket=${nbd_path}";
+    my $qemu_nbd_uri = "nbd:unix:${nbd_path}:exportname=${device_name}";
+
+    my $error_fh;
+    my $next_dirty_region;
+
+    # $next_dirty_region->() returns the next dirty (offset, length), or nothing when done. If there
+    # is no dirty bitmap, it can be treated as if there's a full dirty one. The output of nbdinfo is
+    # a list of whitespace-separated tuples with offset, length, type, description. The first bit
+    # of 'type' is set when the bitmap is dirty, see QEMU's docs/interop/nbd.txt
+    if ($bitmap_name) {
+	my $input = IO::File->new();
+	my $info = IO::File->new();
+	$error_fh = IO::File->new();
+	my $nbdinfo_cmd = ["nbdinfo", $nbd_info_uri, "--map=qemu:dirty-bitmap:${bitmap_name}"];
+	my $cpid = open3($input, $info, $error_fh, $nbdinfo_cmd->@*)
+	    or die "failed to spawn nbdinfo child - $!\n";
+	$cleanup->{'child-pids'}->{$cpid} = 1; # recorded so that cleanup can reap the child
+
+	$next_dirty_region = sub {
+	    my ($offset, $length, $type);
+	    do {
+		my $line = <$info>;
+		return if !$line; # end of nbdinfo output
+		die "unexpected output from nbdinfo - $line\n" # \s+ so a short line cannot be split up
+		    if $line !~ m/^\s*(\d+)\s+(\d+)\s+(\d+)/; # also untaints
+		($offset, $length, $type) = ($1, $2, $3);
+	    } while (($type & 0x1) == 0); # not dirty
+	    return ($offset, $length);
+	};
+    } else {
+	my $done = 0;
+	$next_dirty_region = sub {
+	    return if $done;
+	    $done = 1;
+	    return (0, $size); # a single region covering the whole disk
+	};
+    }
+
+    my $block_device = bind_next_free_dev_nbd_node([$qemu_nbd_uri, "--format=raw", "--discard=on"]);
+    $self->loginfo("bound NBD export for '$device_name' as $block_device");
+    push $cleanup->{'nbd-block-devices'}->@*, $block_device;
+
+    return ($block_device, $next_dirty_region);
+}
+
+my sub backup_access_to_volume_info {
+    my ($self, $cleanup, $backup_access_info, $mechanism, $nbd_path) = @_;
+    # Builds and returns the per-device volume info hash; dies on unexpected input.
+    my $bitmap_action_to_status = {
+	'not-used' => 'none',
+	'not-used-removed' => 'none',
+	'new' => 'new',
+	'used' => 'reuse',
+	'invalid' => 'new',
+    };
+
+    my $count = 0; # counts prepared block devices - this is not the X of the /dev/nbdX node
+    my $volumes = {};
+
+    for my $info ($backup_access_info->@*) {
+	my $bitmap_status = 'none';
+	my $bitmap_name;
+	if (my $bitmap_action = $info->{'bitmap-action'}) {
+	    $bitmap_status = $bitmap_action_to_status->{$bitmap_action}
+		or die "got unexpected bitmap action '$bitmap_action'\n";
+
+	    $bitmap_name = $info->{'bitmap-name'} or die "bitmap-name is not present\n";
+	}
+
+	my ($device, $size) = $info->@{qw(device size)};
+
+	$volumes->{$device}->{'bitmap-mode'} = $bitmap_status;
+	$volumes->{$device}->{size} = $size;
+
+	if ($mechanism eq 'block-device') {
+	    my ($block_device, $next_dirty_region) = block_device_backup_prepare(
+		$self, $cleanup, $device, $size, $nbd_path, $bitmap_name, $count);
+	    $count++;
+	    $volumes->{$device}->{path} = $block_device;
+	    $volumes->{$device}->{'next-dirty-region'} = $next_dirty_region;
+	} elsif ($mechanism eq 'nbd') {
+	    $volumes->{$device}->{'nbd-path'} = $nbd_path;
+	    $volumes->{$device}->{'bitmap-name'} = $bitmap_name;
+	} else {
+	    die "internal error - unknown mechanism '$mechanism'";
+	}
+    }
+
+    return $volumes;
+}
+
+sub archive_external {
+    my ($self, $task, $vmid) = @_;
+    # Backs up VM $vmid via the external backup provider; re-throws any error after logging it.
+    my $guest_config = PVE::Tools::file_get_contents("$task->{tmpdir}/qemu-server.conf");
+    my $firewall_file = "$task->{tmpdir}/qemu-server.fw";
+
+    my $opts = $self->{vzdump}->{opts};
+
+    my $backup_provider = $self->{vzdump}->{'backup-provider'};
+
+    $self->loginfo("starting external backup via " . $backup_provider->provider_name());
+
+    my $starttime = time();
+
+    # get list early so we die on unknown drive types before doing anything
+    my $devlist = _get_task_devlist($task);
+
+    $self->enforce_vm_running_for_backup($vmid);
+    $self->{qmeventd_fh} = PVE::QemuServer::register_qmeventd_handle($vmid);
+
+    eval {
+	$SIG{INT} = $SIG{TERM} = $SIG{QUIT} = $SIG{HUP} = $SIG{PIPE} = sub {
+	    die "interrupted by signal\n";
+	};
+
+	my $qemu_support = mon_cmd($vmid, "query-proxmox-support");
+
+	$attach_tpmstate_drive->($self, $task, $vmid);
+
+	my $is_template = PVE::QemuConfig->is_template($self->{vmlist}->{$vmid});
+
+	my $fleecing = check_and_prepare_fleecing(
+	    $self, $vmid, $opts->{fleecing}, $task->{disks}, $is_template, $qemu_support, 1);
+	die "cannot setup backup access without fleecing\n" if !$fleecing;
+
+	$task->{'use-fleecing'} = 1;
+
+	my $target_id = "snapshot-access:$opts->{storage}";
+
+	my ($mechanism, $bitmap_name) = $backup_provider->backup_get_mechanism($vmid, 'qemu');
+	die "mechanism '$mechanism' requested by backup provider is not supported for VMs\n"
+	    if $mechanism ne 'block-device' && $mechanism ne 'nbd';
+
+	$self->loginfo("using backup mechanism '$mechanism'");
+
+	if ($mechanism eq 'block-device') {
+	    # For mechanism 'block-device' the bitmap needs to be passed to the provider. The bitmap
+	    # cannot be dumped via QMP and doing it via qemu-img is experimental, so use nbdinfo.
+	    die "need 'nbdinfo' binary from package libnbd-bin\n" if !-e "/usr/bin/nbdinfo";
+
+	    if (!-e '/sys/module/nbd/coresize') {
+		die "required 'nbd' kernel module not loaded - use 'modprobe nbd nbds_max=128'"
+		    ." and adapt your modules/modprobe configuration to make it permanent\n";
+	    }
+	}
+
+	my $params = {
+	    'target-id' => $target_id,
+	    devlist => $devlist,
+	    timeout => 60,
+	};
+
+	if ($bitmap_name) {
+	    # prepend storage ID so different providers can never cause clashes
+	    $bitmap_name = "$opts->{storage}-" . $bitmap_name;
+	    $params->{'bitmap-name'} = $bitmap_name;
+	}
+
+	my $fs_frozen = $self->qga_fs_freeze($task, $vmid);
+
+	$self->loginfo("setting up snapshot-access for backup");
+	# Recorded before issuing the setup command, so the teardown info is in place if setup fails.
+	$task->{cleanup}->{'backup-access-teardown'} = { 'target-id' => $target_id, success => 0 };
+
+	my $backup_access_info = eval { mon_cmd($vmid, "backup-access-setup", $params->%*) };
+	my $qmperr = $@;
+	# Thaw before propagating a setup error, so a failed setup does not skip the thaw.
+	if ($fs_frozen) {
+	    $self->qga_fs_thaw($vmid);
+	}
+
+	die $qmperr if $qmperr;
+
+	$self->resume_vm_after_job_start($task, $vmid);
+
+	my $bitmap_info = mon_cmd($vmid, 'query-pbs-bitmap-info');
+	for my $info (sort { $a->{drive} cmp $b->{drive} } $bitmap_info->@*) {
+	    my $text = $bitmap_action_to_human->($self, $info);
+	    my $drive = $info->{drive};
+	    $drive =~ s/^drive-//; # for consistency
+	    $self->loginfo("$drive: dirty-bitmap status: $text");
+	}
+
+	$self->loginfo("starting NBD server");
+
+	my $nbd_path = "/run/qemu-server/$vmid\_nbd.backup_access";
+	mon_cmd(
+	    $vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $nbd_path } } );
+	$task->{cleanup}->{'nbd-stop'} = 1;
+
+	for my $info ($backup_access_info->@*) {
+	    $self->loginfo("adding NBD export for $info->{device}");
+
+	    my $export_params = {
+		id => $info->{device},
+		'node-name' => $info->{'node-name'},
+		writable => JSON::true, # for discard
+		type => "nbd",
+		name => $info->{device}, # NBD export name
+	    };
+
+	    if ($info->{'bitmap-name'}) {
+		$export_params->{bitmaps} = [{
+		    node => $info->{'bitmap-node-name'},
+		    name => $info->{'bitmap-name'},
+		}];
+	    }
+
+	    mon_cmd($vmid, "block-export-add", $export_params->%*);
+	}
+
+	my $volumes = backup_access_to_volume_info(
+	    $self, $task->{cleanup}, $backup_access_info, $mechanism, $nbd_path);
+
+	my $param = {};
+	$param->{'bandwidth-limit'} = $opts->{bwlimit} * 1024 if $opts->{bwlimit};
+	$param->{'firewall-config'} = PVE::Tools::file_get_contents($firewall_file)
+	    if -e $firewall_file;
+
+	$backup_provider->backup_vm($vmid, $guest_config, $volumes, $param);
+    };
+    my $err = $@;
+
+    if ($err) {
+	$self->logerr($err);
+	$self->resume_vm_after_job_start($task, $vmid);
+    } else {
+	$task->{size} = $backup_provider->backup_get_task_size($vmid);
+	$task->{cleanup}->{'backup-access-teardown'}->{success} = 1; # read by cleanup() for teardown
+    }
+    $self->restore_vm_power_state($vmid);
+
+    die $err if $err;
+}
 
 1;
-- 
2.39.5





More information about the pve-devel mailing list