[PATCH qemu-server 3/3] qcow2: add external snapshot support

Alexandre Derumier alexandre.derumier at groupe-cyllene.com
Fri Jul 4 08:45:00 CEST 2025


fixme:
 - add tests for internal (currently missing) and external qemu snapshots
 - is it possible to use blockjob transactions for commit && stream
   to make the disk commit atomic?

Signed-off-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
---
 src/PVE/QemuConfig.pm          |   4 +-
 src/PVE/QemuServer.pm          | 132 ++++++++++++---
 src/PVE/QemuServer/Blockdev.pm | 296 ++++++++++++++++++++++++++++++++-
 src/test/snapshot-test.pm      |   4 +-
 4 files changed, 402 insertions(+), 34 deletions(-)

diff --git a/src/PVE/QemuConfig.pm b/src/PVE/QemuConfig.pm
index 82295641..e0853d65 100644
--- a/src/PVE/QemuConfig.pm
+++ b/src/PVE/QemuConfig.pm
@@ -398,7 +398,7 @@ sub __snapshot_create_vol_snapshot {
 
     print "snapshotting '$device' ($drive->{file})\n";
 
-    PVE::QemuServer::qemu_volume_snapshot($vmid, $device, $storecfg, $volid, $snapname);
+    PVE::QemuServer::qemu_volume_snapshot($vmid, $device, $storecfg, $drive, $snapname);
 }
 
 sub __snapshot_delete_remove_drive {
@@ -435,7 +435,7 @@ sub __snapshot_delete_vol_snapshot {
     my $storecfg = PVE::Storage::config();
     my $volid = $drive->{file};
 
-    PVE::QemuServer::qemu_volume_snapshot_delete($vmid, $storecfg, $volid, $snapname);
+    PVE::QemuServer::qemu_volume_snapshot_delete($vmid, $storecfg, $drive, $snapname);
 
     push @$unused, $volid;
 }
diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm
index 92c8fad6..158c91b1 100644
--- a/src/PVE/QemuServer.pm
+++ b/src/PVE/QemuServer.pm
@@ -4340,20 +4340,64 @@ sub qemu_cpu_hotplug {
 }
 
 sub qemu_volume_snapshot {
-    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;
+    my ($vmid, $deviceid, $storecfg, $drive, $snap) = @_;
 
+    my $volid = $drive->{file};
     my $running = check_running($vmid);
 
-    if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
+    my $do_snapshots_type = do_snapshots_type($storecfg, $volid, $deviceid, $running);
+
+    if ($do_snapshots_type eq 'internal') {
+        print "internal qemu snapshot\n";
         mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
-    } else {
+    } elsif ($do_snapshots_type eq 'external') {
+        my $storeid = (PVE::Storage::parse_volume_id($volid))[0];
+        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+        print "external qemu snapshot\n";
+        my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $volid);
+        my $parent_snap = $snapshots->{'current'}->{parent};
+        my $machine_version = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);
+
+        PVE::QemuServer::Blockdev::blockdev_rename(
+            $storecfg,
+            $vmid,
+            $machine_version,
+            $deviceid,
+            $drive,
+            'current',
+            $snap,
+            $parent_snap,
+        );
+        eval {
+            PVE::QemuServer::Blockdev::blockdev_external_snapshot(
+                $storecfg, $vmid, $machine_version, $deviceid, $drive, $snap,
+            );
+        };
+        if ($@) {
+            warn $@ if $@;
+            print "Error creating snapshot. Revert rename\n";
+            eval {
+                PVE::QemuServer::Blockdev::blockdev_rename(
+                    $storecfg,
+                    $vmid,
+                    $machine_version,
+                    $deviceid,
+                    $drive,
+                    $snap,
+                    'current',
+                    $parent_snap,
+                );
+            };
+        }
+    } elsif ($do_snapshots_type eq 'storage') {
         PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
     }
 }
 
 sub qemu_volume_snapshot_delete {
-    my ($vmid, $storecfg, $volid, $snap) = @_;
+    my ($vmid, $storecfg, $drive, $snap) = @_;
 
+    my $volid = $drive->{file};
     my $running = check_running($vmid);
     my $attached_deviceid;
 
@@ -4368,14 +4412,62 @@ sub qemu_volume_snapshot_delete {
         );
     }
 
-    if ($attached_deviceid && do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid)) {
+    my $do_snapshots_type = do_snapshots_type($storecfg, $volid, $attached_deviceid, $running);
+
+    if ($do_snapshots_type eq 'internal') {
         mon_cmd(
             $vmid,
             'blockdev-snapshot-delete-internal-sync',
             device => $attached_deviceid,
             name => $snap,
         );
-    } else {
+    } elsif ($do_snapshots_type eq 'external') {
+        print "delete qemu external snapshot\n";
+
+        my $path = PVE::Storage::path($storecfg, $volid);
+        my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $volid);
+        my $parentsnap = $snapshots->{$snap}->{parent};
+        my $childsnap = $snapshots->{$snap}->{child};
+        my $machine_version = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);
+
+        # if we delete the first snapshot, we commit, because the first snapshot is the original base image and should be big.
+        # improve-me: if firstsnap > child: commit, if firstsnap < child: do a stream.
+        if (!$parentsnap) {
+            print "delete first snapshot $snap\n";
+            PVE::QemuServer::Blockdev::blockdev_commit(
+                $storecfg,
+                $vmid,
+                $machine_version,
+                $attached_deviceid,
+                $drive,
+                $childsnap,
+                $snap,
+            );
+            PVE::QemuServer::Blockdev::blockdev_rename(
+                $storecfg,
+                $vmid,
+                $machine_version,
+                $attached_deviceid,
+                $drive,
+                $snap,
+                $childsnap,
+                $snapshots->{$childsnap}->{child},
+            );
+        } else {
+            #intermediate snapshot, we always stream the snapshot to child snapshot
+            print "stream intermediate snapshot $snap to $childsnap\n";
+            PVE::QemuServer::Blockdev::blockdev_stream(
+                $storecfg,
+                $vmid,
+                $machine_version,
+                $attached_deviceid,
+                $drive,
+                $snap,
+                $parentsnap,
+                $childsnap,
+            );
+        }
+    } elsif ($do_snapshots_type eq 'storage') {
         PVE::Storage::volume_snapshot_delete(
             $storecfg,
             $volid,
@@ -7563,28 +7655,20 @@ sub restore_tar_archive {
     warn $@ if $@;
 }
 
-my $qemu_snap_storage = {
-    rbd => 1,
-};
-
-sub do_snapshots_with_qemu {
-    my ($storecfg, $volid, $deviceid) = @_;
-
-    return if $deviceid =~ m/tpmstate0/;
+sub do_snapshots_type {
+    my ($storecfg, $volid, $deviceid, $running) = @_;
 
-    my $storage_name = PVE::Storage::parse_volume_id($volid);
-    my $scfg = $storecfg->{ids}->{$storage_name};
-    die "could not find storage '$storage_name'\n" if !defined($scfg);
+    #we skip snapshot for tpmstate
+    return if $deviceid && $deviceid =~ m/tpmstate0/;
 
-    if ($qemu_snap_storage->{ $scfg->{type} } && !$scfg->{krbd}) {
-        return 1;
-    }
+    #we use storage snapshot if vm is not running or if disk is unused;
+    return 'storage' if !$running || !$deviceid;
 
-    if ($volid =~ m/\.(qcow2|qed)$/) {
-        return 1;
-    }
+    my $qemu_snapshot_type = PVE::Storage::volume_support_qemu_snapshot($storecfg, $volid);
+    # if running, but don't support qemu snapshot, we use storage snapshot
+    return 'storage' if !$qemu_snapshot_type;
 
-    return;
+    return $qemu_snapshot_type;
 }
 
 =head3 template_create($vmid, $conf [, $disk])
diff --git a/src/PVE/QemuServer/Blockdev.pm b/src/PVE/QemuServer/Blockdev.pm
index 2a0513fb..07141777 100644
--- a/src/PVE/QemuServer/Blockdev.pm
+++ b/src/PVE/QemuServer/Blockdev.pm
@@ -11,6 +11,7 @@ use JSON;
 use PVE::JSONSchema qw(json_bool);
 use PVE::Storage;
 
+use PVE::QemuServer::BlockJob;
 use PVE::QemuServer::Drive qw(drive_is_cdrom);
 use PVE::QemuServer::Helpers;
 use PVE::QemuServer::Monitor qw(mon_cmd);
@@ -243,6 +244,9 @@ my sub generate_file_blockdev {
     my $blockdev = {};
     my $scfg = undef;
 
+    delete $options->{'snapshot-name'}
+        if $options->{'snapshot-name'} && $options->{'snapshot-name'} eq 'current';
+
     die "generate_file_blockdev called without volid/path\n" if !$drive->{file};
     die "generate_file_blockdev called with 'none'\n" if $drive->{file} eq 'none';
     # FIXME use overlay and new config option to define storage for temp write device
@@ -322,6 +326,9 @@ my sub generate_format_blockdev {
     die "generate_format_blockdev called with 'none'\n" if $drive->{file} eq 'none';
     die "generate_format_blockdev called with NBD path\n" if is_nbd($drive);
 
+    delete($options->{'snapshot-name'})
+        if $options->{'snapshot-name'} && $options->{'snapshot-name'} eq 'current';
+
     my $scfg;
     my $format;
     my $volid = $drive->{file};
@@ -400,6 +407,17 @@ my sub generate_backing_chain_blockdev {
     );
 }
 
+sub generate_throttle_blockdev {
+    my ($drive_id, $child) = @_;
+
+    return {
+        driver => "throttle",
+        'node-name' => top_node_name($drive_id),
+        'throttle-group' => throttle_group_id($drive_id),
+        file => $child,
+    };
+}
+
 sub generate_drive_blockdev {
     my ($storecfg, $drive, $machine_version, $options) = @_;
 
@@ -442,12 +460,7 @@ sub generate_drive_blockdev {
     return $child if $options->{fleecing} || $options->{'tpm-backup'} || $options->{'no-throttle'};
 
     # this is the top filter entry point, use $drive-drive_id as nodename
-    return {
-        driver => "throttle",
-        'node-name' => top_node_name($drive_id),
-        'throttle-group' => throttle_group_id($drive_id),
-        file => $child,
-    };
+    return generate_throttle_blockdev($drive_id, $child);
 }
 
 sub generate_pbs_blockdev {
@@ -785,4 +798,275 @@ sub set_io_throttle {
     }
 }
 
+sub blockdev_external_snapshot {
+    my ($storecfg, $vmid, $machine_version, $deviceid, $drive, $snap, $size) = @_;
+
+    print "Creating a new current volume with $snap as backing snap\n";
+
+    my $volid = $drive->{file};
+
+    #preallocate add a new current file with reference to backing-file
+    PVE::Storage::volume_snapshot($storecfg, $volid, $snap, 1);
+
+    #be sure to add drive in write mode
+    delete($drive->{ro});
+
+    my $new_file_blockdev = generate_file_blockdev($storecfg, $drive);
+    my $new_fmt_blockdev = generate_format_blockdev($storecfg, $drive, $new_file_blockdev);
+
+    my $snap_file_blockdev = generate_file_blockdev($storecfg, $drive, $snap);
+    my $snap_fmt_blockdev = generate_format_blockdev(
+        $storecfg,
+        $drive,
+        $snap_file_blockdev,
+        { 'snapshot-name' => $snap },
+    );
+
+    #backing needs to be forced to undef in the blockdev, to avoid a reopen of the backing-file on blockdev-add
+    $new_fmt_blockdev->{backing} = undef;
+
+    mon_cmd($vmid, 'blockdev-add', %$new_fmt_blockdev);
+
+    mon_cmd(
+        $vmid, 'blockdev-snapshot',
+        node => $snap_fmt_blockdev->{'node-name'},
+        overlay => $new_fmt_blockdev->{'node-name'},
+    );
+}
+
+sub blockdev_delete {
+    my ($storecfg, $vmid, $drive, $file_blockdev, $fmt_blockdev, $snap) = @_;
+
+    #add eval, as reopen automatically removes the old nodename only if it was created at vm start via command line argument
+    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => $file_blockdev->{'node-name'}) };
+    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => $fmt_blockdev->{'node-name'}) };
+
+    #delete the file (don't use vdisk_free as we don't want to delete all snapshot chain)
+    print "delete old $file_blockdev->{filename}\n";
+
+    my $storage_name = PVE::Storage::parse_volume_id($drive->{file});
+
+    my $volid = $drive->{file};
+    PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, 1);
+}
+
+sub blockdev_rename {
+    my (
+        $storecfg,
+        $vmid,
+        $machine_version,
+        $deviceid,
+        $drive,
+        $src_snap,
+        $target_snap,
+        $parent_snap,
+    ) = @_;
+
+    print "rename $src_snap to $target_snap\n";
+
+    my $volid = $drive->{file};
+
+    my $src_file_blockdev = generate_file_blockdev(
+        $storecfg,
+        $drive,
+        $machine_version,
+        { 'snapshot-name' => $src_snap },
+    );
+    my $src_fmt_blockdev = generate_format_blockdev(
+        $storecfg,
+        $drive,
+        $src_file_blockdev,
+        { 'snapshot-name' => $src_snap },
+    );
+
+    #rename volume image
+    PVE::Storage::rename_volume($storecfg, $volid, $vmid, undef, $src_snap, $target_snap);
+
+    my $target_file_blockdev = generate_file_blockdev(
+        $storecfg,
+        $drive,
+        $machine_version,
+        { 'snapshot-name' => $target_snap },
+    );
+    my $target_fmt_blockdev = generate_format_blockdev(
+        $storecfg,
+        $drive,
+        $target_file_blockdev,
+        { 'snapshot-name' => $target_snap },
+    );
+
+    if ($target_snap eq 'current' || $src_snap eq 'current') {
+        #rename from|to current
+        my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive);
+
+        #add backing to target
+        if ($parent_snap) {
+            my $parent_fmt_nodename =
+                get_node_name('fmt', $drive_id, $volid, { 'snapshot-name' => $parent_snap });
+            $target_fmt_blockdev->{backing} = $parent_fmt_nodename;
+        }
+        mon_cmd($vmid, 'blockdev-add', %$target_fmt_blockdev);
+
+        #reopen the current throttlefilter nodename with the target fmt nodename
+        my $throttle_blockdev =
+            generate_throttle_blockdev($drive_id, $target_fmt_blockdev->{'node-name'});
+        mon_cmd($vmid, 'blockdev-reopen', options => [$throttle_blockdev]);
+    } else {
+        rename($src_file_blockdev->{filename}, $target_file_blockdev->{filename});
+
+        #intermediate snapshot
+        mon_cmd($vmid, 'blockdev-add', %$target_fmt_blockdev);
+
+        #reopen the parent node with the new target fmt backing node
+        my $parent_file_blockdev = generate_file_blockdev(
+            $storecfg,
+            $drive,
+            $machine_version,
+            { 'snapshot-name' => $parent_snap },
+        );
+        my $parent_fmt_blockdev = generate_format_blockdev(
+            $storecfg,
+            $drive,
+            $parent_file_blockdev,
+            { 'snapshot-name' => $parent_snap },
+        );
+        $parent_fmt_blockdev->{backing} = $target_fmt_blockdev->{'node-name'};
+        mon_cmd($vmid, 'blockdev-reopen', options => [$parent_fmt_blockdev]);
+
+        #change backing-file in qcow2 metadata
+        mon_cmd(
+            $vmid, 'change-backing-file',
+            device => $deviceid,
+            'image-node-name' => $parent_fmt_blockdev->{'node-name'},
+            'backing-file' => $target_file_blockdev->{filename},
+        );
+    }
+
+    # delete old file|fmt nodes
+    # add eval, as reopen automatically removes the old nodename only if it was created at vm start via command line argument
+    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => $src_file_blockdev->{'node-name'}) };
+    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => $src_fmt_blockdev->{'node-name'}) };
+}
+
+sub blockdev_commit {
+    my ($storecfg, $vmid, $machine_version, $deviceid, $drive, $src_snap, $target_snap) = @_;
+
+    my $volid = $drive->{file};
+
+    print "block-commit $src_snap to base:$target_snap\n";
+
+    my $target_file_blockdev = generate_file_blockdev(
+        $storecfg,
+        $drive,
+        $machine_version,
+        { 'snapshot-name' => $target_snap },
+    );
+    my $target_fmt_blockdev = generate_format_blockdev(
+        $storecfg,
+        $drive,
+        $target_file_blockdev,
+        { 'snapshot-name' => $target_snap },
+    );
+
+    my $src_file_blockdev = generate_file_blockdev(
+        $storecfg,
+        $drive,
+        $machine_version,
+        { 'snapshot-name' => $src_snap },
+    );
+    my $src_fmt_blockdev = generate_format_blockdev(
+        $storecfg,
+        $drive,
+        $src_file_blockdev,
+        { 'snapshot-name' => $src_snap },
+    );
+
+    my $job_id = "commit-$deviceid";
+    my $jobs = {};
+    my $opts = { 'job-id' => $job_id, device => $deviceid };
+
+    $opts->{'base-node'} = $target_fmt_blockdev->{'node-name'};
+    $opts->{'top-node'} = $src_fmt_blockdev->{'node-name'};
+
+    mon_cmd($vmid, "block-commit", %$opts);
+    $jobs->{$job_id} = {};
+
+    # if we commit the current image, the blockjob needs to be in 'complete' mode
+    my $complete = $src_snap && $src_snap ne 'current' ? 'auto' : 'complete';
+
+    eval {
+        PVE::QemuServer::BlockJob::qemu_drive_mirror_monitor(
+            $vmid, undef, $jobs, $complete, 0, 'commit',
+        );
+    };
+    if ($@) {
+        die "Failed to complete block commit: $@\n";
+    }
+
+    blockdev_delete($storecfg, $vmid, $drive, $src_file_blockdev, $src_fmt_blockdev, $src_snap);
+}
+
+sub blockdev_stream {
+    my ($storecfg, $vmid, $machine_version, $deviceid, $drive, $snap, $parent_snap, $target_snap) =
+        @_;
+
+    my $volid = $drive->{file};
+    $target_snap = undef if $target_snap eq 'current';
+
+    my $parent_file_blockdev = generate_file_blockdev(
+        $storecfg,
+        $drive,
+        $machine_version,
+        { 'snapshot-name' => $parent_snap },
+    );
+    my $parent_fmt_blockdev = generate_format_blockdev(
+        $storecfg,
+        $drive,
+        $parent_file_blockdev,
+        { 'snapshot-name' => $parent_snap },
+    );
+
+    my $target_file_blockdev = generate_file_blockdev(
+        $storecfg,
+        $drive,
+        $machine_version,
+        { 'snapshot-name' => $target_snap },
+    );
+    my $target_fmt_blockdev = generate_format_blockdev(
+        $storecfg,
+        $drive,
+        $target_file_blockdev,
+        { 'snapshot-name' => $target_snap },
+    );
+
+    my $snap_file_blockdev =
+        generate_file_blockdev($storecfg, $drive, $machine_version, { 'snapshot-name' => $snap });
+    my $snap_fmt_blockdev = generate_format_blockdev(
+        $storecfg,
+        $drive,
+        $snap_file_blockdev,
+        { 'snapshot-name' => $snap },
+    );
+
+    my $job_id = "stream-$deviceid";
+    my $jobs = {};
+    my $options = { 'job-id' => $job_id, device => $target_fmt_blockdev->{'node-name'} };
+    $options->{'base-node'} = $parent_fmt_blockdev->{'node-name'};
+    $options->{'backing-file'} = $parent_file_blockdev->{filename};
+
+    mon_cmd($vmid, 'block-stream', %$options);
+    $jobs->{$job_id} = {};
+
+    eval {
+        PVE::QemuServer::BlockJob::qemu_drive_mirror_monitor(
+            $vmid, undef, $jobs, 'auto', 0, 'stream',
+        );
+    };
+    if ($@) {
+        die "Failed to complete block stream: $@\n";
+    }
+
+    blockdev_delete($storecfg, $vmid, $drive, $snap_file_blockdev, $snap_fmt_blockdev, $snap);
+}
+
 1;
diff --git a/src/test/snapshot-test.pm b/src/test/snapshot-test.pm
index 4fce87f1..f61cd64b 100644
--- a/src/test/snapshot-test.pm
+++ b/src/test/snapshot-test.pm
@@ -399,8 +399,8 @@ sub set_migration_caps { } # ignored
 
 # BEGIN redefine PVE::QemuServer methods
 
-sub do_snapshots_with_qemu {
-    return 0;
+sub do_snapshots_type {
+    return 'storage';
 }
 
 sub vm_start {
-- 
2.39.5




More information about the pve-devel mailing list