[pve-devel] [PATCH pve-storage 08/10] qcow2: add external snapshot support

Fabian Grünbichler f.gruenbichler at proxmox.com
Fri Jul 4 13:52:46 CEST 2025


> Alexandre Derumier via pve-devel <pve-devel at lists.proxmox.com> hat am 04.07.2025 08:45 CEST geschrieben:
> add a snapext option to enable the feature
> 
> When a snapshot is taken, the current volume is renamed to snap volname
> and a current image is created with the snap volume as backing file
> 
> Signed-off-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
> ---
>  src/PVE/Storage.pm           |   1 -
>  src/PVE/Storage/Common.pm    |   3 +-
>  src/PVE/Storage/DirPlugin.pm |   1 +
>  src/PVE/Storage/Plugin.pm    | 263 +++++++++++++++++++++++++++++++++--
>  4 files changed, 252 insertions(+), 16 deletions(-)
> 
> diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
> index 0396160..d83770c 100755
> --- a/src/PVE/Storage.pm
> +++ b/src/PVE/Storage.pm
> @@ -479,7 +479,6 @@ sub volume_snapshot_rollback {
>      }
>  }
>  
> -# FIXME PVE 8.x remove $running parameter (needs APIAGE reset)
>  sub volume_snapshot_delete {
>      my ($cfg, $volid, $snap, $running) = @_;
>  
> diff --git a/src/PVE/Storage/Common.pm b/src/PVE/Storage/Common.pm
> index e73eeab..43f3f15 100644
> --- a/src/PVE/Storage/Common.pm
> +++ b/src/PVE/Storage/Common.pm
> @@ -172,10 +172,11 @@ sub qemu_img_create {
>  }
>  
>  sub qemu_img_info {
> -    my ($filename, $file_format, $timeout) = @_;
> +    my ($filename, $file_format, $timeout, $follow_backing_files) = @_;
>  
>      my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename];
>      push $cmd->@*, '-f', $file_format if $file_format;
> +    push $cmd->@*, '--backing-chain' if $follow_backing_files;
>  
>      my $json = '';
>      my $err_output = '';
> diff --git a/src/PVE/Storage/DirPlugin.pm b/src/PVE/Storage/DirPlugin.pm
> index 10e4f70..ae5d083 100644
> --- a/src/PVE/Storage/DirPlugin.pm
> +++ b/src/PVE/Storage/DirPlugin.pm
> @@ -95,6 +95,7 @@ sub options {
>          is_mountpoint => { optional => 1 },
>          bwlimit => { optional => 1 },
>          preallocation => { optional => 1 },
> +        snapext => { optional => 1 },

needs to be "fixed", as the code doesn't handle mixing internal
and external snapshots on a single storage..

>      };
>  }
>  
> diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
> index 88c30c2..68d17ff 100644
> --- a/src/PVE/Storage/Plugin.pm
> +++ b/src/PVE/Storage/Plugin.pm
> @@ -215,6 +215,11 @@ my $defaultData = {
>              maximum => 65535,
>              optional => 1,
>          },
> +        'snapext' => {
> +            type => 'boolean',
> +            description => 'enable external snapshot.',
> +            optional => 1,
> +        },
>      },
>  };
>  
> @@ -727,6 +732,7 @@ sub filesystem_path {
>      my ($class, $scfg, $volname, $snapname) = @_;
>  
>      my ($vtype, $name, $vmid, undef, undef, $isBase, $format) = $class->parse_volname($volname);
> +    $name = $class->get_snap_name($volname, $snapname) if $scfg->{snapext} && $snapname;
>  
>      # Note: qcow2/qed has internal snapshot, so path is always
>      # the same (with or without snapshot => same file).
> @@ -931,6 +937,26 @@ sub alloc_image {
>      return "$vmid/$name";
>  }
>  
> +my sub alloc_backed_image {
> +    my ($class, $storeid, $scfg, $volname, $backing_snap) = @_;
> +
> +    my $path = $class->path($scfg, $volname, $storeid);
> +    my $backing_path = $class->path($scfg, $volname, $storeid, $backing_snap);

should we use a relative path here like we do when doing a linked clone? else
it basically means that it is no longer possible to move the storage mountpoint,
unless I am mistaken?

> +
> +    eval { PVE::Storage::Common::qemu_img_create($scfg, 'qcow2', undef, $path, $backing_path) };
> +    if ($@) {
> +        unlink $path;
> +        die "$@";
> +    }
> +}
> +
> +my sub free_snap_image {
> +    my ($class, $storeid, $scfg, $volname, $snap) = @_;
> +
> +    my $path = $class->path($scfg, $volname, $storeid, $snap);
> +    unlink($path) || die "unlink '$path' failed - $!\n";
> +}
> +
>  sub free_image {
>      my ($class, $storeid, $scfg, $volname, $isBase, $format) = @_;
>  
> @@ -953,6 +979,20 @@ sub free_image {
>              return undef;
>          }
>  
> +        #delete external snapshots
> +        if ($scfg->{snapext}) {
> +            my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +            for my $snapid (
> +                sort { $snapshots->{$b}->{order} <=> $snapshots->{$a}->{order} }
> +                keys %$snapshots
> +            ) {
> +                my $snap = $snapshots->{$snapid};
> +                next if $snapid eq 'current';
> +                next if !$snap->{ext};
> +                free_snap_image($class, $storeid, $scfg, $volname, $snapid);
> +            }
> +        }
> +

this is a bit tricky.. once we've deleted the first snapshot, we've basically invalidated
the whole image.. should we try to continue freeing as much as possible? and maybe even
start with the "current" image, so that a partial removal doesn't look like a valid image
anymore?

>          unlink($path) || die "unlink '$path' failed - $!\n";
>      }
>  
> @@ -1159,11 +1199,39 @@ sub volume_snapshot {
>  
>      die "can't snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/;

and snapext is only allowed for qcow2!

>  
> -    my $path = $class->filesystem_path($scfg, $volname);
> +    if ($scfg->{snapext}) {
> +
> +        my $vmid = ($class->parse_volname($volname))[2];
> +
> +        #if running, the old current has been renamed with blockdev-reopen by qemu
> +        if (!$running) {
> +            #rename current volume to snap volume

the two comments here could be a single one ;)

# rename volume unless qemu has already done it for us

> +            $class->rename_volume($scfg, $storeid, $volname, $vmid, undef, 'current', $snap);
> +        }
> +
> +        eval { alloc_backed_image($class, $storeid, $scfg, $volname, $snap) };
> +        if ($@) {
> +            warn "$@ \n";
> +            #if running, the revert is done by qemu with blockdev-reopen
> +            if (!$running) {
> +                eval {
> +                    $class->rename_volume(
> +                        $scfg, $storeid, $volname, $vmid, undef, $snap, 'current',
> +                    );
> +                };
> +                warn $@ if $@;
> +            }
> +            die "can't allocate new volume $volname with $snap backing image\n";
> +        }
> +
> +    } else {
> +
> +        my $path = $class->filesystem_path($scfg, $volname);
>  
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot', '-c', $snap, $path];
> +        my $cmd = ['/usr/bin/qemu-img', 'snapshot', '-c', $snap, $path];
>  
> -    run_command($cmd);
> +        run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1174,6 +1242,21 @@ sub volume_snapshot {
>  sub volume_rollback_is_possible {
>      my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
>  
> +    if ($scfg->{snapext}) {
> +        #technically, we could manage multibranch, we it need lot more work for snapshot delete
> +        #we need to implemente block-stream from deleted snapshot to all others child branchs
> +        #when online, we need to do a transaction for multiple disk when delete the last snapshot
> +        #and need to merge in current running file
> +
> +        my $snappath = $class->path($scfg, $volname, $storeid, $snap);
> +        my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +        my $parentsnap = $snapshots->{current}->{parent};
> +
> +        return 1 if $parentsnap eq $snap;
> +

while only used for replication atm AFAIR, we could fill $blockers here since
we have the information readily available already..

> +        die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n";
> +    }
> +

nit: could be inverted:

# internal snapshots have no restrictions
return 1 if !$scfg->{snapext};

then the big part of the code doesn't need another level of indentation..

>      return 1;
>  }
>  
> @@ -1182,11 +1265,22 @@ sub volume_snapshot_rollback {
>  
>      die "can't rollback snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
>  
> -    my $path = $class->filesystem_path($scfg, $volname);
> -
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot', '-a', $snap, $path];
> +    if ($scfg->{snapext}) {
> +        #simply delete the current snapshot and recreate it
> +        eval { free_snap_image($class, $storeid, $scfg, $volname, 'current') };
> +        if ($@) {
> +            die "can't delete old volume $volname: $@\n";
> +        }
>  
> -    run_command($cmd);
> +        eval { alloc_backed_image($class, $storeid, $scfg, $volname, $snap) };
> +        if ($@) {
> +            die "can't allocate new volume $volname: $@\n";
> +        }
> +    } else {
> +        my $path = $class->filesystem_path($scfg, $volname);
> +        my $cmd = ['/usr/bin/qemu-img', 'snapshot', '-a', $snap, $path];
> +        run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1196,15 +1290,83 @@ sub volume_snapshot_delete {
>  
>      die "can't delete snapshot for this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
>  
> -    return 1 if $running;
> +    my $cmd = "";
>  
> -    my $path = $class->filesystem_path($scfg, $volname);
> +    if ($scfg->{snapext}) {
>  
> -    $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +        #qemu has already live commit|stream the snapshot, therefore we only have to drop the image itself
> +        if ($running) {
> +            eval { free_snap_image($class, $storeid, $scfg, $volname, $snap) };
> +            if ($@) {
> +                die "can't delete snapshot $snap of volume $volname: $@\n";
> +            }
> +            return;
> +        }
>  
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot', '-d', $snap, $path];
> +        my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +        my $snappath = $snapshots->{$snap}->{file};
> +        my $snap_volname = $snapshots->{$snap}->{volname};
> +        die "volume $snappath is missing" if !-e $snappath;
> +
> +        my $parentsnap = $snapshots->{$snap}->{parent};
> +        my $childsnap = $snapshots->{$snap}->{child};
> +        my $childpath = $snapshots->{$childsnap}->{file};
> +
> +        #if first snapshot,as it should be bigger,  we merge child, and rename the snapshot to child
> +        if (!$parentsnap) {
> +            print "$volname: deleting snapshot '$snap' by commiting snapshot '$childsnap'\n";
> +            print "running 'qemu-img commit $childpath'\n";
> +            $cmd = ['/usr/bin/qemu-img', 'commit', $childpath];
> +            eval { run_command($cmd) };
> +            if ($@) {
> +                warn
> +                    "The state of $snap is now invalid. Don't try to clone or rollback it. You can only try to delete it again later\n";
> +                die "error commiting $childsnap to $snap; $@\n";
> +            }
> +
> +            print "rename $snappath to $childpath\n";
> +            rename($snappath, $childpath)
> +                || die "rename '$snappath' to '$childpath' failed - $!\n";

should this use `rename_volume` or `rename_snapshot`?

>  
> -    run_command($cmd);
> +        } else {
> +            #we rebase the child image on the parent as new backing image
> +            my $parentpath = $snapshots->{$parentsnap}->{file};
> +            print
> +                "$volname: deleting snapshot '$snap' by rebasing '$childsnap' on top of '$parentsnap'\n";
> +            print "running 'qemu-img rebase -b $parentpath -F qcow -f qcow2 $childpath'\n";
> +            $cmd = [
> +                '/usr/bin/qemu-img',
> +                'rebase',
> +                '-b',
> +                $parentpath,
> +                '-F',
> +                'qcow2',
> +                '-f',
> +                'qcow2',
> +                $childpath,
> +            ];
> +            eval { run_command($cmd) };
> +            if ($@) {
> +                #in case of abort, the state of the snap is still clean, just a little bit bigger
> +                die "error rebase $childsnap from $parentsnap; $@\n";
> +            }
> +            #delete the old snapshot file (not part of the backing chain anymore)
> +            eval { free_snap_image($class, $storeid, $scfg, $volname, $snap) };
> +            if ($@) {
> +                die "error delete old snapshot volume $snap_volname: $@\n";
> +            }
> +        }
> +
> +    } else {
> +
> +        return 1 if $running;
> +
> +        my $path = $class->filesystem_path($scfg, $volname);
> +        $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +
> +        $cmd = ['/usr/bin/qemu-img', 'snapshot', '-d', $snap, $path];
> +        run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1484,7 +1646,53 @@ sub status {
>  sub volume_snapshot_info {
>      my ($class, $scfg, $storeid, $volname) = @_;
>  
> -    die "volume_snapshot_info is not implemented for $class";
> +    my $path = $class->filesystem_path($scfg, $volname);
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
> +        $class->parse_volname($volname);
> +
> +    my $json = PVE::Storage::Common::qemu_img_info($path, undef, 10, 1);
> +    die "failed to query file information with qemu-img\n" if !$json;
> +    my $json_decode = eval { decode_json($json) };
> +    if ($@) {
> +        die "Can't decode qemu snapshot list. Invalid JSON: $@\n";
> +    }
> +    my $info = {};
> +    my $order = 0;
> +    if (ref($json_decode) eq 'HASH') {
> +        #internal snapshots is a hashref
> +        my $snapshots = $json_decode->{snapshots};
> +        for my $snap (@$snapshots) {
> +            my $snapname = $snap->{name};
> +            $info->{$snapname}->{order} = $snap->{id};
> +            $info->{$snapname}->{timestamp} = $snap->{'date-sec'};
> +
> +        }
> +    } elsif (ref($json_decode) eq 'ARRAY') {
> +        #no snapshot or external  snapshots is an arrayref
> +        my $snapshots = $json_decode;
> +        for my $snap (@$snapshots) {
> +            my $snapfile = $snap->{filename};
> +            my $snapname = parse_snapname($snapfile);
> +            $snapname = 'current' if !$snapname;
> +            my $snapvolname = $class->get_snap_volname($volname, $snapname);
> +
> +            $info->{$snapname}->{order} = $order;
> +            $info->{$snapname}->{file} = $snapfile;
> +            $info->{$snapname}->{volname} = "$snapvolname";
> +            $info->{$snapname}->{volid} = "$storeid:$snapvolname";
> +            $info->{$snapname}->{ext} = 1;
> +
> +            my $parentfile = $snap->{'backing-filename'};
> +            if ($parentfile) {
> +                my $parentname = parse_snapname($parentfile);
> +                $info->{$snapname}->{parent} = $parentname;
> +                $info->{$parentname}->{child} = $snapname;
> +            }
> +            $order++;
> +        }
> +    }
> +
> +    return $info;
>  }
>  
>  sub activate_storage {
> @@ -2004,7 +2212,7 @@ sub qemu_blockdev_options {
>          # the snapshot alone.
>          my $format = ($class->parse_volname($volname))[6];
>          die "cannot attach only the snapshot of a '$format' image\n"
> -            if $options->{'snapshot-name'} && ($format eq 'qcow2' || $format eq 'qed');
> +            if $options->{'snapshot-name'} && ($format eq 'qcow2' && !$scfg->{snapext} || $format eq 'qed');

let's make this a bit easier to read:

my $internal_snapshot = $format eq 'qed' || ($format eq 'qcow2' && !$scfg->{snapext});
die ..
    if $options->{'snapshot-name'} && $internal_snapshot;

?

and then we can switch to using the new helper and combining that with the format?

>  
>          # The 'file' driver only works for regular files. The check below is taken from
>          # block/file-posix.c:hdev_probe_device() in QEMU. Do not bother with detecting 'host_cdrom'
> @@ -2108,4 +2316,31 @@ sub config_aware_base_mkdir {
>      }
>  }
>  
> +sub get_snap_name {

should this be public?

> +    my ($class, $volname, $snapname) = @_;
> +
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
> +        $class->parse_volname($volname);
> +    $name = !$snapname || $snapname eq 'current' ? $name : "snap-$snapname-$name";

this is never called without a snapname, so we can assert that and drop this here..

the naming scheme here still clashes with regular volids unfortunately:

$ pvesm alloc ext4 12344321 snap-foobar-12344321-disk-foofoobar.qcow2 1G
Formatting '/mnt/pve/ext4/images/12344321/snap-foobar-12344321-disk-foofoobar.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=off compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16
successfully created 'ext4:12344321/snap-foobar-12344321-disk-foofoobar.qcow2'
$ pvesm list ext4 -content images -vmid 12344321 | grep foobar
ext4:12344321/snap-foobar-12344321-disk-foofoobar.qcow2 qcow2   images    1073741824 12344321
$ qm set 12344321 --scsi0 ext4:12344321/snap-foobar-12344321-disk-foofoobar.qcow2

should we maybe move snapshot files into a subdir, since `/` is not allowed in volnames?

> +    return $name;
> +}
> +
> +sub get_snap_volname {

should this be public?

> +    my ($class, $volname, $snapname) = @_;
> +
> +    my $vmid = ($class->parse_volname($volname))[2];
> +    my $name = $class->get_snap_name($volname, $snapname);
> +    return "$vmid/$name";
> +}
> +
> +sub parse_snapname {

should this be public?

> +    my ($name) = @_;
> +
> +    my $basename = basename($name);
> +    if ($basename =~ m/^snap-(.*)-vm(.*)$/) {

see above..

> +        return $1;
> +    }
> +    return undef;
> +}
> +
>  1;
> -- 
> 2.39.5




More information about the pve-devel mailing list