[pve-devel] [PATCH pve-storage 2/5] qcow2: add external snapshot support
Fabian Grünbichler
f.gruenbichler at proxmox.com
Fri May 9 12:30:04 CEST 2025
> Alexandre Derumier via pve-devel <pve-devel at lists.proxmox.com> hat am 22.04.2025 13:51 CEST geschrieben:
> add a snapext option to enable the feature
>
> When a snapshot is taken, the current volume is renamed to snap volname
> and a current image is created with the snap volume as backing file
>
> Signed-off-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
> ---
> src/PVE/Storage.pm | 5 +-
> src/PVE/Storage/DirPlugin.pm | 1 +
> src/PVE/Storage/Plugin.pm | 277 ++++++++++++++++++++++++++++++-----
> 3 files changed, 242 insertions(+), 41 deletions(-)
>
> diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
> index 1a37cc8..db9d190 100755
> --- a/src/PVE/Storage.pm
> +++ b/src/PVE/Storage.pm
> @@ -348,13 +348,13 @@ sub volume_rollback_is_possible {
> }
>
> sub volume_snapshot {
> - my ($cfg, $volid, $snap) = @_;
> + my ($cfg, $volid, $snap, $running) = @_;
>
> my ($storeid, $volname) = parse_volume_id($volid, 1);
> if ($storeid) {
> my $scfg = storage_config($cfg, $storeid);
> my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
> - return $plugin->volume_snapshot($scfg, $storeid, $volname, $snap);
> + return $plugin->volume_snapshot($scfg, $storeid, $volname, $snap, $running);
this is an API bump, should be called out somewhere and documented what it means
> } elsif ($volid =~ m|^(/.+)$| && -e $volid) {
> die "snapshot file/device '$volid' is not possible\n";
> } else {
> @@ -378,7 +378,6 @@ sub volume_snapshot_rollback {
> }
> }
>
> -# FIXME PVE 8.x remove $running parameter (needs APIAGE reset)
> sub volume_snapshot_delete {
> my ($cfg, $volid, $snap, $running) = @_;
>
> diff --git a/src/PVE/Storage/DirPlugin.pm b/src/PVE/Storage/DirPlugin.pm
> index 734309f..54d8d74 100644
> --- a/src/PVE/Storage/DirPlugin.pm
> +++ b/src/PVE/Storage/DirPlugin.pm
> @@ -83,6 +83,7 @@ sub options {
> is_mountpoint => { optional => 1 },
> bwlimit => { optional => 1 },
> preallocation => { optional => 1 },
> + snapext => { optional => 1 },
> };
> }
>
> diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
> index 85f761c..3f83fae 100644
> --- a/src/PVE/Storage/Plugin.pm
> +++ b/src/PVE/Storage/Plugin.pm
> @@ -215,6 +215,11 @@ my $defaultData = {
> maximum => 65535,
> optional => 1,
> },
> + 'snapext' => {
> + type => 'boolean',
> + description => 'enable external snapshot.',
> + optional => 1,
> + },
> },
> };
>
> @@ -734,6 +739,8 @@ sub filesystem_path {
> my ($vtype, $name, $vmid, undef, undef, $isBase, $format) =
> $class->parse_volname($volname);
>
> + $name = $class->get_snap_name($volname, $snapname) if $scfg->{snapext} && $snapname;
> +
> # Note: qcow2/qed has internal snapshot, so path is always
> # the same (with or without snapshot => same file).
> die "can't snapshot this image format\n"
> @@ -926,14 +933,8 @@ sub alloc_image {
> umask $old_umask;
> die $err if $err;
> } else {
> - my $cmd = ['/usr/bin/qemu-img', 'create'];
> -
> - my $prealloc_opt = preallocation_cmd_option($scfg, $fmt);
> - push @$cmd, '-o', $prealloc_opt if defined($prealloc_opt);
>
> - push @$cmd, '-f', $fmt, $path, "${size}K";
> -
> - eval { run_command($cmd, errmsg => "unable to create image"); };
> + eval { qemu_img_create($scfg, $fmt, $size, $path) };
this should be a separate commit, without any semantic changes..
> if ($@) {
> unlink $path;
> rmdir $imagedir;
> @@ -944,6 +945,19 @@ sub alloc_image {
> return "$vmid/$name";
> }
>
> +sub alloc_snap_image {
this should be private.. it's also kind of misnamed, as it doesn't allocate a snapshot image,
it allocates an image backed by a snapshot?
> + my ($class, $storeid, $scfg, $volname, $backing_snap) = @_;
> +
> + my $path = $class->path($scfg, $volname, $storeid);
> + my $backing_path = $class->path($scfg, $volname, $storeid, $backing_snap);
> +
> + eval { qemu_img_create($scfg, 'qcow2', undef, $path, $backing_path) };
> + if ($@) {
> + unlink $path;
> + die "$@";
> + }
> +}
> +
> sub free_image {
> my ($class, $storeid, $scfg, $volname, $isBase, $format) = @_;
>
> @@ -980,6 +994,51 @@ sub free_image {
> # TODO taken from PVE/QemuServer/Drive.pm, avoiding duplication would be nice
> my @checked_qemu_img_formats = qw(raw qcow qcow2 qed vmdk cloop);
>
> +sub qemu_img_create {
should live in some helper module..
> + my ($scfg, $fmt, $size, $path, $backing_path) = @_;
> +
> + my $cmd = ['/usr/bin/qemu-img', 'create'];
> +
> + my $options = [];
> +
> + if($backing_path) {
> + push @$cmd, '-b', $backing_path, '-F', 'qcow2';
> + push @$options, 'extended_l2=on','cluster_size=128k';
> + };
> + push @$options, preallocation_cmd_option($scfg, $fmt);
> + push @$cmd, '-o', join(',', @$options) if @$options > 0;
> + push @$cmd, '-f', $fmt, $path;
> + push @$cmd, "${size}K" if !$backing_path;
> +
> + run_command($cmd, errmsg => "unable to create image");
> +}
> +
> +sub qemu_img_info {
extracting this should be its own commit first, then changes in this commit..
should this also live in some helper module instead of the plugin here?
> + my ($filename, $file_format, $timeout, $follow_backing_files) = @_;
> +
> + my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename];
> + push $cmd->@*, '-f', $file_format if $file_format;
> + push $cmd->@*, '--backing-chain' if $follow_backing_files;
> +
> + my $json = '';
> + my $err_output = '';
> + eval {
> + run_command($cmd,
> + timeout => $timeout,
> + outfunc => sub { $json .= shift },
> + errfunc => sub { $err_output .= shift . "\n"},
> + );
> + };
> + warn $@ if $@;
> + if ($err_output) {
> + # if qemu did not output anything to stdout we die with stderr as an error
> + die $err_output if !$json;
> + # otherwise we warn about it and try to parse the json
> + warn $err_output;
> + }
> + return $json;
> +}
> +
> # set $untrusted if the file in question might be malicious since it isn't
> # created by our stack
> # this makes certain checks fatal, and adds extra checks for known problems like
> @@ -1043,25 +1102,9 @@ sub file_size_info {
> warn "file_size_info: '$filename': falling back to 'raw' from unknown format '$file_format'\n";
> $file_format = 'raw';
> }
> - my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename];
> - push $cmd->@*, '-f', $file_format if $file_format;
>
> - my $json = '';
> - my $err_output = '';
> - eval {
> - run_command($cmd,
> - timeout => $timeout,
> - outfunc => sub { $json .= shift },
> - errfunc => sub { $err_output .= shift . "\n"},
> - );
> - };
> - warn $@ if $@;
> - if ($err_output) {
> - # if qemu did not output anything to stdout we die with stderr as an error
> - die $err_output if !$json;
> - # otherwise we warn about it and try to parse the json
> - warn $err_output;
> - }
> + my $json = qemu_img_info($filename, $file_format, $timeout);
> +
> if (!$json) {
> die "failed to query file information with qemu-img\n" if $untrusted;
> # skip decoding if there was no output, e.g. if there was a timeout.
> @@ -1183,15 +1226,37 @@ sub volume_resize {
> }
>
> sub volume_snapshot {
> - my ($class, $scfg, $storeid, $volname, $snap) = @_;
> + my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
>
> die "can't snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
>
> - my $path = $class->filesystem_path($scfg, $volname);
> + if($scfg->{snapext}) {
> +
> + if ($running) {
> + #rename with blockdev-reopen is done at qemu level when running
> + $class->alloc_snap_image($storeid, $scfg, $volname, $snap);
> + return;
> + }
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> + #rename current volume to snap volume
> + my $vmid = ($class->parse_volname($volname))[2];
> + $class->rename_volume($scfg, $storeid, $volname, $vmid, undef, 'current', $snap);
>
> - run_command($cmd);
> + $class->alloc_snap_image($storeid, $scfg, $volname, $snap);
> +
> + if ($@) {
this error here needs to be logged..
> + eval { $class->free_image($storeid, $scfg, $volname, 0) };
> + warn $@ if $@;
> + eval { $class->rename_volume($scfg, $storeid, $volname, $vmid, undef, $snap, 'current') };
> + warn $@ if $@;
and here we need to die to notify the upper stack that taking the snapshot failed
> + }
> +
> + } else {
> +
> + my $path = $class->filesystem_path($scfg, $volname);
> + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1202,6 +1267,21 @@ sub volume_snapshot {
> sub volume_rollback_is_possible {
> my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
>
> + if ($scfg->{snapext}) {
> + #technically, we could manage multibranch, we it need lot more work for snapshot delete
> + #we need to implemente block-stream from deleted snapshot to all others child branchs
> + #when online, we need to do a transaction for multiple disk when delete the last snapshot
> + #and need to merge in current running file
> +
> + my $snappath = $class->path($scfg, $volname, $storeid, $snap);
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> + my $parentsnap = $snapshots->{current}->{parent};
> +
> + return 1 if $parentsnap eq $snap;
> +
> + die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n";
> + }
> +
> return 1;
> }
>
> @@ -1212,9 +1292,21 @@ sub volume_snapshot_rollback {
>
> my $path = $class->filesystem_path($scfg, $volname);
$path is only used in the else branch..
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> + if ($scfg->{snapext}) {
> + #simply delete the current snapshot and recreate it
> + eval { $class->free_image($storeid, $scfg, $volname, 0) };
> + if ($@) {
> + die "can't delete old volume $volname: $@\n";
> + }
>
> - run_command($cmd);
> + eval { $class->alloc_snap_image($storeid, $scfg, $volname, $snap) };
> + if ($@) {
> + die "can't allocate new volume $volname: $@\n";
> + }
> + } else {
> + my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1224,15 +1316,65 @@ sub volume_snapshot_delete {
>
> die "can't delete snapshot for this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
>
> - return 1 if $running;
> -
> + my $cmd = "";
> my $path = $class->filesystem_path($scfg, $volname);
$path is only used in the else branch..
>
> - $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> + if ($scfg->{snapext}) {
> +
> + if ($running) {
should we add a comment here noting what this means? i.e., qemu has already removed
that snapshot from the backing chain, therefore we only have to drop the image itself?
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> + $volname = $class->get_snap_volname($volname, $snap);
> + $class->free_image($storeid, $scfg, $volname, $isBase, $format);
> + return;
> + }
> +
> + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> + my $snappath = $snapshots->{$snap}->{file};
> + my $snap_volname = $snapshots->{$snap}->{volname};
> + die "volume $snappath is missing" if !-e $snappath;
> +
> + my $parentsnap = $snapshots->{$snap}->{parent};
> + my $childsnap = $snapshots->{$snap}->{child};
> + my $childpath = $snapshots->{$childsnap}->{file};
> +
> + #if first snapshot,as it should be bigger, we merge child, and rename the snapshot to child
> + if(!$parentsnap) {
> + print "commit: merge content of $childpath into $snappath\n";
> + $cmd = ['/usr/bin/qemu-img', 'commit', $childpath];
> + eval { run_command($cmd) };
> + if ($@) {
should we add an error here about what state this leaves the snapshot in?
AFAIU we've potentially written some of the data from $child to $snap, so
the state of $snap is technically invalid now?
> + die "error commiting $childpath to $snappath; $@\n";
> + }
> + print"rename $snappath to $childpath\n";
> + eval { rename($snappath, $childpath) };
rename doesn't die and set $@..
> + if ($@) {
> + die "error renaming snapshot: $@\n";
> + }
> + } else {
> + #we rebase the child image on the parent as new backing image
> + my $parentpath = $snapshots->{$parentsnap}->{file};
> + print "rebase: merge diff content between $parentpath and $childpath into $childpath\n";
> + $cmd = ['/usr/bin/qemu-img', 'rebase', '-b', $parentpath, '-F', 'qcow2', '-f', 'qcow2', $childpath];
> + eval { run_command($cmd) };
> + if ($@) {
> + die "error rebase $childpath from $parentpath; $@\n";
same here, but in this case $child just contains some duplicate data so nothing is
really broken?
> + }
> + #delete the snapshot
> + eval { $class->free_image($storeid, $scfg, $snap_volname, 0); };
> + if ($@) {
and here we just leave a stray volume around that is not part of the backing chain
anymore, right?
> + die "error delete old snapshot volume $snap_volname: $@\n";
> + }
> + }
> +
> + } else {
>
> - my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> + return 1 if $running;
>
> - run_command($cmd);
> + $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +
> + $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> + run_command($cmd);
> + }
>
> return undef;
> }
> @@ -1271,7 +1413,7 @@ sub volume_has_feature {
> current => { qcow2 => 1, raw => 1, vmdk => 1 },
> },
> rename => {
> - current => {qcow2 => 1, raw => 1, vmdk => 1},
> + current => { qcow2 => 1, raw => 1, vmdk => 1},
unrelated change..
> },
> };
>
> @@ -1506,7 +1648,40 @@ sub status {
> sub volume_snapshot_info {
> my ($class, $scfg, $storeid, $volname) = @_;
>
> - die "volume_snapshot_info is not implemented for $class";
> + my $path = $class->filesystem_path($scfg, $volname);
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> +
> + my $backing_chain = 1;
shouldn't this depend on $snapext ?
> + my $json = qemu_img_info($path, undef, 10, $backing_chain);
> + die "failed to query file information with qemu-img\n" if !$json;
> + my $snapshots = eval { decode_json($json) };
> + if ($@) {
> + die "Can't decode qemu snapshot list. Invalid JSON\n";
should also contain $@ so we get an idea *what is wrong*..
> + }
> + my $info = {};
> + my $order = 0;
> + for my $snap (@$snapshots) {
this doesn't work for internal snapshots, as then qemu-img info just
returns a single object.. or if we pass --backingchain also in that
case, then the code below still doesn't correctly handle it..
> +
> + my $snapfile = $snap->{filename};
> + my $snapname = parse_snapname($snapfile);
> + $snapname = 'current' if !$snapname;
> + my $snapvolname = $class->get_snap_volname($volname, $snapname);
> +
> + $info->{$snapname}->{order} = $order;
> + $info->{$snapname}->{file}= $snapfile;
> + $info->{$snapname}->{volname} = "$snapvolname";
> + $info->{$snapname}->{volid} = "$storeid:$snapvolname";
> + $info->{$snapname}->{ext} = 1;
only if $snapext?
> +
> + my $parentfile = $snap->{'backing-filename'};
> + if ($parentfile) {
> + my $parentname = parse_snapname($parentfile);
> + $info->{$snapname}->{parent} = $parentname;
> + $info->{$parentname}->{child} = $snapname;
> + }
> + $order++;
> + }
> + return $info;
> }
>
> sub activate_storage {
> @@ -1907,4 +2082,30 @@ sub config_aware_base_mkdir {
> }
> }
>
> +sub get_snap_name {
> + my ($class, $volname, $snapname) = @_;
> +
> + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> + $name = !$snapname || $snapname eq 'current' ? $name : "snap-$snapname-$name";
> + return $name;
> +}
> +
> +sub get_snap_volname {
> + my ($class, $volname, $snapname) = @_;
> +
> + my $vmid = ($class->parse_volname($volname))[2];
> + my $name = $class->get_snap_name($volname, $snapname);
> + return "$vmid/$name";
> +}
> +
> +sub parse_snapname {
> + my ($name) = @_;
> +
> + my $basename = basename($name);
> + if ($basename =~ m/^snap-(.*)-vm(.*)$/) {
> + return $1;
> + }
> + return undef;
> +}
> +
> 1;
> --
> 2.39.5
More information about the pve-devel
mailing list