[pve-devel] [PATCH pve-storage 2/5] qcow2: add external snapshot support

Fabian Grünbichler f.gruenbichler at proxmox.com
Fri May 9 12:30:04 CEST 2025


> Alexandre Derumier via pve-devel <pve-devel at lists.proxmox.com> hat am 22.04.2025 13:51 CEST geschrieben:
> add a snapext option to enable the feature
> 
> When a snapshot is taken, the current volume is renamed to snap volname
> and a current image is created with the snap volume as backing file
> 
> Signed-off-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
> ---
>  src/PVE/Storage.pm           |   5 +-
>  src/PVE/Storage/DirPlugin.pm |   1 +
>  src/PVE/Storage/Plugin.pm    | 277 ++++++++++++++++++++++++++++++-----
>  3 files changed, 242 insertions(+), 41 deletions(-)
> 
> diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm
> index 1a37cc8..db9d190 100755
> --- a/src/PVE/Storage.pm
> +++ b/src/PVE/Storage.pm
> @@ -348,13 +348,13 @@ sub volume_rollback_is_possible {
>  }
>  
>  sub volume_snapshot {
> -    my ($cfg, $volid, $snap) = @_;
> +    my ($cfg, $volid, $snap, $running) = @_;
>  
>      my ($storeid, $volname) = parse_volume_id($volid, 1);
>      if ($storeid) {
>  	my $scfg = storage_config($cfg, $storeid);
>  	my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
> -	return $plugin->volume_snapshot($scfg, $storeid, $volname, $snap);
> +	return $plugin->volume_snapshot($scfg, $storeid, $volname, $snap, $running);

this is an API bump - it should be called out somewhere, and what it means should be documented

>      } elsif ($volid =~ m|^(/.+)$| && -e $volid) {
>  	die "snapshot file/device '$volid' is not possible\n";
>      } else {
> @@ -378,7 +378,6 @@ sub volume_snapshot_rollback {
>      }
>  }
>  
> -# FIXME PVE 8.x remove $running parameter (needs APIAGE reset)
>  sub volume_snapshot_delete {
>      my ($cfg, $volid, $snap, $running) = @_;
>  
> diff --git a/src/PVE/Storage/DirPlugin.pm b/src/PVE/Storage/DirPlugin.pm
> index 734309f..54d8d74 100644
> --- a/src/PVE/Storage/DirPlugin.pm
> +++ b/src/PVE/Storage/DirPlugin.pm
> @@ -83,6 +83,7 @@ sub options {
>  	is_mountpoint => { optional => 1 },
>  	bwlimit => { optional => 1 },
>  	preallocation => { optional => 1 },
> +	snapext => { optional => 1 },
>     };
>  }
>  
> diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm
> index 85f761c..3f83fae 100644
> --- a/src/PVE/Storage/Plugin.pm
> +++ b/src/PVE/Storage/Plugin.pm
> @@ -215,6 +215,11 @@ my $defaultData = {
>  	    maximum => 65535,
>  	    optional => 1,
>  	},
> +        'snapext' => {
> +	    type => 'boolean',
> +	    description => 'enable external snapshot.',
> +	    optional => 1,
> +        },
>      },
>  };
>  
> @@ -734,6 +739,8 @@ sub filesystem_path {
>      my ($vtype, $name, $vmid, undef, undef, $isBase, $format) =
>  	$class->parse_volname($volname);
>  
> +    $name = $class->get_snap_name($volname, $snapname) if $scfg->{snapext} && $snapname;
> +
>      # Note: qcow2/qed has internal snapshot, so path is always
>      # the same (with or without snapshot => same file).
>      die "can't snapshot this image format\n"
> @@ -926,14 +933,8 @@ sub alloc_image {
>  	umask $old_umask;
>  	die $err if $err;
>      } else {
> -	my $cmd = ['/usr/bin/qemu-img', 'create'];
> -
> -	my $prealloc_opt = preallocation_cmd_option($scfg, $fmt);
> -	push @$cmd, '-o', $prealloc_opt if defined($prealloc_opt);
>  
> -	push @$cmd, '-f', $fmt, $path, "${size}K";
> -
> -	eval { run_command($cmd, errmsg => "unable to create image"); };
> +	eval { qemu_img_create($scfg, $fmt, $size, $path) };

this should be a separate commit, without any semantic changes..

>  	if ($@) {
>  	    unlink $path;
>  	    rmdir $imagedir;
> @@ -944,6 +945,19 @@ sub alloc_image {
>      return "$vmid/$name";
>  }
>  
> +sub alloc_snap_image {

this should be private.. it's also kind of misnamed, as it doesn't allocate a snapshot image,
it allocates an image backed by a snapshot?

> +    my ($class, $storeid, $scfg, $volname, $backing_snap) = @_;
> +
> +    my $path = $class->path($scfg, $volname, $storeid);
> +    my $backing_path = $class->path($scfg, $volname, $storeid, $backing_snap);
> +
> +    eval { qemu_img_create($scfg, 'qcow2', undef, $path, $backing_path) };
> +    if ($@) {
> +	unlink $path;
> +	die "$@";
> +    }
> +}
> +
>  sub free_image {
>      my ($class, $storeid, $scfg, $volname, $isBase, $format) = @_;
>  
> @@ -980,6 +994,51 @@ sub free_image {
>  # TODO taken from PVE/QemuServer/Drive.pm, avoiding duplication would be nice
>  my @checked_qemu_img_formats = qw(raw qcow qcow2 qed vmdk cloop);
>  
> +sub qemu_img_create {

should live in some helper module..

> +    my ($scfg, $fmt, $size, $path, $backing_path) = @_;
> +
> +    my $cmd = ['/usr/bin/qemu-img', 'create'];
> +
> +    my $options = [];
> +
> +    if($backing_path) {
> +	push @$cmd, '-b', $backing_path, '-F', 'qcow2';
> +	push @$options, 'extended_l2=on','cluster_size=128k';
> +    };
> +    push @$options, preallocation_cmd_option($scfg, $fmt);
> +    push @$cmd, '-o', join(',', @$options) if @$options > 0;
> +    push @$cmd, '-f', $fmt, $path;
> +    push @$cmd, "${size}K" if !$backing_path;
> +
> +    run_command($cmd, errmsg => "unable to create image");
> +}
> +
> +sub qemu_img_info {

extracting this helper should be its own commit first, with the actual changes to it then made in this commit..

should this also live in some helper module instead of the plugin here?

> +    my ($filename, $file_format, $timeout, $follow_backing_files) = @_;
> +
> +    my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename];
> +    push $cmd->@*, '-f', $file_format if $file_format;
> +    push $cmd->@*, '--backing-chain' if $follow_backing_files;
> +
> +    my $json = '';
> +    my $err_output = '';
> +    eval {
> +	run_command($cmd,
> +	    timeout => $timeout,
> +	    outfunc => sub { $json .= shift },
> +	    errfunc => sub { $err_output .= shift . "\n"},
> +	);
> +    };
> +    warn $@ if $@;
> +    if ($err_output) {
> +	# if qemu did not output anything to stdout we die with stderr as an error
> +	die $err_output if !$json;
> +	# otherwise we warn about it and try to parse the json
> +	warn $err_output;
> +    }
> +    return $json;
> +}
> +
>  # set $untrusted if the file in question might be malicious since it isn't
>  # created by our stack
>  # this makes certain checks fatal, and adds extra checks for known problems like
> @@ -1043,25 +1102,9 @@ sub file_size_info {
>  	warn "file_size_info: '$filename': falling back to 'raw' from unknown format '$file_format'\n";
>  	$file_format = 'raw';
>      }
> -    my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $filename];
> -    push $cmd->@*, '-f', $file_format if $file_format;
>  
> -    my $json = '';
> -    my $err_output = '';
> -    eval {
> -	run_command($cmd,
> -	    timeout => $timeout,
> -	    outfunc => sub { $json .= shift },
> -	    errfunc => sub { $err_output .= shift . "\n"},
> -	);
> -    };
> -    warn $@ if $@;
> -    if ($err_output) {
> -	# if qemu did not output anything to stdout we die with stderr as an error
> -	die $err_output if !$json;
> -	# otherwise we warn about it and try to parse the json
> -	warn $err_output;
> -    }
> +    my $json = qemu_img_info($filename, $file_format, $timeout);
> +
>      if (!$json) {
>  	die "failed to query file information with qemu-img\n" if $untrusted;
>  	# skip decoding if there was no output, e.g. if there was a timeout.
> @@ -1183,15 +1226,37 @@ sub volume_resize {
>  }
>  
>  sub volume_snapshot {
> -    my ($class, $scfg, $storeid, $volname, $snap) = @_;
> +    my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
>  
>      die "can't snapshot this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
>  
> -    my $path = $class->filesystem_path($scfg, $volname);
> +    if($scfg->{snapext}) {
> +
> +	if ($running) {
> +	    #rename with blockdev-reopen is done at qemu level when running
> +	    $class->alloc_snap_image($storeid, $scfg, $volname, $snap);
> +	    return;
> +	}
>  
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> +	#rename current volume to snap volume
> +	my $vmid = ($class->parse_volname($volname))[2];
> +	$class->rename_volume($scfg, $storeid, $volname, $vmid, undef, 'current', $snap);
>  
> -    run_command($cmd);
> +	$class->alloc_snap_image($storeid, $scfg, $volname, $snap);
> +
> +	if ($@) {

this error here needs to be logged..

> +	    eval { $class->free_image($storeid, $scfg, $volname, 0) };
> +	    warn $@ if $@;
> +	    eval { $class->rename_volume($scfg, $storeid, $volname, $vmid, undef, $snap, 'current') };
> +	    warn $@ if $@;

and here we need to die to notify the upper stack that taking the snapshot failed

> +	}
> +
> +    } else {
> +
> +	my $path = $class->filesystem_path($scfg, $volname);
> +	my $cmd = ['/usr/bin/qemu-img', 'snapshot','-c', $snap, $path];
> +	run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1202,6 +1267,21 @@ sub volume_snapshot {
>  sub volume_rollback_is_possible {
>      my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
>  
> +    if ($scfg->{snapext}) {
> +	#technically, we could manage multibranch, we it need lot more work for snapshot delete
> +	#we need to implemente block-stream from deleted snapshot to all others child branchs
> +	#when online, we need to do a transaction for multiple disk when delete the last snapshot
> +	#and need to merge in current running file
> +
> +	my $snappath = $class->path($scfg, $volname, $storeid, $snap);
> +	my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +	my $parentsnap = $snapshots->{current}->{parent};
> +
> +	return 1 if $parentsnap eq $snap;
> +
> +	die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n";
> +    }
> +
>      return 1;
>  }
>  
> @@ -1212,9 +1292,21 @@ sub volume_snapshot_rollback {
>  
>      my $path = $class->filesystem_path($scfg, $volname);

$path is only used in the else branch..

>  
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> +    if ($scfg->{snapext}) {
> +	#simply delete the current snapshot and recreate it
> +	eval { $class->free_image($storeid, $scfg, $volname, 0) };
> +	if ($@) {
> +	    die "can't delete old volume $volname: $@\n";
> +	}
>  
> -    run_command($cmd);
> +	eval { $class->alloc_snap_image($storeid, $scfg, $volname, $snap) };
> +	if ($@) {
> +	    die "can't allocate new volume $volname: $@\n";
> +	}
> +    } else {
> +	my $cmd = ['/usr/bin/qemu-img', 'snapshot','-a', $snap, $path];
> +	run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1224,15 +1316,65 @@ sub volume_snapshot_delete {
>  
>      die "can't delete snapshot for this image format\n" if $volname !~ m/\.(qcow2|qed)$/;
>  
> -    return 1 if $running;
> -
> +    my $cmd = "";
>      my $path = $class->filesystem_path($scfg, $volname);

$path is only used in the else branch..

>  
> -    $class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +    if ($scfg->{snapext}) {
> +
> +	if ($running) {

should we add a comment here noting what this means? i.e., qemu has already removed
that snapshot from the backing chain, therefore we only have to drop the image itself?

> +	    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> +	    $volname = $class->get_snap_volname($volname, $snap);
> +	    $class->free_image($storeid, $scfg, $volname, $isBase, $format);
> +	    return;
> +	}
> +
> +	my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +	my $snappath = $snapshots->{$snap}->{file};
> +	my $snap_volname = $snapshots->{$snap}->{volname};
> +	die "volume $snappath is missing" if !-e $snappath;
> +
> +	my $parentsnap = $snapshots->{$snap}->{parent};
> +	my $childsnap = $snapshots->{$snap}->{child};
> +	my $childpath = $snapshots->{$childsnap}->{file};
> +
> +	#if first snapshot,as it should be bigger,  we merge child, and rename the snapshot to child
> +	if(!$parentsnap) {
> +	    print "commit: merge content of $childpath into $snappath\n";
> +	    $cmd = ['/usr/bin/qemu-img', 'commit', $childpath];
> +	    eval { run_command($cmd) };
> +	    if ($@) {

should we add an error here about what state this leaves the snapshot in?
AFAIU we've potentially written some of the data from $child to $snap, so
the state of $snap is technically invalid now?

> +		die "error commiting $childpath to $snappath; $@\n";
> +	    }
> +	    print"rename $snappath to $childpath\n";
> +	    eval { rename($snappath, $childpath) };

rename doesn't die or set $@ - it returns false and sets $! on failure, so the check below can never trigger..

> +            if ($@) {
> +                die "error renaming snapshot: $@\n";
> +            }
> +	} else {
> +	    #we rebase the child image on the parent as new backing image
> +	    my $parentpath = $snapshots->{$parentsnap}->{file};
> +	    print "rebase: merge diff content between $parentpath and $childpath into $childpath\n";
> +	    $cmd = ['/usr/bin/qemu-img', 'rebase', '-b', $parentpath, '-F', 'qcow2', '-f', 'qcow2', $childpath];
> +	    eval { run_command($cmd) };
> +	    if ($@) {
> +		die "error rebase $childpath from $parentpath; $@\n";

same here, but in this case $child just contains some duplicate data so nothing is
really broken?

> +	    }
> +	    #delete the snapshot
> +	    eval { $class->free_image($storeid, $scfg, $snap_volname, 0); };
> +	    if ($@) {

and here we just leave a stray volume around that is not part of the backing chain
anymore, right?

> +		die "error delete old snapshot volume $snap_volname: $@\n";
> +	    }
> +	}
> +
> +    } else {
>  
> -    my $cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> +	return 1 if $running;
>  
> -    run_command($cmd);
> +	$class->deactivate_volume($storeid, $scfg, $volname, $snap, {});
> +
> +	$cmd = ['/usr/bin/qemu-img', 'snapshot','-d', $snap, $path];
> +	run_command($cmd);
> +    }
>  
>      return undef;
>  }
> @@ -1271,7 +1413,7 @@ sub volume_has_feature {
>  	    current => { qcow2 => 1, raw => 1, vmdk => 1 },
>  	},
>  	rename => {
> -	    current => {qcow2 => 1, raw => 1, vmdk => 1},
> +	    current => { qcow2 => 1, raw => 1, vmdk => 1},

unrelated change..

>  	},
>      };
>  
> @@ -1506,7 +1648,40 @@ sub status {
>  sub volume_snapshot_info {
>      my ($class, $scfg, $storeid, $volname) = @_;
>  
> -    die "volume_snapshot_info is not implemented for $class";
> +    my $path = $class->filesystem_path($scfg, $volname);
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> +
> +    my $backing_chain = 1;

shouldn't this depend on $snapext ?

> +    my $json = qemu_img_info($path, undef, 10, $backing_chain);
> +    die "failed to query file information with qemu-img\n" if !$json;
> +    my $snapshots = eval { decode_json($json) };
> +    if ($@) {
> +	die "Can't decode qemu snapshot list. Invalid JSON\n";

should also contain $@ so we get an idea *what is wrong*..

> +    }
> +    my $info = {};
> +    my $order = 0;
> +    for my $snap (@$snapshots) {

this doesn't work for internal snapshots, as then qemu-img info just
returns a single object.. or if we pass --backing-chain also in that
case, then the code below still doesn't correctly handle it..

> +
> +	my $snapfile = $snap->{filename};
> +	my $snapname = parse_snapname($snapfile);
> +	$snapname = 'current' if !$snapname;
> +	my $snapvolname = $class->get_snap_volname($volname, $snapname);
> +
> +	$info->{$snapname}->{order} = $order;
> +	$info->{$snapname}->{file}= $snapfile;
> +	$info->{$snapname}->{volname} = "$snapvolname";
> +	$info->{$snapname}->{volid} = "$storeid:$snapvolname";
> +	$info->{$snapname}->{ext} = 1;

only if $snapext?

> +
> +	my $parentfile = $snap->{'backing-filename'};
> +	if ($parentfile) {
> +	    my $parentname = parse_snapname($parentfile);
> +	    $info->{$snapname}->{parent} = $parentname;
> +	    $info->{$parentname}->{child} = $snapname;
> +	}
> +	$order++;
> +    }
> +    return $info;
>  }
>  
>  sub activate_storage {
> @@ -1907,4 +2082,30 @@ sub config_aware_base_mkdir {
>      }
>  }
>  
> +sub get_snap_name {
> +    my ($class, $volname, $snapname) = @_;
> +
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> +    $name = !$snapname || $snapname eq 'current' ? $name : "snap-$snapname-$name";
> +    return $name;
> +}
> +
> +sub get_snap_volname {
> +    my ($class, $volname, $snapname) = @_;
> +
> +    my $vmid = ($class->parse_volname($volname))[2];
> +    my $name = $class->get_snap_name($volname, $snapname);
> +    return "$vmid/$name";
> +}
> +
> +sub parse_snapname {
> +    my ($name) = @_;
> +
> +    my $basename = basename($name);
> +    if ($basename =~ m/^snap-(.*)-vm(.*)$/) {
> +	return $1;
> +    }
> +    return undef;
> +}
> +
>  1;
> -- 
> 2.39.5




More information about the pve-devel mailing list