[pve-devel] [PATCH pve-storage 3/5] lvmplugin: add qcow2 snapshot

Fabian Grünbichler f.gruenbichler at proxmox.com
Tue May 13 11:54:30 CEST 2025


I started playing around with this today (finally ;))

but I immediately ran into an issue:

VM is running, LVM storage configured, adding a new 32G disk to the VM with format=qcow2:

update VM 106: -scsi1 extsnap:32,format=qcow2,discard=on,ssd=on,iothread=on
qcow2 overhead: 4096
LV size: 33558528
  Logical volume "vm-106-disk-0.qcow2" created.
Formatting '/dev/extsnap/vm-106-disk-0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=metadata compression_type=zlib size=34359738368 lazy_refcounts=off refcount_bits=16
qemu-img: Failed to flush the refcount block cache: No space left on device
  Logical volume "vm-106-disk-0.qcow2" successfully removed.
TASK ERROR: unable to create image: qemu-img: /dev/extsnap/vm-106-disk-0.qcow2: Could not resize image: Allocating clusters failed: No space left on device


The "qcow2 overhead" and "LV size" lines are debug output that I patched in (both values are in KiB).

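Something must be wrong with the overhead calculation - if I make the overhead twice as large, it works. Note that the image is formatted with cluster_size=65536 and extended_l2=off, so by the formula in the patch the L2 tables alone need 32 GiB × 8 / 64 KiB = 4 MiB, which is already the entire 4096 KiB overhead and leaves no room for the header, L1 table and refcount structures.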

also see comments below..

> Alexandre Derumier via pve-devel <pve-devel at lists.proxmox.com> hat am 22.04.2025 13:51 CEST geschrieben:
> we format lvm logical volume with qcow2 to handle snapshot chain.
> 
> like for qcow2 file, when a snapshot is taken, the current lvm volume
> is renamed to snap volname, and a new current lvm volume is created
> with the snap volname as backing file
> 
> Signed-off-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
> ---
>  src/PVE/Storage/LVMPlugin.pm | 301 ++++++++++++++++++++++++++++++++---
>  1 file changed, 278 insertions(+), 23 deletions(-)
> 
> diff --git a/src/PVE/Storage/LVMPlugin.pm b/src/PVE/Storage/LVMPlugin.pm
> index c4648ec..8ee337a 100644
> --- a/src/PVE/Storage/LVMPlugin.pm
> +++ b/src/PVE/Storage/LVMPlugin.pm
> @@ -4,6 +4,7 @@ use strict;
>  use warnings;
>  
>  use IO::File;
> +use POSIX qw/ceil/;
>  
>  use PVE::Tools qw(run_command trim);
>  use PVE::Storage::Plugin;
> @@ -218,6 +219,7 @@ sub type {
>  sub plugindata {
>      return {
>  	content => [ {images => 1, rootdir => 1}, { images => 1 }],
> +	format => [ { raw => 1, qcow2 => 1 } , 'raw' ],
>  	'sensitive-properties' => {},
>      };
>  }
> @@ -294,7 +296,10 @@ sub parse_volname {
>      PVE::Storage::Plugin::parse_lvm_name($volname);
>  
>      if ($volname =~ m/^(vm-(\d+)-\S+)$/) {
> -	return ('images', $1, $2, undef, undef, undef, 'raw');
> +	my $name = $1;
> +	my $vmid = $2;
> +	my $format = $volname =~ m/\.qcow2$/ ? 'qcow2' : 'raw';
> +	return ('images', $name, $vmid, undef, undef, undef, $format);
>      }
>  
>      die "unable to parse lvm volume name '$volname'\n";
> @@ -303,11 +308,13 @@ sub parse_volname {
>  sub filesystem_path {
>      my ($class, $scfg, $volname, $snapname) = @_;
>  
> -    die "lvm snapshot is not implemented"if defined($snapname);
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
> +	$class->parse_volname($volname);
>  
> -    my ($vtype, $name, $vmid) = $class->parse_volname($volname);
> +    die "snapshot is working with qcow2 format only" if defined($snapname) && $format ne 'qcow2';
>  
>      my $vg = $scfg->{vgname};
> +    $name = $class->get_snap_name($volname, $snapname) if $snapname;
>  
>      my $path = "/dev/$vg/$name";
>  
> @@ -335,7 +342,9 @@ sub find_free_diskname {
>  
>      my $disk_list = [ keys %{$lvs->{$vg}} ];
>  
> -    return PVE::Storage::Plugin::get_next_vm_diskname($disk_list, $storeid, $vmid, undef, $scfg);
> +    $add_fmt_suffix = $fmt eq 'qcow2' ? 1 : undef;
> +
> +    return PVE::Storage::Plugin::get_next_vm_diskname($disk_list, $storeid, $vmid, $fmt, $scfg, $add_fmt_suffix);
>  }
>  
>  sub lvcreate {
> @@ -363,13 +372,43 @@ sub lvrename {
>      );
>  }
>  
> -sub alloc_image {
> -    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
> +my sub lvm_qcow2_format {
> +    my ($class, $storeid, $scfg, $name, $fmt, $backing_snap, $size) = @_;
> +
> +    return if $fmt ne 'qcow2';

See below - this should be an assertion (die) rather than a silent return; lvm_qcow2_format should never be called for non-qcow2 volumes.

> +
> +    $class->activate_volume($storeid, $scfg, $name);
> +    my $path = $class->path($scfg, $name, $storeid);
> +    my $backing_path = $class->path($scfg, $name, $storeid, $backing_snap) if $backing_snap;
> +    PVE::Storage::Plugin::qemu_img_create($scfg, 'qcow2', $size, $path, $backing_path);
>  
> -    die "unsupported format '$fmt'" if $fmt ne 'raw';
> +}
> +
> +my sub lvm_size {
> +   my ($size, $fmt, $backing_snap) = @_;
> +
> +   #add extra space for qcow2 metadatas for initial image
> +   #if backing_snap exist, the parent lvm volume already have the overhead
> +   return $size if $fmt ne 'qcow2' || $backing_snap;
> +
> +   #without sub-allocated clusters : l2_size = disk_size × 8 / cluster_size
> +   #with sub-allocated clusters :    l2_size = disk_size × 8 / cluster_size / 16
> +   #ex: 4MB overhead for 1TB with extented l2 clustersize=128k
> +   #can't use qemu-img measure, because it's not possible to define options like clustersize && extended_l2
> +   #verification has been done with : qemu-img create -f qcow2 -o extended_l2=on,cluster_size=128k test.img 1G
> +
> +   my $qcow2_overhead = ceil($size/1024/1024/1024) * 4096;
> +   $size += $qcow2_overhead;
> +   return $size;
> +}
> +
> +my sub alloc_lvm_image {
> +    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size, $backing_snap) = @_;
> +
> +    die "unsupported format '$fmt'" if $fmt !~ m/(raw|qcow2)/;
>  
>      die "illegal name '$name' - should be 'vm-$vmid-*'\n"
> -	if  $name && $name !~ m/^vm-$vmid-/;
> +	if $name !~ m/^vm-$vmid-/;
>  
>      my $vgs = lvm_vgs();
>  
> @@ -378,17 +417,51 @@ sub alloc_image {
>      die "no such volume group '$vg'\n" if !defined ($vgs->{$vg});
>  
>      my $free = int($vgs->{$vg}->{free});
> +    my $lvmsize = lvm_size($size, $fmt, $backing_snap);
>  
>      die "not enough free space ($free < $size)\n" if $free < $size;
>  
> -    $name = $class->find_free_diskname($storeid, $scfg, $vmid)
> +    my $tags = ["pve-vm-$vmid"];
> +    #tags all snapshots volumes with the main volume tag for easier activation of the whole group
> +    push @$tags, "\@pve-$name" if $fmt eq 'qcow2';
> +    lvcreate($vg, $name, $lvmsize, $tags);
> +
> +    #format the lvm volume with qcow2 format

I also just realized that I missed this in my first pass: we should return here
if the format is not qcow2, instead of making lvm_qcow2_format a no-op in that case.

> +    eval { lvm_qcow2_format($class, $storeid, $scfg, $name, $fmt, $backing_snap, $size) };
> +    if ($@) {
> +	my $err = $@;
> +	#no need to safe cleanup as the volume is still empty
> +	eval {
> +	    my $cmd = ['/sbin/lvremove', '-f', "$vg/$name"];
> +	    run_command($cmd, errmsg => "lvremove '$vg/$name' error");
> +	};
> +	die $err;
> +    }
> +    
> +}
> +
> +sub alloc_image {
> +    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
> +
> +    $name = $class->find_free_diskname($storeid, $scfg, $vmid, $fmt)
>  	if !$name;
>  
> -    lvcreate($vg, $name, $size, ["pve-vm-$vmid"]);
> +    alloc_lvm_image($class, $storeid, $scfg, $vmid, $fmt, $name, $size);
>  
>      return $name;
>  }
>  
> +sub alloc_snap_image {
> +    my ($class, $storeid, $scfg, $volname, $backing_snap) = @_;
> +
> +    my $size = $class->volume_size_info($scfg, $storeid, $volname, 5, $backing_snap);
> +    $size = $size / 1024;  #we use kb in lvcreate
> +
> +    my ($vmid, $format) = ($class->parse_volname($volname))[2,6];
> +
> +    alloc_lvm_image($class, $storeid, $scfg, $vmid, $format, $volname, $size, $backing_snap);
> +}
> +
>  sub free_image {
>      my ($class, $storeid, $scfg, $volname, $isBase) = @_;
>  
> @@ -539,6 +612,12 @@ sub activate_volume {
>  
>      my $lvm_activate_mode = 'ey';
>  
> +    #activate volume && all snapshots volumes by tag
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
> +	$class->parse_volname($volname);
> +
> +    $path = "\@pve-$name" if $format eq 'qcow2';
> +
>      my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $path];
>      run_command($cmd, errmsg => "can't activate LV '$path'");
>      $cmd = ['/sbin/lvchange', '--refresh', $path];
> @@ -551,6 +630,10 @@ sub deactivate_volume {
>      my $path = $class->path($scfg, $volname, $storeid, $snapname);
>      return if ! -b $path;
>  
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
> +	$class->parse_volname($volname);
> +    $path = "\@pve-$name" if $format eq 'qcow2';
> +
>      my $cmd = ['/sbin/lvchange', '-aln', $path];
>      run_command($cmd, errmsg => "can't deactivate LV '$path'");
>  }
> @@ -558,21 +641,31 @@ sub deactivate_volume {
>  sub volume_resize {
>      my ($class, $scfg, $storeid, $volname, $size, $running) = @_;
>  
> -    $size = ($size/1024/1024) . "M";
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
> +	$class->parse_volname($volname);
> +
> +    my $lvmsize = lvm_size($size/1024, $format);
> +    $lvmsize = "${lvmsize}k";
>  
>      my $path = $class->path($scfg, $volname);
> -    my $cmd = ['/sbin/lvextend', '-L', $size, $path];
> +    my $cmd = ['/sbin/lvextend', '-L', $lvmsize, $path];
>  
>      $class->cluster_lock_storage($storeid, $scfg->{shared}, undef, sub {
>  	run_command($cmd, errmsg => "error resizing volume '$path'");
>      });
>  
> +    if(!$running && $format eq 'qcow2') {
> +	my $prealloc_opt = PVE::Storage::Plugin::preallocation_cmd_option($scfg, $format);
> +	my $cmd = ['/usr/bin/qemu-img', 'resize', "--$prealloc_opt", '-f', $format, $path , $size];
> +	run_command($cmd, timeout => 10);
> +    }
> +
>      return 1;
>  }
>  
>  sub volume_size_info {
> -    my ($class, $scfg, $storeid, $volname, $timeout) = @_;
> -    my $path = $class->filesystem_path($scfg, $volname);
> +    my ($class, $scfg, $storeid, $volname, $timeout, $snap) = @_;
> +    my $path = $class->filesystem_path($scfg, $volname, $snap);
>  
>      my $cmd = ['/sbin/lvs', '--separator', ':', '--noheadings', '--units', 'b',
>  	       '--unbuffered', '--nosuffix', '--options', 'lv_size', $path];
> @@ -586,32 +679,180 @@ sub volume_size_info {
>  }
>  
>  sub volume_snapshot {
> -    my ($class, $scfg, $storeid, $volname, $snap) = @_;
> +    my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
> +
> +    my ($vmid, $format) = ($class->parse_volname($volname))[2,6];
> +
> +    die "can't snapshot this image format\n" if $format ne 'qcow2';
> +
> +    if ($running) {
> +        #rename with blockdev-reopen is done at qemu level when running
> +        $class->alloc_snap_image($storeid, $scfg, $volname, $snap);
> +	if ($@) {
> +	    die "can't allocate new volume $volname: $@\n";
> +	}
> +        return;
> +    }
> +
> +    #rename current volume to snap volume
> +    eval { $class->rename_volume($scfg, $storeid, $volname, $vmid, undef, 'current', $snap) };
> +    die "error rename $volname to $snap\n" if $@;
> +
> +    eval { $class->alloc_snap_image($storeid, $scfg, $volname, $snap) };
> +    if ($@) {
> +	my $err = $@;
> +        eval { $class->rename_volume($scfg, $storeid, $volname, $vmid, undef, $snap, 'current') };
> +        die $err;
> +    }
> +}
> +
> +sub volume_rollback_is_possible {
> +    my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_;
> +
> +    my $snap_path = $class->path($scfg, $volname, $storeid, $snap);
>  
> -    die "lvm snapshot is not implemented";
> +    $class->activate_volume($storeid, $scfg, $volname, undef, {});
> +    my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +    my $parent_snap = $snapshots->{current}->{parent};
> +
> +    return 1 if $parent_snap eq $snap;
> +    die "can't rollback, '$snap' is not most recent snapshot on '$volname'\n";
> +
> +    return 1;
>  }
>  
> +
>  sub volume_snapshot_rollback {
>      my ($class, $scfg, $storeid, $volname, $snap) = @_;
>  
> -    die "lvm snapshot rollback is not implemented";
> +    my $format = ($class->parse_volname($volname))[6];
> +
> +    die "can't rollback snapshot for this image format\n" if $format ne 'qcow2';
> +
> +    $class->activate_volume($storeid, $scfg, $volname, undef, {});
> +
> +    # we can simply reformat the current lvm volume to avoid
> +    # a long safe remove.(not needed here, as the allocated space
> +    # is still the same owner)
> +    eval { lvm_qcow2_format($class, $storeid, $scfg, $volname, $format, $snap) };
> +    if($@) {
> +	die "can't rollback. Error reformating current $volname\n";
> +    }
> +    return undef;
>  }
>  
>  sub volume_snapshot_delete {
> -    my ($class, $scfg, $storeid, $volname, $snap) = @_;
> +    my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
> +
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> +
> +    die "can't delete snapshot for this image format\n" if $format ne 'qcow2';
> +
> +    if ($running) {
> +	$volname = $class->get_snap_volname($volname, $snap);
> +	my $cleanup_worker = eval { $class->free_image($storeid, $scfg, $volname, $isBase, $format) };
> +	die "error deleting snapshot $snap\n" if $@;    
> +
> +	if ($cleanup_worker) {
> +	    my $rpcenv = PVE::RPCEnvironment::get();
> +	    my $authuser = $rpcenv->get_user();
> +	    $rpcenv->fork_worker('imgdel', undef, $authuser, $cleanup_worker);
> +	}
> +	return;
> +    }
>  
> -    die "lvm snapshot delete is not implemented";
> +    my $cmd = "";
> +    my $path = $class->filesystem_path($scfg, $volname);
> +
> +    my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname);
> +    my $snappath = $snapshots->{$snap}->{file};
> +    my $snapvolname = $snapshots->{$snap}->{volname};
> +    die "volume $snappath is missing" if !-e $snappath;
> +
> +    my $parentsnap = $snapshots->{$snap}->{parent};
> +
> +    my $childsnap = $snapshots->{$snap}->{child};
> +    my $childpath = $snapshots->{$childsnap}->{file};
> +    my $childvolname = $snapshots->{$childsnap}->{volname};
> +
> +    my $cleanup_worker = undef;
> +    my $err = undef;
> +    #if first snapshot,as it should be bigger,  we merge child, and rename the snapshot to child
> +    if(!$parentsnap) {
> +	print "commit: merge content of $childpath into $snappath\n";
> +	#can't use -d here, as it's an lvm volume
> +	$cmd = ['/usr/bin/qemu-img', 'commit', $childpath];
> +	eval {	run_command($cmd) };
> +	if ($@) {
> +	    die "error commiting $childpath to $snappath; $@\n";
> +	}
> +	print"delete $childvolname\n";
> +
> +	$cleanup_worker = eval { $class->free_image($storeid, $scfg, $childvolname, 0) };
> +	if ($@) {
> +	    die "error delete old snapshot volume $childvolname: $@\n";
> +	}
> +
> +	print"rename $snapvolname to $childvolname\n";
> +	my $vg = $scfg->{vgname};
> +	eval { lvrename($vg, $snapvolname, $childvolname) };
> +	if ($@) {
> +	    warn $@;
> +	    $err = "error renaming snapshot: $@\n";
> +	}
> +
> +    } else {
> +	#we rebase the child image on the parent as new backing image
> +	my $parentpath = $snapshots->{$parentsnap}->{file};
> +	print "rebase: merge diff content between $parentpath and $childpath into $childpath\n";
> +	$cmd = ['/usr/bin/qemu-img', 'rebase', '-b', $parentpath, '-F', 'qcow2', '-f', 'qcow2', $childpath];
> +	eval { run_command($cmd) };
> +	if ($@) {
> +	    die "error rebase $childpath from $parentpath; $@\n";
> +	}
> +	#delete the snapshot
> +	eval { $cleanup_worker = $class->free_image($storeid, $scfg, $snapvolname, 0); };
> +	if ($@) {
> +	    die "error delete old snapshot volume $snapvolname\n";
> +	}
> +    }
> +
> +    if ($cleanup_worker) {
> +	my $rpcenv = PVE::RPCEnvironment::get();
> +	my $authuser = $rpcenv->get_user();
> +	$rpcenv->fork_worker('imgdel', undef, $authuser, $cleanup_worker);
> +    }
> +
> +    die $err if $err;
>  }
>  
>  sub volume_has_feature {
>      my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;
>  
>      my $features = {
> -	copy => { base => 1, current => 1},
> -	rename => {current => 1},
> +        copy => {
> +            base => { qcow2 => 1, raw => 1 },
> +            current => { qcow2 => 1, raw => 1},
> +            snap => { qcow2 => 1 },
> +        },
> +        'rename' => {
> +            current => { qcow2 => 1, raw => 1},
> +        },
> +        snapshot => {
> +            current => { qcow2 => 1 },
> +            snap => { qcow2 => 1 },
> +        },
> +#	fixme: add later ? (we need to handle basepath, volume activation,...)
> +#	template => {
> +#	    current => { raw => 1, qcow2 => 1},
> +#	},
> +#	clone => {
> +#	    base => { qcow2 => 1 },
> +#	},
>      };
>  
> -    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
> +
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) =
>  	$class->parse_volname($volname);
>  
>      my $key = undef;
> @@ -620,7 +861,7 @@ sub volume_has_feature {
>      }else{
>  	$key =  $isBase ? 'base' : 'current';
>      }
> -    return 1 if $features->{$feature}->{$key};
> +    return 1 if defined($features->{$feature}->{$key}->{$format});
>  
>      return undef;
>  }
> @@ -745,4 +986,18 @@ sub rename_volume {
>      return "${storeid}:${target_volname}";
>  }
>  
> +sub get_snap_name {
> +    my ($class, $volname, $snapname) = @_;
> +
> +    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = $class->parse_volname($volname);
> +    $name = !$snapname || $snapname eq 'current' ? $name : "snap-$snapname-$name";
> +    return $name;
> +}
> +
> +sub get_snap_volname {
> +    my ($class, $volname, $snapname) = @_;
> +
> +    return $class->get_snap_name($volname, $snapname);
> +}
> +
>  1;
> -- 
> 2.39.5




More information about the pve-devel mailing list