[pve-devel] [PATCH storage] added btrfs storage backend

Wolfgang Bumiller w.bumiller at proxmox.com
Thu Aug 11 12:39:34 CEST 2016


---
This is experimental and incompatible changes might follow, mostly due to the
fact that it currently allows raw files to be 'snapshotted' by creating a clone
(cp --reflink=always, aka BTRFS_IOC_CLONE), which has the same migration issues
as lvm-thin with snapshots.
Migration is therefore not implemented yet and currently falls back to using
rsync, since the storage has a 'path' property (later it will use 'btrfs send'
and 'btrfs receive' for subvols). One possibility to preserve snapshots on raw
files would be to put the raw files into subdirectories and use real snapshots
on them instead of cloned files; that way they can be sent and received as well.

Most functions just implement the subvolume case and fall back to calling
Plugin::$thefunction(@_) for the rest, so in a sense this acts like a directory
storage with the 'subvol' format using btrfs subvolumes and raw files being
'snapshottable' (cow-clones).

 PVE/Storage.pm             |   2 +
 PVE/Storage/BTRFSPlugin.pm | 364 +++++++++++++++++++++++++++++++++++++++++++++
 PVE/Storage/Makefile       |   2 +-
 PVE/Storage/Plugin.pm      |  17 ++-
 4 files changed, 377 insertions(+), 8 deletions(-)
 create mode 100644 PVE/Storage/BTRFSPlugin.pm

diff --git a/PVE/Storage.pm b/PVE/Storage.pm
index 25ff545..46b0999 100755
--- a/PVE/Storage.pm
+++ b/PVE/Storage.pm
@@ -32,6 +32,7 @@ use PVE::Storage::GlusterfsPlugin;
 use PVE::Storage::ZFSPoolPlugin;
 use PVE::Storage::ZFSPlugin;
 use PVE::Storage::DRBDPlugin;
+use PVE::Storage::BTRFSPlugin;
 
 # load and initialize all plugins
 PVE::Storage::DirPlugin->register();
@@ -46,6 +47,7 @@ PVE::Storage::GlusterfsPlugin->register();
 PVE::Storage::ZFSPoolPlugin->register();
 PVE::Storage::ZFSPlugin->register();
 PVE::Storage::DRBDPlugin->register();
+PVE::Storage::BTRFSPlugin->register();
 PVE::Storage::Plugin->init();
 
 my $UDEVADM = '/sbin/udevadm';
diff --git a/PVE/Storage/BTRFSPlugin.pm b/PVE/Storage/BTRFSPlugin.pm
new file mode 100644
index 0000000..eb306d5
--- /dev/null
+++ b/PVE/Storage/BTRFSPlugin.pm
@@ -0,0 +1,364 @@
+package PVE::Storage::BTRFSPlugin;
+
+use strict;
+use warnings;
+
+use File::Path;
+use Fcntl qw(O_RDONLY O_WRONLY O_CREAT O_EXCL);
+
+use PVE::Tools qw(run_command);
+use PVE::JSONSchema qw(get_standard_option);
+
+use PVE::Storage::Plugin;
+use base qw(PVE::Storage::Plugin);
+
+# Configuration (same as for DirPlugin)
+
+# Returns the type name of this storage plugin: 'btrfs'.
+sub type {
+    my $type_name = 'btrfs';
+    return $type_name;
+}
+
+# Returns the static plugin description. Both the 'content' and the 'format'
+# entry are two-element lists: a set of names followed by a second value.
+# NOTE(review): presumably [ allowed, default ] as for the other storage
+# plugins - confirm against Plugin.pm.
+sub plugindata {
+    my $content_types = {
+	images => 1,
+	rootdir => 1,
+	vztmpl => 1,
+	iso => 1,
+	backup => 1,
+	none => 1,
+    };
+    my $image_formats = { raw => 1, qcow2 => 1, vmdk => 1, subvol => 1 };
+
+    return {
+	content => [ $content_types, { images => 1, rootdir => 1 } ],
+	format => [ $image_formats, 'raw' ],
+    };
+}
+
+# This plugin declares no properties of its own; an empty hash is returned.
+sub properties {
+    # The 'path' property is already defined in DirPlugin:
+    #path => {
+    #    description => "File system path.",
+    #    type => 'string', format => 'pve-storage-path',
+    #},
+    my %props = ();
+    return \%props;
+}
+
+# Returns the configuration options of this storage type: 'path' is marked
+# as fixed, every other listed option is marked as optional.
+sub options {
+    my %opts = (path => { fixed => 1 });
+
+    # each optional entry gets its own hash, just like a literal would
+    foreach my $key (qw(nodes shared disable maxfiles content format)) {
+	$opts{$key} = { optional => 1 };
+    }
+
+    return \%opts;
+}
+
+# Storage implementation
+
+# Runs the generic PVE::SectionConfig::check_config() and - only when
+# $create is set - additionally dies unless the path starts with '/' and
+# consists solely of the characters [-/a-zA-Z0-9_.].
+# Returns the checked options.
+sub check_config {
+    my ($self, $sectionId, $config, $create, $skipSchemaCheck) = @_;
+
+    my $checked = PVE::SectionConfig::check_config(
+	$self, $sectionId, $config, $create, $skipSchemaCheck);
+
+    if ($create) {
+	my $storage_path = $checked->{path};
+	die "illegal path for directory storage: $storage_path\n"
+	    if $storage_path !~ m@^/[-/a-zA-Z0-9_.]+$@;
+    }
+
+    return $checked;
+}
+
+# Same as in Plugin but without the snapname => qcow2 error case
+# Maps a volume name (and optionally a snapshot name) to a file system path
+# below the content directory returned by get_subdir().
+#
+# Snapshots of subvolumes and of '.raw' files get their own path,
+# "snap_${name}_$snapname", next to the volume. For every other volume the
+# snapshot name is ignored and the path of the volume itself is used.
+#
+# Returns ($path, $vmid, $vtype) in list context and just $path otherwise.
+sub filesystem_path {
+    my ($class, $scfg, $volname, $snapname) = @_;
+
+    my ($vtype, $name, $vmid, $format) =
+	($class->parse_volname($volname))[0, 1, 2, 6];
+
+    my $dir = $class->get_subdir($scfg, $vtype);
+    $dir = "$dir/$vmid" if $vtype eq 'images';
+
+    # $format is only looked at when a snapshot name was given
+    my $leaf = ($snapname && ($format eq 'subvol' || $volname =~ /\.raw$/))
+	? "snap_${name}_$snapname"
+	: $name;
+
+    my $path = "$dir/$leaf";
+
+    return wantarray ? ($path, $vmid, $vtype) : $path;
+}
+
+# Runs "btrfs @$cmd" via run_command() and returns the collected output as
+# one string.
+#
+# Without $outfunc, everything run_command() hands to its output callback is
+# appended to the result followed by a newline (presumably one line per
+# call - confirm against PVE::Tools::run_command). With $outfunc, that
+# callback is invoked instead and whatever defined value it returns is
+# appended as is.
+sub btrfs_cmd {
+    my ($class, $cmd, $outfunc) = @_;
+
+    my $collected = '';
+
+    my $append_line = sub { $collected .= "$_[0]\n" };
+    my $append_filtered = sub {
+	my $chunk = $outfunc->(@_);
+	$collected .= $chunk if defined($chunk);
+    };
+
+    run_command(
+	['btrfs', @$cmd],
+	errmsg => 'btrfs error',
+	outfunc => defined($outfunc) ? $append_filtered : $append_line,
+    );
+
+    return $collected;
+}
+
+# Copies $src to $dst with 'cp --reflink=always', i.e. as a clone sharing
+# its data with the source (see BTRFS_IOC_CLONE in the comment below).
+sub clone_file {
+    my ($src, $dst) = @_;
+
+    #alternatively we could use ioctl($dst_fh, 0x40049409, fileno($src_fh));
+    my @cp_cmd = ('cp', '--reflink=always', '--', $src, $dst);
+    run_command(\@cp_cmd);
+}
+
+# Returns the numeric ID of the btrfs subvolume at $path, parsed from the
+# output of 'btrfs subvolume show'.
+# Dies (including the command output) if no ID line can be found.
+sub btrfs_get_subvol_id {
+    my ($class, $path) = @_;
+
+    my $info = $class->btrfs_cmd(['subvolume', 'show', $path]);
+
+    # Older btrfs-progs label the line "Object ID:", newer releases print
+    # "Subvolume ID:" instead - accept both. Trailing blanks are tolerated
+    # so that they cannot break the match either.
+    if ($info =~ /^\s*(?:Object|Subvolume) ID:\s*(\d+)[ \t]*$/m) {
+	return $1;
+    }
+
+    die "failed to get btrfs subvolume ID from: $info\n";
+}
+
+# Other classes have similar function, we explicitly reuse the 'private' one
+# from Plugin.pm without exposing it to the $class-> namespace.
+my $find_free_diskname = sub {
+    # forward all arguments unchanged to the code reference kept in Plugin.pm
+    my $impl = $PVE::Storage::Plugin::find_free_diskname;
+    return $impl->(@_);
+};
+
+# Same as in the base class (Plugin.pm), but takes subvols into account by
+# handing a protection callback to the base implementation.
+# This could use some deduplication.
+#
+# The callback is appended after all arguments this method received, so it
+# ends up in the $protect_callback slot of the base method as long as the
+# caller did not pass one itself.
+sub create_base {
+    my ($class, $storeid, $scfg, $volname, $protect_callback) = @_;
+
+    # Subvolumes are switched to read-only through a btrfs property, plain
+    # files get chmod 0444 plus the immutable flag. Failures of the external
+    # commands are only warned about.
+    my $protect_base = sub {
+	my ($newpath, $format) = @_;
+
+	if ($format ne 'subvol') {
+	    chmod(0444, $newpath); # nobody should write anything
+
+	    # also try to set immutable flag
+	    eval { run_command(['/usr/bin/chattr', '+i', $newpath]); };
+	    warn $@ if $@;
+	    return;
+	}
+
+	eval { $class->btrfs_cmd(['property', 'set', $newpath, 'ro', 'true']) };
+	warn $@ if $@;
+    };
+
+    return PVE::Storage::Plugin::create_base(@_, $protect_base);
+}
+
+# Creates a clone of $volname for guest $vmid and returns the name of the
+# new volume ("$basevmid/$basename/$vmid/$name").
+#
+# Subvolumes are cloned with 'btrfs subvolume snapshot'; other volumes are
+# cloned with clone_file() when a snapshot name ($snap) is given. All
+# remaining cases are handled by the base class.
+sub clone_image {
+    my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;
+
+    my ($vtype, $basename, $basevmid, undef, undef, $isBase, $format) =
+	$class->parse_volname($volname);
+
+    my $imagedir = $class->get_subdir($scfg, 'images');
+    $imagedir .= "/$vmid";
+    mkpath $imagedir;
+
+    if ($format eq 'subvol' || $snap) {
+	# Use the snapshot as clone source when one was requested; without
+	# $snap this is the path of the volume itself. (Previously $snap was
+	# ignored here, so the current state got cloned instead.)
+	my $path = $class->filesystem_path($scfg, $volname, $snap);
+
+	my $name = $find_free_diskname->($imagedir, $vmid, $format);
+	warn "clone $volname: $vtype, $name, $vmid to $name (base=../$basevmid/$basename)\n";
+	my $newvol = "$basevmid/$basename/$vmid/$name";
+
+	my $newpath = $class->filesystem_path($scfg, $newvol);
+
+	if ($format eq 'subvol') {
+	    $class->btrfs_cmd(['subvolume', 'snapshot', '--', $path, $newpath]);
+	} else {
+	    clone_file($path, $newpath);
+	}
+
+	return $newvol;
+    }
+
+    return PVE::Storage::Plugin::clone_image(@_);
+}
+
+# Allocates a new volume and returns its name ("$vmid/$name").
+#
+# For the 'subvol' format a btrfs subvolume is created below the images
+# directory of $vmid. If $size is set, quota support is enabled and $size
+# (passed to btrfs with a 'k' suffix) is applied as limit to the "0/<id>"
+# qgroup of the subvolume. If setting up the limit fails, the subvolume is
+# removed again and the error is re-thrown.
+# All other formats are handled by the base class.
+sub alloc_image {
+    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;
+
+    if ($fmt eq 'subvol') {
+	my $imagedir = $class->get_subdir($scfg, 'images') . "/$vmid";
+	mkpath $imagedir;
+
+	$name = $find_free_diskname->($imagedir, $vmid, $fmt) if !$name;
+	my (undef, $tmpfmt) = PVE::Storage::Plugin::parse_name_dir($name);
+	die "illegal name '$name' - wrong extension for format ('$tmpfmt' != '$fmt')\n"
+	    if $tmpfmt ne $fmt;
+	my $path = "$imagedir/$name";
+	die "disk image '$path' already exists\n" if -e $path;
+
+	$class->btrfs_cmd(['subvolume', 'create', '--', $path]);
+
+	# If we need no limit we're done
+	return "$vmid/$name" if !$size;
+
+	# Use the subvol's default 0/$id qgroup
+	eval {
+	    $class->btrfs_cmd(['quota', 'enable', $path]);
+	    my $id = $class->btrfs_get_subvol_id($path);
+	    $class->btrfs_cmd(['qgroup', 'limit', "${size}k", "0/$id", $path]);
+	};
+	if (my $err = $@) {
+	    # Best-effort cleanup: a failure to delete the subvolume must not
+	    # hide the error which brought us here.
+	    eval { $class->btrfs_cmd(['subvolume', 'delete', '--', $path]) };
+	    warn $@ if $@;
+	    die $err;
+	}
+
+	return "$vmid/$name";
+    }
+
+    return PVE::Storage::Plugin::alloc_image(@_);
+}
+
+# Removes a volume. Subvolumes are deleted with 'btrfs subvolume delete',
+# everything else is passed on (with the original arguments) to the base
+# class. Returns undef after deleting a subvolume.
+sub free_image {
+    my ($class, $storeid, $scfg, $volname, $isBase, $format) = @_;
+
+    my $path = $class->filesystem_path($scfg, $volname);
+
+    # $format may be undefined; derive it from the volume name in that case
+    # so that a subvolume is never handed to the file based base class.
+    $format = ($class->parse_volname($volname))[6] if !defined($format);
+
+    if (defined($format) && $format eq 'subvol') {
+	# '--' for consistency with all other 'subvolume delete' calls
+	$class->btrfs_cmd(['subvolume', 'delete', '--', $path]);
+	return undef;
+    }
+
+    return PVE::Storage::Plugin::free_image(@_);
+}
+
+# Returns size information for a volume.
+#
+# For subvolumes the output of 'btrfs qgroup show -rf' is searched for the
+# line starting with the "0/<id>" qgroup of the subvolume, and the fourth
+# whitespace separated column of the first such line is returned
+# (presumably the limit set through 'qgroup limit' - confirm against the
+# btrfs-qgroup documentation). Dies if no such line is found.
+# Other volumes are handled by PVE::Storage::Plugin::file_size_info().
+sub volume_size_info {
+    my ($class, $scfg, $storeid, $volname, $timeout) = @_;
+
+    my $path = $class->filesystem_path($scfg, $volname);
+    my $format = ($class->parse_volname($volname))[6];
+
+    return PVE::Storage::Plugin::file_size_info($path, $timeout)
+	if $format ne 'subvol';
+
+    my $qgroup = '0/' . $class->btrfs_get_subvol_id($path);
+    my $line_re = qr/^\Q$qgroup\E\s+\d+\s+\d+\s+(\d+)$/;
+
+    my $size;
+    my $pick_size = sub {
+	my ($line) = @_;
+	return if defined($size); # only the first match counts
+	$size = $1 if $line =~ $line_re;
+	return;
+    };
+    $class->btrfs_cmd(['qgroup', 'show', '-rf', '--', $path], $pick_size);
+
+    die "failed to get subvolume size\n" if !defined($size);
+    return $size;
+}
+
+# Resizes a volume. For subvolumes the limit of the subvolume's "0/<id>"
+# qgroup is set to the new size and undef is returned; all other formats
+# are handled by the base class.
+sub volume_resize {
+    my ($class, $scfg, $storeid, $volname, $size, $running) = @_;
+
+    my $format = ($class->parse_volname($volname))[6];
+    if ($format eq 'subvol') {
+	my $path = $class->filesystem_path($scfg, $volname);
+
+	# $qgroup already carries the "0/" level prefix. It must not be
+	# prepended a second time (that resulted in "0/0/<id>").
+	my $qgroup = '0/' . $class->btrfs_get_subvol_id($path);
+
+	# NOTE(review): the 'k' suffix mirrors alloc_image() - confirm that
+	# callers pass $size in the same unit to volume_resize().
+	$class->btrfs_cmd(['qgroup', 'limit', "${size}k", $qgroup, $path]);
+	return undef;
+    }
+
+    return PVE::Storage::Plugin::volume_resize(@_);
+}
+
+# Creates snapshot $snap of a volume and returns undef.
+#
+# Subvolumes are snapshotted with 'btrfs subvolume snapshot', '.raw' files
+# are cloned with clone_file(). The snapshot is stored at the path returned
+# by filesystem_path() for the snapshot name. Any other volume is handled by
+# the base class.
+sub volume_snapshot {
+    my ($class, $scfg, $storeid, $volname, $snap) = @_;
+
+    my $format = ($class->parse_volname($volname))[6];
+    my $is_subvol = $format eq 'subvol';
+
+    return PVE::Storage::Plugin::volume_snapshot(@_)
+	if !$is_subvol && $volname !~ /\.raw$/;
+
+    my $source = $class->filesystem_path($scfg, $volname);
+    my $target = $class->filesystem_path($scfg, $volname, $snap);
+
+    if ($is_subvol) {
+	$class->btrfs_cmd(['subvolume', 'snapshot', '--', $source, $target]);
+    } else {
+	clone_file($source, $target);
+    }
+
+    return undef;
+}
+
+# A rollback is always reported as possible; the arguments are not inspected.
+sub volume_rollback_is_possible {
+    my ($class, $scfg, $storeid, $volname, $snap) = @_;
+
+    my $possible = 1;
+    return $possible;
+}
+
+# Rolls a volume back to snapshot $snap and returns undef.
+#
+# '.raw' files are simply re-cloned from the snapshot file. For subvolumes
+# the current subvolume is first moved aside to "$path.tmp", then a new
+# snapshot of the snapshot is created at the original path. If that fails
+# the old subvolume is moved back and the error is re-thrown; on success
+# the moved-aside subvolume is deleted (a failure there only warns).
+# Any other volume is handled by the base class.
+sub volume_snapshot_rollback {
+    my ($class, $scfg, $storeid, $volname, $snap) = @_;
+
+    my $format = ($class->parse_volname($volname))[6];
+    my $is_subvol = $format eq 'subvol';
+
+    return PVE::Storage::Plugin::volume_snapshot_rollback(@_)
+	if !$is_subvol && $volname !~ /\.raw$/;
+
+    my $path = $class->filesystem_path($scfg, $volname);
+    my $snap_path = $class->filesystem_path($scfg, $volname, $snap);
+
+    if (!$is_subvol) {
+	clone_file($snap_path, $path);
+	return undef;
+    }
+
+    my $backup = "$path.tmp";
+    rename($path, $backup) or die "failed to rename subvol: $!\n";
+
+    eval { $class->btrfs_cmd(['subvolume', 'snapshot', '--', $snap_path, "$path"]) };
+    if (my $err = $@) {
+	rename($backup, $path) or die "failed to restore subvolume after error: $!\n";
+	die $err;
+    }
+
+    eval { $class->btrfs_cmd(['subvolume', 'delete', '--', $backup]) };
+    warn $@ if $@;
+
+    return undef;
+}
+
+# Deletes snapshot $snap of a volume and returns undef.
+#
+# Snapshots of subvolumes are removed with 'btrfs subvolume delete', the
+# snapshot files of '.raw' volumes are unlinked (dies if that fails). Any
+# other volume is handled by the base class.
+sub volume_snapshot_delete {
+    my ($class, $scfg, $storeid, $volname, $snap, $running) = @_;
+
+    my $format = ($class->parse_volname($volname))[6];
+    my $is_subvol = $format eq 'subvol';
+
+    return PVE::Storage::Plugin::volume_snapshot_delete(@_)
+	if !$is_subvol && $volname !~ /\.raw$/;
+
+    my $snap_path = $class->filesystem_path($scfg, $volname, $snap);
+
+    if ($is_subvol) {
+	$class->btrfs_cmd(['subvolume', 'delete', '--', $snap_path]);
+	return undef;
+    }
+
+    unlink($snap_path) or die "failed to unlink snapshot: $!\n";
+    return undef;
+}
+
+# Returns 1 if $feature is listed for the given volume, undef otherwise.
+#
+# The lookup is done per feature, per volume state ('snap' if a snapshot
+# name is given, else 'base' for base volumes and 'current' for all others)
+# and per volume format.
+sub volume_has_feature {
+    my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;
+
+    # the two sets of formats used in the table below
+    my %no_vmdk = (qcow2 => 1, raw => 1, subvol => 1);
+    my %all_formats = (%no_vmdk, vmdk => 1);
+
+    my $features = {
+	snapshot => {
+	    current => { %no_vmdk },
+	    snap => { %no_vmdk },
+	},
+	clone => {
+	    base => { %all_formats },
+	},
+	template => {
+	    current => { %all_formats },
+	},
+	copy => {
+	    base => { %all_formats },
+	    current => { %all_formats },
+	    snap => { %no_vmdk },
+	},
+	sparseinit => {
+	    base => { %all_formats },
+	    current => { %all_formats },
+	},
+    };
+
+    my ($isBase, $format) = ($class->parse_volname($volname))[5, 6];
+
+    my $key = $snapname ? 'snap' : ($isBase ? 'base' : 'current');
+
+    return defined($features->{$feature}->{$key}->{$format}) ? 1 : undef;
+}
+
+# Always returns 1; no connection check is performed for this storage type.
+sub check_connection {
+    my ($class, $storeid, $scfg) = @_;
+
+    # do nothing by default
+    my $ok = 1;
+    return $ok;
+}
+
+1;
diff --git a/PVE/Storage/Makefile b/PVE/Storage/Makefile
index b924f21..49d3574 100644
--- a/PVE/Storage/Makefile
+++ b/PVE/Storage/Makefile
@@ -1,4 +1,4 @@
-SOURCES=Plugin.pm DirPlugin.pm LVMPlugin.pm NFSPlugin.pm ISCSIPlugin.pm RBDPlugin.pm SheepdogPlugin.pm ISCSIDirectPlugin.pm GlusterfsPlugin.pm ZFSPoolPlugin.pm ZFSPlugin.pm DRBDPlugin.pm LvmThinPlugin.pm
+SOURCES=Plugin.pm DirPlugin.pm LVMPlugin.pm NFSPlugin.pm ISCSIPlugin.pm RBDPlugin.pm SheepdogPlugin.pm ISCSIDirectPlugin.pm GlusterfsPlugin.pm ZFSPoolPlugin.pm ZFSPlugin.pm DRBDPlugin.pm LvmThinPlugin.pm BTRFSPlugin.pm
 
 .PHONY: install
 install:
diff --git a/PVE/Storage/Plugin.pm b/PVE/Storage/Plugin.pm
index 8089302..91acf39 100644
--- a/PVE/Storage/Plugin.pm
+++ b/PVE/Storage/Plugin.pm
@@ -441,7 +441,7 @@ sub path {
 }
 
 sub create_base {
-    my ($class, $storeid, $scfg, $volname) = @_;
+    my ($class, $storeid, $scfg, $volname, $protect_callback) = @_;
 
     # this only works for file based storage types
     die "storage definintion has no path\n" if !$scfg->{path};
@@ -475,17 +475,20 @@ sub create_base {
 	die "rename '$path' to '$newpath' failed - $!\n";
 
     # We try to protect base volume
+    if ($protect_callback) {
+	&$protect_callback($newpath, $format);
+    } else {
+	chmod(0444, $newpath); # nobody should write anything
 
-    chmod(0444, $newpath); # nobody should write anything
-
-    # also try to set immutable flag
-    eval { run_command(['/usr/bin/chattr', '+i', $newpath]); };
-    warn $@ if $@;
+	# also try to set immutable flag
+	eval { run_command(['/usr/bin/chattr', '+i', $newpath]); };
+	warn $@ if $@;
+    }
 
     return $newvolname;
 }
 
-my $find_free_diskname = sub {
+our $find_free_diskname = sub {
     my ($imgdir, $vmid, $fmt) = @_;
 
     my $disk_ids = {};
-- 
2.1.4





More information about the pve-devel mailing list