[pbs-devel] [PATCH proxmox-backup 4/5] file-restore/disk: support ZFS pools
Stefan Reiter
s.reiter at proxmox.com
Wed Jun 16 12:55:51 CEST 2021
Uses the ZFS utils to detect, import and mount zpools. These are
available as a new Bucket type 'zpool'.

Requires some minor changes to the existing disk and partition detection
code, so that the ZFS-specific part can use the information gathered in
the previous pass to associate drive names with their
'drive-xxxN.img.fidx' node.
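
For illustration (path scheme as in the doc comment in disk.rs, the pool
name 'rpool' is hypothetical): a file on a zpool is addressed with a
single component, the pool name, e.g.:

  /drive-scsi0/zpool/rpool/etc/hostname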

To detect the size, the zpool has to be imported. This is only done for
pools containing 5 or fewer disks, as anything more might take too long
(and should seldom be found within VMs).
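
For reference, the import/mount/size-query sequence the daemon performs
is roughly equivalent to running the following commands (flags as used
in the patch, <pool> is a placeholder):

  zpool import -f -o readonly=on -d /dev -R /mnt/<pool> <pool>
  zfs mount -a
  zpool list -o size -Hp <pool>
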
Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
@Thomas: I changed my mind about the "guess the size from the disk sizes"
approach for a couple of reasons:
* it didn't seem terribly accurate: even for simple striped setups, the
  calculated values differed from what ZFS reports
* it would require implementing the calculation for all RAID-Z modes and
  such, including handling of differently sized disks - possible, but
  probably not worth it, considering:
* the actual import/mount, if done *right away*, is surprisingly fast -
  presumably because the initial scan on import already loads all required
  metadata into the disk/QEMU caches
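
For reference, parse_zpool_import() expects 'zpool import' output roughly
like the sample below - a minimal test sketch (not part of the patch; the
id/state values are made up):

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_zpool_import_output() {
        // abridged, assumed 'zpool import' output: the pool name follows
        // three spaces and "pool: ", disk lines are tab-indented
        let output = concat!(
            "   pool: rpool\n",
            "     id: 1234567890\n",
            "  state: ONLINE\n",
            " config:\n",
            "\n",
            "\trpool       ONLINE\n",
            "\t  vda2      ONLINE\n",
        );
        let parsed = DiskState::parse_zpool_import(output);
        assert_eq!(
            parsed,
            vec![("rpool".to_string(), vec!["vda2".to_string()])]
        );
    }
}
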
 src/bin/proxmox_restore_daemon/disk.rs | 153 ++++++++++++++++++++++++-
 1 file changed, 152 insertions(+), 1 deletion(-)

diff --git a/src/bin/proxmox_restore_daemon/disk.rs b/src/bin/proxmox_restore_daemon/disk.rs
index 1ff5468f..5b66dd2f 100644
--- a/src/bin/proxmox_restore_daemon/disk.rs
+++ b/src/bin/proxmox_restore_daemon/disk.rs
@@ -7,13 +7,17 @@ use std::collections::HashMap;
 use std::fs::{create_dir_all, File};
 use std::io::{BufRead, BufReader};
 use std::path::{Component, Path, PathBuf};
+use std::process::Command;
 
 use proxmox::const_regex;
 use proxmox::tools::fs;
 use proxmox_backup::api2::types::BLOCKDEVICE_NAME_REGEX;
+use proxmox_backup::tools::run_command;
 
 const_regex! {
     VIRTIO_PART_REGEX = r"^vd[a-z]+(\d+)$";
+    ZPOOL_POOL_NAME_REGEX = r"^ {3}pool: (.*)$";
+    ZPOOL_IMPORT_DISK_REGEX = r"^\t {2,4}(vd[a-z]+(?:\d+)?)\s+ONLINE$";
 }
 
 lazy_static! {
@@ -43,6 +47,7 @@ pub enum ResolveResult {
     BucketComponents(Vec<(String, u64)>),
 }
 
+#[derive(Clone)]
 struct PartitionBucketData {
     dev_node: String,
     number: i32,
@@ -50,6 +55,13 @@ struct PartitionBucketData {
     size: u64,
 }
 
+#[derive(Clone)]
+struct ZFSBucketData {
+    name: String,
+    mountpoint: Option<PathBuf>,
+    size: u64,
+}
+
 /// A "Bucket" represents a mapping found on a disk, e.g. a partition, a zfs dataset or an LV. A
 /// uniquely identifying path to a file then consists of four components:
 /// "/disk/bucket/component/path"
@@ -60,9 +72,11 @@ struct PartitionBucketData {
 /// path: relative path of the file on the filesystem indicated by the other parts, may contain
 ///       more subdirectories
 /// e.g.: "/drive-scsi0/part/0/etc/passwd"
+#[derive(Clone)]
 enum Bucket {
     Partition(PartitionBucketData),
     RawFs(PartitionBucketData),
+    ZPool(ZFSBucketData),
 }
 
 impl Bucket {
@@ -81,6 +95,13 @@ impl Bucket {
                 }
             }
             Bucket::RawFs(_) => ty == "raw",
+            Bucket::ZPool(data) => {
+                if let Some(ref comp) = comp.get(0) {
+                    ty == "zpool" && comp.as_ref() == &data.name
+                } else {
+                    false
+                }
+            }
         })
     }
 
@@ -88,6 +109,7 @@ impl Bucket {
         match self {
             Bucket::Partition(_) => "part",
             Bucket::RawFs(_) => "raw",
+            Bucket::ZPool(_) => "zpool",
         }
     }
 
@@ -104,6 +126,7 @@ impl Bucket {
         Ok(match self {
             Bucket::Partition(data) => data.number.to_string(),
             Bucket::RawFs(_) => "raw".to_owned(),
+            Bucket::ZPool(data) => data.name.clone(),
         })
     }
 
@@ -111,6 +134,7 @@ impl Bucket {
         Ok(match type_string {
             "part" => 1,
             "raw" => 0,
+            "zpool" => 1,
             _ => bail!("invalid bucket type for component depth: {}", type_string),
         })
     }
@@ -118,6 +142,7 @@ impl Bucket {
     fn size(&self) -> u64 {
         match self {
             Bucket::Partition(data) | Bucket::RawFs(data) => data.size,
+            Bucket::ZPool(data) => data.size,
         }
     }
 }
@@ -162,6 +187,59 @@ impl Filesystems {
                 data.mountpoint = Some(mp.clone());
                 Ok(mp)
             }
+            Bucket::ZPool(data) => {
+                if let Some(mp) = &data.mountpoint {
+                    return Ok(mp.clone());
+                }
+
+                let mntpath = format!("/mnt/{}", &data.name);
+                create_dir_all(&mntpath)?;
+
+                // call ZFS tools to import and mount the pool with the root mount at 'mntpath'
+                let mut cmd = Command::new("/sbin/zpool");
+                cmd.args(
+                    [
+                        "import",
+                        "-f",
+                        "-o",
+                        "readonly=on",
+                        "-d",
+                        "/dev",
+                        "-R",
+                        &mntpath,
+                        &data.name,
+                    ]
+                    .iter(),
+                );
+                if let Err(msg) = run_command(cmd, None) {
+                    // ignore a double import, which may happen if a previous attempt failed
+                    // further down below - this way we can at least try again
+                    if !msg
+                        .to_string()
+                        .contains("a pool with that name already exists")
+                    {
+                        return Err(msg);
+                    }
+                }
+
+                // 'zfs mount -a' simply mounts all datasets that haven't been automounted,
+                // which should only be the ones we've just imported
+                let mut cmd = Command::new("/sbin/zfs");
+                cmd.args(["mount", "-a"].iter());
+                run_command(cmd, None)?;
+
+                // now that the pool is imported, we can also query its size
+                let mut cmd = Command::new("/sbin/zpool");
+                cmd.args(["list", "-o", "size", "-Hp", &data.name].iter());
+                let size = run_command(cmd, None)?;
+                if let Ok(size) = size.trim().parse::<u64>() {
+                    data.size = size;
+                }
+
+                let mp = PathBuf::from(mntpath);
+                data.mountpoint = Some(mp.clone());
+                Ok(mp)
+            }
         }
     }
 
@@ -204,9 +282,11 @@ impl DiskState {
     pub fn scan() -> Result<Self, Error> {
         let filesystems = Filesystems::scan()?;
 
+        let mut disk_map = HashMap::new();
+        let mut drive_info = HashMap::new();
+
         // create mapping for virtio drives and .fidx files (via serial description)
         // note: disks::DiskManager relies on udev, which we don't have
-        let mut disk_map = HashMap::new();
         for entry in proxmox_backup::tools::fs::scan_subdir(
             libc::AT_FDCWD,
             "/sys/block",
@@ -230,6 +310,8 @@ impl DiskState {
                 }
             };
 
+            drive_info.insert(name.to_owned(), fidx.clone());
+
             // attempt to mount device directly
             let dev_node = format!("/dev/{}", name);
             let size = Self::make_dev_node(&dev_node, &sys_path)?;
@@ -281,11 +363,55 @@ impl DiskState {
                 });
                 parts.push(bucket);
+
+                drive_info.insert(part_name.to_owned(), fidx.clone());
             }
 
             disk_map.insert(fidx, parts);
         }
 
+        // After the above, every valid disk should have a device node in /dev, so we can
+        // query all of them for zpools (a failed import scan, e.g. no pools, counts as none)
+        let mut cmd = Command::new("/sbin/zpool");
+        cmd.args(["import", "-d", "/dev"].iter());
+        let result = run_command(cmd, None).unwrap_or_default();
+        for (pool, disks) in Self::parse_zpool_import(&result) {
+            let mut bucket = Bucket::ZPool(ZFSBucketData {
+                name: pool.clone(),
+                size: 0,
+                mountpoint: None,
+            });
+
+            // we assume pools with more than 5 disks take too long to import, so we don't
+            // auto-mount them - this also means no size can be reported
+            if disks.len() <= 5 {
+                let mp = filesystems.ensure_mounted(&mut bucket);
+                info!(
+                    "zpool '{}' (on: {:?}) auto-mounted at '{:?}' (size: {}B)",
+                    &pool,
+                    &disks,
+                    mp,
+                    bucket.size()
+                );
+            } else {
+                info!(
+                    "zpool '{}' (on: {:?}) auto-mount skipped, too many disks",
+                    &pool, &disks
+                );
+            }
+
+            for disk in disks {
+                if let Some(fidx) = drive_info.get(&disk) {
+                    match disk_map.get_mut(fidx) {
+                        Some(v) => v.push(bucket.clone()),
+                        None => {
+                            disk_map.insert(fidx.to_owned(), vec![bucket.clone()]);
+                        }
+                    }
+                }
+            }
+        }
+
         Ok(Self {
             filesystems,
             disk_map,
@@ -419,4 +545,29 @@ impl DiskState {
         stat::mknod(path, stat::SFlag::S_IFBLK, stat::Mode::S_IRWXU, dev)?;
         Ok(())
     }
+
+    fn parse_zpool_import(data: &str) -> Vec<(String, Vec<String>)> {
+        let mut ret = Vec::new();
+        let mut disks = Vec::new();
+        let mut cur = "".to_string();
+        for line in data.lines() {
+            if let Some(groups) = (ZPOOL_POOL_NAME_REGEX.regex_obj)().captures(line) {
+                if let Some(name) = groups.get(1) {
+                    if !disks.is_empty() {
+                        ret.push((cur, disks.clone()));
+                    }
+                    disks.clear();
+                    cur = name.as_str().to_owned();
+                }
+            } else if let Some(groups) = (ZPOOL_IMPORT_DISK_REGEX.regex_obj)().captures(line) {
+                if let Some(disk) = groups.get(1) {
+                    disks.push(disk.as_str().to_owned());
+                }
+            }
+        }
+        if !disks.is_empty() && !cur.is_empty() {
+            ret.push((cur, disks));
+        }
+        ret
+    }
 }
--
2.30.2