[pbs-devel] [PATCH v2 proxmox-backup 12/20] file-restore-daemon: add disk module
Stefan Reiter
s.reiter at proxmox.com
Wed Mar 24 16:18:19 CET 2021
Includes functionality for scanning and referring to partitions on
attached disks (i.e. snapshot images).
The structure is fairly modular, so adding support for ZFS, LVM, etc. in the future
should be easy.
The path is encoded as "/disk/bucket/component/path/to/file", e.g.
"/drive-scsi0/part/0/etc/passwd". See the comments for further
explanations on the design.
Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
src/bin/proxmox-restore-daemon.rs | 16 +-
src/bin/proxmox_restore_daemon/disk.rs | 341 +++++++++++++++++++++++++
src/bin/proxmox_restore_daemon/mod.rs | 3 +
3 files changed, 359 insertions(+), 1 deletion(-)
create mode 100644 src/bin/proxmox_restore_daemon/disk.rs
diff --git a/src/bin/proxmox-restore-daemon.rs b/src/bin/proxmox-restore-daemon.rs
index 6b453ad3..a2701b7c 100644
--- a/src/bin/proxmox-restore-daemon.rs
+++ b/src/bin/proxmox-restore-daemon.rs
@@ -1,13 +1,14 @@
///! Daemon binary to run inside a micro-VM for secure single file restore of disk images
use anyhow::{bail, format_err, Error};
use log::error;
+use lazy_static::lazy_static;
use std::os::unix::{
io::{FromRawFd, RawFd},
net,
};
use std::path::Path;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
@@ -26,6 +27,13 @@ pub const MAX_PENDING: usize = 32;
/// Will be present in base initramfs
pub const VM_DETECT_FILE: &str = "/restore-vm-marker";
+lazy_static! {
+    /// Shared state of all attached disks; lock it to access data on the attached snapshots.
+    ///
+    /// Initialized through `DiskState::scan()` on first access. A failing scan panics, since
+    /// the result is unwrapped.
+    pub static ref DISK_STATE: Arc<Mutex<DiskState>> =
+        Arc::new(Mutex::new(DiskState::scan().unwrap()));
+}
+
/// This is expected to be run by 'proxmox-file-restore' within a mini-VM
fn main() -> Result<(), Error> {
if !Path::new(VM_DETECT_FILE).exists() {
@@ -41,6 +49,12 @@ fn main() -> Result<(), Error> {
.write_style(env_logger::WriteStyle::Never)
.init();
+ // scan all attached disks now, before starting the API
+ // this will panic and stop the VM if anything goes wrong
+ {
+ let _disk_state = DISK_STATE.lock().unwrap();
+ }
+
proxmox_backup::tools::runtime::main(run())
}
diff --git a/src/bin/proxmox_restore_daemon/disk.rs b/src/bin/proxmox_restore_daemon/disk.rs
new file mode 100644
index 00000000..941a9a43
--- /dev/null
+++ b/src/bin/proxmox_restore_daemon/disk.rs
@@ -0,0 +1,341 @@
+//! Low-level disk (image) access functions for file restore VMs.
+use anyhow::{bail, format_err, Error};
+use log::{info, warn};
+use lazy_static::lazy_static;
+
+use std::collections::HashMap;
+use std::fs::{File, create_dir_all};
+use std::io::{BufRead, BufReader};
+use std::path::{Component, Path, PathBuf};
+
+use proxmox::const_regex;
+use proxmox::tools::fs;
+use proxmox_backup::api2::types::BLOCKDEVICE_NAME_REGEX;
+
+// Matches names of partitions on virtio block devices (e.g. "vda1"); the single capture group
+// holds the trailing digits.
+const_regex! {
+    VIRTIO_PART_REGEX = r"^vd[a-z]+(\d+)$";
+}
+
+lazy_static! {
+    /// Extra mount options, keyed by filesystem type name, handed to `mount` in addition to the
+    /// generic flags. Filesystem types without an entry are mounted without extra options.
+    static ref FS_OPT_MAP: HashMap<&'static str, &'static str> = [
+        // otherwise ext complains about mounting read-only
+        ("ext2", "noload"),
+        ("ext3", "noload"),
+        ("ext4", "noload"),
+        // ufs2 is used as default since FreeBSD 5.0 released in 2003, so let's assume that
+        // whatever the user is trying to restore is not using anything older...
+        ("ufs", "ufstype=ufs2"),
+    ]
+    .iter()
+    .copied()
+    .collect();
+}
+
+/// Outcome of `DiskState::resolve`, depending on how much of the path was given.
+pub enum ResolveResult {
+    /// Full path given: local path (below the bucket's mountpoint) of the requested file.
+    Path(PathBuf),
+    /// Only the disk was given: the bucket types available on it.
+    BucketTypes(Vec<&'static str>),
+    /// Disk and bucket type were given: the components of all buckets of that type.
+    BucketComponents(Vec<String>),
+}
+
+/// Data describing a single partition found on an attached disk.
+struct PartitionBucketData {
+    /// Path of the partition's block device node, e.g. "/dev/vda1".
+    dev_node: String,
+    /// Partition number as read from the partition's 'partition' file in sysfs.
+    number: i32,
+    /// Where the partition is mounted; `None` until it has been mounted successfully.
+    mountpoint: Option<PathBuf>,
+}
+
+/// A "Bucket" is one mapping found on a disk, e.g. a partition, a zfs dataset or an LV.
+///
+/// A file is uniquely identified by a path made up of four parts:
+///   "/disk/bucket/component/path"
+/// with
+///   disk: fidx file name
+///   bucket: bucket type
+///   component: identifier of the specific bucket
+///   path: relative path of the file on the filesystem indicated by the other parts, may
+///         contain more subdirectories
+/// e.g.: "/drive-scsi0/part/0/etc/passwd"
+enum Bucket {
+    /// A partition of the disk (bucket type "part", component is the partition number).
+    Partition(PartitionBucketData),
+}
+
+impl Bucket {
+    /// Find the bucket in `haystack` identified by bucket type `typ` and component `comp`.
+    ///
+    /// Returns `None` if no bucket matches. This includes the case where `comp` is not a valid
+    /// identifier for the given type (e.g. a non-numeric partition number); such input comes
+    /// straight from the requested path and must not panic.
+    fn filter_mut<'a, A: AsRef<str>, B: AsRef<str>>(
+        haystack: &'a mut Vec<Bucket>,
+        typ: A,
+        comp: B,
+    ) -> Option<&'a mut Bucket> {
+        let typ = typ.as_ref();
+        let comp = comp.as_ref();
+        haystack.iter_mut().find(|b| match b {
+            Bucket::Partition(data) => {
+                // a component that does not parse as a number simply matches no partition
+                typ == "part" && comp.parse::<i32>() == Ok(data.number)
+            }
+        })
+    }
+
+    /// The bucket type as used for the second part of a path, e.g. "part".
+    fn type_string(&self) -> &'static str {
+        match self {
+            Bucket::Partition(_) => "part",
+        }
+    }
+
+    /// The identifier of this bucket as used for the third part of a path; for partitions this
+    /// is the partition number.
+    fn component_string(&self) -> String {
+        match self {
+            Bucket::Partition(data) => data.number.to_string(),
+        }
+    }
+}
+
+/// Helpers for dealing with the local filesystem. Kept separate from `DiskState` so that
+/// 'supported_fs' can be used in try_mount while a Bucket is still mutably borrowed from
+/// DiskState.
+struct Filesystems {
+    /// Filesystem type names collected from "/proc/filesystems", tried in order when mounting.
+    supported_fs: Vec<String>,
+}
+
+impl Filesystems {
+    /// Collect the filesystem types the running kernel offers for regular mounts.
+    ///
+    /// Reads "/proc/filesystems" and keeps every entry that is neither flagged `nodev` nor ZFS.
+    ///
+    /// # Errors
+    /// Fails if the file cannot be opened or read.
+    fn scan() -> Result<Self, Error> {
+        // detect kernel supported filesystems
+        let mut supported_fs = Vec::new();
+        for line in BufReader::new(File::open("/proc/filesystems")?).lines() {
+            // propagate read errors instead of skipping them: a persistent error would
+            // otherwise be returned by the iterator over and over again
+            let line = line?;
+            if line.starts_with("nodev") {
+                continue;
+            }
+
+            // entries are padded with whitespace, so strip that *before* comparing names
+            let name = line.trim();
+            // ZFS is treated specially, don't attempt to do a regular mount with it
+            if !name.is_empty() && name != "zfs" {
+                supported_fs.push(name.to_owned());
+            }
+        }
+
+        Ok(Self { supported_fs })
+    }
+
+    /// Make sure `bucket` is mounted and return its mountpoint.
+    ///
+    /// A bucket that already has a mountpoint recorded is returned as-is. Otherwise it is
+    /// mounted below "/mnt" (at "/mnt" followed by the device node path) and the mountpoint is
+    /// stored in the bucket for later calls.
+    fn ensure_mounted(&self, bucket: &mut Bucket) -> Result<PathBuf, Error> {
+        match bucket {
+            Bucket::Partition(data) => {
+                // regular data partition à la "/dev/vdxN"
+                if let Some(mp) = &data.mountpoint {
+                    return Ok(mp.clone());
+                }
+
+                let mp = format!("/mnt{}/", data.dev_node);
+                self.try_mount(&data.dev_node, &mp)?;
+                let mp = PathBuf::from(mp);
+                data.mountpoint = Some(mp.clone());
+                Ok(mp)
+            }
+        }
+    }
+
+    /// Mount `source` on `target` (created if missing), read-only and with
+    /// noexec/nosuid/nodev, trying each supported filesystem type until one succeeds.
+    ///
+    /// # Errors
+    /// Fails if `target` cannot be created or if no filesystem type could be mounted.
+    fn try_mount(&self, source: &str, target: &str) -> Result<(), Error> {
+        use nix::mount::*;
+
+        create_dir_all(target)?;
+
+        // try all supported fs until one works - this is the way Busybox's 'mount' does it too:
+        // https://git.busybox.net/busybox/tree/util-linux/mount.c?id=808d93c0eca49e0b22056e23d965f0d967433fbb#n2152
+        // note that ZFS is intentionally left out (see scan())
+        let flags =
+            MsFlags::MS_RDONLY | MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
+        for fs in &self.supported_fs {
+            let fs: &str = fs.as_ref();
+            // filesystem specific mount options, if any are required
+            let opts = FS_OPT_MAP.get(fs).copied();
+            match mount(Some(source), target, Some(fs), flags, opts) {
+                Ok(()) => {
+                    info!("mounting '{}' succeeded, fstype: '{}'", source, fs);
+                    return Ok(());
+                }
+                Err(err) => {
+                    warn!("mount error on '{}' ({}) - {}", source, fs, err);
+                }
+            }
+        }
+
+        bail!("all mounts failed or no supported file system")
+    }
+}
+
+/// Everything known about the attached disks: the buckets found on each of them and the
+/// filesystem types available for mounting those buckets.
+pub struct DiskState {
+    /// Filesystem types usable for mounting, see `Filesystems::scan`.
+    filesystems: Filesystems,
+    /// Buckets found per disk, keyed by the content of the disk's 'serial' file in sysfs.
+    disk_map: HashMap<String, Vec<Bucket>>,
+}
+
+impl DiskState {
+    /// Scan all disks for supported buckets.
+    ///
+    /// Every "vd*" entry below "/sys/block" whose 'serial' file is readable gets recorded under
+    /// that serial, together with one partition bucket per child entry matching
+    /// `VIRTIO_PART_REGEX`. A block device node is created below "/dev" for each partition.
+    /// Disks without a readable serial are skipped with a warning, while failures to read
+    /// partition information or to create a device node abort the scan.
+    pub fn scan() -> Result<Self, Error> {
+        // create mapping for virtio drives and .fidx files (via serial description)
+        // note: disks::DiskManager relies on udev, which we don't have
+        let mut disk_map = HashMap::new();
+
+        let disk_entries = proxmox_backup::tools::fs::scan_subdir(
+            libc::AT_FDCWD,
+            "/sys/block",
+            &BLOCKDEVICE_NAME_REGEX,
+        )?
+        .filter_map(|x| x.ok());
+
+        for disk_entry in disk_entries {
+            // NOTE(review): presumably fine as scan_subdir only yields names matching the
+            // regex - confirm
+            let name = unsafe { disk_entry.file_name_utf8_unchecked() };
+            if !name.starts_with("vd") {
+                continue;
+            }
+
+            let sys_path: &str = &format!("/sys/block/{}", name);
+
+            // the serial is what a disk is addressed by later on
+            let fidx = match fs::file_read_string(&format!("{}/serial", sys_path)) {
+                Ok(serial) => serial,
+                Err(err) => {
+                    warn!("disk '{}': could not read serial file - {}", name, err);
+                    continue;
+                }
+            };
+
+            let part_entries = proxmox_backup::tools::fs::scan_subdir(
+                libc::AT_FDCWD,
+                sys_path,
+                &VIRTIO_PART_REGEX,
+            )?
+            .filter_map(|x| x.ok());
+
+            let mut parts = Vec::new();
+            for part_entry in part_entries {
+                let part_name = unsafe { part_entry.file_name_utf8_unchecked() };
+                let devnode = format!("/dev/{}", part_name);
+                let part_path = format!("{}/{}", sys_path, part_name);
+
+                // create partition device node for further use
+                let dev_num_str = fs::file_read_firstline(&format!("{}/dev", part_path))?;
+                let fields: Vec<&str> = dev_num_str.trim().split(':').collect();
+                let (maj, min) = match fields.as_slice() {
+                    [maj, min] => (maj, min),
+                    _ => bail!(
+                        "got invalid 'dev' content: '{}' - broken kernel?",
+                        dev_num_str
+                    ),
+                };
+                Self::mknod_blk(&devnode, maj.parse()?, min.parse()?)?;
+
+                let number = fs::file_read_firstline(&format!("{}/partition", part_path))?
+                    .trim()
+                    .parse::<i32>()
+                    .map_err(|err| {
+                        format_err!(
+                            "got invalid 'partition' number content - '{}' - broken kernel?",
+                            err
+                        )
+                    })?;
+
+                info!(
+                    "drive '{}' ('{}'): found partition '{}' ({})",
+                    name, fidx, devnode, number
+                );
+
+                parts.push(Bucket::Partition(PartitionBucketData {
+                    dev_node: devnode,
+                    mountpoint: None,
+                    number,
+                }));
+            }
+
+            disk_map.insert(fidx, parts);
+        }
+
+        let filesystems = Filesystems::scan()?;
+
+        Ok(Self {
+            filesystems,
+            disk_map,
+        })
+    }
+
+    /// Resolve a path of the form "/disk/bucket/component/path/to/file" to a local file path.
+    ///
+    /// Given a full path like "/drive-scsi0.img.fidx/part/1/etc/passwd", the addressed bucket
+    /// is mounted on-demand (i.e. if not already mounted) and a path pointing to the requested
+    /// file locally is returned, e.g. "/mnt/dev/vda1/etc/passwd", which can be used to read the
+    /// file. Given a partial path, i.e. only "/drive-scsi0.img.fidx" or
+    /// "/drive-scsi0.img.fidx/part", a list of available bucket types or bucket components is
+    /// returned respectively.
+    ///
+    /// # Errors
+    /// Fails if the path is empty, if the disk or bucket cannot be found, if mounting fails, or
+    /// if the file part of the path contains anything but regular names (notably ".."), as that
+    /// would allow pointing outside of the mounted bucket.
+    pub fn resolve(&mut self, path: &Path) -> Result<ResolveResult, Error> {
+        let mut cmp = path.components().peekable();
+        // a leading "/" or "./" carries no information, skip it
+        match cmp.peek() {
+            Some(Component::RootDir) | Some(Component::CurDir) => {
+                cmp.next();
+            }
+            None => bail!("empty path cannot be resolved to file location"),
+            _ => {}
+        }
+
+        let req_fidx = match cmp.next() {
+            Some(Component::Normal(x)) => x.to_string_lossy(),
+            _ => bail!("no or invalid image in path"),
+        };
+
+        let buckets = match self.disk_map.get_mut(req_fidx.as_ref()) {
+            Some(x) => x,
+            None => bail!("given image '{}' not found", req_fidx),
+        };
+
+        let bucket_type = match cmp.next() {
+            Some(Component::Normal(x)) => x.to_string_lossy(),
+            Some(c) => bail!("invalid bucket in path: {:?}", c),
+            None => {
+                // list bucket types available
+                let mut types = buckets
+                    .iter()
+                    .map(|b| b.type_string())
+                    .collect::<Vec<&'static str>>();
+                // dedup requires duplicates to be consecutive, which is the case - see scan()
+                types.dedup();
+                return Ok(ResolveResult::BucketTypes(types));
+            }
+        };
+
+        let component = match cmp.next() {
+            Some(Component::Normal(x)) => x.to_string_lossy(),
+            Some(c) => bail!("invalid bucket component in path: {:?}", c),
+            None => {
+                // list bucket components available
+                let comps = buckets
+                    .iter()
+                    .filter(|b| b.type_string() == bucket_type)
+                    .map(Bucket::component_string)
+                    .collect();
+                return Ok(ResolveResult::BucketComponents(comps));
+            }
+        };
+
+        let bucket = match Bucket::filter_mut(buckets, &bucket_type, &component) {
+            Some(bucket) => bucket,
+            None => bail!(
+                "bucket/component path not found: {}/{}/{}",
+                req_fidx,
+                bucket_type,
+                component
+            ),
+        };
+
+        // bucket found, check mount
+        let mountpoint = self.filesystems.ensure_mounted(bucket).map_err(|err| {
+            format_err!(
+                "mounting '{}/{}/{}' failed: {}",
+                req_fidx,
+                bucket_type,
+                component,
+                err
+            )
+        })?;
+
+        let mut local_path = mountpoint;
+        for rem in cmp {
+            match rem {
+                Component::Normal(part) => local_path.push(part),
+                // `components()` already drops "." inside a path, so what ends up here is
+                // mostly ".." - which must not be allowed to climb out of the mountpoint
+                other => bail!("invalid component in file path: {:?}", other),
+            }
+        }
+
+        Ok(ResolveResult::Path(local_path))
+    }
+
+    /// Create a block device node at `path` for the device number `maj`:`min`, with
+    /// read/write/execute permission for the owner only.
+    fn mknod_blk(path: &str, maj: u64, min: u64) -> Result<(), Error> {
+        use nix::sys::stat::{makedev, mknod, Mode, SFlag};
+        mknod(path, SFlag::S_IFBLK, Mode::S_IRWXU, makedev(maj, min))?;
+        Ok(())
+    }
+}
diff --git a/src/bin/proxmox_restore_daemon/mod.rs b/src/bin/proxmox_restore_daemon/mod.rs
index 3b52cf06..58e2bb6e 100644
--- a/src/bin/proxmox_restore_daemon/mod.rs
+++ b/src/bin/proxmox_restore_daemon/mod.rs
@@ -6,3 +6,6 @@ pub mod auth;
mod watchdog;
pub use watchdog::*;
+
+mod disk;
+pub use disk::*;
--
2.20.1
More information about the pbs-devel
mailing list