[pbs-devel] [RFC PATCH proxmox-backup 2/2] file-restore: dynamically increase memory of vm for zpools

Dominik Csapak d.csapak at proxmox.com
Mon Oct 31 12:39:53 CET 2022


When a backup contains a drive with ZFS on it, the default memory
size of the restore VM (up to 384 MiB) is often not enough to hold the ZFS metadata.

To improve that situation, add memory dynamically (1 GiB) whenever a path
that resides on ZFS is requested. Note that the image must be started with a
kernel capable of memory hotplug.
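
For illustration, with the defaults introduced further down in this
patch the memory-related part of the QEMU invocation would look roughly
like the following (a base allocation of 256 MiB is assumed here, the
actual value is computed at runtime; memhp_default_state=online_kernel
makes the guest kernel online hotplugged memory automatically):

    qemu-system-x86_64 \
        ... \
        -m 256M,slots=1,maxmem=1280M \
        -append "quiet panic=1 zfs_arc_min=0 zfs_arc_max=0 memhp_default_state=online_kernel"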

To achieve that, we also have to add a QMP socket to the VM, so that
we can later connect to it and hotplug the memory backend and DIMM.
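
A rough sketch of the QMP exchange on that socket, as performed by
set_dynamic_memory() below (newline-delimited JSON; QEMU's greeting and
the reply objects are omitted, "mem0"/"dimm0" are the ids used here):

    {"execute":"qmp_capabilities"}
    {"execute":"object-add","arguments":{"qom-type":"memory-backend-ram","id":"mem0","size":1073741824}}
    {"execute":"device_add","arguments":{"driver":"pc-dimm","id":"dimm0","memdev":"mem0"}}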

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
 proxmox-file-restore/src/block_driver_qemu.rs | 45 ++++++++++--
 proxmox-file-restore/src/qemu_helper.rs       | 72 ++++++++++++++++++-
 2 files changed, 107 insertions(+), 10 deletions(-)

diff --git a/proxmox-file-restore/src/block_driver_qemu.rs b/proxmox-file-restore/src/block_driver_qemu.rs
index 736ae2fd..c2cd8d49 100644
--- a/proxmox-file-restore/src/block_driver_qemu.rs
+++ b/proxmox-file-restore/src/block_driver_qemu.rs
@@ -1,7 +1,10 @@
 //! Block file access via a small QEMU restore VM using the PBS block driver in QEMU
 use std::collections::HashMap;
+use std::ffi::OsStr;
 use std::fs::{File, OpenOptions};
 use std::io::{prelude::*, BufReader, BufWriter, SeekFrom};
+use std::os::unix::prelude::OsStrExt;
+use std::path::Path;
 
 use anyhow::{bail, Error};
 use futures::FutureExt;
@@ -16,6 +19,7 @@ use pbs_datastore::catalog::ArchiveEntry;
 
 use super::block_driver::*;
 use crate::get_user_run_dir;
+use crate::qemu_helper::set_dynamic_memory;
 
 const RESTORE_VM_MAP: &str = "restore-vm-map.json";
 
@@ -119,7 +123,7 @@ fn new_ticket() -> String {
     proxmox_uuid::Uuid::generate().to_string()
 }
 
-async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Error> {
+async fn ensure_running(details: &SnapRestoreDetails) -> Result<(i32, VsockClient), Error> {
     let name = make_name(&details.repo, &details.namespace, &details.snapshot);
     let mut state = VMStateMap::load()?;
 
@@ -133,7 +137,7 @@ async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Err
             match res {
                 Ok(_) => {
                     // VM is running and we just reset its timeout, nothing to do
-                    return Ok(client);
+                    return Ok((vm.cid, client));
                 }
                 Err(err) => {
                     log::warn!("stale VM detected, restarting ({})", err);
@@ -170,13 +174,30 @@ async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Err
     };
 
     state.write()?;
-    Ok(VsockClient::new(
+    Ok((
         new_cid,
-        DEFAULT_VSOCK_PORT,
-        Some(vms.ticket),
+        VsockClient::new(new_cid, DEFAULT_VSOCK_PORT, Some(vms.ticket)),
     ))
 }
 
+fn path_is_zfs(path: &[u8]) -> bool {
+    if path.is_empty() {
+        return false;
+    }
+    let path = Path::new(OsStr::from_bytes(path));
+    let mut components = path.components();
+    let part = match components.next() {
+        Some(std::path::Component::RootDir) => match components.next() {
+            Some(std::path::Component::Normal(comp)) => comp,
+            _ => return false,
+        },
+        Some(std::path::Component::Normal(comp)) => comp,
+        _ => return false,
+    };
+
+    part == OsStr::new("zpool") && components.next().is_some()
+}
+
 async fn start_vm(cid_request: i32, details: &SnapRestoreDetails) -> Result<VMState, Error> {
     let ticket = new_ticket();
     let files = details
@@ -199,10 +220,15 @@ impl BlockRestoreDriver for QemuBlockDriver {
         mut path: Vec<u8>,
     ) -> Async<Result<Vec<ArchiveEntry>, Error>> {
         async move {
-            let client = ensure_running(&details).await?;
+            let (cid, client) = ensure_running(&details).await?;
             if !path.is_empty() && path[0] != b'/' {
                 path.insert(0, b'/');
             }
+            if path_is_zfs(&path) {
+                if let Err(err) = set_dynamic_memory(cid, None).await {
+                    log::error!("could not increase memory: {err}");
+                }
+            }
             let path = base64::encode(img_file.bytes().chain(path).collect::<Vec<u8>>());
             let mut result = client
                 .get("api2/json/list", Some(json!({ "path": path })))
@@ -221,10 +247,15 @@ impl BlockRestoreDriver for QemuBlockDriver {
         zstd: bool,
     ) -> Async<Result<Box<dyn tokio::io::AsyncRead + Unpin + Send>, Error>> {
         async move {
-            let client = ensure_running(&details).await?;
+            let (cid, client) = ensure_running(&details).await?;
             if !path.is_empty() && path[0] != b'/' {
                 path.insert(0, b'/');
             }
+            if path_is_zfs(&path) {
+                if let Err(err) = set_dynamic_memory(cid, None).await {
+                    log::error!("could not increase memory: {err}");
+                }
+            }
             let path = base64::encode(img_file.bytes().chain(path).collect::<Vec<u8>>());
             let (mut tx, rx) = tokio::io::duplex(1024 * 4096);
             let mut data = json!({ "path": path, "zstd": zstd });
diff --git a/proxmox-file-restore/src/qemu_helper.rs b/proxmox-file-restore/src/qemu_helper.rs
index d6f4c5a9..7216d351 100644
--- a/proxmox-file-restore/src/qemu_helper.rs
+++ b/proxmox-file-restore/src/qemu_helper.rs
@@ -6,7 +6,12 @@ use std::path::PathBuf;
 use std::time::{Duration, Instant};
 
 use anyhow::{bail, format_err, Error};
-use tokio::time;
+use serde_json::json;
+use tokio::io::AsyncBufRead;
+use tokio::{
+    io::{AsyncBufReadExt, AsyncWrite, AsyncWriteExt},
+    time,
+};
 
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
@@ -22,6 +27,8 @@ use crate::{backup_user, cpio};
 
 const PBS_VM_NAME: &str = "pbs-restore-vm";
 const MAX_CID_TRIES: u64 = 32;
+const DYNAMIC_MEMORY_MB: usize = 1024;
+const QMP_SOCKET_PREFIX: &str = "/run/proxmox-backup/file-restore-qmp-";
 
 fn create_restore_log_dir() -> Result<String, Error> {
     let logpath = format!("{}/file-restore", pbs_buildcfg::PROXMOX_BACKUP_LOG_DIR);
@@ -121,6 +128,54 @@ async fn create_temp_initramfs(ticket: &str, debug: bool) -> Result<(File, Strin
     Ok((tmp_file, path))
 }
 
+async fn send_qmp_request<T: AsyncBufRead + AsyncWrite + Unpin>(
+    stream: &mut T,
+    request: &str,
+) -> Result<String, Error> {
+    stream.write_all(request.as_bytes()).await?;
+    stream.flush().await?;
+    let mut buf = String::new();
+    let _ = stream.read_line(&mut buf).await?;
+    Ok(buf)
+}
+
+pub async fn set_dynamic_memory(cid: i32, target_memory: Option<usize>) -> Result<(), Error> {
+    let target_memory = match target_memory {
+        Some(size) if size > DYNAMIC_MEMORY_MB => {
+            bail!("cannot set to {}M, maximum is {}M", size, DYNAMIC_MEMORY_MB)
+        }
+        Some(size) => size,
+        None => DYNAMIC_MEMORY_MB,
+    };
+
+    let path = format!("{}{}.sock", QMP_SOCKET_PREFIX, cid);
+    let mut stream = tokio::io::BufStream::new(tokio::net::UnixStream::connect(path).await?);
+
+    let _ = stream.read_line(&mut String::new()).await?; // initial qmp message
+    let _ = send_qmp_request(&mut stream, "{\"execute\":\"qmp_capabilities\"}\n").await?;
+
+    let request = json!({
+        "execute": "object-add",
+        "arguments": {
+            "qom-type": "memory-backend-ram",
+            "id": "mem0",
+            "size": target_memory * 1024 * 1024,
+        }
+    });
+    let _ = send_qmp_request(&mut stream, &serde_json::to_string(&request)?).await?;
+    let request = json!({
+        "execute": "device_add",
+        "arguments": {
+            "driver": "pc-dimm",
+            "id": "dimm0",
+            "memdev": "mem0",
+        }
+    });
+    let _ = send_qmp_request(&mut stream, &serde_json::to_string(&request)?).await?;
+
+    Ok(())
+}
+
 pub async fn start_vm(
     // u16 so we can do wrapping_add without going too high
     mut cid: u16,
@@ -185,7 +240,7 @@
         &ramfs_path,
         "-append",
         &format!(
-            "{} panic=1 zfs_arc_min=0 zfs_arc_max=0",
+            "{} panic=1 zfs_arc_min=0 zfs_arc_max=0 memhp_default_state=online_kernel
+",
             if debug { "debug" } else { "quiet" }
         ),
         "-daemonize",
@@ -252,13 +307,23 @@
         let mut qemu_cmd = std::process::Command::new("qemu-system-x86_64");
         qemu_cmd.args(base_args.iter());
         qemu_cmd.arg("-m");
-        qemu_cmd.arg(ram.to_string());
+        qemu_cmd.arg(format!(
+            "{ram}M,slots=1,maxmem={}M",
+            DYNAMIC_MEMORY_MB + ram
+        ));
         qemu_cmd.args(&drives);
         qemu_cmd.arg("-device");
         qemu_cmd.arg(format!(
             "vhost-vsock-pci,guest-cid={},disable-legacy=on",
             cid
         ));
+        qemu_cmd.arg("-chardev");
+        qemu_cmd.arg(format!(
+            "socket,id=qmp,path={}{}.sock,server=on,wait=off",
+            QMP_SOCKET_PREFIX, cid
+        ));
+        qemu_cmd.arg("-mon");
+        qemu_cmd.arg("chardev=qmp,mode=control");
 
         if debug {
             let debug_args = [
-- 
2.30.2