[pve-devel] [PATCH qemu 2/2] PVE backup: factor out setting up snapshot access for fleecing

Mon Jun 17 13:29:56 CEST 2024

Avoids some line bloat in the create_backup_jobs_bh() function and is
in preparation for setting up the snapshot access independently of
fleecing, in particular that will be useful for providing access to
the snapshot via NBD.

Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---
 .../0050-PVE-backup-add-fleecing-option.patch | 122 +++++++++++-------
 ...ve-error-when-copy-before-write-fail.patch |   2 +-
 2 files changed, 76 insertions(+), 48 deletions(-)

diff --git a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
index d8ad45f..fb9f40e 100644
--- a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
+++ b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
@@ -63,9 +63,9 @@ Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
 Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
 ---
  block/monitor/block-hmp-cmds.c |   1 +
- pve-backup.c                   | 137 ++++++++++++++++++++++++++++++++-
+ pve-backup.c                   | 158 ++++++++++++++++++++++++++++++++-
  qapi/block-core.json           |  10 ++-
- 3 files changed, 144 insertions(+), 4 deletions(-)
+ 3 files changed, 165 insertions(+), 4 deletions(-)
 
 diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
 index 5000c084c5..70b3de4c7e 100644
@@ -80,7 +80,7 @@ index 5000c084c5..70b3de4c7e 100644
  
      hmp_handle_error(mon, error);
 diff --git a/pve-backup.c b/pve-backup.c
-index 5ebb6a3947..5ea9e1f82f 100644
+index 5ebb6a3947..e04abf574a 100644
 --- a/pve-backup.c
 +++ b/pve-backup.c
 @@ -7,9 +7,11 @@
@@ -134,7 +134,70 @@ index 5ebb6a3947..5ea9e1f82f 100644
      /*
       * Needs to happen outside of coroutine, because it takes the graph write lock.
       */
-@@ -519,9 +544,79 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -483,6 +508,62 @@ static int coroutine_fn pvebackup_co_add_config(
+     goto out;
+ }
+ 
++/*
++ * Setup a snapshot-access block node for a device with associated fleecing image.
++ */
++static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
++{
++    Error *local_err = NULL;
++
++    if (!di->fleecing.bs) {
++        error_setg(errp, "no associated fleecing image");
++        return -1;
++    }
++
++    QDict *cbw_opts = qdict_new();
++    qdict_put_str(cbw_opts, "driver", "copy-before-write");
++    qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
++    qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
++
++    if (di->bitmap) {
++        /*
++         * Only guest writes to parts relevant for the backup need to be intercepted with
++         * old data being copied to the fleecing image.
++         */
++        qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
++        qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
++    }
++    /*
++     * Fleecing storage is supposed to be fast and it's better to break backup than guest
++     * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
++     * abort a bit before that.
++     */
++    qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
++    qdict_put_int(cbw_opts, "cbw-timeout", 45);
++
++    di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
++
++    if (!di->fleecing.cbw) {
++        error_setg(errp, "appending cbw node for fleecing failed: %s",
++                   local_err ? error_get_pretty(local_err) : "unknown error");
++        return -1;
++    }
++
++    QDict *snapshot_access_opts = qdict_new();
++    qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
++    qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
++
++    di->fleecing.snapshot_access =
++        bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
++    if (!di->fleecing.snapshot_access) {
++        error_setg(errp, "setting up snapshot access for fleecing failed: %s",
++                   local_err ? error_get_pretty(local_err) : "unknown error");
++        return -1;
++    }
++
++    return 0;
++}
++
+ /*
+  * backup_job_create can *not* be run from a coroutine, so this can't either.
+  * The caller is responsible that backup_mutex is held nonetheless.
+@@ -519,9 +600,44 @@ static void create_backup_jobs_bh(void *opaque) {
          }
          bdrv_drained_begin(di->bs);
  
@@ -146,48 +209,13 @@ index 5ebb6a3947..5ea9e1f82f 100644
 +        const char *job_id = bdrv_get_device_name(di->bs);
 +        bdrv_graph_co_rdunlock();
 +        if (di->fleecing.bs) {
-+            QDict *cbw_opts = qdict_new();
-+            qdict_put_str(cbw_opts, "driver", "copy-before-write");
-+            qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
-+            qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
-+
-+            if (di->bitmap) {
-+                /*
-+                 * Only guest writes to parts relevant for the backup need to be intercepted with
-+                 * old data being copied to the fleecing image.
-+                 */
-+                qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
-+                qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
-+            }
-+            /*
-+             * Fleecing storage is supposed to be fast and it's better to break backup than guest
-+             * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
-+             * abort a bit before that.
-+             */
-+            qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
-+            qdict_put_int(cbw_opts, "cbw-timeout", 45);
-+
-+            di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
-+
-+            if (!di->fleecing.cbw) {
-+                error_setg(errp, "appending cbw node for fleecing failed: %s",
-+                           local_err ? error_get_pretty(local_err) : "unknown error");
-+                bdrv_drained_end(di->bs);
-+                break;
-+            }
-+
-+            QDict *snapshot_access_opts = qdict_new();
-+            qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
-+            qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
-+
-+            di->fleecing.snapshot_access =
-+                bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
-+            if (!di->fleecing.snapshot_access) {
++            if (setup_snapshot_access(di, &local_err) < 0) {
 +                error_setg(errp, "setting up snapshot access for fleecing failed: %s",
 +                           local_err ? error_get_pretty(local_err) : "unknown error");
 +                bdrv_drained_end(di->bs);
 +                break;
 +            }
++
 +            source_bs = di->fleecing.snapshot_access;
 +            discard_source = true;
 +
@@ -216,7 +244,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
              BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
              &local_err);
  
-@@ -577,6 +672,14 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -577,6 +693,14 @@ static void create_backup_jobs_bh(void *opaque) {
      aio_co_enter(data->ctx, data->co);
  }
  
@@ -231,7 +259,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
  /*
   * Returns a list of device infos, which needs to be freed by the caller. In
   * case of an error, errp will be set, but the returned value might still be a
-@@ -584,6 +687,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -584,6 +708,7 @@ static void create_backup_jobs_bh(void *opaque) {
   */
  static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
      const char *devlist,
@@ -239,7 +267,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
      Error **errp)
  {
      gchar **devs = NULL;
-@@ -607,6 +711,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+@@ -607,6 +732,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
              }
              PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
              di->bs = bs;
@@ -271,7 +299,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
              di_list = g_list_append(di_list, di);
              d++;
          }
-@@ -656,6 +785,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -656,6 +806,7 @@ UuidInfo coroutine_fn *qmp_backup(
      const char *devlist,
      bool has_speed, int64_t speed,
      bool has_max_workers, int64_t max_workers,
@@ -279,7 +307,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
      Error **errp)
  {
      assert(qemu_in_coroutine());
-@@ -684,7 +814,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -684,7 +835,7 @@ UuidInfo coroutine_fn *qmp_backup(
      format = has_format ? format : BACKUP_FORMAT_VMA;
  
      bdrv_graph_co_rdlock();
@@ -288,7 +316,7 @@ index 5ebb6a3947..5ea9e1f82f 100644
      bdrv_graph_co_rdunlock();
      if (local_err) {
          error_propagate(errp, local_err);
-@@ -1089,5 +1219,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+@@ -1089,5 +1240,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
      ret->query_bitmap_info = true;
      ret->pbs_masterkey = true;
      ret->backup_max_workers = true;
diff --git a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
index 7fc2b3c..97de053 100644
--- a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+++ b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
@@ -96,7 +96,7 @@ index dc6cafe7fa..a27d2d7d9f 100644
  
  #endif /* COPY_BEFORE_WRITE_H */
 diff --git a/pve-backup.c b/pve-backup.c
-index 5ea9e1f82f..aa6ddf4c52 100644
+index e04abf574a..6a8cfa18c1 100644
 --- a/pve-backup.c
 +++ b/pve-backup.c
 @@ -374,6 +374,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
-- 
2.39.2