[pve-devel] [PATCH v2 qemu 2/2] PVE backup: factor out setting up snapshot access for fleecing
Fiona Ebner
f.ebner at proxmox.com
Tue Jun 25 15:35:51 CEST 2024
Avoids some line bloat in the create_backup_jobs_bh() function and is
in preparation for setting up the snapshot access independently of
fleecing, in particular that will be useful for providing access to
the snapshot via NBD.
Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---
Changes in v2:
* no actual change here, but the error case will now also clean up
the block nodes, because it is on top of v2 of patch 1/2
.../0050-PVE-backup-add-fleecing-option.patch | 126 +++++++++++-------
...ve-error-when-copy-before-write-fail.patch | 2 +-
2 files changed, 78 insertions(+), 50 deletions(-)
diff --git a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
index 0ba6235..2d2dc40 100644
--- a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
+++ b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
@@ -63,9 +63,9 @@ Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
block/monitor/block-hmp-cmds.c | 1 +
- pve-backup.c | 144 ++++++++++++++++++++++++++++++++-
- qapi/block-core.json | 10 ++-
- 3 files changed, 151 insertions(+), 4 deletions(-)
+ pve-backup.c | 165 ++++++++++++++++++++++++++++++++-
+ qapi/block-core.json | 10 +-
+ 3 files changed, 172 insertions(+), 4 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 5000c084c5..70b3de4c7e 100644
@@ -80,7 +80,7 @@ index 5000c084c5..70b3de4c7e 100644
hmp_handle_error(mon, error);
diff --git a/pve-backup.c b/pve-backup.c
-index 5ebb6a3947..2d3ca78eac 100644
+index 5ebb6a3947..0c1d4a347b 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -7,9 +7,11 @@
@@ -144,7 +144,70 @@ index 5ebb6a3947..2d3ca78eac 100644
/*
* Needs to happen outside of coroutine, because it takes the graph write lock.
*/
-@@ -519,9 +549,80 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -483,6 +513,62 @@ static int coroutine_fn pvebackup_co_add_config(
+ goto out;
+ }
+
++/*
++ * Setup a snapshot-access block node for a device with associated fleecing image.
++ */
++static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
++{
++ Error *local_err = NULL;
++
++ if (!di->fleecing.bs) {
++ error_setg(errp, "no associated fleecing image");
++ return -1;
++ }
++
++ QDict *cbw_opts = qdict_new();
++ qdict_put_str(cbw_opts, "driver", "copy-before-write");
++ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
++ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
++
++ if (di->bitmap) {
++ /*
++ * Only guest writes to parts relevant for the backup need to be intercepted with
++ * old data being copied to the fleecing image.
++ */
++ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
++ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
++ }
++ /*
++ * Fleecing storage is supposed to be fast and it's better to break backup than guest
++ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
++ * abort a bit before that.
++ */
++ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
++ qdict_put_int(cbw_opts, "cbw-timeout", 45);
++
++ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
++
++ if (!di->fleecing.cbw) {
++ error_setg(errp, "appending cbw node for fleecing failed: %s",
++ local_err ? error_get_pretty(local_err) : "unknown error");
++ return -1;
++ }
++
++ QDict *snapshot_access_opts = qdict_new();
++ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
++ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
++
++ di->fleecing.snapshot_access =
++ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
++ if (!di->fleecing.snapshot_access) {
++ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
++ local_err ? error_get_pretty(local_err) : "unknown error");
++ return -1;
++ }
++
++ return 0;
++}
++
+ /*
+ * backup_job_create can *not* be run from a coroutine, so this can't either.
+ * The caller is responsible that backup_mutex is held nonetheless.
+@@ -519,9 +605,45 @@ static void create_backup_jobs_bh(void *opaque) {
}
bdrv_drained_begin(di->bs);
@@ -156,49 +219,14 @@ index 5ebb6a3947..2d3ca78eac 100644
+ const char *job_id = bdrv_get_device_name(di->bs);
+ bdrv_graph_co_rdunlock();
+ if (di->fleecing.bs) {
-+ QDict *cbw_opts = qdict_new();
-+ qdict_put_str(cbw_opts, "driver", "copy-before-write");
-+ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
-+ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
-+
-+ if (di->bitmap) {
-+ /*
-+ * Only guest writes to parts relevant for the backup need to be intercepted with
-+ * old data being copied to the fleecing image.
-+ */
-+ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
-+ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
-+ }
-+ /*
-+ * Fleecing storage is supposed to be fast and it's better to break backup than guest
-+ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
-+ * abort a bit before that.
-+ */
-+ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
-+ qdict_put_int(cbw_opts, "cbw-timeout", 45);
-+
-+ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
-+
-+ if (!di->fleecing.cbw) {
-+ error_setg(errp, "appending cbw node for fleecing failed: %s",
-+ local_err ? error_get_pretty(local_err) : "unknown error");
-+ bdrv_drained_end(di->bs);
-+ break;
-+ }
-+
-+ QDict *snapshot_access_opts = qdict_new();
-+ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
-+ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
-+
-+ di->fleecing.snapshot_access =
-+ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
-+ if (!di->fleecing.snapshot_access) {
++ if (setup_snapshot_access(di, &local_err) < 0) {
+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
+ local_err ? error_get_pretty(local_err) : "unknown error");
+ cleanup_snapshot_access(di);
+ bdrv_drained_end(di->bs);
+ break;
+ }
++
+ source_bs = di->fleecing.snapshot_access;
+ discard_source = true;
+
@@ -227,7 +255,7 @@ index 5ebb6a3947..2d3ca78eac 100644
BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
&local_err);
-@@ -535,6 +636,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -535,6 +657,7 @@ static void create_backup_jobs_bh(void *opaque) {
}
if (!job || local_err) {
@@ -235,7 +263,7 @@ index 5ebb6a3947..2d3ca78eac 100644
error_setg(errp, "backup_job_create failed: %s",
local_err ? error_get_pretty(local_err) : "null");
break;
-@@ -577,6 +679,14 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -577,6 +700,14 @@ static void create_backup_jobs_bh(void *opaque) {
aio_co_enter(data->ctx, data->co);
}
@@ -250,7 +278,7 @@ index 5ebb6a3947..2d3ca78eac 100644
/*
* Returns a list of device infos, which needs to be freed by the caller. In
* case of an error, errp will be set, but the returned value might still be a
-@@ -584,6 +694,7 @@ static void create_backup_jobs_bh(void *opaque) {
+@@ -584,6 +715,7 @@ static void create_backup_jobs_bh(void *opaque) {
*/
static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
const char *devlist,
@@ -258,7 +286,7 @@ index 5ebb6a3947..2d3ca78eac 100644
Error **errp)
{
gchar **devs = NULL;
-@@ -607,6 +718,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+@@ -607,6 +739,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
}
PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
di->bs = bs;
@@ -290,7 +318,7 @@ index 5ebb6a3947..2d3ca78eac 100644
di_list = g_list_append(di_list, di);
d++;
}
-@@ -656,6 +792,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -656,6 +813,7 @@ UuidInfo coroutine_fn *qmp_backup(
const char *devlist,
bool has_speed, int64_t speed,
bool has_max_workers, int64_t max_workers,
@@ -298,7 +326,7 @@ index 5ebb6a3947..2d3ca78eac 100644
Error **errp)
{
assert(qemu_in_coroutine());
-@@ -684,7 +821,7 @@ UuidInfo coroutine_fn *qmp_backup(
+@@ -684,7 +842,7 @@ UuidInfo coroutine_fn *qmp_backup(
format = has_format ? format : BACKUP_FORMAT_VMA;
bdrv_graph_co_rdlock();
@@ -307,7 +335,7 @@ index 5ebb6a3947..2d3ca78eac 100644
bdrv_graph_co_rdunlock();
if (local_err) {
error_propagate(errp, local_err);
-@@ -1089,5 +1226,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+@@ -1089,5 +1247,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
ret->query_bitmap_info = true;
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
diff --git a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
index 66fc9fa..a314fe7 100644
--- a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+++ b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
@@ -96,7 +96,7 @@ index dc6cafe7fa..a27d2d7d9f 100644
#endif /* COPY_BEFORE_WRITE_H */
diff --git a/pve-backup.c b/pve-backup.c
-index 2d3ca78eac..c4178758b3 100644
+index 0c1d4a347b..051ebffe48 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -374,6 +374,18 @@ static void pvebackup_complete_cb(void *opaque, int ret)
--
2.39.2
More information about the pve-devel
mailing list