[pve-devel] [PATCH qemu v5 03/32] PVE backup: implement backup access setup and teardown API for external providers
Wolfgang Bumiller
w.bumiller at proxmox.com
Mon Mar 24 14:02:13 CET 2025
On Fri, Mar 21, 2025 at 02:48:23PM +0100, Fiona Ebner wrote:
> For external backup providers, the state of the VM's disk images at
> the time the backup is started is preserved via a snapshot-access
> block node. Old data is moved to the fleecing image when new guest
> writes come in. The snapshot-access block node, as well as the
> associated bitmap in case of incremental backup, will be exported via
> NBD to the external provider. The NBD export will be done by the
> management layer, the missing functionality is setting up and tearing
> down the snapshot-access block nodes, which this patch adds.
>
> It is necessary to also set up fleecing for EFI and TPM disks, so that
> old data can be moved out of the way when a new guest write comes in.
>
> There can only be one regular backup or one active backup access at
> a time, because both require replacing the original block node of the
> drive. Thus the backup state is re-used, and checks are added to
> prohibit regular backup while snapshot access is active and vice
> versa.
>
> The block nodes added by the backup-access-setup QMP call are not
> tracked anywhere else (there is no job they are associated to like for
> regular backup). This requires adding a callback for teardown when
> QEMU exits, i.e. in qemu_cleanup(). Otherwise, there will be an
> assertion failure that the block graph is not empty when QEMU exits
> before the backup-access-teardown QMP command is called.
>
> The code for the qmp_backup_access_setup() was based on the existing
> qmp_backup() routine.
>
> The return value for the setup QMP command contains information about
> the snapshot-access block nodes that can be used by the management
> layer to set up the NBD exports.
>
> Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
> ---
>
> Changes in v5:
> * Set finishing state and end time in backup state in QEMU to avoid
> wrongly detecting previous backup as still active leading to a
> warning.
> * Expose support for backup-access API as a feature with
> query-proxmox-support.
>
> pve-backup.c | 283 +++++++++++++++++++++++++++++++++++++++++++
> pve-backup.h | 16 +++
> qapi/block-core.json | 49 ++++++++
> system/runstate.c | 6 +
> 4 files changed, 354 insertions(+)
> create mode 100644 pve-backup.h
>
> diff --git a/pve-backup.c b/pve-backup.c
> index b22064a64e..8f0f9921ab 100644
> --- a/pve-backup.c
> +++ b/pve-backup.c
> @@ -1,4 +1,5 @@
> #include "proxmox-backup-client.h"
> +#include "pve-backup.h"
> #include "vma.h"
>
> #include "qemu/osdep.h"
> @@ -585,6 +586,37 @@ static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp)
> return 0;
> }
>
> +static void setup_all_snapshot_access_bh(void *opaque)
> +{
> + assert(!qemu_in_coroutine());
> +
> + CoCtxData *data = (CoCtxData*)opaque;
> + Error **errp = (Error**)data->data;
> +
> + Error *local_err = NULL;
> +
> + GList *l = backup_state.di_list;
> + while (l) {
> + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
> + l = g_list_next(l);
> +
> + bdrv_drained_begin(di->bs);
> +
> + if (setup_snapshot_access(di, &local_err) < 0) {
> + cleanup_snapshot_access(di);
(not part of this patch, but why does our `setup_snapshot_access()` not
call `cleanup_snapshot_access()` on failure?)
> + bdrv_drained_end(di->bs);
> + error_setg(errp, "%s - setting up snapshot access failed: %s", di->device_name,
> + local_err ? error_get_pretty(local_err) : "unknown error");
> + break;
> + }
> +
> + bdrv_drained_end(di->bs);
> + }
> +
> + /* return */
> + aio_co_enter(data->ctx, data->co);
> +}
> +
> /*
> * backup_job_create can *not* be run from a coroutine, so this can't either.
> * The caller is responsible that backup_mutex is held nonetheless.
> @@ -722,6 +754,11 @@ static bool fleecing_no_efi_tpm(const char *device_id)
> return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
> }
>
> +static bool fleecing_all(const char *device_id)
> +{
> + return true;
> +}
> +
> /*
> * Returns a list of device infos, which needs to be freed by the caller. In
> * case of an error, errp will be set, but the returned value might still be a
> @@ -810,6 +847,251 @@ err:
> return di_list;
> }
>
> +BackupAccessInfoList *coroutine_fn qmp_backup_access_setup(
> + const char *target_id,
> + const char *devlist,
> + Error **errp)
> +{
> + assert(qemu_in_coroutine());
> +
> + qemu_co_mutex_lock(&backup_state.backup_mutex);
> +
> + Error *local_err = NULL;
> + GList *di_list = NULL;
> + GList *l;
> +
> + if (backup_state.di_list) {
> + error_set(errp, ERROR_CLASS_GENERIC_ERROR,
> + "previous backup for target '%s' not finished", backup_state.target_id);
> + qemu_co_mutex_unlock(&backup_state.backup_mutex);
> + return NULL;
> + }
> +
> + bdrv_graph_co_rdlock();
> + di_list = get_device_info(devlist, fleecing_all, &local_err);
> + bdrv_graph_co_rdunlock();
> + if (local_err) {
> + error_propagate(errp, local_err);
> + goto err;
> + }
> + assert(di_list);
> +
> + size_t total = 0;
> +
> + l = di_list;
> + while (l) {
> + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
> + l = g_list_next(l);
> +
> + ssize_t size = bdrv_getlength(di->bs);
> + if (size < 0) {
> + error_setg_errno(errp, -size, "bdrv_getlength failed");
> + goto err;
> + }
> + di->size = size;
> + total += size;
> +
> + di->completed_ret = INT_MAX;
> + }
> +
> + qemu_mutex_lock(&backup_state.stat.lock);
> + backup_state.stat.reused = 0;
> +
> + /* clear previous backup's bitmap_list */
> + if (backup_state.stat.bitmap_list) {
> + GList *bl = backup_state.stat.bitmap_list;
> + while (bl) {
> + g_free(((PBSBitmapInfo *)bl->data)->drive);
> + g_free(bl->data);
> + bl = g_list_next(bl);
> + }
> + g_list_free(backup_state.stat.bitmap_list);
> + backup_state.stat.bitmap_list = NULL;
> + }
^ should be factored into a function instead of copied
The code below can also be factored out as AFAICT it, too, is a copy, if
`backup_file` is a parameter (for which `NULL` may be passed, as
`g_strdup()` explicitly maps NULL to NULL), and `uuid` as well I guess
(here it's not touched but may as well be cleared (or the function would
ignore it when NULL is passed)).
> +
> + /* initialize global backup_state now */
> +
> + if (backup_state.stat.error) {
> + error_free(backup_state.stat.error);
> + backup_state.stat.error = NULL;
> + }
> +
> + backup_state.stat.start_time = time(NULL);
> + backup_state.stat.end_time = 0;
> +
> + if (backup_state.stat.backup_file) {
> + g_free(backup_state.stat.backup_file);
> + }
> + backup_state.stat.backup_file = NULL;
> +
> + if (backup_state.target_id) {
> + g_free(backup_state.target_id);
> + }
> + backup_state.target_id = g_strdup(target_id);
(^ Not sure if `qmp_backup` should include the above)
> +
> + /*
> + * The stats will never update, because there is no internal backup job. Initialize them anyway
> + * for completeness.
> + */
> + backup_state.stat.total = total;
> + backup_state.stat.dirty = total - backup_state.stat.reused;
> + backup_state.stat.transferred = 0;
> + backup_state.stat.zero_bytes = 0;
> + backup_state.stat.finishing = false;
> + backup_state.stat.starting = false; // there's no associated QEMU job
^ up to this point — requiring stat.lock to be held, I suppose (unless
qmp_backup allows moving the stuff in between to above the lock()
call...)
> +
> + qemu_mutex_unlock(&backup_state.stat.lock);
> +
> + backup_state.vmaw = NULL;
> + backup_state.pbs = NULL;
> +
> + backup_state.di_list = di_list;
> +
> + /* Run setup_all_snapshot_access_bh outside of coroutine (in BH) but keep
> + * backup_mutex locked. This is fine, a CoMutex can be held across yield
> + * points, and we'll release it as soon as the BH reschedules us.
> + */
> + CoCtxData waker = {
> + .co = qemu_coroutine_self(),
> + .ctx = qemu_get_current_aio_context(),
> + .data = &local_err,
> + };
> + aio_bh_schedule_oneshot(waker.ctx, setup_all_snapshot_access_bh, &waker);
> + qemu_coroutine_yield();
> +
> + if (local_err) {
> + error_propagate(errp, local_err);
> + goto err;
> + }
> +
> + qemu_co_mutex_unlock(&backup_state.backup_mutex);
> +
> + BackupAccessInfoList *bai_head = NULL, **p_bai_next = &bai_head;
> +
> + l = di_list;
> + while (l) {
> + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
> + l = g_list_next(l);
> +
> + BackupAccessInfoList *info = g_malloc0(sizeof(*info));
> + info->value = g_malloc0(sizeof(*info->value));
> + info->value->node_name = g_strdup(bdrv_get_node_name(di->fleecing.snapshot_access));
> + info->value->device = g_strdup(di->device_name);
> + info->value->size = di->size;
> +
> + *p_bai_next = info;
> + p_bai_next = &info->next;
> + }
> +
> + return bai_head;
> +
> +err:
> +
> + l = di_list;
> + while (l) {
> + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
> + l = g_list_next(l);
> +
> + g_free(di->device_name);
> + di->device_name = NULL;
> +
> + g_free(di);
> + }
> + g_list_free(di_list);
> + backup_state.di_list = NULL;
> +
> + qemu_co_mutex_unlock(&backup_state.backup_mutex);
> + return NULL;
> +}
> +
> +/*
> + * Caller needs to hold the backup mutex or the BQL.
> + */
> +void backup_access_teardown(void)
> +{
> + GList *l = backup_state.di_list;
> +
> + qemu_mutex_lock(&backup_state.stat.lock);
> + backup_state.stat.finishing = true;
> + qemu_mutex_unlock(&backup_state.stat.lock);
> +
> + while (l) {
> + PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
> + l = g_list_next(l);
> +
> + if (di->fleecing.snapshot_access) {
> + bdrv_unref(di->fleecing.snapshot_access);
> + di->fleecing.snapshot_access = NULL;
> + }
> + if (di->fleecing.cbw) {
> + bdrv_cbw_drop(di->fleecing.cbw);
> + di->fleecing.cbw = NULL;
> + }
> +
> + g_free(di->device_name);
> + di->device_name = NULL;
> +
> + g_free(di);
> + }
> + g_list_free(backup_state.di_list);
> + backup_state.di_list = NULL;
> +
> + qemu_mutex_lock(&backup_state.stat.lock);
> + backup_state.stat.end_time = time(NULL);
> + backup_state.stat.finishing = false;
> + qemu_mutex_unlock(&backup_state.stat.lock);
> +}
> +
> +// Not done in a coroutine, because bdrv_co_unref() and cbw_drop() would just spawn BHs anyways.
> +// Caller needs to hold the backup_state.backup_mutex lock
> +static void backup_access_teardown_bh(void *opaque)
> +{
> + CoCtxData *data = (CoCtxData*)opaque;
> +
> + backup_access_teardown();
> +
> + /* return */
> + aio_co_enter(data->ctx, data->co);
> +}
> +
> +void coroutine_fn qmp_backup_access_teardown(const char *target_id, Error **errp)
> +{
> + assert(qemu_in_coroutine());
> +
> + qemu_co_mutex_lock(&backup_state.backup_mutex);
> +
> + if (!backup_state.target_id) { // nothing to do
> + qemu_co_mutex_unlock(&backup_state.backup_mutex);
> + return;
> + }
> +
> + /*
> + * Continue with target_id == NULL, used by the callback registered for qemu_cleanup()
> + */
> + if (target_id && strcmp(backup_state.target_id, target_id)) {
> + error_setg(errp, "cannot teardown backup access - got target %s instead of %s",
> + target_id, backup_state.target_id);
> + qemu_co_mutex_unlock(&backup_state.backup_mutex);
> + return;
> + }
> +
> + if (!strcmp(backup_state.target_id, "Proxmox VE")) {
> + error_setg(errp, "cannot teardown backup access for PVE - use backup-cancel instead");
> + qemu_co_mutex_unlock(&backup_state.backup_mutex);
> + return;
> + }
> +
> + CoCtxData waker = {
> + .co = qemu_coroutine_self(),
> + .ctx = qemu_get_current_aio_context(),
> + };
> + aio_bh_schedule_oneshot(waker.ctx, backup_access_teardown_bh, &waker);
> + qemu_coroutine_yield();
> +
> + qemu_co_mutex_unlock(&backup_state.backup_mutex);
> + return;
> +}
> +
> UuidInfo coroutine_fn *qmp_backup(
> const char *backup_file,
> const char *password,
> @@ -1267,5 +1549,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
> ret->pbs_masterkey = true;
> ret->backup_max_workers = true;
> ret->backup_fleecing = true;
> + ret->backup_access_api = true;
> return ret;
> }
> diff --git a/pve-backup.h b/pve-backup.h
> new file mode 100644
> index 0000000000..4033bc848f
> --- /dev/null
> +++ b/pve-backup.h
> @@ -0,0 +1,16 @@
> +/*
> + * Backup code used by Proxmox VE
> + *
> + * Copyright (C) Proxmox Server Solutions
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef PVE_BACKUP_H
> +#define PVE_BACKUP_H
> +
> +void backup_access_teardown(void);
> +
> +#endif /* PVE_BACKUP_H */
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index c581f1f238..3f092221ce 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1019,6 +1019,9 @@
> #
> # @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
> #
> +# @backup-access-api: Whether backup access API for external providers is
> +# supported or not.
> +#
> # @backup-fleecing: Whether backup fleecing is supported or not.
> #
> # @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
> @@ -1032,6 +1035,7 @@
> 'pbs-dirty-bitmap-migration': 'bool',
> 'pbs-masterkey': 'bool',
> 'pbs-library-version': 'str',
> + 'backup-access-api': 'bool',
> 'backup-fleecing': 'bool',
> 'backup-max-workers': 'bool' } }
>
> @@ -1098,6 +1102,51 @@
> ##
> { 'command': 'query-pbs-bitmap-info', 'returns': ['PBSBitmapInfo'] }
>
> +##
> +# @BackupAccessInfo:
> +#
> +# Info associated to a snapshot access for backup. For more information about
> +# the bitmap see @BackupAccessBitmapMode.
> +#
> +# @node-name: the block node name of the snapshot-access node.
> +#
> +# @device: the device on top of which the snapshot access was created.
> +#
> +# @size: the size of the block device in bytes.
> +#
> +##
> +{ 'struct': 'BackupAccessInfo',
> + 'data': { 'node-name': 'str', 'device': 'str', 'size': 'size' } }
> +
> +##
> +# @backup-access-setup:
> +#
> +# Set up snapshot access to VM drives for an external backup provider. No other
> +# backup or backup access can be done before tearing down the backup access.
> +#
> +# @target-id: the unique ID of the backup target.
> +#
> +# @devlist: list of block device names (separated by ',', ';' or ':'). By
> +# default the backup includes all writable block devices.
> +#
> +# Returns: a list of @BackupAccessInfo, one for each device.
> +#
> +##
> +{ 'command': 'backup-access-setup',
> + 'data': { 'target-id': 'str', '*devlist': 'str' },
> + 'returns': [ 'BackupAccessInfo' ], 'coroutine': true }
> +
> +##
> +# @backup-access-teardown:
> +#
> +# Tear down previously set up snapshot access for the same target.
> +#
> +# @target-id: the ID of the backup target.
> +#
> +##
> +{ 'command': 'backup-access-teardown', 'data': { 'target-id': 'str' },
> + 'coroutine': true }
> +
> ##
> # @BlockDeviceTimedStats:
> #
> diff --git a/system/runstate.c b/system/runstate.c
> index c2c9afa905..6f93d7c2fb 100644
> --- a/system/runstate.c
> +++ b/system/runstate.c
> @@ -60,6 +60,7 @@
> #include "sysemu/sysemu.h"
> #include "sysemu/tpm.h"
> #include "trace.h"
> +#include "pve-backup.h"
>
> static NotifierList exit_notifiers =
> NOTIFIER_LIST_INITIALIZER(exit_notifiers);
> @@ -920,6 +921,11 @@ void qemu_cleanup(int status)
> * requests happening from here on anyway.
> */
> bdrv_drain_all_begin();
> + /*
> + * The backup access is set up by a QMP command, but is neither owned by a monitor nor
> + * associated to a BlockBackend. Need to tear it down manually here.
> + */
> + backup_access_teardown();
> job_cancel_sync_all();
> bdrv_close_all();
>
> --
> 2.39.5
More information about the pve-devel
mailing list