[pve-devel] [PATCH INFO v2 qemu 5/5] PVE: savevm-async: improve snapshot abort
Stefan Reiter
s.reiter at proxmox.com
Thu Feb 11 17:11:15 CET 2021
Do not block the VM and save the state on aborting a snapshot, as the
snapshot will be invalid anyway.
Also, when aborting, wait for the target file to be closed, otherwise a
client might run into race-conditions when trying to remove the file
still opened by QEMU.
Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
Included in first patch, squashed into existing patches.
New in v2.
hmp-commands.hx | 3 +-
migration/savevm-async.c | 75 +++++++++++++++++++++++++++++++++-------
monitor/hmp-cmds.c | 2 +-
qapi/misc.json | 2 +-
4 files changed, 66 insertions(+), 16 deletions(-)
diff --git a/hmp-commands.hx b/hmp-commands.hx
index b7639d189d..54de3f80e6 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1927,5 +1927,6 @@ ERST
.args_type = "",
.params = "",
.help = "Resume VM after snaphot.",
- .cmd = hmp_savevm_end,
+ .cmd = hmp_savevm_end,
+ .coroutine = true,
},
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
index 156b7a030e..8a17ec1f74 100644
--- a/migration/savevm-async.c
+++ b/migration/savevm-async.c
@@ -16,6 +16,7 @@
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-commands-block.h"
#include "qemu/cutils.h"
+#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
@@ -52,8 +53,15 @@ static struct SnapshotState {
int64_t total_time;
QEMUBH *finalize_bh;
Coroutine *co;
+ QemuCoSleepState *target_close_wait;
} snap_state;
+static bool savevm_aborted(void)
+{
+ return snap_state.state == SAVE_STATE_CANCELLED ||
+ snap_state.state == SAVE_STATE_ERROR;
+}
+
SaveVMInfo *qmp_query_savevm(Error **errp)
{
SaveVMInfo *info = g_malloc0(sizeof(*info));
@@ -106,17 +114,23 @@ static int save_snapshot_cleanup(void)
}
if (snap_state.target) {
- /* try to truncate, but ignore errors (will fail on block devices).
- * note1: bdrv_read() need whole blocks, so we need to round up
- * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
- */
- size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
- blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+ if (!savevm_aborted()) {
+ /* try to truncate, but ignore errors (will fail on block devices).
+ * note1: bdrv_read() need whole blocks, so we need to round up
+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+ */
+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+ }
blk_op_unblock_all(snap_state.target, snap_state.blocker);
error_free(snap_state.blocker);
snap_state.blocker = NULL;
blk_unref(snap_state.target);
snap_state.target = NULL;
+
+ if (snap_state.target_close_wait) {
+ qemu_co_sleep_wake(snap_state.target_close_wait);
+ }
}
return ret;
@@ -202,6 +216,8 @@ static void process_savevm_finalize(void *opaque)
AioContext *iohandler_ctx = iohandler_get_aio_context();
MigrationState *ms = migrate_get_current();
+ bool aborted = savevm_aborted();
+
#ifdef DEBUG_SAVEVM_STATE
int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
#endif
@@ -223,10 +239,13 @@ static void process_savevm_finalize(void *opaque)
save_snapshot_error("vm_stop_force_state error %d", ret);
}
- (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
- ret = qemu_file_get_error(snap_state.file);
- if (ret < 0) {
- save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ if (!aborted) {
+ /* skip state saving if we aborted, snapshot will be invalid anyway */
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
}
DPRINTF("state saving complete\n");
@@ -235,7 +254,7 @@ static void process_savevm_finalize(void *opaque)
/* clear migration state */
migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
- ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
ms->to_dst_file = NULL;
qemu_savevm_state_cleanup();
@@ -245,6 +264,9 @@ static void process_savevm_finalize(void *opaque)
save_snapshot_error("save_snapshot_cleanup error %d", ret);
} else if (snap_state.state == SAVE_STATE_ACTIVE) {
snap_state.state = SAVE_STATE_COMPLETED;
+ } else if (aborted) {
+ save_snapshot_error("process_savevm_cleanup: found aborted state: %d",
+ snap_state.state);
} else {
save_snapshot_error("process_savevm_cleanup: invalid state: %d",
snap_state.state);
@@ -434,11 +456,14 @@ restart:
if (snap_state.saved_vm_running) {
vm_start();
+ snap_state.saved_vm_running = false;
}
}
-void qmp_savevm_end(Error **errp)
+void coroutine_fn qmp_savevm_end(Error **errp)
{
+ int64_t timeout;
+
if (snap_state.state == SAVE_STATE_DONE) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR,
"VM snapshot not started\n");
@@ -447,14 +472,38 @@ void qmp_savevm_end(Error **errp)
if (snap_state.state == SAVE_STATE_ACTIVE) {
snap_state.state = SAVE_STATE_CANCELLED;
- return;
+ goto wait_for_close;
}
if (snap_state.saved_vm_running) {
vm_start();
+ snap_state.saved_vm_running = false;
}
snap_state.state = SAVE_STATE_DONE;
+
+wait_for_close:
+ if (!snap_state.target) {
+ DPRINTF("savevm-end: no target file open\n");
+ return;
+ }
+
+ /* wait until cleanup is done before returning, this ensures that after this
+ * call exits the statefile will be closed and can be removed immediately */
+ DPRINTF("savevm-end: waiting for cleanup\n");
+ timeout = 30L * 1000 * 1000 * 1000;
+ qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
+ &snap_state.target_close_wait);
+ snap_state.target_close_wait = NULL;
+ if (snap_state.target) {
+ save_snapshot_error("timeout waiting for target file close in "
+ "qmp_savevm_end");
+ /* we cannot assume the snapshot finished in this case, so leave the
+ * state alone - caller has to figure something out */
+ return;
+ }
+
+ DPRINTF("savevm-end: cleanup done\n");
}
// FIXME: Deprecated
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index f47fc01c10..95f4e7f5c1 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -2049,7 +2049,7 @@ void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, errp);
}
-void hmp_savevm_end(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
{
Error *errp = NULL;
diff --git a/qapi/misc.json b/qapi/misc.json
index b7e132698e..4f5333d960 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -506,7 +506,7 @@
# Resume VM after a snapshot.
#
##
-{ 'command': 'savevm-end' }
+{ 'command': 'savevm-end', 'coroutine': true }
##
# @CommandLineParameterType:
--
2.20.1
More information about the pve-devel
mailing list