[pve-devel] [PATCH kvm] Fix #796: convert savevm-async to threads
Alexandre DERUMIER
aderumier at odiso.com
Wed Nov 9 08:51:39 CET 2016
Works fine for me, I don't see any regression (tested with qcow2 && ceph).
Note that it was already working for me previously; I didn't have problems with virtio-scsi + iothread + RAM.
----- Original Message -----
From: "Alexandre Derumier" <aderumier at odiso.com>
To: "Wolfgang Bumiller" <w.bumiller at proxmox.com>
Cc: "pve-devel" <pve-devel at pve.proxmox.com>
Sent: Wednesday, November 9, 2016 08:11:53
Subject: Re: [pve-devel] [PATCH kvm] Fix #796: convert savevm-async to threads
>>Alexandre: could you maybe try this, too? virtio-scsi-single + iothreads
>>enabled caused snapshots+RAM to never finish / hang.
I'll test it today.
----- Original Message -----
From: "Wolfgang Bumiller" <w.bumiller at proxmox.com>
To: "pve-devel" <pve-devel at pve.proxmox.com>
Cc: "Alexandre Derumier" <aderumier at odiso.com>
Sent: Tuesday, November 8, 2016 12:03:08
Subject: [PATCH kvm] Fix #796: convert savevm-async to threads
This should also allow snapshots with RAM to run with
iothreads enabled.
---
Alexandre: could you maybe try this, too? virtio-scsi-single + iothreads
enabled caused snapshots+RAM to never finish / hang.
I originally started this for #796, which is hard to reproduce & verify though.
Rebased and updated it for our current code.
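For context, the core of the change is the pattern sketched below: the long-running savevm loop moves out of the main loop into a joinable QemuThread, and when the worker finishes it only schedules a bottom half, so joining the thread and all teardown happen back in the main loop with the iothread lock handled explicitly. This is an illustrative sketch against QEMU's internal threading API (qemu_thread_create, qemu_bh_new, qemu_bh_schedule, qemu_thread_join, qemu_mutex_lock_iothread/unlock_iothread); the names start_async_job, worker_fn and cleanup_cb are made up here and the snippet only builds inside the QEMU tree. The real code is in the patch below.

/* Sketch only: illustrative names, not the patch itself. */
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"

static QemuThread worker;
static QEMUBH *cleanup_bh;

/* Runs in the main loop once the worker has finished: join the thread
 * (dropping the iothread lock around the join so it cannot deadlock)
 * and do the final cleanup with the lock held again. */
static void cleanup_cb(void *opaque)
{
    qemu_bh_delete(cleanup_bh);
    cleanup_bh = NULL;
    qemu_mutex_unlock_iothread();
    qemu_thread_join(&worker);
    qemu_mutex_lock_iothread();
    /* ... finalize state, restart the VM if it was running ... */
}

/* The long-running work runs outside the main loop; when done it only
 * schedules the cleanup BH instead of tearing things down itself. */
static void *worker_fn(void *opaque)
{
    rcu_register_thread();
    /* ... iterate over and write out the state to save ... */
    qemu_bh_schedule(cleanup_bh);
    rcu_unregister_thread();
    return NULL;
}

void start_async_job(void)
{
    cleanup_bh = qemu_bh_new(cleanup_cb, NULL);
    qemu_thread_create(&worker, "async-job", worker_fn, NULL,
                       QEMU_THREAD_JOINABLE);
}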
.../pve/0046-convert-savevm-async-to-threads.patch | 240 +++++++++++++++++++++
debian/patches/series | 1 +
2 files changed, 241 insertions(+)
create mode 100644 debian/patches/pve/0046-convert-savevm-async-to-threads.patch
diff --git a/debian/patches/pve/0046-convert-savevm-async-to-threads.patch b/debian/patches/pve/0046-convert-savevm-async-to-threads.patch
new file mode 100644
index 0000000..6a9d9f4
--- /dev/null
+++ b/debian/patches/pve/0046-convert-savevm-async-to-threads.patch
@@ -0,0 +1,240 @@
+From f18d2aee91bca252a6b90583784f49236ec17d58 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller at proxmox.com>
+Date: Tue, 8 Nov 2016 11:13:06 +0100
+Subject: [PATCH 46/46] convert savevm-async to threads
+
+---
+ savevm-async.c | 149 +++++++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 93 insertions(+), 56 deletions(-)
+
+diff --git a/savevm-async.c b/savevm-async.c
+index 05b5b19..e293a00 100644
+--- a/savevm-async.c
++++ b/savevm-async.c
+@@ -48,6 +48,8 @@ static struct SnapshotState {
+ int saved_vm_running;
+ QEMUFile *file;
+ int64_t total_time;
++ QEMUBH *cleanup_bh;
++ QemuThread thread;
+ } snap_state;
+
+ SaveVMInfo *qmp_query_savevm(Error **errp)
+@@ -135,19 +137,6 @@ static void save_snapshot_error(const char *fmt, ...)
+ g_free (msg);
+
+ snap_state.state = SAVE_STATE_ERROR;
+-
+- save_snapshot_cleanup();
+-}
+-
+-static void save_snapshot_completed(void)
+-{
+- DPRINTF("save_snapshot_completed\n");
+-
+- if (save_snapshot_cleanup() < 0) {
+- snap_state.state = SAVE_STATE_ERROR;
+- } else {
+- snap_state.state = SAVE_STATE_COMPLETED;
+- }
+ }
+
+ static int block_state_close(void *opaque)
+@@ -156,36 +145,76 @@ static int block_state_close(void *opaque)
+ return blk_flush(snap_state.target);
+ }
+
++typedef struct BlkRwCo {
++ int64_t offset;
++ QEMUIOVector *qiov;
++ int ret;
++} BlkRwCo;
++
++static void block_state_write_entry(void *opaque) {
++ BlkRwCo *rwco = opaque;
++ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
++ rwco->qiov, 0);
++}
++
+ static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
+ int iovcnt, int64_t pos)
+ {
+- int ret;
+ QEMUIOVector qiov;
++ AioContext *aio_context;
++ Coroutine *co;
++ BlkRwCo rwco;
++
++ assert(pos == snap_state.bs_pos);
++ rwco = (BlkRwCo) {
++ .offset = pos,
++ .qiov = &qiov,
++ .ret = NOT_DONE,
++ };
+
+ qemu_iovec_init_external(&qiov, iov, iovcnt);
+- ret = blk_co_pwritev(snap_state.target, pos, qiov.size, &qiov, 0);
+- if (ret < 0) {
+- return ret;
++
++ co = qemu_coroutine_create(&block_state_write_entry, &rwco);
++ qemu_coroutine_enter(co);
++
++ aio_context = blk_get_aio_context(snap_state.target);
++ while (rwco.ret == NOT_DONE) {
++ aio_poll(aio_context, true);
+ }
++
+ snap_state.bs_pos += qiov.size;
+ return qiov.size;
+ }
+
+-static int store_and_stop(void) {
+- if (global_state_store()) {
+- save_snapshot_error("Error saving global state");
+- return 1;
++static void process_savevm_cleanup(void *opaque)
++{
++ int ret;
++ qemu_bh_delete(snap_state.cleanup_bh);
++ snap_state.cleanup_bh = NULL;
++ qemu_mutex_unlock_iothread();
++ qemu_thread_join(&snap_state.thread);
++ qemu_mutex_lock_iothread();
++ ret = save_snapshot_cleanup();
++ if (ret < 0) {
++ save_snapshot_error("save_snapshot_cleanup error %d", ret);
++ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
++ snap_state.state = SAVE_STATE_COMPLETED;
++ } else {
++ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
++ snap_state.state);
+ }
+- if (runstate_is_running()) {
+- vm_stop(RUN_STATE_SAVE_VM);
++ if (snap_state.saved_vm_running) {
++ vm_start();
++ snap_state.saved_vm_running = false;
+ }
+- return 0;
+ }
+
+-static void process_savevm_co(void *opaque)
++static void *process_savevm_thread(void *opaque)
+ {
+ int ret;
+ int64_t maxlen;
++ AioContext *aio_context;
++
+ MigrationParams params = {
+ .blk = 0,
+ .shared = 0
+@@ -193,57 +222,64 @@ static void process_savevm_co(void *opaque)
+
+ snap_state.state = SAVE_STATE_ACTIVE;
+
+- qemu_mutex_unlock_iothread();
++ rcu_register_thread();
++
+ qemu_savevm_state_header(snap_state.file);
+ ret = qemu_savevm_state_begin(snap_state.file, ¶ms);
+- qemu_mutex_lock_iothread();
+
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_begin failed");
+- return;
++ rcu_unregister_thread();
++ return NULL;
+ }
+
++ aio_context = bdrv_get_aio_context(blk_bs(snap_state.target));
++ aio_context_acquire(aio_context);
++
+ while (snap_state.state == SAVE_STATE_ACTIVE) {
+ uint64_t pending_size, pend_post, pend_nonpost;
+
+ qemu_savevm_state_pending(snap_state.file, 0, &pend_nonpost, &pend_post);
+ pending_size = pend_post + pend_nonpost;
+
+- if (pending_size) {
+- ret = qemu_savevm_state_iterate(snap_state.file, false);
+- if (ret < 0) {
+- save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+- break;
+- }
+- DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
++ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
++
++ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
++ ret = qemu_savevm_state_iterate(snap_state.file, false);
++ if (ret < 0) {
++ aio_context_release(aio_context);
++ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
++ break;
++ }
++ DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
+ } else {
+- DPRINTF("done iterating\n");
+- if (store_and_stop())
++ aio_context_release(aio_context);
++ qemu_mutex_lock_iothread();
++ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
++ ret = global_state_store();
++ if (ret) {
++ qemu_mutex_unlock_iothread();
++ save_snapshot_error("global_state_store error %d", ret);
++ break;
++ }
++ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
++ if (ret < 0) {
++ qemu_mutex_unlock_iothread();
++ save_snapshot_error("vm_stop_force_state error %d", ret);
+ break;
++ }
+ DPRINTF("savevm inerate finished\n");
+ qemu_savevm_state_complete_precopy(snap_state.file, false);
++ qemu_savevm_state_cleanup();
+ DPRINTF("save complete\n");
+- save_snapshot_completed();
++ qemu_mutex_unlock_iothread();
+ break;
+ }
+-
+- /* stop the VM if we get to the end of available space,
+- * or if pending_size is just a few MB
+- */
+- maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
+- if ((pending_size < 100000) ||
+- ((snap_state.bs_pos + pending_size) >= maxlen)) {
+- if (store_and_stop())
+- break;
+- }
+- }
+-
+- if(snap_state.state == SAVE_STATE_CANCELLED) {
+- save_snapshot_completed();
+- Error *errp = NULL;
+- qmp_savevm_end(&errp);
+ }
+
++ qemu_bh_schedule(snap_state.cleanup_bh);
++ rcu_unregister_thread();
++ return NULL;
+ }
+
+ static const QEMUFileOps block_file_ops = {
+@@ -306,8 +342,9 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+ error_setg(&snap_state.blocker, "block device is in use by savevm");
+ blk_op_block_all(snap_state.target, snap_state.blocker);
+
+- Coroutine *co = qemu_coroutine_create(process_savevm_co, NULL);
+- qemu_coroutine_enter(co);
++ snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
++ qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
++ NULL, QEMU_THREAD_JOINABLE);
+
+ return;
+
+--
+2.1.4
+
diff --git a/debian/patches/series b/debian/patches/series
index 7bd93f4..4ae72b0 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -43,6 +43,7 @@ pve/0042-qmp_snapshot_drive-add-aiocontext.patch
pve/0043-vma-sizes-passed-to-blk_co_preadv-should-be-bytes-no.patch
pve/0044-glusterfs-daemonize.patch
pve/0045-qmp_delete_drive_snapshot-add-aiocontext.patch
+pve/0046-convert-savevm-async-to-threads.patch
#see https://bugs.launchpad.net/qemu/+bug/1488363?comments=all
extra/x86-lapic-Load-LAPIC-state-at-post_load.patch
extra/0001-Revert-target-i386-disable-LINT0-after-reset.patch
--
2.1.4
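Side note on the block_state_writev_buffer() hunk above: since the state is now written from a plain thread instead of from a coroutine, the write has to be made blocking. The pattern used is to enter a coroutine that calls blk_co_pwritev() and then aio_poll() the device's AioContext until the coroutine has stored a result. A minimal sketch of that pattern, with illustrative names (blocking_pwritev, write_entry) and a locally defined NOT_DONE marker, assuming the block-backend API of that QEMU generation:

/* Sketch only: a blocking write built on a coroutine plus aio_poll(). */
#include "qemu/osdep.h"
#include "qemu/coroutine.h"
#include "block/aio.h"
#include "sysemu/block-backend.h"

#define NOT_DONE 0x7fffffff   /* marker: coroutine has not finished yet */

typedef struct BlkRwCo {
    BlockBackend *blk;
    int64_t offset;
    QEMUIOVector *qiov;
    int ret;
} BlkRwCo;

static void coroutine_fn write_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
                               rwco->qiov, 0);
}

/* Blocks the calling (non-coroutine) thread until the write completes. */
static ssize_t blocking_pwritev(BlockBackend *blk, QEMUIOVector *qiov,
                                int64_t offset)
{
    BlkRwCo rwco = {
        .blk = blk, .offset = offset, .qiov = qiov, .ret = NOT_DONE,
    };
    Coroutine *co = qemu_coroutine_create(write_entry, &rwco);
    qemu_coroutine_enter(co);

    AioContext *ctx = blk_get_aio_context(blk);
    while (rwco.ret == NOT_DONE) {
        aio_poll(ctx, true);   /* dispatch I/O until the coroutine finishes */
    }
    return rwco.ret < 0 ? rwco.ret : qiov->size;
}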
_______________________________________________
pve-devel mailing list
pve-devel at pve.proxmox.com
http://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel