[pve-devel] [PATCH kvm] Fix #796: convert savevm-async to threads

Alexandre DERUMIER aderumier at odiso.com
Wed Nov 9 08:11:12 CET 2016




----- Original Message -----
From: "Wolfgang Bumiller" <w.bumiller at proxmox.com>
To: "pve-devel" <pve-devel at pve.proxmox.com>
Cc: "Alexandre Derumier" <aderumier at odiso.com>
Sent: Tuesday, November 8, 2016 12:03:08
Subject: [PATCH kvm] Fix #796: convert savevm-async to threads

This should also allow snapshots with RAM to run with 
iothreads enabled. 
--- 
Alexandre: could you maybe try this, too? With virtio-scsi-single and 
iothreads enabled, snapshots with RAM used to hang and never finish. 

I originally started this for #796, which is hard to reproduce and verify, though. 
I have rebased and updated it for our current code. 
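
For context (not part of the patch): the core shape of the change is that the 
state-saving loop now runs in a joinable QemuThread, and completion is handed 
back to the main loop through a bottom half, which joins the thread and 
finishes cleanup under the iothread lock. Below is a minimal sketch of that 
handoff pattern, using only primitives that also appear in the patch; the 
helper names (worker_fn, worker_done, start_worker) are illustrative and not 
taken from the code. 

#include "qemu/osdep.h" 
#include "qemu/thread.h" 
#include "qemu/main-loop.h" 
#include "qemu/rcu.h" 

static QEMUBH *worker_done_bh;   /* illustrative globals, not from the patch */ 
static QemuThread worker_thread; 

/* Bottom half: runs in the main loop once the worker has finished. */ 
static void worker_done(void *opaque) 
{ 
    qemu_bh_delete(worker_done_bh); 
    worker_done_bh = NULL; 
    qemu_mutex_unlock_iothread();      /* do not hold the BQL while joining */ 
    qemu_thread_join(&worker_thread); 
    qemu_mutex_lock_iothread(); 
    /* ... final state transition / cleanup would go here ... */ 
} 

/* Worker thread: does the long-running work outside the main loop. */ 
static void *worker_fn(void *opaque) 
{ 
    rcu_register_thread(); 
    /* ... iterate, taking the iothread lock only when really needed ... */ 
    qemu_bh_schedule(worker_done_bh);  /* hand completion back to the main loop */ 
    rcu_unregister_thread(); 
    return NULL; 
} 

/* Started from the QMP handler in the main loop. */ 
static void start_worker(void) 
{ 
    worker_done_bh = qemu_bh_new(worker_done, NULL); 
    qemu_thread_create(&worker_thread, "savevm-async", worker_fn, 
                       NULL, QEMU_THREAD_JOINABLE); 
} 

In the patch below the same roles are played by process_savevm_thread and 
process_savevm_cleanup; the remaining piece is block_state_writev_buffer, 
which bridges the coroutine-only blk_co_pwritev() from the thread by entering 
a coroutine and aio_poll()ing the target's AioContext until the write 
completes. 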

.../pve/0046-convert-savevm-async-to-threads.patch | 240 +++++++++++++++++++++ 
debian/patches/series | 1 + 
2 files changed, 241 insertions(+) 
create mode 100644 debian/patches/pve/0046-convert-savevm-async-to-threads.patch 

diff --git a/debian/patches/pve/0046-convert-savevm-async-to-threads.patch b/debian/patches/pve/0046-convert-savevm-async-to-threads.patch 
new file mode 100644 
index 0000000..6a9d9f4 
--- /dev/null 
+++ b/debian/patches/pve/0046-convert-savevm-async-to-threads.patch 
@@ -0,0 +1,240 @@ 
+From f18d2aee91bca252a6b90583784f49236ec17d58 Mon Sep 17 00:00:00 2001 
+From: Wolfgang Bumiller <w.bumiller at proxmox.com> 
+Date: Tue, 8 Nov 2016 11:13:06 +0100 
+Subject: [PATCH 46/46] convert savevm-async to threads 
+ 
+--- 
+ savevm-async.c | 149 +++++++++++++++++++++++++++++++++++---------------------- 
+ 1 file changed, 93 insertions(+), 56 deletions(-) 
+ 
+diff --git a/savevm-async.c b/savevm-async.c 
+index 05b5b19..e293a00 100644 
+--- a/savevm-async.c 
++++ b/savevm-async.c 
+@@ -48,6 +48,8 @@ static struct SnapshotState { 
+ int saved_vm_running; 
+ QEMUFile *file; 
+ int64_t total_time; 
++ QEMUBH *cleanup_bh; 
++ QemuThread thread; 
+ } snap_state; 
+ 
+ SaveVMInfo *qmp_query_savevm(Error **errp) 
+@@ -135,19 +137,6 @@ static void save_snapshot_error(const char *fmt, ...) 
+ g_free (msg); 
+ 
+ snap_state.state = SAVE_STATE_ERROR; 
+- 
+- save_snapshot_cleanup(); 
+-} 
+- 
+-static void save_snapshot_completed(void) 
+-{ 
+- DPRINTF("save_snapshot_completed\n"); 
+- 
+- if (save_snapshot_cleanup() < 0) { 
+- snap_state.state = SAVE_STATE_ERROR; 
+- } else { 
+- snap_state.state = SAVE_STATE_COMPLETED; 
+- } 
+ } 
+ 
+ static int block_state_close(void *opaque) 
+@@ -156,36 +145,76 @@ static int block_state_close(void *opaque) 
+ return blk_flush(snap_state.target); 
+ } 
+ 
++typedef struct BlkRwCo { 
++ int64_t offset; 
++ QEMUIOVector *qiov; 
++ int ret; 
++} BlkRwCo; 
++ 
++static void block_state_write_entry(void *opaque) { 
++ BlkRwCo *rwco = opaque; 
++ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size, 
++ rwco->qiov, 0); 
++} 
++ 
+ static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov, 
+ int iovcnt, int64_t pos) 
+ { 
+- int ret; 
+ QEMUIOVector qiov; 
++ AioContext *aio_context; 
++ Coroutine *co; 
++ BlkRwCo rwco; 
++ 
++ assert(pos == snap_state.bs_pos); 
++ rwco = (BlkRwCo) { 
++ .offset = pos, 
++ .qiov = &qiov, 
++ .ret = NOT_DONE, 
++ }; 
+ 
+ qemu_iovec_init_external(&qiov, iov, iovcnt); 
+- ret = blk_co_pwritev(snap_state.target, pos, qiov.size, &qiov, 0); 
+- if (ret < 0) { 
+- return ret; 
++ 
++ co = qemu_coroutine_create(&block_state_write_entry, &rwco); 
++ qemu_coroutine_enter(co); 
++ 
++ aio_context = blk_get_aio_context(snap_state.target); 
++ while (rwco.ret == NOT_DONE) { 
++ aio_poll(aio_context, true); 
+ } 
++ 
+ snap_state.bs_pos += qiov.size; 
+ return qiov.size; 
+ } 
+ 
+-static int store_and_stop(void) { 
+- if (global_state_store()) { 
+- save_snapshot_error("Error saving global state"); 
+- return 1; 
++static void process_savevm_cleanup(void *opaque) 
++{ 
++ int ret; 
++ qemu_bh_delete(snap_state.cleanup_bh); 
++ snap_state.cleanup_bh = NULL; 
++ qemu_mutex_unlock_iothread(); 
++ qemu_thread_join(&snap_state.thread); 
++ qemu_mutex_lock_iothread(); 
++ ret = save_snapshot_cleanup(); 
++ if (ret < 0) { 
++ save_snapshot_error("save_snapshot_cleanup error %d", ret); 
++ } else if (snap_state.state == SAVE_STATE_ACTIVE) { 
++ snap_state.state = SAVE_STATE_COMPLETED; 
++ } else { 
++ save_snapshot_error("process_savevm_cleanup: invalid state: %d", 
++ snap_state.state); 
+ } 
+- if (runstate_is_running()) { 
+- vm_stop(RUN_STATE_SAVE_VM); 
++ if (snap_state.saved_vm_running) { 
++ vm_start(); 
++ snap_state.saved_vm_running = false; 
+ } 
+- return 0; 
+ } 
+ 
+-static void process_savevm_co(void *opaque) 
++static void *process_savevm_thread(void *opaque) 
+ { 
+ int ret; 
+ int64_t maxlen; 
++ AioContext *aio_context; 
++ 
+ MigrationParams params = { 
+ .blk = 0, 
+ .shared = 0 
+@@ -193,57 +222,64 @@ static void process_savevm_co(void *opaque) 
+ 
+ snap_state.state = SAVE_STATE_ACTIVE; 
+ 
+- qemu_mutex_unlock_iothread(); 
++ rcu_register_thread(); 
++ 
+ qemu_savevm_state_header(snap_state.file); 
+ ret = qemu_savevm_state_begin(snap_state.file, &params); 
+- qemu_mutex_lock_iothread(); 
+ 
+ if (ret < 0) { 
+ save_snapshot_error("qemu_savevm_state_begin failed"); 
+- return; 
++ rcu_unregister_thread(); 
++ return NULL; 
+ } 
+ 
++ aio_context = bdrv_get_aio_context(blk_bs(snap_state.target)); 
++ aio_context_acquire(aio_context); 
++ 
+ while (snap_state.state == SAVE_STATE_ACTIVE) { 
+ uint64_t pending_size, pend_post, pend_nonpost; 
+ 
+ qemu_savevm_state_pending(snap_state.file, 0, &pend_nonpost, &pend_post); 
+ pending_size = pend_post + pend_nonpost; 
+ 
+- if (pending_size) { 
+- ret = qemu_savevm_state_iterate(snap_state.file, false); 
+- if (ret < 0) { 
+- save_snapshot_error("qemu_savevm_state_iterate error %d", ret); 
+- break; 
+- } 
+- DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret); 
++ maxlen = blk_getlength(snap_state.target) - 30*1024*1024; 
++ 
++ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) { 
++ ret = qemu_savevm_state_iterate(snap_state.file, false); 
++ if (ret < 0) { 
++ aio_context_release(aio_context); 
++ save_snapshot_error("qemu_savevm_state_iterate error %d", ret); 
++ break; 
++ } 
++ DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret); 
+ } else { 
+- DPRINTF("done iterating\n"); 
+- if (store_and_stop()) 
++ aio_context_release(aio_context); 
++ qemu_mutex_lock_iothread(); 
++ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); 
++ ret = global_state_store(); 
++ if (ret) { 
++ qemu_mutex_unlock_iothread(); 
++ save_snapshot_error("global_state_store error %d", ret); 
++ break; 
++ } 
++ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 
++ if (ret < 0) { 
++ qemu_mutex_unlock_iothread(); 
++ save_snapshot_error("vm_stop_force_state error %d", ret); 
+ break; 
++ } 
+ DPRINTF("savevm inerate finished\n"); 
+ qemu_savevm_state_complete_precopy(snap_state.file, false); 
++ qemu_savevm_state_cleanup(); 
+ DPRINTF("save complete\n"); 
+- save_snapshot_completed(); 
++ qemu_mutex_unlock_iothread(); 
+ break; 
+ } 
+- 
+- /* stop the VM if we get to the end of available space, 
+- * or if pending_size is just a few MB 
+- */ 
+- maxlen = blk_getlength(snap_state.target) - 30*1024*1024; 
+- if ((pending_size < 100000) || 
+- ((snap_state.bs_pos + pending_size) >= maxlen)) { 
+- if (store_and_stop()) 
+- break; 
+- } 
+- } 
+- 
+- if(snap_state.state == SAVE_STATE_CANCELLED) { 
+- save_snapshot_completed(); 
+- Error *errp = NULL; 
+- qmp_savevm_end(&errp); 
+ } 
+ 
++ qemu_bh_schedule(snap_state.cleanup_bh); 
++ rcu_unregister_thread(); 
++ return NULL; 
+ } 
+ 
+ static const QEMUFileOps block_file_ops = { 
+@@ -306,8 +342,9 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp) 
+ error_setg(&snap_state.blocker, "block device is in use by savevm"); 
+ blk_op_block_all(snap_state.target, snap_state.blocker); 
+ 
+- Coroutine *co = qemu_coroutine_create(process_savevm_co, NULL); 
+- qemu_coroutine_enter(co); 
++ snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state); 
++ qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread, 
++ NULL, QEMU_THREAD_JOINABLE); 
+ 
+ return; 
+ 
+-- 
+2.1.4 
+ 
diff --git a/debian/patches/series b/debian/patches/series 
index 7bd93f4..4ae72b0 100644 
--- a/debian/patches/series 
+++ b/debian/patches/series 
@@ -43,6 +43,7 @@ pve/0042-qmp_snapshot_drive-add-aiocontext.patch 
pve/0043-vma-sizes-passed-to-blk_co_preadv-should-be-bytes-no.patch 
pve/0044-glusterfs-daemonize.patch 
pve/0045-qmp_delete_drive_snapshot-add-aiocontext.patch 
+pve/0046-convert-savevm-async-to-threads.patch 
#see https://bugs.launchpad.net/qemu/+bug/1488363?comments=all 
extra/x86-lapic-Load-LAPIC-state-at-post_load.patch 
extra/0001-Revert-target-i386-disable-LINT0-after-reset.patch 
-- 
2.1.4 



