[pve-devel] [PATCH kvm] Fix #796: convert savevm-async to threads

Wolfgang Bumiller w.bumiller at proxmox.com
Tue Nov 8 12:03:08 CET 2016


This should also allow snapshots with RAM to run with
iothreads enabled.
---
Alexandre: could you maybe try this, too? With virtio-scsi-single and
iothreads enabled, snapshots with RAM used to hang and never finish.

I originally started this for #796, although that bug is hard to reproduce
and verify. Rebased and updated for our current code.
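
For context on the mechanics: blk_co_pwritev() may only be called from
coroutine context, but the savevm worker is now a plain thread. The writev
callback therefore spawns a short-lived coroutine and polls the target's
AioContext until that coroutine reports completion. A minimal sketch of the
pattern (names like WriteCo and blk_pwritev_sync are made up here for
illustration; the patch itself uses BlkRwCo and block_state_writev_buffer):

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"
    #include "block/aio.h"
    #include "sysemu/block-backend.h"

    #define NOT_DONE 0x7fffffff   /* sentinel: coroutine not finished yet */

    typedef struct WriteCo {      /* hypothetical wrapper state */
        BlockBackend *blk;
        int64_t offset;
        QEMUIOVector *qiov;
        int ret;
    } WriteCo;

    static void coroutine_fn write_co_entry(void *opaque)
    {
        WriteCo *wc = opaque;
        /* coroutine-only call; stores its result when it completes */
        wc->ret = blk_co_pwritev(wc->blk, wc->offset, wc->qiov->size,
                                 wc->qiov, 0);
    }

    /* Callable from a non-coroutine thread. */
    static int blk_pwritev_sync(BlockBackend *blk, int64_t offset,
                                QEMUIOVector *qiov)
    {
        WriteCo wc = {
            .blk = blk, .offset = offset, .qiov = qiov, .ret = NOT_DONE,
        };
        Coroutine *co = qemu_coroutine_create(write_co_entry, &wc);
        qemu_coroutine_enter(co);
        /* drive the target's AioContext until the coroutine finishes */
        while (wc.ret == NOT_DONE) {
            aio_poll(blk_get_aio_context(blk), true);
        }
        return wc.ret;
    }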
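The other half is the teardown: the worker thread cannot join itself and
must not resume the VM without holding the BQL, so it schedules a bottom
half and lets the main loop do the joining and cleanup. Again only a sketch
with placeholder names (worker_thread, worker_cleanup_bh) of the shape used
in the patch:

    #include "qemu/osdep.h"
    #include "qemu/thread.h"
    #include "qemu/rcu.h"
    #include "qemu/main-loop.h"

    static QEMUBH *cleanup_bh;
    static QemuThread worker;

    /* Runs in the main loop, with the BQL held. */
    static void worker_cleanup_bh(void *opaque)
    {
        qemu_bh_delete(cleanup_bh);
        /* drop the BQL while joining so the worker can exit cleanly */
        qemu_mutex_unlock_iothread();
        qemu_thread_join(&worker);
        qemu_mutex_lock_iothread();
        /* ...final state transition, resume the VM, etc... */
    }

    static void *worker_thread(void *opaque)
    {
        rcu_register_thread();
        /* ...long-running work (the migration stream in this patch)... */
        qemu_bh_schedule(cleanup_bh);  /* hand cleanup to the main loop */
        rcu_unregister_thread();
        return NULL;
    }

    void start_worker(void)
    {
        cleanup_bh = qemu_bh_new(worker_cleanup_bh, NULL);
        qemu_thread_create(&worker, "savevm-async", worker_thread,
                           NULL, QEMU_THREAD_JOINABLE);
    }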

 .../pve/0046-convert-savevm-async-to-threads.patch | 240 +++++++++++++++++++++
 debian/patches/series                              |   1 +
 2 files changed, 241 insertions(+)
 create mode 100644 debian/patches/pve/0046-convert-savevm-async-to-threads.patch

diff --git a/debian/patches/pve/0046-convert-savevm-async-to-threads.patch b/debian/patches/pve/0046-convert-savevm-async-to-threads.patch
new file mode 100644
index 0000000..6a9d9f4
--- /dev/null
+++ b/debian/patches/pve/0046-convert-savevm-async-to-threads.patch
@@ -0,0 +1,240 @@
+From f18d2aee91bca252a6b90583784f49236ec17d58 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller at proxmox.com>
+Date: Tue, 8 Nov 2016 11:13:06 +0100
+Subject: [PATCH 46/46] convert savevm-async to threads
+
+---
+ savevm-async.c | 149 +++++++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 93 insertions(+), 56 deletions(-)
+
+diff --git a/savevm-async.c b/savevm-async.c
+index 05b5b19..e293a00 100644
+--- a/savevm-async.c
++++ b/savevm-async.c
+@@ -48,6 +48,8 @@ static struct SnapshotState {
+     int saved_vm_running;
+     QEMUFile *file;
+     int64_t total_time;
++    QEMUBH *cleanup_bh;
++    QemuThread thread;
+ } snap_state;
+ 
+ SaveVMInfo *qmp_query_savevm(Error **errp)
+@@ -135,19 +137,6 @@ static void save_snapshot_error(const char *fmt, ...)
+     g_free (msg);
+ 
+     snap_state.state = SAVE_STATE_ERROR;
+-
+-    save_snapshot_cleanup();
+-}
+-
+-static void save_snapshot_completed(void)
+-{
+-    DPRINTF("save_snapshot_completed\n");
+-
+-    if (save_snapshot_cleanup() < 0) {
+-        snap_state.state = SAVE_STATE_ERROR;
+-    } else {
+-        snap_state.state = SAVE_STATE_COMPLETED;
+-    }
+ }
+ 
+ static int block_state_close(void *opaque)
+@@ -156,36 +145,76 @@ static int block_state_close(void *opaque)
+     return blk_flush(snap_state.target);
+ }
+ 
++typedef struct BlkRwCo {
++    int64_t offset;
++    QEMUIOVector *qiov;
++    int ret;
++} BlkRwCo;
++
++static void coroutine_fn block_state_write_entry(void *opaque) {
++    BlkRwCo *rwco = opaque;
++    rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
++                               rwco->qiov, 0);
++}
++
+ static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
+                                          int iovcnt, int64_t pos)
+ {
+-    int ret;
+     QEMUIOVector qiov;
++    AioContext *aio_context;
++    Coroutine *co;
++    BlkRwCo rwco;
++
++    assert(pos == snap_state.bs_pos);
++    rwco = (BlkRwCo) {
++        .offset = pos,
++        .qiov = &qiov,
++        .ret = NOT_DONE,
++    };
+ 
+     qemu_iovec_init_external(&qiov, iov, iovcnt);
+-    ret = blk_co_pwritev(snap_state.target, pos, qiov.size, &qiov, 0);
+-    if (ret < 0) {
+-        return ret;
++
++    co = qemu_coroutine_create(&block_state_write_entry, &rwco);
++    qemu_coroutine_enter(co);
++
++    aio_context = blk_get_aio_context(snap_state.target);
++    while (rwco.ret == NOT_DONE) {
++        aio_poll(aio_context, true);
+     }
++
+     snap_state.bs_pos += qiov.size;
+     return qiov.size;
+ }
+ 
+-static int store_and_stop(void) {
+-    if (global_state_store()) {
+-        save_snapshot_error("Error saving global state");
+-        return 1;
++static void process_savevm_cleanup(void *opaque)
++{
++    int ret;
++    qemu_bh_delete(snap_state.cleanup_bh);
++    snap_state.cleanup_bh = NULL;
++    qemu_mutex_unlock_iothread();
++    qemu_thread_join(&snap_state.thread);
++    qemu_mutex_lock_iothread();
++    ret = save_snapshot_cleanup();
++    if (ret < 0) {
++        save_snapshot_error("save_snapshot_cleanup error %d", ret);
++    } else if (snap_state.state == SAVE_STATE_ACTIVE) {
++        snap_state.state = SAVE_STATE_COMPLETED;
++    } else {
++        save_snapshot_error("process_savevm_cleanup: invalid state: %d",
++                            snap_state.state);
+     }
+-    if (runstate_is_running()) {
+-        vm_stop(RUN_STATE_SAVE_VM);
++    if (snap_state.saved_vm_running) {
++        vm_start();
++        snap_state.saved_vm_running = false;
+     }
+-    return 0;
+ }
+ 
+-static void process_savevm_co(void *opaque)
++static void *process_savevm_thread(void *opaque)
+ {
+     int ret;
+     int64_t maxlen;
++    AioContext *aio_context;
++
+     MigrationParams params = {
+         .blk = 0,
+         .shared = 0
+@@ -193,57 +222,64 @@ static void process_savevm_co(void *opaque)
+ 
+     snap_state.state = SAVE_STATE_ACTIVE;
+ 
+-    qemu_mutex_unlock_iothread();
++    rcu_register_thread();
++
+     qemu_savevm_state_header(snap_state.file);
+     ret = qemu_savevm_state_begin(snap_state.file, &params);
+-    qemu_mutex_lock_iothread();
+ 
+     if (ret < 0) {
+         save_snapshot_error("qemu_savevm_state_begin failed");
+-        return;
++        rcu_unregister_thread();
++        return NULL;
+     }
+ 
++    aio_context = bdrv_get_aio_context(blk_bs(snap_state.target));
++    aio_context_acquire(aio_context);
++
+     while (snap_state.state == SAVE_STATE_ACTIVE) {
+         uint64_t pending_size, pend_post, pend_nonpost;
+ 
+         qemu_savevm_state_pending(snap_state.file, 0, &pend_nonpost, &pend_post);
+         pending_size = pend_post + pend_nonpost;
+ 
+-        if (pending_size) {
+-                ret = qemu_savevm_state_iterate(snap_state.file, false);
+-                if (ret < 0) {
+-                    save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+-                    break;
+-                }
+-                DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
++        maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
++
++        if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
++            ret = qemu_savevm_state_iterate(snap_state.file, false);
++            if (ret < 0) {
++                aio_context_release(aio_context);
++                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
++                break;
++            }
++            DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
+         } else {
+-            DPRINTF("done iterating\n");
+-            if (store_and_stop())
++            aio_context_release(aio_context);
++            qemu_mutex_lock_iothread();
++            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
++            ret = global_state_store();
++            if (ret) {
++                qemu_mutex_unlock_iothread();
++                save_snapshot_error("global_state_store error %d", ret);
++                break;
++            }
++            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
++            if (ret < 0) {
++                qemu_mutex_unlock_iothread();
++                save_snapshot_error("vm_stop_force_state error %d", ret);
+                 break;
++            }
+             DPRINTF("savevm inerate finished\n");
+             qemu_savevm_state_complete_precopy(snap_state.file, false);
++            qemu_savevm_state_cleanup();
+             DPRINTF("save complete\n");
+-            save_snapshot_completed();
++            qemu_mutex_unlock_iothread();
+             break;
+         }
+-
+-        /* stop the VM if we get to the end of available space,
+-         * or if pending_size is just a few MB
+-         */
+-        maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
+-        if ((pending_size < 100000) ||
+-            ((snap_state.bs_pos + pending_size) >= maxlen)) {
+-            if (store_and_stop())
+-                break;
+-        }
+-    }
+-
+-    if(snap_state.state == SAVE_STATE_CANCELLED) {
+-        save_snapshot_completed();
+-        Error *errp = NULL;
+-        qmp_savevm_end(&errp);
+     }
+ 
++    qemu_bh_schedule(snap_state.cleanup_bh);
++    rcu_unregister_thread();
++    return NULL;
+ }
+ 
+ static const QEMUFileOps block_file_ops = {
+@@ -306,8 +342,9 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+     error_setg(&snap_state.blocker, "block device is in use by savevm");
+     blk_op_block_all(snap_state.target, snap_state.blocker);
+ 
+-    Coroutine *co = qemu_coroutine_create(process_savevm_co, NULL);
+-    qemu_coroutine_enter(co);
++    snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
++    qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
++                       NULL, QEMU_THREAD_JOINABLE);
+ 
+     return;
+ 
+-- 
+2.1.4
+
diff --git a/debian/patches/series b/debian/patches/series
index 7bd93f4..4ae72b0 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -43,6 +43,7 @@ pve/0042-qmp_snapshot_drive-add-aiocontext.patch
 pve/0043-vma-sizes-passed-to-blk_co_preadv-should-be-bytes-no.patch
 pve/0044-glusterfs-daemonize.patch
 pve/0045-qmp_delete_drive_snapshot-add-aiocontext.patch
+pve/0046-convert-savevm-async-to-threads.patch
 #see https://bugs.launchpad.net/qemu/+bug/1488363?comments=all
 extra/x86-lapic-Load-LAPIC-state-at-post_load.patch
 extra/0001-Revert-target-i386-disable-LINT0-after-reset.patch
-- 
2.1.4
