[pve-devel] [PATCH qemu 1/2] experimentally move savevm-async back into a coroutine

Wolfgang Bumiller w.bumiller at proxmox.com
Mon May 4 14:13:41 CEST 2020


> On May 4, 2020 12:43 PM Stefan Reiter <s.reiter at proxmox.com> wrote:
> 
>  
> Fixes the SIGSEGV issues on Ceph with snapshot and rollback for me, so:
> 
> Tested-by: Stefan Reiter <s.reiter at proxmox.com>
> 
> Just for reference, I also bisected the bug this fixes to upstream 
> commit 8c6b0356b53 ("util/async: make bh_aio_poll() O(1)"), i.e. it only 
> breaks after this commit. Might be an upstream bug too somewhere? But I 
> don't see an issue with doing this in a coroutine either.
> 
> See also inline.
> 
> On 5/4/20 12:02 PM, Wolfgang Bumiller wrote:
> > Move qemu_savevm_state_{header,setup} into the main loop and
> > the rest of the iteration into a coroutine. The former need
> > to lock the iothread (and we can't unlock it in the
> > coroutine), and the latter can't deal with being in a
> > separate thread, so a coroutine it must be.
> > 
> > Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
> > ---
> >   ...e-savevm-async-back-into-a-coroutine.patch | 111 ++++++++++++++++++
> >   debian/patches/series                         |   1 +
> >   2 files changed, 112 insertions(+)
> >   create mode 100644 debian/patches/pve/0043-move-savevm-async-back-into-a-coroutine.patch
> > 
> > diff --git a/debian/patches/pve/0043-move-savevm-async-back-into-a-coroutine.patch b/debian/patches/pve/0043-move-savevm-async-back-into-a-coroutine.patch
> > new file mode 100644
> > index 0000000..f4945db
> > --- /dev/null
> > +++ b/debian/patches/pve/0043-move-savevm-async-back-into-a-coroutine.patch
> > @@ -0,0 +1,111 @@
> > +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> > +From: Wolfgang Bumiller <w.bumiller at proxmox.com>
> > +Date: Thu, 30 Apr 2020 15:55:37 +0200
> > +Subject: [PATCH] move savevm-async back into a coroutine
> > +
> > +Move qemu_savevm_state_{header,setup} into the main loop and
> > +the rest of the iteration into a coroutine. The former need
> > +to lock the iothread (and we can't unlock it in the
> > +coroutine), and the latter can't deal with being in a
> > +separate thread, so a coroutine it must be.
> > +
> > +Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
> > +---
> > + savevm-async.c | 28 +++++++++-------------------
> > + 1 file changed, 9 insertions(+), 19 deletions(-)
> > +
> > +diff --git a/savevm-async.c b/savevm-async.c
> > +index a38b15d652..af865b9a0a 100644
> > +--- a/savevm-async.c
> > ++++ b/savevm-async.c
> > +@@ -51,7 +51,7 @@ static struct SnapshotState {
> > +     QEMUFile *file;
> > +     int64_t total_time;
> > +     QEMUBH *cleanup_bh;
> > +-    QemuThread thread;
> > ++    Coroutine *co;
> > + } snap_state;
> > +
> > + SaveVMInfo *qmp_query_savevm(Error **errp)
> > +@@ -201,11 +201,9 @@ static void process_savevm_cleanup(void *opaque)
> > +     int ret;
> > +     qemu_bh_delete(snap_state.cleanup_bh);
> > +     snap_state.cleanup_bh = NULL;
> > ++    snap_state.co = NULL;
> > +     qemu_savevm_state_cleanup();
> > +
> > +-    qemu_mutex_unlock_iothread();
> > +-    qemu_thread_join(&snap_state.thread);
> > +-    qemu_mutex_lock_iothread();
> > +     ret = save_snapshot_cleanup();
> > +     if (ret < 0) {
> > +         save_snapshot_error("save_snapshot_cleanup error %d", ret);
> > +@@ -221,18 +219,13 @@ static void process_savevm_cleanup(void *opaque)
> > +     }
> > + }
> > +
> > +-static void *process_savevm_thread(void *opaque)
> > ++static void process_savevm_coro(void *opaque)
> > + {
> > +     int ret;
> > +     int64_t maxlen;
> > +     MigrationState *ms = migrate_get_current();
> > +
> > +-    rcu_register_thread();
> > +-
> > +-    qemu_savevm_state_header(snap_state.file);
> > +-    qemu_savevm_state_setup(snap_state.file);
> > +     ret = qemu_file_get_error(snap_state.file);
> > +-
> > +     if (ret < 0) {
> > +         save_snapshot_error("qemu_savevm_state_setup failed");
> > +         goto out;
> > +@@ -247,16 +240,13 @@ static void *process_savevm_thread(void *opaque)
> > +         maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
> > +
> > +         if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
> > +-            qemu_mutex_lock_iothread();
> > +             ret = qemu_savevm_state_iterate(snap_state.file, false);
> > +             if (ret < 0) {
> > +                 save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
> > +                 break;
> > +             }
> > +-            qemu_mutex_unlock_iothread();
> > +             DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
> > +         } else {
> > +-            qemu_mutex_lock_iothread();
> > +             qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
> > +             ret = global_state_store();
> > +             if (ret) {
> > +@@ -285,16 +275,12 @@ static void *process_savevm_thread(void *opaque)
> > +     }
> > +
> > +     qemu_bh_schedule(snap_state.cleanup_bh);
> > +-    qemu_mutex_unlock_iothread();
> > +
> > + out:
> > +     /* set migration state accordingly and clear soon-to-be stale file */
> > +     migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
> > +                       ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
> > +     ms->to_dst_file = NULL;
> > +-
> > +-    rcu_unregister_thread();
> > +-    return NULL;
> > + }
> > +
> > + void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
> > +@@ -373,8 +359,12 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
> > +
> > +     snap_state.state = SAVE_STATE_ACTIVE;
> > +     snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
> > +-    qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
> > +-                       NULL, QEMU_THREAD_JOINABLE);
> > ++    snap_state.co = qemu_coroutine_create(&process_savevm_coro, NULL);
> > ++    qemu_mutex_unlock_iothread();
> 
> Is it safe to unlock the BQL here? It feels okay, since we're in QMP 
> (main thread) anyway; I'd just like to understand the reasoning.

See qemu's savevm.c: these functions generate a write on the file, which uses
`aio_poll()` while waiting for the write coroutine to finish. Without unlocking
first we'd just deadlock.
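
Rough sketch of that wait pattern, for illustration only (not the actual
savevm-async.c code; the helper names below are made up): the QEMUFile
backend kicks off a write coroutine and then spins in aio_poll() until it
signals completion. If making progress requires the BQL and the caller is
still holding it, that loop never exits, which is why the header/setup
calls below are wrapped in an unlock/lock pair.

#include "qemu/osdep.h"
#include "qemu/coroutine.h"
#include "block/aio.h"

/* hypothetical coroutine standing in for the actual state write */
static void coroutine_fn do_state_write_co(void *opaque)
{
    bool *done = opaque;
    /* ... blk_co_pwritev() to the snapshot target would go here ... */
    *done = true;
}

/* hypothetical caller, standing in for the QEMUFile write handler that
 * qemu_savevm_state_header()/_setup() end up in */
static void wait_for_state_write(AioContext *ctx)
{
    bool done = false;
    Coroutine *co = qemu_coroutine_create(do_state_write_co, &done);

    qemu_coroutine_enter(co);
    while (!done) {
        /* dispatches pending events until the write coroutine finishes;
         * must not be reached with the BQL held if completion depends
         * on the BQL being free */
        aio_poll(ctx, true);
    }
}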

> > ++    qemu_savevm_state_header(snap_state.file);
> > ++    qemu_savevm_state_setup(snap_state.file);
> > ++    qemu_mutex_lock_iothread();
> > ++    aio_co_schedule(iohandler_get_aio_context(), snap_state.co);
> > +
> > +     return;
> > +
> > diff --git a/debian/patches/series b/debian/patches/series
> > index 130ba53..34ca660 100644
> > --- a/debian/patches/series
> > +++ b/debian/patches/series
> > @@ -40,3 +40,4 @@ pve/0039-mirror-move-some-checks-to-qmp.patch
> >   pve/0040-PVE-savevm-async-set-up-migration-state.patch
> >   pve/0041-PVE-Backup-avoid-use-QemuRecMutex-inside-coroutines.patch
> >   pve/0042-PVE-Backup-use-QemuMutex-instead-of-QemuRecMutex.patch
> > +pve/0043-move-savevm-async-back-into-a-coroutine.patch
> >



