[pve-devel] [PATCH qemu 2/2] PVE: Don't call job_cancel in coroutines

Stefan Reiter s.reiter at proxmox.com
Thu Oct 22 14:11:18 CEST 2020


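Calling job_cancel from a coroutine hangs while it cancels the other jobs in
the transaction (txn), so defer the cancellation to a bottom half instead and
let the coroutine yield until the bottom half has finished.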

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
 pve-backup.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/pve-backup.c b/pve-backup.c
index 9179754dcb..af2db0d4b9 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -82,6 +82,12 @@ typedef struct PVEBackupDevInfo {
     BlockJob *job;
 } PVEBackupDevInfo;
 
+typedef struct JobCancelData { /* arguments handed to job_cancel_bh() */
+    AioContext *ctx; /* context the BH runs in and co is rescheduled in */
+    Coroutine *co; /* coroutine that yielded and waits for the cancel */
+    Job *job; /* job to be cancelled synchronously by the BH */
+} JobCancelData;
+
 static void pvebackup_propagate_error(Error *err)
 {
     qemu_mutex_lock(&backup_state.stat.lock);
@@ -332,6 +338,18 @@ static void pvebackup_complete_cb(void *opaque, int ret)
     aio_co_enter(qemu_get_aio_context(), co);
 }
 
+/*
+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
+ * main loop processing via a bottom half. @opaque is a JobCancelData.
+ */
+static void job_cancel_bh(void *opaque) {
+    JobCancelData *data = (JobCancelData*)opaque;
+    aio_context_acquire(data->job->aio_context);
+    job_cancel_sync(data->job);
+    aio_context_release(data->job->aio_context);
+    aio_co_schedule(data->ctx, data->co); /* wake the yielded coroutine */
+}
+
 static void coroutine_fn pvebackup_co_cancel(void *opaque)
 {
     Error *cancel_err = NULL;
@@ -357,7 +375,13 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
         NULL;
 
     if (cancel_job) {
-        job_cancel(&cancel_job->job, false);
+        JobCancelData data = {
+            .ctx = qemu_get_current_aio_context(),
+            .co = qemu_coroutine_self(),
+            .job = &cancel_job->job,
+        };
+        aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+        qemu_coroutine_yield();
     }
 
     qemu_co_mutex_unlock(&backup_state.backup_mutex);
-- 
2.20.1






More information about the pve-devel mailing list