[pve-devel] [PATCH qemu 5/7] add patch to disable graph locking

Fiona Ebner f.ebner at proxmox.com
Thu Sep 28 14:59:24 CEST 2023


There are still some issues with graph locking, e.g. deadlocks during
backup canceling [0] and initial attempts to fix it didn't work [1].
Because the AioContext locks still exist, it should still be safe to
disable graph locking.

[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
[1]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg06905.html

Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---
 ...t-graph-lock-Disable-locking-for-now.patch | 152 ++++++++++++++++++
 debian/patches/series                         |   1 +
 2 files changed, 153 insertions(+)
 create mode 100644 debian/patches/extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch

diff --git a/debian/patches/extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch b/debian/patches/extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch
new file mode 100644
index 0000000..0a5baec
--- /dev/null
+++ b/debian/patches/extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch
@@ -0,0 +1,152 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner at proxmox.com>
+Date: Thu, 28 Sep 2023 10:07:03 +0200
+Subject: [PATCH] Revert "Revert "graph-lock: Disable locking for now""
+
+This reverts commit 3cce22defb4b0e47cf135444e30cc673cff5ebad.
+
+There are still some issues with graph locking, e.g. deadlocks during
+backup canceling [0]. Because the AioContext locks still exist, it
+should be safe to disable locking again.
+
+From the original 80fc5d2600 ("graph-lock: Disable locking for now"):
+
+> We don't currently rely on graph locking yet. It is supposed to replace
+> the AioContext lock eventually to enable multiqueue support, but as long
+> as we still have the AioContext lock, it is sufficient without the graph
+> lock. Once the AioContext lock goes away, the deadlock doesn't exist any
+> more either and this commit can be reverted. (Of course, it can also be
+> reverted while the AioContext lock still exists if the callers have been
+> fixed.)
+
+[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
+
+Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
+---
+ block/graph-lock.c | 26 ++++++++++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+
+diff --git a/block/graph-lock.c b/block/graph-lock.c
+index 5e66f01ae8..3bf2591dc4 100644
+--- a/block/graph-lock.c
++++ b/block/graph-lock.c
+@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
+ /* Protects the list of aiocontext and orphaned_reader_count */
+ static QemuMutex aio_context_list_lock;
+ 
++#if 0
+ /* Written and read with atomic operations. */
+ static int has_writer;
++#endif
+ 
+ /*
+  * A reader coroutine could move from an AioContext to another.
+@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
+     g_free(ctx->bdrv_graph);
+ }
+ 
++#if 0
+ static uint32_t reader_count(void)
+ {
+     BdrvGraphRWlock *brdv_graph;
+@@ -105,13 +108,21 @@ static uint32_t reader_count(void)
+     assert((int32_t)rd >= 0);
+     return rd;
+ }
++#endif
+ 
+ void bdrv_graph_wrlock(BlockDriverState *bs)
+ {
+     AioContext *ctx = NULL;
+ 
+     GLOBAL_STATE_CODE();
++    /*
++     * TODO Some callers hold an AioContext lock when this is called, which
++     * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
++     * AioContext locks are gone).
++     */
++#if 0
+     assert(!qatomic_read(&has_writer));
++#endif
+ 
+     /*
+      * Release only non-mainloop AioContext. The mainloop often relies on the
+@@ -126,6 +137,7 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
+         }
+     }
+ 
++#if 0
+     /* Make sure that constantly arriving new I/O doesn't cause starvation */
+     bdrv_drain_all_begin_nopoll();
+ 
+@@ -154,6 +166,7 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
+     } while (reader_count() >= 1);
+ 
+     bdrv_drain_all_end();
++#endif
+ 
+     if (ctx) {
+         aio_context_acquire(bdrv_get_aio_context(bs));
+@@ -163,6 +176,7 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
+ void bdrv_graph_wrunlock(void)
+ {
+     GLOBAL_STATE_CODE();
++#if 0
+     QEMU_LOCK_GUARD(&aio_context_list_lock);
+     assert(qatomic_read(&has_writer));
+ 
+@@ -174,10 +188,13 @@ void bdrv_graph_wrunlock(void)
+ 
+     /* Wake up all coroutine that are waiting to read the graph */
+     qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
++#endif
+ }
+ 
+ void coroutine_fn bdrv_graph_co_rdlock(void)
+ {
++    /* TODO Reenable when wrlock is reenabled */
++#if 0
+     BdrvGraphRWlock *bdrv_graph;
+     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
+ 
+@@ -237,10 +254,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
+             qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
+         }
+     }
++#endif
+ }
+ 
+ void coroutine_fn bdrv_graph_co_rdunlock(void)
+ {
++#if 0
+     BdrvGraphRWlock *bdrv_graph;
+     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
+ 
+@@ -258,6 +277,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
+     if (qatomic_read(&has_writer)) {
+         aio_wait_kick();
+     }
++#endif
+ }
+ 
+ void bdrv_graph_rdlock_main_loop(void)
+@@ -275,13 +295,19 @@ void bdrv_graph_rdunlock_main_loop(void)
+ void assert_bdrv_graph_readable(void)
+ {
+     /* reader_count() is slow due to aio_context_list_lock lock contention */
++    /* TODO Reenable when wrlock is reenabled */
++#if 0
+ #ifdef CONFIG_DEBUG_GRAPH_LOCK
+     assert(qemu_in_main_thread() || reader_count());
+ #endif
++#endif
+ }
+ 
+ void assert_bdrv_graph_writable(void)
+ {
+     assert(qemu_in_main_thread());
++    /* TODO Reenable when wrlock is reenabled */
++#if 0
+     assert(qatomic_read(&has_writer));
++#endif
+ }
diff --git a/debian/patches/series b/debian/patches/series
index 01d4d3c..6d681da 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -3,6 +3,7 @@ extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
 extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
 extra/0004-migration-block-dirty-bitmap-fix-loading-bitmap-when.patch
 extra/0005-hw-ide-reset-cancel-async-DMA-operation-before-reset.patch
+extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch
 bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
 bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
 bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
-- 
2.39.2






More information about the pve-devel mailing list