[pve-devel] [PATCH] add mirror-fix-coroutine-reentrance.patch

Alexandre Derumier aderumier at odiso.com
Thu Aug 13 15:16:12 CEST 2015


This fixes a regression introduced by qemu commit dcfb3beb ("mirror: Do zero
write on target if sectors not allocated").

The upstream fix is not included in the final qemu 2.4 release.
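
The idea of the fix is to stop keying the re-entry in
mirror_iteration_done() off the broad s->common.busy flag and instead
pair it with the four explicit yields in mirror.c through a new
s->waiting_for_io flag. As a rough illustration of that pattern, here
is a minimal, self-contained sketch that emulates a coroutine with
POSIX ucontext on Linux; all names in it are made up for the example
and are not QEMU's coroutine API:

    /* demo.c - illustrative only, NOT QEMU code; build with: cc -o demo demo.c
     * Emulates a coroutine with POSIX ucontext to show the pattern:
     * only re-enter the coroutine when it yielded at a point we own. */
    #include <stdbool.h>
    #include <stdio.h>
    #include <ucontext.h>

    static ucontext_t main_ctx, co_ctx;
    static bool waiting_for_io;   /* plays the role of s->waiting_for_io */

    /* One of the "explicit yields": set the flag around the yield so the
     * completion path knows this is a wakeup point it is allowed to use. */
    static void yield_for_io(void)
    {
        waiting_for_io = true;
        swapcontext(&co_ctx, &main_ctx);
        waiting_for_io = false;
    }

    /* Completion callback, analogous to mirror_iteration_done(): re-enter
     * only if the coroutine is parked in one of its own yields. Keying
     * this off a broad "busy" flag instead is what allowed the bogus
     * wakeup that ends in "Co-routine re-entered recursively". */
    static void iteration_done(void)
    {
        if (waiting_for_io) {
            swapcontext(&main_ctx, &co_ctx);
        } else {
            printf("completion: coroutine is blocked elsewhere, leaving it alone\n");
        }
    }

    static void mirror_like_job(void)
    {
        printf("job: yielding until in-flight I/O completes\n");
        yield_for_io();
        printf("job: resumed by completion, finishing\n");
    }   /* returning here falls back to main via uc_link */

    int main(void)
    {
        static char stack[64 * 1024];

        getcontext(&co_ctx);
        co_ctx.uc_stack.ss_sp = stack;
        co_ctx.uc_stack.ss_size = sizeof(stack);
        co_ctx.uc_link = &main_ctx;
        makecontext(&co_ctx, mirror_like_job, 0);

        swapcontext(&main_ctx, &co_ctx);  /* start job; it yields once    */
        iteration_done();                 /* flag set -> safe to re-enter */
        iteration_done();                 /* flag clear -> must not enter */
        return 0;
    }

The flag lets the completion path distinguish "the job yielded at one
of its own wait points" from "the job is queued on somebody else's
lock" - exactly the distinction s->common.busy can no longer make now
that bdrv_get_block_status_above() may yield.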

Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
---
 .../patches/mirror-fix-coroutine-reentrance.patch  | 122 +++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 debian/patches/mirror-fix-coroutine-reentrance.patch

diff --git a/debian/patches/mirror-fix-coroutine-reentrance.patch b/debian/patches/mirror-fix-coroutine-reentrance.patch
new file mode 100644
index 0000000..5905571
--- /dev/null
+++ b/debian/patches/mirror-fix-coroutine-reentrance.patch
@@ -0,0 +1,122 @@
+From patchwork Thu Aug 13 08:41:50 2015
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: mirror: Fix coroutine reentrance
+From: Kevin Wolf <kwolf at redhat.com>
+X-Patchwork-Id: 506888
+Message-Id: <1439455310-11263-1-git-send-email-kwolf at redhat.com>
+To: qemu-block at nongnu.org
+Cc: kwolf at redhat.com, famz at redhat.com, jcody at redhat.com,
+ qemu-stable at nongnu.org, 
+ qemu-devel at nongnu.org, stefanha at redhat.com, pbonzini at redhat.com
+Date: Thu, 13 Aug 2015 10:41:50 +0200
+
+This fixes a regression introduced by commit dcfb3beb ("mirror: Do zero
+write on target if sectors not allocated"), which was reported to cause
+aborts with the message "Co-routine re-entered recursively".
+
+The cause for this bug is the following code in mirror_iteration_done():
+
+    if (s->common.busy) {
+        qemu_coroutine_enter(s->common.co, NULL);
+    }
+
+This has always been ugly because - unlike most places that reenter - it
+doesn't have a specific yield that it pairs with, but is more
+uncontrolled.  What we really mean here is "reenter the coroutine if
+it's in one of the four explicit yields in mirror.c".
+
+This used to be equivalent to s->common.busy because neither
+mirror_run() nor mirror_iteration() calls any function that could yield.
+However since commit dcfb3beb this doesn't hold true any more:
+bdrv_get_block_status_above() can yield.
+
+So what happens is that bdrv_get_block_status_above() wants to take a
+lock that is already held, so it adds itself to the queue of waiting
+coroutines and yields. Instead of being woken up by the unlock function,
+however, it gets woken up by mirror_iteration_done(), which is obviously
+wrong.
+
+The code actually happens to cope fairly well with most such wakeups,
+but in this specific case, the unlock must already have scheduled the
+coroutine for wakeup when mirror_iteration_done() reentered it. And
+then the coroutine happened to process the scheduled restarts and tried
+to reenter itself recursively.
+
+This patch fixes the problem by pairing the reenter in
+mirror_iteration_done() with specific yields instead of abusing
+s->common.busy.
+
+Cc: qemu-stable at nongnu.org
+Signed-off-by: Kevin Wolf <kwolf at redhat.com>
+Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ block/mirror.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/block/mirror.c b/block/mirror.c
+index fc4d8f5..b2fb4b9 100644
+--- a/block/mirror.c
++++ b/block/mirror.c
+@@ -60,6 +60,7 @@ typedef struct MirrorBlockJob {
+     int sectors_in_flight;
+     int ret;
+     bool unmap;
++    bool waiting_for_io;
+ } MirrorBlockJob;
+ 
+ typedef struct MirrorOp {
+@@ -114,11 +115,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
+     qemu_iovec_destroy(&op->qiov);
+     g_slice_free(MirrorOp, op);
+ 
+-    /* Enter coroutine when it is not sleeping.  The coroutine sleeps to
+-     * rate-limit itself.  The coroutine will eventually resume since there is
+-     * a sleep timeout so don't wake it early.
+-     */
+-    if (s->common.busy) {
++    if (s->waiting_for_io) {
+         qemu_coroutine_enter(s->common.co, NULL);
+     }
+ }
+@@ -203,7 +200,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+     /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
+     while (test_bit(next_chunk, s->in_flight_bitmap)) {
+         trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
++        s->waiting_for_io = true;
+         qemu_coroutine_yield();
++        s->waiting_for_io = false;
+     }
+ 
+     do {
+@@ -239,7 +238,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+          */
+         while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
+             trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
++            s->waiting_for_io = true;
+             qemu_coroutine_yield();
++            s->waiting_for_io = false;
+         }
+         if (s->buf_free_count < nb_chunks + added_chunks) {
+             trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
+@@ -333,7 +334,9 @@ static void mirror_free_init(MirrorBlockJob *s)
+ static void mirror_drain(MirrorBlockJob *s)
+ {
+     while (s->in_flight > 0) {
++        s->waiting_for_io = true;
+         qemu_coroutine_yield();
++        s->waiting_for_io = false;
+     }
+ }
+ 
+@@ -506,7 +509,9 @@ static void coroutine_fn mirror_run(void *opaque)
+             if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+                 (cnt == 0 && s->in_flight > 0)) {
+                 trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
++                s->waiting_for_io = true;
+                 qemu_coroutine_yield();
++                s->waiting_for_io = false;
+                 continue;
+             } else if (cnt != 0) {
+                 delay_ns = mirror_iteration(s);
-- 
2.1.4