[pve-devel] [PATCH qemu 2/2] pick fix for potential deadlock with QMP resize and iothread
Fiona Ebner
f.ebner at proxmox.com
Mon Dec 11 14:28:39 CET 2023
While the patch gives bdrv_graph_wrlock() as an example where the
issue can manifest, something similar can happen even when that is
disabled. Was able to reproduce the issue with
while true; do qm resize 115 scsi0 +4M; sleep 1; done
while running
fio --name=make-mirror-work --size=100M --direct=1 --rw=randwrite \
--bs=4k --ioengine=psync --numjobs=5 --runtime=1200 --time_based
in the VM.
Fix picked up from:
https://lists.nongnu.org/archive/html/qemu-devel/2023-12/msg01102.html
Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---
...d-support-for-sync-bitmap-mode-never.patch | 16 ++++-----
...check-for-bitmap-mode-without-bitmap.patch | 4 +--
.../0006-mirror-move-some-checks-to-qmp.patch | 4 +--
...oContext-locking-in-qmp_block_resize.patch | 36 +++++++++++++++++++
...ckup-Proxmox-backup-patches-for-QEMU.patch | 2 +-
debian/patches/series | 1 +
6 files changed, 50 insertions(+), 13 deletions(-)
create mode 100644 debian/patches/extra/0011-block-Fix-AioContext-locking-in-qmp_block_resize.patch
diff --git a/debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch b/debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
index c0cb23f..1f149e9 100644
--- a/debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
+++ b/debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
@@ -254,10 +254,10 @@ index d3cacd1708..1ff42c8af1 100644
errp);
if (!job) {
diff --git a/blockdev.c b/blockdev.c
-index e6eba61484..a8b1fd2a73 100644
+index c28462a633..a402fa4bf7 100644
--- a/blockdev.c
+++ b/blockdev.c
-@@ -2848,6 +2848,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2849,6 +2849,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
BlockDriverState *target,
const char *replaces,
enum MirrorSyncMode sync,
@@ -267,7 +267,7 @@ index e6eba61484..a8b1fd2a73 100644
BlockMirrorBackingMode backing_mode,
bool zero_target,
bool has_speed, int64_t speed,
-@@ -2866,6 +2869,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2867,6 +2870,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
{
BlockDriverState *unfiltered_bs;
int job_flags = JOB_DEFAULT;
@@ -275,7 +275,7 @@ index e6eba61484..a8b1fd2a73 100644
GLOBAL_STATE_CODE();
GRAPH_RDLOCK_GUARD_MAINLOOP();
-@@ -2920,6 +2924,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2921,6 +2925,29 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
@@ -305,7 +305,7 @@ index e6eba61484..a8b1fd2a73 100644
if (!replaces) {
/* We want to mirror from @bs, but keep implicit filters on top */
unfiltered_bs = bdrv_skip_implicit_filters(bs);
-@@ -2965,8 +2992,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2966,8 +2993,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
* and will allow to check whether the node still exist at mirror completion
*/
mirror_start(job_id, bs, target,
@@ -316,7 +316,7 @@ index e6eba61484..a8b1fd2a73 100644
on_source_error, on_target_error, unmap, filter_node_name,
copy_mode, errp);
}
-@@ -3114,6 +3141,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
+@@ -3115,6 +3142,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
blockdev_mirror_common(arg->job_id, bs, target_bs,
arg->replaces, arg->sync,
@@ -325,7 +325,7 @@ index e6eba61484..a8b1fd2a73 100644
backing_mode, zero_target,
arg->has_speed, arg->speed,
arg->has_granularity, arg->granularity,
-@@ -3135,6 +3164,8 @@ void qmp_blockdev_mirror(const char *job_id,
+@@ -3136,6 +3165,8 @@ void qmp_blockdev_mirror(const char *job_id,
const char *device, const char *target,
const char *replaces,
MirrorSyncMode sync,
@@ -334,7 +334,7 @@ index e6eba61484..a8b1fd2a73 100644
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size,
-@@ -3183,7 +3214,8 @@ void qmp_blockdev_mirror(const char *job_id,
+@@ -3184,7 +3215,8 @@ void qmp_blockdev_mirror(const char *job_id,
}
blockdev_mirror_common(job_id, bs, target_bs,
diff --git a/debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch b/debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
index 4546b78..3bf9797 100644
--- a/debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
+++ b/debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
@@ -16,10 +16,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
1 file changed, 3 insertions(+)
diff --git a/blockdev.c b/blockdev.c
-index a8b1fd2a73..83d5cc1e49 100644
+index a402fa4bf7..01b0ab0549 100644
--- a/blockdev.c
+++ b/blockdev.c
-@@ -2945,6 +2945,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2946,6 +2946,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
diff --git a/debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch b/debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
index 9a3108f..f9a6e20 100644
--- a/debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
+++ b/debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
@@ -62,10 +62,10 @@ index 00f2665ca4..60cf574de5 100644
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
diff --git a/blockdev.c b/blockdev.c
-index 83d5cc1e49..060d86a65f 100644
+index 01b0ab0549..cd5f205ad1 100644
--- a/blockdev.c
+++ b/blockdev.c
-@@ -2924,7 +2924,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+@@ -2925,7 +2925,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
sync = MIRROR_SYNC_MODE_FULL;
}
diff --git a/debian/patches/extra/0011-block-Fix-AioContext-locking-in-qmp_block_resize.patch b/debian/patches/extra/0011-block-Fix-AioContext-locking-in-qmp_block_resize.patch
new file mode 100644
index 0000000..a79fa80
--- /dev/null
+++ b/debian/patches/extra/0011-block-Fix-AioContext-locking-in-qmp_block_resize.patch
@@ -0,0 +1,36 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf at redhat.com>
+Date: Fri, 8 Dec 2023 13:43:52 +0100
+Subject: [PATCH] block: Fix AioContext locking in qmp_block_resize()
+
+The AioContext must be unlocked before calling blk_co_unref(), because
+it takes the AioContext lock internally in blk_unref_bh(), which is
+scheduled in the main thread. If we don't unlock, the AioContext is
+locked twice and nested event loops such as in bdrv_graph_wrlock() will
+deadlock.
+
+Cc: qemu-stable at nongnu.org
+Fixes: https://issues.redhat.com/browse/RHEL-15965
+Fixes: 0c7d204f50c382c6baac8c94bd57af4a022b3888
+Signed-off-by: Kevin Wolf <kwolf at redhat.com>
+(picked up from https://lists.nongnu.org/archive/html/qemu-devel/2023-12/msg01102.html)
+Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
+---
+ blockdev.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/blockdev.c b/blockdev.c
+index e6eba61484..c28462a633 100644
+--- a/blockdev.c
++++ b/blockdev.c
+@@ -2361,8 +2361,9 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
+
+ bdrv_co_lock(bs);
+ bdrv_drained_end(bs);
+- blk_co_unref(blk);
+ bdrv_co_unlock(bs);
++
++ blk_co_unref(blk);
+ }
+
+ void qmp_block_stream(const char *job_id, const char *device,
diff --git a/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch b/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
index b32c995..3829068 100644
--- a/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
+++ b/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
@@ -205,7 +205,7 @@ index ca2599de44..6efe28cef5 100644
+ hmp_handle_error(mon, error);
+}
diff --git a/blockdev.c b/blockdev.c
-index 060d86a65f..79c3575612 100644
+index cd5f205ad1..7793143d76 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -37,6 +37,7 @@
diff --git a/debian/patches/series b/debian/patches/series
index 0e21f1f..7dcedcb 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -8,6 +8,7 @@ extra/0007-migration-states-workaround-snapshot-performance-reg.patch
extra/0008-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
extra/0009-hw-ide-ahci-fix-legacy-software-reset.patch
extra/0010-ui-vnc-clipboard-fix-inflate_buffer.patch
+extra/0011-block-Fix-AioContext-locking-in-qmp_block_resize.patch
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
--
2.39.2
More information about the pve-devel
mailing list