[pve-devel] [RFC] Changes made for backporting

Fabian Ebner f.ebner at proxmox.com
Tue Nov 23 12:59:49 CET 2021


NOTE: These changes are already included in the previous patch.

Changes to make the patches apply and compile, while hopefully still being
semantically equivalent to the upstream fix.

IRQ-safe locks were not needed anymore at some point upstream, but we
still have them, so the surrounding lock calls in the patches had to be
adapted to match.
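
As a rough sketch (not taken verbatim from any single hunk): where the
upstream patches expect plain spinlock calls in their context, the adjusted
hunks keep the IRQ-disabling variants still used here, e.g.

    raw_spin_lock_irq(&wqe->lock);    /* upstream: raw_spin_lock(&wqe->lock); */
    raw_spin_unlock_irq(&wqe->lock);  /* upstream: raw_spin_unlock(&wqe->lock); */

and likewise the raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore() pair
with a flags variable where that is used.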

The function io_wq_max_workers is only present upstream.
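
The hunk touching it (changing the hard-coded worker-type count 2 to
IO_WQ_ACCT_NR inside that function) is therefore simply dropped:

    -	for (i = 0; i < 2; i++) {
    +	for (i = 0; i < IO_WQ_ACCT_NR; i++) {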

There was one remaining access to &wqe->work_list that was not handled by
the patch (because that code is not present anymore upstream). It was
changed to &acct->work_list, as is done in other places in the patch.
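
Concretely, the extra hunk added to the backported patch for the exit path
in io_wqe_worker() changes

    if (!wq_list_empty(&wqe->work_list))

to

    if (!wq_list_empty(&acct->work_list))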

There were two other one-line changes needed in the surrounding code of
io_wq_create so that the patch would apply.
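
For reference, these are the two lines in the surrounding code that differ
from upstream in io_wq_create():

    	int ret = -ENOMEM, node;
    		goto err;

where upstream has "int ret, node;" and a cpumask_copy() call at those
spots.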

Signed-off-by: Fabian Ebner <f.ebner at proxmox.com>
---
 .../0010-io-wq-fix-queue-stalling-race.patch  | 15 +--
 ...ded-and-unbounded-work-into-separate.patch | 93 ++++++++++---------
 2 files changed, 55 insertions(+), 53 deletions(-)

diff --git a/patches/kernel/0010-io-wq-fix-queue-stalling-race.patch b/patches/kernel/0010-io-wq-fix-queue-stalling-race.patch
index 3ebdf19..5ef160d 100644
--- a/patches/kernel/0010-io-wq-fix-queue-stalling-race.patch
+++ b/patches/kernel/0010-io-wq-fix-queue-stalling-race.patch
@@ -1,4 +1,4 @@
-From 0242f6426ea78fbe3933b44f8c55ae93ec37f6cc Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Jens Axboe <axboe at kernel.dk>
 Date: Tue, 31 Aug 2021 13:53:00 -0600
 Subject: [PATCH] io-wq: fix queue stalling race
@@ -9,16 +9,17 @@ queued between adding us to the stall hash and io_worker_handle_work()
 marking us stalled.
 
 Signed-off-by: Jens Axboe <axboe at kernel.dk>
+[backport]
 Signed-off-by: Fabian Ebner <f.ebner at proxmox.com>
 ---
  fs/io-wq.c | 15 +++++++--------
  1 file changed, 7 insertions(+), 8 deletions(-)
 
 diff --git a/fs/io-wq.c b/fs/io-wq.c
-index a94060b72f84..aa9656eb832e 100644
+index 6612d0aa497e..33678185f3bc 100644
 --- a/fs/io-wq.c
 +++ b/fs/io-wq.c
-@@ -436,8 +436,7 @@ static bool io_worker_can_run_work(struct io_worker *worker,
+@@ -437,8 +437,7 @@ static bool io_worker_can_run_work(struct io_worker *worker,
  }
  
  static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
@@ -28,7 +29,7 @@ index a94060b72f84..aa9656eb832e 100644
  	__must_hold(wqe->lock)
  {
  	struct io_wq_work_node *node, *prev;
-@@ -475,10 +474,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+@@ -476,10 +475,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
  	}
  
  	if (stall_hash != -1U) {
@@ -44,7 +45,7 @@ index a94060b72f84..aa9656eb832e 100644
  	}
  
  	return NULL;
-@@ -518,7 +521,6 @@ static void io_worker_handle_work(struct io_worker *worker)
+@@ -519,7 +522,6 @@ static void io_worker_handle_work(struct io_worker *worker)
  
  	do {
  		struct io_wq_work *work;
@@ -52,7 +53,7 @@ index a94060b72f84..aa9656eb832e 100644
  get_next:
  		/*
  		 * If we got some work, mark us as busy. If we didn't, but
-@@ -527,12 +529,9 @@ static void io_worker_handle_work(struct io_worker *worker)
+@@ -528,12 +530,9 @@ static void io_worker_handle_work(struct io_worker *worker)
  		 * can't make progress, any work completion or insertion will
  		 * clear the stalled flag.
  		 */
@@ -64,7 +65,7 @@ index a94060b72f84..aa9656eb832e 100644
 -		else if (stalled)
 -			wqe->flags |= IO_WQE_FLAG_STALLED;
  
- 		raw_spin_unlock(&wqe->lock);
+ 		raw_spin_unlock_irq(&wqe->lock);
  		if (!work)
 -- 
 2.30.2
diff --git a/patches/kernel/0011-io-wq-split-bounded-and-unbounded-work-into-separate.patch b/patches/kernel/0011-io-wq-split-bounded-and-unbounded-work-into-separate.patch
index 5e47df4..47df331 100644
--- a/patches/kernel/0011-io-wq-split-bounded-and-unbounded-work-into-separate.patch
+++ b/patches/kernel/0011-io-wq-split-bounded-and-unbounded-work-into-separate.patch
@@ -1,4 +1,4 @@
-From f95dc207b93da9c88ddbb7741ec3730c6657b88e Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Jens Axboe <axboe at kernel.dk>
 Date: Tue, 31 Aug 2021 13:57:32 -0600
 Subject: [PATCH] io-wq: split bounded and unbounded work into separate lists
@@ -15,16 +15,17 @@ that is punted to io-wq.
 
 Fixes: ecc53c48c13d ("io-wq: check max_worker limits if a worker transitions bound state")
 Signed-off-by: Jens Axboe <axboe at kernel.dk>
+[backport]
 Signed-off-by: Fabian Ebner <f.ebner at proxmox.com>
 ---
  fs/io-wq.c | 156 +++++++++++++++++++++++------------------------------
  1 file changed, 68 insertions(+), 88 deletions(-)
 
 diff --git a/fs/io-wq.c b/fs/io-wq.c
-index aa9656eb832e..8cba77a937a1 100644
+index 33678185f3bc..2496d8781ea1 100644
 --- a/fs/io-wq.c
 +++ b/fs/io-wq.c
-@@ -32,7 +32,7 @@ enum {
+@@ -34,7 +34,7 @@ enum {
  };
  
  enum {
@@ -33,7 +34,7 @@ index aa9656eb832e..8cba77a937a1 100644
  };
  
  /*
-@@ -71,25 +71,24 @@ struct io_wqe_acct {
+@@ -73,25 +73,24 @@ struct io_wqe_acct {
  	unsigned max_workers;
  	int index;
  	atomic_t nr_running;
@@ -64,7 +65,7 @@ index aa9656eb832e..8cba77a937a1 100644
  
  	struct hlist_nulls_head free_list;
  	struct list_head all_list;
-@@ -195,11 +194,10 @@ static void io_worker_exit(struct io_worker *worker)
+@@ -196,11 +195,10 @@ static void io_worker_exit(struct io_worker *worker)
  	do_exit(0);
  }
  
@@ -79,7 +80,7 @@ index aa9656eb832e..8cba77a937a1 100644
  		return true;
  	return false;
  }
-@@ -208,7 +206,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
+@@ -209,7 +207,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
   * Check head of free list for an available worker. If one isn't available,
   * caller must create one.
   */
@@ -89,7 +90,7 @@ index aa9656eb832e..8cba77a937a1 100644
  	__must_hold(RCU)
  {
  	struct hlist_nulls_node *n;
-@@ -222,6 +221,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
+@@ -223,6 +222,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
  	hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
  		if (!io_worker_get(worker))
  			continue;
@@ -100,7 +101,7 @@ index aa9656eb832e..8cba77a937a1 100644
  		if (wake_up_process(worker->task)) {
  			io_worker_release(worker);
  			return true;
-@@ -340,7 +343,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
+@@ -341,7 +344,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
  	if (!(worker->flags & IO_WORKER_F_UP))
  		return;
  
@@ -109,7 +110,7 @@ index aa9656eb832e..8cba77a937a1 100644
  		atomic_inc(&acct->nr_running);
  		atomic_inc(&wqe->wq->worker_refs);
  		io_queue_worker_create(wqe, worker, acct);
-@@ -355,29 +358,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
+@@ -356,29 +359,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
  			     struct io_wq_work *work)
  	__must_hold(wqe->lock)
  {
@@ -139,8 +140,8 @@ index aa9656eb832e..8cba77a937a1 100644
  }
  
  /*
-@@ -416,44 +400,23 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
- 	spin_unlock_irq(&wq->hash->wait.lock);
+@@ -417,44 +401,23 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+ 	spin_unlock(&wq->hash->wait.lock);
  }
  
 -/*
@@ -188,7 +189,7 @@ index aa9656eb832e..8cba77a937a1 100644
  			return work;
  		}
  
-@@ -464,7 +427,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+@@ -465,7 +428,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
  		/* hashed, can run if not already running */
  		if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
  			wqe->hash_tail[hash] = NULL;
@@ -197,7 +198,7 @@ index aa9656eb832e..8cba77a937a1 100644
  			return work;
  		}
  		if (stall_hash == -1U)
-@@ -478,7 +441,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+@@ -479,7 +442,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
  		 * Set this before dropping the lock to avoid racing with new
  		 * work being added and clearing the stalled bit.
  		 */
@@ -206,7 +207,7 @@ index aa9656eb832e..8cba77a937a1 100644
  		raw_spin_unlock(&wqe->lock);
  		io_wait_on_hash(wqe, stall_hash);
  		raw_spin_lock(&wqe->lock);
-@@ -515,6 +478,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
+@@ -516,6 +479,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
  static void io_worker_handle_work(struct io_worker *worker)
  	__releases(wqe->lock)
  {
@@ -214,7 +215,7 @@ index aa9656eb832e..8cba77a937a1 100644
  	struct io_wqe *wqe = worker->wqe;
  	struct io_wq *wq = wqe->wq;
  	bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
-@@ -529,7 +493,7 @@ static void io_worker_handle_work(struct io_worker *worker)
+@@ -530,7 +494,7 @@ static void io_worker_handle_work(struct io_worker *worker)
  		 * can't make progress, any work completion or insertion will
  		 * clear the stalled flag.
  		 */
@@ -223,19 +224,19 @@ index aa9656eb832e..8cba77a937a1 100644
  		if (work)
  			__io_worker_busy(wqe, worker, work);
  
-@@ -563,10 +527,10 @@ static void io_worker_handle_work(struct io_worker *worker)
+@@ -564,10 +528,10 @@ static void io_worker_handle_work(struct io_worker *worker)
  
  			if (hash != -1U && !next_hashed) {
  				clear_bit(hash, &wq->hash->map);
 +				clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
  				if (wq_has_sleeper(&wq->hash->wait))
  					wake_up(&wq->hash->wait);
- 				raw_spin_lock(&wqe->lock);
+ 				raw_spin_lock_irq(&wqe->lock);
 -				wqe->flags &= ~IO_WQE_FLAG_STALLED;
  				/* skip unnecessary unlock-lock wqe->lock */
  				if (!work)
  					goto get_next;
-@@ -581,6 +545,7 @@ static void io_worker_handle_work(struct io_worker *worker)
+@@ -582,6 +546,7 @@ static void io_worker_handle_work(struct io_worker *worker)
  static int io_wqe_worker(void *data)
  {
  	struct io_worker *worker = data;
@@ -243,16 +244,25 @@ index aa9656eb832e..8cba77a937a1 100644
  	struct io_wqe *wqe = worker->wqe;
  	struct io_wq *wq = wqe->wq;
  	char buf[TASK_COMM_LEN];
-@@ -596,7 +561,7 @@ static int io_wqe_worker(void *data)
+@@ -597,7 +562,7 @@ static int io_wqe_worker(void *data)
  		set_current_state(TASK_INTERRUPTIBLE);
  loop:
- 		raw_spin_lock(&wqe->lock);
+ 		raw_spin_lock_irq(&wqe->lock);
 -		if (io_wqe_run_queue(wqe)) {
 +		if (io_acct_run_queue(acct)) {
  			io_worker_handle_work(worker);
  			goto loop;
  		}
-@@ -764,12 +729,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
+@@ -623,7 +588,7 @@ static int io_wqe_worker(void *data)
+ 
+ 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+ 		raw_spin_lock_irq(&wqe->lock);
+-		if (!wq_list_empty(&wqe->work_list))
++		if (!wq_list_empty(&acct->work_list))
+ 			io_worker_handle_work(worker);
+ 		else
+ 			raw_spin_unlock_irq(&wqe->lock);
+@@ -769,12 +734,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
  
  static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
  {
@@ -267,7 +277,7 @@ index aa9656eb832e..8cba77a937a1 100644
  		return;
  	}
  
-@@ -779,7 +745,7 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+@@ -784,7 +750,7 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
  	if (!tail)
  		goto append;
  
@@ -276,9 +286,9 @@ index aa9656eb832e..8cba77a937a1 100644
  }
  
  static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
-@@ -800,10 +766,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+@@ -806,10 +772,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
  
- 	raw_spin_lock(&wqe->lock);
+ 	raw_spin_lock_irqsave(&wqe->lock, flags);
  	io_wqe_insert_work(wqe, work);
 -	wqe->flags &= ~IO_WQE_FLAG_STALLED;
 +	clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
@@ -288,8 +298,8 @@ index aa9656eb832e..8cba77a937a1 100644
 +	do_create = !io_wqe_activate_free_worker(wqe, acct);
  	rcu_read_unlock();
  
- 	raw_spin_unlock(&wqe->lock);
-@@ -855,6 +821,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
+ 	raw_spin_unlock_irqrestore(&wqe->lock, flags);
+@@ -862,6 +828,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
  					 struct io_wq_work *work,
  					 struct io_wq_work_node *prev)
  {
@@ -297,7 +307,7 @@ index aa9656eb832e..8cba77a937a1 100644
  	unsigned int hash = io_get_work_hash(work);
  	struct io_wq_work *prev_work = NULL;
  
-@@ -866,7 +833,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
+@@ -873,7 +840,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
  		else
  			wqe->hash_tail[hash] = NULL;
  	}
@@ -306,20 +316,20 @@ index aa9656eb832e..8cba77a937a1 100644
  }
  
  static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
-@@ -874,22 +841,27 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
- {
+@@ -882,22 +849,27 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
  	struct io_wq_work_node *node, *prev;
  	struct io_wq_work *work;
+ 	unsigned long flags;
 +	int i;
  
  retry:
- 	raw_spin_lock(&wqe->lock);
+ 	raw_spin_lock_irqsave(&wqe->lock, flags);
 -	wq_list_for_each(node, prev, &wqe->work_list) {
 -		work = container_of(node, struct io_wq_work, list);
 -		if (!match->fn(work, match->data))
 -			continue;
 -		io_wqe_remove_pending(wqe, work, prev);
--		raw_spin_unlock(&wqe->lock);
+-		raw_spin_unlock_irqrestore(&wqe->lock, flags);
 -		io_run_cancel(work, wqe);
 -		match->nr_pending++;
 -		if (!match->cancel_all)
@@ -334,7 +344,7 @@ index aa9656eb832e..8cba77a937a1 100644
 +			if (!match->fn(work, match->data))
 +				continue;
 +			io_wqe_remove_pending(wqe, work, prev);
-+			raw_spin_unlock(&wqe->lock);
++			raw_spin_unlock_irqrestore(&wqe->lock, flags);
 +			io_run_cancel(work, wqe);
 +			match->nr_pending++;
 +			if (!match->cancel_all)
@@ -344,9 +354,9 @@ index aa9656eb832e..8cba77a937a1 100644
 +			goto retry;
 +		}
  	}
- 	raw_spin_unlock(&wqe->lock);
+ 	raw_spin_unlock_irqrestore(&wqe->lock, flags);
  }
-@@ -950,18 +922,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
+@@ -958,18 +930,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
  			    int sync, void *key)
  {
  	struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
@@ -368,13 +378,13 @@ index aa9656eb832e..8cba77a937a1 100644
  
  struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
  {
--	int ret, node;
+-	int ret = -ENOMEM, node;
 +	int ret, node, i;
  	struct io_wq *wq;
  
  	if (WARN_ON_ONCE(!data->free_work || !data->do_work))
-@@ -996,18 +974,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
- 		cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
+@@ -1006,18 +984,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
+ 			goto err;
  		wq->wqes[node] = wqe;
  		wqe->node = alloc_node;
 -		wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
@@ -400,15 +410,6 @@ index aa9656eb832e..8cba77a937a1 100644
  		INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
  		INIT_LIST_HEAD(&wqe->all_list);
  	}
-@@ -1189,7 +1169,7 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
- 	for_each_node(node) {
- 		struct io_wqe_acct *acct;
- 
--		for (i = 0; i < 2; i++) {
-+		for (i = 0; i < IO_WQ_ACCT_NR; i++) {
- 			acct = &wq->wqes[node]->acct[i];
- 			prev = max_t(int, acct->max_workers, prev);
- 			if (new_count[i])
 -- 
 2.30.2
 
-- 
2.30.2