[pve-devel] [PATCH kernel 2/4] revert buggy NVME setup commit
Fabian Grünbichler
f.gruenbichler at proxmox.com
Fri Jan 20 09:50:56 CET 2017
See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1626894 for the bug report that motivates this revert.
Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
---
Makefile | 1 +
nvme-revert-NVMe-only-setup-MSIX-once.patch | 128 ++++++++++++++++++++++++++++
2 files changed, 129 insertions(+)
create mode 100644 nvme-revert-NVMe-only-setup-MSIX-once.patch
diff --git a/Makefile b/Makefile
index dfb2060..bbf92e3 100644
--- a/Makefile
+++ b/Makefile
@@ -268,6 +268,7 @@ ${KERNEL_SRC}/README ${KERNEL_CFG_ORG}: ${KERNELSRCTAR}
cd ${KERNEL_SRC}; patch -p1 < ../cgroup-cpuset-add-cpuset.remap_cpus.patch
cd ${KERNEL_SRC}; patch -p1 < ../0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
cd ${KERNEL_SRC}; patch -p1 < ../0002-Revert-mm-oom-rework-oom-detection.patch
+ cd ${KERNEL_SRC}; patch -p1 < ../nvme-revert-NVMe-only-setup-MSIX-once.patch
sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
touch $@
diff --git a/nvme-revert-NVMe-only-setup-MSIX-once.patch b/nvme-revert-NVMe-only-setup-MSIX-once.patch
new file mode 100644
index 0000000..b46221e
--- /dev/null
+++ b/nvme-revert-NVMe-only-setup-MSIX-once.patch
@@ -0,0 +1,128 @@
+From af220b3adff164d1b8b89d7d5c8bb741d6195012 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler at proxmox.com>
+Date: Thu, 19 Jan 2017 15:19:46 +0100
+Subject: [PATCH] Revert "UBUNTU: SAUCE: (no-up) NVMe: only setup MSIX once"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit 96fce9e4025b96b08bfe5196d3380ab9215cb64b.
+
+Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
+---
+ drivers/nvme/host/pci.c | 73 ++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 51 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index ae1f695..b9cf5aa 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -1613,7 +1613,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ int result, i, vecs, nr_io_queues, size;
+
+- nr_io_queues = dev->max_qid + 1;
++ nr_io_queues = num_possible_cpus();
+ result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
+ if (result < 0)
+ return result;
+@@ -1653,7 +1653,45 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
+ adminq->q_db = dev->dbs;
+ }
+
+- dev->max_qid = nr_io_queues - 1;
++ /* Deregister the admin queue's interrupt */
++ free_irq(dev->entry[0].vector, adminq);
++
++ /*
++ * If we enable msix early due to not intx, disable it again before
++ * setting up the full range we need.
++ */
++ if (pdev->msi_enabled)
++ pci_disable_msi(pdev);
++ else if (pdev->msix_enabled)
++ pci_disable_msix(pdev);
++
++ for (i = 0; i < nr_io_queues; i++)
++ dev->entry[i].entry = i;
++ vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues);
++ if (vecs < 0) {
++ vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32));
++ if (vecs < 0) {
++ vecs = 1;
++ } else {
++ for (i = 0; i < vecs; i++)
++ dev->entry[i].vector = i + pdev->irq;
++ }
++ }
++
++ /*
++ * Should investigate if there's a performance win from allocating
++ * more queues than interrupt vectors; it might allow the submission
++ * path to scale better, even if the receive path is limited by the
++ * number of interrupts.
++ */
++ nr_io_queues = vecs;
++ dev->max_qid = nr_io_queues;
++
++ result = queue_request_irq(dev, adminq, adminq->irqname);
++ if (result) {
++ adminq->cq_vector = -1;
++ goto free_queues;
++ }
+
+ /* Free previously allocated queues that are no longer usable */
+ nvme_free_queues(dev, nr_io_queues + 1);
+@@ -1806,7 +1844,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
+ static int nvme_pci_enable(struct nvme_dev *dev)
+ {
+ u64 cap;
+- int result = -ENOMEM, nr_io_queues, i, vecs;
++ int result = -ENOMEM;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (pci_enable_device_mem(pdev))
+@@ -1823,30 +1861,21 @@ static int nvme_pci_enable(struct nvme_dev *dev)
+ goto disable;
+ }
+
+- nr_io_queues = num_possible_cpus();
+-
+- for (i = 0; i < nr_io_queues; i++)
+- dev->entry[i].entry = i;
+- vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues);
+- if (vecs < 0) {
+- vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32));
+- if (vecs < 0) {
+- result = vecs;
+- goto disable;
+- } else {
+- for (i = 0; i < vecs; i++)
+- dev->entry[i].vector = i + pdev->irq;
+- }
++ /*
++ * Some devices and/or platforms don't advertise or work with INTx
++ * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
++ * adjust this later.
++ */
++ if (pci_enable_msix(pdev, dev->entry, 1)) {
++ pci_enable_msi(pdev);
++ dev->entry[0].vector = pdev->irq;
+ }
+
+- if (vecs < 2) {
+- dev_err(dev->ctrl.device, "Failed to get enough MSI/MSIX interrupts\n");
+- result = -ENOSPC;
++ if (!dev->entry[0].vector) {
++ result = -ENODEV;
+ goto disable;
+ }
+
+- dev->max_qid = vecs - 1;
+-
+ cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+
+ dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
+--
+2.1.4
+
--
2.1.4
More information about the pve-devel mailing list