[pve-devel] [PATCH kernel 2/7] drop patches applied upstream

Wed Mar 1 11:47:00 CET 2017

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 Makefile                                           |   3 -
 ...hrottle-on-IO-only-when-there-are-too-man.patch | 118 ----------
 0002-Revert-mm-oom-rework-oom-detection.patch      | 255 ---------------------
 ...x86-fix-emulation-of-MOV-SS-null-selector.patch | 107 ---------
 4 files changed, 483 deletions(-)
 delete mode 100644 0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
 delete mode 100644 0002-Revert-mm-oom-rework-oom-detection.patch
 delete mode 100644 CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch

diff --git a/Makefile b/Makefile
index 12e43d8..7fa9bc2 100644
--- a/Makefile
+++ b/Makefile
@@ -265,9 +265,6 @@ ${KERNEL_SRC}/README ${KERNEL_CFG_ORG}: ${KERNELSRCTAR}
 	# IPoIB performance regression fix
 	cd ${KERNEL_SRC}; patch -p1 < ../IB-ipoib-move-back-the-IB-LL-address-into-the-hard-header.patch
 	cd ${KERNEL_SRC}; patch -p1 < ../cgroup-cpuset-add-cpuset.remap_cpus.patch
-	cd ${KERNEL_SRC}; patch -p1 < ../0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
-	cd ${KERNEL_SRC}; patch -p1 < ../0002-Revert-mm-oom-rework-oom-detection.patch
-	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch
 	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-2596-kvm-page-reference-leakage-in-handle_vmon.patch
 	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-6074-dccp-fix-freeing-skb-too-early-for-IPV6_RECVPKTINFO.patch
 	sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
diff --git a/0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch b/0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
deleted file mode 100644
index b4ff5a8..0000000
--- a/0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From 3168fc7faf603da9d523c9dffbec6fee5b1a8a04 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
-Date: Wed, 4 Jan 2017 11:29:00 +0100
-Subject: [PATCH 1/2] Revert "mm: throttle on IO only when there are too many
- dirty and writeback pages"
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This reverts commit 57e9ef475661f46769cad6c0ed9a13f0cec1dbd8.
-
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- mm/backing-dev.c | 20 +++++++++++++++++---
- mm/page_alloc.c  | 41 ++++-------------------------------------
- 2 files changed, 21 insertions(+), 40 deletions(-)
-
-diff --git a/mm/backing-dev.c b/mm/backing-dev.c
-index a1aef87..9ef80bf 100644
---- a/mm/backing-dev.c
-+++ b/mm/backing-dev.c
-@@ -976,8 +976,9 @@ EXPORT_SYMBOL(congestion_wait);
-  * jiffies for either a BDI to exit congestion of the given @sync queue
-  * or a write to complete.
-  *
-- * In the absence of zone congestion, cond_resched() is called to yield
-- * the processor if necessary but otherwise does not sleep.
-+ * In the absence of zone congestion, a short sleep or a cond_resched is
-+ * performed to yield the processor and to allow other subsystems to make
-+ * a forward progress.
-  *
-  * The return value is 0 if the sleep is for the full timeout. Otherwise,
-  * it is the number of jiffies that were still remaining when the function
-@@ -997,7 +998,20 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
- 	 */
- 	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
- 	    !test_bit(ZONE_CONGESTED, &zone->flags)) {
--		cond_resched();
-+
-+		/*
-+		 * Memory allocation/reclaim might be called from a WQ
-+		 * context and the current implementation of the WQ
-+		 * concurrency control doesn't recognize that a particular
-+		 * WQ is congested if the worker thread is looping without
-+		 * ever sleeping. Therefore we have to do a short sleep
-+		 * here rather than calling cond_resched().
-+		 */
-+		if (current->flags & PF_WQ_WORKER)
-+			schedule_timeout_uninterruptible(1);
-+		else
-+			cond_resched();
-+
- 		/* In case we scheduled, work out time remaining */
- 		ret = timeout - (jiffies - start);
- 		if (ret < 0)
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index aadbd7e..f13b503 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -3038,9 +3038,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
- 	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
- 					ac->nodemask) {
- 		unsigned long available;
--		unsigned long reclaimable;
- 
--		available = reclaimable = zone_reclaimable_pages(zone);
-+		available = zone_reclaimable_pages(zone);
- 		available -= DIV_ROUND_UP(no_progress_loops * available,
- 					  MAX_RECLAIM_RETRIES);
- 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
-@@ -3050,41 +3049,9 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
- 		 * available?
- 		 */
- 		if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
--				ac->classzone_idx, alloc_flags, available)) {
--			/*
--			 * If we didn't make any progress and have a lot of
--			 * dirty + writeback pages then we should wait for
--			 * an IO to complete to slow down the reclaim and
--			 * prevent from pre mature OOM
--			 */
--			if (!did_some_progress) {
--				unsigned long writeback;
--				unsigned long dirty;
--
--				writeback = zone_page_state_snapshot(zone,
--								     NR_WRITEBACK);
--				dirty = zone_page_state_snapshot(zone, NR_FILE_DIRTY);
--
--				if (2*(writeback + dirty) > reclaimable) {
--					congestion_wait(BLK_RW_ASYNC, HZ/10);
--					return true;
--				}
--			}
--
--			/*
--			 * Memory allocation/reclaim might be called from a WQ
--			 * context and the current implementation of the WQ
--			 * concurrency control doesn't recognize that
--			 * a particular WQ is congested if the worker thread is
--			 * looping without ever sleeping. Therefore we have to
--			 * do a short sleep here rather than calling
--			 * cond_resched().
--			 */
--			if (current->flags & PF_WQ_WORKER)
--				schedule_timeout_uninterruptible(1);
--			else
--				cond_resched();
--
-+				ac->high_zoneidx, alloc_flags, available)) {
-+			/* Wait for some write requests to complete then retry */
-+			wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
- 			return true;
- 		}
- 	}
--- 
-2.1.4
-
diff --git a/0002-Revert-mm-oom-rework-oom-detection.patch b/0002-Revert-mm-oom-rework-oom-detection.patch
deleted file mode 100644
index 5a1ec76..0000000
--- a/0002-Revert-mm-oom-rework-oom-detection.patch
+++ /dev/null
@@ -1,255 +0,0 @@
-From 6e2588df3dc3d1704eae939ed9c9425000f48069 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
-Date: Wed, 4 Jan 2017 11:29:26 +0100
-Subject: [PATCH 2/2] Revert "mm, oom: rework oom detection"
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This reverts commit c630ec12d831521b0566481eb56d7257b051911e.
-
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/swap.h |   1 -
- mm/page_alloc.c      | 100 +++++----------------------------------------------
- mm/vmscan.c          |  25 ++++++++++---
- 3 files changed, 29 insertions(+), 97 deletions(-)
-
-diff --git a/include/linux/swap.h b/include/linux/swap.h
-index 1498c5a..d8ca2ea 100644
---- a/include/linux/swap.h
-+++ b/include/linux/swap.h
-@@ -318,7 +318,6 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
- 						struct vm_area_struct *vma);
- 
- /* linux/mm/vmscan.c */
--extern unsigned long zone_reclaimable_pages(struct zone *zone);
- extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
- 					gfp_t gfp_mask, nodemask_t *mask);
- extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index f13b503..56319cf 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -2988,77 +2988,6 @@ static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
- 	return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
- }
- 
--/*
-- * Maximum number of reclaim retries without any progress before OOM killer
-- * is consider as the only way to move forward.
-- */
--#define MAX_RECLAIM_RETRIES 16
--
--/*
-- * Checks whether it makes sense to retry the reclaim to make a forward progress
-- * for the given allocation request.
-- * The reclaim feedback represented by did_some_progress (any progress during
-- * the last reclaim round), pages_reclaimed (cumulative number of reclaimed
-- * pages) and no_progress_loops (number of reclaim rounds without any progress
-- * in a row) is considered as well as the reclaimable pages on the applicable
-- * zone list (with a backoff mechanism which is a function of no_progress_loops).
-- *
-- * Returns true if a retry is viable or false to enter the oom path.
-- */
--static inline bool
--should_reclaim_retry(gfp_t gfp_mask, unsigned order,
--		     struct alloc_context *ac, int alloc_flags,
--		     bool did_some_progress, unsigned long pages_reclaimed,
--		     int no_progress_loops)
--{
--	struct zone *zone;
--	struct zoneref *z;
--
--	/*
--	 * Make sure we converge to OOM if we cannot make any progress
--	 * several times in the row.
--	 */
--	if (no_progress_loops > MAX_RECLAIM_RETRIES)
--		return false;
--
--	if (order > PAGE_ALLOC_COSTLY_ORDER) {
--		if (pages_reclaimed >= (1<<order))
--			return false;
--
--		if (did_some_progress)
--			return true;
--	}
--
--	/*
--	 * Keep reclaiming pages while there is a chance this will lead somewhere.
--	 * If none of the target zones can satisfy our allocation request even
--	 * if all reclaimable pages are considered then we are screwed and have
--	 * to go OOM.
--	 */
--	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
--					ac->nodemask) {
--		unsigned long available;
--
--		available = zone_reclaimable_pages(zone);
--		available -= DIV_ROUND_UP(no_progress_loops * available,
--					  MAX_RECLAIM_RETRIES);
--		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
--
--		/*
--		 * Would the allocation succeed if we reclaimed the whole
--		 * available?
--		 */
--		if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
--				ac->high_zoneidx, alloc_flags, available)) {
--			/* Wait for some write requests to complete then retry */
--			wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
--			return true;
--		}
--	}
--
--	return false;
--}
--
- static inline struct page *
- __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
- 						struct alloc_context *ac)
-@@ -3071,7 +3000,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
- 	enum migrate_mode migration_mode = MIGRATE_ASYNC;
- 	bool deferred_compaction = false;
- 	int contended_compaction = COMPACT_CONTENDED_NONE;
--	int no_progress_loops = 0;
- 
- 	/*
- 	 * In the slowpath, we sanity check order to avoid ever trying to
-@@ -3223,24 +3151,14 @@ retry:
- 	if (gfp_mask & __GFP_NORETRY)
- 		goto noretry;
- 
--	/*
--	 * Do not retry costly high order allocations unless they are
--	 * __GFP_REPEAT
--	 */
--	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
--		goto noretry;
--
--	if (did_some_progress) {
--		no_progress_loops = 0;
--		pages_reclaimed += did_some_progress;
--	} else {
--		no_progress_loops++;
--	}
--
--	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
--				 did_some_progress > 0, pages_reclaimed,
--				 no_progress_loops))
-+	/* Keep reclaiming pages as long as there is reasonable progress */
-+	pages_reclaimed += did_some_progress;
-+	if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
-+	    ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
-+		/* Wait for some write requests to complete then retry */
-+		wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50);
- 		goto retry;
-+	}
- 
- 	/* Reclaim has failed us, start killing things */
- 	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
-@@ -3248,10 +3166,8 @@ retry:
- 		goto got_pg;
- 
- 	/* Retry as long as the OOM killer is making progress */
--	if (did_some_progress) {
--		no_progress_loops = 0;
-+	if (did_some_progress)
- 		goto retry;
--	}
- 
- noretry:
- 	/*
-diff --git a/mm/vmscan.c b/mm/vmscan.c
-index 56f902d..3597160 100644
---- a/mm/vmscan.c
-+++ b/mm/vmscan.c
-@@ -192,7 +192,7 @@ static bool sane_reclaim(struct scan_control *sc)
- }
- #endif
- 
--unsigned long zone_reclaimable_pages(struct zone *zone)
-+static unsigned long zone_reclaimable_pages(struct zone *zone)
- {
- 	unsigned long nr;
- 
-@@ -2492,8 +2492,10 @@ static inline bool compaction_ready(struct zone *zone, int order)
-  *
-  * If a zone is deemed to be full of pinned pages then just give it a light
-  * scan then give up on it.
-+ *
-+ * Returns true if a zone was reclaimable.
-  */
--static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
-+static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
- {
- 	struct zoneref *z;
- 	struct zone *zone;
-@@ -2501,6 +2503,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
- 	unsigned long nr_soft_scanned;
- 	gfp_t orig_mask;
- 	enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
-+	bool reclaimable = false;
- 
- 	/*
- 	 * If the number of buffer_heads in the machine exceeds the maximum
-@@ -2565,10 +2568,17 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
- 						&nr_soft_scanned);
- 			sc->nr_reclaimed += nr_soft_reclaimed;
- 			sc->nr_scanned += nr_soft_scanned;
-+			if (nr_soft_reclaimed)
-+				reclaimable = true;
- 			/* need some check for avoid more shrink_zone() */
- 		}
- 
--		shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
-+		if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
-+			reclaimable = true;
-+
-+		if (global_reclaim(sc) &&
-+		    !reclaimable && zone_reclaimable(zone))
-+			reclaimable = true;
- 	}
- 
- 	/*
-@@ -2576,6 +2586,8 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
- 	 * promoted it to __GFP_HIGHMEM.
- 	 */
- 	sc->gfp_mask = orig_mask;
-+
-+	return reclaimable;
- }
- 
- /*
-@@ -2600,6 +2612,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
- 	int initial_priority = sc->priority;
- 	unsigned long total_scanned = 0;
- 	unsigned long writeback_threshold;
-+	bool zones_reclaimable;
- retry:
- 	delayacct_freepages_start();
- 
-@@ -2610,7 +2623,7 @@ retry:
- 		vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
- 				sc->priority);
- 		sc->nr_scanned = 0;
--		shrink_zones(zonelist, sc);
-+		zones_reclaimable = shrink_zones(zonelist, sc);
- 
- 		total_scanned += sc->nr_scanned;
- 		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
-@@ -2657,6 +2670,10 @@ retry:
- 		goto retry;
- 	}
- 
-+	/* Any of the zones still reclaimable?  Don't OOM. */
-+	if (zones_reclaimable)
-+		return 1;
-+
- 	return 0;
- }
- 
--- 
-2.1.4
-
diff --git a/CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch b/CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch
deleted file mode 100644
index 3a984ed..0000000
--- a/CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch
+++ /dev/null
@@ -1,107 +0,0 @@
-From 33ab91103b3415e12457e3104f0e4517ce12d0f3 Mon Sep 17 00:00:00 2001
-From: Paolo Bonzini <pbonzini@redhat.com>
-Date: Thu, 12 Jan 2017 15:02:32 +0100
-Subject: KVM: x86: fix emulation of "MOV SS, null selector"
-
-This is CVE-2017-2583.  On Intel this causes a failed vmentry because
-SS's type is neither 3 nor 7 (even though the manual says this check is
-only done for usable SS, and the dmesg splat says that SS is unusable!).
-On AMD it's worse: svm.c is confused and sets CPL to 0 in the vmcb.
-
-The fix fabricates a data segment descriptor when SS is set to a null
-selector, so that CPL and SS.DPL are set correctly in the VMCS/vmcb.
-Furthermore, only allow setting SS to a NULL selector if SS.RPL < 3;
-this in turn ensures CPL < 3 because RPL must be equal to CPL.
-
-Thanks to Andy Lutomirski and Willy Tarreau for help in analyzing
-the bug and deciphering the manuals.
-
-Reported-by: Xiaohan Zhang <zhangxiaohan1@huawei.com>
-Fixes: 79d5b4c3cd809c770d4bf9812635647016c56011
-Cc: stable@nongnu.org
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
----
- arch/x86/kvm/emulate.c | 48 ++++++++++++++++++++++++++++++++++++++----------
- 1 file changed, 38 insertions(+), 10 deletions(-)
-
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index f36d0fa..cedbba0 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -1585,7 +1585,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- 				    &ctxt->exception);
- }
- 
--/* Does not support long mode */
- static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- 				     u16 selector, int seg, u8 cpl,
- 				     enum x86_transfer_type transfer,
-@@ -1622,20 +1621,34 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- 
- 	rpl = selector & 3;
- 
--	/* NULL selector is not valid for TR, CS and SS (except for long mode) */
--	if ((seg == VCPU_SREG_CS
--	     || (seg == VCPU_SREG_SS
--		 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
--	     || seg == VCPU_SREG_TR)
--	    && null_selector)
--		goto exception;
--
- 	/* TR should be in GDT only */
- 	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
- 		goto exception;
- 
--	if (null_selector) /* for NULL selector skip all following checks */
-+	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
-+	if (null_selector) {
-+		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
-+			goto exception;
-+
-+		if (seg == VCPU_SREG_SS) {
-+			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
-+				goto exception;
-+
-+			/*
-+			 * ctxt->ops->set_segment expects the CPL to be in
-+			 * SS.DPL, so fake an expand-up 32-bit data segment.
-+			 */
-+			seg_desc.type = 3;
-+			seg_desc.p = 1;
-+			seg_desc.s = 1;
-+			seg_desc.dpl = cpl;
-+			seg_desc.d = 1;
-+			seg_desc.g = 1;
-+		}
-+
-+		/* Skip all following checks */
- 		goto load;
-+	}
- 
- 	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
- 	if (ret != X86EMUL_CONTINUE)
-@@ -1751,6 +1764,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- 				   u16 selector, int seg)
- {
- 	u8 cpl = ctxt->ops->cpl(ctxt);
-+
-+	/*
-+	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
-+	 * they can load it at CPL<3 (Intel's manual says only LSS can,
-+	 * but it's wrong).
-+	 *
-+	 * However, the Intel manual says that putting IST=1/DPL=3 in
-+	 * an interrupt gate will result in SS=3 (the AMD manual instead
-+	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
-+	 * and only forbid it here.
-+	 */
-+	if (seg == VCPU_SREG_SS && selector == 3 &&
-+	    ctxt->mode == X86EMUL_MODE_PROT64)
-+		return emulate_exception(ctxt, GP_VECTOR, 0, true);
-+
- 	return __load_segment_descriptor(ctxt, selector, seg, cpl,
- 					 X86_TRANSFER_NONE, NULL);
- }
--- 
-cgit v0.12
-
-- 
2.1.4