[pve-devel] applied: [PATCH kernel] backport fixes for information leak within a KVM guest

Thomas Lamprecht t.lamprecht at proxmox.com
Fri Jan 31 15:08:15 CET 2020


Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 ...w-Reserved-types-to-be-overwritten-i.patch |   1 +
 ...ul-not-to-clear-KVM_VCPU_FLUSH_TLB-b.patch |  38 +++
 ...09-x86-kvm-Introduce-kvm_-un-map_gfn.patch | 111 +++++++
 ...x86-kvm-Cache-gfn-to-pfn-translation.patch | 292 ++++++++++++++++++
 ...e-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch | 125 ++++++++
 ...Clean-up-host-s-steal-time-structure.patch |  82 +++++
 6 files changed, 649 insertions(+)
 create mode 100644 patches/kernel/0008-x86-kvm-Be-careful-not-to-clear-KVM_VCPU_FLUSH_TLB-b.patch
 create mode 100644 patches/kernel/0009-x86-kvm-Introduce-kvm_-un-map_gfn.patch
 create mode 100644 patches/kernel/0010-x86-kvm-Cache-gfn-to-pfn-translation.patch
 create mode 100644 patches/kernel/0011-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch
 create mode 100644 patches/kernel/0012-x86-KVM-Clean-up-host-s-steal-time-structure.patch

diff --git a/patches/kernel/0006-x86-MCE-AMD-Allow-Reserved-types-to-be-overwritten-i.patch b/patches/kernel/0006-x86-MCE-AMD-Allow-Reserved-types-to-be-overwritten-i.patch
index 52f997078808..874c8eec8bdb 100644
--- a/patches/kernel/0006-x86-MCE-AMD-Allow-Reserved-types-to-be-overwritten-i.patch
+++ b/patches/kernel/0006-x86-MCE-AMD-Allow-Reserved-types-to-be-overwritten-i.patch
@@ -66,6 +66,7 @@ known bank type can update smca_banks[].
 Fixes: 68627a697c19 ("x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type")
 Signed-off-by: Yazen Ghannam <yazen.ghannam at amd.com>
 Signed-off-by: Borislav Petkov <bp at suse.de>
+Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
 ---
  arch/x86/kernel/cpu/mce/amd.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/patches/kernel/0008-x86-kvm-Be-careful-not-to-clear-KVM_VCPU_FLUSH_TLB-b.patch b/patches/kernel/0008-x86-kvm-Be-careful-not-to-clear-KVM_VCPU_FLUSH_TLB-b.patch
new file mode 100644
index 000000000000..a17a5b548d92
--- /dev/null
+++ b/patches/kernel/0008-x86-kvm-Be-careful-not-to-clear-KVM_VCPU_FLUSH_TLB-b.patch
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Date: Fri, 31 Jan 2020 08:06:40 -0300
+Subject: [PATCH] x86/kvm: Be careful not to clear KVM_VCPU_FLUSH_TLB bit
+
+CVE-2019-3016
+CVE-2020-3016
+
+kvm_steal_time_set_preempted() may accidentally clear KVM_VCPU_FLUSH_TLB
+bit if it is called more than once while VCPU is preempted.
+
+This is part of CVE-2019-3016.
+
+(This bug was also independently discovered by Jim Mattson
+<jmattson at google.com>)
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Reviewed-by: Joao Martins <joao.m.martins at oracle.com>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo at canonical.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
+---
+ arch/x86/kvm/x86.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 778b3a899769..92d8e4ebba16 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3393,6 +3393,9 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ 		return;
+ 
++	if (vcpu->arch.st.steal.preempted)
++		return;
++
+ 	vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
+ 
+ 	kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
diff --git a/patches/kernel/0009-x86-kvm-Introduce-kvm_-un-map_gfn.patch b/patches/kernel/0009-x86-kvm-Introduce-kvm_-un-map_gfn.patch
new file mode 100644
index 000000000000..c461d6b3161b
--- /dev/null
+++ b/patches/kernel/0009-x86-kvm-Introduce-kvm_-un-map_gfn.patch
@@ -0,0 +1,111 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Date: Fri, 31 Jan 2020 08:06:41 -0300
+Subject: [PATCH] x86/kvm: Introduce kvm_(un)map_gfn()
+
+CVE-2019-3016
+CVE-2020-3016
+
+kvm_vcpu_(un)map operates on gfns from any current address space.
+In certain cases we want to make sure we are not mapping SMRAM
+and for that we can use kvm_(un)map_gfn() that we are introducing
+in this patch.
+
+This is part of CVE-2019-3016.
+
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Reviewed-by: Joao Martins <joao.m.martins at oracle.com>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo at canonical.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
+---
+ include/linux/kvm_host.h |  2 ++
+ virt/kvm/kvm_main.c      | 29 ++++++++++++++++++++++++-----
+ 2 files changed, 26 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index d41c521a39da..df4cc0ead363 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -758,8 +758,10 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
+ kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
+ kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
++int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map);
+ struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
+ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
++int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
+ unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
+ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
+ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 91e56a9b0661..6614e030ae75 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1792,12 +1792,13 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+ }
+ EXPORT_SYMBOL_GPL(gfn_to_page);
+ 
+-static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
++static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
+ 			 struct kvm_host_map *map)
+ {
+ 	kvm_pfn_t pfn;
+ 	void *hva = NULL;
+ 	struct page *page = KVM_UNMAPPED_PAGE;
++	struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
+ 
+ 	if (!map)
+ 		return -EINVAL;
+@@ -1826,14 +1827,20 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
+ 	return 0;
+ }
+ 
++int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
++{
++	return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map);
++}
++EXPORT_SYMBOL_GPL(kvm_map_gfn);
++
+ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
+ {
+-	return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map);
++	return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map);
+ }
+ EXPORT_SYMBOL_GPL(kvm_vcpu_map);
+ 
+-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+-		    bool dirty)
++static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
++			struct kvm_host_map *map, bool dirty)
+ {
+ 	if (!map)
+ 		return;
+@@ -1849,7 +1856,7 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ #endif
+ 
+ 	if (dirty) {
+-		kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
++		mark_page_dirty_in_slot(memslot, map->gfn);
+ 		kvm_release_pfn_dirty(map->pfn);
+ 	} else {
+ 		kvm_release_pfn_clean(map->pfn);
+@@ -1858,6 +1865,18 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ 	map->hva = NULL;
+ 	map->page = NULL;
+ }
++
++int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
++{
++	__kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map, dirty);
++	return 0;
++}
++EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
++
++void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
++{
++	__kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, dirty);
++}
+ EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
+ 
+ struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
diff --git a/patches/kernel/0010-x86-kvm-Cache-gfn-to-pfn-translation.patch b/patches/kernel/0010-x86-kvm-Cache-gfn-to-pfn-translation.patch
new file mode 100644
index 000000000000..b947a5e936d2
--- /dev/null
+++ b/patches/kernel/0010-x86-kvm-Cache-gfn-to-pfn-translation.patch
@@ -0,0 +1,292 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Date: Fri, 31 Jan 2020 08:06:42 -0300
+Subject: [PATCH] x86/kvm: Cache gfn to pfn translation
+
+CVE-2019-3016
+CVE-2020-3016
+
+__kvm_map_gfn()'s call to gfn_to_pfn_memslot() is
+* relatively expensive
+* in certain cases (such as when done from atomic context) cannot be called
+
+Stashing gfn-to-pfn mapping should help with both cases.
+
+This is part of CVE-2019-3016.
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Reviewed-by: Joao Martins <joao.m.martins at oracle.com>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo at canonical.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
+---
+ arch/x86/include/asm/kvm_host.h |  1 +
+ arch/x86/kvm/x86.c              | 10 ++++
+ include/linux/kvm_host.h        |  7 ++-
+ include/linux/kvm_types.h       |  9 ++-
+ virt/kvm/kvm_main.c             | 98 ++++++++++++++++++++++++++-------
+ 5 files changed, 103 insertions(+), 22 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index f68e174f452f..7c06343614a4 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -678,6 +678,7 @@ struct kvm_vcpu_arch {
+ 		u64 last_steal;
+ 		struct gfn_to_hva_cache stime;
+ 		struct kvm_steal_time steal;
++		struct gfn_to_pfn_cache cache;
+ 	} st;
+ 
+ 	u64 tsc_offset;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 92d8e4ebba16..41fee3d359ab 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8936,6 +8936,9 @@ static void fx_init(struct kvm_vcpu *vcpu)
+ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+ {
+ 	void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
++	struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
++
++	kvm_release_pfn(cache->pfn, cache->dirty, cache);
+ 
+ 	kvmclock_reset(vcpu);
+ 
+@@ -9602,11 +9605,18 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ 
+ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
+ {
++	struct kvm_vcpu *vcpu;
++	int i;
++
+ 	/*
+ 	 * memslots->generation has been incremented.
+ 	 * mmio generation may have reached its maximum value.
+ 	 */
+ 	kvm_mmu_invalidate_mmio_sptes(kvm, gen);
++
++	/* Force re-initialization of steal_time cache */
++	kvm_for_each_vcpu(i, vcpu, kvm)
++		kvm_vcpu_kick(vcpu);
+ }
+ 
+ int kvm_arch_prepare_memory_region(struct kvm *kvm,
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index df4cc0ead363..abfc2fbde957 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -728,6 +728,7 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn);
+ void kvm_set_pfn_accessed(kvm_pfn_t pfn);
+ void kvm_get_pfn(kvm_pfn_t pfn);
+ 
++void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
+ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+ 			int len);
+ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
+@@ -758,10 +759,12 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
+ kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
+ kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
+-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map);
++int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
++		struct gfn_to_pfn_cache *cache, bool atomic);
+ struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
+ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
+-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
++int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
++		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
+ unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
+ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
+ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
+diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
+index bde5374ae021..2382cb58969d 100644
+--- a/include/linux/kvm_types.h
++++ b/include/linux/kvm_types.h
+@@ -18,7 +18,7 @@ struct kvm_memslots;
+ 
+ enum kvm_mr_change;
+ 
+-#include <asm/types.h>
++#include <linux/types.h>
+ 
+ /*
+  * Address types:
+@@ -49,4 +49,11 @@ struct gfn_to_hva_cache {
+ 	struct kvm_memory_slot *memslot;
+ };
+ 
++struct gfn_to_pfn_cache {
++	u64 generation;
++	gfn_t gfn;
++	kvm_pfn_t pfn;
++	bool dirty;
++};
++
+ #endif /* __KVM_TYPES_H__ */
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 6614e030ae75..f05e5b5c30e8 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1792,27 +1792,72 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+ }
+ EXPORT_SYMBOL_GPL(gfn_to_page);
+ 
++void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
++{
++	if (pfn == 0)
++		return;
++
++	if (cache)
++		cache->pfn = cache->gfn = 0;
++
++	if (dirty)
++		kvm_release_pfn_dirty(pfn);
++	else
++		kvm_release_pfn_clean(pfn);
++}
++
++static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
++				 struct gfn_to_pfn_cache *cache, u64 gen)
++{
++	kvm_release_pfn(cache->pfn, cache->dirty, cache);
++
++	cache->pfn = gfn_to_pfn_memslot(slot, gfn);
++	cache->gfn = gfn;
++	cache->dirty = false;
++	cache->generation = gen;
++}
++
+ static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
+-			 struct kvm_host_map *map)
++			 struct kvm_host_map *map,
++			 struct gfn_to_pfn_cache *cache,
++			 bool atomic)
+ {
+ 	kvm_pfn_t pfn;
+ 	void *hva = NULL;
+ 	struct page *page = KVM_UNMAPPED_PAGE;
+ 	struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
++	u64 gen = slots->generation;
+ 
+ 	if (!map)
+ 		return -EINVAL;
+ 
+-	pfn = gfn_to_pfn_memslot(slot, gfn);
++	if (cache) {
++		if (!cache->pfn || cache->gfn != gfn ||
++			cache->generation != gen) {
++			if (atomic)
++				return -EAGAIN;
++			kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
++		}
++		pfn = cache->pfn;
++	} else {
++		if (atomic)
++			return -EAGAIN;
++		pfn = gfn_to_pfn_memslot(slot, gfn);
++	}
+ 	if (is_error_noslot_pfn(pfn))
+ 		return -EINVAL;
+ 
+ 	if (pfn_valid(pfn)) {
+ 		page = pfn_to_page(pfn);
+-		hva = kmap(page);
++		if (atomic)
++			hva = kmap_atomic(page);
++		else
++			hva = kmap(page);
+ #ifdef CONFIG_HAS_IOMEM
+-	} else {
++	} else if (!atomic) {
+ 		hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
++	} else {
++		return -EINVAL;
+ #endif
+ 	}
+ 
+@@ -1827,20 +1872,25 @@ static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
+ 	return 0;
+ }
+ 
+-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
++int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
++		struct gfn_to_pfn_cache *cache, bool atomic)
+ {
+-	return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map);
++	return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
++			cache, atomic);
+ }
+ EXPORT_SYMBOL_GPL(kvm_map_gfn);
+ 
+ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
+ {
+-	return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map);
++	return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
++		NULL, false);
+ }
+ EXPORT_SYMBOL_GPL(kvm_vcpu_map);
+ 
+ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+-			struct kvm_host_map *map, bool dirty)
++			struct kvm_host_map *map,
++			struct gfn_to_pfn_cache *cache,
++			bool dirty, bool atomic)
+ {
+ 	if (!map)
+ 		return;
+@@ -1848,34 +1898,44 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+ 	if (!map->hva)
+ 		return;
+ 
+-	if (map->page != KVM_UNMAPPED_PAGE)
+-		kunmap(map->page);
++	if (map->page != KVM_UNMAPPED_PAGE) {
++		if (atomic)
++			kunmap_atomic(map->hva);
++		else
++			kunmap(map->page);
++	}
+ #ifdef CONFIG_HAS_IOMEM
+-	else
++	else if (!atomic)
+ 		memunmap(map->hva);
++	else
++		WARN_ONCE(1, "Unexpected unmapping in atomic context");
+ #endif
+ 
+-	if (dirty) {
++	if (dirty)
+ 		mark_page_dirty_in_slot(memslot, map->gfn);
+-		kvm_release_pfn_dirty(map->pfn);
+-	} else {
+-		kvm_release_pfn_clean(map->pfn);
+-	}
++
++	if (cache)
++		cache->dirty |= dirty;
++	else
++		kvm_release_pfn(map->pfn, dirty, NULL);
+ 
+ 	map->hva = NULL;
+ 	map->page = NULL;
+ }
+ 
+-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
++int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, 
++		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
+ {
+-	__kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map, dirty);
++	__kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
++			cache, dirty, atomic);
+ 	return 0;
+ }
+ EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
+ 
+ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
+ {
+-	__kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, dirty);
++	__kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
++			dirty, false);
+ }
+ EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
+ 
diff --git a/patches/kernel/0011-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch b/patches/kernel/0011-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch
new file mode 100644
index 000000000000..62f967a9048a
--- /dev/null
+++ b/patches/kernel/0011-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch
@@ -0,0 +1,125 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Date: Fri, 31 Jan 2020 08:06:43 -0300
+Subject: [PATCH] x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed
+
+CVE-2019-3016
+CVE-2020-3016
+
+There is a potential race in record_steal_time() between setting
+host-local vcpu->arch.st.steal.preempted to zero (i.e. clearing
+KVM_VCPU_PREEMPTED) and propagating this value to the guest with
+kvm_write_guest_cached(). Between those two events the guest may
+still see KVM_VCPU_PREEMPTED in its copy of kvm_steal_time, set
+KVM_VCPU_FLUSH_TLB and assume that hypervisor will do the right
+thing. Which it won't.
+
+Instead of copying, we should map kvm_steal_time and that will
+guarantee atomicity of accesses to @preempted.
+
+This is part of CVE-2019-3016.
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Reviewed-by: Joao Martins <joao.m.martins at oracle.com>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo at canonical.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
+---
+ arch/x86/kvm/x86.c | 49 +++++++++++++++++++++++++++-------------------
+ 1 file changed, 29 insertions(+), 20 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 41fee3d359ab..431e34965707 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2482,43 +2482,45 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+ 
+ static void record_steal_time(struct kvm_vcpu *vcpu)
+ {
++	struct kvm_host_map map;
++	struct kvm_steal_time *st;
++
+ 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ 		return;
+ 
+-	if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
++	/* -EAGAIN is returned in atomic context so we can just return. */
++	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
++			&map, &vcpu->arch.st.cache, false))
+ 		return;
+ 
++	st = map.hva +
++		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++
+ 	/*
+ 	 * Doing a TLB flush here, on the guest's behalf, can avoid
+ 	 * expensive IPIs.
+ 	 */
+-	if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
++	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ 		kvm_vcpu_flush_tlb(vcpu, false);
+ 
+-	if (vcpu->arch.st.steal.version & 1)
+-		vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
++	vcpu->arch.st.steal.preempted = 0;
+ 
+-	vcpu->arch.st.steal.version += 1;
++	if (st->version & 1)
++		st->version += 1;  /* first time write, random junk */
+ 
+-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
++	st->version += 1;
+ 
+ 	smp_wmb();
+ 
+-	vcpu->arch.st.steal.steal += current->sched_info.run_delay -
++	st->steal += current->sched_info.run_delay -
+ 		vcpu->arch.st.last_steal;
+ 	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+ 
+-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+-
+ 	smp_wmb();
+ 
+-	vcpu->arch.st.steal.version += 1;
++	st->version += 1;
+ 
+-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
++	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ }
+ 
+ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+@@ -3390,18 +3392,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ 
+ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ {
++	struct kvm_host_map map;
++	struct kvm_steal_time *st;
++
+ 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ 		return;
+ 
+ 	if (vcpu->arch.st.steal.preempted)
+ 		return;
+ 
+-	vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
++	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
++			&vcpu->arch.st.cache, true))
++		return;
++
++	st = map.hva +
++		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++
++	st->preempted = vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
+ 
+-	kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-			&vcpu->arch.st.steal.preempted,
+-			offsetof(struct kvm_steal_time, preempted),
+-			sizeof(vcpu->arch.st.steal.preempted));
++	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+ }
+ 
+ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
diff --git a/patches/kernel/0012-x86-KVM-Clean-up-host-s-steal-time-structure.patch b/patches/kernel/0012-x86-KVM-Clean-up-host-s-steal-time-structure.patch
new file mode 100644
index 000000000000..1300b80997a3
--- /dev/null
+++ b/patches/kernel/0012-x86-KVM-Clean-up-host-s-steal-time-structure.patch
@@ -0,0 +1,82 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Date: Fri, 31 Jan 2020 08:06:44 -0300
+Subject: [PATCH] x86/KVM: Clean up host's steal time structure
+
+CVE-2019-3016
+CVE-2020-3016
+
+Now that we are mapping kvm_steal_time from the guest directly we
+don't need to keep a copy of it in kvm_vcpu_arch.st. The same is true
+for the stime field.
+
+This is part of CVE-2019-3016.
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Reviewed-by: Joao Martins <joao.m.martins at oracle.com>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo at canonical.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
+---
+ arch/x86/include/asm/kvm_host.h |  3 +--
+ arch/x86/kvm/x86.c              | 11 +++--------
+ 2 files changed, 4 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 7c06343614a4..f62f4ff5f4f4 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -674,10 +674,9 @@ struct kvm_vcpu_arch {
+ 	bool pvclock_set_guest_stopped_request;
+ 
+ 	struct {
++		u8 preempted;
+ 		u64 msr_val;
+ 		u64 last_steal;
+-		struct gfn_to_hva_cache stime;
+-		struct kvm_steal_time steal;
+ 		struct gfn_to_pfn_cache cache;
+ 	} st;
+ 
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 431e34965707..c059728f8a44 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2503,7 +2503,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
+ 	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ 		kvm_vcpu_flush_tlb(vcpu, false);
+ 
+-	vcpu->arch.st.steal.preempted = 0;
++	vcpu->arch.st.preempted = 0;
+ 
+ 	if (st->version & 1)
+ 		st->version += 1;  /* first time write, random junk */
+@@ -2676,11 +2676,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ 		if (data & KVM_STEAL_RESERVED_MASK)
+ 			return 1;
+ 
+-		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
+-						data & KVM_STEAL_VALID_BITS,
+-						sizeof(struct kvm_steal_time)))
+-			return 1;
+-
+ 		vcpu->arch.st.msr_val = data;
+ 
+ 		if (!(data & KVM_MSR_ENABLED))
+@@ -3398,7 +3393,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ 		return;
+ 
+-	if (vcpu->arch.st.steal.preempted)
++	if (vcpu->arch.st.preempted)
+ 		return;
+ 
+ 	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
+@@ -3408,7 +3403,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ 	st = map.hva +
+ 		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+ 
+-	st->preempted = vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
++	st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+ 
+ 	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+ }
-- 
2.20.1
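
For anyone reviewing the series without applying it: the heart of patches 0010 and 0011 is a per-vCPU gfn-to-pfn cache that is only reused while the cached gfn and memslot generation still match, which lets record_steal_time() and kvm_steal_time_set_preempted() map the steal-time page directly even from a context that must not sleep. Below is a rough, standalone sketch of that validity check, assuming simplified types and a made-up resolve_gfn() helper in place of gfn_to_pfn_memslot(); it is an illustration of the idea, not the kernel API.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's gfn_t/kvm_pfn_t and cache struct. */
typedef uint64_t gfn_t;
typedef uint64_t pfn_t;

struct gfn_to_pfn_cache {
	uint64_t generation;	/* memslot generation the entry was filled under */
	gfn_t gfn;
	pfn_t pfn;
	bool dirty;
};

/* Hypothetical translation helper standing in for gfn_to_pfn_memslot(). */
static pfn_t resolve_gfn(gfn_t gfn)
{
	return gfn + 0x1000;	/* fake mapping, just for the demo */
}

/*
 * Mirror of the check added to __kvm_map_gfn(): reuse the cached pfn only if
 * it is populated, matches the requested gfn and was filled under the current
 * memslot generation; otherwise refill it -- unless the caller is in atomic
 * context, in which case bail out with -EAGAIN and let it retry later.
 */
static int map_gfn_cached(struct gfn_to_pfn_cache *cache, gfn_t gfn,
			  uint64_t cur_generation, bool atomic, pfn_t *pfn_out)
{
	if (!cache->pfn || cache->gfn != gfn ||
	    cache->generation != cur_generation) {
		if (atomic)
			return -EAGAIN;
		cache->pfn = resolve_gfn(gfn);
		cache->gfn = gfn;
		cache->generation = cur_generation;
		cache->dirty = false;
	}
	*pfn_out = cache->pfn;
	return 0;
}

int main(void)
{
	struct gfn_to_pfn_cache cache = { 0 };
	pfn_t pfn;

	/* First lookup fills the cache (non-atomic path). */
	map_gfn_cached(&cache, 42, 1, false, &pfn);
	printf("filled: gfn=42 pfn=%#llx\n", (unsigned long long)pfn);

	/* A cache hit also works from "atomic" context. */
	printf("atomic hit: %d\n", map_gfn_cached(&cache, 42, 1, true, &pfn));

	/* A memslot generation bump invalidates the entry; atomic callers see -EAGAIN. */
	printf("atomic miss after generation bump: %d\n",
	       map_gfn_cached(&cache, 42, 2, true, &pfn));
	return 0;
}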




