[pve-devel] [PATCH] KVM MTRR fixes (PCI passthrough)

Alexandre Derumier aderumier at odiso.com
Wed Dec 30 09:58:05 CET 2015


Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
---
 0001-mttr.patch | 34 +++++++++++++++++++++++++
 0002-mttr.patch | 61 ++++++++++++++++++++++++++++++++++++++++++++
 0003-mttr.patch | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 Makefile        |  5 ++++
 4 files changed, 178 insertions(+)
 create mode 100644 0001-mttr.patch
 create mode 100644 0002-mttr.patch
 create mode 100644 0003-mttr.patch

diff --git a/0001-mttr.patch b/0001-mttr.patch
new file mode 100644
index 0000000..6ffbe38
--- /dev/null
+++ b/0001-mttr.patch
@@ -0,0 +1,34 @@
+From a7f2d7865720ff13d5b0f2a3bb1fd80dc3d7a73f Mon Sep 17 00:00:00 2001
+From: Alexis Dambricourt <alexis.dambricourt at gmail.com>
+Date: Mon, 14 Dec 2015 15:39:34 +0100
+Subject: KVM: MTRR: fix fixed MTRR segment look up
+
+This fixes the slow-down of VM running with pci-passthrough, since some MTRR
+range changed from MTRR_TYPE_WRBACK to MTRR_TYPE_UNCACHABLE.  Memory in the
+0K-640K range was incorrectly treated as uncacheable.
+
+Fixes: f7bfb57b3e89ff89c0da9f93dedab89f68d6ca27
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=107561
+Cc: qemu-stable at nongnu.org
+Signed-off-by: Alexis Dambricourt <alexis.dambricourt at gmail.com>
+[Use correct BZ for "Fixes" annotation.  - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/kvm/mtrr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
+index 9e8bf13..adc54e1 100644
+--- a/arch/x86/kvm/mtrr.c
++++ b/arch/x86/kvm/mtrr.c
+@@ -267,7 +267,7 @@ static int fixed_mtrr_addr_to_seg(u64 addr)
+ 
+ 	for (seg = 0; seg < seg_num; seg++) {
+ 		mtrr_seg = &fixed_seg_table[seg];
+-		if (mtrr_seg->start >= addr && addr < mtrr_seg->end)
++		if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
+ 			return seg;
+ 	}
+ 
+-- 
+cgit v0.11.2
diff --git a/0002-mttr.patch b/0002-mttr.patch
new file mode 100644
index 0000000..fe1c420
--- /dev/null
+++ b/0002-mttr.patch
@@ -0,0 +1,61 @@
+From fa7c4ebd5ae0c22f9908436303106a9ffcf0cf42 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini at redhat.com>
+Date: Mon, 14 Dec 2015 16:57:31 +0100
+Subject: KVM: MTRR: observe maxphyaddr from guest CPUID, not host
+
+Conversion of MTRRs to ranges used the maxphyaddr from the boot CPU.
+This is wrong, because var_mtrr_range's mask variable then is discontiguous
+(like FF00FFFF000, where the first run of 0s corresponds to the bits
+between host and guest maxphyaddr).  Instead always set up the masks
+to be full 64-bit values---we know that the reserved bits at the top
+are zero, and we can restore them when reading the MSR.  This way
+var_mtrr_range gets a mask that just works.
+
+Fixes: a13842dc668b40daef4327294a6d3bdc8bd30276
+Cc: qemu-stable at nongnu.org
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=107561
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/kvm/mtrr.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
+index adc54e1..7747b6d 100644
+--- a/arch/x86/kvm/mtrr.c
++++ b/arch/x86/kvm/mtrr.c
+@@ -300,7 +300,6 @@ static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
+ 	*start = range->base & PAGE_MASK;
+ 
+ 	mask = range->mask & PAGE_MASK;
+-	mask |= ~0ULL << boot_cpu_data.x86_phys_bits;
+ 
+ 	/* This cannot overflow because writing to the reserved bits of
+ 	 * variable MTRRs causes a #GP.
+@@ -356,10 +355,14 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+ 	if (var_mtrr_range_is_valid(cur))
+ 		list_del(&mtrr_state->var_ranges[index].node);
+ 
++	/* Extend the mask with all 1 bits to the left, since those
++	 * bits must implicitly be 0.  The bits are then cleared
++	 * when reading them.
++	 */
+ 	if (!is_mtrr_mask)
+ 		cur->base = data;
+ 	else
+-		cur->mask = data;
++		cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu));
+ 
+ 	/* add it to the list if it's enabled. */
+ 	if (var_mtrr_range_is_valid(cur)) {
+@@ -426,6 +429,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+ 			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
+ 		else
+ 			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
++
++		*pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1;
+ 	}
+ 
+ 	return 0;
+-- 
+cgit v0.11.2
+
diff --git a/0003-mttr.patch b/0003-mttr.patch
new file mode 100644
index 0000000..61606e4
--- /dev/null
+++ b/0003-mttr.patch
@@ -0,0 +1,78 @@
+From e24dea2afc6a0852983dc741072d8e96155e13f5 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini at redhat.com>
+Date: Tue, 22 Dec 2015 15:20:00 +0100
+Subject: KVM: MTRR: treat memory as writeback if MTRR is disabled in guest
+ CPUID
+
+Virtual machines can be run with CPUID such that there are no MTRRs.
+In that case, the firmware will never enable MTRRs and it is obviously
+undesirable to run the guest entirely with UC memory.  Check out guest
+CPUID, and use WB memory if MTRR do not exist.
+
+Cc: qemu-stable at nongnu.org
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=107561
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/kvm/cpuid.h |  8 ++++++++
+ arch/x86/kvm/mtrr.c  | 14 +++++++++++---
+ 2 files changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
+index 06332cb..3f5c48d 100644
+--- a/arch/x86/kvm/cpuid.h
++++ b/arch/x86/kvm/cpuid.h
+@@ -38,6 +38,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
+ 	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
+ }
+ 
++static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
++{
++	struct kvm_cpuid_entry2 *best;
++
++	best = kvm_find_cpuid_entry(vcpu, 1, 0);
++	return best && (best->edx & bit(X86_FEATURE_MTRR));
++}
++
+ static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
+ {
+ 	struct kvm_cpuid_entry2 *best;
+diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
+index 7747b6d..3f8c732 100644
+--- a/arch/x86/kvm/mtrr.c
++++ b/arch/x86/kvm/mtrr.c
+@@ -120,14 +120,22 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
+ 	return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
+ }
+ 
+-static u8 mtrr_disabled_type(void)
++static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
+ {
+ 	/*
+ 	 * Intel SDM 11.11.2.2: all MTRRs are disabled when
+ 	 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
+ 	 * memory type is applied to all of physical memory.
++	 *
++	 * However, virtual machines can be run with CPUID such that
++	 * there are no MTRRs.  In that case, the firmware will never
++	 * enable MTRRs and it is obviously undesirable to run the
++	 * guest entirely with UC memory and we use WB.
+ 	 */
+-	return MTRR_TYPE_UNCACHABLE;
++	if (guest_cpuid_has_mtrr(vcpu))
++		return MTRR_TYPE_UNCACHABLE;
++	else
++		return MTRR_TYPE_WRBACK;
+ }
+ 
+ /*
+@@ -675,7 +683,7 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+ 	}
+ 
+ 	if (iter.mtrr_disabled)
+-		return mtrr_disabled_type();
++		return mtrr_disabled_type(vcpu);
+ 
+ 	/* not contained in any MTRRs. */
+ 	if (type == -1)
+-- 
+cgit v0.11.2
diff --git a/Makefile b/Makefile
index ce86bc2..ca3c00a 100644
--- a/Makefile
+++ b/Makefile
@@ -249,6 +249,11 @@ ${KERNEL_SRC}/README ${KERNEL_CFG_ORG}: ${KERNELSRCTAR}
 	cd ${KERNEL_SRC}; patch -p1 <../0007-aacraid-ioctl-fix.patch
 	cd ${KERNEL_SRC}; patch -p1 <../0008-aacraid-use-pci-enable-msix-range.patch
 	cd ${KERNEL_SRC}; patch -p1 <../0009-aacraid-update-driver-version.patch
+	# backport KVM MTRR fixes (PCI passthrough bug) from kernel 4.4-rc7
+	cd ${KERNEL_SRC}; patch -p1 <../0001-mttr.patch
+	cd ${KERNEL_SRC}; patch -p1 <../0002-mttr.patch
+	cd ${KERNEL_SRC}; patch -p1 <../0003-mttr.patch
+
 	sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
 	touch $@
 
-- 
2.1.4




More information about the pve-devel mailing list