KVM: nVMX: Eliminate vmcs02 pool
commit de3a0021a60635de96aa92713c1a31a96747d72c upstream.

The potential performance advantages of a vmcs02 pool have never been
realized. To simplify the code, eliminate the pool. Instead, a single
vmcs02 is allocated per VCPU when the VCPU enters VMX operation.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
Reviewed-by: Ameya More <ameya.more@oracle.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[bwh: Backported to 4.4:
 - No loaded_vmcs::shadow_vmcs field to initialise
 - Adjust context]
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 1bec1a14bb
commit 81cd492667
1 changed file with 22 additions and 122 deletions
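For orientation before reading the diff: the patch replaces the pool lookup with a single loaded_vmcs embedded in struct nested_vmx. The following is a condensed sketch of the resulting vmcs02 lifecycle, distilled from the hunks below; error unwinding and unrelated setup are elided, so this is not the literal kernel code.

	/* VMXON (handle_vmon): allocate the vCPU's single vmcs02 once. */
	vmx->nested.vmcs02.vmcs = alloc_vmcs();
	if (!vmx->nested.vmcs02.vmcs)
		goto out_vmcs02;
	loaded_vmcs_init(&vmx->nested.vmcs02);

	/* VMLAUNCH/VMRESUME (nested_vmx_run): switch to it unconditionally;
	 * no pool search and no -ENOMEM path at nested-entry time. */
	vmx->loaded_vmcs = &vmx->nested.vmcs02;

	/* VMXOFF / vCPU teardown (free_nested): free it exactly once. */
	free_loaded_vmcs(&vmx->nested.vmcs02);

A consequence visible in the diff: VMCLEAR no longer frees anything, because with one vmcs02 per vCPU there is no per-vmcs12 cache entry to recycle.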
arch/x86/kvm/vmx.c
@@ -172,7 +172,6 @@ module_param(ple_window_max, int, S_IRUGO);
 extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
 
 struct vmcs {
 	u32 revision_id;
@@ -205,7 +204,7 @@ struct shared_msr_entry {
  * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
  * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
  * More than one of these structures may exist, if L1 runs multiple L2 guests.
- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
  * underlying hardware which will be used to run L2.
  * This structure is packed to ensure that its layout is identical across
  * machines (necessary for live migration).
@@ -384,13 +383,6 @@ struct __packed vmcs12 {
  */
 #define VMCS12_SIZE 0x1000
 
-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
-struct vmcs02_list {
-	struct list_head list;
-	gpa_t vmptr;
-	struct loaded_vmcs vmcs02;
-};
-
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -412,16 +404,16 @@ struct nested_vmx {
 	 */
 	bool sync_shadow_vmcs;
 
-	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
-	struct list_head vmcs02_pool;
-	int vmcs02_num;
 	u64 vmcs01_tsc_offset;
 	bool change_vmcs01_virtual_x2apic_mode;
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
+
+	struct loaded_vmcs vmcs02;
+
 	/*
-	 * Guest pages referred to in vmcs02 with host-physical pointers, so
-	 * we must keep them pinned while L2 runs.
+	 * Guest pages referred to in the vmcs02 with host-physical
+	 * pointers, so we must keep them pinned while L2 runs.
 	 */
 	struct page *apic_access_page;
 	struct page *virtual_apic_page;
@@ -6434,93 +6426,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
 	return handle_nop(vcpu);
 }
 
-/*
- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
- * We could reuse a single VMCS for all the L2 guests, but we also want the
- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
- * allows keeping them loaded on the processor, and in the future will allow
- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
- * every entry if they never change.
- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
- *
- * The following functions allocate and free a vmcs02 in this pool.
- */
-
-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
-{
-	struct vmcs02_list *item;
-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
-		if (item->vmptr == vmx->nested.current_vmptr) {
-			list_move(&item->list, &vmx->nested.vmcs02_pool);
-			return &item->vmcs02;
-		}
-
-	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
-		/* Recycle the least recently used VMCS. */
-		item = list_entry(vmx->nested.vmcs02_pool.prev,
-				  struct vmcs02_list, list);
-		item->vmptr = vmx->nested.current_vmptr;
-		list_move(&item->list, &vmx->nested.vmcs02_pool);
-		return &item->vmcs02;
-	}
-
-	/* Create a new VMCS */
-	item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
-	if (!item)
-		return NULL;
-	item->vmcs02.vmcs = alloc_vmcs();
-	if (!item->vmcs02.vmcs) {
-		kfree(item);
-		return NULL;
-	}
-	loaded_vmcs_init(&item->vmcs02);
-	item->vmptr = vmx->nested.current_vmptr;
-	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
-	vmx->nested.vmcs02_num++;
-	return &item->vmcs02;
-}
-
-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
-{
-	struct vmcs02_list *item;
-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
-		if (item->vmptr == vmptr) {
-			free_loaded_vmcs(&item->vmcs02);
-			list_del(&item->list);
-			kfree(item);
-			vmx->nested.vmcs02_num--;
-			return;
-		}
-}
-
-/*
- * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
- * must be &vmx->vmcs01.
- */
-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
-{
-	struct vmcs02_list *item, *n;
-
-	WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
-	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
-		/*
-		 * Something will leak if the above WARN triggers. Better than
-		 * a use-after-free.
-		 */
-		if (vmx->loaded_vmcs == &item->vmcs02)
-			continue;
-
-		free_loaded_vmcs(&item->vmcs02);
-		list_del(&item->list);
-		kfree(item);
-		vmx->nested.vmcs02_num--;
-	}
-}
-
 /*
  * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
  * set the success or error code of an emulated VMX instruction, as specified
@@ -6833,6 +6738,11 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
+	vmx->nested.vmcs02.vmcs = alloc_vmcs();
+	if (!vmx->nested.vmcs02.vmcs)
+		goto out_vmcs02;
+	loaded_vmcs_init(&vmx->nested.vmcs02);
+
 	if (cpu_has_vmx_msr_bitmap()) {
 		vmx->nested.msr_bitmap =
 				(unsigned long *)__get_free_page(GFP_KERNEL);
@@ -6851,9 +6761,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		vmx->nested.current_shadow_vmcs = shadow_vmcs;
 	}
 
-	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
-	vmx->nested.vmcs02_num = 0;
-
 	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_REL);
 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -6870,6 +6777,9 @@ out_shadow_vmcs:
 	free_page((unsigned long)vmx->nested.msr_bitmap);
 
 out_msr_bitmap:
+	free_loaded_vmcs(&vmx->nested.vmcs02);
+
+out_vmcs02:
 	return -ENOMEM;
 }
 
@@ -6946,7 +6856,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 	}
 	if (enable_shadow_vmcs)
 		free_vmcs(vmx->nested.current_shadow_vmcs);
-	/* Unpin physical memory we referred to in current vmcs02 */
+	/* Unpin physical memory we referred to in the vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		nested_release_page(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
@@ -6962,7 +6872,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->nested.pi_desc = NULL;
 	}
 
-	nested_free_all_saved_vmcss(vmx);
+	free_loaded_vmcs(&vmx->nested.vmcs02);
 }
 
 /* Emulate the VMXOFF instruction */
@@ -6996,8 +6906,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 			vmptr + offsetof(struct vmcs12, launch_state),
 			&zero, sizeof(zero));
 
-	nested_free_vmcs02(vmx, vmptr);
-
 	skip_emulated_instruction(vcpu);
 	nested_vmx_succeed(vcpu);
 	return 1;
@@ -7784,10 +7692,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 
 	/*
 	 * The host physical addresses of some pages of guest memory
-	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
-	 * may write to these pages via their host physical address while
-	 * L2 is running, bypassing any address-translation-based dirty
-	 * tracking (e.g. EPT write protection).
+	 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
+	 * Page). The CPU may write to these pages via their host
+	 * physical address while L2 is running, bypassing any
+	 * address-translation-based dirty tracking (e.g. EPT write
+	 * protection).
 	 *
 	 * Mark them dirty on every exit from L2 to prevent them from
 	 * getting out of sync with dirty tracking.
@@ -9889,7 +9798,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	struct vmcs12 *vmcs12;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int cpu;
-	struct loaded_vmcs *vmcs02;
 	bool ia32e;
 	u32 msr_entry_idx;
 
@@ -10029,10 +9937,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	 * the nested entry.
 	 */
 
-	vmcs02 = nested_get_current_vmcs02(vmx);
-	if (!vmcs02)
-		return -ENOMEM;
-
 	enter_guest_mode(vcpu);
 
 	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
@@ -10041,7 +9945,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
 	cpu = get_cpu();
-	vmx->loaded_vmcs = vmcs02;
+	vmx->loaded_vmcs = &vmx->nested.vmcs02;
 	vmx_vcpu_put(vcpu);
 	vmx_vcpu_load(vcpu, cpu);
 	vcpu->cpu = cpu;
@@ -10553,10 +10457,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
 	vmx_segment_cache_clear(vmx);
 
-	/* if no vmcs02 cache requested, remove the one we used */
-	if (VMCS02_POOL_SIZE == 0)
-		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
-
 	load_vmcs12_host_state(vcpu, vmcs12);
 
 	/* Update TSC_OFFSET if TSC was changed while L2 ran */