@@ -1792,10 +1792,15 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
return;
}
cpu->thread_kicked = true;
- err = pthread_kill(cpu->thread->thread, SIG_IPI);
- if (err && err != ESRCH) {
- fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
- exit(1);
+
+ if (hvf_enabled()) {
+ hvf_vcpu_kick(cpu);
+ } else {
+ err = pthread_kill(cpu->thread->thread, SIG_IPI);
+ if (err && err != ESRCH) {
+ fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+ exit(1);
+ }
}
#else /* _WIN32 */
if (!qemu_cpu_is_self(cpu)) {
@@ -438,7 +438,7 @@ struct CPUState {
struct hax_vcpu_state *hax_vcpu;
- int hvf_fd;
+ unsigned hvf_fd;
/* track IOMMUs whose translations we've cached in the TCG TLB */
GArray *iommu_notifiers;
@@ -25,6 +25,7 @@ extern bool hvf_allowed;
int hvf_init_vcpu(CPUState *);
int hvf_vcpu_exec(CPUState *);
+void hvf_vcpu_kick(CPUState *);
void hvf_cpu_synchronize_state(CPUState *);
void hvf_cpu_synchronize_post_reset(CPUState *);
void hvf_cpu_synchronize_post_init(CPUState *);
@@ -1602,6 +1602,7 @@ typedef struct CPUX86State {
struct kvm_nested_state *nested_state;
#endif
#if defined(CONFIG_HVF)
+ bool hvf_in_guest;
HVFX86LazyFlags hvf_lflags;
void *hvf_mmio_buf;
#endif
@@ -73,6 +73,7 @@
#include "target/i386/cpu.h"
HVFState *hvf_state;
+pthread_key_t hvf_cpu;
static void assert_hvf_ok(hv_return_t ret)
{
@@ -458,8 +459,17 @@ void hvf_vcpu_destroy(CPUState *cpu)
assert_hvf_ok(ret);
}
-static void dummy_signal(int sig)
+static void hvf_handle_ipi(int sig)
{
+ CPUState *cpu = pthread_getspecific(hvf_cpu);
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86_cpu->env;
+
+ if (!atomic_xchg(&env->hvf_in_guest, false)) {
+ wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+ rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
+ | VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
+ }
}
int hvf_init_vcpu(CPUState *cpu)
@@ -474,16 +484,24 @@ int hvf_init_vcpu(CPUState *cpu)
struct sigaction sigact;
memset(&sigact, 0, sizeof(sigact));
- sigact.sa_handler = dummy_signal;
+ sigact.sa_handler = hvf_handle_ipi;
sigaction(SIG_IPI, &sigact, NULL);
+ r = pthread_setspecific(hvf_cpu, cpu);
+ if (r) {
+ fprintf(stderr, "qemu:%s: %s", __func__, strerror(r));
+ exit(1);
+ }
+
pthread_sigmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
+ pthread_sigmask(SIG_SETMASK, &set, NULL);
init_emu();
init_decoder();
hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
+ env->hvf_in_guest = false;
env->hvf_mmio_buf = g_new(char, 4096);
r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
@@ -529,6 +547,7 @@ int hvf_init_vcpu(CPUState *cpu)
wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */
wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
+ wvmcs(cpu->hvf_fd, VMCS_PREEMPTION_TIMER_VALUE, 0);
x86cpu = X86_CPU(cpu);
x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);
@@ -631,7 +650,9 @@ int hvf_vcpu_exec(CPUState *cpu)
return EXCP_HLT;
}
+ atomic_mb_set(&env->hvf_in_guest, true);
hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
+ atomic_mb_set(&env->hvf_in_guest, false);
assert_hvf_ok(r);
/* handle VMEXIT */
@@ -774,6 +795,10 @@ int hvf_vcpu_exec(CPUState *cpu)
vmx_clear_nmi_window_exiting(cpu);
ret = EXCP_INTERRUPT;
break;
+ case EXIT_REASON_VMX_PREEMPT:
+ wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+ rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
+ & ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
case EXIT_REASON_EXT_INTR:
/* force exit and allow io handling */
ret = EXCP_INTERRUPT;
@@ -872,6 +897,22 @@ int hvf_vcpu_exec(CPUState *cpu)
return ret;
}
+void hvf_vcpu_kick(CPUState *cpu)
+{
+ hv_return_t err;
+
+ err = pthread_kill(cpu->thread->thread, SIG_IPI);
+ if (err) {
+ fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+ exit(1);
+ }
+ err = hv_vcpu_interrupt(&cpu->hvf_fd, 1);
+ if (err) {
+ fprintf(stderr, "qemu:%s error %#x\n", __func__, err);
+ exit(1);
+ }
+}
+
bool hvf_allowed;
static int hvf_accel_init(MachineState *ms)
@@ -880,6 +921,14 @@ static int hvf_accel_init(MachineState *ms)
hv_return_t ret;
HVFState *s;
+ /* TODO add pthread_key_delete() */
+ ret = pthread_key_create(&hvf_cpu, NULL);
+ if (ret) {
+ fprintf(stderr, "qemu:%s: %s", __func__, strerror(ret));
+ exit(1);
+ }
+
+ /* TODO add hv_vm_unmap() and hv_vm_destroy() */
ret = hv_vm_create(HV_VM_DEFAULT);
assert_hvf_ok(ret);
@@ -349,6 +349,7 @@
#define VMCS_PIN_BASED_CTLS_EXTINT (1 << 0)
#define VMCS_PIN_BASED_CTLS_NMI (1 << 3)
#define VMCS_PIN_BASED_CTLS_VNMI (1 << 5)
+#define VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER (1 << 6)
#define VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING (1 << 2)
#define VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET (1 << 3)
HVF doesn't have a CPU kick, and without it it's not possible to perform an action on the vCPU thread until a VMEXIT happens. The kick is also needed for timely interrupt delivery. The existing implementation of CPU kick sends SIG_IPI (aka SIGUSR1) to the vCPU thread, but that's different from what hv_vcpu_interrupt() does. The latter results in an invocation of mp_cpus_kick() in the XNU kernel [1]. mp_cpus_kick() sends an IPI through the host LAPIC to the HVF vCPU, and the kick interrupt leads to a VM exit because the "external-interrupt exiting" VM-execution control is enabled for HVF. hv_vcpu_interrupt() has no effect if it's delivered while the vCPU is outside of a guest; therefore, to avoid kick loss, it's complemented with a SIG_IPI handler and a zero-valued VMX-preemption timer. If the kick happens outside of hv_vcpu_run(), the signal handler re-queues the kick by enabling the zero VMX-preemption timer for the next hv_vcpu_run(). There's still a small chance of kick loss on the user-to-kernel border, between the atomic_mb_set's just before entry to hv_vcpu_run() and just after it. Also correct the type of hvf_fd to match hv_vcpuid_t, to avoid compilation warnings. 1. https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/i386/mp.c Cc: Cameron Esfahani <dirty@apple.com> Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com> --- This is a rework of the kick patch in v2 of the series (https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg09899.html) based on the comments from Paolo. Unfortunately the race is still there; maybe the race window is just a bit smaller. Changes since v2: - Reworked workaround to minimize the kick-loss race. Use signals to interrupt the vCPU thread outside of hv_vcpu_run() and turn the VMX-preemption timer on/off, while the timer value is always zero. v3 also assumes that the VMX-preemption timer control is always available on hardware that supports HVF. 
Changes since v1: - Reduced kick loss race (Paolo) and removed SIG_IPI blocking cpus.c | 13 +++++++---- include/hw/core/cpu.h | 2 +- include/sysemu/hvf.h | 1 + target/i386/cpu.h | 1 + target/i386/hvf/hvf.c | 53 ++++++++++++++++++++++++++++++++++++++++-- target/i386/hvf/vmcs.h | 1 + 6 files changed, 64 insertions(+), 7 deletions(-)