Message ID | 1404914112-7298-1-git-send-email-alex.bennee@linaro.org |
---|---|
State | Awaiting Upstream |
Headers | show |
On Wed, Jul 09, 2014 at 02:55:12PM +0100, Alex Bennée wrote: > To cleanly restore an SMP VM we need to ensure that the current pause > state of each vcpu is correctly recorded. Things could get confused if > the CPU starts running after migration restore completes when it was > paused before it state was captured. > > I've done this by exposing a register (currently only 1 bit used) via > the GET/SET_ONE_REG logic to pass the state between KVM and the VM > controller (e.g. QEMU). > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > --- > arch/arm64/include/uapi/asm/kvm.h | 8 +++++ > arch/arm64/kvm/guest.c | 61 ++++++++++++++++++++++++++++++++++++++- > 2 files changed, 68 insertions(+), 1 deletion(-) > > diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h > index eaf54a3..8990e6e 100644 > --- a/arch/arm64/include/uapi/asm/kvm.h > +++ b/arch/arm64/include/uapi/asm/kvm.h > @@ -148,6 +148,14 @@ struct kvm_arch_memory_slot { > #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) > #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) > > +/* Power state (PSCI), not real registers */ > +#define KVM_REG_ARM_PSCI (0x0014 << KVM_REG_ARM_COPROC_SHIFT) > +#define KVM_REG_ARM_PSCI_REG(n) \ > + (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_PSCI | \ > + (n & ~KVM_REG_ARM_COPROC_MASK)) I don't understand this mask, why isn't this (n & 0xffff)) > +#define KVM_REG_ARM_PSCI_STATE KVM_REG_ARM_PSCI_REG(0) > +#define NUM_KVM_PSCI_REGS 1 > + you're missing updates to Documentation/virtual/kvm/api.txt here. > /* Device Control API: ARM VGIC */ > #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 > #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 > diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c > index 205f0d8..31d6439 100644 > --- a/arch/arm64/kvm/guest.c > +++ b/arch/arm64/kvm/guest.c > @@ -189,6 +189,54 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) > } > > /** > + * PSCI State > + * > + * These are not real registers as they do not actually exist in the > + * hardware but represent the current power state of the vCPU full stop > + */ > + > +static bool is_psci_reg(u64 index) > +{ > + switch (index) { > + case KVM_REG_ARM_PSCI_STATE: > + return true; > + } > + return false; > +} > + > +static int copy_psci_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) > +{ > + if (put_user(KVM_REG_ARM_PSCI_STATE, uindices)) > + return -EFAULT; > + return 0; > +} > + > +static int set_psci_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) > +{ > + void __user *uaddr = (void __user *)(long)reg->addr; > + u64 val; > + int ret; > + > + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); > + if (ret != 0) > + return ret; > + > + vcpu->arch.pause = (val & 0x1) ? false : true; tabs I really need the documentation of the ABI, why is bit[0] == 1 not paused? If we are not complaining when setting the pause value to false if it was true before, then we probably also need to wake up the thread in case this is called from another thread, right? or perhaps we should just return an error if you're trying to un-pause a CPU through this interface, hmmmm. > + return 0; > +} > + > +static int get_psci_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) > +{ > + void __user *uaddr = (void __user *)(long)reg->addr; > + u64 val; > + > + /* currently we only use one bit */ tabs useless comment, just include docs. > + val = vcpu->arch.pause ? 0 : 1; > + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); > +} > + > + > +/** > * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG > * > * This is for all registers. > @@ -196,7 +244,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) > unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) > { > return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) > - + NUM_TIMER_REGS; > + + NUM_TIMER_REGS + NUM_KVM_PSCI_REGS; > } > > /** > @@ -221,6 +269,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) > return ret; > uindices += NUM_TIMER_REGS; > > + ret = copy_psci_indices(vcpu, uindices); indentation, please use tabs. > + if (ret) > + return ret; > + uindices += NUM_KVM_PSCI_REGS; > + > return kvm_arm_copy_sys_reg_indices(vcpu, uindices); > } > > @@ -237,6 +290,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) > if (is_timer_reg(reg->id)) > return get_timer_reg(vcpu, reg); > > + if (is_psci_reg(reg->id)) > + return get_psci_reg(vcpu, reg); > + > return kvm_arm_sys_reg_get_reg(vcpu, reg); > } > > @@ -253,6 +309,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) > if (is_timer_reg(reg->id)) > return set_timer_reg(vcpu, reg); > > + if (is_psci_reg(reg->id)) > + return set_psci_reg(vcpu, reg); > + > return kvm_arm_sys_reg_set_reg(vcpu, reg); > } > > -- > 2.0.1 > please check for use of tabs versus spaces, checkpatch.pl should complain. Can you add the 32-bit counterpart as part of this patch? Thanks, -Christoffer -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Christoffer Dall writes: > On Wed, Jul 09, 2014 at 02:55:12PM +0100, Alex Bennée wrote: >> To cleanly restore an SMP VM we need to ensure that the current pause >> state of each vcpu is correctly recorded. Things could get confused if >> the CPU starts running after migration restore completes when it was >> paused before it state was captured. >> <snip> >> +/* Power state (PSCI), not real registers */ >> +#define KVM_REG_ARM_PSCI (0x0014 << KVM_REG_ARM_COPROC_SHIFT) >> +#define KVM_REG_ARM_PSCI_REG(n) \ >> + (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_PSCI | \ >> + (n & ~KVM_REG_ARM_COPROC_MASK)) > > I don't understand this mask, why isn't this > (n & 0xffff)) I was trying to use the existing masks, but of course if anyone changes that it would be an ABI change so probably not worth it. > >> +#define KVM_REG_ARM_PSCI_STATE KVM_REG_ARM_PSCI_REG(0) >> +#define NUM_KVM_PSCI_REGS 1 >> + > > you're missing updates to Documentation/virtual/kvm/api.txt here. Will add. >> /* Device Control API: ARM VGIC */ >> #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 >> #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 >> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c >> index 205f0d8..31d6439 100644 >> --- a/arch/arm64/kvm/guest.c >> +++ b/arch/arm64/kvm/guest.c >> @@ -189,6 +189,54 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) >> } >> >> /** >> + * PSCI State >> + * >> + * These are not real registers as they do not actually exist in the >> + * hardware but represent the current power state of the vCPU > > full stop > >> + */ >> + >> +static bool is_psci_reg(u64 index) >> +{ >> + switch (index) { >> + case KVM_REG_ARM_PSCI_STATE: >> + return true; >> + } >> + return false; >> +} >> + >> +static int copy_psci_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) >> +{ >> + if (put_user(KVM_REG_ARM_PSCI_STATE, uindices)) >> + return -EFAULT; >> + return 0; >> +} >> + >> +static int set_psci_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) >> +{ >> + void __user *uaddr = (void __user *)(long)reg->addr; >> + u64 val; >> + int ret; >> + >> + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); >> + if (ret != 0) >> + return ret; >> + >> + vcpu->arch.pause = (val & 0x1) ? false : true; > > tabs Hmmm, apparently the GNU Emacs "linux" style doesn't actually enforce that. Who knew? I'll beat the editor into submission. > I really need the documentation of the ABI, why is bit[0] == 1 not > paused? I figured 1 == running, but I can switch it round if you want to to map directly to the .pause flag. > If we are not complaining when setting the pause value to false if it > was true before, then we probably also need to wake up the thread in > case this is called from another thread, right? > > or perhaps we should just return an error if you're trying to un-pause a > CPU through this interface, hmmmm. Wouldn't it be an error to mess with any register when the system is not in a quiescent state? I was assuming that the wake state is dealt with when the run loop finally restarts. <snip> > > please check for use of tabs versus spaces, checkpatch.pl should > complain. > > Can you add the 32-bit counterpart as part of this patch? Same patch? Sure. > > Thanks, > -Christoffer
On Thu, Jul 31, 2014 at 04:14:51PM +0100, Alex Bennée wrote: > > Christoffer Dall writes: > > > On Wed, Jul 09, 2014 at 02:55:12PM +0100, Alex Bennée wrote: > >> To cleanly restore an SMP VM we need to ensure that the current pause > >> state of each vcpu is correctly recorded. Things could get confused if > >> the CPU starts running after migration restore completes when it was > >> paused before it state was captured. > >> > <snip> > >> +/* Power state (PSCI), not real registers */ > >> +#define KVM_REG_ARM_PSCI (0x0014 << KVM_REG_ARM_COPROC_SHIFT) > >> +#define KVM_REG_ARM_PSCI_REG(n) \ > >> + (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_PSCI | \ > >> + (n & ~KVM_REG_ARM_COPROC_MASK)) > > > > I don't understand this mask, why isn't this > > (n & 0xffff)) > > I was trying to use the existing masks, but of course if anyone changes > that it would be an ABI change so probably not worth it. > the KVM_REG_ARM_COPROC_MASK is part of the uapi IIRC, so that's not the issue, but that mask doesn't cover all the upper bits, so it feels weird to use that to me. > > > >> +#define KVM_REG_ARM_PSCI_STATE KVM_REG_ARM_PSCI_REG(0) > >> +#define NUM_KVM_PSCI_REGS 1 > >> + > > > > you're missing updates to Documentation/virtual/kvm/api.txt here. > > Will add. > > >> /* Device Control API: ARM VGIC */ > >> #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 > >> #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 > >> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c > >> index 205f0d8..31d6439 100644 > >> --- a/arch/arm64/kvm/guest.c > >> +++ b/arch/arm64/kvm/guest.c > >> @@ -189,6 +189,54 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) > >> } > >> > >> /** > >> + * PSCI State > >> + * > >> + * These are not real registers as they do not actually exist in the > >> + * hardware but represent the current power state of the vCPU > > > > full stop > > > >> + */ > >> + > >> +static bool is_psci_reg(u64 index) > >> +{ > >> + switch (index) { > >> + case KVM_REG_ARM_PSCI_STATE: > >> + return true; > >> + } > >> + return false; > >> +} > >> + > >> +static int copy_psci_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) > >> +{ > >> + if (put_user(KVM_REG_ARM_PSCI_STATE, uindices)) > >> + return -EFAULT; > >> + return 0; > >> +} > >> + > >> +static int set_psci_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) > >> +{ > >> + void __user *uaddr = (void __user *)(long)reg->addr; > >> + u64 val; > >> + int ret; > >> + > >> + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); > >> + if (ret != 0) > >> + return ret; > >> + > >> + vcpu->arch.pause = (val & 0x1) ? false : true; > > > > tabs > > Hmmm, apparently the GNU Emacs "linux" style doesn't actually enforce > that. Who knew? I'll beat the editor into submission. > > > I really need the documentation of the ABI, why is bit[0] == 1 not > > paused? > > I figured 1 == running, but I can switch it round if you want to to map > directly to the .pause flag. > don't care very deeply, just want something to describe it clearly. > > If we are not complaining when setting the pause value to false if it > > was true before, then we probably also need to wake up the thread in > > case this is called from another thread, right? > > > > or perhaps we should just return an error if you're trying to un-pause a > > CPU through this interface, hmmmm. > > Wouldn't it be an error to mess with any register when the system is not > in a quiescent state? I was assuming that the wake state is dealt with > when the run loop finally restarts. > The ABI doesn't really define it as an error (the ABI doesn't enforce anything right now) so the question is, does it ever make sense to clear the pause flag through this ioctl? If not, I think we should just err on the side of caution and specify in the docs that this is not supported and return an error. > <snip> > > > > please check for use of tabs versus spaces, checkpatch.pl should > > complain. > > > > Can you add the 32-bit counterpart as part of this patch? > > Same patch? Sure. really up to you if you want to split it up into two patches, but I think it's small enough that you can just create one patch. Thanks, -Christoffer -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Il 09/07/2014 15:55, Alex Bennée ha scritto: > To cleanly restore an SMP VM we need to ensure that the current pause > state of each vcpu is correctly recorded. Things could get confused if > the CPU starts running after migration restore completes when it was > paused before it state was captured. > > I've done this by exposing a register (currently only 1 bit used) via > the GET/SET_ONE_REG logic to pass the state between KVM and the VM > controller (e.g. QEMU). > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > --- > arch/arm64/include/uapi/asm/kvm.h | 8 +++++ > arch/arm64/kvm/guest.c | 61 ++++++++++++++++++++++++++++++++++++++- > 2 files changed, 68 insertions(+), 1 deletion(-) Since it's a pseudo register anyway, would it make sense to use the existing KVM_GET/SET_MP_STATE ioctl interface? How is this represented within QEMU in TCG mode? Also, how is KVM/ARM representing (and passing to QEMU) the halted state of the VCPU? Paolo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
On 31 July 2014 17:57, Paolo Bonzini <pbonzini@redhat.com> wrote: > Il 09/07/2014 15:55, Alex Bennée ha scritto: >> To cleanly restore an SMP VM we need to ensure that the current pause >> state of each vcpu is correctly recorded. Things could get confused if >> the CPU starts running after migration restore completes when it was >> paused before it state was captured. >> >> I've done this by exposing a register (currently only 1 bit used) via >> the GET/SET_ONE_REG logic to pass the state between KVM and the VM >> controller (e.g. QEMU). >> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >> --- >> arch/arm64/include/uapi/asm/kvm.h | 8 +++++ >> arch/arm64/kvm/guest.c | 61 ++++++++++++++++++++++++++++++++++++++- >> 2 files changed, 68 insertions(+), 1 deletion(-) > > Since it's a pseudo register anyway, would it make sense to use the > existing KVM_GET/SET_MP_STATE ioctl interface? That appears to be an x86-specific thing relating to IRQ chips. > How is this represented within QEMU in TCG mode? We don't implement it in TCG yet; Rob Herring has posted patches but they had a few minor issues (didn't compile on non-Linux hosts). The answer will be 'in a "bool powered_off" flag in struct ARMCPU'. > Also, how is KVM/ARM > representing (and passing to QEMU) the halted state of the > VCPU? We don't. In ARM the equivalent of x86 HLT (which is WFI, wait-for-interrupt) is allowed to resume at any time. So we don't need to care about saving and restoring whether we were sat in a WFI at point of migration. thanks -- PMM -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Il 31/07/2014 19:04, Peter Maydell ha scritto: > On 31 July 2014 17:57, Paolo Bonzini <pbonzini@redhat.com> wrote: >> Il 09/07/2014 15:55, Alex Bennée ha scritto: >>> To cleanly restore an SMP VM we need to ensure that the current pause >>> state of each vcpu is correctly recorded. Things could get confused if >>> the CPU starts running after migration restore completes when it was >>> paused before it state was captured. >>> >>> I've done this by exposing a register (currently only 1 bit used) via >>> the GET/SET_ONE_REG logic to pass the state between KVM and the VM >>> controller (e.g. QEMU). >>> >>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >>> --- >>> arch/arm64/include/uapi/asm/kvm.h | 8 +++++ >>> arch/arm64/kvm/guest.c | 61 ++++++++++++++++++++++++++++++++++++++- >>> 2 files changed, 68 insertions(+), 1 deletion(-) >> >> Since it's a pseudo register anyway, would it make sense to use the >> existing KVM_GET/SET_MP_STATE ioctl interface? > > That appears to be an x86-specific thing relating to > IRQ chips. No, it's not. It's just the state of the CPU, s390 will be using it too. On x86 the states are uninitialized (UNINITIALIZED), stopped (INIT_RECEIVED), running (RUNNABLE), halted (HALTED). CPU 0 starts in RUNNABLE state, other CPUs start in UNINITIALIZED state. There are x86-specific cases (uninitialized) and x86-isms (the INIT_RECEIVED name), but the idea is widely applicable. >> Also, how is KVM/ARM >> representing (and passing to QEMU) the halted state of the >> VCPU? > > We don't. In ARM the equivalent of x86 HLT (which is > WFI, wait-for-interrupt) is allowed to resume at any time. > So we don't need to care about saving and restoring > whether we were sat in a WFI at point of migration. What does ARM do if you have a WFI while interrupts are disabled? On x86 after "cli;hlt" only an NMI will wake you up. With spurious wakeups, it's pretty much guaranteed that you will break such "cli;hlt" sequences. Paolo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
On 31 July 2014 18:21, Paolo Bonzini <pbonzini@redhat.com> wrote: > What does ARM do if you have a WFI while interrupts are disabled? On > x86 after "cli;hlt" only an NMI will wake you up. With spurious > wakeups, it's pretty much guaranteed that you will break such "cli;hlt" > sequences. The architecture mandates some things that *must* wake you from a WFI, but it also allows wakeups for other reasons not listed, or for no reason at all. It's perfectly valid to implement WFI as a NOP (though it would not be very good for power efficiency, obviously). Guests which don't surround WFI with a "check whether we should just go back to WFI" loop are buggy. thanks -- PMM -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
On Thu, Jul 31, 2014 at 06:36:35PM +0100, Peter Maydell wrote: > On 31 July 2014 18:21, Paolo Bonzini <pbonzini@redhat.com> wrote: > > What does ARM do if you have a WFI while interrupts are disabled? On > > x86 after "cli;hlt" only an NMI will wake you up. With spurious > > wakeups, it's pretty much guaranteed that you will break such "cli;hlt" > > sequences. > > The architecture mandates some things that *must* wake you from > a WFI, but it also allows wakeups for other reasons not listed, or > for no reason at all. It's perfectly valid to implement WFI as a NOP > (though it would not be very good for power efficiency, obviously). > Guests which don't surround WFI with a "check whether we should > just go back to WFI" loop are buggy. (and in case that wasn't clear, local_irq_disable() doesn't prevent an interrupt from waking you up from wfi, otherwise our idle code would be broken). Will -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Christoffer Dall writes: > On Thu, Jul 31, 2014 at 04:14:51PM +0100, Alex Bennée wrote: >> >> Christoffer Dall writes: >> >> > On Wed, Jul 09, 2014 at 02:55:12PM +0100, Alex Bennée wrote: >> >> To cleanly restore an SMP VM we need to ensure that the current pause >> >> state of each vcpu is correctly recorded. Things could get confused if >> >> the CPU starts running after migration restore completes when it was >> >> paused before it state was captured. >> >> >> <snip> >> >> +/* Power state (PSCI), not real registers */ >> >> +#define KVM_REG_ARM_PSCI (0x0014 << KVM_REG_ARM_COPROC_SHIFT) >> >> +#define KVM_REG_ARM_PSCI_REG(n) \ >> >> + (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_PSCI | \ >> >> + (n & ~KVM_REG_ARM_COPROC_MASK)) >> > >> > I don't understand this mask, why isn't this >> > (n & 0xffff)) >> >> I was trying to use the existing masks, but of course if anyone changes >> that it would be an ABI change so probably not worth it. >> > > the KVM_REG_ARM_COPROC_MASK is part of the uapi IIRC, so that's not the > issue, but that mask doesn't cover all the upper bits, so it feels weird > to use that to me. Yeah I missed that. I could do a: #define KVM_REG_ARM_COPROC_INDEX_MASK ((1<<KVM_REG_ARM_COPROC_SHIFT)-1) and use that. I'm generally try to avoid hardcoded numbers but I could be being a little OCD here ;-) >> > Can you add the 32-bit counterpart as part of this patch? >> >> Same patch? Sure. > > really up to you if you want to split it up into two patches, but I > think it's small enough that you can just create one patch. Given the similarity of this code between arm and arm64 I'm wondering if it's worth doing a arch/arm/kvm/guest_common.c or something to reduce the amount of copy paste stuff?
On Fri, Aug 01, 2014 at 10:11:52AM +0100, Alex Bennée wrote: > > Christoffer Dall writes: > > > On Thu, Jul 31, 2014 at 04:14:51PM +0100, Alex Bennée wrote: > >> > >> Christoffer Dall writes: > >> > >> > On Wed, Jul 09, 2014 at 02:55:12PM +0100, Alex Bennée wrote: > >> >> To cleanly restore an SMP VM we need to ensure that the current pause > >> >> state of each vcpu is correctly recorded. Things could get confused if > >> >> the CPU starts running after migration restore completes when it was > >> >> paused before it state was captured. > >> >> > >> <snip> > >> >> +/* Power state (PSCI), not real registers */ > >> >> +#define KVM_REG_ARM_PSCI (0x0014 << KVM_REG_ARM_COPROC_SHIFT) > >> >> +#define KVM_REG_ARM_PSCI_REG(n) \ > >> >> + (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_PSCI | \ > >> >> + (n & ~KVM_REG_ARM_COPROC_MASK)) > >> > > >> > I don't understand this mask, why isn't this > >> > (n & 0xffff)) > >> > >> I was trying to use the existing masks, but of course if anyone changes > >> that it would be an ABI change so probably not worth it. > >> > > > > the KVM_REG_ARM_COPROC_MASK is part of the uapi IIRC, so that's not the > > issue, but that mask doesn't cover all the upper bits, so it feels weird > > to use that to me. > > Yeah I missed that. I could do a: > > #define KVM_REG_ARM_COPROC_INDEX_MASK ((1<<KVM_REG_ARM_COPROC_SHIFT)-1) > > and use that. I'm generally try to avoid hardcoded numbers but I could > be being a little OCD here ;-) > > >> > Can you add the 32-bit counterpart as part of this patch? > >> > >> Same patch? Sure. > > > > really up to you if you want to split it up into two patches, but I > > think it's small enough that you can just create one patch. > > Given the similarity of this code between arm and arm64 I'm wondering if > it's worth doing a arch/arm/kvm/guest_common.c or something to reduce > the amount of copy paste stuff? > We've gotten by without it so far. I fear we end up with a bunch of complications due to differences in sizeof(unsigned long) etc., but I may be wrong. The amount of code that is copied should be trivial boilerplate stuff, but if you think it's worth unifying, then I'd be happy to review the patch. Thanks, -Christoffer -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index eaf54a3..8990e6e 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -148,6 +148,14 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +/* Power state (PSCI), not real registers */ +#define KVM_REG_ARM_PSCI (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_PSCI_REG(n) \ + (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_PSCI | \ + (n & ~KVM_REG_ARM_COPROC_MASK)) +#define KVM_REG_ARM_PSCI_STATE KVM_REG_ARM_PSCI_REG(0) +#define NUM_KVM_PSCI_REGS 1 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 205f0d8..31d6439 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -189,6 +189,54 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } /** + * PSCI State + * + * These are not real registers as they do not actually exist in the + * hardware but represent the current power state of the vCPU + */ + +static bool is_psci_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_PSCI_STATE: + return true; + } + return false; +} + +static int copy_psci_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_PSCI_STATE, uindices)) + return -EFAULT; + return 0; +} + +static int set_psci_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + vcpu->arch.pause = (val & 0x1) ? false : true; + return 0; +} + +static int get_psci_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + /* currently we only use one bit */ + val = vcpu->arch.pause ? 0 : 1; + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + + +/** * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG * * This is for all registers. @@ -196,7 +244,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) - + NUM_TIMER_REGS; + + NUM_TIMER_REGS + NUM_KVM_PSCI_REGS; } /** @@ -221,6 +269,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return ret; uindices += NUM_TIMER_REGS; + ret = copy_psci_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_KVM_PSCI_REGS; + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -237,6 +290,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (is_timer_reg(reg->id)) return get_timer_reg(vcpu, reg); + if (is_psci_reg(reg->id)) + return get_psci_reg(vcpu, reg); + return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -253,6 +309,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (is_timer_reg(reg->id)) return set_timer_reg(vcpu, reg); + if (is_psci_reg(reg->id)) + return set_psci_reg(vcpu, reg); + return kvm_arm_sys_reg_set_reg(vcpu, reg); }
To cleanly restore an SMP VM we need to ensure that the current pause state of each vcpu is correctly recorded. Things could get confused if the CPU starts running after migration restore completes when it was paused before it state was captured. I've done this by exposing a register (currently only 1 bit used) via the GET/SET_ONE_REG logic to pass the state between KVM and the VM controller (e.g. QEMU). Signed-off-by: Alex Bennée <alex.bennee@linaro.org> --- arch/arm64/include/uapi/asm/kvm.h | 8 +++++ arch/arm64/kvm/guest.c | 61 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-)