diff mbox series

[v5,07/10] Drivers: hv: Introduce per-cpu event ring tail

Message ID 1740611284-27506-8-git-send-email-nunodasneves@linux.microsoft.com
State New
Headers show
Series Introduce /dev/mshv root partition driver | expand

Commit Message

Nuno Das Neves Feb. 26, 2025, 11:08 p.m. UTC
Add a pointer hv_synic_eventring_tail to track the tail pointer for the
SynIC event ring buffer for each SINT.

This will be used by the mshv driver, but must be tracked independently
since the driver module could be removed and re-inserted.

Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Reviewed-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/hv_common.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

Comments

Stanislav Kinsburskii Feb. 26, 2025, 11:39 p.m. UTC | #1
On Wed, Feb 26, 2025 at 03:08:01PM -0800, Nuno Das Neves wrote:
> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
> SynIC event ring buffer for each SINT.
> 
> This will be used by the mshv driver, but must be tracked independently
> since the driver module could be removed and re-inserted.
> 
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> Reviewed-by: Wei Liu <wei.liu@kernel.org>
> ---
>  drivers/hv/hv_common.c | 34 ++++++++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 252fd66ad4db..2763cb6d3678 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -460,6 +478,7 @@ void __init ms_hyperv_late_init(void)
>  int hv_common_cpu_init(unsigned int cpu)
>  {
>  	void **inputarg, **outputarg;
> +	u8 **synic_eventring_tail;
>  	u64 msr_vp_index;
>  	gfp_t flags;
>  	const int pgcount = hv_output_page_exists() ? 2 : 1;
> @@ -472,8 +491,8 @@ int hv_common_cpu_init(unsigned int cpu)
>  	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
>  
>  	/*
> -	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
> -	 * allocated if this CPU was previously online and then taken offline
> +	 * The per-cpu memory is already allocated if this CPU was previously
> +	 * online and then taken offline
>  	 */
>  	if (!*inputarg) {
>  		mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
> @@ -485,6 +504,17 @@ int hv_common_cpu_init(unsigned int cpu)
>  			*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
>  		}
>  
> +		if (hv_root_partition()) {
> +			synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
> +			*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
> +							sizeof(u8), flags);
> +

Redundant empty line ^^^
> +			if (unlikely(!*synic_eventring_tail)) {
> +				kfree(mem);
> +				return -ENOMEM;
> +			}
> +		}
> +
>  		if (!ms_hyperv.paravisor_present &&
>  		    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
>  			ret = set_memory_decrypted((unsigned long)mem, pgcount);

Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>

> -- 
> 2.34.1
>
Michael Kelley March 7, 2025, 5:02 p.m. UTC | #2
From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, February 26, 2025 3:08 PM
> 
> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
> SynIC event ring buffer for each SINT.
> 
> This will be used by the mshv driver, but must be tracked independently
> since the driver module could be removed and re-inserted.
> 
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> Reviewed-by: Wei Liu <wei.liu@kernel.org>
> ---
>  drivers/hv/hv_common.c | 34 ++++++++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 252fd66ad4db..2763cb6d3678 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -68,6 +68,16 @@ static void hv_kmsg_dump_unregister(void);
> 
>  static struct ctl_table_header *hv_ctl_table_hdr;
> 
> +/*
> + * Per-cpu array holding the tail pointer for the SynIC event ring buffer
> + * for each SINT.
> + *
> + * We cannot maintain this in mshv driver because the tail pointer should
> + * persist even if the mshv driver is unloaded.
> + */
> +u8 __percpu **hv_synic_eventring_tail;

I think the "__percpu" is in the wrong place here. This placement
is likely to cause errors from the "sparse" tool.  It should be

u8 * __percpu *hv_synic_eventring_tail;

See the way hyperv_pcpu_input_arg, for example, is defined.  And
see commit db3c65bc3a13 where I fixed hyperv_pcpu_input_arg.

> +EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);

The "extern" declaration for this variable is in Patch 10 of the series
in drivers/hv/mshv_root.h. I guess that's OK, but I would normally
expect to find such a declaration in the header file associated with
where the variable is defined, which in this case is mshyperv.h.
Perhaps you are trying to restrict its usage to just mshv?

> +
>  /*
>   * Hyper-V specific initialization and shutdown code that is
>   * common across all architectures.  Called from architecture
> @@ -90,6 +100,9 @@ void __init hv_common_free(void)
> 
>  	free_percpu(hyperv_pcpu_input_arg);
>  	hyperv_pcpu_input_arg = NULL;
> +
> +	free_percpu(hv_synic_eventring_tail);
> +	hv_synic_eventring_tail = NULL;
>  }
> 
>  /*
> @@ -372,6 +385,11 @@ int __init hv_common_init(void)
>  		BUG_ON(!hyperv_pcpu_output_arg);
>  	}
> 
> +	if (hv_root_partition()) {
> +		hv_synic_eventring_tail = alloc_percpu(u8 *);
> +		BUG_ON(hv_synic_eventring_tail == NULL);
> +	}
> +
>  	hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
>  				    GFP_KERNEL);
>  	if (!hv_vp_index) {
> @@ -460,6 +478,7 @@ void __init ms_hyperv_late_init(void)
>  int hv_common_cpu_init(unsigned int cpu)
>  {
>  	void **inputarg, **outputarg;
> +	u8 **synic_eventring_tail;
>  	u64 msr_vp_index;
>  	gfp_t flags;
>  	const int pgcount = hv_output_page_exists() ? 2 : 1;
> @@ -472,8 +491,8 @@ int hv_common_cpu_init(unsigned int cpu)
>  	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> 
>  	/*
> -	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
> -	 * allocated if this CPU was previously online and then taken offline
> +	 * The per-cpu memory is already allocated if this CPU was previously
> +	 * online and then taken offline
>  	 */
>  	if (!*inputarg) {
>  		mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
> @@ -485,6 +504,17 @@ int hv_common_cpu_init(unsigned int cpu)
>  			*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
>  		}
> 
> +		if (hv_root_partition()) {
> +			synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
> +			*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
> +							sizeof(u8), flags);
> +
> +			if (unlikely(!*synic_eventring_tail)) {
> +				kfree(mem);
> +				return -ENOMEM;
> +			}
> +		}
> +

Adding this code under the "if(!*inputarg)" implicitly ties the lifecycle of
synic_eventring_tail to the lifecycle of hyperv_pcpu_input_arg and
hyperv_pcpu_output_arg. Is there some logical relationship between the
two that warrants tying the lifecycles together (other than just both being
per-cpu)?  hyperv_pcpu_input_arg and hyperv_pcpu_output_arg have an
unusual lifecycle management in that they aren't freed when a CPU goes
offline, as described in the comment in hv_common_cpu_die(). Does
synic_eventring_tail also need that same unusual lifecycle?

Assuming there's no logical relationship, I'm thinking synic_eventring_tail
should be managed independent of the other two. If it does need the
unusual lifecycle, make sure to add a comment in hv_common_cpu_die()
explaining why. If it doesn't need the unusual lifecycle, maybe just do
the normal thing of allocating it in hv_common_cpu_init() and freeing
it in hv_common_cpu_die().

The code as written in your patch isn't wrong and would work OK. But
the structure implies a relationship with hyperv_pcpu_*_arg that I
suspect doesn't exist.

Michael

>  		if (!ms_hyperv.paravisor_present &&
>  		    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
>  			ret = set_memory_decrypted((unsigned long)mem, pgcount);
> --
> 2.34.1
Nuno Das Neves March 7, 2025, 10:06 p.m. UTC | #3
On 3/7/2025 9:02 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, February 26, 2025 3:08 PM
>>
>> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
>> SynIC event ring buffer for each SINT.
>>
>> This will be used by the mshv driver, but must be tracked independently
>> since the driver module could be removed and re-inserted.
>>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> Reviewed-by: Wei Liu <wei.liu@kernel.org>
>> ---
>>  drivers/hv/hv_common.c | 34 ++++++++++++++++++++++++++++++++--
>>  1 file changed, 32 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>> index 252fd66ad4db..2763cb6d3678 100644
>> --- a/drivers/hv/hv_common.c
>> +++ b/drivers/hv/hv_common.c
>> @@ -68,6 +68,16 @@ static void hv_kmsg_dump_unregister(void);
>>
>>  static struct ctl_table_header *hv_ctl_table_hdr;
>>
>> +/*
>> + * Per-cpu array holding the tail pointer for the SynIC event ring buffer
>> + * for each SINT.
>> + *
>> + * We cannot maintain this in mshv driver because the tail pointer should
>> + * persist even if the mshv driver is unloaded.
>> + */
>> +u8 __percpu **hv_synic_eventring_tail;
> 
> I think the "__percpu" is in the wrong place here. This placement
> is likely to cause errors from the "sparse" tool.  It should be
> 
> u8 * __percpu *hv_synic_eventring_tail;
> 
> See the way hyperv_pcpu_input_arg, for example, is defined.  And
> see commit db3c65bc3a13 where I fixed hyperv_pcpu_input_arg.
> 
Thanks. I'll fix it.

>> +EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
> 
> The "extern" declaration for this variable is in Patch 10 of the series
> in drivers/hv/mshv_root.h. I guess that's OK, but I would normally
> expect to find such a declaration in the header file associated with
> where the variable is defined, which in this case is mshyperv.h.
> Perhaps you are trying to restrict its usage to just mshv?
> 
Yes, that's the idea - it should only be used by the driver.

>> +
>>  /*
>>   * Hyper-V specific initialization and shutdown code that is
>>   * common across all architectures.  Called from architecture
>> @@ -90,6 +100,9 @@ void __init hv_common_free(void)
>>
>>  	free_percpu(hyperv_pcpu_input_arg);
>>  	hyperv_pcpu_input_arg = NULL;
>> +
>> +	free_percpu(hv_synic_eventring_tail);
>> +	hv_synic_eventring_tail = NULL;
>>  }
>>
>>  /*
>> @@ -372,6 +385,11 @@ int __init hv_common_init(void)
>>  		BUG_ON(!hyperv_pcpu_output_arg);
>>  	}
>>
>> +	if (hv_root_partition()) {
>> +		hv_synic_eventring_tail = alloc_percpu(u8 *);
>> +		BUG_ON(hv_synic_eventring_tail == NULL);
>> +	}
>> +
>>  	hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
>>  				    GFP_KERNEL);
>>  	if (!hv_vp_index) {
>> @@ -460,6 +478,7 @@ void __init ms_hyperv_late_init(void)
>>  int hv_common_cpu_init(unsigned int cpu)
>>  {
>>  	void **inputarg, **outputarg;
>> +	u8 **synic_eventring_tail;
>>  	u64 msr_vp_index;
>>  	gfp_t flags;
>>  	const int pgcount = hv_output_page_exists() ? 2 : 1;
>> @@ -472,8 +491,8 @@ int hv_common_cpu_init(unsigned int cpu)
>>  	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
>>
>>  	/*
>> -	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
>> -	 * allocated if this CPU was previously online and then taken offline
>> +	 * The per-cpu memory is already allocated if this CPU was previously
>> +	 * online and then taken offline
>>  	 */
>>  	if (!*inputarg) {
>>  		mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
>> @@ -485,6 +504,17 @@ int hv_common_cpu_init(unsigned int cpu)
>>  			*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
>>  		}
>>
>> +		if (hv_root_partition()) {
>> +			synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
>> +			*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
>> +							sizeof(u8), flags);
>> +
>> +			if (unlikely(!*synic_eventring_tail)) {
>> +				kfree(mem);
>> +				return -ENOMEM;
>> +			}
>> +		}
>> +
> 
> Adding this code under the "if(!*inputarg)" implicitly ties the lifecycle of
> synic_eventring_tail to the lifecycle of hyperv_pcpu_input_arg and
> hyperv_pcpu_output_arg. Is there some logical relationship between the
> two that warrants tying the lifecycles together (other than just both being
> per-cpu)?  hyperv_pcpu_input_arg and hyperv_pcpu_output_arg have an
> unusual lifecycle management in that they aren't freed when a CPU goes
> offline, as described in the comment in hv_common_cpu_die(). Does
> synic_eventring_tail also need that same unusual lifecycle?
> 
I thought about it, and no I don't think it shares the same exact lifecycle.
It's only used by the mshv_root driver - it just needs to remain present
whenever there's a chance the module could be re-inserted and expect it to
be there.

> Assuming there's no logical relationship, I'm thinking synic_eventring_tail
> should be managed independent of the other two. If it does need the
> unusual lifecycle, make sure to add a comment in hv_common_cpu_die()
> explaining why. If it doesn't need the unusual lifecycle, maybe just do
> the normal thing of allocating it in hv_common_cpu_init() and freeing
> it in hv_common_cpu_die().
> 
Yep, I suppose it should just be freed normally then, assuming
hv_common_cpu_die() is only called when the hypervisor is going to reset
(or remove) the synic pages for this partition. Is that the case here?

Otherwise we'd want to retain it, in case mshv_root ever needs it again for
that CPU in the lifetime of this partition.

Nuno

> The code as written in your patch isn't wrong and would work OK. But
> the structure implies a relationship with hyperv_pcpu_*_arg that I
> suspect doesn't exist.
> 
> Michael
> 
>>  		if (!ms_hyperv.paravisor_present &&
>>  		    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
>>  			ret = set_memory_decrypted((unsigned long)mem, pgcount);
>> --
>> 2.34.1
Michael Kelley March 7, 2025, 11:21 p.m. UTC | #4
From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, March 7, 2025 2:07 PM
> 
> On 3/7/2025 9:02 AM, Michael Kelley wrote:
> > From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, February 26, 2025 3:08 PM
> >>
> >> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
> >> SynIC event ring buffer for each SINT.
> >>
> >> This will be used by the mshv driver, but must be tracked independently
> >> since the driver module could be removed and re-inserted.
> >>
> >> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> >> Reviewed-by: Wei Liu <wei.liu@kernel.org>
> >> ---
> >>  drivers/hv/hv_common.c | 34 ++++++++++++++++++++++++++++++++--
> >>  1 file changed, 32 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> >> index 252fd66ad4db..2763cb6d3678 100644
> >> --- a/drivers/hv/hv_common.c
> >> +++ b/drivers/hv/hv_common.c
> >> @@ -68,6 +68,16 @@ static void hv_kmsg_dump_unregister(void);
> >>
> >>  static struct ctl_table_header *hv_ctl_table_hdr;
> >>
> >> +/*
> >> + * Per-cpu array holding the tail pointer for the SynIC event ring buffer
> >> + * for each SINT.
> >> + *
> >> + * We cannot maintain this in mshv driver because the tail pointer should
> >> + * persist even if the mshv driver is unloaded.
> >> + */
> >> +u8 __percpu **hv_synic_eventring_tail;
> >
> > I think the "__percpu" is in the wrong place here. This placement
> > is likely to cause errors from the "sparse" tool.  It should be
> >
> > u8 * __percpu *hv_synic_eventring_tail;
> >
> > See the way hyperv_pcpu_input_arg, for example, is defined.  And
> > see commit db3c65bc3a13 where I fixed hyperv_pcpu_input_arg.
> >
> Thanks. I'll fix it.
> 
> >> +EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
> >
> > The "extern" declaration for this variable is in Patch 10 of the series
> > in drivers/hv/mshv_root.h. I guess that's OK, but I would normally
> > expect to find such a declaration in the header file associated with
> > where the variable is defined, which in this case is mshyperv.h.
> > Perhaps you are trying to restrict its usage to just mshv?
> >
> Yes, that's the idea - it should only be used by the driver.
> 
> >> +
> >>  /*
> >>   * Hyper-V specific initialization and shutdown code that is
> >>   * common across all architectures.  Called from architecture
> >> @@ -90,6 +100,9 @@ void __init hv_common_free(void)
> >>
> >>  	free_percpu(hyperv_pcpu_input_arg);
> >>  	hyperv_pcpu_input_arg = NULL;
> >> +
> >> +	free_percpu(hv_synic_eventring_tail);
> >> +	hv_synic_eventring_tail = NULL;
> >>  }
> >>
> >>  /*
> >> @@ -372,6 +385,11 @@ int __init hv_common_init(void)
> >>  		BUG_ON(!hyperv_pcpu_output_arg);
> >>  	}
> >>
> >> +	if (hv_root_partition()) {
> >> +		hv_synic_eventring_tail = alloc_percpu(u8 *);
> >> +		BUG_ON(hv_synic_eventring_tail == NULL);
> >> +	}
> >> +
> >>  	hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
> >>  				    GFP_KERNEL);
> >>  	if (!hv_vp_index) {
> >> @@ -460,6 +478,7 @@ void __init ms_hyperv_late_init(void)
> >>  int hv_common_cpu_init(unsigned int cpu)
> >>  {
> >>  	void **inputarg, **outputarg;
> >> +	u8 **synic_eventring_tail;
> >>  	u64 msr_vp_index;
> >>  	gfp_t flags;
> >>  	const int pgcount = hv_output_page_exists() ? 2 : 1;
> >> @@ -472,8 +491,8 @@ int hv_common_cpu_init(unsigned int cpu)
> >>  	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> >>
> >>  	/*
> >> -	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
> >> -	 * allocated if this CPU was previously online and then taken offline
> >> +	 * The per-cpu memory is already allocated if this CPU was previously
> >> +	 * online and then taken offline
> >>  	 */
> >>  	if (!*inputarg) {
> >>  		mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
> >> @@ -485,6 +504,17 @@ int hv_common_cpu_init(unsigned int cpu)
> >>  			*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
> >>  		}
> >>
> >> +		if (hv_root_partition()) {
> >> +			synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
> >> +			*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
> >> +							sizeof(u8), flags);
> >> +
> >> +			if (unlikely(!*synic_eventring_tail)) {
> >> +				kfree(mem);
> >> +				return -ENOMEM;
> >> +			}
> >> +		}
> >> +
> >
> > Adding this code under the "if(!*inputarg)" implicitly ties the lifecycle of
> > synic_eventring_tail to the lifecycle of hyperv_pcpu_input_arg and
> > hyperv_pcpu_output_arg. Is there some logical relationship between the
> > two that warrants tying the lifecycles together (other than just both being
> > per-cpu)?  hyperv_pcpu_input_arg and hyperv_pcpu_output_arg have an
> > unusual lifecycle management in that they aren't freed when a CPU goes
> > offline, as described in the comment in hv_common_cpu_die(). Does
> > synic_eventring_tail also need that same unusual lifecycle?
> >
> I thought about it, and no I don't think it shares the same exact lifecycle.
> It's only used by the mshv_root driver - it just needs to remain present
> whenever there's a chance the module could be re-inserted and expect it to
> be there.
> 
> > Assuming there's no logical relationship, I'm thinking synic_eventring_tail
> > should be managed independent of the other two. If it does need the
> > unusual lifecycle, make sure to add a comment in hv_common_cpu_die()
> > explaining why. If it doesn't need the unusual lifecycle, maybe just do
> > the normal thing of allocating it in hv_common_cpu_init() and freeing
> > it in hv_common_cpu_die().
> >
> Yep, I suppose it should just be freed normally then, assuming
> hv_common_cpu_die() is only called when the hypervisor is going to reset
> (or remove) the synic pages for this partition. Is that the case here?
> 

Yes, it is the case here. A particular vCPU can be taken offline
independent of other vCPUs in the VM (such as by writing "0"
to /sys/devices/system/cpu/cpu<nn>/online). When that happens
the vCPU going offline runs hv_synic_cleanup() first, and then it
runs hv_cpu_die(), which calls hv_common_cpu_die(). So by the
time hv_common_cpu_die() runs, the synic_message_page and
synic_event_page will have been unmapped and the pointers set
to NULL.

On arm64, there is no hv_cpu_init()/die(), and the "common"
versions are called directly. Perhaps at some point in the future there
will be arm64 specific things to be done, and hv_cpu_init()/die()
will need to be added. But the ordering is the same and
hv_synic_cleanup() runs first.

So, yes, since synic_eventring_tail is tied to the synic, it sounds like
the normal lifecycle could be used, like with the VP assist page that
is handled in hv_cpu_init()/die() on x86.

> Otherwise we'd want to retain it, in case mshv_root ever needs it again for
> that CPU in the lifetime of this partition.
> 
> Nuno
> 
> > The code as written in your patch isn't wrong and would work OK. But
> > the structure implies a relationship with hyperv_pcpu_*_arg that I
> > suspect doesn't exist.
> >
> > Michael
> >
> >>  		if (!ms_hyperv.paravisor_present &&
> >>  		    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
> >>  			ret = set_memory_decrypted((unsigned long)mem, pgcount);
> >> --
> >> 2.34.1
Nuno Das Neves March 7, 2025, 11:31 p.m. UTC | #5
On 3/7/2025 3:21 PM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, March 7, 2025 2:07 PM
>>
>> On 3/7/2025 9:02 AM, Michael Kelley wrote:
>>> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, February 26, 2025 3:08 PM
>>>>
>>>> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
>>>> SynIC event ring buffer for each SINT.
>>>>
>>>> This will be used by the mshv driver, but must be tracked independently
>>>> since the driver module could be removed and re-inserted.
>>>>
>>>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>>>> Reviewed-by: Wei Liu <wei.liu@kernel.org>
>>>> ---
>>>>  drivers/hv/hv_common.c | 34 ++++++++++++++++++++++++++++++++--
>>>>  1 file changed, 32 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>>>> index 252fd66ad4db..2763cb6d3678 100644
>>>> --- a/drivers/hv/hv_common.c
>>>> +++ b/drivers/hv/hv_common.c
>>>> @@ -68,6 +68,16 @@ static void hv_kmsg_dump_unregister(void);
>>>>
>>>>  static struct ctl_table_header *hv_ctl_table_hdr;
>>>>
>>>> +/*
>>>> + * Per-cpu array holding the tail pointer for the SynIC event ring buffer
>>>> + * for each SINT.
>>>> + *
>>>> + * We cannot maintain this in mshv driver because the tail pointer should
>>>> + * persist even if the mshv driver is unloaded.
>>>> + */
>>>> +u8 __percpu **hv_synic_eventring_tail;
>>>
>>> I think the "__percpu" is in the wrong place here. This placement
>>> is likely to cause errors from the "sparse" tool.  It should be
>>>
>>> u8 * __percpu *hv_synic_eventring_tail;
>>>
>>> See the way hyperv_pcpu_input_arg, for example, is defined.  And
>>> see commit db3c65bc3a13 where I fixed hyperv_pcpu_input_arg.
>>>
>> Thanks. I'll fix it.
>>
>>>> +EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
>>>
>>> The "extern" declaration for this variable is in Patch 10 of the series
>>> in drivers/hv/mshv_root.h. I guess that's OK, but I would normally
>>> expect to find such a declaration in the header file associated with
>>> where the variable is defined, which in this case is mshyperv.h.
>>> Perhaps you are trying to restrict its usage to just mshv?
>>>
>> Yes, that's the idea - it should only be used by the driver.
>>
>>>> +
>>>>  /*
>>>>   * Hyper-V specific initialization and shutdown code that is
>>>>   * common across all architectures.  Called from architecture
>>>> @@ -90,6 +100,9 @@ void __init hv_common_free(void)
>>>>
>>>>  	free_percpu(hyperv_pcpu_input_arg);
>>>>  	hyperv_pcpu_input_arg = NULL;
>>>> +
>>>> +	free_percpu(hv_synic_eventring_tail);
>>>> +	hv_synic_eventring_tail = NULL;
>>>>  }
>>>>
>>>>  /*
>>>> @@ -372,6 +385,11 @@ int __init hv_common_init(void)
>>>>  		BUG_ON(!hyperv_pcpu_output_arg);
>>>>  	}
>>>>
>>>> +	if (hv_root_partition()) {
>>>> +		hv_synic_eventring_tail = alloc_percpu(u8 *);
>>>> +		BUG_ON(hv_synic_eventring_tail == NULL);
>>>> +	}
>>>> +
>>>>  	hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
>>>>  				    GFP_KERNEL);
>>>>  	if (!hv_vp_index) {
>>>> @@ -460,6 +478,7 @@ void __init ms_hyperv_late_init(void)
>>>>  int hv_common_cpu_init(unsigned int cpu)
>>>>  {
>>>>  	void **inputarg, **outputarg;
>>>> +	u8 **synic_eventring_tail;
>>>>  	u64 msr_vp_index;
>>>>  	gfp_t flags;
>>>>  	const int pgcount = hv_output_page_exists() ? 2 : 1;
>>>> @@ -472,8 +491,8 @@ int hv_common_cpu_init(unsigned int cpu)
>>>>  	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
>>>>
>>>>  	/*
>>>> -	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
>>>> -	 * allocated if this CPU was previously online and then taken offline
>>>> +	 * The per-cpu memory is already allocated if this CPU was previously
>>>> +	 * online and then taken offline
>>>>  	 */
>>>>  	if (!*inputarg) {
>>>>  		mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
>>>> @@ -485,6 +504,17 @@ int hv_common_cpu_init(unsigned int cpu)
>>>>  			*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
>>>>  		}
>>>>
>>>> +		if (hv_root_partition()) {
>>>> +			synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
>>>> +			*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
>>>> +							sizeof(u8), flags);
>>>> +
>>>> +			if (unlikely(!*synic_eventring_tail)) {
>>>> +				kfree(mem);
>>>> +				return -ENOMEM;
>>>> +			}
>>>> +		}
>>>> +
>>>
>>> Adding this code under the "if(!*inputarg)" implicitly ties the lifecycle of
>>> synic_eventring_tail to the lifecycle of hyperv_pcpu_input_arg and
>>> hyperv_pcpu_output_arg. Is there some logical relationship between the
>>> two that warrants tying the lifecycles together (other than just both being
>>> per-cpu)?  hyperv_pcpu_input_arg and hyperv_pcpu_output_arg have an
>>> unusual lifecycle management in that they aren't freed when a CPU goes
>>> offline, as described in the comment in hv_common_cpu_die(). Does
>>> synic_eventring_tail also need that same unusual lifecycle?
>>>
>> I thought about it, and no I don't think it shares the same exact lifecycle.
>> It's only used by the mshv_root driver - it just needs to remain present
>> whenever there's a chance the module could be re-inserted and expect it to
>> be there.
>>
>>> Assuming there's no logical relationship, I'm thinking synic_eventring_tail
>>> should be managed independent of the other two. If it does need the
>>> unusual lifecycle, make sure to add a comment in hv_common_cpu_die()
>>> explaining why. If it doesn't need the unusual lifecycle, maybe just do
>>> the normal thing of allocating it in hv_common_cpu_init() and freeing
>>> it in hv_common_cpu_die().
>>>
>> Yep, I suppose it should just be freed normally then, assuming
>> hv_common_cpu_die() is only called when the hypervisor is going to reset
>> (or remove) the synic pages for this partition. Is that the case here?
>>
> 
> Yes, it is the case here. A particular vCPU can be taken offline
> independent of other vCPUs in the VM (such as by writing "0"
> to /sys/devices/system/cpu/cpu<nn>/online). When that happens
> the vCPU going offline runs hv_synic_cleanup() first, and then it
> runs hv_cpu_die(), which calls hv_common_cpu_die(). So by the
> time hv_common_cpu_die() runs, the synic_message_page and
> synic_event_page will have been unmapped and the pointers set
> to NULL.
> 
> On arm64, there is no hv_cpu_init()/die(), and the "common"
> versions are called directly. Perhaps at some point in the future there
> will be arm64 specific things to be done, and hv_cpu_init()/die()
> will need to be added. But the ordering is the same and
> hv_synic_cleanup() runs first.
> 
> So, yes, since synic_eventring_tail is tied to the synic, it sounds like
> the normal lifecycle could be used, like with the VP assist page that
> is handled in hv_cpu_init()/die() on x86.
> 
Great, thanks for the clarification! I'll fix it for v6.

Nuno

>> Otherwise we'd want to retain it, in case mshv_root ever needs it again for
>> that CPU in the lifetime of this partition.
>>
>> Nuno
>>
>>> The code as written in your patch isn't wrong and would work OK. But
>>> the structure implies a relationship with hyperv_pcpu_*_arg that I
>>> suspect doesn't exist.
>>>
>>> Michael
>>>
>>>>  		if (!ms_hyperv.paravisor_present &&
>>>>  		    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
>>>>  			ret = set_memory_decrypted((unsigned long)mem, pgcount);
>>>> --
>>>> 2.34.1
>
Michael Kelley March 7, 2025, 11:37 p.m. UTC | #6
From: Michael Kelley <mhklinux@outlook.com> Sent: Friday, March 7, 2025 3:21 PM
> 
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, March 7, 2025
> 2:07 PM
> >

[snip]

> > >> @@ -485,6 +504,17 @@ int hv_common_cpu_init(unsigned int cpu)
> > >>  			*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
> > >>  		}
> > >>
> > >> +		if (hv_root_partition()) {
> > >> +			synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
> > >> +			*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
> > >> +							sizeof(u8), flags);
> > >> +
> > >> +			if (unlikely(!*synic_eventring_tail)) {
> > >> +				kfree(mem);
> > >> +				return -ENOMEM;
> > >> +			}
> > >> +		}
> > >> +
> > >
> > > Adding this code under the "if(!*inputarg)" implicitly ties the lifecycle of
> > > synic_eventring_tail to the lifecycle of hyperv_pcpu_input_arg and
> > > hyperv_pcpu_output_arg. Is there some logical relationship between the
> > > two that warrants tying the lifecycles together (other than just both being
> > > per-cpu)?  hyperv_pcpu_input_arg and hyperv_pcpu_output_arg have an
> > > unusual lifecycle management in that they aren't freed when a CPU goes
> > > offline, as described in the comment in hv_common_cpu_die(). Does
> > > synic_eventring_tail also need that same unusual lifecycle?
> > >
> > I thought about it, and no I don't think it shares the same exact lifecycle.
> > It's only used by the mshv_root driver - it just needs to remain present
> > whenever there's a chance the module could be re-inserted and expect it to
> > be there.
> >
> > > Assuming there's no logical relationship, I'm thinking synic_eventring_tail
> > > should be managed independent of the other two. If it does need the
> > > unusual lifecycle, make sure to add a comment in hv_common_cpu_die()
> > > explaining why. If it doesn't need the unusual lifecycle, maybe just do
> > > the normal thing of allocating it in hv_common_cpu_init() and freeing
> > > it in hv_common_cpu_die().
> > >
> > Yep, I suppose it should just be freed normally then, assuming
> > hv_common_cpu_die() is only called when the hypervisor is going to reset
> > (or remove) the synic pages for this partition. Is that the case here?
> >
> 
> Yes, it is the case here. A particular vCPU can be taken offline
> independent of other vCPUs in the VM (such as by writing "0"
> to /sys/devices/system/cpu/cpu<nn>/online). When that happens
> the vCPU going offline runs hv_synic_cleanup() first, and then it
> runs hv_cpu_die(), which calls hv_common_cpu_die(). So by the
> time hv_common_cpu_die() runs, the synic_message_page and
> synic_event_page will have been unmapped and the pointers set
> to NULL.
> 
> On arm64, there is no hv_cpu_init()/die(), and the "common"
> versions are called directly. Perhaps at some point in the future there
> will be arm64 specific things to be done, and hv_cpu_init()/die()
> will need to be added. But the ordering is the same and
> hv_synic_cleanup() runs first.
> 
> So, yes, since synic_eventring_tail is tied to the synic, it sounds like
> the normal lifecycle could be used, like with the VP assist page that
> is handled in hv_cpu_init()/die() on x86.
> 

One more thought:

Perhaps there's more affinity with synic code than with generic
per-cpu memory, and it would be even better to allocate and
free the synic_eventring_tail memory for each vCPU in
hv_synic_init()/cleanup(), or hv_synic_enable/disable_regs().
There's potentially some interaction with hibernate suspend/resume,
which I assume isn't a valid scenario for the root partition. But I
haven't thought through all the details.

Michael
Tianyu Lan March 10, 2025, 1:01 p.m. UTC | #7
On Thu, Feb 27, 2025 at 7:09 AM Nuno Das Neves
<nunodasneves@linux.microsoft.com> wrote:
>
> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
> SynIC event ring buffer for each SINT.
>
> This will be used by the mshv driver, but must be tracked independently
> since the driver module could be removed and re-inserted.
>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> Reviewed-by: Wei Liu <wei.liu@kernel.org>

It's better to expose a function to check the tail instead of exposing
hv_synic_eventring_tail directly.

BTW, how does mshv driver use hv_synic_eventring_tail? Which patch
uses it in this series?

Thanks.


> ---
>  drivers/hv/hv_common.c | 34 ++++++++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 252fd66ad4db..2763cb6d3678 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -68,6 +68,16 @@ static void hv_kmsg_dump_unregister(void);
>
>  static struct ctl_table_header *hv_ctl_table_hdr;
>
> +/*
> + * Per-cpu array holding the tail pointer for the SynIC event ring buffer
> + * for each SINT.
> + *
> + * We cannot maintain this in mshv driver because the tail pointer should
> + * persist even if the mshv driver is unloaded.
> + */
> +u8 __percpu **hv_synic_eventring_tail;
> +EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
> +
>  /*
>   * Hyper-V specific initialization and shutdown code that is
>   * common across all architectures.  Called from architecture
> @@ -90,6 +100,9 @@ void __init hv_common_free(void)
>
>         free_percpu(hyperv_pcpu_input_arg);
>         hyperv_pcpu_input_arg = NULL;
> +
> +       free_percpu(hv_synic_eventring_tail);
> +       hv_synic_eventring_tail = NULL;
>  }
>
>  /*
> @@ -372,6 +385,11 @@ int __init hv_common_init(void)
>                 BUG_ON(!hyperv_pcpu_output_arg);
>         }
>
> +       if (hv_root_partition()) {
> +               hv_synic_eventring_tail = alloc_percpu(u8 *);
> +               BUG_ON(hv_synic_eventring_tail == NULL);
> +       }
> +
>         hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
>                                     GFP_KERNEL);
>         if (!hv_vp_index) {
> @@ -460,6 +478,7 @@ void __init ms_hyperv_late_init(void)
>  int hv_common_cpu_init(unsigned int cpu)
>  {
>         void **inputarg, **outputarg;
> +       u8 **synic_eventring_tail;
>         u64 msr_vp_index;
>         gfp_t flags;
>         const int pgcount = hv_output_page_exists() ? 2 : 1;
> @@ -472,8 +491,8 @@ int hv_common_cpu_init(unsigned int cpu)
>         inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
>
>         /*
> -        * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
> -        * allocated if this CPU was previously online and then taken offline
> +        * The per-cpu memory is already allocated if this CPU was previously
> +        * online and then taken offline
>          */
>         if (!*inputarg) {
>                 mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
> @@ -485,6 +504,17 @@ int hv_common_cpu_init(unsigned int cpu)
>                         *outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
>                 }
>
> +               if (hv_root_partition()) {
> +                       synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
> +                       *synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
> +                                                       sizeof(u8), flags);
> +
> +                       if (unlikely(!*synic_eventring_tail)) {
> +                               kfree(mem);
> +                               return -ENOMEM;
> +                       }
> +               }
> +
>                 if (!ms_hyperv.paravisor_present &&
>                     (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
>                         ret = set_memory_decrypted((unsigned long)mem, pgcount);
> --
> 2.34.1
>
>
Tianyu Lan March 13, 2025, 7:34 a.m. UTC | #8
On Thu, Mar 13, 2025 at 3:45 AM Nuno Das Neves
<nunodasneves@linux.microsoft.com> wrote:
>
> On 3/10/2025 6:01 AM, Tianyu Lan wrote:
> > On Thu, Feb 27, 2025 at 7:09 AM Nuno Das Neves
> > <nunodasneves@linux.microsoft.com> wrote:
> >>
> >> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
> >> SynIC event ring buffer for each SINT.
> >>
> >> This will be used by the mshv driver, but must be tracked independently
> >> since the driver module could be removed and re-inserted.
> >>
> >> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> >> Reviewed-by: Wei Liu <wei.liu@kernel.org>
> >
> > It's better to expose a function to check the tail instead of exposing
> > hv_synic_eventring_tail directly.
> >
> What is the advantage of using a function for this? We need to both set
> and get the tail.

We may add lock or check to avoid race conditions and this depends on the
user case. This is why I want to see how mshv driver uses it.

>
> > BTW, how does mshv driver use hv_synic_eventring_tail? Which patch
> > uses it in this series?
> >
> This variable stores indices into the synic eventring page (one for each
> SINT, and per-cpu). Each SINT has a ringbuffer of u32 messages. The tail
> index points to the latest one.
>
> This is only used for doorbell messages today. The message in this case is
> a port number which is used to lookup and invoke a callback, which signals
> ioeventfd(s), to notify the VMM of a guest MMIO write.
>
> It is used in patch 10.

I found "extern u8 __percpu **hv_synic_eventring_tail;" in the
drivers/hv/mshv_root.h of patch 10.
I seem to miss the code to use it.

+int hv_call_unmap_stat_page(enum hv_stats_object_type type,
+                           const union hv_stats_object_identity *identity);
+int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
+                                  u64 page_struct_count, u32 host_access,
+                                  u32 flags, u8 acquire);
+
+extern struct mshv_root mshv_root;
+extern enum hv_scheduler_type hv_scheduler_type;
+extern u8 __percpu **hv_synic_eventring_tail;
+
+#endif /* _MSHV_ROOT_H_ */
Nuno Das Neves March 13, 2025, 3:56 p.m. UTC | #9
On 3/13/2025 12:34 AM, Tianyu Lan wrote:
> On Thu, Mar 13, 2025 at 3:45 AM Nuno Das Neves
> <nunodasneves@linux.microsoft.com> wrote:
>>
>> On 3/10/2025 6:01 AM, Tianyu Lan wrote:
>>> On Thu, Feb 27, 2025 at 7:09 AM Nuno Das Neves
>>> <nunodasneves@linux.microsoft.com> wrote:
>>>>
>>>> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
>>>> SynIC event ring buffer for each SINT.
>>>>
>>>> This will be used by the mshv driver, but must be tracked independently
>>>> since the driver module could be removed and re-inserted.
>>>>
>>>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>>>> Reviewed-by: Wei Liu <wei.liu@kernel.org>
>>>
>>> It's better to expose a function to check the tail instead of exposing
>>> hv_synic_eventring_tail directly.
>>>
>> What is the advantage of using a function for this? We need to both set
>> and get the tail.
> 
> We may add lock or check to avoid race conditions and this depends on the
> user case. This is why I want to see how mshv driver uses it.
> 
>>
>>> BTW, how does mshv driver use hv_synic_eventring_tail? Which patch
>>> uses it in this series?
>>>
>> This variable stores indices into the synic eventring page (one for each
>> SINT, and per-cpu). Each SINT has a ringbuffer of u32 messages. The tail
>> index points to the latest one.
>>
>> This is only used for doorbell messages today. The message in this case is
>> a port number which is used to lookup and invoke a callback, which signals
>> ioeventfd(s), to notify the VMM of a guest MMIO write.
>>
>> It is used in patch 10.
> 
> I found "extern u8 __percpu **hv_synic_eventring_tail;" in the
> drivers/hv/mshv_root.h of patch 10.
> I seem to miss the code to use it.
> 
> +int hv_call_unmap_stat_page(enum hv_stats_object_type type,
> +                           const union hv_stats_object_identity *identity);
> +int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
> +                                  u64 page_struct_count, u32 host_access,
> +                                  u32 flags, u8 acquire);
> +
> +extern struct mshv_root mshv_root;
> +extern enum hv_scheduler_type hv_scheduler_type;
> +extern u8 __percpu **hv_synic_eventring_tail;
> +
> +#endif /* _MSHV_ROOT_H_ */
> 

It is used in mshv_synic.c in synic_event_ring_get_queued_port():

diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c
new file mode 100644
index 000000000000..e7782f92e339
--- /dev/null
+++ b/drivers/hv/mshv_synic.c
@@ -0,0 +1,665 @@
<snip>
+static u32 synic_event_ring_get_queued_port(u32 sint_index)
+{
+	struct hv_synic_event_ring_page **event_ring_page;
+	volatile struct hv_synic_event_ring *ring;
+	struct hv_synic_pages *spages;
+	u8 **synic_eventring_tail;
+	u32 message;
+	u8 tail;
+
+	spages = this_cpu_ptr(mshv_root.synic_pages);
+	event_ring_page = &spages->synic_event_ring_page;
+	synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
+	tail = (*synic_eventring_tail)[sint_index];
Tianyu Lan March 13, 2025, 4 p.m. UTC | #10
On Thu, Mar 13, 2025 at 11:56 PM Nuno Das Neves
<nunodasneves@linux.microsoft.com> wrote:
>
> On 3/13/2025 12:34 AM, Tianyu Lan wrote:
> > On Thu, Mar 13, 2025 at 3:45 AM Nuno Das Neves
> > <nunodasneves@linux.microsoft.com> wrote:
> >>
> >> On 3/10/2025 6:01 AM, Tianyu Lan wrote:
> >>> On Thu, Feb 27, 2025 at 7:09 AM Nuno Das Neves
> >>> <nunodasneves@linux.microsoft.com> wrote:
> >>>>
> >>>> Add a pointer hv_synic_eventring_tail to track the tail pointer for the
> >>>> SynIC event ring buffer for each SINT.
> >>>>
> >>>> This will be used by the mshv driver, but must be tracked independently
> >>>> since the driver module could be removed and re-inserted.
> >>>>
> >>>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> >>>> Reviewed-by: Wei Liu <wei.liu@kernel.org>
> >>>
> >>> It's better to expose a function to check the tail instead of exposing
> >>> hv_synic_eventring_tail directly.
> >>>
> >> What is the advantage of using a function for this? We need to both set
> >> and get the tail.
> >
> > We may add lock or check to avoid race conditions and this depends on the
> > user case. This is why I want to see how mshv driver uses it.
> >
> >>
> >>> BTW, how does mshv driver use hv_synic_eventring_tail? Which patch
> >>> uses it in this series?
> >>>
> >> This variable stores indices into the synic eventring page (one for each
> >> SINT, and per-cpu). Each SINT has a ringbuffer of u32 messages. The tail
> >> index points to the latest one.
> >>
> >> This is only used for doorbell messages today. The message in this case is
> >> a port number which is used to lookup and invoke a callback, which signals
> >> ioeventfd(s), to notify the VMM of a guest MMIO write.
> >>
> >> It is used in patch 10.
> >
> > I found "extern u8 __percpu **hv_synic_eventring_tail;" in the
> > drivers/hv/mshv_root.h of patch 10.
> > I seem to miss the code to use it.
> >
> > +int hv_call_unmap_stat_page(enum hv_stats_object_type type,
> > +                           const union hv_stats_object_identity *identity);
> > +int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
> > +                                  u64 page_struct_count, u32 host_access,
> > +                                  u32 flags, u8 acquire);
> > +
> > +extern struct mshv_root mshv_root;
> > +extern enum hv_scheduler_type hv_scheduler_type;
> > +extern u8 __percpu **hv_synic_eventring_tail;
> > +
> > +#endif /* _MSHV_ROOT_H_ */
> >
>
> It is used in mshv_synic.c in synic_event_ring_get_queued_port():
>
> diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c
> new file mode 100644
> index 000000000000..e7782f92e339
> --- /dev/null
> +++ b/drivers/hv/mshv_synic.c
> @@ -0,0 +1,665 @@
> <snip>
> +static u32 synic_event_ring_get_queued_port(u32 sint_index)
> +{
> +       struct hv_synic_event_ring_page **event_ring_page;
> +       volatile struct hv_synic_event_ring *ring;
> +       struct hv_synic_pages *spages;
> +       u8 **synic_eventring_tail;
> +       u32 message;
> +       u8 tail;
> +
> +       spages = this_cpu_ptr(mshv_root.synic_pages);
> +       event_ring_page = &spages->synic_event_ring_page;
> +       synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
> +       tail = (*synic_eventring_tail)[sint_index];

OK. I got it. Thanks.
diff mbox series

Patch

diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 252fd66ad4db..2763cb6d3678 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -68,6 +68,16 @@  static void hv_kmsg_dump_unregister(void);
 
 static struct ctl_table_header *hv_ctl_table_hdr;
 
+/*
+ * Per-cpu array holding the tail pointer for the SynIC event ring buffer
+ * for each SINT.
+ *
+ * We cannot maintain this in mshv driver because the tail pointer should
+ * persist even if the mshv driver is unloaded.
+ */
+u8 __percpu **hv_synic_eventring_tail;
+EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
+
 /*
  * Hyper-V specific initialization and shutdown code that is
  * common across all architectures.  Called from architecture
@@ -90,6 +100,9 @@  void __init hv_common_free(void)
 
 	free_percpu(hyperv_pcpu_input_arg);
 	hyperv_pcpu_input_arg = NULL;
+
+	free_percpu(hv_synic_eventring_tail);
+	hv_synic_eventring_tail = NULL;
 }
 
 /*
@@ -372,6 +385,11 @@  int __init hv_common_init(void)
 		BUG_ON(!hyperv_pcpu_output_arg);
 	}
 
+	if (hv_root_partition()) {
+		hv_synic_eventring_tail = alloc_percpu(u8 *);
+		BUG_ON(hv_synic_eventring_tail == NULL);
+	}
+
 	hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
 				    GFP_KERNEL);
 	if (!hv_vp_index) {
@@ -460,6 +478,7 @@  void __init ms_hyperv_late_init(void)
 int hv_common_cpu_init(unsigned int cpu)
 {
 	void **inputarg, **outputarg;
+	u8 **synic_eventring_tail;
 	u64 msr_vp_index;
 	gfp_t flags;
 	const int pgcount = hv_output_page_exists() ? 2 : 1;
@@ -472,8 +491,8 @@  int hv_common_cpu_init(unsigned int cpu)
 	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
 
 	/*
-	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
-	 * allocated if this CPU was previously online and then taken offline
+	 * The per-cpu memory is already allocated if this CPU was previously
+	 * online and then taken offline
 	 */
 	if (!*inputarg) {
 		mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
@@ -485,6 +504,17 @@  int hv_common_cpu_init(unsigned int cpu)
 			*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
 		}
 
+		if (hv_root_partition()) {
+			synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
+			*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
+							sizeof(u8), flags);
+
+			if (unlikely(!*synic_eventring_tail)) {
+				kfree(mem);
+				return -ENOMEM;
+			}
+		}
+
 		if (!ms_hyperv.paravisor_present &&
 		    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
 			ret = set_memory_decrypted((unsigned long)mem, pgcount);