diff mbox series

[3/8] scsi: ufshpb: Add region's reads counter

Message ID 20210127151217.24760-4-avri.altman@wdc.com
State New
Headers show
Series Add Host control mode to HPB | expand

Commit Message

Avri Altman Jan. 27, 2021, 3:12 p.m. UTC
In host control mode, reads are the major source of activation trials.
Keep track of those read counters, for both active as well as inactive
regions.

We reset the read counter upon write - we are only interested in "clean"
reads.  Less intuitive, however, is that we also reset it upon a region's
deactivation.  Region deactivation is often due to the fact that
eviction took place: a region becomes active at the expense of another.
This happens when the max-active-regions limit has been crossed. If we
don’t reset the counter, we will trigger a lot of thrashing of the HPB
database, since a few reads (or even one) to the region that was
deactivated will trigger a re-activation trial.

Keep those counters normalized, as we are using those reads as a
comparative score, to make various decisions.
If during consecutive normalizations an active region has exhausted its
reads - inactivate it.

Signed-off-by: Avri Altman <avri.altman@wdc.com>
---
 drivers/scsi/ufs/ufshpb.c | 96 +++++++++++++++++++++++++++++++++------
 drivers/scsi/ufs/ufshpb.h |  5 ++
 2 files changed, 86 insertions(+), 15 deletions(-)

Comments

gregkh@linuxfoundation.org Jan. 27, 2021, 3:21 p.m. UTC | #1
On Wed, Jan 27, 2021 at 05:12:12PM +0200, Avri Altman wrote:
> In host control mode, reads are the major source of activation trials.
> Keep track of those reads counters, for both active as well inactive
> regions.
> 
> We reset the read counter upon write - we are only interested in "clean"
> reads.  less intuitive however, is that we also reset it upon region's
> deactivation.  Region deactivation is often due to the fact that
> eviction took place: a region become active on the expense of another.
> This is happening when the max-active-regions limit has crossed. If we
> don’t reset the counter, we will trigger a lot of trashing of the HPB
> database, since few reads (or even one) to the region that was
> deactivated, will trigger a re-activation trial.
> 
> Keep those counters normalized, as we are using those reads as a
> comparative score, to make various decisions.
> If during consecutive normalizations an active region has exhaust its
> reads - inactivate it.
> 
> Signed-off-by: Avri Altman <avri.altman@wdc.com>
> ---
>  drivers/scsi/ufs/ufshpb.c | 96 +++++++++++++++++++++++++++++++++------
>  drivers/scsi/ufs/ufshpb.h |  5 ++
>  2 files changed, 86 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
> index 5fa1f5bc08e6..51c3607166bc 100644
> --- a/drivers/scsi/ufs/ufshpb.c
> +++ b/drivers/scsi/ufs/ufshpb.c
> @@ -16,6 +16,9 @@
>  #include "ufshpb.h"
>  #include "../sd.h"
>  
> +#define WORK_PENDING 0
> +#define ACTIVATION_THRSHLD 4 /* 4 IOs */
> +
>  /* memory management */
>  static struct kmem_cache *ufshpb_mctx_cache;
>  static mempool_t *ufshpb_mctx_pool;
> @@ -261,6 +264,21 @@ ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct ufshcd_lrb *lrbp,
>  	cdb[14] = transfer_len;
>  }
>  
> +static void ufshpb_update_active_info(struct ufshpb_lu *hpb, int rgn_idx,
> +				      int srgn_idx)
> +{
> +	struct ufshpb_region *rgn;
> +	struct ufshpb_subregion *srgn;
> +
> +	rgn = hpb->rgn_tbl + rgn_idx;
> +	srgn = rgn->srgn_tbl + srgn_idx;
> +
> +	list_del_init(&rgn->list_inact_rgn);
> +
> +	if (list_empty(&srgn->list_act_srgn))
> +		list_add_tail(&srgn->list_act_srgn, &hpb->lh_act_srgn);
> +}
> +
>  /*
>   * This function will set up HPB read command using host-side L2P map data.
>   * In HPB v1.0, maximum size of HPB read command is 4KB.
> @@ -276,6 +294,7 @@ void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
>  	unsigned long flags;
>  	int transfer_len, rgn_idx, srgn_idx, srgn_offset;
>  	int err = 0;
> +	u64 reads;
>  
>  	hpb = ufshpb_get_hpb_data(cmd->device);
>  	if (!hpb)
> @@ -306,12 +325,39 @@ void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
>  		ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
>  				 transfer_len);
>  		spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
> +
> +		if (ufshpb_mode == HPB_HOST_CONTROL)
> +			atomic64_set(&rgn->reads, 0);
> +
>  		return;
>  	}
>  
> +	if (ufshpb_mode == HPB_HOST_CONTROL)
> +		reads = atomic64_inc_return(&rgn->reads);
> +
>  	if (!ufshpb_is_support_chunk(transfer_len))
>  		return;
>  
> +	if (ufshpb_mode == HPB_HOST_CONTROL) {
> +		/*
> +		 * in host control mode, reads are the main source for
> +		 * activation trials.
> +		 */
> +		if (reads == ACTIVATION_THRSHLD) {
> +			spin_lock_irqsave(&hpb->rsp_list_lock, flags);
> +			ufshpb_update_active_info(hpb, rgn_idx, srgn_idx);
> +			hpb->stats.rb_active_cnt++;
> +			spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
> +			dev_dbg(&hpb->sdev_ufs_lu->sdev_dev,
> +				"activate region %d-%d\n", rgn_idx, srgn_idx);
> +		}
> +
> +		/* keep those counters normalized */
> +		if (reads > hpb->entries_per_srgn &&
> +		    !test_and_set_bit(WORK_PENDING, &hpb->work_data_bits))
> +			schedule_work(&hpb->ufshpb_normalization_work);
> +	}
> +
>  	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
>  	if (ufshpb_test_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
>  				   transfer_len)) {
> @@ -396,21 +442,6 @@ static int ufshpb_clear_dirty_bitmap(struct ufshpb_lu *hpb,
>  	return 0;
>  }
>  
> -static void ufshpb_update_active_info(struct ufshpb_lu *hpb, int rgn_idx,
> -				      int srgn_idx)
> -{
> -	struct ufshpb_region *rgn;
> -	struct ufshpb_subregion *srgn;
> -
> -	rgn = hpb->rgn_tbl + rgn_idx;
> -	srgn = rgn->srgn_tbl + srgn_idx;
> -
> -	list_del_init(&rgn->list_inact_rgn);
> -
> -	if (list_empty(&srgn->list_act_srgn))
> -		list_add_tail(&srgn->list_act_srgn, &hpb->lh_act_srgn);
> -}
> -
>  static void ufshpb_update_inactive_info(struct ufshpb_lu *hpb, int rgn_idx)
>  {
>  	struct ufshpb_region *rgn;
> @@ -646,6 +677,9 @@ static void __ufshpb_evict_region(struct ufshpb_lu *hpb,
>  
>  	ufshpb_cleanup_lru_info(lru_info, rgn);
>  
> +	if (ufshpb_mode == HPB_HOST_CONTROL)
> +		atomic64_set(&rgn->reads, 0);
> +
>  	for_each_sub_region(rgn, srgn_idx, srgn)
>  		ufshpb_purge_active_subregion(hpb, srgn);
>  }
> @@ -1044,6 +1078,33 @@ static void ufshpb_run_inactive_region_list(struct ufshpb_lu *hpb)
>  	spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
>  }
>  
> +static void ufshpb_normalization_work_handler(struct work_struct *work)
> +{
> +	struct ufshpb_lu *hpb;
> +	int rgn_idx;
> +
> +	hpb = container_of(work, struct ufshpb_lu, ufshpb_normalization_work);
> +
> +	for (rgn_idx = 0; rgn_idx < hpb->rgns_per_lu; rgn_idx++) {
> +		struct ufshpb_region *rgn = hpb->rgn_tbl + rgn_idx;
> +		u64 reads = atomic64_read(&rgn->reads);
> +
> +		if (reads)
> +			atomic64_set(&rgn->reads, reads >> 1);
> +
> +		if (rgn->rgn_state != HPB_RGN_ACTIVE ||
> +			atomic64_read(&rgn->reads))
> +			continue;
> +
> +		/* if region is active but has no reads - inactivate it */
> +		spin_lock(&hpb->rsp_list_lock);
> +		ufshpb_update_inactive_info(hpb, rgn->rgn_idx);
> +		spin_unlock(&hpb->rsp_list_lock);
> +	}
> +
> +	clear_bit(WORK_PENDING, &hpb->work_data_bits);
> +}
> +
>  static void ufshpb_map_work_handler(struct work_struct *work)
>  {
>  	struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu, map_work);
> @@ -1308,6 +1369,9 @@ static int ufshpb_lu_hpb_init(struct ufs_hba *hba, struct ufshpb_lu *hpb)
>  	INIT_LIST_HEAD(&hpb->list_hpb_lu);
>  
>  	INIT_WORK(&hpb->map_work, ufshpb_map_work_handler);
> +	if (ufshpb_mode == HPB_HOST_CONTROL)
> +		INIT_WORK(&hpb->ufshpb_normalization_work,
> +			  ufshpb_normalization_work_handler);
>  
>  	hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",
>  			  sizeof(struct ufshpb_req), 0, 0, NULL);
> @@ -1394,6 +1458,8 @@ static void ufshpb_discard_rsp_lists(struct ufshpb_lu *hpb)
>  
>  static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)
>  {
> +	if (ufshpb_mode == HPB_HOST_CONTROL)
> +		cancel_work_sync(&hpb->ufshpb_normalization_work);
>  	cancel_work_sync(&hpb->map_work);
>  }
>  
> diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h
> index 8a34b0f42754..b0e78728af38 100644
> --- a/drivers/scsi/ufs/ufshpb.h
> +++ b/drivers/scsi/ufs/ufshpb.h
> @@ -115,6 +115,9 @@ struct ufshpb_region {
>  	/* below information is used by lru */
>  	struct list_head list_lru_rgn;
>  	unsigned long rgn_flags;
> +
> +	/* region reads - for host mode */
> +	atomic64_t reads;

Why do you need an atomic variable for this?  What are you trying to
"protect" here by flushing the cpus all the time?  What protects this
variable from changing right after you have read from it (hint, you do
that above...)

atomics are not race-free, use a real lock if you need that.

thanks,

greg k-h
Avri Altman Jan. 31, 2021, 7:25 a.m. UTC | #2
> >

> > +     if (ufshpb_mode == HPB_HOST_CONTROL)

> > +             reads = atomic64_inc_return(&rgn->reads);

> > +

> >       if (!ufshpb_is_support_chunk(transfer_len))

> >               return;

> >

> > +     if (ufshpb_mode == HPB_HOST_CONTROL) {

> > +             /*

> > +              * in host control mode, reads are the main source for

> > +              * activation trials.

> > +              */

> > +             if (reads == ACTIVATION_THRSHLD) {

Oops - this is a bug...

> > +

> > +     /* region reads - for host mode */

> > +     atomic64_t reads;

> 

> Why do you need an atomic variable for this?  What are you trying to

> "protect" here by flushing the cpus all the time?  What protects this

> variable from changing right after you have read from it (hint, you do

> that above...)

> 

> atomics are not race-free, use a real lock if you need that.

We are on the data path here - this is called from queuecommand.
The "reads" counter is being symmetrically read and written,
so adding a spin lock here might become a source for contention.

Also, I am not worried about the exact value of this counter, except for one place -
see above.  Will fix it.

Thanks,
Avri
gregkh@linuxfoundation.org Jan. 31, 2021, 7:35 a.m. UTC | #3
On Sun, Jan 31, 2021 at 07:25:37AM +0000, Avri Altman wrote:
> > >

> > > +     if (ufshpb_mode == HPB_HOST_CONTROL)

> > > +             reads = atomic64_inc_return(&rgn->reads);

> > > +

> > >       if (!ufshpb_is_support_chunk(transfer_len))

> > >               return;

> > >

> > > +     if (ufshpb_mode == HPB_HOST_CONTROL) {

> > > +             /*

> > > +              * in host control mode, reads are the main source for

> > > +              * activation trials.

> > > +              */

> > > +             if (reads == ACTIVATION_THRSHLD) {

> Oops - this is a bug...

> 

> > > +

> > > +     /* region reads - for host mode */

> > > +     atomic64_t reads;

> > 

> > Why do you need an atomic variable for this?  What are you trying to

> > "protect" here by flushing the cpus all the time?  What protects this

> > variable from changing right after you have read from it (hint, you do

> > that above...)

> > 

> > atomics are not race-free, use a real lock if you need that.

> We are on the data path here - this is called from queuecommand.

> The "reads" counter is being symmetrically read and written,

> so adding a spin lock here might become a source for contention.


And an atomic variable is not?  You do know what spinlocks are made of,
right?  :)

> Also I am not worried about the exact value of this counter, except of one place - 

> See above.  Will fix it.


So it's not really needed?

thanks,

greg k-h
Daejun Park Feb. 1, 2021, 3:51 a.m. UTC | #4
Hi Avri,

Thanks for adding HCM support on HPB.
I have some opinions on this patch.

> +#define WORK_PENDING 0

> +#define ACTIVATION_THRSHLD 4 /* 4 IOs */

Rather than fixing it with macro, how about using sysfs and make it
configurable?

> @@ -306,12 +325,39 @@ void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)

>  		ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,

>  				 transfer_len);

>  		spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);

> +

> +		if (ufshpb_mode == HPB_HOST_CONTROL)

> +			atomic64_set(&rgn->reads, 0);

> +

>  		return;

>  	}

>  

> +	if (ufshpb_mode == HPB_HOST_CONTROL)

> +		reads = atomic64_inc_return(&rgn->reads);

> +

>  	if (!ufshpb_is_support_chunk(transfer_len))

>  		return; <- *this*

>  

> +	if (ufshpb_mode == HPB_HOST_CONTROL) {

> +		/*

> +		 * in host control mode, reads are the main source for

> +		 * activation trials.

> +		 */

> +		if (reads == ACTIVATION_THRSHLD) {

If the chunk size is not supported, this region may never be activated:
the function may return before reaching this statement.

> diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h

> index 8a34b0f42754..b0e78728af38 100644

> --- a/drivers/scsi/ufs/ufshpb.h

> +++ b/drivers/scsi/ufs/ufshpb.h

> @@ -115,6 +115,9 @@ struct ufshpb_region {

>  	/* below information is used by lru */

>  	struct list_head list_lru_rgn;

>  	unsigned long rgn_flags;

> +

> +	/* region reads - for host mode */

> +	atomic64_t reads;

I think 32 bits are suitable, because it is normalized by worker on every
specific time.

Thanks,
Daejun
Avri Altman Feb. 1, 2021, 7:12 a.m. UTC | #5
> > +#define WORK_PENDING 0

> > +#define ACTIVATION_THRSHLD 4 /* 4 IOs */

> Rather than fixing it with macro, how about using sysfs and make it

> configurable?

Yes.
I will add a patch making all the logic configurable.
As all those are hpb-related parameters, I think module parameters are more adequate.


> 

> > @@ -306,12 +325,39 @@ void ufshpb_prep(struct ufs_hba *hba, struct

> ufshcd_lrb *lrbp)

> >               ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,

> >                                transfer_len);

> >               spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);

> > +

> > +             if (ufshpb_mode == HPB_HOST_CONTROL)

> > +                     atomic64_set(&rgn->reads, 0);

> > +

> >               return;

> >       }

> >

> > +     if (ufshpb_mode == HPB_HOST_CONTROL)

> > +             reads = atomic64_inc_return(&rgn->reads);

> > +

> >       if (!ufshpb_is_support_chunk(transfer_len))

> >               return; <- *this*

> >

> > +     if (ufshpb_mode == HPB_HOST_CONTROL) {

> > +             /*

> > +              * in host control mode, reads are the main source for

> > +              * activation trials.

> > +              */

> > +             if (reads == ACTIVATION_THRSHLD) {

> If the chunk size is not supported, we can not active this region

> permanently. It may be returned before get this statement.

Yes.
I already noticed that replying to Greg.
Fixed that when I dropped the use of atomic variables.
 
> 

> > diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h

> > index 8a34b0f42754..b0e78728af38 100644

> > --- a/drivers/scsi/ufs/ufshpb.h

> > +++ b/drivers/scsi/ufs/ufshpb.h

> > @@ -115,6 +115,9 @@ struct ufshpb_region {

> >       /* below information is used by lru */

> >       struct list_head list_lru_rgn;

> >       unsigned long rgn_flags;

> > +

> > +     /* region reads - for host mode */

> > +     atomic64_t reads;

> I think 32 bits are suitable, because it is normalized by worker on every

> specific time.

Done.
gregkh@linuxfoundation.org Feb. 1, 2021, 7:30 a.m. UTC | #6
On Mon, Feb 01, 2021 at 07:12:53AM +0000, Avri Altman wrote:
> > > +#define WORK_PENDING 0

> > > +#define ACTIVATION_THRSHLD 4 /* 4 IOs */

> > Rather than fixing it with macro, how about using sysfs and make it

> > configurable?

> Yes.

> I will add a patch making all the logic configurable.

> As all those are hpb-related parameters, I think module parameters are more adequate.


No, this is not the 1990's, please never add new module parameters to
drivers, if only for the basic problem that they do not work on a
per-device basis, but on a per-driver basis, which is what you almost
never want.

But why would you want to change this value, why can't the driver "just
work" and not need manual intervention?

thanks,

greg k-h
Avri Altman Feb. 1, 2021, 7:51 a.m. UTC | #7
> 

> On Mon, Feb 01, 2021 at 07:12:53AM +0000, Avri Altman wrote:

> > > > +#define WORK_PENDING 0

> > > > +#define ACTIVATION_THRSHLD 4 /* 4 IOs */

> > > Rather than fixing it with macro, how about using sysfs and make it

> > > configurable?

> > Yes.

> > I will add a patch making all the logic configurable.

> > As all those are hpb-related parameters, I think module parameters are

> more adequate.

> 

> No, this is not the 1990's, please never add new module parameters to

> drivers.  If not for the basic problem of they do not work on a

> per-device basis, but on a per-driver basis, which is what you almost

> never want.

OK.

> 

> But why would you want to change this value, why can't the driver "just

> work" and not need manual intervention?

It is.
But those are knobs each vendor may want to tweak,
so that it can be optimized for its device's internal implementation.

Tweaking the parameters, as well as the entire logic, is really an endless task.
Some logic works better for some scenarios, while falling behind on others.

How about leaving it for now, to be elaborated in the future?
Maybe it can even be part of a scheme, to make the logic proprietary?
gregkh@linuxfoundation.org Feb. 1, 2021, 8:01 a.m. UTC | #8
On Mon, Feb 01, 2021 at 07:51:19AM +0000, Avri Altman wrote:
> > 

> > On Mon, Feb 01, 2021 at 07:12:53AM +0000, Avri Altman wrote:

> > > > > +#define WORK_PENDING 0

> > > > > +#define ACTIVATION_THRSHLD 4 /* 4 IOs */

> > > > Rather than fixing it with macro, how about using sysfs and make it

> > > > configurable?

> > > Yes.

> > > I will add a patch making all the logic configurable.

> > > As all those are hpb-related parameters, I think module parameters are

> > more adequate.

> > 

> > No, this is not the 1990's, please never add new module parameters to

> > drivers.  If not for the basic problem of they do not work on a

> > per-device basis, but on a per-driver basis, which is what you almost

> > never want.

> OK.

> 

> > 

> > But why would you want to change this value, why can't the driver "just

> > work" and not need manual intervention?

> It is.

> But those are a knobs each vendor may want to tweak,

> So it'll be optimized with its internal device's implementation.

> 

> Tweaking the parameters, as well as the entire logic, is really an endless task.

> Some logic works better for some scenarios, while falling behind on others.


Shouldn't the hardware know how to handle this dynamically?  If not, how
is a user going to know?

> How about leaving it for now, to be elaborated it in the future?


I do not care, just do not make it a module parameter for the reason
that it does not work on a per-device basis.

> Maybe even can be a part of a scheme, to make the logic proprietary?


What do you mean by "proprietary"?

thanks,

greg k-h
Avri Altman Feb. 1, 2021, 8:17 a.m. UTC | #9
> 

> On Mon, Feb 01, 2021 at 07:51:19AM +0000, Avri Altman wrote:

> > >

> > > On Mon, Feb 01, 2021 at 07:12:53AM +0000, Avri Altman wrote:

> > > > > > +#define WORK_PENDING 0

> > > > > > +#define ACTIVATION_THRSHLD 4 /* 4 IOs */

> > > > > Rather than fixing it with macro, how about using sysfs and make it

> > > > > configurable?

> > > > Yes.

> > > > I will add a patch making all the logic configurable.

> > > > As all those are hpb-related parameters, I think module parameters are

> > > more adequate.

> > >

> > > No, this is not the 1990's, please never add new module parameters to

> > > drivers.  If not for the basic problem of they do not work on a

> > > per-device basis, but on a per-driver basis, which is what you almost

> > > never want.

> > OK.

> >

> > >

> > > But why would you want to change this value, why can't the driver "just

> > > work" and not need manual intervention?

> > It is.

> > But those are a knobs each vendor may want to tweak,

> > So it'll be optimized with its internal device's implementation.

> >

> > Tweaking the parameters, as well as the entire logic, is really an endless

> task.

> > Some logic works better for some scenarios, while falling behind on others.

> 

> Shouldn't the hardware know how to handle this dynamically?  If not, how

> is a user going to know?

There is one "brain".
It is either in the device - in device mode, or in the host - in host control mode.
The "brain" decides which region is active, thus carrying the physical address along with the logical -
minimizing context switches in the device's RAM.

There can be up to N active regions.
Activation and deactivation have their overhead.
So basically it is a constraint-optimization problem.

> 

> > How about leaving it for now, to be elaborated it in the future?

> 

> I do not care, just do not make it a module parameter for the reason

> that does not work on a per-device basis.

OK.  Will make it a sysfs per hpb-lun, like Daejun proposed.

Thanks,
Avri
gregkh@linuxfoundation.org Feb. 1, 2021, 8:44 a.m. UTC | #10
On Mon, Feb 01, 2021 at 08:17:59AM +0000, Avri Altman wrote:
> > 

> > On Mon, Feb 01, 2021 at 07:51:19AM +0000, Avri Altman wrote:

> > > >

> > > > On Mon, Feb 01, 2021 at 07:12:53AM +0000, Avri Altman wrote:

> > > > > > > +#define WORK_PENDING 0

> > > > > > > +#define ACTIVATION_THRSHLD 4 /* 4 IOs */

> > > > > > Rather than fixing it with macro, how about using sysfs and make it

> > > > > > configurable?

> > > > > Yes.

> > > > > I will add a patch making all the logic configurable.

> > > > > As all those are hpb-related parameters, I think module parameters are

> > > > more adequate.

> > > >

> > > > No, this is not the 1990's, please never add new module parameters to

> > > > drivers.  If not for the basic problem of they do not work on a

> > > > per-device basis, but on a per-driver basis, which is what you almost

> > > > never want.

> > > OK.

> > >

> > > >

> > > > But why would you want to change this value, why can't the driver "just

> > > > work" and not need manual intervention?

> > > It is.

> > > But those are a knobs each vendor may want to tweak,

> > > So it'll be optimized with its internal device's implementation.

> > >

> > > Tweaking the parameters, as well as the entire logic, is really an endless

> > task.

> > > Some logic works better for some scenarios, while falling behind on others.

> > 

> > Shouldn't the hardware know how to handle this dynamically?  If not, how

> > is a user going to know?

> There is one "brain".

> It is either in the device - in device mode, Or in the host - in host mode control.

> The "brain" decides which region is active, thus carrying the physical address along with the logical -

> minimizing context switches in the device's RAM.

> 

> There can be up to N active regions.

> Activation and deactivation has its overhead.

> So basically it is a constraint-optimization problem.


So how do you solve it?  And how would you expect a user to solve it if
the kernel can not?

You better document the heck out of these configuration options :)

thanks,

greg k-h
Avri Altman Feb. 1, 2021, 9:04 a.m. UTC | #11
> On Mon, Feb 01, 2021 at 08:17:59AM +0000, Avri Altman wrote:

> > >

> > > On Mon, Feb 01, 2021 at 07:51:19AM +0000, Avri Altman wrote:

> > > > >

> > > > > On Mon, Feb 01, 2021 at 07:12:53AM +0000, Avri Altman wrote:

> > > > > > > > +#define WORK_PENDING 0

> > > > > > > > +#define ACTIVATION_THRSHLD 4 /* 4 IOs */

> > > > > > > Rather than fixing it with macro, how about using sysfs and make it

> > > > > > > configurable?

> > > > > > Yes.

> > > > > > I will add a patch making all the logic configurable.

> > > > > > As all those are hpb-related parameters, I think module parameters

> are

> > > > > more adequate.

> > > > >

> > > > > No, this is not the 1990's, please never add new module parameters to

> > > > > drivers.  If not for the basic problem of they do not work on a

> > > > > per-device basis, but on a per-driver basis, which is what you almost

> > > > > never want.

> > > > OK.

> > > >

> > > > >

> > > > > But why would you want to change this value, why can't the driver

> "just

> > > > > work" and not need manual intervention?

> > > > It is.

> > > > But those are a knobs each vendor may want to tweak,

> > > > So it'll be optimized with its internal device's implementation.

> > > >

> > > > Tweaking the parameters, as well as the entire logic, is really an endless

> > > task.

> > > > Some logic works better for some scenarios, while falling behind on

> others.

> > >

> > > Shouldn't the hardware know how to handle this dynamically?  If not, how

> > > is a user going to know?

> > There is one "brain".

> > It is either in the device - in device mode, Or in the host - in host mode

> control.

> > The "brain" decides which region is active, thus carrying the physical address

> along with the logical -

> > minimizing context switches in the device's RAM.

> >

> > There can be up to N active regions.

> > Activation and deactivation has its overhead.

> > So basically it is a constraint-optimization problem.

> 

> So how do you solve it?  And how would you expect a user to solve it if

> the kernel can not?

> 

> You better document the heck out of these configuration options :)

Yes.  Will do.

Thanks,
Avri
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
index 5fa1f5bc08e6..51c3607166bc 100644
--- a/drivers/scsi/ufs/ufshpb.c
+++ b/drivers/scsi/ufs/ufshpb.c
@@ -16,6 +16,9 @@ 
 #include "ufshpb.h"
 #include "../sd.h"
 
+#define WORK_PENDING 0
+#define ACTIVATION_THRSHLD 4 /* 4 IOs */
+
 /* memory management */
 static struct kmem_cache *ufshpb_mctx_cache;
 static mempool_t *ufshpb_mctx_pool;
@@ -261,6 +264,21 @@  ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct ufshcd_lrb *lrbp,
 	cdb[14] = transfer_len;
 }
 
+static void ufshpb_update_active_info(struct ufshpb_lu *hpb, int rgn_idx,
+				      int srgn_idx)
+{
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn;
+
+	rgn = hpb->rgn_tbl + rgn_idx;
+	srgn = rgn->srgn_tbl + srgn_idx;
+
+	list_del_init(&rgn->list_inact_rgn);
+
+	if (list_empty(&srgn->list_act_srgn))
+		list_add_tail(&srgn->list_act_srgn, &hpb->lh_act_srgn);
+}
+
 /*
  * This function will set up HPB read command using host-side L2P map data.
  * In HPB v1.0, maximum size of HPB read command is 4KB.
@@ -276,6 +294,7 @@  void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 	unsigned long flags;
 	int transfer_len, rgn_idx, srgn_idx, srgn_offset;
 	int err = 0;
+	u64 reads;
 
 	hpb = ufshpb_get_hpb_data(cmd->device);
 	if (!hpb)
@@ -306,12 +325,39 @@  void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 		ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
 				 transfer_len);
 		spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+
+		if (ufshpb_mode == HPB_HOST_CONTROL)
+			atomic64_set(&rgn->reads, 0);
+
 		return;
 	}
 
+	if (ufshpb_mode == HPB_HOST_CONTROL)
+		reads = atomic64_inc_return(&rgn->reads);
+
 	if (!ufshpb_is_support_chunk(transfer_len))
 		return;
 
+	if (ufshpb_mode == HPB_HOST_CONTROL) {
+		/*
+		 * in host control mode, reads are the main source for
+		 * activation trials.
+		 */
+		if (reads == ACTIVATION_THRSHLD) {
+			spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+			ufshpb_update_active_info(hpb, rgn_idx, srgn_idx);
+			hpb->stats.rb_active_cnt++;
+			spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+			dev_dbg(&hpb->sdev_ufs_lu->sdev_dev,
+				"activate region %d-%d\n", rgn_idx, srgn_idx);
+		}
+
+		/* keep those counters normalized */
+		if (reads > hpb->entries_per_srgn &&
+		    !test_and_set_bit(WORK_PENDING, &hpb->work_data_bits))
+			schedule_work(&hpb->ufshpb_normalization_work);
+	}
+
 	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
 	if (ufshpb_test_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
 				   transfer_len)) {
@@ -396,21 +442,6 @@  static int ufshpb_clear_dirty_bitmap(struct ufshpb_lu *hpb,
 	return 0;
 }
 
-static void ufshpb_update_active_info(struct ufshpb_lu *hpb, int rgn_idx,
-				      int srgn_idx)
-{
-	struct ufshpb_region *rgn;
-	struct ufshpb_subregion *srgn;
-
-	rgn = hpb->rgn_tbl + rgn_idx;
-	srgn = rgn->srgn_tbl + srgn_idx;
-
-	list_del_init(&rgn->list_inact_rgn);
-
-	if (list_empty(&srgn->list_act_srgn))
-		list_add_tail(&srgn->list_act_srgn, &hpb->lh_act_srgn);
-}
-
 static void ufshpb_update_inactive_info(struct ufshpb_lu *hpb, int rgn_idx)
 {
 	struct ufshpb_region *rgn;
@@ -646,6 +677,9 @@  static void __ufshpb_evict_region(struct ufshpb_lu *hpb,
 
 	ufshpb_cleanup_lru_info(lru_info, rgn);
 
+	if (ufshpb_mode == HPB_HOST_CONTROL)
+		atomic64_set(&rgn->reads, 0);
+
 	for_each_sub_region(rgn, srgn_idx, srgn)
 		ufshpb_purge_active_subregion(hpb, srgn);
 }
@@ -1044,6 +1078,33 @@  static void ufshpb_run_inactive_region_list(struct ufshpb_lu *hpb)
 	spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
 }
 
+static void ufshpb_normalization_work_handler(struct work_struct *work)
+{
+	struct ufshpb_lu *hpb;
+	int rgn_idx;
+
+	hpb = container_of(work, struct ufshpb_lu, ufshpb_normalization_work);
+
+	for (rgn_idx = 0; rgn_idx < hpb->rgns_per_lu; rgn_idx++) {
+		struct ufshpb_region *rgn = hpb->rgn_tbl + rgn_idx;
+		u64 reads = atomic64_read(&rgn->reads);
+
+		if (reads)
+			atomic64_set(&rgn->reads, reads >> 1);
+
+		if (rgn->rgn_state != HPB_RGN_ACTIVE ||
+			atomic64_read(&rgn->reads))
+			continue;
+
+		/* if region is active but has no reads - inactivate it */
+		spin_lock(&hpb->rsp_list_lock);
+		ufshpb_update_inactive_info(hpb, rgn->rgn_idx);
+		spin_unlock(&hpb->rsp_list_lock);
+	}
+
+	clear_bit(WORK_PENDING, &hpb->work_data_bits);
+}
+
 static void ufshpb_map_work_handler(struct work_struct *work)
 {
 	struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu, map_work);
@@ -1308,6 +1369,9 @@  static int ufshpb_lu_hpb_init(struct ufs_hba *hba, struct ufshpb_lu *hpb)
 	INIT_LIST_HEAD(&hpb->list_hpb_lu);
 
 	INIT_WORK(&hpb->map_work, ufshpb_map_work_handler);
+	if (ufshpb_mode == HPB_HOST_CONTROL)
+		INIT_WORK(&hpb->ufshpb_normalization_work,
+			  ufshpb_normalization_work_handler);
 
 	hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",
 			  sizeof(struct ufshpb_req), 0, 0, NULL);
@@ -1394,6 +1458,8 @@  static void ufshpb_discard_rsp_lists(struct ufshpb_lu *hpb)
 
 static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)
 {
+	if (ufshpb_mode == HPB_HOST_CONTROL)
+		cancel_work_sync(&hpb->ufshpb_normalization_work);
 	cancel_work_sync(&hpb->map_work);
 }
 
diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h
index 8a34b0f42754..b0e78728af38 100644
--- a/drivers/scsi/ufs/ufshpb.h
+++ b/drivers/scsi/ufs/ufshpb.h
@@ -115,6 +115,9 @@  struct ufshpb_region {
 	/* below information is used by lru */
 	struct list_head list_lru_rgn;
 	unsigned long rgn_flags;
+
+	/* region reads - for host mode */
+	atomic64_t reads;
 };
 
 #define for_each_sub_region(rgn, i, srgn)				\
@@ -175,6 +178,8 @@  struct ufshpb_lu {
 
 	/* for selecting victim */
 	struct victim_select_info lru_info;
+	struct work_struct ufshpb_normalization_work;
+	unsigned long work_data_bits;
 
 	/* pinned region information */
 	u32 lu_pinned_start;