diff mbox series

[v5,07/10] scsi: ufshpb: Add "Cold" regions timer

Message ID 20210302132503.224670-8-avri.altman@wdc.com
State New
Headers show
Series Add Host control mode to HPB | expand

Commit Message

Avri Altman March 2, 2021, 1:25 p.m. UTC
In order not to hang on to “cold” regions, we shall inactivate a
region that has no READ access for a predefined amount of time -
READ_TO_MS. For that purpose we shall monitor the active regions list,
polling it on every POLLING_INTERVAL_MS. On timeout expiry we shall add
the region to the "to-be-inactivated" list, unless it is clean and did
not exhaust its READ_TO_EXPIRIES - another parameter.

All this does not apply to pinned regions.

Signed-off-by: Avri Altman <avri.altman@wdc.com>
---
 drivers/scsi/ufs/ufshpb.c | 65 +++++++++++++++++++++++++++++++++++++++
 drivers/scsi/ufs/ufshpb.h |  6 ++++
 2 files changed, 71 insertions(+)

Comments

Can Guo March 15, 2021, 1:37 a.m. UTC | #1
On 2021-03-02 21:25, Avri Altman wrote:
> In order not to hang on to “cold” regions, we shall inactivate a

> region that has no READ access for a predefined amount of time -

> READ_TO_MS. For that purpose we shall monitor the active regions list,

> polling it on every POLLING_INTERVAL_MS. On timeout expiry we shall add

> the region to the "to-be-inactivated" list, unless it is clean and did

> not exhaust its READ_TO_EXPIRIES - another parameter.

> 

> All this does not apply to pinned regions.

> 

> Signed-off-by: Avri Altman <avri.altman@wdc.com>

> ---

>  drivers/scsi/ufs/ufshpb.c | 65 +++++++++++++++++++++++++++++++++++++++

>  drivers/scsi/ufs/ufshpb.h |  6 ++++

>  2 files changed, 71 insertions(+)

> 

> diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c

> index 0034fa03fdc6..89a930e72cff 100644

> --- a/drivers/scsi/ufs/ufshpb.c

> +++ b/drivers/scsi/ufs/ufshpb.c

> @@ -18,6 +18,9 @@

> 

>  #define ACTIVATION_THRESHOLD 4 /* 4 IOs */

>  #define EVICTION_THRESHOLD (ACTIVATION_THRESHOLD << 6) /* 256 IOs */

> +#define READ_TO_MS 1000

> +#define READ_TO_EXPIRIES 100

> +#define POLLING_INTERVAL_MS 200

> 

>  /* memory management */

>  static struct kmem_cache *ufshpb_mctx_cache;

> @@ -1024,12 +1027,61 @@ static int

> ufshpb_check_srgns_issue_state(struct ufshpb_lu *hpb,

>  	return 0;

>  }

> 

> +static void ufshpb_read_to_handler(struct work_struct *work)

> +{

> +	struct delayed_work *dwork = to_delayed_work(work);

> +	struct ufshpb_lu *hpb;


         struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu, 
ufshpb_read_to_work.work);

usually we use this to get data of a delayed work.

> +	struct victim_select_info *lru_info;


         struct victim_select_info *lru_info = &hpb->lru_info;

This can save some lines.

Thanks,
Can Guo.

> +	struct ufshpb_region *rgn;

> +	unsigned long flags;

> +	LIST_HEAD(expired_list);

> +

> +	spin_lock_irqsave(&hpb->rgn_state_lock, flags);

> +

> +	list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {

> +		bool timedout = ktime_after(ktime_get(), rgn->read_timeout);

> +

> +		if (timedout) {

> +			rgn->read_timeout_expiries--;

> +			if (is_rgn_dirty(rgn) ||

> +			    rgn->read_timeout_expiries == 0)

> +				list_add(&rgn->list_expired_rgn, &expired_list);

> +			else

> +				rgn->read_timeout = ktime_add_ms(ktime_get(),

> +							 READ_TO_MS);

> +		}

> +	}

> +

> +	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);

> +

> +	list_for_each_entry(rgn, &expired_list, list_expired_rgn) {

> +		list_del_init(&rgn->list_expired_rgn);

> +		spin_lock_irqsave(&hpb->rsp_list_lock, flags);

> +		ufshpb_update_inactive_info(hpb, rgn->rgn_idx);

> +		hpb->stats.rb_inactive_cnt++;

> +		spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);

> +	}

> +

> +	ufshpb_kick_map_work(hpb);

> +

> +	schedule_delayed_work(&hpb->ufshpb_read_to_work,

> +			      msecs_to_jiffies(POLLING_INTERVAL_MS));

> +}

> +

>  static void ufshpb_add_lru_info(struct victim_select_info *lru_info,

>  				struct ufshpb_region *rgn)

>  {

>  	rgn->rgn_state = HPB_RGN_ACTIVE;

>  	list_add_tail(&rgn->list_lru_rgn, &lru_info->lh_lru_rgn);

>  	atomic_inc(&lru_info->active_cnt);

> +	if (rgn->hpb->is_hcm) {

> +		rgn->read_timeout = ktime_add_ms(ktime_get(), READ_TO_MS);

> +		rgn->read_timeout_expiries = READ_TO_EXPIRIES;

> +	}

>  }

> 

>  static void ufshpb_hit_lru_info(struct victim_select_info *lru_info,

> @@ -1813,6 +1865,7 @@ static int ufshpb_alloc_region_tbl(struct

> ufs_hba *hba, struct ufshpb_lu *hpb)

> 

>  		INIT_LIST_HEAD(&rgn->list_inact_rgn);

>  		INIT_LIST_HEAD(&rgn->list_lru_rgn);

> +		INIT_LIST_HEAD(&rgn->list_expired_rgn);

> 

>  		if (rgn_idx == hpb->rgns_per_lu - 1) {

>  			srgn_cnt = ((hpb->srgns_per_lu - 1) %

> @@ -1834,6 +1887,7 @@ static int ufshpb_alloc_region_tbl(struct

> ufs_hba *hba, struct ufshpb_lu *hpb)

>  		}

> 

>  		rgn->rgn_flags = 0;

> +		rgn->hpb = hpb;

>  	}

> 

>  	return 0;

> @@ -2053,6 +2107,8 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

> *hba, struct ufshpb_lu *hpb)

>  			  ufshpb_normalization_work_handler);

>  		INIT_WORK(&hpb->ufshpb_lun_reset_work,

>  			  ufshpb_reset_work_handler);

> +		INIT_DELAYED_WORK(&hpb->ufshpb_read_to_work,

> +				  ufshpb_read_to_handler);

>  	}

> 

>  	hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",

> @@ -2087,6 +2143,10 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

> *hba, struct ufshpb_lu *hpb)

>  	ufshpb_stat_init(hpb);

>  	ufshpb_param_init(hpb);

> 

> +	if (hpb->is_hcm)

> +		schedule_delayed_work(&hpb->ufshpb_read_to_work,

> +				      msecs_to_jiffies(POLLING_INTERVAL_MS));

> +

>  	return 0;

> 

>  release_pre_req_mempool:

> @@ -2154,6 +2214,7 @@ static void ufshpb_discard_rsp_lists(struct

> ufshpb_lu *hpb)

>  static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)

>  {

>  	if (hpb->is_hcm) {

> +		cancel_delayed_work_sync(&hpb->ufshpb_read_to_work);

>  		cancel_work_sync(&hpb->ufshpb_lun_reset_work);

>  		cancel_work_sync(&hpb->ufshpb_normalization_work);

>  	}

> @@ -2264,6 +2325,10 @@ void ufshpb_resume(struct ufs_hba *hba)

>  			continue;

>  		ufshpb_set_state(hpb, HPB_PRESENT);

>  		ufshpb_kick_map_work(hpb);

> +		if (hpb->is_hcm)

> +			schedule_delayed_work(&hpb->ufshpb_read_to_work,

> +				msecs_to_jiffies(POLLING_INTERVAL_MS));

> +

>  	}

>  }

> 

> diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h

> index 37c1b0ea0c0a..b49e9a34267f 100644

> --- a/drivers/scsi/ufs/ufshpb.h

> +++ b/drivers/scsi/ufs/ufshpb.h

> @@ -109,6 +109,7 @@ struct ufshpb_subregion {

>  };

> 

>  struct ufshpb_region {

> +	struct ufshpb_lu *hpb;

>  	struct ufshpb_subregion *srgn_tbl;

>  	enum HPB_RGN_STATE rgn_state;

>  	int rgn_idx;

> @@ -126,6 +127,10 @@ struct ufshpb_region {

>  	/* region reads - for host mode */

>  	spinlock_t rgn_lock;

>  	unsigned int reads;

> +	/* region "cold" timer - for host mode */

> +	ktime_t read_timeout;

> +	unsigned int read_timeout_expiries;

> +	struct list_head list_expired_rgn;

>  };

> 

>  #define for_each_sub_region(rgn, i, srgn)				\

> @@ -219,6 +224,7 @@ struct ufshpb_lu {

>  	struct victim_select_info lru_info;

>  	struct work_struct ufshpb_normalization_work;

>  	struct work_struct ufshpb_lun_reset_work;

> +	struct delayed_work ufshpb_read_to_work;

> 

>  	/* pinned region information */

>  	u32 lu_pinned_start;
Avri Altman March 15, 2021, 7:54 a.m. UTC | #2
> >

> > +static void ufshpb_read_to_handler(struct work_struct *work)

> > +{

> > +     struct delayed_work *dwork = to_delayed_work(work);

> > +     struct ufshpb_lu *hpb;

> 

>          struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu,

> ufshpb_read_to_work.work);

> 

> usually we use this to get data of a delayed work.

> 

> > +     struct victim_select_info *lru_info;

> 

>          struct victim_select_info *lru_info = &hpb->lru_info;

> 

> This can save some lines.

Done.

Thanks,
Avri

> Thanks,

> Can Guo.

> 

> > +     struct ufshpb_region *rgn;

> > +     unsigned long flags;

> > +     LIST_HEAD(expired_list);
Can Guo March 15, 2021, 9:36 a.m. UTC | #3
On 2021-03-02 21:25, Avri Altman wrote:
> In order not to hang on to “cold” regions, we shall inactivate a

> region that has no READ access for a predefined amount of time -

> READ_TO_MS. For that purpose we shall monitor the active regions list,

> polling it on every POLLING_INTERVAL_MS. On timeout expiry we shall add

> the region to the "to-be-inactivated" list, unless it is clean and did

> not exhaust its READ_TO_EXPIRIES - another parameter.

> 

> All this does not apply to pinned regions.

> 

> Signed-off-by: Avri Altman <avri.altman@wdc.com>

> ---

>  drivers/scsi/ufs/ufshpb.c | 65 +++++++++++++++++++++++++++++++++++++++

>  drivers/scsi/ufs/ufshpb.h |  6 ++++

>  2 files changed, 71 insertions(+)

> 

> diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c

> index 0034fa03fdc6..89a930e72cff 100644

> --- a/drivers/scsi/ufs/ufshpb.c

> +++ b/drivers/scsi/ufs/ufshpb.c

> @@ -18,6 +18,9 @@

> 

>  #define ACTIVATION_THRESHOLD 4 /* 4 IOs */

>  #define EVICTION_THRESHOLD (ACTIVATION_THRESHOLD << 6) /* 256 IOs */

> +#define READ_TO_MS 1000

> +#define READ_TO_EXPIRIES 100

> +#define POLLING_INTERVAL_MS 200

> 

>  /* memory management */

>  static struct kmem_cache *ufshpb_mctx_cache;

> @@ -1024,12 +1027,61 @@ static int

> ufshpb_check_srgns_issue_state(struct ufshpb_lu *hpb,

>  	return 0;

>  }

> 

> +static void ufshpb_read_to_handler(struct work_struct *work)

> +{

> +	struct delayed_work *dwork = to_delayed_work(work);

> +	struct ufshpb_lu *hpb;

> +	struct victim_select_info *lru_info;

> +	struct ufshpb_region *rgn;

> +	unsigned long flags;

> +	LIST_HEAD(expired_list);

> +

> +	hpb = container_of(dwork, struct ufshpb_lu, ufshpb_read_to_work);

> +

> +	spin_lock_irqsave(&hpb->rgn_state_lock, flags);

> +

> +	lru_info = &hpb->lru_info;

> +

> +	list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {

> +		bool timedout = ktime_after(ktime_get(), rgn->read_timeout);

> +

> +		if (timedout) {

> +			rgn->read_timeout_expiries--;

> +			if (is_rgn_dirty(rgn) ||

> +			    rgn->read_timeout_expiries == 0)

> +				list_add(&rgn->list_expired_rgn, &expired_list);

> +			else

> +				rgn->read_timeout = ktime_add_ms(ktime_get(),

> +							 READ_TO_MS);

> +		}

> +	}

> +

> +	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);

> +

> +	list_for_each_entry(rgn, &expired_list, list_expired_rgn) {


Here can be problematic - since you don't have the native expired_list 
initialized
before use, if above loop did not insert anything to expired_list, it 
shall become
a dead loop here.

And, which lock is protecting rgn->list_expired_rgn? If two 
read_to_handler works
are running in parallel, one can be inserting it to its expired_list 
while another
can be deleting it.

Can Guo.

> +		list_del_init(&rgn->list_expired_rgn);

> +		spin_lock_irqsave(&hpb->rsp_list_lock, flags);

> +		ufshpb_update_inactive_info(hpb, rgn->rgn_idx);

> +		hpb->stats.rb_inactive_cnt++;

> +		spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);

> +	}

> +

> +	ufshpb_kick_map_work(hpb);

> +

> +	schedule_delayed_work(&hpb->ufshpb_read_to_work,

> +			      msecs_to_jiffies(POLLING_INTERVAL_MS));

> +}

> +

>  static void ufshpb_add_lru_info(struct victim_select_info *lru_info,

>  				struct ufshpb_region *rgn)

>  {

>  	rgn->rgn_state = HPB_RGN_ACTIVE;

>  	list_add_tail(&rgn->list_lru_rgn, &lru_info->lh_lru_rgn);

>  	atomic_inc(&lru_info->active_cnt);

> +	if (rgn->hpb->is_hcm) {

> +		rgn->read_timeout = ktime_add_ms(ktime_get(), READ_TO_MS);

> +		rgn->read_timeout_expiries = READ_TO_EXPIRIES;

> +	}

>  }

> 

>  static void ufshpb_hit_lru_info(struct victim_select_info *lru_info,

> @@ -1813,6 +1865,7 @@ static int ufshpb_alloc_region_tbl(struct

> ufs_hba *hba, struct ufshpb_lu *hpb)

> 

>  		INIT_LIST_HEAD(&rgn->list_inact_rgn);

>  		INIT_LIST_HEAD(&rgn->list_lru_rgn);

> +		INIT_LIST_HEAD(&rgn->list_expired_rgn);

> 

>  		if (rgn_idx == hpb->rgns_per_lu - 1) {

>  			srgn_cnt = ((hpb->srgns_per_lu - 1) %

> @@ -1834,6 +1887,7 @@ static int ufshpb_alloc_region_tbl(struct

> ufs_hba *hba, struct ufshpb_lu *hpb)

>  		}

> 

>  		rgn->rgn_flags = 0;

> +		rgn->hpb = hpb;

>  	}

> 

>  	return 0;

> @@ -2053,6 +2107,8 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

> *hba, struct ufshpb_lu *hpb)

>  			  ufshpb_normalization_work_handler);

>  		INIT_WORK(&hpb->ufshpb_lun_reset_work,

>  			  ufshpb_reset_work_handler);

> +		INIT_DELAYED_WORK(&hpb->ufshpb_read_to_work,

> +				  ufshpb_read_to_handler);

>  	}

> 

>  	hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",

> @@ -2087,6 +2143,10 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

> *hba, struct ufshpb_lu *hpb)

>  	ufshpb_stat_init(hpb);

>  	ufshpb_param_init(hpb);

> 

> +	if (hpb->is_hcm)

> +		schedule_delayed_work(&hpb->ufshpb_read_to_work,

> +				      msecs_to_jiffies(POLLING_INTERVAL_MS));

> +

>  	return 0;

> 

>  release_pre_req_mempool:

> @@ -2154,6 +2214,7 @@ static void ufshpb_discard_rsp_lists(struct

> ufshpb_lu *hpb)

>  static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)

>  {

>  	if (hpb->is_hcm) {

> +		cancel_delayed_work_sync(&hpb->ufshpb_read_to_work);

>  		cancel_work_sync(&hpb->ufshpb_lun_reset_work);

>  		cancel_work_sync(&hpb->ufshpb_normalization_work);

>  	}

> @@ -2264,6 +2325,10 @@ void ufshpb_resume(struct ufs_hba *hba)

>  			continue;

>  		ufshpb_set_state(hpb, HPB_PRESENT);

>  		ufshpb_kick_map_work(hpb);

> +		if (hpb->is_hcm)

> +			schedule_delayed_work(&hpb->ufshpb_read_to_work,

> +				msecs_to_jiffies(POLLING_INTERVAL_MS));

> +

>  	}

>  }

> 

> diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h

> index 37c1b0ea0c0a..b49e9a34267f 100644

> --- a/drivers/scsi/ufs/ufshpb.h

> +++ b/drivers/scsi/ufs/ufshpb.h

> @@ -109,6 +109,7 @@ struct ufshpb_subregion {

>  };

> 

>  struct ufshpb_region {

> +	struct ufshpb_lu *hpb;

>  	struct ufshpb_subregion *srgn_tbl;

>  	enum HPB_RGN_STATE rgn_state;

>  	int rgn_idx;

> @@ -126,6 +127,10 @@ struct ufshpb_region {

>  	/* region reads - for host mode */

>  	spinlock_t rgn_lock;

>  	unsigned int reads;

> +	/* region "cold" timer - for host mode */

> +	ktime_t read_timeout;

> +	unsigned int read_timeout_expiries;

> +	struct list_head list_expired_rgn;

>  };

> 

>  #define for_each_sub_region(rgn, i, srgn)				\

> @@ -219,6 +224,7 @@ struct ufshpb_lu {

>  	struct victim_select_info lru_info;

>  	struct work_struct ufshpb_normalization_work;

>  	struct work_struct ufshpb_lun_reset_work;

> +	struct delayed_work ufshpb_read_to_work;

> 

>  	/* pinned region information */

>  	u32 lu_pinned_start;
Avri Altman March 16, 2021, 9:21 a.m. UTC | #4
> > +static void ufshpb_read_to_handler(struct work_struct *work)

> > +{

> > +     struct delayed_work *dwork = to_delayed_work(work);

> > +     struct ufshpb_lu *hpb;

> > +     struct victim_select_info *lru_info;

> > +     struct ufshpb_region *rgn;

> > +     unsigned long flags;

> > +     LIST_HEAD(expired_list);

> > +

> > +     hpb = container_of(dwork, struct ufshpb_lu, ufshpb_read_to_work);

> > +

> > +     spin_lock_irqsave(&hpb->rgn_state_lock, flags);

> > +

> > +     lru_info = &hpb->lru_info;

> > +

> > +     list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {

> > +             bool timedout = ktime_after(ktime_get(), rgn->read_timeout);

> > +

> > +             if (timedout) {

> > +                     rgn->read_timeout_expiries--;

> > +                     if (is_rgn_dirty(rgn) ||

> > +                         rgn->read_timeout_expiries == 0)

> > +                             list_add(&rgn->list_expired_rgn, &expired_list);

> > +                     else

> > +                             rgn->read_timeout = ktime_add_ms(ktime_get(),

> > +                                                      READ_TO_MS);

> > +             }

> > +     }

> > +

> > +     spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);

> > +

> > +     list_for_each_entry(rgn, &expired_list, list_expired_rgn) {

> 

> Here can be problematic - since you don't have the native expired_list

> initialized

> before use, if above loop did not insert anything to expired_list, it

> shall become

> a dead loop here.

Not sure what you meant by native initialization.
LIST_HEAD is statically initializing an empty list, resulting the same outcome as INIT_LIST_HEAD.

> 

> And, which lock is protecting rgn->list_expired_rgn? If two

> read_to_handler works

> are running in parallel, one can be inserting it to its expired_list

> while another

> can be deleting it.

The timeout handler, being a delayed work, is meant to run every polling period.
Originally, I had it protected from 2 handlers running concurrently,
But I removed it following Daejun's comment, which I accepted,
Since it is always scheduled using the same polling period.

Thanks,
Avri

> 

> Can Guo.

> 

> > +             list_del_init(&rgn->list_expired_rgn);

> > +             spin_lock_irqsave(&hpb->rsp_list_lock, flags);

> > +             ufshpb_update_inactive_info(hpb, rgn->rgn_idx);

> > +             hpb->stats.rb_inactive_cnt++;

> > +             spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);

> > +     }

> > +

> > +     ufshpb_kick_map_work(hpb);

> > +

> > +     schedule_delayed_work(&hpb->ufshpb_read_to_work,

> > +                           msecs_to_jiffies(POLLING_INTERVAL_MS));

> > +}

> > +

> >  static void ufshpb_add_lru_info(struct victim_select_info *lru_info,

> >                               struct ufshpb_region *rgn)

> >  {

> >       rgn->rgn_state = HPB_RGN_ACTIVE;

> >       list_add_tail(&rgn->list_lru_rgn, &lru_info->lh_lru_rgn);

> >       atomic_inc(&lru_info->active_cnt);

> > +     if (rgn->hpb->is_hcm) {

> > +             rgn->read_timeout = ktime_add_ms(ktime_get(), READ_TO_MS);

> > +             rgn->read_timeout_expiries = READ_TO_EXPIRIES;

> > +     }

> >  }

> >

> >  static void ufshpb_hit_lru_info(struct victim_select_info *lru_info,

> > @@ -1813,6 +1865,7 @@ static int ufshpb_alloc_region_tbl(struct

> > ufs_hba *hba, struct ufshpb_lu *hpb)

> >

> >               INIT_LIST_HEAD(&rgn->list_inact_rgn);

> >               INIT_LIST_HEAD(&rgn->list_lru_rgn);

> > +             INIT_LIST_HEAD(&rgn->list_expired_rgn);

> >

> >               if (rgn_idx == hpb->rgns_per_lu - 1) {

> >                       srgn_cnt = ((hpb->srgns_per_lu - 1) %

> > @@ -1834,6 +1887,7 @@ static int ufshpb_alloc_region_tbl(struct

> > ufs_hba *hba, struct ufshpb_lu *hpb)

> >               }

> >

> >               rgn->rgn_flags = 0;

> > +             rgn->hpb = hpb;

> >       }

> >

> >       return 0;

> > @@ -2053,6 +2107,8 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

> > *hba, struct ufshpb_lu *hpb)

> >                         ufshpb_normalization_work_handler);

> >               INIT_WORK(&hpb->ufshpb_lun_reset_work,

> >                         ufshpb_reset_work_handler);

> > +             INIT_DELAYED_WORK(&hpb->ufshpb_read_to_work,

> > +                               ufshpb_read_to_handler);

> >       }

> >

> >       hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",

> > @@ -2087,6 +2143,10 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

> > *hba, struct ufshpb_lu *hpb)

> >       ufshpb_stat_init(hpb);

> >       ufshpb_param_init(hpb);

> >

> > +     if (hpb->is_hcm)

> > +             schedule_delayed_work(&hpb->ufshpb_read_to_work,

> > +                                   msecs_to_jiffies(POLLING_INTERVAL_MS));

> > +

> >       return 0;

> >

> >  release_pre_req_mempool:

> > @@ -2154,6 +2214,7 @@ static void ufshpb_discard_rsp_lists(struct

> > ufshpb_lu *hpb)

> >  static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)

> >  {

> >       if (hpb->is_hcm) {

> > +             cancel_delayed_work_sync(&hpb->ufshpb_read_to_work);

> >               cancel_work_sync(&hpb->ufshpb_lun_reset_work);

> >               cancel_work_sync(&hpb->ufshpb_normalization_work);

> >       }

> > @@ -2264,6 +2325,10 @@ void ufshpb_resume(struct ufs_hba *hba)

> >                       continue;

> >               ufshpb_set_state(hpb, HPB_PRESENT);

> >               ufshpb_kick_map_work(hpb);

> > +             if (hpb->is_hcm)

> > +                     schedule_delayed_work(&hpb->ufshpb_read_to_work,

> > +                             msecs_to_jiffies(POLLING_INTERVAL_MS));

> > +

> >       }

> >  }

> >

> > diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h

> > index 37c1b0ea0c0a..b49e9a34267f 100644

> > --- a/drivers/scsi/ufs/ufshpb.h

> > +++ b/drivers/scsi/ufs/ufshpb.h

> > @@ -109,6 +109,7 @@ struct ufshpb_subregion {

> >  };

> >

> >  struct ufshpb_region {

> > +     struct ufshpb_lu *hpb;

> >       struct ufshpb_subregion *srgn_tbl;

> >       enum HPB_RGN_STATE rgn_state;

> >       int rgn_idx;

> > @@ -126,6 +127,10 @@ struct ufshpb_region {

> >       /* region reads - for host mode */

> >       spinlock_t rgn_lock;

> >       unsigned int reads;

> > +     /* region "cold" timer - for host mode */

> > +     ktime_t read_timeout;

> > +     unsigned int read_timeout_expiries;

> > +     struct list_head list_expired_rgn;

> >  };

> >

> >  #define for_each_sub_region(rgn, i, srgn)                            \

> > @@ -219,6 +224,7 @@ struct ufshpb_lu {

> >       struct victim_select_info lru_info;

> >       struct work_struct ufshpb_normalization_work;

> >       struct work_struct ufshpb_lun_reset_work;

> > +     struct delayed_work ufshpb_read_to_work;

> >

> >       /* pinned region information */

> >       u32 lu_pinned_start;
Can Guo March 17, 2021, 2:45 a.m. UTC | #5
On 2021-03-16 17:21, Avri Altman wrote:
>> > +static void ufshpb_read_to_handler(struct work_struct *work)

>> > +{

>> > +     struct delayed_work *dwork = to_delayed_work(work);

>> > +     struct ufshpb_lu *hpb;

>> > +     struct victim_select_info *lru_info;

>> > +     struct ufshpb_region *rgn;

>> > +     unsigned long flags;

>> > +     LIST_HEAD(expired_list);

>> > +

>> > +     hpb = container_of(dwork, struct ufshpb_lu, ufshpb_read_to_work);

>> > +

>> > +     spin_lock_irqsave(&hpb->rgn_state_lock, flags);

>> > +

>> > +     lru_info = &hpb->lru_info;

>> > +

>> > +     list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {

>> > +             bool timedout = ktime_after(ktime_get(), rgn->read_timeout);

>> > +

>> > +             if (timedout) {

>> > +                     rgn->read_timeout_expiries--;

>> > +                     if (is_rgn_dirty(rgn) ||

>> > +                         rgn->read_timeout_expiries == 0)

>> > +                             list_add(&rgn->list_expired_rgn, &expired_list);

>> > +                     else

>> > +                             rgn->read_timeout = ktime_add_ms(ktime_get(),

>> > +                                                      READ_TO_MS);

>> > +             }

>> > +     }

>> > +

>> > +     spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);

>> > +

>> > +     list_for_each_entry(rgn, &expired_list, list_expired_rgn) {

>> 

>> Here can be problematic - since you don't have the native expired_list

>> initialized

>> before use, if above loop did not insert anything to expired_list, it

>> shall become

>> a dead loop here.

> Not sure what you meant by native initialization.

> LIST_HEAD is statically initializing an empty list, resulting the same

> outcome as INIT_LIST_HEAD.

> 


Sorry for making you confused, you should use list_for_each_entry_safe()
instead of list_for_each_entry() as you are deleting entries within the 
loop,
otherwise, this can become an infinite loop. Again, have you tested this 
patch
before upload? I am sure this is problematic - when it becomes an 
inifinite
loop, below path will hang...

ufshcd_suspend()->ufshpb_suspend()->cancel_jobs()->cancel_delayed_work()

>> 

>> And, which lock is protecting rgn->list_expired_rgn? If two

>> read_to_handler works

>> are running in parallel, one can be inserting it to its expired_list

>> while another

>> can be deleting it.

> The timeout handler, being a delayed work, is meant to run every 

> polling period.

> Originally, I had it protected from 2 handlers running concurrently,

> But I removed it following Daejun's comment, which I accepted,

> Since it is always scheduled using the same polling period.


But one can set the delay to 0 through sysfs, right?

Thanks,
Can Guo.

> 

> Thanks,

> Avri

> 

>> 

>> Can Guo.

>> 

>> > +             list_del_init(&rgn->list_expired_rgn);

>> > +             spin_lock_irqsave(&hpb->rsp_list_lock, flags);

>> > +             ufshpb_update_inactive_info(hpb, rgn->rgn_idx);

>> > +             hpb->stats.rb_inactive_cnt++;

>> > +             spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);

>> > +     }

>> > +

>> > +     ufshpb_kick_map_work(hpb);

>> > +

>> > +     schedule_delayed_work(&hpb->ufshpb_read_to_work,

>> > +                           msecs_to_jiffies(POLLING_INTERVAL_MS));

>> > +}

>> > +

>> >  static void ufshpb_add_lru_info(struct victim_select_info *lru_info,

>> >                               struct ufshpb_region *rgn)

>> >  {

>> >       rgn->rgn_state = HPB_RGN_ACTIVE;

>> >       list_add_tail(&rgn->list_lru_rgn, &lru_info->lh_lru_rgn);

>> >       atomic_inc(&lru_info->active_cnt);

>> > +     if (rgn->hpb->is_hcm) {

>> > +             rgn->read_timeout = ktime_add_ms(ktime_get(), READ_TO_MS);

>> > +             rgn->read_timeout_expiries = READ_TO_EXPIRIES;

>> > +     }

>> >  }

>> >

>> >  static void ufshpb_hit_lru_info(struct victim_select_info *lru_info,

>> > @@ -1813,6 +1865,7 @@ static int ufshpb_alloc_region_tbl(struct

>> > ufs_hba *hba, struct ufshpb_lu *hpb)

>> >

>> >               INIT_LIST_HEAD(&rgn->list_inact_rgn);

>> >               INIT_LIST_HEAD(&rgn->list_lru_rgn);

>> > +             INIT_LIST_HEAD(&rgn->list_expired_rgn);

>> >

>> >               if (rgn_idx == hpb->rgns_per_lu - 1) {

>> >                       srgn_cnt = ((hpb->srgns_per_lu - 1) %

>> > @@ -1834,6 +1887,7 @@ static int ufshpb_alloc_region_tbl(struct

>> > ufs_hba *hba, struct ufshpb_lu *hpb)

>> >               }

>> >

>> >               rgn->rgn_flags = 0;

>> > +             rgn->hpb = hpb;

>> >       }

>> >

>> >       return 0;

>> > @@ -2053,6 +2107,8 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

>> > *hba, struct ufshpb_lu *hpb)

>> >                         ufshpb_normalization_work_handler);

>> >               INIT_WORK(&hpb->ufshpb_lun_reset_work,

>> >                         ufshpb_reset_work_handler);

>> > +             INIT_DELAYED_WORK(&hpb->ufshpb_read_to_work,

>> > +                               ufshpb_read_to_handler);

>> >       }

>> >

>> >       hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",

>> > @@ -2087,6 +2143,10 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

>> > *hba, struct ufshpb_lu *hpb)

>> >       ufshpb_stat_init(hpb);

>> >       ufshpb_param_init(hpb);

>> >

>> > +     if (hpb->is_hcm)

>> > +             schedule_delayed_work(&hpb->ufshpb_read_to_work,

>> > +                                   msecs_to_jiffies(POLLING_INTERVAL_MS));

>> > +

>> >       return 0;

>> >

>> >  release_pre_req_mempool:

>> > @@ -2154,6 +2214,7 @@ static void ufshpb_discard_rsp_lists(struct

>> > ufshpb_lu *hpb)

>> >  static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)

>> >  {

>> >       if (hpb->is_hcm) {

>> > +             cancel_delayed_work_sync(&hpb->ufshpb_read_to_work);

>> >               cancel_work_sync(&hpb->ufshpb_lun_reset_work);

>> >               cancel_work_sync(&hpb->ufshpb_normalization_work);

>> >       }

>> > @@ -2264,6 +2325,10 @@ void ufshpb_resume(struct ufs_hba *hba)

>> >                       continue;

>> >               ufshpb_set_state(hpb, HPB_PRESENT);

>> >               ufshpb_kick_map_work(hpb);

>> > +             if (hpb->is_hcm)

>> > +                     schedule_delayed_work(&hpb->ufshpb_read_to_work,

>> > +                             msecs_to_jiffies(POLLING_INTERVAL_MS));

>> > +

>> >       }

>> >  }

>> >

>> > diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h

>> > index 37c1b0ea0c0a..b49e9a34267f 100644

>> > --- a/drivers/scsi/ufs/ufshpb.h

>> > +++ b/drivers/scsi/ufs/ufshpb.h

>> > @@ -109,6 +109,7 @@ struct ufshpb_subregion {

>> >  };

>> >

>> >  struct ufshpb_region {

>> > +     struct ufshpb_lu *hpb;

>> >       struct ufshpb_subregion *srgn_tbl;

>> >       enum HPB_RGN_STATE rgn_state;

>> >       int rgn_idx;

>> > @@ -126,6 +127,10 @@ struct ufshpb_region {

>> >       /* region reads - for host mode */

>> >       spinlock_t rgn_lock;

>> >       unsigned int reads;

>> > +     /* region "cold" timer - for host mode */

>> > +     ktime_t read_timeout;

>> > +     unsigned int read_timeout_expiries;

>> > +     struct list_head list_expired_rgn;

>> >  };

>> >

>> >  #define for_each_sub_region(rgn, i, srgn)                            \

>> > @@ -219,6 +224,7 @@ struct ufshpb_lu {

>> >       struct victim_select_info lru_info;

>> >       struct work_struct ufshpb_normalization_work;

>> >       struct work_struct ufshpb_lun_reset_work;

>> > +     struct delayed_work ufshpb_read_to_work;

>> >

>> >       /* pinned region information */

>> >       u32 lu_pinned_start;
Avri Altman March 17, 2021, 7:55 a.m. UTC | #6
> On 2021-03-16 17:21, Avri Altman wrote:

> >> > +static void ufshpb_read_to_handler(struct work_struct *work)

> >> > +{

> >> > +     struct delayed_work *dwork = to_delayed_work(work);

> >> > +     struct ufshpb_lu *hpb;

> >> > +     struct victim_select_info *lru_info;

> >> > +     struct ufshpb_region *rgn;

> >> > +     unsigned long flags;

> >> > +     LIST_HEAD(expired_list);

> >> > +

> >> > +     hpb = container_of(dwork, struct ufshpb_lu, ufshpb_read_to_work);

> >> > +

> >> > +     spin_lock_irqsave(&hpb->rgn_state_lock, flags);

> >> > +

> >> > +     lru_info = &hpb->lru_info;

> >> > +

> >> > +     list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {

> >> > +             bool timedout = ktime_after(ktime_get(), rgn->read_timeout);

> >> > +

> >> > +             if (timedout) {

> >> > +                     rgn->read_timeout_expiries--;

> >> > +                     if (is_rgn_dirty(rgn) ||

> >> > +                         rgn->read_timeout_expiries == 0)

> >> > +                             list_add(&rgn->list_expired_rgn, &expired_list);

> >> > +                     else

> >> > +                             rgn->read_timeout = ktime_add_ms(ktime_get(),

> >> > +                                                      READ_TO_MS);

> >> > +             }

> >> > +     }

> >> > +

> >> > +     spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);

> >> > +

> >> > +     list_for_each_entry(rgn, &expired_list, list_expired_rgn) {

> >>

> >> Here can be problematic - since you don't have the native expired_list

> >> initialized

> >> before use, if above loop did not insert anything to expired_list, it

> >> shall become

> >> a dead loop here.

> > Not sure what you meant by native initialization.

> > LIST_HEAD is statically initializing an empty list, resulting the same

> > outcome as INIT_LIST_HEAD.

> >

> 

> Sorry for making you confused, you should use list_for_each_entry_safe()

> instead of list_for_each_entry() as you are deleting entries within the

> loop,

> otherwise, this can become an infinite loop. Again, have you tested this

> patch

> before upload? I am sure this is problematic - when it becomes an

> inifinite

> loop, below path will hang...

> 

> ufshcd_suspend()->ufshpb_suspend()->cancel_jobs()->cancel_delayed_work()

Ahh  - yes.  You are right.  Originally I used list_for_each_entry_safe.
Not sure why I changed it here.  Will fix it.

I openly disclosed that I am testing the code on gs20 and mi10.
Those are v4.19 platforms, and I am using a driver adopted from the original public hpb driver
Published by Samsung with the gs20 code.
I am also concern as those drivers are drifted apart as the review process commences.
Will try to bring-up a more advanced platform (gs21) and apply the mainline hpb driver.


> 

> >>

> >> And, which lock is protecting rgn->list_expired_rgn? If two

> >> read_to_handler works

> >> are running in parallel, one can be inserting it to its expired_list

> >> while another

> >> can be deleting it.

> > The timeout handler, being a delayed work, is meant to run every

> > polling period.

> > Originally, I had it protected from 2 handlers running concurrently,

> > But I removed it following Daejun's comment, which I accepted,

> > Since it is always scheduled using the same polling period.

> 

> But one can set the delay to 0 through sysfs, right?

Will restore the protection.  Thanks.

Thanks,
Avri

> 

> Thanks,

> Can Guo.

> 

> >

> > Thanks,

> > Avri

> >

> >>

> >> Can Guo.

> >>

> >> > +             list_del_init(&rgn->list_expired_rgn);

> >> > +             spin_lock_irqsave(&hpb->rsp_list_lock, flags);

> >> > +             ufshpb_update_inactive_info(hpb, rgn->rgn_idx);

> >> > +             hpb->stats.rb_inactive_cnt++;

> >> > +             spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);

> >> > +     }

> >> > +

> >> > +     ufshpb_kick_map_work(hpb);

> >> > +

> >> > +     schedule_delayed_work(&hpb->ufshpb_read_to_work,

> >> > +                           msecs_to_jiffies(POLLING_INTERVAL_MS));

> >> > +}

> >> > +

> >> >  static void ufshpb_add_lru_info(struct victim_select_info *lru_info,

> >> >                               struct ufshpb_region *rgn)

> >> >  {

> >> >       rgn->rgn_state = HPB_RGN_ACTIVE;

> >> >       list_add_tail(&rgn->list_lru_rgn, &lru_info->lh_lru_rgn);

> >> >       atomic_inc(&lru_info->active_cnt);

> >> > +     if (rgn->hpb->is_hcm) {

> >> > +             rgn->read_timeout = ktime_add_ms(ktime_get(), READ_TO_MS);

> >> > +             rgn->read_timeout_expiries = READ_TO_EXPIRIES;

> >> > +     }

> >> >  }

> >> >

> >> >  static void ufshpb_hit_lru_info(struct victim_select_info *lru_info,

> >> > @@ -1813,6 +1865,7 @@ static int ufshpb_alloc_region_tbl(struct

> >> > ufs_hba *hba, struct ufshpb_lu *hpb)

> >> >

> >> >               INIT_LIST_HEAD(&rgn->list_inact_rgn);

> >> >               INIT_LIST_HEAD(&rgn->list_lru_rgn);

> >> > +             INIT_LIST_HEAD(&rgn->list_expired_rgn);

> >> >

> >> >               if (rgn_idx == hpb->rgns_per_lu - 1) {

> >> >                       srgn_cnt = ((hpb->srgns_per_lu - 1) %

> >> > @@ -1834,6 +1887,7 @@ static int ufshpb_alloc_region_tbl(struct

> >> > ufs_hba *hba, struct ufshpb_lu *hpb)

> >> >               }

> >> >

> >> >               rgn->rgn_flags = 0;

> >> > +             rgn->hpb = hpb;

> >> >       }

> >> >

> >> >       return 0;

> >> > @@ -2053,6 +2107,8 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

> >> > *hba, struct ufshpb_lu *hpb)

> >> >                         ufshpb_normalization_work_handler);

> >> >               INIT_WORK(&hpb->ufshpb_lun_reset_work,

> >> >                         ufshpb_reset_work_handler);

> >> > +             INIT_DELAYED_WORK(&hpb->ufshpb_read_to_work,

> >> > +                               ufshpb_read_to_handler);

> >> >       }

> >> >

> >> >       hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",

> >> > @@ -2087,6 +2143,10 @@ static int ufshpb_lu_hpb_init(struct ufs_hba

> >> > *hba, struct ufshpb_lu *hpb)

> >> >       ufshpb_stat_init(hpb);

> >> >       ufshpb_param_init(hpb);

> >> >

> >> > +     if (hpb->is_hcm)

> >> > +             schedule_delayed_work(&hpb->ufshpb_read_to_work,

> >> > +                                   msecs_to_jiffies(POLLING_INTERVAL_MS));

> >> > +

> >> >       return 0;

> >> >

> >> >  release_pre_req_mempool:

> >> > @@ -2154,6 +2214,7 @@ static void ufshpb_discard_rsp_lists(struct

> >> > ufshpb_lu *hpb)

> >> >  static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)

> >> >  {

> >> >       if (hpb->is_hcm) {

> >> > +             cancel_delayed_work_sync(&hpb->ufshpb_read_to_work);

> >> >               cancel_work_sync(&hpb->ufshpb_lun_reset_work);

> >> >               cancel_work_sync(&hpb->ufshpb_normalization_work);

> >> >       }

> >> > @@ -2264,6 +2325,10 @@ void ufshpb_resume(struct ufs_hba *hba)

> >> >                       continue;

> >> >               ufshpb_set_state(hpb, HPB_PRESENT);

> >> >               ufshpb_kick_map_work(hpb);

> >> > +             if (hpb->is_hcm)

> >> > +                     schedule_delayed_work(&hpb->ufshpb_read_to_work,

> >> > +                             msecs_to_jiffies(POLLING_INTERVAL_MS));

> >> > +

> >> >       }

> >> >  }

> >> >

> >> > diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h

> >> > index 37c1b0ea0c0a..b49e9a34267f 100644

> >> > --- a/drivers/scsi/ufs/ufshpb.h

> >> > +++ b/drivers/scsi/ufs/ufshpb.h

> >> > @@ -109,6 +109,7 @@ struct ufshpb_subregion {

> >> >  };

> >> >

> >> >  struct ufshpb_region {

> >> > +     struct ufshpb_lu *hpb;

> >> >       struct ufshpb_subregion *srgn_tbl;

> >> >       enum HPB_RGN_STATE rgn_state;

> >> >       int rgn_idx;

> >> > @@ -126,6 +127,10 @@ struct ufshpb_region {

> >> >       /* region reads - for host mode */

> >> >       spinlock_t rgn_lock;

> >> >       unsigned int reads;

> >> > +     /* region "cold" timer - for host mode */

> >> > +     ktime_t read_timeout;

> >> > +     unsigned int read_timeout_expiries;

> >> > +     struct list_head list_expired_rgn;

> >> >  };

> >> >

> >> >  #define for_each_sub_region(rgn, i, srgn)                            \

> >> > @@ -219,6 +224,7 @@ struct ufshpb_lu {

> >> >       struct victim_select_info lru_info;

> >> >       struct work_struct ufshpb_normalization_work;

> >> >       struct work_struct ufshpb_lun_reset_work;

> >> > +     struct delayed_work ufshpb_read_to_work;

> >> >

> >> >       /* pinned region information */

> >> >       u32 lu_pinned_start;
Bart Van Assche June 1, 2021, 4:22 p.m. UTC | #7
On 3/17/21 12:55 AM, Avri Altman wrote:
>> On 2021-03-16 17:21, Avri Altman wrote:

[ ... ]
>>>> And, which lock is protecting rgn->list_expired_rgn? If two

>>>> read_to_handler works

>>>> are running in parallel, one can be inserting it to its expired_list

>>>> while another

>>>> can be deleting it.

>>> The timeout handler, being a delayed work, is meant to run every

>>> polling period.

>>> Originally, I had it protected from 2 handlers running concurrently,

>>> But I removed it following Daejun's comment, which I accepted,

>>> Since it is always scheduled using the same polling period.

>>

>> But one can set the delay to 0 through sysfs, right?

>

> Will restore the protection.  Thanks.


(replying to an email from 2.5 months ago)

Hi Can,

How can two read_to_handler works run in parallel? How can it make a
difference whether or not the delay can be set to zero? Are you aware
that since kernel v3.7 (released in 2012) all workqueues are
non-reentrant? See also commit dbf2576e37da ("workqueue: make all
workqueues non-reentrant"). From the description of that commit:

    This patch makes all workqueues non-reentrant.  If a work item is
    executing on a different CPU when queueing is requested, it is
    always queued to that CPU.  This guarantees that any given work item
    can be executing on one CPU at maximum and if a work item is queued
    and executing, both are on the same CPU.

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
index 0034fa03fdc6..89a930e72cff 100644
--- a/drivers/scsi/ufs/ufshpb.c
+++ b/drivers/scsi/ufs/ufshpb.c
@@ -18,6 +18,9 @@ 
 
 #define ACTIVATION_THRESHOLD 4 /* 4 IOs */
 #define EVICTION_THRESHOLD (ACTIVATION_THRESHOLD << 6) /* 256 IOs */
+#define READ_TO_MS 1000
+#define READ_TO_EXPIRIES 100
+#define POLLING_INTERVAL_MS 200
 
 /* memory management */
 static struct kmem_cache *ufshpb_mctx_cache;
@@ -1024,12 +1027,61 @@  static int ufshpb_check_srgns_issue_state(struct ufshpb_lu *hpb,
 	return 0;
 }
 
+static void ufshpb_read_to_handler(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct ufshpb_lu *hpb;
+	struct victim_select_info *lru_info;
+	struct ufshpb_region *rgn;
+	unsigned long flags;
+	LIST_HEAD(expired_list);
+
+	hpb = container_of(dwork, struct ufshpb_lu, ufshpb_read_to_work);
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+
+	lru_info = &hpb->lru_info;
+
+	list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {
+		bool timedout = ktime_after(ktime_get(), rgn->read_timeout);
+
+		if (timedout) {
+			rgn->read_timeout_expiries--;
+			if (is_rgn_dirty(rgn) ||
+			    rgn->read_timeout_expiries == 0)
+				list_add(&rgn->list_expired_rgn, &expired_list);
+			else
+				rgn->read_timeout = ktime_add_ms(ktime_get(),
+							 READ_TO_MS);
+		}
+	}
+
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+
+	list_for_each_entry(rgn, &expired_list, list_expired_rgn) {
+		list_del_init(&rgn->list_expired_rgn);
+		spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+		ufshpb_update_inactive_info(hpb, rgn->rgn_idx);
+		hpb->stats.rb_inactive_cnt++;
+		spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+	}
+
+	ufshpb_kick_map_work(hpb);
+
+	schedule_delayed_work(&hpb->ufshpb_read_to_work,
+			      msecs_to_jiffies(POLLING_INTERVAL_MS));
+}
+
 static void ufshpb_add_lru_info(struct victim_select_info *lru_info,
 				struct ufshpb_region *rgn)
 {
 	rgn->rgn_state = HPB_RGN_ACTIVE;
 	list_add_tail(&rgn->list_lru_rgn, &lru_info->lh_lru_rgn);
 	atomic_inc(&lru_info->active_cnt);
+	if (rgn->hpb->is_hcm) {
+		rgn->read_timeout = ktime_add_ms(ktime_get(), READ_TO_MS);
+		rgn->read_timeout_expiries = READ_TO_EXPIRIES;
+	}
 }
 
 static void ufshpb_hit_lru_info(struct victim_select_info *lru_info,
@@ -1813,6 +1865,7 @@  static int ufshpb_alloc_region_tbl(struct ufs_hba *hba, struct ufshpb_lu *hpb)
 
 		INIT_LIST_HEAD(&rgn->list_inact_rgn);
 		INIT_LIST_HEAD(&rgn->list_lru_rgn);
+		INIT_LIST_HEAD(&rgn->list_expired_rgn);
 
 		if (rgn_idx == hpb->rgns_per_lu - 1) {
 			srgn_cnt = ((hpb->srgns_per_lu - 1) %
@@ -1834,6 +1887,7 @@  static int ufshpb_alloc_region_tbl(struct ufs_hba *hba, struct ufshpb_lu *hpb)
 		}
 
 		rgn->rgn_flags = 0;
+		rgn->hpb = hpb;
 	}
 
 	return 0;
@@ -2053,6 +2107,8 @@  static int ufshpb_lu_hpb_init(struct ufs_hba *hba, struct ufshpb_lu *hpb)
 			  ufshpb_normalization_work_handler);
 		INIT_WORK(&hpb->ufshpb_lun_reset_work,
 			  ufshpb_reset_work_handler);
+		INIT_DELAYED_WORK(&hpb->ufshpb_read_to_work,
+				  ufshpb_read_to_handler);
 	}
 
 	hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",
@@ -2087,6 +2143,10 @@  static int ufshpb_lu_hpb_init(struct ufs_hba *hba, struct ufshpb_lu *hpb)
 	ufshpb_stat_init(hpb);
 	ufshpb_param_init(hpb);
 
+	if (hpb->is_hcm)
+		schedule_delayed_work(&hpb->ufshpb_read_to_work,
+				      msecs_to_jiffies(POLLING_INTERVAL_MS));
+
 	return 0;
 
 release_pre_req_mempool:
@@ -2154,6 +2214,7 @@  static void ufshpb_discard_rsp_lists(struct ufshpb_lu *hpb)
 static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)
 {
 	if (hpb->is_hcm) {
+		cancel_delayed_work_sync(&hpb->ufshpb_read_to_work);
 		cancel_work_sync(&hpb->ufshpb_lun_reset_work);
 		cancel_work_sync(&hpb->ufshpb_normalization_work);
 	}
@@ -2264,6 +2325,10 @@  void ufshpb_resume(struct ufs_hba *hba)
 			continue;
 		ufshpb_set_state(hpb, HPB_PRESENT);
 		ufshpb_kick_map_work(hpb);
+		if (hpb->is_hcm)
+			schedule_delayed_work(&hpb->ufshpb_read_to_work,
+				msecs_to_jiffies(POLLING_INTERVAL_MS));
+
 	}
 }
 
diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h
index 37c1b0ea0c0a..b49e9a34267f 100644
--- a/drivers/scsi/ufs/ufshpb.h
+++ b/drivers/scsi/ufs/ufshpb.h
@@ -109,6 +109,7 @@  struct ufshpb_subregion {
 };
 
 struct ufshpb_region {
+	struct ufshpb_lu *hpb;
 	struct ufshpb_subregion *srgn_tbl;
 	enum HPB_RGN_STATE rgn_state;
 	int rgn_idx;
@@ -126,6 +127,10 @@  struct ufshpb_region {
 	/* region reads - for host mode */
 	spinlock_t rgn_lock;
 	unsigned int reads;
+	/* region "cold" timer - for host mode */
+	ktime_t read_timeout;
+	unsigned int read_timeout_expiries;
+	struct list_head list_expired_rgn;
 };
 
 #define for_each_sub_region(rgn, i, srgn)				\
@@ -219,6 +224,7 @@  struct ufshpb_lu {
 	struct victim_select_info lru_info;
 	struct work_struct ufshpb_normalization_work;
 	struct work_struct ufshpb_lun_reset_work;
+	struct delayed_work ufshpb_read_to_work;
 
 	/* pinned region information */
 	u32 lu_pinned_start;