diff mbox series

[v3,1/2] scsi: ufs: Probe for temperature notification support

Message ID 20210912131919.12962-2-avri.altman@wdc.com
State New
Headers show
Series Add temperature notification support | expand

Commit Message

Avri Altman Sept. 12, 2021, 1:19 p.m. UTC
Probe the dExtendedUFSFeaturesSupport register for the device's
temperature notification support, and if supported, add a hw monitor
device.

Signed-off-by: Avri Altman <avri.altman@wdc.com>
---
 drivers/scsi/ufs/Kconfig     |  10 ++
 drivers/scsi/ufs/Makefile    |   1 +
 drivers/scsi/ufs/ufs-hwmon.c | 179 +++++++++++++++++++++++++++++++++++
 drivers/scsi/ufs/ufs.h       |   6 ++
 drivers/scsi/ufs/ufshcd.c    |  28 ++++++
 drivers/scsi/ufs/ufshcd.h    |  18 ++++
 6 files changed, 242 insertions(+)
 create mode 100644 drivers/scsi/ufs/ufs-hwmon.c

Comments

Avri Altman Sept. 13, 2021, 7:06 a.m. UTC | #1
> > +config SCSI_UFS_HWMON

> > +     bool "UFS  Temperature Notification"

> > +     depends on SCSI_UFSHCD && HWMON

> > +     help

> > +       This provides support for UFS hardware monitoring. If enabled,

> > +       a hardware monitoring device will be created for the UFS device.

> > +

> > +       If unsure, say N.

> > +

> 

> git complains about blank line at EOF.

Done.

> 

> > diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile

> > index c407da9b5171..966048875b50 100644

> > --- a/drivers/scsi/ufs/Makefile

> > +++ b/drivers/scsi/ufs/Makefile

> > @@ -10,6 +10,7 @@ ufshcd-core-$(CONFIG_SCSI_UFS_BSG)  += ufs_bsg.o

> >   ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO)       += ufshcd-crypto.o

> >   ufshcd-core-$(CONFIG_SCSI_UFS_HPB)  += ufshpb.o

> >   ufshcd-core-$(CONFIG_SCSI_UFS_FAULT_INJECTION) +=

> > ufs-fault-injection.o

> > +ufshcd-core-$(CONFIG_SCSI_UFS_HWMON) += ufs-hwmon.o

> >

> >   obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o

> tc-dwc-g210.o

> >   obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o

> > ufshcd-dwc.o tc-dwc-g210.o diff --git a/drivers/scsi/ufs/ufs-hwmon.c

> > b/drivers/scsi/ufs/ufs-hwmon.c new file mode 100644 index

> > 000000000000..a50e83f645f4

> > --- /dev/null

> > +++ b/drivers/scsi/ufs/ufs-hwmon.c

> > @@ -0,0 +1,179 @@

> > +// SPDX-License-Identifier: GPL-2.0

> > +/*

> > + * UFS hardware monitoring support

> > + * Copyright (c) 2021, Western Digital Corporation  */

> > +

> > +#include <linux/hwmon.h>

> > +

> > +#include "ufshcd.h"

> > +

> > +struct ufs_hwmon_data {

> > +     struct ufs_hba *hba;

> > +     u8 mask;

> > +};

> > +

> > +static bool ufs_temp_enabled(struct ufs_hba *hba, u8 mask) {

> > +     u32 ee_mask;

> > +

> > +     if (ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR,

> > +                           QUERY_ATTR_IDN_EE_CONTROL, 0, 0, &ee_mask))

> > +             return false;

> > +

> > +     return (mask & ee_mask & MASK_EE_TOO_HIGH_TEMP) ||

> > +             (mask & ee_mask & MASK_EE_TOO_LOW_TEMP); }

> > +

> > +static bool ufs_temp_valid(struct ufs_hba *hba, u8 mask,

> > +                        enum attr_idn idn, u32 value) {

> > +     return (idn == QUERY_ATTR_IDN_CASE_ROUGH_TEMP && value >= 1

> &&

> > +             value <= 250 && ufs_temp_enabled(hba, mask)) ||

> > +           (idn == QUERY_ATTR_IDN_HIGH_TEMP_BOUND && value >= 100 &&

> > +            value <= 250) ||

> > +           (idn == QUERY_ATTR_IDN_LOW_TEMP_BOUND && value >= 1 &&

> > +            value <= 80);

> > +}

> > +

> The value ranges checed above suggest that the temperature is reported in

> degrees C (or maybe degrees C with an offset). 

Yes.  No offset.

>The hwmon API expects

> temperatures to be reported in milli-degrees C, and I don't see a conversion in

> the actual read functions. What does the "sensors" command report ?

I missed that (Although it is well documented) - sorry about that.
I wasn't aware of the sensors command.  I don't have it in my arm64 android platform image (galaxy s21).
Will try to get it.
I was reading the temperature using hwmon sysfs entries, which indicate the correct temperature.
e.g
t2s:/ # ls -la /sys/class/hwmon/
total 0
drwxr-xr-x   2 root root 0 2020-12-20 10:16 .
drwxr-xr-x 104 root root 0 2020-12-19 19:05 ..
lrwxrwxrwx   1 root root 0 2020-12-20 10:16 hwmon0 -> ../../devices/platform/13100000.ufs/hwmon/hwmon0
.....

t2s:/ # cat /sys/class/hwmon/hwmon0/temp1_input
25

Will fix it.  Thanks.

> 

> > +static int ufs_get_temp(struct ufs_hba *hba, u8 mask, enum attr_idn

> > +idn) {

> > +     u32 value;

> > +

> > +     if (ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, idn, 0, 0,

> > +         &value))

> 

> checkpatch states that alignment is off, and I am quite sure this fits into one

> line anyway (with the 100-column limit). There are more instances with bad

> alignment according to checkpatch.

I wasn't aware that the Linux Kernel deprecates the 80 Character Line Coding Style.
Will try to make it full 100-characters lines.
I didn't get any alignment complaints from checkpatch.

> 

> Also, ufshcd_query_attr() returns a valid Linux error code. That should be

> returned to the caller and not be replaced. More on that below.

> 

> > +             return 0;

> > +

> > +     if (ufs_temp_valid(hba, mask, idn, value))

> > +             return value - 80;

> > +

> 

> This again suggests that the temperature is not milli-degrees C.

> 

> Is there reason to believe that this validation is necessary ?

> Note that this reports an "error" if the returned temperature value happens to

> have a value of 80. Again, more on that below.

Data->mask holds the temperature related bits in the ufs features register: TOO_LOW_TEMPERATURE and TOO_HIGH_TEMPERATURE.
This is set for the device by the flash vendor and can't be changed by the OEMs.
If the device doesn't support any of that, then hwmon_probe is not even called, see ufshcd_temp_notif_probe.
So data->mask is not 0, and never changes.

When the device returns a 0 temperature value, it means that it is not valid.
The spec say about the Device’s rough package case surface temperature:
"
This value shall be valid when (TOO_HIGH_TEMPERATURE is supported and TOO_HIGH_TEMP_EN is enabled) or 
( TOO_LOW_TEMPERATURE is supported and TOO_LOW_TEMP_EN is enabled ).
0 : Unknown Temperature , 1~250 : ( this value – 80 ) degrees in Celsius. ( i.e., -79 ºC ~ 170 ºC )
Others: Reserved
"
data->mask is not 0, but the temperature exception bits: TOO_HIGH_TEMP_EN and TOO_LOW_TEMP_EN are of type read/volatile,
Meaning it can be written many times, e.g. by debugfs or ufs-utils.

To sum up:
 - yes, checking the temperature against the spec boundaries is useless.
   The device will return 0 if it is not valid.
   ufs_temp_valid() can be removed, and just need to check that the temperature is not 0.

 - The return value of querry_attr is of less interest.
    if it failed or temp == 0, then the temperature is invalid and the proper return value should be -EINVAL.

> 

> > +     return 0;

> > +}

> > +

> > +static int ufs_hwmon_read(struct device *dev, enum hwmon_sensor_types

> type,

> > +                        u32 attr, int channel, long *val) {

> > +     struct ufs_hwmon_data *data = dev_get_drvdata(dev);

> > +     struct ufs_hba *hba = data->hba;

> > +     u8 mask = data->mask;

> > +     int err = 0;

> > +     bool get_temp = true;

> > +

> > +     if (type != hwmon_temp)

> > +             return 0;

> > +

> > +     down(&hba->host_sem);

> > +

> > +     if (!ufshcd_is_user_access_allowed(hba)) {

> > +             up(&hba->host_sem);

> > +             return -EBUSY;

> > +     }

> > +

> > +     ufshcd_rpm_get_sync(hba);

> > +

> > +     switch (attr) {

> > +     case hwmon_temp_enable:

> > +             *val = ufs_temp_enabled(hba, mask);

> > +             get_temp = false;

> > +

> 

> This seems to be read-only, and the mask only affects the limit registers as far

> as I con see. If so, this is wrong: The mask should be used to enable or hide the

> limit attributes as needed. If the mask is 0, and if this means that the current

> temperature is not reported either, the driver should not instantiate at all.

> 

> The "enable" attribute only makes sense if it can be used to actually enable or

> disable a specific sensor, and is not tied to limit attributes but to the actual

> sensor values.

See explanation above.
 Will make it writable as well.

> 

> > +             break;

> > +     case hwmon_temp_max_alarm:

> > +             *val = ufs_get_temp(hba, mask,

> > + QUERY_ATTR_IDN_HIGH_TEMP_BOUND);

> > +

> > +             break;

> > +     case hwmon_temp_min_alarm:

> > +             *val = ufs_get_temp(hba, mask,

> > + QUERY_ATTR_IDN_LOW_TEMP_BOUND);

> > +

> > +             break;

> > +     case hwmon_temp_input:

> > +             *val = ufs_get_temp(hba, mask,

> > + QUERY_ATTR_IDN_CASE_ROUGH_TEMP);

> > +

> If an enable attribute exists and is 0 (disabled), this should return -ENODATA.

> In this case, that would imply that the driver should not be instantiated in the

> first place since it has nothing to report.

See explanation above.
Will fix it so the error value will make more sense.

> 

> > +             break;

> > +     default:

> > +             err = -EOPNOTSUPP;

> > +

> > +             break;

> > +     }

> > +

> > +     ufshcd_rpm_put_sync(hba);

> > +

> > +     up(&hba->host_sem);

> > +

> > +     if (get_temp && !err && *val == 0)

> > +             err = -EINVAL;

> > +

> That is an odd way of detection errors. If it was in the hwmon subsystem, I'd ask

> for the error handling to be moved into the case statements. On top of that,

> interpreting a return value of "0" as error seems wrong.

> ufs_get_temp() returns 0 if the measured temperature or the reported limit

> happens to have a value of 80, and that is perfectly valid. If ufs_get_temp()

> reports an error, it should report that as error.

> 

> Also, EINVAL is "invalid argument", which I am quite sure does not apply here.

Ditto.
EINVAL implies that the temperature is invalid.

> >

> > +static void ufshcd_temp_notif_probe(struct ufs_hba *hba, u8

> > +*desc_buf) {

> > +     struct ufs_dev_info *dev_info = &hba->dev_info;

> > +     u32 ext_ufs_feature;

> > +     u8 mask = 0;

> > +

> > +     if (!(hba->caps & UFSHCD_CAP_TEMP_NOTIF) ||

> > +         dev_info->wspecversion < 0x300)

> 

> I am quite sure this fits a single line.

Done.
Guenter Roeck Sept. 13, 2021, 7:41 a.m. UTC | #2
On 9/13/21 12:06 AM, Avri Altman wrote:
>>> +config SCSI_UFS_HWMON

>>> +     bool "UFS  Temperature Notification"

>>> +     depends on SCSI_UFSHCD && HWMON

>>> +     help

>>> +       This provides support for UFS hardware monitoring. If enabled,

>>> +       a hardware monitoring device will be created for the UFS device.

>>> +

>>> +       If unsure, say N.

>>> +

>>

>> git complains about blank line at EOF.

> Done.

> 

>>

>>> diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile

>>> index c407da9b5171..966048875b50 100644

>>> --- a/drivers/scsi/ufs/Makefile

>>> +++ b/drivers/scsi/ufs/Makefile

>>> @@ -10,6 +10,7 @@ ufshcd-core-$(CONFIG_SCSI_UFS_BSG)  += ufs_bsg.o

>>>    ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO)       += ufshcd-crypto.o

>>>    ufshcd-core-$(CONFIG_SCSI_UFS_HPB)  += ufshpb.o

>>>    ufshcd-core-$(CONFIG_SCSI_UFS_FAULT_INJECTION) +=

>>> ufs-fault-injection.o

>>> +ufshcd-core-$(CONFIG_SCSI_UFS_HWMON) += ufs-hwmon.o

>>>

>>>    obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o

>> tc-dwc-g210.o

>>>    obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o

>>> ufshcd-dwc.o tc-dwc-g210.o diff --git a/drivers/scsi/ufs/ufs-hwmon.c

>>> b/drivers/scsi/ufs/ufs-hwmon.c new file mode 100644 index

>>> 000000000000..a50e83f645f4

>>> --- /dev/null

>>> +++ b/drivers/scsi/ufs/ufs-hwmon.c

>>> @@ -0,0 +1,179 @@

>>> +// SPDX-License-Identifier: GPL-2.0

>>> +/*

>>> + * UFS hardware monitoring support

>>> + * Copyright (c) 2021, Western Digital Corporation  */

>>> +

>>> +#include <linux/hwmon.h>

>>> +

>>> +#include "ufshcd.h"

>>> +

>>> +struct ufs_hwmon_data {

>>> +     struct ufs_hba *hba;

>>> +     u8 mask;

>>> +};

>>> +

>>> +static bool ufs_temp_enabled(struct ufs_hba *hba, u8 mask) {

>>> +     u32 ee_mask;

>>> +

>>> +     if (ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR,

>>> +                           QUERY_ATTR_IDN_EE_CONTROL, 0, 0, &ee_mask))

>>> +             return false;

>>> +

>>> +     return (mask & ee_mask & MASK_EE_TOO_HIGH_TEMP) ||

>>> +             (mask & ee_mask & MASK_EE_TOO_LOW_TEMP); }

>>> +

>>> +static bool ufs_temp_valid(struct ufs_hba *hba, u8 mask,

>>> +                        enum attr_idn idn, u32 value) {

>>> +     return (idn == QUERY_ATTR_IDN_CASE_ROUGH_TEMP && value >= 1

>> &&

>>> +             value <= 250 && ufs_temp_enabled(hba, mask)) ||

>>> +           (idn == QUERY_ATTR_IDN_HIGH_TEMP_BOUND && value >= 100 &&

>>> +            value <= 250) ||

>>> +           (idn == QUERY_ATTR_IDN_LOW_TEMP_BOUND && value >= 1 &&

>>> +            value <= 80);

>>> +}

>>> +

>> The value ranges checed above suggest that the temperature is reported in

>> degrees C (or maybe degrees C with an offset).

> Yes.  No offset.

> 

>> The hwmon API expects

>> temperatures to be reported in milli-degrees C, and I don't see a conversion in

>> the actual read functions. What does the "sensors" command report ?

> I missed that (Although it is well documented) - sorry about that.

> I wasn't aware of the sensors command.  I don't have it in my arm64 android platform image (galaxy s21).

> Will try to get it.

> I was reading the temperature using hwmon sysfs entries, which indicate the correct temperature.

> e.g

> t2s:/ # ls -la /sys/class/hwmon/

> total 0

> drwxr-xr-x   2 root root 0 2020-12-20 10:16 .

> drwxr-xr-x 104 root root 0 2020-12-19 19:05 ..

> lrwxrwxrwx   1 root root 0 2020-12-20 10:16 hwmon0 -> ../../devices/platform/13100000.ufs/hwmon/hwmon0

> .....

> 

> t2s:/ # cat /sys/class/hwmon/hwmon0/temp1_input

> 25

> 

> Will fix it.  Thanks.

> 

>>

>>> +static int ufs_get_temp(struct ufs_hba *hba, u8 mask, enum attr_idn

>>> +idn) {

>>> +     u32 value;

>>> +

>>> +     if (ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, idn, 0, 0,

>>> +         &value))

>>

>> checkpatch states that alignment is off, and I am quite sure this fits into one

>> line anyway (with the 100-column limit). There are more instances with bad

>> alignment according to checkpatch.

> I wasn't aware that the Linux Kernel deprecates the 80 Character Line Coding Style.

> Will try to make it full 100-characters lines.

> I didn't get any alignment complaints from checkpatch.

> 

>>

>> Also, ufshcd_query_attr() returns a valid Linux error code. That should be

>> returned to the caller and not be replaced. More on that below.

>>

>>> +             return 0;

>>> +

>>> +     if (ufs_temp_valid(hba, mask, idn, value))

>>> +             return value - 80;

>>> +

>>

>> This again suggests that the temperature is not milli-degrees C.

>>

>> Is there reason to believe that this validation is necessary ?

>> Note that this reports an "error" if the returned temperature value happens to

>> have a value of 80. Again, more on that below.

> Data->mask holds the temperature related bits in the ufs features register: TOO_LOW_TEMPERATURE and TOO_HIGH_TEMPERATURE.

> This is set for the device by the flash vendor and can't be changed by the OEMs.

> If the device doesn't support any of that, then hwmon_probe is not even called, see ufshcd_temp_notif_probe.

> So data->mask is not 0, and never changes.

> 

> When the device returns a 0 temperature value, it means that it is not valid.

> The spec say about the Device’s rough package case surface temperature:

> "

> This value shall be valid when (TOO_HIGH_TEMPERATURE is supported and TOO_HIGH_TEMP_EN is enabled) or

> ( TOO_LOW_TEMPERATURE is supported and TOO_LOW_TEMP_EN is enabled ).

> 0 : Unknown Temperature , 1~250 : ( this value – 80 ) degrees in Celsius. ( i.e., -79 ºC ~ 170 ºC )

> Others: Reserved

> "

> data->mask is not 0, but the temperature exception bits: TOO_HIGH_TEMP_EN and TOO_LOW_TEMP_EN are of type read/volatile,

> Meaning it can be written many times, e.g. by debugfs or ufs-utils.

> 

> To sum up:

>   - yes, checking the temperature against the spec boundaries is useless.

>     The device will return 0 if it is not valid.

>     ufs_temp_valid() can be removed, and just need to check that the temperature is not 0.

> 

>   - The return value of querry_attr is of less interest.

>      if it failed or temp == 0, then the temperature is invalid and the proper return value should be -EINVAL.

> 

>>

>>> +     return 0;

>>> +}

>>> +

>>> +static int ufs_hwmon_read(struct device *dev, enum hwmon_sensor_types

>> type,

>>> +                        u32 attr, int channel, long *val) {

>>> +     struct ufs_hwmon_data *data = dev_get_drvdata(dev);

>>> +     struct ufs_hba *hba = data->hba;

>>> +     u8 mask = data->mask;

>>> +     int err = 0;

>>> +     bool get_temp = true;

>>> +

>>> +     if (type != hwmon_temp)

>>> +             return 0;

>>> +

>>> +     down(&hba->host_sem);

>>> +

>>> +     if (!ufshcd_is_user_access_allowed(hba)) {

>>> +             up(&hba->host_sem);

>>> +             return -EBUSY;

>>> +     }

>>> +

>>> +     ufshcd_rpm_get_sync(hba);

>>> +

>>> +     switch (attr) {

>>> +     case hwmon_temp_enable:

>>> +             *val = ufs_temp_enabled(hba, mask);

>>> +             get_temp = false;

>>> +

>>

>> This seems to be read-only, and the mask only affects the limit registers as far

>> as I con see. If so, this is wrong: The mask should be used to enable or hide the

>> limit attributes as needed. If the mask is 0, and if this means that the current

>> temperature is not reported either, the driver should not instantiate at all.

>>

>> The "enable" attribute only makes sense if it can be used to actually enable or

>> disable a specific sensor, and is not tied to limit attributes but to the actual

>> sensor values.

> See explanation above.

>   Will make it writable as well.

> 


That only makes sense if the information is passed to the chip. What is going
to happen if the user writes 0 into the attribute ?

Guenter

>>

>>> +             break;

>>> +     case hwmon_temp_max_alarm:

>>> +             *val = ufs_get_temp(hba, mask,

>>> + QUERY_ATTR_IDN_HIGH_TEMP_BOUND);

>>> +

>>> +             break;

>>> +     case hwmon_temp_min_alarm:

>>> +             *val = ufs_get_temp(hba, mask,

>>> + QUERY_ATTR_IDN_LOW_TEMP_BOUND);

>>> +

>>> +             break;

>>> +     case hwmon_temp_input:

>>> +             *val = ufs_get_temp(hba, mask,

>>> + QUERY_ATTR_IDN_CASE_ROUGH_TEMP);

>>> +

>> If an enable attribute exists and is 0 (disabled), this should return -ENODATA.

>> In this case, that would imply that the driver should not be instantiated in the

>> first place since it has nothing to report.

> See explanation above.

> Will fix it so the error value will make more sense.

> 

>>

>>> +             break;

>>> +     default:

>>> +             err = -EOPNOTSUPP;

>>> +

>>> +             break;

>>> +     }

>>> +

>>> +     ufshcd_rpm_put_sync(hba);

>>> +

>>> +     up(&hba->host_sem);

>>> +

>>> +     if (get_temp && !err && *val == 0)

>>> +             err = -EINVAL;

>>> +

>> That is an odd way of detection errors. If it was in the hwmon subsystem, I'd ask

>> for the error handling to be moved into the case statements. On top of that,

>> interpreting a return value of "0" as error seems wrong.

>> ufs_get_temp() returns 0 if the measured temperature or the reported limit

>> happens to have a value of 80, and that is perfectly valid. If ufs_get_temp()

>> reports an error, it should report that as error.

>>

>> Also, EINVAL is "invalid argument", which I am quite sure does not apply here.

> Ditto.

> EINVAL implies that the temperature is invalid.

> 

>>>

>>> +static void ufshcd_temp_notif_probe(struct ufs_hba *hba, u8

>>> +*desc_buf) {

>>> +     struct ufs_dev_info *dev_info = &hba->dev_info;

>>> +     u32 ext_ufs_feature;

>>> +     u8 mask = 0;

>>> +

>>> +     if (!(hba->caps & UFSHCD_CAP_TEMP_NOTIF) ||

>>> +         dev_info->wspecversion < 0x300)

>>

>> I am quite sure this fits a single line.

> Done.

>
Avri Altman Sept. 13, 2021, 7:49 a.m. UTC | #3
> >> The "enable" attribute only makes sense if it can be used to actually

> >> enable or disable a specific sensor, and is not tied to limit

> >> attributes but to the actual sensor values.

> > See explanation above.

> >   Will make it writable as well.

> >

> 

> That only makes sense if the information is passed to the chip. What is going to

> happen if the user writes 0 into the attribute ?

Will turn off the temperature exception bits, so that Tcase is no longer valid,
and the device will always return Tcase = 0.

> Guenter
Guenter Roeck Sept. 13, 2021, 2:24 p.m. UTC | #4
On 9/13/21 12:49 AM, Avri Altman wrote:
>>>> The "enable" attribute only makes sense if it can be used to actually

>>>> enable or disable a specific sensor, and is not tied to limit

>>>> attributes but to the actual sensor values.

>>> See explanation above.

>>>    Will make it writable as well.

>>>

>>

>> That only makes sense if the information is passed to the chip. What is going to

>> happen if the user writes 0 into the attribute ?

> Will turn off the temperature exception bits, so that Tcase is no longer valid,

> and the device will always return Tcase = 0.

> 


Ok. Then attempts to read the temperature should return -ENODATA, not -EINVAL,
if Tcase == 0.

Guenter
Avri Altman Sept. 13, 2021, 3:26 p.m. UTC | #5
> On 9/13/21 12:49 AM, Avri Altman wrote:

> >>>> The "enable" attribute only makes sense if it can be used to

> >>>> actually enable or disable a specific sensor, and is not tied to

> >>>> limit attributes but to the actual sensor values.

> >>> See explanation above.

> >>>    Will make it writable as well.

> >>>

> >>

> >> That only makes sense if the information is passed to the chip. What

> >> is going to happen if the user writes 0 into the attribute ?

> > Will turn off the temperature exception bits, so that Tcase is no

> > longer valid, and the device will always return Tcase = 0.

> >

> 

> Ok. Then attempts to read the temperature should return -ENODATA, not -

> EINVAL, if Tcase == 0.

OK. Thanks,

Avri

> 

> Guenter
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 432df76e6318..b930f29fc375 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig
@@ -199,3 +199,13 @@  config SCSI_UFS_FAULT_INJECTION
 	help
 	  Enable fault injection support in the UFS driver. This makes it easier
 	  to test the UFS error handler and abort handler.
+
+config SCSI_UFS_HWMON
+	bool "UFS  Temperature Notification"
+	depends on SCSI_UFSHCD && HWMON
+	help
+	  This provides support for UFS hardware monitoring. If enabled,
+	  a hardware monitoring device will be created for the UFS device.
+
+	  If unsure, say N.
+
diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile
index c407da9b5171..966048875b50 100644
--- a/drivers/scsi/ufs/Makefile
+++ b/drivers/scsi/ufs/Makefile
@@ -10,6 +10,7 @@  ufshcd-core-$(CONFIG_SCSI_UFS_BSG)	+= ufs_bsg.o
 ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO)	+= ufshcd-crypto.o
 ufshcd-core-$(CONFIG_SCSI_UFS_HPB)	+= ufshpb.o
 ufshcd-core-$(CONFIG_SCSI_UFS_FAULT_INJECTION) += ufs-fault-injection.o
+ufshcd-core-$(CONFIG_SCSI_UFS_HWMON) += ufs-hwmon.o
 
 obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o tc-dwc-g210.o
 obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o ufshcd-dwc.o tc-dwc-g210.o
diff --git a/drivers/scsi/ufs/ufs-hwmon.c b/drivers/scsi/ufs/ufs-hwmon.c
new file mode 100644
index 000000000000..a50e83f645f4
--- /dev/null
+++ b/drivers/scsi/ufs/ufs-hwmon.c
@@ -0,0 +1,179 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * UFS hardware monitoring support
+ * Copyright (c) 2021, Western Digital Corporation
+ */
+
+#include <linux/hwmon.h>
+
+#include "ufshcd.h"
+
+struct ufs_hwmon_data {
+	struct ufs_hba *hba;
+	u8 mask;
+};
+
+static bool ufs_temp_enabled(struct ufs_hba *hba, u8 mask)
+{
+	u32 ee_mask;
+
+	if (ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR,
+			      QUERY_ATTR_IDN_EE_CONTROL, 0, 0, &ee_mask))
+		return false;
+
+	return (mask & ee_mask & MASK_EE_TOO_HIGH_TEMP) ||
+		(mask & ee_mask & MASK_EE_TOO_LOW_TEMP);
+}
+
+static bool ufs_temp_valid(struct ufs_hba *hba, u8 mask,
+			   enum attr_idn idn, u32 value)
+{
+	return (idn == QUERY_ATTR_IDN_CASE_ROUGH_TEMP && value >= 1 &&
+		value <= 250 && ufs_temp_enabled(hba, mask)) ||
+	      (idn == QUERY_ATTR_IDN_HIGH_TEMP_BOUND && value >= 100 &&
+	       value <= 250) ||
+	      (idn == QUERY_ATTR_IDN_LOW_TEMP_BOUND && value >= 1 &&
+	       value <= 80);
+}
+
+static int ufs_get_temp(struct ufs_hba *hba, u8 mask, enum attr_idn idn)
+{
+	u32 value;
+
+	if (ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, idn, 0, 0,
+	    &value))
+		return 0;
+
+	if (ufs_temp_valid(hba, mask, idn, value))
+		return value - 80;
+
+	return 0;
+}
+
+static int ufs_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			   u32 attr, int channel, long *val)
+{
+	struct ufs_hwmon_data *data = dev_get_drvdata(dev);
+	struct ufs_hba *hba = data->hba;
+	u8 mask = data->mask;
+	int err = 0;
+	bool get_temp = true;
+
+	if (type != hwmon_temp)
+		return 0;
+
+	down(&hba->host_sem);
+
+	if (!ufshcd_is_user_access_allowed(hba)) {
+		up(&hba->host_sem);
+		return -EBUSY;
+	}
+
+	ufshcd_rpm_get_sync(hba);
+
+	switch (attr) {
+	case hwmon_temp_enable:
+		*val = ufs_temp_enabled(hba, mask);
+		get_temp = false;
+
+		break;
+	case hwmon_temp_max_alarm:
+		*val = ufs_get_temp(hba, mask, QUERY_ATTR_IDN_HIGH_TEMP_BOUND);
+
+		break;
+	case hwmon_temp_min_alarm:
+		*val = ufs_get_temp(hba, mask, QUERY_ATTR_IDN_LOW_TEMP_BOUND);
+
+		break;
+	case hwmon_temp_input:
+		*val = ufs_get_temp(hba, mask, QUERY_ATTR_IDN_CASE_ROUGH_TEMP);
+
+		break;
+	default:
+		err = -EOPNOTSUPP;
+
+		break;
+	}
+
+	ufshcd_rpm_put_sync(hba);
+
+	up(&hba->host_sem);
+
+	if (get_temp && !err && *val == 0)
+		err = -EINVAL;
+
+	return err;
+}
+
+static umode_t ufs_hwmon_is_visible(const void *_data,
+				     enum hwmon_sensor_types type,
+				     u32 attr, int channel)
+{
+	if (type != hwmon_temp)
+		return 0;
+
+	switch (attr) {
+	case hwmon_temp_enable:
+	case hwmon_temp_max_alarm:
+	case hwmon_temp_min_alarm:
+	case hwmon_temp_input:
+		return 0444;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static const struct hwmon_channel_info *ufs_hwmon_info[] = {
+	HWMON_CHANNEL_INFO(temp, HWMON_T_ENABLE | HWMON_T_INPUT |
+			    HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM),
+	NULL
+};
+
+static const struct hwmon_ops ufs_hwmon_ops = {
+	.is_visible	= ufs_hwmon_is_visible,
+	.read		= ufs_hwmon_read,
+};
+
+static const struct hwmon_chip_info ufs_hwmon_hba_info = {
+	.ops	= &ufs_hwmon_ops,
+	.info	= ufs_hwmon_info,
+};
+
+void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask)
+{
+	struct device *dev = hba->dev;
+	struct ufs_hwmon_data *data;
+	struct device *hwmon;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return;
+
+	data->hba = hba;
+	data->mask = mask;
+
+	hwmon = hwmon_device_register_with_info(dev, "ufs",
+						data, &ufs_hwmon_hba_info,
+						NULL);
+	if (IS_ERR(hwmon)) {
+		dev_warn(dev, "Failed to instantiate hwmon device\n");
+		kfree(data);
+		return;
+	}
+
+	hba->hwmon_device = hwmon;
+}
+
+void ufs_hwmon_remove(struct ufs_hba *hba)
+{
+	struct ufs_hwmon_data *data;
+
+	if (!hba->hwmon_device)
+		return;
+
+	data = dev_get_drvdata(hba->hwmon_device);
+	hwmon_device_unregister(hba->hwmon_device);
+	hba->hwmon_device = NULL;
+	kfree(data);
+}
diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 8c6b38b1b142..171b27be7b1d 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -152,6 +152,9 @@  enum attr_idn {
 	QUERY_ATTR_IDN_PSA_STATE		= 0x15,
 	QUERY_ATTR_IDN_PSA_DATA_SIZE		= 0x16,
 	QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME	= 0x17,
+	QUERY_ATTR_IDN_CASE_ROUGH_TEMP          = 0x18,
+	QUERY_ATTR_IDN_HIGH_TEMP_BOUND          = 0x19,
+	QUERY_ATTR_IDN_LOW_TEMP_BOUND           = 0x1A,
 	QUERY_ATTR_IDN_WB_FLUSH_STATUS	        = 0x1C,
 	QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE       = 0x1D,
 	QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST    = 0x1E,
@@ -338,6 +341,9 @@  enum {
 
 /* Possible values for dExtendedUFSFeaturesSupport */
 enum {
+	UFS_DEV_LOW_TEMP_NOTIF		= BIT(4),
+	UFS_DEV_HIGH_TEMP_NOTIF		= BIT(5),
+	UFS_DEV_EXT_TEMP_NOTIF		= BIT(6),
 	UFS_DEV_HPB_SUPPORT		= BIT(7),
 	UFS_DEV_WRITE_BOOSTER_SUP	= BIT(8),
 };
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 67889d74761c..90c2e9677435 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7469,6 +7469,31 @@  static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf)
 	hba->caps &= ~UFSHCD_CAP_WB_EN;
 }
 
+static void ufshcd_temp_notif_probe(struct ufs_hba *hba, u8 *desc_buf)
+{
+	struct ufs_dev_info *dev_info = &hba->dev_info;
+	u32 ext_ufs_feature;
+	u8 mask = 0;
+
+	if (!(hba->caps & UFSHCD_CAP_TEMP_NOTIF) ||
+	    dev_info->wspecversion < 0x300)
+		return;
+
+	ext_ufs_feature = get_unaligned_be32(desc_buf +
+					DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP);
+
+	if (ext_ufs_feature & UFS_DEV_LOW_TEMP_NOTIF)
+		mask |= MASK_EE_TOO_LOW_TEMP;
+
+	if (ext_ufs_feature & UFS_DEV_HIGH_TEMP_NOTIF)
+		mask |= MASK_EE_TOO_HIGH_TEMP;
+
+	if (mask) {
+		ufshcd_enable_ee(hba, mask);
+		ufs_hwmon_probe(hba, mask);
+	}
+}
+
 void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, struct ufs_dev_fix *fixups)
 {
 	struct ufs_dev_fix *f;
@@ -7564,6 +7589,8 @@  static int ufs_get_device_desc(struct ufs_hba *hba)
 
 	ufshcd_wb_probe(hba, desc_buf);
 
+	ufshcd_temp_notif_probe(hba, desc_buf);
+
 	/*
 	 * ufshcd_read_string_desc returns size of the string
 	 * reset the error value
@@ -9408,6 +9435,7 @@  void ufshcd_remove(struct ufs_hba *hba)
 {
 	if (hba->sdev_ufs_device)
 		ufshcd_rpm_get_sync(hba);
+	ufs_hwmon_remove(hba);
 	ufs_bsg_remove(hba);
 	ufshpb_remove(hba);
 	ufs_sysfs_remove_nodes(hba->dev);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 4723f27a55d1..798a408d71e5 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -653,6 +653,12 @@  enum ufshcd_caps {
 	 * in order to exit DeepSleep state.
 	 */
 	UFSHCD_CAP_DEEPSLEEP				= 1 << 10,
+
+	/*
+	 * This capability allows the host controller driver to use temperature
+	 * notification if it is supported by the UFS device.
+	 */
+	UFSHCD_CAP_TEMP_NOTIF				= 1 << 11,
 };
 
 struct ufs_hba_variant_params {
@@ -789,6 +795,10 @@  struct ufs_hba {
 	struct scsi_device *sdev_ufs_device;
 	struct scsi_device *sdev_rpmb;
 
+#ifdef CONFIG_SCSI_UFS_HWMON
+	struct device *hwmon_device;
+#endif
+
 	enum ufs_dev_pwr_mode curr_dev_pwr_mode;
 	enum uic_link_state uic_link_state;
 	/* Desired UFS power management level during runtime PM */
@@ -1050,6 +1060,14 @@  static inline u8 ufshcd_wb_get_query_index(struct ufs_hba *hba)
 	return 0;
 }
 
+#ifdef CONFIG_SCSI_UFS_HWMON
+void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask);
+void ufs_hwmon_remove(struct ufs_hba *hba);
+#else
+static inline void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask) {}
+static inline void ufs_hwmon_remove(struct ufs_hba *hba) {}
+#endif
+
 #ifdef CONFIG_PM
 extern int ufshcd_runtime_suspend(struct device *dev);
 extern int ufshcd_runtime_resume(struct device *dev);