[01/13] thermal/drivers/hisi: Fix missing interrupt enablement

Message ID 1504082857-21702-1-git-send-email-daniel.lezcano@linaro.org
State Superseded
Headers show
Series
  • [01/13] thermal/drivers/hisi: Fix missing interrupt enablement
Related show

Commit Message

Daniel Lezcano Aug. 30, 2017, 8:47 a.m.
The interrupt for the temperature threshold is not enabled at the end of the
probe function, enable it after the setup is complete.

On the other side, the irq_enabled is not correctly set as we are checking if
the interrupt is masked where 'yes' means irq_enabled=false.

	irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED,
				&data->irq_enabled);

As we are always enabling the interrupt, it is pointless to check if
the interrupt is masked or not, just set irq_enabled to 'true'.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

---
 drivers/thermal/hisi_thermal.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

-- 
2.7.4

Comments

Leo Yan Sept. 1, 2017, 2:05 p.m. | #1
Hi Daniel,

On Wed, Aug 30, 2017 at 10:47:26AM +0200, Daniel Lezcano wrote:
> By essence, the tsensor does not really support multiple sensor at the same

> time. It allows to set a sensor and use it to get the temperature, another

> sensor could be switched but with a delay of 3-5ms. It is difficult to read

> simultaneously several sensors without a big delay.

> 

> Today, just one sensor is used, it is not necessary to deal with multiple

> sensors in the code. Remove them and if it is needed in the future add them

> on top of a code which will be clean up in the meantime.

> 

> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

> ---

>  drivers/thermal/hisi_thermal.c | 77 +++++++++++-------------------------------

>  1 file changed, 20 insertions(+), 57 deletions(-)

> 

> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

> index 8381696..92b6889 100644

> --- a/drivers/thermal/hisi_thermal.c

> +++ b/drivers/thermal/hisi_thermal.c

> @@ -39,6 +39,7 @@

>  #define HISI_TEMP_RESET			(100000)

>  

>  #define HISI_MAX_SENSORS		4

> +#define HISI_DEFAULT_SENSOR		2


So if we always set the sensor id 2 as default sensor, that means it's
pointless to bind sensor id in dts. E.g. I change to bind thermal
sensor 0, then it's impossible to bind successfully:

-                               thermal-sensors = <&tsensor 2>;
+                               thermal-sensors = <&tsensor 0>;

It's okay for this change, do you think we need reflect this in DT
binding doc [1] ?

[1] Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt

>  struct hisi_thermal_sensor {

>  	struct hisi_thermal_data *thermal;

> @@ -53,11 +54,10 @@ struct hisi_thermal_data {

>  	struct mutex thermal_lock;    /* protects register data */

>  	struct platform_device *pdev;

>  	struct clk *clk;

> -	struct hisi_thermal_sensor sensors[HISI_MAX_SENSORS];

> -

> -	int irq, irq_bind_sensor;

> +	struct hisi_thermal_sensor sensors;

> +	int irq;

>  	bool irq_enabled;

> -

> +	


Repots whitespace error when I apply patch.

>  	void __iomem *regs;

>  };

>  

> @@ -113,7 +113,7 @@ static void hisi_thermal_enable_bind_irq_sensor

>  

>  	mutex_lock(&data->thermal_lock);

>  

> -	sensor = &data->sensors[data->irq_bind_sensor];

> +	sensor = &data->sensors;

>  

>  	/* setting the hdak time */

>  	writel(0x0, data->regs + TEMP0_CFG);

> @@ -160,31 +160,8 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp)

>  	struct hisi_thermal_sensor *sensor = _sensor;

>  	struct hisi_thermal_data *data = sensor->thermal;

>  

> -	int sensor_id = -1, i;

> -	long max_temp = 0;

> -

>  	*temp = hisi_thermal_get_sensor_temp(data, sensor);

>  

> -	sensor->sensor_temp = *temp;

> -

> -	for (i = 0; i < HISI_MAX_SENSORS; i++) {

> -		if (!data->sensors[i].tzd)

> -			continue;

> -

> -		if (data->sensors[i].sensor_temp >= max_temp) {

> -			max_temp = data->sensors[i].sensor_temp;

> -			sensor_id = i;

> -		}

> -	}

> -

> -	/* If no sensor has been enabled, then skip to enable irq */

> -	if (sensor_id == -1)

> -		return 0;

> -

> -	mutex_lock(&data->thermal_lock);

> -	data->irq_bind_sensor = sensor_id;

> -	mutex_unlock(&data->thermal_lock);

> -

>  	dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n",

>  		sensor->id, data->irq_enabled, *temp, sensor->thres_temp);

>  	/*

> @@ -197,7 +174,7 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp)

>  		return 0;

>  	}

>  

> -	if (max_temp < sensor->thres_temp) {

> +	if (*temp < sensor->thres_temp) {

>  		data->irq_enabled = true;

>  		hisi_thermal_enable_bind_irq_sensor(data);

>  		enable_irq(data->irq);

> @@ -224,22 +201,16 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)

>  {

>  	struct hisi_thermal_data *data = dev;

>  	struct hisi_thermal_sensor *sensor;

> -	int i;

>  

>  	mutex_lock(&data->thermal_lock);

> -	sensor = &data->sensors[data->irq_bind_sensor];

> +	sensor = &data->sensors;

>  

>  	dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n",

>  		 sensor->thres_temp / 1000);

>  	mutex_unlock(&data->thermal_lock);

>  

> -	for (i = 0; i < HISI_MAX_SENSORS; i++) {

> -		if (!data->sensors[i].tzd)

> -			continue;

> -

> -		thermal_zone_device_update(data->sensors[i].tzd,

> -					   THERMAL_EVENT_UNSPECIFIED);

> -	}

> +	thermal_zone_device_update(data->sensors.tzd,

> +				   THERMAL_EVENT_UNSPECIFIED);

>  

>  	return IRQ_HANDLED;

>  }

> @@ -296,7 +267,6 @@ static int hisi_thermal_probe(struct platform_device *pdev)

>  {

>  	struct hisi_thermal_data *data;

>  	struct resource *res;

> -	int i;

>  	int ret;

>  

>  	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);

> @@ -347,16 +317,17 @@ static int hisi_thermal_probe(struct platform_device *pdev)

>  	hisi_thermal_enable_bind_irq_sensor(data);

>  	data->irq_enabled = true;

>  

> -	for (i = 0; i < HISI_MAX_SENSORS; ++i) {

> -		ret = hisi_thermal_register_sensor(pdev, data,

> -						   &data->sensors[i], i);

> -		if (ret)

> -			dev_err(&pdev->dev,

> -				"failed to register thermal sensor: %d\n", ret);

> -		else

> -			hisi_thermal_toggle_sensor(&data->sensors[i], true);

> +	ret = hisi_thermal_register_sensor(pdev, data,

> +					   &data->sensors,

> +					   HISI_DEFAULT_SENSOR);

> +	if (ret) {

> +		dev_err(&pdev->dev, "failed to register thermal sensor: %d\n",

> +			ret);

> +		return ret;

>  	}

>  

> +	hisi_thermal_toggle_sensor(&data->sensors, true);

> +

>  	enable_irq(data->irq);

>  

>  	return 0;

> @@ -365,17 +336,9 @@ static int hisi_thermal_probe(struct platform_device *pdev)

>  static int hisi_thermal_remove(struct platform_device *pdev)

>  {

>  	struct hisi_thermal_data *data = platform_get_drvdata(pdev);

> -	int i;

> -

> -	for (i = 0; i < HISI_MAX_SENSORS; i++) {

> -		struct hisi_thermal_sensor *sensor = &data->sensors[i];

> -

> -		if (!sensor->tzd)

> -			continue;

> -

> -		hisi_thermal_toggle_sensor(sensor, false);

> -	}

> +	struct hisi_thermal_sensor *sensor = &data->sensors;

>  

> +	hisi_thermal_toggle_sensor(sensor, false);

>  	hisi_thermal_disable_sensor(data);

>  	clk_disable_unprepare(data->clk);

>  

> -- 

> 2.7.4

>
Leo Yan Sept. 1, 2017, 2:40 p.m. | #2
On Wed, Aug 30, 2017 at 10:47:29AM +0200, Daniel Lezcano wrote:
> The DT specifies a threshold of 65000, we setup the register with a value in

> the temperature resolution for the controller, 64656.

> 

> When we reach 64656, the interrupt fires, the interrupt is disabled. Then the

> irq thread runs and calls thermal_zone_device_update() which will call in turn

> hisi_thermal_get_temp().

> 

> The function will look if the temperature decreased, assuming it was more than

> 65000, but that is not the case because the current temperature is 64656

> (because of the rounding when setting the threshold). This condition being

> true, we re-enable the interrupt which fires immediately after exiting the irq

> thread. That happens again and again until the temperature goes to more than

> 65000.

> 

> Potentially, there is here an interrupt storm if the temperature stabilizes at

> this temperature. A very unlikely case but possible.

> 

> In any case, it does not make sense to handle dozens of alarm interrupt for

> nothing.

> 

> Fix this by rounding the threshold value to the controller resolution so the

> check against the threshold is consistent with the one set in the controller.

> 

> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>


This is a good fixing. I do see when the temperature over the tipping
point, if without this patch it's possible to generate interrupt for
2~3 times; after applied this patch it always generate single
interrupt.

Reviewed-by: Leo Yan <leo.yan@linaro.org>

Tested-by: Leo Yan <leo.yan@linaro.org>


> ---

>  drivers/thermal/hisi_thermal.c | 10 ++++++++--

>  1 file changed, 8 insertions(+), 2 deletions(-)

> 

> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

> index b58ad40..524310d 100644

> --- a/drivers/thermal/hisi_thermal.c

> +++ b/drivers/thermal/hisi_thermal.c

> @@ -90,6 +90,12 @@ static inline long hisi_thermal_temp_to_step(long temp)

>  	return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP;

>  }

>  

> +static inline long hisi_thermal_round_temp(int temp)

> +{

> +	return hisi_thermal_step_to_temp(

> +		hisi_thermal_temp_to_step(temp));

> +}

> +

>  static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,

>  					 struct hisi_thermal_sensor *sensor)

>  {

> @@ -221,7 +227,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)

>  	sensor = &data->sensors;

>  

>  	dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n",

> -		 sensor->thres_temp / 1000);

> +		 sensor->thres_temp);

>  	mutex_unlock(&data->thermal_lock);

>  

>  	thermal_zone_device_update(data->sensors.tzd,

> @@ -255,7 +261,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev,

>  

>  	for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) {

>  		if (trip[i].type == THERMAL_TRIP_PASSIVE) {

> -			sensor->thres_temp = trip[i].temperature;

> +			sensor->thres_temp = hisi_thermal_round_temp(trip[i].temperature);

>  			break;

>  		}

>  	}

> -- 

> 2.7.4

>
Leo Yan Sept. 1, 2017, 2:44 p.m. | #3
On Wed, Aug 30, 2017 at 10:47:30AM +0200, Daniel Lezcano wrote:
> The threaded interrupt inspect the sensors structure to look in the temp

> threshold field, but this field is read-only in all the code, except in the

> probe function before the threaded interrupt is set. In other words there

> is not race window in the threaded interrupt when reading the field value.

> 

> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>


Reviewed-by: Leo Yan <leo.yan@linaro.org>


> ---

>  drivers/thermal/hisi_thermal.c | 6 +-----

>  1 file changed, 1 insertion(+), 5 deletions(-)

> 

> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

> index 524310d..6f0dab1 100644

> --- a/drivers/thermal/hisi_thermal.c

> +++ b/drivers/thermal/hisi_thermal.c

> @@ -221,14 +221,10 @@ static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev)

>  static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)

>  {

>  	struct hisi_thermal_data *data = dev;

> -	struct hisi_thermal_sensor *sensor;

> -

> -	mutex_lock(&data->thermal_lock);

> -	sensor = &data->sensors;

> +	struct hisi_thermal_sensor *sensor = &data->sensors;

>  

>  	dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n",

>  		 sensor->thres_temp);

> -	mutex_unlock(&data->thermal_lock);

>  

>  	thermal_zone_device_update(data->sensors.tzd,

>  				   THERMAL_EVENT_UNSPECIFIED);

> -- 

> 2.7.4

>
Daniel Lezcano Sept. 1, 2017, 8:48 p.m. | #4
On 01/09/2017 16:05, Leo Yan wrote:
> Hi Daniel,

> 

> On Wed, Aug 30, 2017 at 10:47:26AM +0200, Daniel Lezcano wrote:

>> By essence, the tsensor does not really support multiple sensor at the same

>> time. It allows to set a sensor and use it to get the temperature, another

>> sensor could be switched but with a delay of 3-5ms. It is difficult to read

>> simultaneously several sensors without a big delay.

>>

>> Today, just one sensor is used, it is not necessary to deal with multiple

>> sensors in the code. Remove them and if it is needed in the future add them

>> on top of a code which will be clean up in the meantime.

>>

>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

>> ---

>>  drivers/thermal/hisi_thermal.c | 77 +++++++++++-------------------------------

>>  1 file changed, 20 insertions(+), 57 deletions(-)

>>

>> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

>> index 8381696..92b6889 100644

>> --- a/drivers/thermal/hisi_thermal.c

>> +++ b/drivers/thermal/hisi_thermal.c

>> @@ -39,6 +39,7 @@

>>  #define HISI_TEMP_RESET			(100000)

>>  

>>  #define HISI_MAX_SENSORS		4

>> +#define HISI_DEFAULT_SENSOR		2

> 

> So if we always set the sensor id 2 as default sensor, that means it's

> pointless to bind sensor id in dts. E.g. I change to bind thermal

> sensor 0, then it's impossible to bind successfully:

> 

> -                               thermal-sensors = <&tsensor 2>;

> +                               thermal-sensors = <&tsensor 0>;

> 

> It's okay for this change, do you think we need reflect this in DT

> binding doc [1] ?

> 

> [1] Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt


Actually, the current behavior is to go through all the sensors and
register them. By side effect, we end up by registering a sensor with an
id matching the one specified in the DT, others are failing.

That is because there is something missing in the of-thermal API where
we can't call thermal_zone_of_sensor_register() without a matching index.

That is probably why we have in the drivers/thermal/qoriq_thermal.c file
the qoriq_tmu_get_sensor_id() which inspect the thermal-zone DT
structure for the thermal-sensors and read the sensor's id from there in
order to reuse it for thermal_zone_of_sensor_register(). Needless to say
that is not the right place to do that.

IMHO, as nothing gets changed for the moment on hi6220, we can live with
HISI_DEFAULT_SENSOR until we sort out what to do with
thermal_zone_of_sensor_register().


>>  struct hisi_thermal_sensor {

>>  	struct hisi_thermal_data *thermal;

>> @@ -53,11 +54,10 @@ struct hisi_thermal_data {

>>  	struct mutex thermal_lock;    /* protects register data */

>>  	struct platform_device *pdev;

>>  	struct clk *clk;

>> -	struct hisi_thermal_sensor sensors[HISI_MAX_SENSORS];

>> -

>> -	int irq, irq_bind_sensor;

>> +	struct hisi_thermal_sensor sensors;

>> +	int irq;

>>  	bool irq_enabled;

>> -

>> +	

> 

> Repots whitespace error when I apply patch.


Ok, I will fix that.

[ ... ]

Thanks for the review.

  -- Daniel

-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
Leo Yan Sept. 2, 2017, 2:09 a.m. | #5
On Wed, Aug 30, 2017 at 10:47:31AM +0200, Daniel Lezcano wrote:
> Hopefully, the function name can help to clarify the semantic of the operations

> when writing in the register.

> 

> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

> ---

>  drivers/thermal/hisi_thermal.c | 96 +++++++++++++++++++++++++++++++-----------

>  1 file changed, 72 insertions(+), 24 deletions(-)

> 

> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

> index 6f0dab1..d77a938 100644

> --- a/drivers/thermal/hisi_thermal.c

> +++ b/drivers/thermal/hisi_thermal.c

> @@ -26,6 +26,7 @@

>  

>  #include "thermal_core.h"

>  

> +#define TEMP0_LAG			(0x0)

>  #define TEMP0_TH			(0x4)

>  #define TEMP0_RST_TH			(0x8)

>  #define TEMP0_CFG			(0xC)

> @@ -96,6 +97,56 @@ static inline long hisi_thermal_round_temp(int temp)

>  		hisi_thermal_temp_to_step(temp));

>  }

>  

> +static inline void hisi_thermal_set_lag(void __iomem *addr, int value)

> +{

> +	writel(value, addr + TEMP0_LAG);

> +}

> +

> +static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value)

> +{

> +	writel(value, addr + TEMP0_INT_CLR);

> +}

> +

> +static inline void hisi_thermal_alarm_enable(void __iomem *addr, int value)

> +{

> +	writel(value, addr + TEMP0_INT_EN);

> +}

> +

> +static inline void hisi_thermal_alarm_set(void __iomem *addr, int temp)

> +{

> +	writel(hisi_thermal_temp_to_step(temp) | 0x0FFFFFF00, addr + TEMP0_TH);

> +}

> +

> +static inline void hisi_thermal_reset_set(void __iomem *addr, int temp)

> +{

> +        writel(hisi_thermal_temp_to_step(temp), addr + TEMP0_RST_TH);

> +}

> +

> +static inline void hisi_thermal_reset_enable(void __iomem *addr, int value)

> +{

> +        writel(value, addr + TEMP0_RST_MSK);

> +}

> +

> +static inline void hisi_thermal_enable(void __iomem *addr, int value)

> +{

> +	writel(value, addr + TEMP0_EN);

> +}

> +

> +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor)

> +{

> +	writel((sensor << 12), addr + TEMP0_CFG);

> +}

> +

> +static inline int hisi_thermal_get_temperature(void __iomem *addr)

> +{

> +	return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE));

> +}

> +

> +static inline void hisi_thermal_hdak_set(void __iomem *addr, int value)

> +{

> +	writel(value, addr + TEMP0_CFG);

> +}


hdak and sensor id setting share the same one register, so it's
possible to overwrite their value with each other. And for hdak
setting, it should offset 4 bits.

The issues are exists in old code yet but not introduce by this
patch. Could fix these issues as well in this patch?

> +

>  static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,

>  					 struct hisi_thermal_sensor *sensor)

>  {

> @@ -104,22 +155,21 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,

>  	mutex_lock(&data->thermal_lock);

>  

>  	/* disable interrupt */

> -	writel(0x0, data->regs + TEMP0_INT_EN);

> -	writel(0x1, data->regs + TEMP0_INT_CLR);

> +	hisi_thermal_alarm_enable(data->regs, 0);

> +	hisi_thermal_alarm_clear(data->regs, 1);

>  

>  	/* disable module firstly */

> -	writel(0x0, data->regs + TEMP0_EN);

> +	hisi_thermal_enable(data->regs, 0);

>  

>  	/* select sensor id */

> -	writel((sensor->id << 12), data->regs + TEMP0_CFG);

> +	hisi_thermal_sensor_select(data->regs, sensor->id);

>  

>  	/* enable module */

> -	writel(0x1, data->regs + TEMP0_EN);

> +	hisi_thermal_enable(data->regs, 1);

>  

>  	usleep_range(3000, 5000);

>  

> -	val = readl(data->regs + TEMP0_VALUE);

> -	val = hisi_thermal_step_to_temp(val);

> +	val = hisi_thermal_get_temperature(data->regs);

>  

>  	mutex_unlock(&data->thermal_lock);

>  

> @@ -136,29 +186,27 @@ static void hisi_thermal_enable_bind_irq_sensor

>  	sensor = &data->sensors;

>  

>  	/* setting the hdak time */

> -	writel(0x0, data->regs + TEMP0_CFG);

> +	hisi_thermal_hdak_set(data->regs, 0);

>  

>  	/* disable module firstly */

> -	writel(0x0, data->regs + TEMP0_RST_MSK);

> -	writel(0x0, data->regs + TEMP0_EN);

> +	hisi_thermal_reset_enable(data->regs, 0);

> +	hisi_thermal_enable(data->regs, 0);

>  

>  	/* select sensor id */

> -	writel((sensor->id << 12), data->regs + TEMP0_CFG);

> +	hisi_thermal_sensor_select(data->regs, sensor->id);

>  

>  	/* enable for interrupt */

> -	writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00,

> -	       data->regs + TEMP0_TH);

> +	hisi_thermal_alarm_set(data->regs, sensor->thres_temp);

>  

> -	writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET),

> -	       data->regs + TEMP0_RST_TH);

> +	hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET);

>  

>  	/* enable module */

> -	writel(0x1, data->regs + TEMP0_RST_MSK);

> -	writel(0x1, data->regs + TEMP0_EN);

> -

> -	writel(0x0, data->regs + TEMP0_INT_CLR);

> -	writel(0x1, data->regs + TEMP0_INT_EN);

> +	hisi_thermal_reset_enable(data->regs, 1);

> +	hisi_thermal_enable(data->regs, 1);

>  

> +	hisi_thermal_alarm_clear(data->regs, 0);

> +	hisi_thermal_alarm_enable(data->regs, 1);

> +	


whitespace error.

>  	usleep_range(3000, 5000);

>  

>  	mutex_unlock(&data->thermal_lock);

> @@ -169,10 +217,10 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data)

>  	mutex_lock(&data->thermal_lock);

>  

>  	/* disable sensor module */

> -	writel(0x0, data->regs + TEMP0_INT_EN);

> -	writel(0x0, data->regs + TEMP0_RST_MSK);

> -	writel(0x0, data->regs + TEMP0_EN);

> -

> +	hisi_thermal_enable(data->regs, 0);

> +	hisi_thermal_alarm_enable(data->regs, 0);

> +	hisi_thermal_reset_enable(data->regs, 0);

> +	


whitespace error.

>  	mutex_unlock(&data->thermal_lock);

>  }

>  

> -- 

> 2.7.4

>
Leo Yan Sept. 2, 2017, 2:17 a.m. | #6
On Sat, Sep 02, 2017 at 10:09:15AM +0800, Leo Yan wrote:
> On Wed, Aug 30, 2017 at 10:47:31AM +0200, Daniel Lezcano wrote:

> > Hopefully, the function name can help to clarify the semantic of the operations

> > when writing in the register.

> > 

> > Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

> > ---

> >  drivers/thermal/hisi_thermal.c | 96 +++++++++++++++++++++++++++++++-----------

> >  1 file changed, 72 insertions(+), 24 deletions(-)

> > 

> > diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

> > index 6f0dab1..d77a938 100644

> > --- a/drivers/thermal/hisi_thermal.c

> > +++ b/drivers/thermal/hisi_thermal.c

> > @@ -26,6 +26,7 @@

> >  

> >  #include "thermal_core.h"

> >  

> > +#define TEMP0_LAG			(0x0)

> >  #define TEMP0_TH			(0x4)

> >  #define TEMP0_RST_TH			(0x8)

> >  #define TEMP0_CFG			(0xC)

> > @@ -96,6 +97,56 @@ static inline long hisi_thermal_round_temp(int temp)

> >  		hisi_thermal_temp_to_step(temp));

> >  }

> >  

> > +static inline void hisi_thermal_set_lag(void __iomem *addr, int value)

> > +{

> > +	writel(value, addr + TEMP0_LAG);

> > +}

> > +

> > +static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value)

> > +{

> > +	writel(value, addr + TEMP0_INT_CLR);

> > +}

> > +

> > +static inline void hisi_thermal_alarm_enable(void __iomem *addr, int value)

> > +{

> > +	writel(value, addr + TEMP0_INT_EN);

> > +}

> > +

> > +static inline void hisi_thermal_alarm_set(void __iomem *addr, int temp)

> > +{

> > +	writel(hisi_thermal_temp_to_step(temp) | 0x0FFFFFF00, addr + TEMP0_TH);

> > +}

> > +

> > +static inline void hisi_thermal_reset_set(void __iomem *addr, int temp)

> > +{

> > +        writel(hisi_thermal_temp_to_step(temp), addr + TEMP0_RST_TH);

> > +}

> > +

> > +static inline void hisi_thermal_reset_enable(void __iomem *addr, int value)

> > +{

> > +        writel(value, addr + TEMP0_RST_MSK);

> > +}

> > +

> > +static inline void hisi_thermal_enable(void __iomem *addr, int value)

> > +{

> > +	writel(value, addr + TEMP0_EN);

> > +}

> > +

> > +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor)

> > +{

> > +	writel((sensor << 12), addr + TEMP0_CFG);

> > +}

> > +

> > +static inline int hisi_thermal_get_temperature(void __iomem *addr)

> > +{

> > +	return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE));

> > +}

> > +

> > +static inline void hisi_thermal_hdak_set(void __iomem *addr, int value)

> > +{

> > +	writel(value, addr + TEMP0_CFG);

> > +}

> 

> hdak and sensor id setting share the same one register, so it's

> possible to overwrite their value with each other. And for hdak

> setting, it should offset 4 bits.

> 

> The issues are exists in old code yet but not introduce by this

> patch. Could fix these issues as well in this patch?


Have seen the mentioned issues have been fixed in patch 0008. So have
no more question at here.

[...]

Thanks,
Leo Yan
Leo Yan Sept. 2, 2017, 3:41 a.m. | #7
On Wed, Aug 30, 2017 at 10:47:35AM +0200, Daniel Lezcano wrote:
> There is no point to specify the temperature as long variable, the int is

> enough.

> 

> Replace all long variables to int, so making the code consistent.

> 

> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>


Reviewed-by: Leo Yan <leo.yan@linaro.org>


> ---

>  drivers/thermal/hisi_thermal.c | 4 ++--

>  1 file changed, 2 insertions(+), 2 deletions(-)

> 

> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

> index 68b625c..9eee82b 100644

> --- a/drivers/thermal/hisi_thermal.c

> +++ b/drivers/thermal/hisi_thermal.c

> @@ -83,12 +83,12 @@ static inline int hisi_thermal_step_to_temp(int step)

>  	return HISI_TEMP_BASE + (step * HISI_TEMP_STEP);

>  }

>  

> -static inline long hisi_thermal_temp_to_step(long temp)

> +static inline int hisi_thermal_temp_to_step(int temp)

>  {

>  	return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP;

>  }

>  

> -static inline long hisi_thermal_round_temp(int temp)

> +static inline int hisi_thermal_round_temp(int temp)

>  {

>  	return hisi_thermal_step_to_temp(

>  		hisi_thermal_temp_to_step(temp));

> -- 

> 2.7.4

>
Leo Yan Sept. 2, 2017, 4:04 a.m. | #8
On Wed, Aug 30, 2017 at 10:47:37AM +0200, Daniel Lezcano wrote:
> The mutex is used to protect against writes in the configuration register.

> 

> That happens at probe time, with no possible race yet.

> 

> Then when the module is unloaded and at suspend/resume.

> 

> When the module is unloaded, it is an userspace operation, thus via a process.

> Suspending the system goes through the freezer to suspend all the tasks

> synchronously before continuing. So it is not possible to hit the suspend ops

> in this driver while we are unloading it.

> 

> The resume is the same situation than the probe.

> 

> In other words, even if there are several places where we write the

> configuration register, there is no situation where we can write it at the same

> time, so far as I can judge


After applied your previous patches, configuration reigster accessing
only happens in the probe and resume functions. So shouldn't have
chance to access it at the same time.

BTW, I verified this patch with system suspend/resume, so far it works
well after system resume back.

> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>


Reviewed-by: Leo Yan <leo.yan@linaro.org>

Tested-by: Leo Yan <leo.yan@linaro.org>


> ---

>  drivers/thermal/hisi_thermal.c | 6 ------

>  1 file changed, 6 deletions(-)

> 

> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

> index b1b086ab..b9e8ee2 100644

> --- a/drivers/thermal/hisi_thermal.c

> +++ b/drivers/thermal/hisi_thermal.c

> @@ -51,7 +51,6 @@ struct hisi_thermal_sensor {

>  };

>  

>  struct hisi_thermal_data {

> -	struct mutex thermal_lock;    /* protects register data */

>  	struct platform_device *pdev;

>  	struct clk *clk;

>  	struct hisi_thermal_sensor sensor;

> @@ -196,14 +195,10 @@ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value)

>  

>  static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data)

>  {

> -	mutex_lock(&data->thermal_lock);

> -

>  	/* disable sensor module */

>  	hisi_thermal_enable(data->regs, 0);

>  	hisi_thermal_alarm_enable(data->regs, 0);

>  	hisi_thermal_reset_enable(data->regs, 0);

> -	

> -	mutex_unlock(&data->thermal_lock);

>  }

>  

>  static int hisi_thermal_get_temp(void *__data, int *temp)

> @@ -340,7 +335,6 @@ static int hisi_thermal_probe(struct platform_device *pdev)

>  	if (!data)

>  		return -ENOMEM;

>  

> -	mutex_init(&data->thermal_lock);

>  	data->pdev = pdev;

>  

>  	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);

> -- 

> 2.7.4

>
Daniel Lezcano Sept. 2, 2017, 1:11 p.m. | #9
On 02/09/2017 06:04, Leo Yan wrote:
> On Wed, Aug 30, 2017 at 10:47:37AM +0200, Daniel Lezcano wrote:

>> The mutex is used to protect against writes in the configuration register.

>>

>> That happens at probe time, with no possible race yet.

>>

>> Then when the module is unloaded and at suspend/resume.

>>

>> When the module is unloaded, it is an userspace operation, thus via a process.

>> Suspending the system goes through the freezer to suspend all the tasks

>> synchronously before continuing. So it is not possible to hit the suspend ops

>> in this driver while we are unloading it.

>>

>> The resume is the same situation than the probe.

>>

>> In other words, even if there are several places where we write the

>> configuration register, there is no situation where we can write it at the same

>> time, so far as I can judge

> 

> After applied your previous patches, configuration reigster accessing

> only happens in the probe and resume functions. So shouldn't have

> chance to access it at the same time.

> 

> BTW, I verified this patch with system suspend/resume, so far it works

> well after system resume back.


Great, thanks for testing this.

>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

> 

> Reviewed-by: Leo Yan <leo.yan@linaro.org>

> Tested-by: Leo Yan <leo.yan@linaro.org>

> 

>> ---

>>  drivers/thermal/hisi_thermal.c | 6 ------

>>  1 file changed, 6 deletions(-)

>>

>> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

>> index b1b086ab..b9e8ee2 100644

>> --- a/drivers/thermal/hisi_thermal.c

>> +++ b/drivers/thermal/hisi_thermal.c

>> @@ -51,7 +51,6 @@ struct hisi_thermal_sensor {

>>  };

>>  

>>  struct hisi_thermal_data {

>> -	struct mutex thermal_lock;    /* protects register data */

>>  	struct platform_device *pdev;

>>  	struct clk *clk;

>>  	struct hisi_thermal_sensor sensor;

>> @@ -196,14 +195,10 @@ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value)

>>  

>>  static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data)

>>  {

>> -	mutex_lock(&data->thermal_lock);

>> -

>>  	/* disable sensor module */

>>  	hisi_thermal_enable(data->regs, 0);

>>  	hisi_thermal_alarm_enable(data->regs, 0);

>>  	hisi_thermal_reset_enable(data->regs, 0);

>> -	

>> -	mutex_unlock(&data->thermal_lock);

>>  }

>>  

>>  static int hisi_thermal_get_temp(void *__data, int *temp)

>> @@ -340,7 +335,6 @@ static int hisi_thermal_probe(struct platform_device *pdev)

>>  	if (!data)

>>  		return -ENOMEM;

>>  

>> -	mutex_init(&data->thermal_lock);

>>  	data->pdev = pdev;

>>  

>>  	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);

>> -- 

>> 2.7.4

>>



-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
Leo Yan Sept. 4, 2017, 12:50 a.m. | #10
On Sat, Sep 02, 2017 at 03:10:29PM +0200, Daniel Lezcano wrote:

[...]

> On 02/09/2017 05:29, Leo Yan wrote:

> > On Wed, Aug 30, 2017 at 10:47:33AM +0200, Daniel Lezcano wrote:

> >> The sensor is all setup, bind, resetted, acked, etc... every single second.

> >>

> >> That was the way to workaround a problem with the interrupt bouncing again and

> >> again.

> >>

> >> With the following changes, we fix all in one:

> >>

> >>  - Do the setup, one time, at probe time

> >>

> >>  - Add the IRQF_ONESHOT, ack the interrupt in the threaded handler

> >>

> >>  - Remove the interrupt handler

> >>

> >>  - Set the correct value for the LAG register

> >>

> >>  - Remove all the irq_enabled stuff in the code as the interruption

> >>    handling is fixed

> >>

> >>  - Remove the 3ms delay

> >>

> >>  - Reorder the initialization routine to be in the right order

> >>

> >> It ends up to a nicer code and more efficient, the 3-5ms delay is removed from

> >> the get_temp() path.

> >>

> >> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

> >> ---

> >>  drivers/thermal/hisi_thermal.c | 203 +++++++++++++++++++----------------------

> >>  1 file changed, 93 insertions(+), 110 deletions(-)

> >>

> >> diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

> >> index 3e03908..b038d8a 100644

> >> --- a/drivers/thermal/hisi_thermal.c

> >> +++ b/drivers/thermal/hisi_thermal.c

> >> @@ -39,6 +39,7 @@

> >>  #define HISI_TEMP_BASE			(-60000)

> >>  #define HISI_TEMP_RESET			(100000)

> >>  #define HISI_TEMP_STEP			(784)

> >> +#define HISI_TEMP_LAG			(3500)

> > 

> > So I am curious what's the reason to select 3.5'c for lag value? Is

> > this a heuristics result?

> 

> Yes, it is. I tried 5°C but I find it too long. After several tests, I

> found 3.5°C looked fine.

> 

> [ ... ]

> 

> >> + * A very short lag can lead to an interrupt storm, a long lag

> >> + * increase the latency to react to the temperature changes.  In our

> >> + * case, that is not really a problem as we are polling the

> >> + * temperature.

> > 

> > So here means if we set a long lag value and the interrupt is delayed,

> > sometimes we even don't receive the interrupt; but this is acceptable

> > due we are polling the temperature so we still can get the updated

> > temperature value, right?

> No, what I meant is if the hysteresis is too large, eg. 45 - 65, then

> the interrupt will come after the temperature goes below 45 and that may

> take a long time, especially if the temperature is fluctuating between

> the two hysteresis values.

> 

> The only setup preventing an interrupt to happen is when crossing the

> low value hysteresis is not possible in practical. For example, the

> temperature of the SoC is ~44°C at idle time. If we set the threshold to

> 65°C and the lag to the max 24°C, the interrupt will raised when we go

> below 41°C and that situation can't happen.

> 

> In the current situation, the interrupt is only used to raise an alarm.


Thanks for the explanation, Ionela (have Cced.) reminded me the
interrupt for 'alarm on' is fatal, if we use polling method then the
delay can be 1000ms, but with interrupt to alarm we can handle the
temperature rasing immediately.

> The get temperature is resulting from a polling, so the delay between

> the alarm on/off is not issue for now.


Understood.

> >> + *

> >> + * [0:4] : lag register

> >> + *

> >> + * The temperature is coded in steps, cf. HISI_TEMP_STEP.

> >> + *

> >> + * Min : 0x00 :  0.0 °C

> >> + * Max : 0x1F : 24.3 °C

> >> + *

> >> + * The 'value' parameter is in milliCelsius.

> >> + */

> >>  static inline void hisi_thermal_set_lag(void __iomem *addr, int value)

> >>  {

> >> -	writel(value, addr + TEMP0_LAG);

> >> +	writel((value / HISI_TEMP_STEP) & 0x1F, addr + TEMP0_LAG);

> >>  }

> 

> 

> [ ... ]

> 

> >> -	thermal_zone_device_update(data->sensors.tzd,

> >> -				   THERMAL_EVENT_UNSPECIFIED);

> >> +	} else if (temp < sensor->thres_temp) {

> >> +		dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n",

> >> +			 temp, sensor->thres_temp);

> >> +	}

> > 

> > For temperature increasing and decreasing both cases, can always call

> > thermal_zone_device_update()?

> 

> That is something I asked myself but I finally decided to not change the

> current behavior and let that be added in a separate patch.


This is fine.

Thanks,
Leo Yan
Leo Yan Sept. 4, 2017, 2:30 p.m. | #11
On Mon, Sep 04, 2017 at 01:29:19PM +0200, Daniel Lezcano wrote:
> On 04/09/2017 02:50, Leo Yan wrote:

> 

> [ ... ]

> 

> >> That is something I asked myself but I finally decided to not change the

> >> current behavior and let that be added in a separate patch.

> > 

> > This is fine.

> 

> Hi Leo,

> 

> can you tag this patch if you agree with it?


Yeah; I have tested this patch at my side:

Reviewed-by: Leo Yan <leo.yan@linaro.org>

Tested-by: Leo Yan <leo.yan@linaro.org>


> 

> -- 

>  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

> 

> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |

> <http://twitter.com/#!/linaroorg> Twitter |

> <http://www.linaro.org/linaro-blog/> Blog

>

Patch

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index bd3572c..8381696 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -345,8 +345,7 @@  static int hisi_thermal_probe(struct platform_device *pdev)
 	}
 
 	hisi_thermal_enable_bind_irq_sensor(data);
-	irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED,
-			      &data->irq_enabled);
+	data->irq_enabled = true;
 
 	for (i = 0; i < HISI_MAX_SENSORS; ++i) {
 		ret = hisi_thermal_register_sensor(pdev, data,
@@ -358,6 +357,8 @@  static int hisi_thermal_probe(struct platform_device *pdev)
 			hisi_thermal_toggle_sensor(&data->sensors[i], true);
 	}
 
+	enable_irq(data->irq);
+
 	return 0;
 }