diff mbox series

thermal/thresholds: Fix boundaries and detection routine

Message ID 20241212190737.4127274-1-daniel.lezcano@linaro.org
State New
Headers show
Series thermal/thresholds: Fix boundaries and detection routine | expand

Commit Message

Daniel Lezcano Dec. 12, 2024, 7:07 p.m. UTC
The current implementation does not work if the thermal zone is
interrupt driven only.

The boundaries are not correctly checked and computed as it happens
only when the temperature is increasing or decreasing.

The problem arises because the routine to detect when we cross a
threshold is correlated with the computation of the boundaries. We
assume we have to recompute the boundaries when a threshold is crossed
but actually we should do that even if the it is not the case.

Mixing the boundaries computation and the threshold detection for the
sake of optimizing the routine is much more complex as it appears
intuitively and prone to errors.

This fix separates the boundaries computation and the threshold
crossing detection into different routines. The result is a code much
more simple to understand, thus easier to maintain.

The drawback is we browse the thresholds list several time but we can
consider that as neglictible because that happens when the temperature
is updated. There are certainly some aeras to improve in the
temperature update routine but it would be not adequate as this change
aims to fix the thresholds for v6.13.

Fixes: 445936f9e258 ("thermal: core: Add user thresholds support")
Tested-by: Daniel Lezcano <daniel.lezcano@linaro.org> # rock5b, Lenovo x13s
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/thermal_thresholds.c | 68 +++++++++++++++-------------
 1 file changed, 36 insertions(+), 32 deletions(-)

Comments

Manaf Meethalavalappu Pallikunhi Dec. 15, 2024, 7:02 p.m. UTC | #1
Hi Daniel,


On 12/13/2024 12:37 AM, Daniel Lezcano wrote:
> The current implementation does not work if the thermal zone is
> interrupt driven only.
>
> The boundaries are not correctly checked and computed as it happens
> only when the temperature is increasing or decreasing.
>
> The problem arises because the routine to detect when we cross a
> threshold is correlated with the computation of the boundaries. We
> assume we have to recompute the boundaries when a threshold is crossed
> but actually we should do that even if the it is not the case.
>
> Mixing the boundaries computation and the threshold detection for the
> sake of optimizing the routine is much more complex as it appears
> intuitively and prone to errors.
>
> This fix separates the boundaries computation and the threshold
> crossing detection into different routines. The result is a code much
> more simple to understand, thus easier to maintain.
>
> The drawback is we browse the thresholds list several time but we can
> consider that as neglictible because that happens when the temperature
> is updated. There are certainly some aeras to improve in the
> temperature update routine but it would be not adequate as this change
> aims to fix the thresholds for v6.13.
>
> Fixes: 445936f9e258 ("thermal: core: Add user thresholds support")
> Tested-by: Daniel Lezcano <daniel.lezcano@linaro.org> # rock5b, Lenovo x13s
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>   drivers/thermal/thermal_thresholds.c | 68 +++++++++++++++-------------
>   1 file changed, 36 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/thermal/thermal_thresholds.c b/drivers/thermal/thermal_thresholds.c
> index d9b2a0bb44fc..dc2852721151 100644
> --- a/drivers/thermal/thermal_thresholds.c
> +++ b/drivers/thermal/thermal_thresholds.c
> @@ -69,58 +69,60 @@ static struct user_threshold *__thermal_thresholds_find(const struct list_head *
>   	return NULL;
>   }
>   
> -static bool __thermal_threshold_is_crossed(struct user_threshold *threshold, int temperature,
> -					   int last_temperature, int direction,
> -					   int *low, int *high)
> +static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature,
> +					      int last_temperature)
>   {
> +	struct user_threshold *t;
>   
> -	if (temperature >= threshold->temperature) {
> -		if (threshold->temperature > *low &&
> -		    THERMAL_THRESHOLD_WAY_DOWN & threshold->direction)
> -			*low = threshold->temperature;
> +	list_for_each_entry(t, thresholds, list_node) {
>   
> -		if (last_temperature < threshold->temperature &&
> -		    threshold->direction & direction)
> -			return true;
> -	} else {
> -		if (threshold->temperature < *high && THERMAL_THRESHOLD_WAY_UP
> -		    & threshold->direction)
> -			*high = threshold->temperature;
> +		if (!(t->direction & THERMAL_THRESHOLD_WAY_UP))
> +		    continue;
>   
> -		if (last_temperature >= threshold->temperature &&
> -		    threshold->direction & direction)
> +		if (temperature >= t->temperature &&
> +		    last_temperature < t->temperature)
>   			return true;
>   	}
>   
>   	return false;
>   }
>   
> -static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature,
> -					      int last_temperature, int *low, int *high)
> +static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature,
> +					       int last_temperature)
>   {
>   	struct user_threshold *t;
>   
> -	list_for_each_entry(t, thresholds, list_node) {
> -		if (__thermal_threshold_is_crossed(t, temperature, last_temperature,
> -						   THERMAL_THRESHOLD_WAY_UP, low, high))
> +	list_for_each_entry_reverse(t, thresholds, list_node) {
> +
> +		if (!(t->direction & THERMAL_THRESHOLD_WAY_DOWN))
> +		    continue;
> +
> +		if (temperature < t->temperature &&
> +		    last_temperature >= t->temperature)
>   			return true;

Currently WAY_UP notification triggers if temperature is greater than or 
equal to t-> temperature, but for WAY_DOWN

it is only checking temperature is less than t->temperature. Why don't 
we include temperature = t->temperature

for WAY_DOWN threshold ? In that case it will be consistent for both 
WAY_UP and WAY_DOWN notification for userspace.

If we are not considering 'equal to' for WAY_DOWN, there is a 
possibility of missing WAY_DOWN notification if a sensor

is violated with same WAY_DOWN threshold temperature and only interrupt 
mode is enabled for that sensor.


Thank you

Manaf

>   	}
>   
>   	return false;
>   }
>   
> -static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature,
> -					       int last_temperature, int *low, int *high)
> +static void thermal_threshold_find_boundaries(struct list_head *thresholds, int temperature,
> +					      int *low, int *high)
>   {
>   	struct user_threshold *t;
>   
> -	list_for_each_entry_reverse(t, thresholds, list_node) {
> -		if (__thermal_threshold_is_crossed(t, temperature, last_temperature,
> -						   THERMAL_THRESHOLD_WAY_DOWN, low, high))
> -			return true;
> +	list_for_each_entry(t, thresholds, list_node) {
> +		if (temperature < t->temperature &&
> +		    (t->direction & THERMAL_THRESHOLD_WAY_UP) &&
> +		    *high > t->temperature)
> +			*high = t->temperature;
>   	}
>   
> -	return false;
> +	list_for_each_entry_reverse(t, thresholds, list_node) {
> +		if (temperature > t->temperature &&
> +		    (t->direction & THERMAL_THRESHOLD_WAY_DOWN) &&
> +		    *low < t->temperature)
> +			*low = t->temperature;
> +	}
>   }
>   
>   void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *high)
> @@ -132,6 +134,8 @@ void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi
>   
>   	lockdep_assert_held(&tz->lock);
>   
> +	thermal_threshold_find_boundaries(thresholds, temperature, low, high);
> +
>   	/*
>   	 * We need a second update in order to detect a threshold being crossed
>   	 */
> @@ -151,12 +155,12 @@ void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi
>   	 * - decreased : thresholds are crossed the way down
>   	 */
>   	if (temperature > last_temperature) {
> -		if (thermal_thresholds_handle_raising(thresholds, temperature,
> -						      last_temperature, low, high))
> +		if (thermal_thresholds_handle_raising(thresholds,
> +						      temperature, last_temperature))
>   			thermal_notify_threshold_up(tz);
>   	} else {
> -		if (thermal_thresholds_handle_dropping(thresholds, temperature,
> -						       last_temperature, low, high))
> +		if (thermal_thresholds_handle_dropping(thresholds,
> +						       temperature, last_temperature))
>   			thermal_notify_threshold_down(tz);
>   	}
>   }
Daniel Lezcano Dec. 15, 2024, 7:27 p.m. UTC | #2
Hi Manaf,

On 12/15/24 20:02, Manaf Meethalavalappu Pallikunhi wrote:
> 
> Hi Daniel,

[ ... ]

>> -static bool thermal_thresholds_handle_raising(struct list_head 
>> *thresholds, int temperature,
>> -                          int last_temperature, int *low, int *high)
>> +static bool thermal_thresholds_handle_dropping(struct list_head 
>> *thresholds, int temperature,
>> +                           int last_temperature)
>>   {
>>       struct user_threshold *t;
>> -    list_for_each_entry(t, thresholds, list_node) {
>> -        if (__thermal_threshold_is_crossed(t, temperature, 
>> last_temperature,
>> -                           THERMAL_THRESHOLD_WAY_UP, low, high))
>> +    list_for_each_entry_reverse(t, thresholds, list_node) {
>> +
>> +        if (!(t->direction & THERMAL_THRESHOLD_WAY_DOWN))
>> +            continue;
>> +
>> +        if (temperature < t->temperature &&
>> +            last_temperature >= t->temperature)
>>               return true;
> 
> Currently WAY_UP notification triggers if temperature is greater than or 
> equal to t-> temperature, but for WAY_DOWN
> 
> it is only checking temperature is less than t->temperature. Why don't 
> we include temperature = t->temperature
> 
> for WAY_DOWN threshold ? In that case it will be consistent for both 
> WAY_UP and WAY_DOWN notification for userspace.
> 
> If we are not considering 'equal to' for WAY_DOWN, there is a 
> possibility of missing WAY_DOWN notification if a sensor
> 
> is violated with same WAY_DOWN threshold temperature and only interrupt 
> mode is enabled for that sensor.

You are right, we should detect when the temperature is lesser or equal 
to the threshold to be crossed the way down.

I'll send a V2 with this condition fixed.

Thanks for reporting this

>>       }
>>       return false;
>>   }
>> -static bool thermal_thresholds_handle_dropping(struct list_head 
>> *thresholds, int temperature,
>> -                           int last_temperature, int *low, int *high)
>> +static void thermal_threshold_find_boundaries(struct list_head 
>> *thresholds, int temperature,
>> +                          int *low, int *high)
>>   {
>>       struct user_threshold *t;
>> -    list_for_each_entry_reverse(t, thresholds, list_node) {
>> -        if (__thermal_threshold_is_crossed(t, temperature, 
>> last_temperature,
>> -                           THERMAL_THRESHOLD_WAY_DOWN, low, high))
>> -            return true;
>> +    list_for_each_entry(t, thresholds, list_node) {
>> +        if (temperature < t->temperature &&
>> +            (t->direction & THERMAL_THRESHOLD_WAY_UP) &&
>> +            *high > t->temperature)
>> +            *high = t->temperature;
>>       }
>> -    return false;
>> +    list_for_each_entry_reverse(t, thresholds, list_node) {
>> +        if (temperature > t->temperature &&
>> +            (t->direction & THERMAL_THRESHOLD_WAY_DOWN) &&
>> +            *low < t->temperature)
>> +            *low = t->temperature;
>> +    }
>>   }
>>   void thermal_thresholds_handle(struct thermal_zone_device *tz, int 
>> *low, int *high)
>> @@ -132,6 +134,8 @@ void thermal_thresholds_handle(struct 
>> thermal_zone_device *tz, int *low, int *hi
>>       lockdep_assert_held(&tz->lock);
>> +    thermal_threshold_find_boundaries(thresholds, temperature, low, 
>> high);
>> +
>>       /*
>>        * We need a second update in order to detect a threshold being 
>> crossed
>>        */
>> @@ -151,12 +155,12 @@ void thermal_thresholds_handle(struct 
>> thermal_zone_device *tz, int *low, int *hi
>>        * - decreased : thresholds are crossed the way down
>>        */
>>       if (temperature > last_temperature) {
>> -        if (thermal_thresholds_handle_raising(thresholds, temperature,
>> -                              last_temperature, low, high))
>> +        if (thermal_thresholds_handle_raising(thresholds,
>> +                              temperature, last_temperature))
>>               thermal_notify_threshold_up(tz);
>>       } else {
>> -        if (thermal_thresholds_handle_dropping(thresholds, temperature,
>> -                               last_temperature, low, high))
>> +        if (thermal_thresholds_handle_dropping(thresholds,
>> +                               temperature, last_temperature))
>>               thermal_notify_threshold_down(tz);
>>       }
>>   }
diff mbox series

Patch

diff --git a/drivers/thermal/thermal_thresholds.c b/drivers/thermal/thermal_thresholds.c
index d9b2a0bb44fc..dc2852721151 100644
--- a/drivers/thermal/thermal_thresholds.c
+++ b/drivers/thermal/thermal_thresholds.c
@@ -69,58 +69,60 @@  static struct user_threshold *__thermal_thresholds_find(const struct list_head *
 	return NULL;
 }
 
-static bool __thermal_threshold_is_crossed(struct user_threshold *threshold, int temperature,
-					   int last_temperature, int direction,
-					   int *low, int *high)
+static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature,
+					      int last_temperature)
 {
+	struct user_threshold *t;
 
-	if (temperature >= threshold->temperature) {
-		if (threshold->temperature > *low &&
-		    THERMAL_THRESHOLD_WAY_DOWN & threshold->direction)
-			*low = threshold->temperature;
+	list_for_each_entry(t, thresholds, list_node) {
 
-		if (last_temperature < threshold->temperature &&
-		    threshold->direction & direction)
-			return true;
-	} else {
-		if (threshold->temperature < *high && THERMAL_THRESHOLD_WAY_UP
-		    & threshold->direction)
-			*high = threshold->temperature;
+		if (!(t->direction & THERMAL_THRESHOLD_WAY_UP))
+		    continue;
 
-		if (last_temperature >= threshold->temperature &&
-		    threshold->direction & direction)
+		if (temperature >= t->temperature &&
+		    last_temperature < t->temperature)
 			return true;
 	}
 
 	return false;
 }
 
-static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature,
-					      int last_temperature, int *low, int *high)
+static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature,
+					       int last_temperature)
 {
 	struct user_threshold *t;
 
-	list_for_each_entry(t, thresholds, list_node) {
-		if (__thermal_threshold_is_crossed(t, temperature, last_temperature,
-						   THERMAL_THRESHOLD_WAY_UP, low, high))
+	list_for_each_entry_reverse(t, thresholds, list_node) {
+
+		if (!(t->direction & THERMAL_THRESHOLD_WAY_DOWN))
+		    continue;
+
+		if (temperature < t->temperature &&
+		    last_temperature >= t->temperature)
 			return true;
 	}
 
 	return false;
 }
 
-static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature,
-					       int last_temperature, int *low, int *high)
+static void thermal_threshold_find_boundaries(struct list_head *thresholds, int temperature,
+					      int *low, int *high)
 {
 	struct user_threshold *t;
 
-	list_for_each_entry_reverse(t, thresholds, list_node) {
-		if (__thermal_threshold_is_crossed(t, temperature, last_temperature,
-						   THERMAL_THRESHOLD_WAY_DOWN, low, high))
-			return true;
+	list_for_each_entry(t, thresholds, list_node) {
+		if (temperature < t->temperature &&
+		    (t->direction & THERMAL_THRESHOLD_WAY_UP) &&
+		    *high > t->temperature)
+			*high = t->temperature;
 	}
 
-	return false;
+	list_for_each_entry_reverse(t, thresholds, list_node) {
+		if (temperature > t->temperature &&
+		    (t->direction & THERMAL_THRESHOLD_WAY_DOWN) &&
+		    *low < t->temperature)
+			*low = t->temperature;
+	}
 }
 
 void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *high)
@@ -132,6 +134,8 @@  void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi
 
 	lockdep_assert_held(&tz->lock);
 
+	thermal_threshold_find_boundaries(thresholds, temperature, low, high);
+
 	/*
 	 * We need a second update in order to detect a threshold being crossed
 	 */
@@ -151,12 +155,12 @@  void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi
 	 * - decreased : thresholds are crossed the way down
 	 */
 	if (temperature > last_temperature) {
-		if (thermal_thresholds_handle_raising(thresholds, temperature,
-						      last_temperature, low, high))
+		if (thermal_thresholds_handle_raising(thresholds,
+						      temperature, last_temperature))
 			thermal_notify_threshold_up(tz);
 	} else {
-		if (thermal_thresholds_handle_dropping(thresholds, temperature,
-						       last_temperature, low, high))
+		if (thermal_thresholds_handle_dropping(thresholds,
+						       temperature, last_temperature))
 			thermal_notify_threshold_down(tz);
 	}
 }