Message ID: 31999d801bfb4d8063dc1ceec1234b6b80b4ae68.1600238586.git.viresh.kumar@linaro.org
State: New
Series: cpufreq: Record stats with fast-switching
On Wed, Sep 16, 2020 at 8:45 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> In order to prepare for lock-less stats update, add support to defer any
> updates to it until cpufreq_stats_record_transition() is called.

This is a bit devoid of details. I guess you mean reset in particular,
but that's not clear from the above. Also, it would be useful to
describe the design somewhat.

> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  drivers/cpufreq/cpufreq_stats.c | 75 ++++++++++++++++++++++++---------
>  1 file changed, 56 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> index 94d959a8e954..3e7eee29ee86 100644
> --- a/drivers/cpufreq/cpufreq_stats.c
> +++ b/drivers/cpufreq/cpufreq_stats.c
> @@ -22,17 +22,22 @@ struct cpufreq_stats {
>         spinlock_t lock;
>         unsigned int *freq_table;
>         unsigned int *trans_table;
> +
> +       /* Deferred reset */
> +       unsigned int reset_pending;
> +       unsigned long long reset_time;
>  };
>
> -static void cpufreq_stats_update(struct cpufreq_stats *stats)
> +static void cpufreq_stats_update(struct cpufreq_stats *stats,
> +                                unsigned long long time)
>  {
>         unsigned long long cur_time = get_jiffies_64();
>
> -       stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
> +       stats->time_in_state[stats->last_index] += cur_time - time;
>         stats->last_time = cur_time;
>  }
>
> -static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
> +static void cpufreq_stats_reset_table(struct cpufreq_stats *stats)
>  {
>         unsigned int count = stats->max_state;
>
> @@ -41,42 +46,67 @@ static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
>         memset(stats->trans_table, 0, count * count * sizeof(int));
>         stats->last_time = get_jiffies_64();
>         stats->total_trans = 0;
> +
> +       /* Adjust for the time elapsed since reset was requested */
> +       WRITE_ONCE(stats->reset_pending, 0);

What if this runs in parallel with store_reset()? The latter may
update reset_pending to 1 before the below runs.

Conversely, this may clear reset_pending right after store_reset() has
set it to 1, but before it manages to set reset_time.

Is that not a problem?

> +       cpufreq_stats_update(stats, stats->reset_time);
>
>         spin_unlock(&stats->lock);
>  }
>
>  static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf)
>  {
> -       return sprintf(buf, "%d\n", policy->stats->total_trans);
> +       struct cpufreq_stats *stats = policy->stats;
> +
> +       if (READ_ONCE(stats->reset_pending))
> +               return sprintf(buf, "%d\n", 0);
> +       else
> +               return sprintf(buf, "%d\n", stats->total_trans);
>  }
>  cpufreq_freq_attr_ro(total_trans);
>
>  static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
>  {
>         struct cpufreq_stats *stats = policy->stats;
> +       bool pending = READ_ONCE(stats->reset_pending);
> +       unsigned long long time;
>         ssize_t len = 0;
>         int i;
>
>         if (policy->fast_switch_enabled)
>                 return 0;
>
> -       spin_lock(&stats->lock);
> -       cpufreq_stats_update(stats);
> -       spin_unlock(&stats->lock);
> -
>         for (i = 0; i < stats->state_num; i++) {
> +               if (pending) {
> +                       if (i == stats->last_index)
> +                               time = get_jiffies_64() - stats->reset_time;

What if this runs in parallel with store_reset() and reads reset_time
before the latter manages to update it?

> +                       else
> +                               time = 0;
> +               } else {
> +                       time = stats->time_in_state[i];
> +                       if (i == stats->last_index)
> +                               time += get_jiffies_64() - stats->last_time;
> +               }
> +
>                 len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i],
> -                              (unsigned long long)
> -                              jiffies_64_to_clock_t(stats->time_in_state[i]));
> +                              jiffies_64_to_clock_t(time));
>         }
>         return len;
>  }
>  cpufreq_freq_attr_ro(time_in_state);
>
> +/* We don't care what is written to the attribute */
>  static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf,
>                            size_t count)
>  {
> -       /* We don't care what is written to the attribute. */
> -       cpufreq_stats_clear_table(policy->stats);
> +       struct cpufreq_stats *stats = policy->stats;
> +
> +       /*
> +        * Defer resetting of stats to cpufreq_stats_record_transition() to
> +        * avoid races.
> +        */
> +       WRITE_ONCE(stats->reset_pending, 1);
> +       stats->reset_time = get_jiffies_64();

AFAICS, there is nothing to ensure that reset_time will be updated in
one go and even to ensure that it won't be partially updated before
setting reset_pending. This should at least be addressed in a comment
to explain why it is not a problem.

> +
>         return count;
>  }
>  cpufreq_freq_attr_wo(reset);
> @@ -84,8 +114,9 @@ cpufreq_freq_attr_wo(reset);
>  static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
>  {
>         struct cpufreq_stats *stats = policy->stats;
> +       bool pending = READ_ONCE(stats->reset_pending);
>         ssize_t len = 0;
> -       int i, j;
> +       int i, j, count;
>
>         if (policy->fast_switch_enabled)
>                 return 0;
> @@ -113,8 +144,13 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
>                 for (j = 0; j < stats->state_num; j++) {
>                         if (len >= PAGE_SIZE)
>                                 break;
> -                       len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ",
> -                                       stats->trans_table[i*stats->max_state+j]);
> +
> +                       if (pending)
> +                               count = 0;
> +                       else
> +                               count = stats->trans_table[i * stats->max_state + j];
> +
> +                       len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ", count);
>                 }
>                 if (len >= PAGE_SIZE)
>                         break;
> @@ -228,10 +264,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
>         struct cpufreq_stats *stats = policy->stats;
>         int old_index, new_index;
>
> -       if (!stats) {
> -               pr_debug("%s: No stats found\n", __func__);
> +       if (!stats)
>                 return;
> -       }
> +
> +       if (READ_ONCE(stats->reset_pending))
> +               cpufreq_stats_reset_table(stats);

This is a bit confusing, because cpufreq_stats_reset_table() calls
cpufreq_stats_update() and passes reset_time to it, but it is called
again below with last_time as the second arg. It is not particularly
clear to me why this needs to be done this way.

>
>         old_index = stats->last_index;
>         new_index = freq_table_get_index(stats, new_freq);
> @@ -241,7 +278,7 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
>                 return;
>
>         spin_lock(&stats->lock);
> -       cpufreq_stats_update(stats);
> +       cpufreq_stats_update(stats, stats->last_time);
>
>         stats->last_index = new_index;
>         stats->trans_table[old_index * stats->max_state + new_index]++;
> --
> 2.25.0.rc1.19.g042ed3e048af
>
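One illustrative way to address the ordering part of the concerns above
(a sketch only, not part of the posted patch) is to publish reset_time
before reset_pending and pair the accesses with release/acquire
semantics. Note that this alone does not prevent tearing of the 64-bit
reset_time on 32-bit architectures; that would still need the lock or a
comment explaining why a torn read is benign:

static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf,
                           size_t count)
{
        struct cpufreq_stats *stats = policy->stats;

        /* Write reset_time first ... */
        stats->reset_time = get_jiffies_64();

        /*
         * ... then set the flag with release semantics, so a reader that
         * observes reset_pending == 1 via smp_load_acquire() is also
         * guaranteed to see the new reset_time.
         */
        smp_store_release(&stats->reset_pending, 1);

        return count;
}

Readers would then use smp_load_acquire(&stats->reset_pending) in place
of READ_ONCE().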
On 9/16/20 7:45 AM, Viresh Kumar wrote:
> In order to prepare for lock-less stats update, add support to defer any
> updates to it until cpufreq_stats_record_transition() is called.
>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  drivers/cpufreq/cpufreq_stats.c | 75 ++++++++++++++++++++++++---------
>  1 file changed, 56 insertions(+), 19 deletions(-)
>

[snip]

> @@ -228,10 +264,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
>         struct cpufreq_stats *stats = policy->stats;
>         int old_index, new_index;
>
> -       if (!stats) {
> -               pr_debug("%s: No stats found\n", __func__);
> +       if (!stats)
>                 return;
> -       }
> +
> +       if (READ_ONCE(stats->reset_pending))
> +               cpufreq_stats_reset_table(stats);
>

This is in the hot path, called from the scheduler. I wonder if we
should avoid it or make that branch 'unlikely':

        if (unlikely(READ_ONCE(stats->reset_pending)))

The CPU (with a good branch predictor) would probably figure this out
on its own, but maybe we can help a bit here.
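For reference, unlikely() is simply a compile-time branch-prediction
hint built on __builtin_expect() (simplified from the kernel's
include/linux/compiler.h), so the suggested change amounts to:

#define unlikely(x)     __builtin_expect(!!(x), 0)

        /* Hot path: tell the compiler the reset branch is rare. */
        if (unlikely(READ_ONCE(stats->reset_pending)))
                cpufreq_stats_reset_table(stats);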
On 25-09-20, 09:21, Lukasz Luba wrote:
> On 9/16/20 7:45 AM, Viresh Kumar wrote:
> > In order to prepare for lock-less stats update, add support to defer any
> > updates to it until cpufreq_stats_record_transition() is called.
> >
> > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> > ---
> >  drivers/cpufreq/cpufreq_stats.c | 75 ++++++++++++++++++++++++---------
> >  1 file changed, 56 insertions(+), 19 deletions(-)
> >
>
> [snip]
>
> > @@ -228,10 +264,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
> >         struct cpufreq_stats *stats = policy->stats;
> >         int old_index, new_index;
> >
> > -       if (!stats) {
> > -               pr_debug("%s: No stats found\n", __func__);
> > +       if (!stats)
> >                 return;
> > -       }
> > +
> > +       if (READ_ONCE(stats->reset_pending))
> > +               cpufreq_stats_reset_table(stats);
>
> This is in the hot path, called from the scheduler. I wonder if we
> should avoid it or make that branch 'unlikely':
>
>         if (unlikely(READ_ONCE(stats->reset_pending)))
>
> The CPU (with a good branch predictor) would probably figure this out
> on its own, but maybe we can help a bit here.

Sure.

--
viresh
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 94d959a8e954..3e7eee29ee86 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -22,17 +22,22 @@ struct cpufreq_stats {
         spinlock_t lock;
         unsigned int *freq_table;
         unsigned int *trans_table;
+
+        /* Deferred reset */
+        unsigned int reset_pending;
+        unsigned long long reset_time;
 };
 
-static void cpufreq_stats_update(struct cpufreq_stats *stats)
+static void cpufreq_stats_update(struct cpufreq_stats *stats,
+                                 unsigned long long time)
 {
         unsigned long long cur_time = get_jiffies_64();
 
-        stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
+        stats->time_in_state[stats->last_index] += cur_time - time;
         stats->last_time = cur_time;
 }
 
-static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
+static void cpufreq_stats_reset_table(struct cpufreq_stats *stats)
 {
         unsigned int count = stats->max_state;
 
@@ -41,42 +46,67 @@ static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
         memset(stats->trans_table, 0, count * count * sizeof(int));
         stats->last_time = get_jiffies_64();
         stats->total_trans = 0;
+
+        /* Adjust for the time elapsed since reset was requested */
+        WRITE_ONCE(stats->reset_pending, 0);
+        cpufreq_stats_update(stats, stats->reset_time);
 
         spin_unlock(&stats->lock);
 }
 
 static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf)
 {
-        return sprintf(buf, "%d\n", policy->stats->total_trans);
+        struct cpufreq_stats *stats = policy->stats;
+
+        if (READ_ONCE(stats->reset_pending))
+                return sprintf(buf, "%d\n", 0);
+        else
+                return sprintf(buf, "%d\n", stats->total_trans);
 }
 cpufreq_freq_attr_ro(total_trans);
 
 static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
 {
         struct cpufreq_stats *stats = policy->stats;
+        bool pending = READ_ONCE(stats->reset_pending);
+        unsigned long long time;
         ssize_t len = 0;
         int i;
 
         if (policy->fast_switch_enabled)
                 return 0;
 
-        spin_lock(&stats->lock);
-        cpufreq_stats_update(stats);
-        spin_unlock(&stats->lock);
-
         for (i = 0; i < stats->state_num; i++) {
+                if (pending) {
+                        if (i == stats->last_index)
+                                time = get_jiffies_64() - stats->reset_time;
+                        else
+                                time = 0;
+                } else {
+                        time = stats->time_in_state[i];
+                        if (i == stats->last_index)
+                                time += get_jiffies_64() - stats->last_time;
+                }
+
                 len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i],
-                               (unsigned long long)
-                               jiffies_64_to_clock_t(stats->time_in_state[i]));
+                               jiffies_64_to_clock_t(time));
         }
         return len;
 }
 cpufreq_freq_attr_ro(time_in_state);
 
+/* We don't care what is written to the attribute */
 static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf,
                            size_t count)
 {
-        /* We don't care what is written to the attribute. */
-        cpufreq_stats_clear_table(policy->stats);
+        struct cpufreq_stats *stats = policy->stats;
+
+        /*
+         * Defer resetting of stats to cpufreq_stats_record_transition() to
+         * avoid races.
+         */
+        WRITE_ONCE(stats->reset_pending, 1);
+        stats->reset_time = get_jiffies_64();
+
         return count;
 }
 cpufreq_freq_attr_wo(reset);
@@ -84,8 +114,9 @@ cpufreq_freq_attr_wo(reset);
 static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 {
         struct cpufreq_stats *stats = policy->stats;
+        bool pending = READ_ONCE(stats->reset_pending);
         ssize_t len = 0;
-        int i, j;
+        int i, j, count;
 
         if (policy->fast_switch_enabled)
                 return 0;
@@ -113,8 +144,13 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
                 for (j = 0; j < stats->state_num; j++) {
                         if (len >= PAGE_SIZE)
                                 break;
-                        len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ",
-                                        stats->trans_table[i*stats->max_state+j]);
+
+                        if (pending)
+                                count = 0;
+                        else
+                                count = stats->trans_table[i * stats->max_state + j];
+
+                        len += scnprintf(buf + len, PAGE_SIZE - len, "%9u ", count);
                 }
                 if (len >= PAGE_SIZE)
                         break;
@@ -228,10 +264,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
         struct cpufreq_stats *stats = policy->stats;
         int old_index, new_index;
 
-        if (!stats) {
-                pr_debug("%s: No stats found\n", __func__);
+        if (!stats)
                 return;
-        }
+
+        if (READ_ONCE(stats->reset_pending))
+                cpufreq_stats_reset_table(stats);
 
         old_index = stats->last_index;
         new_index = freq_table_get_index(stats, new_freq);
@@ -241,7 +278,7 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
                 return;
 
         spin_lock(&stats->lock);
-        cpufreq_stats_update(stats);
+        cpufreq_stats_update(stats, stats->last_time);
 
         stats->last_index = new_index;
         stats->trans_table[old_index * stats->max_state + new_index]++;
In order to prepare for lock-less stats update, add support to defer any
updates to it until cpufreq_stats_record_transition() is called.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq_stats.c | 75 ++++++++++++++++++++++++---------
 1 file changed, 56 insertions(+), 19 deletions(-)

--
2.25.0.rc1.19.g042ed3e048af
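For illustration, here is a condensed userspace model (hypothetical C11
code, not the kernel implementation) of the deferred-reset protocol the
changelog describes: the sysfs write only flags the reset, and the hot
path applies it before accounting the next transition, counting only
the time elapsed since the reset was requested:

#include <stdatomic.h>
#include <stdio.h>

struct stats {
        atomic_uint reset_pending;      /* models stats->reset_pending */
        unsigned long long reset_time;
        unsigned long long last_time;
        unsigned long long time_in_state;
};

static unsigned long long now;          /* stands in for get_jiffies_64() */

/* sysfs write path: only flag the reset, defer the actual work */
static void model_store_reset(struct stats *s)
{
        s->reset_time = now;
        atomic_store_explicit(&s->reset_pending, 1, memory_order_release);
}

/* hot path: apply a pending reset, then account the transition */
static void model_record_transition(struct stats *s)
{
        if (atomic_load_explicit(&s->reset_pending, memory_order_acquire)) {
                s->time_in_state = 0;
                atomic_store_explicit(&s->reset_pending, 0,
                                      memory_order_relaxed);
                /* account only the time since the reset was requested */
                s->time_in_state += now - s->reset_time;
        } else {
                s->time_in_state += now - s->last_time;
        }
        s->last_time = now;
}

int main(void)
{
        struct stats s = { 0 };

        now = 100;
        model_store_reset(&s);          /* user requests a reset at t=100 */
        now = 130;
        model_record_transition(&s);    /* next frequency change at t=130 */
        /* prints 30: time since the reset request, not since boot */
        printf("time_in_state = %llu\n", s.time_in_state);
        return 0;
}

Per the changelog, the point of the deferral is that the stats tables
are then only ever modified from the transition path, which is what
makes the planned lock-less update possible.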