From patchwork Wed Jun  1 19:39:20 2016
X-Patchwork-Submitter: Dietmar Eggemann
X-Patchwork-Id: 69061
From: Dietmar Eggemann
To: Peter Zijlstra, linux-kernel@vger.kernel.org
Cc: Vincent Guittot, Ben Segall, Morten Rasmussen, Yuyang Du
Subject: [RFC PATCH 1/3] sched/fair: Aggregate task utilization only on root cfs_rq
Date: Wed, 1 Jun 2016 20:39:20 +0100
Message-Id: <1464809962-25814-2-git-send-email-dietmar.eggemann@arm.com>
X-Mailer: git-send-email 1.9.1
In-Reply-To: <1464809962-25814-1-git-send-email-dietmar.eggemann@arm.com>
References: <1464809962-25814-1-git-send-email-dietmar.eggemann@arm.com>
X-Mailing-List: linux-kernel@vger.kernel.org

cpu utilization (cpu_util()) is defined as the root cfs_rq's
cfs_rq->avg.util_avg signal, capped by the cpu's (original) capacity.
With the current PELT implementation, the utilization of a task
[en|de]queued on/from a cfs_rq that represents a task group other than
the root task group on a cpu is not immediately propagated down to the
root cfs_rq. This makes decisions based on cpu_util(), for scheduling
or cpu frequency selection, less accurate when tasks run in task
groups.

This patch aggregates task utilization only on the root cfs_rq,
essentially avoiding the maintenance of utilization for a se/cfs_rq
that represents a task group other than the root task group
(!entity_is_task(se) and &rq_of(cfs_rq)->cfs != cfs_rq).

The additional if/else condition that sets @update_util in
__update_load_avg() is replaced in 'sched/fair: Change @running of
__update_load_avg() to @update_util' by passing the information
whether utilization has to be maintained as an argument to this
function.

The additional requirement to align the last_update_time of a se with
that of the root cfs_rq is handled by the patch 'sched/fair: Sync se
with root cfs_rq'.
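In other words, in __update_load_avg() utilization is only maintained
when the sched_avg belongs to the root cfs_rq or to a task entity. The
decision can be sketched as a standalone predicate (the helper name
below is made up for illustration only; rq_of(), entity_is_task() and
container_of() are the existing kernel helpers):

        /*
         * Illustrative sketch, not part of the diff below: should this
         * sched_avg keep maintaining util_sum/util_avg?
         */
        static inline int update_util_p(struct cfs_rq *cfs_rq, struct sched_avg *sa)
        {
                /* cfs_rq signal: only the root cfs_rq aggregates utilization */
                if (cfs_rq)
                        return &rq_of(cfs_rq)->cfs == cfs_rq;

                /* se signal: only task entities carry utilization */
                return entity_is_task(container_of(sa, struct sched_entity, avg));
        }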
Signed-off-by: Dietmar Eggemann
---
 kernel/sched/fair.c | 48 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 12 deletions(-)

--
1.9.1

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 218f8e83db73..212becd3708f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2705,6 +2705,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 	u32 contrib;
 	unsigned int delta_w, scaled_delta_w, decayed = 0;
 	unsigned long scale_freq, scale_cpu;
+	int update_util = 0;
 
 	delta = now - sa->last_update_time;
 	/*
@@ -2725,6 +2726,12 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 		return 0;
 	sa->last_update_time = now;
 
+	if (cfs_rq) {
+		if (&rq_of(cfs_rq)->cfs == cfs_rq)
+			update_util = 1;
+	} else if (entity_is_task(container_of(sa, struct sched_entity, avg)))
+		update_util = 1;
+
 	scale_freq = arch_scale_freq_capacity(NULL, cpu);
 	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
 
@@ -2750,7 +2757,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 						weight * scaled_delta_w;
 			}
 		}
-		if (running)
+		if (update_util && running)
 			sa->util_sum += scaled_delta_w * scale_cpu;
 
 		delta -= delta_w;
@@ -2774,7 +2781,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 			if (cfs_rq)
 				cfs_rq->runnable_load_sum += weight * contrib;
 		}
-		if (running)
+		if (update_util && running)
 			sa->util_sum += contrib * scale_cpu;
 	}
 
@@ -2785,7 +2792,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 		if (cfs_rq)
 			cfs_rq->runnable_load_sum += weight * scaled_delta;
 	}
-	if (running)
+	if (update_util && running)
 		sa->util_sum += scaled_delta * scale_cpu;
 
 	sa->period_contrib += delta;
@@ -2796,7 +2803,8 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 			cfs_rq->runnable_load_avg =
 				div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
 		}
-		sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
+		if (update_util)
+			sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
 	}
 
 	return decayed;
@@ -2918,7 +2926,8 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 		removed_load = 1;
 	}
 
-	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
+	if ((&rq_of(cfs_rq)->cfs == cfs_rq) &&
+	    atomic_long_read(&cfs_rq->removed_util_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
 		sa->util_avg = max_t(long, sa->util_avg - r, 0);
 		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
@@ -2982,8 +2991,12 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	se->avg.last_update_time = cfs_rq->avg.last_update_time;
 	cfs_rq->avg.load_avg += se->avg.load_avg;
 	cfs_rq->avg.load_sum += se->avg.load_sum;
-	cfs_rq->avg.util_avg += se->avg.util_avg;
-	cfs_rq->avg.util_sum += se->avg.util_sum;
+
+	if (!entity_is_task(se))
+		return;
+
+	rq_of(cfs_rq)->cfs.avg.util_avg += se->avg.util_avg;
+	rq_of(cfs_rq)->cfs.avg.util_sum += se->avg.util_sum;
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -2996,8 +3009,14 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
 	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
 	cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-	cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+
+	if (!entity_is_task(se))
+		return;
+
+	rq_of(cfs_rq)->cfs.avg.util_avg =
+		max_t(long, rq_of(cfs_rq)->cfs.avg.util_avg - se->avg.util_avg, 0);
+	rq_of(cfs_rq)->cfs.avg.util_sum =
+		max_t(s32, rq_of(cfs_rq)->cfs.avg.util_sum - se->avg.util_sum, 0);
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -3082,7 +3101,11 @@ void remove_entity_load_avg(struct sched_entity *se)
 
 	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
 	atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
-	atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
+
+	if (!entity_is_task(se))
+		return;
+
+	atomic_long_add(se->avg.util_avg, &rq_of(cfs_rq)->cfs.removed_util_avg);
 }
 
 static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
@@ -8460,7 +8483,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 #endif
 #ifdef CONFIG_SMP
 	atomic_long_set(&cfs_rq->removed_load_avg, 0);
-	atomic_long_set(&cfs_rq->removed_util_avg, 0);
+
+	if (&rq_of(cfs_rq)->cfs == cfs_rq)
+		atomic_long_set(&cfs_rq->removed_util_avg, 0);
 #endif
 }
 
@@ -8525,7 +8550,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
-		post_init_entity_util_avg(se);
 	}
 
 	return 1;
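With this change the utilization signal of a cpu lives exclusively in
the root cfs_rq (rq->cfs.avg), regardless of which group cfs_rq a task
is enqueued on; group entities also no longer need
post_init_entity_util_avg() in alloc_fair_sched_group(), since their
utilization is not used anymore. A hypothetical accessor, not part of
the patch, summarizes where attach/detach/remove now account
utilization:

        /* Illustrative only: the cfs_rq that carries utilization for this cpu */
        static inline struct cfs_rq *util_cfs_rq_of(struct cfs_rq *cfs_rq)
        {
                return &rq_of(cfs_rq)->cfs;
        }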