Message ID | 1464809962-25814-3-git-send-email-dietmar.eggemann@arm.com
---|---
State | New
On Wed, Jun 01, 2016 at 08:39:21PM +0100, Dietmar Eggemann wrote:
> Since task utilization is accrued only on the root cfs_rq, there are a
> couple of places where the se has to be synced with the root cfs_rq:
>
> (1) The root cfs_rq has to be updated in attach_entity_load_avg() for
>     an se representing a task in a tg other than the root tg before
>     the se utilization can be added to it.
>
> (2) The last_update_time value of the root cfs_rq can be higher
>     than the one of the cfs_rq the se is enqueued in. Call
>     __update_load_avg() on the se with the last_update_time value of
>     the root cfs_rq before removing se's utilization from the root
>     cfs_rq in [remove|detach]_entity_load_avg().
>
> In case the difference between the last_update_time value of the cfs_rq
> and the root cfs_rq is smaller than 1024ns, the additional calls to
> __update_load_avg() will bail early.
>
> Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>

[...]

> @@ -2995,8 +2997,16 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>  	if (!entity_is_task(se))
>  		return;
>
> -	rq_of(cfs_rq)->cfs.avg.util_avg += se->avg.util_avg;
> -	rq_of(cfs_rq)->cfs.avg.util_sum += se->avg.util_sum;
> +	root_cfs_rq = &rq_of(cfs_rq)->cfs;
> +
> +	if (parent_entity(se))
> +		__update_load_avg(cfs_rq_clock_task(root_cfs_rq),
> +				  cpu_of(rq_of(root_cfs_rq)), &root_cfs_rq->avg,
> +				  scale_load_down(root_cfs_rq->load.weight),
> +				  upd_util_cfs_rq(root_cfs_rq), root_cfs_rq);

Maybe add the below macro in this patch to make it more readable:

#define upd_util_cfs_rq(cfs_rq) XXX

[...]
On 06/06/16 03:59, Leo Yan wrote:
> On Wed, Jun 01, 2016 at 08:39:21PM +0100, Dietmar Eggemann wrote:

[...]

>> @@ -2995,8 +2997,16 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>>  	if (!entity_is_task(se))
>>  		return;
>>
>> -	rq_of(cfs_rq)->cfs.avg.util_avg += se->avg.util_avg;
>> -	rq_of(cfs_rq)->cfs.avg.util_sum += se->avg.util_sum;
>> +	root_cfs_rq = &rq_of(cfs_rq)->cfs;
>> +
>> +	if (parent_entity(se))
>> +		__update_load_avg(cfs_rq_clock_task(root_cfs_rq),
>> +				  cpu_of(rq_of(root_cfs_rq)), &root_cfs_rq->avg,
>> +				  scale_load_down(root_cfs_rq->load.weight),
>> +				  upd_util_cfs_rq(root_cfs_rq), root_cfs_rq);
>
> Maybe add the below macro in this patch to make it more readable:
> #define upd_util_cfs_rq(cfs_rq) XXX

Sorry, this doesn't belong here since the macro is only introduced in 3/3.
It should be 'root_cfs_rq->curr != NULL' here instead.

[...]
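For illustration only (this is not part of any posted patch), the attach_entity_load_avg() call site would then read roughly as follows, with the condition spelled out as Dietmar suggests instead of the upd_util_cfs_rq() macro that only arrives in 3/3:

	/* Illustration: the same call with the update-utilization flag written out. */
	if (parent_entity(se))
		__update_load_avg(cfs_rq_clock_task(root_cfs_rq),
				  cpu_of(rq_of(root_cfs_rq)), &root_cfs_rq->avg,
				  scale_load_down(root_cfs_rq->load.weight),
				  root_cfs_rq->curr != NULL, root_cfs_rq);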
Hi Dietmar,

On 1 June 2016 at 21:39, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
> Since task utilization is accrued only on the root cfs_rq, there are a
> couple of places where the se has to be synced with the root cfs_rq:

[...]

> @@ -2995,8 +2997,16 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>  	if (!entity_is_task(se))
>  		return;
>
> -	rq_of(cfs_rq)->cfs.avg.util_avg += se->avg.util_avg;
> -	rq_of(cfs_rq)->cfs.avg.util_sum += se->avg.util_sum;
> +	root_cfs_rq = &rq_of(cfs_rq)->cfs;
> +
> +	if (parent_entity(se))
> +		__update_load_avg(cfs_rq_clock_task(root_cfs_rq),
> +				  cpu_of(rq_of(root_cfs_rq)), &root_cfs_rq->avg,
> +				  scale_load_down(root_cfs_rq->load.weight),
> +				  upd_util_cfs_rq(root_cfs_rq), root_cfs_rq);
> +
> +	root_cfs_rq->avg.util_avg += se->avg.util_avg;
> +	root_cfs_rq->avg.util_sum += se->avg.util_sum;

The main issue with flat utilization is that we can't keep the sched_avg of a
sched_entity synced (from a last_update_time point of view) with both the
cfs_rq to which its load is attached and the root cfs_rq to which its
utilization is attached.

With this additional sync to the root cfs_rq in attach/detach_entity_load_avg()
and in remove_entity_load_avg(), the load of a sched_entity is no longer synced
to the time stamp of the cfs_rq to which it is attached. This can generate
several wrong updates of the load of the latter.

As an example, take a task TA that sleeps and move it to a task group TGB which
has not run recently, so TGB.avg.last_update_time << root
cfs_rq.avg.last_update_time (a decay of 20ms removes ~35% of the load).

When we attach TA to TGB, TA is synced with TGB for the attach and then decayed
to be synced with the root cfs_rq. If TA is then moved to another task group,
we try to sync TA with TGB, but TA is in the future, so TA.avg.last_update_time
is set to TGB's. Then TA's load is removed from TGB, but since TA's load has
been decayed, only part of it will effectively be subtracted. Then TA's load is
synced with the root cfs_rq, which means it is decayed one more time for the
same time slot, because TA.avg.last_update_time has been reset to
TGB.avg.last_update_time, so we will subtract less utilization than we should
from the root cfs_rq.

I think that similar behavior can apply with the removed load.
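Vincent's figure that ~20ms of decay removes roughly 35% of the load follows from the PELT half-life of 32 periods of about 1ms each. A minimal user-space sketch (plain C, not kernel code; it assumes exactly 1ms periods and ignores sub-period remainders) that reproduces the number:

#include <math.h>
#include <stdio.h>

int main(void)
{
	/*
	 * PELT halves a load/utilization contribution every 32 periods
	 * (~32ms), so the remaining fraction after n periods is 0.5^(n/32).
	 */
	double periods = 20.0;				/* ~20ms of decay */
	double remaining = pow(0.5, periods / 32.0);

	printf("remaining: %.1f%%, decayed: %.1f%%\n",
	       100.0 * remaining, 100.0 * (1.0 - remaining));
	return 0;
}

Linked with -lm, this prints roughly 64.8% remaining / 35.2% decayed, which matches the figure quoted above.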
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 212becd3708f..3ae8e79fb687 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2970,6 +2970,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	struct cfs_rq* root_cfs_rq;
+
 	if (!sched_feat(ATTACH_AGE_LOAD))
 		goto skip_aging;
 
@@ -2995,8 +2997,16 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	if (!entity_is_task(se))
 		return;
 
-	rq_of(cfs_rq)->cfs.avg.util_avg += se->avg.util_avg;
-	rq_of(cfs_rq)->cfs.avg.util_sum += se->avg.util_sum;
+	root_cfs_rq = &rq_of(cfs_rq)->cfs;
+
+	if (parent_entity(se))
+		__update_load_avg(cfs_rq_clock_task(root_cfs_rq),
+				  cpu_of(rq_of(root_cfs_rq)), &root_cfs_rq->avg,
+				  scale_load_down(root_cfs_rq->load.weight),
+				  upd_util_cfs_rq(root_cfs_rq), root_cfs_rq);
+
+	root_cfs_rq->avg.util_avg += se->avg.util_avg;
+	root_cfs_rq->avg.util_sum += se->avg.util_sum;
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -3013,6 +3023,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	if (!entity_is_task(se))
 		return;
 
+	__update_load_avg(rq_of(cfs_rq)->cfs.avg.last_update_time, cpu_of(rq_of(cfs_rq)),
+			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
+			  cfs_rq->curr == se, NULL);
+
 	rq_of(cfs_rq)->cfs.avg.util_avg =
 		max_t(long, rq_of(cfs_rq)->cfs.avg.util_avg - se->avg.util_avg, 0);
 	rq_of(cfs_rq)->cfs.avg.util_sum =
@@ -3105,6 +3119,9 @@ void remove_entity_load_avg(struct sched_entity *se)
 	if (!entity_is_task(se))
 		return;
 
+	last_update_time = cfs_rq_last_update_time(&rq_of(cfs_rq)->cfs);
+
+	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
 	atomic_long_add(se->avg.util_avg, &rq_of(cfs_rq)->cfs.removed_util_avg);
 }
Since task utilization is accrued only on the root cfs_rq, there are a
couple of places where the se has to be synced with the root cfs_rq:

(1) The root cfs_rq has to be updated in attach_entity_load_avg() for
    an se representing a task in a tg other than the root tg before
    the se utilization can be added to it.

(2) The last_update_time value of the root cfs_rq can be higher
    than the one of the cfs_rq the se is enqueued in. Call
    __update_load_avg() on the se with the last_update_time value of
    the root cfs_rq before removing se's utilization from the root
    cfs_rq in [remove|detach]_entity_load_avg().

In case the difference between the last_update_time value of the cfs_rq
and the root cfs_rq is smaller than 1024ns, the additional calls to
__update_load_avg() will bail early.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
---
 kernel/sched/fair.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

--
1.9.1
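The "bail early" behaviour the commit message relies on comes from __update_load_avg() working in 1024ns units. A rough sketch of that check (paraphrased for illustration only; pelt_update_would_bail() is a made-up helper name, the real check sits inside __update_load_avg() in kernel/sched/fair.c):

#include <stdint.h>

/*
 * Sketch only: with less than 1024ns elapsed since last_update_time, the
 * delta shifts down to zero and the update returns without decaying or
 * accruing anything, so the additional sync calls added by this patch are
 * nearly free in that case.
 */
static int pelt_update_would_bail(uint64_t now, uint64_t last_update_time)
{
	uint64_t delta = now - last_update_time;

	delta >>= 10;		/* ns -> ~1us periods */
	return delta == 0;
}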