Message ID: 20160617120454.080767343@infradead.org
State: New
On 17 June 2016 at 14:01, Peter Zijlstra <peterz@infradead.org> wrote:
> From: Vincent Guittot <vincent.guittot@linaro.org>
>
> A new fair task is detached and attached from/to task_group with:
>
>   cgroup_post_fork()
>     ss->fork(child) := cpu_cgroup_fork()
>       sched_move_task()
>         task_move_group_fair()
>
> Which is wrong, because at this point in fork() the task isn't fully
> initialized and it cannot 'move' to another group, because its not
> attached to any group as yet.
>
> In fact, cpu_cgroup_fork needs a small part of sched_move_task so we
> can just call this small part directly instead sched_move_task. And
> the task doesn't really migrate because it is not yet attached so we
> need the sequence:
>
>   do_fork()
>     sched_fork()
>       __set_task_cpu()
>
>     cgroup_post_fork()
>       set_task_rq() # set task group and runqueue
>
>   wake_up_new_task()
>     select_task_rq() can select a new cpu
>     __set_task_cpu
>     post_init_entity_util_avg
>       attach_task_cfs_rq()
>     activate_task
>       enqueue_task
>
> This patch makes that happen.

With this patch and patch 1, the fork sequence looks correct in my test

> Maybe-Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

You can remove the Maybe if you want

> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/sched/core.c | 67 ++++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 47 insertions(+), 20 deletions(-)
>
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7743,27 +7743,17 @@ void sched_offline_group(struct task_gro
>   spin_unlock_irqrestore(&task_group_lock, flags);
>  }
>
> -/* change task's runqueue when it moves between groups.
> - * The caller of this function should have put the task in its new group
> - * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
> - * reflect its new group.
> +/*
> + * Set task's runqueue and group.
> + *
> + * In case of a move between group, we update src and dst group thanks to
> + * sched_class->task_move_group. Otherwise, we just need to set runqueue and
> + * group pointers. The task will be attached to the runqueue during its wake
> + * up.
>   */
> -void sched_move_task(struct task_struct *tsk)
> +static void sched_set_group(struct task_struct *tsk, bool move)
>  {
>   struct task_group *tg;
> - int queued, running;
> - struct rq_flags rf;
> - struct rq *rq;
> -
> - rq = task_rq_lock(tsk, &rf);
> -
> - running = task_current(rq, tsk);
> - queued = task_on_rq_queued(tsk);
> -
> - if (queued)
> -   dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
> - if (unlikely(running))
> -   put_prev_task(rq, tsk);
>
>   /*
>    * All callers are synchronized by task_rq_lock(); we do not use RCU
> @@ -7776,11 +7766,37 @@ void sched_move_task(struct task_struct
>   tsk->sched_task_group = tg;
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> - if (tsk->sched_class->task_move_group)
> + if (move && tsk->sched_class->task_move_group)
>     tsk->sched_class->task_move_group(tsk);
>   else
>  #endif
>     set_task_rq(tsk, task_cpu(tsk));
> +}
> +
> +/*
> + * Change task's runqueue when it moves between groups.
> + *
> + * The caller of this function should have put the task in its new group by
> + * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
> + * its new group.
> + */
> +void sched_move_task(struct task_struct *tsk)
> +{
> + int queued, running;
> + struct rq_flags rf;
> + struct rq *rq;
> +
> + rq = task_rq_lock(tsk, &rf);
> +
> + running = task_current(rq, tsk);
> + queued = task_on_rq_queued(tsk);
> +
> + if (queued)
> +   dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
> + if (unlikely(running))
> +   put_prev_task(rq, tsk);
> +
> + sched_set_group(tsk, true);
>
>   if (unlikely(running))
>     tsk->sched_class->set_curr_task(rq);
> @@ -8208,9 +8224,20 @@ static void cpu_cgroup_css_free(struct c
>   sched_free_group(tg);
>  }
>
> +/*
> + * This is called before wake_up_new_task(), therefore we really only
> + * have to set its group bits, all the other stuff does not apply.
> + */
>  static void cpu_cgroup_fork(struct task_struct *task)
>  {
> - sched_move_task(task);
> + struct rq_flags rf;
> + struct rq *rq;
> +
> + rq = task_rq_lock(task, &rf);
> +
> + sched_set_group(task, false);
> +
> + task_rq_unlock(rq, task, &rf);
>  }
>
>  static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
>
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7743,27 +7743,17 @@ void sched_offline_group(struct task_gro
        spin_unlock_irqrestore(&task_group_lock, flags);
 }

-/* change task's runqueue when it moves between groups.
- * The caller of this function should have put the task in its new group
- * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
- * reflect its new group.
+/*
+ * Set task's runqueue and group.
+ *
+ * In case of a move between group, we update src and dst group thanks to
+ * sched_class->task_move_group. Otherwise, we just need to set runqueue and
+ * group pointers. The task will be attached to the runqueue during its wake
+ * up.
  */
-void sched_move_task(struct task_struct *tsk)
+static void sched_set_group(struct task_struct *tsk, bool move)
 {
        struct task_group *tg;
-       int queued, running;
-       struct rq_flags rf;
-       struct rq *rq;
-
-       rq = task_rq_lock(tsk, &rf);
-
-       running = task_current(rq, tsk);
-       queued = task_on_rq_queued(tsk);
-
-       if (queued)
-               dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
-       if (unlikely(running))
-               put_prev_task(rq, tsk);

        /*
         * All callers are synchronized by task_rq_lock(); we do not use RCU
@@ -7776,11 +7766,37 @@ void sched_move_task(struct task_struct
        tsk->sched_task_group = tg;

 #ifdef CONFIG_FAIR_GROUP_SCHED
-       if (tsk->sched_class->task_move_group)
+       if (move && tsk->sched_class->task_move_group)
                tsk->sched_class->task_move_group(tsk);
        else
 #endif
                set_task_rq(tsk, task_cpu(tsk));
+}
+
+/*
+ * Change task's runqueue when it moves between groups.
+ *
+ * The caller of this function should have put the task in its new group by
+ * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
+ * its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
+{
+       int queued, running;
+       struct rq_flags rf;
+       struct rq *rq;
+
+       rq = task_rq_lock(tsk, &rf);
+
+       running = task_current(rq, tsk);
+       queued = task_on_rq_queued(tsk);
+
+       if (queued)
+               dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
+       if (unlikely(running))
+               put_prev_task(rq, tsk);
+
+       sched_set_group(tsk, true);

        if (unlikely(running))
                tsk->sched_class->set_curr_task(rq);
@@ -8208,9 +8224,20 @@ static void cpu_cgroup_css_free(struct c
        sched_free_group(tg);
 }

+/*
+ * This is called before wake_up_new_task(), therefore we really only
+ * have to set its group bits, all the other stuff does not apply.
+ */
 static void cpu_cgroup_fork(struct task_struct *task)
 {
-       sched_move_task(task);
+       struct rq_flags rf;
+       struct rq *rq;
+
+       rq = task_rq_lock(task, &rf);
+
+       sched_set_group(task, false);
+
+       task_rq_unlock(rq, task, &rf);
 }

 static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
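The ordering argument in the changelog can be illustrated outside the kernel. Below is a minimal userspace sketch with made-up names (toy_task, toy_group, set_group, move_group, wake_up; none of these are kernel APIs) that only models the idea: at fork/cgroup time the task is not attached to any runqueue, so the cgroup callback must only set the group pointer, while the full detach/attach only makes sense for a task that is already enqueued. This mirrors the sched_set_group(..., false) vs. sched_move_task() split introduced by the patch, but is not the kernel implementation.

```c
/*
 * Toy userspace model of the ordering fixed by this patch.
 * All names here are invented for illustration only.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_group { const char *name; int load; };

struct toy_task {
	struct toy_group *group;  /* which group the task accounts to */
	bool attached;            /* enqueued on a runqueue?          */
};

/* Fork/cgroup path: only set the group pointer, nothing to detach yet. */
static void set_group(struct toy_task *t, struct toy_group *g)
{
	t->group = g;
}

/* Real move: only valid once the task is attached somewhere. */
static void move_group(struct toy_task *t, struct toy_group *g)
{
	if (!t->attached) {
		/* The buggy case the patch removes: nothing to detach yet. */
		printf("BUG: moving a task that was never attached\n");
		return;
	}
	t->group->load--;	/* detach from the source group      */
	t->group = g;
	t->group->load++;	/* attach to the destination group   */
}

/* Models wake_up_new_task(): the first (and only first) attach. */
static void wake_up(struct toy_task *t)
{
	t->attached = true;
	t->group->load++;
}

int main(void)
{
	struct toy_group root = { "root", 0 }, child = { "child", 0 };
	struct toy_task t = { &root, false };

	/* Old flow: cpu_cgroup_fork() did a full move before the wake-up. */
	move_group(&t, &child);		/* prints the BUG message */

	/* New flow: just set the group; the attach happens at wake-up. */
	set_group(&t, &child);
	wake_up(&t);
	printf("%s load after wake_up: %d\n", t.group->name, t.group->load);

	/* Later cgroup migrations go through the full move. */
	move_group(&t, &root);
	printf("root=%d child=%d\n", root.load, child.load);
	return 0;
}
```

The same reasoning is why the patch passes move = false from cpu_cgroup_fork(): there is nothing to detach yet, and wake_up_new_task() performs the first attach via attach_task_cfs_rq()/enqueue_task().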