[v2,1/2] sched: force update of blocked load of idle cpus

Message ID 20171201180157.18937-2-brendan.jackman@arm.com
State New
Headers show
Series
  • [v2,1/2] sched: force update of blocked load of idle cpus
Related show

Commit Message

Brendan Jackman Dec. 1, 2017, 6:01 p.m.
From: Vincent Guittot <vincent.guittot@linaro.org>


When idle, the blocked load of CPUs will be updated only when an idle
load balance is triggered which may never happen. Because of this
uncertainty on the execution of idle load balance, the utilization,
the load and the shares of idle cfs_rq can stay artificially high and
steal shares and running time to busy cfs_rqs of the task group.
Add a new light idle load balance state which ensures that blocked loads
are periodically updated and decayed but does not perform any task
migration.

The remote load updates are rate-limited, so that they are not
performed with a shorter period than LOAD_AVG_PERIOD (i.e. PELT
half-life). This is the period after which we have a known 50% error
in stale load.

Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

[Switched remote update interval to use PELT half life]
[Moved update_blocked_averages call outside rebalance_domains
 to simplify code]
Signed-off-by: Brendan Jackman <brendan.jackman@arm.com>

---
 kernel/sched/fair.c  | 86 ++++++++++++++++++++++++++++++++++++++++++++++------
 kernel/sched/sched.h |  1 +
 2 files changed, 77 insertions(+), 10 deletions(-)

-- 
2.14.1

Comments

Peter Zijlstra Dec. 20, 2017, 2:03 p.m. | #1
On Fri, Dec 01, 2017 at 06:01:56PM +0000, Brendan Jackman wrote:
> @@ -8955,8 +8964,20 @@ static void nohz_balancer_kick(void)

>  	if (ilb_cpu >= nr_cpu_ids)

>  		return;

>  

> -	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))

> +	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu))) {

> +		if (!only_update) {

> +			/*

> +			 * There's a pending/ongoing nohz kick/balance. If it's

> +			 * just for stats, convert it to a proper load balance.

> +			 */

> +			clear_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

> +		}

>  		return;

> +	}

> +

> +	if (only_update)

> +		set_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

> +

>  	/*

>  	 * Use smp_send_reschedule() instead of resched_cpu().

>  	 * This way we generate a sched IPI on the target cpu which


This looks racy.. if its not we don't need atomic ops, if it is but is
still fine it needs a comment.
Peter Zijlstra Dec. 20, 2017, 2:09 p.m. | #2
On Fri, Dec 01, 2017 at 06:01:56PM +0000, Brendan Jackman wrote:

> @@ -9210,7 +9256,15 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)

>  			cpu_load_update_idle(rq);

>  			rq_unlock_irq(rq, &rf);

>  

> -			rebalance_domains(rq, CPU_IDLE);

> +			update_blocked_averages(balance_cpu);

> +			/*

> +			 * This idle load balance softirq may have been

> +			 * triggered only to update the blocked load and shares

> +			 * of idle CPUs (which we have just done for

> +			 * balance_cpu). In that case skip the actual balance.

> +			 */

> +			if (!in_nohz_stats_kick(this_cpu))

> +				rebalance_domains(rq, idle);

>  		}

>  

>  		if (time_after(next_balance, rq->next_balance)) {


> @@ -9336,7 +9396,12 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)

>  	 * and abort nohz_idle_balance altogether if we pull some load.

>  	 */

>  	nohz_idle_balance(this_rq, idle);

> -	rebalance_domains(this_rq, idle);

> +	update_blocked_averages(this_rq->cpu);

> +	if (!in_nohz_stats_kick(this_rq->cpu))

> +		rebalance_domains(this_rq, idle);

> +#ifdef CONFIG_NO_HZ_COMMON

> +	clear_bit(NOHZ_STATS_KICK, nohz_flags(this_rq->cpu));

> +#endif

>  }

>  

>  /*


You're doing the same thing to both (all) callsites of
rebalance_domains(), does that not suggest doing it inside and leaving
update_blocked_averages() where it is?
Vincent Guittot Dec. 20, 2017, 2:23 p.m. | #3
On 20 December 2017 at 15:03, Peter Zijlstra <peterz@infradead.org> wrote:
> On Fri, Dec 01, 2017 at 06:01:56PM +0000, Brendan Jackman wrote:

>> @@ -8955,8 +8964,20 @@ static void nohz_balancer_kick(void)

>>       if (ilb_cpu >= nr_cpu_ids)

>>               return;

>>

>> -     if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))

>> +     if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu))) {

>> +             if (!only_update) {

>> +                     /*

>> +                      * There's a pending/ongoing nohz kick/balance. If it's

>> +                      * just for stats, convert it to a proper load balance.

>> +                      */

>> +                     clear_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

>> +             }

>>               return;

>> +     }

>> +

>> +     if (only_update)

>> +             set_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

>> +

>>       /*

>>        * Use smp_send_reschedule() instead of resched_cpu().

>>        * This way we generate a sched IPI on the target cpu which

>

> This looks racy.. if its not we don't need atomic ops, if it is but is

> still fine it needs a comment.


NOHZ_STATS_KICK modification is protected by test_and_set_bit(NOHZ_BALANCE_KICK).
You're right that we don't need atomic ops and __set_bit() is enough
Vincent Guittot Dec. 20, 2017, 2:27 p.m. | #4
On 20 December 2017 at 15:09, Peter Zijlstra <peterz@infradead.org> wrote:
> On Fri, Dec 01, 2017 at 06:01:56PM +0000, Brendan Jackman wrote:

>

>> @@ -9210,7 +9256,15 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)

>>                       cpu_load_update_idle(rq);

>>                       rq_unlock_irq(rq, &rf);

>>

>> -                     rebalance_domains(rq, CPU_IDLE);

>> +                     update_blocked_averages(balance_cpu);

>> +                     /*

>> +                      * This idle load balance softirq may have been

>> +                      * triggered only to update the blocked load and shares

>> +                      * of idle CPUs (which we have just done for

>> +                      * balance_cpu). In that case skip the actual balance.

>> +                      */

>> +                     if (!in_nohz_stats_kick(this_cpu))

>> +                             rebalance_domains(rq, idle);

>>               }

>>

>>               if (time_after(next_balance, rq->next_balance)) {

>

>> @@ -9336,7 +9396,12 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)

>>        * and abort nohz_idle_balance altogether if we pull some load.

>>        */

>>       nohz_idle_balance(this_rq, idle);

>> -     rebalance_domains(this_rq, idle);

>> +     update_blocked_averages(this_rq->cpu);

>> +     if (!in_nohz_stats_kick(this_rq->cpu))

>> +             rebalance_domains(this_rq, idle);

>> +#ifdef CONFIG_NO_HZ_COMMON

>> +     clear_bit(NOHZ_STATS_KICK, nohz_flags(this_rq->cpu));

>> +#endif

>>  }

>>

>>  /*

>

> You're doing the same thing to both (all) callsites of

> rebalance_domains(), does that not suggest doing it inside and leaving

> update_blocked_averages() where it is?


The goal of moving update_blocked_averages() outside rebalance_domains
is to not add a new parameter or use a special cpu_idle_type value in
rebalance_domains parameters in order to abort the rebalance sequence
just after updating blocked load

>

>
Peter Zijlstra Dec. 20, 2017, 3:01 p.m. | #5
On Wed, Dec 20, 2017 at 03:23:01PM +0100, Vincent Guittot wrote:
> On 20 December 2017 at 15:03, Peter Zijlstra <peterz@infradead.org> wrote:

> > On Fri, Dec 01, 2017 at 06:01:56PM +0000, Brendan Jackman wrote:

> >> @@ -8955,8 +8964,20 @@ static void nohz_balancer_kick(void)

> >>       if (ilb_cpu >= nr_cpu_ids)

> >>               return;

> >>

> >> -     if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))

> >> +     if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu))) {

> >> +             if (!only_update) {

> >> +                     /*

> >> +                      * There's a pending/ongoing nohz kick/balance. If it's

> >> +                      * just for stats, convert it to a proper load balance.

> >> +                      */

> >> +                     clear_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

> >> +             }

> >>               return;

> >> +     }

> >> +

> >> +     if (only_update)

> >> +             set_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

> >> +

> >>       /*

> >>        * Use smp_send_reschedule() instead of resched_cpu().

> >>        * This way we generate a sched IPI on the target cpu which

> >

> > This looks racy.. if its not we don't need atomic ops, if it is but is

> > still fine it needs a comment.

> 

> NOHZ_STATS_KICK modification is protected by test_and_set_bit(NOHZ_BALANCE_KICK.

> You're right that we don't need atomics ops and __set_bit() is enough


Well, you shouldn't mix atomic and non-atomic ops to the same word,
that's asking for trouble.

But why don't you do something like:

nohz_kick()

	flags = NOHZ_STAT;
	if (!only_update)
		flags |= NOHZ_BALANCE;

	atomic_long_or(flags, &nohz_cpu(cpu));


nohz_idle_balance()

	unsigned long do_flags = atomic_long_fetch_andnot(NOHZ_BALANCE|NOHZ_STAT, &nohz_flags(cpu));

	if (do_flags & NOHZ_STAT)
		update_blocked_stuff();

	if (do_flags & NOHZ_BALANCE)
		rebalance_domains();

That way its far more readable.
Peter Zijlstra Dec. 20, 2017, 3:05 p.m. | #6
On Wed, Dec 20, 2017 at 04:01:10PM +0100, Peter Zijlstra wrote:
> Well, you shouldn't mix atomic and non-atomic ops to the same word,

> that's asking for trouble.

> 

> But why don't you do something like:

> 

> nohz_kick()

> 

> 	flags = NOHZ_STAT;

> 	if (!only_update)

> 		flags |= NOHZ_BALANCE;

> 

> 	atomic_long_or(flags, &nohz_cpu(cpu));

> 

> 

> nohz_idle_balance()

> 

> 	unsigned long do_flags = atomic_long_fetch_andnot(NOHZ_BALANCE|NOHZ_STAT, &nohz_flags(cpu));

> 

> 	if (do_flags & NOHZ_STAT)

> 		update_blocked_stuff();

> 

> 	if (do_flags & NOHZ_BALANCE)

> 		rebalance_domains();

> 

> That way its far more readable.


we could use atomic_t too, there's not that many flags in there, the
only reason its long is because of that bitmap crud.
Vincent Guittot Dec. 21, 2017, 7:59 a.m. | #7
Le Wednesday 20 Dec 2017 à 16:01:10 (+0100), Peter Zijlstra a écrit :
> On Wed, Dec 20, 2017 at 03:23:01PM +0100, Vincent Guittot wrote:

> > On 20 December 2017 at 15:03, Peter Zijlstra <peterz@infradead.org> wrote:

> > > On Fri, Dec 01, 2017 at 06:01:56PM +0000, Brendan Jackman wrote:

> > >> @@ -8955,8 +8964,20 @@ static void nohz_balancer_kick(void)

> > >>       if (ilb_cpu >= nr_cpu_ids)

> > >>               return;

> > >>

> > >> -     if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))

> > >> +     if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu))) {

> > >> +             if (!only_update) {

> > >> +                     /*

> > >> +                      * There's a pending/ongoing nohz kick/balance. If it's

> > >> +                      * just for stats, convert it to a proper load balance.

> > >> +                      */

> > >> +                     clear_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

> > >> +             }

> > >>               return;

> > >> +     }

> > >> +

> > >> +     if (only_update)

> > >> +             set_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

> > >> +

> > >>       /*

> > >>        * Use smp_send_reschedule() instead of resched_cpu().

> > >>        * This way we generate a sched IPI on the target cpu which

> > >

> > > This looks racy.. if its not we don't need atomic ops, if it is but is

> > > still fine it needs a comment.

> > 

> > NOHZ_STATS_KICK modification is protected by test_and_set_bit(NOHZ_BALANCE_KICK.

> > You're right that we don't need atomics ops and __set_bit() is enough

> 

> Well, you shouldn't mix atomic and non-atomic ops to the same word,

> that's asking for trouble.


In fact, the atomic operation is needed, I forgot that set/clear of NOHZ_TICK_STOPPED
can happen simultaneously on the ilb_cpu so we must keep
set_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));

As comment we can add:
 /*
  * Update of NOHZ_STATS_KICK itself is protected by test_and_set_bit, but
  * clear/set of NOHZ_TICK_STOPPED can happen simultaneously so we need
  * atomic operation
  */

> 

> But why don't you do something like:

> 

> nohz_kick()

> 

> 	flags = NOHZ_STAT;

> 	if (!only_update)

> 		flags |= NOHZ_BALANCE;

> 

> 	atomic_long_or(flags, &nohz_cpu(cpu));

> 

> 

> nohz_idle_balance()

> 

> 	unsigned long do_flags = atomic_long_fetch_andnot(NOHZ_BALANCE|NOHZ_STAT, &nohz_flags(cpu));

> 

> 	if (do_flags & NOHZ_STAT)

> 		update_blocked_stuff();

> 

> 	if (do_flags & NOHZ_BALANCE)

> 		rebalance_domains();

> 

> That way its far more readable.

>
Vincent Guittot Dec. 21, 2017, 10:01 a.m. | #8
On 20 December 2017 at 15:27, Vincent Guittot
<vincent.guittot@linaro.org> wrote:
> On 20 December 2017 at 15:09, Peter Zijlstra <peterz@infradead.org> wrote:

>> On Fri, Dec 01, 2017 at 06:01:56PM +0000, Brendan Jackman wrote:

>>

>>> @@ -9210,7 +9256,15 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)

>>>                       cpu_load_update_idle(rq);

>>>                       rq_unlock_irq(rq, &rf);

>>>

>>> -                     rebalance_domains(rq, CPU_IDLE);

>>> +                     update_blocked_averages(balance_cpu);

>>> +                     /*

>>> +                      * This idle load balance softirq may have been

>>> +                      * triggered only to update the blocked load and shares

>>> +                      * of idle CPUs (which we have just done for

>>> +                      * balance_cpu). In that case skip the actual balance.

>>> +                      */

>>> +                     if (!in_nohz_stats_kick(this_cpu))

>>> +                             rebalance_domains(rq, idle);

>>>               }

>>>

>>>               if (time_after(next_balance, rq->next_balance)) {

>>

>>> @@ -9336,7 +9396,12 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)

>>>        * and abort nohz_idle_balance altogether if we pull some load.

>>>        */

>>>       nohz_idle_balance(this_rq, idle);

>>> -     rebalance_domains(this_rq, idle);

>>> +     update_blocked_averages(this_rq->cpu);

>>> +     if (!in_nohz_stats_kick(this_rq->cpu))

>>> +             rebalance_domains(this_rq, idle);

>>> +#ifdef CONFIG_NO_HZ_COMMON

>>> +     clear_bit(NOHZ_STATS_KICK, nohz_flags(this_rq->cpu));

>>> +#endif

>>>  }

>>>

>>>  /*

>>

>> You're doing the same thing to both (all) callsites of

>> rebalance_domains(), does that not suggest doing it inside and leaving

>> update_blocked_averages() where it is?

>

> The goal of moving update_blocked_averages() outside rebalance_domains

> is to not add a new parameter or use a special  cpu_idle_type value in

> rebalance_domains parameters in order to abort the rebalance sequence

> just after updating blocked load


Peter,
Is the reason above reasonable or you prefer update_blocked_averages
to stay in rebalance_domains ?

>

>>

>>

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4037e19bbca2..f83e8f0d4f06 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6258,6 +6258,9 @@  static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 	return min_cap * 1024 < task_util(p) * capacity_margin;
 }
 
+static inline bool nohz_kick_needed(struct rq *rq, bool only_update);
+static void nohz_balancer_kick(bool only_update);
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -6334,6 +6337,11 @@  select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	}
 	rcu_read_unlock();
 
+#ifdef CONFIG_NO_HZ_COMMON
+	if (nohz_kick_needed(cpu_rq(new_cpu), true))
+		nohz_balancer_kick(true);
+#endif
+
 	return new_cpu;
 }
 
@@ -8927,6 +8935,7 @@  static struct {
 	cpumask_var_t idle_cpus_mask;
 	atomic_t nr_cpus;
 	unsigned long next_balance;     /* in jiffy units */
+	unsigned long next_update;     /* in jiffy units */
 } nohz ____cacheline_aligned;
 
 static inline int find_new_ilb(void)
@@ -8944,7 +8953,7 @@  static inline int find_new_ilb(void)
  * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
  * CPU (if there is one).
  */
-static void nohz_balancer_kick(void)
+static void nohz_balancer_kick(bool only_update)
 {
 	int ilb_cpu;
 
@@ -8955,8 +8964,20 @@  static void nohz_balancer_kick(void)
 	if (ilb_cpu >= nr_cpu_ids)
 		return;
 
-	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))
+	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu))) {
+		if (!only_update) {
+			/*
+			 * There's a pending/ongoing nohz kick/balance. If it's
+			 * just for stats, convert it to a proper load balance.
+			 */
+			clear_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));
+		}
 		return;
+	}
+
+	if (only_update)
+		set_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));
+
 	/*
 	 * Use smp_send_reschedule() instead of resched_cpu().
 	 * This way we generate a sched IPI on the target cpu which
@@ -9044,6 +9065,8 @@  void nohz_balance_enter_idle(int cpu)
 	atomic_inc(&nohz.nr_cpus);
 	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
+#else
+static inline void nohz_balancer_kick(bool only_update) {}
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -9075,8 +9098,6 @@  static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 	int need_serialize, need_decay = 0;
 	u64 max_cost = 0;
 
-	update_blocked_averages(cpu);
-
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		/*
@@ -9167,6 +9188,13 @@  static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
+
+/* True iff @cpu was kicked for nohz balance, but just to update blocked load */
+static inline bool in_nohz_stats_kick(int cpu)
+{
+	return test_bit(NOHZ_STATS_KICK, nohz_flags(cpu));
+}
+
 /*
  * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
  * rebalancing for all the cpus for whom scheduler ticks are stopped.
@@ -9175,6 +9203,7 @@  static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 {
 	int this_cpu = this_rq->cpu;
 	struct rq *rq;
+	struct sched_domain *sd;
 	int balance_cpu;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
@@ -9184,6 +9213,23 @@  static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 	    !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
 		goto end;
 
+	/*
+	 * This cpu is going to update the blocked load of idle CPUs either
+	 * before doing a rebalancing or just to keep metrics up to date. we
+	 * can safely update the next update timestamp
+	 */
+	rcu_read_lock();
+	sd = rcu_dereference(this_rq->sd);
+	/*
+	 * Check whether there is a sched_domain available for this cpu.
+	 * The last other cpu can have been unplugged since the ILB has been
+	 * triggered and the sched_domain can now be null. The idle balance
+	 * sequence will quickly be aborted as there is no more idle CPUs
+	 */
+	if (sd)
+		nohz.next_update = jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD);
+	rcu_read_unlock();
+
 	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
 		if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
 			continue;
@@ -9210,7 +9256,15 @@  static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 			cpu_load_update_idle(rq);
 			rq_unlock_irq(rq, &rf);
 
-			rebalance_domains(rq, CPU_IDLE);
+			update_blocked_averages(balance_cpu);
+			/*
+			 * This idle load balance softirq may have been
+			 * triggered only to update the blocked load and shares
+			 * of idle CPUs (which we have just done for
+			 * balance_cpu). In that case skip the actual balance.
+			 */
+			if (!in_nohz_stats_kick(this_cpu))
+				rebalance_domains(rq, idle);
 		}
 
 		if (time_after(next_balance, rq->next_balance)) {
@@ -9241,7 +9295,7 @@  static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
-static inline bool nohz_kick_needed(struct rq *rq)
+static inline bool nohz_kick_needed(struct rq *rq, bool only_update)
 {
 	unsigned long now = jiffies;
 	struct sched_domain_shared *sds;
@@ -9249,7 +9303,7 @@  static inline bool nohz_kick_needed(struct rq *rq)
 	int nr_busy, i, cpu = rq->cpu;
 	bool kick = false;
 
-	if (unlikely(rq->idle_balance))
+	if (unlikely(rq->idle_balance) && !only_update)
 		return false;
 
        /*
@@ -9266,6 +9320,9 @@  static inline bool nohz_kick_needed(struct rq *rq)
 	if (likely(!atomic_read(&nohz.nr_cpus)))
 		return false;
 
+	if (only_update)
+		return time_after_eq(now, nohz.next_update);
+
 	if (time_before(now, nohz.next_balance))
 		return false;
 
@@ -9313,8 +9370,11 @@  static inline bool nohz_kick_needed(struct rq *rq)
 	rcu_read_unlock();
 	return kick;
 }
+
 #else
+static inline bool in_nohz_stats_kick(int cpu) { return false; }
 static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
+static inline bool nohz_kick_needed(struct rq *rq, bool only_update) { return false; }
 #endif
 
 /*
@@ -9336,7 +9396,12 @@  static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
 	 * and abort nohz_idle_balance altogether if we pull some load.
 	 */
 	nohz_idle_balance(this_rq, idle);
-	rebalance_domains(this_rq, idle);
+	update_blocked_averages(this_rq->cpu);
+	if (!in_nohz_stats_kick(this_rq->cpu))
+		rebalance_domains(this_rq, idle);
+#ifdef CONFIG_NO_HZ_COMMON
+	clear_bit(NOHZ_STATS_KICK, nohz_flags(this_rq->cpu));
+#endif
 }
 
 /*
@@ -9351,8 +9416,8 @@  void trigger_load_balance(struct rq *rq)
 	if (time_after_eq(jiffies, rq->next_balance))
 		raise_softirq(SCHED_SOFTIRQ);
 #ifdef CONFIG_NO_HZ_COMMON
-	if (nohz_kick_needed(rq))
-		nohz_balancer_kick();
+	if (nohz_kick_needed(rq, false))
+		nohz_balancer_kick(false);
 #endif
 }
 
@@ -9927,6 +9992,7 @@  __init void init_sched_fair_class(void)
 
 #ifdef CONFIG_NO_HZ_COMMON
 	nohz.next_balance = jiffies;
+	nohz.next_update = jiffies;
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
 #endif
 #endif /* SMP */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b19552a212de..a7e48b6ee68c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2014,6 +2014,7 @@  extern void cfs_bandwidth_usage_dec(void);
 enum rq_nohz_flag_bits {
 	NOHZ_TICK_STOPPED,
 	NOHZ_BALANCE_KICK,
+	NOHZ_STATS_KICK
 };
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)