[6/7] sched/rt: make it configurable

Message ID 20170529210302.26868-7-nicolas.pitre@linaro.org
State New
Series
  • scheduler tinification

Commit Message

Nicolas Pitre May 29, 2017, 9:03 p.m.
On most small systems where user space is tightly controlled, the realtime
scheduling class can often be dispensed with to reduce the kernel footprint.
Let's make it configurable.

Signed-off-by: Nicolas Pitre <nico@linaro.org>

---
 include/linux/init_task.h      | 15 +++++++++++----
 include/linux/sched.h          |  2 ++
 include/linux/sched/rt.h       |  4 ++--
 init/Kconfig                   | 14 ++++++++++++--
 kernel/sched/Makefile          |  4 ++--
 kernel/sched/core.c            | 42 +++++++++++++++++++++++++++++++++++++++---
 kernel/sched/debug.c           |  2 ++
 kernel/sched/sched.h           |  7 +++++--
 kernel/sched/stop_task.c       |  4 +++-
 kernel/sysctl.c                |  4 +++-
 kernel/time/posix-cpu-timers.c |  6 +++++-
 11 files changed, 86 insertions(+), 18 deletions(-)

-- 
2.9.4

Comments

Peter Zijlstra May 30, 2017, 8:41 a.m. | #1
On Mon, May 29, 2017 at 05:03:01PM -0400, Nicolas Pitre wrote:

> @@ -1303,8 +1303,17 @@ config SCHED_AUTOGROUP
>  	  desktop applications.  Task group autogeneration is currently based
>  	  upon task session.
>  
> +config SCHED_RT
> +	bool "Real Time Task Scheduling" if EXPERT
> +	default y
> +	help
> +	  This adds the sched_rt scheduling class to the kernel providing
> + 	  support for the SCHED_FIFO and SCHED_RR policies. You might want
> +	  to disable this to reduce the kernel size. If unsure say y.
> +
>  config SCHED_DL
>  	bool "Deadline Task Scheduling" if EXPERT
> +	depends on SCHED_RT
>  	default y
>  	help
>  	  This adds the sched_dl scheduling class to the kernel providing
> @@ -1632,6 +1641,7 @@ config BASE_FULL
>  config FUTEX
>  	bool "Enable futex support" if EXPERT
>  	default y
> +	depends on SCHED_RT
>  	select RT_MUTEXES
>  	help
>  	  Disabling this option will cause the kernel to be built without

Aside from all the other completely non-starter #ifdeffery trainwrecks,
this is just plain wrong.
Nicolas Pitre May 30, 2017, 12:17 p.m. | #2
On Tue, 30 May 2017, Peter Zijlstra wrote:

> On Mon, May 29, 2017 at 05:03:01PM -0400, Nicolas Pitre wrote:
> 
> > @@ -1303,8 +1303,17 @@ config SCHED_AUTOGROUP
> >  	  desktop applications.  Task group autogeneration is currently based
> >  	  upon task session.
> >  
> > +config SCHED_RT
> > +	bool "Real Time Task Scheduling" if EXPERT
> > +	default y
> > +	help
> > +	  This adds the sched_rt scheduling class to the kernel providing
> > + 	  support for the SCHED_FIFO and SCHED_RR policies. You might want
> > +	  to disable this to reduce the kernel size. If unsure say y.
> > +
> >  config SCHED_DL
> >  	bool "Deadline Task Scheduling" if EXPERT
> > +	depends on SCHED_RT
> >  	default y
> >  	help
> >  	  This adds the sched_dl scheduling class to the kernel providing
> > @@ -1632,6 +1641,7 @@ config BASE_FULL
> >  config FUTEX
> >  	bool "Enable futex support" if EXPERT
> >  	default y
> > +	depends on SCHED_RT
> >  	select RT_MUTEXES
> >  	help
> >  	  Disabling this option will cause the kernel to be built without
> 
> Aside from all the other completely non-starter #ifdeffery trainwrecks,
> this is just plain wrong.

Care to elaborate?

You might not like the approach, but you can't dismiss the goal just 
like that. So please help me do it right.


Nicolas
Peter Zijlstra May 30, 2017, 12:31 p.m. | #3
On Tue, May 30, 2017 at 08:17:00AM -0400, Nicolas Pitre wrote:
> On Tue, 30 May 2017, Peter Zijlstra wrote:
> 
> > On Mon, May 29, 2017 at 05:03:01PM -0400, Nicolas Pitre wrote:
> > 
> > > @@ -1303,8 +1303,17 @@ config SCHED_AUTOGROUP
> > >  	  desktop applications.  Task group autogeneration is currently based
> > >  	  upon task session.
> > >  
> > > +config SCHED_RT
> > > +	bool "Real Time Task Scheduling" if EXPERT
> > > +	default y
> > > +	help
> > > +	  This adds the sched_rt scheduling class to the kernel providing
> > > + 	  support for the SCHED_FIFO and SCHED_RR policies. You might want
> > > +	  to disable this to reduce the kernel size. If unsure say y.
> > > +
> > >  config SCHED_DL
> > >  	bool "Deadline Task Scheduling" if EXPERT
> > > +	depends on SCHED_RT
> > >  	default y
> > >  	help
> > >  	  This adds the sched_dl scheduling class to the kernel providing
> > > @@ -1632,6 +1641,7 @@ config BASE_FULL
> > >  config FUTEX
> > >  	bool "Enable futex support" if EXPERT
> > >  	default y
> > > +	depends on SCHED_RT
> > >  	select RT_MUTEXES
> > >  	help
> > >  	  Disabling this option will cause the kernel to be built without
> > 
> > Aside from all the other completely non-starter #ifdeffery trainwrecks,
> > this is just plain wrong.
> 
> Care to elaborate?

SCHED_DL does not in any way depend on SCHED_RT and futexes should not
wholly get axed when we lack SCHED_RT.

> You might not like the approach, but you can't dismiss the goal just 
> like that. So please help me do it right.


Why can't I dismiss it? All I see is ugly that makes maintenance worse
for very little to no benefit.
Nicolas Pitre May 31, 2017, 2:18 a.m. | #4
On Tue, 30 May 2017, Peter Zijlstra wrote:

> On Tue, May 30, 2017 at 08:17:00AM -0400, Nicolas Pitre wrote:
> 
> > On Tue, 30 May 2017, Peter Zijlstra wrote:
> > 
> > > On Mon, May 29, 2017 at 05:03:01PM -0400, Nicolas Pitre wrote:
> > > 
> > > > @@ -1303,8 +1303,17 @@ config SCHED_AUTOGROUP
> > > >  	  desktop applications.  Task group autogeneration is currently based
> > > >  	  upon task session.
> > > >  
> > > > +config SCHED_RT
> > > > +	bool "Real Time Task Scheduling" if EXPERT
> > > > +	default y
> > > > +	help
> > > > +	  This adds the sched_rt scheduling class to the kernel providing
> > > > + 	  support for the SCHED_FIFO and SCHED_RR policies. You might want
> > > > +	  to disable this to reduce the kernel size. If unsure say y.
> > > > +
> > > >  config SCHED_DL
> > > >  	bool "Deadline Task Scheduling" if EXPERT
> > > > +	depends on SCHED_RT
> > > >  	default y
> > > >  	help
> > > >  	  This adds the sched_dl scheduling class to the kernel providing
> > > > @@ -1632,6 +1641,7 @@ config BASE_FULL
> > > >  config FUTEX
> > > >  	bool "Enable futex support" if EXPERT
> > > >  	default y
> > > > +	depends on SCHED_RT
> > > >  	  Disabling this option will cause the kernel to be built without
> > > 
> > > Aside from all the other completely non-starter #ifdeffery trainwrecks,
> > > this is just plain wrong.
> > 
> > Care to elaborate?
> 
> SCHED_DL does not in any way depend on SCHED_RT

After a second look, the actual dependencies are very thin, so that's 
easy to care for.

> and futexes should not
> wholly get axed when we lack SCHED_RT.


Indeed, only PI futexes depend on rt_mutexes. Will fix.
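
One way that fix could be expressed (a hypothetical Kconfig sketch of what "will fix" might look like, not a hunk from the posted series) is to drop the SCHED_RT dependency from FUTEX itself and hang the RT_MUTEXES requirement on a separate symbol for the PI side, which is the only part that actually needs rt_mutexes:

```kconfig
config FUTEX
	bool "Enable futex support" if EXPERT
	default y

config FUTEX_PI
	bool
	default y
	depends on FUTEX && RT_MUTEXES
```

Plain futexes then survive a CONFIG_SCHED_RT=n build and only the priority-inheritance operations go away.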

> > You might not like the approach, but you can't dismiss the goal just 
> > like that. So please help me do it right.
> 
> Why can't I dismiss it? All I see is ugly that makes maintenance worse
> for very little to no benefit.

Maybe there are no benefits for you and your use cases. But don't we want 
the embedded crowd to get more involved with mainline?

I didn't like the #ifdefery myself, but I wanted to put those patches 
out early for comments. I'm grateful you provided yours and that they 
highlight things that look relatively easy to address.


Nicolas
Daniel Bristot de Oliveira May 31, 2017, 9:57 a.m. | #5
On 05/30/2017 02:31 PM, Peter Zijlstra wrote:
> All I see is ugly that makes maintenance worse


s/maintenance/maintenance & development & understanding & .../

+1

-- Daniel
Peter Zijlstra May 31, 2017, 10:40 a.m. | #6
On Wed, May 31, 2017 at 11:57:26AM +0200, Daniel Bristot de Oliveira wrote:
> On 05/30/2017 02:31 PM, Peter Zijlstra wrote:
> > All I see is ugly that makes maintenance worse
> 
> s/maintenance/maintenance & development & understanding & .../

Just to be a total pain, '&' in the replacement string is the fully
matched regex, so what you're saying is:

"maintenance maintenance development maintenance understanding maintenance ..."

Now I do appreciate the unintended (it was, right?) emphasis on
maintenance though ;-)
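
To see Peter's point concretely: in sed (and POSIX BRE replacement text in general), an unescaped `&` in the replacement stands for the whole matched string, while `\&` gives a literal ampersand:

```shell
# '&' expands to the entire match; '\&' is a literal ampersand
echo "maintenance" | sed 's/maintenance/& \& development \& understanding/'
# prints: maintenance & development & understanding
```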
Rob Herring May 31, 2017, 4:06 p.m. | #7
On Mon, May 29, 2017 at 05:03:01PM -0400, Nicolas Pitre wrote:
> On most small systems where user space is tightly controlled, the realtime
> scheduling class can often be dispensed with to reduce the kernel footprint.
> Let's make it configurable.
> 
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> ---

>  static inline int rt_prio(int prio)
>  {
> -	if (unlikely(prio < MAX_RT_PRIO))
> +	if (IS_ENABLED(CONFIG_SCHED_RT) && unlikely(prio < MAX_RT_PRIO))
>  		return 1;
>  	return 0;
>  }

>  #ifdef CONFIG_PREEMPT_NOTIFIERS
>  	INIT_HLIST_HEAD(&p->preempt_notifiers);
> @@ -3716,13 +3720,18 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
>  		p->sched_class = &dl_sched_class;
>  	} else
>  #endif
> +#ifdef CONFIG_SCHED_RT
>  	if (rt_prio(prio)) {

This ifdef is not necessary since rt_prio is conditioned on 
CONFIG_SCHED_RT already.

>  		if (oldprio < prio)
>  			queue_flag |= ENQUEUE_HEAD;
>  		p->sched_class = &rt_sched_class;
> -	} else {
> +	} else
> +#endif
> +	{
> +#ifdef CONFIG_SCHED_RT
>  		if (rt_prio(oldprio))
>  			p->rt.timeout = 0;
> +#endif
>  		p->sched_class = &fair_sched_class;
>  	}
>  
> @@ -3997,6 +4006,23 @@ static int __sched_setscheduler(struct task_struct *p,
>  
>  	/* May grab non-irq protected spin_locks: */
>  	BUG_ON(in_interrupt());
> +
> +	/*
> +	 * When the RT scheduling class is disabled, let's make sure kernel threads
> +	 * wanting RT still get lowest nice value to give them highest available
> +	 * priority rather than simply returning an error. Obviously we can't test
> +	 * rt_policy() here as it is always false in that case.
> +	 */
> +	if (!IS_ENABLED(CONFIG_SCHED_RT) && !user &&
> +	    (policy == SCHED_FIFO || policy == SCHED_RR)) {
> +		static const struct sched_attr k_attr = {
> +			.sched_policy = SCHED_NORMAL,
> +			.sched_nice = MIN_NICE,
> +		};
> +		attr = &k_attr;
> +		policy = SCHED_NORMAL;
> +	}
> +
>  recheck:
>  	/* Double check policy once rq lock held: */
>  	if (policy < 0) {
> @@ -5726,7 +5752,9 @@ void __init sched_init_smp(void)
>  	sched_init_granularity();
>  	free_cpumask_var(non_isolated_cpus);
>  
> +#ifdef CONFIG_SCHED_RT
>  	init_sched_rt_class();
> +#endif

You can do an empty inline function for !CONFIG_SCHED_RT.

>  #ifdef CONFIG_SCHED_DL
>  	init_sched_dl_class();
>  #endif

And here in the earlier patch.

> @@ -5832,7 +5860,9 @@ void __init sched_init(void)
>  	}
>  #endif /* CONFIG_CPUMASK_OFFSTACK */
>  
> +#ifdef CONFIG_SCHED_RT
>  	init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
> +#endif

And so on...

Rob
Nicolas Pitre May 31, 2017, 4:25 p.m. | #8
On Wed, 31 May 2017, Rob Herring wrote:

> On Mon, May 29, 2017 at 05:03:01PM -0400, Nicolas Pitre wrote:
> 
> > On most small systems where user space is tightly controlled, the realtime
> > scheduling class can often be dispensed with to reduce the kernel footprint.
> > Let's make it configurable.
> > 
> > Signed-off-by: Nicolas Pitre <nico@linaro.org>
> > ---
> 
> >  static inline int rt_prio(int prio)
> >  {
> > -	if (unlikely(prio < MAX_RT_PRIO))
> > +	if (IS_ENABLED(CONFIG_SCHED_RT) && unlikely(prio < MAX_RT_PRIO))
> >  		return 1;
> >  	return 0;
> >  }
> 
> >  #ifdef CONFIG_PREEMPT_NOTIFIERS
> >  	INIT_HLIST_HEAD(&p->preempt_notifiers);
> > @@ -3716,13 +3720,18 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
> >  		p->sched_class = &dl_sched_class;
> >  	} else
> >  #endif
> > +#ifdef CONFIG_SCHED_RT
> >  	if (rt_prio(prio)) {
> 
> This ifdef is not necessary since rt_prio is conditioned on 
> CONFIG_SCHED_RT already.

Yeah, that was the intent. In many places the conditioned code 
dereferences p->rt.* and the compiler complains. So I added a couple 
#ifdefs to make it build until something better comes to mind. This 
particular one was unnecessary.

I'm prettifying the whole thing at the moment. I wanted early 
comments, and so far they all say the same thing, which is good.


Nicolas

Patch

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e049526bc1..6befc0aa61 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -225,6 +225,16 @@  extern struct cred init_cred;
 #define INIT_TASK_SECURITY
 #endif
 
+#ifdef CONFIG_SCHED_RT
+#define INIT_TASK_RT(tsk)						\
+	.rt		= {						\
+		.run_list	= LIST_HEAD_INIT(tsk.rt.run_list),	\
+		.time_slice	= RR_TIMESLICE,				\
+	},
+#else
+#define INIT_TASK_RT(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -250,10 +260,7 @@  extern struct cred init_cred;
 	.se		= {						\
 		.group_node 	= LIST_HEAD_INIT(tsk.se.group_node),	\
 	},								\
-	.rt		= {						\
-		.run_list	= LIST_HEAD_INIT(tsk.rt.run_list),	\
-		.time_slice	= RR_TIMESLICE,				\
-	},								\
+	INIT_TASK_RT(tsk)						\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	INIT_PUSHABLE_TASKS(tsk)					\
 	INIT_CGROUP_SCHED(tsk)						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ba0c203669..71a43480ed 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -518,7 +518,9 @@  struct task_struct {
 
 	const struct sched_class	*sched_class;
 	struct sched_entity		se;
+#ifdef CONFIG_SCHED_RT
 	struct sched_rt_entity		rt;
+#endif
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group		*sched_task_group;
 #endif
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index f93329aba3..f2d636582d 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -7,7 +7,7 @@  struct task_struct;
 
 static inline int rt_prio(int prio)
 {
-	if (unlikely(prio < MAX_RT_PRIO))
+	if (IS_ENABLED(CONFIG_SCHED_RT) && unlikely(prio < MAX_RT_PRIO))
 		return 1;
 	return 0;
 }
@@ -17,7 +17,7 @@  static inline int rt_task(struct task_struct *p)
 	return rt_prio(p->prio);
 }
 
-#ifdef CONFIG_RT_MUTEXES
+#if defined(CONFIG_RT_MUTEXES) && defined(CONFIG_SCHED_RT)
 /*
  * Must hold either p->pi_lock or task_rq(p)->lock.
  */
diff --git a/init/Kconfig b/init/Kconfig
index f73e3f0940..3bcd49f576 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -687,7 +687,7 @@  config TREE_RCU_TRACE
 
 config RCU_BOOST
 	bool "Enable RCU priority boosting"
-	depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
+	depends on SCHED_RT && RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
 	default n
 	help
 	  This option boosts the priority of preempted RCU readers that
@@ -1090,7 +1090,7 @@  config CFS_BANDWIDTH
 
 config RT_GROUP_SCHED
 	bool "Group scheduling for SCHED_RR/FIFO"
-	depends on CGROUP_SCHED
+	depends on CGROUP_SCHED && SCHED_RT
 	default n
 	help
 	  This feature lets you explicitly allocate real CPU bandwidth
@@ -1303,8 +1303,17 @@  config SCHED_AUTOGROUP
 	  desktop applications.  Task group autogeneration is currently based
 	  upon task session.
 
+config SCHED_RT
+	bool "Real Time Task Scheduling" if EXPERT
+	default y
+	help
+	  This adds the sched_rt scheduling class to the kernel providing
+ 	  support for the SCHED_FIFO and SCHED_RR policies. You might want
+	  to disable this to reduce the kernel size. If unsure say y.
+
 config SCHED_DL
 	bool "Deadline Task Scheduling" if EXPERT
+	depends on SCHED_RT
 	default y
 	help
 	  This adds the sched_dl scheduling class to the kernel providing
@@ -1632,6 +1641,7 @@  config BASE_FULL
 config FUTEX
 	bool "Enable futex support" if EXPERT
 	default y
+	depends on SCHED_RT
 	select RT_MUTEXES
 	help
 	  Disabling this option will cause the kernel to be built without
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 3bd6a7c1cc..bccbef85e5 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -16,8 +16,8 @@  CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 
 obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += wait.o swait.o completion.o idle.o
-obj-y += idle_task.o fair.o rt.o
+obj-y += wait.o swait.o completion.o idle.o idle_task.o fair.o
+obj-$(CONFIG_SCHED_RT) += rt.o
 obj-$(CONFIG_SCHED_DL) += deadline.o $(if $(CONFIG_SMP),cpudeadline.o)
 obj-$(CONFIG_SMP) += cpupri.o topology.o stop_task.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a7b004e440..3dd6fce750 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -640,6 +640,7 @@  bool sched_can_stop_tick(struct rq *rq)
 		return false;
 #endif
 
+#ifdef CONFIG_SCHED_RT
 	/*
 	 * If there are more than one RR tasks, we need the tick to effect the
 	 * actual RR behaviour.
@@ -658,6 +659,7 @@  bool sched_can_stop_tick(struct rq *rq)
 	fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
 	if (fifo_nr_running)
 		return true;
+#endif
 
 	/*
 	 * If there are no DL,RR/FIFO tasks, there must only be CFS tasks left;
@@ -1586,7 +1588,7 @@  void sched_set_stop_task(int cpu, struct task_struct *stop)
 		 * Reset it back to a normal scheduling class so that
 		 * it can die in pieces.
 		 */
-		old_stop->sched_class = &rt_sched_class;
+		old_stop->sched_class = stop_sched_class.next;
 	}
 }
 
@@ -2182,11 +2184,13 @@  static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	__dl_clear_params(p);
 #endif
 
+#ifdef CONFIG_SCHED_RT
 	INIT_LIST_HEAD(&p->rt.run_list);
 	p->rt.timeout		= 0;
 	p->rt.time_slice	= sched_rr_timeslice;
 	p->rt.on_rq		= 0;
 	p->rt.on_list		= 0;
+#endif
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -3716,13 +3720,18 @@  void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 		p->sched_class = &dl_sched_class;
 	} else
 #endif
+#ifdef CONFIG_SCHED_RT
 	if (rt_prio(prio)) {
 		if (oldprio < prio)
 			queue_flag |= ENQUEUE_HEAD;
 		p->sched_class = &rt_sched_class;
-	} else {
+	} else
+#endif
+	{
+#ifdef CONFIG_SCHED_RT
 		if (rt_prio(oldprio))
 			p->rt.timeout = 0;
+#endif
 		p->sched_class = &fair_sched_class;
 	}
 
@@ -3997,6 +4006,23 @@  static int __sched_setscheduler(struct task_struct *p,
 
 	/* May grab non-irq protected spin_locks: */
 	BUG_ON(in_interrupt());
+
+	/*
+	 * When the RT scheduling class is disabled, let's make sure kernel threads
+	 * wanting RT still get lowest nice value to give them highest available
+	 * priority rather than simply returning an error. Obviously we can't test
+	 * rt_policy() here as it is always false in that case.
+	 */
+	if (!IS_ENABLED(CONFIG_SCHED_RT) && !user &&
+	    (policy == SCHED_FIFO || policy == SCHED_RR)) {
+		static const struct sched_attr k_attr = {
+			.sched_policy = SCHED_NORMAL,
+			.sched_nice = MIN_NICE,
+		};
+		attr = &k_attr;
+		policy = SCHED_NORMAL;
+	}
+
 recheck:
 	/* Double check policy once rq lock held: */
 	if (policy < 0) {
@@ -5726,7 +5752,9 @@  void __init sched_init_smp(void)
 	sched_init_granularity();
 	free_cpumask_var(non_isolated_cpus);
 
+#ifdef CONFIG_SCHED_RT
 	init_sched_rt_class();
+#endif
 #ifdef CONFIG_SCHED_DL
 	init_sched_dl_class();
 #endif
@@ -5832,7 +5860,9 @@  void __init sched_init(void)
 	}
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 
+#ifdef CONFIG_SCHED_RT
 	init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
+#endif
 #ifdef CONFIG_SCHED_DL
 	init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime());
 #endif
@@ -5864,7 +5894,10 @@  void __init sched_init(void)
 		rq->calc_load_active = 0;
 		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs);
+#ifdef CONFIG_SCHED_RT
 		init_rt_rq(&rq->rt);
+		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
+#endif
 #ifdef CONFIG_SCHED_DL
 		init_dl_rq(&rq->dl);
 #endif
@@ -5895,7 +5928,6 @@  void __init sched_init(void)
 		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
-		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
 		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
@@ -6132,7 +6164,9 @@  static DEFINE_SPINLOCK(task_group_lock);
 static void sched_free_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
+#ifdef CONFIG_SCHED_RT
 	free_rt_sched_group(tg);
+#endif
 	autogroup_free(tg);
 	kmem_cache_free(task_group_cache, tg);
 }
@@ -6149,8 +6183,10 @@  struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_fair_sched_group(tg, parent))
 		goto err;
 
+#ifdef CONFIG_SCHED_RT
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
+#endif
 
 	return tg;
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 84f80a81ab..c550723ce9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -645,7 +645,9 @@  do {									\
 
 	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
+#ifdef CONFIG_SCHED_RT
 	print_rt_stats(m, cpu);
+#endif
 #ifdef CONFIG_SCHED_DL
 	print_dl_stats(m, cpu);
 #endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 41dc10b707..38439eefd3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -132,7 +132,8 @@  static inline int fair_policy(int policy)
 
 static inline int rt_policy(int policy)
 {
-	return policy == SCHED_FIFO || policy == SCHED_RR;
+	return IS_ENABLED(CONFIG_SCHED_RT) &&
+	       (policy == SCHED_FIFO || policy == SCHED_RR);
 }
 
 static inline int dl_policy(int policy)
@@ -1447,8 +1448,10 @@  static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
 #define sched_class_highest (&stop_sched_class)
 #elif defined(CONFIG_SCHED_DL)
 #define sched_class_highest (&dl_sched_class)
-#else
+#elif defined(CONFIG_SCHED_RT)
 #define sched_class_highest (&rt_sched_class)
+#else
+#define sched_class_highest (&fair_sched_class)
 #endif
 
 #define for_each_class(class) \
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 5632dc3e63..7cad8c1540 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -112,8 +112,10 @@  static void update_curr_stop(struct rq *rq)
 const struct sched_class stop_sched_class = {
 #ifdef CONFIG_SCHED_DL
 	.next			= &dl_sched_class,
-#else
+#elif defined(CONFIG_SCHED_RT)
 	.next			= &rt_sched_class,
+#else
+	.next			= &fair_sched_class,
 #endif
 
 	.enqueue_task		= enqueue_task_stop,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dfba1a76c..1c670f4053 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -401,6 +401,7 @@  static struct ctl_table kern_table[] = {
 	},
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
+#ifdef CONFIG_SCHED_RT
 	{
 		.procname	= "sched_rt_period_us",
 		.data		= &sysctl_sched_rt_period,
@@ -422,6 +423,7 @@  static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rr_handler,
 	},
+#endif
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",
@@ -1071,7 +1073,7 @@  static struct ctl_table kern_table[] = {
 		.extra1		= &neg_one,
 	},
 #endif
-#ifdef CONFIG_RT_MUTEXES
+#if defined(CONFIG_RT_MUTEXES) && defined(CONFIG_SCHED_RT)
 	{
 		.procname	= "max_lock_depth",
 		.data		= &max_lock_depth,
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index d2a1e6dd02..010efb0e91 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -790,10 +790,12 @@  static void check_thread_timers(struct task_struct *tsk,
 				struct list_head *firing)
 {
 	struct list_head *timers = tsk->cpu_timers;
-	struct signal_struct *const sig = tsk->signal;
 	struct task_cputime *tsk_expires = &tsk->cputime_expires;
 	u64 expires;
+#ifdef CONFIG_SCHED_RT
+	struct signal_struct *const sig = tsk->signal;
 	unsigned long soft;
+#endif
 
 	/*
 	 * If cputime_expires is zero, then there are no active
@@ -811,6 +813,7 @@  static void check_thread_timers(struct task_struct *tsk,
 	tsk_expires->sched_exp = check_timers_list(++timers, firing,
 						   tsk->se.sum_exec_runtime);
 
+#ifdef CONFIG_SCHED_RT
 	/*
 	 * Check for the special case thread timers.
 	 */
@@ -847,6 +850,7 @@  static void check_thread_timers(struct task_struct *tsk,
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
+#endif
 	if (task_cputime_zero(tsk_expires))
 		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }