diff mbox

[RFC] hrtimers: system-wide and per-task hrtimer slacks

Message ID 1330418777-29597-1-git-send-email-dmitry.antipov@linaro.org
State New
Headers show

Commit Message

Dmitry Antipov Feb. 28, 2012, 8:46 a.m. UTC
This patch proposes a system-wide sysctl-aware default for the
high-resolution timer slack value, which may be changed from 0
to HRTIMER_MAX_SLACK nanoseconds. Default system-wide and per-task
values are HRTIMER_DEFAULT_SLACK. The per-task value isn't inherited
across fork(); instead, a newborn task uses the system-wide value by
default, and a newborn thread uses its group leader's value.

Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
 Documentation/sysctl/kernel.txt |    8 ++++++++
 include/linux/hrtimer.h         |   11 +++++++++++
 include/linux/init_task.h       |    2 +-
 include/linux/sched.h           |   11 ++++++++---
 kernel/fork.c                   |    9 +++++++--
 kernel/futex.c                  |    4 ++--
 kernel/hrtimer.c                |   10 +++++++---
 kernel/sys.c                    |    8 +++++---
 kernel/sysctl.c                 |   10 ++++++++++
 9 files changed, 59 insertions(+), 14 deletions(-)
diff mbox

Patch

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 6d78841..83b63ed 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -606,6 +606,14 @@  can be ORed together:
 
 ==============================================================
 
+timer_slack:
+
+This value can be used to query and set the system-wide default slack
+for high-resolution timers, in nanoseconds. The default is 50000 (50
+microseconds); valid values range from 0 to 1000000 (1 millisecond).
+
+==============================================================
+
 unknown_nmi_panic:
 
 The value in this file affects behavior of handling NMI. When the
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0dc30..b9da137 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -24,6 +24,16 @@ 
 #include <linux/timer.h>
 #include <linux/timerqueue.h>
 
+/*
+ * Default system-wide and per-task hrtimer slack, in nanoseconds.
+ */
+#define HRTIMER_DEFAULT_SLACK 50000
+
+/*
+ * Reasonable limit for hrtimer slack, in nanoseconds.
+ */
+#define HRTIMER_MAX_SLACK 1000000
+
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
 
@@ -323,6 +333,7 @@  extern ktime_t ktime_get_monotonic_offset(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
+extern int default_timer_slack_ns;
 
 /* Exported timer functions: */
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9c66b1a..b29be0d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -178,7 +178,7 @@  extern struct cred init_cred;
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),	\
-	.timer_slack_ns = 50000, /* 50 usec default slack */		\
+	.timer_slack_ns = HRTIMER_DEFAULT_SLACK,			\
 	.pids = {							\
 		[PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),		\
 		[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),		\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..aa0a806 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1551,11 +1551,11 @@  struct task_struct {
 	struct latency_record latency_record[LT_SAVECOUNT];
 #endif
 	/*
-	 * time slack values; these are used to round up poll() and
-	 * select() etc timeout values. These are in nanoseconds.
+	 * High-resolution timer slack value, in nanoseconds.
+	 * Used to round up poll()/select(), nanosleep, futex
+	 * waiting, etc. timeout values of non-realtime tasks.
 	 */
 	unsigned long timer_slack_ns;
-	unsigned long default_timer_slack_ns;
 
 	struct list_head	*scm_work_list;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -2628,6 +2628,11 @@  static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
+static inline unsigned long task_timer_slack(struct task_struct *tsk)
+{
+	return rt_task(tsk) ? 0 : tsk->timer_slack_ns;
+}
+
 /*
  * Thread group CPU time accounting.
  */
diff --git a/kernel/fork.c b/kernel/fork.c
index e2cd3e2..0f9a983 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1167,8 +1167,13 @@  static struct task_struct *copy_process(unsigned long clone_flags,
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
-
-	p->default_timer_slack_ns = current->timer_slack_ns;
+	/*
+	 * A new thread inherits the slack from its group
+	 * leader; a new process uses the system-wide default.
+	 */
+	p->timer_slack_ns = (clone_flags & CLONE_THREAD) ?
+		current->group_leader->timer_slack_ns :
+		default_timer_slack_ns;
 
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
diff --git a/kernel/futex.c b/kernel/futex.c
index 1614be2..a0d302d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1887,7 +1887,7 @@  static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
 				      HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(to, current);
 		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
-					     current->timer_slack_ns);
+					     task_timer_slack(current));
 	}
 
 retry:
@@ -2281,7 +2281,7 @@  static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 				      HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(to, current);
 		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
-					     current->timer_slack_ns);
+					     task_timer_slack(current));
 	}
 
 	/*
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ae34bf5..0c56fec 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -51,6 +51,12 @@ 
 #include <trace/events/timer.h>
 
 /*
+ * Default hrtimer slack value, in nanoseconds. May be changed in
+ * [0..HRTIMER_MAX_SLACK] range through kernel.timer_slack sysctl.
+ */
+__read_mostly int default_timer_slack_ns = HRTIMER_DEFAULT_SLACK;
+
+/*
  * The timer bases:
  *
  * There are more clockids then hrtimer bases. Thus, we index
@@ -1564,9 +1570,7 @@  long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	int ret = 0;
 	unsigned long slack;
 
-	slack = current->timer_slack_ns;
-	if (rt_task(current))
-		slack = 0;
+	slack = task_timer_slack(current);
 
 	hrtimer_init_on_stack(&t.timer, clockid, mode);
 	hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
diff --git a/kernel/sys.c b/kernel/sys.c
index 4070153..e976540 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -22,6 +22,7 @@ 
 #include <linux/device.h>
 #include <linux/key.h>
 #include <linux/times.h>
+#include <linux/hrtimer.h>
 #include <linux/posix-timers.h>
 #include <linux/security.h>
 #include <linux/dcookies.h>
@@ -1919,10 +1920,11 @@  SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		case PR_SET_TIMERSLACK:
 			if (arg2 <= 0)
 				current->timer_slack_ns =
-					current->default_timer_slack_ns;
-			else
+					default_timer_slack_ns;
+			else if (arg2 <= HRTIMER_MAX_SLACK)
 				current->timer_slack_ns = arg2;
-			error = 0;
+			else
+				error = -EINVAL;
 			break;
 		case PR_MCE_KILL:
 			if (arg4 | arg5)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f487f25..2cd42c6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,6 +136,7 @@  static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
 static const int cap_last_cap = CAP_LAST_CAP;
+static const int slack_max = HRTIMER_MAX_SLACK;
 
 #ifdef CONFIG_INOTIFY_USER
 #include <linux/inotify.h>
@@ -1004,6 +1005,15 @@  static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 #endif
+	{
+		.procname	= "timer_slack",
+		.data		= &default_timer_slack_ns,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &slack_max,
+	},
 	{ }
 };