diff mbox

[1/3,RFC] devfreq: Core updates to support devices which can idle

Message ID 1345227645-20703-2-git-send-email-rajagopal.venkat@linaro.org
State New
Headers show

Commit Message

rajagopal.venkat@linaro.org Aug. 17, 2012, 6:20 p.m. UTC
Prepare the devfreq core framework to support devices which
can idle. When device idleness is detected, perhaps through
runtime-pm, some mechanism is needed to suspend devfreq load
monitoring and resume it when the device is back online. The
present code continues monitoring unless the device is removed
from the devfreq core.

This patch introduces the following updates:

- move device load monitoring logic to ondemand governor as
  it is specific to ondemand.
- devfreq core interacts with governors via events to perform
  specific actions. These events include start/stop devfreq,
  and frequency limit changes outside devfreq. This lays the
  groundwork for adding suspend/resume events.
- use per device work instead of global work to monitor device
  load. This enables suspend/resume of device devfreq and
  reduces monitoring code complexity.
- Force devfreq users to set min/max supported frequencies in
  the device profile to help governors predict a target frequency
  within limits.

The devfreq APIs are not modified and are kept intact.

Signed-off-by: Rajagopal Venkat <rajagopal.venkat@linaro.org>
---
 Documentation/ABI/testing/sysfs-class-devfreq |  33 +-
 drivers/devfreq/devfreq.c                     | 418 ++++----------------------
 drivers/devfreq/governor.h                    |   6 +-
 drivers/devfreq/governor_performance.c        |  34 +--
 drivers/devfreq/governor_powersave.c          |  31 +-
 drivers/devfreq/governor_simpleondemand.c     | 216 +++++++++++--
 drivers/devfreq/governor_userspace.c          | 142 ++++-----
 include/linux/devfreq.h                       |  56 ++--
 8 files changed, 398 insertions(+), 538 deletions(-)
diff mbox

Patch

diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 23d78b5..c083433 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -21,27 +21,32 @@  Description:
 		The /sys/class/devfreq/.../cur_freq shows the current
 		frequency of the corresponding devfreq object.
 
-What:		/sys/class/devfreq/.../central_polling
+What:		/sys/class/devfreq/.../max_freq
 Date:		September 2011
 Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
 Description:
-		The /sys/class/devfreq/.../central_polling shows whether
-		the devfreq ojbect is using devfreq-provided central
-		polling mechanism or not.
+		The /sys/class/devfreq/.../max_freq shows the current
+		max frequency of the corresponding devfreq object. This
+		max frequency is guaranteed to be within the device
+		operating frequency limits.
 
-What:		/sys/class/devfreq/.../polling_interval
+What:		/sys/class/devfreq/.../min_freq
 Date:		September 2011
 Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
 Description:
-		The /sys/class/devfreq/.../polling_interval shows and sets
-		the requested polling interval of the corresponding devfreq
-		object. The values are represented in ms. If the value is
-		less than 1 jiffy, it is considered to be 0, which means
-		no polling. This value is meaningless if the governor is
-		not polling; thus. If the governor is not using
-		devfreq-provided central polling
-		(/sys/class/devfreq/.../central_polling is 0), this value
-		may be useless.
+		The /sys/class/devfreq/.../min_freq shows the current
+		min frequency of the corresponding devfreq object. This
+		min frequency is guaranteed to be within the device
+		operating frequency limits.
+
+What:		/sys/class/devfreq/.../ondemand/polling_interval
+Date:		September 2011
+Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+		The /sys/class/devfreq/.../ondemand/polling_interval shows
+		and sets the requested polling interval of the corresponding
+		devfreq object if the ondemand governor is in effect. The
+		values are represented in ms.
 
 What:		/sys/class/devfreq/.../userspace/set_freq
 Date:		September 2011
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 70c31d4..5aa23a8 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -11,7 +11,6 @@ 
  */
 
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -20,28 +19,12 @@ 
 #include <linux/stat.h>
 #include <linux/opp.h>
 #include <linux/devfreq.h>
-#include <linux/workqueue.h>
 #include <linux/platform_device.h>
 #include <linux/list.h>
-#include <linux/printk.h>
-#include <linux/hrtimer.h>
 #include "governor.h"
 
 struct class *devfreq_class;
 
-/*
- * devfreq_work periodically monitors every registered device.
- * The minimum polling interval is one jiffy. The polling interval is
- * determined by the minimum polling period among all polling devfreq
- * devices. The resolution of polling interval is one jiffy.
- */
-static bool polling;
-static struct workqueue_struct *devfreq_wq;
-static struct delayed_work devfreq_work;
-
-/* wait removing if this is to be removed */
-static struct devfreq *wait_remove_device;
-
 /* The list of all device-devfreq */
 static LIST_HEAD(devfreq_list);
 static DEFINE_MUTEX(devfreq_list_lock);
@@ -73,51 +56,15 @@  static struct devfreq *find_device_devfreq(struct device *dev)
 }
 
 /**
- * update_devfreq() - Reevaluate the device and configure frequency.
- * @devfreq:	the devfreq instance.
- *
- * Note: Lock devfreq->lock before calling update_devfreq
- *	 This function is exported for governors.
+ * devfreq_device_target() - Set device target frequency.
+ * @devfreq:	device devfreq instance
+ * @freq:	target freq to be set
+ * @flags:	influences how target freq is set
  */
-int update_devfreq(struct devfreq *devfreq)
+int devfreq_device_target(struct devfreq *devfreq,
+				unsigned long freq, u32 flags)
 {
-	unsigned long freq;
-	int err = 0;
-	u32 flags = 0;
-
-	if (!mutex_is_locked(&devfreq->lock)) {
-		WARN(true, "devfreq->lock must be locked by the caller.\n");
-		return -EINVAL;
-	}
-
-	/* Reevaluate the proper frequency */
-	err = devfreq->governor->get_target_freq(devfreq, &freq);
-	if (err)
-		return err;
-
-	/*
-	 * Adjust the freuqency with user freq and QoS.
-	 *
-	 * List from the highest proiority
-	 * max_freq (probably called by thermal when it's too hot)
-	 * min_freq
-	 */
-
-	if (devfreq->min_freq && freq < devfreq->min_freq) {
-		freq = devfreq->min_freq;
-		flags &= ~DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use GLB */
-	}
-	if (devfreq->max_freq && freq > devfreq->max_freq) {
-		freq = devfreq->max_freq;
-		flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use LUB */
-	}
-
-	err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
-	if (err)
-		return err;
-
-	devfreq->previous_freq = freq;
-	return err;
+	return devfreq->profile->target(devfreq->dev.parent, &freq, flags);
 }
 
 /**
@@ -133,69 +80,37 @@  static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
 				 void *devp)
 {
 	struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
-	int ret;
-
-	mutex_lock(&devfreq->lock);
-	ret = update_devfreq(devfreq);
-	mutex_unlock(&devfreq->lock);
-
-	return ret;
+	return devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_LIMITS);
 }
 
 /**
- * _remove_devfreq() - Remove devfreq from the device.
+ * _remove_devfreq() - Remove device devfreq from the devfreq list
+ *		and release its resources.
  * @devfreq:	the devfreq struct
  * @skip:	skip calling device_unregister().
- *
- * Note that the caller should lock devfreq->lock before calling
- * this. _remove_devfreq() will unlock it and free devfreq
- * internally. devfreq_list_lock should be locked by the caller
- * as well (not relased at return)
- *
- * Lock usage:
- * devfreq->lock: locked before call.
- *		  unlocked at return (and freed)
- * devfreq_list_lock: locked before call.
- *		      kept locked at return.
- *		      if devfreq is centrally polled.
- *
- * Freed memory:
- * devfreq
  */
 static void _remove_devfreq(struct devfreq *devfreq, bool skip)
 {
-	if (!mutex_is_locked(&devfreq->lock)) {
-		WARN(true, "devfreq->lock must be locked by the caller.\n");
-		return;
-	}
-	if (!devfreq->governor->no_central_polling &&
-	    !mutex_is_locked(&devfreq_list_lock)) {
-		WARN(true, "devfreq_list_lock must be locked by the caller.\n");
+	mutex_lock(&devfreq_list_lock);
+	if (!find_device_devfreq(&devfreq->dev)) {
+		mutex_unlock(&devfreq_list_lock);
+		dev_warn(&devfreq->dev, "releasing devfreq which doesn't exist\n");
 		return;
 	}
+	list_del(&devfreq->node);
+	mutex_unlock(&devfreq_list_lock);
 
-	if (devfreq->being_removed)
-		return;
-
-	devfreq->being_removed = true;
+	devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_STOP);
 
 	if (devfreq->profile->exit)
 		devfreq->profile->exit(devfreq->dev.parent);
 
-	if (devfreq->governor->exit)
-		devfreq->governor->exit(devfreq);
-
 	if (!skip && get_device(&devfreq->dev)) {
 		device_unregister(&devfreq->dev);
 		put_device(&devfreq->dev);
 	}
 
-	if (!devfreq->governor->no_central_polling)
-		list_del(&devfreq->node);
-
-	mutex_unlock(&devfreq->lock);
 	mutex_destroy(&devfreq->lock);
-
 	kfree(devfreq);
 }
 
@@ -203,137 +118,15 @@  static void _remove_devfreq(struct devfreq *devfreq, bool skip)
  * devfreq_dev_release() - Callback for struct device to release the device.
  * @dev:	the devfreq device
  *
- * This calls _remove_devfreq() if _remove_devfreq() is not called.
+ * This calls _remove_devfreq() to release device and its devfreq.
  * Note that devfreq_dev_release() could be called by _remove_devfreq() as
  * well as by others unregistering the device.
  */
 static void devfreq_dev_release(struct device *dev)
 {
 	struct devfreq *devfreq = to_devfreq(dev);
-	bool central_polling = !devfreq->governor->no_central_polling;
-
-	/*
-	 * If devfreq_dev_release() was called by device_unregister() of
-	 * _remove_devfreq(), we cannot mutex_lock(&devfreq->lock) and
-	 * being_removed is already set. This also partially checks the case
-	 * where devfreq_dev_release() is called from a thread other than
-	 * the one called _remove_devfreq(); however, this case is
-	 * dealt completely with another following being_removed check.
-	 *
-	 * Because being_removed is never being
-	 * unset, we do not need to worry about race conditions on
-	 * being_removed.
-	 */
-	if (devfreq->being_removed)
-		return;
-
-	if (central_polling)
-		mutex_lock(&devfreq_list_lock);
 
-	mutex_lock(&devfreq->lock);
-
-	/*
-	 * Check being_removed flag again for the case where
-	 * devfreq_dev_release() was called in a thread other than the one
-	 * possibly called _remove_devfreq().
-	 */
-	if (devfreq->being_removed) {
-		mutex_unlock(&devfreq->lock);
-		goto out;
-	}
-
-	/* devfreq->lock is unlocked and removed in _removed_devfreq() */
 	_remove_devfreq(devfreq, true);
-
-out:
-	if (central_polling)
-		mutex_unlock(&devfreq_list_lock);
-}
-
-/**
- * devfreq_monitor() - Periodically poll devfreq objects.
- * @work: the work struct used to run devfreq_monitor periodically.
- *
- */
-static void devfreq_monitor(struct work_struct *work)
-{
-	static unsigned long last_polled_at;
-	struct devfreq *devfreq, *tmp;
-	int error;
-	unsigned long jiffies_passed;
-	unsigned long next_jiffies = ULONG_MAX, now = jiffies;
-	struct device *dev;
-
-	/* Initially last_polled_at = 0, polling every device at bootup */
-	jiffies_passed = now - last_polled_at;
-	last_polled_at = now;
-	if (jiffies_passed == 0)
-		jiffies_passed = 1;
-
-	mutex_lock(&devfreq_list_lock);
-	list_for_each_entry_safe(devfreq, tmp, &devfreq_list, node) {
-		mutex_lock(&devfreq->lock);
-		dev = devfreq->dev.parent;
-
-		/* Do not remove tmp for a while */
-		wait_remove_device = tmp;
-
-		if (devfreq->governor->no_central_polling ||
-		    devfreq->next_polling == 0) {
-			mutex_unlock(&devfreq->lock);
-			continue;
-		}
-		mutex_unlock(&devfreq_list_lock);
-
-		/*
-		 * Reduce more next_polling if devfreq_wq took an extra
-		 * delay. (i.e., CPU has been idled.)
-		 */
-		if (devfreq->next_polling <= jiffies_passed) {
-			error = update_devfreq(devfreq);
-
-			/* Remove a devfreq with an error. */
-			if (error && error != -EAGAIN) {
-
-				dev_err(dev, "Due to update_devfreq error(%d), devfreq(%s) is removed from the device\n",
-					error, devfreq->governor->name);
-
-				/*
-				 * Unlock devfreq before locking the list
-				 * in order to avoid deadlock with
-				 * find_device_devfreq or others
-				 */
-				mutex_unlock(&devfreq->lock);
-				mutex_lock(&devfreq_list_lock);
-				/* Check if devfreq is already removed */
-				if (IS_ERR(find_device_devfreq(dev)))
-					continue;
-				mutex_lock(&devfreq->lock);
-				/* This unlocks devfreq->lock and free it */
-				_remove_devfreq(devfreq, false);
-				continue;
-			}
-			devfreq->next_polling = devfreq->polling_jiffies;
-		} else {
-			devfreq->next_polling -= jiffies_passed;
-		}
-
-		if (devfreq->next_polling)
-			next_jiffies = (next_jiffies > devfreq->next_polling) ?
-					devfreq->next_polling : next_jiffies;
-
-		mutex_unlock(&devfreq->lock);
-		mutex_lock(&devfreq_list_lock);
-	}
-	wait_remove_device = NULL;
-	mutex_unlock(&devfreq_list_lock);
-
-	if (next_jiffies > 0 && next_jiffies < ULONG_MAX) {
-		polling = true;
-		queue_delayed_work(devfreq_wq, &devfreq_work, next_jiffies);
-	} else {
-		polling = false;
-	}
 }
 
 /**
@@ -352,21 +145,20 @@  struct devfreq *devfreq_add_device(struct device *dev,
 	struct devfreq *devfreq;
 	int err = 0;
 
-	if (!dev || !profile || !governor) {
+	if (!dev || !profile || !governor ||
+		!profile->min_freq || !profile->max_freq) {
 		dev_err(dev, "%s: Invalid parameters.\n", __func__);
 		return ERR_PTR(-EINVAL);
 	}
 
-
-	if (!governor->no_central_polling) {
-		mutex_lock(&devfreq_list_lock);
-		devfreq = find_device_devfreq(dev);
-		mutex_unlock(&devfreq_list_lock);
-		if (!IS_ERR(devfreq)) {
-			dev_err(dev, "%s: Unable to create devfreq for the device. It already has one.\n", __func__);
-			err = -EINVAL;
-			goto err_out;
-		}
+	mutex_lock(&devfreq_list_lock);
+	devfreq = find_device_devfreq(dev);
+	mutex_unlock(&devfreq_list_lock);
+	if (!IS_ERR(devfreq)) {
+		dev_err(dev, "%s: Unable to create devfreq for the device. It already has one.\n",
+			__func__);
+		err = -EINVAL;
+		goto err_out;
 	}
 
 	devfreq = kzalloc(sizeof(struct devfreq), GFP_KERNEL);
@@ -385,49 +177,44 @@  struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->profile = profile;
 	devfreq->governor = governor;
 	devfreq->previous_freq = profile->initial_freq;
-	devfreq->data = data;
-	devfreq->next_polling = devfreq->polling_jiffies
-			      = msecs_to_jiffies(devfreq->profile->polling_ms);
+	devfreq->governor_data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
+	devfreq->min_freq = profile->min_freq;
+	devfreq->max_freq = profile->max_freq;
 
 	dev_set_name(&devfreq->dev, dev_name(dev));
 	err = device_register(&devfreq->dev);
 	if (err) {
 		put_device(&devfreq->dev);
+		dev_err(dev, "%s: Unable to register devfreq device\n",
+			__func__);
 		goto err_dev;
 	}
-
-	if (governor->init)
-		err = governor->init(devfreq);
-	if (err)
-		goto err_init;
-
 	mutex_unlock(&devfreq->lock);
 
-	if (governor->no_central_polling)
-		goto out;
-
 	mutex_lock(&devfreq_list_lock);
-
 	list_add(&devfreq->node, &devfreq_list);
+	mutex_unlock(&devfreq_list_lock);
 
-	if (devfreq_wq && devfreq->next_polling && !polling) {
-		polling = true;
-		queue_delayed_work(devfreq_wq, &devfreq_work,
-				   devfreq->next_polling);
+	err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START);
+	if (err) {
+		dev_err(dev, "%s: Unable to start governor for the device\n",
+			__func__);
+		list_del(&devfreq->node);
+		device_unregister(&devfreq->dev);
+		goto err_init;
 	}
-	mutex_unlock(&devfreq_list_lock);
-out:
+
 	return devfreq;
 
-err_init:
-	device_unregister(&devfreq->dev);
 err_dev:
 	mutex_unlock(&devfreq->lock);
+err_init:
 	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(devfreq_add_device);
 
 /**
  * devfreq_remove_device() - Remove devfreq feature from a device.
@@ -435,30 +222,16 @@  err_out:
  */
 int devfreq_remove_device(struct devfreq *devfreq)
 {
-	bool central_polling;
-
 	if (!devfreq)
 		return -EINVAL;
 
-	central_polling = !devfreq->governor->no_central_polling;
-
-	if (central_polling) {
-		mutex_lock(&devfreq_list_lock);
-		while (wait_remove_device == devfreq) {
-			mutex_unlock(&devfreq_list_lock);
-			schedule();
-			mutex_lock(&devfreq_list_lock);
-		}
-	}
-
 	mutex_lock(&devfreq->lock);
-	_remove_devfreq(devfreq, false); /* it unlocks devfreq->lock */
-
-	if (central_polling)
-		mutex_unlock(&devfreq_list_lock);
+	_remove_devfreq(devfreq, false);
+	mutex_unlock(&devfreq->lock);
 
 	return 0;
 }
+EXPORT_SYMBOL(devfreq_remove_device);
 
 static ssize_t show_governor(struct device *dev,
 			     struct device_attribute *attr, char *buf)
@@ -472,77 +245,28 @@  static ssize_t show_freq(struct device *dev,
 	return sprintf(buf, "%lu\n", to_devfreq(dev)->previous_freq);
 }
 
-static ssize_t show_polling_interval(struct device *dev,
-				     struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n", to_devfreq(dev)->profile->polling_ms);
-}
-
-static ssize_t store_polling_interval(struct device *dev,
-				      struct device_attribute *attr,
-				      const char *buf, size_t count)
-{
-	struct devfreq *df = to_devfreq(dev);
-	unsigned int value;
-	int ret;
-
-	ret = sscanf(buf, "%u", &value);
-	if (ret != 1)
-		goto out;
-
-	mutex_lock(&df->lock);
-	df->profile->polling_ms = value;
-	df->next_polling = df->polling_jiffies
-			 = msecs_to_jiffies(value);
-	mutex_unlock(&df->lock);
-
-	ret = count;
-
-	if (df->governor->no_central_polling)
-		goto out;
-
-	mutex_lock(&devfreq_list_lock);
-	if (df->next_polling > 0 && !polling) {
-		polling = true;
-		queue_delayed_work(devfreq_wq, &devfreq_work,
-				   df->next_polling);
-	}
-	mutex_unlock(&devfreq_list_lock);
-out:
-	return ret;
-}
-
-static ssize_t show_central_polling(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n",
-		       !to_devfreq(dev)->governor->no_central_polling);
-}
-
 static ssize_t store_min_freq(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
 	struct devfreq *df = to_devfreq(dev);
 	unsigned long value;
 	int ret;
-	unsigned long max;
 
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		goto out;
 
-	mutex_lock(&df->lock);
-	max = df->max_freq;
-	if (value && max && value > max) {
+	if (value < df->profile->min_freq || value > df->profile->max_freq) {
 		ret = -EINVAL;
-		goto unlock;
+		goto out;
 	}
 
+	mutex_lock(&df->lock);
 	df->min_freq = value;
-	update_devfreq(df);
-	ret = count;
-unlock:
 	mutex_unlock(&df->lock);
+
+	df->governor->event_handler(df, DEVFREQ_GOV_LIMITS);
+	ret = count;
 out:
 	return ret;
 }
@@ -559,24 +283,22 @@  static ssize_t store_max_freq(struct device *dev, struct device_attribute *attr,
 	struct devfreq *df = to_devfreq(dev);
 	unsigned long value;
 	int ret;
-	unsigned long min;
 
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		goto out;
 
-	mutex_lock(&df->lock);
-	min = df->min_freq;
-	if (value && min && value < min) {
+	if (value < df->profile->min_freq || value > df->profile->max_freq) {
 		ret = -EINVAL;
-		goto unlock;
+		goto out;
 	}
 
+	mutex_lock(&df->lock);
 	df->max_freq = value;
-	update_devfreq(df);
-	ret = count;
-unlock:
 	mutex_unlock(&df->lock);
+
+	df->governor->event_handler(df, DEVFREQ_GOV_LIMITS);
+	ret = count;
 out:
 	return ret;
 }
@@ -590,31 +312,11 @@  static ssize_t show_max_freq(struct device *dev, struct device_attribute *attr,
 static struct device_attribute devfreq_attrs[] = {
 	__ATTR(governor, S_IRUGO, show_governor, NULL),
 	__ATTR(cur_freq, S_IRUGO, show_freq, NULL),
-	__ATTR(central_polling, S_IRUGO, show_central_polling, NULL),
-	__ATTR(polling_interval, S_IRUGO | S_IWUSR, show_polling_interval,
-	       store_polling_interval),
 	__ATTR(min_freq, S_IRUGO | S_IWUSR, show_min_freq, store_min_freq),
 	__ATTR(max_freq, S_IRUGO | S_IWUSR, show_max_freq, store_max_freq),
 	{ },
 };
 
-/**
- * devfreq_start_polling() - Initialize data structure for devfreq framework and
- *			   start polling registered devfreq devices.
- */
-static int __init devfreq_start_polling(void)
-{
-	mutex_lock(&devfreq_list_lock);
-	polling = false;
-	devfreq_wq = create_freezable_workqueue("devfreq_wq");
-	INIT_DELAYED_WORK_DEFERRABLE(&devfreq_work, devfreq_monitor);
-	mutex_unlock(&devfreq_list_lock);
-
-	devfreq_monitor(&devfreq_work.work);
-	return 0;
-}
-late_initcall(devfreq_start_polling);
-
 static int __init devfreq_init(void)
 {
 	devfreq_class = class_create(THIS_MODULE, "devfreq");
@@ -623,6 +325,8 @@  static int __init devfreq_init(void)
 		return PTR_ERR(devfreq_class);
 	}
 	devfreq_class->dev_attrs = devfreq_attrs;
+	mutex_init(&devfreq_list_lock);
+
 	return 0;
 }
 subsys_initcall(devfreq_init);
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index ea7f13c..15b4c52 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -16,9 +16,9 @@ 
 
 #include <linux/devfreq.h>
 
-#define to_devfreq(DEV)	container_of((DEV), struct devfreq, dev)
+int devfreq_device_target(struct devfreq *devfreq,
+				unsigned long freq, u32 flags);
 
-/* Caution: devfreq->lock must be locked before calling update_devfreq */
-extern int update_devfreq(struct devfreq *devfreq);
+#define to_devfreq(DEV)	container_of((DEV), struct devfreq, dev)
 
 #endif /* _GOVERNOR_H */
diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c
index af75ddd..ffc694d 100644
--- a/drivers/devfreq/governor_performance.c
+++ b/drivers/devfreq/governor_performance.c
@@ -13,27 +13,27 @@ 
 #include "governor.h"
 
 static int devfreq_performance_func(struct devfreq *df,
-				    unsigned long *freq)
+				    unsigned int event)
 {
-	/*
-	 * target callback should be able to get floor value as
-	 * said in devfreq.h
-	 */
-	if (!df->max_freq)
-		*freq = UINT_MAX;
-	else
-		*freq = df->max_freq;
-	return 0;
-}
+	int ret = 0;
+	u32 flags = DEVFREQ_FLAG_LEAST_UPPER_BOUND;
 
-static int performance_init(struct devfreq *devfreq)
-{
-	return update_devfreq(devfreq);
+	switch (event) {
+	case DEVFREQ_GOV_START:
+	case DEVFREQ_GOV_LIMITS:
+		mutex_lock(&df->lock);
+		ret = devfreq_device_target(df, df->max_freq, flags);
+		mutex_unlock(&df->lock);
+		break;
+	default:
+		break;
+
+	}
+
+	return ret;
 }
 
 const struct devfreq_governor devfreq_performance = {
 	.name = "performance",
-	.init = performance_init,
-	.get_target_freq = devfreq_performance_func,
-	.no_central_polling = true,
+	.event_handler = devfreq_performance_func,
 };
diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c
index fec0cdb..a19474c 100644
--- a/drivers/devfreq/governor_powersave.c
+++ b/drivers/devfreq/governor_powersave.c
@@ -13,24 +13,27 @@ 
 #include "governor.h"
 
 static int devfreq_powersave_func(struct devfreq *df,
-				  unsigned long *freq)
+				  unsigned int event)
 {
-	/*
-	 * target callback should be able to get ceiling value as
-	 * said in devfreq.h
-	 */
-	*freq = df->min_freq;
-	return 0;
-}
+	int ret = 0;
+	u32 flags = ~DEVFREQ_FLAG_LEAST_UPPER_BOUND;
 
-static int powersave_init(struct devfreq *devfreq)
-{
-	return update_devfreq(devfreq);
+	switch (event) {
+	case DEVFREQ_GOV_START:
+	case DEVFREQ_GOV_LIMITS:
+		mutex_lock(&df->lock);
+		ret = devfreq_device_target(df, df->min_freq, flags);
+		mutex_unlock(&df->lock);
+		break;
+	default:
+		break;
+
+	}
+
+	return ret;
 }
 
 const struct devfreq_governor devfreq_powersave = {
 	.name = "powersave",
-	.init = powersave_init,
-	.get_target_freq = devfreq_powersave_func,
-	.no_central_polling = true,
+	.event_handler = devfreq_powersave_func,
 };
diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
index a2e3eae..7c70c30 100644
--- a/drivers/devfreq/governor_simpleondemand.c
+++ b/drivers/devfreq/governor_simpleondemand.c
@@ -9,41 +9,87 @@ 
  * published by the Free Software Foundation.
  */
 
+#include <linux/slab.h>
 #include <linux/errno.h>
+#include <linux/stat.h>
 #include <linux/devfreq.h>
 #include <linux/math64.h>
+#include <linux/workqueue.h>
+#include "governor.h"
 
 /* Default constants for DevFreq-Simple-Ondemand (DFSO) */
 #define DFSO_UPTHRESHOLD	(90)
 #define DFSO_DOWNDIFFERENCTIAL	(5)
-static int devfreq_simple_ondemand_func(struct devfreq *df,
-					unsigned long *freq)
+
+static struct workqueue_struct *devfreq_wq;
+
+struct ondemand_data {
+	struct devfreq *devfreq;
+	struct delayed_work work;
+	unsigned int upthreshold;
+	unsigned int downdifferential;
+	bool stop_queuing;
+};
+
+static ssize_t show_polling_interval(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", to_devfreq(dev)->profile->polling_ms);
+}
+
+static ssize_t store_polling_interval(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct devfreq *df = to_devfreq(dev);
+	unsigned int value;
+	int ret;
+
+	ret = sscanf(buf, "%u", &value);
+	if (ret != 1)
+		return ret;
+
+	mutex_lock(&df->lock);
+	df->profile->polling_ms = value;
+	mutex_unlock(&df->lock);
+
+	return count;
+}
+
+static DEVICE_ATTR(polling_interval, S_IRUGO | S_IWUSR, show_polling_interval,
+	       store_polling_interval);
+static struct attribute *dev_entries[] = {
+	&dev_attr_polling_interval.attr,
+	NULL,
+};
+static struct attribute_group dev_attr_group = {
+	.name = "ondemand",
+	.attrs = dev_entries,
+};
+
+static int update_devfreq(struct devfreq *df)
 {
 	struct devfreq_dev_status stat;
+	struct ondemand_data *data = df->governor_data;
 	int err = df->profile->get_dev_status(df->dev.parent, &stat);
+	unsigned int dfso_upthreshold = data->upthreshold;
+	unsigned int dfso_downdifferential = data->downdifferential;
+
 	unsigned long long a, b;
-	unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD;
-	unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL;
-	struct devfreq_simple_ondemand_data *data = df->data;
-	unsigned long max = (df->max_freq) ? df->max_freq : UINT_MAX;
+	unsigned long freq;
+	u32 flags = 0;
 
 	if (err)
 		return err;
 
-	if (data) {
-		if (data->upthreshold)
-			dfso_upthreshold = data->upthreshold;
-		if (data->downdifferential)
-			dfso_downdifferential = data->downdifferential;
-	}
 	if (dfso_upthreshold > 100 ||
 	    dfso_upthreshold < dfso_downdifferential)
 		return -EINVAL;
 
-	/* Assume MAX if it is going to be divided by zero */
+	/* Perhaps device is inactive, set device min frequency */
 	if (stat.total_time == 0) {
-		*freq = max;
-		return 0;
+		freq = df->min_freq;
+		goto target;
 	}
 
 	/* Prevent overflow */
@@ -55,21 +101,21 @@  static int devfreq_simple_ondemand_func(struct devfreq *df,
 	/* Set MAX if it's busy enough */
 	if (stat.busy_time * 100 >
 	    stat.total_time * dfso_upthreshold) {
-		*freq = max;
-		return 0;
+		freq = df->max_freq;
+		goto target;
 	}
 
 	/* Set MAX if we do not know the initial frequency */
 	if (stat.current_frequency == 0) {
-		*freq = max;
-		return 0;
+		freq = df->max_freq;
+		goto target;
 	}
 
 	/* Keep the current frequency */
 	if (stat.busy_time * 100 >
 	    stat.total_time * (dfso_upthreshold - dfso_downdifferential)) {
-		*freq = stat.current_frequency;
-		return 0;
+		freq = stat.current_frequency;
+		goto target;
 	}
 
 	/* Set the desired frequency based on the load */
@@ -78,17 +124,131 @@  static int devfreq_simple_ondemand_func(struct devfreq *df,
 	b = div_u64(a, stat.total_time);
 	b *= 100;
 	b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2));
-	*freq = (unsigned long) b;
+	freq = (unsigned long) b;
+
+	if (df->min_freq && freq < df->min_freq) {
+		freq = df->min_freq;
+		flags &= ~DEVFREQ_FLAG_LEAST_UPPER_BOUND;
+	}
+	if (df->max_freq && freq > df->max_freq) {
+		freq = df->max_freq;
+		flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND;
+	}
 
-	if (df->min_freq && *freq < df->min_freq)
-		*freq = df->min_freq;
-	if (df->max_freq && *freq > df->max_freq)
-		*freq = df->max_freq;
+target:
+	err = devfreq_device_target(df, freq, flags);
+	return err;
+}
 
-	return 0;
+static void devfreq_monitor(struct work_struct *work)
+{
+	int ret;
+	struct ondemand_data *data = container_of(work,
+					struct ondemand_data, work.work);
+	struct devfreq *df = data->devfreq;
+
+	mutex_lock(&df->lock);
+	ret = update_devfreq(df);
+	mutex_unlock(&df->lock);
+	if (ret)
+		dev_err(&df->dev, "dvfs failed with (%d) error\n", ret);
+
+	if (!data->stop_queuing)
+		queue_delayed_work(devfreq_wq, &data->work,
+				msecs_to_jiffies(df->profile->polling_ms));
+}
+
+static int ondemand_init(struct devfreq *df)
+{
+	int err;
+	struct ondemand_data *data = kzalloc(sizeof(struct ondemand_data),
+							GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/*
+	 * Check if the devfreq user has passed governor configurable
+	 * parameters. If not, use default values.
+	 */
+	if (df->governor_data) {
+		struct devfreq_simple_ondemand_data *drv = df->governor_data;
+		data->upthreshold = drv->upthreshold;
+		data->downdifferential = drv->downdifferential;
+	} else {
+		data->upthreshold = DFSO_UPTHRESHOLD;
+		data->downdifferential = DFSO_DOWNDIFFERENCTIAL;
+	}
+
+	data->stop_queuing = false;
+	data->devfreq = df;
+	df->governor_data = data;
+	INIT_DELAYED_WORK_DEFERRABLE(&data->work, devfreq_monitor);
+
+	err = sysfs_create_group(&df->dev.kobj, &dev_attr_group);
+	if (err)
+		kfree(data);
+
+	return err;
+}
+
+static void ondemand_exit(struct devfreq *df)
+{
+	sysfs_remove_group(&df->dev.kobj, &dev_attr_group);
+	kfree(df->governor_data);
+	df->governor_data = NULL;
+}
+
+static int devfreq_simple_ondemand_func(struct devfreq *df,
+					unsigned int event)
+{
+	int ret = 0;
+	struct ondemand_data *data = df->governor_data;
+
+	switch (event) {
+	case DEVFREQ_GOV_START:
+		ret = ondemand_init(df);
+		if (ret)
+			goto out;
+
+		data = df->governor_data;
+		queue_delayed_work(devfreq_wq, &data->work,
+				msecs_to_jiffies(df->profile->polling_ms));
+		break;
+
+	case DEVFREQ_GOV_STOP:
+		data->stop_queuing = true;
+		cancel_delayed_work_sync(&data->work);
+		ondemand_exit(df);
+		break;
+
+	case DEVFREQ_GOV_LIMITS:
+		if (delayed_work_pending(&data->work)) {
+			mutex_lock(&df->lock);
+			ret = update_devfreq(df);
+			mutex_unlock(&df->lock);
+		}
+		break;
+	default:
+		break;
+	}
+
+out:
+	return ret;
 }
 
 const struct devfreq_governor devfreq_simple_ondemand = {
 	.name = "simple_ondemand",
-	.get_target_freq = devfreq_simple_ondemand_func,
+	.event_handler = devfreq_simple_ondemand_func,
 };
+
+static int __init devfreq_ondemand_init(void)
+{
+	devfreq_wq = create_freezable_workqueue("devfreq_wq");
+	if (IS_ERR(devfreq_wq)) {
+		pr_err("%s: couldn't create workqueue\n", __FILE__);
+		return PTR_ERR(devfreq_wq);
+	}
+
+	return 0;
+}
+fs_initcall(devfreq_ondemand_init);
diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 0681246..c20a937 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -18,67 +18,34 @@ 
 
 struct userspace_data {
 	unsigned long user_frequency;
-	bool valid;
 };
 
-static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq)
-{
-	struct userspace_data *data = df->data;
-
-	if (data->valid) {
-		unsigned long adjusted_freq = data->user_frequency;
-
-		if (df->max_freq && adjusted_freq > df->max_freq)
-			adjusted_freq = df->max_freq;
-
-		if (df->min_freq && adjusted_freq < df->min_freq)
-			adjusted_freq = df->min_freq;
-
-		*freq = adjusted_freq;
-	} else {
-		*freq = df->previous_freq; /* No user freq specified yet */
-	}
-	return 0;
-}
+static int devfreq_userspace_func(struct devfreq *df, unsigned int event);
 
 static ssize_t store_freq(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t count)
 {
-	struct devfreq *devfreq = to_devfreq(dev);
-	struct userspace_data *data;
-	unsigned long wanted;
-	int err = 0;
-
-
-	mutex_lock(&devfreq->lock);
-	data = devfreq->data;
-
-	sscanf(buf, "%lu", &wanted);
-	data->user_frequency = wanted;
-	data->valid = true;
-	err = update_devfreq(devfreq);
-	if (err == 0)
-		err = count;
-	mutex_unlock(&devfreq->lock);
-	return err;
+	unsigned long val;
+	int ret;
+	struct devfreq *df = to_devfreq(dev);
+	struct userspace_data *data = df->governor_data;
+
+	/* sscanf()'s 0/EOF must not leak out as the store() return value */
+	if (sscanf(buf, "%lu", &val) != 1)
+		return -EINVAL;
+
+	data->user_frequency = val;
+	ret = devfreq_userspace_func(df, DEVFREQ_GOV_LIMITS);
+
+	return ret ? ret : (ssize_t)count;
 }
 
 static ssize_t show_freq(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
-	struct devfreq *devfreq = to_devfreq(dev);
-	struct userspace_data *data;
-	int err = 0;
-
-	mutex_lock(&devfreq->lock);
-	data = devfreq->data;
-
-	if (data->valid)
-		err = sprintf(buf, "%lu\n", data->user_frequency);
-	else
-		err = sprintf(buf, "undefined\n");
-	mutex_unlock(&devfreq->lock);
-	return err;
+	struct devfreq *df = to_devfreq(dev);
+	struct userspace_data *data = df->governor_data;
+	return sprintf(buf, "%lu\n", data->user_frequency);
 }
 
 static DEVICE_ATTR(set_freq, 0644, show_freq, store_freq);
@@ -91,35 +58,74 @@  static struct attribute_group dev_attr_group = {
 	.attrs	= dev_entries,
 };
 
-static int userspace_init(struct devfreq *devfreq)
+static int userspace_init(struct devfreq *df)
 {
-	int err = 0;
+	int err;
 	struct userspace_data *data = kzalloc(sizeof(struct userspace_data),
 					      GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
 
-	if (!data) {
-		err = -ENOMEM;
-		goto out;
-	}
-	data->valid = false;
-	devfreq->data = data;
+	df->governor_data = data;
+
+	err = sysfs_create_group(&df->dev.kobj, &dev_attr_group);
+	if (err)
+		kfree(data);
 
-	err = sysfs_create_group(&devfreq->dev.kobj, &dev_attr_group);
-out:
 	return err;
 }
 
-static void userspace_exit(struct devfreq *devfreq)
+static void userspace_exit(struct devfreq *df)
 {
-	sysfs_remove_group(&devfreq->dev.kobj, &dev_attr_group);
-	kfree(devfreq->data);
-	devfreq->data = NULL;
+	sysfs_remove_group(&df->dev.kobj, &dev_attr_group);
+	kfree(df->governor_data);
+	df->governor_data = NULL;
+}
+
+static int update_devfreq(struct devfreq *df)
+{
+	u32 flags = 0;
+	struct userspace_data *data = df->governor_data;
+
+	if (!data->user_frequency)	/* no frequency requested yet */
+		return 0;
+	/* max_freq == 0 means "no limit", so clamp only when it is set */
+	if (df->max_freq && data->user_frequency > df->max_freq) {
+		data->user_frequency = df->max_freq;
+		flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND;
+	}
+
+	if (data->user_frequency < df->min_freq) {
+		data->user_frequency = df->min_freq;
+		flags &= ~DEVFREQ_FLAG_LEAST_UPPER_BOUND;
+	}
+
+	return devfreq_device_target(df, data->user_frequency, flags);
+}
+
+static int devfreq_userspace_func(struct devfreq *df, unsigned int event)
+{
+	int ret = 0;
+	switch (event) {
+	case DEVFREQ_GOV_START:	/* allocate data, create the sysfs group */
+		ret = userspace_init(df);
+		break;
+	case DEVFREQ_GOV_STOP:	/* remove the sysfs group, free data */
+		userspace_exit(df);
+		break;
+	case DEVFREQ_GOV_LIMITS:	/* also raised by store_freq() */
+		mutex_lock(&df->lock);
+		ret = update_devfreq(df);
+		mutex_unlock(&df->lock);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
 }
 
 const struct devfreq_governor devfreq_userspace = {
 	.name = "userspace",
-	.get_target_freq = devfreq_userspace_func,
-	.init = userspace_init,
-	.exit = userspace_exit,
-	.no_central_polling = true,
+	.event_handler = devfreq_userspace_func,
 };
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 281c72a..600cc2e 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -52,11 +52,19 @@  struct devfreq_dev_status {
  */
 #define DEVFREQ_FLAG_LEAST_UPPER_BOUND		0x1
 
+/* Devfreq events */
+#define DEVFREQ_GOV_START			0x1
+#define DEVFREQ_GOV_STOP			0x2
+#define DEVFREQ_GOV_LIMITS			0x3
+
 /**
  * struct devfreq_dev_profile - Devfreq's user device profile
  * @initial_freq	The operating frequency when devfreq_add_device() is
  *			called.
- * @polling_ms		The polling interval in ms. 0 disables polling.
+ * @max_freq		Maximum frequency supported by device
+ * @min_freq		Minimum frequency supported by device
+ * @polling_ms		The polling interval in ms. Optional; used only by
+ *			the ondemand governor.
  * @target		The device should set its operating frequency at
  *			freq or lowest-upper-than-freq value. If freq is
  *			higher than any operable frequency, set maximum.
@@ -74,6 +82,8 @@  struct devfreq_dev_status {
  */
 struct devfreq_dev_profile {
 	unsigned long initial_freq;
+	unsigned long max_freq;
+	unsigned long min_freq;
 	unsigned int polling_ms;
 
 	int (*target)(struct device *dev, unsigned long *freq, u32 flags);
@@ -85,31 +95,14 @@  struct devfreq_dev_profile {
 /**
  * struct devfreq_governor - Devfreq policy governor
  * @name		Governor's name
- * @get_target_freq	Returns desired operating frequency for the device.
- *			Basically, get_target_freq will run
- *			devfreq_dev_profile.get_dev_status() to get the
- *			status of the device (load = busy_time / total_time).
- *			If no_central_polling is set, this callback is called
- *			only with update_devfreq() notified by OPP.
- * @init		Called when the devfreq is being attached to a device
- * @exit		Called when the devfreq is being removed from a
- *			device. Governor should stop any internal routines
- *			before return because related data may be
- *			freed after exit().
- * @no_central_polling	Do not use devfreq's central polling mechanism.
- *			When this is set, devfreq will not call
- *			get_target_freq with devfreq_monitor(). However,
- *			devfreq will call get_target_freq with
- *			devfreq_update() notified by OPP framework.
- *
- * Note that the callbacks are called with devfreq->lock locked by devfreq.
+ * @event_handler	Callback used by the devfreq core to notify governors
+ *			of events. Events include per-device governor init
+ *			and exit, OPP changes made outside devfreq, and
+ *			suspend and resume of per-device devfreq on idle.
  */
 struct devfreq_governor {
 	const char name[DEVFREQ_NAME_LEN];
-	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
-	int (*init)(struct devfreq *this);
-	void (*exit)(struct devfreq *this);
-	const bool no_central_polling;
+	int (*event_handler)(struct devfreq *devfreq, unsigned int event);
 };
 
 /**
@@ -124,16 +117,9 @@  struct devfreq_governor {
  * @nb		notifier block used to notify devfreq object that it should
  *		reevaluate operable frequencies. Devfreq users may use
  *		devfreq.nb to the corresponding register notifier call chain.
- * @polling_jiffies	interval in jiffies.
  * @previous_freq	previously configured frequency value.
- * @next_polling	the number of remaining jiffies to poll with
- *			"devfreq_monitor" executions to reevaluate
- *			frequency/voltage of the device. Set by
- *			profile's polling_ms interval.
- * @data	Private data of the governor. The devfreq framework does not
- *		touch this.
- * @being_removed	a flag to mark that this object is being removed in
- *			order to prevent trying to remove the object multiple times.
+ * @governor_data	Private data of the governor. The devfreq framework
+ *			does not touch this.
  * @min_freq	Limit minimum frequency requested by user (0: none)
  * @max_freq	Limit maximum frequency requested by user (0: none)
  *
@@ -154,13 +140,9 @@  struct devfreq {
 	const struct devfreq_governor *governor;
 	struct notifier_block nb;
 
-	unsigned long polling_jiffies;
 	unsigned long previous_freq;
-	unsigned int next_polling;
-
-	void *data; /* private data for governors */
 
-	bool being_removed;
+	void *governor_data; /* private data for governors */
 
 	unsigned long min_freq;
 	unsigned long max_freq;