@@ -22,6 +22,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pm.h>
+#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
@@ -1180,6 +1181,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
unsigned char **page = NULL;
struct dec_data *data = NULL;
struct crc_data *crc = NULL;
+ struct pm_qos_request qos;
hib_init_batch(&hb);
@@ -1190,6 +1192,8 @@ static int load_image_lzo(struct swap_map_handle *handle,
nr_threads = num_online_cpus() - 1;
nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
+ cpu_latency_qos_add_request(&qos, 0);
+
page = vmalloc(array_size(LZO_MAX_RD_PAGES, sizeof(*page)));
if (!page) {
pr_err("Failed to allocate LZO page\n");
@@ -1470,6 +1474,8 @@ static int load_image_lzo(struct swap_map_handle *handle,
}
vfree(page);
+ cpu_latency_qos_remove_request(&qos);
+
return ret;
}
The (SMP) load_image_lzo procedure relies on multiple threads and their
synchronization for decompressing the hibernate image. For performance
reasons, it is important to keep the CPUs responsive so that the
synchronization overhead stays minimal. This overhead roughly corresponds
to the thread wakeup latency, which is linked to the CPU idle exit time.
By requesting a 0 CPU latency, we prevent the CPUs from entering their
deepest idle states, ensuring that they are as responsive as possible
during the whole decompression procedure, and by extension improving the
hibernate resume time (CPU dependent).

On the iMX8M mini SoC, this gives a ~40% boost for the decompression time:

PM: hibernation: Read 365640 kbytes in 1.88 seconds (194.48 MB/s)
vs
PM: hibernation: Read 363476 kbytes in 1.26 seconds (288.47 MB/s)

Note: a 40% difference sounds huge, and may point to an iMX-specific
cpuidle issue. Anyhow, we know that during this bottleneck procedure the
CPUs will be fully dedicated to the decompress task, and must be kept
ready for it.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
---
 kernel/power/swap.c | 6 ++++++
 1 file changed, 6 insertions(+)
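
For reviewers unfamiliar with the CPU latency QoS interface, here is a
minimal sketch (not part of this patch) of the pattern the change relies
on: a latency-critical section is bracketed by a cpu_latency_qos request
so cpuidle keeps the wakeup latency at or below the requested value.
run_latency_critical_section() and my_latency_critical_work() are
hypothetical names used only for illustration.

	#include <linux/pm_qos.h>

	static void run_latency_critical_section(void)
	{
		struct pm_qos_request qos;

		/*
		 * Request 0us CPU wakeup latency: while the request is
		 * active, cpuidle avoids the deepest idle states.
		 */
		cpu_latency_qos_add_request(&qos, 0);

		my_latency_critical_work();

		/* Drop the constraint as soon as the work is done. */
		cpu_latency_qos_remove_request(&qos);
	}

The request is deliberately scoped to load_image_lzo() only, so the
power impact is limited to the duration of the image decompression.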