new file mode 100644
@@ -0,0 +1,14 @@
+AM_CPPFLAGS := \
+ -W \
+ -Wall \
+ -Wextra \
+ -Wunused \
+ -Werror
+
+#Build vmrd binary
+bin_PROGRAMS := vmrd
+vmrd_SOURCES := vmrd.cpp
+vmrd_CPPFLAGS := $(AM_CPPFLAGS) @LIBBASE_CFLAGS@ @LIBCUTILS_CFLAGS@
+vmrd_LDFLAGS := -lbase -lpthread -lcutils
+
+pkgconfigdir := $(libdir)/pkgconfig
new file mode 100644
@@ -0,0 +1,1154 @@
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause-Clear
+ *
+ * Dynamic VM Memory Resizing Daemon (vmrd)
+ */
+
+#define LOG_TAG "vmrd"
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <sstream>
+#include <time.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/syslog.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <atomic>
+
+#define SIZE_1MB 0x00100000
+#define SIZE_1KB 0x00000400
+
+#ifndef MS_PER_SEC
+#define MS_PER_SEC (1000)
+#endif
+
+#define US_PER_SEC 1000000
+#define US_PER_MS (US_PER_SEC / MS_PER_SEC)
+#define NS_PER_SEC 1000000000
+#define NS_PER_MS (NS_PER_SEC / MS_PER_SEC)
+#define NS_PER_US (NS_PER_SEC / US_PER_SEC)
+
+#define LOGI(fmt, arg...) syslog (LOG_INFO, fmt, ##arg)
+#define LOGE(fmt, arg...) syslog (LOG_ERR, fmt, ##arg)
+#define LOGD(fmt, arg...) syslog (LOG_DEBUG, fmt, ##arg)
+
+#ifndef __unused
+#define __unused __attribute__((__unused__))
+#endif
+
+enum pressure_levels {
+ PRESSURE_INVALID = -2,
+ PRESSURE_NONE,
+ PRESSURE_MIN = 0,
+
+ PRESSURE_EVT_5 = PRESSURE_MIN,
+ PRESSURE_EVT_10,
+ PRESSURE_EVT_15,
+ PRESSURE_EVT_20,
+ PRESSURE_EVT_25,
+ PRESSURE_EVT_30,
+ PRESSURE_EVT_35,
+ PRESSURE_EVT_40,
+ PRESSURE_EVT_45,
+ PRESSURE_EVT_50,
+
+ PRESSURE_EVT_COUNT
+};
+
+enum zone_name {
+ ZONE_NORMAL = 0,
+ ZONE_MOVABLE,
+ ZONE_MAX
+};
+
+static size_t sys_page_size;
+
+static const char* const zone_names[ZONE_MAX] = {
+ "Normal",
+ "Movable",
+};
+
+#define MEMINFO_PATH "/proc/meminfo"
+#define ZONEINFO_PATH "/proc/zoneinfo"
+#define PSI_MEMORY_PATH "/proc/pressure/memory"
+
+#define WAKE_LOCK_PATH "/sys/power/wake_lock"
+#define WAKE_UNLOCK_PATH "/sys/power/wake_unlock"
+#define WAKELOCK_STR "vmrd_lock"
+#define WAKEUNLOCK_STR WAKELOCK_STR
+
+/* memory plugin size defaults (in MBs)*/
+#define DEFAULT_PLUGIN_RESOLUTION_MB (4)
+#define DEFAULT_MAX_MEMORY_PLUGIN_MB (256)
+
+#define MAX_UNPLUG_RETRY (2)
+
+enum psi_stall_type {
+ PSI_SOME,
+ PSI_FULL,
+ PSI_TYPE_COUNT
+};
+
+static const char* stall_type_name[] = {
+ "some",
+ "full",
+};
+
+struct psi_threshold {
+ enum psi_stall_type stall_type;
+ int threshold_ms;
+};
+
+struct memory_snapshot {
+ uint64_t sys_memfree_kb;
+ uint64_t normal_free_kb;
+ uint64_t movable_free_kb;
+ uint64_t movable_inactive_anon_kb;
+ uint64_t movable_inactive_file_kb;
+};
+
+struct psi_pressure {
+ float avg10;
+ float avg60;
+ float avg300;
+ uint64_t total;
+};
+
+struct psi_memory_pressure {
+ struct psi_pressure some;
+ struct psi_pressure full;
+};
+
+/* PSI threshold levels in milliseconds */
+static struct psi_threshold psi_thresholds[PRESSURE_EVT_COUNT] = {
+ { PSI_SOME, 5 }, { PSI_SOME, 10 }, { PSI_SOME, 15 }, { PSI_SOME, 20 },
+ { PSI_SOME, 25 }, { PSI_SOME, 30 }, { PSI_SOME, 35 }, { PSI_SOME, 40 },
+ { PSI_SOME, 45 }, { PSI_SOME, 50 }
+};
+
+/*
+ * we wait until memory pressure decays below certain
+ * threshold before unplugging memory.
+ */
+static bool enable_pressure_decay_wait = false;
+
+/* global buffer for file reads */
+static char* readfile_buf;
+static ssize_t readfile_buf_size;
+
+/* serialize fileread buffer accesses using mutex */
+static pthread_mutex_t fileread_buffer_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* in MBs */
+static uint64_t resolution, max_plugged_memory;
+
+/* num of memory chunks plugged-in, each of resolution MB size */
+static std::atomic<uint64_t> mem_chunks_plugged{0};
+
+/* total plugged memory in VM system (in MBs) */
+static std::atomic<uint64_t> plugged_memory{0};
+
+/* fds of all registered PSI events */
+static int32_t event_fds[PRESSURE_EVT_COUNT];
+
+/* fd for main epoll_wait */
+static int32_t psi_epollfd = -1;
+
+/*
+ * used to calculate exponential decay time (in steps of 1 seconds)
+ * from one load_avg to another load_avg.
+ */
+#define EXP_1S_10S 0.9048F /* 1/exp(1s/10s) */
+#define EXP_1S_60S 0.9834F /* 1/exp(1s/60s) */
+#define EXP_1S_300S 0.9966F /* 1/exp(1s/300s) */
+
+static pthread_mutex_t thread_execution_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t thread_execution_cond;
+static pthread_condattr_t thread_execution_cond_attr;
+
+static std::atomic<bool> cancel_check(false);
+static std::atomic<bool> wait_in_progress(false);
+
+/* default wait time for pressure to be idle (in seconds) */
+#define IDLE_WAIT_TIME_S 10
+
+/* stall-tracking window size, 50ms*/
+static int PSI_WINDOW_SIZE_US = (50 * US_PER_MS);
+
+/* acceptable pressure during 60 sec window is 18 ms (0.03% of 60 sec) */
+#define AVG60_EXP_THRESHOLD 0.03F
+
+#define AVG10_EXP_THRESHOLD 0.00F
+
+#define TARGET_OOM_SCORE_ADJ -1000
+
+#define LINE_MAX 250
+#define STRINGIFY(x) STRINGIFY_INTERNAL(x)
+#define STRINGIFY_INTERNAL(x) #x
+
+static char str_buf[LINE_MAX];
+
+using namespace std;
+
+/*
+ * Vendor-specific implementation of memory plug and unplug requests
+ * are needed to support the functionality of vmrd.
+ */
+
+static int memory_plug_init(void) {
+ LOGE("memory plug request not supported");
+ return -ENOTTY;
+}
+
+static void memory_plug_deinit(void) {
+ LOGE("memory plug request not supported");
+}
+
+/*
+ * Plugs in memory of given size into the system by requesting it from host VM.
+ * This call is expected to be blocking call.
+ */
+static int memory_plug_request(uint64_t size) {
+ (void) size;
+ LOGE("Memory plug request not supported");
+ return -ENOTTY;
+}
+
+/*
+ * Unplugs the memory of given size from the system and releases the memory
+ * back to host VM. Only memory that is previously added can be unplugged.
+ * his call is expected to be blocking call.
+ */
+static int memory_unplug_request(uint64_t size) {
+ (void) size;
+ LOGE("Memory unplug request not supported");
+ return -ENOTTY;
+}
+
+/*
+ * Releases all previously plugged memory back to the host VM.
+ * This can be done by unplugging all the memory by each block-by-block or
+ * by making a single request to unplug memory of size which is equal to
+ * total of all memory previously added.
+ * This call is expected to be blocking call.
+ */
+static int __unused memory_unplug_all_request(void) {
+ LOGE("Memory unplug all request not supported");
+ return -ENOTTY;
+}
+
+static int write_file(const char *file_path, char *s) {
+ int fd;
+ ssize_t len;
+
+ fd = open(file_path, O_WRONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ LOGE("%s open failed, err: %s", file_path, strerror(errno));
+ return -EINVAL;
+ }
+
+ len = write(fd, s, strlen(s));
+ if (len < (ssize_t)strlen(s)) {
+ LOGE("error writing to file: %s val %s", file_path, s);
+ close(fd);
+ return -EINVAL;
+ }
+
+ close(fd);
+ return 0;
+}
+
+static char *read_file(const char *file_path) {
+ int fd;
+ ssize_t readsize;
+ char *new_buf = NULL;
+
+ fd = open(file_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ LOGE("%s open failed, err: %s", file_path, strerror(errno));
+ return NULL;
+ }
+
+ while ((readsize = pread(fd, readfile_buf,
+ readfile_buf_size, 0)) == readfile_buf_size) {
+ /*
+ * if file content is more than readfile buffer size, resize the buffer
+ * to double its previous size with realloc
+ */
+ readfile_buf_size *= 2;
+ new_buf = (char *)realloc(readfile_buf, readfile_buf_size);
+ if (new_buf == NULL) {
+ LOGE("resizing fileread buffer failed, errno: %s", strerror(errno));
+ close(fd);
+ return NULL;
+ }
+ readfile_buf = new_buf;
+ }
+
+ readfile_buf[readsize] = 0;
+ close(fd);
+ return readfile_buf;
+}
+
+/*
+ * Memory block size or resolution.
+ */
+static int get_memory_plugin_resolution(uint64_t *plugin_resolution_mb) {
+ *plugin_resolution_mb = DEFAULT_PLUGIN_RESOLUTION_MB;
+ return 0;
+}
+
+/*
+ * Total max memory that the system (guest VM) allows to be pluuged-in.
+ */
+static int get_max_memory_plugin_allowed(uint64_t *max_memory_plugin_mb) {
+ *max_memory_plugin_mb = DEFAULT_MAX_MEMORY_PLUGIN_MB;
+ return 0;
+}
+
+static inline char *nextln(char *buf)
+{
+ char *x;
+
+ x = (char *)memchr(buf, '\n', strlen(buf));
+ if (!x)
+ return buf + strlen(buf);
+ return x + 1;
+}
+
+/*** core functionality starts here ***/
+
+/*
+ * returns 0 on failure
+ */
+static unsigned int wake_lock_acquire()
+{
+ char str_val[LINE_MAX];
+
+ snprintf(str_val, sizeof(str_val), WAKELOCK_STR);
+ if (write_file(WAKE_LOCK_PATH, str_val)) {
+ LOGE("failed to write to %s errno: %s", WAKE_LOCK_PATH,
+ strerror(errno));
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * returns 0 on failure
+ */
+static unsigned int wake_unlock()
+{
+ char str_val[LINE_MAX];
+
+ snprintf(str_val, sizeof(str_val), WAKEUNLOCK_STR);
+ if (write_file(WAKE_UNLOCK_PATH, str_val)) {
+ LOGE("failed to write to %s errno: %s", WAKE_UNLOCK_PATH,
+ strerror(errno));
+ return 0;
+ }
+
+ return 1;
+}
+
+static int parse_field(char *buf, const char *field_name, uint64_t *val) {
+ std::istringstream sstr(buf);
+ std::string line;
+ int nargs;
+
+ if (field_name == NULL) {
+ LOGE("%s: field name for parsing is null", __func__);
+ goto err;
+ }
+
+ while (std::getline(sstr, line)) {
+ if(strstr(line.c_str(), field_name) == NULL)
+ continue;
+ /* found our line */
+ nargs = sscanf(line.c_str(), "%*[^0-9]%lu", val);
+ if (nargs != 1) {
+ LOGE("parsing field value %s in line %s failed",
+ field_name, line.c_str());
+ goto err;
+ }
+ return 0;
+ }
+
+ LOGE("%s: field name %s not found", __func__, field_name);
+err:
+ return -EINVAL;
+}
+
+/* must be used with fileread_buffer_mutex lock taken */
+static inline char *get_zoneinfo(void)
+{
+ return read_file(ZONEINFO_PATH);
+}
+
+static inline char *get_meminfo(void)
+{
+ return read_file(MEMINFO_PATH);
+}
+
+static int parse_zone_field(char *buf, const char *zone_name,
+ const char *field_name, uint64_t *val)
+{
+ char name[LINE_MAX + 1]; /* LINE_MAX + 1 to avoid sscanf overflow */
+ int nargs;
+
+ if (!buf)
+ return -EINVAL;
+
+ while (*buf) {
+ nargs = sscanf(buf, "Node %*u, zone %" STRINGIFY(LINE_MAX) "s", name);
+ buf = nextln(buf);
+ if (nargs == 1 && !strcmp(name, zone_name))
+ break;
+ }
+
+ if (!*buf)
+ return -EINVAL;
+
+ return parse_field(buf, field_name, val);
+}
+
+/*
+ * Take snapshot of system memory on the fields defined in
+ * struct memory_snapshot.
+ */
+static int system_memory_snapshot(struct memory_snapshot *mem_snap)
+{
+ char *buf;
+ int res = -1;
+
+ pthread_mutex_lock(&fileread_buffer_mutex);
+ buf = get_zoneinfo();
+ if (!buf)
+ goto err;
+
+ /*
+ * Some zone fields may not be present if zone is empty,
+ * so not necessarily parsing errors. Return 0 for fields
+ * that don't exist.
+ */
+
+ if (parse_zone_field(buf, zone_names[ZONE_NORMAL],
+ "pages free", &mem_snap->normal_free_kb))
+ mem_snap->normal_free_kb = 0;
+ mem_snap->normal_free_kb *= sys_page_size / SIZE_1KB;
+
+ if (parse_zone_field(buf, zone_names[ZONE_MOVABLE],
+ "pages free", &mem_snap->movable_free_kb))
+ mem_snap->movable_free_kb = 0;
+ mem_snap->movable_free_kb *= sys_page_size / SIZE_1KB;
+
+ if (parse_zone_field(buf, zone_names[ZONE_MOVABLE],
+ "nr_zone_inactive_anon", &mem_snap->movable_inactive_anon_kb))
+ mem_snap->movable_inactive_anon_kb = 0;
+ mem_snap->movable_inactive_anon_kb *= sys_page_size / SIZE_1KB;
+
+ if (parse_zone_field(buf, zone_names[ZONE_MOVABLE],
+ "nr_zone_inactive_file", &mem_snap->movable_inactive_file_kb))
+ mem_snap->movable_inactive_file_kb = 0;
+ mem_snap->movable_inactive_file_kb *= sys_page_size / SIZE_1KB;
+
+ buf = get_meminfo();
+ if (!buf)
+ goto err;
+
+ if(parse_field(buf, "MemFree", &mem_snap->sys_memfree_kb))
+ goto err;
+ res = 0;
+
+err:
+ if (res)
+ LOGE("failed to get memory snapshot");
+ pthread_mutex_unlock(&fileread_buffer_mutex);
+ return res;
+}
+
+/* gets the PSI averages for stall types PSI_SOME and PSI_FULL */
+static int parse_system_psi_memory(struct psi_memory_pressure *psi_memory)
+{
+ char *buf;
+ int nargs, res = -1;
+
+ pthread_mutex_lock(&fileread_buffer_mutex);
+ buf = read_file(PSI_MEMORY_PATH);
+ if (!buf)
+ goto err;
+
+ memset(psi_memory, 0, sizeof(struct psi_memory_pressure));
+
+ /* get memory pressure for PSI_SOME */
+ nargs = sscanf(buf, "some avg10=%f avg60=%f avg300=%f total=%lu",
+ &psi_memory->some.avg10,
+ &psi_memory->some.avg60,
+ &psi_memory->some.avg300,
+ &psi_memory->some.total);
+ if (nargs != 4) {
+ LOGE("line parse for PSI_SOME avgs failed");
+ goto err;
+ }
+
+ buf = nextln(buf);
+
+ /* get memory pressure for PSI_FULL */
+ nargs = sscanf(buf, "full avg10=%f avg60=%f avg300=%f total=%lu",
+ &psi_memory->full.avg10,
+ &psi_memory->full.avg60,
+ &psi_memory->full.avg300,
+ &psi_memory->full.total);
+ if (nargs != 4) {
+ LOGE("line parse for PSI_FULL avgs failed");
+ goto err;
+ }
+
+ res = 0;
+err:
+ pthread_mutex_unlock(&fileread_buffer_mutex);
+ return res;
+}
+
+/* registers the PSI events (thresholds) for a given stall type and window */
+static int register_psi_memory(enum psi_stall_type stall_type,
+ int threshold_us, int window_us) {
+ int fd;
+ int res;
+ char buf[LINE_MAX];
+
+ fd = open(PSI_MEMORY_PATH, O_WRONLY | O_CLOEXEC);
+ if (fd < 0) {
+ LOGE("no kernel psi monitor support, errno: %s", strerror(errno));
+ return -1;
+ }
+
+ /* monitor memory pressure for partial stall */
+ snprintf(buf, sizeof(buf), "%s %d %d",
+ stall_type_name[stall_type], threshold_us, window_us);
+
+ res = write(fd, buf, strlen(buf) + 1);
+ if (res < 0) {
+ LOGE("%s write failed for psi stall type '%s', errno: %s",
+ PSI_MEMORY_PATH, stall_type_name[stall_type], strerror(errno));
+ goto err;
+ }
+
+ return fd;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static int register_epoll_events(int epollfd, int psi_event_fd, void *data) {
+ int res;
+ struct epoll_event epevent;
+
+ /* register for epoll with EPOLLPRI and EPOLLWAKEUP events */
+ epevent.events = EPOLLPRI | EPOLLWAKEUP;
+ epevent.data.ptr = data;
+ res = epoll_ctl(epollfd, EPOLL_CTL_ADD, psi_event_fd, &epevent);
+ if (res < 0) {
+ LOGE("epoll_ctl for psi monitor failed, errno: %s", strerror(errno));
+ }
+ return res;
+}
+
+static int unregister_epoll_events(int epollfd, int psi_event_fd) {
+ return epoll_ctl(epollfd, EPOLL_CTL_DEL, psi_event_fd, NULL);
+}
+
+static void unregister_psi_events() {
+ int num;
+
+ for (num = PRESSURE_MIN; num < PRESSURE_EVT_COUNT; num ++) {
+ if (event_fds[num] >= 0) {
+ unregister_epoll_events(psi_epollfd, event_fds[num]);
+ close(event_fds[num]);
+ }
+ event_fds[num] = -1;
+ }
+}
+
+static const char *psi_level_to_string(pressure_levels level) {
+ uint16_t num;
+
+ if (level == PRESSURE_NONE)
+ return "NONE";
+ if (level == PRESSURE_INVALID)
+ return "INVALID";
+
+ num = psi_thresholds[level].threshold_ms;
+ memset(str_buf, 0, sizeof(str_buf));
+ snprintf(str_buf, sizeof(str_buf), "EVENT_%dMS", num);
+
+ return str_buf;
+}
+
+static int init_and_register_psi_events() {
+
+ int num;
+
+ for (num = PRESSURE_MIN; num < PRESSURE_EVT_COUNT; num ++)
+ event_fds[num] = -1;
+
+ psi_epollfd = epoll_create(PRESSURE_EVT_COUNT);
+ if (psi_epollfd == -1) {
+ LOGE("epoll_create failed, errno: %s", strerror(errno));
+ return -1;
+ }
+
+ for (num = PRESSURE_MIN; num < PRESSURE_EVT_COUNT; num ++) {
+
+ /* register to psi moniters */
+ event_fds[num] = register_psi_memory(psi_thresholds[num].stall_type,
+ psi_thresholds[num].threshold_ms * US_PER_MS,
+ PSI_WINDOW_SIZE_US);
+
+ if (event_fds[num] < 0) {
+ LOGE("PSI init failed for event %s, fd %d",
+ psi_level_to_string((enum pressure_levels)num),
+ event_fds[num]);
+ goto fail;
+ }
+
+ /* initialize epoll events */
+ if (register_epoll_events(psi_epollfd, event_fds[num],
+ (void*)((uint64_t) num)) != 0) {
+ LOGE("PSI registration failed for event %s",
+ psi_level_to_string((enum pressure_levels)num));
+ goto fail;
+ }
+ }
+
+ return 0;
+
+fail:
+ unregister_psi_events();
+ close(psi_epollfd);
+ psi_epollfd = -1;
+ return -1;
+
+}
+
+/*
+ * calculates the time it takes (in seconds) to go from one load_avg to another.
+ * This is used to determine how much time it would take for memory pressure to
+ * decay down from current PSI load_avg to the set thresholds.
+ */
+static int32_t calc_time_to_decay(struct psi_memory_pressure psi_memory)
+{
+ int32_t avg10_decay = 0, avg60_decay = 0;
+ float avg;
+
+ /* calc exponential time decay time for 10 seconds average */
+ avg = psi_memory.some.avg10 * EXP_1S_10S;
+ /*
+ * we need avg10 to be 0.00 and the least is 0.01 value. Calculate avg10
+ * time to decay until value is less than 0.009F to avoid floating
+ * point approximations.
+ */
+ while (avg > 0.009F) {
+ ++avg10_decay;
+ avg *= EXP_1S_10S;
+ }
+
+ /* calc exponential time decay time for 60 seconds average */
+ avg = psi_memory.some.avg60 * EXP_1S_60S;
+ /* acceptable pressure during 60 sec window is 18 ms (0.03% of 60 sec) */
+ while (avg > AVG60_EXP_THRESHOLD) {
+ ++avg60_decay;
+ avg *= EXP_1S_60S;
+ }
+
+ return (std::max(avg10_decay, avg60_decay));
+}
+
+static bool inline get_atomic_variable_bool(std::atomic<bool> &variable) {
+ return variable.load();
+}
+
+static void inline set_atomic_variable_bool(std::atomic<bool> &variable, bool val) {
+ variable.exchange(val);
+}
+
+/* conditionally sleep with timeout */
+static void wait_for_condition_timed(pthread_cond_t *condition,
+ pthread_mutex_t *mutex, const struct timespec *timeout_ts) {
+
+ if (!condition || !mutex)
+ return;
+
+ pthread_mutex_lock(mutex);
+ if (timeout_ts)
+ pthread_cond_timedwait(condition, mutex, timeout_ts);
+ else
+ /* timeout_ts = NULL means wait indefinitely with no timeout */
+ pthread_cond_wait(condition, mutex);
+ pthread_mutex_unlock(mutex);
+}
+
+/* wait until PSI averages decay to set thresholds */
+static void wait_until_pressure_decay(void)
+{
+ struct psi_memory_pressure pressure;
+ struct timespec timeout;
+ int32_t decay_time;
+ int res;
+
+ LOGD("waiting for pressure to decay...");
+
+ while(1) {
+ res = parse_system_psi_memory(&pressure);
+ if (res < 0) {
+ LOGE("parsing system psi memory failed, errno: %s",
+ strerror(errno));
+ break;
+ }
+
+ /* check if we out of memory pressure thresholds */
+ if (pressure.some.avg10 == AVG10_EXP_THRESHOLD &&
+ pressure.some.avg60 <= AVG60_EXP_THRESHOLD) {
+ LOGD("PSI memory avgs are below the set thresholds");
+ break;
+ }
+
+ decay_time = calc_time_to_decay(pressure);
+
+ /* add another 2 seconds as extra cushion time for decay to settle */
+ decay_time += 2;
+
+ /* sleep until decay.. and break if CANCEL signal is sent */
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+ timeout.tv_sec += decay_time;
+
+ LOGI("sleeping for %d seconds for pressure to decay", decay_time);
+ wait_for_condition_timed(&thread_execution_cond,
+ &thread_execution_mutex, &timeout);
+
+ /* check if we recived new pressure event. If so, we may want to break
+ * from this pressure decay wait loop and return to pthread main loop.
+ */
+ if (get_atomic_variable_bool(cancel_check))
+ return;
+ }
+
+ LOGD("Out of pressure to decay");
+}
+
+/*
+ * Separate thread function that tracks psi pressure decay. This is done by
+ * taking system power wake_lock to make sure our sleep timers aren't freezed.
+ * Once PSI averages are decayed down below set thresholds, we go ahead checking
+ * for any free memory that can be given back to host VM.
+ *
+ * We can use CLOCK_BOOTTIME_ALARM clock type for this thread so that we can
+ * wakeup from suspend when we goto sleep. Bringing the system out from suspend
+ * could be power consuming. So maybe its best to hold wake_lock for certain
+ * perioid, do our job in reclaiming memory back to host and release wake_lock?
+ */
+static void* memtrack_thread_function(void *arg) {
+
+ (void)(arg);
+ struct timespec timeout;
+ uint64_t count = 0;
+ uint64_t mem_chunks_unplugged = 0;
+ uint64_t total_free = 0;
+ struct memory_snapshot mem_snap;
+ unsigned int wake_locked = 0;
+ int retry_count = 0;
+ int res;
+
+ while (1) {
+
+ /* lets wait for notify */
+ wait_for_condition_timed(&thread_execution_cond,
+ &thread_execution_mutex, NULL);
+
+startover:
+ if (!wake_locked && wake_lock_acquire())
+ wake_locked = 1;
+ mem_chunks_unplugged = 0;
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+ timeout.tv_sec += IDLE_WAIT_TIME_S;
+
+ set_atomic_variable_bool(wait_in_progress, true);
+
+ /* wait for IDLE_WAIT_TIME_S seconds of idle in memory pressure */
+ wait_for_condition_timed(&thread_execution_cond,
+ &thread_execution_mutex, &timeout);
+
+ /* check if we received any new memory pressure event during idle time wait */
+ if (get_atomic_variable_bool(cancel_check)) {
+ set_atomic_variable_bool(cancel_check, false);
+ set_atomic_variable_bool(wait_in_progress, false);
+ goto startover;
+ }
+
+ /*
+ * we have passed the IDLE_WAIT_TIME_S seconds of idle in memory pressure.
+ * its safe now to assume that no memory consuming usescases are running.
+ */
+
+ /* wait until pressure is decayed */
+ if (enable_pressure_decay_wait)
+ wait_until_pressure_decay();
+
+ /* check if we received any new memory pressure event during pressure decay wait */
+ if (get_atomic_variable_bool(cancel_check)) {
+ set_atomic_variable_bool(cancel_check, false);
+ set_atomic_variable_bool(wait_in_progress, false);
+ goto startover;
+ }
+
+ /* didn't receive any new memory pressure events, so time to release memory to host VM */
+
+ /* take snapshot of memory */
+ if (system_memory_snapshot(&mem_snap))
+ continue;
+
+ LOGI("MemFree before UNPLUG: %lu KB (Normal: %lu KB, Movable: %lu KB)",
+ mem_snap.sys_memfree_kb, mem_snap.normal_free_kb,
+ mem_snap.movable_free_kb);
+
+ LOGI("Movable inactive_file: %lu KB inactive_anon: %lu",
+ mem_snap.movable_inactive_file_kb,
+ mem_snap.movable_inactive_anon_kb);
+
+ total_free = mem_snap.movable_free_kb;
+
+ /*
+ * inactive_file pages can be reclaimed easily, and
+ * inactive_anon pages can be swapped and reused.
+ */
+ total_free += mem_snap.movable_inactive_file_kb +
+ mem_snap.movable_inactive_anon_kb;
+
+ count = (total_free / SIZE_1KB) / resolution;
+
+ LOGD("count: %lu mem_chunks_plugged %lu resolution %lu MB plugged_memory %lu MB",
+ count, mem_chunks_plugged.load(), resolution,
+ plugged_memory.load());
+
+ count = std::min(count, mem_chunks_plugged.load());
+
+ while(count-- > 0) {
+ res = memory_unplug_request(resolution);
+ if (res) {
+ LOGE("failed to unplug one memory chunk of %lu MB", resolution);
+ continue;
+ }
+ mem_chunks_unplugged++;
+ }
+
+ if (mem_chunks_unplugged)
+ LOGI("unplugged %lu memory chunks. Total memory unplugged: %lu MB",
+ mem_chunks_unplugged, mem_chunks_unplugged * resolution);
+ plugged_memory -= (resolution * mem_chunks_unplugged);
+ mem_chunks_plugged -= mem_chunks_unplugged;
+
+ /* take memory snapshot after unplugging */
+ if (system_memory_snapshot(&mem_snap))
+ continue;
+
+ LOGD("MemFree after UNPLUG: %lu KB (Normal: %lu KB, Movable: %lu KB)",
+ mem_snap.sys_memfree_kb, mem_snap.normal_free_kb,
+ mem_snap.movable_free_kb);
+
+ if (mem_chunks_plugged && retry_count < MAX_UNPLUG_RETRY) {
+ ++retry_count;
+ LOGI("Retrying unplug after %d seconds (retry attempt: %d)",
+ IDLE_WAIT_TIME_S, retry_count);
+ goto startover;
+ }
+ else {
+ if (retry_count == MAX_UNPLUG_RETRY)
+ LOGI("max retry attempt reached for unplugging!!");
+ if (!mem_chunks_plugged)
+ LOGI("unplugged all memory!!");
+ retry_count = 0;
+ }
+
+ /*
+ * TODO: should we keep checking until all plugged
+ * memory is unplugged? goto startover ?
+ */
+
+ if(wake_locked && !wake_unlock())
+ LOGE("failed to wake unlock");
+ else
+ wake_locked = 0;
+
+ /* now lets wait for notify again... */
+
+ }
+
+ LOGE("pthread exiting..");
+ return (void *)NULL;
+}
+
+/* epoll_wait for PSI events */
+static pressure_levels psi_wait_for_pressure(void) {
+
+ pressure_levels pressure_level = PRESSURE_NONE;
+ struct epoll_event events[PRESSURE_EVT_COUNT];
+ int nevents = 0;
+
+ do {
+ if (pressure_level == PRESSURE_NONE) {
+ /* Wait for events with no timeout */
+ nevents = epoll_wait(psi_epollfd, events, PRESSURE_EVT_COUNT, -1);
+ } else {
+ /* Assume that the memory pressure state will stay high for at least 1s.
+ * Within that 1s window, the memory pressure state can go up due to
+ * a different FD becoming available or it can go down when that window expires.
+ * Accordingly, there's no polling: just epoll_wait with a 1s timeout.
+ */
+ nevents = epoll_wait(psi_epollfd, events, PRESSURE_EVT_COUNT, 1000);
+ if (nevents == 0) {
+ pressure_level = PRESSURE_NONE;
+ return pressure_level;
+ }
+ }
+ /* keep waiting if interrupted */
+ } while (nevents == -1 && errno == EINTR);
+
+ if (nevents == -1) {
+ LOGE("epoll_wait failed while waiting for psi events, err: %s",
+ strerror(errno));
+ return PRESSURE_INVALID;
+ }
+ /* reset pressure_level and raise it based on received events */
+ pressure_level = PRESSURE_NONE;
+ for (int i = 0; i < nevents; i++) {
+ if (events[i].events & (EPOLLERR | EPOLLHUP)) {
+ /* should never happen unless psi got disabled in kernel */
+ LOGE("memory pressure events are not available anymore");
+ return PRESSURE_INVALID;
+ }
+
+ /* record the highest reported level */
+ if ((pressure_levels)events[i].data.u32 > pressure_level) {
+ pressure_level = (pressure_levels) events[i].data.u32;
+ }
+ }
+
+ return pressure_level;
+}
+
+static uint64_t get_timespec_delta_us(struct timespec start_tv, struct timespec end_tv) {
+ uint64_t start_us;
+ uint64_t end_us;
+
+ end_us = (end_tv.tv_nsec / NS_PER_US) + (end_tv.tv_sec * US_PER_SEC);
+ start_us = (start_tv.tv_nsec / NS_PER_US) + (start_tv.tv_sec * US_PER_SEC);
+
+ return (end_us - start_us);
+}
+
+/*
+ * PSI mainloop. Wait for PSI events and start adding memory blocks if free
+ * memory is below our set thresholds.
+ */
+static void psi_mainloop(void) {
+ pressure_levels pressure_level = PRESSURE_NONE;
+ struct timespec cur, start, end;
+ struct memory_snapshot mem_snap;
+ int res;
+
+ while (1) {
+ pressure_level = psi_wait_for_pressure();
+ clock_gettime(CLOCK_MONOTONIC, &cur);
+
+ if (pressure_level == PRESSURE_INVALID) {
+ LOGD("error waiting for PSI event");
+ break;
+ }
+
+ if (pressure_level == PRESSURE_NONE) {
+ LOGD("no PSI events received. epoll_wait again...");
+ continue;
+ }
+
+ LOGI("Received pressure event %s . TIME: %09ld.%09ld s",
+ psi_level_to_string(pressure_level),
+ cur.tv_sec, cur.tv_nsec/NS_PER_MS);
+
+ if (system_memory_snapshot(&mem_snap))
+ continue;
+
+ LOGI("MemFree : %lu KB (Normal: %lu KB, Movable: %lu KB)",
+ mem_snap.sys_memfree_kb, mem_snap.normal_free_kb,
+ mem_snap.movable_free_kb);
+
+ /*
+ * if memtrack pthread is waiting on pressure decay, notify to
+ * start over since we received new pressure event.
+ */
+ if (get_atomic_variable_bool(wait_in_progress))
+ set_atomic_variable_bool(cancel_check, true);
+
+ /* notify memtrack thread that we received new memory pressure event */
+ pthread_cond_signal(&thread_execution_cond);
+
+ /*
+ * TODO: add more checks for memory stats such as
+ * reclaimable memory, zram etc.
+ */
+ if ((pressure_level >= PRESSURE_EVT_5) && /* min threshold reached */
+ (plugged_memory.load() < (uint64_t)max_plugged_memory) && /* max memory boundary */
+ (mem_snap.movable_free_kb < ((uint64_t)resolution * SIZE_1KB) / 2)) { /* only if movable < 1/2 * resolution */
+
+ LOGD("plugging-in %lu MB of memory", resolution);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ /*
+ * TODO: push memory_plug_request call onto separate thread
+ * so that the main thread can continue with PSI monitoring
+ */
+ res = memory_plug_request(resolution);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ if (res < 0) {
+ LOGE("memory plugin request for %lu MB failed", resolution);
+ continue;
+ }
+
+ plugged_memory += resolution;
+ mem_chunks_plugged++;
+
+ LOGD("plugged-in %lu MB of memory: SUCCESS", resolution);
+ LOGI("time taken to plug-in %lu MB: %lu us. TIME: %09ld.%09ld s",
+ resolution, get_timespec_delta_us(start, end),
+ end.tv_sec, end.tv_nsec/NS_PER_MS);
+
+ LOGI("Total memory pluged-in so far: %lu MB. Total memory chunks plugged-in: %lu",
+ plugged_memory.load(), mem_chunks_plugged.load());
+
+ if (system_memory_snapshot(&mem_snap))
+ continue;
+
+ LOGD("MemFree after PLUG: %lu KB (Normal: %lu KB, Movable: %lu KB)",
+ mem_snap.sys_memfree_kb, mem_snap.normal_free_kb,
+ mem_snap.movable_free_kb);
+ }
+ }
+}
+
+/*
+ * vmrd could be treated as critical service which adds memory when needed, and
+ * must not be killed by oom-killer or any userspace memory killing daemon.
+ */
+static void set_oom_score_adj_self(int adj)
+{
+ char path[LINE_MAX] = "/proc/self/oom_score_adj";
+ char val[LINE_MAX];
+ int fd;
+
+ snprintf(val, sizeof(val), "%d", adj);
+
+ fd = open(path, O_WRONLY);
+ if (fd < 0) {
+ LOGE("couldn't open %s", path);
+ return;
+ }
+
+ if (write(fd, val, strlen(val)) < 0)
+ LOGE("Couldn't write %s to %s", val, path);
+
+ close(fd);
+}
+
+int main(void) {
+
+ int i;
+ pthread_t memtrack_thread;
+ std::string thresholds;
+ char str[LINE_MAX];
+ struct memory_snapshot mem_snap;
+
+ /* get system PAGE_SIZE */
+ sys_page_size = sysconf(_SC_PAGE_SIZE);
+ if (!sys_page_size) {
+ LOGE("getting system page size failed");
+ return -EINVAL;
+ }
+
+ readfile_buf_size = sys_page_size;
+
+ /* allocate buffer for file reads */
+ readfile_buf = (char *)calloc(readfile_buf_size, sizeof(*readfile_buf));
+ if (!readfile_buf) {
+ LOGE("buffer allocation for file reads failed");
+ return -ENOMEM;
+ }
+
+ if (memory_plug_init()) {
+ LOGE("memory plugin init failed");
+ return -EINVAL;
+ }
+
+ /* Initialize PSI monitors */
+ if (init_and_register_psi_events()) {
+ LOGE("registering to PSI events failed");
+ return -EINVAL;
+ }
+
+ pthread_condattr_init(&thread_execution_cond_attr);
+
+ /* set clock type to CLOCK_MONOTONIC */
+ pthread_condattr_setclock(&thread_execution_cond_attr, CLOCK_MONOTONIC);
+
+ /* initialize condition variable using the attribute */
+ pthread_cond_init(&thread_execution_cond, &thread_execution_cond_attr);
+
+ /* create pthread for downword memory pressure tracking */
+ if (pthread_create(&memtrack_thread, NULL, &memtrack_thread_function, NULL)) {
+ LOGE("error creating pthread for downward mem tracking");
+ return -EINVAL;
+ }
+
+ pthread_condattr_destroy(&thread_execution_cond_attr);
+
+ for (i = 0; i < PRESSURE_EVT_COUNT; i++) {
+ snprintf(str, sizeof(str), "%dMS ", psi_thresholds[i].threshold_ms);
+ thresholds.append(str);
+ }
+ LOGI("PSI init completed! Thresholds: %s", thresholds.c_str());
+
+ set_oom_score_adj_self(TARGET_OOM_SCORE_ADJ);
+
+ get_memory_plugin_resolution(&resolution);
+ get_max_memory_plugin_allowed(&max_plugged_memory);
+ LOGI("Memory plug-in resolution: %lu MB", resolution);
+ LOGI("Maximum memory plug-in allowed: %lu MB", max_plugged_memory);
+
+ if (system_memory_snapshot(&mem_snap))
+ return -EINVAL;
+
+ LOGI("MemFree : %lu KB (Normal: %lu KB, Movable: %lu KB)",
+ mem_snap.sys_memfree_kb, mem_snap.normal_free_kb,
+ mem_snap.movable_free_kb);
+
+ LOGI("Waiting for pressure events...");
+ psi_mainloop();
+
+ /* should not exit */
+ LOGE("Exiting...");
+
+ memory_plug_deinit();
+ return 0;
+}