Merge "lmkd: rate-limit and cleanup failed kill reports" am: 83193c6c78

am: b6d606d06a

Change-Id: I79304f6860cf8da2cac0096e0117f111a47f33b2
This commit is contained in:
Suren Baghdasaryan 2018-09-06 12:56:18 -07:00 committed by android-build-merger
commit 18d0260406
2 changed files with 47 additions and 38 deletions

View file

@@ -97,6 +97,8 @@
#define min(a, b) (((a) < (b)) ? (a) : (b)) #define min(a, b) (((a) < (b)) ? (a) : (b))
#define FAIL_REPORT_RLIMIT_MS 1000
/* default to old in-kernel interface if no memory pressure events */ /* default to old in-kernel interface if no memory pressure events */
static bool use_inkernel_interface = true; static bool use_inkernel_interface = true;
static bool has_inkernel_module; static bool has_inkernel_module;
@@ -1090,8 +1092,7 @@ static struct proc *proc_get_heaviest(int oomadj) {
} }
/* Kill one process specified by procp. Returns the size of the process killed */ /* Kill one process specified by procp. Returns the size of the process killed */
static int kill_one_process(struct proc* procp, int min_score_adj, static int kill_one_process(struct proc* procp) {
enum vmpressure_level level) {
int pid = procp->pid; int pid = procp->pid;
uid_t uid = procp->uid; uid_t uid = procp->uid;
char *taskname; char *taskname;
@@ -1125,11 +1126,8 @@ static int kill_one_process(struct proc* procp, int min_score_adj,
/* CAP_KILL required */ /* CAP_KILL required */
r = kill(pid, SIGKILL); r = kill(pid, SIGKILL);
ALOGI( ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB",
"Killing '%s' (%d), uid %d, adj %d\n" taskname, pid, uid, procp->oomadj, tasksize * page_k);
" to free %ldkB because system is under %s memory pressure (min_oom_adj=%d)\n",
taskname, pid, uid, procp->oomadj, tasksize * page_k,
level_name[level], min_score_adj);
pid_remove(pid); pid_remove(pid);
TRACE_KILL_END(); TRACE_KILL_END();
@@ -1156,8 +1154,7 @@ static int kill_one_process(struct proc* procp, int min_score_adj,
* If pages_to_free is set to 0 only one process will be killed. * If pages_to_free is set to 0 only one process will be killed.
* Returns the size of the killed processes. * Returns the size of the killed processes.
*/ */
static int find_and_kill_processes(enum vmpressure_level level, static int find_and_kill_processes(int min_score_adj, int pages_to_free) {
int min_score_adj, int pages_to_free) {
int i; int i;
int killed_size; int killed_size;
int pages_freed = 0; int pages_freed = 0;
@@ -1176,7 +1173,7 @@ static int find_and_kill_processes(enum vmpressure_level level,
if (!procp) if (!procp)
break; break;
killed_size = kill_one_process(procp, min_score_adj, level); killed_size = kill_one_process(procp);
if (killed_size >= 0) { if (killed_size >= 0) {
#ifdef LMKD_LOG_STATS #ifdef LMKD_LOG_STATS
if (enable_stats_log && !lmk_state_change_start) { if (enable_stats_log && !lmk_state_change_start) {
@@ -1275,6 +1272,7 @@ static void mp_event_common(int data, uint32_t events __unused) {
enum vmpressure_level lvl; enum vmpressure_level lvl;
union meminfo mi; union meminfo mi;
union zoneinfo zi; union zoneinfo zi;
struct timespec curr_tm;
static struct timespec last_kill_tm; static struct timespec last_kill_tm;
static unsigned long kill_skip_count = 0; static unsigned long kill_skip_count = 0;
enum vmpressure_level level = (enum vmpressure_level)data; enum vmpressure_level level = (enum vmpressure_level)data;
@@ -1305,14 +1303,12 @@ static void mp_event_common(int data, uint32_t events __unused) {
} }
} }
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time");
return;
}
if (kill_timeout_ms) { if (kill_timeout_ms) {
struct timespec curr_tm;
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time");
return;
}
if (get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) { if (get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) {
kill_skip_count++; kill_skip_count++;
return; return;
@@ -1418,7 +1414,7 @@ static void mp_event_common(int data, uint32_t events __unused) {
do_kill: do_kill:
if (low_ram_device) { if (low_ram_device) {
/* For Go devices kill only one task */ /* For Go devices kill only one task */
if (find_and_kill_processes(level, level_oomadj[level], 0) == 0) { if (find_and_kill_processes(level_oomadj[level], 0) == 0) {
if (debug_process_killing) { if (debug_process_killing) {
ALOGI("Nothing to kill"); ALOGI("Nothing to kill");
} }
@@ -1427,6 +1423,8 @@ do_kill:
} }
} else { } else {
int pages_freed; int pages_freed;
static struct timespec last_report_tm;
static unsigned long report_skip_count = 0;
if (!use_minfree_levels) { if (!use_minfree_levels) {
/* If pressure level is less than critical and enough free swap then ignore */ /* If pressure level is less than critical and enough free swap then ignore */
@@ -1454,30 +1452,41 @@ do_kill:
min_score_adj = level_oomadj[level]; min_score_adj = level_oomadj[level];
} }
pages_freed = find_and_kill_processes(level, min_score_adj, pages_to_free); pages_freed = find_and_kill_processes(min_score_adj, pages_to_free);
if (use_minfree_levels) { if (pages_freed == 0) {
ALOGI("Killing because cache %ldkB is below " /* Rate limit kill reports when nothing was reclaimed */
"limit %ldkB for oom_adj %d\n" if (get_time_diff_ms(&last_report_tm, &curr_tm) < FAIL_REPORT_RLIMIT_MS) {
" Free memory is %ldkB %s reserved", report_skip_count++;
other_file * page_k, minfree * page_k, min_score_adj,
other_free * page_k, other_free >= 0 ? "above" : "below");
}
if (pages_freed < pages_to_free) {
ALOGI("Unable to free enough memory (pages to free=%d, pages freed=%d)",
pages_to_free, pages_freed);
} else {
ALOGI("Reclaimed enough memory (pages to free=%d, pages freed=%d)",
pages_to_free, pages_freed);
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &last_kill_tm) != 0) {
ALOGE("Failed to get current time");
return; return;
} }
} }
if (pages_freed > 0) {
meminfo_log(&mi); /* Log meminfo whenever we kill or when report rate limit allows */
meminfo_log(&mi);
if (pages_freed >= pages_to_free) {
/* Reset kill time only if reclaimed enough memory */
last_kill_tm = curr_tm;
} }
if (use_minfree_levels) {
ALOGI("Killing to reclaim %ldkB, reclaimed %ldkB, cache(%ldkB) and "
"free(%" PRId64 "kB)-reserved(%" PRId64 "kB) below min(%ldkB) for oom_adj %d",
pages_to_free * page_k, pages_freed * page_k,
other_file * page_k, mi.field.nr_free_pages * page_k,
zi.field.totalreserve_pages * page_k,
minfree * page_k, min_score_adj);
} else {
ALOGI("Killing to reclaim %ldkB, reclaimed %ldkB at oom_adj %d",
pages_to_free * page_k, pages_freed * page_k, min_score_adj);
}
if (report_skip_count > 0) {
ALOGI("Suppressed %lu failed kill reports", report_skip_count);
report_skip_count = 0;
}
last_report_tm = curr_tm;
} }
} }

View file

@@ -39,7 +39,7 @@ using namespace android::base;
#define LMKDTEST_RESPAWN_FLAG "LMKDTEST_RESPAWN" #define LMKDTEST_RESPAWN_FLAG "LMKDTEST_RESPAWN"
#define LMKD_LOGCAT_MARKER "lowmemorykiller" #define LMKD_LOGCAT_MARKER "lowmemorykiller"
#define LMKD_KILL_MARKER_TEMPLATE LMKD_LOGCAT_MARKER ": Killing '%s'" #define LMKD_KILL_MARKER_TEMPLATE LMKD_LOGCAT_MARKER ": Kill '%s'"
#define OOM_MARKER "Out of memory" #define OOM_MARKER "Out of memory"
#define OOM_KILL_MARKER "Killed process" #define OOM_KILL_MARKER "Killed process"
#define MIN_LOG_SIZE 100 #define MIN_LOG_SIZE 100