Merge "llkd: add live-lock daemon"
This commit is contained in:
commit
656bc24630
10 changed files with 1748 additions and 0 deletions
|
|
@ -130,6 +130,7 @@
|
||||||
#define AID_INCIDENTD 1067 /* incidentd daemon */
|
#define AID_INCIDENTD 1067 /* incidentd daemon */
|
||||||
#define AID_SECURE_ELEMENT 1068 /* secure element subsystem */
|
#define AID_SECURE_ELEMENT 1068 /* secure element subsystem */
|
||||||
#define AID_LMKD 1069 /* low memory killer daemon */
|
#define AID_LMKD 1069 /* low memory killer daemon */
|
||||||
|
#define AID_LLKD 1070 /* live lock daemon */
|
||||||
/* Changes to this file must be made in AOSP, *not* in internal branches. */
|
/* Changes to this file must be made in AOSP, *not* in internal branches. */
|
||||||
|
|
||||||
#define AID_SHELL 2000 /* adb and debug shell user */
|
#define AID_SHELL 2000 /* adb and debug shell user */
|
||||||
|
|
|
||||||
42
llkd/Android.bp
Normal file
42
llkd/Android.bp
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
cc_library_headers {
|
||||||
|
name: "llkd_headers",
|
||||||
|
|
||||||
|
export_include_dirs: ["include"],
|
||||||
|
}
|
||||||
|
|
||||||
|
cc_library_static {
|
||||||
|
name: "libllkd",
|
||||||
|
|
||||||
|
srcs: [
|
||||||
|
"libllkd.cpp",
|
||||||
|
],
|
||||||
|
|
||||||
|
shared_libs: [
|
||||||
|
"libbase",
|
||||||
|
"libcutils",
|
||||||
|
"liblog",
|
||||||
|
],
|
||||||
|
|
||||||
|
export_include_dirs: ["include"],
|
||||||
|
|
||||||
|
cflags: ["-Werror"],
|
||||||
|
}
|
||||||
|
|
||||||
|
cc_binary {
|
||||||
|
name: "llkd",
|
||||||
|
|
||||||
|
srcs: [
|
||||||
|
"llkd.cpp",
|
||||||
|
],
|
||||||
|
shared_libs: [
|
||||||
|
"libbase",
|
||||||
|
"libcutils",
|
||||||
|
"liblog",
|
||||||
|
],
|
||||||
|
static_libs: [
|
||||||
|
"libllkd",
|
||||||
|
],
|
||||||
|
cflags: ["-Werror"],
|
||||||
|
|
||||||
|
init_rc: ["llkd.rc"],
|
||||||
|
}
|
||||||
2
llkd/OWNERS
Normal file
2
llkd/OWNERS
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
salyzyn@google.com
|
||||||
|
surenb@google.com
|
||||||
116
llkd/README.md
Normal file
116
llkd/README.md
Normal file
|
|
@ -0,0 +1,116 @@
|
||||||
|
Android Live-LocK Daemon
|
||||||
|
========================
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
------------
|
||||||
|
|
||||||
|
Android Live-LocK Daemon (llkd) is used to catch kernel deadlocks and mitigate.
|
||||||
|
|
||||||
|
Code is structured to allow integration into another service either as part
|
||||||
|
of the main loop, or spun off as a thread should that be necessary. A default
|
||||||
|
standalone implementation is provided by llkd component.
|
||||||
|
|
||||||
|
The 'C' interface from libllkd component is thus:
|
||||||
|
|
||||||
|
#include "llkd.h"
|
||||||
|
bool llkInit(const char* threadname) /* return true if enabled */
|
||||||
|
unsigned llkCheckMilliseconds(void) /* ms to sleep for next check */
|
||||||
|
|
||||||
|
If a threadname is provided, a thread will be automatically spawned, otherwise
|
||||||
|
caller must call llkCheckMilliseconds in its main loop. Function will return
|
||||||
|
the period of time before the next expected call to this handler.
|
||||||
|
|
||||||
|
Operations
|
||||||
|
----------
|
||||||
|
|
||||||
|
If a thread is in D or Z state with no forward progress for longer than
|
||||||
|
ro.llk.timeout_ms, or ro.llk.[D|Z].timeout_ms, kill the process or parent
|
||||||
|
process respectively. If another scan shows the same process continues to
|
||||||
|
exist, then have a confirmed live-lock condition and need to panic. Panic
|
||||||
|
the kernel in a manner to provide the greatest bugreporting details as to the
|
||||||
|
condition. Add an alarm self watchdog should llkd ever get locked up that is
|
||||||
|
double the expected time to flow through the mainloop. Sampling is every
|
||||||
|
ro.llk.check_ms.
|
||||||
|
|
||||||
|
Default will not monitor init, or [kthreadd] and all that [kthreadd] spawns.
|
||||||
|
This reduces the effectiveness of llkd by limiting its coverage. If there is
|
||||||
|
value in covering [kthreadd] spawned threads, the requirement will be that
|
||||||
|
the drivers not remain in a persistent 'D' state, or that they have mechanisms
|
||||||
|
to recover the thread should it be killed externally (this is good driver
|
||||||
|
coding hygiene, a common request to add such to publicly reviewed kernel.org
|
||||||
|
maintained drivers). For instance use wait_event_interruptible() instead of
|
||||||
|
wait_event(). The blacklists can be adjusted accordingly if these
|
||||||
|
conditions are met to cover kernel components.
|
||||||
|
|
||||||
|
An accompanying gTest set has been added, and will setup a persistent D or Z
|
||||||
|
process, with and without forward progress, but not in a live-lock state
|
||||||
|
because that would require a buggy kernel, or a module or kernel modification
|
||||||
|
to stimulate. The test will check that llkd will mitigate first by killing
|
||||||
|
the appropriate process. D state is setup by vfork() waiting for exec() in
|
||||||
|
child process. Z state is setup by fork() and an un-waited for child process.
|
||||||
|
Should be noted that both of these conditions should never happen on Android
|
||||||
|
on purpose, and llkd effectively sweeps up processes that create these
|
||||||
|
conditions. If the test can, it will reconfigure llkd to expedite the test
|
||||||
|
duration by adjusting the ro.llk.* Android properties. Tests run the D state
|
||||||
|
with some scheduling progress to ensure that ABA checking prevents false
|
||||||
|
triggers.
|
||||||
|
|
||||||
|
Android Properties
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Android Properties llkd respond to (<prop>_ms parms are in milliseconds):
|
||||||
|
|
||||||
|
#### ro.config.low_ram
|
||||||
|
default false, if true do not sysrq t (dump all threads).
|
||||||
|
|
||||||
|
#### ro.llk.enable
|
||||||
|
default false, allow live-lock daemon to be enabled.
|
||||||
|
|
||||||
|
#### ro.khungtask.enable
|
||||||
|
default false, allow [khungtask] daemon to be enabled.
|
||||||
|
|
||||||
|
#### ro.llk.mlockall
|
||||||
|
default false, enable call to mlockall().
|
||||||
|
|
||||||
|
#### ro.khungtask.timeout
|
||||||
|
default value 12 minutes, [khungtask] maximum timelimit.
|
||||||
|
|
||||||
|
#### ro.llk.timeout_ms
|
||||||
|
default 10 minutes, D or Z maximum timelimit, double this value and it sets
|
||||||
|
the alarm watchdog for llkd.
|
||||||
|
|
||||||
|
#### ro.llk.D.timeout_ms
|
||||||
|
default ro.llk.timeout_ms, D maximum timelimit.
|
||||||
|
|
||||||
|
#### ro.llk.Z.timeout_ms
|
||||||
|
default ro.llk.timeout_ms, Z maximum timelimit.
|
||||||
|
|
||||||
|
#### ro.llk.check_ms
|
||||||
|
default 2 minutes samples of threads for D or Z.
|
||||||
|
|
||||||
|
#### ro.llk.blacklist.process
|
||||||
|
default 0,1,2 (kernel, init and [kthreadd]) plus process names
|
||||||
|
init,[kthreadd],[khungtaskd],lmkd,lmkd.llkd,llkd,watchdogd,
|
||||||
|
[watchdogd],[watchdogd/0],...,[watchdogd/<get_nprocs-1>].
|
||||||
|
|
||||||
|
#### ro.llk.blacklist.parent
|
||||||
|
default 0,2 (kernel and [kthreadd]).
|
||||||
|
|
||||||
|
#### ro.llk.blacklist.uid
|
||||||
|
default <empty>, comma separated list of uid numbers or names.
|
||||||
|
|
||||||
|
Architectural Concerns
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
- Figure out how to communicate the kernel panic better to bootstat canonical
|
||||||
|
boot reason determination. This may require an alteration to bootstat, or
|
||||||
|
some logging from llkd. Would like to see boot reason to be
|
||||||
|
watchdog,livelock as a minimum requirement. Or more specifically would want
|
||||||
|
watchdog,livelock,device or watchdog,livelock,zombie be reported.
|
||||||
|
Currently reports panic,sysrq (user requested panic) or panic depending on
|
||||||
|
system support of pstore.
|
||||||
|
- Create kernel module and associated gTest to actually test panic.
|
||||||
|
- Create gTest to test out blacklist (ro.llk.blacklist.<properties> should generally
|
||||||
|
not be inputs). Could require more test-only interfaces to libllkd.
|
||||||
|
- Speed up gTest using something else than ro.llk.<properties>, which should
|
||||||
|
not be inputs.
|
||||||
76
llkd/include/llkd.h
Normal file
76
llkd/include/llkd.h
Normal file
|
|
@ -0,0 +1,76 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2018 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _LLKD_H_
|
||||||
|
#define _LLKD_H_
|
||||||
|
|
||||||
|
#ifndef LOG_TAG
|
||||||
|
#define LOG_TAG "livelock"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <sys/cdefs.h>
|
||||||
|
|
||||||
|
__BEGIN_DECLS
|
||||||
|
|
||||||
|
bool llkInit(const char* threadname); /* threadname NULL, not spawned */
|
||||||
|
unsigned llkCheckMilliseconds(void);
|
||||||
|
|
||||||
|
/* clang-format off */
|
||||||
|
#define LLK_ENABLE_PROPERTY "ro.llk.enable"
|
||||||
|
#define LLK_ENABLE_DEFAULT false
|
||||||
|
#define KHT_ENABLE_PROPERTY "ro.khungtask.enable"
|
||||||
|
#define LLK_MLOCKALL_PROPERTY "ro.llk.mlockall"
|
||||||
|
#define LLK_MLOCKALL_DEFAULT true
|
||||||
|
#define LLK_TIMEOUT_MS_PROPERTY "ro.llk.timeout_ms"
|
||||||
|
#define KHT_TIMEOUT_PROPERTY "ro.khungtask.timeout"
|
||||||
|
#define LLK_D_TIMEOUT_MS_PROPERTY "ro.llk.D.timeout_ms"
|
||||||
|
#define LLK_Z_TIMEOUT_MS_PROPERTY "ro.llk.Z.timeout_ms"
|
||||||
|
#define LLK_CHECK_MS_PROPERTY "ro.llk.check_ms"
|
||||||
|
/* LLK_CHECK_MS_DEFAULT = actual timeout_ms / LLK_CHECKS_PER_TIMEOUT_DEFAULT */
|
||||||
|
#define LLK_CHECKS_PER_TIMEOUT_DEFAULT 5
|
||||||
|
#define LLK_BLACKLIST_PROCESS_PROPERTY "ro.llk.blacklist.process"
|
||||||
|
#define LLK_BLACKLIST_PROCESS_DEFAULT \
|
||||||
|
"0,1,2,init,[kthreadd],[khungtaskd],lmkd,lmkd.llkd,llkd,watchdogd,[watchdogd],[watchdogd/0]"
|
||||||
|
#define LLK_BLACKLIST_PARENT_PROPERTY "ro.llk.blacklist.parent"
|
||||||
|
#define LLK_BLACKLIST_PARENT_DEFAULT "0,2,[kthreadd]"
|
||||||
|
#define LLK_BLACKLIST_UID_PROPERTY "ro.llk.blacklist.uid"
|
||||||
|
#define LLK_BLACKLIST_UID_DEFAULT ""
|
||||||
|
/* clang-format on */
|
||||||
|
|
||||||
|
__END_DECLS
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C++" { /* In case this included wrapped with __BEGIN_DECLS */
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
__BEGIN_DECLS
|
||||||
|
/* C++ code allowed to not specify threadname argument for this C linkage */
|
||||||
|
bool llkInit(const char* threadname = nullptr);
|
||||||
|
__END_DECLS
|
||||||
|
std::chrono::milliseconds llkCheck(bool checkRunning = false);
|
||||||
|
|
||||||
|
/* clang-format off */
|
||||||
|
/* Default D/Z timeout of ten minutes, as milliseconds. Fully qualified so the */
/* macro expands correctly without a `using namespace std::chrono` in effect.  */
#define LLK_TIMEOUT_MS_DEFAULT  std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::minutes(10))
|
||||||
|
#define LLK_TIMEOUT_MS_MINIMUM std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::seconds(10))
|
||||||
|
#define LLK_CHECK_MS_MINIMUM std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::seconds(1))
|
||||||
|
/* clang-format on */
|
||||||
|
|
||||||
|
} /* extern "C++" */
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#endif /* _LLKD_H_ */
|
||||||
1159
llkd/libllkd.cpp
Normal file
1159
llkd/libllkd.cpp
Normal file
File diff suppressed because it is too large
Load diff
50
llkd/llkd.cpp
Normal file
50
llkd/llkd.cpp
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2018 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "llkd.h"
|
||||||
|
|
||||||
|
#include <sched.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
#include <android-base/logging.h>
|
||||||
|
|
||||||
|
using namespace std::chrono;
|
||||||
|
|
||||||
|
int main(int, char**) {
|
||||||
|
LOG(INFO) << "started";
|
||||||
|
|
||||||
|
bool enabled = llkInit();
|
||||||
|
|
||||||
|
// Would like this policy to be automatic as part of libllkd,
|
||||||
|
// but that would be presumptuous and bad side-effect.
|
||||||
|
struct sched_param param;
|
||||||
|
memset(¶m, 0, sizeof(param));
|
||||||
|
sched_setscheduler(0, SCHED_BATCH, ¶m);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
if (enabled) {
|
||||||
|
::usleep(duration_cast<microseconds>(llkCheck()).count());
|
||||||
|
} else {
|
||||||
|
::pause();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// NOTREACHED
|
||||||
|
|
||||||
|
LOG(INFO) << "exiting";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
18
llkd/llkd.rc
Normal file
18
llkd/llkd.rc
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Configure [khungtaskd]
|
||||||
|
on property:ro.khungtask.enable=true
|
||||||
|
write /proc/sys/kernel/hung_task_timeout_secs ${ro.khungtask.timeout:-720}
|
||||||
|
write /proc/sys/kernel/hung_task_warnings 65535
|
||||||
|
write /proc/sys/kernel/hung_task_check_count 65535
|
||||||
|
write /proc/sys/kernel/hung_task_panic 1
|
||||||
|
|
||||||
|
on property:ro.llk.enable=true
|
||||||
|
start llkd
|
||||||
|
|
||||||
|
service llkd /system/bin/llkd
|
||||||
|
class late_start
|
||||||
|
disabled
|
||||||
|
user llkd
|
||||||
|
group llkd readproc
|
||||||
|
capabilities KILL IPC_LOCK
|
||||||
|
file /proc/sysrq-trigger w
|
||||||
|
writepid /dev/cpuset/system-background/tasks
|
||||||
41
llkd/tests/Android.bp
Normal file
41
llkd/tests/Android.bp
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
// Copyright (C) 2018 The Android Open Source Project
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
cc_test {
|
||||||
|
name: "llkd_unit_test",
|
||||||
|
|
||||||
|
shared_libs: [
|
||||||
|
"libbase",
|
||||||
|
"liblog",
|
||||||
|
],
|
||||||
|
header_libs: [
|
||||||
|
"llkd_headers",
|
||||||
|
],
|
||||||
|
|
||||||
|
target: {
|
||||||
|
android: {
|
||||||
|
srcs: [
|
||||||
|
"llkd_test.cpp",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
cflags: [
|
||||||
|
"-Wall",
|
||||||
|
"-Wextra",
|
||||||
|
"-Werror",
|
||||||
|
],
|
||||||
|
|
||||||
|
compile_multilib: "first",
|
||||||
|
}
|
||||||
243
llkd/tests/llkd_test.cpp
Normal file
243
llkd/tests/llkd_test.cpp
Normal file
|
|
@ -0,0 +1,243 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2018 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <android-base/properties.h>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <log/log_time.h> // for MS_PER_SEC and US_PER_SEC
|
||||||
|
|
||||||
|
#include "llkd.h"
|
||||||
|
|
||||||
|
using namespace std::chrono;
|
||||||
|
using namespace std::chrono_literals;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Millisecond flavour of android::base::GetUintProperty(): looks up `key`,
// passing `def` as the default and the largest representable millisecond
// count as the maximum, and wraps the result in a milliseconds duration.
milliseconds GetUintProperty(const std::string& key, milliseconds def) {
    const auto fallback = static_cast<uint64_t>(def.count());
    const auto ceiling = static_cast<uint64_t>(milliseconds::max().count());
    return milliseconds(android::base::GetUintProperty(key, fallback, ceiling));
}
|
||||||
|
|
||||||
|
// Seconds flavour of android::base::GetUintProperty(): looks up `key`,
// passing `def` as the default and the largest representable seconds count
// as the maximum, and wraps the result in a seconds duration.
seconds GetUintProperty(const std::string& key, seconds def) {
    const auto fallback = static_cast<uint64_t>(def.count());
    const auto ceiling = static_cast<uint64_t>(seconds::max().count());
    return seconds(android::base::GetUintProperty(key, fallback, ceiling));
}
|
||||||
|
|
||||||
|
// GTEST_LOG_(WARNING) output is fugly, this has much less noise
|
||||||
|
// ToDo: look into fixing googletest to produce output that matches style of
|
||||||
|
// all the other status messages, and can switch off __line__ and
|
||||||
|
// __function__ noise
|
||||||
|
#define GTEST_LOG_WARNING std::cerr << "[ WARNING ] "
|
||||||
|
#define GTEST_LOG_INFO std::cerr << "[ INFO ] "
|
||||||
|
|
||||||
|
// Properties is _not_ a high performance ABI!
|
||||||
|
// Pauses the test for 200 milliseconds; used after each service or property
// command issued by this file.
void rest() {
    constexpr useconds_t kSettleUs = 200000;
    usleep(kSettleUs);
}
|
||||||
|
|
||||||
|
// Runs `command` through system(). When the test is not running as uid 0, or
// when the direct invocation reports a non-zero status, the command is run
// again prefixed with "su root ".
void execute(const char* command) {
    const bool running_as_root = (getuid() == 0);
    if (running_as_root && system(command) == 0) {
        return;  // direct invocation succeeded
    }
    const std::string elevated = std::string("su root ") + command;
    system(elevated.c_str());
}
|
||||||
|
|
||||||
|
// Works out how long a test must wait for llkd to react to a thread stuck in
// `state` ('Z' selects the zombie timeout property, anything else the 'D'
// one).
//
// If the timeout/check properties are not already 120s/10s, llkd is stopped
// and the properties are rewritten to those values; llkd is then started when
// the enable property reads true. The effective timeout is recomputed from
// the properties, the check period is added, and the result is returned in
// seconds with a one-third margin on top.
seconds llkdSleepPeriod(char state) {
    auto llk_enabled = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, LLK_ENABLE_DEFAULT);
    const bool enable_unset =
        android::base::GetProperty(LLK_ENABLE_PROPERTY, "nothing") == "nothing";
    if (enable_unset) {
        GTEST_LOG_INFO << LLK_ENABLE_PROPERTY " defaults to " << (llk_enabled ? "true" : "false")
                       << "\n";
    }

    // Hail Mary hope is unconfigured.
    bool reconfigure =
        GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, LLK_TIMEOUT_MS_DEFAULT) != milliseconds(120s);
    if (!reconfigure) {
        // Only consulted when the timeout already matches.
        reconfigure = GetUintProperty(LLK_CHECK_MS_PROPERTY,
                                      LLK_TIMEOUT_MS_DEFAULT / LLK_CHECKS_PER_TIMEOUT_DEFAULT) !=
                      milliseconds(10s);
    }
    if (reconfigure) {
        static const char* const kReconfigure[] = {
            "stop llkd",
            "setprop " LLK_TIMEOUT_MS_PROPERTY " 120000",
            "setprop " KHT_TIMEOUT_PROPERTY " 130",
            "setprop " LLK_CHECK_MS_PROPERTY " 10000",
            "setprop " LLK_ENABLE_PROPERTY " true",
        };
        for (const char* command : kReconfigure) {
            execute(command);
            rest();
        }
    }

    llk_enabled = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, false);
    if (!llk_enabled) {
        GTEST_LOG_WARNING << "llkd disabled\n";
    } else {
        execute("start llkd");
        rest();
        GTEST_LOG_INFO << "llkd enabled\n";
    }

    /* KISS follows llk_init() */
    constexpr auto kChecks = LLK_CHECKS_PER_TIMEOUT_DEFAULT;
    milliseconds overall_timeout = LLK_TIMEOUT_MS_DEFAULT;
    const seconds kht_timeout = GetUintProperty(
        KHT_TIMEOUT_PROPERTY, duration_cast<seconds>(overall_timeout * (1 + kChecks) / kChecks));
    // The fallback is derived in whole seconds before widening to milliseconds.
    overall_timeout = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY,
                                      milliseconds(kht_timeout * kChecks / (1 + kChecks)));
    if (overall_timeout < LLK_TIMEOUT_MS_MINIMUM) {
        overall_timeout = LLK_TIMEOUT_MS_MINIMUM;
    }

    milliseconds check_period = overall_timeout / kChecks;
    const char* state_property =
        (state == 'Z') ? LLK_Z_TIMEOUT_MS_PROPERTY : LLK_D_TIMEOUT_MS_PROPERTY;
    milliseconds state_timeout = GetUintProperty(state_property, overall_timeout);
    if (state_timeout < LLK_TIMEOUT_MS_MINIMUM) {
        state_timeout = LLK_TIMEOUT_MS_MINIMUM;
    }

    if (check_period > state_timeout) {
        check_period = state_timeout;
    }
    check_period = GetUintProperty(LLK_CHECK_MS_PROPERTY, check_period);

    auto react = duration_cast<seconds>(state_timeout + check_period);
    if (react == 0s) {
        ++react;
    } else if (react > 59s) {
        GTEST_LOG_WARNING << "llkd is configured for about " << duration_cast<minutes>(react).count()
                          << " minutes to react\n";
    }

    // 33% margin for the test to naturally timeout waiting for llkd to respond
    return (react * 4 + 2s) / 3;
}
|
||||||
|
|
||||||
|
// Reaps `child_pid` and asserts that it did not exit on its own but was
// terminated by SIGKILL.
inline void waitForPid(pid_t child_pid) {
    int status;
    const pid_t reaped = waitpid(child_pid, &status, 0);
    ASSERT_LE(0, reaped);
    EXPECT_FALSE(WIFEXITED(status)) << "[ INFO ] exit=" << WEXITSTATUS(status);
    ASSERT_TRUE(WIFSIGNALED(status));
    ASSERT_EQ(WTERMSIG(status), SIGKILL);
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// The tests that use this helper are to simulate processes stuck in 'D'
|
||||||
|
// state that are experiencing forward scheduled progress. As such the
|
||||||
|
// expectation is that llkd will _not_ perform any mitigations. The sleepfor
|
||||||
|
// argument helps us set the amount of forward scheduler progress.
|
||||||
|
static void llkd_driver_ABA(const microseconds sleepfor) {
|
||||||
|
const auto period = llkdSleepPeriod('D');
|
||||||
|
if (period <= sleepfor) {
|
||||||
|
GTEST_LOG_WARNING << "llkd configuration too short for "
|
||||||
|
<< duration_cast<milliseconds>(sleepfor).count() << "ms work cycle\n";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto child_pid = fork();
|
||||||
|
ASSERT_LE(0, child_pid);
|
||||||
|
int wstatus;
|
||||||
|
if (!child_pid) {
|
||||||
|
auto ratio = period / sleepfor;
|
||||||
|
ASSERT_LT(0, ratio);
|
||||||
|
// vfork() parent is uninterruptable D state waiting for child to exec()
|
||||||
|
while (--ratio > 0) {
|
||||||
|
auto driver_pid = vfork();
|
||||||
|
ASSERT_LE(0, driver_pid);
|
||||||
|
if (driver_pid) { // parent
|
||||||
|
waitpid(driver_pid, &wstatus, 0);
|
||||||
|
if (!WIFEXITED(wstatus)) {
|
||||||
|
exit(42);
|
||||||
|
}
|
||||||
|
if (WEXITSTATUS(wstatus) != 42) {
|
||||||
|
exit(42);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
usleep(sleepfor.count());
|
||||||
|
exit(42);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
ASSERT_LE(0, waitpid(child_pid, &wstatus, 0));
|
||||||
|
EXPECT_TRUE(WIFEXITED(wstatus));
|
||||||
|
if (WIFEXITED(wstatus)) {
|
||||||
|
EXPECT_EQ(0, WEXITSTATUS(wstatus));
|
||||||
|
}
|
||||||
|
ASSERT_FALSE(WIFSIGNALED(wstatus)) << "[ INFO ] signo=" << WTERMSIG(wstatus);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(llkd, driver_ABA_fast) {
|
||||||
|
llkd_driver_ABA(5ms);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(llkd, driver_ABA_slow) {
|
||||||
|
llkd_driver_ABA(1s);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(llkd, driver_ABA_glacial) {
|
||||||
|
llkd_driver_ABA(1min);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Following tests must be last in this file to capture possible errant
|
||||||
|
// kernel_panic mitigation failure.
|
||||||
|
|
||||||
|
// The following tests simulate processes stuck in 'Z' or 'D' state with
|
||||||
|
// no forward scheduling progress, but interruptible. As such the expectation
|
||||||
|
// is that llkd will perform kill mitigation and not progress to kernel_panic.
|
||||||
|
|
||||||
|
// Forks a child that itself forks a grandchild and never waits for it. The
// grandchild exits after one second while the child sleeps for the llkd
// reaction period. The test expects the child to be terminated by SIGKILL
// (checked in waitForPid) rather than to reach its exit(42).
TEST(llkd, zombie) {
    const auto period = llkdSleepPeriod('Z');

    /* Create a Persistent Zombie Process */
    const pid_t child_pid = fork();
    ASSERT_LE(0, child_pid);
    if (child_pid == 0) {
        const auto zombie_pid = fork();
        ASSERT_LE(0, zombie_pid);
        if (zombie_pid == 0) {
            // Grandchild: exits shortly and is never waited for.
            sleep(1);
            exit(0);
        }
        // Child: outlive the grandchild without reaping it.
        sleep(period.count());
        exit(42);
    }

    waitForPid(child_pid);
}
|
||||||
|
|
||||||
|
// Forks a child that vfork()s a grandchild which just sleeps for the llkd
// reaction period, leaving the child blocked in vfork() for that whole time.
// The test expects the child to be terminated by SIGKILL (checked in
// waitForPid) rather than to reach its exit(42).
TEST(llkd, driver) {
    const auto period = llkdSleepPeriod('D');

    /* Create a Persistent Device Process */
    auto child_pid = fork();
    ASSERT_LE(0, child_pid);
    if (!child_pid) {
        // vfork() parent is uninterruptable D state waiting for child to exec()
        auto driver_pid = vfork();
        if (driver_pid < 0) {
            // No gtest assertions in the forked child: a failure would return
            // into the test runner inside the child. A normal exit makes
            // waitForPid() fail in the real test process.
            exit(42);
        }
        sleep(period.count());
        if (!driver_pid) {
            // vfork() child shares the parent's address space: use _exit() so
            // no atexit handlers or stdio flushes run on its behalf.
            _exit(0);
        }
        exit(42);
    }

    waitForPid(child_pid);
}
|
||||||
Loading…
Add table
Reference in a new issue