Merge "Add recoverable GWP-ASan."

This commit is contained in:
Mitch Phillips 2023-02-03 18:35:08 +00:00 committed by Gerrit Code Review
commit 6e0eb996b3
4 changed files with 233 additions and 94 deletions

View file

@ -98,6 +98,7 @@ cc_library_static {
"libbase_headers",
"libdebuggerd_common_headers",
"bionic_libc_platform_headers",
"gwp_asan_headers",
],
whole_static_libs: [

View file

@ -65,6 +65,7 @@
#include "crash_test.h"
#include "debuggerd/handler.h"
#include "gtest/gtest.h"
#include "libdebuggerd/utility.h"
#include "protocol.h"
#include "tombstoned/tombstoned.h"
@ -111,19 +112,6 @@ constexpr char kWaitForDebuggerKey[] = "debug.debuggerd.wait_for_debugger";
ASSERT_MATCH(result, \
R"(#\d\d pc [0-9a-f]+\s+ \S+ (\(offset 0x[0-9a-f]+\) )?\()" frame_name R"(\+)");
// Enable GWP-ASan at the start of this process. GWP-ASan is enabled using
// process sampling, so we need to ensure we force GWP-ASan on.
__attribute__((constructor)) static void enable_gwp_asan() {
android_mallopt_gwp_asan_options_t opts;
// No, we're not an app, but let's turn ourselves on without sampling.
// Technically, if someone's using the *.default_app sysprops, they'll adjust
// our settings, but I don't think this will be common on a device that's
// running debuggerd_tests.
opts.desire = android_mallopt_gwp_asan_options_t::Action::TURN_ON_FOR_APP;
opts.program_name = "";
android_mallopt(M_INITIALIZE_GWP_ASAN, &opts, sizeof(android_mallopt_gwp_asan_options_t));
}
static void tombstoned_intercept(pid_t target_pid, unique_fd* intercept_fd, unique_fd* output_fd,
InterceptStatus* status, DebuggerdDumpType intercept_type) {
intercept_fd->reset(socket_local_client(kTombstonedInterceptSocketName,
@ -469,76 +457,6 @@ static void SetTagCheckingLevelAsync() {
}
#endif
// Number of iterations required to reliably guarantee a GWP-ASan crash.
// GWP-ASan's sample rate is not truly nondeterministic: it initialises a
// thread-local counter at 2*SampleRate, and decrements it on each malloc().
// Once the counter reaches zero, we provide a sampled allocation. Then, double
// that figure to allow for left/right allocation alignment, as this is done
// randomly without bias.
// The value below is 0x20000 (131072) loop iterations.
#define GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH (0x20000)
// Describes a single GWP-ASan crash scenario run by GwpAsanCrasherTest.
struct GwpAsanTestParameters {
// Number of bytes requested from malloc() for the allocation under test.
size_t alloc_size;
// If true, the allocation is freed before the faulting store (use-after-free);
// otherwise it is freed after the store.
bool free_before_access;
// Index, relative to the start of the allocation, that the test writes to.
// May be negative (underflow) or past the end (overflow).
int access_offset;
std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line.
};
// CrasherTest fixture parameterized by a single GwpAsanTestParameters scenario.
struct GwpAsanCrasherTest : CrasherTest, testing::WithParamInterface<GwpAsanTestParameters> {};
// Scenarios fed to GwpAsanCrasherTest: two use-after-free accesses, one
// overflow to the right of the allocation and one underflow to the left. The
// trailing string is the text expected after "Cause: [GWP-ASan]: " in the
// crash report.
GwpAsanTestParameters gwp_asan_tests[] = {
{/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 0, "Use After Free, 0 bytes into a 7-byte allocation"},
{/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 1, "Use After Free, 1 byte into a 7-byte allocation"},
{/* alloc_size */ 7, /* free_before_access */ false, /* access_offset */ 16, "Buffer Overflow, 9 bytes right of a 7-byte allocation"},
{/* alloc_size */ 16, /* free_before_access */ false, /* access_offset */ -1, "Buffer Underflow, 1 byte left of a 16-byte allocation"},
};
// Run GwpAsanCrasherTest once per entry in gwp_asan_tests.
INSTANTIATE_TEST_SUITE_P(GwpAsanTests, GwpAsanCrasherTest, testing::ValuesIn(gwp_asan_tests));
// Repeatedly performs the parameterized bad access in a crasher process until
// GWP-ASan catches it, then checks that both the intercepted tombstone and
// logcat carry the expected GWP-ASan report.
TEST_P(GwpAsanCrasherTest, gwp_asan_uaf) {
  // MTE hardware reliably catches these errors instead of GWP-ASan, so there is
  // nothing to test there.
  if (mte_supported()) {
    GTEST_SKIP() << "Skipped on MTE.";
  }
  // HWASan reliably catches the test errors as well.
  SKIP_WITH_HWASAN;

  GwpAsanTestParameters test_case = GetParam();
  LogcatCollector logcat_collector;

  int intercept_result;
  unique_fd output_fd;
  StartProcess([&test_case]() {
    for (unsigned remaining = GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH; remaining != 0; --remaining) {
      // 'volatile' keeps the compiler from eliminating the store.
      volatile char* bytes = reinterpret_cast<volatile char*>(malloc(test_case.alloc_size));
      if (test_case.free_before_access) {
        free(static_cast<void*>(const_cast<char*>(bytes)));
      }
      bytes[test_case.access_offset] = 42;
      if (!test_case.free_before_access) {
        free(static_cast<void*>(const_cast<char*>(bytes)));
      }
    }
  });

  StartIntercept(&output_fd);
  FinishCrasher();
  AssertDeath(SIGSEGV);
  FinishIntercept(&intercept_result);

  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";

  // Slot 0 holds the intercepted tombstone output, slot 1 the logcat output.
  std::vector<std::string> captured_logs(2);
  ConsumeFd(std::move(output_fd), &captured_logs[0]);
  logcat_collector.Collect(&captured_logs[1]);

  for (const auto& log_text : captured_logs) {
    ASSERT_MATCH(log_text, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))");
    ASSERT_MATCH(log_text, R"(Cause: \[GWP-ASan\]: )" + test_case.cause_needle);
    // A deallocation trace is only expected for use-after-free scenarios.
    if (test_case.free_before_access) {
      ASSERT_MATCH(log_text, R"(deallocated by thread .*\n.*#00 pc)");
    }
    ASSERT_MATCH(log_text, R"((^|\s)allocated by thread .*\n.*#00 pc)");
  }
}
// CrasherTest fixture parameterized by a single size_t value.
struct SizeParamCrasherTest : CrasherTest, testing::WithParamInterface<size_t> {};
// Run the SizeParamCrasherTest cases with sizes 0, 16 and 131072.
INSTANTIATE_TEST_SUITE_P(Sizes, SizeParamCrasherTest, testing::Values(0, 16, 131072));
@ -1279,7 +1197,11 @@ TEST_F(CrasherTest, fake_pid) {
static const char* const kDebuggerdSeccompPolicy =
"/system/etc/seccomp_policy/crash_dump." ABI_STRING ".policy";
static pid_t seccomp_fork_impl(void (*prejail)()) {
static void setup_jail(minijail* jail) {
if (!jail) {
LOG(FATAL) << "failed to create minijail";
}
std::string policy;
if (!android::base::ReadFileToString(kDebuggerdSeccompPolicy, &policy)) {
PLOG(FATAL) << "failed to read policy file";
@ -1306,15 +1228,15 @@ static pid_t seccomp_fork_impl(void (*prejail)()) {
PLOG(FATAL) << "failed to seek tmp_fd";
}
ScopedMinijail jail{minijail_new()};
if (!jail) {
LOG(FATAL) << "failed to create minijail";
}
minijail_no_new_privs(jail);
minijail_log_seccomp_filter_failures(jail);
minijail_use_seccomp_filter(jail);
minijail_parse_seccomp_filters_from_fd(jail, tmp_fd.release());
}
minijail_no_new_privs(jail.get());
minijail_log_seccomp_filter_failures(jail.get());
minijail_use_seccomp_filter(jail.get());
minijail_parse_seccomp_filters_from_fd(jail.get(), tmp_fd.release());
static pid_t seccomp_fork_impl(void (*prejail)()) {
ScopedMinijail jail{minijail_new()};
setup_jail(jail.get());
pid_t result = fork();
if (result == -1) {
@ -1628,6 +1550,138 @@ TEST_F(CrasherTest, competing_tracer) {
AssertDeath(SIGABRT);
}
// Describes a single GWP-ASan crash scenario run by GwpAsanCrasherTest.
struct GwpAsanTestParameters {
// Number of bytes requested from malloc() for the allocation under test.
size_t alloc_size;
// If true, the allocation is freed before the faulting store (use-after-free);
// otherwise it is freed after the store.
bool free_before_access;
// Index, relative to the start of the allocation, that the test writes to.
// May be negative (underflow) or past the end (overflow).
int access_offset;
std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line.
};
// CrasherTest fixture parameterized by a tuple of:
//   <0> the GWP-ASan scenario to run,
//   <1> whether GWP-ASan's recoverable mode is requested,
//   <2> whether the crashing process enters a seccomp minijail first.
struct GwpAsanCrasherTest
: CrasherTest,
testing::WithParamInterface<
std::tuple<GwpAsanTestParameters, /* recoverable */ bool, /* seccomp */ bool>> {};
// Scenarios fed to GwpAsanCrasherTest: two use-after-free accesses, one
// overflow to the right of the allocation and one underflow to the left. The
// trailing string is the text expected after "Cause: [GWP-ASan]: " in the
// crash report.
GwpAsanTestParameters gwp_asan_tests[] = {
{/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 0,
"Use After Free, 0 bytes into a 7-byte allocation"},
{/* alloc_size */ 15, /* free_before_access */ true, /* access_offset */ 1,
"Use After Free, 1 byte into a 15-byte allocation"},
{/* alloc_size */ 4096, /* free_before_access */ false, /* access_offset */ 4098,
"Buffer Overflow, 2 bytes right of a 4096-byte allocation"},
{/* alloc_size */ 4096, /* free_before_access */ false, /* access_offset */ -1,
"Buffer Underflow, 1 byte left of a 4096-byte allocation"},
};
// Instantiate GwpAsanCrasherTest for every combination of scenario,
// recoverable on/off and seccomp on/off. The trailing lambda derives a
// readable test-name suffix from the parameters, e.g.
// "UseAfterFree7AllocRight0BytesRecoverableSeccomp".
INSTANTIATE_TEST_SUITE_P(
    GwpAsanTests, GwpAsanCrasherTest,
    testing::Combine(testing::ValuesIn(gwp_asan_tests),
                     /* recoverable */ testing::Bool(),
                     /* seccomp */ testing::Bool()),
    [](const testing::TestParamInfo<
        std::tuple<GwpAsanTestParameters, /* recoverable */ bool, /* seccomp */ bool>>& info) {
      const GwpAsanTestParameters& test_case = std::get<0>(info.param);
      const bool recoverable = std::get<1>(info.param);
      const bool seccomp = std::get<2>(info.param);
      const bool is_left = test_case.access_offset < 0;

      std::string suffix(test_case.free_before_access ? "UseAfterFree" : "Overflow");
      suffix += testing::PrintToString(test_case.alloc_size);
      suffix += "Alloc";
      // Encode the direction and magnitude of the access separately so the
      // name contains no '-' sign.
      suffix += is_left ? "Left" : "Right";
      suffix += testing::PrintToString(is_left ? test_case.access_offset * -1
                                               : test_case.access_offset);
      suffix += "Bytes";
      if (recoverable) {
        suffix += "Recoverable";
      }
      if (seccomp) {
        suffix += "Seccomp";
      }
      return suffix;
    });
// Drives one GWP-ASan scenario end to end. The crasher process re-executes
// this test binary, with GWP-ASan forced on through environment variables, and
// runs the matching DISABLED_run_gwp_asan_test case, which performs the bad
// access. This test then checks how the child terminated and, unless seccomp is
// in use, that the tombstone and logcat contain the expected GWP-ASan report.
TEST_P(GwpAsanCrasherTest, run_gwp_asan_test) {
  if (mte_supported()) {
    // Skip this test on MTE hardware, as MTE will reliably catch these errors
    // instead of GWP-ASan.
    GTEST_SKIP() << "Skipped on MTE.";
  }
  // Skip this test on HWASan, which will reliably catch test errors as well.
  SKIP_WITH_HWASAN;

  GwpAsanTestParameters params = std::get<0>(GetParam());
  bool recoverable = std::get<1>(GetParam());
  LogcatCollector logcat_collector;

  int intercept_result;
  unique_fd output_fd;
  StartProcess([&recoverable]() {
    // The second-to-last slot is reserved for the optional recoverable flag;
    // the final nullptr always terminates the array.
    const char* env[] = {"GWP_ASAN_SAMPLE_RATE=1", "GWP_ASAN_PROCESS_SAMPLING=1",
                         "GWP_ASAN_MAX_ALLOCS=40000", nullptr, nullptr};
    if (recoverable) {
      env[3] = "GWP_ASAN_RECOVERABLE=true";
    }

    // Select the DISABLED_ twin of the currently running parameterized test.
    std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
    test_name = std::regex_replace(test_name, std::regex("run_gwp_asan_test"),
                                   "DISABLED_run_gwp_asan_test");
    std::string test_filter = "--gtest_filter=*";
    test_filter += test_name;
    std::string this_binary = android::base::GetExecutablePath();
    const char* args[] = {this_binary.c_str(), "--gtest_also_run_disabled_tests",
                          test_filter.c_str(), nullptr};
    // We check the crash report from a debuggerd handler and from logcat. The
    // echo from stdout/stderr of the subprocess trips up atest, because it
    // doesn't like that two tests started in a row without the first one
    // finishing (even though the second one is in a subprocess).
    close(STDOUT_FILENO);
    close(STDERR_FILENO);
    execve(this_binary.c_str(), const_cast<char**>(args), const_cast<char**>(env));
    // execve() only returns on failure. Exit with a non-zero status so the
    // parent cannot mistake a failed exec for the clean exit it expects in the
    // recoverable case. stderr is already closed, so nothing can be logged.
    _exit(1);
  });

  StartIntercept(&output_fd);
  FinishCrasher();
  // In recoverable mode the child is expected to survive the bad access and
  // exit normally; otherwise it must die with SIGSEGV.
  if (recoverable) {
    AssertDeath(0);
  } else {
    AssertDeath(SIGSEGV);
  }
  FinishIntercept(&intercept_result);

  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";

  // Slot 0 holds the intercepted tombstone output, slot 1 the logcat output.
  std::vector<std::string> log_sources(2);
  ConsumeFd(std::move(output_fd), &log_sources[0]);
  logcat_collector.Collect(&log_sources[1]);

  // seccomp forces the fallback handler, which doesn't print GWP-ASan debugging
  // information. Make sure the recovery still works, but the report won't be
  // hugely useful, it looks like a regular SEGV.
  bool seccomp = std::get<2>(GetParam());
  if (!seccomp) {
    for (const auto& result : log_sources) {
      ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))");
      ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle);
      // A deallocation trace is only expected for use-after-free scenarios.
      if (params.free_before_access) {
        ASSERT_MATCH(result, R"(deallocated by thread .*\n.*#00 pc)");
      }
      ASSERT_MATCH(result, R"((^|\s)allocated by thread .*\n.*#00 pc)");
    }
  }
}
// Crashing half of run_gwp_asan_test: performs the parameterized bad access,
// optionally after entering the seccomp minijail. It is DISABLED_ so it only
// runs when explicitly requested with --gtest_also_run_disabled_tests.
TEST_P(GwpAsanCrasherTest, DISABLED_run_gwp_asan_test) {
  GwpAsanTestParameters test_case = std::get<0>(GetParam());
  const bool use_seccomp = std::get<2>(GetParam());

  if (use_seccomp) {
    ScopedMinijail jail{minijail_new()};
    setup_jail(jail.get());
    minijail_enter(jail.get());
  }

  // Use 'volatile' to prevent a very clever compiler eliminating the store.
  char* volatile victim = reinterpret_cast<char* volatile>(malloc(test_case.alloc_size));
  if (test_case.free_before_access) {
    free(static_cast<void*>(const_cast<char*>(victim)));
  }
  victim[test_case.access_offset] = 42;
  if (!test_case.free_before_access) {
    free(static_cast<void*>(const_cast<char*>(victim)));
  }
}
TEST_F(CrasherTest, fdsan_warning_abort_message) {
int intercept_result;
unique_fd output_fd;

View file

@ -565,17 +565,38 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
process_info = g_callbacks.get_process_info();
}
// GWP-ASan catches use-after-free and heap-buffer-overflow by using PROT_NONE
// guard pages, which lead to SEGV. Normally, debuggerd prints a bug report
// and the process terminates, but in some cases, we actually want to print
// the bug report and let the signal handler return, and restart the process.
// In order to do that, we need to disable GWP-ASan's guard pages. The
// following callbacks handle this case.
gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
bool gwp_asan_recoverable = false;
if (signal_number == SIGSEGV && signal_has_si_addr(info) &&
gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery &&
gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report &&
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report &&
gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
gwp_asan_recoverable = true;
}
// If sival_int is ~0, it means that the fallback handler has been called
// once before and this function is being called again to dump the stack
// of a specific thread. It is possible that the prctl call might return 1,
// then return 0 in subsequent calls, so check the sival_int to determine if
// the fallback handler should be called first.
if (si_val == kDebuggerdFallbackSivalUintptrRequestDump ||
prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) {
bool no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1;
if (si_val == kDebuggerdFallbackSivalUintptrRequestDump || no_new_privs) {
// This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely,
// you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
// ANR trace.
debuggerd_fallback_handler(info, ucontext, process_info.abort_msg);
if (no_new_privs && gwp_asan_recoverable) {
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
return;
}
resend_signal(info);
return;
}
@ -649,6 +670,9 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
// If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
// starting to dump right before our death.
pthread_mutex_unlock(&crash_mutex);
} else if (gwp_asan_recoverable) {
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
pthread_mutex_unlock(&crash_mutex);
}
#ifdef __aarch64__
else if (info->si_signo == SIGSEGV &&
@ -727,3 +751,52 @@ void debuggerd_init(debuggerd_callbacks_t* callbacks) {
debuggerd_register_handlers(&action);
}
// When debuggerd's signal handler is the first handler called, it's great at
// handling the recoverable GWP-ASan mode. For apps, sigchain (from libart) is
// always the first signal handler, and so the following function is what
// sigchain must call before processing the signal. This allows for processing
// of a potentially recoverable GWP-ASan crash. If the signal requires GWP-ASan
// recovery, then dump a report (via the regular debuggerd hanndler), and patch
// up the allocator, and allow the process to continue (indicated by returning
// 'true'). If the crash has nothing to do with GWP-ASan, or recovery isn't
// possible, return 'false'.
bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context) {
if (signal_number != SIGSEGV || !signal_has_si_addr(info)) return false;
gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
if (gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery == nullptr ||
gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report == nullptr ||
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report == nullptr ||
!gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
return false;
}
// Only dump a crash report for the first GWP-ASan crash. ActivityManager
// doesn't like it when an app crashes multiple times, and is even more strict
// about an app crashing multiple times in a short time period. While the app
// won't crash fully when we do GWP-ASan recovery, ActivityManager still gets
// the information about the crash through the DropBoxManager service. If an
// app has multiple back-to-back GWP-ASan crashes, this would lead to the app
// being killed, which defeats the purpose of having the recoverable mode. To
// mitigate against this, only generate a debuggerd crash report for the first
// GWP-ASan crash encountered. We still need to do the patching up of the
// allocator though, so do that.
static pthread_mutex_t first_crash_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&first_crash_mutex);
static bool first_crash = true;
if (first_crash) {
// `debuggerd_signal_handler` will call
// `debuggerd_gwp_asan_(pre|post)_crash_report`, so no need to manually call
// them here.
debuggerd_signal_handler(signal_number, info, context);
first_crash = false;
} else {
gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
}
pthread_mutex_unlock(&first_crash_mutex);
return true;
}

View file

@ -46,14 +46,25 @@ struct debugger_process_info {
size_t scudo_ring_buffer_size;
};
// GWP-ASan callbacks to support the recoverable mode. Separate from the
// debuggerd_callbacks_t because these values aren't available at debuggerd_init
// time, and have to be synthesized on request.
typedef struct {
// Given the fault address of a SIGSEGV, returns true if the fault needs
// GWP-ASan recovery.
bool (*debuggerd_needs_gwp_asan_recovery)(void* fault_addr);
// Called with the fault address once recovery is known to be needed, before
// the crash is reported.
void (*debuggerd_gwp_asan_pre_crash_report)(void* fault_addr);
// Called with the fault address after the pre-crash hook (and any crash
// report), before the signal handler returns to let the process continue.
void (*debuggerd_gwp_asan_post_crash_report)(void* fault_addr);
} gwp_asan_callbacks_t;
// These callbacks are called in a signal handler, and thus must be async signal safe.
// If null, the callbacks will not be called.
typedef struct {
// Returns the debugger_process_info describing the crashing process.
debugger_process_info (*get_process_info)();
// Returns the GWP-ASan recovery callbacks; synthesized on request because they
// aren't available at debuggerd_init time.
gwp_asan_callbacks_t (*get_gwp_asan_callbacks)();
// NOTE(review): presumably invoked after a dump completes; confirm against the handler.
void (*post_dump)();
} debuggerd_callbacks_t;
// Initializes debuggerd's crash handling with the given callbacks and registers
// its signal handlers.
void debuggerd_init(debuggerd_callbacks_t* callbacks);
// For use by an outer signal handler that runs before debuggerd's own. Returns
// true if the signal was a recoverable GWP-ASan fault that has been handled and
// the process may continue, and false if the signal must be processed normally.
bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context);
// DEBUGGER_ACTION_DUMP_TOMBSTONE and DEBUGGER_ACTION_DUMP_BACKTRACE are both
// triggered via BIONIC_SIGNAL_DEBUGGER. The debugger_action_t is sent via si_value