From 18ce54241cb726d9783be7385c734f6ea6d3051b Mon Sep 17 00:00:00 2001 From: Mitch Phillips Date: Thu, 19 Jan 2023 14:23:49 -0800 Subject: [PATCH] Add recoverable GWP-ASan. Recoverable GWP-ASan is a mode landed upstream in https://reviews.llvm.org/D140173. For more information about why/what it is, see https://android-review.git.corp.google.com/c/platform/bionic/+/2394588. This patch makes debuggerd call the required libc callbacks for GWP-ASan to recover from the memory corruption. It also adds the functionality that libart/sigchain eventually ends up calling, which dumps a GWP-ASan report for the first error encountered. Test: Build the platform, run sanitizer-status in recoverable mode, asserting that it doesn't crash but we get a debuggerd report. Bug: 247012630 Change-Id: I27212f7250844c20a8fd1e961417cdb4e5bd3626 --- debuggerd/Android.bp | 1 + debuggerd/debuggerd_test.cpp | 238 +++++++++++++++--------- debuggerd/handler/debuggerd_handler.cpp | 77 +++++++- debuggerd/include/debuggerd/handler.h | 11 ++ 4 files changed, 233 insertions(+), 94 deletions(-) diff --git a/debuggerd/Android.bp b/debuggerd/Android.bp index 15b5813b1..5da1b4005 100644 --- a/debuggerd/Android.bp +++ b/debuggerd/Android.bp @@ -98,6 +98,7 @@ cc_library_static { "libbase_headers", "libdebuggerd_common_headers", "bionic_libc_platform_headers", + "gwp_asan_headers", ], whole_static_libs: [ diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp index 9c1b1361d..f904d487a 100644 --- a/debuggerd/debuggerd_test.cpp +++ b/debuggerd/debuggerd_test.cpp @@ -64,6 +64,7 @@ #include "crash_test.h" #include "debuggerd/handler.h" +#include "gtest/gtest.h" #include "libdebuggerd/utility.h" #include "protocol.h" #include "tombstoned/tombstoned.h" @@ -110,19 +111,6 @@ constexpr char kWaitForDebuggerKey[] = "debug.debuggerd.wait_for_debugger"; ASSERT_MATCH(result, \ R"(#\d\d pc [0-9a-f]+\s+ \S+ (\(offset 0x[0-9a-f]+\) )?\()" frame_name R"(\+)"); -// Enable GWP-ASan at the start of this 
process. GWP-ASan is enabled using -// process sampling, so we need to ensure we force GWP-ASan on. -__attribute__((constructor)) static void enable_gwp_asan() { - android_mallopt_gwp_asan_options_t opts; - // No, we're not an app, but let's turn ourselves on without sampling. - // Technically, if someone's using the *.default_app sysprops, they'll adjust - // our settings, but I don't think this will be common on a device that's - // running debuggerd_tests. - opts.desire = android_mallopt_gwp_asan_options_t::Action::TURN_ON_FOR_APP; - opts.program_name = ""; - android_mallopt(M_INITIALIZE_GWP_ASAN, &opts, sizeof(android_mallopt_gwp_asan_options_t)); -} - static void tombstoned_intercept(pid_t target_pid, unique_fd* intercept_fd, unique_fd* output_fd, InterceptStatus* status, DebuggerdDumpType intercept_type) { intercept_fd->reset(socket_local_client(kTombstonedInterceptSocketName, @@ -468,76 +456,6 @@ static void SetTagCheckingLevelAsync() { } #endif -// Number of iterations required to reliably guarantee a GWP-ASan crash. -// GWP-ASan's sample rate is not truly nondeterministic, it initialises a -// thread-local counter at 2*SampleRate, and decrements on each malloc(). Once -// the counter reaches zero, we provide a sampled allocation. Then, double that -// figure to allow for left/right allocation alignment, as this is done randomly -// without bias. -#define GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH (0x20000) - -struct GwpAsanTestParameters { - size_t alloc_size; - bool free_before_access; - int access_offset; - std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line. 
-}; - -struct GwpAsanCrasherTest : CrasherTest, testing::WithParamInterface<GwpAsanTestParameters> {}; - -GwpAsanTestParameters gwp_asan_tests[] = { - {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 0, "Use After Free, 0 bytes into a 7-byte allocation"}, - {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 1, "Use After Free, 1 byte into a 7-byte allocation"}, - {/* alloc_size */ 7, /* free_before_access */ false, /* access_offset */ 16, "Buffer Overflow, 9 bytes right of a 7-byte allocation"}, - {/* alloc_size */ 16, /* free_before_access */ false, /* access_offset */ -1, "Buffer Underflow, 1 byte left of a 16-byte allocation"}, -}; - -INSTANTIATE_TEST_SUITE_P(GwpAsanTests, GwpAsanCrasherTest, testing::ValuesIn(gwp_asan_tests)); - -TEST_P(GwpAsanCrasherTest, gwp_asan_uaf) { - if (mte_supported()) { - // Skip this test on MTE hardware, as MTE will reliably catch these errors - // instead of GWP-ASan. - GTEST_SKIP() << "Skipped on MTE."; - } - // Skip this test on HWASan, which will reliably catch test errors as well. 
- SKIP_WITH_HWASAN; - - GwpAsanTestParameters params = GetParam(); - LogcatCollector logcat_collector; - - int intercept_result; - unique_fd output_fd; - StartProcess([&params]() { - for (unsigned i = 0; i < GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH; ++i) { - volatile char* p = reinterpret_cast<volatile char*>(malloc(params.alloc_size)); - if (params.free_before_access) free(static_cast<void*>(const_cast<char*>(p))); - p[params.access_offset] = 42; - if (!params.free_before_access) free(static_cast<void*>(const_cast<char*>(p))); - } - }); - - StartIntercept(&output_fd); - FinishCrasher(); - AssertDeath(SIGSEGV); - FinishIntercept(&intercept_result); - - ASSERT_EQ(1, intercept_result) << "tombstoned reported failure"; - - std::vector<std::string> log_sources(2); - ConsumeFd(std::move(output_fd), &log_sources[0]); - logcat_collector.Collect(&log_sources[1]); - - for (const auto& result : log_sources) { - ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))"); - ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle); - if (params.free_before_access) { - ASSERT_MATCH(result, R"(deallocated by thread .*\n.*#00 pc)"); - } - ASSERT_MATCH(result, R"((^|\s)allocated by thread .*\n.*#00 pc)"); - } -} - struct SizeParamCrasherTest : CrasherTest, testing::WithParamInterface<size_t> {}; INSTANTIATE_TEST_SUITE_P(Sizes, SizeParamCrasherTest, testing::Values(0, 16, 131072)); @@ -1278,7 +1196,11 @@ TEST_F(CrasherTest, fake_pid) { static const char* const kDebuggerdSeccompPolicy = "/system/etc/seccomp_policy/crash_dump." 
ABI_STRING ".policy"; -static pid_t seccomp_fork_impl(void (*prejail)()) { +static void setup_jail(minijail* jail) { + if (!jail) { + LOG(FATAL) << "failed to create minijail"; + } + std::string policy; if (!android::base::ReadFileToString(kDebuggerdSeccompPolicy, &policy)) { PLOG(FATAL) << "failed to read policy file"; @@ -1305,15 +1227,15 @@ static pid_t seccomp_fork_impl(void (*prejail)()) { PLOG(FATAL) << "failed to seek tmp_fd"; } - ScopedMinijail jail{minijail_new()}; - if (!jail) { - LOG(FATAL) << "failed to create minijail"; - } + minijail_no_new_privs(jail); + minijail_log_seccomp_filter_failures(jail); + minijail_use_seccomp_filter(jail); + minijail_parse_seccomp_filters_from_fd(jail, tmp_fd.release()); +} - minijail_no_new_privs(jail.get()); - minijail_log_seccomp_filter_failures(jail.get()); - minijail_use_seccomp_filter(jail.get()); - minijail_parse_seccomp_filters_from_fd(jail.get(), tmp_fd.release()); +static pid_t seccomp_fork_impl(void (*prejail)()) { + ScopedMinijail jail{minijail_new()}; + setup_jail(jail.get()); pid_t result = fork(); if (result == -1) { @@ -1627,6 +1549,138 @@ TEST_F(CrasherTest, competing_tracer) { AssertDeath(SIGABRT); } +struct GwpAsanTestParameters { + size_t alloc_size; + bool free_before_access; + int access_offset; + std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line. 
+}; + +struct GwpAsanCrasherTest + : CrasherTest, + testing::WithParamInterface< + std::tuple<GwpAsanTestParameters, bool, bool>> {}; + +GwpAsanTestParameters gwp_asan_tests[] = { + {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 0, + "Use After Free, 0 bytes into a 7-byte allocation"}, + {/* alloc_size */ 15, /* free_before_access */ true, /* access_offset */ 1, + "Use After Free, 1 byte into a 15-byte allocation"}, + {/* alloc_size */ 4096, /* free_before_access */ false, /* access_offset */ 4098, + "Buffer Overflow, 2 bytes right of a 4096-byte allocation"}, + {/* alloc_size */ 4096, /* free_before_access */ false, /* access_offset */ -1, + "Buffer Underflow, 1 byte left of a 4096-byte allocation"}, +}; + +INSTANTIATE_TEST_SUITE_P( + GwpAsanTests, GwpAsanCrasherTest, + testing::Combine(testing::ValuesIn(gwp_asan_tests), + /* recoverable */ testing::Bool(), + /* seccomp */ testing::Bool()), + [](const testing::TestParamInfo< + std::tuple<GwpAsanTestParameters, bool, bool>>& info) { + const GwpAsanTestParameters& params = std::get<0>(info.param); + std::string name = params.free_before_access ? "UseAfterFree" : "Overflow"; + name += testing::PrintToString(params.alloc_size); + name += "Alloc"; + if (params.access_offset < 0) { + name += "Left"; + name += testing::PrintToString(params.access_offset * -1); + } else { + name += "Right"; + name += testing::PrintToString(params.access_offset); + } + name += "Bytes"; + if (std::get<1>(info.param)) name += "Recoverable"; + if (std::get<2>(info.param)) name += "Seccomp"; + return name; + }); + +TEST_P(GwpAsanCrasherTest, run_gwp_asan_test) { + if (mte_supported()) { + // Skip this test on MTE hardware, as MTE will reliably catch these errors + // instead of GWP-ASan. + GTEST_SKIP() << "Skipped on MTE."; + } + // Skip this test on HWASan, which will reliably catch test errors as well. 
+ SKIP_WITH_HWASAN; + + GwpAsanTestParameters params = std::get<0>(GetParam()); + bool recoverable = std::get<1>(GetParam()); + LogcatCollector logcat_collector; + + int intercept_result; + unique_fd output_fd; + StartProcess([&recoverable]() { + const char* env[] = {"GWP_ASAN_SAMPLE_RATE=1", "GWP_ASAN_PROCESS_SAMPLING=1", + "GWP_ASAN_MAX_ALLOCS=40000", nullptr, nullptr}; + if (recoverable) { + env[3] = "GWP_ASAN_RECOVERABLE=true"; + } + std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + test_name = std::regex_replace(test_name, std::regex("run_gwp_asan_test"), + "DISABLED_run_gwp_asan_test"); + std::string test_filter = "--gtest_filter=*"; + test_filter += test_name; + std::string this_binary = android::base::GetExecutablePath(); + const char* args[] = {this_binary.c_str(), "--gtest_also_run_disabled_tests", + test_filter.c_str(), nullptr}; + // We check the crash report from a debuggerd handler and from logcat. The + // echo from stdout/stderr of the subprocess trips up atest, because it + // doesn't like that two tests started in a row without the first one + // finishing (even though the second one is in a subprocess). + close(STDOUT_FILENO); + close(STDERR_FILENO); + execve(this_binary.c_str(), const_cast<char**>(args), const_cast<char**>(env)); + }); + + StartIntercept(&output_fd); + FinishCrasher(); + if (recoverable) { + AssertDeath(0); + } else { + AssertDeath(SIGSEGV); + } + FinishIntercept(&intercept_result); + + ASSERT_EQ(1, intercept_result) << "tombstoned reported failure"; + + std::vector<std::string> log_sources(2); + ConsumeFd(std::move(output_fd), &log_sources[0]); + logcat_collector.Collect(&log_sources[1]); + + // seccomp forces the fallback handler, which doesn't print GWP-ASan debugging + // information. Make sure the recovery still works, but the report won't be + // hugely useful, it looks like a regular SEGV. 
+ bool seccomp = std::get<2>(GetParam()); + if (!seccomp) { + for (const auto& result : log_sources) { + ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))"); + ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle); + if (params.free_before_access) { + ASSERT_MATCH(result, R"(deallocated by thread .*\n.*#00 pc)"); + } + ASSERT_MATCH(result, R"((^|\s)allocated by thread .*\n.*#00 pc)"); + } + } +} + +TEST_P(GwpAsanCrasherTest, DISABLED_run_gwp_asan_test) { + GwpAsanTestParameters params = std::get<0>(GetParam()); + bool seccomp = std::get<2>(GetParam()); + if (seccomp) { + ScopedMinijail jail{minijail_new()}; + setup_jail(jail.get()); + minijail_enter(jail.get()); + } + + // Use 'volatile' to prevent a very clever compiler eliminating the store. + char* volatile p = reinterpret_cast<char* volatile>(malloc(params.alloc_size)); + if (params.free_before_access) free(static_cast<void*>(const_cast<char*>(p))); + p[params.access_offset] = 42; + if (!params.free_before_access) free(static_cast<void*>(const_cast<char*>(p))); +} + TEST_F(CrasherTest, fdsan_warning_abort_message) { int intercept_result; unique_fd output_fd; diff --git a/debuggerd/handler/debuggerd_handler.cpp b/debuggerd/handler/debuggerd_handler.cpp index 7120d735b..d2bf0d705 100644 --- a/debuggerd/handler/debuggerd_handler.cpp +++ b/debuggerd/handler/debuggerd_handler.cpp @@ -565,17 +565,38 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c process_info = g_callbacks.get_process_info(); } + // GWP-ASan catches use-after-free and heap-buffer-overflow by using PROT_NONE + // guard pages, which lead to SEGV. Normally, debuggerd prints a bug report + // and the process terminates, but in some cases, we actually want to print + // the bug report and let the signal handler return, and restart the process. + // In order to do that, we need to disable GWP-ASan's guard pages. The + // following callbacks handle this case. 
+ gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks(); + bool gwp_asan_recoverable = false; + if (signal_number == SIGSEGV && signal_has_si_addr(info) && + gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery && + gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report && + gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report && + gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) { + gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr); + gwp_asan_recoverable = true; + } + // If sival_int is ~0, it means that the fallback handler has been called // once before and this function is being called again to dump the stack // of a specific thread. It is possible that the prctl call might return 1, // then return 0 in subsequent calls, so check the sival_int to determine if // the fallback handler should be called first. - if (si_val == kDebuggerdFallbackSivalUintptrRequestDump || - prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) { + bool no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1; + if (si_val == kDebuggerdFallbackSivalUintptrRequestDump || no_new_privs) { // This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely, // you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing // ANR trace. debuggerd_fallback_handler(info, ucontext, process_info.abort_msg); + if (no_new_privs && gwp_asan_recoverable) { + gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr); + return; + } resend_signal(info); return; } @@ -649,6 +670,9 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c // If the signal is fatal, don't unlock the mutex to prevent other crashing threads from // starting to dump right before our death. 
pthread_mutex_unlock(&crash_mutex); + } else if (gwp_asan_recoverable) { + gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr); + pthread_mutex_unlock(&crash_mutex); } #ifdef __aarch64__ else if (info->si_signo == SIGSEGV && @@ -727,3 +751,52 @@ void debuggerd_init(debuggerd_callbacks_t* callbacks) { debuggerd_register_handlers(&action); } + +// When debuggerd's signal handler is the first handler called, it's great at +// handling the recoverable GWP-ASan mode. For apps, sigchain (from libart) is +// always the first signal handler, and so the following function is what +// sigchain must call before processing the signal. This allows for processing +// of a potentially recoverable GWP-ASan crash. If the signal requires GWP-ASan +// recovery, then dump a report (via the regular debuggerd handler), and patch +// up the allocator, and allow the process to continue (indicated by returning +// 'true'). If the crash has nothing to do with GWP-ASan, or recovery isn't +// possible, return 'false'. +bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context) { + if (signal_number != SIGSEGV || !signal_has_si_addr(info)) return false; + + gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks(); + if (gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery == nullptr || + gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report == nullptr || + gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report == nullptr || + !gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) { + return false; + } + + // Only dump a crash report for the first GWP-ASan crash. ActivityManager + // doesn't like it when an app crashes multiple times, and is even more strict + // about an app crashing multiple times in a short time period. While the app + // won't crash fully when we do GWP-ASan recovery, ActivityManager still gets + // the information about the crash through the DropBoxManager service. 
If an + // app has multiple back-to-back GWP-ASan crashes, this would lead to the app + // being killed, which defeats the purpose of having the recoverable mode. To + // mitigate against this, only generate a debuggerd crash report for the first + // GWP-ASan crash encountered. We still need to do the patching up of the + // allocator though, so do that. + static pthread_mutex_t first_crash_mutex = PTHREAD_MUTEX_INITIALIZER; + pthread_mutex_lock(&first_crash_mutex); + static bool first_crash = true; + + if (first_crash) { + // `debuggerd_signal_handler` will call + // `debuggerd_gwp_asan_(pre|post)_crash_report`, so no need to manually call + // them here. + debuggerd_signal_handler(signal_number, info, context); + first_crash = false; + } else { + gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr); + gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr); + } + + pthread_mutex_unlock(&first_crash_mutex); + return true; +} diff --git a/debuggerd/include/debuggerd/handler.h b/debuggerd/include/debuggerd/handler.h index 1f9f4e243..de88be5d9 100644 --- a/debuggerd/include/debuggerd/handler.h +++ b/debuggerd/include/debuggerd/handler.h @@ -46,14 +46,25 @@ struct debugger_process_info { size_t scudo_ring_buffer_size; }; +// GWP-ASan callbacks to support the recoverable mode. Separate from the +// debuggerd_callbacks_t because these values aren't available at debuggerd_init +// time, and have to be synthesized on request. +typedef struct { + bool (*debuggerd_needs_gwp_asan_recovery)(void* fault_addr); + void (*debuggerd_gwp_asan_pre_crash_report)(void* fault_addr); + void (*debuggerd_gwp_asan_post_crash_report)(void* fault_addr); +} gwp_asan_callbacks_t; + // These callbacks are called in a signal handler, and thus must be async signal safe. // If null, the callbacks will not be called. 
typedef struct { debugger_process_info (*get_process_info)(); + gwp_asan_callbacks_t (*get_gwp_asan_callbacks)(); void (*post_dump)(); } debuggerd_callbacks_t; void debuggerd_init(debuggerd_callbacks_t* callbacks); +bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context); // DEBUGGER_ACTION_DUMP_TOMBSTONE and DEBUGGER_ACTION_DUMP_BACKTRACE are both // triggered via BIONIC_SIGNAL_DEBUGGER. The debugger_action_t is sent via si_value