Merge "Add recoverable GWP-ASan."

2023-02-03 18:35:08 +00:00 · 2023-02-03 18:35:08 +00:00 · 6e0eb996b3
commit 6e0eb996b3
parent 15d7230ed6 18ce54241c
4 changed files with 233 additions and 94 deletions
--- a/debuggerd/Android.bp
+++ b/debuggerd/Android.bp
@ -98,6 +98,7 @@ cc_library_static {
        "libbase_headers",
        "libdebuggerd_common_headers",
        "bionic_libc_platform_headers",
+        "gwp_asan_headers",
    ],

    whole_static_libs: [
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@ -65,6 +65,7 @@

 #include "crash_test.h"
 #include "debuggerd/handler.h"
+#include "gtest/gtest.h"
 #include "libdebuggerd/utility.h"
 #include "protocol.h"
 #include "tombstoned/tombstoned.h"
@ -111,19 +112,6 @@ constexpr char kWaitForDebuggerKey[] = "debug.debuggerd.wait_for_debugger";
  ASSERT_MATCH(result,                             \
               R"(#\d\d pc [0-9a-f]+\s+ \S+ (\(offset 0x[0-9a-f]+\) )?\()" frame_name R"(\+)");

-// Enable GWP-ASan at the start of this process. GWP-ASan is enabled using
-// process sampling, so we need to ensure we force GWP-ASan on.
-__attribute__((constructor)) static void enable_gwp_asan() {
-  android_mallopt_gwp_asan_options_t opts;
-  // No, we're not an app, but let's turn ourselves on without sampling.
-  // Technically, if someone's using the *.default_app sysprops, they'll adjust
-  // our settings, but I don't think this will be common on a device that's
-  // running debuggerd_tests.
-  opts.desire = android_mallopt_gwp_asan_options_t::Action::TURN_ON_FOR_APP;
-  opts.program_name = "";
-  android_mallopt(M_INITIALIZE_GWP_ASAN, &opts, sizeof(android_mallopt_gwp_asan_options_t));
-}
-
 static void tombstoned_intercept(pid_t target_pid, unique_fd* intercept_fd, unique_fd* output_fd,
                                 InterceptStatus* status, DebuggerdDumpType intercept_type) {
  intercept_fd->reset(socket_local_client(kTombstonedInterceptSocketName,
@ -469,76 +457,6 @@ static void SetTagCheckingLevelAsync() {
 }
 #endif

-// Number of iterations required to reliably guarantee a GWP-ASan crash.
-// GWP-ASan's sample rate is not truly nondeterministic, it initialises a
-// thread-local counter at 2*SampleRate, and decrements on each malloc(). Once
-// the counter reaches zero, we provide a sampled allocation. Then, double that
-// figure to allow for left/right allocation alignment, as this is done randomly
-// without bias.
-#define GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH (0x20000)
-
-struct GwpAsanTestParameters {
-  size_t alloc_size;
-  bool free_before_access;
-  int access_offset;
-  std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line.
-};
-
-struct GwpAsanCrasherTest : CrasherTest, testing::WithParamInterface<GwpAsanTestParameters> {};
-
-GwpAsanTestParameters gwp_asan_tests[] = {
-  {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 0, "Use After Free, 0 bytes into a 7-byte allocation"},
-  {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 1, "Use After Free, 1 byte into a 7-byte allocation"},
-  {/* alloc_size */ 7, /* free_before_access */ false, /* access_offset */ 16, "Buffer Overflow, 9 bytes right of a 7-byte allocation"},
-  {/* alloc_size */ 16, /* free_before_access */ false, /* access_offset */ -1, "Buffer Underflow, 1 byte left of a 16-byte allocation"},
-};
-
-INSTANTIATE_TEST_SUITE_P(GwpAsanTests, GwpAsanCrasherTest, testing::ValuesIn(gwp_asan_tests));
-
-TEST_P(GwpAsanCrasherTest, gwp_asan_uaf) {
-  if (mte_supported()) {
-    // Skip this test on MTE hardware, as MTE will reliably catch these errors
-    // instead of GWP-ASan.
-    GTEST_SKIP() << "Skipped on MTE.";
-  }
-  // Skip this test on HWASan, which will reliably catch test errors as well.
-  SKIP_WITH_HWASAN;
-
-  GwpAsanTestParameters params = GetParam();
-  LogcatCollector logcat_collector;
-
-  int intercept_result;
-  unique_fd output_fd;
-  StartProcess([&params]() {
-    for (unsigned i = 0; i < GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH; ++i) {
-      volatile char* p = reinterpret_cast<volatile char*>(malloc(params.alloc_size));
-      if (params.free_before_access) free(static_cast<void*>(const_cast<char*>(p)));
-      p[params.access_offset] = 42;
-      if (!params.free_before_access) free(static_cast<void*>(const_cast<char*>(p)));
-    }
-  });
-
-  StartIntercept(&output_fd);
-  FinishCrasher();
-  AssertDeath(SIGSEGV);
-  FinishIntercept(&intercept_result);
-
-  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
-
-  std::vector<std::string> log_sources(2);
-  ConsumeFd(std::move(output_fd), &log_sources[0]);
-  logcat_collector.Collect(&log_sources[1]);
-
-  for (const auto& result : log_sources) {
-    ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))");
-    ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle);
-    if (params.free_before_access) {
-      ASSERT_MATCH(result, R"(deallocated by thread .*\n.*#00 pc)");
-    }
-    ASSERT_MATCH(result, R"((^|\s)allocated by thread .*\n.*#00 pc)");
-  }
-}
-
 struct SizeParamCrasherTest : CrasherTest, testing::WithParamInterface<size_t> {};

 INSTANTIATE_TEST_SUITE_P(Sizes, SizeParamCrasherTest, testing::Values(0, 16, 131072));
@ -1279,7 +1197,11 @@ TEST_F(CrasherTest, fake_pid) {
 static const char* const kDebuggerdSeccompPolicy =
    "/system/etc/seccomp_policy/crash_dump." ABI_STRING ".policy";

-static pid_t seccomp_fork_impl(void (*prejail)()) {
+static void setup_jail(minijail* jail) {
+  if (!jail) {
+    LOG(FATAL) << "failed to create minijail";
+  }
+
  std::string policy;
  if (!android::base::ReadFileToString(kDebuggerdSeccompPolicy, &policy)) {
    PLOG(FATAL) << "failed to read policy file";
@ -1306,15 +1228,15 @@ static pid_t seccomp_fork_impl(void (*prejail)()) {
    PLOG(FATAL) << "failed to seek tmp_fd";
  }

-  ScopedMinijail jail{minijail_new()};
-  if (!jail) {
-    LOG(FATAL) << "failed to create minijail";
-  }
+  minijail_no_new_privs(jail);
+  minijail_log_seccomp_filter_failures(jail);
+  minijail_use_seccomp_filter(jail);
+  minijail_parse_seccomp_filters_from_fd(jail, tmp_fd.release());
+}

-  minijail_no_new_privs(jail.get());
-  minijail_log_seccomp_filter_failures(jail.get());
-  minijail_use_seccomp_filter(jail.get());
-  minijail_parse_seccomp_filters_from_fd(jail.get(), tmp_fd.release());
+static pid_t seccomp_fork_impl(void (*prejail)()) {
+  ScopedMinijail jail{minijail_new()};
+  setup_jail(jail.get());

  pid_t result = fork();
  if (result == -1) {
@ -1628,6 +1550,138 @@ TEST_F(CrasherTest, competing_tracer) {
  AssertDeath(SIGABRT);
 }

+struct GwpAsanTestParameters {
+  size_t alloc_size;
+  bool free_before_access;
+  int access_offset;
+  std::string cause_needle;  // Needle to be found in the "Cause: [GWP-ASan]" line.
+};
+
+struct GwpAsanCrasherTest
+    : CrasherTest,
+      testing::WithParamInterface<
+          std::tuple<GwpAsanTestParameters, /* recoverable */ bool, /* seccomp */ bool>> {};
+
+GwpAsanTestParameters gwp_asan_tests[] = {
+    {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 0,
+     "Use After Free, 0 bytes into a 7-byte allocation"},
+    {/* alloc_size */ 15, /* free_before_access */ true, /* access_offset */ 1,
+     "Use After Free, 1 byte into a 15-byte allocation"},
+    {/* alloc_size */ 4096, /* free_before_access */ false, /* access_offset */ 4098,
+     "Buffer Overflow, 2 bytes right of a 4096-byte allocation"},
+    {/* alloc_size */ 4096, /* free_before_access */ false, /* access_offset */ -1,
+     "Buffer Underflow, 1 byte left of a 4096-byte allocation"},
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    GwpAsanTests, GwpAsanCrasherTest,
+    testing::Combine(testing::ValuesIn(gwp_asan_tests),
+                     /* recoverable */ testing::Bool(),
+                     /* seccomp */ testing::Bool()),
+    [](const testing::TestParamInfo<
+        std::tuple<GwpAsanTestParameters, /* recoverable */ bool, /* seccomp */ bool>>& info) {
+      const GwpAsanTestParameters& params = std::get<0>(info.param);
+      std::string name = params.free_before_access ? "UseAfterFree" : "Overflow";
+      name += testing::PrintToString(params.alloc_size);
+      name += "Alloc";
+      if (params.access_offset < 0) {
+        name += "Left";
+        name += testing::PrintToString(params.access_offset * -1);
+      } else {
+        name += "Right";
+        name += testing::PrintToString(params.access_offset);
+      }
+      name += "Bytes";
+      if (std::get<1>(info.param)) name += "Recoverable";
+      if (std::get<2>(info.param)) name += "Seccomp";
+      return name;
+    });
+
+TEST_P(GwpAsanCrasherTest, run_gwp_asan_test) {
+  if (mte_supported()) {
+    // Skip this test on MTE hardware, as MTE will reliably catch these errors
+    // instead of GWP-ASan.
+    GTEST_SKIP() << "Skipped on MTE.";
+  }
+  // Skip this test on HWASan, which will reliably catch test errors as well.
+  SKIP_WITH_HWASAN;
+
+  GwpAsanTestParameters params = std::get<0>(GetParam());
+  bool recoverable = std::get<1>(GetParam());
+  LogcatCollector logcat_collector;
+
+  int intercept_result;
+  unique_fd output_fd;
+  StartProcess([&recoverable]() {
+    const char* env[] = {"GWP_ASAN_SAMPLE_RATE=1", "GWP_ASAN_PROCESS_SAMPLING=1",
+                         "GWP_ASAN_MAX_ALLOCS=40000", nullptr, nullptr};
+    if (recoverable) {
+      env[3] = "GWP_ASAN_RECOVERABLE=true";
+    }
+    std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
+    test_name = std::regex_replace(test_name, std::regex("run_gwp_asan_test"),
+                                   "DISABLED_run_gwp_asan_test");
+    std::string test_filter = "--gtest_filter=*";
+    test_filter += test_name;
+    std::string this_binary = android::base::GetExecutablePath();
+    const char* args[] = {this_binary.c_str(), "--gtest_also_run_disabled_tests",
+                          test_filter.c_str(), nullptr};
+    // We check the crash report from a debuggerd handler and from logcat. The
+    // echo from stdout/stderr of the subprocess trips up atest, because it
+    // doesn't like that two tests started in a row without the first one
+    // finishing (even though the second one is in a subprocess).
+    close(STDOUT_FILENO);
+    close(STDERR_FILENO);
+    execve(this_binary.c_str(), const_cast<char**>(args), const_cast<char**>(env));
+  });
+
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  if (recoverable) {
+    AssertDeath(0);
+  } else {
+    AssertDeath(SIGSEGV);
+  }
+  FinishIntercept(&intercept_result);
+
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::vector<std::string> log_sources(2);
+  ConsumeFd(std::move(output_fd), &log_sources[0]);
+  logcat_collector.Collect(&log_sources[1]);
+
+  // seccomp forces the fallback handler, which doesn't print GWP-ASan debugging
+  // information. Make sure the recovery still works, but the report won't be
+  // hugely useful, it looks like a regular SEGV.
+  bool seccomp = std::get<2>(GetParam());
+  if (!seccomp) {
+    for (const auto& result : log_sources) {
+      ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))");
+      ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle);
+      if (params.free_before_access) {
+        ASSERT_MATCH(result, R"(deallocated by thread .*\n.*#00 pc)");
+      }
+      ASSERT_MATCH(result, R"((^|\s)allocated by thread .*\n.*#00 pc)");
+    }
+  }
+}
+
+TEST_P(GwpAsanCrasherTest, DISABLED_run_gwp_asan_test) {
+  GwpAsanTestParameters params = std::get<0>(GetParam());
+  bool seccomp = std::get<2>(GetParam());
+  if (seccomp) {
+    ScopedMinijail jail{minijail_new()};
+    setup_jail(jail.get());
+    minijail_enter(jail.get());
+  }
+
+  // Use 'volatile' to prevent a very clever compiler eliminating the store.
+  char* volatile p = reinterpret_cast<char* volatile>(malloc(params.alloc_size));
+  if (params.free_before_access) free(static_cast<void*>(const_cast<char*>(p)));
+  p[params.access_offset] = 42;
+  if (!params.free_before_access) free(static_cast<void*>(const_cast<char*>(p)));
+}
+
 TEST_F(CrasherTest, fdsan_warning_abort_message) {
  int intercept_result;
  unique_fd output_fd;
--- a/debuggerd/handler/debuggerd_handler.cpp
+++ b/debuggerd/handler/debuggerd_handler.cpp
@ -565,17 +565,38 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
    process_info = g_callbacks.get_process_info();
  }

+  // GWP-ASan catches use-after-free and heap-buffer-overflow by using PROT_NONE
+  // guard pages, which lead to SEGV. Normally, debuggerd prints a bug report
+  // and the process terminates, but in some cases, we actually want to print
+  // the bug report and let the signal handler return, and restart the process.
+  // In order to do that, we need to disable GWP-ASan's guard pages. The
+  // following callbacks handle this case.
+  gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
+  bool gwp_asan_recoverable = false;
+  if (signal_number == SIGSEGV && signal_has_si_addr(info) &&
+      gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery &&
+      gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report &&
+      gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report &&
+      gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
+    gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
+    gwp_asan_recoverable = true;
+  }
+
  // If sival_int is ~0, it means that the fallback handler has been called
  // once before and this function is being called again to dump the stack
  // of a specific thread. It is possible that the prctl call might return 1,
  // then return 0 in subsequent calls, so check the sival_int to determine if
  // the fallback handler should be called first.
-  if (si_val == kDebuggerdFallbackSivalUintptrRequestDump ||
-      prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) {
+  bool no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1;
+  if (si_val == kDebuggerdFallbackSivalUintptrRequestDump || no_new_privs) {
    // This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely,
    // you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
    // ANR trace.
    debuggerd_fallback_handler(info, ucontext, process_info.abort_msg);
+    if (no_new_privs && gwp_asan_recoverable) {
+      gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
+      return;
+    }
    resend_signal(info);
    return;
  }
@ -649,6 +670,9 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
    // If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
    // starting to dump right before our death.
    pthread_mutex_unlock(&crash_mutex);
+  } else if (gwp_asan_recoverable) {
+    gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
+    pthread_mutex_unlock(&crash_mutex);
  }
 #ifdef __aarch64__
  else if (info->si_signo == SIGSEGV &&
@ -727,3 +751,52 @@ void debuggerd_init(debuggerd_callbacks_t* callbacks) {

  debuggerd_register_handlers(&action);
 }
+
+// When debuggerd's signal handler is the first handler called, it's great at
+// handling the recoverable GWP-ASan mode. For apps, sigchain (from libart) is
+// always the first signal handler, and so the following function is what
+// sigchain must call before processing the signal. This allows for processing
+// of a potentially recoverable GWP-ASan crash. If the signal requires GWP-ASan
+// recovery, then dump a report (via the regular debuggerd hanndler), and patch
+// up the allocator, and allow the process to continue (indicated by returning
+// 'true'). If the crash has nothing to do with GWP-ASan, or recovery isn't
+// possible, return 'false'.
+bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context) {
+  if (signal_number != SIGSEGV || !signal_has_si_addr(info)) return false;
+
+  gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
+  if (gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery == nullptr ||
+      gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report == nullptr ||
+      gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report == nullptr ||
+      !gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
+    return false;
+  }
+
+  // Only dump a crash report for the first GWP-ASan crash. ActivityManager
+  // doesn't like it when an app crashes multiple times, and is even more strict
+  // about an app crashing multiple times in a short time period. While the app
+  // won't crash fully when we do GWP-ASan recovery, ActivityManager still gets
+  // the information about the crash through the DropBoxManager service. If an
+  // app has multiple back-to-back GWP-ASan crashes, this would lead to the app
+  // being killed, which defeats the purpose of having the recoverable mode. To
+  // mitigate against this, only generate a debuggerd crash report for the first
+  // GWP-ASan crash encountered. We still need to do the patching up of the
+  // allocator though, so do that.
+  static pthread_mutex_t first_crash_mutex = PTHREAD_MUTEX_INITIALIZER;
+  pthread_mutex_lock(&first_crash_mutex);
+  static bool first_crash = true;
+
+  if (first_crash) {
+    // `debuggerd_signal_handler` will call
+    // `debuggerd_gwp_asan_(pre|post)_crash_report`, so no need to manually call
+    // them here.
+    debuggerd_signal_handler(signal_number, info, context);
+    first_crash = false;
+  } else {
+    gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
+    gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
+  }
+
+  pthread_mutex_unlock(&first_crash_mutex);
+  return true;
+}
--- a/debuggerd/include/debuggerd/handler.h
+++ b/debuggerd/include/debuggerd/handler.h
@ -46,14 +46,25 @@ struct debugger_process_info {
  size_t scudo_ring_buffer_size;
 };

+// GWP-ASan calbacks to support the recoverable mode. Separate from the
+// debuggerd_callbacks_t because these values aren't available at debuggerd_init
+// time, and have to be synthesized on request.
+typedef struct {
+  bool (*debuggerd_needs_gwp_asan_recovery)(void* fault_addr);
+  void (*debuggerd_gwp_asan_pre_crash_report)(void* fault_addr);
+  void (*debuggerd_gwp_asan_post_crash_report)(void* fault_addr);
+} gwp_asan_callbacks_t;
+
 // These callbacks are called in a signal handler, and thus must be async signal safe.
 // If null, the callbacks will not be called.
 typedef struct {
  debugger_process_info (*get_process_info)();
+  gwp_asan_callbacks_t (*get_gwp_asan_callbacks)();
  void (*post_dump)();
 } debuggerd_callbacks_t;

 void debuggerd_init(debuggerd_callbacks_t* callbacks);
+bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context);

 // DEBUGGER_ACTION_DUMP_TOMBSTONE and DEBUGGER_ACTION_DUMP_BACKTRACE are both
 // triggered via BIONIC_SIGNAL_DEBUGGER. The debugger_action_t is sent via si_value