From 6a5db6838514d92d9d2361810f9b96d5039e2801 Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Fri, 15 Nov 2024 01:06:55 +0000 Subject: [PATCH] Remove non-UTF8 characters from string fields. The string type in the tombstone proto does not support non-UTF8 characters. Therefore, use the oct_encode function to encode the abort_message field and message field from LogMessage. Fix up stl includes, add ones that were missing and remove those not being used. Add new unit test to verify that the abort and log messages are sanitized. Bug: 279496937 Bug: 377940849 Bug: 378185483 Test: All unit tests pass. Test: Ran pbtombstone on a crash with non-UTF8 characters and verified Test: it processes properly after this change and fails before the change. Change-Id: I3554d56caf9fcbfc410b4d554f6c3b4888b37e28 --- debuggerd/debuggerd_test.cpp | 27 +++++++++++++++++++ .../include/libdebuggerd/utility_host.h | 2 ++ debuggerd/libdebuggerd/tombstone_proto.cpp | 9 +++++-- .../libdebuggerd/tombstone_proto_to_text.cpp | 26 +++--------------- debuggerd/libdebuggerd/utility_host.cpp | 23 ++++++++++++++++ 5 files changed, 62 insertions(+), 25 deletions(-) diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp index 13c8d70b5..5bdc94646 100644 --- a/debuggerd/debuggerd_test.cpp +++ b/debuggerd/debuggerd_test.cpp @@ -3302,3 +3302,30 @@ TEST_F(CrasherTest, log_with_newline) { ASSERT_MATCH(result, ":\\s*This line has a newline."); ASSERT_MATCH(result, ":\\s*This is on the next line."); } + +TEST_F(CrasherTest, log_with_non_utf8) { + StartProcess([]() { LOG(FATAL) << "Invalid UTF-8: \xA0\xB0\xC0\xD0 and some other data."; }); + + unique_fd output_fd; + StartIntercept(&output_fd); + FinishCrasher(); + AssertDeath(SIGABRT); + int intercept_result; + FinishIntercept(&intercept_result); + ASSERT_EQ(1, intercept_result) << "tombstoned reported failure"; + + std::string result; + ConsumeFd(std::move(output_fd), &result); + // Verify the abort message is sanitized properly. + size_t pos = result.find( + "Abort message: 'Invalid UTF-8: " + "\x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 and some other data.'"); + EXPECT_TRUE(pos != std::string::npos) << "Couldn't find sanitized abort message: " << result; + + // Make sure that the log message is sanitized properly too. + EXPECT_TRUE( + result.find("Invalid UTF-8: \x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 " + "and some other data.", + pos + 30) != std::string::npos) + << "Couldn't find sanitized log message: " << result; +} diff --git a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h index 43fb8bdc1..df22e017c 100644 --- a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h +++ b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h @@ -29,3 +29,5 @@ constexpr size_t kTagGranuleSize = 16; // Number of rows and columns to display in an MTE tag dump. constexpr size_t kNumTagColumns = 16; constexpr size_t kNumTagRows = 16; + +std::string oct_encode(const std::string& data); diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp index d59358c65..ef303f065 100644 --- a/debuggerd/libdebuggerd/tombstone_proto.cpp +++ b/debuggerd/libdebuggerd/tombstone_proto.cpp @@ -34,10 +34,13 @@ #include #include +#include #include #include #include #include +#include +#include #include @@ -463,7 +466,8 @@ static void dump_abort_message(Tombstone* tombstone, } msg.resize(index); - tombstone->set_abort_message(msg); + // Make sure only UTF8 characters are present since abort_message is a string. + tombstone->set_abort_message(oct_encode(msg)); } static void dump_open_fds(Tombstone* tombstone, const OpenFilesList* open_files) { @@ -771,7 +775,8 @@ static void dump_log_file(Tombstone* tombstone, const char* logger, pid_t pid) { log_msg->set_tid(log_entry.entry.tid); log_msg->set_priority(prio); log_msg->set_tag(tag); - log_msg->set_message(msg); + // Make sure only UTF8 characters are present since message is a string. + log_msg->set_message(oct_encode(msg)); } while ((msg = nl)); } android_logger_list_free(logger_list); diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp index fedafc0c3..e885c5a73 100644 --- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp +++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp @@ -19,9 +19,9 @@ #include -#include +#include #include -#include +#include #include #include #include @@ -32,6 +32,7 @@ #include #include +#include "libdebuggerd/utility_host.h" #include "tombstone.pb.h" using android::base::StringAppendF; @@ -418,27 +419,6 @@ static void print_memory_maps(CallbackType callback, const Tombstone& tombstone) } } -static std::string oct_encode(const std::string& data) { - std::string oct_encoded; - oct_encoded.reserve(data.size()); - - // N.B. the unsigned here is very important, otherwise e.g. \255 would render as - // \-123 (and overflow our buffer). - for (unsigned char c : data) { - if (isprint(c)) { - oct_encoded += c; - } else { - std::string oct_digits("\\\0\0\0", 4); - // char is encodable in 3 oct digits - static_assert(std::numeric_limits::max() <= 8 * 8 * 8); - auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8); - oct_digits.resize(ptr - oct_digits.data()); - oct_encoded += oct_digits; - } - } - return oct_encoded; -} - static void print_main_thread(CallbackType callback, SymbolizeCallbackType symbolize, const Tombstone& tombstone, const Thread& thread) { print_thread_header(callback, tombstone, thread, true); diff --git a/debuggerd/libdebuggerd/utility_host.cpp b/debuggerd/libdebuggerd/utility_host.cpp index 72a560cfe..4efa03c8c 100644 --- a/debuggerd/libdebuggerd/utility_host.cpp +++ b/debuggerd/libdebuggerd/utility_host.cpp @@ -18,6 +18,8 @@ #include +#include +#include #include #include @@ -99,3 +101,24 @@ std::string describe_pac_enabled_keys(long value) { DESCRIBE_FLAG(PR_PAC_APGAKEY); return describe_end(value, desc); } + +std::string oct_encode(const std::string& data) { + std::string oct_encoded; + oct_encoded.reserve(data.size()); + + // N.B. the unsigned here is very important, otherwise e.g. \255 would render as + // \-123 (and overflow our buffer). + for (unsigned char c : data) { + if (isprint(c)) { + oct_encoded += c; + } else { + std::string oct_digits("\\\0\0\0", 4); + // char is encodable in 3 oct digits + static_assert(std::numeric_limits::max() <= 8 * 8 * 8); + auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8); + oct_digits.resize(ptr - oct_digits.data()); + oct_encoded += oct_digits; + } + } + return oct_encoded; +}