Remove non-UTF8 characters from string fields.

The string type in the tombstone proto does not support non-UTF8
characters. Therefore, use the oct_encode function to encode the
abort_message field and message field from LogMessage.

Fix up stl includes, add ones that were missing and remove those not
being used.

Add new unit test to verify that the abort and log messages are
sanitized.

Bug: 279496937
Bug: 377940849
Bug: 378185483

Test: All unit tests pass.
Test: Ran pbtombstone on a crash with non-UTF8 characters and verified
Test: it processes properly after this change and fails before the change.
Change-Id: I3554d56caf9fcbfc410b4d554f6c3b4888b37e28
This commit is contained in:
Christopher Ferris 2024-11-15 01:06:55 +00:00
parent 243850ca5f
commit 6a5db68385
5 changed files with 62 additions and 25 deletions

View file

@ -3302,3 +3302,30 @@ TEST_F(CrasherTest, log_with_newline) {
ASSERT_MATCH(result, ":\\s*This line has a newline.");
ASSERT_MATCH(result, ":\\s*This is on the next line.");
}
TEST_F(CrasherTest, log_with_non_utf8) {
StartProcess([]() { LOG(FATAL) << "Invalid UTF-8: \xA0\xB0\xC0\xD0 and some other data."; });
unique_fd output_fd;
StartIntercept(&output_fd);
FinishCrasher();
AssertDeath(SIGABRT);
int intercept_result;
FinishIntercept(&intercept_result);
ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
std::string result;
ConsumeFd(std::move(output_fd), &result);
// Verify the abort message is sanitized properly.
size_t pos = result.find(
"Abort message: 'Invalid UTF-8: "
"\x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 and some other data.'");
EXPECT_TRUE(pos != std::string::npos) << "Couldn't find sanitized abort message: " << result;
// Make sure that the log message is sanitized properly too.
EXPECT_TRUE(
result.find("Invalid UTF-8: \x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 "
"and some other data.",
pos + 30) != std::string::npos)
<< "Couldn't find sanitized log message: " << result;
}

View file

@ -29,3 +29,5 @@ constexpr size_t kTagGranuleSize = 16;
// Number of rows and columns to display in an MTE tag dump.
constexpr size_t kNumTagColumns = 16;
constexpr size_t kNumTagRows = 16;
std::string oct_encode(const std::string& data);

View file

@ -34,10 +34,13 @@
#include <sys/sysinfo.h>
#include <time.h>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include <async_safe/log.h>
@ -463,7 +466,8 @@ static void dump_abort_message(Tombstone* tombstone,
}
msg.resize(index);
tombstone->set_abort_message(msg);
// Make sure only UTF8 characters are present since abort_message is a string.
tombstone->set_abort_message(oct_encode(msg));
}
static void dump_open_fds(Tombstone* tombstone, const OpenFilesList* open_files) {
@ -771,7 +775,8 @@ static void dump_log_file(Tombstone* tombstone, const char* logger, pid_t pid) {
log_msg->set_tid(log_entry.entry.tid);
log_msg->set_priority(prio);
log_msg->set_tag(tag);
log_msg->set_message(msg);
// Make sure only UTF8 characters are present since message is a string.
log_msg->set_message(oct_encode(msg));
} while ((msg = nl));
}
android_logger_list_free(logger_list);

View file

@ -19,9 +19,9 @@
#include <inttypes.h>
#include <charconv>
#include <algorithm>
#include <functional>
#include <limits>
#include <optional>
#include <set>
#include <string>
#include <unordered_set>
@ -32,6 +32,7 @@
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include "libdebuggerd/utility_host.h"
#include "tombstone.pb.h"
using android::base::StringAppendF;
@ -418,27 +419,6 @@ static void print_memory_maps(CallbackType callback, const Tombstone& tombstone)
}
}
static std::string oct_encode(const std::string& data) {
std::string oct_encoded;
oct_encoded.reserve(data.size());
// N.B. the unsigned here is very important, otherwise e.g. \255 would render as
// \-123 (and overflow our buffer).
for (unsigned char c : data) {
if (isprint(c)) {
oct_encoded += c;
} else {
std::string oct_digits("\\\0\0\0", 4);
// char is encodable in 3 oct digits
static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
oct_digits.resize(ptr - oct_digits.data());
oct_encoded += oct_digits;
}
}
return oct_encoded;
}
static void print_main_thread(CallbackType callback, SymbolizeCallbackType symbolize,
const Tombstone& tombstone, const Thread& thread) {
print_thread_header(callback, tombstone, thread, true);

View file

@ -18,6 +18,8 @@
#include <sys/prctl.h>
#include <charconv>
#include <limits>
#include <string>
#include <android-base/stringprintf.h>
@ -99,3 +101,24 @@ std::string describe_pac_enabled_keys(long value) {
DESCRIBE_FLAG(PR_PAC_APGAKEY);
return describe_end(value, desc);
}
std::string oct_encode(const std::string& data) {
std::string oct_encoded;
oct_encoded.reserve(data.size());
// N.B. the unsigned here is very important, otherwise e.g. \255 would render as
// \-123 (and overflow our buffer).
for (unsigned char c : data) {
if (isprint(c)) {
oct_encoded += c;
} else {
std::string oct_digits("\\\0\0\0", 4);
// char is encodable in 3 oct digits
static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
oct_digits.resize(ptr - oct_digits.data());
oct_encoded += oct_digits;
}
}
return oct_encoded;
}