From 747c0e6216f2d805ac0ac91c3d5296e11124165b Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Wed, 20 Sep 2017 08:37:46 -0700 Subject: [PATCH 1/2] bootstat: better validation of battery level (shutdown,battery) Replace simple strtoull with loop that ensures no leading zeros. Restrict size of value buffer being checked as allocation was going to end of retrieved buffer, which can cause unnecessary memory pressure during boot. Test: system/core/bootstat/boot_reason_test.sh Bug: 63736262 Change-Id: Ifdc1d4fd3a73794c001577024ce7cbfde9c25028 --- bootstat/bootstat.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/bootstat/bootstat.cpp b/bootstat/bootstat.cpp index 07d410c91..cdb4b9b3b 100644 --- a/bootstat/bootstat.cpp +++ b/bootstat/bootstat.cpp @@ -508,10 +508,16 @@ std::string BootReasonStrToReason(const std::string& boot_reason) { size_t pos = content.rfind(battery); // last one std::string digits; if (pos != std::string::npos) { - digits = content.substr(pos + strlen(battery)); + digits = content.substr(pos + strlen(battery), strlen("100 ")); + } + const char* endptr = digits.c_str(); + unsigned level = 0; + while (::isdigit(*endptr)) { + level *= 10; + level += *endptr++ - '0'; + // make sure no leading zeros, except zero itself, and range check. + if ((level == 0) || (level > 100)) break; } - char* endptr = NULL; - unsigned long long level = strtoull(digits.c_str(), &endptr, 10); if ((level <= 100) && (endptr != digits.c_str()) && (*endptr == ' ')) { LOG(INFO) << "Battery level at shutdown " << level << "%"; if (level <= battery_dead_threshold) { @@ -552,10 +558,16 @@ std::string BootReasonStrToReason(const std::string& boot_reason) { pos = content.find(match); // The first one it finds. if (pos != std::string::npos) { - digits = content.substr(pos + strlen(match)); + digits = content.substr(pos + strlen(match), strlen("100 ")); + } + endptr = digits.c_str(); + level = 0; + while (::isdigit(*endptr)) { + level *= 10; + level += *endptr++ - '0'; + // make sure no leading zeros, except zero itself, and range check. + if ((level == 0) || (level > 100)) break; } - endptr = NULL; - level = strtoull(digits.c_str(), &endptr, 10); if ((level <= 100) && (endptr != digits.c_str()) && (*endptr == ' ')) { LOG(INFO) << "Battery level at startup " << level << "%"; if (level <= battery_dead_threshold) { From 293cb3b2172725c8859c4bb2e991f07ea076cf98 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Wed, 20 Sep 2017 08:37:46 -0700 Subject: [PATCH 2/2] bootstat: handle a bad bit error rate issue with pstore Create a private rfind that allows a fuzzy match based on a bit error rate (BER) of 1 every 8 bits. last kmsg is affected by pstore ramoops backing that suffers from data corruption. Add some additional validation based on possible data corruption scenarios, as a noisy match means higher chance of noisy data. Noisy data notably can affect the battery level detection, but do not typically result in false positives. Battery level, or failure, is the responsibility of the BatteryStats service, providing a positive signal and strong device-independent algorithm. The checking done in bootstat is likely to be deprecated in favour of an API request to BatteryStats once their algorithms deal with surprise outages due to aging. The kernel logging heuristic and BER fixup handily deals with a prevalent issue where some bootloaders failure to properly notify us of panics. This is where the gains are noticed with this improvement. Test: system/core/bootstat/boot_reason_test.sh Bug: 63736262 Change-Id: I93b4210f12fb47c5c036f4d6eb4cafeee4896d35 --- bootstat/bootstat.cpp | 146 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 137 insertions(+), 9 deletions(-) diff --git a/bootstat/bootstat.cpp b/bootstat/bootstat.cpp index cdb4b9b3b..10753ce28 100644 --- a/bootstat/bootstat.cpp +++ b/bootstat/bootstat.cpp @@ -328,7 +328,105 @@ bool readPstoreConsole(std::string& console) { return android::base::ReadFileToString("/sys/fs/pstore/console-ramoops", &console); } -bool addKernelPanicSubReason(const std::string& console, std::string& ret) { +// Implement a variant of std::string::rfind that is resilient to errors in +// the data stream being inspected. +class pstoreConsole { + private: + const size_t kBitErrorRate = 8; // number of bits per error + const std::string& console; + + // Number of bits that differ between the two arguments l and r. + // Returns zero if the values for l and r are identical. + size_t numError(uint8_t l, uint8_t r) const { return std::bitset<8>(l ^ r).count(); } + + // A string comparison function, reports the number of errors discovered + // in the match to a maximum of the bitLength / kBitErrorRate, at that + // point returning npos to indicate match is too poor. + // + // Since called in rfind which works backwards, expect cache locality will + // help if we check in reverse here as well for performance. + // + // Assumption: l (from console.c_str() + pos) is long enough to house + // _r.length(), checked in rfind caller below. + // + size_t numError(size_t pos, const std::string& _r) const { + const char* l = console.c_str() + pos; + const char* r = _r.c_str(); + size_t n = _r.length(); + const uint8_t* le = reinterpret_cast(l) + n; + const uint8_t* re = reinterpret_cast(r) + n; + size_t count = 0; + n = 0; + do { + // individual character bit error rate > threshold + slop + size_t num = numError(*--le, *--re); + if (num > ((8 + kBitErrorRate) / kBitErrorRate)) return std::string::npos; + // total bit error rate > threshold + slop + count += num; + ++n; + if (count > ((n * 8 + kBitErrorRate - (n > 2)) / kBitErrorRate)) { + return std::string::npos; + } + } while (le != reinterpret_cast(l)); + return count; + } + + public: + explicit pstoreConsole(const std::string& console) : console(console) {} + // scope of argument must be equal to or greater than scope of pstoreConsole + explicit pstoreConsole(const std::string&& console) = delete; + explicit pstoreConsole(std::string&& console) = delete; + + // Our implementation of rfind, use exact match first, then resort to fuzzy. + size_t rfind(const std::string& needle) const { + size_t pos = console.rfind(needle); // exact match? + if (pos != std::string::npos) return pos; + + // Check to make sure needle fits in console string. + pos = console.length(); + if (needle.length() > pos) return std::string::npos; + pos -= needle.length(); + // fuzzy match to maximum kBitErrorRate + do { + if (numError(pos, needle) != std::string::npos) return pos; + } while (pos-- != 0); + return std::string::npos; + } + + // Our implementation of find, use only fuzzy match. + size_t find(const std::string& needle, size_t start = 0) const { + // Check to make sure needle fits in console string. + if (needle.length() > console.length()) return std::string::npos; + const size_t last_pos = console.length() - needle.length(); + // fuzzy match to maximum kBitErrorRate + for (size_t pos = start; pos <= last_pos; ++pos) { + if (numError(pos, needle) != std::string::npos) return pos; + } + return std::string::npos; + } +}; + +// If bit error match to needle, correct it. +// Return true if any corrections were discovered and applied. +bool correctForBer(std::string& reason, const std::string& needle) { + bool corrected = false; + if (reason.length() < needle.length()) return corrected; + const pstoreConsole console(reason); + const size_t last_pos = reason.length() - needle.length(); + for (size_t pos = 0; pos <= last_pos; pos += needle.length()) { + pos = console.find(needle, pos); + if (pos == std::string::npos) break; + + // exact match has no malice + if (needle == reason.substr(pos, needle.length())) continue; + + corrected = true; + reason = reason.substr(0, pos) + needle + reason.substr(pos + needle.length()); + } + return corrected; +} + +bool addKernelPanicSubReason(const pstoreConsole& console, std::string& ret) { // Check for kernel panic types to refine information if (console.rfind("SysRq : Trigger a crash") != std::string::npos) { // Can not happen, except on userdebug, during testing/debugging. @@ -347,6 +445,10 @@ bool addKernelPanicSubReason(const std::string& console, std::string& ret) { return false; } +bool addKernelPanicSubReason(const std::string& content, std::string& ret) { + return addKernelPanicSubReason(pstoreConsole(content), ret); +} + // std::transform Helper callback functions: // Converts a string value representing the reason the system booted to a // string complying with Android system standard reason. @@ -451,9 +553,10 @@ std::string BootReasonStrToReason(const std::string& boot_reason) { // Check to see if last klog has some refinement hints. std::string content; if (readPstoreConsole(content)) { + const pstoreConsole console(content); // The toybox reboot command used directly (unlikely)? But also // catches init's response to Android's more controlled reboot command. - if (content.rfind("reboot: Power down") != std::string::npos) { + if (console.rfind("reboot: Power down") != std::string::npos) { ret = "shutdown"; // Still too blunt, but more accurate. // ToDo: init should record the shutdown reason to kernel messages ala: // init: shutdown system with command 'last_reboot_reason' @@ -462,13 +565,25 @@ std::string BootReasonStrToReason(const std::string& boot_reason) { } static const char cmd[] = "reboot: Restarting system with command '"; - size_t pos = content.rfind(cmd); + size_t pos = console.rfind(cmd); if (pos != std::string::npos) { pos += strlen(cmd); std::string subReason(content.substr(pos, max_reason_length)); + // Correct against any known strings that Bit Error Match + for (const auto& s : knownReasons) { + correctForBer(subReason, s); + } + for (const auto& m : kBootReasonMap) { + if (m.first.length() <= strlen("cold")) continue; // too short? + if (correctForBer(subReason, m.first + "'")) continue; + if (m.first.length() <= strlen("reboot,cold")) continue; // short? + if (!android::base::StartsWith(m.first, "reboot,")) continue; + correctForBer(subReason, m.first.substr(strlen("reboot,")) + "'"); + } for (pos = 0; pos < subReason.length(); ++pos) { char c = subReason[pos]; - if (!::isprint(c) || (c == '\'')) { + // #, &, %, / are common single bit error for ' that we can block + if (!::isprint(c) || (c == '\'') || (c == '#') || (c == '&') || (c == '%') || (c == '/')) { subReason.erase(pos); break; } @@ -486,10 +601,10 @@ std::string BootReasonStrToReason(const std::string& boot_reason) { } // Check for kernel panics, allowed to override reboot command. - if (!addKernelPanicSubReason(content, ret) && + if (!addKernelPanicSubReason(console, ret) && // check for long-press power down - ((content.rfind("Power held for ") != std::string::npos) || - (content.rfind("charger: [") != std::string::npos))) { + ((console.rfind("Power held for ") != std::string::npos) || + (console.rfind("charger: [") != std::string::npos))) { ret = "cold"; } } @@ -505,10 +620,15 @@ std::string BootReasonStrToReason(const std::string& boot_reason) { // Really a hail-mary pass to find it in last klog content ... static const int battery_dead_threshold = 2; // percent static const char battery[] = "healthd: battery l="; - size_t pos = content.rfind(battery); // last one + const pstoreConsole console(content); + size_t pos = console.rfind(battery); // last one std::string digits; if (pos != std::string::npos) { digits = content.substr(pos + strlen(battery), strlen("100 ")); + // correct common errors + correctForBer(digits, "100 "); + if (digits[0] == '!') digits[0] = '1'; + if (digits[1] == '!') digits[1] = '1'; } const char* endptr = digits.c_str(); unsigned level = 0; @@ -518,7 +638,15 @@ std::string BootReasonStrToReason(const std::string& boot_reason) { // make sure no leading zeros, except zero itself, and range check. if ((level == 0) || (level > 100)) break; } - if ((level <= 100) && (endptr != digits.c_str()) && (*endptr == ' ')) { + // example bit error rate issues for 10% + // 'l=10 ' no bits in error + // 'l=00 ' single bit error (fails above) + // 'l=1 ' single bit error + // 'l=0 ' double bit error + // There are others, not typically critical because of 2% + // battery_dead_threshold. KISS check, make sure second + // character after digit sequence is not a space. + if ((level <= 100) && (endptr != digits.c_str()) && (endptr[0] == ' ') && (endptr[1] != ' ')) { LOG(INFO) << "Battery level at shutdown " << level << "%"; if (level <= battery_dead_threshold) { ret = "shutdown,battery";