From 45215ae6e5742d9dff5468d744a02013e3efbef7 Mon Sep 17 00:00:00 2001 From: Woody Lin Date: Thu, 26 Dec 2019 22:22:28 +0800 Subject: [PATCH] init/service_parser: Add arguments `window' and `target' for `critical' The critical services can now use the interface `critical [window=<fatal crash window mins>] [target=<fatal reboot target>]` to set up a timing window: when there are more than 4 crashes within it, init regards this as a fatal system error and reboots the system. Config `window=${zygote.critical_window.minute:-off}' and `target=zygote-fatal' for all system-server services, so that a platform that configures ro.boot.zygote_critical_window can escape the system-server crash-loop via the init fatal handler. Bug: 146818493 Change-Id: Ib2dc253616be6935ab9ab52184a1b6394665e813 --- init/README.md | 7 +++++-- init/host_init_stubs.h | 3 ++- init/reboot_utils.cpp | 8 +++++++- init/reboot_utils.h | 3 ++- init/service.cpp | 15 ++++++++++----- init/service.h | 2 ++ init/service_parser.cpp | 35 ++++++++++++++++++++++++++++++++++- rootdir/init.zygote32.rc | 1 + rootdir/init.zygote64.rc | 1 + rootdir/init.zygote64_32.rc | 1 + 10 files changed, 65 insertions(+), 11 deletions(-) diff --git a/init/README.md b/init/README.md index 6439393c6..ab6a885a2 100644 --- a/init/README.md +++ b/init/README.md @@ -172,9 +172,12 @@ runs the service. This option connects stdin, stdout, and stderr to the console. It is mutually exclusive with the stdio_to_kmsg option, which only connects stdout and stderr to kmsg. -`critical` +`critical [window=<fatal crash window mins>] [target=<fatal reboot target>]` > This is a device-critical service. If it exits more than four times in - four minutes or before boot completes, the device will reboot into bootloader. + _fatal crash window mins_ minutes or before boot completes, the device + will reboot into _fatal reboot target_. + The default value of _fatal crash window mins_ is 4, and the default value + of _fatal reboot target_ is 'bootloader'. `disabled` > This service will not automatically start with its class. 
diff --git a/init/host_init_stubs.h b/init/host_init_stubs.h index caa8f8d0d..2a8bf6c2b 100644 --- a/init/host_init_stubs.h +++ b/init/host_init_stubs.h @@ -20,6 +20,7 @@ #include #include +#include <optional> #include #include @@ -41,7 +42,7 @@ inline bool CanReadProperty(const std::string&, const std::string&) { } // reboot_utils.h -inline void SetFatalRebootTarget() {} +inline void SetFatalRebootTarget(const std::optional<std::string>& = std::nullopt) {} inline void __attribute__((noreturn)) InitFatalReboot(int signal_number) { abort(); } diff --git a/init/reboot_utils.cpp b/init/reboot_utils.cpp index 76460a5df..98f6857f5 100644 --- a/init/reboot_utils.cpp +++ b/init/reboot_utils.cpp @@ -19,6 +19,7 @@ #include #include +#include <optional> #include #include @@ -37,7 +38,7 @@ namespace init { static std::string init_fatal_reboot_target = "bootloader"; static bool init_fatal_panic = false; -void SetFatalRebootTarget() { +void SetFatalRebootTarget(const std::optional<std::string>& reboot_target) { std::string cmdline; android::base::ReadFileToString("/proc/cmdline", &cmdline); cmdline = android::base::Trim(cmdline); @@ -45,6 +46,11 @@ void SetFatalRebootTarget() { const char kInitFatalPanicString[] = "androidboot.init_fatal_panic=true"; init_fatal_panic = cmdline.find(kInitFatalPanicString) != std::string::npos; + if (reboot_target) { /* an explicit target wins: return before the androidboot.init_fatal_reboot_target= cmdline lookup below */ + init_fatal_reboot_target = *reboot_target; + return; + } + const char kRebootTargetString[] = "androidboot.init_fatal_reboot_target="; auto start_pos = cmdline.find(kRebootTargetString); if (start_pos == std::string::npos) { diff --git a/init/reboot_utils.h b/init/reboot_utils.h index 05bb9ae8e..a0023b9bb 100644 --- a/init/reboot_utils.h +++ b/init/reboot_utils.h @@ -16,6 +16,7 @@ #pragma once +#include <optional> #include #define PROC_SYSRQ "/proc/sysrq-trigger" @@ -23,7 +24,7 @@ namespace android { namespace init { -void SetFatalRebootTarget(); +void SetFatalRebootTarget(const std::optional<std::string>& reboot_target = std::nullopt); // Determines whether the system is capable of rebooting. 
This is conservative, // so if any of the attempts to determine this fail, it will still return true. bool IsRebootCapable(); diff --git a/init/service.cpp b/init/service.cpp index 68365b3c3..ecc86d969 100644 --- a/init/service.cpp +++ b/init/service.cpp @@ -45,6 +45,7 @@ #include #include "mount_namespace.h" +#include "reboot_utils.h" #include "selinux.h" #else #include "host_init_stubs.h" @@ -312,20 +313,24 @@ void Service::Reap(const siginfo_t& siginfo) { #endif const bool is_process_updatable = !pre_apexd_ && is_apex_updatable; - // If we crash > 4 times in 4 minutes or before boot_completed, + // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed, // reboot into bootloader or set crashing property boot_clock::time_point now = boot_clock::now(); if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) { bool boot_completed = android::base::GetBoolProperty("sys.boot_completed", false); - if (now < time_crashed_ + 4min || !boot_completed) { + if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) { if (++crash_count_ > 4) { + auto exit_reason = boot_completed ? + "in " + std::to_string(fatal_crash_window_.count()) + " minutes" : + "before boot completed"; if (flags_ & SVC_CRITICAL) { - // Aborts into bootloader + // Aborts into `fatal_reboot_target_'. + SetFatalRebootTarget(fatal_reboot_target_); LOG(FATAL) << "critical process '" << name_ << "' exited 4 times " - << (boot_completed ? 
"in 4 minutes" : "before boot completed"); + << exit_reason; } else { LOG(ERROR) << "updatable process '" << name_ << "' exited 4 times " - << (boot_completed ? "in 4 minutes" : "before boot completed"); + << exit_reason; // Notifies update_verifier and apexd SetProperty("sys.init.updatable_crashing_process_name", name_); SetProperty("sys.init.updatable_crashing", "1"); diff --git a/init/service.h b/init/service.h index 34ed5eff9..bc5c90fe6 100644 --- a/init/service.h +++ b/init/service.h @@ -155,6 +155,8 @@ class Service { android::base::boot_clock::time_point time_started_; // time of last start android::base::boot_clock::time_point time_crashed_; // first crash within inspection window int crash_count_; // number of times crashed within window + std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it + std::optional<std::string> fatal_reboot_target_; // reboot target of fatal handler std::optional capabilities_; ProcessAttributes proc_attr_; diff --git a/init/service_parser.cpp b/init/service_parser.cpp index bdac0777b..97621dac6 100644 --- a/init/service_parser.cpp +++ b/init/service_parser.cpp @@ -93,6 +93,39 @@ Result ServiceParser::ParseConsole(std::vector&& args) { } Result ServiceParser::ParseCritical(std::vector&& args) { + std::optional<std::string> fatal_reboot_target; /* Handles `critical [window=<minutes>|off] [target=<reboot target>]`; values are staged here and committed to service_ only after every argument has parsed. */ + std::optional<std::chrono::minutes> fatal_crash_window; + + for (auto it = args.begin() + 1; it != args.end(); ++it) { + auto arg = android::base::Split(*it, "="); + if (arg.size() != 2) { + return Error() << "critical: Argument '" << *it << "' is not supported"; + } else if (arg[0] == "target") { + fatal_reboot_target = arg[1]; + } else if (arg[0] == "window") { + int minutes; + auto window = ExpandProps(arg[1]); + if (!window.ok()) { + return Error() << "critical: Could not expand argument '" << arg[1] << "'"; + } + if (*window == "off") { /* "off": succeed without setting SVC_CRITICAL, so the service is not marked critical and any `target=` is dropped */ + return {}; + } + if (!ParseInt(*window, &minutes, 1)) { /* minimum of 1 enforces the "> 0" contract stated in the error below */ + return Error() << "critical: 'fatal_crash_window' must be an integer > 0"; + } + fatal_crash_window = std::chrono::minutes(minutes); + } else { + return Error() << "critical: Argument '" << *it << "' is not supported"; + } + } + + if (fatal_reboot_target) { + 
service_->fatal_reboot_target_ = *fatal_reboot_target; + } + if (fatal_crash_window) { + service_->fatal_crash_window_ = *fatal_crash_window; + } service_->flags_ |= SVC_CRITICAL; return {}; } @@ -506,7 +539,7 @@ const KeywordMap& ServiceParser::GetParserMap() con {"capabilities", {0, kMax, &ServiceParser::ParseCapabilities}}, {"class", {1, kMax, &ServiceParser::ParseClass}}, {"console", {0, 1, &ServiceParser::ParseConsole}}, - {"critical", {0, 0, &ServiceParser::ParseCritical}}, + {"critical", {0, 2, &ServiceParser::ParseCritical}}, {"disabled", {0, 0, &ServiceParser::ParseDisabled}}, {"enter_namespace", {2, 2, &ServiceParser::ParseEnterNamespace}}, {"file", {2, 2, &ServiceParser::ParseFile}}, diff --git a/rootdir/init.zygote32.rc b/rootdir/init.zygote32.rc index e827cf557..9469a4873 100644 --- a/rootdir/init.zygote32.rc +++ b/rootdir/init.zygote32.rc @@ -13,3 +13,4 @@ service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-sys onrestart restart netd onrestart restart wificond writepid /dev/cpuset/foreground/tasks + critical window=${zygote.critical_window.minute:-off} target=zygote-fatal diff --git a/rootdir/init.zygote64.rc b/rootdir/init.zygote64.rc index adc703102..98dc0881f 100644 --- a/rootdir/init.zygote64.rc +++ b/rootdir/init.zygote64.rc @@ -13,3 +13,4 @@ service zygote /system/bin/app_process64 -Xzygote /system/bin --zygote --start-s onrestart restart netd onrestart restart wificond writepid /dev/cpuset/foreground/tasks + critical window=${zygote.critical_window.minute:-off} target=zygote-fatal diff --git a/rootdir/init.zygote64_32.rc b/rootdir/init.zygote64_32.rc index fb9e99b69..3eee180ba 100644 --- a/rootdir/init.zygote64_32.rc +++ b/rootdir/init.zygote64_32.rc @@ -13,6 +13,7 @@ service zygote /system/bin/app_process64 -Xzygote /system/bin --zygote --start-s onrestart restart netd onrestart restart wificond task_profiles ProcessCapacityHigh MaxPerformance + critical window=${zygote.critical_window.minute:-off} target=zygote-fatal 
service zygote_secondary /system/bin/app_process32 -Xzygote /system/bin --zygote --socket-name=zygote_secondary --enable-lazy-preload class main