From 60971e6ce22bd4fee531dc72875acb4b913f69a7 Mon Sep 17 00:00:00 2001 From: Tom Cherry Date: Tue, 10 Sep 2019 10:40:47 -0700 Subject: [PATCH] init: add reboot_on_failure service option This replaces the recently added `exec_reboot_on_failure` builtin, since it'll be cleaner to extend service definitions than extending `exec`. This is in line with what we decided when adding `exec_start` instead of extending `exec` to add parameters for priority. Test: `exec_start` a service with a reboot_on_failure option and watch the system reboot appropriately when the service is not found and when the service terminates with a non-zero exit code. Change-Id: I332bf9839fa94840d159a810c4a6ba2522189d0b --- init/README.md | 6 +++ init/builtins.cpp | 15 ------- init/host_init_stubs.h | 5 +++ init/init.cpp | 21 +++++----- init/init.h | 2 + init/service.cpp | 23 +++++++++++ init/service.h | 2 + init/service_parser.cpp | 88 +++++++++++++++++++++-------------------- init/service_parser.h | 1 + rootdir/init.rc | 22 ++++++++--- 10 files changed, 113 insertions(+), 72 deletions(-) diff --git a/init/README.md b/init/README.md index 2de76a9ec..423c6d151 100644 --- a/init/README.md +++ b/init/README.md @@ -263,6 +263,12 @@ runs the service. > Scheduling priority of the service process. This value has to be in range -20 to 19. Default priority is 0. Priority is set via setpriority(). +`reboot_on_failure ` +> If this process cannot be started or if the process terminates with an exit code other than + CLD_EXITED or an status other than '0', reboot the system with the target specified in + _target_. _target_ takes the same format as the parameter to sys.powerctl. This is particularly + intended to be used with the `exec_start` builtin for any must-have checks during boot. + `restart_period ` > If a non-oneshot service exits, it will be restarted at its start time plus this period. It defaults to 5s to rate limit crashing services. diff --git a/init/builtins.cpp b/init/builtins.cpp index 21db8dca7..42211b2c1 100644 --- a/init/builtins.cpp +++ b/init/builtins.cpp @@ -1104,20 +1104,6 @@ static Result ExecWithFunctionOnFailure(const std::vector& ar return {}; } -static Result do_exec_reboot_on_failure(const BuiltinArguments& args) { - auto reboot_reason = args[1]; - auto reboot = [reboot_reason](const std::string& message) { - property_set(LAST_REBOOT_REASON_PROPERTY, reboot_reason); - sync(); - LOG(FATAL) << message << ": rebooting into bootloader, reason: " << reboot_reason; - }; - - std::vector remaining_args(args.begin() + 1, args.end()); - remaining_args[0] = args[0]; - - return ExecWithFunctionOnFailure(remaining_args, reboot); -} - static Result ExecVdcRebootOnFailure(const std::string& vdc_arg) { auto reboot_reason = vdc_arg + "_failed"; @@ -1225,7 +1211,6 @@ const BuiltinFunctionMap& GetBuiltinFunctionMap() { {"enable", {1, 1, {false, do_enable}}}, {"exec", {1, kMax, {false, do_exec}}}, {"exec_background", {1, kMax, {false, do_exec_background}}}, - {"exec_reboot_on_failure", {2, kMax, {false, do_exec_reboot_on_failure}}}, {"exec_start", {1, 1, {false, do_exec_start}}}, {"export", {2, 2, {false, do_export}}}, {"hostname", {1, 1, {true, do_hostname}}}, diff --git a/init/host_init_stubs.h b/init/host_init_stubs.h index f9a08a543..e3068b224 100644 --- a/init/host_init_stubs.h +++ b/init/host_init_stubs.h @@ -34,6 +34,11 @@ namespace android { namespace init { +// init.h +inline void EnterShutdown(const std::string&) { + abort(); +} + // property_service.h inline bool CanReadProperty(const std::string&, const std::string&) { return true; diff --git a/init/init.cpp b/init/init.cpp index 4e9a14f87..53b065f12 100644 --- a/init/init.cpp +++ b/init/init.cpp @@ -180,6 +180,16 @@ void ResetWaitForProp() { waiting_for_prop.reset(); } +void EnterShutdown(const std::string& command) { + // We can't call HandlePowerctlMessage() directly in this function, + // because it modifies the contents of the action queue, which can cause the action queue + // to get into a bad state if this function is called from a command being executed by the + // action queue. Instead we set this flag and ensure that shutdown happens before the next + // command is run in the main init loop. + shutdown_command = command; + do_shutdown = true; +} + void property_changed(const std::string& name, const std::string& value) { // If the property is sys.powerctl, we bypass the event queue and immediately handle it. // This is to ensure that init will always and immediately shutdown/reboot, regardless of @@ -188,16 +198,7 @@ void property_changed(const std::string& name, const std::string& value) { // In non-thermal-shutdown case, 'shutdown' trigger will be fired to let device specific // commands to be executed. if (name == "sys.powerctl") { - // Despite the above comment, we can't call HandlePowerctlMessage() in this function, - // because it modifies the contents of the action queue, which can cause the action queue - // to get into a bad state if this function is called from a command being executed by the - // action queue. Instead we set this flag and ensure that shutdown happens before the next - // command is run in the main init loop. - // TODO: once property service is removed from init, this will never happen from a builtin, - // but rather from a callback from the property service socket, in which case this hack can - // go away. - shutdown_command = value; - do_shutdown = true; + EnterShutdown(value); } if (property_triggers_enabled) ActionManager::GetInstance().QueuePropertyChange(name, value); diff --git a/init/init.h b/init/init.h index 8ac52e2c4..61fb110ea 100644 --- a/init/init.h +++ b/init/init.h @@ -31,6 +31,8 @@ namespace init { Parser CreateParser(ActionManager& action_manager, ServiceList& service_list); Parser CreateServiceOnlyParser(ServiceList& service_list); +void EnterShutdown(const std::string& command); + bool start_waiting_for_property(const char *name, const char *value); void DumpState(); diff --git a/init/service.cpp b/init/service.cpp index 7a209664e..793a2b2cf 100644 --- a/init/service.cpp +++ b/init/service.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ #if defined(__ANDROID__) #include +#include "init.h" #include "mount_namespace.h" #include "property_service.h" #else @@ -50,6 +52,7 @@ using android::base::boot_clock; using android::base::GetProperty; using android::base::Join; +using android::base::make_scope_guard; using android::base::StartsWith; using android::base::StringPrintf; using android::base::WriteStringToFile; @@ -250,6 +253,11 @@ void Service::Reap(const siginfo_t& siginfo) { f(siginfo); } + if ((siginfo.si_code != CLD_EXITED || siginfo.si_status != 0) && on_failure_reboot_target_) { + LOG(ERROR) << "Service with 'reboot_on_failure' option failed, shutting down system."; + EnterShutdown(*on_failure_reboot_target_); + } + if (flags_ & SVC_EXEC) UnSetExec(); if (flags_ & SVC_TEMPORARY) return; @@ -325,6 +333,12 @@ void Service::DumpState() const { Result Service::ExecStart() { + auto reboot_on_failure = make_scope_guard([this] { + if (on_failure_reboot_target_) { + EnterShutdown(*on_failure_reboot_target_); + } + }); + if (is_updatable() && !ServiceList::GetInstance().IsServicesUpdated()) { // Don't delay the service for ExecStart() as the semantic is that // the caller might depend on the side effect of the execution. @@ -345,10 +359,17 @@ Result Service::ExecStart() { << " gid " << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context " << (!seclabel_.empty() ? seclabel_ : "default") << ") started; waiting..."; + reboot_on_failure.Disable(); return {}; } Result Service::Start() { + auto reboot_on_failure = make_scope_guard([this] { + if (on_failure_reboot_target_) { + EnterShutdown(*on_failure_reboot_target_); + } + }); + if (is_updatable() && !ServiceList::GetInstance().IsServicesUpdated()) { ServiceList::GetInstance().DelayService(*this); return Error() << "Cannot start an updatable service '" << name_ @@ -371,6 +392,7 @@ Result Service::Start() { flags_ |= SVC_RESTART; } // It is not an error to try to start a service that is already running. + reboot_on_failure.Disable(); return {}; } @@ -532,6 +554,7 @@ Result Service::Start() { } NotifyStateChange("running"); + reboot_on_failure.Disable(); return {}; } diff --git a/init/service.h b/init/service.h index ccefc8e91..788f792a1 100644 --- a/init/service.h +++ b/init/service.h @@ -196,6 +196,8 @@ class Service { bool post_data_ = false; bool running_at_post_data_reset_ = false; + + std::optional on_failure_reboot_target_; }; } // namespace init diff --git a/init/service_parser.cpp b/init/service_parser.cpp index 543be2329..4322dc70a 100644 --- a/init/service_parser.cpp +++ b/init/service_parser.cpp @@ -310,6 +310,18 @@ Result ServiceParser::ParseProcessRlimit(std::vector&& args) return {}; } +Result ServiceParser::ParseRebootOnFailure(std::vector&& args) { + if (service_->on_failure_reboot_target_) { + return Error() << "Only one reboot_on_failure command may be specified"; + } + if (!StartsWith(args[1], "shutdown") && !StartsWith(args[1], "reboot")) { + return Error() + << "reboot_on_failure commands must begin with either 'shutdown' or 'reboot'"; + } + service_->on_failure_reboot_target_ = std::move(args[1]); + return {}; +} + Result ServiceParser::ParseRestartPeriod(std::vector&& args) { int period; if (!ParseInt(args[1], &period, 5)) { @@ -471,49 +483,41 @@ const KeywordMap& ServiceParser::GetParserMap() con constexpr std::size_t kMax = std::numeric_limits::max(); // clang-format off static const KeywordMap parser_map = { - {"capabilities", - {0, kMax, &ServiceParser::ParseCapabilities}}, - {"class", {1, kMax, &ServiceParser::ParseClass}}, - {"console", {0, 1, &ServiceParser::ParseConsole}}, - {"critical", {0, 0, &ServiceParser::ParseCritical}}, - {"disabled", {0, 0, &ServiceParser::ParseDisabled}}, - {"enter_namespace", - {2, 2, &ServiceParser::ParseEnterNamespace}}, - {"file", {2, 2, &ServiceParser::ParseFile}}, - {"group", {1, NR_SVC_SUPP_GIDS + 1, &ServiceParser::ParseGroup}}, - {"interface", {2, 2, &ServiceParser::ParseInterface}}, - {"ioprio", {2, 2, &ServiceParser::ParseIoprio}}, - {"keycodes", {1, kMax, &ServiceParser::ParseKeycodes}}, - {"memcg.limit_in_bytes", - {1, 1, &ServiceParser::ParseMemcgLimitInBytes}}, - {"memcg.limit_percent", - {1, 1, &ServiceParser::ParseMemcgLimitPercent}}, - {"memcg.limit_property", - {1, 1, &ServiceParser::ParseMemcgLimitProperty}}, + {"capabilities", {0, kMax, &ServiceParser::ParseCapabilities}}, + {"class", {1, kMax, &ServiceParser::ParseClass}}, + {"console", {0, 1, &ServiceParser::ParseConsole}}, + {"critical", {0, 0, &ServiceParser::ParseCritical}}, + {"disabled", {0, 0, &ServiceParser::ParseDisabled}}, + {"enter_namespace", {2, 2, &ServiceParser::ParseEnterNamespace}}, + {"file", {2, 2, &ServiceParser::ParseFile}}, + {"group", {1, NR_SVC_SUPP_GIDS + 1, &ServiceParser::ParseGroup}}, + {"interface", {2, 2, &ServiceParser::ParseInterface}}, + {"ioprio", {2, 2, &ServiceParser::ParseIoprio}}, + {"keycodes", {1, kMax, &ServiceParser::ParseKeycodes}}, + {"memcg.limit_in_bytes", {1, 1, &ServiceParser::ParseMemcgLimitInBytes}}, + {"memcg.limit_percent", {1, 1, &ServiceParser::ParseMemcgLimitPercent}}, + {"memcg.limit_property", {1, 1, &ServiceParser::ParseMemcgLimitProperty}}, {"memcg.soft_limit_in_bytes", - {1, 1, &ServiceParser::ParseMemcgSoftLimitInBytes}}, - {"memcg.swappiness", - {1, 1, &ServiceParser::ParseMemcgSwappiness}}, - {"namespace", {1, 2, &ServiceParser::ParseNamespace}}, - {"oneshot", {0, 0, &ServiceParser::ParseOneshot}}, - {"onrestart", {1, kMax, &ServiceParser::ParseOnrestart}}, - {"oom_score_adjust", - {1, 1, &ServiceParser::ParseOomScoreAdjust}}, - {"override", {0, 0, &ServiceParser::ParseOverride}}, - {"priority", {1, 1, &ServiceParser::ParsePriority}}, - {"restart_period", - {1, 1, &ServiceParser::ParseRestartPeriod}}, - {"rlimit", {3, 3, &ServiceParser::ParseProcessRlimit}}, - {"seclabel", {1, 1, &ServiceParser::ParseSeclabel}}, - {"setenv", {2, 2, &ServiceParser::ParseSetenv}}, - {"shutdown", {1, 1, &ServiceParser::ParseShutdown}}, - {"sigstop", {0, 0, &ServiceParser::ParseSigstop}}, - {"socket", {3, 6, &ServiceParser::ParseSocket}}, - {"timeout_period", - {1, 1, &ServiceParser::ParseTimeoutPeriod}}, - {"updatable", {0, 0, &ServiceParser::ParseUpdatable}}, - {"user", {1, 1, &ServiceParser::ParseUser}}, - {"writepid", {1, kMax, &ServiceParser::ParseWritepid}}, + {1, 1, &ServiceParser::ParseMemcgSoftLimitInBytes}}, + {"memcg.swappiness", {1, 1, &ServiceParser::ParseMemcgSwappiness}}, + {"namespace", {1, 2, &ServiceParser::ParseNamespace}}, + {"oneshot", {0, 0, &ServiceParser::ParseOneshot}}, + {"onrestart", {1, kMax, &ServiceParser::ParseOnrestart}}, + {"oom_score_adjust", {1, 1, &ServiceParser::ParseOomScoreAdjust}}, + {"override", {0, 0, &ServiceParser::ParseOverride}}, + {"priority", {1, 1, &ServiceParser::ParsePriority}}, + {"reboot_on_failure", {1, 1, &ServiceParser::ParseRebootOnFailure}}, + {"restart_period", {1, 1, &ServiceParser::ParseRestartPeriod}}, + {"rlimit", {3, 3, &ServiceParser::ParseProcessRlimit}}, + {"seclabel", {1, 1, &ServiceParser::ParseSeclabel}}, + {"setenv", {2, 2, &ServiceParser::ParseSetenv}}, + {"shutdown", {1, 1, &ServiceParser::ParseShutdown}}, + {"sigstop", {0, 0, &ServiceParser::ParseSigstop}}, + {"socket", {3, 6, &ServiceParser::ParseSocket}}, + {"timeout_period", {1, 1, &ServiceParser::ParseTimeoutPeriod}}, + {"updatable", {0, 0, &ServiceParser::ParseUpdatable}}, + {"user", {1, 1, &ServiceParser::ParseUser}}, + {"writepid", {1, kMax, &ServiceParser::ParseWritepid}}, }; // clang-format on return parser_map; diff --git a/init/service_parser.h b/init/service_parser.h index 47298748e..ace4d7068 100644 --- a/init/service_parser.h +++ b/init/service_parser.h @@ -68,6 +68,7 @@ class ServiceParser : public SectionParser { Result ParseMemcgSwappiness(std::vector&& args); Result ParseNamespace(std::vector&& args); Result ParseProcessRlimit(std::vector&& args); + Result ParseRebootOnFailure(std::vector&& args); Result ParseRestartPeriod(std::vector&& args); Result ParseSeclabel(std::vector&& args); Result ParseSetenv(std::vector&& args); diff --git a/rootdir/init.rc b/rootdir/init.rc index b99c149c3..66247acc5 100644 --- a/rootdir/init.rc +++ b/rootdir/init.rc @@ -58,13 +58,25 @@ on early-init # Run boringssl self test for each ABI so that later processes can skip it. http://b/139348610 on early-init && property:ro.product.cpu.abilist32=* - exec_reboot_on_failure boringssl-self-check-failed /system/bin/boringssl_self_test32 + exec_start boringssl_self_test32 on early-init && property:ro.product.cpu.abilist64=* - exec_reboot_on_failure boringssl-self-check-failed /system/bin/boringssl_self_test64 -on property:apexd.status=ready && property:ro.product.cpu.abilist64=* - exec_reboot_on_failure boringssl-self-check-failed /apex/com.android.conscrypt/bin/boringssl_self_test64 + exec_start boringssl_self_test64 on property:apexd.status=ready && property:ro.product.cpu.abilist32=* - exec_reboot_on_failure boringssl-self-check-failed /apex/com.android.conscrypt/bin/boringssl_self_test32 + exec_start boringssl_self_test_apex32 +on property:apexd.status=ready && property:ro.product.cpu.abilist64=* + exec_start boringssl_self_test_apex64 + +service boringssl_self_test32 /system/bin/boringssl_self_test32 + reboot_on_failure reboot,bootloader,boringssl-self-check-failed + +service boringssl_self_test64 /system/bin/boringssl_self_test64 + reboot_on_failure reboot,bootloader,boringssl-self-check-failed + +service boringssl_self_test_apex32 /apex/com.android.conscrypt/bin/boringssl_self_test32 + reboot_on_failure reboot,bootloader,boringssl-self-check-failed + +service boringssl_self_test_apex64 /apex/com.android.conscrypt/bin/boringssl_self_test64 + reboot_on_failure reboot,bootloader,boringssl-self-check-failed on init sysclktz 0