diff --git a/init/README.md b/init/README.md index 13c6ebdfa..888c65be4 100644 --- a/init/README.md +++ b/init/README.md @@ -316,11 +316,14 @@ runs the service. intended to be used with the `exec_start` builtin for any must-have checks during boot. `restart_period ` -> If a non-oneshot service exits, it will be restarted at its start time plus - this period. It defaults to 5s to rate limit crashing services. - This can be increased for services that are meant to run periodically. For - example, it may be set to 3600 to indicate that the service should run every hour - or 86400 to indicate that the service should run every day. +> If a non-oneshot service exits, it will be restarted at its previous start time plus this period. + The default value is 5s. This can be used to implement periodic services together with the + `timeout_period` command below. For example, it may be set to 3600 to indicate that the service + should run every hour or 86400 to indicate that the service should run every day. This can be set + to a value shorter than 5s for example 0, but the minimum 5s delay is enforced if the restart was + due to a crash. This is to rate limit persistentally crashing services. In other words, + `` smaller than 5 is respected only when the service exits deliverately and successfully + (i.e. by calling exit(0)). `rlimit ` > This applies the given rlimit to the service. rlimits are inherited by child diff --git a/init/service.cpp b/init/service.cpp index bd704cf8e..7d83d60e4 100644 --- a/init/service.cpp +++ b/init/service.cpp @@ -298,6 +298,7 @@ void Service::Reap(const siginfo_t& siginfo) { pid_ = 0; flags_ &= (~SVC_RUNNING); start_order_ = 0; + was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0; // Oneshot processes go into the disabled state on exit, // except when manually restarted. @@ -321,7 +322,8 @@ void Service::Reap(const siginfo_t& siginfo) { // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed, // reboot into bootloader or set crashing property boot_clock::time_point now = boot_clock::now(); - if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) { + if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) && + !was_last_exit_ok_) { bool boot_completed = GetBoolProperty("sys.boot_completed", false); if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) { if (++crash_count_ > 4) { diff --git a/init/service.h b/init/service.h index c314aa1c6..f5f361901 100644 --- a/init/service.h +++ b/init/service.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -113,6 +114,7 @@ class Service { pid_t pid() const { return pid_; } android::base::boot_clock::time_point time_started() const { return time_started_; } int crash_count() const { return crash_count_; } + int was_last_exit_ok() const { return was_last_exit_ok_; } uid_t uid() const { return proc_attr_.uid; } gid_t gid() const { return proc_attr_.gid; } int namespace_flags() const { return namespaces_.flags; } @@ -128,7 +130,15 @@ class Service { bool process_cgroup_empty() const { return process_cgroup_empty_; } unsigned long start_order() const { return start_order_; } void set_sigstop(bool value) { sigstop_ = value; } - std::chrono::seconds restart_period() const { return restart_period_; } + std::chrono::seconds restart_period() const { + // If the service exited abnormally or due to timeout, late limit the restart even if + // restart_period is set to a very short value. + // If not, i.e. restart after a deliberate and successful exit, respect the period. + if (!was_last_exit_ok_) { + return std::max(restart_period_, default_restart_period_); + } + return restart_period_; + } std::optional timeout_period() const { return timeout_period_; } const std::vector& args() const { return args_; } bool is_updatable() const { return updatable_; } @@ -171,6 +181,8 @@ class Service { int crash_count_; // number of times crashed within window std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it std::optional fatal_reboot_target_; // reboot target of fatal handler + bool was_last_exit_ok_ = + true; // true if the service never exited, or exited with status code 0 std::optional capabilities_; ProcessAttributes proc_attr_; @@ -211,7 +223,8 @@ class Service { bool sigstop_ = false; - std::chrono::seconds restart_period_ = 5s; + const std::chrono::seconds default_restart_period_ = 5s; + std::chrono::seconds restart_period_ = default_restart_period_; std::optional timeout_period_; bool updatable_ = false; diff --git a/init/service_parser.cpp b/init/service_parser.cpp index 9e914ee7c..08d82c1d3 100644 --- a/init/service_parser.cpp +++ b/init/service_parser.cpp @@ -364,8 +364,8 @@ Result ServiceParser::ParseRebootOnFailure(std::vector&& args Result ServiceParser::ParseRestartPeriod(std::vector&& args) { int period; - if (!ParseInt(args[1], &period, 5)) { - return Error() << "restart_period value must be an integer >= 5"; + if (!ParseInt(args[1], &period, 0)) { + return Error() << "restart_period value must be an integer >= 0"; } service_->restart_period_ = std::chrono::seconds(period); return {};