From 9088d1bb12b1a97f49055abef609e0fc48a10234 Mon Sep 17 00:00:00 2001 From: Jiyong Park Date: Fri, 9 Jun 2023 09:52:49 +0900 Subject: [PATCH] init: non-crashing service can restart immediately This CL allows restart_period to be set to a value shorter than 5s. Previously this was prohibited to rate limit crashing services. That behavior is considered to be a bit too conservative because some services don't crash, but exit deliverately. adbd is the motivating example. When adb root or adb unroot is requested, it changes its mode of operation (via sysprop), exits itself, and restarts (by init) to enter into the mode. However, due to the 5s delay, the mode change can complete no earlier than 5 seconds after adbd was started last time. This can slow the mode change when it is requested right after the boot. With this CL, restart_period can be set to a value smaller than 5. And services like adbd can make use of it. However, in ordef to rate limit crashing service, the default is enforced if the service was crashed last time. In addition, such intended restart is not counted as crashes when monitoring successive crashes during booting. Bug: 286061817 Bug: 339844204 Test: /packages/modules/Virtualization/vm/vm_shell.sh start-microdroid \ --auto-connect -- --protected * with this change: within 2s * without this change: over 6s (cherry picked from https://android-review.googlesource.com/q/commit:0d277d777f290729fd9708b88f7ed57f0e0e49eb) Merged-In: I1b3f0c92d349e8c8760821cf50fb69997b67b242 Change-Id: I1b3f0c92d349e8c8760821cf50fb69997b67b242 --- init/README.md | 13 ++++++++----- init/service.cpp | 4 +++- init/service.h | 17 +++++++++++++++-- init/service_parser.cpp | 4 ++-- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/init/README.md b/init/README.md index aaafe7887..734e14eee 100644 --- a/init/README.md +++ b/init/README.md @@ -316,11 +316,14 @@ runs the service. intended to be used with the `exec_start` builtin for any must-have checks during boot. `restart_period ` -> If a non-oneshot service exits, it will be restarted at its start time plus - this period. It defaults to 5s to rate limit crashing services. - This can be increased for services that are meant to run periodically. For - example, it may be set to 3600 to indicate that the service should run every hour - or 86400 to indicate that the service should run every day. +> If a non-oneshot service exits, it will be restarted at its previous start time plus this period. + The default value is 5s. This can be used to implement periodic services together with the + `timeout_period` command below. For example, it may be set to 3600 to indicate that the service + should run every hour or 86400 to indicate that the service should run every day. This can be set + to a value shorter than 5s for example 0, but the minimum 5s delay is enforced if the restart was + due to a crash. This is to rate limit persistentally crashing services. In other words, + `` smaller than 5 is respected only when the service exits deliverately and successfully + (i.e. by calling exit(0)). `rlimit ` > This applies the given rlimit to the service. rlimits are inherited by child diff --git a/init/service.cpp b/init/service.cpp index bd704cf8e..7d83d60e4 100644 --- a/init/service.cpp +++ b/init/service.cpp @@ -298,6 +298,7 @@ void Service::Reap(const siginfo_t& siginfo) { pid_ = 0; flags_ &= (~SVC_RUNNING); start_order_ = 0; + was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0; // Oneshot processes go into the disabled state on exit, // except when manually restarted. @@ -321,7 +322,8 @@ void Service::Reap(const siginfo_t& siginfo) { // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed, // reboot into bootloader or set crashing property boot_clock::time_point now = boot_clock::now(); - if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) { + if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) && + !was_last_exit_ok_) { bool boot_completed = GetBoolProperty("sys.boot_completed", false); if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) { if (++crash_count_ > 4) { diff --git a/init/service.h b/init/service.h index c314aa1c6..f5f361901 100644 --- a/init/service.h +++ b/init/service.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -113,6 +114,7 @@ class Service { pid_t pid() const { return pid_; } android::base::boot_clock::time_point time_started() const { return time_started_; } int crash_count() const { return crash_count_; } + int was_last_exit_ok() const { return was_last_exit_ok_; } uid_t uid() const { return proc_attr_.uid; } gid_t gid() const { return proc_attr_.gid; } int namespace_flags() const { return namespaces_.flags; } @@ -128,7 +130,15 @@ class Service { bool process_cgroup_empty() const { return process_cgroup_empty_; } unsigned long start_order() const { return start_order_; } void set_sigstop(bool value) { sigstop_ = value; } - std::chrono::seconds restart_period() const { return restart_period_; } + std::chrono::seconds restart_period() const { + // If the service exited abnormally or due to timeout, late limit the restart even if + // restart_period is set to a very short value. + // If not, i.e. restart after a deliberate and successful exit, respect the period. + if (!was_last_exit_ok_) { + return std::max(restart_period_, default_restart_period_); + } + return restart_period_; + } std::optional timeout_period() const { return timeout_period_; } const std::vector& args() const { return args_; } bool is_updatable() const { return updatable_; } @@ -171,6 +181,8 @@ class Service { int crash_count_; // number of times crashed within window std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it std::optional fatal_reboot_target_; // reboot target of fatal handler + bool was_last_exit_ok_ = + true; // true if the service never exited, or exited with status code 0 std::optional capabilities_; ProcessAttributes proc_attr_; @@ -211,7 +223,8 @@ class Service { bool sigstop_ = false; - std::chrono::seconds restart_period_ = 5s; + const std::chrono::seconds default_restart_period_ = 5s; + std::chrono::seconds restart_period_ = default_restart_period_; std::optional timeout_period_; bool updatable_ = false; diff --git a/init/service_parser.cpp b/init/service_parser.cpp index 1ee309d98..cc39565de 100644 --- a/init/service_parser.cpp +++ b/init/service_parser.cpp @@ -364,8 +364,8 @@ Result ServiceParser::ParseRebootOnFailure(std::vector&& args Result ServiceParser::ParseRestartPeriod(std::vector&& args) { int period; - if (!ParseInt(args[1], &period, 5)) { - return Error() << "restart_period value must be an integer >= 5"; + if (!ParseInt(args[1], &period, 0)) { + return Error() << "restart_period value must be an integer >= 0"; } service_->restart_period_ = std::chrono::seconds(period); return {};