init: non-crashing service can restart immediately am: 9088d1bb12 am: 794968142e

Original change: https://googleplex-android-review.googlesource.com/c/platform/system/core/+/28219211

Change-Id: I619ea35bf8f529b90214c2a3893dd52a8097486d
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
Jiyong Park 2024-07-24 20:52:20 +00:00 committed by Automerger Merge Worker
commit 89bf48634f
4 changed files with 28 additions and 10 deletions

View file

@ -344,11 +344,14 @@ runs the service.
intended to be used with the `exec_start` builtin for any must-have checks during boot. intended to be used with the `exec_start` builtin for any must-have checks during boot.
`restart_period <seconds>` `restart_period <seconds>`
> If a non-oneshot service exits, it will be restarted at its start time plus > If a non-oneshot service exits, it will be restarted at its previous start time plus this period.
this period. It defaults to 5s to rate limit crashing services. The default value is 5s. This can be used to implement periodic services together with the
This can be increased for services that are meant to run periodically. For `timeout_period` command below. For example, it may be set to 3600 to indicate that the service
example, it may be set to 3600 to indicate that the service should run every hour should run every hour or 86400 to indicate that the service should run every day. This can be set
or 86400 to indicate that the service should run every day. to a value shorter than 5s for example 0, but the minimum 5s delay is enforced if the restart was
due to a crash. This is to rate limit persistently crashing services. In other words,
`<seconds>` smaller than 5 is respected only when the service exits deliberately and successfully
(i.e. by calling exit(0)).
`rlimit <resource> <cur> <max>` `rlimit <resource> <cur> <max>`
> This applies the given rlimit to the service. rlimits are inherited by child > This applies the given rlimit to the service. rlimits are inherited by child

View file

@ -307,6 +307,7 @@ void Service::Reap(const siginfo_t& siginfo) {
pid_ = 0; pid_ = 0;
flags_ &= (~SVC_RUNNING); flags_ &= (~SVC_RUNNING);
start_order_ = 0; start_order_ = 0;
was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0;
// Oneshot processes go into the disabled state on exit, // Oneshot processes go into the disabled state on exit,
// except when manually restarted. // except when manually restarted.
@ -360,7 +361,8 @@ void Service::Reap(const siginfo_t& siginfo) {
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed, // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
// reboot into bootloader or set crashing property // reboot into bootloader or set crashing property
boot_clock::time_point now = boot_clock::now(); boot_clock::time_point now = boot_clock::now();
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) { if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
!was_last_exit_ok_) {
bool boot_completed = GetBoolProperty("sys.boot_completed", false); bool boot_completed = GetBoolProperty("sys.boot_completed", false);
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) { if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
if (++crash_count_ > 4) { if (++crash_count_ > 4) {

View file

@ -19,6 +19,7 @@
#include <signal.h> #include <signal.h>
#include <sys/types.h> #include <sys/types.h>
#include <algorithm>
#include <chrono> #include <chrono>
#include <memory> #include <memory>
#include <optional> #include <optional>
@ -115,6 +116,7 @@ class Service {
pid_t pid() const { return pid_; } pid_t pid() const { return pid_; }
android::base::boot_clock::time_point time_started() const { return time_started_; } android::base::boot_clock::time_point time_started() const { return time_started_; }
int crash_count() const { return crash_count_; } int crash_count() const { return crash_count_; }
int was_last_exit_ok() const { return was_last_exit_ok_; }
uid_t uid() const { return proc_attr_.uid; } uid_t uid() const { return proc_attr_.uid; }
gid_t gid() const { return proc_attr_.gid; } gid_t gid() const { return proc_attr_.gid; }
int namespace_flags() const { return namespaces_.flags; } int namespace_flags() const { return namespaces_.flags; }
@ -130,7 +132,15 @@ class Service {
bool process_cgroup_empty() const { return process_cgroup_empty_; } bool process_cgroup_empty() const { return process_cgroup_empty_; }
unsigned long start_order() const { return start_order_; } unsigned long start_order() const { return start_order_; }
void set_sigstop(bool value) { sigstop_ = value; } void set_sigstop(bool value) { sigstop_ = value; }
std::chrono::seconds restart_period() const { return restart_period_; } std::chrono::seconds restart_period() const {
// If the service exited abnormally or due to timeout, rate limit the restart even if
// restart_period is set to a very short value.
// If not, i.e. restart after a deliberate and successful exit, respect the period.
if (!was_last_exit_ok_) {
return std::max(restart_period_, default_restart_period_);
}
return restart_period_;
}
std::optional<std::chrono::seconds> timeout_period() const { return timeout_period_; } std::optional<std::chrono::seconds> timeout_period() const { return timeout_period_; }
const std::vector<std::string>& args() const { return args_; } const std::vector<std::string>& args() const { return args_; }
bool is_updatable() const { return updatable_; } bool is_updatable() const { return updatable_; }
@ -172,6 +182,8 @@ class Service {
bool upgraded_mte_ = false; // whether we upgraded async MTE -> sync MTE before bool upgraded_mte_ = false; // whether we upgraded async MTE -> sync MTE before
std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it
std::optional<std::string> fatal_reboot_target_; // reboot target of fatal handler std::optional<std::string> fatal_reboot_target_; // reboot target of fatal handler
bool was_last_exit_ok_ =
true; // true if the service never exited, or exited with status code 0
std::optional<CapSet> capabilities_; std::optional<CapSet> capabilities_;
ProcessAttributes proc_attr_; ProcessAttributes proc_attr_;
@ -214,7 +226,8 @@ class Service {
bool sigstop_ = false; bool sigstop_ = false;
std::chrono::seconds restart_period_ = 5s; const std::chrono::seconds default_restart_period_ = 5s;
std::chrono::seconds restart_period_ = default_restart_period_;
std::optional<std::chrono::seconds> timeout_period_; std::optional<std::chrono::seconds> timeout_period_;
bool updatable_ = false; bool updatable_ = false;

View file

@ -369,8 +369,8 @@ Result<void> ServiceParser::ParseRebootOnFailure(std::vector<std::string>&& args
Result<void> ServiceParser::ParseRestartPeriod(std::vector<std::string>&& args) { Result<void> ServiceParser::ParseRestartPeriod(std::vector<std::string>&& args) {
int period; int period;
if (!ParseInt(args[1], &period, 5)) { if (!ParseInt(args[1], &period, 0)) {
return Error() << "restart_period value must be an integer >= 5"; return Error() << "restart_period value must be an integer >= 0";
} }
service_->restart_period_ = std::chrono::seconds(period); service_->restart_period_ = std::chrono::seconds(period);
return {}; return {};