init: non-crashing service can restart immediately am: 9088d1bb12
Original change: https://googleplex-android-review.googlesource.com/c/platform/system/core/+/28219211 Change-Id: Id675ba8c2ba97a51b63da8ba80a857a07c383245 Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
commit
794968142e
4 changed files with 28 additions and 10 deletions
|
|
@ -344,11 +344,14 @@ runs the service.
|
|||
intended to be used with the `exec_start` builtin for any must-have checks during boot.
|
||||
|
||||
`restart_period <seconds>`
|
||||
> If a non-oneshot service exits, it will be restarted at its start time plus
|
||||
this period. It defaults to 5s to rate limit crashing services.
|
||||
This can be increased for services that are meant to run periodically. For
|
||||
example, it may be set to 3600 to indicate that the service should run every hour
|
||||
or 86400 to indicate that the service should run every day.
|
||||
> If a non-oneshot service exits, it will be restarted at its previous start time plus this period.
|
||||
The default value is 5s. This can be used to implement periodic services together with the
|
||||
`timeout_period` command below. For example, it may be set to 3600 to indicate that the service
|
||||
should run every hour or 86400 to indicate that the service should run every day. This can be set
|
||||
to a value shorter than 5s, for example 0, but the minimum 5s delay is enforced if the restart was
|
||||
due to a crash. This is to rate limit persistently crashing services. In other words,
|
||||
`<seconds>` smaller than 5 is respected only when the service exits deliberately and successfully
|
||||
(i.e. by calling exit(0)).
|
||||
|
||||
`rlimit <resource> <cur> <max>`
|
||||
> This applies the given rlimit to the service. rlimits are inherited by child
|
||||
|
|
|
|||
|
|
@ -307,6 +307,7 @@ void Service::Reap(const siginfo_t& siginfo) {
|
|||
pid_ = 0;
|
||||
flags_ &= (~SVC_RUNNING);
|
||||
start_order_ = 0;
|
||||
was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0;
|
||||
|
||||
// Oneshot processes go into the disabled state on exit,
|
||||
// except when manually restarted.
|
||||
|
|
@ -360,7 +361,8 @@ void Service::Reap(const siginfo_t& siginfo) {
|
|||
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
|
||||
// reboot into bootloader or set crashing property
|
||||
boot_clock::time_point now = boot_clock::now();
|
||||
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) {
|
||||
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
|
||||
!was_last_exit_ok_) {
|
||||
bool boot_completed = GetBoolProperty("sys.boot_completed", false);
|
||||
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
|
||||
if (++crash_count_ > 4) {
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
#include <signal.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
|
@ -115,6 +116,7 @@ class Service {
|
|||
pid_t pid() const { return pid_; }
|
||||
android::base::boot_clock::time_point time_started() const { return time_started_; }
|
||||
int crash_count() const { return crash_count_; }
|
||||
int was_last_exit_ok() const { return was_last_exit_ok_; }
|
||||
uid_t uid() const { return proc_attr_.uid; }
|
||||
gid_t gid() const { return proc_attr_.gid; }
|
||||
int namespace_flags() const { return namespaces_.flags; }
|
||||
|
|
@ -130,7 +132,15 @@ class Service {
|
|||
bool process_cgroup_empty() const { return process_cgroup_empty_; }
|
||||
unsigned long start_order() const { return start_order_; }
|
||||
void set_sigstop(bool value) { sigstop_ = value; }
|
||||
std::chrono::seconds restart_period() const { return restart_period_; }
|
||||
std::chrono::seconds restart_period() const {
|
||||
// If the service exited abnormally or due to timeout, rate limit the restart even if
|
||||
// restart_period is set to a very short value.
|
||||
// If not, i.e. restart after a deliberate and successful exit, respect the period.
|
||||
if (!was_last_exit_ok_) {
|
||||
return std::max(restart_period_, default_restart_period_);
|
||||
}
|
||||
return restart_period_;
|
||||
}
|
||||
std::optional<std::chrono::seconds> timeout_period() const { return timeout_period_; }
|
||||
const std::vector<std::string>& args() const { return args_; }
|
||||
bool is_updatable() const { return updatable_; }
|
||||
|
|
@ -172,6 +182,8 @@ class Service {
|
|||
bool upgraded_mte_ = false; // whether we upgraded async MTE -> sync MTE before
|
||||
std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it
|
||||
std::optional<std::string> fatal_reboot_target_; // reboot target of fatal handler
|
||||
bool was_last_exit_ok_ =
|
||||
true; // true if the service never exited, or exited with status code 0
|
||||
|
||||
std::optional<CapSet> capabilities_;
|
||||
ProcessAttributes proc_attr_;
|
||||
|
|
@ -214,7 +226,8 @@ class Service {
|
|||
|
||||
bool sigstop_ = false;
|
||||
|
||||
std::chrono::seconds restart_period_ = 5s;
|
||||
const std::chrono::seconds default_restart_period_ = 5s;
|
||||
std::chrono::seconds restart_period_ = default_restart_period_;
|
||||
std::optional<std::chrono::seconds> timeout_period_;
|
||||
|
||||
bool updatable_ = false;
|
||||
|
|
|
|||
|
|
@ -369,8 +369,8 @@ Result<void> ServiceParser::ParseRebootOnFailure(std::vector<std::string>&& args
|
|||
|
||||
Result<void> ServiceParser::ParseRestartPeriod(std::vector<std::string>&& args) {
|
||||
int period;
|
||||
if (!ParseInt(args[1], &period, 5)) {
|
||||
return Error() << "restart_period value must be an integer >= 5";
|
||||
if (!ParseInt(args[1], &period, 0)) {
|
||||
return Error() << "restart_period value must be an integer >= 0";
|
||||
}
|
||||
service_->restart_period_ = std::chrono::seconds(period);
|
||||
return {};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue