init: non-crashing service can restart immediately am: 9088d1bb12 am: 794968142e
Original change: https://googleplex-android-review.googlesource.com/c/platform/system/core/+/28219211 Change-Id: I619ea35bf8f529b90214c2a3893dd52a8097486d Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
commit
89bf48634f
4 changed files with 28 additions and 10 deletions
|
|
@ -344,11 +344,14 @@ runs the service.
|
||||||
intended to be used with the `exec_start` builtin for any must-have checks during boot.
|
intended to be used with the `exec_start` builtin for any must-have checks during boot.
|
||||||
|
|
||||||
`restart_period <seconds>`
|
`restart_period <seconds>`
|
||||||
> If a non-oneshot service exits, it will be restarted at its start time plus
|
> If a non-oneshot service exits, it will be restarted at its previous start time plus this period.
|
||||||
this period. It defaults to 5s to rate limit crashing services.
|
The default value is 5s. This can be used to implement periodic services together with the
|
||||||
This can be increased for services that are meant to run periodically. For
|
`timeout_period` command below. For example, it may be set to 3600 to indicate that the service
|
||||||
example, it may be set to 3600 to indicate that the service should run every hour
|
should run every hour or 86400 to indicate that the service should run every day. This can be set
|
||||||
or 86400 to indicate that the service should run every day.
|
to a value shorter than 5s, for example 0, but the minimum 5s delay is enforced if the restart was
|
||||||
|
due to a crash. This is to rate limit persistently crashing services. In other words, a
|
||||||
|
`<seconds>` value smaller than 5 is respected only when the service exits deliberately and successfully
|
||||||
|
(i.e. by calling exit(0)).
|
||||||
|
|
||||||
`rlimit <resource> <cur> <max>`
|
`rlimit <resource> <cur> <max>`
|
||||||
> This applies the given rlimit to the service. rlimits are inherited by child
|
> This applies the given rlimit to the service. rlimits are inherited by child
|
||||||
|
|
|
||||||
|
|
@ -307,6 +307,7 @@ void Service::Reap(const siginfo_t& siginfo) {
|
||||||
pid_ = 0;
|
pid_ = 0;
|
||||||
flags_ &= (~SVC_RUNNING);
|
flags_ &= (~SVC_RUNNING);
|
||||||
start_order_ = 0;
|
start_order_ = 0;
|
||||||
|
was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0;
|
||||||
|
|
||||||
// Oneshot processes go into the disabled state on exit,
|
// Oneshot processes go into the disabled state on exit,
|
||||||
// except when manually restarted.
|
// except when manually restarted.
|
||||||
|
|
@ -360,7 +361,8 @@ void Service::Reap(const siginfo_t& siginfo) {
|
||||||
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
|
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
|
||||||
// reboot into bootloader or set crashing property
|
// reboot into bootloader or set crashing property
|
||||||
boot_clock::time_point now = boot_clock::now();
|
boot_clock::time_point now = boot_clock::now();
|
||||||
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) {
|
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
|
||||||
|
!was_last_exit_ok_) {
|
||||||
bool boot_completed = GetBoolProperty("sys.boot_completed", false);
|
bool boot_completed = GetBoolProperty("sys.boot_completed", false);
|
||||||
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
|
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
|
||||||
if (++crash_count_ > 4) {
|
if (++crash_count_ > 4) {
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
|
@ -115,6 +116,7 @@ class Service {
|
||||||
pid_t pid() const { return pid_; }
|
pid_t pid() const { return pid_; }
|
||||||
android::base::boot_clock::time_point time_started() const { return time_started_; }
|
android::base::boot_clock::time_point time_started() const { return time_started_; }
|
||||||
int crash_count() const { return crash_count_; }
|
int crash_count() const { return crash_count_; }
|
||||||
|
int was_last_exit_ok() const { return was_last_exit_ok_; }
|
||||||
uid_t uid() const { return proc_attr_.uid; }
|
uid_t uid() const { return proc_attr_.uid; }
|
||||||
gid_t gid() const { return proc_attr_.gid; }
|
gid_t gid() const { return proc_attr_.gid; }
|
||||||
int namespace_flags() const { return namespaces_.flags; }
|
int namespace_flags() const { return namespaces_.flags; }
|
||||||
|
|
@ -130,7 +132,15 @@ class Service {
|
||||||
bool process_cgroup_empty() const { return process_cgroup_empty_; }
|
bool process_cgroup_empty() const { return process_cgroup_empty_; }
|
||||||
unsigned long start_order() const { return start_order_; }
|
unsigned long start_order() const { return start_order_; }
|
||||||
void set_sigstop(bool value) { sigstop_ = value; }
|
void set_sigstop(bool value) { sigstop_ = value; }
|
||||||
std::chrono::seconds restart_period() const { return restart_period_; }
|
std::chrono::seconds restart_period() const {
|
||||||
|
// If the service exited abnormally or due to timeout, rate limit the restart even if
|
||||||
|
// restart_period is set to a very short value.
|
||||||
|
// If not, i.e. restart after a deliberate and successful exit, respect the period.
|
||||||
|
if (!was_last_exit_ok_) {
|
||||||
|
return std::max(restart_period_, default_restart_period_);
|
||||||
|
}
|
||||||
|
return restart_period_;
|
||||||
|
}
|
||||||
std::optional<std::chrono::seconds> timeout_period() const { return timeout_period_; }
|
std::optional<std::chrono::seconds> timeout_period() const { return timeout_period_; }
|
||||||
const std::vector<std::string>& args() const { return args_; }
|
const std::vector<std::string>& args() const { return args_; }
|
||||||
bool is_updatable() const { return updatable_; }
|
bool is_updatable() const { return updatable_; }
|
||||||
|
|
@ -172,6 +182,8 @@ class Service {
|
||||||
bool upgraded_mte_ = false; // whether we upgraded async MTE -> sync MTE before
|
bool upgraded_mte_ = false; // whether we upgraded async MTE -> sync MTE before
|
||||||
std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it
|
std::chrono::minutes fatal_crash_window_ = 4min; // fatal() when more than 4 crashes in it
|
||||||
std::optional<std::string> fatal_reboot_target_; // reboot target of fatal handler
|
std::optional<std::string> fatal_reboot_target_; // reboot target of fatal handler
|
||||||
|
bool was_last_exit_ok_ =
|
||||||
|
true; // true if the service never exited, or exited with status code 0
|
||||||
|
|
||||||
std::optional<CapSet> capabilities_;
|
std::optional<CapSet> capabilities_;
|
||||||
ProcessAttributes proc_attr_;
|
ProcessAttributes proc_attr_;
|
||||||
|
|
@ -214,7 +226,8 @@ class Service {
|
||||||
|
|
||||||
bool sigstop_ = false;
|
bool sigstop_ = false;
|
||||||
|
|
||||||
std::chrono::seconds restart_period_ = 5s;
|
const std::chrono::seconds default_restart_period_ = 5s;
|
||||||
|
std::chrono::seconds restart_period_ = default_restart_period_;
|
||||||
std::optional<std::chrono::seconds> timeout_period_;
|
std::optional<std::chrono::seconds> timeout_period_;
|
||||||
|
|
||||||
bool updatable_ = false;
|
bool updatable_ = false;
|
||||||
|
|
|
||||||
|
|
@ -369,8 +369,8 @@ Result<void> ServiceParser::ParseRebootOnFailure(std::vector<std::string>&& args
|
||||||
|
|
||||||
Result<void> ServiceParser::ParseRestartPeriod(std::vector<std::string>&& args) {
|
Result<void> ServiceParser::ParseRestartPeriod(std::vector<std::string>&& args) {
|
||||||
int period;
|
int period;
|
||||||
if (!ParseInt(args[1], &period, 5)) {
|
if (!ParseInt(args[1], &period, 0)) {
|
||||||
return Error() << "restart_period value must be an integer >= 5";
|
return Error() << "restart_period value must be an integer >= 0";
|
||||||
}
|
}
|
||||||
service_->restart_period_ = std::chrono::seconds(period);
|
service_->restart_period_ = std::chrono::seconds(period);
|
||||||
return {};
|
return {};
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue