From 29d8a42d14544ac81b89243b22e46ce7c5c71701 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 29 Nov 2022 12:56:34 -0800 Subject: [PATCH] Revert "init: Add more diagnostics for signalfd hangs." Revert commit 14f9c15e0560 ("init: Add more diagnostics for signalfd hangs") because: * That commit was intented to help with root-causing b/223076262. * The root cause of b/223076262 has been fixed (not blocking SIGCHLD in all threads in the init process). Test: Treehugger Change-Id: I586663ec0588e74a9d58512f7f31155398cf4f52 Signed-off-by: Bart Van Assche --- init/init.cpp | 84 ++++++------------------------------------------ init/service.cpp | 4 --- init/service.h | 6 ---- 3 files changed, 9 insertions(+), 85 deletions(-) diff --git a/init/init.cpp b/init/init.cpp index 540e2ca65..42621915e 100644 --- a/init/init.cpp +++ b/init/init.cpp @@ -739,33 +739,13 @@ static void HandleSigtermSignal(const signalfd_siginfo& siginfo) { HandlePowerctlMessage("shutdown,container"); } -static constexpr std::chrono::milliseconds kDiagnosticTimeout = 10s; - -static void HandleSignalFd(bool one_off) { +static void HandleSignalFd() { signalfd_siginfo siginfo; - auto started = std::chrono::steady_clock::now(); - do { - ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo))); - if (bytes_read < 0 && errno == EAGAIN) { - if (one_off) { - return; - } - auto now = std::chrono::steady_clock::now(); - std::chrono::duration waited = now - started; - if (waited >= kDiagnosticTimeout) { - LOG(ERROR) << "epoll() woke us up, but we waited with no SIGCHLD!"; - started = now; - } - - std::this_thread::sleep_for(100ms); - continue; - } - if (bytes_read != sizeof(siginfo)) { - PLOG(ERROR) << "Failed to read siginfo from signal_fd"; - return; - } - break; - } while (!one_off); + ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo))); + if (bytes_read != sizeof(siginfo)) { + PLOG(ERROR) << "Failed to read siginfo from signal_fd"; + return; + } switch (siginfo.ssi_signo) { case SIGCHLD: @@ -820,14 +800,13 @@ static void InstallSignalFdHandler(Epoll* epoll) { LOG(FATAL) << "Failed to register a fork handler: " << strerror(result); } - signal_fd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_NONBLOCK); + signal_fd = signalfd(-1, &mask, SFD_CLOEXEC); if (signal_fd == -1) { PLOG(FATAL) << "failed to create signalfd"; } constexpr int flags = EPOLLIN | EPOLLPRI; - auto handler = std::bind(HandleSignalFd, false); - if (auto result = epoll->RegisterHandler(signal_fd, handler, flags); !result.ok()) { + if (auto result = epoll->RegisterHandler(signal_fd, HandleSignalFd, flags); !result.ok()) { LOG(FATAL) << result.error(); } } @@ -956,32 +935,6 @@ static Result ConnectEarlyStageSnapuserdAction(const BuiltinArguments& arg return {}; } -static void DumpPidFds(const std::string& prefix, pid_t pid) { - std::error_code ec; - std::string proc_dir = "/proc/" + std::to_string(pid) + "/fd"; - for (const auto& entry : std::filesystem::directory_iterator(proc_dir)) { - std::string target; - if (android::base::Readlink(entry.path(), &target)) { - LOG(ERROR) << prefix << target; - } else { - LOG(ERROR) << prefix << entry.path(); - } - } -} - -static void DumpFile(const std::string& prefix, const std::string& file) { - std::ifstream fp(file); - if (!fp) { - LOG(ERROR) << "Could not open " << file; - return; - } - - std::string line; - while (std::getline(fp, line)) { - LOG(ERROR) << prefix << line; - } -} - int SecondStageMain(int argc, char** argv) { if (REBOOT_BOOTLOADER_ON_PANIC) { InstallRebootSignalHandlers(); @@ -1155,7 +1108,7 @@ int SecondStageMain(int argc, char** argv) { setpriority(PRIO_PROCESS, 0, 0); while (true) { // By default, sleep until something happens. - std::chrono::milliseconds epoll_timeout{kDiagnosticTimeout}; + std::optional epoll_timeout; auto shutdown_command = shutdown_state.CheckShutdown(); if (shutdown_command) { @@ -1187,25 +1140,6 @@ int SecondStageMain(int argc, char** argv) { auto epoll_result = epoll.Wait(epoll_timeout); if (!epoll_result.ok()) { LOG(ERROR) << epoll_result.error(); - } else if (*epoll_result <= 0 && Service::is_exec_service_running()) { - static bool dumped_diagnostics = false; - std::chrono::duration waited = - std::chrono::steady_clock::now() - Service::exec_service_started(); - if (waited >= kDiagnosticTimeout) { - LOG(ERROR) << "Exec service is hung? Waited " << waited.count() - << " without SIGCHLD"; - if (!dumped_diagnostics) { - DumpPidFds("exec service opened: ", Service::exec_service_pid()); - - std::string status_file = - "/proc/" + std::to_string(Service::exec_service_pid()) + "/status"; - DumpFile("exec service: ", status_file); - dumped_diagnostics = true; - - LOG(INFO) << "Attempting to handle any stuck SIGCHLDs..."; - HandleSignalFd(true); - } - } } if (!IsShuttingDown()) { HandleControlMessages(); diff --git a/init/service.cpp b/init/service.cpp index ccc719168..8db2b05a3 100644 --- a/init/service.cpp +++ b/init/service.cpp @@ -136,8 +136,6 @@ static bool ExpandArgsAndExecv(const std::vector& args, bool sigsto unsigned long Service::next_start_order_ = 1; bool Service::is_exec_service_running_ = false; -pid_t Service::exec_service_pid_ = -1; -std::chrono::time_point Service::exec_service_started_; Service::Service(const std::string& name, Subcontext* subcontext_for_restart_commands, const std::string& filename, const std::vector& args) @@ -433,8 +431,6 @@ Result Service::ExecStart() { flags_ |= SVC_EXEC; is_exec_service_running_ = true; - exec_service_pid_ = pid_; - exec_service_started_ = std::chrono::steady_clock::now(); LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << proc_attr_.uid << " gid " << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context " diff --git a/init/service.h b/init/service.h index 54bf63871..6fb28041a 100644 --- a/init/service.h +++ b/init/service.h @@ -104,10 +104,6 @@ class Service { size_t CheckAllCommands() const { return onrestart_.CheckAllCommands(); } static bool is_exec_service_running() { return is_exec_service_running_; } - static pid_t exec_service_pid() { return exec_service_pid_; } - static std::chrono::time_point exec_service_started() { - return exec_service_started_; - } const std::string& name() const { return name_; } const std::set& classnames() const { return classnames_; } @@ -160,8 +156,6 @@ class Service { void SetMountNamespace(); static unsigned long next_start_order_; static bool is_exec_service_running_; - static std::chrono::time_point exec_service_started_; - static pid_t exec_service_pid_; const std::string name_; std::set classnames_;