From 905a5df83d1cdfd133ad53b3b6af999ddb485f94 Mon Sep 17 00:00:00 2001 From: Tom Cherry Date: Fri, 30 Aug 2019 14:12:56 -0700 Subject: [PATCH] init: Always reap processes before handling properties There is a race that manifests like this: 1) A service dies (not processed by init yet). 2) service_manager processes death notification. 3) service_manager gets checkService and calls init to start service. 4) init gets the ctl.start / ctl.interface_start for the service but the service already appears started, so it does nothing. 5) init gets sigchld, but doesn't do anything else to restart the service We can avoid all of this if we always reap pending processes before handling properties in the main loop of init. Since reaping the services calls waitid(), there's no race even if the signalfd for sigchld hasn't triggered yet. It also won't cost us much efficiency, since it's only a single system call. Test: CF boots, init unit tests pass Change-Id: Ie24ef406055b283797b41b1821c8ebcccead4db4 --- init/epoll.cpp | 19 ++++++++++++------- init/epoll.h | 9 ++++----- init/init.cpp | 13 +++++++++++-- init/keychords_test.cpp | 6 +++++- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/init/epoll.cpp b/init/epoll.cpp index 01d886717..17d63fa5d 100644 --- a/init/epoll.cpp +++ b/init/epoll.cpp @@ -69,19 +69,24 @@ Result<void> Epoll::UnregisterHandler(int fd) { return {}; } -Result<void> Epoll::Wait(std::optional<std::chrono::milliseconds> timeout) { +Result<std::vector<std::function<void()>*>> Epoll::Wait( + std::optional<std::chrono::milliseconds> timeout) { int timeout_ms = -1; if (timeout && timeout->count() < INT_MAX) { timeout_ms = timeout->count(); } - epoll_event ev; - auto nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd_, &ev, 1, timeout_ms)); - if (nr == -1) { + const auto max_events = epoll_handlers_.size(); + epoll_event ev[max_events]; + auto num_events = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd_, ev, max_events, timeout_ms)); + if (num_events == -1) { return ErrnoError() << "epoll_wait failed"; - } else if (nr == 1) { -
std::invoke(*reinterpret_cast<std::function<void()>*>(ev.data.ptr)); } - return {}; + std::vector<std::function<void()>*> pending_functions; + for (int i = 0; i < num_events; ++i) { + pending_functions.emplace_back(reinterpret_cast<std::function<void()>*>(ev[i].data.ptr)); + } + + return pending_functions; } } // namespace init diff --git a/init/epoll.h b/init/epoll.h index ca8426634..c32a6614f 100644 --- a/init/epoll.h +++ b/init/epoll.h @@ -14,8 +14,7 @@ * limitations under the License. */ -#ifndef _INIT_EPOLL_H -#define _INIT_EPOLL_H +#pragma once #include #include @@ -24,6 +23,7 @@ #include #include #include +#include <vector> #include @@ -39,7 +39,8 @@ class Epoll { Result<void> Open(); Result<void> RegisterHandler(int fd, std::function<void()> handler, uint32_t events = EPOLLIN); Result<void> UnregisterHandler(int fd); - Result<void> Wait(std::optional<std::chrono::milliseconds> timeout); + Result<std::vector<std::function<void()>*>> Wait( + std::optional<std::chrono::milliseconds> timeout); private: android::base::unique_fd epoll_fd_; @@ -48,5 +49,3 @@ class Epoll { } // namespace init } // namespace android - -#endif diff --git a/init/init.cpp b/init/init.cpp index ce898dee0..d4cbb5f57 100644 --- a/init/init.cpp +++ b/init/init.cpp @@ -787,8 +787,17 @@ int SecondStageMain(int argc, char** argv) { if (am.HasMoreCommands()) epoll_timeout = 0ms; } - if (auto result = epoll.Wait(epoll_timeout); !result) { - LOG(ERROR) << result.error(); + auto pending_functions = epoll.Wait(epoll_timeout); + if (!pending_functions) { + LOG(ERROR) << pending_functions.error(); + } else if (!pending_functions->empty()) { + // We always reap children before responding to the other pending functions. This is to + // prevent a race where other daemons see that a service has exited and ask init to + // start it again via ctl.start before init has reaped it.
+ ReapAnyOutstandingChildren(); + for (const auto& function : *pending_functions) { + (*function)(); + } } } diff --git a/init/keychords_test.cpp b/init/keychords_test.cpp index 33373d473..6e9b337a2 100644 --- a/init/keychords_test.cpp +++ b/init/keychords_test.cpp @@ -212,7 +212,11 @@ TestFrame::TestFrame(const std::vector>& chords, EventHan } void TestFrame::RelaxForMs(std::chrono::milliseconds wait) { - epoll_.Wait(wait); + auto pending_functions = epoll_.Wait(wait); + ASSERT_TRUE(pending_functions) << pending_functions.error(); + for (const auto& function : *pending_functions) { + (*function)(); + } } void TestFrame::SetChord(int key, bool value) {