diff --git a/libprocessgroup/include/processgroup/processgroup.h b/libprocessgroup/include/processgroup/processgroup.h index 9107838b8..ca6868c1b 100644 --- a/libprocessgroup/include/processgroup/processgroup.h +++ b/libprocessgroup/include/processgroup/processgroup.h @@ -65,9 +65,8 @@ bool UsePerAppMemcg(); // should be active again. E.g. Zygote specialization for child process. void DropTaskProfilesResourceCaching(); -// Return 0 and removes the cgroup if there are no longer any processes in it. -// Returns -1 in the case of an error occurring or if there are processes still running -// even after retrying for up to 200ms. +// Return 0 if all processes were killed and the cgroup was successfully removed. +// Returns -1 in the case of an error occurring or if there are processes still running. int killProcessGroup(uid_t uid, int initialPid, int signal); // Returns the same as killProcessGroup(), however it does not retry, which means @@ -76,8 +75,9 @@ int killProcessGroupOnce(uid_t uid, int initialPid, int signal); // Sends the provided signal to all members of a process group, but does not wait for processes to // exit, or for the cgroup to be removed. Callers should also ensure that killProcessGroup is called -// later to ensure the cgroup is fully removed, otherwise system resources may leak. -int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal); +// later to ensure the cgroup is fully removed, otherwise system resources will leak. +// Returns true if no errors are encountered sending signals, otherwise false. +bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal); int createProcessGroup(uid_t uid, int initialPid, bool memControl = false); diff --git a/libprocessgroup/processgroup.cpp b/libprocessgroup/processgroup.cpp index f594f7f66..7e27d7544 100644 --- a/libprocessgroup/processgroup.cpp +++ b/libprocessgroup/processgroup.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include +#include #include #include #include @@ -53,7 +55,8 @@ using android::base::WriteStringToFile; using namespace std::chrono_literals; -#define PROCESSGROUP_CGROUP_PROCS_FILE "/cgroup.procs" +#define PROCESSGROUP_CGROUP_PROCS_FILE "cgroup.procs" +#define PROCESSGROUP_CGROUP_EVENTS_FILE "cgroup.events" bool CgroupsAvailable() { static bool cgroups_available = access("/proc/cgroups", F_OK) == 0; @@ -213,30 +216,21 @@ static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, int pid) { return StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid); } -static int RemoveCgroup(const char* cgroup, uid_t uid, int pid, unsigned int retries) { - int ret = 0; - auto uid_pid_path = ConvertUidPidToPath(cgroup, uid, pid); - - while (retries--) { - ret = rmdir(uid_pid_path.c_str()); - // If we get an error 2 'No such file or directory' , that means the - // cgroup is already removed, treat it as success and return 0 for - // idempotency. - if (ret < 0 && errno == ENOENT) { - ret = 0; - } - if (!ret || errno != EBUSY || !retries) break; - std::this_thread::sleep_for(5ms); - } +static int RemoveCgroup(const char* cgroup, uid_t uid, int pid) { + auto path = ConvertUidPidToPath(cgroup, uid, pid); + int ret = TEMP_FAILURE_RETRY(rmdir(path.c_str())); if (!ret && uid >= AID_ISOLATED_START && uid <= AID_ISOLATED_END) { // Isolated UIDs are unlikely to be reused soon after removal, // so free up the kernel resources for the UID level cgroup. - const auto uid_path = ConvertUidToPath(cgroup, uid); - ret = rmdir(uid_path.c_str()); - if (ret < 0 && errno == ENOENT) { - ret = 0; - } + path = ConvertUidToPath(cgroup, uid); + ret = TEMP_FAILURE_RETRY(rmdir(path.c_str())); + } + + if (ret < 0 && errno == ENOENT) { + // This function is idempoetent, but still warn here. + LOG(WARNING) << "RemoveCgroup: " << path << " does not exist."; + ret = 0; } return ret; @@ -360,38 +354,33 @@ err: return false; } -// Returns number of processes killed on success -// Returns 0 if there are no processes in the process cgroup left to kill -// Returns -1 on error -static int DoKillProcessGroupOnce(const char* cgroup, uid_t uid, int initialPid, int signal) { - // We separate all of the pids in the cgroup into those pids that are also the leaders of - // process groups (stored in the pgids set) and those that are not (stored in the pids set). - std::set pgids; - pgids.emplace(initialPid); - std::set pids; - int processes = 0; - - std::unique_ptr fd(nullptr, fclose); +bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) { + std::set pgids, pids; if (CgroupsAvailable()) { - auto path = ConvertUidPidToPath(cgroup, uid, initialPid) + PROCESSGROUP_CGROUP_PROCS_FILE; - fd.reset(fopen(path.c_str(), "re")); - if (!fd) { - if (errno == ENOENT) { - // This happens when the process is already dead or if, as the result of a bug, it - // has been migrated to another cgroup. An example of a bug that can cause migration - // to another cgroup is using the JoinCgroup action with a cgroup controller that - // has been activated in the v2 cgroup hierarchy. - goto kill; - } - PLOG(WARNING) << __func__ << " failed to open process cgroup uid " << uid << " pid " - << initialPid; - return -1; + std::string hierarchy_root_path, cgroup_v2_path; + CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path); + cgroup_v2_path = ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid); + + LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_PROCS_FILE << " to signal (" << signal + << ") " << cgroup_v2_path; + + // We separate all of the pids in the cgroup into those pids that are also the leaders of + // process groups (stored in the pgids set) and those that are not (stored in the pids set). + const auto procsfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE; + std::unique_ptr fp(fopen(procsfilepath.c_str(), "re"), fclose); + if (!fp) { + // This should only happen if the cgroup has already been removed with a successful call + // to killProcessGroup. Callers should only retry sendSignalToProcessGroup or + // killProcessGroup calls if they fail without ENOENT. + PLOG(ERROR) << "Failed to open " << procsfilepath; + kill(-initialPid, signal); + return false; } + pid_t pid; bool file_is_empty = true; - while (fscanf(fd.get(), "%d\n", &pid) == 1 && pid >= 0) { - processes++; + while (fscanf(fp.get(), "%d\n", &pid) == 1 && pid >= 0) { file_is_empty = false; if (pid == 0) { // Should never happen... but if it does, trying to kill this @@ -421,7 +410,8 @@ static int DoKillProcessGroupOnce(const char* cgroup, uid_t uid, int initialPid, } } -kill: + pgids.emplace(initialPid); + // Kill all process groups. for (const auto pgid : pgids) { LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid @@ -442,101 +432,174 @@ kill: } } - return (!fd || feof(fd.get())) ? processes : -1; + return true; } -static int KillProcessGroup(uid_t uid, int initialPid, int signal, int retries) { +template +static std::chrono::milliseconds toMillisec(T&& duration) { + return std::chrono::duration_cast(duration); +} + +enum class populated_status +{ + populated, + not_populated, + error +}; + +static populated_status cgroupIsPopulated(int events_fd) { + const std::string POPULATED_KEY("populated "); + const std::string::size_type MAX_EVENTS_FILE_SIZE = 32; + + std::string buf; + buf.resize(MAX_EVENTS_FILE_SIZE); + ssize_t len = TEMP_FAILURE_RETRY(pread(events_fd, buf.data(), buf.size(), 0)); + if (len == -1) { + PLOG(ERROR) << "Could not read cgroup.events: "; + // Potentially ENODEV if the cgroup has been removed since we opened this file, but that + // shouldn't have happened yet. + return populated_status::error; + } + + if (len == 0) { + LOG(ERROR) << "cgroup.events EOF"; + return populated_status::error; + } + + buf.resize(len); + + const std::string::size_type pos = buf.find(POPULATED_KEY); + if (pos == std::string::npos) { + LOG(ERROR) << "Could not find populated key in cgroup.events"; + return populated_status::error; + } + + if (pos + POPULATED_KEY.size() + 1 > len) { + LOG(ERROR) << "Partial read of cgroup.events"; + return populated_status::error; + } + + return buf[pos + POPULATED_KEY.size()] == '1' ? + populated_status::populated : populated_status::not_populated; +} + +// The default timeout of 2200ms comes from the default number of retries in a previous +// implementation of this function. The default retry value was 40 for killing and 400 for cgroup +// removal with 5ms sleeps between each retry. +static int KillProcessGroup( + uid_t uid, int initialPid, int signal, bool once = false, + std::chrono::steady_clock::time_point until = std::chrono::steady_clock::now() + 2200ms) { CHECK_GE(uid, 0); CHECK_GT(initialPid, 0); + // Always attempt to send a kill signal to at least the initialPid, at least once, regardless of + // whether its cgroup exists or not. This should only be necessary if a bug results in the + // migration of the targeted process out of its cgroup, which we will also attempt to kill. + const bool signal_ret = sendSignalToProcessGroup(uid, initialPid, signal); + + if (!CgroupsAvailable() || !signal_ret) return signal_ret ? 0 : -1; + std::string hierarchy_root_path; - if (CgroupsAvailable()) { - CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path); - } - const char* cgroup = hierarchy_root_path.c_str(); + CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path); - std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); + const std::string cgroup_v2_path = + ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid); - int retry = retries; - int processes; - while ((processes = DoKillProcessGroupOnce(cgroup, uid, initialPid, signal)) > 0) { - LOG(VERBOSE) << "Killed " << processes << " processes for processgroup " << initialPid; - if (!CgroupsAvailable()) { - // makes no sense to retry, because there are no cgroup_procs file - processes = 0; // no remaining processes - break; - } - if (retry > 0) { - std::this_thread::sleep_for(5ms); - --retry; - } else { - break; - } - } - - if (processes < 0) { - PLOG(ERROR) << "Error encountered killing process cgroup uid " << uid << " pid " - << initialPid; + const std::string eventsfile = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_EVENTS_FILE; + android::base::unique_fd events_fd(open(eventsfile.c_str(), O_RDONLY)); + if (events_fd.get() == -1) { + PLOG(WARNING) << "Error opening " << eventsfile << " for KillProcessGroup"; return -1; } - std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); - auto ms = std::chrono::duration_cast(end - start).count(); + struct pollfd fds = { + .fd = events_fd, + .events = POLLPRI, + }; - // We only calculate the number of 'processes' when killing the processes. - // In the retries == 0 case, we only kill the processes once and therefore - // will not have waited then recalculated how many processes are remaining - // after the first signals have been sent. - // Logging anything regarding the number of 'processes' here does not make sense. + const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); - if (processes == 0) { - if (retries > 0) { - LOG(INFO) << "Successfully killed process cgroup uid " << uid << " pid " << initialPid - << " in " << static_cast(ms) << "ms"; + // The primary reason to loop here is to capture any new forks or migrations that could occur + // after we send signals to the original set of processes, but before all of those processes + // exit and the cgroup becomes unpopulated, or before we remove the cgroup. We try hard to + // ensure this completes successfully to avoid permanent memory leaks, but we still place a + // large default upper bound on the amount of time we spend in this loop. The amount of CPU + // contention, and the amount of work that needs to be done in do_exit for each process + // determines how long this will take. + int ret; + do { + populated_status populated; + while ((populated = cgroupIsPopulated(events_fd.get())) == populated_status::populated && + std::chrono::steady_clock::now() < until) { + + sendSignalToProcessGroup(uid, initialPid, signal); + if (once) { + populated = cgroupIsPopulated(events_fd.get()); + break; + } + + const std::chrono::steady_clock::time_point poll_start = + std::chrono::steady_clock::now(); + + if (poll_start < until) + ret = TEMP_FAILURE_RETRY(poll(&fds, 1, toMillisec(until - poll_start).count())); + + if (ret == -1) { + // Fallback to 5ms sleeps if poll fails + PLOG(ERROR) << "Poll on " << eventsfile << "failed"; + const std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); + if (now < until) + std::this_thread::sleep_for(std::min(5ms, toMillisec(until - now))); + } + + LOG(VERBOSE) << "Waited " + << toMillisec(std::chrono::steady_clock::now() - poll_start).count() + << " ms for " << eventsfile << " poll"; } - if (!CgroupsAvailable()) { - // nothing to do here, if cgroups isn't available - return 0; + const std::chrono::milliseconds kill_duration = + toMillisec(std::chrono::steady_clock::now() - start); + + if (populated == populated_status::populated) { + LOG(WARNING) << "Still waiting on process(es) to exit for cgroup " << cgroup_v2_path + << " after " << kill_duration.count() << " ms"; + // We'll still try the cgroup removal below which we expect to log an error. + } else if (populated == populated_status::not_populated) { + LOG(VERBOSE) << "Killed all processes under cgroup " << cgroup_v2_path + << " after " << kill_duration.count() << " ms"; } - // 400 retries correspond to 2 secs max timeout - int err = RemoveCgroup(cgroup, uid, initialPid, 400); + ret = RemoveCgroup(hierarchy_root_path.c_str(), uid, initialPid); + if (ret) + PLOG(ERROR) << "Unable to remove cgroup " << cgroup_v2_path; + else + LOG(INFO) << "Removed cgroup " << cgroup_v2_path; if (isMemoryCgroupSupported() && UsePerAppMemcg()) { + // This per-application memcg v1 case should eventually be removed after migration to + // memcg v2. std::string memcg_apps_path; if (CgroupGetMemcgAppsPath(&memcg_apps_path) && - RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid, 400) < 0) { - return -1; + (ret = RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid)) < 0) { + const auto memcg_v1_cgroup_path = + ConvertUidPidToPath(memcg_apps_path.c_str(), uid, initialPid); + PLOG(ERROR) << "Unable to remove memcg v1 cgroup " << memcg_v1_cgroup_path; } } - return err; - } else { - if (retries > 0) { - LOG(ERROR) << "Failed to kill process cgroup uid " << uid << " pid " << initialPid - << " in " << static_cast(ms) << "ms, " << processes - << " processes remain"; - } - return -1; - } + if (once) break; + if (std::chrono::steady_clock::now() >= until) break; + } while (ret && errno == EBUSY); + + return ret; } int killProcessGroup(uid_t uid, int initialPid, int signal) { - return KillProcessGroup(uid, initialPid, signal, 40 /*retries*/); + return KillProcessGroup(uid, initialPid, signal); } int killProcessGroupOnce(uid_t uid, int initialPid, int signal) { - return KillProcessGroup(uid, initialPid, signal, 0 /*retries*/); -} - -int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) { - std::string hierarchy_root_path; - if (CgroupsAvailable()) { - CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path); - } - const char* cgroup = hierarchy_root_path.c_str(); - return DoKillProcessGroupOnce(cgroup, uid, initialPid, signal); + return KillProcessGroup(uid, initialPid, signal, true); } static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgroup, @@ -576,7 +639,7 @@ static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgr return -errno; } - auto uid_pid_procs_file = uid_pid_path + PROCESSGROUP_CGROUP_PROCS_FILE; + auto uid_pid_procs_file = uid_pid_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE; if (!WriteStringToFile(std::to_string(initialPid), uid_pid_procs_file)) { ret = -errno;