snapuserd: Fix race condition in MergeWorker::WaitForMergeBegin.

The pre-conditions and post-conditions to cv.wait() in this function are
different, which leads to a race. If the merge is never initiated, but
I/O is terminated, the wait() will hang.

This is easy to hit in tests where I/O terminates quickly, but should
not affect actual OTA flow in practice.

Bug: 269361087
Test: snapuserd_test
Change-Id: I85ed9c357031ec6fd74d6a3f49d85e6cbb037eee
This commit is contained in:
David Anderson 2023-08-09 10:31:56 -07:00
parent 8d9940bdaf
commit fb7e268552

View file

@ -189,35 +189,32 @@ void SnapshotHandler::InitiateMerge() {
cv.notify_all();
}
static inline bool IsMergeBeginError(MERGE_IO_TRANSITION io_state) {
return io_state == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
io_state == MERGE_IO_TRANSITION::IO_TERMINATED;
}
// Invoked by Merge thread - Waits on RA thread to resume merging. Will
// be waken up RA thread.
bool SnapshotHandler::WaitForMergeBegin() {
{
std::unique_lock<std::mutex> lock(lock_);
while (!MergeInitiated()) {
cv.wait(lock);
std::unique_lock<std::mutex> lock(lock_);
if (io_state_ == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
return false;
}
}
cv.wait(lock, [this]() -> bool { return MergeInitiated() || IsMergeBeginError(io_state_); });
while (!(io_state_ == MERGE_IO_TRANSITION::MERGE_BEGIN ||
io_state_ == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED)) {
cv.wait(lock);
}
if (io_state_ == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
return false;
}
return true;
if (IsMergeBeginError(io_state_)) {
SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
return false;
}
cv.wait(lock, [this]() -> bool {
return io_state_ == MERGE_IO_TRANSITION::MERGE_BEGIN || IsMergeBeginError(io_state_);
});
if (IsMergeBeginError(io_state_)) {
SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
return false;
}
return true;
}
// Invoked by RA thread - Flushes the RA block to scratch space if necessary