libsnapshot: lock on /metadata/ota

We used to flock() on /metadata/ota/state to ensure
atomic access. However, writing the file itself is
not necessary atomic and may lead to inconsistent
states.

This change redirects flock() to the outer directory, /metadata/ota,
which is very likely to exist (see exception below).
flock() is called on this directory instead of /state. This allows
a follow-up change to turn all writes to the /metadata partition
atomic.

Note: /metadata/ota may not exist during first boot after a flash
with wipe. However, first_stage_init always checks existence of
boot indicator before even trying to flock() (via
IsSnapshotManagerNeeded() and NeedSnapshotsInFirstStageMount()). If
the boot indicator exists, /metadata/ota must exist as well.

Also add tests to ensure LockExclusive() and LockShared() works as
expected.

Test: libsnapshot_test
Test: apply OTA from older build to this, then reboot

Bug: 144549076

Change-Id: Ib4dd9e9be1a43013c328e181b9398ac0b514dbce
This commit is contained in:
Yifan Hong 2019-11-26 16:07:04 -08:00
parent 3a8001476d
commit b6663cfb5a
3 changed files with 181 additions and 43 deletions

View file

@ -277,6 +277,7 @@ class SnapshotManager final {
friend class SnapshotTest;
friend class SnapshotUpdateTest;
friend class FlashAfterUpdateTest;
friend class LockTestConsumer;
friend struct AutoDeleteCowImage;
friend struct AutoDeleteSnapshot;
friend struct PartitionCowCreator;
@ -304,9 +305,6 @@ class SnapshotManager final {
LockedFile(const std::string& path, android::base::unique_fd&& fd, int lock_mode)
: path_(path), fd_(std::move(fd)), lock_mode_(lock_mode) {}
~LockedFile();
const std::string& path() const { return path_; }
int fd() const { return fd_; }
int lock_mode() const { return lock_mode_; }
private:
@ -314,8 +312,7 @@ class SnapshotManager final {
android::base::unique_fd fd_;
int lock_mode_;
};
std::unique_ptr<LockedFile> OpenFile(const std::string& file, int open_flags, int lock_flags);
bool Truncate(LockedFile* file);
static std::unique_ptr<LockedFile> OpenFile(const std::string& file, int lock_flags);
// Create a new snapshot record. This creates the backing COW store and
// persists information needed to map the device. The device can be mapped
@ -381,10 +378,13 @@ class SnapshotManager final {
// set the update state to None.
bool RemoveAllUpdateState(LockedFile* lock);
// Interact with /metadata/ota/state.
std::unique_ptr<LockedFile> OpenStateFile(int open_flags, int lock_flags);
// Interact with /metadata/ota.
std::unique_ptr<LockedFile> OpenLock(int lock_flags);
std::unique_ptr<LockedFile> LockShared();
std::unique_ptr<LockedFile> LockExclusive();
std::string GetLockPath() const;
// Interact with /metadata/ota/state.
UpdateState ReadUpdateState(LockedFile* file);
bool WriteUpdateState(LockedFile* file, UpdateState state);
std::string GetStateFilePath() const;

View file

@ -1218,12 +1218,12 @@ UpdateState SnapshotManager::GetUpdateState(double* progress) {
return UpdateState::None;
}
auto file = LockShared();
if (!file) {
auto lock = LockShared();
if (!lock) {
return UpdateState::None;
}
auto state = ReadUpdateState(file.get());
auto state = ReadUpdateState(lock.get());
if (progress) {
*progress = 0.0;
if (state == UpdateState::Merging) {
@ -1595,9 +1595,9 @@ bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name)
return true;
}
auto SnapshotManager::OpenFile(const std::string& file, int open_flags, int lock_flags)
auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
-> std::unique_ptr<LockedFile> {
unique_fd fd(open(file.c_str(), open_flags | O_CLOEXEC | O_NOFOLLOW | O_SYNC, 0660));
unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
if (fd < 0) {
PLOG(ERROR) << "Open failed: " << file;
return nullptr;
@ -1622,29 +1622,28 @@ std::string SnapshotManager::GetStateFilePath() const {
return metadata_dir_ + "/state"s;
}
std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenStateFile(int open_flags,
int lock_flags) {
auto state_file = GetStateFilePath();
return OpenFile(state_file, open_flags, lock_flags);
std::string SnapshotManager::GetLockPath() const {
return metadata_dir_;
}
std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
auto lock_file = GetLockPath();
return OpenFile(lock_file, lock_flags);
}
std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
return OpenStateFile(O_RDONLY, LOCK_SH);
return OpenLock(LOCK_SH);
}
std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
return OpenStateFile(O_RDWR | O_CREAT, LOCK_EX);
return OpenLock(LOCK_EX);
}
UpdateState SnapshotManager::ReadUpdateState(LockedFile* file) {
// Reset position since some calls read+write.
if (lseek(file->fd(), 0, SEEK_SET) < 0) {
PLOG(ERROR) << "lseek state file failed";
return UpdateState::None;
}
UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
CHECK(lock);
std::string contents;
if (!android::base::ReadFdToString(file->fd(), &contents)) {
if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
PLOG(ERROR) << "Read state file failed";
return UpdateState::None;
}
@ -1691,14 +1690,15 @@ std::ostream& operator<<(std::ostream& os, UpdateState state) {
}
}
bool SnapshotManager::WriteUpdateState(LockedFile* file, UpdateState state) {
bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state) {
CHECK(lock);
CHECK(lock->lock_mode() == LOCK_EX);
std::stringstream ss;
ss << state;
std::string contents = ss.str();
if (contents.empty()) return false;
if (!Truncate(file)) return false;
#ifdef LIBSNAPSHOT_USE_HAL
auto merge_status = MergeStatus::UNKNOWN;
switch (state) {
@ -1731,7 +1731,7 @@ bool SnapshotManager::WriteUpdateState(LockedFile* file, UpdateState state) {
}
#endif
if (!android::base::WriteStringToFd(contents, file->fd())) {
if (!android::base::WriteStringToFile(contents, GetStateFilePath())) {
PLOG(ERROR) << "Could not write to state file";
return false;
}
@ -1795,18 +1795,6 @@ bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus
return true;
}
bool SnapshotManager::Truncate(LockedFile* file) {
if (lseek(file->fd(), 0, SEEK_SET) < 0) {
PLOG(ERROR) << "lseek file failed: " << file->path();
return false;
}
if (ftruncate(file->fd(), 0) < 0) {
PLOG(ERROR) << "truncate failed: " << file->path();
return false;
}
return true;
}
std::string SnapshotManager::GetSnapshotDeviceName(const std::string& snapshot_name,
const SnapshotStatus& status) {
if (status.device_size() != status.snapshot_size()) {
@ -2164,7 +2152,7 @@ bool SnapshotManager::UnmapAllPartitions() {
bool SnapshotManager::Dump(std::ostream& os) {
// Don't actually lock. Dump() is for debugging purposes only, so it is okay
// if it is racy.
auto file = OpenStateFile(O_RDONLY, 0);
auto file = OpenLock(0 /* lock flag */);
if (!file) return false;
std::stringstream ss;

View file

@ -20,6 +20,8 @@
#include <sys/types.h>
#include <chrono>
#include <deque>
#include <future>
#include <iostream>
#include <android-base/file.h>
@ -142,7 +144,7 @@ class SnapshotTest : public ::testing::Test {
}
bool AcquireLock() {
lock_ = sm->OpenStateFile(O_RDWR, LOCK_EX);
lock_ = sm->LockExclusive();
return !!lock_;
}
@ -595,6 +597,154 @@ TEST_F(SnapshotTest, UpdateBootControlHal) {
ASSERT_EQ(test_device->merge_status(), MergeStatus::MERGING);
}
enum class Request { UNKNOWN, LOCK_SHARED, LOCK_EXCLUSIVE, UNLOCK, EXIT };
std::ostream& operator<<(std::ostream& os, Request request) {
switch (request) {
case Request::LOCK_SHARED:
return os << "LOCK_SHARED";
case Request::LOCK_EXCLUSIVE:
return os << "LOCK_EXCLUSIVE";
case Request::UNLOCK:
return os << "UNLOCK";
case Request::EXIT:
return os << "EXIT";
case Request::UNKNOWN:
[[fallthrough]];
default:
return os << "UNKNOWN";
}
}
class LockTestConsumer {
public:
AssertionResult MakeRequest(Request new_request) {
{
std::unique_lock<std::mutex> ulock(mutex_);
requests_.push_back(new_request);
}
cv_.notify_all();
return AssertionSuccess() << "Request " << new_request << " successful";
}
template <typename R, typename P>
AssertionResult WaitFulfill(std::chrono::duration<R, P> timeout) {
std::unique_lock<std::mutex> ulock(mutex_);
if (cv_.wait_for(ulock, timeout, [this] { return requests_.empty(); })) {
return AssertionSuccess() << "All requests_ fulfilled.";
}
return AssertionFailure() << "Timeout waiting for fulfilling " << requests_.size()
<< " request(s), first one is "
<< (requests_.empty() ? Request::UNKNOWN : requests_.front());
}
void StartHandleRequestsInBackground() {
future_ = std::async(std::launch::async, &LockTestConsumer::HandleRequests, this);
}
private:
void HandleRequests() {
static constexpr auto consumer_timeout = 3s;
auto next_request = Request::UNKNOWN;
do {
// Peek next request.
{
std::unique_lock<std::mutex> ulock(mutex_);
if (cv_.wait_for(ulock, consumer_timeout, [this] { return !requests_.empty(); })) {
next_request = requests_.front();
} else {
next_request = Request::EXIT;
}
}
// Handle next request.
switch (next_request) {
case Request::LOCK_SHARED: {
lock_ = sm->LockShared();
} break;
case Request::LOCK_EXCLUSIVE: {
lock_ = sm->LockExclusive();
} break;
case Request::EXIT:
[[fallthrough]];
case Request::UNLOCK: {
lock_.reset();
} break;
case Request::UNKNOWN:
[[fallthrough]];
default:
break;
}
// Pop next request. This thread is the only thread that
// pops from the front of the requests_ deque.
{
std::unique_lock<std::mutex> ulock(mutex_);
if (next_request == Request::EXIT) {
requests_.clear();
} else {
requests_.pop_front();
}
}
cv_.notify_all();
} while (next_request != Request::EXIT);
}
std::mutex mutex_;
std::condition_variable cv_;
std::deque<Request> requests_;
std::unique_ptr<SnapshotManager::LockedFile> lock_;
std::future<void> future_;
};
class LockTest : public ::testing::Test {
public:
void SetUp() {
first_consumer.StartHandleRequestsInBackground();
second_consumer.StartHandleRequestsInBackground();
}
void TearDown() {
EXPECT_TRUE(first_consumer.MakeRequest(Request::EXIT));
EXPECT_TRUE(second_consumer.MakeRequest(Request::EXIT));
}
static constexpr auto request_timeout = 500ms;
LockTestConsumer first_consumer;
LockTestConsumer second_consumer;
};
TEST_F(LockTest, SharedShared) {
ASSERT_TRUE(first_consumer.MakeRequest(Request::LOCK_SHARED));
ASSERT_TRUE(first_consumer.WaitFulfill(request_timeout));
ASSERT_TRUE(second_consumer.MakeRequest(Request::LOCK_SHARED));
ASSERT_TRUE(second_consumer.WaitFulfill(request_timeout));
}
using LockTestParam = std::pair<Request, Request>;
class LockTestP : public LockTest, public ::testing::WithParamInterface<LockTestParam> {};
TEST_P(LockTestP, Test) {
ASSERT_TRUE(first_consumer.MakeRequest(GetParam().first));
ASSERT_TRUE(first_consumer.WaitFulfill(request_timeout));
ASSERT_TRUE(second_consumer.MakeRequest(GetParam().second));
ASSERT_FALSE(second_consumer.WaitFulfill(request_timeout))
<< "Should not be able to " << GetParam().second << " while separate thread "
<< GetParam().first;
ASSERT_TRUE(first_consumer.MakeRequest(Request::UNLOCK));
ASSERT_TRUE(second_consumer.WaitFulfill(request_timeout))
<< "Should be able to hold lock that is released by separate thread";
}
INSTANTIATE_TEST_SUITE_P(
LockTest, LockTestP,
testing::Values(LockTestParam{Request::LOCK_EXCLUSIVE, Request::LOCK_EXCLUSIVE},
LockTestParam{Request::LOCK_EXCLUSIVE, Request::LOCK_SHARED},
LockTestParam{Request::LOCK_SHARED, Request::LOCK_EXCLUSIVE}),
[](const testing::TestParamInfo<LockTestP::ParamType>& info) {
std::stringstream ss;
ss << info.param.first << "_" << info.param.second;
return ss.str();
});
class SnapshotUpdateTest : public SnapshotTest {
public:
void SetUp() override {