snapuserd: I/O request on overlapping blocks during snapshot-merge.
This fixes the case when all the following conditions are true: 1: Incremental OTA 2: When there are sequence of overlapping COPY operations within one merge-window (510 blocks) 3: Device is rebooted when snapshot-merge is in-progress of this merge-window. When device reboots, the state of merge-window (of 510 blocks) was merge-in-progress (aka - only partial set of blocks were merged in this window thereby the state of the base device is in-complete for this window) 4: During the next boot, if there any I/O request from the filesystem which maps to the merge-window in (3): a: The data has to be retrieved from the scratch space of the COW until the snapshot-merge for that window is completed. b: Once the snapshot-merge is complete for that window, data has to be retrieved from base device. The bug was in step 4(a) wherein I/O request was getting routed to base device. This patch addresses the above flow by fixing step 4(a). A new vts test has been added to explicitly track this issue. Additionally, there is no need to re-scan the partition if partition is in merge resume path. This should cut down the overhead of the scan. Bug: 275296365 Test: 1: 100 iterations of ./vts_snapuserd_test --gtest_filter=SnapuserdTest.Snapshot_COPY_Overlap_Merge_Resume_IO_Validate_TEST 2: Incremental OTA on Pixel 6 Pro with multiple iterations of device reboot when merge is in progress Change-Id: Ib53be7f07ff192a84ec7f7049b2c6be01dad1041 Signed-off-by: Akilesh Kailash <akailash@google.com>
This commit is contained in:
parent
7cf712ab3b
commit
cffa413de4
5 changed files with 145 additions and 4 deletions
|
|
@ -173,6 +173,10 @@ bool SnapshotHandler::ReadMetadata() {
|
|||
}
|
||||
|
||||
SNAP_LOG(INFO) << "Merge-ops: " << header.num_merge_ops;
|
||||
if (header.num_merge_ops) {
|
||||
resume_merge_ = true;
|
||||
SNAP_LOG(INFO) << "Resume Snapshot-merge";
|
||||
}
|
||||
|
||||
if (!MmapMetadata()) {
|
||||
SNAP_LOG(ERROR) << "mmap failed";
|
||||
|
|
@ -295,6 +299,11 @@ bool SnapshotHandler::Start() {
|
|||
if (ra_thread_) {
|
||||
ra_thread_status =
|
||||
std::async(std::launch::async, &ReadAhead::RunThread, read_ahead_thread_.get());
|
||||
// If this is a merge-resume path, wait until RA thread is fully up as
|
||||
// the data has to be re-constructed from the scratch space.
|
||||
if (resume_merge_ && ShouldReconstructDataFromCow()) {
|
||||
WaitForRaThreadToStart();
|
||||
}
|
||||
}
|
||||
|
||||
// Launch worker threads
|
||||
|
|
@ -307,7 +316,9 @@ bool SnapshotHandler::Start() {
|
|||
std::async(std::launch::async, &MergeWorker::Run, merge_thread_.get());
|
||||
|
||||
// Now that the worker threads are up, scan the partitions.
|
||||
if (perform_verification_) {
|
||||
// If the snapshot-merge is being resumed, there is no need to scan as the
|
||||
// current slot is already marked as boot complete.
|
||||
if (perform_verification_ && !resume_merge_) {
|
||||
update_verify_->VerifyUpdatePartition();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -147,6 +147,8 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
|
|||
void WakeupMonitorMergeThread();
|
||||
void WaitForMergeComplete();
|
||||
bool WaitForMergeBegin();
|
||||
void RaThreadStarted();
|
||||
void WaitForRaThreadToStart();
|
||||
void NotifyRAForMergeReady();
|
||||
bool WaitForMergeReady();
|
||||
void MergeFailed();
|
||||
|
|
@ -221,6 +223,7 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
|
|||
// Read-ahead related
|
||||
bool populate_data_from_cow_ = false;
|
||||
bool ra_thread_ = false;
|
||||
bool ra_thread_started_ = false;
|
||||
int total_ra_blocks_merged_ = 0;
|
||||
MERGE_IO_TRANSITION io_state_ = MERGE_IO_TRANSITION::INVALID;
|
||||
std::unique_ptr<ReadAhead> read_ahead_thread_;
|
||||
|
|
@ -242,6 +245,7 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
|
|||
bool scratch_space_ = false;
|
||||
int num_worker_threads_ = kNumWorkerThreads;
|
||||
bool perform_verification_ = true;
|
||||
bool resume_merge_ = false;
|
||||
|
||||
std::unique_ptr<struct io_uring> ring_;
|
||||
std::unique_ptr<UpdateVerify> update_verify_;
|
||||
|
|
|
|||
|
|
@ -206,6 +206,7 @@ bool ReadAhead::ReconstructDataFromCow() {
|
|||
return false;
|
||||
}
|
||||
|
||||
snapuserd_->RaThreadStarted();
|
||||
SNAP_LOG(INFO) << "ReconstructDataFromCow success";
|
||||
notify_read_ahead_failed.Cancel();
|
||||
return true;
|
||||
|
|
@ -716,9 +717,13 @@ bool ReadAhead::ReadAheadIOStart() {
|
|||
total_ra_blocks_completed_ += total_blocks_merged_;
|
||||
snapuserd_->SetMergedBlockCountForNextCommit(total_blocks_merged_);
|
||||
|
||||
// Flush the data only if we have a overlapping blocks in the region
|
||||
// Flush the scratch data - Technically, we should flush only for overlapping
|
||||
// blocks; However, since this region is mmap'ed, the dirty pages can still
|
||||
// get flushed to disk at any random point in time. Instead, make sure
|
||||
// the data in scratch is in the correct state before merge thread resumes.
|
||||
//
|
||||
// Notify the Merge thread to resume merging this window
|
||||
if (!snapuserd_->ReadAheadIOCompleted(overlap_)) {
|
||||
if (!snapuserd_->ReadAheadIOCompleted(true)) {
|
||||
SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed...";
|
||||
snapuserd_->ReadAheadIOFailed();
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -235,9 +235,11 @@ class SnapuserdTest : public SnapuserdTestBase {
|
|||
bool Merge();
|
||||
void ValidateMerge();
|
||||
void ReadSnapshotDeviceAndValidate();
|
||||
void ReadSnapshotAndValidateOverlappingBlocks();
|
||||
void Shutdown();
|
||||
void MergeInterrupt();
|
||||
void MergeInterruptFixed(int duration);
|
||||
void MergeInterruptAndValidate(int duration);
|
||||
void MergeInterruptRandomly(int max_duration);
|
||||
bool StartMerge();
|
||||
void CheckMergeCompletion();
|
||||
|
|
@ -358,6 +360,76 @@ void SnapuserdTest::ReadSnapshotDeviceAndValidate() {
|
|||
ASSERT_EQ(memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + (size_ * 4), size_), 0);
|
||||
}
|
||||
|
||||
void SnapuserdTest::ReadSnapshotAndValidateOverlappingBlocks() {
|
||||
// Open COW device
|
||||
unique_fd fd(open(cow_system_->path, O_RDONLY));
|
||||
ASSERT_GE(fd, 0);
|
||||
|
||||
CowReader reader;
|
||||
ASSERT_TRUE(reader.Parse(fd));
|
||||
|
||||
const auto& header = reader.GetHeader();
|
||||
size_t total_mapped_addr_length = header.prefix.header_size + BUFFER_REGION_DEFAULT_SIZE;
|
||||
|
||||
ASSERT_GE(header.prefix.major_version, 2);
|
||||
|
||||
void* mapped_addr = mmap(NULL, total_mapped_addr_length, PROT_READ, MAP_SHARED, fd.get(), 0);
|
||||
ASSERT_NE(mapped_addr, MAP_FAILED);
|
||||
|
||||
bool populate_data_from_scratch = false;
|
||||
struct BufferState* ra_state =
|
||||
reinterpret_cast<struct BufferState*>((char*)mapped_addr + header.prefix.header_size);
|
||||
if (ra_state->read_ahead_state == kCowReadAheadDone) {
|
||||
populate_data_from_scratch = true;
|
||||
}
|
||||
|
||||
size_t num_merge_ops = header.num_merge_ops;
|
||||
// We have some partial merge operations completed.
|
||||
// To test the merge-resume path, forcefully corrupt the data of the base
|
||||
// device for the offsets where the merge is still pending.
|
||||
if (num_merge_ops && populate_data_from_scratch) {
|
||||
std::string corrupt_buffer(4096, 0);
|
||||
// Corrupt two blocks from the point where the merge has to be resumed by
|
||||
// writing down zeroe's.
|
||||
//
|
||||
// Now, since this is a merge-resume path, the "correct" data should be
|
||||
// in the scratch space of the COW device. When there is an I/O request
|
||||
// from the snapshot device, the data has to be retrieved from the
|
||||
// scratch space. If not and I/O is routed to the base device, we
|
||||
// may end up with corruption.
|
||||
off_t corrupt_offset = (num_merge_ops + 2) * 4096;
|
||||
|
||||
if (corrupt_offset < size_) {
|
||||
ASSERT_EQ(android::base::WriteFullyAtOffset(base_fd_, (void*)corrupt_buffer.c_str(),
|
||||
4096, corrupt_offset),
|
||||
true);
|
||||
corrupt_offset -= 4096;
|
||||
ASSERT_EQ(android::base::WriteFullyAtOffset(base_fd_, (void*)corrupt_buffer.c_str(),
|
||||
4096, corrupt_offset),
|
||||
true);
|
||||
fsync(base_fd_.get());
|
||||
}
|
||||
}
|
||||
|
||||
// Time to read the snapshot device.
|
||||
unique_fd snapshot_fd(open(dmuser_dev_->GetPath().c_str(), O_RDONLY | O_DIRECT | O_SYNC));
|
||||
ASSERT_GE(snapshot_fd, 0);
|
||||
|
||||
void* buff_addr;
|
||||
ASSERT_EQ(posix_memalign(&buff_addr, 4096, size_), 0);
|
||||
|
||||
std::unique_ptr<void, decltype(&::free)> snapshot_buffer(buff_addr, ::free);
|
||||
|
||||
// Scan the entire snapshot device and read the data and verify data
|
||||
// integrity. Since the base device was forcefully corrupted, the data from
|
||||
// this scan should be retrieved from scratch space of the COW partition.
|
||||
//
|
||||
// Furthermore, after the merge is complete, base device data is again
|
||||
// verified as the aforementioned corrupted blocks aren't persisted.
|
||||
ASSERT_EQ(ReadFullyAtOffset(snapshot_fd, snapshot_buffer.get(), size_, 0), true);
|
||||
ASSERT_EQ(memcmp(snapshot_buffer.get(), orig_buffer_.get(), size_), 0);
|
||||
}
|
||||
|
||||
void SnapuserdTest::CreateCowDeviceWithCopyOverlap_2() {
|
||||
auto writer = CreateCowDeviceInternal();
|
||||
ASSERT_NE(writer, nullptr);
|
||||
|
|
@ -665,6 +737,20 @@ void SnapuserdTest::MergeInterruptFixed(int duration) {
|
|||
ASSERT_TRUE(Merge());
|
||||
}
|
||||
|
||||
void SnapuserdTest::MergeInterruptAndValidate(int duration) {
|
||||
ASSERT_TRUE(StartMerge());
|
||||
|
||||
for (int i = 0; i < 15; i++) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(duration));
|
||||
ASSERT_NO_FATAL_FAILURE(SimulateDaemonRestart());
|
||||
ReadSnapshotAndValidateOverlappingBlocks();
|
||||
ASSERT_TRUE(StartMerge());
|
||||
}
|
||||
|
||||
ASSERT_NO_FATAL_FAILURE(SimulateDaemonRestart());
|
||||
ASSERT_TRUE(Merge());
|
||||
}
|
||||
|
||||
void SnapuserdTest::MergeInterrupt() {
|
||||
// Interrupt merge at various intervals
|
||||
ASSERT_TRUE(StartMerge());
|
||||
|
|
@ -761,6 +847,15 @@ TEST_F(SnapuserdTest, Snapshot_COPY_Overlap_Merge_Resume_TEST) {
|
|||
ValidateMerge();
|
||||
}
|
||||
|
||||
TEST_F(SnapuserdTest, Snapshot_COPY_Overlap_Merge_Resume_IO_Validate_TEST) {
|
||||
if (!harness_->HasUserDevice()) {
|
||||
GTEST_SKIP() << "Skipping snapshot read; not supported";
|
||||
}
|
||||
ASSERT_NO_FATAL_FAILURE(SetupCopyOverlap_2());
|
||||
ASSERT_NO_FATAL_FAILURE(MergeInterruptAndValidate(2));
|
||||
ValidateMerge();
|
||||
}
|
||||
|
||||
TEST_F(SnapuserdTest, Snapshot_Merge_Crash_Fixed_Ordered) {
|
||||
ASSERT_NO_FATAL_FAILURE(SetupOrderedOps());
|
||||
ASSERT_NO_FATAL_FAILURE(MergeInterruptFixed(300));
|
||||
|
|
|
|||
|
|
@ -366,6 +366,26 @@ void SnapshotHandler::WaitForMergeComplete() {
|
|||
}
|
||||
}
|
||||
|
||||
void SnapshotHandler::RaThreadStarted() {
|
||||
std::unique_lock<std::mutex> lock(lock_);
|
||||
ra_thread_started_ = true;
|
||||
}
|
||||
|
||||
void SnapshotHandler::WaitForRaThreadToStart() {
|
||||
auto now = std::chrono::system_clock::now();
|
||||
auto deadline = now + 3s;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(lock_);
|
||||
while (!ra_thread_started_) {
|
||||
auto status = cv.wait_until(lock, deadline);
|
||||
if (status == std::cv_status::timeout) {
|
||||
SNAP_LOG(ERROR) << "Read-ahead thread did not start";
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string SnapshotHandler::GetMergeStatus() {
|
||||
bool merge_not_initiated = false;
|
||||
bool merge_monitored = false;
|
||||
|
|
@ -618,7 +638,6 @@ bool SnapshotHandler::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t b
|
|||
std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block);
|
||||
|
||||
if (it == read_ahead_buffer_map_.end()) {
|
||||
SNAP_LOG(ERROR) << "Block: " << block << " not found in RA buffer";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -642,6 +661,13 @@ MERGE_GROUP_STATE SnapshotHandler::ProcessMergingBlock(uint64_t new_block, void*
|
|||
MERGE_GROUP_STATE state = blk_state->merge_state_;
|
||||
switch (state) {
|
||||
case MERGE_GROUP_STATE::GROUP_MERGE_PENDING: {
|
||||
// If this is a merge-resume path, check if the data is
|
||||
// available from scratch space. Data from scratch space takes
|
||||
// higher precedence than from source device for overlapping
|
||||
// blocks.
|
||||
if (resume_merge_ && GetRABuffer(&lock, new_block, buffer)) {
|
||||
return (MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS);
|
||||
}
|
||||
blk_state->num_ios_in_progress += 1; // ref count
|
||||
[[fallthrough]];
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue