diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 8ff41dbcb..de2052631 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -442,6 +442,7 @@ class SnapshotManager final : public ISnapshotManager { FRIEND_TEST(SnapshotUpdateTest, QueryStatusError); FRIEND_TEST(SnapshotUpdateTest, SnapshotStatusFileWithoutCow); FRIEND_TEST(SnapshotUpdateTest, SpaceSwapUpdate); + FRIEND_TEST(SnapshotUpdateTest, InterruptMergeDuringPhaseUpdate); FRIEND_TEST(SnapshotUpdateTest, MapAllSnapshotsWithoutSlotSwitch); friend class SnapshotTest; friend class SnapshotUpdateTest; diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index 05dec68d5..acabd6770 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -1343,10 +1343,25 @@ auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& } if (merge_status == "snapshot" && - DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE && - update_status.merge_phase() == MergePhase::FIRST_PHASE) { - // The snapshot is not being merged because it's in the wrong phase. - return MergeResult(UpdateState::None); + DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE) { + if (update_status.merge_phase() == MergePhase::FIRST_PHASE) { + // The snapshot is not being merged because it's in the wrong phase. + return MergeResult(UpdateState::None); + } else { + // update_status is already in second phase but the + // snapshot_status is still not set to SnapshotState::MERGING. + // + // Resume the merge at this point. see b/374225913 + LOG(INFO) << "SwitchSnapshotToMerge: " << name << " after resuming merge"; + auto code = SwitchSnapshotToMerge(lock, name); + if (code != MergeFailureCode::Ok) { + LOG(ERROR) << "Failed to switch snapshot: " << name + << " to merge during second phase"; + return MergeResult(UpdateState::MergeFailed, + MergeFailureCode::UnknownTargetType); + } + return MergeResult(UpdateState::Merging); + } } if (merge_status == "snapshot-merge") { @@ -1442,8 +1457,14 @@ MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) { return MergeFailureCode::WriteStatus; } + auto current_slot_suffix = device_->GetSlotSuffix(); MergeFailureCode result = MergeFailureCode::Ok; for (const auto& snapshot : snapshots) { + if (!android::base::EndsWith(snapshot, current_slot_suffix)) { + LOG(ERROR) << "Skipping invalid snapshot: " << snapshot + << " during MergeSecondPhaseSnapshots"; + continue; + } SnapshotStatus snapshot_status; if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) { return MergeFailureCode::ReadStatus; diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 46c3a35e9..3e5ae2aff 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -1607,6 +1607,146 @@ TEST_F(SnapshotUpdateTest, SpaceSwapUpdate) { } } +// Test that shrinking and growing partitions at the same time is handled +// correctly in VABC. +TEST_F(SnapshotUpdateTest, InterruptMergeDuringPhaseUpdate) { + if (!snapuserd_required_) { + // b/179111359 + GTEST_SKIP() << "Skipping snapuserd test"; + } + + auto old_sys_size = GetSize(sys_); + auto old_prd_size = GetSize(prd_); + + // Grow |sys| but shrink |prd|. + SetSize(sys_, old_sys_size * 2); + sys_->set_estimate_cow_size(8_MiB); + SetSize(prd_, old_prd_size / 2); + prd_->set_estimate_cow_size(1_MiB); + + AddOperationForPartitions(); + + ASSERT_TRUE(sm->BeginUpdate()); + ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); + + // Check that the old partition sizes were saved correctly. + { + ASSERT_TRUE(AcquireLock()); + auto local_lock = std::move(lock_); + + SnapshotStatus status; + ASSERT_TRUE(sm->ReadSnapshotStatus(local_lock.get(), "prd_b", &status)); + ASSERT_EQ(status.old_partition_size(), 3145728); + ASSERT_TRUE(sm->ReadSnapshotStatus(local_lock.get(), "sys_b", &status)); + ASSERT_EQ(status.old_partition_size(), 3145728); + } + + ASSERT_TRUE(WriteSnapshotAndHash(sys_)); + ASSERT_TRUE(WriteSnapshotAndHash(vnd_)); + ASSERT_TRUE(ShiftAllSnapshotBlocks("prd_b", old_prd_size)); + + sync(); + + // Assert that source partitions aren't affected. + for (const auto& name : {"sys_a", "vnd_a", "prd_a"}) { + ASSERT_TRUE(IsPartitionUnchanged(name)); + } + + ASSERT_TRUE(sm->FinishedSnapshotWrites(false)); + + // Simulate shutting down the device. + ASSERT_TRUE(UnmapAll()); + + // After reboot, init does first stage mount. + auto init = NewManagerForFirstStageMount("_b"); + ASSERT_NE(init, nullptr); + ASSERT_TRUE(init->NeedSnapshotsInFirstStageMount()); + ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_)); + + // Check that the target partitions have the same content. + for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { + ASSERT_TRUE(IsPartitionUnchanged(name)); + } + + // Initiate the merge and wait for it to be completed. + if (ShouldSkipLegacyMerging()) { + LOG(INFO) << "Skipping legacy merge in test"; + return; + } + ASSERT_TRUE(init->InitiateMerge()); + ASSERT_EQ(init->IsSnapuserdRequired(), snapuserd_required_); + { + // Check that the merge phase is FIRST_PHASE until at least one call + // to ProcessUpdateState() occurs. + ASSERT_TRUE(AcquireLock()); + auto local_lock = std::move(lock_); + auto status = init->ReadSnapshotUpdateStatus(local_lock.get()); + ASSERT_EQ(status.merge_phase(), MergePhase::FIRST_PHASE); + } + + // Wait until prd_b merge is completed which is part of first phase + std::chrono::milliseconds timeout(6000); + auto start = std::chrono::steady_clock::now(); + // Keep polling until the merge is complete or timeout is reached + while (true) { + // Query the merge status + const auto merge_status = init->snapuserd_client()->QuerySnapshotStatus("prd_b"); + if (merge_status == "snapshot-merge-complete") { + break; + } + + auto now = std::chrono::steady_clock::now(); + auto elapsed = std::chrono::duration_cast(now - start); + + ASSERT_TRUE(elapsed < timeout); + // sleep for a second and allow merge to complete + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + // Now, forcefully update the snapshot-update status to SECOND PHASE + // This will not update the snapshot status of sys_b to MERGING + if (init->UpdateUsesUserSnapshots()) { + ASSERT_TRUE(AcquireLock()); + auto local_lock = std::move(lock_); + auto status = init->ReadSnapshotUpdateStatus(local_lock.get()); + status.set_merge_phase(MergePhase::SECOND_PHASE); + ASSERT_TRUE(init->WriteSnapshotUpdateStatus(local_lock.get(), status)); + } + + // Simulate shutting down the device and creating partitions again. + ASSERT_TRUE(UnmapAll()); + ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_)); + + DeviceMapper::TargetInfo target; + ASSERT_TRUE(init->IsSnapshotDevice("prd_b", &target)); + + ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user"); + ASSERT_TRUE(init->IsSnapshotDevice("sys_b", &target)); + ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user"); + ASSERT_TRUE(init->IsSnapshotDevice("vnd_b", &target)); + ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user"); + + // Complete the merge; "sys" and "vnd" should resume the merge + // even though merge was interrupted after update_status was updated to + // SECOND_PHASE + ASSERT_EQ(UpdateState::MergeCompleted, init->ProcessUpdateState()); + + // Make sure the second phase ran and deleted snapshots. + { + ASSERT_TRUE(AcquireLock()); + auto local_lock = std::move(lock_); + std::vector snapshots; + ASSERT_TRUE(init->ListSnapshots(local_lock.get(), &snapshots)); + ASSERT_TRUE(snapshots.empty()); + } + + // Check that the target partitions have the same content after the merge. + for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { + ASSERT_TRUE(IsPartitionUnchanged(name)) + << "Content of " << name << " changes after the merge"; + } +} + // Test that if new system partitions uses empty space in super, that region is not snapshotted. TEST_F(SnapshotUpdateTest, DirectWriteEmptySpace) { GTEST_SKIP() << "b/141889746";