From dd620ccf24ff4ca25c1cec6b387cb085ab5a8035 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 6 Jan 2022 02:26:08 +0000 Subject: [PATCH] libsnapshot: Fix CHECK failure during second phase merge This CHECK prevents a release build from resuming a two-phase merge if the merge initially failed in the first pass. Bug: 213031779 Test: vts_libsnapshot_test Test: update_engine_unittests Change-Id: I8bf00e3016546ef7039bb0b18eb977cc3dc1066a --- .../include/libsnapshot/snapshot.h | 13 +++ fs_mgr/libsnapshot/snapshot.cpp | 18 +++- fs_mgr/libsnapshot/snapshot_test.cpp | 87 +++++++++++++++++++ 3 files changed, 115 insertions(+), 3 deletions(-) diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 41c6ef576..120f95bfa 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -396,6 +396,17 @@ class SnapshotManager final : public ISnapshotManager { DM_USER, }; + // Add new public entries above this line. + + // Helpers for failure injection. + using MergeConsistencyChecker = + std::function; + + void set_merge_consistency_checker(MergeConsistencyChecker checker) { + merge_consistency_checker_ = checker; + } + MergeConsistencyChecker merge_consistency_checker() const { return merge_consistency_checker_; } + private: FRIEND_TEST(SnapshotTest, CleanFirstStageMount); FRIEND_TEST(SnapshotTest, CreateSnapshot); @@ -410,6 +421,7 @@ class SnapshotManager final : public ISnapshotManager { FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot); FRIEND_TEST(SnapshotTest, UpdateBootControlHal); FRIEND_TEST(SnapshotUpdateTest, AddPartition); + FRIEND_TEST(SnapshotUpdateTest, ConsistencyCheckResume); FRIEND_TEST(SnapshotUpdateTest, DaemonTransition); FRIEND_TEST(SnapshotUpdateTest, DataWipeAfterRollback); FRIEND_TEST(SnapshotUpdateTest, DataWipeRollbackInRecovery); @@ -811,6 +823,7 @@ class SnapshotManager final : public ISnapshotManager { std::unique_ptr snapuserd_client_; std::unique_ptr old_partition_metadata_; std::optional is_snapshot_userspace_; + MergeConsistencyChecker merge_consistency_checker_; }; } // namespace snapshot diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index e6e17bdf3..18a9d2290 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -87,6 +87,8 @@ static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot"; static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator"; static constexpr auto kUpdateStateCheckInterval = 2s; +MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status); + // Note: IImageManager is an incomplete type in the header, so the default // destructor doesn't work. SnapshotManager::~SnapshotManager() {} @@ -116,7 +118,9 @@ std::unique_ptr SnapshotManager::NewForFirstStageMount(IDeviceI } SnapshotManager::SnapshotManager(IDeviceInfo* device) - : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) {} + : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) { + merge_consistency_checker_ = android::snapshot::CheckMergeConsistency; +} static std::string GetCowName(const std::string& snapshot_name) { return snapshot_name + "-cow"; @@ -1329,14 +1333,20 @@ MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const const SnapshotStatus& status) { CHECK(lock); + return merge_consistency_checker_(name, status); +} + +MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status) { if (!status.compression_enabled()) { // Do not try to verify old-style COWs yet. return MergeFailureCode::Ok; } + auto& dm = DeviceMapper::Instance(); + std::string cow_image_name = GetMappedCowDeviceName(name, status); std::string cow_image_path; - if (!dm_.GetDmDevicePathByName(cow_image_name, &cow_image_path)) { + if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) { LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name; return MergeFailureCode::GetCowPathConsistencyCheck; } @@ -1400,9 +1410,11 @@ MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) { } SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock); - CHECK(update_status.state() == UpdateState::Merging); + CHECK(update_status.state() == UpdateState::Merging || + update_status.state() == UpdateState::MergeFailed); CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE); + update_status.set_state(UpdateState::Merging); update_status.set_merge_phase(MergePhase::SECOND_PHASE); if (!WriteSnapshotUpdateStatus(lock, update_status)) { return MergeFailureCode::WriteStatus; diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 14f2d45be..11cebe170 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -1394,6 +1394,93 @@ TEST_F(SnapshotUpdateTest, SpaceSwapUpdate) { } } +// Test that a transient merge consistency check failure can resume properly. +TEST_F(SnapshotUpdateTest, ConsistencyCheckResume) { + if (!ShouldUseCompression()) { + // b/179111359 + GTEST_SKIP() << "Skipping Virtual A/B Compression test"; + } + + auto old_sys_size = GetSize(sys_); + auto old_prd_size = GetSize(prd_); + + // Grow |sys| but shrink |prd|. + SetSize(sys_, old_sys_size * 2); + sys_->set_estimate_cow_size(8_MiB); + SetSize(prd_, old_prd_size / 2); + prd_->set_estimate_cow_size(1_MiB); + + AddOperationForPartitions(); + + ASSERT_TRUE(sm->BeginUpdate()); + ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); + ASSERT_TRUE(WriteSnapshotAndHash("sys_b")); + ASSERT_TRUE(WriteSnapshotAndHash("vnd_b")); + ASSERT_TRUE(ShiftAllSnapshotBlocks("prd_b", old_prd_size)); + + sync(); + + // Assert that source partitions aren't affected. + for (const auto& name : {"sys_a", "vnd_a", "prd_a"}) { + ASSERT_TRUE(IsPartitionUnchanged(name)); + } + + ASSERT_TRUE(sm->FinishedSnapshotWrites(false)); + + // Simulate shutting down the device. + ASSERT_TRUE(UnmapAll()); + + // After reboot, init does first stage mount. + auto init = NewManagerForFirstStageMount("_b"); + ASSERT_NE(init, nullptr); + ASSERT_TRUE(init->NeedSnapshotsInFirstStageMount()); + ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_)); + + // Check that the target partitions have the same content. + for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { + ASSERT_TRUE(IsPartitionUnchanged(name)); + } + + auto old_checker = init->merge_consistency_checker(); + + init->set_merge_consistency_checker( + [](const std::string&, const SnapshotStatus&) -> MergeFailureCode { + return MergeFailureCode::WrongMergeCountConsistencyCheck; + }); + + // Initiate the merge and wait for it to be completed. + ASSERT_TRUE(init->InitiateMerge()); + ASSERT_EQ(init->IsSnapuserdRequired(), ShouldUseUserspaceSnapshots()); + { + // Check that the merge phase is FIRST_PHASE until at least one call + // to ProcessUpdateState() occurs. + ASSERT_TRUE(AcquireLock()); + auto local_lock = std::move(lock_); + auto status = init->ReadSnapshotUpdateStatus(local_lock.get()); + ASSERT_EQ(status.merge_phase(), MergePhase::FIRST_PHASE); + } + + // Merge should have failed. + ASSERT_EQ(UpdateState::MergeFailed, init->ProcessUpdateState()); + + // Simulate shutting down the device and creating partitions again. + ASSERT_TRUE(UnmapAll()); + + // Restore the checker. + init->set_merge_consistency_checker(std::move(old_checker)); + + ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_)); + + // Complete the merge. + ASSERT_EQ(UpdateState::MergeCompleted, init->ProcessUpdateState()); + + // Check that the target partitions have the same content after the merge. + for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { + ASSERT_TRUE(IsPartitionUnchanged(name)) + << "Content of " << name << " changes after the merge"; + } +} + // Test that if new system partitions uses empty space in super, that region is not snapshotted. TEST_F(SnapshotUpdateTest, DirectWriteEmptySpace) { GTEST_SKIP() << "b/141889746";