From c85af5595254fae175cea517fe33ddaf5ba1183f Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 3 Jul 2024 17:00:31 -0700 Subject: [PATCH] libsnapshot: Simplify wipe handling in recovery. This refactors HandleImminentDataWipe to address some shortcomings discovered through testing. Previously, it always called CreateSnapshotsAndLogicalPartitions, which meant trying to use snapuserd even if completely unnecessary. Instead we now peek at the update state and eliminate the "easy" cases ahead of time. These are "none", "initiated", and "unverified" when either a rollback happens or there is no forward merge indicator. In this case we simply return early and allow the wipe to continue (disabling the current slot if necessary). The hard case, when a merge is needed, is still handled within ProcessUpdateStateOnDataWipe. However it's no longer recursive, and it can assume a merge is about to initiated or already in progress. In all cases except a merge failure, we change the update state to None to clear any roadblocks update_engine or the bootloader might encounter. A merge failure, however, still blocks a data wipe. The way to recover from this is adb sideload. Bug: 350613336 Test: vts_libsnapshot_test wipe in INITIATED state, no merge wipe in UNVERIFIED state, no merge wipe in UNVERIFIED + rollback state, no merge wipe in MERGING state, merge Change-Id: I387aabcfa6304118be88ddbb85842111d5c2ef6a --- fs_mgr/libsnapshot/device_info.cpp | 18 ++ fs_mgr/libsnapshot/device_info.h | 1 + .../include/libsnapshot/mock_device_info.h | 1 + .../include/libsnapshot/snapshot.h | 11 +- .../include_test/libsnapshot/test_helpers.h | 1 + .../partition_cow_creator_test.cpp | 8 +- fs_mgr/libsnapshot/snapshot.cpp | 186 ++++++++++-------- fs_mgr/libsnapshot/snapshot_test.cpp | 13 +- 8 files changed, 141 insertions(+), 98 deletions(-) diff --git a/fs_mgr/libsnapshot/device_info.cpp b/fs_mgr/libsnapshot/device_info.cpp index 0ab61033f..e0969f4c7 100644 --- a/fs_mgr/libsnapshot/device_info.cpp +++ b/fs_mgr/libsnapshot/device_info.cpp @@ -104,6 +104,24 @@ bool DeviceInfo::IsFirstStageInit() const { return first_stage_init_; } +bool DeviceInfo::SetActiveBootSlot([[maybe_unused]] unsigned int slot) { +#ifdef LIBSNAPSHOT_USE_HAL + if (!EnsureBootHal()) { + return false; + } + + CommandResult result = boot_control_->SetActiveBootSlot(slot); + if (!result.success) { + LOG(ERROR) << "Error setting slot " << slot << " active: " << result.errMsg; + return false; + } + return true; +#else + LOG(ERROR) << "HAL support not enabled."; + return false; +#endif +} + bool DeviceInfo::SetSlotAsUnbootable([[maybe_unused]] unsigned int slot) { #ifdef LIBSNAPSHOT_USE_HAL if (!EnsureBootHal()) { diff --git a/fs_mgr/libsnapshot/device_info.h b/fs_mgr/libsnapshot/device_info.h index d06f1be7c..9153abb01 100644 --- a/fs_mgr/libsnapshot/device_info.h +++ b/fs_mgr/libsnapshot/device_info.h @@ -36,6 +36,7 @@ class DeviceInfo final : public SnapshotManager::IDeviceInfo { std::string GetSuperDevice(uint32_t slot) const override; bool IsOverlayfsSetup() const override; bool SetBootControlMergeStatus(MergeStatus status) override; + bool SetActiveBootSlot(unsigned int slot) override; bool SetSlotAsUnbootable(unsigned int slot) override; bool IsRecovery() const override; std::unique_ptr OpenImageManager() const override; diff --git a/fs_mgr/libsnapshot/include/libsnapshot/mock_device_info.h b/fs_mgr/libsnapshot/include/libsnapshot/mock_device_info.h index 573a85b24..ca1ac1e6b 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/mock_device_info.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/mock_device_info.h @@ -29,6 +29,7 @@ class MockDeviceInfo : public SnapshotManager::IDeviceInfo { MOCK_METHOD(const android::fs_mgr::IPartitionOpener&, GetPartitionOpener, (), (const)); MOCK_METHOD(bool, IsOverlayfsSetup, (), (const, override)); MOCK_METHOD(bool, SetBootControlMergeStatus, (MergeStatus status), (override)); + MOCK_METHOD(bool, SetActiveBootSlot, (unsigned int slot), (override)); MOCK_METHOD(bool, SetSlotAsUnbootable, (unsigned int slot), (override)); MOCK_METHOD(bool, IsRecovery, (), (const, override)); MOCK_METHOD(bool, IsFirstStageInit, (), (const, override)); diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 4a3ec1d3a..deb2d6e92 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -104,6 +104,7 @@ class ISnapshotManager { virtual const android::fs_mgr::IPartitionOpener& GetPartitionOpener() const = 0; virtual bool IsOverlayfsSetup() const = 0; virtual bool SetBootControlMergeStatus(MergeStatus status) = 0; + virtual bool SetActiveBootSlot(unsigned int slot) = 0; virtual bool SetSlotAsUnbootable(unsigned int slot) = 0; virtual bool IsRecovery() const = 0; virtual bool IsTestDevice() const { return false; } @@ -675,6 +676,8 @@ class SnapshotManager final : public ISnapshotManager { std::string GetBootSnapshotsWithoutSlotSwitchPath(); std::string GetSnapuserdFromSystemPath(); + bool HasForwardMergeIndicator(); + const LpMetadata* ReadOldPartitionMetadata(LockedFile* lock); bool MapAllPartitions(LockedFile* lock, const std::string& super_device, uint32_t slot, @@ -785,11 +788,8 @@ class SnapshotManager final : public ISnapshotManager { bool UpdateForwardMergeIndicator(bool wipe); // Helper for HandleImminentDataWipe. - // Call ProcessUpdateState and handle states with special rules before data wipe. Specifically, - // if |allow_forward_merge| and allow-forward-merge indicator exists, initiate merge if - // necessary. - UpdateState ProcessUpdateStateOnDataWipe(bool allow_forward_merge, - const std::function& callback); + // Call ProcessUpdateState and handle states with special rules before data wipe. + UpdateState ProcessUpdateStateOnDataWipe(const std::function& callback); // Return device string of a mapped image, or if it is not available, the mapped image path. bool GetMappedImageDeviceStringOrPath(const std::string& device_name, @@ -848,7 +848,6 @@ class SnapshotManager final : public ISnapshotManager { std::string metadata_dir_; std::unique_ptr images_; bool use_first_stage_snapuserd_ = false; - bool in_factory_data_reset_ = false; std::function uevent_regen_callback_; std::unique_ptr snapuserd_client_; std::unique_ptr old_partition_metadata_; diff --git a/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h b/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h index 0afd8bd22..620b03c14 100644 --- a/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h +++ b/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h @@ -92,6 +92,7 @@ class TestDeviceInfo : public SnapshotManager::IDeviceInfo { } bool IsOverlayfsSetup() const override { return false; } bool IsRecovery() const override { return recovery_; } + bool SetActiveBootSlot([[maybe_unused]] unsigned int slot) override { return true; } bool SetSlotAsUnbootable(unsigned int slot) override { unbootable_slots_.insert(slot); return true; diff --git a/fs_mgr/libsnapshot/partition_cow_creator_test.cpp b/fs_mgr/libsnapshot/partition_cow_creator_test.cpp index a4a2c1a68..8356c0c24 100644 --- a/fs_mgr/libsnapshot/partition_cow_creator_test.cpp +++ b/fs_mgr/libsnapshot/partition_cow_creator_test.cpp @@ -250,8 +250,8 @@ TEST_F(PartitionCowCreatorTest, CompressionEnabled) { .target_partition = system_b, .current_metadata = builder_a.get(), .current_suffix = "_a", - .using_snapuserd = true, - .update = &update}; + .update = &update, + .using_snapuserd = true}; auto ret = creator.Run(); ASSERT_TRUE(ret.has_value()); @@ -276,8 +276,8 @@ TEST_F(PartitionCowCreatorTest, CompressionWithNoManifest) { .target_partition = system_b, .current_metadata = builder_a.get(), .current_suffix = "_a", - .using_snapuserd = true, - .update = nullptr}; + .update = nullptr, + .using_snapuserd = true}; auto ret = creator.Run(); ASSERT_FALSE(ret.has_value()); diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index 265445b60..108fd903b 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -4005,44 +4005,90 @@ bool SnapshotManager::HandleImminentDataWipe(const std::function& callba // We allow the wipe to continue, because if we can't mount /metadata, // it is unlikely the device would have booted anyway. If there is no // metadata partition, then the device predates Virtual A/B. + LOG(INFO) << "/metadata not found; allowing wipe."; return true; } - // Check this early, so we don't accidentally start trying to populate - // the state file in recovery. Note we don't call GetUpdateState since - // we want errors in acquiring the lock to be propagated, instead of - // returning UpdateState::None. - auto state_file = GetStateFilePath(); - if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) { - return true; - } - - auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); - auto super_path = device_->GetSuperDevice(slot_number); - if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) { - LOG(ERROR) << "Unable to map partitions to complete merge."; - return false; - } - - auto process_callback = [&]() -> bool { - if (callback) { - callback(); + // This could happen if /metadata mounted but there is no filesystem + // structure. Weird, but we have to assume there's no OTA pending, and + // thus we let the wipe proceed. + UpdateState state; + { + auto lock = LockExclusive(); + if (!lock) { + LOG(ERROR) << "Unable to determine update state; allowing wipe."; + return true; } - return true; - }; - in_factory_data_reset_ = true; - UpdateState state = - ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback); - in_factory_data_reset_ = false; - - if (state == UpdateState::MergeFailed) { - return false; + state = ReadUpdateState(lock.get()); + LOG(INFO) << "Update state before wipe: " << state << "; slot: " << GetCurrentSlot() + << "; suffix: " << device_->GetSlotSuffix(); } - // Nothing should be depending on partitions now, so unmap them all. - if (!UnmapAllPartitionsInRecovery()) { - LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash."; + bool try_merge = false; + switch (state) { + case UpdateState::None: + case UpdateState::Initiated: + LOG(INFO) << "Wipe is not impacted by update state; allowing wipe."; + break; + case UpdateState::Unverified: + if (GetCurrentSlot() != Slot::Target) { + LOG(INFO) << "Wipe is not impacted by rolled back update; allowing wipe"; + break; + } + if (!HasForwardMergeIndicator()) { + auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); + auto other_slot_number = SlotNumberForSlotSuffix(device_->GetOtherSlotSuffix()); + + // We're not allowed to forward merge, so forcefully rollback the + // slot switch. + LOG(INFO) << "Allowing wipe due to lack of forward merge indicator; reverting to " + "old slot since update will be deleted."; + device_->SetSlotAsUnbootable(slot_number); + device_->SetActiveBootSlot(other_slot_number); + break; + } + + // Forward merge indicator means we have to mount snapshots and try to merge. + LOG(INFO) << "Forward merge indicator is present."; + try_merge = true; + break; + case UpdateState::Merging: + case UpdateState::MergeFailed: + try_merge = true; + break; + case UpdateState::MergeNeedsReboot: + case UpdateState::Cancelled: + LOG(INFO) << "Unexpected update state in recovery; allowing wipe."; + break; + default: + break; + } + + if (try_merge) { + auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); + auto super_path = device_->GetSuperDevice(slot_number); + if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) { + LOG(ERROR) << "Unable to map partitions to complete merge."; + return false; + } + + auto process_callback = [&]() -> bool { + if (callback) { + callback(); + } + return true; + }; + + state = ProcessUpdateStateOnDataWipe(process_callback); + if (state == UpdateState::MergeFailed) { + return false; + } + + // Nothing should be depending on partitions now, so unmap them all. + if (!UnmapAllPartitionsInRecovery()) { + LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash."; + } } if (state != UpdateState::None) { @@ -4088,58 +4134,40 @@ bool SnapshotManager::FinishMergeInRecovery() { return true; } -UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge, - const std::function& callback) { - auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); - UpdateState state = ProcessUpdateState(callback); - LOG(INFO) << "Update state in recovery: " << state; - switch (state) { - case UpdateState::MergeFailed: - LOG(ERROR) << "Unrecoverable merge failure detected."; - return state; - case UpdateState::Unverified: { - // If an OTA was just applied but has not yet started merging: - // - // - if forward merge is allowed, initiate merge and call - // ProcessUpdateState again. - // - // - if forward merge is not allowed, we - // have no choice but to revert slots, because the current slot will - // immediately become unbootable. Rather than wait for the device - // to reboot N times until a rollback, we proactively disable the - // new slot instead. - // - // Since the rollback is inevitable, we don't treat a HAL failure - // as an error here. - auto slot = GetCurrentSlot(); - if (slot == Slot::Target) { - if (allow_forward_merge && - access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) { - LOG(INFO) << "Forward merge allowed, initiating merge now."; - - if (!InitiateMerge()) { - LOG(ERROR) << "Failed to initiate merge on data wipe."; - return UpdateState::MergeFailed; - } - return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback); +UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(const std::function& callback) { + while (true) { + UpdateState state = ProcessUpdateState(callback); + LOG(INFO) << "Processed updated state in recovery: " << state; + switch (state) { + case UpdateState::MergeFailed: + LOG(ERROR) << "Unrecoverable merge failure detected."; + return state; + case UpdateState::Unverified: { + // Unverified was already handled earlier, in HandleImminentDataWipe, + // but it will fall through here if a forward merge is required. + // + // If InitiateMerge fails, we early return. If it succeeds, then we + // are guaranteed that the next call to ProcessUpdateState will not + // return Unverified. + if (!InitiateMerge()) { + LOG(ERROR) << "Failed to initiate merge on data wipe."; + return UpdateState::MergeFailed; } - - LOG(ERROR) << "Reverting to old slot since update will be deleted."; - device_->SetSlotAsUnbootable(slot_number); - } else { - LOG(INFO) << "Booting from " << slot << " slot, no action is taken."; + continue; } - break; + case UpdateState::MergeNeedsReboot: + // We shouldn't get here, because nothing is depending on + // logical partitions. + LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery."; + return state; + default: + return state; } - case UpdateState::MergeNeedsReboot: - // We shouldn't get here, because nothing is depending on - // logical partitions. - LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery."; - break; - default: - break; } - return state; +} + +bool SnapshotManager::HasForwardMergeIndicator() { + return access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0; } bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) { diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index b2e36d426..c29da175a 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -2102,10 +2102,10 @@ TEST_F(SnapshotUpdateTest, DataWipeRollbackInRecovery) { test_device->set_recovery(true); auto new_sm = NewManagerForFirstStageMount(test_device); + EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified); ASSERT_TRUE(new_sm->HandleImminentDataWipe()); // Manually mount metadata so that we can call GetUpdateState() below. MountMetadata(); - EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::None); EXPECT_TRUE(test_device->IsSlotUnbootable(1)); EXPECT_FALSE(test_device->IsSlotUnbootable(0)); } @@ -2127,6 +2127,7 @@ TEST_F(SnapshotUpdateTest, DataWipeAfterRollback) { test_device->set_recovery(true); auto new_sm = NewManagerForFirstStageMount(test_device); + EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified); ASSERT_TRUE(new_sm->HandleImminentDataWipe()); EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::None); EXPECT_FALSE(test_device->IsSlotUnbootable(0)); @@ -2135,10 +2136,6 @@ TEST_F(SnapshotUpdateTest, DataWipeAfterRollback) { // Test update package that requests data wipe. TEST_F(SnapshotUpdateTest, DataWipeRequiredInPackage) { - if (ShouldSkipLegacyMerging()) { - GTEST_SKIP() << "Skipping legacy merge in test"; - } - AddOperationForPartitions(); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); @@ -2157,6 +2154,7 @@ TEST_F(SnapshotUpdateTest, DataWipeRequiredInPackage) { test_device->set_recovery(true); auto new_sm = NewManagerForFirstStageMount(test_device); + EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified); ASSERT_TRUE(new_sm->HandleImminentDataWipe()); // Manually mount metadata so that we can call GetUpdateState() below. MountMetadata(); @@ -2178,10 +2176,6 @@ TEST_F(SnapshotUpdateTest, DataWipeRequiredInPackage) { // Test update package that requests data wipe. TEST_F(SnapshotUpdateTest, DataWipeWithStaleSnapshots) { - if (ShouldSkipLegacyMerging()) { - GTEST_SKIP() << "Skipping legacy merge in test"; - } - AddOperationForPartitions(); // Execute the update. @@ -2222,6 +2216,7 @@ TEST_F(SnapshotUpdateTest, DataWipeWithStaleSnapshots) { test_device->set_recovery(true); auto new_sm = NewManagerForFirstStageMount(test_device); + EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified); ASSERT_TRUE(new_sm->HandleImminentDataWipe()); // Manually mount metadata so that we can call GetUpdateState() below. MountMetadata();