libsnapshot: Simplify wipe handling in recovery.

This refactors HandleImminentDataWipe to address some shortcomings
discovered through testing. Previously, it always called
CreateSnapshotsAndLogicalPartitions, which meant trying to use snapuserd
even if completely unnecessary.

Instead we now peek at the update state and eliminate the "easy" cases
ahead of time. These are "none", "initiated", and "unverified" when
either a rollback happens or there is no forward merge indicator. In
this case we simply return early and allow the wipe to continue
(disabling the current slot if necessary).

The hard case, when a merge is needed, is still handled within
ProcessUpdateStateOnDataWipe. However it's no longer recursive, and it
can assume a merge is about to initiated or already in progress.

In all cases except a merge failure, we change the update state to None
to clear any roadblocks update_engine or the bootloader might encounter.
A merge failure, however, still blocks a data wipe. The way to recover
from this is adb sideload.

Bug: 350613336
Test: vts_libsnapshot_test
      wipe in INITIATED state, no merge
      wipe in UNVERIFIED state, no merge
      wipe in UNVERIFIED + rollback state, no merge
      wipe in MERGING state, merge
Change-Id: I387aabcfa6304118be88ddbb85842111d5c2ef6a
This commit is contained in:
David Anderson 2024-07-03 17:00:31 -07:00
parent 9da55b8cf7
commit c85af55952
8 changed files with 141 additions and 98 deletions

View file

@ -104,6 +104,24 @@ bool DeviceInfo::IsFirstStageInit() const {
return first_stage_init_;
}
bool DeviceInfo::SetActiveBootSlot([[maybe_unused]] unsigned int slot) {
#ifdef LIBSNAPSHOT_USE_HAL
if (!EnsureBootHal()) {
return false;
}
CommandResult result = boot_control_->SetActiveBootSlot(slot);
if (!result.success) {
LOG(ERROR) << "Error setting slot " << slot << " active: " << result.errMsg;
return false;
}
return true;
#else
LOG(ERROR) << "HAL support not enabled.";
return false;
#endif
}
bool DeviceInfo::SetSlotAsUnbootable([[maybe_unused]] unsigned int slot) {
#ifdef LIBSNAPSHOT_USE_HAL
if (!EnsureBootHal()) {

View file

@ -36,6 +36,7 @@ class DeviceInfo final : public SnapshotManager::IDeviceInfo {
std::string GetSuperDevice(uint32_t slot) const override;
bool IsOverlayfsSetup() const override;
bool SetBootControlMergeStatus(MergeStatus status) override;
bool SetActiveBootSlot(unsigned int slot) override;
bool SetSlotAsUnbootable(unsigned int slot) override;
bool IsRecovery() const override;
std::unique_ptr<IImageManager> OpenImageManager() const override;

View file

@ -29,6 +29,7 @@ class MockDeviceInfo : public SnapshotManager::IDeviceInfo {
MOCK_METHOD(const android::fs_mgr::IPartitionOpener&, GetPartitionOpener, (), (const));
MOCK_METHOD(bool, IsOverlayfsSetup, (), (const, override));
MOCK_METHOD(bool, SetBootControlMergeStatus, (MergeStatus status), (override));
MOCK_METHOD(bool, SetActiveBootSlot, (unsigned int slot), (override));
MOCK_METHOD(bool, SetSlotAsUnbootable, (unsigned int slot), (override));
MOCK_METHOD(bool, IsRecovery, (), (const, override));
MOCK_METHOD(bool, IsFirstStageInit, (), (const, override));

View file

@ -104,6 +104,7 @@ class ISnapshotManager {
virtual const android::fs_mgr::IPartitionOpener& GetPartitionOpener() const = 0;
virtual bool IsOverlayfsSetup() const = 0;
virtual bool SetBootControlMergeStatus(MergeStatus status) = 0;
virtual bool SetActiveBootSlot(unsigned int slot) = 0;
virtual bool SetSlotAsUnbootable(unsigned int slot) = 0;
virtual bool IsRecovery() const = 0;
virtual bool IsTestDevice() const { return false; }
@ -675,6 +676,8 @@ class SnapshotManager final : public ISnapshotManager {
std::string GetBootSnapshotsWithoutSlotSwitchPath();
std::string GetSnapuserdFromSystemPath();
bool HasForwardMergeIndicator();
const LpMetadata* ReadOldPartitionMetadata(LockedFile* lock);
bool MapAllPartitions(LockedFile* lock, const std::string& super_device, uint32_t slot,
@ -785,11 +788,8 @@ class SnapshotManager final : public ISnapshotManager {
bool UpdateForwardMergeIndicator(bool wipe);
// Helper for HandleImminentDataWipe.
// Call ProcessUpdateState and handle states with special rules before data wipe. Specifically,
// if |allow_forward_merge| and allow-forward-merge indicator exists, initiate merge if
// necessary.
UpdateState ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
const std::function<bool()>& callback);
// Call ProcessUpdateState and handle states with special rules before data wipe.
UpdateState ProcessUpdateStateOnDataWipe(const std::function<bool()>& callback);
// Return device string of a mapped image, or if it is not available, the mapped image path.
bool GetMappedImageDeviceStringOrPath(const std::string& device_name,
@ -848,7 +848,6 @@ class SnapshotManager final : public ISnapshotManager {
std::string metadata_dir_;
std::unique_ptr<IImageManager> images_;
bool use_first_stage_snapuserd_ = false;
bool in_factory_data_reset_ = false;
std::function<bool(const std::string&)> uevent_regen_callback_;
std::unique_ptr<SnapuserdClient> snapuserd_client_;
std::unique_ptr<LpMetadata> old_partition_metadata_;

View file

@ -92,6 +92,7 @@ class TestDeviceInfo : public SnapshotManager::IDeviceInfo {
}
bool IsOverlayfsSetup() const override { return false; }
bool IsRecovery() const override { return recovery_; }
bool SetActiveBootSlot([[maybe_unused]] unsigned int slot) override { return true; }
bool SetSlotAsUnbootable(unsigned int slot) override {
unbootable_slots_.insert(slot);
return true;

View file

@ -250,8 +250,8 @@ TEST_F(PartitionCowCreatorTest, CompressionEnabled) {
.target_partition = system_b,
.current_metadata = builder_a.get(),
.current_suffix = "_a",
.using_snapuserd = true,
.update = &update};
.update = &update,
.using_snapuserd = true};
auto ret = creator.Run();
ASSERT_TRUE(ret.has_value());
@ -276,8 +276,8 @@ TEST_F(PartitionCowCreatorTest, CompressionWithNoManifest) {
.target_partition = system_b,
.current_metadata = builder_a.get(),
.current_suffix = "_a",
.using_snapuserd = true,
.update = nullptr};
.update = nullptr,
.using_snapuserd = true};
auto ret = creator.Run();
ASSERT_FALSE(ret.has_value());

View file

@ -4005,44 +4005,90 @@ bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callba
// We allow the wipe to continue, because if we can't mount /metadata,
// it is unlikely the device would have booted anyway. If there is no
// metadata partition, then the device predates Virtual A/B.
LOG(INFO) << "/metadata not found; allowing wipe.";
return true;
}
// Check this early, so we don't accidentally start trying to populate
// the state file in recovery. Note we don't call GetUpdateState since
// we want errors in acquiring the lock to be propagated, instead of
// returning UpdateState::None.
auto state_file = GetStateFilePath();
if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
return true;
}
auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
auto super_path = device_->GetSuperDevice(slot_number);
if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
LOG(ERROR) << "Unable to map partitions to complete merge.";
return false;
}
auto process_callback = [&]() -> bool {
if (callback) {
callback();
// This could happen if /metadata mounted but there is no filesystem
// structure. Weird, but we have to assume there's no OTA pending, and
// thus we let the wipe proceed.
UpdateState state;
{
auto lock = LockExclusive();
if (!lock) {
LOG(ERROR) << "Unable to determine update state; allowing wipe.";
return true;
}
return true;
};
in_factory_data_reset_ = true;
UpdateState state =
ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
in_factory_data_reset_ = false;
if (state == UpdateState::MergeFailed) {
return false;
state = ReadUpdateState(lock.get());
LOG(INFO) << "Update state before wipe: " << state << "; slot: " << GetCurrentSlot()
<< "; suffix: " << device_->GetSlotSuffix();
}
// Nothing should be depending on partitions now, so unmap them all.
if (!UnmapAllPartitionsInRecovery()) {
LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
bool try_merge = false;
switch (state) {
case UpdateState::None:
case UpdateState::Initiated:
LOG(INFO) << "Wipe is not impacted by update state; allowing wipe.";
break;
case UpdateState::Unverified:
if (GetCurrentSlot() != Slot::Target) {
LOG(INFO) << "Wipe is not impacted by rolled back update; allowing wipe";
break;
}
if (!HasForwardMergeIndicator()) {
auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
auto other_slot_number = SlotNumberForSlotSuffix(device_->GetOtherSlotSuffix());
// We're not allowed to forward merge, so forcefully rollback the
// slot switch.
LOG(INFO) << "Allowing wipe due to lack of forward merge indicator; reverting to "
"old slot since update will be deleted.";
device_->SetSlotAsUnbootable(slot_number);
device_->SetActiveBootSlot(other_slot_number);
break;
}
// Forward merge indicator means we have to mount snapshots and try to merge.
LOG(INFO) << "Forward merge indicator is present.";
try_merge = true;
break;
case UpdateState::Merging:
case UpdateState::MergeFailed:
try_merge = true;
break;
case UpdateState::MergeNeedsReboot:
case UpdateState::Cancelled:
LOG(INFO) << "Unexpected update state in recovery; allowing wipe.";
break;
default:
break;
}
if (try_merge) {
auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
auto super_path = device_->GetSuperDevice(slot_number);
if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
LOG(ERROR) << "Unable to map partitions to complete merge.";
return false;
}
auto process_callback = [&]() -> bool {
if (callback) {
callback();
}
return true;
};
state = ProcessUpdateStateOnDataWipe(process_callback);
if (state == UpdateState::MergeFailed) {
return false;
}
// Nothing should be depending on partitions now, so unmap them all.
if (!UnmapAllPartitionsInRecovery()) {
LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
}
}
if (state != UpdateState::None) {
@ -4088,58 +4134,40 @@ bool SnapshotManager::FinishMergeInRecovery() {
return true;
}
UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
const std::function<bool()>& callback) {
auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
UpdateState state = ProcessUpdateState(callback);
LOG(INFO) << "Update state in recovery: " << state;
switch (state) {
case UpdateState::MergeFailed:
LOG(ERROR) << "Unrecoverable merge failure detected.";
return state;
case UpdateState::Unverified: {
// If an OTA was just applied but has not yet started merging:
//
// - if forward merge is allowed, initiate merge and call
// ProcessUpdateState again.
//
// - if forward merge is not allowed, we
// have no choice but to revert slots, because the current slot will
// immediately become unbootable. Rather than wait for the device
// to reboot N times until a rollback, we proactively disable the
// new slot instead.
//
// Since the rollback is inevitable, we don't treat a HAL failure
// as an error here.
auto slot = GetCurrentSlot();
if (slot == Slot::Target) {
if (allow_forward_merge &&
access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
LOG(INFO) << "Forward merge allowed, initiating merge now.";
if (!InitiateMerge()) {
LOG(ERROR) << "Failed to initiate merge on data wipe.";
return UpdateState::MergeFailed;
}
return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(const std::function<bool()>& callback) {
while (true) {
UpdateState state = ProcessUpdateState(callback);
LOG(INFO) << "Processed updated state in recovery: " << state;
switch (state) {
case UpdateState::MergeFailed:
LOG(ERROR) << "Unrecoverable merge failure detected.";
return state;
case UpdateState::Unverified: {
// Unverified was already handled earlier, in HandleImminentDataWipe,
// but it will fall through here if a forward merge is required.
//
// If InitiateMerge fails, we early return. If it succeeds, then we
// are guaranteed that the next call to ProcessUpdateState will not
// return Unverified.
if (!InitiateMerge()) {
LOG(ERROR) << "Failed to initiate merge on data wipe.";
return UpdateState::MergeFailed;
}
LOG(ERROR) << "Reverting to old slot since update will be deleted.";
device_->SetSlotAsUnbootable(slot_number);
} else {
LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
continue;
}
break;
case UpdateState::MergeNeedsReboot:
// We shouldn't get here, because nothing is depending on
// logical partitions.
LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
return state;
default:
return state;
}
case UpdateState::MergeNeedsReboot:
// We shouldn't get here, because nothing is depending on
// logical partitions.
LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
break;
default:
break;
}
return state;
}
bool SnapshotManager::HasForwardMergeIndicator() {
return access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0;
}
bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {

View file

@ -2102,10 +2102,10 @@ TEST_F(SnapshotUpdateTest, DataWipeRollbackInRecovery) {
test_device->set_recovery(true);
auto new_sm = NewManagerForFirstStageMount(test_device);
EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified);
ASSERT_TRUE(new_sm->HandleImminentDataWipe());
// Manually mount metadata so that we can call GetUpdateState() below.
MountMetadata();
EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::None);
EXPECT_TRUE(test_device->IsSlotUnbootable(1));
EXPECT_FALSE(test_device->IsSlotUnbootable(0));
}
@ -2127,6 +2127,7 @@ TEST_F(SnapshotUpdateTest, DataWipeAfterRollback) {
test_device->set_recovery(true);
auto new_sm = NewManagerForFirstStageMount(test_device);
EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified);
ASSERT_TRUE(new_sm->HandleImminentDataWipe());
EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::None);
EXPECT_FALSE(test_device->IsSlotUnbootable(0));
@ -2135,10 +2136,6 @@ TEST_F(SnapshotUpdateTest, DataWipeAfterRollback) {
// Test update package that requests data wipe.
TEST_F(SnapshotUpdateTest, DataWipeRequiredInPackage) {
if (ShouldSkipLegacyMerging()) {
GTEST_SKIP() << "Skipping legacy merge in test";
}
AddOperationForPartitions();
// Execute the update.
ASSERT_TRUE(sm->BeginUpdate());
@ -2157,6 +2154,7 @@ TEST_F(SnapshotUpdateTest, DataWipeRequiredInPackage) {
test_device->set_recovery(true);
auto new_sm = NewManagerForFirstStageMount(test_device);
EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified);
ASSERT_TRUE(new_sm->HandleImminentDataWipe());
// Manually mount metadata so that we can call GetUpdateState() below.
MountMetadata();
@ -2178,10 +2176,6 @@ TEST_F(SnapshotUpdateTest, DataWipeRequiredInPackage) {
// Test update package that requests data wipe.
TEST_F(SnapshotUpdateTest, DataWipeWithStaleSnapshots) {
if (ShouldSkipLegacyMerging()) {
GTEST_SKIP() << "Skipping legacy merge in test";
}
AddOperationForPartitions();
// Execute the update.
@ -2222,6 +2216,7 @@ TEST_F(SnapshotUpdateTest, DataWipeWithStaleSnapshots) {
test_device->set_recovery(true);
auto new_sm = NewManagerForFirstStageMount(test_device);
EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified);
ASSERT_TRUE(new_sm->HandleImminentDataWipe());
// Manually mount metadata so that we can call GetUpdateState() below.
MountMetadata();