Merge "libsnapshot: Resume snapshot merge if snapshots are in second phase" into main

This commit is contained in:
Akilesh Kailash 2024-10-20 06:55:24 +00:00 committed by Gerrit Code Review
commit c7cad438ab
3 changed files with 166 additions and 4 deletions

View file

@ -442,6 +442,7 @@ class SnapshotManager final : public ISnapshotManager {
FRIEND_TEST(SnapshotUpdateTest, QueryStatusError);
FRIEND_TEST(SnapshotUpdateTest, SnapshotStatusFileWithoutCow);
FRIEND_TEST(SnapshotUpdateTest, SpaceSwapUpdate);
FRIEND_TEST(SnapshotUpdateTest, InterruptMergeDuringPhaseUpdate);
FRIEND_TEST(SnapshotUpdateTest, MapAllSnapshotsWithoutSlotSwitch);
friend class SnapshotTest;
friend class SnapshotUpdateTest;

View file

@ -1343,10 +1343,25 @@ auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string&
}
if (merge_status == "snapshot" &&
DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
update_status.merge_phase() == MergePhase::FIRST_PHASE) {
// The snapshot is not being merged because it's in the wrong phase.
return MergeResult(UpdateState::None);
DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE) {
if (update_status.merge_phase() == MergePhase::FIRST_PHASE) {
// The snapshot is not being merged because it's in the wrong phase.
return MergeResult(UpdateState::None);
} else {
// update_status is already in second phase but the
// snapshot_status is still not set to SnapshotState::MERGING.
//
// Resume the merge at this point. see b/374225913
LOG(INFO) << "SwitchSnapshotToMerge: " << name << " after resuming merge";
auto code = SwitchSnapshotToMerge(lock, name);
if (code != MergeFailureCode::Ok) {
LOG(ERROR) << "Failed to switch snapshot: " << name
<< " to merge during second phase";
return MergeResult(UpdateState::MergeFailed,
MergeFailureCode::UnknownTargetType);
}
return MergeResult(UpdateState::Merging);
}
}
if (merge_status == "snapshot-merge") {
@ -1442,8 +1457,14 @@ MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
return MergeFailureCode::WriteStatus;
}
auto current_slot_suffix = device_->GetSlotSuffix();
MergeFailureCode result = MergeFailureCode::Ok;
for (const auto& snapshot : snapshots) {
if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
LOG(ERROR) << "Skipping invalid snapshot: " << snapshot
<< " during MergeSecondPhaseSnapshots";
continue;
}
SnapshotStatus snapshot_status;
if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
return MergeFailureCode::ReadStatus;

View file

@ -1607,6 +1607,146 @@ TEST_F(SnapshotUpdateTest, SpaceSwapUpdate) {
}
}
// Test that shrinking and growing partitions at the same time is handled
// correctly in VABC.
TEST_F(SnapshotUpdateTest, InterruptMergeDuringPhaseUpdate) {
if (!snapuserd_required_) {
// b/179111359
GTEST_SKIP() << "Skipping snapuserd test";
}
auto old_sys_size = GetSize(sys_);
auto old_prd_size = GetSize(prd_);
// Grow |sys| but shrink |prd|.
SetSize(sys_, old_sys_size * 2);
sys_->set_estimate_cow_size(8_MiB);
SetSize(prd_, old_prd_size / 2);
prd_->set_estimate_cow_size(1_MiB);
AddOperationForPartitions();
ASSERT_TRUE(sm->BeginUpdate());
ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
// Check that the old partition sizes were saved correctly.
{
ASSERT_TRUE(AcquireLock());
auto local_lock = std::move(lock_);
SnapshotStatus status;
ASSERT_TRUE(sm->ReadSnapshotStatus(local_lock.get(), "prd_b", &status));
ASSERT_EQ(status.old_partition_size(), 3145728);
ASSERT_TRUE(sm->ReadSnapshotStatus(local_lock.get(), "sys_b", &status));
ASSERT_EQ(status.old_partition_size(), 3145728);
}
ASSERT_TRUE(WriteSnapshotAndHash(sys_));
ASSERT_TRUE(WriteSnapshotAndHash(vnd_));
ASSERT_TRUE(ShiftAllSnapshotBlocks("prd_b", old_prd_size));
sync();
// Assert that source partitions aren't affected.
for (const auto& name : {"sys_a", "vnd_a", "prd_a"}) {
ASSERT_TRUE(IsPartitionUnchanged(name));
}
ASSERT_TRUE(sm->FinishedSnapshotWrites(false));
// Simulate shutting down the device.
ASSERT_TRUE(UnmapAll());
// After reboot, init does first stage mount.
auto init = NewManagerForFirstStageMount("_b");
ASSERT_NE(init, nullptr);
ASSERT_TRUE(init->NeedSnapshotsInFirstStageMount());
ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_));
// Check that the target partitions have the same content.
for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) {
ASSERT_TRUE(IsPartitionUnchanged(name));
}
// Initiate the merge and wait for it to be completed.
if (ShouldSkipLegacyMerging()) {
LOG(INFO) << "Skipping legacy merge in test";
return;
}
ASSERT_TRUE(init->InitiateMerge());
ASSERT_EQ(init->IsSnapuserdRequired(), snapuserd_required_);
{
// Check that the merge phase is FIRST_PHASE until at least one call
// to ProcessUpdateState() occurs.
ASSERT_TRUE(AcquireLock());
auto local_lock = std::move(lock_);
auto status = init->ReadSnapshotUpdateStatus(local_lock.get());
ASSERT_EQ(status.merge_phase(), MergePhase::FIRST_PHASE);
}
// Wait until prd_b merge is completed which is part of first phase
std::chrono::milliseconds timeout(6000);
auto start = std::chrono::steady_clock::now();
// Keep polling until the merge is complete or timeout is reached
while (true) {
// Query the merge status
const auto merge_status = init->snapuserd_client()->QuerySnapshotStatus("prd_b");
if (merge_status == "snapshot-merge-complete") {
break;
}
auto now = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
ASSERT_TRUE(elapsed < timeout);
// sleep for a second and allow merge to complete
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
}
// Now, forcefully update the snapshot-update status to SECOND PHASE
// This will not update the snapshot status of sys_b to MERGING
if (init->UpdateUsesUserSnapshots()) {
ASSERT_TRUE(AcquireLock());
auto local_lock = std::move(lock_);
auto status = init->ReadSnapshotUpdateStatus(local_lock.get());
status.set_merge_phase(MergePhase::SECOND_PHASE);
ASSERT_TRUE(init->WriteSnapshotUpdateStatus(local_lock.get(), status));
}
// Simulate shutting down the device and creating partitions again.
ASSERT_TRUE(UnmapAll());
ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_));
DeviceMapper::TargetInfo target;
ASSERT_TRUE(init->IsSnapshotDevice("prd_b", &target));
ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user");
ASSERT_TRUE(init->IsSnapshotDevice("sys_b", &target));
ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user");
ASSERT_TRUE(init->IsSnapshotDevice("vnd_b", &target));
ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user");
// Complete the merge; "sys" and "vnd" should resume the merge
// even though merge was interrupted after update_status was updated to
// SECOND_PHASE
ASSERT_EQ(UpdateState::MergeCompleted, init->ProcessUpdateState());
// Make sure the second phase ran and deleted snapshots.
{
ASSERT_TRUE(AcquireLock());
auto local_lock = std::move(lock_);
std::vector<std::string> snapshots;
ASSERT_TRUE(init->ListSnapshots(local_lock.get(), &snapshots));
ASSERT_TRUE(snapshots.empty());
}
// Check that the target partitions have the same content after the merge.
for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) {
ASSERT_TRUE(IsPartitionUnchanged(name))
<< "Content of " << name << " changes after the merge";
}
}
// Test that if new system partitions uses empty space in super, that region is not snapshotted.
TEST_F(SnapshotUpdateTest, DirectWriteEmptySpace) {
GTEST_SKIP() << "b/141889746";