libsnapshot: Resume snapshot merge if snapshots are in second
phase If the device reboots when SnapshotUpdateStatus switches from first phase to second phase, then track the transition and resume the merge. Bug: 374225913 Test: OTA on Pixel - Verify merge resumes when device reboots just after first phase merge Change-Id: I5f62a03852a4b012850b11d0c1e6b96ec0556278 Signed-off-by: Akilesh Kailash <akailash@google.com>
This commit is contained in:
parent
097b607ac6
commit
5ad59a4cff
3 changed files with 166 additions and 4 deletions
|
|
@ -442,6 +442,7 @@ class SnapshotManager final : public ISnapshotManager {
|
|||
FRIEND_TEST(SnapshotUpdateTest, QueryStatusError);
|
||||
FRIEND_TEST(SnapshotUpdateTest, SnapshotStatusFileWithoutCow);
|
||||
FRIEND_TEST(SnapshotUpdateTest, SpaceSwapUpdate);
|
||||
FRIEND_TEST(SnapshotUpdateTest, InterruptMergeDuringPhaseUpdate);
|
||||
FRIEND_TEST(SnapshotUpdateTest, MapAllSnapshotsWithoutSlotSwitch);
|
||||
friend class SnapshotTest;
|
||||
friend class SnapshotUpdateTest;
|
||||
|
|
|
|||
|
|
@ -1343,10 +1343,25 @@ auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string&
|
|||
}
|
||||
|
||||
if (merge_status == "snapshot" &&
|
||||
DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
|
||||
update_status.merge_phase() == MergePhase::FIRST_PHASE) {
|
||||
// The snapshot is not being merged because it's in the wrong phase.
|
||||
return MergeResult(UpdateState::None);
|
||||
DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE) {
|
||||
if (update_status.merge_phase() == MergePhase::FIRST_PHASE) {
|
||||
// The snapshot is not being merged because it's in the wrong phase.
|
||||
return MergeResult(UpdateState::None);
|
||||
} else {
|
||||
// update_status is already in second phase but the
|
||||
// snapshot_status is still not set to SnapshotState::MERGING.
|
||||
//
|
||||
// Resume the merge at this point. see b/374225913
|
||||
LOG(INFO) << "SwitchSnapshotToMerge: " << name << " after resuming merge";
|
||||
auto code = SwitchSnapshotToMerge(lock, name);
|
||||
if (code != MergeFailureCode::Ok) {
|
||||
LOG(ERROR) << "Failed to switch snapshot: " << name
|
||||
<< " to merge during second phase";
|
||||
return MergeResult(UpdateState::MergeFailed,
|
||||
MergeFailureCode::UnknownTargetType);
|
||||
}
|
||||
return MergeResult(UpdateState::Merging);
|
||||
}
|
||||
}
|
||||
|
||||
if (merge_status == "snapshot-merge") {
|
||||
|
|
@ -1442,8 +1457,14 @@ MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
|
|||
return MergeFailureCode::WriteStatus;
|
||||
}
|
||||
|
||||
auto current_slot_suffix = device_->GetSlotSuffix();
|
||||
MergeFailureCode result = MergeFailureCode::Ok;
|
||||
for (const auto& snapshot : snapshots) {
|
||||
if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
|
||||
LOG(ERROR) << "Skipping invalid snapshot: " << snapshot
|
||||
<< " during MergeSecondPhaseSnapshots";
|
||||
continue;
|
||||
}
|
||||
SnapshotStatus snapshot_status;
|
||||
if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
|
||||
return MergeFailureCode::ReadStatus;
|
||||
|
|
|
|||
|
|
@ -1607,6 +1607,146 @@ TEST_F(SnapshotUpdateTest, SpaceSwapUpdate) {
|
|||
}
|
||||
}
|
||||
|
||||
// Test that shrinking and growing partitions at the same time is handled
|
||||
// correctly in VABC.
|
||||
TEST_F(SnapshotUpdateTest, InterruptMergeDuringPhaseUpdate) {
|
||||
if (!snapuserd_required_) {
|
||||
// b/179111359
|
||||
GTEST_SKIP() << "Skipping snapuserd test";
|
||||
}
|
||||
|
||||
auto old_sys_size = GetSize(sys_);
|
||||
auto old_prd_size = GetSize(prd_);
|
||||
|
||||
// Grow |sys| but shrink |prd|.
|
||||
SetSize(sys_, old_sys_size * 2);
|
||||
sys_->set_estimate_cow_size(8_MiB);
|
||||
SetSize(prd_, old_prd_size / 2);
|
||||
prd_->set_estimate_cow_size(1_MiB);
|
||||
|
||||
AddOperationForPartitions();
|
||||
|
||||
ASSERT_TRUE(sm->BeginUpdate());
|
||||
ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
|
||||
|
||||
// Check that the old partition sizes were saved correctly.
|
||||
{
|
||||
ASSERT_TRUE(AcquireLock());
|
||||
auto local_lock = std::move(lock_);
|
||||
|
||||
SnapshotStatus status;
|
||||
ASSERT_TRUE(sm->ReadSnapshotStatus(local_lock.get(), "prd_b", &status));
|
||||
ASSERT_EQ(status.old_partition_size(), 3145728);
|
||||
ASSERT_TRUE(sm->ReadSnapshotStatus(local_lock.get(), "sys_b", &status));
|
||||
ASSERT_EQ(status.old_partition_size(), 3145728);
|
||||
}
|
||||
|
||||
ASSERT_TRUE(WriteSnapshotAndHash(sys_));
|
||||
ASSERT_TRUE(WriteSnapshotAndHash(vnd_));
|
||||
ASSERT_TRUE(ShiftAllSnapshotBlocks("prd_b", old_prd_size));
|
||||
|
||||
sync();
|
||||
|
||||
// Assert that source partitions aren't affected.
|
||||
for (const auto& name : {"sys_a", "vnd_a", "prd_a"}) {
|
||||
ASSERT_TRUE(IsPartitionUnchanged(name));
|
||||
}
|
||||
|
||||
ASSERT_TRUE(sm->FinishedSnapshotWrites(false));
|
||||
|
||||
// Simulate shutting down the device.
|
||||
ASSERT_TRUE(UnmapAll());
|
||||
|
||||
// After reboot, init does first stage mount.
|
||||
auto init = NewManagerForFirstStageMount("_b");
|
||||
ASSERT_NE(init, nullptr);
|
||||
ASSERT_TRUE(init->NeedSnapshotsInFirstStageMount());
|
||||
ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_));
|
||||
|
||||
// Check that the target partitions have the same content.
|
||||
for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) {
|
||||
ASSERT_TRUE(IsPartitionUnchanged(name));
|
||||
}
|
||||
|
||||
// Initiate the merge and wait for it to be completed.
|
||||
if (ShouldSkipLegacyMerging()) {
|
||||
LOG(INFO) << "Skipping legacy merge in test";
|
||||
return;
|
||||
}
|
||||
ASSERT_TRUE(init->InitiateMerge());
|
||||
ASSERT_EQ(init->IsSnapuserdRequired(), snapuserd_required_);
|
||||
{
|
||||
// Check that the merge phase is FIRST_PHASE until at least one call
|
||||
// to ProcessUpdateState() occurs.
|
||||
ASSERT_TRUE(AcquireLock());
|
||||
auto local_lock = std::move(lock_);
|
||||
auto status = init->ReadSnapshotUpdateStatus(local_lock.get());
|
||||
ASSERT_EQ(status.merge_phase(), MergePhase::FIRST_PHASE);
|
||||
}
|
||||
|
||||
// Wait until prd_b merge is completed which is part of first phase
|
||||
std::chrono::milliseconds timeout(6000);
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
// Keep polling until the merge is complete or timeout is reached
|
||||
while (true) {
|
||||
// Query the merge status
|
||||
const auto merge_status = init->snapuserd_client()->QuerySnapshotStatus("prd_b");
|
||||
if (merge_status == "snapshot-merge-complete") {
|
||||
break;
|
||||
}
|
||||
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
|
||||
|
||||
ASSERT_TRUE(elapsed < timeout);
|
||||
// sleep for a second and allow merge to complete
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
|
||||
}
|
||||
|
||||
// Now, forcefully update the snapshot-update status to SECOND PHASE
|
||||
// This will not update the snapshot status of sys_b to MERGING
|
||||
if (init->UpdateUsesUserSnapshots()) {
|
||||
ASSERT_TRUE(AcquireLock());
|
||||
auto local_lock = std::move(lock_);
|
||||
auto status = init->ReadSnapshotUpdateStatus(local_lock.get());
|
||||
status.set_merge_phase(MergePhase::SECOND_PHASE);
|
||||
ASSERT_TRUE(init->WriteSnapshotUpdateStatus(local_lock.get(), status));
|
||||
}
|
||||
|
||||
// Simulate shutting down the device and creating partitions again.
|
||||
ASSERT_TRUE(UnmapAll());
|
||||
ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_));
|
||||
|
||||
DeviceMapper::TargetInfo target;
|
||||
ASSERT_TRUE(init->IsSnapshotDevice("prd_b", &target));
|
||||
|
||||
ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user");
|
||||
ASSERT_TRUE(init->IsSnapshotDevice("sys_b", &target));
|
||||
ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user");
|
||||
ASSERT_TRUE(init->IsSnapshotDevice("vnd_b", &target));
|
||||
ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "user");
|
||||
|
||||
// Complete the merge; "sys" and "vnd" should resume the merge
|
||||
// even though merge was interrupted after update_status was updated to
|
||||
// SECOND_PHASE
|
||||
ASSERT_EQ(UpdateState::MergeCompleted, init->ProcessUpdateState());
|
||||
|
||||
// Make sure the second phase ran and deleted snapshots.
|
||||
{
|
||||
ASSERT_TRUE(AcquireLock());
|
||||
auto local_lock = std::move(lock_);
|
||||
std::vector<std::string> snapshots;
|
||||
ASSERT_TRUE(init->ListSnapshots(local_lock.get(), &snapshots));
|
||||
ASSERT_TRUE(snapshots.empty());
|
||||
}
|
||||
|
||||
// Check that the target partitions have the same content after the merge.
|
||||
for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) {
|
||||
ASSERT_TRUE(IsPartitionUnchanged(name))
|
||||
<< "Content of " << name << " changes after the merge";
|
||||
}
|
||||
}
|
||||
|
||||
// Test that if new system partitions uses empty space in super, that region is not snapshotted.
|
||||
TEST_F(SnapshotUpdateTest, DirectWriteEmptySpace) {
|
||||
GTEST_SKIP() << "b/141889746";
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue