diff --git a/fs_mgr/libsnapshot/android/snapshot/snapshot.proto b/fs_mgr/libsnapshot/android/snapshot/snapshot.proto index 38c6bf893..36e116943 100644 --- a/fs_mgr/libsnapshot/android/snapshot/snapshot.proto +++ b/fs_mgr/libsnapshot/android/snapshot/snapshot.proto @@ -34,7 +34,19 @@ enum SnapshotState { MERGE_COMPLETED = 3; } -// Next: 10 +// Next: 3 +enum MergePhase { + // No merge is in progress. + NO_MERGE = 0; + + // Shrunk partitions can merge. + FIRST_PHASE = 1; + + // Grown partitions can merge. + SECOND_PHASE = 2; +} + +// Next: 11 message SnapshotStatus { // Name of the snapshot. This is usually the name of the snapshotted // logical partition; for example, "system_b". @@ -87,6 +99,9 @@ message SnapshotStatus { // True if compression is enabled, false otherwise. bool compression_enabled = 9; + + // The old partition size (if none existed, this will be zero). + uint64 old_partition_size = 10; } // Next: 8 @@ -118,7 +133,7 @@ enum UpdateState { Cancelled = 7; }; -// Next: 6 +// Next: 7 message SnapshotUpdateStatus { UpdateState state = 1; @@ -136,6 +151,9 @@ message SnapshotUpdateStatus { // Whether compression/dm-user was used for any snapshots. bool compression_enabled = 5; + + // Merge phase (if state == MERGING). + MergePhase merge_phase = 6; } // Next: 4 diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 1d7b6031c..0a8567fdc 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -463,6 +463,10 @@ class SnapshotManager final : public ISnapshotManager { const std::string& base_device, const std::chrono::milliseconds& timeout_ms, std::string* path); + // Map the source device used for dm-user. + bool MapSourceDevice(LockedFile* lock, const std::string& name, + const std::chrono::milliseconds& timeout_ms, std::string* path); + // Map a COW image that was previous created with CreateCowImage. 
std::optional MapCowImage(const std::string& name, const std::chrono::milliseconds& timeout_ms); @@ -521,11 +525,13 @@ class SnapshotManager final : public ISnapshotManager { std::string GetMergeStateFilePath() const; // Helpers for merging. + bool MergeSecondPhaseSnapshots(LockedFile* lock); bool SwitchSnapshotToMerge(LockedFile* lock, const std::string& name); bool RewriteSnapshotDeviceTable(const std::string& dm_name); bool MarkSnapshotMergeCompleted(LockedFile* snapshot_lock, const std::string& snapshot_name); void AcknowledgeMergeSuccess(LockedFile* lock); void AcknowledgeMergeFailure(); + MergePhase DecideMergePhase(const SnapshotStatus& status); std::unique_ptr ReadCurrentMetadata(); enum class MetadataPartitionState { @@ -558,7 +564,8 @@ class SnapshotManager final : public ISnapshotManager { // UpdateState::MergeNeedsReboot UpdateState CheckMergeState(const std::function& before_cancel); UpdateState CheckMergeState(LockedFile* lock, const std::function& before_cancel); - UpdateState CheckTargetMergeState(LockedFile* lock, const std::string& name); + UpdateState CheckTargetMergeState(LockedFile* lock, const std::string& name, + const SnapshotUpdateStatus& update_status); // Interact with status files under /metadata/ota/snapshots. 
bool WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status); @@ -568,6 +575,9 @@ class SnapshotManager final : public ISnapshotManager { std::string GetSnapshotBootIndicatorPath(); std::string GetRollbackIndicatorPath(); std::string GetForwardMergeIndicatorPath(); + std::string GetOldPartitionMetadataPath(); + + const LpMetadata* ReadOldPartitionMetadata(LockedFile* lock); bool MapAllPartitions(LockedFile* lock, const std::string& super_device, uint32_t slot, const std::chrono::milliseconds& timeout_ms); @@ -716,6 +726,7 @@ class SnapshotManager final : public ISnapshotManager { bool in_factory_data_reset_ = false; std::function uevent_regen_callback_; std::unique_ptr snapuserd_client_; + std::unique_ptr old_partition_metadata_; }; } // namespace snapshot diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index 64e45ede0..ebda4300b 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -132,6 +132,10 @@ static std::string GetBaseDeviceName(const std::string& partition_name) { return partition_name + "-base"; } +static std::string GetSourceDeviceName(const std::string& partition_name) { + return partition_name + "-src"; +} + bool SnapshotManager::BeginUpdate() { bool needs_merge = false; if (!TryCancelUpdate(&needs_merge)) { @@ -153,6 +157,9 @@ bool SnapshotManager::BeginUpdate() { images_->RemoveAllImages(); } + // Clear any cached metadata (this allows re-using one manager across tests). 
+ old_partition_metadata_ = nullptr; + auto state = ReadUpdateState(file.get()); if (state != UpdateState::None) { LOG(ERROR) << "An update is already in progress, cannot begin a new update"; @@ -251,6 +258,7 @@ bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function GetSnapshotBootIndicatorPath(), GetRollbackIndicatorPath(), GetForwardMergeIndicatorPath(), + GetOldPartitionMetadataPath(), }; for (const auto& file : files) { RemoveFileIfExists(file); @@ -475,7 +483,8 @@ bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name, // have completed merging, but the start of the merge process is considered // atomic. SnapshotStorageMode mode; - switch (ReadUpdateState(lock)) { + SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock); + switch (update_status.state()) { case UpdateState::MergeCompleted: case UpdateState::MergeNeedsReboot: LOG(ERROR) << "Should not create a snapshot device for " << name @@ -485,7 +494,11 @@ bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name, case UpdateState::MergeFailed: // Note: MergeFailed indicates that a merge is in progress, but // is possibly stalled. We still have to honor the merge. 
- mode = SnapshotStorageMode::Merge; + if (DecideMergePhase(status) == update_status.merge_phase()) { + mode = SnapshotStorageMode::Merge; + } else { + mode = SnapshotStorageMode::Persistent; + } break; default: mode = SnapshotStorageMode::Persistent; @@ -526,6 +539,36 @@ std::optional SnapshotManager::MapCowImage( return std::nullopt; } +bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name, + const std::chrono::milliseconds& timeout_ms, + std::string* path) { + CHECK(lock); + + auto metadata = ReadOldPartitionMetadata(lock); + if (!metadata) { + LOG(ERROR) << "Could not map source device due to missing or corrupt metadata"; + return false; + } + + auto old_name = GetOtherPartitionName(name); + auto slot_suffix = device_->GetSlotSuffix(); + auto slot = SlotNumberForSlotSuffix(slot_suffix); + + CreateLogicalPartitionParams params = { + .block_device = device_->GetSuperDevice(slot), + .metadata = metadata, + .partition_name = old_name, + .timeout_ms = timeout_ms, + .device_name = GetSourceDeviceName(name), + .partition_opener = &device_->GetPartitionOpener(), + }; + if (!CreateLogicalPartition(std::move(params), path)) { + LOG(ERROR) << "Could not create source device for snapshot " << name; + return false; + } + return true; +} + bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) { CHECK(lock); @@ -640,6 +683,8 @@ bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) { bool compression_enabled = false; + std::vector first_merge_group; + uint64_t total_cow_file_size = 0; DmTargetSnapshot::Status initial_target_values = {}; for (const auto& snapshot : snapshots) { @@ -658,6 +703,9 @@ bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) { total_cow_file_size += snapshot_status.cow_file_size(); compression_enabled |= snapshot_status.compression_enabled(); + if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) { + first_merge_group.emplace_back(snapshot); + } } if (cow_file_size) { @@ 
-671,14 +719,26 @@ bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) { initial_status.set_metadata_sectors(initial_target_values.metadata_sectors); initial_status.set_compression_enabled(compression_enabled); + // If any partitions shrunk, we need to merge them before we merge any other + // partitions (see b/177935716). Otherwise, a merge from another partition + // may overwrite the source block of a copy operation. + const std::vector* merge_group; + if (first_merge_group.empty()) { + merge_group = &snapshots; + initial_status.set_merge_phase(MergePhase::SECOND_PHASE); + } else { + merge_group = &first_merge_group; + initial_status.set_merge_phase(MergePhase::FIRST_PHASE); + } + // Point of no return - mark that we're starting a merge. From now on every - // snapshot must be a merge target. + // eligible snapshot must be a merge target. if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) { return false; } bool rewrote_all = true; - for (const auto& snapshot : snapshots) { + for (const auto& snapshot : *merge_group) { // If this fails, we have no choice but to continue. Everything must // be merged. This is not an ideal state to be in, but it is safe, // because we the next boot will try again. @@ -869,13 +929,13 @@ UpdateState SnapshotManager::CheckMergeState(const std::function& before UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, const std::function& before_cancel) { - UpdateState state = ReadUpdateState(lock); - switch (state) { + SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock); + switch (update_status.state()) { case UpdateState::None: case UpdateState::MergeCompleted: // Harmless races are allowed between two callers of WaitForMerge, // so in both of these cases we just propagate the state. 
- return state; + return update_status.state(); case UpdateState::Merging: case UpdateState::MergeNeedsReboot: @@ -892,10 +952,10 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, if (HandleCancelledUpdate(lock, before_cancel)) { return UpdateState::Cancelled; } - return state; + return update_status.state(); default: - return state; + return update_status.state(); } std::vector snapshots; @@ -907,8 +967,9 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, bool failed = false; bool merging = false; bool needs_reboot = false; + bool wrong_phase = false; for (const auto& snapshot : snapshots) { - UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot); + UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot, update_status); switch (snapshot_state) { case UpdateState::MergeFailed: failed = true; @@ -924,6 +985,9 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, case UpdateState::Cancelled: cancelled = true; break; + case UpdateState::None: + wrong_phase = true; + break; default: LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": " << "\"" << snapshot_state << "\""; @@ -943,6 +1007,14 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, // it in WaitForMerge rather than here and elsewhere. return UpdateState::MergeFailed; } + if (wrong_phase) { + // If we got here, no other partitions are being merged, and nothing + // failed to merge. It's safe to move to the next merge phase. 
+ if (!MergeSecondPhaseSnapshots(lock)) { + return UpdateState::MergeFailed; + } + return UpdateState::Merging; + } if (needs_reboot) { WriteUpdateState(lock, UpdateState::MergeNeedsReboot); return UpdateState::MergeNeedsReboot; @@ -958,7 +1030,8 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, return UpdateState::MergeCompleted; } -UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name) { +UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name, + const SnapshotUpdateStatus& update_status) { SnapshotStatus snapshot_status; if (!ReadSnapshotStatus(lock, name, &snapshot_status)) { return UpdateState::MergeFailed; @@ -980,7 +1053,7 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: // During a check, we decided the merge was complete, but we were unable to // collapse the device-mapper stack and perform COW cleanup. If we haven't // rebooted after this check, the device will still be a snapshot-merge - // target. If the have rebooted, the device will now be a linear target, + // target. If we have rebooted, the device will now be a linear target, // and we can try cleanup again. if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) { // NB: It's okay if this fails now, we gave cleanup our best effort. @@ -1001,6 +1074,12 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: if (!QuerySnapshotStatus(name, &target_type, &status)) { return UpdateState::MergeFailed; } + if (target_type == "snapshot" && + DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE && + update_status.merge_phase() == MergePhase::FIRST_PHASE) { + // The snapshot is not being merged because it's in the wrong phase. + return UpdateState::None; + } if (target_type != "snapshot-merge") { // We can get here if we failed to rewrite the target type in // InitiateMerge(). 
If we failed to create the target in first-stage
@@ -1036,6 +1115,38 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::
     return UpdateState::MergeCompleted;
 }
 
+// Moves a merge from FIRST_PHASE to SECOND_PHASE. Returns false on any failure.
+bool SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
+    std::vector<std::string> snapshots;
+    if (!ListSnapshots(lock, &snapshots)) {
+        return false;  // Not UpdateState::MergeFailed: a non-zero enum converts to true.
+    }
+    SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
+    CHECK(update_status.state() == UpdateState::Merging);
+    CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
+    // Persist the phase change before rewriting any snapshot device.
+    update_status.set_merge_phase(MergePhase::SECOND_PHASE);
+    if (!WriteSnapshotUpdateStatus(lock, update_status)) {
+        return false;
+    }
+    // Switch each second-phase snapshot to a merge target; keep going if one fails.
+    bool rewrote_all = true;
+    for (const auto& snapshot : snapshots) {
+        SnapshotStatus snapshot_status;
+        if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
+            return false;  // Same as above: report the failure as a real false.
+        }
+        if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
+            continue;
+        }
+        if (!SwitchSnapshotToMerge(lock, snapshot)) {
+            LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
+            rewrote_all = false;
+        }
+    }
+    return rewrote_all;
+}
+
 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
 }
@@ -1048,6 +1159,10 @@ std::string SnapshotManager::GetForwardMergeIndicatorPath() {
     return metadata_dir_ + "/allow-forward-merge";
 }
 
+std::string SnapshotManager::GetOldPartitionMetadataPath() {
+    return metadata_dir_ + "/old-partition-metadata";
+}
+
 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
     // It's not possible to remove update state in recovery, so write an
     // indicator that cleanup is needed on reboot.
If a factory data reset @@ -1172,6 +1287,10 @@ bool SnapshotManager::CollapseSnapshotDevice(const std::string& name, if (!dm.DeleteDeviceIfExists(base_name)) { LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name; } + auto source_name = GetSourceDeviceName(name); + if (!dm.DeleteDeviceIfExists(source_name)) { + LOG(ERROR) << "Unable to delete source device for snapshot: " << source_name; + } return true; } @@ -1274,9 +1393,9 @@ bool SnapshotManager::PerformInitTransition(InitTransition transition, continue; } - std::string backing_device; - if (!dm.GetDmDevicePathByName(GetBaseDeviceName(snapshot), &backing_device)) { - LOG(ERROR) << "Could not get device path for " << GetBaseDeviceName(snapshot); + std::string source_device; + if (!dm.GetDmDevicePathByName(GetSourceDeviceName(snapshot), &source_device)) { + LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot); continue; } @@ -1302,7 +1421,7 @@ bool SnapshotManager::PerformInitTransition(InitTransition transition, } if (transition == InitTransition::SELINUX_DETACH) { - auto message = misc_name + "," + cow_image_device + "," + backing_device; + auto message = misc_name + "," + cow_image_device + "," + source_device; snapuserd_argv->emplace_back(std::move(message)); // Do not attempt to connect to the new snapuserd yet, it hasn't @@ -1313,7 +1432,7 @@ bool SnapshotManager::PerformInitTransition(InitTransition transition, } uint64_t base_sectors = - snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, backing_device); + snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device); if (base_sectors == 0) { // Unrecoverable as metadata reads from cow device failed LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd"; @@ -1822,24 +1941,35 @@ bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock, } if (live_snapshot_status->compression_enabled()) { - auto name = GetDmUserCowName(params.GetPartitionName()); + // Get the source 
device (eg the view of the partition from before it was resized). + std::string source_device_path; + if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time, + &source_device_path)) { + LOG(ERROR) << "Could not map source device for: " << cow_name; + return false; + } + + auto source_device = GetSourceDeviceName(params.GetPartitionName()); + created_devices.EmplaceBack(&dm, source_device); + + if (!WaitForDevice(source_device_path, remaining_time)) { + return false; + } std::string cow_path; if (!GetMappedImageDevicePath(cow_name, &cow_path)) { LOG(ERROR) << "Could not determine path for: " << cow_name; return false; } - - // Ensure both |base_path| and |cow_path| are created, for snapuserd. - if (!WaitForDevice(base_path, remaining_time)) { - return false; - } if (!WaitForDevice(cow_path, remaining_time)) { return false; } + auto name = GetDmUserCowName(params.GetPartitionName()); + std::string new_cow_device; - if (!MapDmUserCow(lock, name, cow_path, base_path, remaining_time, &new_cow_device)) { + if (!MapDmUserCow(lock, name, cow_path, source_device_path, remaining_time, + &new_cow_device)) { LOG(ERROR) << "Could not map dm-user device for partition " << params.GetPartitionName(); return false; @@ -1883,12 +2013,18 @@ bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock, } auto& dm = DeviceMapper::Instance(); - std::string base_name = GetBaseDeviceName(target_partition_name); + auto base_name = GetBaseDeviceName(target_partition_name); if (!dm.DeleteDeviceIfExists(base_name)) { LOG(ERROR) << "Cannot delete base device: " << base_name; return false; } + auto source_name = GetSourceDeviceName(target_partition_name); + if (!dm.DeleteDeviceIfExists(source_name)) { + LOG(ERROR) << "Cannot delete source device: " << source_name; + return false; + } + LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name; return true; @@ -2480,6 +2616,24 @@ Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manife 
return Return::Error(); } + // If compression is enabled, we need to retain a copy of the old metadata + // so we can access original blocks in case they are moved around. We do + // not want to rely on the old super metadata slot because we don't + // guarantee its validity after the slot switch is successful. + if (cow_creator.compression_enabled) { + auto metadata = current_metadata->Export(); + if (!metadata) { + LOG(ERROR) << "Could not export current metadata"; + return Return::Error(); + } + + auto path = GetOldPartitionMetadataPath(); + if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) { + LOG(ERROR) << "Cannot write old metadata to " << path; + return Return::Error(); + } + } + SnapshotUpdateStatus status = {}; status.set_state(update_state); status.set_compression_enabled(cow_creator.compression_enabled); @@ -2580,6 +2734,15 @@ Return SnapshotManager::CreateUpdateSnapshotsInternal( continue; } + // Find the original partition size. + auto name = target_partition->name(); + auto old_partition_name = + name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix; + auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name); + if (old_partition) { + cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size()); + } + // Store these device sizes to snapshot status file. 
if (!CreateSnapshot(lock, &cow_creator_ret->snapshot_status)) { return Return::Error(); @@ -3267,5 +3430,26 @@ bool SnapshotManager::PerformSecondStageInitTransition() { return PerformInitTransition(InitTransition::SECOND_STAGE); } +const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) { + CHECK(lock); + + if (!old_partition_metadata_) { + auto path = GetOldPartitionMetadataPath(); + old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path); + if (!old_partition_metadata_) { + LOG(ERROR) << "Could not read old partition metadata from " << path; + return nullptr; + } + } + return old_partition_metadata_.get(); +} + +MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) { + if (status.compression_enabled() && status.device_size() < status.old_partition_size()) { + return MergePhase::FIRST_PHASE; + } + return MergePhase::SECOND_PHASE; +} + } // namespace snapshot } // namespace android diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 8b72022a6..4c209ecb4 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -234,7 +234,8 @@ class SnapshotTest : public ::testing::Test { .partition_opener = &opener, }; - auto result = sm->OpenSnapshotWriter(params, {}); + auto old_partition = "/dev/block/mapper/" + GetOtherPartitionName(name); + auto result = sm->OpenSnapshotWriter(params, {old_partition}); if (!result) { return AssertionFailure() << "Cannot open snapshot for writing: " << name; } diff --git a/fs_mgr/libsnapshot/utility.cpp b/fs_mgr/libsnapshot/utility.cpp index 7342fd434..4a2af1c10 100644 --- a/fs_mgr/libsnapshot/utility.cpp +++ b/fs_mgr/libsnapshot/utility.cpp @@ -187,5 +187,13 @@ bool IsCompressionEnabled() { return android::base::GetBoolProperty("ro.virtual_ab.compression.enabled", false); } +std::string GetOtherPartitionName(const std::string& name) { + auto suffix = android::fs_mgr::GetPartitionSlotSuffix(name); + 
CHECK(suffix == "_a" || suffix == "_b"); + + auto other_suffix = (suffix == "_a") ? "_b" : "_a"; + return name.substr(0, name.size() - suffix.size()) + other_suffix; +} + } // namespace snapshot } // namespace android diff --git a/fs_mgr/libsnapshot/utility.h b/fs_mgr/libsnapshot/utility.h index 3e6873b14..671de9dbc 100644 --- a/fs_mgr/libsnapshot/utility.h +++ b/fs_mgr/libsnapshot/utility.h @@ -131,5 +131,8 @@ void AppendExtent(google::protobuf::RepeatedPtrField