Perform a consistency check before deleting snapshots.
If for some reason the COW state is not fully synced to disk, but
dm-snapshot has flushed its pending merges, we do not want to delete
snapshots. Doing so could potentially leave blocks unmerged.
This situation is quite unexpected, so we label it as a merge failure.
The device can recover by completely syncing the COW state, and then
rebooting, which will attempt to make forward progress on the merge.
Bug: 190582627
Test: vts_libsnapshot_test
full OTA on bramble
incremental OTA on bramble
Change-Id: Ib887f1d9e4397a712ed2f800cc1222cf9305a039
Merged-In: Ib887f1d9e4397a712ed2f800cc1222cf9305a039
This commit is contained in:
parent
028303d523
commit
91b351ea7b
4 changed files with 102 additions and 11 deletions
|
|
@ -158,6 +158,13 @@ enum MergeFailureCode {
|
|||
ExpectedMergeTarget = 11;
|
||||
UnmergedSectorsAfterCompletion = 12;
|
||||
UnexpectedMergeState = 13;
|
||||
GetCowPathConsistencyCheck = 14;
|
||||
OpenCowConsistencyCheck = 15;
|
||||
ParseCowConsistencyCheck = 16;
|
||||
OpenCowDirectConsistencyCheck = 17;
|
||||
MemAlignConsistencyCheck = 18;
|
||||
DirectReadConsistencyCheck = 19;
|
||||
WrongMergeCountConsistencyCheck = 20;
|
||||
};
|
||||
|
||||
// Next: 8
|
||||
|
|
|
|||
|
|
@ -143,12 +143,11 @@ class CowReader : public ICowReader {
|
|||
|
||||
void InitializeMerge();
|
||||
|
||||
// Number of copy, replace, and zero ops. Set if InitializeMerge is called.
|
||||
void set_total_data_ops(uint64_t size) { total_data_ops_ = size; }
|
||||
|
||||
uint64_t total_data_ops() { return total_data_ops_; }
|
||||
|
||||
// Number of copy ops. Set if InitializeMerge is called.
|
||||
void set_copy_ops(uint64_t size) { copy_ops_ = size; }
|
||||
|
||||
uint64_t total_copy_ops() { return copy_ops_; }
|
||||
|
||||
void CloseCowFd() { owned_fd_ = {}; }
|
||||
|
|
|
|||
|
|
@ -603,6 +603,8 @@ class SnapshotManager final : public ISnapshotManager {
|
|||
MergeResult CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel);
|
||||
MergeResult CheckTargetMergeState(LockedFile* lock, const std::string& name,
|
||||
const SnapshotUpdateStatus& update_status);
|
||||
MergeFailureCode CheckMergeConsistency(LockedFile* lock, const std::string& name,
|
||||
const SnapshotStatus& update_status);
|
||||
|
||||
// Interact with status files under /metadata/ota/snapshots.
|
||||
bool WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status);
|
||||
|
|
|
|||
|
|
@ -1126,6 +1126,11 @@ auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string&
|
|||
return MergeResult(UpdateState::Merging);
|
||||
}
|
||||
|
||||
auto code = CheckMergeConsistency(lock, name, snapshot_status);
|
||||
if (code != MergeFailureCode::Ok) {
|
||||
return MergeResult(UpdateState::MergeFailed, code);
|
||||
}
|
||||
|
||||
// Merging is done. First, update the status file to indicate the merge
|
||||
// is complete. We do this before calling OnSnapshotMergeComplete, even
|
||||
// though this means the write is potentially wasted work (since in the
|
||||
|
|
@ -1144,6 +1149,91 @@ auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string&
|
|||
return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
|
||||
}
|
||||
|
||||
// This returns the backing device, not the dm-user layer.
|
||||
static std::string GetMappedCowDeviceName(const std::string& snapshot,
|
||||
const SnapshotStatus& status) {
|
||||
// If no partition was created (the COW exists entirely on /data), the
|
||||
// device-mapper layering is different than if we had a partition.
|
||||
if (status.cow_partition_size() == 0) {
|
||||
return GetCowImageDeviceName(snapshot);
|
||||
}
|
||||
return GetCowName(snapshot);
|
||||
}
|
||||
|
||||
// Verifies that the merge progress recorded in a snapshot's COW header matches
// the number of data operations the COW actually contains.
//
// The check runs in two passes over the mapped COW device:
//   1. Parse the COW with CowReader and count every non-metadata op.
//   2. Re-open the device with O_DIRECT | O_SYNC, read the first page, and
//      compare CowHeader::num_merge_ops against the count from pass 1.
//
// Parameters:
//   lock   - must be non-null (enforced with CHECK); not otherwise used here.
//   name   - snapshot name, used to resolve the COW device and in log output.
//   status - snapshot status; selects the COW device name and tells whether
//            compression is enabled.
//
// Returns MergeFailureCode::Ok when the counts agree, or when compression is
// disabled (old-style COWs are not verified). Otherwise returns the
// *ConsistencyCheck code for the step that failed.
MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const std::string& name,
                                                        const SnapshotStatus& status) {
    CHECK(lock);

    if (!status.compression_enabled()) {
        // Do not try to verify old-style COWs yet.
        return MergeFailureCode::Ok;
    }

    auto& dm = DeviceMapper::Instance();

    std::string cow_image_name = GetMappedCowDeviceName(name, status);
    std::string cow_image_path;
    if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) {
        LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name;
        return MergeFailureCode::GetCowPathConsistencyCheck;
    }

    // First pass, count # of ops. The extra scope ends the lifetime of the
    // reader (and the descriptor handed to it) before the direct open below.
    size_t num_ops = 0;
    {
        unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_CLOEXEC));
        if (fd < 0) {
            PLOG(ERROR) << "Failed to open " << cow_image_name;
            return MergeFailureCode::OpenCowConsistencyCheck;
        }

        CowReader reader;
        if (!reader.Parse(std::move(fd))) {
            LOG(ERROR) << "Failed to parse cow " << cow_image_path;
            return MergeFailureCode::ParseCowConsistencyCheck;
        }

        for (auto iter = reader.GetOpIter(); !iter->Done(); iter->Next()) {
            if (!IsMetadataOp(iter->Get())) {
                num_ops++;
            }
        }
    }

    // Second pass, try as hard as we can to get the actual number of blocks
    // the system thinks is merged.
    unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_DIRECT | O_SYNC | O_CLOEXEC));
    if (fd < 0) {
        PLOG(ERROR) << "Failed to open direct " << cow_image_name;
        return MergeFailureCode::OpenCowDirectConsistencyCheck;
    }

    // The buffer is page-aligned and one page long for the O_DIRECT read.
    void* addr = nullptr;
    const size_t page_size = getpagesize();
    // posix_memalign() reports failure by returning a positive error number
    // and does not set errno, so compare against zero and publish the code
    // through errno so that PLOG prints the real cause.
    const int memalign_error = posix_memalign(&addr, page_size, page_size);
    if (memalign_error != 0) {
        errno = memalign_error;
        PLOG(ERROR) << "posix_memalign with page size " << page_size;
        return MergeFailureCode::MemAlignConsistencyCheck;
    }

    // COWs are always at least 2MB, this is guaranteed in snapshot creation.
    std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
    if (!android::base::ReadFully(fd, buffer.get(), page_size)) {
        PLOG(ERROR) << "Direct read failed " << cow_image_name;
        return MergeFailureCode::DirectReadConsistencyCheck;
    }

    // The header is read from the start of the page fetched above.
    auto header = reinterpret_cast<CowHeader*>(buffer.get());
    if (header->num_merge_ops != num_ops) {
        LOG(ERROR) << "COW consistency check failed, expected " << num_ops << " to be merged, "
                   << "but " << header->num_merge_ops << " were actually recorded.";
        LOG(ERROR) << "Aborting merge progress for snapshot " << name
                   << ", will try again next boot";
        return MergeFailureCode::WrongMergeCountConsistencyCheck;
    }

    return MergeFailureCode::Ok;
}
|
||||
|
||||
MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
|
||||
std::vector<std::string> snapshots;
|
||||
if (!ListSnapshots(lock, &snapshots)) {
|
||||
|
|
@ -1429,14 +1519,7 @@ bool SnapshotManager::PerformInitTransition(InitTransition transition,
|
|||
continue;
|
||||
}
|
||||
|
||||
// If no partition was created (the COW exists entirely on /data), the
|
||||
// device-mapper layering is different than if we had a partition.
|
||||
std::string cow_image_name;
|
||||
if (snapshot_status.cow_partition_size() == 0) {
|
||||
cow_image_name = GetCowImageDeviceName(snapshot);
|
||||
} else {
|
||||
cow_image_name = GetCowName(snapshot);
|
||||
}
|
||||
std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
|
||||
|
||||
std::string cow_image_device;
|
||||
if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue