From f4d58b483e22206fd94f2f7029cb360d8353996c Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 19 Mar 2020 15:56:51 -0700 Subject: [PATCH] libsnapshot: Add a simple script and gtest to simulate power failures during snapshot-merge. This test is pretty straight forward: create two large files, then merge one into the other while injecting kernel crashes. At the end, block-by-block validation is performed against both the snapshot-merge target and the file written to disk. Bug: 139092356 Test: instructions in PowerTest.md Change-Id: I28cc88160cf824039966abff4e61382090a354c5 --- fs_mgr/libsnapshot/Android.bp | 16 + fs_mgr/libsnapshot/PowerTest.md | 40 ++ fs_mgr/libsnapshot/power_test.cpp | 559 +++++++++++++++++++++++++++ fs_mgr/libsnapshot/run_power_test.sh | 35 ++ 4 files changed, 650 insertions(+) create mode 100644 fs_mgr/libsnapshot/PowerTest.md create mode 100644 fs_mgr/libsnapshot/power_test.cpp create mode 100755 fs_mgr/libsnapshot/run_power_test.sh diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp index 996fbcae6..e7de84c0c 100644 --- a/fs_mgr/libsnapshot/Android.bp +++ b/fs_mgr/libsnapshot/Android.bp @@ -226,3 +226,19 @@ cc_binary { "libutils", ], } + +cc_test { + name: "snapshot_power_test", + srcs: [ + "power_test.cpp", + ], + static_libs: [ + "libsnapshot", + ], + shared_libs: [ + "libbase", + "libfs_mgr_binder", + "liblog", + ], + gtest: false, +} diff --git a/fs_mgr/libsnapshot/PowerTest.md b/fs_mgr/libsnapshot/PowerTest.md new file mode 100644 index 000000000..0b0cb5dbf --- /dev/null +++ b/fs_mgr/libsnapshot/PowerTest.md @@ -0,0 +1,40 @@ +snapshot\_power\_test +--------------------- + +snapshot\_power\_test is a standalone test to simulate power failures during a snapshot-merge operation. + +### Test Setup + +Start by creating two large files that will be used as the pre-merge and post-merge state. 
You can take two different partition images (for example, a product.img from two separate builds), or just create random data: + + dd if=/dev/urandom of=pre-merge count=1024 bs=1048576 + dd if=/dev/urandom of=post-merge count=1024 bs=1048576 + +Next, push these files to an unencrypted directory on the device: + + adb push pre-merge /data/local/unencrypted + adb push post-merge /data/local/unencrypted + +Next, run the test setup: + + adb sync data + adb shell /data/nativetest64/snapshot_power_test/snapshot_power_test create \ + /data/local/unencrypted/pre-merge \ + /data/local/unencrypted/post-merge + +This will create the necessary fiemap-based images. + +### Running +The actual test can be run via `run_power_test.sh`. Its syntax is: + + run_power_test.sh <POST_MERGE_FILE> + +`POST_MERGE_FILE` should be the path on the device of the image to validate the merge against. Example: + + run_power_test.sh /data/local/unencrypted/post-merge + +The device will begin the merge with a 5% chance of injecting a kernel crash every 10ms. The device should be capable of rebooting normally without user intervention. Once the merge has completed, the test will run a final check command to validate the contents of the snapshot against the post-merge file. It will error if there are any incorrect blocks. + +Two environment variables can be passed to `run_power_test.sh`: +1. `FAIL_RATE` - A percentage between 0 and 100 (inclusive) indicating the probability the device should inject a kernel crash every 10ms. +2. `ANDROID_SERIAL` - If multiple devices are attached to adb, this argument is passed as the serial to select (to `adb -s`). 
diff --git a/fs_mgr/libsnapshot/power_test.cpp b/fs_mgr/libsnapshot/power_test.cpp
new file mode 100644
index 000000000..4d2548ae9
--- /dev/null
+++ b/fs_mgr/libsnapshot/power_test.cpp
@@ -0,0 +1,598 @@
+//
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <random>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <android-base/file.h>
+#include <android-base/parsedouble.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
+#include <android-base/unique_fd.h>
+#include <ext4_utils/ext4_utils.h>
+#include <fstab/fstab.h>
+#include <libdm/dm.h>
+#include <libfiemap/image_manager.h>
+
+using namespace std::chrono_literals;
+using namespace std::string_literals;
+using android::base::borrowed_fd;
+using android::base::unique_fd;
+using android::dm::DeviceMapper;
+using android::dm::DmDeviceState;
+using android::dm::DmTable;
+using android::dm::DmTargetSnapshot;
+using android::dm::SnapshotStorageMode;
+using android::fiemap::ImageManager;
+using android::fs_mgr::Fstab;
+
+namespace android {
+namespace snapshot {
+
+// Prints the syntax of every sub-command to stderr.
+static void usage() {
+    std::cerr << "Usage:\n";
+    std::cerr << " create <orig-payload> <new-payload>\n";
+    std::cerr << "\n";
+    std::cerr << " Create a snapshot device containing the contents of\n";
+    std::cerr << " orig-payload, and then write the contents of new-payload.\n";
+    std::cerr << " The original files are not modified.\n";
+    std::cerr << "\n";
+    std::cerr << " merge [fail-rate]\n";
+    std::cerr << "\n";
+    std::cerr << " Merge the snapshot previously started by create, and wait\n";
+    std::cerr << " for it to complete. Once done, it is compared to the\n";
+    std::cerr << " new-payload for consistency. The original files are not \n";
+    std::cerr << " modified. If a fail-rate is passed (as a fraction between 0\n";
+    std::cerr << " and 100), every 10ms the device has that percent chance of\n";
+    std::cerr << " injecting a kernel crash.\n";
+    std::cerr << "\n";
+    std::cerr << " check <new-payload>\n";
+    std::cerr << " Verify that all artifacts are correct after a merge\n";
+    std::cerr << " completes.\n";
+    std::cerr << "\n";
+    std::cerr << " cleanup\n";
+    std::cerr << " Remove all ImageManager artifacts from create/merge.\n";
+}
+
+// Implements the create/merge/check/cleanup sub-commands of the test binary.
+// All methods report failures on stderr and return false.
+class PowerTest final {
+  public:
+    PowerTest();
+    // Opens the ImageManager, then dispatches on the sub-command in argv[1].
+    bool Run(int argc, char** argv);
+
+  private:
+    // Creates the test directories (if missing) and opens the ImageManager.
+    bool OpenImageManager();
+    // create <orig-payload> <new-payload>: builds the images and the snapshot.
+    bool Create(int argc, char** argv);
+    // merge [fail-rate]: runs the merge, optionally injecting kernel crashes.
+    bool Merge(int argc, char** argv);
+    // check <new-payload>: compares image and merge device to the payload.
+    bool Check(int argc, char** argv);
+    // Deletes the dm device and both backing images, if they exist.
+    bool Cleanup();
+    // Unmaps and deletes a single backing image, if it exists.
+    bool CleanupImage(const std::string& name);
+    // Creates and maps both images, fills the base image from first_file and
+    // zeroes the first block of the COW.
+    bool SetupImages(const std::string& first_file, borrowed_fd second_fd);
+    // Maps both backing images; sets image_path_ and cow_path_.
+    bool MapImages();
+    // Creates the dm snapshot device in the given mode; sets snapshot_path_.
+    bool MapSnapshot(SnapshotStorageMode mode);
+    // Reads and parses the status line of the single snapshot target.
+    bool GetMergeStatus(DmTargetSnapshot::Status* status);
+
+    static constexpr char kSnapshotName[] = "snapshot-power-test";
+    static constexpr char kSnapshotImageName[] = "snapshot-power-test-image";
+    static constexpr char kSnapshotCowName[] = "snapshot-power-test-cow";
+
+    DeviceMapper& dm_;
+    std::unique_ptr<ImageManager> images_;
+    std::string image_path_;
+    std::string cow_path_;
+    std::string snapshot_path_;
+};
+
+PowerTest::PowerTest() : dm_(DeviceMapper::Instance()) {}
+
+bool PowerTest::Run(int argc, char** argv) {
+    if (!OpenImageManager()) {
+        return false;
+    }
+
+    if (argc < 2) {
+        usage();
+        return false;
+    }
+    if (argv[1] == "create"s) {
+        return Create(argc, argv);
+    } else if (argv[1] == "merge"s) {
+        return Merge(argc, argv);
+    } else if (argv[1] == "check"s) {
+        return Check(argc, argv);
+    } else if (argv[1] == "cleanup"s) {
+        return Cleanup();
+    } else {
+        usage();
+        return false;
+    }
+}
+
+bool PowerTest::OpenImageManager() {
+    std::vector<std::string> dirs = {
+            "/data/gsi/test",
+            "/metadata/gsi/test",
+    };
+    for (const auto& dir : dirs) {
+        if (mkdir(dir.c_str(), 0700) && errno != EEXIST) {
+            std::cerr << "mkdir " << dir << ": " << strerror(errno) << "\n";
+            return false;
+        }
+    }
+
+    images_ = ImageManager::Open("/metadata/gsi/test", "/data/gsi/test");
+    if (!images_) {
+        std::cerr << "Could not open ImageManager\n";
+        return false;
+    }
+    return true;
+}
+
+bool PowerTest::Create(int argc, char** argv) {
+    if (argc < 4) {
+        usage();
+        return false;
+    }
+
+    std::string first = argv[2];
+    std::string second = argv[3];
+
+    unique_fd second_fd(open(second.c_str(), O_RDONLY));
+    if (second_fd < 0) {
+        std::cerr << "open " << second << ": " << strerror(errno) << "\n";
+        return false;
+    }
+
+    if (!Cleanup()) {
+        return false;
+    }
+    if (!SetupImages(first, second_fd)) {
+        return false;
+    }
+    if (!MapSnapshot(SnapshotStorageMode::Persistent)) {
+        return false;
+    }
+
+    struct stat s;
+    if (fstat(second_fd, &s)) {
+        std::cerr << "fstat " << second << ": " << strerror(errno) << "\n";
+        return false;
+    }
+
+    unique_fd snap_fd(open(snapshot_path_.c_str(), O_WRONLY));
+    if (snap_fd < 0) {
+        std::cerr << "open " << snapshot_path_ << ": " << strerror(errno) << "\n";
+        return false;
+    }
+
+    // Copy new-payload into the snapshot device one 4KiB chunk at a time.
+    // st_size is signed; convert once so the loop compares like types.
+    const uint64_t total = static_cast<uint64_t>(s.st_size);
+    uint8_t chunk[4096];
+    uint64_t written = 0;
+    while (written < total) {
+        uint64_t remaining = total - written;
+        size_t bytes = static_cast<size_t>(std::min<uint64_t>(sizeof(chunk), remaining));
+        if (!android::base::ReadFully(second_fd, chunk, bytes)) {
+            std::cerr << "read " << second << ": " << strerror(errno) << "\n";
+            return false;
+        }
+        if (!android::base::WriteFully(snap_fd, chunk, bytes)) {
+            std::cerr << "write " << snapshot_path_ << ": " << strerror(errno) << "\n";
+            return false;
+        }
+        written += bytes;
+    }
+    if (fsync(snap_fd)) {
+        std::cerr << "fsync: " << strerror(errno) << "\n";
+        return false;
+    }
+
+    sync();
+
+    // Drop our descriptor before tearing down the device it refers to.
+    snap_fd.reset();
+    if (!dm_.DeleteDeviceIfExists(kSnapshotName)) {
+        std::cerr << "could not delete dm device " << kSnapshotName << "\n";
+        return false;
+    }
+    if (!images_->UnmapImageIfExists(kSnapshotImageName)) {
+        std::cerr << "failed to unmap " << kSnapshotImageName << "\n";
+        return false;
+    }
+    if (!images_->UnmapImageIfExists(kSnapshotCowName)) {
+        std::cerr << "failed to unmap " << kSnapshotCowName << "\n";
+        return false;
+    }
+    return true;
+}
+
+bool PowerTest::Cleanup() {
+    if (!dm_.DeleteDeviceIfExists(kSnapshotName)) {
+        std::cerr << "could not delete dm device " << kSnapshotName << "\n";
+        return false;
+    }
+    if (!CleanupImage(kSnapshotImageName) || !CleanupImage(kSnapshotCowName)) {
+        return false;
+    }
+    return true;
+}
+
+bool PowerTest::CleanupImage(const std::string& name) {
+    if (!images_->UnmapImageIfExists(name)) {
+        std::cerr << "failed to unmap " << name << "\n";
+        return false;
+    }
+    if (images_->BackingImageExists(name) && !images_->DeleteBackingImage(name)) {
+        std::cerr << "failed to delete " << name << "\n";
+        return false;
+    }
+    return true;
+}
+
+bool PowerTest::SetupImages(const std::string& first, borrowed_fd second_fd) {
+    unique_fd first_fd(open(first.c_str(), O_RDONLY));
+    if (first_fd < 0) {
+        std::cerr << "open " << first << ": " << strerror(errno) << "\n";
+        return false;
+    }
+
+    struct stat s1, s2;
+    if (fstat(first_fd.get(), &s1)) {
+        std::cerr << "first stat: " << strerror(errno) << "\n";
+        return false;
+    }
+    if (fstat(second_fd.get(), &s2)) {
+        std::cerr << "second stat: " << strerror(errno) << "\n";
+        return false;
+    }
+
+    // Pick the bigger size of both images, rounding up to the nearest block,
+    // and pad the result with an extra 128MiB.
+    uint64_t s1_size = (s1.st_size + 4095) & ~uint64_t(4095);
+    uint64_t s2_size = (s2.st_size + 4095) & ~uint64_t(4095);
+    uint64_t image_size = std::max(s1_size, s2_size) + (1024 * 1024 * 128);
+    if (!images_->CreateBackingImage(kSnapshotImageName, image_size, 0, nullptr)) {
+        std::cerr << "failed to create " << kSnapshotImageName << "\n";
+        return false;
+    }
+    // Use the same size for the cow.
+    if (!images_->CreateBackingImage(kSnapshotCowName, image_size, 0, nullptr)) {
+        std::cerr << "failed to create " << kSnapshotCowName << "\n";
+        return false;
+    }
+    if (!MapImages()) {
+        return false;
+    }
+
+    unique_fd image_fd(open(image_path_.c_str(), O_WRONLY));
+    if (image_fd < 0) {
+        std::cerr << "open: " << image_path_ << ": " << strerror(errno) << "\n";
+        return false;
+    }
+
+    // Copy orig-payload into the base image.
+    const uint64_t total = static_cast<uint64_t>(s1.st_size);
+    uint8_t chunk[4096];
+    uint64_t written = 0;
+    while (written < total) {
+        uint64_t remaining = total - written;
+        size_t bytes = static_cast<size_t>(std::min<uint64_t>(sizeof(chunk), remaining));
+        if (!android::base::ReadFully(first_fd, chunk, bytes)) {
+            std::cerr << "read: " << strerror(errno) << "\n";
+            return false;
+        }
+        if (!android::base::WriteFully(image_fd, chunk, bytes)) {
+            std::cerr << "write: " << strerror(errno) << "\n";
+            return false;
+        }
+        written += bytes;
+    }
+    if (fsync(image_fd)) {
+        std::cerr << "fsync: " << strerror(errno) << "\n";
+        return false;
+    }
+
+    // Zero the first block of the COW.
+    unique_fd cow_fd(open(cow_path_.c_str(), O_WRONLY));
+    if (cow_fd < 0) {
+        std::cerr << "open: " << cow_path_ << ": " << strerror(errno) << "\n";
+        return false;
+    }
+
+    memset(chunk, 0, sizeof(chunk));
+    if (!android::base::WriteFully(cow_fd, chunk, sizeof(chunk))) {
+        std::cerr << "write: " << strerror(errno) << "\n";
+        return false;
+    }
+    if (fsync(cow_fd)) {
+        std::cerr << "fsync: " << strerror(errno) << "\n";
+        return false;
+    }
+    return true;
+}
+
+bool PowerTest::MapImages() {
+    if (!images_->MapImageDevice(kSnapshotImageName, 10s, &image_path_)) {
+        std::cerr << "failed to map " << kSnapshotImageName << "\n";
+        return false;
+    }
+    if (!images_->MapImageDevice(kSnapshotCowName, 10s, &cow_path_)) {
+        std::cerr << "failed to map " << kSnapshotCowName << "\n";
+        return false;
+    }
+    return true;
+}
+
+bool PowerTest::MapSnapshot(SnapshotStorageMode mode) {
+    uint64_t sectors;
+    {
+        unique_fd fd(open(image_path_.c_str(), O_RDONLY));
+        if (fd < 0) {
+            std::cerr << "open: " << image_path_ << ": " << strerror(errno) << "\n";
+            return false;
+        }
+        // Convert the size to 512-byte sectors for the dm table.
+        sectors = get_block_device_size(fd) / 512;
+    }
+
+    DmTable table;
+    table.Emplace<DmTargetSnapshot>(0, sectors, image_path_, cow_path_, mode, 8);
+    if (!dm_.CreateDevice(kSnapshotName, table, &snapshot_path_, 10s)) {
+        std::cerr << "failed to create snapshot device\n";
+        return false;
+    }
+    return true;
+}
+
+bool PowerTest::GetMergeStatus(DmTargetSnapshot::Status* status) {
+    std::vector<DeviceMapper::TargetInfo> targets;
+    if (!dm_.GetTableStatus(kSnapshotName, &targets)) {
+        std::cerr << "failed to get merge status\n";
+        return false;
+    }
+    if (targets.size() != 1) {
+        std::cerr << "merge device has wrong number of targets\n";
+        return false;
+    }
+    if (!DmTargetSnapshot::ParseStatusText(targets[0].data, status)) {
+        std::cerr << "could not parse merge target status text\n";
+        return false;
+    }
+    return true;
+}
+
+// Returns the name of the block device mounted at /data with the
+// "/dev/block/" prefix removed, or an empty string if it cannot be found.
+static std::string GetUserdataBlockDeviceName() {
+    Fstab fstab;
+    if (!ReadFstabFromFile("/proc/mounts", &fstab)) {
+        return {};
+    }
+
+    auto entry = android::fs_mgr::GetEntryForMountPoint(&fstab, "/data");
+    if (!entry) {
+        return {};
+    }
+
+    auto prefix = "/dev/block/"s;
+    if (!android::base::StartsWith(entry->blk_device, prefix)) {
+        return {};
+    }
+    return entry->blk_device.substr(prefix.size());
+}
+
+bool PowerTest::Merge(int argc, char** argv) {
+    // Start an f2fs GC on the userdata device to really stress things.
+    auto userdata_dev = GetUserdataBlockDeviceName();
+    if (userdata_dev.empty()) {
+        std::cerr << "could not locate userdata block device\n";
+        return false;
+    }
+
+    auto cmd =
+            android::base::StringPrintf("echo 1 > /sys/fs/f2fs/%s/gc_urgent", userdata_dev.c_str());
+    system(cmd.c_str());
+
+    // Create() deletes the snapshot device before returning, so map it again
+    // (this time in merge mode) whenever it does not currently exist.
+    if (dm_.GetState(kSnapshotName) == DmDeviceState::INVALID) {
+        if (!MapImages()) {
+            return false;
+        }
+        if (!MapSnapshot(SnapshotStorageMode::Merge)) {
+            return false;
+        }
+    }
+
+    std::random_device r;
+    std::default_random_engine re(r());
+    std::uniform_real_distribution<double> dist(0.0, 100.0);
+
+    std::optional<double> failure_rate;
+    if (argc >= 3) {
+        double d;
+        if (!android::base::ParseDouble(argv[2], &d)) {
+            std::cerr << "Could not parse failure rate as double: " << argv[2] << "\n";
+            return false;
+        }
+        failure_rate = d;
+    }
+
+    while (true) {
+        DmTargetSnapshot::Status status;
+        if (!GetMergeStatus(&status)) {
+            return false;
+        }
+        if (!status.error.empty()) {
+            std::cerr << "merge reported error: " << status.error << "\n";
+            return false;
+        }
+        // The merge is treated as finished once only metadata sectors remain allocated.
+        if (status.sectors_allocated == status.metadata_sectors) {
+            break;
+        }
+
+        std::cerr << status.sectors_allocated << " / " << status.metadata_sectors << "\n";
+
+        // dist(re) yields values in [0, 100), so the strict comparison makes a
+        // rate of 0 never crash and a rate of 100 always crash.
+        if (failure_rate && dist(re) < *failure_rate) {
+            system("echo 1 > /proc/sys/kernel/sysrq");
+            system("echo c > /proc/sysrq-trigger");
+        }
+
+        std::this_thread::sleep_for(10ms);
+    }
+
+    std::cout << "Merge completed.\n";
+    return true;
+}
+
+bool PowerTest::Check(int argc, char** argv) {
+    if (argc < 3) {
+        std::cerr << "Expected argument: <new-payload>\n";
+        return false;
+    }
+    std::string md_path, image_path;
+    std::string canonical_path = argv[2];
+
+    if (!dm_.GetDmDevicePathByName(kSnapshotName, &md_path)) {
+        std::cerr << "could not get dm-path for merge device\n";
+        return false;
+    }
+    if (!images_->GetMappedImageDevice(kSnapshotImageName, &image_path)) {
+        std::cerr << "could not get image path\n";
+        return false;
+    }
+
+    unique_fd md_fd(open(md_path.c_str(), O_RDONLY));
+    if (md_fd < 0) {
+        std::cerr << "open: " << md_path << ": " << strerror(errno) << "\n";
+        return false;
+    }
+    unique_fd image_fd(open(image_path.c_str(), O_RDONLY));
+    if (image_fd < 0) {
+        std::cerr << "open: " << image_path << ": " << strerror(errno) << "\n";
+        return false;
+    }
+    unique_fd canonical_fd(open(canonical_path.c_str(), O_RDONLY));
+    if (canonical_fd < 0) {
+        std::cerr << "open: " << canonical_path << ": " << strerror(errno) << "\n";
+        return false;
+    }
+
+    struct stat s;
+    if (fstat(canonical_fd, &s)) {
+        std::cerr << "fstat: " << canonical_path << ": " << strerror(errno) << "\n";
+        return false;
+    }
+    uint64_t canonical_size = s.st_size;
+    uint64_t md_size = get_block_device_size(md_fd);
+    uint64_t image_size = get_block_device_size(image_fd);
+    if (image_size != md_size) {
+        std::cerr << "image size does not match merge device size\n";
+        return false;
+    }
+    if (canonical_size > image_size) {
+        std::cerr << "canonical size " << canonical_size << " is greater than image size "
+                  << image_size << "\n";
+        return false;
+    }
+
+    constexpr size_t kBlockSize = 4096;
+    uint8_t canonical_buffer[kBlockSize];
+    uint8_t image_buffer[kBlockSize];
+    uint8_t md_buffer[kBlockSize];
+
+    // Only the first canonical_size bytes are compared; the devices may be larger.
+    uint64_t remaining = canonical_size;
+    uint64_t blockno = 0;
+    while (remaining) {
+        size_t bytes = static_cast<size_t>(std::min<uint64_t>(kBlockSize, remaining));
+        // Report the path on read errors; the buffers hold raw bytes, not text.
+        if (!android::base::ReadFully(canonical_fd, canonical_buffer, bytes)) {
+            std::cerr << "read: " << canonical_path << ": " << strerror(errno) << "\n";
+            return false;
+        }
+        if (!android::base::ReadFully(image_fd, image_buffer, bytes)) {
+            std::cerr << "read: " << image_path << ": " << strerror(errno) << "\n";
+            return false;
+        }
+        if (!android::base::ReadFully(md_fd, md_buffer, bytes)) {
+            std::cerr << "read: " << md_path << ": " << strerror(errno) << "\n";
+            return false;
+        }
+        if (memcmp(canonical_buffer, image_buffer, bytes)) {
+            std::cerr << "canonical and image differ at block " << blockno << "\n";
+            return false;
+        }
+        if (memcmp(canonical_buffer, md_buffer, bytes)) {
+            std::cerr << "canonical and merge device differ at block " << blockno << "\n";
+            return false;
+        }
+
+        remaining -= bytes;
+        blockno++;
+    }
+
+    std::cout << "Images all match.\n";
+    return true;
+}
+
+}  // namespace snapshot
+}  // namespace android
+
+int main(int argc, char** argv) {
+    android::snapshot::PowerTest test;
+
+    if (!test.Run(argc, argv)) {
+        std::cerr << "Unexpected error running test." << std::endl;
+        return 1;
+    }
+    fflush(stdout);
+    return 0;
+}
diff --git a/fs_mgr/libsnapshot/run_power_test.sh b/fs_mgr/libsnapshot/run_power_test.sh
new file mode 100755
index 000000000..dc03dc975
--- /dev/null
+++ b/fs_mgr/libsnapshot/run_power_test.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+set -e
+
+if [ -z "$1" ]; then
+    echo "Usage: $0 <POST_MERGE_FILE>" >&2
+    exit 1
+fi
+
+if [ -z "$FAIL_RATE" ]; then
+    FAIL_RATE=5.0
+fi
+if [ ! -z "$ANDROID_SERIAL" ]; then
+    # Quote the assignment; unquoted, bash would try to run the serial as a command.
+    DEVICE_ARGS="-s $ANDROID_SERIAL"
+else
+    DEVICE_ARGS=
+fi
+
+TEST_BIN=/data/nativetest64/snapshot_power_test/snapshot_power_test
+
+while :
+do
+    adb $DEVICE_ARGS wait-for-device
+    adb $DEVICE_ARGS root
+    # -f: do not abort (set -e) when the binary is not on the device yet.
+    adb $DEVICE_ARGS shell rm -f $TEST_BIN
+    adb $DEVICE_ARGS sync data
+    # The merge command is expected to die whenever a crash is injected.
+    set +e
+    output=$(adb $DEVICE_ARGS shell $TEST_BIN merge $FAIL_RATE 2>&1)
+    set -e
+    if [[ "$output" == *"Merge completed"* ]]; then
+        echo "Merge completed."
+        break
+    fi
+    if [[ "$output" == *"Unexpected error"* ]]; then
+        echo "Unexpected error."
+        exit 1
+    fi
+done
+
+adb $DEVICE_ARGS shell $TEST_BIN check "$1"