From f57fd97df39adeac7bd58d57ed6d8297ee0bfd10 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 28 Oct 2020 17:02:25 -0700 Subject: [PATCH 1/3] libsnapshot: Add support for first-to-second-stage transitions of snapuserd. This patch introduces the fundamentals needed to support booting off dm-user. First, a method has been added to start snapuserd in first-stage init. It simply forks and execs, creates a specially named first-stage socket, then waits for requests. Next, a new method has been added to SnapshotManager to perform a second-stage handoff. This works by first launching a second copy of snapuserd using init's normal service management functionality. The new snapuserd runs alongside the original, but has correct privileges and a correct selinux context. Next, we inspect each COW device, and if its table uses dm-user, we replace the table with a renamed control device. The new control device is bound to the new snapuserd. device-mapper guarantees that such a table swap is safe. It flushes I/O to the old table and then replaces it with the new table. Once the new table is in place, the old dm-user control devices are automatically destroyed. Thus, once all dm-user devices have been transitioned, the first-stage daemon is idle and can gracefully exit. This patch does not modify init. A few changes will be needed on top of this patch: (1) CreateLogicalAndSnapshotPartitions will need further changes to start the first-stage daemon and track its pid. Additionally, it will need to ensure the named socket file is deleted, so there is no further IPC allowed after partitions are completed. (2) init will need to propagate the pid to second-stage init so the process can be killed (or signalled). (3) first-stage snapuserd will need to gracefully exit once it has no active handler threads. (4) second-stage init will need to invoke the transition helper on SnapshotManager, ideally as soon as feasible. 
Bug: 168259959 Test: manual test Change-Id: I54dec2edf85ed95f11ab4518eb3d7dbaf0bdcbfd --- fs_mgr/libsnapshot/Android.bp | 5 +- .../include/libsnapshot/snapshot.h | 19 +- .../include/libsnapshot/snapuserd_client.h | 8 + .../include_test/libsnapshot/test_helpers.h | 1 + fs_mgr/libsnapshot/snapshot.cpp | 196 ++++++++++++++++-- fs_mgr/libsnapshot/snapshot_test.cpp | 59 +++++- fs_mgr/libsnapshot/snapuserd_client.cpp | 19 ++ fs_mgr/libsnapshot/snapuserd_server.cpp | 4 + fs_mgr/libsnapshot/utility.cpp | 2 +- fs_mgr/libsnapshot/utility.h | 2 +- 10 files changed, 295 insertions(+), 20 deletions(-) diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp index 059a469a2..f1b00311c 100644 --- a/fs_mgr/libsnapshot/Android.bp +++ b/fs_mgr/libsnapshot/Android.bp @@ -31,15 +31,17 @@ cc_defaults { "libbrotli", "libdm", "libfstab", - "libsnapshot_cow", "update_metadata-protos", ], whole_static_libs: [ + "libbrotli", "libcutils", "libext2_uuid", "libext4_utils", "libfstab", + "libsnapshot_cow", "libsnapshot_snapuserd", + "libz", ], header_libs: [ "libchrome", @@ -432,6 +434,7 @@ cc_binary { init_rc: [ "snapuserd.rc", ], + static_executable: true, } cc_binary { diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 35ed04a29..9dbbbd7c0 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -77,6 +78,7 @@ class SnapshotMergeStats; class SnapshotStatus; static constexpr const std::string_view kCowGroupName = "cow"; +static constexpr char kVirtualAbCompressionProp[] = "ro.virtual_ab.compression.enabled"; bool OptimizeSourceCopyOperation(const chromeos_update_engine::InstallOperation& operation, chromeos_update_engine::InstallOperation* optimized); @@ -104,6 +106,7 @@ class ISnapshotManager { android::hardware::boot::V1_1::MergeStatus status) = 0; virtual bool 
SetSlotAsUnbootable(unsigned int slot) = 0; virtual bool IsRecovery() const = 0; + virtual bool IsTestDevice() const { return false; } }; virtual ~ISnapshotManager() = default; @@ -303,6 +306,14 @@ class SnapshotManager final : public ISnapshotManager { // Helper function for second stage init to restorecon on the rollback indicator. static std::string GetGlobalRollbackIndicatorPath(); + // Initiate the transition from first-stage to second-stage snapuserd. This + // process involves re-creating the dm-user table entries for each device, + // so that they connect to the new daemon. Once all new tables have been + // activated, we ask the first-stage daemon to cleanly exit. + // + // The caller must start the second-stage snapuserd before calling this. + bool PerformSecondStageTransition(); + // ISnapshotManager overrides. bool BeginUpdate() override; bool CancelUpdate() override; @@ -345,6 +356,7 @@ class SnapshotManager final : public ISnapshotManager { FRIEND_TEST(SnapshotTest, Merge); FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot); FRIEND_TEST(SnapshotTest, UpdateBootControlHal); + FRIEND_TEST(SnapshotUpdateTest, DaemonTransition); FRIEND_TEST(SnapshotUpdateTest, DataWipeAfterRollback); FRIEND_TEST(SnapshotUpdateTest, DataWipeRollbackInRecovery); FRIEND_TEST(SnapshotUpdateTest, FullUpdateFlow); @@ -372,11 +384,13 @@ class SnapshotManager final : public ISnapshotManager { // Ensure we're connected to snapuserd. bool EnsureSnapuserdConnected(); - // Helper for first-stage init. + // Helpers for first-stage init. bool ForceLocalImageManager(); + const std::unique_ptr& device() const { return device_; } - // Helper function for tests. + // Helper functions for tests. IImageManager* image_manager() const { return images_.get(); } + void set_use_first_stage_snapuserd(bool value) { use_first_stage_snapuserd_ = value; } // Since libsnapshot is included into multiple processes, we flock() our // files for simple synchronization. 
LockedFile is a helper to assist with @@ -660,6 +674,7 @@ class SnapshotManager final : public ISnapshotManager { std::unique_ptr device_; std::unique_ptr images_; bool has_local_image_manager_ = false; + bool use_first_stage_snapuserd_ = false; bool in_factory_data_reset_ = false; std::unique_ptr snapuserd_client_; }; diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h index 0bbdaa583..aaec229b6 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h @@ -14,6 +14,8 @@ #pragma once +#include + #include #include #include @@ -31,9 +33,15 @@ static constexpr uint32_t PACKET_SIZE = 512; static constexpr char kSnapuserdSocketFirstStage[] = "snapuserd_first_stage"; static constexpr char kSnapuserdSocket[] = "snapuserd"; +static constexpr char kSnapuserdFirstStagePidVar[] = "FIRST_STAGE_SNAPUSERD_PID"; + // Ensure that the second-stage daemon for snapuserd is running. bool EnsureSnapuserdStarted(); +// Start the first-stage version of snapuserd, returning its pid. This is used +// by first-stage init, as well as vts_libsnapshot_test. On failure, -1 is returned. 
+pid_t StartFirstStageSnapuserd(); + class SnapuserdClient { private: android::base::unique_fd sockfd_; diff --git a/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h b/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h index 197aeaa05..7aef086bb 100644 --- a/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h +++ b/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h @@ -95,6 +95,7 @@ class TestDeviceInfo : public SnapshotManager::IDeviceInfo { unbootable_slots_.insert(slot); return true; } + bool IsTestDevice() const override { return true; } bool IsSlotUnbootable(uint32_t slot) { return unbootable_slots_.count(slot) != 0; } diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index 7061d5665..b6c0a293c 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -31,8 +31,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -100,6 +102,12 @@ std::unique_ptr SnapshotManager::NewForFirstStageMount(IDeviceI if (!sm || !sm->ForceLocalImageManager()) { return nullptr; } + + // The first-stage version of snapuserd is explicitly started by init. Do + // not attempt to use it during tests (which run in normal AOSP). + if (!sm->device()->IsTestDevice()) { + sm->use_first_stage_snapuserd_ = true; + } return sm; } @@ -400,8 +408,15 @@ bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name, base_sectors = dev_size / kSectorSize; } + // Use an extra decoration for first-stage init, so we can transition + // to a new table entry in second-stage. 
+ std::string misc_name = name; + if (use_first_stage_snapuserd_) { + misc_name += "-init"; + } + DmTable table; - table.Emplace(0, base_sectors, name); + table.Emplace(0, base_sectors, misc_name); if (!dm.CreateDevice(name, table, path, timeout_ms)) { return false; } @@ -410,7 +425,7 @@ bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name, return false; } - auto control_device = "/dev/dm-user/" + name; + auto control_device = "/dev/dm-user/" + misc_name; return snapuserd_client_->InitializeSnapuserd(cow_file, base_device, control_device); } @@ -1284,6 +1299,107 @@ bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock, return RemoveAllUpdateState(lock, before_cancel); } +bool SnapshotManager::PerformSecondStageTransition() { + LOG(INFO) << "Performing second-stage transition for snapuserd."; + + // Don't use EnsureSnapuserdConnected() because this is called from init, + // and attempting to do so will deadlock. + if (!snapuserd_client_) { + snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s); + if (!snapuserd_client_) { + LOG(ERROR) << "Unable to connect to snapuserd"; + return false; + } + } + + auto& dm = DeviceMapper::Instance(); + + auto lock = LockExclusive(); + if (!lock) return false; + + std::vector snapshots; + if (!ListSnapshots(lock.get(), &snapshots)) { + LOG(ERROR) << "Failed to list snapshots."; + return false; + } + + size_t num_cows = 0; + size_t ok_cows = 0; + for (const auto& snapshot : snapshots) { + std::string cow_name = GetDmUserCowName(snapshot); + if (dm.GetState(cow_name) == DmDeviceState::INVALID) { + continue; + } + + DeviceMapper::TargetInfo target; + if (!GetSingleTarget(cow_name, TableQuery::Table, &target)) { + continue; + } + + auto target_type = DeviceMapper::GetTargetType(target.spec); + if (target_type != "user") { + LOG(ERROR) << "Unexpected target type for " << cow_name << ": " << target_type; + continue; + } + + num_cows++; + + DmTable table; + table.Emplace(0, target.spec.length, 
cow_name); + if (!dm.LoadTableAndActivate(cow_name, table)) { + LOG(ERROR) << "Unable to swap tables for " << cow_name; + continue; + } + + std::string backing_device; + if (!dm.GetDmDevicePathByName(GetBaseDeviceName(snapshot), &backing_device)) { + LOG(ERROR) << "Could not get device path for " << GetBaseDeviceName(snapshot); + continue; + } + + std::string cow_device; + if (!dm.GetDmDevicePathByName(GetCowName(snapshot), &cow_device)) { + LOG(ERROR) << "Could not get device path for " << GetCowName(snapshot); + continue; + } + + // Wait for ueventd to acknowledge and create the control device node. + std::string control_device = "/dev/dm-user/" + cow_name; + if (!android::fs_mgr::WaitForFile(control_device, 10s)) { + LOG(ERROR) << "Could not find control device: " << control_device; + continue; + } + + if (!snapuserd_client_->InitializeSnapuserd(cow_device, backing_device, control_device)) { + // This error is unrecoverable. We cannot proceed because reads to + // the underlying device will fail. 
+ LOG(FATAL) << "Could not initialize snapuserd for " << cow_name; + return false; + } + + ok_cows++; + } + + if (ok_cows != num_cows) { + LOG(ERROR) << "Could not transition all snapuserd consumers."; + return false; + } + + int pid; + const char* pid_str = getenv(kSnapuserdFirstStagePidVar); + if (pid_str && android::base::ParseInt(pid_str, &pid)) { + if (kill(pid, SIGTERM) < 0 && errno != ESRCH) { + LOG(ERROR) << "kill snapuserd failed"; + return false; + } + } else { + LOG(ERROR) << "Could not find or parse " << kSnapuserdFirstStagePidVar + << " for snapuserd pid"; + return false; + } + return true; +} + std::unique_ptr SnapshotManager::ReadCurrentMetadata() { const auto& opener = device_->GetPartitionOpener(); uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); @@ -1621,6 +1737,15 @@ bool SnapshotManager::CreateLogicalAndSnapshotPartitions( } } + if (use_first_stage_snapuserd_) { + // Remove the first-stage socket as a precaution, there is no need to + // access the daemon anymore and we'll be killing it once second-stage + // is running. + auto socket = ANDROID_SOCKET_DIR + "/"s + kSnapuserdSocketFirstStage; + snapuserd_client_ = nullptr; + unlink(socket.c_str()); + } + LOG(INFO) << "Created logical partitions with snapshot."; return true; } @@ -1925,10 +2050,18 @@ bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) LOG(ERROR) << "Cannot unmap " << dm_user_name; return false; } - if (!snapuserd_client_->WaitForDeviceDelete("/dev/dm-user/" + dm_user_name)) { + + auto control_device = "/dev/dm-user/" + dm_user_name; + if (!snapuserd_client_->WaitForDeviceDelete(control_device)) { LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete"; return false; } + + // Ensure the control device is gone so we don't run into ABA problems. 
+ if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) { + LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink"; + return false; + } } auto cow_name = GetCowName(name); @@ -2212,15 +2345,35 @@ bool SnapshotManager::EnsureImageManager() { } bool SnapshotManager::EnsureSnapuserdConnected() { - if (!snapuserd_client_) { + if (snapuserd_client_) { + return true; + } + + std::string socket; + if (use_first_stage_snapuserd_) { + auto pid = StartFirstStageSnapuserd(); + if (pid < 0) { + LOG(ERROR) << "Failed to start snapuserd"; + return false; + } + + auto pid_str = std::to_string(static_cast(pid)); + if (setenv(kSnapuserdFirstStagePidVar, pid_str.c_str(), 1) < 0) { + PLOG(ERROR) << "setenv failed storing the snapuserd pid"; + } + + socket = kSnapuserdSocketFirstStage; + } else { if (!EnsureSnapuserdStarted()) { return false; } - snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s); - if (!snapuserd_client_) { - LOG(ERROR) << "Unable to connect to snapuserd"; - return false; - } + socket = kSnapuserdSocket; + } + + snapuserd_client_ = SnapuserdClient::Connect(socket, 10s); + if (!snapuserd_client_) { + LOG(ERROR) << "Unable to connect to snapuserd"; + return false; } return true; } @@ -2538,11 +2691,26 @@ Return SnapshotManager::InitializeUpdateSnapshots( return Return::Error(); } - auto ret = InitializeCow(cow_path); - if (!ret.is_ok()) { - LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": " - << cow_path; - return AddRequiredSpace(ret, all_snapshot_status); + if (IsCompressionEnabled()) { + unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC)); + if (fd < 0) { + PLOG(ERROR) << "open " << cow_path << " failed for snapshot " + << cow_params.partition_name; + return Return::Error(); + } + + CowWriter writer(CowOptions{}); + if (!writer.Initialize(fd) || !writer.Finalize()) { + LOG(ERROR) << "Could not initialize COW device for " << target_partition->name(); + return Return::Error(); + } + } 
else { + auto ret = InitializeKernelCow(cow_path); + if (!ret.is_ok()) { + LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": " + << cow_path; + return AddRequiredSpace(ret, all_snapshot_status); + } } // Let destructor of created_devices_for_cow to unmap the COW devices. }; diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 9660357a1..445428187 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +120,8 @@ class SnapshotTest : public ::testing::Test { image_manager_ = sm->image_manager(); test_device->set_slot_suffix("_a"); + + sm->set_use_first_stage_snapuserd(false); } void CleanupTestArtifacts() { @@ -265,7 +269,7 @@ class SnapshotTest : public ::testing::Test { if (!map_res) { return map_res; } - if (!InitializeCow(cow_device)) { + if (!InitializeKernelCow(cow_device)) { return AssertionFailure() << "Cannot zero fill " << cow_device; } if (!sm->UnmapCowImage(name)) { @@ -1736,6 +1740,59 @@ TEST_F(SnapshotUpdateTest, LowSpace) { ASSERT_LT(res.required_size(), 15_MiB); } +class AutoKill final { + public: + explicit AutoKill(pid_t pid) : pid_(pid) {} + ~AutoKill() { + if (pid_ > 0) kill(pid_, SIGKILL); + } + + bool valid() const { return pid_ > 0; } + + private: + pid_t pid_; +}; + +TEST_F(SnapshotUpdateTest, DaemonTransition) { + if (!IsCompressionEnabled()) { + GTEST_SKIP() << "Skipping Virtual A/B Compression test"; + } + + AutoKill auto_kill(StartFirstStageSnapuserd()); + ASSERT_TRUE(auto_kill.valid()); + + // Ensure a connection to the second-stage daemon, but use the first-stage + // code paths thereafter. + ASSERT_TRUE(sm->EnsureSnapuserdConnected()); + sm->set_use_first_stage_snapuserd(true); + + AddOperationForPartitions(); + // Execute the update. 
+ ASSERT_TRUE(sm->BeginUpdate()); + ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); + ASSERT_TRUE(MapUpdateSnapshots()); + ASSERT_TRUE(sm->FinishedSnapshotWrites(false)); + ASSERT_TRUE(UnmapAll()); + + auto init = SnapshotManager::NewForFirstStageMount(new TestDeviceInfo(fake_super, "_b")); + ASSERT_NE(init, nullptr); + + ASSERT_TRUE(init->EnsureSnapuserdConnected()); + init->set_use_first_stage_snapuserd(true); + + ASSERT_TRUE(init->NeedSnapshotsInFirstStageMount()); + ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_)); + + ASSERT_EQ(access("/dev/dm-user/sys_b-user-cow-init", F_OK), 0); + ASSERT_EQ(access("/dev/dm-user/sys_b-user-cow", F_OK), -1); + + ASSERT_TRUE(init->PerformSecondStageTransition()); + + // The control device should have been renamed. + ASSERT_TRUE(android::fs_mgr::WaitForFileDeleted("/dev/dm-user/sys_b-user-cow-init", 10s)); + ASSERT_EQ(access("/dev/dm-user/sys_b-user-cow", F_OK), 0); +} + class FlashAfterUpdateTest : public SnapshotUpdateTest, public WithParamInterface> { public: diff --git a/fs_mgr/libsnapshot/snapuserd_client.cpp b/fs_mgr/libsnapshot/snapuserd_client.cpp index 35bb29b8b..565013976 100644 --- a/fs_mgr/libsnapshot/snapuserd_client.cpp +++ b/fs_mgr/libsnapshot/snapuserd_client.cpp @@ -51,6 +51,25 @@ bool EnsureSnapuserdStarted() { return true; } +pid_t StartFirstStageSnapuserd() { + pid_t pid = fork(); + if (pid < 0) { + PLOG(ERROR) << "fork failed"; + return pid; + } + if (pid != 0) { + return pid; + } + + std::string arg0 = "/system/bin/snapuserd"; + std::string arg1 = kSnapuserdSocketFirstStage; + char* const argv[] = {arg0.data(), arg1.data(), nullptr}; + if (execv(arg0.c_str(), argv) < 0) { + PLOG(FATAL) << "execv failed"; + } + return pid; +} + SnapuserdClient::SnapuserdClient(android::base::unique_fd&& sockfd) : sockfd_(std::move(sockfd)) {} static inline bool IsRetryErrno() { diff --git a/fs_mgr/libsnapshot/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd_server.cpp index 
6b8cdd9e0..6a892186a 100644 --- a/fs_mgr/libsnapshot/snapuserd_server.cpp +++ b/fs_mgr/libsnapshot/snapuserd_server.cpp @@ -191,6 +191,8 @@ bool SnapuserdServer::Receivemsg(android::base::borrowed_fd fd, const std::strin } void SnapuserdServer::RunThread(DmUserHandler* handler) { + LOG(INFO) << "Entering thread for handler: " << handler->GetControlDevice(); + while (!StopRequested()) { if (handler->snapuserd()->Run() < 0) { LOG(INFO) << "Snapuserd: Thread terminating as control device is de-registered"; @@ -198,6 +200,8 @@ void SnapuserdServer::RunThread(DmUserHandler* handler) { } } + LOG(INFO) << "Exiting thread for handler: " << handler->GetControlDevice(); + if (auto client = RemoveHandler(handler->GetControlDevice())) { // The main thread did not receive a WaitForDelete request for this // control device. Since we transferred ownership within the lock, diff --git a/fs_mgr/libsnapshot/utility.cpp b/fs_mgr/libsnapshot/utility.cpp index d32b61eaa..4cae83a79 100644 --- a/fs_mgr/libsnapshot/utility.cpp +++ b/fs_mgr/libsnapshot/utility.cpp @@ -91,7 +91,7 @@ AutoDeleteSnapshot::~AutoDeleteSnapshot() { } } -Return InitializeCow(const std::string& device) { +Return InitializeKernelCow(const std::string& device) { // When the kernel creates a persistent dm-snapshot, it requires a CoW file // to store the modifications. The kernel interface does not specify how // the CoW is used, and there is no standard associated. diff --git a/fs_mgr/libsnapshot/utility.h b/fs_mgr/libsnapshot/utility.h index e69bdadc9..482888a0d 100644 --- a/fs_mgr/libsnapshot/utility.h +++ b/fs_mgr/libsnapshot/utility.h @@ -112,7 +112,7 @@ std::vector ListPartitionsWithSuffix( android::fs_mgr::MetadataBuilder* builder, const std::string& suffix); // Initialize a device before using it as the COW device for a dm-snapshot device. -Return InitializeCow(const std::string& device); +Return InitializeKernelCow(const std::string& device); // "Atomically" write string to file. 
This is done by a series of actions: // 1. Write to path + ".tmp" From b031def229aaa3e1fcd304e5fb7ad85dab61b829 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 30 Oct 2020 00:00:33 -0700 Subject: [PATCH 2/3] Start snapuserd if needed as soon as possible during second-stage init. snapuserd is used as a user-space block device implementation during Virtual A/B Compression-enabled updates. It has to be started in first-stage init, so that updated partitions can be mounted. Once init reaches second-stage, and sepolicy is loaded, we want to re-launch snapuserd at the correct privilege level. We accomplish this by rebuilding the device-mapper tables of each block device, which allows us to re-bind the kernel driver to a new instance of snapuserd. After this, the old daemon can be shut down. Ideally this transition happens as soon as possible, before any .rc scripts are run. This minimizes the amount of time the original snapuserd is running, as well as any ambiguity about which instance of snapuserd is the correct one. The original daemon is sent a SIGTERM signal once the transition is complete. The pid is stored in an environment variable to make this possible (these details are implemented in libsnapshot). 
Bug: 168259959 Test: manual test Change-Id: Ife9518e502ce02f11ec54e7f3e6adc6f04d94133 --- init/init.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/init/init.cpp b/init/init.cpp index ea0449446..c6f206644 100644 --- a/init/init.cpp +++ b/init/init.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,7 @@ using android::base::StringPrintf; using android::base::Timer; using android::base::Trim; using android::fs_mgr::AvbHandle; +using android::snapshot::SnapshotManager; namespace android { namespace init { @@ -722,6 +724,32 @@ void SendLoadPersistentPropertiesMessage() { } } +static Result TransitionSnapuserdAction(const BuiltinArguments&) { + if (!SnapshotManager::IsSnapshotManagerNeeded() || + !android::base::GetBoolProperty(android::snapshot::kVirtualAbCompressionProp, false)) { + return {}; + } + + auto sm = SnapshotManager::New(); + if (!sm) { + LOG(FATAL) << "Failed to create SnapshotManager, will not transition snapuserd"; + return {}; + } + + ServiceList& service_list = ServiceList::GetInstance(); + auto svc = service_list.FindService("snapuserd"); + if (!svc) { + LOG(FATAL) << "Failed to find snapuserd service, aborting transition"; + return {}; + } + svc->Start(); + + if (!sm->PerformSecondStageTransition()) { + LOG(FATAL) << "Failed to transition snapuserd to second-stage"; + } + return {}; +} + int SecondStageMain(int argc, char** argv) { if (REBOOT_BOOTLOADER_ON_PANIC) { InstallRebootSignalHandlers(); @@ -847,6 +875,7 @@ int SecondStageMain(int argc, char** argv) { SetProperty(gsi::kGsiInstalledProp, is_installed); am.QueueBuiltinAction(SetupCgroupsAction, "SetupCgroups"); + am.QueueBuiltinAction(TransitionSnapuserdAction, "TransitionSnapuserd"); am.QueueBuiltinAction(SetKptrRestrictAction, "SetKptrRestrict"); am.QueueBuiltinAction(TestPerfEventSelinuxAction, "TestPerfEventSelinux"); am.QueueEventTrigger("early-init"); From 2eb7b9221ba9094aa3120570869403d8344d5a2d Mon Sep 
17 00:00:00 2001 From: David Anderson Date: Fri, 30 Oct 2020 15:54:46 -0700 Subject: [PATCH 3/3] libsnapshot: Implement MapAllSnapshots and UnmapAllSnapshots. Bug: 168554689 Test: vts_libsnapshot_test Change-Id: I6809e226741dabcf337c3a5cfaba56afdb9edd64 --- .../include/libsnapshot/snapshot.h | 8 ++- fs_mgr/libsnapshot/snapshot.cpp | 61 +++++++++++++++---- fs_mgr/libsnapshot/snapshot_test.cpp | 15 +++++ 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 9dbbbd7c0..8bed1b956 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -559,6 +559,9 @@ class SnapshotManager final : public ISnapshotManager { std::string GetSnapshotDeviceName(const std::string& snapshot_name, const SnapshotStatus& status); + bool MapAllPartitions(LockedFile* lock, const std::string& super_device, uint32_t slot, + const std::chrono::milliseconds& timeout_ms); + // Reason for calling MapPartitionWithSnapshot. enum class SnapshotContext { // For writing or verification (during update_engine). @@ -632,9 +635,12 @@ class SnapshotManager final : public ISnapshotManager { const LpMetadata* exported_target_metadata, const std::string& target_suffix, const std::map& all_snapshot_status); + // Implementation of UnmapAllSnapshots(), with the lock provided. + bool UnmapAllSnapshots(LockedFile* lock); + // Unmap all partitions that were mapped by CreateLogicalAndSnapshotPartitions. // This should only be called in recovery. - bool UnmapAllPartitions(); + bool UnmapAllPartitionsInRecovery(); // Check no snapshot overflows. Note that this returns false negatives if the snapshot // overflows, then is remapped and not written afterwards. 
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index b6c0a293c..f9bb0dd2e 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -1709,8 +1709,13 @@ bool SnapshotManager::CreateLogicalAndSnapshotPartitions( auto lock = LockExclusive(); if (!lock) return false; - const auto& opener = device_->GetPartitionOpener(); uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); + return MapAllPartitions(lock.get(), super_device, slot, timeout_ms); +} + +bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device, + uint32_t slot, const std::chrono::milliseconds& timeout_ms) { + const auto& opener = device_->GetPartitionOpener(); auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot); if (!metadata) { LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device; @@ -1731,8 +1736,7 @@ bool SnapshotManager::CreateLogicalAndSnapshotPartitions( .partition_opener = &opener, .timeout_ms = timeout_ms, }; - if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount, - nullptr)) { + if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) { return false; } } @@ -2078,14 +2082,49 @@ bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) return true; } -bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds&) { - LOG(ERROR) << "Not yet implemented."; - return false; +bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) { + auto lock = LockExclusive(); + if (!lock) return false; + + auto state = ReadUpdateState(lock.get()); + if (state == UpdateState::Unverified) { + if (GetCurrentSlot() == Slot::Target) { + LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot."; + return false; + } + } else if (state != UpdateState::Initiated) { + LOG(ERROR) << "Cannot call MapAllSnapshots from update state: 
" << state; + return false; + } + + if (!UnmapAllSnapshots(lock.get())) { + return false; + } + + uint32_t slot = SlotNumberForSlotSuffix(device_->GetOtherSlotSuffix()); + return MapAllPartitions(lock.get(), device_->GetSuperDevice(slot), slot, timeout_ms); } bool SnapshotManager::UnmapAllSnapshots() { - LOG(ERROR) << "Not yet implemented."; - return false; + auto lock = LockExclusive(); + if (!lock) return false; + + return UnmapAllSnapshots(lock.get()); +} + +bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) { + std::vector snapshots; + if (!ListSnapshots(lock, &snapshots)) { + return false; + } + + for (const auto& snapshot : snapshots) { + if (!UnmapPartitionWithSnapshot(lock, snapshot)) { + LOG(ERROR) << "Failed to unmap snapshot: " << snapshot; + return false; + } + } + return true; } auto SnapshotManager::OpenFile(const std::string& file, int lock_flags) @@ -2868,7 +2907,7 @@ bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_na return UnmapPartitionWithSnapshot(lock.get(), target_partition_name); } -bool SnapshotManager::UnmapAllPartitions() { +bool SnapshotManager::UnmapAllPartitionsInRecovery() { auto lock = LockExclusive(); if (!lock) return false; @@ -3012,7 +3051,7 @@ bool SnapshotManager::HandleImminentDataWipe(const std::function& callba } // Nothing should be depending on partitions now, so unmap them all. - if (!UnmapAllPartitions()) { + if (!UnmapAllPartitionsInRecovery()) { LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash."; } return true; @@ -3043,7 +3082,7 @@ bool SnapshotManager::FinishMergeInRecovery() { } // Nothing should be depending on partitions now, so unmap them all. 
- if (!UnmapAllPartitions()) { + if (!UnmapAllPartitionsInRecovery()) { LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash."; } return true; diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 445428187..7fc64e527 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -1793,6 +1793,21 @@ TEST_F(SnapshotUpdateTest, DaemonTransition) { ASSERT_EQ(access("/dev/dm-user/sys_b-user-cow", F_OK), 0); } +TEST_F(SnapshotUpdateTest, MapAllSnapshots) { + AddOperationForPartitions(); + // Execute the update. + ASSERT_TRUE(sm->BeginUpdate()); + ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); + for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { + ASSERT_TRUE(WriteSnapshotAndHash(name)); + } + ASSERT_TRUE(sm->FinishedSnapshotWrites(false)); + ASSERT_TRUE(sm->MapAllSnapshots(10s)); + + // Read bytes back and verify they match the cache. + ASSERT_TRUE(IsPartitionUnchanged("sys_b")); +} + class FlashAfterUpdateTest : public SnapshotUpdateTest, public WithParamInterface> { public: