diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp index 059a469a2..f1b00311c 100644 --- a/fs_mgr/libsnapshot/Android.bp +++ b/fs_mgr/libsnapshot/Android.bp @@ -31,15 +31,17 @@ cc_defaults { "libbrotli", "libdm", "libfstab", - "libsnapshot_cow", "update_metadata-protos", ], whole_static_libs: [ + "libbrotli", "libcutils", "libext2_uuid", "libext4_utils", "libfstab", + "libsnapshot_cow", "libsnapshot_snapuserd", + "libz", ], header_libs: [ "libchrome", @@ -432,6 +434,7 @@ cc_binary { init_rc: [ "snapuserd.rc", ], + static_executable: true, } cc_binary { diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 35ed04a29..9dbbbd7c0 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -77,6 +78,7 @@ class SnapshotMergeStats; class SnapshotStatus; static constexpr const std::string_view kCowGroupName = "cow"; +static constexpr char kVirtualAbCompressionProp[] = "ro.virtual_ab.compression.enabled"; bool OptimizeSourceCopyOperation(const chromeos_update_engine::InstallOperation& operation, chromeos_update_engine::InstallOperation* optimized); @@ -104,6 +106,7 @@ class ISnapshotManager { android::hardware::boot::V1_1::MergeStatus status) = 0; virtual bool SetSlotAsUnbootable(unsigned int slot) = 0; virtual bool IsRecovery() const = 0; + virtual bool IsTestDevice() const { return false; } }; virtual ~ISnapshotManager() = default; @@ -303,6 +306,14 @@ class SnapshotManager final : public ISnapshotManager { // Helper function for second stage init to restorecon on the rollback indicator. static std::string GetGlobalRollbackIndicatorPath(); + // Initiate the transition from first-stage to second-stage snapuserd. This + // process involves re-creating the dm-user table entries for each device, + // so that they connect to the new daemon. 
Once all new tables have been + // activated, we ask the first-stage daemon to cleanly exit. + // + // The second-stage snapuserd must already be running; this function does not start it. + bool PerformSecondStageTransition(); + // ISnapshotManager overrides. bool BeginUpdate() override; bool CancelUpdate() override; @@ -345,6 +356,7 @@ class SnapshotManager final : public ISnapshotManager { FRIEND_TEST(SnapshotTest, Merge); FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot); FRIEND_TEST(SnapshotTest, UpdateBootControlHal); + FRIEND_TEST(SnapshotUpdateTest, DaemonTransition); FRIEND_TEST(SnapshotUpdateTest, DataWipeAfterRollback); FRIEND_TEST(SnapshotUpdateTest, DataWipeRollbackInRecovery); FRIEND_TEST(SnapshotUpdateTest, FullUpdateFlow); @@ -372,11 +384,13 @@ class SnapshotManager final : public ISnapshotManager { // Ensure we're connected to snapuserd. bool EnsureSnapuserdConnected(); - // Helper for first-stage init. + // Helpers for first-stage init. bool ForceLocalImageManager(); + const std::unique_ptr& device() const { return device_; } - // Helper function for tests. + // Helper functions for tests. IImageManager* image_manager() const { return images_.get(); } + void set_use_first_stage_snapuserd(bool value) { use_first_stage_snapuserd_ = value; } // Since libsnapshot is included into multiple processes, we flock() our // files for simple synchronization. 
LockedFile is a helper to assist with @@ -660,6 +674,7 @@ class SnapshotManager final : public ISnapshotManager { std::unique_ptr device_; std::unique_ptr images_; bool has_local_image_manager_ = false; + bool use_first_stage_snapuserd_ = false; bool in_factory_data_reset_ = false; std::unique_ptr snapuserd_client_; }; diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h index 0bbdaa583..aaec229b6 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h @@ -14,6 +14,8 @@ #pragma once +#include + #include #include #include @@ -31,9 +33,15 @@ static constexpr uint32_t PACKET_SIZE = 512; static constexpr char kSnapuserdSocketFirstStage[] = "snapuserd_first_stage"; static constexpr char kSnapuserdSocket[] = "snapuserd"; +static constexpr char kSnapuserdFirstStagePidVar[] = "FIRST_STAGE_SNAPUSERD_PID"; + // Ensure that the second-stage daemon for snapuserd is running. bool EnsureSnapuserdStarted(); +// Start the first-stage version of snapuserd, returning its pid. This is used +// by first-stage init, as well as vts_libsnapshot_test. On failure, -1 is returned. 
+pid_t StartFirstStageSnapuserd(); + class SnapuserdClient { private: android::base::unique_fd sockfd_; diff --git a/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h b/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h index 197aeaa05..7aef086bb 100644 --- a/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h +++ b/fs_mgr/libsnapshot/include_test/libsnapshot/test_helpers.h @@ -95,6 +95,7 @@ class TestDeviceInfo : public SnapshotManager::IDeviceInfo { unbootable_slots_.insert(slot); return true; } + bool IsTestDevice() const override { return true; } bool IsSlotUnbootable(uint32_t slot) { return unbootable_slots_.count(slot) != 0; } diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index 7061d5665..b6c0a293c 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -31,8 +31,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -100,6 +102,12 @@ std::unique_ptr SnapshotManager::NewForFirstStageMount(IDeviceI if (!sm || !sm->ForceLocalImageManager()) { return nullptr; } + + // The first-stage version of snapuserd is explicitly started by init. Do + // not attempt to use it during tests (which run in normal AOSP). + if (!sm->device()->IsTestDevice()) { + sm->use_first_stage_snapuserd_ = true; + } return sm; } @@ -400,8 +408,15 @@ bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name, base_sectors = dev_size / kSectorSize; } + // Use an extra decoration for first-stage init, so we can transition + // to a new table entry in second-stage. 
+ std::string misc_name = name; + if (use_first_stage_snapuserd_) { + misc_name += "-init"; + } + DmTable table; - table.Emplace(0, base_sectors, name); + table.Emplace(0, base_sectors, misc_name); if (!dm.CreateDevice(name, table, path, timeout_ms)) { return false; } @@ -410,7 +425,7 @@ bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name, return false; } - auto control_device = "/dev/dm-user/" + name; + auto control_device = "/dev/dm-user/" + misc_name; return snapuserd_client_->InitializeSnapuserd(cow_file, base_device, control_device); } @@ -1284,6 +1299,107 @@ bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock, return RemoveAllUpdateState(lock, before_cancel); } +bool SnapshotManager::PerformSecondStageTransition() { + LOG(INFO) << "Performing second-stage transition for snapuserd."; + + // Don't use EnsureSnapuserdConnected() because this is called from init, + // and attempting to do so will deadlock. + if (!snapuserd_client_) { + snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s); + if (!snapuserd_client_) { + LOG(ERROR) << "Unable to connect to snapuserd"; + return false; + } + } + + auto& dm = DeviceMapper::Instance(); + + auto lock = LockExclusive(); + if (!lock) return false; + + std::vector snapshots; + if (!ListSnapshots(lock.get(), &snapshots)) { + LOG(ERROR) << "Failed to list snapshots."; + return false; + } + + size_t num_cows = 0; + size_t ok_cows = 0; + for (const auto& snapshot : snapshots) { + std::string cow_name = GetDmUserCowName(snapshot); + if (dm.GetState(cow_name) == DmDeviceState::INVALID) { + continue; + } + + DeviceMapper::TargetInfo target; + if (!GetSingleTarget(cow_name, TableQuery::Table, &target)) { + continue; + } + + auto target_type = DeviceMapper::GetTargetType(target.spec); + if (target_type != "user") { + LOG(ERROR) << "Unexpected target type for " << cow_name << ": " << target_type; + continue; + } + + num_cows++; + + DmTable table; + table.Emplace(0, target.spec.length, 
cow_name); + if (!dm.LoadTableAndActivate(cow_name, table)) { + LOG(ERROR) << "Unable to swap tables for " << cow_name; + continue; + } + + std::string backing_device; + if (!dm.GetDmDevicePathByName(GetBaseDeviceName(snapshot), &backing_device)) { + LOG(ERROR) << "Could not get device path for " << GetBaseDeviceName(snapshot); + continue; + } + + std::string cow_device; + if (!dm.GetDmDevicePathByName(GetCowName(snapshot), &cow_device)) { + LOG(ERROR) << "Could not get device path for " << GetCowName(snapshot); + continue; + } + + // Wait for ueventd to acknowledge and create the control device node. + std::string control_device = "/dev/dm-user/" + cow_name; + if (!android::fs_mgr::WaitForFile(control_device, 10s)) { + LOG(ERROR) << "Could not find control device: " << control_device; + continue; + } + + if (!snapuserd_client_->InitializeSnapuserd(cow_device, backing_device, control_device)) { + // This error is unrecoverable. We cannot proceed because reads to + // the underlying device will fail. 
+ LOG(FATAL) << "Could not initialize snapuserd for " << cow_name; + return false; + } + + ok_cows++; + } + + if (ok_cows != num_cows) { + LOG(ERROR) << "Could not transition all snapuserd consumers."; + return false; + } + + int pid; + const char* pid_str = getenv(kSnapuserdFirstStagePidVar); + if (pid_str && android::base::ParseInt(pid_str, &pid)) { + if (kill(pid, SIGTERM) < 0 && errno != ESRCH) { + LOG(ERROR) << "kill snapuserd failed"; + return false; + } + } else { + LOG(ERROR) << "Could not find or parse " << kSnapuserdFirstStagePidVar + << " for snapuserd pid"; + return false; + } + return true; +} + std::unique_ptr SnapshotManager::ReadCurrentMetadata() { const auto& opener = device_->GetPartitionOpener(); uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); @@ -1621,6 +1737,15 @@ bool SnapshotManager::CreateLogicalAndSnapshotPartitions( } } + if (use_first_stage_snapuserd_) { + // Remove the first-stage socket as a precaution, there is no need to + // access the daemon anymore and we'll be killing it once second-stage + // is running. + auto socket = ANDROID_SOCKET_DIR + "/"s + kSnapuserdSocketFirstStage; + snapuserd_client_ = nullptr; + unlink(socket.c_str()); + } + LOG(INFO) << "Created logical partitions with snapshot."; return true; } @@ -1925,10 +2050,18 @@ bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) LOG(ERROR) << "Cannot unmap " << dm_user_name; return false; } - if (!snapuserd_client_->WaitForDeviceDelete("/dev/dm-user/" + dm_user_name)) { + + auto control_device = "/dev/dm-user/" + dm_user_name; + if (!snapuserd_client_->WaitForDeviceDelete(control_device)) { LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete"; return false; } + + // Ensure the control device is gone so we don't run into ABA problems. 
+ if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) { + LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink"; + return false; + } } auto cow_name = GetCowName(name); @@ -2212,15 +2345,35 @@ bool SnapshotManager::EnsureImageManager() { } bool SnapshotManager::EnsureSnapuserdConnected() { - if (!snapuserd_client_) { + if (snapuserd_client_) { + return true; + } + + std::string socket; + if (use_first_stage_snapuserd_) { + auto pid = StartFirstStageSnapuserd(); + if (pid < 0) { + LOG(ERROR) << "Failed to start snapuserd"; + return false; + } + + auto pid_str = std::to_string(static_cast(pid)); + if (setenv(kSnapuserdFirstStagePidVar, pid_str.c_str(), 1) < 0) { + PLOG(ERROR) << "setenv failed storing the snapuserd pid"; + } + + socket = kSnapuserdSocketFirstStage; + } else { if (!EnsureSnapuserdStarted()) { return false; } - snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s); - if (!snapuserd_client_) { - LOG(ERROR) << "Unable to connect to snapuserd"; - return false; - } + socket = kSnapuserdSocket; + } + + snapuserd_client_ = SnapuserdClient::Connect(socket, 10s); + if (!snapuserd_client_) { + LOG(ERROR) << "Unable to connect to snapuserd"; + return false; } return true; } @@ -2538,11 +2691,26 @@ Return SnapshotManager::InitializeUpdateSnapshots( return Return::Error(); } - auto ret = InitializeCow(cow_path); - if (!ret.is_ok()) { - LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": " - << cow_path; - return AddRequiredSpace(ret, all_snapshot_status); + if (IsCompressionEnabled()) { + unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC)); + if (fd < 0) { + PLOG(ERROR) << "open " << cow_path << " failed for snapshot " + << cow_params.partition_name; + return Return::Error(); + } + + CowWriter writer(CowOptions{}); + if (!writer.Initialize(fd) || !writer.Finalize()) { + LOG(ERROR) << "Could not initialize COW device for " << target_partition->name(); + return Return::Error(); + } + } 
else { + auto ret = InitializeKernelCow(cow_path); + if (!ret.is_ok()) { + LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": " + << cow_path; + return AddRequiredSpace(ret, all_snapshot_status); + } } // Let destructor of created_devices_for_cow to unmap the COW devices. }; diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 9660357a1..445428187 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +120,8 @@ class SnapshotTest : public ::testing::Test { image_manager_ = sm->image_manager(); test_device->set_slot_suffix("_a"); + + sm->set_use_first_stage_snapuserd(false); } void CleanupTestArtifacts() { @@ -265,7 +269,7 @@ class SnapshotTest : public ::testing::Test { if (!map_res) { return map_res; } - if (!InitializeCow(cow_device)) { + if (!InitializeKernelCow(cow_device)) { return AssertionFailure() << "Cannot zero fill " << cow_device; } if (!sm->UnmapCowImage(name)) { @@ -1736,6 +1740,59 @@ TEST_F(SnapshotUpdateTest, LowSpace) { ASSERT_LT(res.required_size(), 15_MiB); } +class AutoKill final { + public: + explicit AutoKill(pid_t pid) : pid_(pid) {} + ~AutoKill() { + if (pid_ > 0) kill(pid_, SIGKILL); + } + + bool valid() const { return pid_ > 0; } + + private: + pid_t pid_; +}; + +TEST_F(SnapshotUpdateTest, DaemonTransition) { + if (!IsCompressionEnabled()) { + GTEST_SKIP() << "Skipping Virtual A/B Compression test"; + } + + AutoKill auto_kill(StartFirstStageSnapuserd()); + ASSERT_TRUE(auto_kill.valid()); + + // Ensure a connection to the second-stage daemon, but use the first-stage + // code paths thereafter. + ASSERT_TRUE(sm->EnsureSnapuserdConnected()); + sm->set_use_first_stage_snapuserd(true); + + AddOperationForPartitions(); + // Execute the update. 
+ ASSERT_TRUE(sm->BeginUpdate()); + ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); + ASSERT_TRUE(MapUpdateSnapshots()); + ASSERT_TRUE(sm->FinishedSnapshotWrites(false)); + ASSERT_TRUE(UnmapAll()); + + auto init = SnapshotManager::NewForFirstStageMount(new TestDeviceInfo(fake_super, "_b")); + ASSERT_NE(init, nullptr); + + ASSERT_TRUE(init->EnsureSnapuserdConnected()); + init->set_use_first_stage_snapuserd(true); + + ASSERT_TRUE(init->NeedSnapshotsInFirstStageMount()); + ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_)); + + ASSERT_EQ(access("/dev/dm-user/sys_b-user-cow-init", F_OK), 0); + ASSERT_EQ(access("/dev/dm-user/sys_b-user-cow", F_OK), -1); + + ASSERT_TRUE(init->PerformSecondStageTransition()); + + // The control device should have been renamed. + ASSERT_TRUE(android::fs_mgr::WaitForFileDeleted("/dev/dm-user/sys_b-user-cow-init", 10s)); + ASSERT_EQ(access("/dev/dm-user/sys_b-user-cow", F_OK), 0); +} + class FlashAfterUpdateTest : public SnapshotUpdateTest, public WithParamInterface> { public: diff --git a/fs_mgr/libsnapshot/snapuserd_client.cpp b/fs_mgr/libsnapshot/snapuserd_client.cpp index 35bb29b8b..565013976 100644 --- a/fs_mgr/libsnapshot/snapuserd_client.cpp +++ b/fs_mgr/libsnapshot/snapuserd_client.cpp @@ -51,6 +51,25 @@ bool EnsureSnapuserdStarted() { return true; } +pid_t StartFirstStageSnapuserd() { + pid_t pid = fork(); + if (pid < 0) { + PLOG(ERROR) << "fork failed"; + return pid; + } + if (pid != 0) { + return pid; + } + + std::string arg0 = "/system/bin/snapuserd"; + std::string arg1 = kSnapuserdSocketFirstStage; + char* const argv[] = {arg0.data(), arg1.data(), nullptr}; + if (execv(arg0.c_str(), argv) < 0) { + PLOG(FATAL) << "execv failed"; + } + return pid; +} + SnapuserdClient::SnapuserdClient(android::base::unique_fd&& sockfd) : sockfd_(std::move(sockfd)) {} static inline bool IsRetryErrno() { diff --git a/fs_mgr/libsnapshot/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd_server.cpp index 
6b8cdd9e0..6a892186a 100644 --- a/fs_mgr/libsnapshot/snapuserd_server.cpp +++ b/fs_mgr/libsnapshot/snapuserd_server.cpp @@ -191,6 +191,8 @@ bool SnapuserdServer::Receivemsg(android::base::borrowed_fd fd, const std::strin } void SnapuserdServer::RunThread(DmUserHandler* handler) { + LOG(INFO) << "Entering thread for handler: " << handler->GetControlDevice(); + while (!StopRequested()) { if (handler->snapuserd()->Run() < 0) { LOG(INFO) << "Snapuserd: Thread terminating as control device is de-registered"; @@ -198,6 +200,8 @@ void SnapuserdServer::RunThread(DmUserHandler* handler) { } } + LOG(INFO) << "Exiting thread for handler: " << handler->GetControlDevice(); + if (auto client = RemoveHandler(handler->GetControlDevice())) { // The main thread did not receive a WaitForDelete request for this // control device. Since we transferred ownership within the lock, diff --git a/fs_mgr/libsnapshot/utility.cpp b/fs_mgr/libsnapshot/utility.cpp index d32b61eaa..4cae83a79 100644 --- a/fs_mgr/libsnapshot/utility.cpp +++ b/fs_mgr/libsnapshot/utility.cpp @@ -91,7 +91,7 @@ AutoDeleteSnapshot::~AutoDeleteSnapshot() { } } -Return InitializeCow(const std::string& device) { +Return InitializeKernelCow(const std::string& device) { // When the kernel creates a persistent dm-snapshot, it requires a CoW file // to store the modifications. The kernel interface does not specify how // the CoW is used, and there is no standard associated. diff --git a/fs_mgr/libsnapshot/utility.h b/fs_mgr/libsnapshot/utility.h index e69bdadc9..482888a0d 100644 --- a/fs_mgr/libsnapshot/utility.h +++ b/fs_mgr/libsnapshot/utility.h @@ -112,7 +112,7 @@ std::vector ListPartitionsWithSuffix( android::fs_mgr::MetadataBuilder* builder, const std::string& suffix); // Initialize a device before using it as the COW device for a dm-snapshot device. -Return InitializeCow(const std::string& device); +Return InitializeKernelCow(const std::string& device); // "Atomically" write string to file. 
This is done by a series of actions: // 1. Write to path + ".tmp"