From 31e0426489badd78531e7983989c53e944c1f728 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 29 Mar 2021 17:16:59 +0000 Subject: [PATCH 1/6] libsnapshot:snapuserd: mmap + msync header after merge mmap the CowHeader and use msync to flush only the first 4k page after merge is complete. This cuts down ~30 seconds of merge completion time on a 55M incremental OTA with 235k copy operations. Although, this isn't a significant gain but this patch creates a scaffolding for the next set of read-ahead patches. Bug: 183863613 Test: Incremental and Full OTA Signed-off-by: Akilesh Kailash Change-Id: I15bfec91ea1d5bdf4390670bcf406e1015b79299 --- fs_mgr/libsnapshot/cow_writer.cpp | 24 -------- .../include/libsnapshot/cow_writer.h | 3 - fs_mgr/libsnapshot/snapuserd.cpp | 56 +++++++++++++------ fs_mgr/libsnapshot/snapuserd.h | 11 +++- fs_mgr/libsnapshot/snapuserd_server.cpp | 3 +- fs_mgr/libsnapshot/snapuserd_worker.cpp | 9 +++ 6 files changed, 58 insertions(+), 48 deletions(-) diff --git a/fs_mgr/libsnapshot/cow_writer.cpp b/fs_mgr/libsnapshot/cow_writer.cpp index 645ae9d0d..c51336df6 100644 --- a/fs_mgr/libsnapshot/cow_writer.cpp +++ b/fs_mgr/libsnapshot/cow_writer.cpp @@ -139,12 +139,6 @@ bool CowWriter::SetFd(android::base::borrowed_fd fd) { return true; } -void CowWriter::InitializeMerge(borrowed_fd fd, CowHeader* header) { - fd_ = fd; - memcpy(&header_, header, sizeof(CowHeader)); - merge_in_progress_ = true; -} - bool CowWriter::Initialize(unique_fd&& fd) { owned_fd_ = std::move(fd); return Initialize(borrowed_fd{owned_fd_}); @@ -517,24 +511,6 @@ bool CowWriter::Sync() { return true; } -bool CowWriter::CommitMerge(int merged_ops) { - CHECK(merge_in_progress_); - header_.num_merge_ops += merged_ops; - - if (lseek(fd_.get(), 0, SEEK_SET) < 0) { - PLOG(ERROR) << "lseek failed"; - return false; - } - - if (!android::base::WriteFully(fd_, reinterpret_cast(&header_), - sizeof(header_))) { - PLOG(ERROR) << "WriteFully failed"; - return false; - } - - return Sync(); -} - 
bool CowWriter::Truncate(off_t length) { if (is_dev_null_ || is_block_device_) { return true; diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h index a9efad8a8..a9d85bfdd 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h @@ -100,9 +100,6 @@ class CowWriter : public ICowWriter { bool InitializeAppend(android::base::unique_fd&&, uint64_t label); bool InitializeAppend(android::base::borrowed_fd fd, uint64_t label); - void InitializeMerge(android::base::borrowed_fd fd, CowHeader* header); - bool CommitMerge(int merged_ops); - bool Finalize() override; uint64_t GetCowSize() override; diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp index 5ef9e29ed..d104bd8d3 100644 --- a/fs_mgr/libsnapshot/snapuserd.cpp +++ b/fs_mgr/libsnapshot/snapuserd.cpp @@ -51,21 +51,18 @@ bool Snapuserd::InitializeWorkers() { } bool Snapuserd::CommitMerge(int num_merge_ops) { - { - std::lock_guard lock(lock_); - CowHeader header; + struct CowHeader* ch = reinterpret_cast(mapped_addr_); + ch->num_merge_ops += num_merge_ops; - reader_->GetHeader(&header); - header.num_merge_ops += num_merge_ops; - reader_->UpdateMergeProgress(num_merge_ops); - if (!writer_->CommitMerge(num_merge_ops)) { - SNAP_LOG(ERROR) << "CommitMerge failed... 
merged_ops_cur_iter: " << num_merge_ops - << " Total-merged-ops: " << header.num_merge_ops; - return false; - } - merge_initiated_ = true; + // Sync the first 4k block + int ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC); + if (ret < 0) { + PLOG(ERROR) << "msync header failed: " << ret; + return false; } + merge_initiated_ = true; + return true; } @@ -93,9 +90,9 @@ void Snapuserd::CheckMergeCompletionStatus() { return; } - CowHeader header; - reader_->GetHeader(&header); - SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << header.num_merge_ops + struct CowHeader* ch = reinterpret_cast(mapped_addr_); + + SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << ch->num_merge_ops << " Total-data-ops: " << reader_->total_data_ops(); } @@ -175,8 +172,10 @@ bool Snapuserd::ReadMetadata() { reader_->InitializeMerge(); SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops; - writer_ = std::make_unique(options); - writer_->InitializeMerge(cow_fd_.get(), &header); + if (!MmapMetadata()) { + SNAP_LOG(ERROR) << "mmap failed"; + return false; + } // Initialize the iterator for reading metadata cowop_riter_ = reader_->GetRevOpIter(); @@ -487,6 +486,29 @@ bool Snapuserd::ReadMetadata() { return true; } +bool Snapuserd::MmapMetadata() { + CowHeader header; + reader_->GetHeader(&header); + + // mmap the first 4k page + total_mapped_addr_length_ = BLOCK_SZ; + mapped_addr_ = mmap(NULL, total_mapped_addr_length_, PROT_READ | PROT_WRITE, MAP_SHARED, + cow_fd_.get(), 0); + if (mapped_addr_ == MAP_FAILED) { + SNAP_LOG(ERROR) << "mmap metadata failed"; + return false; + } + + return true; +} + +void Snapuserd::UnmapBufferRegion() { + int ret = munmap(mapped_addr_, total_mapped_addr_length_); + if (ret < 0) { + SNAP_PLOG(ERROR) << "munmap failed"; + } +} + void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*, unsigned int, const char* message) { if (severity == android::base::ERROR) { diff --git a/fs_mgr/libsnapshot/snapuserd.h 
b/fs_mgr/libsnapshot/snapuserd.h index 87c552824..65af553c4 100644 --- a/fs_mgr/libsnapshot/snapuserd.h +++ b/fs_mgr/libsnapshot/snapuserd.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -173,9 +174,10 @@ class Snapuserd : public std::enable_shared_from_this { return p1.first < p2.first; } - private: - std::vector> worker_threads_; + void UnmapBufferRegion(); + bool MmapMetadata(); + private: bool IsChunkIdMetadata(chunk_t chunk); chunk_t GetNextAllocatableChunkId(chunk_t chunk_id); @@ -197,7 +199,6 @@ class Snapuserd : public std::enable_shared_from_this { std::unique_ptr cowop_iter_; std::unique_ptr cowop_riter_; std::unique_ptr reader_; - std::unique_ptr writer_; // Vector of disk exception which is a // mapping of old-chunk to new-chunk @@ -209,6 +210,10 @@ class Snapuserd : public std::enable_shared_from_this { std::mutex lock_; + void* mapped_addr_; + size_t total_mapped_addr_length_; + + std::vector> worker_threads_; bool merge_initiated_ = false; bool attached_ = false; }; diff --git a/fs_mgr/libsnapshot/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd_server.cpp index 64332d191..d813425ae 100644 --- a/fs_mgr/libsnapshot/snapuserd_server.cpp +++ b/fs_mgr/libsnapshot/snapuserd_server.cpp @@ -209,10 +209,11 @@ void SnapuserdServer::RunThread(std::shared_ptr handler) { } handler->snapuserd()->CloseFds(); + handler->snapuserd()->CheckMergeCompletionStatus(); + handler->snapuserd()->UnmapBufferRegion(); auto misc_name = handler->misc_name(); LOG(INFO) << "Handler thread about to exit: " << misc_name; - handler->snapuserd()->CheckMergeCompletionStatus(); { std::lock_guard lock(lock_); diff --git a/fs_mgr/libsnapshot/snapuserd_worker.cpp b/fs_mgr/libsnapshot/snapuserd_worker.cpp index 1002569e0..4b2c5a083 100644 --- a/fs_mgr/libsnapshot/snapuserd_worker.cpp +++ b/fs_mgr/libsnapshot/snapuserd_worker.cpp @@ -613,12 +613,21 @@ bool WorkerThread::DmuserReadRequest() { } } + // Just return the header if it is an error + if (header->type 
== DM_USER_RESP_ERROR) { + ret = 0; + } + // Daemon will not be terminated if there is any error. We will // just send the error back to dm-user. if (!WriteDmUserPayload(ret)) { return false; } + if (header->type == DM_USER_RESP_ERROR) { + break; + } + remaining_size -= ret; offset += ret; } while (remaining_size > 0); From 771b17f509f91986848d7d6a45cfb26663f8e0d1 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 29 Mar 2021 17:57:35 +0000 Subject: [PATCH 2/6] libsnapshot:snapuserd: Add 2MB scratch space in COW file Add 2MB scratch space in the COW file. This is a preparation patch for read-ahead patch. This just add the buffer space right after the header. Bump up the version number in the header in order to distiguish between older and newer COW formats. No operation is done on this buffer with this patch-set. Scratch space option is disabled by default. Bug: 183863613 Test: 1: Create Full OTA with the new COW format. 2: Incremental OTA with older COW format. 3: vts_libsnapshot_test Signed-off-by: Akilesh Kailash Change-Id: I42a535a48ec22adb893dfe6f86a4f51650e1f88a --- fs_mgr/libsnapshot/cow_reader.cpp | 33 ++++++++++++------- fs_mgr/libsnapshot/cow_writer.cpp | 27 ++++++++++++++- .../include/libsnapshot/cow_format.h | 11 ++++++- .../include/libsnapshot/cow_writer.h | 2 ++ .../include/libsnapshot/snapuserd_kernel.h | 3 -- fs_mgr/libsnapshot/snapshot.cpp | 12 ++++++- fs_mgr/libsnapshot/snapshot_reader_test.cpp | 1 + fs_mgr/libsnapshot/snapshot_test.cpp | 3 ++ 8 files changed, 74 insertions(+), 18 deletions(-) diff --git a/fs_mgr/libsnapshot/cow_reader.cpp b/fs_mgr/libsnapshot/cow_reader.cpp index 7199b3829..c2e4f89df 100644 --- a/fs_mgr/libsnapshot/cow_reader.cpp +++ b/fs_mgr/libsnapshot/cow_reader.cpp @@ -94,11 +94,6 @@ bool CowReader::Parse(android::base::borrowed_fd fd, std::optional lab << "Expected: " << kCowMagicNumber; return false; } - if (header_.header_size != sizeof(CowHeader)) { - LOG(ERROR) << "Header size unknown, read " << 
header_.header_size << ", expected " - << sizeof(CowHeader); - return false; - } if (header_.footer_size != sizeof(CowFooter)) { LOG(ERROR) << "Footer size unknown, read " << header_.footer_size << ", expected " << sizeof(CowFooter); @@ -123,8 +118,7 @@ bool CowReader::Parse(android::base::borrowed_fd fd, std::optional lab return false; } - if ((header_.major_version != kCowVersionMajor) || - (header_.minor_version != kCowVersionMinor)) { + if ((header_.major_version > kCowVersionMajor) || (header_.minor_version != kCowVersionMinor)) { LOG(ERROR) << "Header version mismatch"; LOG(ERROR) << "Major version: " << header_.major_version << "Expected: " << kCowVersionMajor; @@ -137,10 +131,25 @@ bool CowReader::Parse(android::base::borrowed_fd fd, std::optional lab } bool CowReader::ParseOps(std::optional label) { - uint64_t pos = lseek(fd_.get(), sizeof(header_), SEEK_SET); - if (pos != sizeof(header_)) { - PLOG(ERROR) << "lseek ops failed"; - return false; + uint64_t pos; + + // Skip the scratch space + if (header_.major_version >= 2 && (header_.buffer_size > 0)) { + LOG(DEBUG) << " Scratch space found of size: " << header_.buffer_size; + size_t init_offset = header_.header_size + header_.buffer_size; + pos = lseek(fd_.get(), init_offset, SEEK_SET); + if (pos != init_offset) { + PLOG(ERROR) << "lseek ops failed"; + return false; + } + } else { + pos = lseek(fd_.get(), header_.header_size, SEEK_SET); + if (pos != header_.header_size) { + PLOG(ERROR) << "lseek ops failed"; + return false; + } + // Reading a v1 version of COW which doesn't have buffer_size. + header_.buffer_size = 0; } auto ops_buffer = std::make_shared>(); @@ -470,7 +479,7 @@ std::unique_ptr CowReader::GetRevOpIter() { bool CowReader::GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read) { // Validate the offset, taking care to acknowledge possible overflow of offset+len. 
- if (offset < sizeof(header_) || offset >= fd_size_ - sizeof(CowFooter) || len >= fd_size_ || + if (offset < header_.header_size || offset >= fd_size_ - sizeof(CowFooter) || len >= fd_size_ || offset + len > fd_size_ - sizeof(CowFooter)) { LOG(ERROR) << "invalid data offset: " << offset << ", " << len << " bytes"; return false; diff --git a/fs_mgr/libsnapshot/cow_writer.cpp b/fs_mgr/libsnapshot/cow_writer.cpp index c51336df6..51c00a96a 100644 --- a/fs_mgr/libsnapshot/cow_writer.cpp +++ b/fs_mgr/libsnapshot/cow_writer.cpp @@ -94,6 +94,7 @@ void CowWriter::SetupHeaders() { header_.block_size = options_.block_size; header_.num_merge_ops = 0; header_.cluster_ops = options_.cluster_ops; + header_.buffer_size = 0; footer_ = {}; footer_.op.data_length = 64; footer_.op.type = kCowFooterOp; @@ -166,7 +167,7 @@ bool CowWriter::InitializeAppend(android::base::borrowed_fd fd, uint64_t label) } void CowWriter::InitPos() { - next_op_pos_ = sizeof(header_); + next_op_pos_ = sizeof(header_) + header_.buffer_size; cluster_size_ = header_.cluster_ops * sizeof(CowOperation); if (header_.cluster_ops) { next_data_pos_ = next_op_pos_ + cluster_size_; @@ -190,6 +191,10 @@ bool CowWriter::OpenForWrite() { return false; } + if (options_.scratch_space) { + header_.buffer_size = BUFFER_REGION_DEFAULT_SIZE; + } + // Headers are not complete, but this ensures the file is at the right // position. 
if (!android::base::WriteFully(fd_, &header_, sizeof(header_))) { @@ -197,7 +202,27 @@ bool CowWriter::OpenForWrite() { return false; } + if (options_.scratch_space) { + // Initialize the scratch space + std::string data(header_.buffer_size, 0); + if (!android::base::WriteFully(fd_, data.data(), header_.buffer_size)) { + PLOG(ERROR) << "writing scratch space failed"; + return false; + } + } + + if (!Sync()) { + LOG(ERROR) << "Header sync failed"; + return false; + } + + if (lseek(fd_.get(), sizeof(header_) + header_.buffer_size, SEEK_SET) < 0) { + PLOG(ERROR) << "lseek failed"; + return false; + } + InitPos(); + return true; } diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h index e93254e5c..060d363ee 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h @@ -21,11 +21,14 @@ namespace android { namespace snapshot { static constexpr uint64_t kCowMagicNumber = 0x436f77634f572121ULL; -static constexpr uint32_t kCowVersionMajor = 1; +static constexpr uint32_t kCowVersionMajor = 2; static constexpr uint32_t kCowVersionMinor = 0; static constexpr uint32_t kCowVersionManifest = 1; +static constexpr uint32_t BLOCK_SZ = 4096; +static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SZ) - 1); + // This header appears as the first sequence of bytes in the COW. All fields // in the layout are little-endian encoded. The on-disk layout is: // @@ -70,6 +73,9 @@ struct CowHeader { // Tracks merge operations completed uint64_t num_merge_ops; + + // Scratch space used during merge + uint32_t buffer_size; } __attribute__((packed)); // This structure is the same size of a normal Operation, but is repurposed for the footer. 
@@ -151,6 +157,9 @@ struct CowFooter { CowFooterData data; } __attribute__((packed)); +// 2MB Scratch space used for read-ahead +static constexpr uint64_t BUFFER_REGION_DEFAULT_SIZE = (1ULL << 21); + std::ostream& operator<<(std::ostream& os, CowOperation const& arg); int64_t GetNextOpOffset(const CowOperation& op, uint32_t cluster_size); diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h index a9d85bfdd..c764190a3 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h @@ -36,6 +36,8 @@ struct CowOptions { // Number of CowOperations in a cluster. 0 for no clustering. Cannot be 1. uint32_t cluster_ops = 200; + + bool scratch_space = false; }; // Interface for writing to a snapuserd COW. All operations are ordered; merges diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h index 2b6c8ef25..6bb7a394e 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h @@ -47,9 +47,6 @@ typedef sector_t chunk_t; static constexpr uint32_t CHUNK_SIZE = 8; static constexpr uint32_t CHUNK_SHIFT = (__builtin_ffs(CHUNK_SIZE) - 1); -static constexpr uint32_t BLOCK_SZ = 4096; -static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SZ) - 1); - #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) // This structure represents the kernel COW header. 
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index c50435564..8de622aef 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -2941,7 +2941,13 @@ Return SnapshotManager::InitializeUpdateSnapshots( return Return::Error(); } - CowWriter writer(CowOptions{.compression = it->second.compression_algorithm()}); + CowOptions options; + if (device()->IsTestDevice()) { + options.scratch_space = false; + } + options.compression = it->second.compression_algorithm(); + + CowWriter writer(options); if (!writer.Initialize(fd) || !writer.Finalize()) { LOG(ERROR) << "Could not initialize COW device for " << target_partition->name(); return Return::Error(); @@ -3050,6 +3056,10 @@ std::unique_ptr SnapshotManager::OpenCompressedSnapshotWriter( CowOptions cow_options; cow_options.compression = status.compression_algorithm(); cow_options.max_blocks = {status.device_size() / cow_options.block_size}; + // Disable scratch space for vts tests + if (device()->IsTestDevice()) { + cow_options.scratch_space = false; + } // Currently we don't support partial snapshots, since partition_cow_creator // never creates this scenario. diff --git a/fs_mgr/libsnapshot/snapshot_reader_test.cpp b/fs_mgr/libsnapshot/snapshot_reader_test.cpp index 4202d2221..937305951 100644 --- a/fs_mgr/libsnapshot/snapshot_reader_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_reader_test.cpp @@ -150,6 +150,7 @@ TEST_F(OfflineSnapshotTest, CompressedSnapshot) { CowOptions options; options.compression = "gz"; options.max_blocks = {kBlockCount}; + options.scratch_space = false; unique_fd cow_fd(dup(cow_->fd)); ASSERT_GE(cow_fd, 0); diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 8fae00b75..337d4101d 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include @@ -327,6 +328,7 @@ class SnapshotTest : public ::testing::Test { auto dynamic_partition_metadata = manifest.mutable_dynamic_partition_metadata(); dynamic_partition_metadata->set_vabc_enabled(IsCompressionEnabled()); + dynamic_partition_metadata->set_cow_version(android::snapshot::kCowVersionMajor); auto group = dynamic_partition_metadata->add_groups(); group->set_name("group"); @@ -841,6 +843,7 @@ class SnapshotUpdateTest : public SnapshotTest { auto dynamic_partition_metadata = manifest_.mutable_dynamic_partition_metadata(); dynamic_partition_metadata->set_vabc_enabled(IsCompressionEnabled()); + dynamic_partition_metadata->set_cow_version(android::snapshot::kCowVersionMajor); // Create a fake update package metadata. // Not using full name "system", "vendor", "product" because these names collide with the From d967d01f568b0eb285a239003de2b9bbe4ce0439 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Tue, 13 Apr 2021 22:34:14 +0000 Subject: [PATCH 3/6] libsnapshot: Retrieve COW version from update engine manifest update_metadata.proto will have the COW version. Retrieve that from the manifest and compare it with the COW library. If the versioning doesn't match, disable VABC. The primary use case of this is during downgrade tests in pre-submit. Whenever we have a COW format changes, we may have to disable VABC for that specific transition build. 
At a high level, the flow of version check will be: 1: Create a initial COW version of 1 in manifest (update_metadata.proto) 2: The latest COW version of libsnapshot is 2 3: libsnapshot will return VABC disabled 4: Check-in the CL and changes to manifest 5: Once the CL is baked in and the build is green, bump up the COW version to 2 in the manifest 6: Next set of tests, since both versions match, libsnapshot will enable VABC 7: Downgrade should be done to the build which was checked in at (5) Bug: 183863613 Test: Apply OTA and verify if VABC is disabled if the versions don't match Signed-off-by: Akilesh Kailash Change-Id: Id55f33a90bb31b417e72f4fbe370daf05a68f05a --- .../libsnapshot/include/libsnapshot/cow_writer.h | 2 ++ fs_mgr/libsnapshot/snapshot.cpp | 14 ++++++++++++-- .../update_engine/update_metadata.proto | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h index c764190a3..1192e7d29 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h @@ -106,6 +106,8 @@ class CowWriter : public ICowWriter { uint64_t GetCowSize() override; + uint32_t GetCowVersion() { return header_.major_version; } + protected: virtual bool EmitCopy(uint64_t new_block, uint64_t old_block) override; virtual bool EmitRawBlocks(uint64_t new_block_start, const void* data, size_t size) override; diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index 8de622aef..28dd3abde 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -2673,8 +2673,18 @@ Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manife AutoDeviceList created_devices; const auto& dap_metadata = manifest.dynamic_partition_metadata(); - bool use_compression = - IsCompressionEnabled() && dap_metadata.vabc_enabled() && !device_->IsRecovery(); + CowOptions options; 
+ CowWriter writer(options); + bool cow_format_support = true; + if (dap_metadata.cow_version() < writer.GetCowVersion()) { + cow_format_support = false; + } + + LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version() + << " writer.GetCowVersion(): " << writer.GetCowVersion(); + + bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() && + !device_->IsRecovery() && cow_format_support; std::string compression_algorithm; if (use_compression) { diff --git a/fs_mgr/libsnapshot/update_engine/update_metadata.proto b/fs_mgr/libsnapshot/update_engine/update_metadata.proto index f31ee31c1..69d72e19c 100644 --- a/fs_mgr/libsnapshot/update_engine/update_metadata.proto +++ b/fs_mgr/libsnapshot/update_engine/update_metadata.proto @@ -75,6 +75,7 @@ message DynamicPartitionMetadata { repeated DynamicPartitionGroup groups = 1; optional bool vabc_enabled = 3; optional string vabc_compression_param = 4; + optional uint32 cow_version = 5; } message DeltaArchiveManifest { From 580312bc953a61cf1b7d2d1d082ece759844bfcb Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 29 Mar 2021 22:22:45 +0000 Subject: [PATCH 4/6] libsnapshot:snapuserd: read-ahead COW copy ops Introduce read-ahead mechanism for COW copy ops. 1: Read-ahead thread will read from base device and store the data in scratch space along with the metadata. 2: Worker threads during merge will retrieve the data from read-ahead cache 3: Fixed set of blocks are read during each cycle by the read-ahead thread. 4: When the last block in the region is merged, read-ahead thread makes forward progress. Scratch space is set to 2MB and is only used from COW copy operations. We can extend this to Replace Ops based on performance evaluation. 
Performance: As mentioned in bug 181883791, Incremental OTA of size 55M with 235K copy operations where every block is moved by 4k: Without read-ahead: 40 Minutes for merge completion With read-ahead: 21 Minutes for merge completion Bug: 183863613 Test: 1: Full OTA - no regression observed. 2: Incremental OTA - with older COW format. Daemon will just skip the read-ahead feature for older COW format. 3: Incremental OTA - with new COW format. 4: Reboot and crash kernel when multiple times when incremental OTA is in-flight. Verify post reboot, read-ahead thread re-constructs the data from scratch space. 5: No regression observed in RSS-Anon memory usage when merge in-flight. Signed-off-by: Akilesh Kailash Change-Id: Ic565bfbee3e9fcfc94af694596dbf44c0877639f --- fs_mgr/libsnapshot/Android.bp | 3 +- fs_mgr/libsnapshot/cow_reader.cpp | 25 +- .../include/libsnapshot/cow_format.h | 19 + .../include/libsnapshot/cow_reader.h | 8 +- fs_mgr/libsnapshot/snapuserd.cpp | 290 +++++++++++- fs_mgr/libsnapshot/snapuserd.h | 118 ++++- fs_mgr/libsnapshot/snapuserd_readahead.cpp | 439 ++++++++++++++++++ fs_mgr/libsnapshot/snapuserd_worker.cpp | 76 ++- 8 files changed, 948 insertions(+), 30 deletions(-) create mode 100644 fs_mgr/libsnapshot/snapuserd_readahead.cpp diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp index 3cb412378..c97dca0ad 100644 --- a/fs_mgr/libsnapshot/Android.bp +++ b/fs_mgr/libsnapshot/Android.bp @@ -420,7 +420,8 @@ cc_defaults { "snapuserd_server.cpp", "snapuserd.cpp", "snapuserd_daemon.cpp", - "snapuserd_worker.cpp", + "snapuserd_worker.cpp", + "snapuserd_readahead.cpp", ], cflags: [ diff --git a/fs_mgr/libsnapshot/cow_reader.cpp b/fs_mgr/libsnapshot/cow_reader.cpp index c2e4f89df..35a02e654 100644 --- a/fs_mgr/libsnapshot/cow_reader.cpp +++ b/fs_mgr/libsnapshot/cow_reader.cpp @@ -369,13 +369,7 @@ void CowReader::InitializeMerge() { // Replace-op-4, Zero-op-9, Replace-op-5 } //============================================================== - 
for (uint64_t i = 0; i < ops_->size(); i++) { - auto& current_op = ops_->data()[i]; - if (current_op.type != kCowCopyOp) { - break; - } - num_copy_ops += 1; - } + num_copy_ops = FindNumCopyops(); std::sort(ops_.get()->begin() + num_copy_ops, ops_.get()->end(), [](CowOperation& op1, CowOperation& op2) -> bool { @@ -386,6 +380,23 @@ void CowReader::InitializeMerge() { CHECK(ops_->size() >= header_.num_merge_ops); ops_->erase(ops_.get()->begin(), ops_.get()->begin() + header_.num_merge_ops); } + + num_copy_ops = FindNumCopyops(); + set_copy_ops(num_copy_ops); +} + +uint64_t CowReader::FindNumCopyops() { + uint64_t num_copy_ops = 0; + + for (uint64_t i = 0; i < ops_->size(); i++) { + auto& current_op = ops_->data()[i]; + if (current_op.type != kCowCopyOp) { + break; + } + num_copy_ops += 1; + } + + return num_copy_ops; } bool CowReader::GetHeader(CowHeader* header) { diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h index 060d363ee..c05b7efed 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h @@ -35,6 +35,8 @@ static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SZ) - 1); // +-----------------------+ // | Header (fixed) | // +-----------------------+ +// | Scratch space | +// +-----------------------+ // | Operation (variable) | // | Data (variable) | // +-----------------------+ @@ -152,11 +154,28 @@ static constexpr uint8_t kCowCompressNone = 0; static constexpr uint8_t kCowCompressGz = 1; static constexpr uint8_t kCowCompressBrotli = 2; +static constexpr uint8_t kCowReadAheadNotStarted = 0; +static constexpr uint8_t kCowReadAheadInProgress = 1; +static constexpr uint8_t kCowReadAheadDone = 2; + struct CowFooter { CowFooterOperation op; CowFooterData data; } __attribute__((packed)); +struct ScratchMetadata { + // Block of data in the image that operation modifies + // and read-ahead thread stores the modified data + // 
in the scratch space + uint64_t new_block; + // Offset within the file to read the data + uint64_t file_offset; +} __attribute__((packed)); + +struct BufferState { + uint8_t read_ahead_state; +} __attribute__((packed)); + // 2MB Scratch space used for read-ahead static constexpr uint64_t BUFFER_REGION_DEFAULT_SIZE = (1ULL << 21); diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h index 552fd96d1..9ebcfd983 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h @@ -141,18 +141,21 @@ class CowReader : public ICowReader { bool GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read); - void UpdateMergeProgress(uint64_t merge_ops) { header_.num_merge_ops += merge_ops; } - void InitializeMerge(); void set_total_data_ops(uint64_t size) { total_data_ops_ = size; } uint64_t total_data_ops() { return total_data_ops_; } + void set_copy_ops(uint64_t size) { copy_ops_ = size; } + + uint64_t total_copy_ops() { return copy_ops_; } + void CloseCowFd() { owned_fd_ = {}; } private: bool ParseOps(std::optional label); + uint64_t FindNumCopyops(); android::base::unique_fd owned_fd_; android::base::borrowed_fd fd_; @@ -162,6 +165,7 @@ class CowReader : public ICowReader { std::optional last_label_; std::shared_ptr> ops_; uint64_t total_data_ops_; + uint64_t copy_ops_; }; } // namespace snapshot diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp index d104bd8d3..e6af7776a 100644 --- a/fs_mgr/libsnapshot/snapuserd.cpp +++ b/fs_mgr/libsnapshot/snapuserd.cpp @@ -47,6 +47,9 @@ bool Snapuserd::InitializeWorkers() { worker_threads_.push_back(std::move(wt)); } + + read_ahead_thread_ = std::make_unique(cow_device_, backing_store_device_, + misc_name_, GetSharedPtr()); return true; } @@ -54,7 +57,11 @@ bool Snapuserd::CommitMerge(int num_merge_ops) { struct CowHeader* ch = reinterpret_cast(mapped_addr_); 
ch->num_merge_ops += num_merge_ops; - // Sync the first 4k block + if (read_ahead_feature_ && read_ahead_ops_.size() > 0) { + struct BufferState* ra_state = GetBufferState(); + ra_state->read_ahead_state = kCowReadAheadInProgress; + } + int ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC); if (ret < 0) { PLOG(ERROR) << "msync header failed: " << ret; @@ -66,6 +73,174 @@ bool Snapuserd::CommitMerge(int num_merge_ops) { return true; } +void Snapuserd::PrepareReadAhead() { + if (!read_ahead_feature_) { + return; + } + + struct BufferState* ra_state = GetBufferState(); + // Check if the data has to be re-constructed from COW device + if (ra_state->read_ahead_state == kCowReadAheadDone) { + populate_data_from_cow_ = true; + } else { + populate_data_from_cow_ = false; + } + + StartReadAhead(); +} + +bool Snapuserd::GetRABuffer(std::unique_lock* lock, uint64_t block, void* buffer) { + CHECK(lock->owns_lock()); + std::unordered_map::iterator it = read_ahead_buffer_map_.find(block); + + // This will be true only for IO's generated as part of reading a root + // filesystem. IO's related to merge should always be in read-ahead cache. + if (it == read_ahead_buffer_map_.end()) { + return false; + } + + // Theoretically, we can send the data back from the read-ahead buffer + // all the way to the kernel without memcpy. However, if the IO is + // un-aligned, the wrapper function will need to touch the read-ahead + // buffers and transitions will be bit more complicated. 
+ memcpy(buffer, it->second, BLOCK_SZ); + return true; +} + +// ========== State transition functions for read-ahead operations =========== + +bool Snapuserd::GetReadAheadPopulatedBuffer(uint64_t block, void* buffer) { + if (!read_ahead_feature_) { + return false; + } + + { + std::unique_lock lock(lock_); + if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) { + return false; + } + + if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS) { + return GetRABuffer(&lock, block, buffer); + } + } + + { + // Read-ahead thread IO is in-progress. Wait for it to complete + std::unique_lock lock(lock_); + while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE || + io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS)) { + cv.wait(lock); + } + + return GetRABuffer(&lock, block, buffer); + } +} + +// This is invoked by read-ahead thread waiting for merge IO's +// to complete +bool Snapuserd::WaitForMergeToComplete() { + { + std::unique_lock lock(lock_); + while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN || + io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED)) { + cv.wait(lock); + } + + if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED) { + return false; + } + + io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_IN_PROGRESS; + return true; + } +} + +// This is invoked during the launch of worker threads. We wait +// for read-ahead thread to by fully up before worker threads +// are launched; else we will have a race between worker threads +// and read-ahead thread specifically during re-construction. 
+bool Snapuserd::WaitForReadAheadToStart() { + { + std::unique_lock lock(lock_); + while (!(io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS || + io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE)) { + cv.wait(lock); + } + + if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) { + return false; + } + + return true; + } +} + +// Invoked by worker threads when a sequence of merge operation +// is complete notifying read-ahead thread to make forward +// progress. +void Snapuserd::StartReadAhead() { + { + std::lock_guard lock(lock_); + io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN; + } + + cv.notify_one(); +} + +void Snapuserd::MergeCompleted() { + { + std::lock_guard lock(lock_); + io_state_ = READ_AHEAD_IO_TRANSITION::IO_TERMINATED; + } + + cv.notify_one(); +} + +bool Snapuserd::ReadAheadIOCompleted() { + // Flush the entire buffer region + int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC); + if (ret < 0) { + PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret; + return false; + } + + // Metadata and data are synced. Now, update the state. + // We need to update the state after flushing data; if there is a crash + // when read-ahead IO is in progress, the state of data in the COW file + // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data + // in the scratch space is good and during next reboot, read-ahead thread + // can safely re-construct the data. 
+ struct BufferState* ra_state = GetBufferState(); + ra_state->read_ahead_state = kCowReadAheadDone; + + ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC); + if (ret < 0) { + PLOG(ERROR) << "msync failed to flush Readahead completion state..."; + return false; + } + + // Notify the worker threads + { + std::lock_guard lock(lock_); + io_state_ = READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS; + } + + cv.notify_all(); + return true; +} + +void Snapuserd::ReadAheadIOFailed() { + { + std::lock_guard lock(lock_); + io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE; + } + + cv.notify_all(); +} + +//========== End of state transition functions ==================== + bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) { uint32_t stride = exceptions_per_area_ + 1; lldiv_t divresult = lldiv(chunk, stride); @@ -257,13 +432,16 @@ bool Snapuserd::ReadMetadata() { data_chunk_id = GetNextAllocatableChunkId(data_chunk_id); } + int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ); std::optional prev_id = {}; std::map map; std::set dest_blocks; size_t pending_copy_ops = exceptions_per_area_ - num_ops; - SNAP_LOG(INFO) << " Processing copy-ops at Area: " << vec_.size() - << " Number of replace/zero ops completed in this area: " << num_ops - << " Pending copy ops for this area: " << pending_copy_ops; + uint64_t total_copy_ops = reader_->total_copy_ops(); + + SNAP_LOG(DEBUG) << " Processing copy-ops at Area: " << vec_.size() + << " Number of replace/zero ops completed in this area: " << num_ops + << " Pending copy ops for this area: " << pending_copy_ops; while (!cowop_riter_->Done()) { do { const CowOperation* cow_op = &cowop_riter_->Get(); @@ -425,6 +603,9 @@ bool Snapuserd::ReadMetadata() { offset += sizeof(struct disk_exception); num_ops += 1; copy_ops++; + if (read_ahead_feature_) { + read_ahead_ops_.push_back(it->second); + } SNAP_LOG(DEBUG) << num_ops << ":" << " Copy-op: " @@ -452,6 +633,15 @@ bool Snapuserd::ReadMetadata() { } data_chunk_id = 
GetNextAllocatableChunkId(data_chunk_id); + total_copy_ops -= 1; + /* + * Split the number of ops based on the size of read-ahead buffer + * region. We need to ensure that kernel doesn't issue IO on blocks + * which are not read by the read-ahead thread. + */ + if (read_ahead_feature_ && (total_copy_ops % num_ra_ops_per_iter == 0)) { + data_chunk_id = GetNextAllocatableChunkId(data_chunk_id); + } } map.clear(); dest_blocks.clear(); @@ -469,6 +659,7 @@ bool Snapuserd::ReadMetadata() { chunk_vec_.shrink_to_fit(); vec_.shrink_to_fit(); + read_ahead_ops_.shrink_to_fit(); // Sort the vector based on sectors as we need this during un-aligned access std::sort(chunk_vec_.begin(), chunk_vec_.end(), compare); @@ -483,6 +674,8 @@ bool Snapuserd::ReadMetadata() { // Total number of sectors required for creating dm-user device num_sectors_ = ChunkToSector(data_chunk_id); merge_initiated_ = false; + PrepareReadAhead(); + return true; } @@ -490,8 +683,15 @@ bool Snapuserd::MmapMetadata() { CowHeader header; reader_->GetHeader(&header); - // mmap the first 4k page - total_mapped_addr_length_ = BLOCK_SZ; + if (header.major_version >= 2 && header.buffer_size > 0) { + total_mapped_addr_length_ = header.header_size + BUFFER_REGION_DEFAULT_SIZE; + read_ahead_feature_ = true; + } else { + // mmap the first 4k page - older COW format + total_mapped_addr_length_ = BLOCK_SZ; + read_ahead_feature_ = false; + } + mapped_addr_ = mmap(NULL, total_mapped_addr_length_, PROT_READ | PROT_WRITE, MAP_SHARED, cow_fd_.get(), 0); if (mapped_addr_ == MAP_FAILED) { @@ -529,11 +729,26 @@ bool Snapuserd::InitCowDevice() { } /* - * Entry point to launch worker threads + * Entry point to launch threads */ bool Snapuserd::Start() { std::vector> threads; + std::future ra_thread; + bool rathread = (read_ahead_feature_ && (read_ahead_ops_.size() > 0)); + // Start the read-ahead thread and wait + // for it as the data has to be re-constructed + // from COW device. 
+ if (rathread) { + ra_thread = std::async(std::launch::async, &ReadAheadThread::RunThread, + read_ahead_thread_.get()); + if (!WaitForReadAheadToStart()) { + SNAP_LOG(ERROR) << "Failed to start Read-ahead thread..."; + return false; + } + } + + // Launch worker threads for (int i = 0; i < worker_threads_.size(); i++) { threads.emplace_back( std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get())); @@ -544,8 +759,69 @@ bool Snapuserd::Start() { ret = t.get() && ret; } + if (rathread) { + // Notify the read-ahead thread that all worker threads + // are done. We need this explicit notification when + // there is an IO failure or there was a switch + // of dm-user table; thus, forcing the read-ahead + // thread to wake up. + MergeCompleted(); + ret = ret && ra_thread.get(); + } + return ret; } +uint64_t Snapuserd::GetBufferMetadataOffset() { + CowHeader header; + reader_->GetHeader(&header); + + size_t size = header.header_size + sizeof(BufferState); + return size; +} + +/* + * Metadata for read-ahead is 16 bytes. For a 2 MB region, we will + * end up with 8k (2 PAGE) worth of metadata. Thus, a 2MB buffer + * region is split into: + * + * 1: 8k metadata + * + */ +size_t Snapuserd::GetBufferMetadataSize() { + CowHeader header; + reader_->GetHeader(&header); + + size_t metadata_bytes = (header.buffer_size * sizeof(struct ScratchMetadata)) / BLOCK_SZ; + return metadata_bytes; +} + +size_t Snapuserd::GetBufferDataOffset() { + CowHeader header; + reader_->GetHeader(&header); + + return (header.header_size + GetBufferMetadataSize()); +} + +/* + * (2MB - 8K = 2088960 bytes) will be the buffer region to hold the data. 
+ */ +size_t Snapuserd::GetBufferDataSize() { + CowHeader header; + reader_->GetHeader(&header); + + size_t size = header.buffer_size - GetBufferMetadataSize(); + return size; +} + +struct BufferState* Snapuserd::GetBufferState() { + CowHeader header; + reader_->GetHeader(&header); + + struct BufferState* ra_state = + reinterpret_cast((char*)mapped_addr_ + header.header_size); + return ra_state; +} + } // namespace snapshot } // namespace android diff --git a/fs_mgr/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/snapuserd.h index 65af553c4..cd9d82a49 100644 --- a/fs_mgr/libsnapshot/snapuserd.h +++ b/fs_mgr/libsnapshot/snapuserd.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -57,6 +58,35 @@ static_assert(PAYLOAD_SIZE >= BLOCK_SZ); */ static constexpr int NUM_THREADS_PER_PARTITION = 4; +/* + * State transitions between worker threads and read-ahead + * threads. + * + * READ_AHEAD_BEGIN: Worker threads initiates the read-ahead + * thread to begin reading the copy operations + * for each bounded region. + * + * READ_AHEAD_IN_PROGRESS: When read ahead thread is in-flight + * and reading the copy operations. + * + * IO_IN_PROGRESS: Merge operation is in-progress by worker threads. + * + * IO_TERMINATED: When all the worker threads are done, request the + * read-ahead thread to terminate + * + * READ_AHEAD_FAILURE: If there are any IO failures when read-ahead + * thread is reading from COW device. 
+ * + * The transition of each states is described in snapuserd_readahead.cpp + */ +enum class READ_AHEAD_IO_TRANSITION { + READ_AHEAD_BEGIN, + READ_AHEAD_IN_PROGRESS, + IO_IN_PROGRESS, + IO_TERMINATED, + READ_AHEAD_FAILURE, +}; + class BufferSink : public IByteSink { public: void Initialize(size_t size); @@ -77,6 +107,42 @@ class BufferSink : public IByteSink { class Snapuserd; +class ReadAheadThread { + public: + ReadAheadThread(const std::string& cow_device, const std::string& backing_device, + const std::string& misc_name, std::shared_ptr snapuserd); + bool RunThread(); + + private: + void InitializeIter(); + bool IterDone(); + void IterNext(); + const CowOperation* GetIterOp(); + void InitializeBuffer(); + + bool InitializeFds(); + void CloseFds() { + cow_fd_ = {}; + backing_store_fd_ = {}; + } + + bool ReadAheadIOStart(); + void PrepareReadAhead(uint64_t* source_block, int* pending_ops, std::vector& blocks); + bool ReconstructDataFromCow(); + + void* read_ahead_buffer_; + void* metadata_buffer_; + std::vector::reverse_iterator read_ahead_iter_; + std::string cow_device_; + std::string backing_store_device_; + std::string misc_name_; + + unique_fd cow_fd_; + unique_fd backing_store_fd_; + + std::shared_ptr snapuserd_; +}; + class WorkerThread { public: WorkerThread(const std::string& cow_device, const std::string& backing_device, @@ -117,12 +183,16 @@ class WorkerThread { bool ProcessCopyOp(const CowOperation* cow_op); bool ProcessZeroOp(); + bool ReadFromBaseDevice(const CowOperation* cow_op); + bool GetReadAheadPopulatedBuffer(const CowOperation* cow_op); + // Merge related functions bool ProcessMergeComplete(chunk_t chunk, void* buffer); loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer, int* unmerged_exceptions); + int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset, - int unmerged_exceptions); + int unmerged_exceptions, bool* copy_op, bool* commit); sector_t ChunkToSector(chunk_t chunk) { return chunk << 
CHUNK_SHIFT; } chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; } @@ -159,7 +229,10 @@ class Snapuserd : public std::enable_shared_from_this { bool CommitMerge(int num_merge_ops); void CloseFds() { cow_fd_ = {}; } - void FreeResources() { worker_threads_.clear(); } + void FreeResources() { + worker_threads_.clear(); + read_ahead_thread_ = nullptr; + } size_t GetMetadataAreaSize() { return vec_.size(); } void* GetExceptionBuffer(size_t i) { return vec_[i].get(); } @@ -177,14 +250,44 @@ class Snapuserd : public std::enable_shared_from_this { void UnmapBufferRegion(); bool MmapMetadata(); + // Read-ahead related functions + std::vector& GetReadAheadOpsVec() { return read_ahead_ops_; } + std::unordered_map& GetReadAheadMap() { return read_ahead_buffer_map_; } + void* GetMappedAddr() { return mapped_addr_; } + bool IsReadAheadFeaturePresent() { return read_ahead_feature_; } + void PrepareReadAhead(); + void StartReadAhead(); + void MergeCompleted(); + bool ReadAheadIOCompleted(); + void ReadAheadIOFailed(); + bool WaitForMergeToComplete(); + bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer); + bool ReconstructDataFromCow() { return populate_data_from_cow_; } + void ReconstructDataFromCowFinish() { populate_data_from_cow_ = false; } + bool WaitForReadAheadToStart(); + + uint64_t GetBufferMetadataOffset(); + size_t GetBufferMetadataSize(); + size_t GetBufferDataOffset(); + size_t GetBufferDataSize(); + + // Final block to be merged in a given read-ahead buffer region + void SetFinalBlockMerged(uint64_t x) { final_block_merged_ = x; } + uint64_t GetFinalBlockMerged() { return final_block_merged_; } + // Total number of blocks to be merged in a given read-ahead buffer region + void SetTotalRaBlocksMerged(int x) { total_ra_blocks_merged_ = x; } + int GetTotalRaBlocksMerged() { return total_ra_blocks_merged_; } + private: bool IsChunkIdMetadata(chunk_t chunk); chunk_t GetNextAllocatableChunkId(chunk_t chunk_id); + bool 
GetRABuffer(std::unique_lock* lock, uint64_t block, void* buffer); bool ReadMetadata(); sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; } chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; } bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); } + struct BufferState* GetBufferState(); std::string cow_device_; std::string backing_store_device_; @@ -209,11 +312,22 @@ class Snapuserd : public std::enable_shared_from_this { std::vector> chunk_vec_; std::mutex lock_; + std::condition_variable cv; void* mapped_addr_; size_t total_mapped_addr_length_; std::vector> worker_threads_; + // Read-ahead related + std::unordered_map read_ahead_buffer_map_; + std::vector read_ahead_ops_; + bool populate_data_from_cow_ = false; + bool read_ahead_feature_; + uint64_t final_block_merged_; + int total_ra_blocks_merged_ = 0; + READ_AHEAD_IO_TRANSITION io_state_; + std::unique_ptr read_ahead_thread_; + bool merge_initiated_ = false; bool attached_ = false; }; diff --git a/fs_mgr/libsnapshot/snapuserd_readahead.cpp b/fs_mgr/libsnapshot/snapuserd_readahead.cpp new file mode 100644 index 000000000..d60a35306 --- /dev/null +++ b/fs_mgr/libsnapshot/snapuserd_readahead.cpp @@ -0,0 +1,439 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */ + +#include "snapuserd.h" + +#include +#include +#include + +#include + +namespace android { +namespace snapshot { + +using namespace android; +using namespace android::dm; +using android::base::unique_fd; + +#define SNAP_LOG(level) LOG(level) << misc_name_ << ": " +#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": " + +/* + * Merging a copy operation involves the following flow: + * + * 1: dm-snapshot layer requests merge for a 4k block. dm-user sends the request + * to the daemon + * 2: daemon reads the source block + * 3: daemon copies the source data + * 4: IO completion sent back to dm-user (a switch from user space to kernel) + * 5: dm-snapshot merges the data to base device + * 6: dm-snapshot sends the merge-completion IO to dm-user + * 7: dm-user re-directs the merge completion IO to daemon (one more switch) + * 8: daemon updates the COW file about the completed merge request (a write syscall) followed + * by an fsync. 9: Send the IO completion back to dm-user + * + * The above sequence is a significant overhead especially when merging one 4k + * block at a time. + * + * Read-ahead layer will optimize the above path by reading the data from base + * device in the background so that merging thread can retrieve the data from + * the read-ahead cache. Additionally, syncing of merged data is deferred to + * read-ahead thread so that the IO path is not bottlenecked. + * + * We create a scratch space of 2MB to store the read-ahead data in the COW + * device.
+ * + * +-----------------------+ + * | Header (fixed) | + * +-----------------------+ + * | Scratch space | <-- 2MB + * +-----------------------+ + * + * Scratch space is as follows: + * + * +-----------------------+ + * | Metadata | <- 4k page + * +-----------------------+ + * | Metadata | <- 4k page + * +-----------------------+ + * | | + * | Read-ahead data | + * | | + * +-----------------------+ + * + * State transitions and communication between read-ahead thread and worker + * thread during merge: + * ===================================================================== + * + * Worker Threads Read-Ahead thread + * ------------------------------------------------------------------ + * + * | + * | + * --> -----------------READ_AHEAD_BEGIN------------->| + * | | | READ_AHEAD_IN_PROGRESS + * | WAIT | + * | | | + * | |<-----------------IO_IN_PROGRESS--------------- + * | | | + * | | IO_IN_PRGRESS WAIT + * | | | + * |<--| | + * | | + * ------------------IO_TERMINATED--------------->| + * END + * + * + * =================================================================== + * + * Example: + * + * We have 6 copy operations to be executed in OTA and there is a overlap. Update-engine + * will write to COW file as follows: + * + * Op-1: 20 -> 23 + * Op-2: 19 -> 22 + * Op-3: 18 -> 21 + * Op-4: 17 -> 20 + * Op-5: 16 -> 19 + * Op-6: 15 -> 18 + * + * Read-ahead thread will read all the 6 source blocks and store the data in the + * scratch space. Metadata will contain the destination block numbers. Thus, + * scratch space will look something like this: + * + * +--------------+ + * | Block 23 | + * | offset - 1 | + * +--------------+ + * | Block 22 | + * | offset - 2 | + * +--------------+ + * | Block 21 | + * | offset - 3 | + * +--------------+ + * ... + * ... + * +--------------+ + * | Data-Block 20| <-- offset - 1 + * +--------------+ + * | Data-Block 19| <-- offset - 2 + * +--------------+ + * | Data-Block 18| <-- offset - 3 + * +--------------+ + * ... + * ... 
+ * + * ==================================================================== + * IO Path: + * + * Read-ahead will serve the data to worker threads during merge only + * after metadata and data are persisted to the scratch space. Worker + * threads during merge will always retrieve the data from cache; if the + * cache is not populated, it will wait for the read-ahead thread to finish. + * Furthermore, the number of operations merged will by synced to the header + * only when all the blocks in the read-ahead cache are merged. In the above + * case, when all 6 operations are merged, COW Header is updated with + * num_merge_ops = 6. + * + * Merge resume after crash: + * + * Let's say we have a crash after 5 operations are merged. i.e. after + * Op-5: 16->19 is completed but before the Op-6 is merged. Thus, COW Header + * num_merge_ops will be 0 as the all the ops were not merged yet. During next + * reboot, read-ahead thread will re-construct the data in-memory from the + * scratch space; when merge resumes, Op-1 will be re-exectued. However, + * data will be served from read-ahead cache safely even though, block 20 + * was over-written by Op-4. + * + */ + +ReadAheadThread::ReadAheadThread(const std::string& cow_device, const std::string& backing_device, + const std::string& misc_name, + std::shared_ptr snapuserd) { + cow_device_ = cow_device; + backing_store_device_ = backing_device; + misc_name_ = misc_name; + snapuserd_ = snapuserd; +} + +void ReadAheadThread::PrepareReadAhead(uint64_t* source_block, int* pending_ops, + std::vector& blocks) { + int num_ops = *pending_ops; + int nr_consecutive = 0; + + if (!IterDone() && num_ops) { + // Get the first block + const CowOperation* cow_op = GetIterOp(); + *source_block = cow_op->source; + IterNext(); + num_ops -= 1; + nr_consecutive = 1; + blocks.push_back(cow_op->new_block); + + /* + * Find number of consecutive blocks working backwards. 
+ */ + while (!IterDone() && num_ops) { + const CowOperation* op = GetIterOp(); + if (op->source != (*source_block - nr_consecutive)) { + break; + } + nr_consecutive += 1; + num_ops -= 1; + blocks.push_back(op->new_block); + IterNext(); + } + } +} + +bool ReadAheadThread::ReconstructDataFromCow() { + std::unordered_map& read_ahead_buffer_map = snapuserd_->GetReadAheadMap(); + read_ahead_buffer_map.clear(); + loff_t metadata_offset = 0; + loff_t start_data_offset = snapuserd_->GetBufferDataOffset(); + int num_ops = 0; + int total_blocks_merged = 0; + + while (true) { + struct ScratchMetadata* bm = reinterpret_cast( + (char*)metadata_buffer_ + metadata_offset); + + // Done reading metadata + if (bm->new_block == 0 && bm->file_offset == 0) { + break; + } + + loff_t buffer_offset = bm->file_offset - start_data_offset; + void* bufptr = static_cast((char*)read_ahead_buffer_ + buffer_offset); + read_ahead_buffer_map[bm->new_block] = bufptr; + num_ops += 1; + total_blocks_merged += 1; + + metadata_offset += sizeof(struct ScratchMetadata); + } + + // We are done re-constructing the mapping; however, we need to make sure + // all the COW operations to-be merged are present in the re-constructed + // mapping. + while (!IterDone()) { + const CowOperation* op = GetIterOp(); + if (read_ahead_buffer_map.find(op->new_block) != read_ahead_buffer_map.end()) { + num_ops -= 1; + snapuserd_->SetFinalBlockMerged(op->new_block); + IterNext(); + } else { + // Verify that we have covered all the ops which were re-constructed + // from COW device - These are the ops which are being + // re-constructed after crash. 
+ CHECK(num_ops == 0); + break; + } + } + + snapuserd_->SetTotalRaBlocksMerged(total_blocks_merged); + + snapuserd_->ReconstructDataFromCowFinish(); + + if (!snapuserd_->ReadAheadIOCompleted()) { + SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed..."; + snapuserd_->ReadAheadIOFailed(); + return false; + } + + SNAP_LOG(INFO) << "ReconstructDataFromCow success"; + return true; +} + +bool ReadAheadThread::ReadAheadIOStart() { + // Check if the data has to be constructed from the COW file. + // This will be true only once during boot up after a crash + // during merge. + if (snapuserd_->ReconstructDataFromCow()) { + return ReconstructDataFromCow(); + } + + std::unordered_map& read_ahead_buffer_map = snapuserd_->GetReadAheadMap(); + read_ahead_buffer_map.clear(); + + int num_ops = (snapuserd_->GetBufferDataSize()) / BLOCK_SZ; + loff_t metadata_offset = 0; + + struct ScratchMetadata* bm = + reinterpret_cast((char*)metadata_buffer_ + metadata_offset); + + bm->new_block = 0; + bm->file_offset = 0; + + std::vector blocks; + + loff_t buffer_offset = 0; + loff_t offset = 0; + loff_t file_offset = snapuserd_->GetBufferDataOffset(); + int total_blocks_merged = 0; + + while (true) { + uint64_t source_block; + int linear_blocks; + + PrepareReadAhead(&source_block, &num_ops, blocks); + linear_blocks = blocks.size(); + if (linear_blocks == 0) { + // No more blocks to read + SNAP_LOG(DEBUG) << " Read-ahead completed...."; + break; + } + + // Get the first block in the consecutive set of blocks + source_block = source_block + 1 - linear_blocks; + size_t io_size = (linear_blocks * BLOCK_SZ); + num_ops -= linear_blocks; + total_blocks_merged += linear_blocks; + + // Mark the block number as the one which will + // be the final block to be merged in this entire region. 
+ // Read-ahead thread will get + // notified when this block is merged to make + // forward progress + snapuserd_->SetFinalBlockMerged(blocks.back()); + + while (linear_blocks) { + uint64_t new_block = blocks.back(); + blocks.pop_back(); + // Assign the mapping + void* bufptr = static_cast((char*)read_ahead_buffer_ + offset); + read_ahead_buffer_map[new_block] = bufptr; + offset += BLOCK_SZ; + + bm = reinterpret_cast((char*)metadata_buffer_ + + metadata_offset); + bm->new_block = new_block; + bm->file_offset = file_offset; + + metadata_offset += sizeof(struct ScratchMetadata); + file_offset += BLOCK_SZ; + + linear_blocks -= 1; + } + + // Read from the base device consecutive set of blocks in one shot + if (!android::base::ReadFullyAtOffset(backing_store_fd_, + (char*)read_ahead_buffer_ + buffer_offset, io_size, + source_block * BLOCK_SZ)) { + SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_ + << "at block :" << source_block << " buffer_offset : " << buffer_offset + << " io_size : " << io_size << " buf-addr : " << read_ahead_buffer_; + + snapuserd_->ReadAheadIOFailed(); + return false; + } + + // This is important - explicitly set the contents to zero. This is used + // when re-constructing the data after crash. 
This indicates end of + // reading metadata contents when re-constructing the data + bm = reinterpret_cast((char*)metadata_buffer_ + metadata_offset); + bm->new_block = 0; + bm->file_offset = 0; + + buffer_offset += io_size; + CHECK(offset == buffer_offset); + CHECK((file_offset - snapuserd_->GetBufferDataOffset()) == offset); + } + + snapuserd_->SetTotalRaBlocksMerged(total_blocks_merged); + + if (!snapuserd_->ReadAheadIOCompleted()) { + SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed..."; + snapuserd_->ReadAheadIOFailed(); + return false; + } + + return true; +} + +bool ReadAheadThread::RunThread() { + if (!InitializeFds()) { + return false; + } + + InitializeIter(); + InitializeBuffer(); + + while (!IterDone()) { + if (!ReadAheadIOStart()) { + return false; + } + + bool status = snapuserd_->WaitForMergeToComplete(); + + if (status && !snapuserd_->CommitMerge(snapuserd_->GetTotalRaBlocksMerged())) { + return false; + } + + if (!status) break; + } + + CloseFds(); + SNAP_LOG(INFO) << " ReadAhead thread terminating...."; + return true; +} + +// Initialization +bool ReadAheadThread::InitializeFds() { + backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY)); + if (backing_store_fd_ < 0) { + SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_; + return false; + } + + cow_fd_.reset(open(cow_device_.c_str(), O_RDWR)); + if (cow_fd_ < 0) { + SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_; + return false; + } + + return true; +} + +void ReadAheadThread::InitializeIter() { + std::vector& read_ahead_ops = snapuserd_->GetReadAheadOpsVec(); + read_ahead_iter_ = read_ahead_ops.rbegin(); +} + +bool ReadAheadThread::IterDone() { + std::vector& read_ahead_ops = snapuserd_->GetReadAheadOpsVec(); + return read_ahead_iter_ == read_ahead_ops.rend(); +} + +void ReadAheadThread::IterNext() { + read_ahead_iter_++; +} + +const CowOperation* ReadAheadThread::GetIterOp() { + return *read_ahead_iter_; +} + +void ReadAheadThread::InitializeBuffer() { + void* 
mapped_addr = snapuserd_->GetMappedAddr(); + // Map the scratch space region into memory + metadata_buffer_ = + static_cast((char*)mapped_addr + snapuserd_->GetBufferMetadataOffset()); + read_ahead_buffer_ = static_cast((char*)mapped_addr + snapuserd_->GetBufferDataOffset()); +} + +} // namespace snapshot +} // namespace android diff --git a/fs_mgr/libsnapshot/snapuserd_worker.cpp b/fs_mgr/libsnapshot/snapuserd_worker.cpp index 4b2c5a083..9f42ab8ec 100644 --- a/fs_mgr/libsnapshot/snapuserd_worker.cpp +++ b/fs_mgr/libsnapshot/snapuserd_worker.cpp @@ -135,14 +135,11 @@ bool WorkerThread::ProcessReplaceOp(const CowOperation* cow_op) { return true; } -// Start the copy operation. This will read the backing -// block device which is represented by cow_op->source. -bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) { +bool WorkerThread::ReadFromBaseDevice(const CowOperation* cow_op) { void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ); CHECK(buffer != nullptr); - - // Issue a single 4K IO. However, this can be optimized - // if the successive blocks are contiguous. + SNAP_LOG(DEBUG) << " ReadFromBaseDevice...: new-block: " << cow_op->new_block + << " Source: " << cow_op->source; if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ, cow_op->source * BLOCK_SZ)) { SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_ @@ -153,6 +150,31 @@ bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) { return true; } +bool WorkerThread::GetReadAheadPopulatedBuffer(const CowOperation* cow_op) { + void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ); + CHECK(buffer != nullptr); + + if (!snapuserd_->GetReadAheadPopulatedBuffer(cow_op->new_block, buffer)) { + return false; + } + + return true; +} + +// Start the copy operation. This will read the backing +// block device which is represented by cow_op->source. 
+bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) { + if (!GetReadAheadPopulatedBuffer(cow_op)) { + SNAP_LOG(DEBUG) << " GetReadAheadPopulatedBuffer failed..." + << " new_block: " << cow_op->new_block; + if (!ReadFromBaseDevice(cow_op)) { + return false; + } + } + + return true; +} + bool WorkerThread::ProcessZeroOp() { // Zero out the entire block void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ); @@ -386,8 +408,10 @@ loff_t WorkerThread::GetMergeStartOffset(void* merged_buffer, void* unmerged_buf } int WorkerThread::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset, - int unmerged_exceptions) { + int unmerged_exceptions, bool* copy_op, bool* commit) { int merged_ops_cur_iter = 0; + std::unordered_map& read_ahead_buffer_map = snapuserd_->GetReadAheadMap(); + *copy_op = false; std::vector>& chunk_vec = snapuserd_->GetChunkVec(); // Find the operations which are merged in this cycle. @@ -411,6 +435,23 @@ int WorkerThread::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffe const CowOperation* cow_op = it->second; CHECK(cow_op != nullptr); + if (snapuserd_->IsReadAheadFeaturePresent() && cow_op->type == kCowCopyOp) { + *copy_op = true; + // Every single copy operation has to come from read-ahead + // cache. + if (read_ahead_buffer_map.find(cow_op->new_block) == read_ahead_buffer_map.end()) { + SNAP_LOG(ERROR) + << " Block: " << cow_op->new_block << " not found in read-ahead cache" + << " Source: " << cow_op->source; + return -1; + } + // If this is a final block merged in the read-ahead buffer + // region, notify the read-ahead thread to make forward + // progress + if (cow_op->new_block == snapuserd_->GetFinalBlockMerged()) { + *commit = true; + } + } CHECK(cow_op->new_block == cow_de->old_chunk); // zero out to indicate that operation is merged. 
@@ -442,6 +483,8 @@ int WorkerThread::GetNumberOfMergedOpe(void* merged_buffer, void* unmerged_buffe bool WorkerThread::ProcessMergeComplete(chunk_t chunk, void* buffer) { uint32_t stride = exceptions_per_area_ + 1; const std::vector>& vec = snapuserd_->GetMetadataVec(); + bool copy_op = false; + bool commit = false; // ChunkID to vector index lldiv_t divresult = lldiv(chunk, stride); @@ -452,13 +495,24 @@ bool WorkerThread::ProcessMergeComplete(chunk_t chunk, void* buffer) { int unmerged_exceptions = 0; loff_t offset = GetMergeStartOffset(buffer, vec[divresult.quot].get(), &unmerged_exceptions); - int merged_ops_cur_iter = - GetNumberOfMergedOps(buffer, vec[divresult.quot].get(), offset, unmerged_exceptions); + int merged_ops_cur_iter = GetNumberOfMergedOps(buffer, vec[divresult.quot].get(), offset, + unmerged_exceptions, &copy_op, &commit); // There should be at least one operation merged in this cycle CHECK(merged_ops_cur_iter > 0); - if (!snapuserd_->CommitMerge(merged_ops_cur_iter)) { - return false; + + if (copy_op) { + if (commit) { + // Push the flushing logic to read-ahead thread so that merge thread + // can make forward progress. Sync will happen in the background + snapuserd_->StartReadAhead(); + } + } else { + // Non-copy ops and all ops in older COW format + if (!snapuserd_->CommitMerge(merged_ops_cur_iter)) { + SNAP_LOG(ERROR) << "CommitMerge failed..."; + return false; + } } SNAP_LOG(DEBUG) << "Merge success: " << merged_ops_cur_iter << "chunk: " << chunk; From 150bcbf7c6aa4d47cfef9525dd4321801f8a3786 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Sun, 18 Apr 2021 18:24:54 +0000 Subject: [PATCH 5/6] libsnapshot: Flush data to scratch space only for overlapping regions When read-ahead thread caches the data from base device, flush the data only if there are overlapping regions. If there is a crash, subsequent reboot will not recover the data from scratch space. Rather, data will be re-constructed from base device.
Additionally, allow batch merge of blocks by the kernel even for overlapping regions given that we have the read-ahead thread taking care of the overlapping blocks. Bug: 183863613 Test: 1: Incremental OTA from build 7284758 to 7288239. Merge time reduces from ~6 minutes to ~2.5 minutes 2: Reboot and crash kernel multiple times when merge was in progress 3: Verify read-ahead thread re-constructs the data for overlapping regions. Signed-off-by: Akilesh Kailash Change-Id: I50e0d828f4fb36a23f0ca13b07a73229ba68874d --- fs_mgr/libsnapshot/snapuserd.cpp | 87 ++++++++-------------- fs_mgr/libsnapshot/snapuserd.h | 8 +- fs_mgr/libsnapshot/snapuserd_readahead.cpp | 25 ++++++- 3 files changed, 62 insertions(+), 58 deletions(-) diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp index e6af7776a..2ccc75002 100644 --- a/fs_mgr/libsnapshot/snapuserd.cpp +++ b/fs_mgr/libsnapshot/snapuserd.cpp @@ -197,27 +197,29 @@ void Snapuserd::MergeCompleted() { cv.notify_one(); } -bool Snapuserd::ReadAheadIOCompleted() { - // Flush the entire buffer region - int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC); - if (ret < 0) { - PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret; - return false; - } +bool Snapuserd::ReadAheadIOCompleted(bool sync) { + if (sync) { + // Flush the entire buffer region + int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC); + if (ret < 0) { + PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret; + return false; + } - // Metadata and data are synced. Now, update the state. - // We need to update the state after flushing data; if there is a crash - // when read-ahead IO is in progress, the state of data in the COW file - // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data - // in the scratch space is good and during next reboot, read-ahead thread - // can safely re-construct the data.
- struct BufferState* ra_state = GetBufferState(); - ra_state->read_ahead_state = kCowReadAheadDone; + // Metadata and data are synced. Now, update the state. + // We need to update the state after flushing data; if there is a crash + // when read-ahead IO is in progress, the state of data in the COW file + // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data + // in the scratch space is good and during next reboot, read-ahead thread + // can safely re-construct the data. + struct BufferState* ra_state = GetBufferState(); + ra_state->read_ahead_state = kCowReadAheadDone; - ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC); - if (ret < 0) { - PLOG(ERROR) << "msync failed to flush Readahead completion state..."; - return false; + ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC); + if (ret < 0) { + PLOG(ERROR) << "msync failed to flush Readahead completion state..."; + return false; + } } // Notify the worker threads @@ -435,7 +437,6 @@ bool Snapuserd::ReadMetadata() { int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ); std::optional prev_id = {}; std::map map; - std::set dest_blocks; size_t pending_copy_ops = exceptions_per_area_ - num_ops; uint64_t total_copy_ops = reader_->total_copy_ops(); @@ -477,41 +478,20 @@ bool Snapuserd::ReadMetadata() { // Op-6: 15 -> 18 // // Note that the blocks numbers are contiguous. Hence, all 6 copy - // operations can potentially be batch merged. However, that will be + // operations can be batch merged. However, that will be // problematic if we have a crash as block 20, 19, 18 would have // been overwritten and hence subsequent recovery may end up with // a silent data corruption when op-1, op-2 and op-3 are // re-executed. 
// - // We will split these 6 operations into two batches viz: - // - // Batch-1: - // =================== - // Op-1: 20 -> 23 - // Op-2: 19 -> 22 - // Op-3: 18 -> 21 - // =================== - // - // Batch-2: - // ================== - // Op-4: 17 -> 20 - // Op-5: 16 -> 19 - // Op-6: 15 -> 18 - // ================== - // - // Now, merge sequence will look like: - // - // 1: Merge Batch-1 { op-1, op-2, op-3 } - // 2: Update Metadata in COW File that op-1, op-2, op-3 merge is - // done. - // 3: Merge Batch-2 - // 4: Update Metadata in COW File that op-4, op-5, op-6 merge is - // done. - // - // Note, that the order of block operations are still the same. - // However, we have two batch merge operations. Any crash between - // either of this sequence should be safe as each of these - // batches are self-contained. + // To address the above problem, read-ahead thread will + // read all the 6 source blocks, cache them in the scratch + // space of the COW file. During merge, read-ahead + // thread will serve the blocks from the read-ahead cache. + // If there is a crash during merge; on subsequent reboot, + // read-ahead thread will recover the data from the + // scratch space and re-construct it thereby there + // is no loss of data. 
// //=========================================================== // @@ -575,14 +555,10 @@ bool Snapuserd::ReadMetadata() { if (diff != 1) { break; } - if (dest_blocks.count(cow_op->new_block) || map.count(cow_op->source) > 0) { - break; - } } metadata_found = true; pending_copy_ops -= 1; map[cow_op->new_block] = cow_op; - dest_blocks.insert(cow_op->source); prev_id = cow_op->new_block; cowop_riter_->Next(); } while (!cowop_riter_->Done() && pending_copy_ops); @@ -644,7 +620,6 @@ bool Snapuserd::ReadMetadata() { } } map.clear(); - dest_blocks.clear(); prev_id.reset(); } @@ -746,6 +721,8 @@ bool Snapuserd::Start() { SNAP_LOG(ERROR) << "Failed to start Read-ahead thread..."; return false; } + + SNAP_LOG(INFO) << "Read-ahead thread started..."; } // Launch worker threads diff --git a/fs_mgr/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/snapuserd.h index cd9d82a49..0a5ab5035 100644 --- a/fs_mgr/libsnapshot/snapuserd.h +++ b/fs_mgr/libsnapshot/snapuserd.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -129,6 +130,7 @@ class ReadAheadThread { bool ReadAheadIOStart(); void PrepareReadAhead(uint64_t* source_block, int* pending_ops, std::vector& blocks); bool ReconstructDataFromCow(); + void CheckOverlap(const CowOperation* cow_op); void* read_ahead_buffer_; void* metadata_buffer_; @@ -141,6 +143,10 @@ class ReadAheadThread { unique_fd backing_store_fd_; std::shared_ptr snapuserd_; + + std::unordered_set dest_blocks_; + std::unordered_set source_blocks_; + bool overlap_; }; class WorkerThread { @@ -258,7 +264,7 @@ class Snapuserd : public std::enable_shared_from_this { void PrepareReadAhead(); void StartReadAhead(); void MergeCompleted(); - bool ReadAheadIOCompleted(); + bool ReadAheadIOCompleted(bool sync); void ReadAheadIOFailed(); bool WaitForMergeToComplete(); bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer); diff --git a/fs_mgr/libsnapshot/snapuserd_readahead.cpp b/fs_mgr/libsnapshot/snapuserd_readahead.cpp index 
d60a35306..09ee2f28b 100644 --- a/fs_mgr/libsnapshot/snapuserd_readahead.cpp +++ b/fs_mgr/libsnapshot/snapuserd_readahead.cpp @@ -171,6 +171,15 @@ ReadAheadThread::ReadAheadThread(const std::string& cow_device, const std::strin snapuserd_ = snapuserd; } +void ReadAheadThread::CheckOverlap(const CowOperation* cow_op) { + if (dest_blocks_.count(cow_op->new_block) || source_blocks_.count(cow_op->source)) { + overlap_ = true; + } + + dest_blocks_.insert(cow_op->source); + source_blocks_.insert(cow_op->new_block); +} + void ReadAheadThread::PrepareReadAhead(uint64_t* source_block, int* pending_ops, std::vector& blocks) { int num_ops = *pending_ops; @@ -185,6 +194,10 @@ void ReadAheadThread::PrepareReadAhead(uint64_t* source_block, int* pending_ops, nr_consecutive = 1; blocks.push_back(cow_op->new_block); + if (!overlap_) { + CheckOverlap(cow_op); + } + /* * Find number of consecutive blocks working backwards. */ @@ -197,6 +210,10 @@ void ReadAheadThread::PrepareReadAhead(uint64_t* source_block, int* pending_ops, num_ops -= 1; blocks.push_back(op->new_block); IterNext(); + + if (!overlap_) { + CheckOverlap(op); + } } } } @@ -249,7 +266,7 @@ bool ReadAheadThread::ReconstructDataFromCow() { snapuserd_->ReconstructDataFromCowFinish(); - if (!snapuserd_->ReadAheadIOCompleted()) { + if (!snapuserd_->ReadAheadIOCompleted(true)) { SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed..."; snapuserd_->ReadAheadIOFailed(); return false; @@ -285,6 +302,9 @@ bool ReadAheadThread::ReadAheadIOStart() { loff_t offset = 0; loff_t file_offset = snapuserd_->GetBufferDataOffset(); int total_blocks_merged = 0; + overlap_ = false; + dest_blocks_.clear(); + source_blocks_.clear(); while (true) { uint64_t source_block; @@ -356,7 +376,8 @@ bool ReadAheadThread::ReadAheadIOStart() { snapuserd_->SetTotalRaBlocksMerged(total_blocks_merged); - if (!snapuserd_->ReadAheadIOCompleted()) { + // Flush the data only if we have a overlapping blocks in the region + if 
(!snapuserd_->ReadAheadIOCompleted(overlap_)) { SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed..."; snapuserd_->ReadAheadIOFailed(); return false; From 936e9ce79db1a751788da910151487869b07fcaf Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 29 Mar 2021 22:58:32 +0000 Subject: [PATCH 6/6] libsnapshot:snapuserd:Add unit test for read-ahead code path. Add overlapping copy ops to test the read-ahead logic. Bug: 183863613 Test: cow_snapuserd_test Signed-off-by: Akilesh Kailash Change-Id: Ie96bc644c5f2eaae45cf048d9ba8a206930c3ce8 --- fs_mgr/libsnapshot/cow_snapuserd_test.cpp | 86 ++++++++++++++++++++++- 1 file changed, 83 insertions(+), 3 deletions(-) diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp index 045d9db5f..313eb645a 100644 --- a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp +++ b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp @@ -96,6 +96,7 @@ class TempDevice { class CowSnapuserdTest final { public: bool Setup(); + bool SetupCopyOverlap(); bool Merge(); void ValidateMerge(); void ReadSnapshotDeviceAndValidate(); @@ -114,6 +115,7 @@ class CowSnapuserdTest final { void StartMerge(); void CreateCowDevice(); + void CreateCowDeviceWithCopyOverlap(); void CreateBaseDevice(); void InitCowDevice(); void SetDeviceControlName(); @@ -191,6 +193,24 @@ bool CowSnapuserdTest::Setup() { return setup_ok_; } +bool CowSnapuserdTest::SetupCopyOverlap() { + CreateBaseDevice(); + CreateCowDeviceWithCopyOverlap(); + + SetDeviceControlName(); + + StartSnapuserdDaemon(); + InitCowDevice(); + + CreateDmUserDevice(); + InitDaemon(); + + CreateSnapshotDevice(); + setup_ok_ = true; + + return setup_ok_; +} + void CowSnapuserdTest::StartSnapuserdDaemon() { pid_t pid = fork(); ASSERT_GE(pid, 0); @@ -255,6 +275,49 @@ void CowSnapuserdTest::ReadSnapshotDeviceAndValidate() { ASSERT_EQ(memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + (size_ * 3), size_), 0); } +void CowSnapuserdTest::CreateCowDeviceWithCopyOverlap() { + std::string 
path = android::base::GetExecutableDirectory(); + cow_system_ = std::make_unique(path); + + CowOptions options; + options.compression = "gz"; + CowWriter writer(options); + + ASSERT_TRUE(writer.Initialize(cow_system_->fd)); + + size_t num_blocks = size_ / options.block_size; + size_t x = num_blocks; + size_t blk_src_copy = num_blocks - 1; + + // Create overlapping copy operations + while (1) { + ASSERT_TRUE(writer.AddCopy(blk_src_copy + 1, blk_src_copy)); + x -= 1; + if (x == 0) { + ASSERT_EQ(blk_src_copy, 0); + break; + } + blk_src_copy -= 1; + } + + // Flush operations + ASSERT_TRUE(writer.Finalize()); + + // Construct the buffer required for validation + orig_buffer_ = std::make_unique(total_base_size_); + + // Read the entire base device + ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0), + true); + + // Merged operations + ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), options.block_size, 0), + true); + ASSERT_EQ(android::base::ReadFullyAtOffset( + base_fd_, (char*)orig_buffer_.get() + options.block_size, size_, 0), + true); +} + void CowSnapuserdTest::CreateCowDevice() { unique_fd rnd_fd; loff_t offset = 0; @@ -707,17 +770,17 @@ void CowSnapuserdMetadataTest::ValidateMetadata() { de = reinterpret_cast((char*)buffer + offset); ASSERT_EQ(de->old_chunk, 21); - ASSERT_EQ(de->new_chunk, 537); + ASSERT_EQ(de->new_chunk, 536); offset += sizeof(struct disk_exception); de = reinterpret_cast((char*)buffer + offset); ASSERT_EQ(de->old_chunk, 22); - ASSERT_EQ(de->new_chunk, 538); + ASSERT_EQ(de->new_chunk, 537); offset += sizeof(struct disk_exception); de = reinterpret_cast((char*)buffer + offset); ASSERT_EQ(de->old_chunk, 23); - ASSERT_EQ(de->new_chunk, 539); + ASSERT_EQ(de->new_chunk, 538); offset += sizeof(struct disk_exception); // End of metadata @@ -757,6 +820,23 @@ TEST(Snapuserd_Test, Snapshot_IO_TEST) { harness.ValidateMerge(); harness.Shutdown(); } + +TEST(Snapuserd_Test, 
Snapshot_COPY_Overlap_TEST) { + CowSnapuserdTest harness; + ASSERT_TRUE(harness.SetupCopyOverlap()); + ASSERT_TRUE(harness.Merge()); + harness.ValidateMerge(); + harness.Shutdown(); +} + +TEST(Snapuserd_Test, Snapshot_COPY_Overlap_Merge_Resume_TEST) { + CowSnapuserdTest harness; + ASSERT_TRUE(harness.SetupCopyOverlap()); + harness.MergeInterrupt(); + harness.ValidateMerge(); + harness.Shutdown(); +} + } // namespace snapshot } // namespace android