From 0d84909d667bf3ebd49db2ba87af759d1f326aba Mon Sep 17 00:00:00 2001 From: Sandeep Dhavale Date: Fri, 27 Sep 2024 13:37:18 -0700 Subject: [PATCH] libsnapshot: Use words for xor ops Use words instead of doing xor byte-by-byte for the entire buffer. Profiling this with unittest, I can see the xor ops are much faster. Also handle the word size appropriately for 32-bit and 64-bit platforms. simpleperf shows that ProcessXorOp() is at least 30% faster. Similar improvement is seen for ProcessXorData(). Test: snapuserd_test Bug: 369905394 Change-Id: I0bd8586f7fc1bf184f19320667b8195b07f9cdf2 Signed-off-by: Sandeep Dhavale --- .../snapuserd/user-space-merge/read_worker.cpp | 11 ++++++++--- .../user-space-merge/snapuserd_readahead.cpp | 12 +++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp index ef311d475..33767d654 100644 --- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp +++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp @@ -104,6 +104,8 @@ bool ReadWorker::ProcessCopyOp(const CowOperation* cow_op, void* buffer) { } bool ReadWorker::ProcessXorOp(const CowOperation* cow_op, void* buffer) { + using WordType = std::conditional_t; + if (!ReadFromSourceDevice(cow_op, buffer)) { return false; } @@ -120,9 +122,12 @@ bool ReadWorker::ProcessXorOp(const CowOperation* cow_op, void* buffer) { return false; } - auto xor_out = reinterpret_cast(buffer); - for (size_t i = 0; i < BLOCK_SZ; i++) { - xor_out[i] ^= xor_buffer_[i]; + auto xor_in = reinterpret_cast(xor_buffer_.data()); + auto xor_out = reinterpret_cast(buffer); + auto num_words = BLOCK_SZ / sizeof(WordType); + + for (auto i = 0; i < num_words; i++) { + xor_out[i] ^= xor_in[i]; } return true; } diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_readahead.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_readahead.cpp 
index 6b1ed0cd7..9a1d441c4 100644 --- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_readahead.cpp +++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_readahead.cpp @@ -458,6 +458,7 @@ bool ReadAhead::ReapIoCompletions(int pending_ios_to_complete) { void ReadAhead::ProcessXorData(size_t& block_xor_index, size_t& xor_index, std::vector& xor_op_vec, void* buffer, loff_t& buffer_offset) { + using WordType = std::conditional_t; loff_t xor_buf_offset = 0; while (block_xor_index < blocks_.size()) { @@ -470,13 +471,14 @@ void ReadAhead::ProcessXorData(size_t& block_xor_index, size_t& xor_index, // Check if this block is an XOR op if (xor_op->new_block == new_block) { // Pointer to the data read from base device - uint8_t* buffer = reinterpret_cast(bufptr); + auto buffer_words = reinterpret_cast(bufptr); // Get the xor'ed data read from COW device - uint8_t* xor_data = reinterpret_cast((char*)bufsink_.GetPayloadBufPtr() + - xor_buf_offset); + auto xor_data_words = reinterpret_cast( + (char*)bufsink_.GetPayloadBufPtr() + xor_buf_offset); + auto num_words = BLOCK_SZ / sizeof(WordType); - for (size_t byte_offset = 0; byte_offset < BLOCK_SZ; byte_offset++) { - buffer[byte_offset] ^= xor_data[byte_offset]; + for (auto i = 0; i < num_words; i++) { + buffer_words[i] ^= xor_data_words[i]; } // Move to next XOR op