diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp index ed67a1c84..4cc0fd395 100644 --- a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp +++ b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp @@ -239,16 +239,12 @@ void CowSnapuserdTest::CreateCowDevice() { ASSERT_TRUE(rnd_fd > 0); std::unique_ptr random_buffer_1_ = std::make_unique(size_); - std::unique_ptr random_buffer_2_ = std::make_unique(size_); // Fill random data for (size_t j = 0; j < (size_ / 1_MiB); j++) { ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_1_.get() + offset, 1_MiB, 0), true); - ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_2_.get() + offset, 1_MiB, 0), - true); - offset += 1_MiB; } @@ -280,7 +276,7 @@ void CowSnapuserdTest::CreateCowDevice() { size_t blk_random2_replace_start = blk_zero_copy_end; - ASSERT_TRUE(writer.AddRawBlocks(blk_random2_replace_start, random_buffer_2_.get(), size_)); + ASSERT_TRUE(writer.AddRawBlocks(blk_random2_replace_start, random_buffer_1_.get(), size_)); // Flush operations ASSERT_TRUE(writer.Finalize()); @@ -292,7 +288,7 @@ void CowSnapuserdTest::CreateCowDevice() { ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), size_, size_), true); memcpy((char*)orig_buffer_.get() + size_, random_buffer_1_.get(), size_); memcpy((char*)orig_buffer_.get() + (size_ * 2), (void*)zero_buffer.c_str(), size_); - memcpy((char*)orig_buffer_.get() + (size_ * 3), random_buffer_2_.get(), size_); + memcpy((char*)orig_buffer_.get() + (size_ * 3), random_buffer_1_.get(), size_); } void CowSnapuserdTest::InitCowDevice() { diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h index e8dbe6e95..7941e6864 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h @@ -50,6 +50,8 @@ static constexpr uint32_t CHUNK_SHIFT = (__builtin_ffs(CHUNK_SIZE) - 1); static 
constexpr uint32_t BLOCK_SIZE = 4096; static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SIZE) - 1); +#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) + // This structure represents the kernel COW header. // All the below fields should be in Little Endian format. struct disk_header { diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp index 4f9480684..573b6a5e6 100644 --- a/fs_mgr/libsnapshot/snapuserd.cpp +++ b/fs_mgr/libsnapshot/snapuserd.cpp @@ -129,88 +129,126 @@ bool Snapuserd::ProcessZeroOp() { return true; } -/* - * Read the data of size bytes from a given chunk. - * - * Kernel can potentially merge the blocks if the - * successive chunks are contiguous. For chunk size of 8, - * there can be 256 disk exceptions; and if - * all 256 disk exceptions are contiguous, kernel can merge - * them into a single IO. - * - * Since each chunk in the disk exception - * mapping represents a 4k block, kernel can potentially - * issue 256*4k = 1M IO in one shot. 
- * - * Even though kernel assumes that the blocks are - * contiguous, we need to split the 1M IO into 4k chunks - * as each operation represents 4k and it can either be: - * - * 1: Replace operation - * 2: Copy operation - * 3: Zero operation - * - */ -bool Snapuserd::ReadData(chunk_t chunk, size_t size) { - size_t read_size = size; - bool ret = true; - chunk_t chunk_key = chunk; +bool Snapuserd::ProcessCowOp(const CowOperation* cow_op) { + CHECK(cow_op != nullptr); - if (!((read_size & (BLOCK_SIZE - 1)) == 0)) { - SNAP_LOG(ERROR) << "ReadData - unaligned read_size: " << read_size; - return false; + switch (cow_op->type) { + case kCowReplaceOp: { + return ProcessReplaceOp(cow_op); + } + + case kCowZeroOp: { + return ProcessZeroOp(); + } + + case kCowCopyOp: { + return ProcessCopyOp(cow_op); + } + + default: { + SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type; + } + } + return false; +} + +int Snapuserd::ReadUnalignedSector(sector_t sector, size_t size, + std::map::iterator& it) { + size_t skip_sector_size = 0; + + SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size + << " Aligned sector: " << it->second; + + if (!ProcessCowOp(it->second)) { + SNAP_LOG(ERROR) << "ReadUnalignedSector: " << sector << " failed"; + return -1; } - while (read_size > 0) { - const CowOperation* cow_op = chunk_map_[chunk_key]; - CHECK(cow_op != nullptr); + int num_sectors_skip = sector - it->first; - switch (cow_op->type) { - case kCowReplaceOp: { - ret = ProcessReplaceOp(cow_op); - break; - } + if (num_sectors_skip > 0) { + skip_sector_size = num_sectors_skip << SECTOR_SHIFT; + char* buffer = reinterpret_cast(bufsink_.GetBufPtr()); + struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0])); - case kCowZeroOp: { - ret = ProcessZeroOp(); - break; - } + memmove(msg->payload.buf, (char*)msg->payload.buf + skip_sector_size, + (BLOCK_SIZE - skip_sector_size)); + } - case kCowCopyOp: { - ret = ProcessCopyOp(cow_op); - break; - } + 
bufsink_.ResetBufferOffset(); + return std::min(size, (BLOCK_SIZE - skip_sector_size)); } - default: { - SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type; - ret = false; - break; - } +/* + * Read the data for a given COW Operation. + * + * Kernel can issue IO at a sector granularity. + * Hence, an IO may end up with reading partial + * data from a COW operation or we may also + * end up with interspersed request between + * two COW operations. + * + */ +int Snapuserd::ReadData(sector_t sector, size_t size) { + /* + * chunk_map stores COW operation at 4k granularity. + * If the requested IO with the sector falls on the 4k + * boundary, then we can read the COW op directly without + * any issue. + * + * However, if the requested sector is not 4K aligned, + * then we will have to find the nearest COW operation + * and chop the 4K block to fetch the requested sector. + */ + std::map::iterator it = chunk_map_.find(sector); + if (it == chunk_map_.end()) { + it = chunk_map_.lower_bound(sector); + if (it != chunk_map_.begin()) { + --it; } - if (!ret) { - SNAP_LOG(ERROR) << "ReadData failed for operation: " << cow_op->type; - return false; - } + /* + * If the IO is spanned between two COW operations, + * split the IO into two parts: + * + * 1: Read the first part from the single COW op + * 2: Read the second part from the next COW op. + * + * Ex: Let's say we have a 1024 Bytes IO request. + * + * 0 COW OP-1 4096 COW OP-2 8192 + * |******************|*******************| + * |*****|*****| + * 3584 4608 + * <- 1024B - > + * + * We have two COW operations which are 4k blocks. + * The IO is requested for 1024 Bytes which are spanned + * between two COW operations. 
We will split this IO + * into two parts: + * + * 1: IO of size 512B from offset 3584 bytes (COW OP-1) + * 2: IO of size 512B from offset 4096 bytes (COW OP-2) + */ + return ReadUnalignedSector(sector, size, it); + } + int num_ops = DIV_ROUND_UP(size, BLOCK_SIZE); + while (num_ops) { + if (!ProcessCowOp(it->second)) { + return -1; + } + num_ops -= 1; + it++; // Update the buffer offset bufsink_.UpdateBufferOffset(BLOCK_SIZE); - read_size -= BLOCK_SIZE; - - // Start iterating the chunk incrementally; Since while - // constructing the metadata, we know that the chunk IDs - // are contiguous - chunk_key += 1; - - if (cow_op->type == kCowCopyOp) { - CHECK(read_size == 0); - } + SNAP_LOG(DEBUG) << "ReadData at sector: " << sector << " size: " << size; } // Reset the buffer offset bufsink_.ResetBufferOffset(); - return ret; + return size; } /* @@ -261,9 +299,7 @@ bool Snapuserd::ReadDiskExceptions(chunk_t chunk, size_t read_size) { if (divresult.quot < vec_.size()) { size = exceptions_per_area_ * sizeof(struct disk_exception); - if (read_size > size) { - return false; - } + CHECK(read_size == size); void* buffer = bufsink_.GetPayloadBuffer(size); CHECK(buffer != nullptr); @@ -329,7 +365,7 @@ int Snapuserd::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, if (cow_de->new_chunk != 0) { merged_ops_cur_iter += 1; offset += sizeof(struct disk_exception); - const CowOperation* cow_op = chunk_map_[cow_de->new_chunk]; + const CowOperation* cow_op = chunk_map_[ChunkToSector(cow_de->new_chunk)]; CHECK(cow_op != nullptr); CHECK(cow_op->new_block == cow_de->old_chunk); if (cow_op->type == kCowCopyOp) { @@ -563,7 +599,7 @@ bool Snapuserd::ReadMetadata() { SNAP_LOG(DEBUG) << "Old-chunk: " << de->old_chunk << "New-chunk: " << de->new_chunk; // Store operation pointer. 
- chunk_map_[data_chunk_id] = cow_op; + chunk_map_[ChunkToSector(data_chunk_id)] = cow_op; num_ops += 1; offset += sizeof(struct disk_exception); cowop_riter_->Next(); @@ -680,6 +716,126 @@ bool Snapuserd::InitBackingAndControlDevice() { return true; } +bool Snapuserd::DmuserWriteRequest() { + struct dm_user_header* header = bufsink_.GetHeaderPtr(); + + // device mapper has the capability to allow + // targets to flush the cache when writes are completed. This + // is controlled by each target by a flag "flush_supported". + // This flag is set by dm-user. When flush is supported, + // a number of zero-length bio's will be submitted to + // the target for the purpose of flushing cache. It is the + // responsibility of the target driver - which is dm-user in this + // case, to remap these bio's to the underlying device. Since, + // there is no underlying device for dm-user, this zero length + // bio's gets routed to daemon. + // + // Flush operations are generated post merge by dm-snap by having + // REQ_PREFLUSH flag set. Snapuser daemon doesn't have anything + // to flush per se; hence, just respond back with a success message. 
+ if (header->sector == 0) { + CHECK(header->len == 0); + header->type = DM_USER_RESP_SUCCESS; + if (!WriteDmUserPayload(0)) { + return false; + } + return true; + } + + size_t remaining_size = header->len; + size_t read_size = std::min(PAYLOAD_SIZE, remaining_size); + CHECK(read_size == BLOCK_SIZE); + + CHECK(header->sector > 0); + chunk_t chunk = SectorToChunk(header->sector); + CHECK(chunk_map_.find(header->sector) == chunk_map_.end()); + + void* buffer = bufsink_.GetPayloadBuffer(read_size); + CHECK(buffer != nullptr); + header->type = DM_USER_RESP_SUCCESS; + + if (!ReadDmUserPayload(buffer, read_size)) { + SNAP_LOG(ERROR) << "ReadDmUserPayload failed for chunk id: " << chunk + << "Sector: " << header->sector; + header->type = DM_USER_RESP_ERROR; + } + + if (header->type == DM_USER_RESP_SUCCESS && !ProcessMergeComplete(chunk, buffer)) { + SNAP_LOG(ERROR) << "ProcessMergeComplete failed for chunk id: " << chunk + << "Sector: " << header->sector; + header->type = DM_USER_RESP_ERROR; + } else { + SNAP_LOG(DEBUG) << "ProcessMergeComplete success for chunk id: " << chunk + << "Sector: " << header->sector; + } + + if (!WriteDmUserPayload(0)) { + return false; + } + + return true; +} + +bool Snapuserd::DmuserReadRequest() { + struct dm_user_header* header = bufsink_.GetHeaderPtr(); + size_t remaining_size = header->len; + loff_t offset = 0; + sector_t sector = header->sector; + do { + size_t read_size = std::min(PAYLOAD_SIZE, remaining_size); + + int ret = read_size; + header->type = DM_USER_RESP_SUCCESS; + chunk_t chunk = SectorToChunk(header->sector); + + // Request to sector 0 is always for kernel + // representation of COW header. This IO should be only + // once during dm-snapshot device creation. We should + // never see multiple IO requests. Additionally this IO + // will always be a single 4k. 
+ if (header->sector == 0) { + CHECK(metadata_read_done_ == true); + CHECK(read_size == BLOCK_SIZE); + ConstructKernelCowHeader(); + SNAP_LOG(DEBUG) << "Kernel header constructed"; + } else { + if (!offset && (read_size == BLOCK_SIZE) && + chunk_map_.find(header->sector) == chunk_map_.end()) { + if (!ReadDiskExceptions(chunk, read_size)) { + SNAP_LOG(ERROR) << "ReadDiskExceptions failed for chunk id: " << chunk + << "Sector: " << header->sector; + header->type = DM_USER_RESP_ERROR; + } else { + SNAP_LOG(DEBUG) << "ReadDiskExceptions success for chunk id: " << chunk + << "Sector: " << header->sector; + } + } else { + chunk_t num_sectors_read = (offset >> SECTOR_SHIFT); + ret = ReadData(sector + num_sectors_read, read_size); + if (ret < 0) { + SNAP_LOG(ERROR) << "ReadData failed for chunk id: " << chunk + << "Sector: " << header->sector; + header->type = DM_USER_RESP_ERROR; + } else { + SNAP_LOG(DEBUG) << "ReadData success for chunk id: " << chunk + << "Sector: " << header->sector; + } + } + } + + // Daemon will not be terminated if there is any error. We will + // just send the error back to dm-user. + if (!WriteDmUserPayload(ret)) { + return false; + } + + remaining_size -= ret; + offset += ret; + } while (remaining_size > 0); + + return true; +} + bool Snapuserd::Run() { struct dm_user_header* header = bufsink_.GetHeaderPtr(); @@ -698,122 +854,16 @@ bool Snapuserd::Run() { switch (header->type) { case DM_USER_REQ_MAP_READ: { - size_t remaining_size = header->len; - loff_t offset = 0; - do { - size_t read_size = std::min(PAYLOAD_SIZE, remaining_size); - header->type = DM_USER_RESP_SUCCESS; - - // Request to sector 0 is always for kernel - // representation of COW header. This IO should be only - // once during dm-snapshot device creation. We should - // never see multiple IO requests. Additionally this IO - // will always be a single 4k. 
- if (header->sector == 0) { - CHECK(metadata_read_done_ == true); - CHECK(read_size == BLOCK_SIZE); - ConstructKernelCowHeader(); - SNAP_LOG(DEBUG) << "Kernel header constructed"; - } else { - // Convert the sector number to a chunk ID. - // - // Check if the chunk ID represents a metadata - // page. If the chunk ID is not found in the - // vector, then it points to a metadata page. - chunk_t chunk = SectorToChunk(header->sector); - - if (chunk_map_.find(chunk) == chunk_map_.end()) { - if (!ReadDiskExceptions(chunk, read_size)) { - SNAP_LOG(ERROR) << "ReadDiskExceptions failed for chunk id: " << chunk - << "Sector: " << header->sector; - header->type = DM_USER_RESP_ERROR; - } else { - SNAP_LOG(DEBUG) << "ReadDiskExceptions success for chunk id: " << chunk - << "Sector: " << header->sector; - } - } else { - SNAP_LOG(DEBUG) << "ReadData: chunk: " << chunk << " len: " << header->len - << " read_size: " << read_size << " offset: " << offset; - chunk_t num_chunks_read = (offset >> BLOCK_SHIFT); - if (!ReadData(chunk + num_chunks_read, read_size)) { - SNAP_LOG(ERROR) << "ReadData failed for chunk id: " << chunk - << "Sector: " << header->sector; - header->type = DM_USER_RESP_ERROR; - } else { - SNAP_LOG(DEBUG) << "ReadData success for chunk id: " << chunk - << "Sector: " << header->sector; - } - } - } - - // Daemon will not be terminated if there is any error. We will - // just send the error back to dm-user. - if (!WriteDmUserPayload(read_size)) { - return false; - } - - remaining_size -= read_size; - offset += read_size; - } while (remaining_size); - + if (!DmuserReadRequest()) { + return false; + } break; } case DM_USER_REQ_MAP_WRITE: { - // device mapper has the capability to allow - // targets to flush the cache when writes are completed. This - // is controlled by each target by a flag "flush_supported". - // This flag is set by dm-user. 
When flush is supported, - // a number of zero-length bio's will be submitted to - // the target for the purpose of flushing cache. It is the - // responsibility of the target driver - which is dm-user in this - // case, to remap these bio's to the underlying device. Since, - // there is no underlying device for dm-user, this zero length - // bio's gets routed to daemon. - // - // Flush operations are generated post merge by dm-snap by having - // REQ_PREFLUSH flag set. Snapuser daemon doesn't have anything - // to flush per se; hence, just respond back with a success message. - if (header->sector == 0) { - CHECK(header->len == 0); - header->type = DM_USER_RESP_SUCCESS; - if (!WriteDmUserPayload(0)) { - return false; - } - break; - } - - size_t remaining_size = header->len; - size_t read_size = std::min(PAYLOAD_SIZE, remaining_size); - CHECK(read_size == BLOCK_SIZE); - - CHECK(header->sector > 0); - chunk_t chunk = SectorToChunk(header->sector); - CHECK(chunk_map_.find(chunk) == chunk_map_.end()); - - void* buffer = bufsink_.GetPayloadBuffer(read_size); - CHECK(buffer != nullptr); - header->type = DM_USER_RESP_SUCCESS; - - if (!ReadDmUserPayload(buffer, read_size)) { - SNAP_LOG(ERROR) << "ReadDmUserPayload failed for chunk id: " << chunk - << "Sector: " << header->sector; - header->type = DM_USER_RESP_ERROR; - } - - if (header->type == DM_USER_RESP_SUCCESS && !ProcessMergeComplete(chunk, buffer)) { - SNAP_LOG(ERROR) << "ProcessMergeComplete failed for chunk id: " << chunk - << "Sector: " << header->sector; - header->type = DM_USER_RESP_ERROR; - } else { - SNAP_LOG(DEBUG) << "ProcessMergeComplete success for chunk id: " << chunk - << "Sector: " << header->sector; - } - - if (!WriteDmUserPayload(0)) { + if (!DmuserWriteRequest()) { return false; } - break; } } diff --git a/fs_mgr/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/snapuserd.h index eec6d4562..c01fee3c0 100644 --- a/fs_mgr/libsnapshot/snapuserd.h +++ b/fs_mgr/libsnapshot/snapuserd.h @@ -22,9 +22,9 @@ 
#include #include #include +#include #include #include -#include #include #include @@ -72,6 +72,9 @@ class Snapuserd final { bool IsAttached() const { return ctrl_fd_ >= 0; } private: + bool DmuserReadRequest(); + bool DmuserWriteRequest(); + bool ReadDmUserHeader(); bool ReadDmUserPayload(void* buffer, size_t size); bool WriteDmUserPayload(size_t size); @@ -79,10 +82,13 @@ class Snapuserd final { bool ReadMetadata(); bool ZerofillDiskExceptions(size_t read_size); bool ReadDiskExceptions(chunk_t chunk, size_t size); - bool ReadData(chunk_t chunk, size_t size); + int ReadUnalignedSector(sector_t sector, size_t size, + std::map::iterator& it); + int ReadData(sector_t sector, size_t size); bool IsChunkIdMetadata(chunk_t chunk); chunk_t GetNextAllocatableChunkId(chunk_t chunk_id); + bool ProcessCowOp(const CowOperation* cow_op); bool ProcessReplaceOp(const CowOperation* cow_op); bool ProcessCopyOp(const CowOperation* cow_op); bool ProcessZeroOp(); @@ -94,6 +100,7 @@ class Snapuserd final { bool ProcessMergeComplete(chunk_t chunk, void* buffer); sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; } chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; } + bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SIZE - 1)) == 0); } std::string cow_device_; std::string backing_store_device_; @@ -116,9 +123,15 @@ class Snapuserd final { // mapping of old-chunk to new-chunk std::vector> vec_; - // Key - Chunk ID + // Key - Sector // Value - cow operation - std::unordered_map chunk_map_; + // + // chunk_map stores the pseudo mapping of sector + // to COW operations. Each COW op is 4k; however, + // we can get a read request which are as small + // as 512 bytes. Hence, we need to binary search + // in the chunk_map to find the nearest COW op. + std::map chunk_map_; bool metadata_read_done_ = false; BufferSink bufsink_;