From 8c1e93196b123f47b7e20277587a9a568bb83bc8 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 28 Jul 2021 21:46:08 -0700 Subject: [PATCH] libsnapshot: Use std::unordered_set in DmSnapCowSizeCalculator. There is a check here if vector resize fails. In practice, this would throw bad_alloc or length_error and cause a runtime abort, so the check is dead code. To protect against bad chunk_ids we can switch to unordered_set instead. The original memory concerns for std::set are less applicable since unordered_set is bucketed. Bug: 194431534 Test: apply OTA; run vts_libsnapshot_tests Change-Id: I09c108b700d2f83acf80a9eaa5099b46aedcab89 --- fs_mgr/libsnapshot/dm_snapshot_internals.h | 59 +++++----------------- 1 file changed, 13 insertions(+), 46 deletions(-) diff --git a/fs_mgr/libsnapshot/dm_snapshot_internals.h b/fs_mgr/libsnapshot/dm_snapshot_internals.h index ed77c1526..4a3625197 100644 --- a/fs_mgr/libsnapshot/dm_snapshot_internals.h +++ b/fs_mgr/libsnapshot/dm_snapshot_internals.h @@ -17,8 +17,9 @@ #include #include +#include #include -#include +#include namespace android { namespace snapshot { @@ -37,21 +38,16 @@ class DmSnapCowSizeCalculator { return; } - if (modified_chunks_.size() <= chunk_id) { - if (modified_chunks_.max_size() <= chunk_id) { - LOG(ERROR) << "Invalid COW size, chunk_id is too large."; - valid_ = false; - return; - } - modified_chunks_.resize(chunk_id + 1, false); - if (modified_chunks_.size() <= chunk_id) { - LOG(ERROR) << "Invalid COW size, chunk_id is too large."; - valid_ = false; - return; - } + if (chunk_id > std::numeric_limits::max()) { + LOG(ERROR) << "Chunk exceeds maximum size: " << chunk_id; + valid_ = false; + return; + } + if (modified_chunks_.count(chunk_id) > 0) { + return; } - modified_chunks_[chunk_id] = true; + modified_chunks_.emplace(chunk_id); } std::optional cow_size_bytes() const { @@ -91,23 +87,16 @@ class DmSnapCowSizeCalculator { return std::nullopt; } - uint64_t modified_chunks_count = 0; uint64_t cow_chunks = 0; - for (const auto& c : modified_chunks_) { - if (c) { - ++modified_chunks_count; - } - } - /* disk header + padding = 1 chunk */ cow_chunks += 1; /* snapshot modified chunks */ - cow_chunks += modified_chunks_count; + cow_chunks += modified_chunks_.size(); /* snapshot chunks index metadata */ - cow_chunks += 1 + modified_chunks_count / exceptions_per_chunk; + cow_chunks += 1 + modified_chunks_.size() / exceptions_per_chunk; return cow_chunks; } @@ -150,30 +139,8 @@ class DmSnapCowSizeCalculator { /* * |modified_chunks_| is a container that keeps trace of the modified * chunks. - * Multiple options were considered when choosing the most appropriate data - * structure for this container. Here follows a summary of why vector - * has been chosen, taking as a reference a snapshot partition of 4 GiB and - * chunk size of 4 KiB. - * - std::set is very space-efficient for a small number of - * operations, but if the whole snapshot is changed, it would need to - * store - * 4 GiB / 4 KiB * (64 bit / 8) = 8 MiB - * just for the data, plus the additional data overhead for the red-black - * tree used for data sorting (if each rb-tree element stores 3 address - * and the word-aligne color, the total size grows to 32 MiB). - * - std::bitset is not a good fit because requires a priori knowledge, - * at compile time, of the bitset size. - * - std::vector is a special case of vector, which uses a data - * compression that allows reducing the space utilization of each element - * to 1 bit. In detail, this data structure is composed of a resizable - * array of words, each of them representing a bitmap. On a 64 bit - * device, modifying the whole 4 GiB snapshot grows this container up to - * 4 * GiB / 4 KiB / 64 = 64 KiB - * that, even if is the same space requirement to change a single byte at - * the highest address of the snapshot, is a very affordable space - * requirement. */ - std::vector modified_chunks_; + std::unordered_set modified_chunks_; }; } // namespace snapshot