Merge changes from topic "snapuserd-read-ahead"
* changes: libsnapshot:snapuserd:Add unit test for read-ahead code path. libsnapshot: Flush data to scratch space only for overlapping regions libsnapshot:snapuserd: read-ahead COW copy ops libsnapshot: Retrieve COW version from update engine manifest libsnapshot:snapuserd: Add 2MB scratch space in COW file libsnapshot:snapuserd: mmap + msync header after merge
This commit is contained in:
commit
eebf447fef
17 changed files with 1215 additions and 134 deletions
|
|
@ -420,7 +420,8 @@ cc_defaults {
|
||||||
"snapuserd_server.cpp",
|
"snapuserd_server.cpp",
|
||||||
"snapuserd.cpp",
|
"snapuserd.cpp",
|
||||||
"snapuserd_daemon.cpp",
|
"snapuserd_daemon.cpp",
|
||||||
"snapuserd_worker.cpp",
|
"snapuserd_worker.cpp",
|
||||||
|
"snapuserd_readahead.cpp",
|
||||||
],
|
],
|
||||||
|
|
||||||
cflags: [
|
cflags: [
|
||||||
|
|
|
||||||
|
|
@ -94,11 +94,6 @@ bool CowReader::Parse(android::base::borrowed_fd fd, std::optional<uint64_t> lab
|
||||||
<< "Expected: " << kCowMagicNumber;
|
<< "Expected: " << kCowMagicNumber;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (header_.header_size != sizeof(CowHeader)) {
|
|
||||||
LOG(ERROR) << "Header size unknown, read " << header_.header_size << ", expected "
|
|
||||||
<< sizeof(CowHeader);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (header_.footer_size != sizeof(CowFooter)) {
|
if (header_.footer_size != sizeof(CowFooter)) {
|
||||||
LOG(ERROR) << "Footer size unknown, read " << header_.footer_size << ", expected "
|
LOG(ERROR) << "Footer size unknown, read " << header_.footer_size << ", expected "
|
||||||
<< sizeof(CowFooter);
|
<< sizeof(CowFooter);
|
||||||
|
|
@ -123,8 +118,7 @@ bool CowReader::Parse(android::base::borrowed_fd fd, std::optional<uint64_t> lab
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((header_.major_version != kCowVersionMajor) ||
|
if ((header_.major_version > kCowVersionMajor) || (header_.minor_version != kCowVersionMinor)) {
|
||||||
(header_.minor_version != kCowVersionMinor)) {
|
|
||||||
LOG(ERROR) << "Header version mismatch";
|
LOG(ERROR) << "Header version mismatch";
|
||||||
LOG(ERROR) << "Major version: " << header_.major_version
|
LOG(ERROR) << "Major version: " << header_.major_version
|
||||||
<< "Expected: " << kCowVersionMajor;
|
<< "Expected: " << kCowVersionMajor;
|
||||||
|
|
@ -137,10 +131,25 @@ bool CowReader::Parse(android::base::borrowed_fd fd, std::optional<uint64_t> lab
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CowReader::ParseOps(std::optional<uint64_t> label) {
|
bool CowReader::ParseOps(std::optional<uint64_t> label) {
|
||||||
uint64_t pos = lseek(fd_.get(), sizeof(header_), SEEK_SET);
|
uint64_t pos;
|
||||||
if (pos != sizeof(header_)) {
|
|
||||||
PLOG(ERROR) << "lseek ops failed";
|
// Skip the scratch space
|
||||||
return false;
|
if (header_.major_version >= 2 && (header_.buffer_size > 0)) {
|
||||||
|
LOG(DEBUG) << " Scratch space found of size: " << header_.buffer_size;
|
||||||
|
size_t init_offset = header_.header_size + header_.buffer_size;
|
||||||
|
pos = lseek(fd_.get(), init_offset, SEEK_SET);
|
||||||
|
if (pos != init_offset) {
|
||||||
|
PLOG(ERROR) << "lseek ops failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pos = lseek(fd_.get(), header_.header_size, SEEK_SET);
|
||||||
|
if (pos != header_.header_size) {
|
||||||
|
PLOG(ERROR) << "lseek ops failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Reading a v1 version of COW which doesn't have buffer_size.
|
||||||
|
header_.buffer_size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ops_buffer = std::make_shared<std::vector<CowOperation>>();
|
auto ops_buffer = std::make_shared<std::vector<CowOperation>>();
|
||||||
|
|
@ -360,13 +369,7 @@ void CowReader::InitializeMerge() {
|
||||||
// Replace-op-4, Zero-op-9, Replace-op-5 }
|
// Replace-op-4, Zero-op-9, Replace-op-5 }
|
||||||
//==============================================================
|
//==============================================================
|
||||||
|
|
||||||
for (uint64_t i = 0; i < ops_->size(); i++) {
|
num_copy_ops = FindNumCopyops();
|
||||||
auto& current_op = ops_->data()[i];
|
|
||||||
if (current_op.type != kCowCopyOp) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
num_copy_ops += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::sort(ops_.get()->begin() + num_copy_ops, ops_.get()->end(),
|
std::sort(ops_.get()->begin() + num_copy_ops, ops_.get()->end(),
|
||||||
[](CowOperation& op1, CowOperation& op2) -> bool {
|
[](CowOperation& op1, CowOperation& op2) -> bool {
|
||||||
|
|
@ -377,6 +380,23 @@ void CowReader::InitializeMerge() {
|
||||||
CHECK(ops_->size() >= header_.num_merge_ops);
|
CHECK(ops_->size() >= header_.num_merge_ops);
|
||||||
ops_->erase(ops_.get()->begin(), ops_.get()->begin() + header_.num_merge_ops);
|
ops_->erase(ops_.get()->begin(), ops_.get()->begin() + header_.num_merge_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
num_copy_ops = FindNumCopyops();
|
||||||
|
set_copy_ops(num_copy_ops);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t CowReader::FindNumCopyops() {
|
||||||
|
uint64_t num_copy_ops = 0;
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < ops_->size(); i++) {
|
||||||
|
auto& current_op = ops_->data()[i];
|
||||||
|
if (current_op.type != kCowCopyOp) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
num_copy_ops += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return num_copy_ops;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CowReader::GetHeader(CowHeader* header) {
|
bool CowReader::GetHeader(CowHeader* header) {
|
||||||
|
|
@ -470,7 +490,7 @@ std::unique_ptr<ICowOpReverseIter> CowReader::GetRevOpIter() {
|
||||||
|
|
||||||
bool CowReader::GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read) {
|
bool CowReader::GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read) {
|
||||||
// Validate the offset, taking care to acknowledge possible overflow of offset+len.
|
// Validate the offset, taking care to acknowledge possible overflow of offset+len.
|
||||||
if (offset < sizeof(header_) || offset >= fd_size_ - sizeof(CowFooter) || len >= fd_size_ ||
|
if (offset < header_.header_size || offset >= fd_size_ - sizeof(CowFooter) || len >= fd_size_ ||
|
||||||
offset + len > fd_size_ - sizeof(CowFooter)) {
|
offset + len > fd_size_ - sizeof(CowFooter)) {
|
||||||
LOG(ERROR) << "invalid data offset: " << offset << ", " << len << " bytes";
|
LOG(ERROR) << "invalid data offset: " << offset << ", " << len << " bytes";
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
|
|
@ -96,6 +96,7 @@ class TempDevice {
|
||||||
class CowSnapuserdTest final {
|
class CowSnapuserdTest final {
|
||||||
public:
|
public:
|
||||||
bool Setup();
|
bool Setup();
|
||||||
|
bool SetupCopyOverlap();
|
||||||
bool Merge();
|
bool Merge();
|
||||||
void ValidateMerge();
|
void ValidateMerge();
|
||||||
void ReadSnapshotDeviceAndValidate();
|
void ReadSnapshotDeviceAndValidate();
|
||||||
|
|
@ -114,6 +115,7 @@ class CowSnapuserdTest final {
|
||||||
void StartMerge();
|
void StartMerge();
|
||||||
|
|
||||||
void CreateCowDevice();
|
void CreateCowDevice();
|
||||||
|
void CreateCowDeviceWithCopyOverlap();
|
||||||
void CreateBaseDevice();
|
void CreateBaseDevice();
|
||||||
void InitCowDevice();
|
void InitCowDevice();
|
||||||
void SetDeviceControlName();
|
void SetDeviceControlName();
|
||||||
|
|
@ -191,6 +193,24 @@ bool CowSnapuserdTest::Setup() {
|
||||||
return setup_ok_;
|
return setup_ok_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CowSnapuserdTest::SetupCopyOverlap() {
|
||||||
|
CreateBaseDevice();
|
||||||
|
CreateCowDeviceWithCopyOverlap();
|
||||||
|
|
||||||
|
SetDeviceControlName();
|
||||||
|
|
||||||
|
StartSnapuserdDaemon();
|
||||||
|
InitCowDevice();
|
||||||
|
|
||||||
|
CreateDmUserDevice();
|
||||||
|
InitDaemon();
|
||||||
|
|
||||||
|
CreateSnapshotDevice();
|
||||||
|
setup_ok_ = true;
|
||||||
|
|
||||||
|
return setup_ok_;
|
||||||
|
}
|
||||||
|
|
||||||
void CowSnapuserdTest::StartSnapuserdDaemon() {
|
void CowSnapuserdTest::StartSnapuserdDaemon() {
|
||||||
pid_t pid = fork();
|
pid_t pid = fork();
|
||||||
ASSERT_GE(pid, 0);
|
ASSERT_GE(pid, 0);
|
||||||
|
|
@ -255,6 +275,49 @@ void CowSnapuserdTest::ReadSnapshotDeviceAndValidate() {
|
||||||
ASSERT_EQ(memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + (size_ * 3), size_), 0);
|
ASSERT_EQ(memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + (size_ * 3), size_), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CowSnapuserdTest::CreateCowDeviceWithCopyOverlap() {
|
||||||
|
std::string path = android::base::GetExecutableDirectory();
|
||||||
|
cow_system_ = std::make_unique<TemporaryFile>(path);
|
||||||
|
|
||||||
|
CowOptions options;
|
||||||
|
options.compression = "gz";
|
||||||
|
CowWriter writer(options);
|
||||||
|
|
||||||
|
ASSERT_TRUE(writer.Initialize(cow_system_->fd));
|
||||||
|
|
||||||
|
size_t num_blocks = size_ / options.block_size;
|
||||||
|
size_t x = num_blocks;
|
||||||
|
size_t blk_src_copy = num_blocks - 1;
|
||||||
|
|
||||||
|
// Create overlapping copy operations
|
||||||
|
while (1) {
|
||||||
|
ASSERT_TRUE(writer.AddCopy(blk_src_copy + 1, blk_src_copy));
|
||||||
|
x -= 1;
|
||||||
|
if (x == 0) {
|
||||||
|
ASSERT_EQ(blk_src_copy, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
blk_src_copy -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush operations
|
||||||
|
ASSERT_TRUE(writer.Finalize());
|
||||||
|
|
||||||
|
// Construct the buffer required for validation
|
||||||
|
orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
|
||||||
|
|
||||||
|
// Read the entire base device
|
||||||
|
ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
|
||||||
|
true);
|
||||||
|
|
||||||
|
// Merged operations
|
||||||
|
ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), options.block_size, 0),
|
||||||
|
true);
|
||||||
|
ASSERT_EQ(android::base::ReadFullyAtOffset(
|
||||||
|
base_fd_, (char*)orig_buffer_.get() + options.block_size, size_, 0),
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
void CowSnapuserdTest::CreateCowDevice() {
|
void CowSnapuserdTest::CreateCowDevice() {
|
||||||
unique_fd rnd_fd;
|
unique_fd rnd_fd;
|
||||||
loff_t offset = 0;
|
loff_t offset = 0;
|
||||||
|
|
@ -707,17 +770,17 @@ void CowSnapuserdMetadataTest::ValidateMetadata() {
|
||||||
|
|
||||||
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
|
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
|
||||||
ASSERT_EQ(de->old_chunk, 21);
|
ASSERT_EQ(de->old_chunk, 21);
|
||||||
ASSERT_EQ(de->new_chunk, 537);
|
ASSERT_EQ(de->new_chunk, 536);
|
||||||
offset += sizeof(struct disk_exception);
|
offset += sizeof(struct disk_exception);
|
||||||
|
|
||||||
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
|
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
|
||||||
ASSERT_EQ(de->old_chunk, 22);
|
ASSERT_EQ(de->old_chunk, 22);
|
||||||
ASSERT_EQ(de->new_chunk, 538);
|
ASSERT_EQ(de->new_chunk, 537);
|
||||||
offset += sizeof(struct disk_exception);
|
offset += sizeof(struct disk_exception);
|
||||||
|
|
||||||
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
|
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
|
||||||
ASSERT_EQ(de->old_chunk, 23);
|
ASSERT_EQ(de->old_chunk, 23);
|
||||||
ASSERT_EQ(de->new_chunk, 539);
|
ASSERT_EQ(de->new_chunk, 538);
|
||||||
offset += sizeof(struct disk_exception);
|
offset += sizeof(struct disk_exception);
|
||||||
|
|
||||||
// End of metadata
|
// End of metadata
|
||||||
|
|
@ -757,6 +820,23 @@ TEST(Snapuserd_Test, Snapshot_IO_TEST) {
|
||||||
harness.ValidateMerge();
|
harness.ValidateMerge();
|
||||||
harness.Shutdown();
|
harness.Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(Snapuserd_Test, Snapshot_COPY_Overlap_TEST) {
|
||||||
|
CowSnapuserdTest harness;
|
||||||
|
ASSERT_TRUE(harness.SetupCopyOverlap());
|
||||||
|
ASSERT_TRUE(harness.Merge());
|
||||||
|
harness.ValidateMerge();
|
||||||
|
harness.Shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Snapuserd_Test, Snapshot_COPY_Overlap_Merge_Resume_TEST) {
|
||||||
|
CowSnapuserdTest harness;
|
||||||
|
ASSERT_TRUE(harness.SetupCopyOverlap());
|
||||||
|
harness.MergeInterrupt();
|
||||||
|
harness.ValidateMerge();
|
||||||
|
harness.Shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace snapshot
|
} // namespace snapshot
|
||||||
} // namespace android
|
} // namespace android
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,7 @@ void CowWriter::SetupHeaders() {
|
||||||
header_.block_size = options_.block_size;
|
header_.block_size = options_.block_size;
|
||||||
header_.num_merge_ops = 0;
|
header_.num_merge_ops = 0;
|
||||||
header_.cluster_ops = options_.cluster_ops;
|
header_.cluster_ops = options_.cluster_ops;
|
||||||
|
header_.buffer_size = 0;
|
||||||
footer_ = {};
|
footer_ = {};
|
||||||
footer_.op.data_length = 64;
|
footer_.op.data_length = 64;
|
||||||
footer_.op.type = kCowFooterOp;
|
footer_.op.type = kCowFooterOp;
|
||||||
|
|
@ -139,12 +140,6 @@ bool CowWriter::SetFd(android::base::borrowed_fd fd) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CowWriter::InitializeMerge(borrowed_fd fd, CowHeader* header) {
|
|
||||||
fd_ = fd;
|
|
||||||
memcpy(&header_, header, sizeof(CowHeader));
|
|
||||||
merge_in_progress_ = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CowWriter::Initialize(unique_fd&& fd) {
|
bool CowWriter::Initialize(unique_fd&& fd) {
|
||||||
owned_fd_ = std::move(fd);
|
owned_fd_ = std::move(fd);
|
||||||
return Initialize(borrowed_fd{owned_fd_});
|
return Initialize(borrowed_fd{owned_fd_});
|
||||||
|
|
@ -172,7 +167,7 @@ bool CowWriter::InitializeAppend(android::base::borrowed_fd fd, uint64_t label)
|
||||||
}
|
}
|
||||||
|
|
||||||
void CowWriter::InitPos() {
|
void CowWriter::InitPos() {
|
||||||
next_op_pos_ = sizeof(header_);
|
next_op_pos_ = sizeof(header_) + header_.buffer_size;
|
||||||
cluster_size_ = header_.cluster_ops * sizeof(CowOperation);
|
cluster_size_ = header_.cluster_ops * sizeof(CowOperation);
|
||||||
if (header_.cluster_ops) {
|
if (header_.cluster_ops) {
|
||||||
next_data_pos_ = next_op_pos_ + cluster_size_;
|
next_data_pos_ = next_op_pos_ + cluster_size_;
|
||||||
|
|
@ -196,6 +191,10 @@ bool CowWriter::OpenForWrite() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (options_.scratch_space) {
|
||||||
|
header_.buffer_size = BUFFER_REGION_DEFAULT_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
// Headers are not complete, but this ensures the file is at the right
|
// Headers are not complete, but this ensures the file is at the right
|
||||||
// position.
|
// position.
|
||||||
if (!android::base::WriteFully(fd_, &header_, sizeof(header_))) {
|
if (!android::base::WriteFully(fd_, &header_, sizeof(header_))) {
|
||||||
|
|
@ -203,7 +202,27 @@ bool CowWriter::OpenForWrite() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (options_.scratch_space) {
|
||||||
|
// Initialize the scratch space
|
||||||
|
std::string data(header_.buffer_size, 0);
|
||||||
|
if (!android::base::WriteFully(fd_, data.data(), header_.buffer_size)) {
|
||||||
|
PLOG(ERROR) << "writing scratch space failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Sync()) {
|
||||||
|
LOG(ERROR) << "Header sync failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lseek(fd_.get(), sizeof(header_) + header_.buffer_size, SEEK_SET) < 0) {
|
||||||
|
PLOG(ERROR) << "lseek failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
InitPos();
|
InitPos();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -517,24 +536,6 @@ bool CowWriter::Sync() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CowWriter::CommitMerge(int merged_ops) {
|
|
||||||
CHECK(merge_in_progress_);
|
|
||||||
header_.num_merge_ops += merged_ops;
|
|
||||||
|
|
||||||
if (lseek(fd_.get(), 0, SEEK_SET) < 0) {
|
|
||||||
PLOG(ERROR) << "lseek failed";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!android::base::WriteFully(fd_, reinterpret_cast<const uint8_t*>(&header_),
|
|
||||||
sizeof(header_))) {
|
|
||||||
PLOG(ERROR) << "WriteFully failed";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Sync();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CowWriter::Truncate(off_t length) {
|
bool CowWriter::Truncate(off_t length) {
|
||||||
if (is_dev_null_ || is_block_device_) {
|
if (is_dev_null_ || is_block_device_) {
|
||||||
return true;
|
return true;
|
||||||
|
|
|
||||||
|
|
@ -21,17 +21,22 @@ namespace android {
|
||||||
namespace snapshot {
|
namespace snapshot {
|
||||||
|
|
||||||
static constexpr uint64_t kCowMagicNumber = 0x436f77634f572121ULL;
|
static constexpr uint64_t kCowMagicNumber = 0x436f77634f572121ULL;
|
||||||
static constexpr uint32_t kCowVersionMajor = 1;
|
static constexpr uint32_t kCowVersionMajor = 2;
|
||||||
static constexpr uint32_t kCowVersionMinor = 0;
|
static constexpr uint32_t kCowVersionMinor = 0;
|
||||||
|
|
||||||
static constexpr uint32_t kCowVersionManifest = 1;
|
static constexpr uint32_t kCowVersionManifest = 1;
|
||||||
|
|
||||||
|
static constexpr uint32_t BLOCK_SZ = 4096;
|
||||||
|
static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SZ) - 1);
|
||||||
|
|
||||||
// This header appears as the first sequence of bytes in the COW. All fields
|
// This header appears as the first sequence of bytes in the COW. All fields
|
||||||
// in the layout are little-endian encoded. The on-disk layout is:
|
// in the layout are little-endian encoded. The on-disk layout is:
|
||||||
//
|
//
|
||||||
// +-----------------------+
|
// +-----------------------+
|
||||||
// | Header (fixed) |
|
// | Header (fixed) |
|
||||||
// +-----------------------+
|
// +-----------------------+
|
||||||
|
// | Scratch space |
|
||||||
|
// +-----------------------+
|
||||||
// | Operation (variable) |
|
// | Operation (variable) |
|
||||||
// | Data (variable) |
|
// | Data (variable) |
|
||||||
// +-----------------------+
|
// +-----------------------+
|
||||||
|
|
@ -70,6 +75,9 @@ struct CowHeader {
|
||||||
|
|
||||||
// Tracks merge operations completed
|
// Tracks merge operations completed
|
||||||
uint64_t num_merge_ops;
|
uint64_t num_merge_ops;
|
||||||
|
|
||||||
|
// Scratch space used during merge
|
||||||
|
uint32_t buffer_size;
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
|
||||||
// This structure is the same size of a normal Operation, but is repurposed for the footer.
|
// This structure is the same size of a normal Operation, but is repurposed for the footer.
|
||||||
|
|
@ -146,11 +154,31 @@ static constexpr uint8_t kCowCompressNone = 0;
|
||||||
static constexpr uint8_t kCowCompressGz = 1;
|
static constexpr uint8_t kCowCompressGz = 1;
|
||||||
static constexpr uint8_t kCowCompressBrotli = 2;
|
static constexpr uint8_t kCowCompressBrotli = 2;
|
||||||
|
|
||||||
|
static constexpr uint8_t kCowReadAheadNotStarted = 0;
|
||||||
|
static constexpr uint8_t kCowReadAheadInProgress = 1;
|
||||||
|
static constexpr uint8_t kCowReadAheadDone = 2;
|
||||||
|
|
||||||
struct CowFooter {
|
struct CowFooter {
|
||||||
CowFooterOperation op;
|
CowFooterOperation op;
|
||||||
CowFooterData data;
|
CowFooterData data;
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
|
||||||
|
struct ScratchMetadata {
|
||||||
|
// Block of data in the image that operation modifies
|
||||||
|
// and read-ahead thread stores the modified data
|
||||||
|
// in the scratch space
|
||||||
|
uint64_t new_block;
|
||||||
|
// Offset within the file to read the data
|
||||||
|
uint64_t file_offset;
|
||||||
|
} __attribute__((packed));
|
||||||
|
|
||||||
|
struct BufferState {
|
||||||
|
uint8_t read_ahead_state;
|
||||||
|
} __attribute__((packed));
|
||||||
|
|
||||||
|
// 2MB Scratch space used for read-ahead
|
||||||
|
static constexpr uint64_t BUFFER_REGION_DEFAULT_SIZE = (1ULL << 21);
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, CowOperation const& arg);
|
std::ostream& operator<<(std::ostream& os, CowOperation const& arg);
|
||||||
|
|
||||||
int64_t GetNextOpOffset(const CowOperation& op, uint32_t cluster_size);
|
int64_t GetNextOpOffset(const CowOperation& op, uint32_t cluster_size);
|
||||||
|
|
|
||||||
|
|
@ -141,18 +141,21 @@ class CowReader : public ICowReader {
|
||||||
|
|
||||||
bool GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read);
|
bool GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read);
|
||||||
|
|
||||||
void UpdateMergeProgress(uint64_t merge_ops) { header_.num_merge_ops += merge_ops; }
|
|
||||||
|
|
||||||
void InitializeMerge();
|
void InitializeMerge();
|
||||||
|
|
||||||
void set_total_data_ops(uint64_t size) { total_data_ops_ = size; }
|
void set_total_data_ops(uint64_t size) { total_data_ops_ = size; }
|
||||||
|
|
||||||
uint64_t total_data_ops() { return total_data_ops_; }
|
uint64_t total_data_ops() { return total_data_ops_; }
|
||||||
|
|
||||||
|
void set_copy_ops(uint64_t size) { copy_ops_ = size; }
|
||||||
|
|
||||||
|
uint64_t total_copy_ops() { return copy_ops_; }
|
||||||
|
|
||||||
void CloseCowFd() { owned_fd_ = {}; }
|
void CloseCowFd() { owned_fd_ = {}; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool ParseOps(std::optional<uint64_t> label);
|
bool ParseOps(std::optional<uint64_t> label);
|
||||||
|
uint64_t FindNumCopyops();
|
||||||
|
|
||||||
android::base::unique_fd owned_fd_;
|
android::base::unique_fd owned_fd_;
|
||||||
android::base::borrowed_fd fd_;
|
android::base::borrowed_fd fd_;
|
||||||
|
|
@ -162,6 +165,7 @@ class CowReader : public ICowReader {
|
||||||
std::optional<uint64_t> last_label_;
|
std::optional<uint64_t> last_label_;
|
||||||
std::shared_ptr<std::vector<CowOperation>> ops_;
|
std::shared_ptr<std::vector<CowOperation>> ops_;
|
||||||
uint64_t total_data_ops_;
|
uint64_t total_data_ops_;
|
||||||
|
uint64_t copy_ops_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace snapshot
|
} // namespace snapshot
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,8 @@ struct CowOptions {
|
||||||
|
|
||||||
// Number of CowOperations in a cluster. 0 for no clustering. Cannot be 1.
|
// Number of CowOperations in a cluster. 0 for no clustering. Cannot be 1.
|
||||||
uint32_t cluster_ops = 200;
|
uint32_t cluster_ops = 200;
|
||||||
|
|
||||||
|
bool scratch_space = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Interface for writing to a snapuserd COW. All operations are ordered; merges
|
// Interface for writing to a snapuserd COW. All operations are ordered; merges
|
||||||
|
|
@ -100,13 +102,12 @@ class CowWriter : public ICowWriter {
|
||||||
bool InitializeAppend(android::base::unique_fd&&, uint64_t label);
|
bool InitializeAppend(android::base::unique_fd&&, uint64_t label);
|
||||||
bool InitializeAppend(android::base::borrowed_fd fd, uint64_t label);
|
bool InitializeAppend(android::base::borrowed_fd fd, uint64_t label);
|
||||||
|
|
||||||
void InitializeMerge(android::base::borrowed_fd fd, CowHeader* header);
|
|
||||||
bool CommitMerge(int merged_ops);
|
|
||||||
|
|
||||||
bool Finalize() override;
|
bool Finalize() override;
|
||||||
|
|
||||||
uint64_t GetCowSize() override;
|
uint64_t GetCowSize() override;
|
||||||
|
|
||||||
|
uint32_t GetCowVersion() { return header_.major_version; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual bool EmitCopy(uint64_t new_block, uint64_t old_block) override;
|
virtual bool EmitCopy(uint64_t new_block, uint64_t old_block) override;
|
||||||
virtual bool EmitRawBlocks(uint64_t new_block_start, const void* data, size_t size) override;
|
virtual bool EmitRawBlocks(uint64_t new_block_start, const void* data, size_t size) override;
|
||||||
|
|
|
||||||
|
|
@ -47,9 +47,6 @@ typedef sector_t chunk_t;
|
||||||
static constexpr uint32_t CHUNK_SIZE = 8;
|
static constexpr uint32_t CHUNK_SIZE = 8;
|
||||||
static constexpr uint32_t CHUNK_SHIFT = (__builtin_ffs(CHUNK_SIZE) - 1);
|
static constexpr uint32_t CHUNK_SHIFT = (__builtin_ffs(CHUNK_SIZE) - 1);
|
||||||
|
|
||||||
static constexpr uint32_t BLOCK_SZ = 4096;
|
|
||||||
static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SZ) - 1);
|
|
||||||
|
|
||||||
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
|
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
|
||||||
|
|
||||||
// This structure represents the kernel COW header.
|
// This structure represents the kernel COW header.
|
||||||
|
|
|
||||||
|
|
@ -2692,8 +2692,18 @@ Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manife
|
||||||
AutoDeviceList created_devices;
|
AutoDeviceList created_devices;
|
||||||
|
|
||||||
const auto& dap_metadata = manifest.dynamic_partition_metadata();
|
const auto& dap_metadata = manifest.dynamic_partition_metadata();
|
||||||
bool use_compression =
|
CowOptions options;
|
||||||
IsCompressionEnabled() && dap_metadata.vabc_enabled() && !device_->IsRecovery();
|
CowWriter writer(options);
|
||||||
|
bool cow_format_support = true;
|
||||||
|
if (dap_metadata.cow_version() < writer.GetCowVersion()) {
|
||||||
|
cow_format_support = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version()
|
||||||
|
<< " writer.GetCowVersion(): " << writer.GetCowVersion();
|
||||||
|
|
||||||
|
bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() &&
|
||||||
|
!device_->IsRecovery() && cow_format_support;
|
||||||
|
|
||||||
std::string compression_algorithm;
|
std::string compression_algorithm;
|
||||||
if (use_compression) {
|
if (use_compression) {
|
||||||
|
|
@ -2960,7 +2970,13 @@ Return SnapshotManager::InitializeUpdateSnapshots(
|
||||||
return Return::Error();
|
return Return::Error();
|
||||||
}
|
}
|
||||||
|
|
||||||
CowWriter writer(CowOptions{.compression = it->second.compression_algorithm()});
|
CowOptions options;
|
||||||
|
if (device()->IsTestDevice()) {
|
||||||
|
options.scratch_space = false;
|
||||||
|
}
|
||||||
|
options.compression = it->second.compression_algorithm();
|
||||||
|
|
||||||
|
CowWriter writer(options);
|
||||||
if (!writer.Initialize(fd) || !writer.Finalize()) {
|
if (!writer.Initialize(fd) || !writer.Finalize()) {
|
||||||
LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
|
LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
|
||||||
return Return::Error();
|
return Return::Error();
|
||||||
|
|
@ -3069,6 +3085,10 @@ std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenCompressedSnapshotWriter(
|
||||||
CowOptions cow_options;
|
CowOptions cow_options;
|
||||||
cow_options.compression = status.compression_algorithm();
|
cow_options.compression = status.compression_algorithm();
|
||||||
cow_options.max_blocks = {status.device_size() / cow_options.block_size};
|
cow_options.max_blocks = {status.device_size() / cow_options.block_size};
|
||||||
|
// Disable scratch space for vts tests
|
||||||
|
if (device()->IsTestDevice()) {
|
||||||
|
cow_options.scratch_space = false;
|
||||||
|
}
|
||||||
|
|
||||||
// Currently we don't support partial snapshots, since partition_cow_creator
|
// Currently we don't support partial snapshots, since partition_cow_creator
|
||||||
// never creates this scenario.
|
// never creates this scenario.
|
||||||
|
|
|
||||||
|
|
@ -150,6 +150,7 @@ TEST_F(OfflineSnapshotTest, CompressedSnapshot) {
|
||||||
CowOptions options;
|
CowOptions options;
|
||||||
options.compression = "gz";
|
options.compression = "gz";
|
||||||
options.max_blocks = {kBlockCount};
|
options.max_blocks = {kBlockCount};
|
||||||
|
options.scratch_space = false;
|
||||||
|
|
||||||
unique_fd cow_fd(dup(cow_->fd));
|
unique_fd cow_fd(dup(cow_->fd));
|
||||||
ASSERT_GE(cow_fd, 0);
|
ASSERT_GE(cow_fd, 0);
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@
|
||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <libsnapshot/cow_format.h>
|
||||||
#include <libsnapshot/snapshot.h>
|
#include <libsnapshot/snapshot.h>
|
||||||
|
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
|
@ -327,6 +328,7 @@ class SnapshotTest : public ::testing::Test {
|
||||||
|
|
||||||
auto dynamic_partition_metadata = manifest.mutable_dynamic_partition_metadata();
|
auto dynamic_partition_metadata = manifest.mutable_dynamic_partition_metadata();
|
||||||
dynamic_partition_metadata->set_vabc_enabled(IsCompressionEnabled());
|
dynamic_partition_metadata->set_vabc_enabled(IsCompressionEnabled());
|
||||||
|
dynamic_partition_metadata->set_cow_version(android::snapshot::kCowVersionMajor);
|
||||||
|
|
||||||
auto group = dynamic_partition_metadata->add_groups();
|
auto group = dynamic_partition_metadata->add_groups();
|
||||||
group->set_name("group");
|
group->set_name("group");
|
||||||
|
|
@ -853,6 +855,7 @@ class SnapshotUpdateTest : public SnapshotTest {
|
||||||
|
|
||||||
auto dynamic_partition_metadata = manifest_.mutable_dynamic_partition_metadata();
|
auto dynamic_partition_metadata = manifest_.mutable_dynamic_partition_metadata();
|
||||||
dynamic_partition_metadata->set_vabc_enabled(IsCompressionEnabled());
|
dynamic_partition_metadata->set_vabc_enabled(IsCompressionEnabled());
|
||||||
|
dynamic_partition_metadata->set_cow_version(android::snapshot::kCowVersionMajor);
|
||||||
|
|
||||||
// Create a fake update package metadata.
|
// Create a fake update package metadata.
|
||||||
// Not using full name "system", "vendor", "product" because these names collide with the
|
// Not using full name "system", "vendor", "product" because these names collide with the
|
||||||
|
|
|
||||||
|
|
@ -47,28 +47,202 @@ bool Snapuserd::InitializeWorkers() {
|
||||||
|
|
||||||
worker_threads_.push_back(std::move(wt));
|
worker_threads_.push_back(std::move(wt));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
read_ahead_thread_ = std::make_unique<ReadAheadThread>(cow_device_, backing_store_device_,
|
||||||
|
misc_name_, GetSharedPtr());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Snapuserd::CommitMerge(int num_merge_ops) {
|
bool Snapuserd::CommitMerge(int num_merge_ops) {
|
||||||
{
|
struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
|
||||||
std::lock_guard<std::mutex> lock(lock_);
|
ch->num_merge_ops += num_merge_ops;
|
||||||
CowHeader header;
|
|
||||||
|
|
||||||
reader_->GetHeader(&header);
|
if (read_ahead_feature_ && read_ahead_ops_.size() > 0) {
|
||||||
header.num_merge_ops += num_merge_ops;
|
struct BufferState* ra_state = GetBufferState();
|
||||||
reader_->UpdateMergeProgress(num_merge_ops);
|
ra_state->read_ahead_state = kCowReadAheadInProgress;
|
||||||
if (!writer_->CommitMerge(num_merge_ops)) {
|
|
||||||
SNAP_LOG(ERROR) << "CommitMerge failed... merged_ops_cur_iter: " << num_merge_ops
|
|
||||||
<< " Total-merged-ops: " << header.num_merge_ops;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
merge_initiated_ = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
|
||||||
|
if (ret < 0) {
|
||||||
|
PLOG(ERROR) << "msync header failed: " << ret;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
merge_initiated_ = true;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Snapuserd::PrepareReadAhead() {
|
||||||
|
if (!read_ahead_feature_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BufferState* ra_state = GetBufferState();
|
||||||
|
// Check if the data has to be re-constructed from COW device
|
||||||
|
if (ra_state->read_ahead_state == kCowReadAheadDone) {
|
||||||
|
populate_data_from_cow_ = true;
|
||||||
|
} else {
|
||||||
|
populate_data_from_cow_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
StartReadAhead();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Snapuserd::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer) {
|
||||||
|
CHECK(lock->owns_lock());
|
||||||
|
std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block);
|
||||||
|
|
||||||
|
// This will be true only for IO's generated as part of reading a root
|
||||||
|
// filesystem. IO's related to merge should always be in read-ahead cache.
|
||||||
|
if (it == read_ahead_buffer_map_.end()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Theoretically, we can send the data back from the read-ahead buffer
|
||||||
|
// all the way to the kernel without memcpy. However, if the IO is
|
||||||
|
// un-aligned, the wrapper function will need to touch the read-ahead
|
||||||
|
// buffers and transitions will be bit more complicated.
|
||||||
|
memcpy(buffer, it->second, BLOCK_SZ);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ========== State transition functions for read-ahead operations ===========
|
||||||
|
|
||||||
|
bool Snapuserd::GetReadAheadPopulatedBuffer(uint64_t block, void* buffer) {
|
||||||
|
if (!read_ahead_feature_) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(lock_);
|
||||||
|
if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS) {
|
||||||
|
return GetRABuffer(&lock, block, buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// Read-ahead thread IO is in-progress. Wait for it to complete
|
||||||
|
std::unique_lock<std::mutex> lock(lock_);
|
||||||
|
while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE ||
|
||||||
|
io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS)) {
|
||||||
|
cv.wait(lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
return GetRABuffer(&lock, block, buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is invoked by read-ahead thread waiting for merge IO's
|
||||||
|
// to complete
|
||||||
|
bool Snapuserd::WaitForMergeToComplete() {
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(lock_);
|
||||||
|
while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN ||
|
||||||
|
io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED)) {
|
||||||
|
cv.wait(lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_IN_PROGRESS;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is invoked during the launch of worker threads. We wait
|
||||||
|
// for read-ahead thread to by fully up before worker threads
|
||||||
|
// are launched; else we will have a race between worker threads
|
||||||
|
// and read-ahead thread specifically during re-construction.
|
||||||
|
bool Snapuserd::WaitForReadAheadToStart() {
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(lock_);
|
||||||
|
while (!(io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS ||
|
||||||
|
io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE)) {
|
||||||
|
cv.wait(lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Invoked by worker threads when a sequence of merge operation
|
||||||
|
// is complete notifying read-ahead thread to make forward
|
||||||
|
// progress.
|
||||||
|
void Snapuserd::StartReadAhead() {
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(lock_);
|
||||||
|
io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
cv.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Snapuserd::MergeCompleted() {
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(lock_);
|
||||||
|
io_state_ = READ_AHEAD_IO_TRANSITION::IO_TERMINATED;
|
||||||
|
}
|
||||||
|
|
||||||
|
cv.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Snapuserd::ReadAheadIOCompleted(bool sync) {
|
||||||
|
if (sync) {
|
||||||
|
// Flush the entire buffer region
|
||||||
|
int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC);
|
||||||
|
if (ret < 0) {
|
||||||
|
PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Metadata and data are synced. Now, update the state.
|
||||||
|
// We need to update the state after flushing data; if there is a crash
|
||||||
|
// when read-ahead IO is in progress, the state of data in the COW file
|
||||||
|
// is unknown. kCowReadAheadDone acts as a checkpoint wherein the data
|
||||||
|
// in the scratch space is good and during next reboot, read-ahead thread
|
||||||
|
// can safely re-construct the data.
|
||||||
|
struct BufferState* ra_state = GetBufferState();
|
||||||
|
ra_state->read_ahead_state = kCowReadAheadDone;
|
||||||
|
|
||||||
|
ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
|
||||||
|
if (ret < 0) {
|
||||||
|
PLOG(ERROR) << "msync failed to flush Readahead completion state...";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Notify the worker threads
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(lock_);
|
||||||
|
io_state_ = READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
cv.notify_all();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Snapuserd::ReadAheadIOFailed() {
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(lock_);
|
||||||
|
io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
cv.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
//========== End of state transition functions ====================
|
||||||
|
|
||||||
bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) {
|
bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) {
|
||||||
uint32_t stride = exceptions_per_area_ + 1;
|
uint32_t stride = exceptions_per_area_ + 1;
|
||||||
lldiv_t divresult = lldiv(chunk, stride);
|
lldiv_t divresult = lldiv(chunk, stride);
|
||||||
|
|
@ -93,9 +267,9 @@ void Snapuserd::CheckMergeCompletionStatus() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
CowHeader header;
|
struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
|
||||||
reader_->GetHeader(&header);
|
|
||||||
SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << header.num_merge_ops
|
SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << ch->num_merge_ops
|
||||||
<< " Total-data-ops: " << reader_->total_data_ops();
|
<< " Total-data-ops: " << reader_->total_data_ops();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -175,8 +349,10 @@ bool Snapuserd::ReadMetadata() {
|
||||||
reader_->InitializeMerge();
|
reader_->InitializeMerge();
|
||||||
SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops;
|
SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops;
|
||||||
|
|
||||||
writer_ = std::make_unique<CowWriter>(options);
|
if (!MmapMetadata()) {
|
||||||
writer_->InitializeMerge(cow_fd_.get(), &header);
|
SNAP_LOG(ERROR) << "mmap failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize the iterator for reading metadata
|
// Initialize the iterator for reading metadata
|
||||||
cowop_riter_ = reader_->GetRevOpIter();
|
cowop_riter_ = reader_->GetRevOpIter();
|
||||||
|
|
@ -258,13 +434,15 @@ bool Snapuserd::ReadMetadata() {
|
||||||
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
|
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ);
|
||||||
std::optional<chunk_t> prev_id = {};
|
std::optional<chunk_t> prev_id = {};
|
||||||
std::map<uint64_t, const CowOperation*> map;
|
std::map<uint64_t, const CowOperation*> map;
|
||||||
std::set<uint64_t> dest_blocks;
|
|
||||||
size_t pending_copy_ops = exceptions_per_area_ - num_ops;
|
size_t pending_copy_ops = exceptions_per_area_ - num_ops;
|
||||||
SNAP_LOG(INFO) << " Processing copy-ops at Area: " << vec_.size()
|
uint64_t total_copy_ops = reader_->total_copy_ops();
|
||||||
<< " Number of replace/zero ops completed in this area: " << num_ops
|
|
||||||
<< " Pending copy ops for this area: " << pending_copy_ops;
|
SNAP_LOG(DEBUG) << " Processing copy-ops at Area: " << vec_.size()
|
||||||
|
<< " Number of replace/zero ops completed in this area: " << num_ops
|
||||||
|
<< " Pending copy ops for this area: " << pending_copy_ops;
|
||||||
while (!cowop_riter_->Done()) {
|
while (!cowop_riter_->Done()) {
|
||||||
do {
|
do {
|
||||||
const CowOperation* cow_op = &cowop_riter_->Get();
|
const CowOperation* cow_op = &cowop_riter_->Get();
|
||||||
|
|
@ -300,41 +478,20 @@ bool Snapuserd::ReadMetadata() {
|
||||||
// Op-6: 15 -> 18
|
// Op-6: 15 -> 18
|
||||||
//
|
//
|
||||||
// Note that the blocks numbers are contiguous. Hence, all 6 copy
|
// Note that the blocks numbers are contiguous. Hence, all 6 copy
|
||||||
// operations can potentially be batch merged. However, that will be
|
// operations can be batch merged. However, that will be
|
||||||
// problematic if we have a crash as block 20, 19, 18 would have
|
// problematic if we have a crash as block 20, 19, 18 would have
|
||||||
// been overwritten and hence subsequent recovery may end up with
|
// been overwritten and hence subsequent recovery may end up with
|
||||||
// a silent data corruption when op-1, op-2 and op-3 are
|
// a silent data corruption when op-1, op-2 and op-3 are
|
||||||
// re-executed.
|
// re-executed.
|
||||||
//
|
//
|
||||||
// We will split these 6 operations into two batches viz:
|
// To address the above problem, read-ahead thread will
|
||||||
//
|
// read all the 6 source blocks, cache them in the scratch
|
||||||
// Batch-1:
|
// space of the COW file. During merge, read-ahead
|
||||||
// ===================
|
// thread will serve the blocks from the read-ahead cache.
|
||||||
// Op-1: 20 -> 23
|
// If there is a crash during merge; on subsequent reboot,
|
||||||
// Op-2: 19 -> 22
|
// read-ahead thread will recover the data from the
|
||||||
// Op-3: 18 -> 21
|
// scratch space and re-construct it thereby there
|
||||||
// ===================
|
// is no loss of data.
|
||||||
//
|
|
||||||
// Batch-2:
|
|
||||||
// ==================
|
|
||||||
// Op-4: 17 -> 20
|
|
||||||
// Op-5: 16 -> 19
|
|
||||||
// Op-6: 15 -> 18
|
|
||||||
// ==================
|
|
||||||
//
|
|
||||||
// Now, merge sequence will look like:
|
|
||||||
//
|
|
||||||
// 1: Merge Batch-1 { op-1, op-2, op-3 }
|
|
||||||
// 2: Update Metadata in COW File that op-1, op-2, op-3 merge is
|
|
||||||
// done.
|
|
||||||
// 3: Merge Batch-2
|
|
||||||
// 4: Update Metadata in COW File that op-4, op-5, op-6 merge is
|
|
||||||
// done.
|
|
||||||
//
|
|
||||||
// Note, that the order of block operations are still the same.
|
|
||||||
// However, we have two batch merge operations. Any crash between
|
|
||||||
// either of this sequence should be safe as each of these
|
|
||||||
// batches are self-contained.
|
|
||||||
//
|
//
|
||||||
//===========================================================
|
//===========================================================
|
||||||
//
|
//
|
||||||
|
|
@ -398,14 +555,10 @@ bool Snapuserd::ReadMetadata() {
|
||||||
if (diff != 1) {
|
if (diff != 1) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (dest_blocks.count(cow_op->new_block) || map.count(cow_op->source) > 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
metadata_found = true;
|
metadata_found = true;
|
||||||
pending_copy_ops -= 1;
|
pending_copy_ops -= 1;
|
||||||
map[cow_op->new_block] = cow_op;
|
map[cow_op->new_block] = cow_op;
|
||||||
dest_blocks.insert(cow_op->source);
|
|
||||||
prev_id = cow_op->new_block;
|
prev_id = cow_op->new_block;
|
||||||
cowop_riter_->Next();
|
cowop_riter_->Next();
|
||||||
} while (!cowop_riter_->Done() && pending_copy_ops);
|
} while (!cowop_riter_->Done() && pending_copy_ops);
|
||||||
|
|
@ -426,6 +579,9 @@ bool Snapuserd::ReadMetadata() {
|
||||||
offset += sizeof(struct disk_exception);
|
offset += sizeof(struct disk_exception);
|
||||||
num_ops += 1;
|
num_ops += 1;
|
||||||
copy_ops++;
|
copy_ops++;
|
||||||
|
if (read_ahead_feature_) {
|
||||||
|
read_ahead_ops_.push_back(it->second);
|
||||||
|
}
|
||||||
|
|
||||||
SNAP_LOG(DEBUG) << num_ops << ":"
|
SNAP_LOG(DEBUG) << num_ops << ":"
|
||||||
<< " Copy-op: "
|
<< " Copy-op: "
|
||||||
|
|
@ -453,9 +609,17 @@ bool Snapuserd::ReadMetadata() {
|
||||||
}
|
}
|
||||||
|
|
||||||
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
|
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
|
||||||
|
total_copy_ops -= 1;
|
||||||
|
/*
|
||||||
|
* Split the number of ops based on the size of read-ahead buffer
|
||||||
|
* region. We need to ensure that kernel doesn't issue IO on blocks
|
||||||
|
* which are not read by the read-ahead thread.
|
||||||
|
*/
|
||||||
|
if (read_ahead_feature_ && (total_copy_ops % num_ra_ops_per_iter == 0)) {
|
||||||
|
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
map.clear();
|
map.clear();
|
||||||
dest_blocks.clear();
|
|
||||||
prev_id.reset();
|
prev_id.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -470,6 +634,7 @@ bool Snapuserd::ReadMetadata() {
|
||||||
|
|
||||||
chunk_vec_.shrink_to_fit();
|
chunk_vec_.shrink_to_fit();
|
||||||
vec_.shrink_to_fit();
|
vec_.shrink_to_fit();
|
||||||
|
read_ahead_ops_.shrink_to_fit();
|
||||||
|
|
||||||
// Sort the vector based on sectors as we need this during un-aligned access
|
// Sort the vector based on sectors as we need this during un-aligned access
|
||||||
std::sort(chunk_vec_.begin(), chunk_vec_.end(), compare);
|
std::sort(chunk_vec_.begin(), chunk_vec_.end(), compare);
|
||||||
|
|
@ -484,9 +649,41 @@ bool Snapuserd::ReadMetadata() {
|
||||||
// Total number of sectors required for creating dm-user device
|
// Total number of sectors required for creating dm-user device
|
||||||
num_sectors_ = ChunkToSector(data_chunk_id);
|
num_sectors_ = ChunkToSector(data_chunk_id);
|
||||||
merge_initiated_ = false;
|
merge_initiated_ = false;
|
||||||
|
PrepareReadAhead();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Snapuserd::MmapMetadata() {
|
||||||
|
CowHeader header;
|
||||||
|
reader_->GetHeader(&header);
|
||||||
|
|
||||||
|
if (header.major_version >= 2 && header.buffer_size > 0) {
|
||||||
|
total_mapped_addr_length_ = header.header_size + BUFFER_REGION_DEFAULT_SIZE;
|
||||||
|
read_ahead_feature_ = true;
|
||||||
|
} else {
|
||||||
|
// mmap the first 4k page - older COW format
|
||||||
|
total_mapped_addr_length_ = BLOCK_SZ;
|
||||||
|
read_ahead_feature_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
mapped_addr_ = mmap(NULL, total_mapped_addr_length_, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||||
|
cow_fd_.get(), 0);
|
||||||
|
if (mapped_addr_ == MAP_FAILED) {
|
||||||
|
SNAP_LOG(ERROR) << "mmap metadata failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Snapuserd::UnmapBufferRegion() {
|
||||||
|
int ret = munmap(mapped_addr_, total_mapped_addr_length_);
|
||||||
|
if (ret < 0) {
|
||||||
|
SNAP_PLOG(ERROR) << "munmap failed";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
|
void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
|
||||||
unsigned int, const char* message) {
|
unsigned int, const char* message) {
|
||||||
if (severity == android::base::ERROR) {
|
if (severity == android::base::ERROR) {
|
||||||
|
|
@ -507,11 +704,28 @@ bool Snapuserd::InitCowDevice() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Entry point to launch worker threads
|
* Entry point to launch threads
|
||||||
*/
|
*/
|
||||||
bool Snapuserd::Start() {
|
bool Snapuserd::Start() {
|
||||||
std::vector<std::future<bool>> threads;
|
std::vector<std::future<bool>> threads;
|
||||||
|
std::future<bool> ra_thread;
|
||||||
|
bool rathread = (read_ahead_feature_ && (read_ahead_ops_.size() > 0));
|
||||||
|
|
||||||
|
// Start the read-ahead thread and wait
|
||||||
|
// for it as the data has to be re-constructed
|
||||||
|
// from COW device.
|
||||||
|
if (rathread) {
|
||||||
|
ra_thread = std::async(std::launch::async, &ReadAheadThread::RunThread,
|
||||||
|
read_ahead_thread_.get());
|
||||||
|
if (!WaitForReadAheadToStart()) {
|
||||||
|
SNAP_LOG(ERROR) << "Failed to start Read-ahead thread...";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
SNAP_LOG(INFO) << "Read-ahead thread started...";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Launch worker threads
|
||||||
for (int i = 0; i < worker_threads_.size(); i++) {
|
for (int i = 0; i < worker_threads_.size(); i++) {
|
||||||
threads.emplace_back(
|
threads.emplace_back(
|
||||||
std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get()));
|
std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get()));
|
||||||
|
|
@ -522,8 +736,69 @@ bool Snapuserd::Start() {
|
||||||
ret = t.get() && ret;
|
ret = t.get() && ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rathread) {
|
||||||
|
// Notify the read-ahead thread that all worker threads
|
||||||
|
// are done. We need this explicit notification when
|
||||||
|
// there is an IO failure or there was a switch
|
||||||
|
// of dm-user table; thus, forcing the read-ahead
|
||||||
|
// thread to wake up.
|
||||||
|
MergeCompleted();
|
||||||
|
ret = ret && ra_thread.get();
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t Snapuserd::GetBufferMetadataOffset() {
|
||||||
|
CowHeader header;
|
||||||
|
reader_->GetHeader(&header);
|
||||||
|
|
||||||
|
size_t size = header.header_size + sizeof(BufferState);
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Metadata for read-ahead is 16 bytes. For a 2 MB region, we will
|
||||||
|
* end up with 8k (2 PAGE) worth of metadata. Thus, a 2MB buffer
|
||||||
|
* region is split into:
|
||||||
|
*
|
||||||
|
* 1: 8k metadata
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
size_t Snapuserd::GetBufferMetadataSize() {
|
||||||
|
CowHeader header;
|
||||||
|
reader_->GetHeader(&header);
|
||||||
|
|
||||||
|
size_t metadata_bytes = (header.buffer_size * sizeof(struct ScratchMetadata)) / BLOCK_SZ;
|
||||||
|
return metadata_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Snapuserd::GetBufferDataOffset() {
|
||||||
|
CowHeader header;
|
||||||
|
reader_->GetHeader(&header);
|
||||||
|
|
||||||
|
return (header.header_size + GetBufferMetadataSize());
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* (2MB - 8K = 2088960 bytes) will be the buffer region to hold the data.
|
||||||
|
*/
|
||||||
|
size_t Snapuserd::GetBufferDataSize() {
|
||||||
|
CowHeader header;
|
||||||
|
reader_->GetHeader(&header);
|
||||||
|
|
||||||
|
size_t size = header.buffer_size - GetBufferMetadataSize();
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BufferState* Snapuserd::GetBufferState() {
|
||||||
|
CowHeader header;
|
||||||
|
reader_->GetHeader(&header);
|
||||||
|
|
||||||
|
struct BufferState* ra_state =
|
||||||
|
reinterpret_cast<struct BufferState*>((char*)mapped_addr_ + header.header_size);
|
||||||
|
return ra_state;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace snapshot
|
} // namespace snapshot
|
||||||
} // namespace android
|
} // namespace android
|
||||||
|
|
|
||||||
|
|
@ -17,8 +17,10 @@
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
#include <condition_variable>
|
||||||
#include <csignal>
|
#include <csignal>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <future>
|
#include <future>
|
||||||
|
|
@ -29,6 +31,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <android-base/file.h>
|
#include <android-base/file.h>
|
||||||
|
|
@ -56,6 +59,35 @@ static_assert(PAYLOAD_SIZE >= BLOCK_SZ);
|
||||||
*/
|
*/
|
||||||
static constexpr int NUM_THREADS_PER_PARTITION = 4;
|
static constexpr int NUM_THREADS_PER_PARTITION = 4;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* State transitions between worker threads and read-ahead
|
||||||
|
* threads.
|
||||||
|
*
|
||||||
|
* READ_AHEAD_BEGIN: Worker threads initiates the read-ahead
|
||||||
|
* thread to begin reading the copy operations
|
||||||
|
* for each bounded region.
|
||||||
|
*
|
||||||
|
* READ_AHEAD_IN_PROGRESS: When read ahead thread is in-flight
|
||||||
|
* and reading the copy operations.
|
||||||
|
*
|
||||||
|
* IO_IN_PROGRESS: Merge operation is in-progress by worker threads.
|
||||||
|
*
|
||||||
|
* IO_TERMINATED: When all the worker threads are done, request the
|
||||||
|
* read-ahead thread to terminate
|
||||||
|
*
|
||||||
|
* READ_AHEAD_FAILURE: If there are any IO failures when read-ahead
|
||||||
|
* thread is reading from COW device.
|
||||||
|
*
|
||||||
|
* The transition of each states is described in snapuserd_readahead.cpp
|
||||||
|
*/
|
||||||
|
enum class READ_AHEAD_IO_TRANSITION {
|
||||||
|
READ_AHEAD_BEGIN,
|
||||||
|
READ_AHEAD_IN_PROGRESS,
|
||||||
|
IO_IN_PROGRESS,
|
||||||
|
IO_TERMINATED,
|
||||||
|
READ_AHEAD_FAILURE,
|
||||||
|
};
|
||||||
|
|
||||||
class BufferSink : public IByteSink {
|
class BufferSink : public IByteSink {
|
||||||
public:
|
public:
|
||||||
void Initialize(size_t size);
|
void Initialize(size_t size);
|
||||||
|
|
@ -76,6 +108,47 @@ class BufferSink : public IByteSink {
|
||||||
|
|
||||||
class Snapuserd;
|
class Snapuserd;
|
||||||
|
|
||||||
|
class ReadAheadThread {
|
||||||
|
public:
|
||||||
|
ReadAheadThread(const std::string& cow_device, const std::string& backing_device,
|
||||||
|
const std::string& misc_name, std::shared_ptr<Snapuserd> snapuserd);
|
||||||
|
bool RunThread();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void InitializeIter();
|
||||||
|
bool IterDone();
|
||||||
|
void IterNext();
|
||||||
|
const CowOperation* GetIterOp();
|
||||||
|
void InitializeBuffer();
|
||||||
|
|
||||||
|
bool InitializeFds();
|
||||||
|
void CloseFds() {
|
||||||
|
cow_fd_ = {};
|
||||||
|
backing_store_fd_ = {};
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ReadAheadIOStart();
|
||||||
|
void PrepareReadAhead(uint64_t* source_block, int* pending_ops, std::vector<uint64_t>& blocks);
|
||||||
|
bool ReconstructDataFromCow();
|
||||||
|
void CheckOverlap(const CowOperation* cow_op);
|
||||||
|
|
||||||
|
void* read_ahead_buffer_;
|
||||||
|
void* metadata_buffer_;
|
||||||
|
std::vector<const CowOperation*>::reverse_iterator read_ahead_iter_;
|
||||||
|
std::string cow_device_;
|
||||||
|
std::string backing_store_device_;
|
||||||
|
std::string misc_name_;
|
||||||
|
|
||||||
|
unique_fd cow_fd_;
|
||||||
|
unique_fd backing_store_fd_;
|
||||||
|
|
||||||
|
std::shared_ptr<Snapuserd> snapuserd_;
|
||||||
|
|
||||||
|
std::unordered_set<uint64_t> dest_blocks_;
|
||||||
|
std::unordered_set<uint64_t> source_blocks_;
|
||||||
|
bool overlap_;
|
||||||
|
};
|
||||||
|
|
||||||
class WorkerThread {
|
class WorkerThread {
|
||||||
public:
|
public:
|
||||||
WorkerThread(const std::string& cow_device, const std::string& backing_device,
|
WorkerThread(const std::string& cow_device, const std::string& backing_device,
|
||||||
|
|
@ -116,12 +189,16 @@ class WorkerThread {
|
||||||
bool ProcessCopyOp(const CowOperation* cow_op);
|
bool ProcessCopyOp(const CowOperation* cow_op);
|
||||||
bool ProcessZeroOp();
|
bool ProcessZeroOp();
|
||||||
|
|
||||||
|
bool ReadFromBaseDevice(const CowOperation* cow_op);
|
||||||
|
bool GetReadAheadPopulatedBuffer(const CowOperation* cow_op);
|
||||||
|
|
||||||
// Merge related functions
|
// Merge related functions
|
||||||
bool ProcessMergeComplete(chunk_t chunk, void* buffer);
|
bool ProcessMergeComplete(chunk_t chunk, void* buffer);
|
||||||
loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
|
loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
|
||||||
int* unmerged_exceptions);
|
int* unmerged_exceptions);
|
||||||
|
|
||||||
int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
|
int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
|
||||||
int unmerged_exceptions);
|
int unmerged_exceptions, bool* copy_op, bool* commit);
|
||||||
|
|
||||||
sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
|
sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
|
||||||
chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
|
chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
|
||||||
|
|
@ -158,7 +235,10 @@ class Snapuserd : public std::enable_shared_from_this<Snapuserd> {
|
||||||
bool CommitMerge(int num_merge_ops);
|
bool CommitMerge(int num_merge_ops);
|
||||||
|
|
||||||
void CloseFds() { cow_fd_ = {}; }
|
void CloseFds() { cow_fd_ = {}; }
|
||||||
void FreeResources() { worker_threads_.clear(); }
|
void FreeResources() {
|
||||||
|
worker_threads_.clear();
|
||||||
|
read_ahead_thread_ = nullptr;
|
||||||
|
}
|
||||||
size_t GetMetadataAreaSize() { return vec_.size(); }
|
size_t GetMetadataAreaSize() { return vec_.size(); }
|
||||||
void* GetExceptionBuffer(size_t i) { return vec_[i].get(); }
|
void* GetExceptionBuffer(size_t i) { return vec_[i].get(); }
|
||||||
|
|
||||||
|
|
@ -173,16 +253,47 @@ class Snapuserd : public std::enable_shared_from_this<Snapuserd> {
|
||||||
return p1.first < p2.first;
|
return p1.first < p2.first;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
void UnmapBufferRegion();
|
||||||
std::vector<std::unique_ptr<WorkerThread>> worker_threads_;
|
bool MmapMetadata();
|
||||||
|
|
||||||
|
// Read-ahead related functions
|
||||||
|
std::vector<const CowOperation*>& GetReadAheadOpsVec() { return read_ahead_ops_; }
|
||||||
|
std::unordered_map<uint64_t, void*>& GetReadAheadMap() { return read_ahead_buffer_map_; }
|
||||||
|
void* GetMappedAddr() { return mapped_addr_; }
|
||||||
|
bool IsReadAheadFeaturePresent() { return read_ahead_feature_; }
|
||||||
|
void PrepareReadAhead();
|
||||||
|
void StartReadAhead();
|
||||||
|
void MergeCompleted();
|
||||||
|
bool ReadAheadIOCompleted(bool sync);
|
||||||
|
void ReadAheadIOFailed();
|
||||||
|
bool WaitForMergeToComplete();
|
||||||
|
bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer);
|
||||||
|
bool ReconstructDataFromCow() { return populate_data_from_cow_; }
|
||||||
|
void ReconstructDataFromCowFinish() { populate_data_from_cow_ = false; }
|
||||||
|
bool WaitForReadAheadToStart();
|
||||||
|
|
||||||
|
uint64_t GetBufferMetadataOffset();
|
||||||
|
size_t GetBufferMetadataSize();
|
||||||
|
size_t GetBufferDataOffset();
|
||||||
|
size_t GetBufferDataSize();
|
||||||
|
|
||||||
|
// Final block to be merged in a given read-ahead buffer region
|
||||||
|
void SetFinalBlockMerged(uint64_t x) { final_block_merged_ = x; }
|
||||||
|
uint64_t GetFinalBlockMerged() { return final_block_merged_; }
|
||||||
|
// Total number of blocks to be merged in a given read-ahead buffer region
|
||||||
|
void SetTotalRaBlocksMerged(int x) { total_ra_blocks_merged_ = x; }
|
||||||
|
int GetTotalRaBlocksMerged() { return total_ra_blocks_merged_; }
|
||||||
|
|
||||||
|
private:
|
||||||
bool IsChunkIdMetadata(chunk_t chunk);
|
bool IsChunkIdMetadata(chunk_t chunk);
|
||||||
chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
|
chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
|
||||||
|
|
||||||
|
bool GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer);
|
||||||
bool ReadMetadata();
|
bool ReadMetadata();
|
||||||
sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
|
sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
|
||||||
chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
|
chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
|
||||||
bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
|
bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
|
||||||
|
struct BufferState* GetBufferState();
|
||||||
|
|
||||||
std::string cow_device_;
|
std::string cow_device_;
|
||||||
std::string backing_store_device_;
|
std::string backing_store_device_;
|
||||||
|
|
@ -197,7 +308,6 @@ class Snapuserd : public std::enable_shared_from_this<Snapuserd> {
|
||||||
std::unique_ptr<ICowOpIter> cowop_iter_;
|
std::unique_ptr<ICowOpIter> cowop_iter_;
|
||||||
std::unique_ptr<ICowOpReverseIter> cowop_riter_;
|
std::unique_ptr<ICowOpReverseIter> cowop_riter_;
|
||||||
std::unique_ptr<CowReader> reader_;
|
std::unique_ptr<CowReader> reader_;
|
||||||
std::unique_ptr<CowWriter> writer_;
|
|
||||||
|
|
||||||
// Vector of disk exception which is a
|
// Vector of disk exception which is a
|
||||||
// mapping of old-chunk to new-chunk
|
// mapping of old-chunk to new-chunk
|
||||||
|
|
@ -208,6 +318,21 @@ class Snapuserd : public std::enable_shared_from_this<Snapuserd> {
|
||||||
std::vector<std::pair<sector_t, const CowOperation*>> chunk_vec_;
|
std::vector<std::pair<sector_t, const CowOperation*>> chunk_vec_;
|
||||||
|
|
||||||
std::mutex lock_;
|
std::mutex lock_;
|
||||||
|
std::condition_variable cv;
|
||||||
|
|
||||||
|
void* mapped_addr_;
|
||||||
|
size_t total_mapped_addr_length_;
|
||||||
|
|
||||||
|
std::vector<std::unique_ptr<WorkerThread>> worker_threads_;
|
||||||
|
// Read-ahead related
|
||||||
|
std::unordered_map<uint64_t, void*> read_ahead_buffer_map_;
|
||||||
|
std::vector<const CowOperation*> read_ahead_ops_;
|
||||||
|
bool populate_data_from_cow_ = false;
|
||||||
|
bool read_ahead_feature_;
|
||||||
|
uint64_t final_block_merged_;
|
||||||
|
int total_ra_blocks_merged_ = 0;
|
||||||
|
READ_AHEAD_IO_TRANSITION io_state_;
|
||||||
|
std::unique_ptr<ReadAheadThread> read_ahead_thread_;
|
||||||
|
|
||||||
bool merge_initiated_ = false;
|
bool merge_initiated_ = false;
|
||||||
bool attached_ = false;
|
bool attached_ = false;
|
||||||
|
|
|
||||||
460
fs_mgr/libsnapshot/snapuserd_readahead.cpp
Normal file
460
fs_mgr/libsnapshot/snapuserd_readahead.cpp
Normal file
|
|
@ -0,0 +1,460 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2021 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "snapuserd.h"
|
||||||
|
|
||||||
|
#include <csignal>
|
||||||
|
#include <optional>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
#include <libsnapshot/snapuserd_client.h>
|
||||||
|
|
||||||
|
namespace android {
|
||||||
|
namespace snapshot {
|
||||||
|
|
||||||
|
using namespace android;
|
||||||
|
using namespace android::dm;
|
||||||
|
using android::base::unique_fd;
|
||||||
|
|
||||||
|
#define SNAP_LOG(level) LOG(level) << misc_name_ << ": "
|
||||||
|
#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Merging a copy operation involves the following flow:
|
||||||
|
*
|
||||||
|
* 1: dm-snapshot layer requests merge for a 4k block. dm-user sends the request
|
||||||
|
* to the daemon
|
||||||
|
* 2: daemon reads the source block
|
||||||
|
* 3: daemon copies the source data
|
||||||
|
* 4: IO completion sent back to dm-user (a switch from user space to kernel)
|
||||||
|
* 5: dm-snapshot merges the data to base device
|
||||||
|
* 6: dm-snapshot sends the merge-completion IO to dm-user
|
||||||
|
* 7: dm-user re-directs the merge completion IO to daemon (one more switch)
|
||||||
|
* 8: daemon updates the COW file about the completed merge request (a write syscall) and followed
|
||||||
|
* by a fysnc. 9: Send the IO completion back to dm-user
|
||||||
|
*
|
||||||
|
* The above sequence is a significant overhead especially when merging one 4k
|
||||||
|
* block at a time.
|
||||||
|
*
|
||||||
|
* Read-ahead layer will optimize the above path by reading the data from base
|
||||||
|
* device in the background so that merging thread can retrieve the data from
|
||||||
|
* the read-ahead cache. Additionally, syncing of merged data is deferred to
|
||||||
|
* read-ahead thread threadby the IO path is not bottlenecked.
|
||||||
|
*
|
||||||
|
* We create a scratch space of 2MB to store the read-ahead data in the COW
|
||||||
|
* device.
|
||||||
|
*
|
||||||
|
* +-----------------------+
|
||||||
|
* | Header (fixed) |
|
||||||
|
* +-----------------------+
|
||||||
|
* | Scratch space | <-- 2MB
|
||||||
|
* +-----------------------+
|
||||||
|
*
|
||||||
|
* Scratch space is as follows:
|
||||||
|
*
|
||||||
|
* +-----------------------+
|
||||||
|
* | Metadata | <- 4k page
|
||||||
|
* +-----------------------+
|
||||||
|
* | Metadata | <- 4k page
|
||||||
|
* +-----------------------+
|
||||||
|
* | |
|
||||||
|
* | Read-ahead data |
|
||||||
|
* | |
|
||||||
|
* +-----------------------+
|
||||||
|
*
|
||||||
|
* State transitions and communication between read-ahead thread and worker
|
||||||
|
* thread during merge:
|
||||||
|
* =====================================================================
|
||||||
|
*
|
||||||
|
* Worker Threads Read-Ahead thread
|
||||||
|
* ------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* |
|
||||||
|
* |
|
||||||
|
* --> -----------------READ_AHEAD_BEGIN------------->|
|
||||||
|
* | | | READ_AHEAD_IN_PROGRESS
|
||||||
|
* | WAIT |
|
||||||
|
* | | |
|
||||||
|
* | |<-----------------IO_IN_PROGRESS---------------
|
||||||
|
* | | |
|
||||||
|
* | | IO_IN_PRGRESS WAIT
|
||||||
|
* | | |
|
||||||
|
* |<--| |
|
||||||
|
* | |
|
||||||
|
* ------------------IO_TERMINATED--------------->|
|
||||||
|
* END
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* ===================================================================
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
*
|
||||||
|
* We have 6 copy operations to be executed in OTA and there is a overlap. Update-engine
|
||||||
|
* will write to COW file as follows:
|
||||||
|
*
|
||||||
|
* Op-1: 20 -> 23
|
||||||
|
* Op-2: 19 -> 22
|
||||||
|
* Op-3: 18 -> 21
|
||||||
|
* Op-4: 17 -> 20
|
||||||
|
* Op-5: 16 -> 19
|
||||||
|
* Op-6: 15 -> 18
|
||||||
|
*
|
||||||
|
* Read-ahead thread will read all the 6 source blocks and store the data in the
|
||||||
|
* scratch space. Metadata will contain the destination block numbers. Thus,
|
||||||
|
* scratch space will look something like this:
|
||||||
|
*
|
||||||
|
* +--------------+
|
||||||
|
* | Block 23 |
|
||||||
|
* | offset - 1 |
|
||||||
|
* +--------------+
|
||||||
|
* | Block 22 |
|
||||||
|
* | offset - 2 |
|
||||||
|
* +--------------+
|
||||||
|
* | Block 21 |
|
||||||
|
* | offset - 3 |
|
||||||
|
* +--------------+
|
||||||
|
* ...
|
||||||
|
* ...
|
||||||
|
* +--------------+
|
||||||
|
* | Data-Block 20| <-- offset - 1
|
||||||
|
* +--------------+
|
||||||
|
* | Data-Block 19| <-- offset - 2
|
||||||
|
* +--------------+
|
||||||
|
* | Data-Block 18| <-- offset - 3
|
||||||
|
* +--------------+
|
||||||
|
* ...
|
||||||
|
* ...
|
||||||
|
*
|
||||||
|
* ====================================================================
|
||||||
|
* IO Path:
|
||||||
|
*
|
||||||
|
* Read-ahead will serve the data to worker threads during merge only
|
||||||
|
* after metadata and data are persisted to the scratch space. Worker
|
||||||
|
* threads during merge will always retrieve the data from cache; if the
|
||||||
|
* cache is not populated, it will wait for the read-ahead thread to finish.
|
||||||
|
* Furthermore, the number of operations merged will by synced to the header
|
||||||
|
* only when all the blocks in the read-ahead cache are merged. In the above
|
||||||
|
* case, when all 6 operations are merged, COW Header is updated with
|
||||||
|
* num_merge_ops = 6.
|
||||||
|
*
|
||||||
|
* Merge resume after crash:
|
||||||
|
*
|
||||||
|
* Let's say we have a crash after 5 operations are merged. i.e. after
|
||||||
|
* Op-5: 16->19 is completed but before the Op-6 is merged. Thus, COW Header
|
||||||
|
* num_merge_ops will be 0 as the all the ops were not merged yet. During next
|
||||||
|
* reboot, read-ahead thread will re-construct the data in-memory from the
|
||||||
|
* scratch space; when merge resumes, Op-1 will be re-exectued. However,
|
||||||
|
* data will be served from read-ahead cache safely even though, block 20
|
||||||
|
* was over-written by Op-4.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
ReadAheadThread::ReadAheadThread(const std::string& cow_device, const std::string& backing_device,
|
||||||
|
const std::string& misc_name,
|
||||||
|
std::shared_ptr<Snapuserd> snapuserd) {
|
||||||
|
cow_device_ = cow_device;
|
||||||
|
backing_store_device_ = backing_device;
|
||||||
|
misc_name_ = misc_name;
|
||||||
|
snapuserd_ = snapuserd;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReadAheadThread::CheckOverlap(const CowOperation* cow_op) {
|
||||||
|
if (dest_blocks_.count(cow_op->new_block) || source_blocks_.count(cow_op->source)) {
|
||||||
|
overlap_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
dest_blocks_.insert(cow_op->source);
|
||||||
|
source_blocks_.insert(cow_op->new_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReadAheadThread::PrepareReadAhead(uint64_t* source_block, int* pending_ops,
|
||||||
|
std::vector<uint64_t>& blocks) {
|
||||||
|
int num_ops = *pending_ops;
|
||||||
|
int nr_consecutive = 0;
|
||||||
|
|
||||||
|
if (!IterDone() && num_ops) {
|
||||||
|
// Get the first block
|
||||||
|
const CowOperation* cow_op = GetIterOp();
|
||||||
|
*source_block = cow_op->source;
|
||||||
|
IterNext();
|
||||||
|
num_ops -= 1;
|
||||||
|
nr_consecutive = 1;
|
||||||
|
blocks.push_back(cow_op->new_block);
|
||||||
|
|
||||||
|
if (!overlap_) {
|
||||||
|
CheckOverlap(cow_op);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find number of consecutive blocks working backwards.
|
||||||
|
*/
|
||||||
|
while (!IterDone() && num_ops) {
|
||||||
|
const CowOperation* op = GetIterOp();
|
||||||
|
if (op->source != (*source_block - nr_consecutive)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
nr_consecutive += 1;
|
||||||
|
num_ops -= 1;
|
||||||
|
blocks.push_back(op->new_block);
|
||||||
|
IterNext();
|
||||||
|
|
||||||
|
if (!overlap_) {
|
||||||
|
CheckOverlap(op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ReadAheadThread::ReconstructDataFromCow() {
|
||||||
|
std::unordered_map<uint64_t, void*>& read_ahead_buffer_map = snapuserd_->GetReadAheadMap();
|
||||||
|
read_ahead_buffer_map.clear();
|
||||||
|
loff_t metadata_offset = 0;
|
||||||
|
loff_t start_data_offset = snapuserd_->GetBufferDataOffset();
|
||||||
|
int num_ops = 0;
|
||||||
|
int total_blocks_merged = 0;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
struct ScratchMetadata* bm = reinterpret_cast<struct ScratchMetadata*>(
|
||||||
|
(char*)metadata_buffer_ + metadata_offset);
|
||||||
|
|
||||||
|
// Done reading metadata
|
||||||
|
if (bm->new_block == 0 && bm->file_offset == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
loff_t buffer_offset = bm->file_offset - start_data_offset;
|
||||||
|
void* bufptr = static_cast<void*>((char*)read_ahead_buffer_ + buffer_offset);
|
||||||
|
read_ahead_buffer_map[bm->new_block] = bufptr;
|
||||||
|
num_ops += 1;
|
||||||
|
total_blocks_merged += 1;
|
||||||
|
|
||||||
|
metadata_offset += sizeof(struct ScratchMetadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We are done re-constructing the mapping; however, we need to make sure
|
||||||
|
// all the COW operations to-be merged are present in the re-constructed
|
||||||
|
// mapping.
|
||||||
|
while (!IterDone()) {
|
||||||
|
const CowOperation* op = GetIterOp();
|
||||||
|
if (read_ahead_buffer_map.find(op->new_block) != read_ahead_buffer_map.end()) {
|
||||||
|
num_ops -= 1;
|
||||||
|
snapuserd_->SetFinalBlockMerged(op->new_block);
|
||||||
|
IterNext();
|
||||||
|
} else {
|
||||||
|
// Verify that we have covered all the ops which were re-constructed
|
||||||
|
// from COW device - These are the ops which are being
|
||||||
|
// re-constructed after crash.
|
||||||
|
CHECK(num_ops == 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
snapuserd_->SetTotalRaBlocksMerged(total_blocks_merged);
|
||||||
|
|
||||||
|
snapuserd_->ReconstructDataFromCowFinish();
|
||||||
|
|
||||||
|
if (!snapuserd_->ReadAheadIOCompleted(true)) {
|
||||||
|
SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed...";
|
||||||
|
snapuserd_->ReadAheadIOFailed();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
SNAP_LOG(INFO) << "ReconstructDataFromCow success";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ReadAheadThread::ReadAheadIOStart() {
|
||||||
|
// Check if the data has to be constructed from the COW file.
|
||||||
|
// This will be true only once during boot up after a crash
|
||||||
|
// during merge.
|
||||||
|
if (snapuserd_->ReconstructDataFromCow()) {
|
||||||
|
return ReconstructDataFromCow();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_map<uint64_t, void*>& read_ahead_buffer_map = snapuserd_->GetReadAheadMap();
|
||||||
|
read_ahead_buffer_map.clear();
|
||||||
|
|
||||||
|
int num_ops = (snapuserd_->GetBufferDataSize()) / BLOCK_SZ;
|
||||||
|
loff_t metadata_offset = 0;
|
||||||
|
|
||||||
|
struct ScratchMetadata* bm =
|
||||||
|
reinterpret_cast<struct ScratchMetadata*>((char*)metadata_buffer_ + metadata_offset);
|
||||||
|
|
||||||
|
bm->new_block = 0;
|
||||||
|
bm->file_offset = 0;
|
||||||
|
|
||||||
|
std::vector<uint64_t> blocks;
|
||||||
|
|
||||||
|
loff_t buffer_offset = 0;
|
||||||
|
loff_t offset = 0;
|
||||||
|
loff_t file_offset = snapuserd_->GetBufferDataOffset();
|
||||||
|
int total_blocks_merged = 0;
|
||||||
|
overlap_ = false;
|
||||||
|
dest_blocks_.clear();
|
||||||
|
source_blocks_.clear();
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
uint64_t source_block;
|
||||||
|
int linear_blocks;
|
||||||
|
|
||||||
|
PrepareReadAhead(&source_block, &num_ops, blocks);
|
||||||
|
linear_blocks = blocks.size();
|
||||||
|
if (linear_blocks == 0) {
|
||||||
|
// No more blocks to read
|
||||||
|
SNAP_LOG(DEBUG) << " Read-ahead completed....";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the first block in the consecutive set of blocks
|
||||||
|
source_block = source_block + 1 - linear_blocks;
|
||||||
|
size_t io_size = (linear_blocks * BLOCK_SZ);
|
||||||
|
num_ops -= linear_blocks;
|
||||||
|
total_blocks_merged += linear_blocks;
|
||||||
|
|
||||||
|
// Mark the block number as the one which will
|
||||||
|
// be the final block to be merged in this entire region.
|
||||||
|
// Read-ahead thread will get
|
||||||
|
// notified when this block is merged to make
|
||||||
|
// forward progress
|
||||||
|
snapuserd_->SetFinalBlockMerged(blocks.back());
|
||||||
|
|
||||||
|
while (linear_blocks) {
|
||||||
|
uint64_t new_block = blocks.back();
|
||||||
|
blocks.pop_back();
|
||||||
|
// Assign the mapping
|
||||||
|
void* bufptr = static_cast<void*>((char*)read_ahead_buffer_ + offset);
|
||||||
|
read_ahead_buffer_map[new_block] = bufptr;
|
||||||
|
offset += BLOCK_SZ;
|
||||||
|
|
||||||
|
bm = reinterpret_cast<struct ScratchMetadata*>((char*)metadata_buffer_ +
|
||||||
|
metadata_offset);
|
||||||
|
bm->new_block = new_block;
|
||||||
|
bm->file_offset = file_offset;
|
||||||
|
|
||||||
|
metadata_offset += sizeof(struct ScratchMetadata);
|
||||||
|
file_offset += BLOCK_SZ;
|
||||||
|
|
||||||
|
linear_blocks -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read from the base device consecutive set of blocks in one shot
|
||||||
|
if (!android::base::ReadFullyAtOffset(backing_store_fd_,
|
||||||
|
(char*)read_ahead_buffer_ + buffer_offset, io_size,
|
||||||
|
source_block * BLOCK_SZ)) {
|
||||||
|
SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_
|
||||||
|
<< "at block :" << source_block << " buffer_offset : " << buffer_offset
|
||||||
|
<< " io_size : " << io_size << " buf-addr : " << read_ahead_buffer_;
|
||||||
|
|
||||||
|
snapuserd_->ReadAheadIOFailed();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is important - explicitly set the contents to zero. This is used
|
||||||
|
// when re-constructing the data after crash. This indicates end of
|
||||||
|
// reading metadata contents when re-constructing the data
|
||||||
|
bm = reinterpret_cast<struct ScratchMetadata*>((char*)metadata_buffer_ + metadata_offset);
|
||||||
|
bm->new_block = 0;
|
||||||
|
bm->file_offset = 0;
|
||||||
|
|
||||||
|
buffer_offset += io_size;
|
||||||
|
CHECK(offset == buffer_offset);
|
||||||
|
CHECK((file_offset - snapuserd_->GetBufferDataOffset()) == offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
snapuserd_->SetTotalRaBlocksMerged(total_blocks_merged);
|
||||||
|
|
||||||
|
// Flush the data only if we have a overlapping blocks in the region
|
||||||
|
if (!snapuserd_->ReadAheadIOCompleted(overlap_)) {
|
||||||
|
SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed...";
|
||||||
|
snapuserd_->ReadAheadIOFailed();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ReadAheadThread::RunThread() {
|
||||||
|
if (!InitializeFds()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
InitializeIter();
|
||||||
|
InitializeBuffer();
|
||||||
|
|
||||||
|
while (!IterDone()) {
|
||||||
|
if (!ReadAheadIOStart()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool status = snapuserd_->WaitForMergeToComplete();
|
||||||
|
|
||||||
|
if (status && !snapuserd_->CommitMerge(snapuserd_->GetTotalRaBlocksMerged())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!status) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
CloseFds();
|
||||||
|
SNAP_LOG(INFO) << " ReadAhead thread terminating....";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialization
|
||||||
|
bool ReadAheadThread::InitializeFds() {
|
||||||
|
backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
|
||||||
|
if (backing_store_fd_ < 0) {
|
||||||
|
SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
|
||||||
|
if (cow_fd_ < 0) {
|
||||||
|
SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReadAheadThread::InitializeIter() {
|
||||||
|
std::vector<const CowOperation*>& read_ahead_ops = snapuserd_->GetReadAheadOpsVec();
|
||||||
|
read_ahead_iter_ = read_ahead_ops.rbegin();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ReadAheadThread::IterDone() {
|
||||||
|
std::vector<const CowOperation*>& read_ahead_ops = snapuserd_->GetReadAheadOpsVec();
|
||||||
|
return read_ahead_iter_ == read_ahead_ops.rend();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReadAheadThread::IterNext() {
|
||||||
|
read_ahead_iter_++;
|
||||||
|
}
|
||||||
|
|
||||||
|
const CowOperation* ReadAheadThread::GetIterOp() {
|
||||||
|
return *read_ahead_iter_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReadAheadThread::InitializeBuffer() {
|
||||||
|
void* mapped_addr = snapuserd_->GetMappedAddr();
|
||||||
|
// Map the scratch space region into memory
|
||||||
|
metadata_buffer_ =
|
||||||
|
static_cast<void*>((char*)mapped_addr + snapuserd_->GetBufferMetadataOffset());
|
||||||
|
read_ahead_buffer_ = static_cast<void*>((char*)mapped_addr + snapuserd_->GetBufferDataOffset());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace snapshot
|
||||||
|
} // namespace android
|
||||||
|
|
@ -209,10 +209,11 @@ void SnapuserdServer::RunThread(std::shared_ptr<DmUserHandler> handler) {
|
||||||
}
|
}
|
||||||
|
|
||||||
handler->snapuserd()->CloseFds();
|
handler->snapuserd()->CloseFds();
|
||||||
|
handler->snapuserd()->CheckMergeCompletionStatus();
|
||||||
|
handler->snapuserd()->UnmapBufferRegion();
|
||||||
|
|
||||||
auto misc_name = handler->misc_name();
|
auto misc_name = handler->misc_name();
|
||||||
LOG(INFO) << "Handler thread about to exit: " << misc_name;
|
LOG(INFO) << "Handler thread about to exit: " << misc_name;
|
||||||
handler->snapuserd()->CheckMergeCompletionStatus();
|
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(lock_);
|
std::lock_guard<std::mutex> lock(lock_);
|
||||||
|
|
|
||||||
|
|
@ -135,14 +135,11 @@ bool WorkerThread::ProcessReplaceOp(const CowOperation* cow_op) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start the copy operation. This will read the backing
|
bool WorkerThread::ReadFromBaseDevice(const CowOperation* cow_op) {
|
||||||
// block device which is represented by cow_op->source.
|
|
||||||
bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) {
|
|
||||||
void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
|
void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
|
||||||
CHECK(buffer != nullptr);
|
CHECK(buffer != nullptr);
|
||||||
|
SNAP_LOG(DEBUG) << " ReadFromBaseDevice...: new-block: " << cow_op->new_block
|
||||||
// Issue a single 4K IO. However, this can be optimized
|
<< " Source: " << cow_op->source;
|
||||||
// if the successive blocks are contiguous.
|
|
||||||
if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ,
|
if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ,
|
||||||
cow_op->source * BLOCK_SZ)) {
|
cow_op->source * BLOCK_SZ)) {
|
||||||
SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_
|
SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_
|
||||||
|
|
@ -153,6 +150,31 @@ bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool WorkerThread::GetReadAheadPopulatedBuffer(const CowOperation* cow_op) {
|
||||||
|
void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
|
||||||
|
CHECK(buffer != nullptr);
|
||||||
|
|
||||||
|
if (!snapuserd_->GetReadAheadPopulatedBuffer(cow_op->new_block, buffer)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the copy operation. This will read the backing
|
||||||
|
// block device which is represented by cow_op->source.
|
||||||
|
bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) {
|
||||||
|
if (!GetReadAheadPopulatedBuffer(cow_op)) {
|
||||||
|
SNAP_LOG(DEBUG) << " GetReadAheadPopulatedBuffer failed..."
|
||||||
|
<< " new_block: " << cow_op->new_block;
|
||||||
|
if (!ReadFromBaseDevice(cow_op)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool WorkerThread::ProcessZeroOp() {
|
bool WorkerThread::ProcessZeroOp() {
|
||||||
// Zero out the entire block
|
// Zero out the entire block
|
||||||
void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
|
void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
|
||||||
|
|
@ -386,8 +408,10 @@ loff_t WorkerThread::GetMergeStartOffset(void* merged_buffer, void* unmerged_buf
|
||||||
}
|
}
|
||||||
|
|
||||||
int WorkerThread::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
|
int WorkerThread::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
|
||||||
int unmerged_exceptions) {
|
int unmerged_exceptions, bool* copy_op, bool* commit) {
|
||||||
int merged_ops_cur_iter = 0;
|
int merged_ops_cur_iter = 0;
|
||||||
|
std::unordered_map<uint64_t, void*>& read_ahead_buffer_map = snapuserd_->GetReadAheadMap();
|
||||||
|
*copy_op = false;
|
||||||
std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();
|
std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();
|
||||||
|
|
||||||
// Find the operations which are merged in this cycle.
|
// Find the operations which are merged in this cycle.
|
||||||
|
|
@ -411,6 +435,23 @@ int WorkerThread::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffe
|
||||||
const CowOperation* cow_op = it->second;
|
const CowOperation* cow_op = it->second;
|
||||||
|
|
||||||
CHECK(cow_op != nullptr);
|
CHECK(cow_op != nullptr);
|
||||||
|
if (snapuserd_->IsReadAheadFeaturePresent() && cow_op->type == kCowCopyOp) {
|
||||||
|
*copy_op = true;
|
||||||
|
// Every single copy operation has to come from read-ahead
|
||||||
|
// cache.
|
||||||
|
if (read_ahead_buffer_map.find(cow_op->new_block) == read_ahead_buffer_map.end()) {
|
||||||
|
SNAP_LOG(ERROR)
|
||||||
|
<< " Block: " << cow_op->new_block << " not found in read-ahead cache"
|
||||||
|
<< " Source: " << cow_op->source;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// If this is a final block merged in the read-ahead buffer
|
||||||
|
// region, notify the read-ahead thread to make forward
|
||||||
|
// progress
|
||||||
|
if (cow_op->new_block == snapuserd_->GetFinalBlockMerged()) {
|
||||||
|
*commit = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CHECK(cow_op->new_block == cow_de->old_chunk);
|
CHECK(cow_op->new_block == cow_de->old_chunk);
|
||||||
// zero out to indicate that operation is merged.
|
// zero out to indicate that operation is merged.
|
||||||
|
|
@ -442,6 +483,8 @@ int WorkerThread::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffe
|
||||||
bool WorkerThread::ProcessMergeComplete(chunk_t chunk, void* buffer) {
|
bool WorkerThread::ProcessMergeComplete(chunk_t chunk, void* buffer) {
|
||||||
uint32_t stride = exceptions_per_area_ + 1;
|
uint32_t stride = exceptions_per_area_ + 1;
|
||||||
const std::vector<std::unique_ptr<uint8_t[]>>& vec = snapuserd_->GetMetadataVec();
|
const std::vector<std::unique_ptr<uint8_t[]>>& vec = snapuserd_->GetMetadataVec();
|
||||||
|
bool copy_op = false;
|
||||||
|
bool commit = false;
|
||||||
|
|
||||||
// ChunkID to vector index
|
// ChunkID to vector index
|
||||||
lldiv_t divresult = lldiv(chunk, stride);
|
lldiv_t divresult = lldiv(chunk, stride);
|
||||||
|
|
@ -452,13 +495,24 @@ bool WorkerThread::ProcessMergeComplete(chunk_t chunk, void* buffer) {
|
||||||
int unmerged_exceptions = 0;
|
int unmerged_exceptions = 0;
|
||||||
loff_t offset = GetMergeStartOffset(buffer, vec[divresult.quot].get(), &unmerged_exceptions);
|
loff_t offset = GetMergeStartOffset(buffer, vec[divresult.quot].get(), &unmerged_exceptions);
|
||||||
|
|
||||||
int merged_ops_cur_iter =
|
int merged_ops_cur_iter = GetNumberOfMergedOps(buffer, vec[divresult.quot].get(), offset,
|
||||||
GetNumberOfMergedOps(buffer, vec[divresult.quot].get(), offset, unmerged_exceptions);
|
unmerged_exceptions, ©_op, &commit);
|
||||||
|
|
||||||
// There should be at least one operation merged in this cycle
|
// There should be at least one operation merged in this cycle
|
||||||
CHECK(merged_ops_cur_iter > 0);
|
CHECK(merged_ops_cur_iter > 0);
|
||||||
if (!snapuserd_->CommitMerge(merged_ops_cur_iter)) {
|
|
||||||
return false;
|
if (copy_op) {
|
||||||
|
if (commit) {
|
||||||
|
// Push the flushing logic to read-ahead thread so that merge thread
|
||||||
|
// can make forward progress. Sync will happen in the background
|
||||||
|
snapuserd_->StartReadAhead();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Non-copy ops and all ops in older COW format
|
||||||
|
if (!snapuserd_->CommitMerge(merged_ops_cur_iter)) {
|
||||||
|
SNAP_LOG(ERROR) << "CommitMerge failed...";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SNAP_LOG(DEBUG) << "Merge success: " << merged_ops_cur_iter << "chunk: " << chunk;
|
SNAP_LOG(DEBUG) << "Merge success: " << merged_ops_cur_iter << "chunk: " << chunk;
|
||||||
|
|
@ -613,12 +667,21 @@ bool WorkerThread::DmuserReadRequest() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Just return the header if it is an error
|
||||||
|
if (header->type == DM_USER_RESP_ERROR) {
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Daemon will not be terminated if there is any error. We will
|
// Daemon will not be terminated if there is any error. We will
|
||||||
// just send the error back to dm-user.
|
// just send the error back to dm-user.
|
||||||
if (!WriteDmUserPayload(ret)) {
|
if (!WriteDmUserPayload(ret)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (header->type == DM_USER_RESP_ERROR) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
remaining_size -= ret;
|
remaining_size -= ret;
|
||||||
offset += ret;
|
offset += ret;
|
||||||
} while (remaining_size > 0);
|
} while (remaining_size > 0);
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,7 @@ message DynamicPartitionMetadata {
|
||||||
repeated DynamicPartitionGroup groups = 1;
|
repeated DynamicPartitionGroup groups = 1;
|
||||||
optional bool vabc_enabled = 3;
|
optional bool vabc_enabled = 3;
|
||||||
optional string vabc_compression_param = 4;
|
optional string vabc_compression_param = 4;
|
||||||
|
optional uint32 cow_version = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message DeltaArchiveManifest {
|
message DeltaArchiveManifest {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue