From 0ef9783c57bfa696ae09ef2e7c3c024d058b8848 Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Sun, 15 Mar 2020 21:23:24 -0700 Subject: [PATCH] Add std::map implementation for cd entry map Add the std::map implementation to be used later in zip64 format. Also move the entry map classes to a separate file to make the hierarchy clear. Test: unittests pass Change-Id: I74d95f53207cc8ca871b955e2a15c184d5497833 --- libziparchive/zip_archive.cc | 64 ++++++++++++++++++++----- libziparchive/zip_archive_private.h | 36 +++++++++++--- libziparchive/zip_archive_test.cc | 73 +++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+), 18 deletions(-) diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc index 445150780..34a9c545f 100644 --- a/libziparchive/zip_archive.cc +++ b/libziparchive/zip_archive.cc @@ -107,15 +107,15 @@ static uint32_t ComputeHash(std::string_view name) { } // Convert a ZipEntry to a hash table index, verifying that it's in a valid range. -std::pair CdEntryMapZip32::GetCdEntryOffset(std::string_view name, - const uint8_t* start) const { +std::pair CdEntryMapZip32::GetCdEntryOffset(std::string_view name, + const uint8_t* start) const { const uint32_t hash = ComputeHash(name); // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. uint32_t ent = hash & (hash_table_size_ - 1); while (hash_table_[ent].name_offset != 0) { if (hash_table_[ent].ToStringView(start) == name) { - return {0, hash_table_[ent].name_offset}; + return {kSuccess, hash_table_[ent].name_offset}; } ent = (ent + 1) & (hash_table_size_ - 1); } @@ -124,7 +124,7 @@ std::pair CdEntryMapZip32::GetCdEntryOffset(std::string_view return {kEntryNotFound, 0}; } -int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) { +ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) { const uint64_t hash = ComputeHash(name); uint32_t ent = hash & (hash_table_size_ - 1); @@ -145,7 +145,7 @@ int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) { const char* start_char = reinterpret_cast(start); hash_table_[ent].name_offset = static_cast(name.data() - start_char); hash_table_[ent].name_length = static_cast(name.size()); - return 0; + return kSuccess; } void CdEntryMapZip32::ResetIteration() { @@ -166,6 +166,11 @@ std::pair CdEntryMapZip32::Next(const uint8_t* cd_st } CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) { + /* + * Create hash table. We have a minimum 75% load factor, possibly as + * low as 50% after we round off to a power of 2. There must be at + * least one unused entry to avoid an infinite loop during creation. + */ hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3); hash_table_ = { reinterpret_cast(calloc(hash_table_size_, sizeof(ZipStringOffset))), free}; @@ -179,6 +184,43 @@ std::unique_ptr CdEntryMapZip32::Create(uint16_t num_entrie return std::unique_ptr(entry_map); } +std::unique_ptr CdEntryMapZip64::Create() { + return std::unique_ptr(new CdEntryMapZip64()); +} + +ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) { + const auto [it, added] = + entry_table_.insert({name, name.data() - reinterpret_cast(start)}); + if (!added) { + ALOGW("Zip: Found duplicate entry %.*s", static_cast(name.size()), name.data()); + return kDuplicateEntry; + } + return kSuccess; +} + +std::pair CdEntryMapZip64::GetCdEntryOffset(std::string_view name, + const uint8_t* /*cd_start*/) const { + const auto it = entry_table_.find(name); + if (it == entry_table_.end()) { + ALOGV("Zip: Could not find entry %.*s", static_cast(name.size()), name.data()); + return {kEntryNotFound, 0}; + } + + return {kSuccess, it->second}; +} + +void CdEntryMapZip64::ResetIteration() { + iterator_ = entry_table_.begin(); +} + +std::pair CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) { + if (iterator_ == entry_table_.end()) { + return {}; + } + + return *iterator_++; +} + #if defined(__BIONIC__) uint64_t GetOwnerTag(const ZipArchive* archive) { return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE, @@ -357,12 +399,12 @@ static int32_t ParseZipArchive(ZipArchive* archive) { const size_t cd_length = archive->central_directory.GetMapLength(); const uint16_t num_entries = archive->num_entries; - /* - * Create hash table. We have a minimum 75% load factor, possibly as - * low as 50% after we round off to a power of 2. There must be at - * least one unused entry to avoid an infinite loop during creation. - */ - archive->cd_entry_map = CdEntryMapZip32::Create(num_entries); + // TODO(xunchang) parse the zip64 Eocd + if (num_entries > UINT16_MAX) { + archive->cd_entry_map = CdEntryMapZip64::Create(); + } else { + archive->cd_entry_map = CdEntryMapZip32::Create(num_entries); + } if (archive->cd_entry_map == nullptr) { return kAllocationFailed; } diff --git a/libziparchive/zip_archive_private.h b/libziparchive/zip_archive_private.h index 68977f6a1..ecb9f22bc 100644 --- a/libziparchive/zip_archive_private.h +++ b/libziparchive/zip_archive_private.h @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -46,7 +47,9 @@ static const char* kErrorMessages[] = { "Allocation failed", }; -enum ErrorCodes : int32_t { +enum ZipError : int32_t { + kSuccess = 0, + kIterationEnd = -1, // We encountered a Zlib error when inflating a stream from this file. @@ -149,12 +152,12 @@ class CdEntryMapInterface { // Adds an entry to the map. The |name| should internally points to the // filename field of a cd entry. And |start| points to the beginning of the // central directory. Returns 0 on success. - virtual int32_t AddToMap(std::string_view name, const uint8_t* start) = 0; + virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0; // For the zip entry |entryName|, finds the offset of its filename field in // the central directory. Returns a pair of [status, offset]. The value of // the status is 0 on success. - virtual std::pair GetCdEntryOffset(std::string_view name, - const uint8_t* cd_start) const = 0; + virtual std::pair GetCdEntryOffset(std::string_view name, + const uint8_t* cd_start) const = 0; // Resets the iterator to the beginning of the map. virtual void ResetIteration() = 0; // Returns the [name, cd offset] of the current element. Also increments the @@ -190,9 +193,9 @@ class CdEntryMapZip32 : public CdEntryMapInterface { public: static std::unique_ptr Create(uint16_t num_entries); - int32_t AddToMap(std::string_view name, const uint8_t* start) override; - std::pair GetCdEntryOffset(std::string_view name, - const uint8_t* cd_start) const override; + ZipError AddToMap(std::string_view name, const uint8_t* start) override; + std::pair GetCdEntryOffset(std::string_view name, + const uint8_t* cd_start) const override; void ResetIteration() override; std::pair Next(const uint8_t* cd_start) override; @@ -210,6 +213,25 @@ class CdEntryMapZip32 : public CdEntryMapInterface { uint32_t current_position_{0}; }; +// This implementation of CdEntryMap uses a std::map +class CdEntryMapZip64 : public CdEntryMapInterface { + public: + static std::unique_ptr Create(); + + ZipError AddToMap(std::string_view name, const uint8_t* start) override; + std::pair GetCdEntryOffset(std::string_view name, + const uint8_t* cd_start) const override; + void ResetIteration() override; + std::pair Next(const uint8_t* cd_start) override; + + private: + CdEntryMapZip64() = default; + + std::map entry_table_; + + std::map::iterator iterator_; +}; + struct ZipArchive { // open Zip archive mutable MappedZipFile mapped_zip; diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc index 091630407..523d4c5c1 100644 --- a/libziparchive/zip_archive_test.cc +++ b/libziparchive/zip_archive_test.cc @@ -24,11 +24,14 @@ #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -53,6 +56,76 @@ static int32_t OpenArchiveWrapper(const std::string& name, ZipArchiveHandle* han return OpenArchive(abs_path.c_str(), handle); } +class CdEntryMapTest : public ::testing::Test { + protected: + void SetUp() override { + names_ = { + "a.txt", "b.txt", "b/", "b/c.txt", "b/d.txt", + }; + separator_ = "separator"; + header_ = "metadata"; + joined_names_ = header_ + android::base::Join(names_, separator_); + base_ptr_ = reinterpret_cast(&joined_names_[0]); + + entry_maps_.emplace_back(CdEntryMapZip32::Create(static_cast(names_.size()))); + entry_maps_.emplace_back(CdEntryMapZip64::Create()); + for (auto& cd_map : entry_maps_) { + ASSERT_NE(nullptr, cd_map); + size_t offset = header_.size(); + for (const auto& name : names_) { + auto status = cd_map->AddToMap( + std::string_view{joined_names_.c_str() + offset, name.size()}, base_ptr_); + ASSERT_EQ(0, status); + offset += name.size() + separator_.size(); + } + } + } + + std::vector names_; + // A continuous region of memory serves as a mock of the central directory. + std::string joined_names_; + // We expect some metadata at the beginning of the central directory and between filenames. + std::string header_; + std::string separator_; + + std::vector> entry_maps_; + uint8_t* base_ptr_{nullptr}; // Points to the start of the central directory. +}; + +TEST_F(CdEntryMapTest, AddDuplicatedEntry) { + for (auto& cd_map : entry_maps_) { + std::string_view name = "b.txt"; + ASSERT_NE(0, cd_map->AddToMap(name, base_ptr_)); + } +} + +TEST_F(CdEntryMapTest, FindEntry) { + for (auto& cd_map : entry_maps_) { + uint64_t expected_offset = header_.size(); + for (const auto& name : names_) { + auto [status, offset] = cd_map->GetCdEntryOffset(name, base_ptr_); + ASSERT_EQ(status, kSuccess); + ASSERT_EQ(offset, expected_offset); + expected_offset += name.size() + separator_.size(); + } + } +} + +TEST_F(CdEntryMapTest, Iteration) { + std::set expected(names_.begin(), names_.end()); + for (auto& cd_map : entry_maps_) { + cd_map->ResetIteration(); + std::set entry_set; + auto ret = cd_map->Next(base_ptr_); + while (ret != std::pair{}) { + auto [it, insert_status] = entry_set.insert(ret.first); + ASSERT_TRUE(insert_status); + ret = cd_map->Next(base_ptr_); + } + ASSERT_EQ(expected, entry_set); + } +} + TEST(ziparchive, Open) { ZipArchiveHandle handle; ASSERT_EQ(0, OpenArchiveWrapper(kValidZip, &handle));