diff --git a/libziparchive/Android.bp b/libziparchive/Android.bp index 1bbffaf7a..3d4e86e46 100644 --- a/libziparchive/Android.bp +++ b/libziparchive/Android.bp @@ -60,6 +60,7 @@ cc_defaults { srcs: [ "zip_archive.cc", "zip_archive_stream_entry.cc", + "zip_cd_entry_map.cc", "zip_writer.cc", ], diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc index 34a9c545f..2648c5948 100644 --- a/libziparchive/zip_archive.cc +++ b/libziparchive/zip_archive.cc @@ -85,142 +85,6 @@ static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord); * of the string length into the hash table entry. */ -/* - * Round up to the next highest power of 2. - * - * Found on http://graphics.stanford.edu/~seander/bithacks.html. - */ -static uint32_t RoundUpPower2(uint32_t val) { - val--; - val |= val >> 1; - val |= val >> 2; - val |= val >> 4; - val |= val >> 8; - val |= val >> 16; - val++; - - return val; -} - -static uint32_t ComputeHash(std::string_view name) { - return static_cast(std::hash{}(name)); -} - -// Convert a ZipEntry to a hash table index, verifying that it's in a valid range. -std::pair CdEntryMapZip32::GetCdEntryOffset(std::string_view name, - const uint8_t* start) const { - const uint32_t hash = ComputeHash(name); - - // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. - uint32_t ent = hash & (hash_table_size_ - 1); - while (hash_table_[ent].name_offset != 0) { - if (hash_table_[ent].ToStringView(start) == name) { - return {kSuccess, hash_table_[ent].name_offset}; - } - ent = (ent + 1) & (hash_table_size_ - 1); - } - - ALOGV("Zip: Unable to find entry %.*s", static_cast(name.size()), name.data()); - return {kEntryNotFound, 0}; -} - -ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) { - const uint64_t hash = ComputeHash(name); - uint32_t ent = hash & (hash_table_size_ - 1); - - /* - * We over-allocated the table, so we're guaranteed to find an empty slot. 
- * Further, we guarantee that the hashtable size is not 0. - */ - while (hash_table_[ent].name_offset != 0) { - if (hash_table_[ent].ToStringView(start) == name) { - // We've found a duplicate entry. We don't accept duplicates. - ALOGW("Zip: Found duplicate entry %.*s", static_cast(name.size()), name.data()); - return kDuplicateEntry; - } - ent = (ent + 1) & (hash_table_size_ - 1); - } - - // `name` has already been validated before entry. - const char* start_char = reinterpret_cast(start); - hash_table_[ent].name_offset = static_cast(name.data() - start_char); - hash_table_[ent].name_length = static_cast(name.size()); - return kSuccess; -} - -void CdEntryMapZip32::ResetIteration() { - current_position_ = 0; -} - -std::pair CdEntryMapZip32::Next(const uint8_t* cd_start) { - while (current_position_ < hash_table_size_) { - const auto& entry = hash_table_[current_position_]; - current_position_ += 1; - - if (entry.name_offset != 0) { - return {entry.ToStringView(cd_start), entry.name_offset}; - } - } - // We have reached the end of the hash table. - return {}; -} - -CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) { - /* - * Create hash table. We have a minimum 75% load factor, possibly as - * low as 50% after we round off to a power of 2. There must be at - * least one unused entry to avoid an infinite loop during creation. 
- */ - hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3); - hash_table_ = { - reinterpret_cast(calloc(hash_table_size_, sizeof(ZipStringOffset))), free}; -} - -std::unique_ptr CdEntryMapZip32::Create(uint16_t num_entries) { - auto entry_map = new CdEntryMapZip32(num_entries); - CHECK(entry_map->hash_table_ != nullptr) - << "Zip: unable to allocate the " << entry_map->hash_table_size_ - << " entry hash_table, entry size: " << sizeof(ZipStringOffset); - return std::unique_ptr(entry_map); -} - -std::unique_ptr CdEntryMapZip64::Create() { - return std::unique_ptr(new CdEntryMapZip64()); -} - -ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) { - const auto [it, added] = - entry_table_.insert({name, name.data() - reinterpret_cast(start)}); - if (!added) { - ALOGW("Zip: Found duplicate entry %.*s", static_cast(name.size()), name.data()); - return kDuplicateEntry; - } - return kSuccess; -} - -std::pair CdEntryMapZip64::GetCdEntryOffset(std::string_view name, - const uint8_t* /*cd_start*/) const { - const auto it = entry_table_.find(name); - if (it == entry_table_.end()) { - ALOGV("Zip: Could not find entry %.*s", static_cast(name.size()), name.data()); - return {kEntryNotFound, 0}; - } - - return {kSuccess, it->second}; -} - -void CdEntryMapZip64::ResetIteration() { - iterator_ = entry_table_.begin(); -} - -std::pair CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) { - if (iterator_ == entry_table_.end()) { - return {}; - } - - return *iterator_++; -} - #if defined(__BIONIC__) uint64_t GetOwnerTag(const ZipArchive* archive) { return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE, diff --git a/libziparchive/zip_archive_private.h b/libziparchive/zip_archive_private.h index ecb9f22bc..536894cbf 100644 --- a/libziparchive/zip_archive_private.h +++ b/libziparchive/zip_archive_private.h @@ -22,81 +22,14 @@ #include #include -#include #include #include #include #include "android-base/macros.h" #include 
"android-base/mapped_file.h" - -static const char* kErrorMessages[] = { - "Success", - "Iteration ended", - "Zlib error", - "Invalid file", - "Invalid handle", - "Duplicate entries in archive", - "Empty archive", - "Entry not found", - "Invalid offset", - "Inconsistent information", - "Invalid entry name", - "I/O error", - "File mapping failed", - "Allocation failed", -}; - -enum ZipError : int32_t { - kSuccess = 0, - - kIterationEnd = -1, - - // We encountered a Zlib error when inflating a stream from this file. - // Usually indicates file corruption. - kZlibError = -2, - - // The input file cannot be processed as a zip archive. Usually because - // it's too small, too large or does not have a valid signature. - kInvalidFile = -3, - - // An invalid iteration / ziparchive handle was passed in as an input - // argument. - kInvalidHandle = -4, - - // The zip archive contained two (or possibly more) entries with the same - // name. - kDuplicateEntry = -5, - - // The zip archive contains no entries. - kEmptyArchive = -6, - - // The specified entry was not found in the archive. - kEntryNotFound = -7, - - // The zip archive contained an invalid local file header pointer. - kInvalidOffset = -8, - - // The zip archive contained inconsistent entry information. This could - // be because the central directory & local file header did not agree, or - // if the actual uncompressed length or crc32 do not match their declared - // values. - kInconsistentInformation = -9, - - // An invalid entry name was encountered. - kInvalidEntryName = -10, - - // An I/O related system call (read, lseek, ftruncate, map) failed. - kIoError = -11, - - // We were not able to mmap the central directory or entry contents. - kMmapFailed = -12, - - // An allocation failed. 
- kAllocationFailed = -13, - - kLastErrorCode = kAllocationFailed, -}; +#include "zip_cd_entry_map.h" +#include "zip_error.h" class MappedZipFile { public: @@ -144,94 +77,6 @@ class CentralDirectory { size_t length_; }; -// This class is the interface of the central directory entries map. The map -// helps to locate a particular cd entry based on the filename. -class CdEntryMapInterface { - public: - virtual ~CdEntryMapInterface() = default; - // Adds an entry to the map. The |name| should internally points to the - // filename field of a cd entry. And |start| points to the beginning of the - // central directory. Returns 0 on success. - virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0; - // For the zip entry |entryName|, finds the offset of its filename field in - // the central directory. Returns a pair of [status, offset]. The value of - // the status is 0 on success. - virtual std::pair GetCdEntryOffset(std::string_view name, - const uint8_t* cd_start) const = 0; - // Resets the iterator to the beginning of the map. - virtual void ResetIteration() = 0; - // Returns the [name, cd offset] of the current element. Also increments the - // iterator to points to the next element. Returns an empty pair we have read - // past boundary. - virtual std::pair Next(const uint8_t* cd_start) = 0; -}; - -/** - * More space efficient string representation of strings in an mmaped zipped - * file than std::string_view. Using std::string_view as an entry in the - * ZipArchive hash table wastes space. std::string_view stores a pointer to a - * string (on 64 bit, 8 bytes) and the length to read from that pointer, - * 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting - * 6 bytes. - * - * ZipStringOffset stores a 4 byte offset from a fixed location in the memory - * mapped file instead of the entire address, consuming 8 bytes with alignment. 
- */ -struct ZipStringOffset { - uint32_t name_offset; - uint16_t name_length; - - const std::string_view ToStringView(const uint8_t* start) const { - return std::string_view{reinterpret_cast(start + name_offset), name_length}; - } -}; - -// This implementation of CdEntryMap uses an array hash table. It uses less -// memory than std::map; and it's used as the default implementation for zip -// archives without zip64 extension. -class CdEntryMapZip32 : public CdEntryMapInterface { - public: - static std::unique_ptr Create(uint16_t num_entries); - - ZipError AddToMap(std::string_view name, const uint8_t* start) override; - std::pair GetCdEntryOffset(std::string_view name, - const uint8_t* cd_start) const override; - void ResetIteration() override; - std::pair Next(const uint8_t* cd_start) override; - - private: - explicit CdEntryMapZip32(uint16_t num_entries); - - // We know how many entries are in the Zip archive, so we can have a - // fixed-size hash table. We define a load factor of 0.75 and over - // allocate so the maximum number entries can never be higher than - // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t. - uint32_t hash_table_size_{0}; - std::unique_ptr hash_table_{nullptr, free}; - - // The position of element for the current iteration. 
- uint32_t current_position_{0}; -}; - -// This implementation of CdEntryMap uses a std::map -class CdEntryMapZip64 : public CdEntryMapInterface { - public: - static std::unique_ptr Create(); - - ZipError AddToMap(std::string_view name, const uint8_t* start) override; - std::pair GetCdEntryOffset(std::string_view name, - const uint8_t* cd_start) const override; - void ResetIteration() override; - std::pair Next(const uint8_t* cd_start) override; - - private: - CdEntryMapZip64() = default; - - std::map entry_table_; - - std::map::iterator iterator_; -}; - struct ZipArchive { // open Zip archive mutable MappedZipFile mapped_zip; diff --git a/libziparchive/zip_cd_entry_map.cc b/libziparchive/zip_cd_entry_map.cc new file mode 100644 index 000000000..f187c0690 --- /dev/null +++ b/libziparchive/zip_cd_entry_map.cc @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "zip_cd_entry_map.h" + +#include +#include + +/* + * Round up to the next highest power of 2. + * + * Found on http://graphics.stanford.edu/~seander/bithacks.html. 
+ */ +static uint32_t RoundUpPower2(uint32_t val) { + val--; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + val++; + + return val; +} + +static uint32_t ComputeHash(std::string_view name) { + return static_cast(std::hash{}(name)); +} + +// Convert a ZipEntry to a hash table index, verifying that it's in a valid range. +std::pair CdEntryMapZip32::GetCdEntryOffset(std::string_view name, + const uint8_t* start) const { + const uint32_t hash = ComputeHash(name); + + // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. + uint32_t ent = hash & (hash_table_size_ - 1); + while (hash_table_[ent].name_offset != 0) { + if (hash_table_[ent].ToStringView(start) == name) { + return {kSuccess, hash_table_[ent].name_offset}; + } + ent = (ent + 1) & (hash_table_size_ - 1); + } + + ALOGV("Zip: Unable to find entry %.*s", static_cast(name.size()), name.data()); + return {kEntryNotFound, 0}; +} + +ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) { + const uint64_t hash = ComputeHash(name); + uint32_t ent = hash & (hash_table_size_ - 1); + + /* + * We over-allocated the table, so we're guaranteed to find an empty slot. + * Further, we guarantee that the hashtable size is not 0. + */ + while (hash_table_[ent].name_offset != 0) { + if (hash_table_[ent].ToStringView(start) == name) { + // We've found a duplicate entry. We don't accept duplicates. + ALOGW("Zip: Found duplicate entry %.*s", static_cast(name.size()), name.data()); + return kDuplicateEntry; + } + ent = (ent + 1) & (hash_table_size_ - 1); + } + + // `name` has already been validated before entry. 
+ const char* start_char = reinterpret_cast(start); + hash_table_[ent].name_offset = static_cast(name.data() - start_char); + hash_table_[ent].name_length = static_cast(name.size()); + return kSuccess; +} + +void CdEntryMapZip32::ResetIteration() { + current_position_ = 0; +} + +std::pair CdEntryMapZip32::Next(const uint8_t* cd_start) { + while (current_position_ < hash_table_size_) { + const auto& entry = hash_table_[current_position_]; + current_position_ += 1; + + if (entry.name_offset != 0) { + return {entry.ToStringView(cd_start), entry.name_offset}; + } + } + // We have reached the end of the hash table. + return {}; +} + +CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) { + /* + * Create hash table. We have a maximum 75% load factor, possibly as + * low as 37.5% after we round up to a power of 2. There must be at + * least one unused entry to avoid an infinite loop during creation. + */ + hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3); + hash_table_ = { + reinterpret_cast(calloc(hash_table_size_, sizeof(ZipStringOffset))), free}; +} + +std::unique_ptr CdEntryMapZip32::Create(uint16_t num_entries) { + auto entry_map = new CdEntryMapZip32(num_entries); + CHECK(entry_map->hash_table_ != nullptr) + << "Zip: unable to allocate the " << entry_map->hash_table_size_ + << " entry hash_table, entry size: " << sizeof(ZipStringOffset); + return std::unique_ptr(entry_map); +} + +std::unique_ptr CdEntryMapZip64::Create() { + return std::unique_ptr(new CdEntryMapZip64()); +} + +ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) { + const auto [it, added] = + entry_table_.insert({name, name.data() - reinterpret_cast(start)}); + if (!added) { + ALOGW("Zip: Found duplicate entry %.*s", static_cast(name.size()), name.data()); + return kDuplicateEntry; + } + return kSuccess; +} + +std::pair CdEntryMapZip64::GetCdEntryOffset(std::string_view name, + const uint8_t* /*cd_start*/) const { + const auto it = entry_table_.find(name); + if
(it == entry_table_.end()) { + ALOGV("Zip: Could not find entry %.*s", static_cast(name.size()), name.data()); + return {kEntryNotFound, 0}; + } + + return {kSuccess, it->second}; +} + +void CdEntryMapZip64::ResetIteration() { + iterator_ = entry_table_.begin(); +} + +std::pair CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) { + if (iterator_ == entry_table_.end()) { + return {}; + } + + return *iterator_++; +} diff --git a/libziparchive/zip_cd_entry_map.h b/libziparchive/zip_cd_entry_map.h new file mode 100644 index 000000000..4957f754e --- /dev/null +++ b/libziparchive/zip_cd_entry_map.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include +#include + +#include "zip_error.h" + +// This class is the interface of the central directory entries map. The map +// helps to locate a particular cd entry based on the filename. +class CdEntryMapInterface { + public: + virtual ~CdEntryMapInterface() = default; + // Adds an entry to the map. The |name| should point internally to the + // filename field of a cd entry, and |start| points to the beginning of the + // central directory. Returns 0 on success. + virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0; + // For the zip entry |name|, finds the offset of its filename field in + // the central directory. Returns a pair of [status, offset].
The value of + // the status is 0 on success. + virtual std::pair GetCdEntryOffset(std::string_view name, + const uint8_t* cd_start) const = 0; + // Resets the iterator to the beginning of the map. + virtual void ResetIteration() = 0; + // Returns the [name, cd offset] of the current element. Also increments the + // iterator to point to the next element. Returns an empty pair if we have + // read past the boundary. + virtual std::pair Next(const uint8_t* cd_start) = 0; +}; + +/** + * More space efficient string representation of strings in an mmapped zip + * file than std::string_view. Using std::string_view as an entry in the + * ZipArchive hash table wastes space. std::string_view stores a pointer to a + * string (on 64 bit, 8 bytes) and the length to read from that pointer, + * 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting + * 6 bytes. + * + * ZipStringOffset stores a 4 byte offset from a fixed location in the memory + * mapped file instead of the entire address, consuming 8 bytes with alignment. + */ +struct ZipStringOffset { + uint32_t name_offset; + uint16_t name_length; + + const std::string_view ToStringView(const uint8_t* start) const { + return std::string_view{reinterpret_cast(start + name_offset), name_length}; + } +}; + +// This implementation of CdEntryMap uses an array hash table. It uses less +// memory than std::map; and it's used as the default implementation for zip +// archives without zip64 extension.
+class CdEntryMapZip32 : public CdEntryMapInterface { + public: + static std::unique_ptr Create(uint16_t num_entries); + + ZipError AddToMap(std::string_view name, const uint8_t* start) override; + std::pair GetCdEntryOffset(std::string_view name, + const uint8_t* cd_start) const override; + void ResetIteration() override; + std::pair Next(const uint8_t* cd_start) override; + + private: + explicit CdEntryMapZip32(uint16_t num_entries); + + // We know how many entries are in the Zip archive, so we can have a + // fixed-size hash table. We define a load factor of 0.75 and over + // allocate so the maximum number of entries can never be higher than + // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t. + uint32_t hash_table_size_{0}; + std::unique_ptr hash_table_{nullptr, free}; + + // The position of the element for the current iteration. + uint32_t current_position_{0}; +}; + +// This implementation of CdEntryMap uses a std::map +class CdEntryMapZip64 : public CdEntryMapInterface { + public: + static std::unique_ptr Create(); + + ZipError AddToMap(std::string_view name, const uint8_t* start) override; + std::pair GetCdEntryOffset(std::string_view name, + const uint8_t* cd_start) const override; + void ResetIteration() override; + std::pair Next(const uint8_t* cd_start) override; + + private: + CdEntryMapZip64() = default; + + std::map entry_table_; + + std::map::iterator iterator_; +}; diff --git a/libziparchive/zip_error.h b/libziparchive/zip_error.h new file mode 100644 index 000000000..44d7221c5 --- /dev/null +++ b/libziparchive/zip_error.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +static const char* kErrorMessages[] = { + "Success", + "Iteration ended", + "Zlib error", + "Invalid file", + "Invalid handle", + "Duplicate entries in archive", + "Empty archive", + "Entry not found", + "Invalid offset", + "Inconsistent information", + "Invalid entry name", + "I/O error", + "File mapping failed", + "Allocation failed", +}; + +enum ZipError : int32_t { + kSuccess = 0, + + kIterationEnd = -1, + + // We encountered a Zlib error when inflating a stream from this file. + // Usually indicates file corruption. + kZlibError = -2, + + // The input file cannot be processed as a zip archive. Usually because + // it's too small, too large or does not have a valid signature. + kInvalidFile = -3, + + // An invalid iteration / ziparchive handle was passed in as an input + // argument. + kInvalidHandle = -4, + + // The zip archive contained two (or possibly more) entries with the same + // name. + kDuplicateEntry = -5, + + // The zip archive contains no entries. + kEmptyArchive = -6, + + // The specified entry was not found in the archive. + kEntryNotFound = -7, + + // The zip archive contained an invalid local file header pointer. + kInvalidOffset = -8, + + // The zip archive contained inconsistent entry information. This could + // be because the central directory & local file header did not agree, or + // if the actual uncompressed length or crc32 do not match their declared + // values. + kInconsistentInformation = -9, + + // An invalid entry name was encountered. 
+ kInvalidEntryName = -10, + + // An I/O related system call (read, lseek, ftruncate, map) failed. + kIoError = -11, + + // We were not able to mmap the central directory or entry contents. + kMmapFailed = -12, + + // An allocation failed. + kAllocationFailed = -13, + + kLastErrorCode = kAllocationFailed, +};