Merge "Create an interface for the cd entry hash table"
This commit is contained in:
commit
73ae00bbde
2 changed files with 130 additions and 74 deletions
|
|
@ -106,55 +106,79 @@ static uint32_t ComputeHash(std::string_view name) {
|
|||
return static_cast<uint32_t>(std::hash<std::string_view>{}(name));
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a ZipEntry to a hash table index, verifying that it's in a
|
||||
* valid range.
|
||||
*/
|
||||
static int64_t EntryToIndex(const ZipStringOffset* hash_table, const uint32_t hash_table_size,
|
||||
std::string_view name, const uint8_t* start) {
|
||||
// Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
|
||||
std::pair<int32_t, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
|
||||
const uint8_t* start) const {
|
||||
const uint32_t hash = ComputeHash(name);
|
||||
|
||||
// NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
|
||||
uint32_t ent = hash & (hash_table_size - 1);
|
||||
while (hash_table[ent].name_offset != 0) {
|
||||
if (hash_table[ent].ToStringView(start) == name) {
|
||||
return ent;
|
||||
uint32_t ent = hash & (hash_table_size_ - 1);
|
||||
while (hash_table_[ent].name_offset != 0) {
|
||||
if (hash_table_[ent].ToStringView(start) == name) {
|
||||
return {0, hash_table_[ent].name_offset};
|
||||
}
|
||||
ent = (ent + 1) & (hash_table_size - 1);
|
||||
ent = (ent + 1) & (hash_table_size_ - 1);
|
||||
}
|
||||
|
||||
ALOGV("Zip: Unable to find entry %.*s", static_cast<int>(name.size()), name.data());
|
||||
return kEntryNotFound;
|
||||
return {kEntryNotFound, 0};
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a new entry to the hash table.
|
||||
*/
|
||||
static int32_t AddToHash(ZipStringOffset* hash_table, const uint32_t hash_table_size,
|
||||
std::string_view name, const uint8_t* start) {
|
||||
int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
|
||||
const uint64_t hash = ComputeHash(name);
|
||||
uint32_t ent = hash & (hash_table_size - 1);
|
||||
uint32_t ent = hash & (hash_table_size_ - 1);
|
||||
|
||||
/*
|
||||
* We over-allocated the table, so we're guaranteed to find an empty slot.
|
||||
* Further, we guarantee that the hashtable size is not 0.
|
||||
*/
|
||||
while (hash_table[ent].name_offset != 0) {
|
||||
if (hash_table[ent].ToStringView(start) == name) {
|
||||
while (hash_table_[ent].name_offset != 0) {
|
||||
if (hash_table_[ent].ToStringView(start) == name) {
|
||||
// We've found a duplicate entry. We don't accept duplicates.
|
||||
ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
|
||||
return kDuplicateEntry;
|
||||
}
|
||||
ent = (ent + 1) & (hash_table_size - 1);
|
||||
ent = (ent + 1) & (hash_table_size_ - 1);
|
||||
}
|
||||
|
||||
// `name` has already been validated before entry.
|
||||
const char* start_char = reinterpret_cast<const char*>(start);
|
||||
hash_table[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
|
||||
hash_table[ent].name_length = static_cast<uint16_t>(name.size());
|
||||
hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
|
||||
hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CdEntryMapZip32::ResetIteration() {
|
||||
current_position_ = 0;
|
||||
}
|
||||
|
||||
std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_start) {
|
||||
while (current_position_ < hash_table_size_) {
|
||||
const auto& entry = hash_table_[current_position_];
|
||||
current_position_ += 1;
|
||||
|
||||
if (entry.name_offset != 0) {
|
||||
return {entry.ToStringView(cd_start), entry.name_offset};
|
||||
}
|
||||
}
|
||||
// We have reached the end of the hash table.
|
||||
return {};
|
||||
}
|
||||
|
||||
CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
|
||||
hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
|
||||
hash_table_ = {
|
||||
reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset))), free};
|
||||
}
|
||||
|
||||
std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entries) {
|
||||
auto entry_map = new CdEntryMapZip32(num_entries);
|
||||
CHECK(entry_map->hash_table_ != nullptr)
|
||||
<< "Zip: unable to allocate the " << entry_map->hash_table_size_
|
||||
<< " entry hash_table, entry size: " << sizeof(ZipStringOffset);
|
||||
return std::unique_ptr<CdEntryMapInterface>(entry_map);
|
||||
}
|
||||
|
||||
#if defined(__BIONIC__)
|
||||
uint64_t GetOwnerTag(const ZipArchive* archive) {
|
||||
return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,
|
||||
|
|
@ -168,9 +192,7 @@ ZipArchive::ZipArchive(const int fd, bool assume_ownership)
|
|||
directory_offset(0),
|
||||
central_directory(),
|
||||
directory_map(),
|
||||
num_entries(0),
|
||||
hash_table_size(0),
|
||||
hash_table(nullptr) {
|
||||
num_entries(0) {
|
||||
#if defined(__BIONIC__)
|
||||
if (assume_ownership) {
|
||||
android_fdsan_exchange_owner_tag(fd, 0, GetOwnerTag(this));
|
||||
|
|
@ -184,9 +206,7 @@ ZipArchive::ZipArchive(const void* address, size_t length)
|
|||
directory_offset(0),
|
||||
central_directory(),
|
||||
directory_map(),
|
||||
num_entries(0),
|
||||
hash_table_size(0),
|
||||
hash_table(nullptr) {}
|
||||
num_entries(0) {}
|
||||
|
||||
ZipArchive::~ZipArchive() {
|
||||
if (close_file && mapped_zip.GetFileDescriptor() >= 0) {
|
||||
|
|
@ -196,8 +216,6 @@ ZipArchive::~ZipArchive() {
|
|||
close(mapped_zip.GetFileDescriptor());
|
||||
#endif
|
||||
}
|
||||
|
||||
free(hash_table);
|
||||
}
|
||||
|
||||
static int32_t MapCentralDirectory0(const char* debug_file_name, ZipArchive* archive,
|
||||
|
|
@ -344,12 +362,8 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
|
|||
* low as 50% after we round off to a power of 2. There must be at
|
||||
* least one unused entry to avoid an infinite loop during creation.
|
||||
*/
|
||||
archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
|
||||
archive->hash_table =
|
||||
reinterpret_cast<ZipStringOffset*>(calloc(archive->hash_table_size, sizeof(ZipStringOffset)));
|
||||
if (archive->hash_table == nullptr) {
|
||||
ALOGW("Zip: unable to allocate the %u-entry hash_table, entry size: %zu",
|
||||
archive->hash_table_size, sizeof(ZipStringOffset));
|
||||
archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
|
||||
if (archive->cd_entry_map == nullptr) {
|
||||
return kAllocationFailed;
|
||||
}
|
||||
|
||||
|
|
@ -401,9 +415,9 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
|
|||
|
||||
// Add the CDE filename to the hash table.
|
||||
std::string_view entry_name{reinterpret_cast<const char*>(file_name), file_name_length};
|
||||
const int add_result = AddToHash(archive->hash_table, archive->hash_table_size, entry_name,
|
||||
archive->central_directory.GetBasePtr());
|
||||
if (add_result != 0) {
|
||||
if (auto add_result =
|
||||
archive->cd_entry_map->AddToMap(entry_name, archive->central_directory.GetBasePtr());
|
||||
add_result != 0) {
|
||||
ALOGW("Zip: Error adding entry to hash table %d", add_result);
|
||||
return add_result;
|
||||
}
|
||||
|
|
@ -514,14 +528,13 @@ static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry* data) {
|
||||
const uint16_t nameLen = archive->hash_table[ent].name_length;
|
||||
|
||||
static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
|
||||
const uint64_t nameOffset, ZipEntry* data) {
|
||||
// Recover the start of the central directory entry from the filename
|
||||
// pointer. The filename is the first entry past the fixed-size data,
|
||||
// so we can just subtract back from that.
|
||||
const uint8_t* base_ptr = archive->central_directory.GetBasePtr();
|
||||
const uint8_t* ptr = base_ptr + archive->hash_table[ent].name_offset;
|
||||
const uint8_t* ptr = base_ptr + nameOffset;
|
||||
ptr -= sizeof(CentralDirectoryRecord);
|
||||
|
||||
// This is the base of our mmapped region, we have to sanity check that
|
||||
|
|
@ -627,8 +640,11 @@ static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry*
|
|||
|
||||
// Check that the local file header name matches the declared
|
||||
// name in the central directory.
|
||||
CHECK_LE(entryName.size(), UINT16_MAX);
|
||||
auto nameLen = static_cast<uint16_t>(entryName.size());
|
||||
if (lfh->file_name_length != nameLen) {
|
||||
ALOGW("Zip: lfh name length did not match central directory");
|
||||
ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
|
||||
std::string(entryName).c_str(), lfh->file_name_length, nameLen);
|
||||
return kInconsistentInformation;
|
||||
}
|
||||
const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
|
||||
|
|
@ -641,9 +657,7 @@ static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry*
|
|||
ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
|
||||
return kIoError;
|
||||
}
|
||||
const std::string_view entry_name =
|
||||
archive->hash_table[ent].ToStringView(archive->central_directory.GetBasePtr());
|
||||
if (memcmp(entry_name.data(), name_buf.data(), nameLen) != 0) {
|
||||
if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
|
||||
ALOGW("Zip: lfh name did not match central directory");
|
||||
return kInconsistentInformation;
|
||||
}
|
||||
|
|
@ -689,7 +703,7 @@ struct IterationHandle {
|
|||
int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
|
||||
const std::string_view optional_prefix,
|
||||
const std::string_view optional_suffix) {
|
||||
if (archive == NULL || archive->hash_table == NULL) {
|
||||
if (archive == nullptr || archive->cd_entry_map == nullptr) {
|
||||
ALOGW("Zip: Invalid ZipArchiveHandle");
|
||||
return kInvalidHandle;
|
||||
}
|
||||
|
|
@ -700,6 +714,7 @@ int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
|
|||
return kInvalidEntryName;
|
||||
}
|
||||
|
||||
archive->cd_entry_map->ResetIteration();
|
||||
*cookie_ptr = new IterationHandle(archive, optional_prefix, optional_suffix);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -715,14 +730,14 @@ int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryNa
|
|||
return kInvalidEntryName;
|
||||
}
|
||||
|
||||
const int64_t ent = EntryToIndex(archive->hash_table, archive->hash_table_size, entryName,
|
||||
archive->central_directory.GetBasePtr());
|
||||
if (ent < 0) {
|
||||
const auto [result, offset] =
|
||||
archive->cd_entry_map->GetCdEntryOffset(entryName, archive->central_directory.GetBasePtr());
|
||||
if (result != 0) {
|
||||
ALOGV("Zip: Could not find entry %.*s", static_cast<int>(entryName.size()), entryName.data());
|
||||
return static_cast<int32_t>(ent); // kEntryNotFound is safe to truncate.
|
||||
return static_cast<int32_t>(result); // kEntryNotFound is safe to truncate.
|
||||
}
|
||||
// We know there are at most hash_table_size entries, safe to truncate.
|
||||
return FindEntry(archive, static_cast<uint32_t>(ent), data);
|
||||
return FindEntry(archive, entryName, offset, data);
|
||||
}
|
||||
|
||||
int32_t Next(void* cookie, ZipEntry* data, std::string* name) {
|
||||
|
|
@ -736,35 +751,32 @@ int32_t Next(void* cookie, ZipEntry* data, std::string* name) {
|
|||
|
||||
int32_t Next(void* cookie, ZipEntry* data, std::string_view* name) {
|
||||
IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
|
||||
if (handle == NULL) {
|
||||
if (handle == nullptr) {
|
||||
ALOGW("Zip: Null ZipArchiveHandle");
|
||||
return kInvalidHandle;
|
||||
}
|
||||
|
||||
ZipArchive* archive = handle->archive;
|
||||
if (archive == NULL || archive->hash_table == NULL) {
|
||||
if (archive == nullptr || archive->cd_entry_map == nullptr) {
|
||||
ALOGW("Zip: Invalid ZipArchiveHandle");
|
||||
return kInvalidHandle;
|
||||
}
|
||||
|
||||
const uint32_t currentOffset = handle->position;
|
||||
const uint32_t hash_table_length = archive->hash_table_size;
|
||||
const ZipStringOffset* hash_table = archive->hash_table;
|
||||
for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
|
||||
const std::string_view entry_name =
|
||||
hash_table[i].ToStringView(archive->central_directory.GetBasePtr());
|
||||
if (hash_table[i].name_offset != 0 && (android::base::StartsWith(entry_name, handle->prefix) &&
|
||||
android::base::EndsWith(entry_name, handle->suffix))) {
|
||||
handle->position = (i + 1);
|
||||
const int error = FindEntry(archive, i, data);
|
||||
auto entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
|
||||
while (entry != std::pair<std::string_view, uint64_t>()) {
|
||||
const auto [entry_name, offset] = entry;
|
||||
if (android::base::StartsWith(entry_name, handle->prefix) &&
|
||||
android::base::EndsWith(entry_name, handle->suffix)) {
|
||||
const int error = FindEntry(archive, entry_name, offset, data);
|
||||
if (!error && name) {
|
||||
*name = entry_name;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
entry = archive->cd_entry_map->Next(archive->central_directory.GetBasePtr());
|
||||
}
|
||||
|
||||
handle->position = 0;
|
||||
archive->cd_entry_map->ResetIteration();
|
||||
return kIterationEnd;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "android-base/macros.h"
|
||||
|
|
@ -140,6 +141,28 @@ class CentralDirectory {
|
|||
size_t length_;
|
||||
};
|
||||
|
||||
// This class is the interface of the central directory entries map. The map
|
||||
// helps to locate a particular cd entry based on the filename.
|
||||
class CdEntryMapInterface {
|
||||
public:
|
||||
virtual ~CdEntryMapInterface() = default;
|
||||
// Adds an entry to the map. The |name| should internally points to the
|
||||
// filename field of a cd entry. And |start| points to the beginning of the
|
||||
// central directory. Returns 0 on success.
|
||||
virtual int32_t AddToMap(std::string_view name, const uint8_t* start) = 0;
|
||||
// For the zip entry |entryName|, finds the offset of its filename field in
|
||||
// the central directory. Returns a pair of [status, offset]. The value of
|
||||
// the status is 0 on success.
|
||||
virtual std::pair<int32_t, uint64_t> GetCdEntryOffset(std::string_view name,
|
||||
const uint8_t* cd_start) const = 0;
|
||||
// Resets the iterator to the beginning of the map.
|
||||
virtual void ResetIteration() = 0;
|
||||
// Returns the [name, cd offset] of the current element. Also increments the
|
||||
// iterator to points to the next element. Returns an empty pair we have read
|
||||
// past boundary.
|
||||
virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* More space efficient string representation of strings in an mmaped zipped
|
||||
* file than std::string_view. Using std::string_view as an entry in the
|
||||
|
|
@ -160,6 +183,33 @@ struct ZipStringOffset {
|
|||
}
|
||||
};
|
||||
|
||||
// This implementation of CdEntryMap uses an array hash table. It uses less
|
||||
// memory than std::map; and it's used as the default implementation for zip
|
||||
// archives without zip64 extension.
|
||||
class CdEntryMapZip32 : public CdEntryMapInterface {
|
||||
public:
|
||||
static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);
|
||||
|
||||
int32_t AddToMap(std::string_view name, const uint8_t* start) override;
|
||||
std::pair<int32_t, uint64_t> GetCdEntryOffset(std::string_view name,
|
||||
const uint8_t* cd_start) const override;
|
||||
void ResetIteration() override;
|
||||
std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
|
||||
|
||||
private:
|
||||
explicit CdEntryMapZip32(uint16_t num_entries);
|
||||
|
||||
// We know how many entries are in the Zip archive, so we can have a
|
||||
// fixed-size hash table. We define a load factor of 0.75 and over
|
||||
// allocate so the maximum number entries can never be higher than
|
||||
// ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
|
||||
uint32_t hash_table_size_{0};
|
||||
std::unique_ptr<ZipStringOffset[], decltype(&free)> hash_table_{nullptr, free};
|
||||
|
||||
// The position of element for the current iteration.
|
||||
uint32_t current_position_{0};
|
||||
};
|
||||
|
||||
struct ZipArchive {
|
||||
// open Zip archive
|
||||
mutable MappedZipFile mapped_zip;
|
||||
|
|
@ -172,13 +222,7 @@ struct ZipArchive {
|
|||
|
||||
// number of entries in the Zip archive
|
||||
uint16_t num_entries;
|
||||
|
||||
// We know how many entries are in the Zip archive, so we can have a
|
||||
// fixed-size hash table. We define a load factor of 0.75 and over
|
||||
// allocate so the maximum number entries can never be higher than
|
||||
// ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
|
||||
uint32_t hash_table_size;
|
||||
ZipStringOffset* hash_table;
|
||||
std::unique_ptr<CdEntryMapInterface> cd_entry_map;
|
||||
|
||||
ZipArchive(const int fd, bool assume_ownership);
|
||||
ZipArchive(const void* address, size_t length);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue