diff --git a/TEST_MAPPING b/TEST_MAPPING index 44c47f3c9..9b6213a9b 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -67,5 +67,11 @@ { "name": "ziparchive-tests" } + ], + + "postsubmit": [ + { + "name": "ziparchive_tests_large" + } ] } diff --git a/libziparchive/Android.bp b/libziparchive/Android.bp index 4081b21f4..786e7b3d3 100644 --- a/libziparchive/Android.bp +++ b/libziparchive/Android.bp @@ -212,3 +212,20 @@ sh_test { data: ["cli-tests/**/*"], target_required: ["cli-test", "ziptool"], } + +python_test_host { + name: "ziparchive_tests_large", + srcs: ["test_ziparchive_large.py"], + main: "test_ziparchive_large.py", + version: { + py2: { + enabled: true, + embedded_launcher: false, + }, + py3: { + enabled: false, + embedded_launcher: false, + }, + }, + test_suites: ["general-tests"], +} diff --git a/libziparchive/test_ziparchive_large.py b/libziparchive/test_ziparchive_large.py new file mode 100644 index 000000000..c29c37e9d --- /dev/null +++ b/libziparchive/test_ziparchive_large.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# +# Copyright (C) 2020 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Unittests for parsing files in zip64 format""" + +import os +import subprocess +import tempfile +import unittest +import zipfile +import time + +class Zip64Test(unittest.TestCase): + @staticmethod + def _AddEntriesToZip(output_zip, entries_dict=None): + for name, size in entries_dict.items(): + contents = name[0] * 1024 + file_path = tempfile.NamedTemporaryFile() + with open(file_path.name, 'w') as f: + for it in range(0, size): + f.write(contents) + output_zip.write(file_path.name, arcname = name) + + def _getEntryNames(self, zip_name): + cmd = ['ziptool', 'zipinfo', '-1', zip_name] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + output, _ = proc.communicate() + self.assertEquals(0, proc.returncode) + self.assertNotEqual(None, output) + return output.split() + + def _ExtractEntries(self, zip_name): + temp_dir = tempfile.mkdtemp() + cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + proc.communicate() + self.assertEquals(0, proc.returncode) + + def test_entriesSmallerThan2G(self): + zip_path = tempfile.NamedTemporaryFile(suffix='.zip') + # Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger + # than 4GiB in size. + with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip: + entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024, + 'd.txt': 1025 * 1024, 'e.txt': 1024} + self._AddEntriesToZip(output_zip, entry_dict) + + read_names = self._getEntryNames(zip_path.name) + self.assertEquals(sorted(entry_dict.keys()), sorted(read_names)) + self._ExtractEntries(zip_path.name) + + + def test_largeNumberOfEntries(self): + zip_path = tempfile.NamedTemporaryFile(suffix='.zip') + entry_dict = {} + # Add 100k entries (more than 65535|UINT16_MAX). + for num in range(0, 100 * 1024): + entry_dict[str(num)] = 50 + + with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip: + self._AddEntriesToZip(output_zip, entry_dict) + + read_names = self._getEntryNames(zip_path.name) + self.assertEquals(sorted(entry_dict.keys()), sorted(read_names)) + self._ExtractEntries(zip_path.name) + + + def test_largeCompressedEntries(self): + zip_path = tempfile.NamedTemporaryFile(suffix='.zip') + with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED, + allowZip64=True) as output_zip: + # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed + # sizes in the extra field. Test if our ziptool should be able to parse it. + entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024} + self._AddEntriesToZip(output_zip, entry_dict) + + read_names = self._getEntryNames(zip_path.name) + self.assertEquals(sorted(entry_dict.keys()), sorted(read_names)) + self._ExtractEntries(zip_path.name) + + +if __name__ == '__main__': + testsuite = unittest.TestLoader().discover( + os.path.dirname(os.path.realpath(__file__))) + unittest.TextTestRunner(verbosity=2).run(testsuite) diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc index afbc5d891..5b976d050 100644 --- a/libziparchive/zip_archive.cc +++ b/libziparchive/zip_archive.cc @@ -138,9 +138,60 @@ struct CentralDirectoryInfo { uint64_t cd_start_offset; }; -static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) { - ALOGW("Zip: Parsing zip64 EOCD isn't supported yet."); - return kInvalidFile; +static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive, + off64_t eocdOffset, CentralDirectoryInfo* cdInfo) { + if (eocdOffset <= sizeof(Zip64EocdLocator)) { + ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName); + return kInvalidFile; + } + // We expect to find the zip64 eocd locator immediately before the zip eocd. + const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator); + Zip64EocdLocator zip64EocdLocator{}; + if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast((&zip64EocdLocator)), + sizeof(Zip64EocdLocator), locatorOffset)) { + ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName, + sizeof(Zip64EocdLocator), locatorOffset, debugFileName); + return kIoError; + } + + if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) { + ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName, + locatorOffset); + return kInvalidFile; + } + + const int64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset; + if (zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) { + ALOGW("Zip: %s: Bad zip64 eocd offset %" PRIu64, debugFileName, zip64EocdOffset); + return kInvalidOffset; + } + + Zip64EocdRecord zip64EocdRecord{}; + if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast(&zip64EocdRecord), + sizeof(Zip64EocdRecord), zip64EocdOffset)) { + ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName, + sizeof(Zip64EocdLocator), static_cast(zip64EocdOffset), debugFileName); + return kIoError; + } + + if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) { + ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName, + zip64EocdOffset); + return kInvalidFile; + } + + if (zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) { + ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64 + ", zip64 eocd offset %" PRIu64, + debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset); + return kInvalidOffset; + } + + *cdInfo = {.num_records = zip64EocdRecord.num_records, + .cd_size = zip64EocdRecord.cd_size, + .cd_start_offset = zip64EocdRecord.cd_start_offset}; + + return kSuccess; } static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive, @@ -194,7 +245,7 @@ static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) { ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32, eocd->cd_size, eocd->cd_start_offset); - return FindCentralDirectoryInfoForZip64(cdInfo); + return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo); } /* @@ -290,13 +341,104 @@ static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* arc return kSuccess; } +static ZipError ParseZip64ExtendedInfoInExtraField( + const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize, + uint32_t zip32CompressedSize, std::optional zip32LocalFileHeaderOffset, + Zip64ExtendedInfo* zip64Info) { + if (extraFieldLength <= 4) { + ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16, + extraFieldLength); + return kInvalidFile; + } + + // Each header MUST consist of: + // Header ID - 2 bytes + // Data Size - 2 bytes + uint16_t offset = 0; + while (offset < extraFieldLength - 4) { + auto headerId = get_unaligned(extraFieldStart + offset); + auto dataSize = get_unaligned(extraFieldStart + offset + 2); + + offset += 4; + if (dataSize > extraFieldLength - offset) { + ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize); + return kInvalidOffset; + } + + // Skip the other types of extensible data fields. Details in + // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5 + if (headerId != Zip64ExtendedInfo::kHeaderId) { + offset += dataSize; + continue; + } + + uint16_t expectedDataSize = 0; + // We expect the extended field to include both uncompressed and compressed size. + if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) { + expectedDataSize += 16; + } + if (zip32LocalFileHeaderOffset == UINT32_MAX) { + expectedDataSize += 8; + } + + if (expectedDataSize == 0) { + ALOGW("Zip: Data size should not be 0 in zip64 extended field"); + return kInvalidFile; + } + + if (dataSize != expectedDataSize) { + auto localOffsetString = zip32LocalFileHeaderOffset.has_value() + ? std::to_string(zip32LocalFileHeaderOffset.value()) + : "missing"; + ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16 + ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s", + expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize, + localOffsetString.c_str()); + return kInvalidFile; + } + + std::optional uncompressedFileSize; + std::optional compressedFileSize; + std::optional localHeaderOffset; + if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) { + uncompressedFileSize = get_unaligned(extraFieldStart + offset); + compressedFileSize = get_unaligned(extraFieldStart + offset + 8); + offset += 16; + + // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible + // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own + // bytes counter to replace stream.total_out. + if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) { + ALOGW( + "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64 + ", compressed size %" PRIu64, + uncompressedFileSize.value(), compressedFileSize.value()); + return kInvalidFile; + } + } + + if (zip32LocalFileHeaderOffset == UINT32_MAX) { + localHeaderOffset = get_unaligned(extraFieldStart + offset); + offset += 8; + } + + zip64Info->uncompressed_file_size = uncompressedFileSize; + zip64Info->compressed_file_size = compressedFileSize; + zip64Info->local_header_offset = localHeaderOffset; + return kSuccess; + } + + ALOGW("Zip: zip64 extended info isn't found in the extra field."); + return kInvalidFile; +} + /* * Parses the Zip archive's Central Directory. Allocates and populates the * hash table. * * Returns 0 on success. */ -static int32_t ParseZipArchive(ZipArchive* archive) { +static ZipError ParseZipArchive(ZipArchive* archive) { const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr(); const size_t cd_length = archive->central_directory.GetMapLength(); const uint64_t num_entries = archive->num_entries; @@ -326,7 +468,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) { return kInvalidFile; } - const CentralDirectoryRecord* cdr = reinterpret_cast(ptr); + auto cdr = reinterpret_cast(ptr); if (cdr->record_signature != CentralDirectoryRecord::kSignature) { ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i); return kInvalidFile; @@ -354,9 +496,15 @@ static int32_t ParseZipArchive(ZipArchive* archive) { off64_t local_header_offset = cdr->local_file_header_offset; if (local_header_offset == UINT32_MAX) { - // TODO(xunchang) parse the zip64 eocd - ALOGW("Zip: Parsing zip64 cd entry isn't supported yet"); - return kInvalidFile; + Zip64ExtendedInfo zip64_info{}; + if (auto status = ParseZip64ExtendedInfoInExtraField( + extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size, + cdr->local_file_header_offset, &zip64_info); + status != kSuccess) { + return status; + } + CHECK(zip64_info.local_header_offset.has_value()); + local_header_offset = zip64_info.local_header_offset.value(); } if (local_header_offset >= archive->directory_offset) { @@ -404,7 +552,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) { ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries); - return 0; + return kSuccess; } static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) { @@ -521,7 +669,7 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, return kInvalidOffset; } - const CentralDirectoryRecord* cdr = reinterpret_cast(ptr); + auto cdr = reinterpret_cast(ptr); // The offset of the start of the central directory in the zipfile. // We keep this lying around so that we can sanity check all our lengths @@ -545,8 +693,27 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, // the extra field. if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX || cdr->local_file_header_offset == UINT32_MAX) { - ALOGW("Zip: Parsing zip64 local file header isn't supported yet"); - return kInvalidFile; + const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length; + Zip64ExtendedInfo zip64_info{}; + if (auto status = ParseZip64ExtendedInfoInExtraField( + extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size, + cdr->local_file_header_offset, &zip64_info); + status != kSuccess) { + return status; + } + + if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) { + CHECK(zip64_info.uncompressed_file_size.has_value()); + CHECK(zip64_info.compressed_file_size.has_value()); + // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX. + data->uncompressed_length = static_cast(zip64_info.uncompressed_file_size.value()); + data->compressed_length = static_cast(zip64_info.compressed_file_size.value()); + } + + if (local_header_offset == UINT32_MAX) { + CHECK(zip64_info.local_header_offset.has_value()); + local_header_offset = zip64_info.local_header_offset.value(); + } } if (local_header_offset + static_cast(sizeof(LocalFileHeader)) >= cd_offset) { @@ -561,14 +728,68 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, return kIoError; } - const LocalFileHeader* lfh = reinterpret_cast(lfh_buf); - + auto lfh = reinterpret_cast(lfh_buf); if (lfh->lfh_signature != LocalFileHeader::kSignature) { ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64, static_cast(local_header_offset)); return kInvalidOffset; } + // Check that the local file header name matches the declared name in the central directory. + CHECK_LE(entryName.size(), UINT16_MAX); + auto nameLen = static_cast(entryName.size()); + if (lfh->file_name_length != nameLen) { + ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16, + std::string(entryName).c_str(), lfh->file_name_length, nameLen); + return kInconsistentInformation; + } + const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader); + if (name_offset > cd_offset - lfh->file_name_length) { + ALOGW("Zip: lfh name has invalid declared length"); + return kInvalidOffset; + } + + std::vector name_buf(nameLen); + if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) { + ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast(name_offset)); + return kIoError; + } + if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) { + ALOGW("Zip: lfh name did not match central directory"); + return kInconsistentInformation; + } + + uint64_t lfh_uncompressed_size = lfh->uncompressed_size; + uint64_t lfh_compressed_size = lfh->compressed_size; + if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) { + const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length; + const uint16_t lfh_extra_field_size = lfh->extra_field_length; + if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) { + ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str()); + return kInvalidOffset; + } + + std::vector local_extra_field(lfh_extra_field_size); + if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size, + lfh_extra_field_offset)) { + ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset); + return kIoError; + } + + Zip64ExtendedInfo zip64_info{}; + if (auto status = ParseZip64ExtendedInfoInExtraField( + local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size, + lfh->compressed_size, std::nullopt, &zip64_info); + status != kSuccess) { + return status; + } + + CHECK(zip64_info.uncompressed_file_size.has_value()); + CHECK(zip64_info.compressed_file_size.has_value()); + lfh_uncompressed_size = zip64_info.uncompressed_file_size.value(); + lfh_compressed_size = zip64_info.compressed_file_size.value(); + } + // Paranoia: Match the values specified in the local file header // to those specified in the central directory. @@ -594,12 +815,12 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, // header agree on the crc, compressed, and uncompressed sizes of the entry. if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) { data->has_data_descriptor = 0; - if (data->compressed_length != lfh->compressed_size || - data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) { + if (data->compressed_length != lfh_compressed_size || + data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) { ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32 - "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}", - data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size, - lfh->uncompressed_size, lfh->crc32); + "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}", + data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size, + lfh_uncompressed_size, lfh->crc32); return kInconsistentInformation; } } else { @@ -622,30 +843,6 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, // Currently only needed to implement zipinfo. data->is_text = (cdr->internal_file_attributes & 1); - // Check that the local file header name matches the declared - // name in the central directory. - CHECK_LE(entryName.size(), UINT16_MAX); - auto nameLen = static_cast(entryName.size()); - if (lfh->file_name_length != nameLen) { - ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16, - std::string(entryName).c_str(), lfh->file_name_length, nameLen); - return kInconsistentInformation; - } - const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader); - if (name_offset + lfh->file_name_length > cd_offset) { - ALOGW("Zip: lfh name has invalid declared length"); - return kInvalidOffset; - } - std::vector name_buf(nameLen); - if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) { - ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast(name_offset)); - return kIoError; - } - if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) { - ALOGW("Zip: lfh name did not match central directory"); - return kInconsistentInformation; - } - const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) + lfh->file_name_length + lfh->extra_field_length; if (data_offset > cd_offset) { diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc index 5caca8aea..10050da79 100644 --- a/libziparchive/zip_archive_test.cc +++ b/libziparchive/zip_archive_test.cc @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "zip_archive_private.h" - #include #include #include @@ -23,6 +21,7 @@ #include #include +#include #include #include #include @@ -31,12 +30,16 @@ #include #include #include +#include #include #include #include #include #include +#include "zip_archive_common.h" +#include "zip_archive_private.h" + static std::string test_data_dir = android::base::GetExecutableDirectory() + "/testdata"; static const std::string kValidZip = "valid.zip"; @@ -930,3 +933,290 @@ TEST(ziparchive, Inflate) { ASSERT_EQ(0u, writer.GetOutput().size()); } } + +// The class constructs a zipfile with zip64 format, and test the parsing logic. +class Zip64ParseTest : public ::testing::Test { + protected: + struct LocalFileEntry { + std::vector local_file_header; + std::string file_name; + std::vector extended_field; + // Fake data to mimic the compressed bytes in the zipfile. + std::vector compressed_bytes; + + size_t GetSize() const { + return local_file_header.size() + file_name.size() + extended_field.size() + + compressed_bytes.size(); + } + + void CopyToOutput(std::vector* output) const { + std::copy(local_file_header.begin(), local_file_header.end(), std::back_inserter(*output)); + std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output)); + std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output)); + std::copy(compressed_bytes.begin(), compressed_bytes.end(), std::back_inserter(*output)); + } + }; + + struct CdRecordEntry { + std::vector central_directory_record; + std::string file_name; + std::vector extended_field; + + size_t GetSize() const { + return central_directory_record.size() + file_name.size() + extended_field.size(); + } + + void CopyToOutput(std::vector* output) const { + std::copy(central_directory_record.begin(), central_directory_record.end(), + std::back_inserter(*output)); + std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output)); + std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output)); + } + }; + + static void ConstructLocalFileHeader(const std::string& name, std::vector* output, + uint32_t uncompressed_size, uint32_t compressed_size) { + LocalFileHeader lfh = {}; + lfh.lfh_signature = LocalFileHeader::kSignature; + lfh.compressed_size = compressed_size; + lfh.uncompressed_size = uncompressed_size; + lfh.file_name_length = static_cast(name.size()); + lfh.extra_field_length = 20; + *output = std::vector(reinterpret_cast(&lfh), + reinterpret_cast(&lfh) + sizeof(LocalFileHeader)); + } + + // Put one zip64 extended info in the extended field. + static void ConstructExtendedField(const std::vector& zip64_fields, + std::vector* output) { + ASSERT_FALSE(zip64_fields.empty()); + uint16_t data_size = 8 * static_cast(zip64_fields.size()); + std::vector extended_field(data_size + 4); + android::base::put_unaligned(extended_field.data(), Zip64ExtendedInfo::kHeaderId); + android::base::put_unaligned(extended_field.data() + 2, data_size); + size_t offset = 4; + for (const auto& field : zip64_fields) { + android::base::put_unaligned(extended_field.data() + offset, field); + offset += 8; + } + + *output = std::move(extended_field); + } + + static void ConstructCentralDirectoryRecord(const std::string& name, uint32_t uncompressed_size, + uint32_t compressed_size, uint32_t local_offset, + std::vector* output) { + CentralDirectoryRecord cdr = {}; + cdr.record_signature = CentralDirectoryRecord::kSignature; + cdr.compressed_size = uncompressed_size; + cdr.uncompressed_size = compressed_size; + cdr.file_name_length = static_cast(name.size()); + cdr.extra_field_length = local_offset == UINT32_MAX ? 28 : 20; + cdr.local_file_header_offset = local_offset; + *output = + std::vector(reinterpret_cast(&cdr), + reinterpret_cast(&cdr) + sizeof(CentralDirectoryRecord)); + } + + // Add an entry to the zipfile, construct the corresponding local header and cd entry. + void AddEntry(const std::string& name, const std::vector& content, + bool uncompressed_size_in_extended, bool compressed_size_in_extended, + bool local_offset_in_extended) { + auto uncompressed_size = static_cast(content.size()); + auto compressed_size = static_cast(content.size()); + uint32_t local_file_header_offset = 0; + std::for_each(file_entries_.begin(), file_entries_.end(), + [&local_file_header_offset](const LocalFileEntry& file_entry) { + local_file_header_offset += file_entry.GetSize(); + }); + + std::vector zip64_fields; + if (uncompressed_size_in_extended) { + zip64_fields.push_back(uncompressed_size); + uncompressed_size = UINT32_MAX; + } + if (compressed_size_in_extended) { + zip64_fields.push_back(compressed_size); + compressed_size = UINT32_MAX; + } + LocalFileEntry local_entry = { + .local_file_header = {}, + .file_name = name, + .extended_field = {}, + .compressed_bytes = content, + }; + ConstructLocalFileHeader(name, &local_entry.local_file_header, uncompressed_size, + compressed_size); + ConstructExtendedField(zip64_fields, &local_entry.extended_field); + file_entries_.push_back(std::move(local_entry)); + + if (local_offset_in_extended) { + zip64_fields.push_back(local_file_header_offset); + local_file_header_offset = UINT32_MAX; + } + CdRecordEntry cd_entry = { + .central_directory_record = {}, + .file_name = name, + .extended_field = {}, + }; + ConstructCentralDirectoryRecord(name, uncompressed_size, compressed_size, + local_file_header_offset, &cd_entry.central_directory_record); + ConstructExtendedField(zip64_fields, &cd_entry.extended_field); + cd_entries_.push_back(std::move(cd_entry)); + } + + void ConstructEocd() { + ASSERT_EQ(file_entries_.size(), cd_entries_.size()); + Zip64EocdRecord zip64_eocd = {}; + zip64_eocd.record_signature = Zip64EocdRecord::kSignature; + zip64_eocd.num_records = file_entries_.size(); + zip64_eocd.cd_size = 0; + std::for_each( + cd_entries_.begin(), cd_entries_.end(), + [&zip64_eocd](const CdRecordEntry& cd_entry) { zip64_eocd.cd_size += cd_entry.GetSize(); }); + zip64_eocd.cd_start_offset = 0; + std::for_each(file_entries_.begin(), file_entries_.end(), + [&zip64_eocd](const LocalFileEntry& file_entry) { + zip64_eocd.cd_start_offset += file_entry.GetSize(); + }); + zip64_eocd_record_ = + std::vector(reinterpret_cast(&zip64_eocd), + reinterpret_cast(&zip64_eocd) + sizeof(Zip64EocdRecord)); + + Zip64EocdLocator zip64_locator = {}; + zip64_locator.locator_signature = Zip64EocdLocator::kSignature; + zip64_locator.zip64_eocd_offset = zip64_eocd.cd_start_offset + zip64_eocd.cd_size; + zip64_eocd_locator_ = + std::vector(reinterpret_cast(&zip64_locator), + reinterpret_cast(&zip64_locator) + sizeof(Zip64EocdLocator)); + + EocdRecord eocd = {}; + eocd.eocd_signature = EocdRecord::kSignature, + eocd.num_records = file_entries_.size() > UINT16_MAX + ? UINT16_MAX + : static_cast(file_entries_.size()); + eocd.cd_size = UINT32_MAX; + eocd.cd_start_offset = UINT32_MAX; + eocd_record_ = std::vector(reinterpret_cast(&eocd), + reinterpret_cast(&eocd) + sizeof(EocdRecord)); + } + + // Concatenate all the local file entries, cd entries, and eocd metadata. + void ConstructZipFile() { + for (const auto& file_entry : file_entries_) { + file_entry.CopyToOutput(&zip_content_); + } + for (const auto& cd_entry : cd_entries_) { + cd_entry.CopyToOutput(&zip_content_); + } + std::copy(zip64_eocd_record_.begin(), zip64_eocd_record_.end(), + std::back_inserter(zip_content_)); + std::copy(zip64_eocd_locator_.begin(), zip64_eocd_locator_.end(), + std::back_inserter(zip_content_)); + std::copy(eocd_record_.begin(), eocd_record_.end(), std::back_inserter(zip_content_)); + } + + std::vector zip_content_; + + std::vector file_entries_; + std::vector cd_entries_; + std::vector zip64_eocd_record_; + std::vector zip64_eocd_locator_; + std::vector eocd_record_; +}; + +TEST_F(Zip64ParseTest, openFile) { + AddEntry("a.txt", std::vector(100, 'a'), true, true, false); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, openFilelocalOffsetInExtendedField) { + AddEntry("a.txt", std::vector(100, 'a'), true, true, true); + AddEntry("b.txt", std::vector(200, 'b'), true, true, true); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, openFileCompressedNotInExtendedField) { + AddEntry("a.txt", std::vector(100, 'a'), true, false, false); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + // Zip64 extended fields must include both uncompressed and compressed size. + ASSERT_NE( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, findEntry) { + AddEntry("a.txt", std::vector(200, 'a'), true, true, true); + AddEntry("b.txt", std::vector(300, 'b'), true, true, false); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + ZipEntry entry; + ASSERT_EQ(0, FindEntry(handle, "a.txt", &entry)); + ASSERT_EQ(200, entry.uncompressed_length); + ASSERT_EQ(200, entry.compressed_length); + + ASSERT_EQ(0, FindEntry(handle, "b.txt", &entry)); + ASSERT_EQ(300, entry.uncompressed_length); + ASSERT_EQ(300, entry.compressed_length); + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, openFileIncorrectDataSizeInLocalExtendedField) { + AddEntry("a.txt", std::vector(100, 'a'), true, true, false); + ASSERT_EQ(1, file_entries_.size()); + auto& extended_field = file_entries_[0].extended_field; + // data size exceeds the extended field size in local header. + android::base::put_unaligned(extended_field.data() + 2, 30); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + ZipEntry entry; + ASSERT_NE(0, FindEntry(handle, "a.txt", &entry)); + + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, iterates) { + std::set names{"a.txt", "b.txt", "c.txt", "d.txt", "e.txt"}; + for (const auto& name : names) { + AddEntry(std::string(name), std::vector(100, name[0]), true, true, true); + } + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + + void* iteration_cookie; + ASSERT_EQ(0, StartIteration(handle, &iteration_cookie)); + std::set result; + std::string_view name; + ZipEntry entry; + while (Next(iteration_cookie, &entry, &name) == 0) result.emplace(name); + ASSERT_EQ(names, result); + + CloseArchive(handle); +}