From 6ab29129ec5b9d53bb42ce00a5382b6f5dddb9ed Mon Sep 17 00:00:00 2001 From: Tianjie Date: Wed, 18 Mar 2020 17:44:30 -0700 Subject: [PATCH] Implement the functions to parse zip64 structs Implement the logic to parse zip64 eocd and zip64 extended info in the extra field. Also add unit tests and python tests which create packages larger than 4GiB. The extraction of zip entry size > 4GiB will be supported in the follow ups. Bug: 150900468 Test: unit tests pass Change-Id: I4cd9ebbd9709b3d2f9cd293625d2c79024bb45a5 --- TEST_MAPPING | 6 + libziparchive/Android.bp | 17 ++ libziparchive/test_ziparchive_large.py | 99 +++++++++ libziparchive/zip_archive.cc | 285 ++++++++++++++++++++---- libziparchive/zip_archive_test.cc | 294 ++++++++++++++++++++++++- 5 files changed, 655 insertions(+), 46 deletions(-) create mode 100644 libziparchive/test_ziparchive_large.py diff --git a/TEST_MAPPING b/TEST_MAPPING index 44c47f3c9..9b6213a9b 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -67,5 +67,11 @@ { "name": "ziparchive-tests" } + ], + + "postsubmit": [ + { + "name": "ziparchive_tests_large" + } ] } diff --git a/libziparchive/Android.bp b/libziparchive/Android.bp index 4081b21f4..786e7b3d3 100644 --- a/libziparchive/Android.bp +++ b/libziparchive/Android.bp @@ -212,3 +212,20 @@ sh_test { data: ["cli-tests/**/*"], target_required: ["cli-test", "ziptool"], } + +python_test_host { + name: "ziparchive_tests_large", + srcs: ["test_ziparchive_large.py"], + main: "test_ziparchive_large.py", + version: { + py2: { + enabled: true, + embedded_launcher: false, + }, + py3: { + enabled: false, + embedded_launcher: false, + }, + }, + test_suites: ["general-tests"], +} diff --git a/libziparchive/test_ziparchive_large.py b/libziparchive/test_ziparchive_large.py new file mode 100644 index 000000000..c29c37e9d --- /dev/null +++ b/libziparchive/test_ziparchive_large.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# +# Copyright (C) 2020 The Android Open Source Project +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Unittests for parsing files in zip64 format""" + +import os +import subprocess +import tempfile +import unittest +import zipfile +import time + +class Zip64Test(unittest.TestCase): + @staticmethod + def _AddEntriesToZip(output_zip, entries_dict=None): + for name, size in entries_dict.items(): + contents = name[0] * 1024 + file_path = tempfile.NamedTemporaryFile() + with open(file_path.name, 'w') as f: + for it in range(0, size): + f.write(contents) + output_zip.write(file_path.name, arcname = name) + + def _getEntryNames(self, zip_name): + cmd = ['ziptool', 'zipinfo', '-1', zip_name] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + output, _ = proc.communicate() + self.assertEquals(0, proc.returncode) + self.assertNotEqual(None, output) + return output.split() + + def _ExtractEntries(self, zip_name): + temp_dir = tempfile.mkdtemp() + cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + proc.communicate() + self.assertEquals(0, proc.returncode) + + def test_entriesSmallerThan2G(self): + zip_path = tempfile.NamedTemporaryFile(suffix='.zip') + # Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger + # than 4GiB in size. 
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip: + entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024, + 'd.txt': 1025 * 1024, 'e.txt': 1024} + self._AddEntriesToZip(output_zip, entry_dict) + + read_names = self._getEntryNames(zip_path.name) + self.assertEquals(sorted(entry_dict.keys()), sorted(read_names)) + self._ExtractEntries(zip_path.name) + + + def test_largeNumberOfEntries(self): + zip_path = tempfile.NamedTemporaryFile(suffix='.zip') + entry_dict = {} + # Add 100k entries (more than 65535|UINT16_MAX). + for num in range(0, 100 * 1024): + entry_dict[str(num)] = 50 + + with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip: + self._AddEntriesToZip(output_zip, entry_dict) + + read_names = self._getEntryNames(zip_path.name) + self.assertEquals(sorted(entry_dict.keys()), sorted(read_names)) + self._ExtractEntries(zip_path.name) + + + def test_largeCompressedEntries(self): + zip_path = tempfile.NamedTemporaryFile(suffix='.zip') + with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED, + allowZip64=True) as output_zip: + # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed + # sizes in the extra field. Test that our ziptool is able to parse it. 
+ entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024} + self._AddEntriesToZip(output_zip, entry_dict) + + read_names = self._getEntryNames(zip_path.name) + self.assertEquals(sorted(entry_dict.keys()), sorted(read_names)) + self._ExtractEntries(zip_path.name) + + +if __name__ == '__main__': + testsuite = unittest.TestLoader().discover( + os.path.dirname(os.path.realpath(__file__))) + unittest.TextTestRunner(verbosity=2).run(testsuite) diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc index afbc5d891..5b976d050 100644 --- a/libziparchive/zip_archive.cc +++ b/libziparchive/zip_archive.cc @@ -138,9 +138,60 @@ struct CentralDirectoryInfo { uint64_t cd_start_offset; }; -static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) { - ALOGW("Zip: Parsing zip64 EOCD isn't supported yet."); - return kInvalidFile; +static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive, + off64_t eocdOffset, CentralDirectoryInfo* cdInfo) { + if (eocdOffset <= sizeof(Zip64EocdLocator)) { + ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName); + return kInvalidFile; + } + // We expect to find the zip64 eocd locator immediately before the zip eocd. 
+ const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator); + Zip64EocdLocator zip64EocdLocator{}; + if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast((&zip64EocdLocator)), + sizeof(Zip64EocdLocator), locatorOffset)) { + ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName, + sizeof(Zip64EocdLocator), locatorOffset, debugFileName); + return kIoError; + } + + if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) { + ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName, + locatorOffset); + return kInvalidFile; + } + + const int64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset; + if (zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) { + ALOGW("Zip: %s: Bad zip64 eocd offset %" PRIu64, debugFileName, zip64EocdOffset); + return kInvalidOffset; + } + + Zip64EocdRecord zip64EocdRecord{}; + if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast(&zip64EocdRecord), + sizeof(Zip64EocdRecord), zip64EocdOffset)) { + ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName, + sizeof(Zip64EocdLocator), static_cast(zip64EocdOffset), debugFileName); + return kIoError; + } + + if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) { + ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName, + zip64EocdOffset); + return kInvalidFile; + } + + if (zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) { + ALOGW("Zip: %s: Bad offset for zip64 central directory. 
cd offset %" PRIu64 ", cd size %" PRIu64 + ", zip64 eocd offset %" PRIu64, + debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset); + return kInvalidOffset; + } + + *cdInfo = {.num_records = zip64EocdRecord.num_records, + .cd_size = zip64EocdRecord.cd_size, + .cd_start_offset = zip64EocdRecord.cd_start_offset}; + + return kSuccess; } static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive, @@ -194,7 +245,7 @@ static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) { ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32, eocd->cd_size, eocd->cd_start_offset); - return FindCentralDirectoryInfoForZip64(cdInfo); + return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo); } /* @@ -290,13 +341,104 @@ static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* arc return kSuccess; } +static ZipError ParseZip64ExtendedInfoInExtraField( + const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize, + uint32_t zip32CompressedSize, std::optional zip32LocalFileHeaderOffset, + Zip64ExtendedInfo* zip64Info) { + if (extraFieldLength <= 4) { + ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16, + extraFieldLength); + return kInvalidFile; + } + + // Each header MUST consist of: + // Header ID - 2 bytes + // Data Size - 2 bytes + uint16_t offset = 0; + while (offset < extraFieldLength - 4) { + auto headerId = get_unaligned(extraFieldStart + offset); + auto dataSize = get_unaligned(extraFieldStart + offset + 2); + + offset += 4; + if (dataSize > extraFieldLength - offset) { + ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize); + return kInvalidOffset; + } + + // Skip the other types of extensible data fields. 
Details in + // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5 + if (headerId != Zip64ExtendedInfo::kHeaderId) { + offset += dataSize; + continue; + } + + uint16_t expectedDataSize = 0; + // We expect the extended field to include both uncompressed and compressed size. + if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) { + expectedDataSize += 16; + } + if (zip32LocalFileHeaderOffset == UINT32_MAX) { + expectedDataSize += 8; + } + + if (expectedDataSize == 0) { + ALOGW("Zip: Data size should not be 0 in zip64 extended field"); + return kInvalidFile; + } + + if (dataSize != expectedDataSize) { + auto localOffsetString = zip32LocalFileHeaderOffset.has_value() + ? std::to_string(zip32LocalFileHeaderOffset.value()) + : "missing"; + ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16 + ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s", + expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize, + localOffsetString.c_str()); + return kInvalidFile; + } + + std::optional uncompressedFileSize; + std::optional compressedFileSize; + std::optional localHeaderOffset; + if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) { + uncompressedFileSize = get_unaligned(extraFieldStart + offset); + compressedFileSize = get_unaligned(extraFieldStart + offset + 8); + offset += 16; + + // TODO(xunchang) Support handling files larger than UINT32_MAX. It's theoretically possible + // for libz to (de)compress files larger than UINT32_MAX, but we should use our own + // byte counter to replace stream.total_out. + if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) { + ALOGW( + "Zip: File size larger than UINT32_MAX isn't supported yet. 
uncompressed size %" PRIu64 + ", compressed size %" PRIu64, + uncompressedFileSize.value(), compressedFileSize.value()); + return kInvalidFile; + } + } + + if (zip32LocalFileHeaderOffset == UINT32_MAX) { + localHeaderOffset = get_unaligned(extraFieldStart + offset); + offset += 8; + } + + zip64Info->uncompressed_file_size = uncompressedFileSize; + zip64Info->compressed_file_size = compressedFileSize; + zip64Info->local_header_offset = localHeaderOffset; + return kSuccess; + } + + ALOGW("Zip: zip64 extended info isn't found in the extra field."); + return kInvalidFile; +} + /* * Parses the Zip archive's Central Directory. Allocates and populates the * hash table. * * Returns 0 on success. */ -static int32_t ParseZipArchive(ZipArchive* archive) { +static ZipError ParseZipArchive(ZipArchive* archive) { const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr(); const size_t cd_length = archive->central_directory.GetMapLength(); const uint64_t num_entries = archive->num_entries; @@ -326,7 +468,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) { return kInvalidFile; } - const CentralDirectoryRecord* cdr = reinterpret_cast(ptr); + auto cdr = reinterpret_cast(ptr); if (cdr->record_signature != CentralDirectoryRecord::kSignature) { ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i); return kInvalidFile; @@ -354,9 +496,15 @@ static int32_t ParseZipArchive(ZipArchive* archive) { off64_t local_header_offset = cdr->local_file_header_offset; if (local_header_offset == UINT32_MAX) { - // TODO(xunchang) parse the zip64 eocd - ALOGW("Zip: Parsing zip64 cd entry isn't supported yet"); - return kInvalidFile; + Zip64ExtendedInfo zip64_info{}; + if (auto status = ParseZip64ExtendedInfoInExtraField( + extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size, + cdr->local_file_header_offset, &zip64_info); + status != kSuccess) { + return status; + } + CHECK(zip64_info.local_header_offset.has_value()); + local_header_offset = 
zip64_info.local_header_offset.value(); } if (local_header_offset >= archive->directory_offset) { @@ -404,7 +552,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) { ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries); - return 0; + return kSuccess; } static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) { @@ -521,7 +669,7 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, return kInvalidOffset; } - const CentralDirectoryRecord* cdr = reinterpret_cast(ptr); + auto cdr = reinterpret_cast(ptr); // The offset of the start of the central directory in the zipfile. // We keep this lying around so that we can sanity check all our lengths @@ -545,8 +693,27 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, // the extra field. if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX || cdr->local_file_header_offset == UINT32_MAX) { - ALOGW("Zip: Parsing zip64 local file header isn't supported yet"); - return kInvalidFile; + const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length; + Zip64ExtendedInfo zip64_info{}; + if (auto status = ParseZip64ExtendedInfoInExtraField( + extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size, + cdr->local_file_header_offset, &zip64_info); + status != kSuccess) { + return status; + } + + if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) { + CHECK(zip64_info.uncompressed_file_size.has_value()); + CHECK(zip64_info.compressed_file_size.has_value()); + // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX. 
+ data->uncompressed_length = static_cast(zip64_info.uncompressed_file_size.value()); + data->compressed_length = static_cast(zip64_info.compressed_file_size.value()); + } + + if (local_header_offset == UINT32_MAX) { + CHECK(zip64_info.local_header_offset.has_value()); + local_header_offset = zip64_info.local_header_offset.value(); + } } if (local_header_offset + static_cast(sizeof(LocalFileHeader)) >= cd_offset) { @@ -561,14 +728,68 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, return kIoError; } - const LocalFileHeader* lfh = reinterpret_cast(lfh_buf); - + auto lfh = reinterpret_cast(lfh_buf); if (lfh->lfh_signature != LocalFileHeader::kSignature) { ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64, static_cast(local_header_offset)); return kInvalidOffset; } + // Check that the local file header name matches the declared name in the central directory. + CHECK_LE(entryName.size(), UINT16_MAX); + auto nameLen = static_cast(entryName.size()); + if (lfh->file_name_length != nameLen) { + ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16, + std::string(entryName).c_str(), lfh->file_name_length, nameLen); + return kInconsistentInformation; + } + const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader); + if (name_offset > cd_offset - lfh->file_name_length) { + ALOGW("Zip: lfh name has invalid declared length"); + return kInvalidOffset; + } + + std::vector name_buf(nameLen); + if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) { + ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast(name_offset)); + return kIoError; + } + if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) { + ALOGW("Zip: lfh name did not match central directory"); + return kInconsistentInformation; + } + + uint64_t lfh_uncompressed_size = lfh->uncompressed_size; + uint64_t lfh_compressed_size = lfh->compressed_size; + if 
(lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) { + const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length; + const uint16_t lfh_extra_field_size = lfh->extra_field_length; + if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) { + ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str()); + return kInvalidOffset; + } + + std::vector local_extra_field(lfh_extra_field_size); + if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size, + lfh_extra_field_offset)) { + ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset); + return kIoError; + } + + Zip64ExtendedInfo zip64_info{}; + if (auto status = ParseZip64ExtendedInfoInExtraField( + local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size, + lfh->compressed_size, std::nullopt, &zip64_info); + status != kSuccess) { + return status; + } + + CHECK(zip64_info.uncompressed_file_size.has_value()); + CHECK(zip64_info.compressed_file_size.has_value()); + lfh_uncompressed_size = zip64_info.uncompressed_file_size.value(); + lfh_compressed_size = zip64_info.compressed_file_size.value(); + } + // Paranoia: Match the values specified in the local file header // to those specified in the central directory. @@ -594,12 +815,12 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, // header agree on the crc, compressed, and uncompressed sizes of the entry. if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) { data->has_data_descriptor = 0; - if (data->compressed_length != lfh->compressed_size || - data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) { + if (data->compressed_length != lfh_compressed_size || + data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) { ALOGW("Zip: size/crc32 mismatch. 
expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32 - "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}", - data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size, - lfh->uncompressed_size, lfh->crc32); + "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}", + data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size, + lfh_uncompressed_size, lfh->crc32); return kInconsistentInformation; } } else { @@ -622,30 +843,6 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName, // Currently only needed to implement zipinfo. data->is_text = (cdr->internal_file_attributes & 1); - // Check that the local file header name matches the declared - // name in the central directory. - CHECK_LE(entryName.size(), UINT16_MAX); - auto nameLen = static_cast(entryName.size()); - if (lfh->file_name_length != nameLen) { - ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16, - std::string(entryName).c_str(), lfh->file_name_length, nameLen); - return kInconsistentInformation; - } - const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader); - if (name_offset + lfh->file_name_length > cd_offset) { - ALOGW("Zip: lfh name has invalid declared length"); - return kInvalidOffset; - } - std::vector name_buf(nameLen); - if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) { - ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast(name_offset)); - return kIoError; - } - if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) { - ALOGW("Zip: lfh name did not match central directory"); - return kInconsistentInformation; - } - const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) + lfh->file_name_length + lfh->extra_field_length; if (data_offset > cd_offset) { diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc index 5caca8aea..10050da79 100644 --- a/libziparchive/zip_archive_test.cc 
+++ b/libziparchive/zip_archive_test.cc @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "zip_archive_private.h" - #include #include #include @@ -23,6 +21,7 @@ #include #include +#include #include #include #include @@ -31,12 +30,16 @@ #include #include #include +#include #include #include #include #include #include +#include "zip_archive_common.h" +#include "zip_archive_private.h" + static std::string test_data_dir = android::base::GetExecutableDirectory() + "/testdata"; static const std::string kValidZip = "valid.zip"; @@ -930,3 +933,290 @@ TEST(ziparchive, Inflate) { ASSERT_EQ(0u, writer.GetOutput().size()); } } + +// The class constructs a zipfile with zip64 format, and test the parsing logic. +class Zip64ParseTest : public ::testing::Test { + protected: + struct LocalFileEntry { + std::vector local_file_header; + std::string file_name; + std::vector extended_field; + // Fake data to mimic the compressed bytes in the zipfile. + std::vector compressed_bytes; + + size_t GetSize() const { + return local_file_header.size() + file_name.size() + extended_field.size() + + compressed_bytes.size(); + } + + void CopyToOutput(std::vector* output) const { + std::copy(local_file_header.begin(), local_file_header.end(), std::back_inserter(*output)); + std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output)); + std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output)); + std::copy(compressed_bytes.begin(), compressed_bytes.end(), std::back_inserter(*output)); + } + }; + + struct CdRecordEntry { + std::vector central_directory_record; + std::string file_name; + std::vector extended_field; + + size_t GetSize() const { + return central_directory_record.size() + file_name.size() + extended_field.size(); + } + + void CopyToOutput(std::vector* output) const { + std::copy(central_directory_record.begin(), central_directory_record.end(), + std::back_inserter(*output)); + std::copy(file_name.begin(), file_name.end(), 
std::back_inserter(*output)); + std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output)); + } + }; + + static void ConstructLocalFileHeader(const std::string& name, std::vector* output, + uint32_t uncompressed_size, uint32_t compressed_size) { + LocalFileHeader lfh = {}; + lfh.lfh_signature = LocalFileHeader::kSignature; + lfh.compressed_size = compressed_size; + lfh.uncompressed_size = uncompressed_size; + lfh.file_name_length = static_cast(name.size()); + lfh.extra_field_length = 20; + *output = std::vector(reinterpret_cast(&lfh), + reinterpret_cast(&lfh) + sizeof(LocalFileHeader)); + } + + // Put one zip64 extended info in the extended field. + static void ConstructExtendedField(const std::vector& zip64_fields, + std::vector* output) { + ASSERT_FALSE(zip64_fields.empty()); + uint16_t data_size = 8 * static_cast(zip64_fields.size()); + std::vector extended_field(data_size + 4); + android::base::put_unaligned(extended_field.data(), Zip64ExtendedInfo::kHeaderId); + android::base::put_unaligned(extended_field.data() + 2, data_size); + size_t offset = 4; + for (const auto& field : zip64_fields) { + android::base::put_unaligned(extended_field.data() + offset, field); + offset += 8; + } + + *output = std::move(extended_field); + } + + static void ConstructCentralDirectoryRecord(const std::string& name, uint32_t uncompressed_size, + uint32_t compressed_size, uint32_t local_offset, + std::vector* output) { + CentralDirectoryRecord cdr = {}; + cdr.record_signature = CentralDirectoryRecord::kSignature; + cdr.compressed_size = uncompressed_size; + cdr.uncompressed_size = compressed_size; + cdr.file_name_length = static_cast(name.size()); + cdr.extra_field_length = local_offset == UINT32_MAX ? 
28 : 20; + cdr.local_file_header_offset = local_offset; + *output = + std::vector(reinterpret_cast(&cdr), + reinterpret_cast(&cdr) + sizeof(CentralDirectoryRecord)); + } + + // Add an entry to the zipfile, construct the corresponding local header and cd entry. + void AddEntry(const std::string& name, const std::vector& content, + bool uncompressed_size_in_extended, bool compressed_size_in_extended, + bool local_offset_in_extended) { + auto uncompressed_size = static_cast(content.size()); + auto compressed_size = static_cast(content.size()); + uint32_t local_file_header_offset = 0; + std::for_each(file_entries_.begin(), file_entries_.end(), + [&local_file_header_offset](const LocalFileEntry& file_entry) { + local_file_header_offset += file_entry.GetSize(); + }); + + std::vector zip64_fields; + if (uncompressed_size_in_extended) { + zip64_fields.push_back(uncompressed_size); + uncompressed_size = UINT32_MAX; + } + if (compressed_size_in_extended) { + zip64_fields.push_back(compressed_size); + compressed_size = UINT32_MAX; + } + LocalFileEntry local_entry = { + .local_file_header = {}, + .file_name = name, + .extended_field = {}, + .compressed_bytes = content, + }; + ConstructLocalFileHeader(name, &local_entry.local_file_header, uncompressed_size, + compressed_size); + ConstructExtendedField(zip64_fields, &local_entry.extended_field); + file_entries_.push_back(std::move(local_entry)); + + if (local_offset_in_extended) { + zip64_fields.push_back(local_file_header_offset); + local_file_header_offset = UINT32_MAX; + } + CdRecordEntry cd_entry = { + .central_directory_record = {}, + .file_name = name, + .extended_field = {}, + }; + ConstructCentralDirectoryRecord(name, uncompressed_size, compressed_size, + local_file_header_offset, &cd_entry.central_directory_record); + ConstructExtendedField(zip64_fields, &cd_entry.extended_field); + cd_entries_.push_back(std::move(cd_entry)); + } + + void ConstructEocd() { + ASSERT_EQ(file_entries_.size(), cd_entries_.size()); + 
Zip64EocdRecord zip64_eocd = {}; + zip64_eocd.record_signature = Zip64EocdRecord::kSignature; + zip64_eocd.num_records = file_entries_.size(); + zip64_eocd.cd_size = 0; + std::for_each( + cd_entries_.begin(), cd_entries_.end(), + [&zip64_eocd](const CdRecordEntry& cd_entry) { zip64_eocd.cd_size += cd_entry.GetSize(); }); + zip64_eocd.cd_start_offset = 0; + std::for_each(file_entries_.begin(), file_entries_.end(), + [&zip64_eocd](const LocalFileEntry& file_entry) { + zip64_eocd.cd_start_offset += file_entry.GetSize(); + }); + zip64_eocd_record_ = + std::vector(reinterpret_cast(&zip64_eocd), + reinterpret_cast(&zip64_eocd) + sizeof(Zip64EocdRecord)); + + Zip64EocdLocator zip64_locator = {}; + zip64_locator.locator_signature = Zip64EocdLocator::kSignature; + zip64_locator.zip64_eocd_offset = zip64_eocd.cd_start_offset + zip64_eocd.cd_size; + zip64_eocd_locator_ = + std::vector(reinterpret_cast(&zip64_locator), + reinterpret_cast(&zip64_locator) + sizeof(Zip64EocdLocator)); + + EocdRecord eocd = {}; + eocd.eocd_signature = EocdRecord::kSignature, + eocd.num_records = file_entries_.size() > UINT16_MAX + ? UINT16_MAX + : static_cast(file_entries_.size()); + eocd.cd_size = UINT32_MAX; + eocd.cd_start_offset = UINT32_MAX; + eocd_record_ = std::vector(reinterpret_cast(&eocd), + reinterpret_cast(&eocd) + sizeof(EocdRecord)); + } + + // Concatenate all the local file entries, cd entries, and eocd metadata. 
+ void ConstructZipFile() { + for (const auto& file_entry : file_entries_) { + file_entry.CopyToOutput(&zip_content_); + } + for (const auto& cd_entry : cd_entries_) { + cd_entry.CopyToOutput(&zip_content_); + } + std::copy(zip64_eocd_record_.begin(), zip64_eocd_record_.end(), + std::back_inserter(zip_content_)); + std::copy(zip64_eocd_locator_.begin(), zip64_eocd_locator_.end(), + std::back_inserter(zip_content_)); + std::copy(eocd_record_.begin(), eocd_record_.end(), std::back_inserter(zip_content_)); + } + + std::vector zip_content_; + + std::vector file_entries_; + std::vector cd_entries_; + std::vector zip64_eocd_record_; + std::vector zip64_eocd_locator_; + std::vector eocd_record_; +}; + +TEST_F(Zip64ParseTest, openFile) { + AddEntry("a.txt", std::vector(100, 'a'), true, true, false); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, openFilelocalOffsetInExtendedField) { + AddEntry("a.txt", std::vector(100, 'a'), true, true, true); + AddEntry("b.txt", std::vector(200, 'b'), true, true, true); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, openFileCompressedNotInExtendedField) { + AddEntry("a.txt", std::vector(100, 'a'), true, false, false); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + // Zip64 extended fields must include both uncompressed and compressed size. 
+ ASSERT_NE( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, findEntry) { + AddEntry("a.txt", std::vector(200, 'a'), true, true, true); + AddEntry("b.txt", std::vector(300, 'b'), true, true, false); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + ZipEntry entry; + ASSERT_EQ(0, FindEntry(handle, "a.txt", &entry)); + ASSERT_EQ(200, entry.uncompressed_length); + ASSERT_EQ(200, entry.compressed_length); + + ASSERT_EQ(0, FindEntry(handle, "b.txt", &entry)); + ASSERT_EQ(300, entry.uncompressed_length); + ASSERT_EQ(300, entry.compressed_length); + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, openFileIncorrectDataSizeInLocalExtendedField) { + AddEntry("a.txt", std::vector(100, 'a'), true, true, false); + ASSERT_EQ(1, file_entries_.size()); + auto& extended_field = file_entries_[0].extended_field; + // data size exceeds the extended field size in local header. 
+ android::base::put_unaligned(extended_field.data() + 2, 30); + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + ZipEntry entry; + ASSERT_NE(0, FindEntry(handle, "a.txt", &entry)); + + CloseArchive(handle); +} + +TEST_F(Zip64ParseTest, iterates) { + std::set names{"a.txt", "b.txt", "c.txt", "d.txt", "e.txt"}; + for (const auto& name : names) { + AddEntry(std::string(name), std::vector(100, name[0]), true, true, true); + } + ConstructEocd(); + ConstructZipFile(); + + ZipArchiveHandle handle; + ASSERT_EQ( + 0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle)); + + void* iteration_cookie; + ASSERT_EQ(0, StartIteration(handle, &iteration_cookie)); + std::set result; + std::string_view name; + ZipEntry entry; + while (Next(iteration_cookie, &entry, &name) == 0) result.emplace(name); + ASSERT_EQ(names, result); + + CloseArchive(handle); +}