Avoid using data descriptors in ZIP files when possible.

These add 16 bytes per ZIP entry, and are usually avoidable.  APKs contain thousands of
deflated entries, so this overhead adds up to tens of kilobytes.

Bug: 135470635
Change-Id: Ib928aa41dd55cacc41f7394c218c4340d3bbd570
This commit is contained in:
Donald Chai 2019-07-02 17:25:03 -07:00
parent b3fc1b7441
commit e170d7fe85
4 changed files with 39 additions and 2 deletions

View file

@ -76,6 +76,10 @@ cc_defaults {
"liblog",
],
// for FRIEND_TEST
static_libs: ["libgtest_prod"],
export_static_lib_headers: ["libgtest_prod"],
export_include_dirs: ["include"],
}

View file

@ -19,6 +19,7 @@
#include <cstdio>
#include <ctime>
#include <gtest/gtest_prod.h>
#include <memory>
#include <string>
#include <string_view>
@ -165,6 +166,7 @@ class ZipWriter {
int32_t StoreBytes(FileEntry* file, const void* data, uint32_t len);
int32_t CompressBytes(FileEntry* file, const void* data, uint32_t len);
int32_t FlushCompressedBytes(FileEntry* file);
bool ShouldUseDataDescriptor() const;
enum class State {
kWritingZip,
@ -182,4 +184,6 @@ class ZipWriter {
std::unique_ptr<z_stream, void (*)(z_stream*)> z_stream_;
std::vector<uint8_t> buffer_;
FRIEND_TEST(zipwriter, WriteToUnseekableFile);
};

View file

@ -455,6 +455,11 @@ int32_t ZipWriter::FlushCompressedBytes(FileEntry* file) {
return kNoError;
}
bool ZipWriter::ShouldUseDataDescriptor() const {
// Only use a trailing "data descriptor" if the output isn't seekable.
return !seekable_;
}
int32_t ZipWriter::FinishEntry() {
if (state_ != State::kWritingEntry) {
return kInvalidState;
@ -467,7 +472,7 @@ int32_t ZipWriter::FinishEntry() {
}
}
if ((current_file_entry_.compression_method & kCompressDeflated) || !seekable_) {
if (ShouldUseDataDescriptor()) {
// Some versions of ZIP don't allow STORED data to have a trailing DataDescriptor.
// If this file is not seekable, or if the data is compressed, write a DataDescriptor.
const uint32_t sig = DataDescriptor::kOptSignature;
@ -515,7 +520,7 @@ int32_t ZipWriter::Finish() {
for (FileEntry& file : files_) {
CentralDirectoryRecord cdr = {};
cdr.record_signature = CentralDirectoryRecord::kSignature;
if ((file.compression_method & kCompressDeflated) || !seekable_) {
if (ShouldUseDataDescriptor()) {
cdr.gpb_flags |= kGPBDDFlagMask;
}
cdr.compression_method = file.compression_method;

View file

@ -243,6 +243,7 @@ TEST_F(zipwriter, WriteCompressedZipWithOneFile) {
ZipEntry data;
ASSERT_EQ(0, FindEntry(handle, "file.txt", &data));
EXPECT_EQ(kCompressDeflated, data.method);
EXPECT_EQ(0u, data.has_data_descriptor);
ASSERT_EQ(4u, data.uncompressed_length);
ASSERT_TRUE(AssertFileEntryContentsEq("helo", handle, &data));
@ -351,6 +352,29 @@ TEST_F(zipwriter, BackupRemovesTheLastFile) {
CloseArchive(handle);
}
TEST_F(zipwriter, WriteToUnseekableFile) {
const char* expected = "hello";
ZipWriter writer(file_);
writer.seekable_ = false;
ASSERT_EQ(0, writer.StartEntry("file.txt", 0));
ASSERT_EQ(0, writer.WriteBytes(expected, strlen(expected)));
ASSERT_EQ(0, writer.FinishEntry());
ASSERT_EQ(0, writer.Finish());
ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
ZipArchiveHandle handle;
ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
ZipEntry data;
ASSERT_EQ(0, FindEntry(handle, "file.txt", &data));
EXPECT_EQ(kCompressStored, data.method);
EXPECT_EQ(1u, data.has_data_descriptor);
EXPECT_EQ(strlen(expected), data.compressed_length);
ASSERT_EQ(strlen(expected), data.uncompressed_length);
ASSERT_TRUE(AssertFileEntryContentsEq(expected, handle, &data));
CloseArchive(handle);
}
TEST_F(zipwriter, TruncateFileAfterBackup) {
ZipWriter writer(file_);