diff options
Diffstat (limited to 'libdex/ZipArchive.cpp')
-rw-r--r-- | libdex/ZipArchive.cpp | 732 |
1 files changed, 732 insertions, 0 deletions
diff --git a/libdex/ZipArchive.cpp b/libdex/ZipArchive.cpp new file mode 100644 index 0000000..59d28d9 --- /dev/null +++ b/libdex/ZipArchive.cpp @@ -0,0 +1,732 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Read-only access to Zip archives, with minimal heap allocation. + */ +#include "ZipArchive.h" + +#include <zlib.h> + +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <fcntl.h> +#include <errno.h> + +#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/* + * Zip file constants. + */ +#define kEOCDSignature 0x06054b50 +#define kEOCDLen 22 +#define kEOCDNumEntries 8 // offset to #of entries in file +#define kEOCDSize 12 // size of the central directory +#define kEOCDFileOffset 16 // offset to central directory + +#define kMaxCommentLen 65535 // longest possible in ushort +#define kMaxEOCDSearch (kMaxCommentLen + kEOCDLen) + +#define kLFHSignature 0x04034b50 +#define kLFHLen 30 // excluding variable-len fields +#define kLFHNameLen 26 // offset to filename length +#define kLFHExtraLen 28 // offset to extra length + +#define kCDESignature 0x02014b50 +#define kCDELen 46 // excluding variable-len fields +#define kCDEMethod 10 // offset to compression method +#define kCDEModWhen 12 // offset to modification timestamp +#define kCDECRC 16 // offset to entry CRC +#define kCDECompLen 20 // offset to compressed length +#define kCDEUncompLen 24 // offset to uncompressed length +#define kCDENameLen 28 // offset to filename length +#define kCDEExtraLen 30 // offset to extra length +#define kCDECommentLen 32 // offset to comment length +#define kCDELocalOffset 42 // offset to local hdr + +/* + * The values we return for ZipEntry use 0 as an invalid value, so we + * want to adjust the hash table index by a fixed amount. Using a large + * value helps insure that people don't mix & match arguments, e.g. with + * entry indices. + */ +#define kZipEntryAdj 10000 + +/* + * Convert a ZipEntry to a hash table index, verifying that it's in a + * valid range. + */ +static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry) +{ + long ent = ((long) entry) - kZipEntryAdj; + if (ent < 0 || ent >= pArchive->mHashTableSize || + pArchive->mHashTable[ent].name == NULL) + { + LOGW("Zip: invalid ZipEntry %p (%ld)", entry, ent); + return -1; + } + return ent; +} + +/* + * Simple string hash function for non-null-terminated strings. + */ +static unsigned int computeHash(const char* str, int len) +{ + unsigned int hash = 0; + + while (len--) + hash = hash * 31 + *str++; + + return hash; +} + +/* + * Add a new entry to the hash table. + */ +static void addToHash(ZipArchive* pArchive, const char* str, int strLen, + unsigned int hash) +{ + const int hashTableSize = pArchive->mHashTableSize; + int ent = hash & (hashTableSize - 1); + + /* + * We over-allocated the table, so we're guaranteed to find an empty slot. + */ + while (pArchive->mHashTable[ent].name != NULL) + ent = (ent + 1) & (hashTableSize-1); + + pArchive->mHashTable[ent].name = str; + pArchive->mHashTable[ent].nameLen = strLen; +} + +/* + * Get 2 little-endian bytes. + */ +static u2 get2LE(unsigned char const* pSrc) +{ + return pSrc[0] | (pSrc[1] << 8); +} + +/* + * Get 4 little-endian bytes. + */ +static u4 get4LE(unsigned char const* pSrc) +{ + u4 result; + + result = pSrc[0]; + result |= pSrc[1] << 8; + result |= pSrc[2] << 16; + result |= pSrc[3] << 24; + + return result; +} + +static int mapCentralDirectory0(int fd, const char* debugFileName, + ZipArchive* pArchive, off_t fileLength, size_t readAmount, u1* scanBuf) +{ + off_t searchStart = fileLength - readAmount; + + if (lseek(fd, searchStart, SEEK_SET) != searchStart) { + LOGW("Zip: seek %ld failed: %s", (long) searchStart, strerror(errno)); + return -1; + } + ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scanBuf, readAmount)); + if (actual != (ssize_t) readAmount) { + LOGW("Zip: read %zd failed: %s", readAmount, strerror(errno)); + return -1; + } + + /* + * Scan backward for the EOCD magic. In an archive without a trailing + * comment, we'll find it on the first try. (We may want to consider + * doing an initial minimal read; if we don't find it, retry with a + * second read as above.) + */ + int i; + for (i = readAmount - kEOCDLen; i >= 0; i--) { + if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) { + LOGV("+++ Found EOCD at buf+%d", i); + break; + } + } + if (i < 0) { + LOGD("Zip: EOCD not found, %s is not zip", debugFileName); + return -1; + } + + off_t eocdOffset = searchStart + i; + const u1* eocdPtr = scanBuf + i; + + assert(eocdOffset < fileLength); + + /* + * Grab the CD offset and size, and the number of entries in the + * archive. Verify that they look reasonable. + */ + u4 numEntries = get2LE(eocdPtr + kEOCDNumEntries); + u4 dirSize = get4LE(eocdPtr + kEOCDSize); + u4 dirOffset = get4LE(eocdPtr + kEOCDFileOffset); + + if ((long long) dirOffset + (long long) dirSize > (long long) eocdOffset) { + LOGW("Zip: bad offsets (dir %ld, size %u, eocd %ld)", + (long) dirOffset, dirSize, (long) eocdOffset); + return -1; + } + if (numEntries == 0) { + LOGW("Zip: empty archive?"); + return -1; + } + + LOGV("+++ numEntries=%d dirSize=%d dirOffset=%d", + numEntries, dirSize, dirOffset); + + /* + * It all looks good. Create a mapping for the CD, and set the fields + * in pArchive. + */ + if (sysMapFileSegmentInShmem(fd, dirOffset, dirSize, + &pArchive->mDirectoryMap) != 0) + { + LOGW("Zip: cd map failed"); + return -1; + } + + pArchive->mNumEntries = numEntries; + pArchive->mDirectoryOffset = dirOffset; + + return 0; +} + +/* + * Find the zip Central Directory and memory-map it. + * + * On success, returns 0 after populating fields from the EOCD area: + * mDirectoryOffset + * mDirectoryMap + * mNumEntries + */ +static int mapCentralDirectory(int fd, const char* debugFileName, + ZipArchive* pArchive) +{ + /* + * Get and test file length. + */ + off_t fileLength = lseek(fd, 0, SEEK_END); + if (fileLength < kEOCDLen) { + LOGV("Zip: length %ld is too small to be zip", (long) fileLength); + return -1; + } + + /* + * Perform the traditional EOCD snipe hunt. + * + * We're searching for the End of Central Directory magic number, + * which appears at the start of the EOCD block. It's followed by + * 18 bytes of EOCD stuff and up to 64KB of archive comment. We + * need to read the last part of the file into a buffer, dig through + * it to find the magic number, parse some values out, and use those + * to determine the extent of the CD. + * + * We start by pulling in the last part of the file. + */ + size_t readAmount = kMaxEOCDSearch; + if (fileLength < off_t(readAmount)) + readAmount = fileLength; + + u1* scanBuf = (u1*) malloc(readAmount); + if (scanBuf == NULL) { + return -1; + } + + int result = mapCentralDirectory0(fd, debugFileName, pArchive, + fileLength, readAmount, scanBuf); + + free(scanBuf); + return result; +} + +/* + * Parses the Zip archive's Central Directory. Allocates and populates the + * hash table. + * + * Returns 0 on success. + */ +static int parseZipArchive(ZipArchive* pArchive) +{ + int result = -1; + const u1* cdPtr = (const u1*)pArchive->mDirectoryMap.addr; + size_t cdLength = pArchive->mDirectoryMap.length; + int numEntries = pArchive->mNumEntries; + + /* + * Create hash table. We have a minimum 75% load factor, possibly as + * low as 50% after we round off to a power of 2. There must be at + * least one unused entry to avoid an infinite loop during creation. + */ + pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3); + pArchive->mHashTable = (ZipHashEntry*) + calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry)); + + /* + * Walk through the central directory, adding entries to the hash + * table and verifying values. + */ + const u1* ptr = cdPtr; + int i; + for (i = 0; i < numEntries; i++) { + if (get4LE(ptr) != kCDESignature) { + LOGW("Zip: missed a central dir sig (at %d)", i); + goto bail; + } + if (ptr + kCDELen > cdPtr + cdLength) { + LOGW("Zip: ran off the end (at %d)", i); + goto bail; + } + + long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset); + if (localHdrOffset >= pArchive->mDirectoryOffset) { + LOGW("Zip: bad LFH offset %ld at entry %d", localHdrOffset, i); + goto bail; + } + + unsigned int fileNameLen, extraLen, commentLen, hash; + fileNameLen = get2LE(ptr + kCDENameLen); + extraLen = get2LE(ptr + kCDEExtraLen); + commentLen = get2LE(ptr + kCDECommentLen); + + /* add the CDE filename to the hash table */ + hash = computeHash((const char*)ptr + kCDELen, fileNameLen); + addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash); + + ptr += kCDELen + fileNameLen + extraLen + commentLen; + if ((size_t)(ptr - cdPtr) > cdLength) { + LOGW("Zip: bad CD advance (%d vs %zd) at entry %d", + (int) (ptr - cdPtr), cdLength, i); + goto bail; + } + } + LOGV("+++ zip good scan %d entries", numEntries); + + result = 0; + +bail: + return result; +} + +/* + * Open the specified file read-only. We examine the contents and verify + * that it appears to be a valid zip file. + * + * This will be called on non-Zip files, especially during VM startup, so + * we don't want to be too noisy about certain types of failure. (Do + * we want a "quiet" flag?) + * + * On success, we fill out the contents of "pArchive" and return 0. On + * failure we return the errno value. + */ +int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive) +{ + int fd, err; + + LOGV("Opening as zip '%s' %p", fileName, pArchive); + + memset(pArchive, 0, sizeof(ZipArchive)); + + fd = open(fileName, O_RDONLY | O_BINARY, 0); + if (fd < 0) { + err = errno ? errno : -1; + LOGV("Unable to open '%s': %s", fileName, strerror(err)); + return err; + } + + return dexZipPrepArchive(fd, fileName, pArchive); +} + +/* + * Prepare to access a ZipArchive through an open file descriptor. + * + * On success, we fill out the contents of "pArchive" and return 0. + */ +int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive) +{ + int result = -1; + + memset(pArchive, 0, sizeof(*pArchive)); + pArchive->mFd = fd; + + if (mapCentralDirectory(fd, debugFileName, pArchive) != 0) + goto bail; + + if (parseZipArchive(pArchive) != 0) { + LOGV("Zip: parsing '%s' failed", debugFileName); + goto bail; + } + + /* success */ + result = 0; + +bail: + if (result != 0) + dexZipCloseArchive(pArchive); + return result; +} + + +/* + * Close a ZipArchive, closing the file and freeing the contents. + * + * NOTE: the ZipArchive may not have been fully created. + */ +void dexZipCloseArchive(ZipArchive* pArchive) +{ + LOGV("Closing archive %p", pArchive); + + if (pArchive->mFd >= 0) + close(pArchive->mFd); + + sysReleaseShmem(&pArchive->mDirectoryMap); + + free(pArchive->mHashTable); + + /* ensure nobody tries to use the ZipArchive after it's closed */ + pArchive->mDirectoryOffset = -1; + pArchive->mFd = -1; + pArchive->mNumEntries = -1; + pArchive->mHashTableSize = -1; + pArchive->mHashTable = NULL; +} + + +/* + * Find a matching entry. + * + * Returns 0 if not found. + */ +ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName) +{ + int nameLen = strlen(entryName); + unsigned int hash = computeHash(entryName, nameLen); + const int hashTableSize = pArchive->mHashTableSize; + int ent = hash & (hashTableSize-1); + + while (pArchive->mHashTable[ent].name != NULL) { + if (pArchive->mHashTable[ent].nameLen == nameLen && + memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0) + { + /* match */ + return (ZipEntry)(long)(ent + kZipEntryAdj); + } + + ent = (ent + 1) & (hashTableSize-1); + } + + return NULL; +} + +#if 0 +/* + * Find the Nth entry. + * + * This currently involves walking through the sparse hash table, counting + * non-empty entries. If we need to speed this up we can either allocate + * a parallel lookup table or (perhaps better) provide an iterator interface. + */ +ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx) +{ + if (idx < 0 || idx >= pArchive->mNumEntries) { + LOGW("Invalid index %d", idx); + return NULL; + } + + int ent; + for (ent = 0; ent < pArchive->mHashTableSize; ent++) { + if (pArchive->mHashTable[ent].name != NULL) { + if (idx-- == 0) + return (ZipEntry) (ent + kZipEntryAdj); + } + } + + return NULL; +} +#endif + +/* + * Get the useful fields from the zip entry. + * + * Returns non-zero if the contents of the fields (particularly the data + * offset) appear to be bogus. + */ +int dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry, + int* pMethod, size_t* pUncompLen, size_t* pCompLen, off_t* pOffset, + long* pModWhen, long* pCrc32) +{ + int ent = entryToIndex(pArchive, entry); + if (ent < 0) + return -1; + + /* + * Recover the start of the central directory entry from the filename + * pointer. The filename is the first entry past the fixed-size data, + * so we can just subtract back from that. + */ + const unsigned char* basePtr = (const unsigned char*) + pArchive->mDirectoryMap.addr; + const unsigned char* ptr = (const unsigned char*) + pArchive->mHashTable[ent].name; + off_t cdOffset = pArchive->mDirectoryOffset; + + ptr -= kCDELen; + + int method = get2LE(ptr + kCDEMethod); + if (pMethod != NULL) + *pMethod = method; + + if (pModWhen != NULL) + *pModWhen = get4LE(ptr + kCDEModWhen); + if (pCrc32 != NULL) + *pCrc32 = get4LE(ptr + kCDECRC); + + size_t compLen = get4LE(ptr + kCDECompLen); + if (pCompLen != NULL) + *pCompLen = compLen; + size_t uncompLen = get4LE(ptr + kCDEUncompLen); + if (pUncompLen != NULL) + *pUncompLen = uncompLen; + + /* + * If requested, determine the offset of the start of the data. All we + * have is the offset to the Local File Header, which is variable size, + * so we have to read the contents of the struct to figure out where + * the actual data starts. + * + * We also need to make sure that the lengths are not so large that + * somebody trying to map the compressed or uncompressed data runs + * off the end of the mapped region. + * + * Note we don't verify compLen/uncompLen if they don't request the + * dataOffset, because dataOffset is expensive to determine. However, + * if they don't have the file offset, they're not likely to be doing + * anything with the contents. + */ + if (pOffset != NULL) { + long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset); + if (localHdrOffset + kLFHLen >= cdOffset) { + LOGW("Zip: bad local hdr offset in zip"); + return -1; + } + + u1 lfhBuf[kLFHLen]; + if (lseek(pArchive->mFd, localHdrOffset, SEEK_SET) != localHdrOffset) { + LOGW("Zip: failed seeking to lfh at offset %ld", localHdrOffset); + return -1; + } + ssize_t actual = + TEMP_FAILURE_RETRY(read(pArchive->mFd, lfhBuf, sizeof(lfhBuf))); + if (actual != sizeof(lfhBuf)) { + LOGW("Zip: failed reading lfh from offset %ld", localHdrOffset); + return -1; + } + + if (get4LE(lfhBuf) != kLFHSignature) { + LOGW("Zip: didn't find signature at start of lfh, offset=%ld", + localHdrOffset); + return -1; + } + + off_t dataOffset = localHdrOffset + kLFHLen + + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen); + if (dataOffset >= cdOffset) { + LOGW("Zip: bad data offset %ld in zip", (long) dataOffset); + return -1; + } + + /* check lengths */ + if ((off_t)(dataOffset + compLen) > cdOffset) { + LOGW("Zip: bad compressed length in zip (%ld + %zd > %ld)", + (long) dataOffset, compLen, (long) cdOffset); + return -1; + } + + if (method == kCompressStored && + (off_t)(dataOffset + uncompLen) > cdOffset) + { + LOGW("Zip: bad uncompressed length in zip (%ld + %zd > %ld)", + (long) dataOffset, uncompLen, (long) cdOffset); + return -1; + } + + *pOffset = dataOffset; + } + return 0; +} + +/* + * Uncompress "deflate" data from the archive's file to an open file + * descriptor. + */ +static int inflateToFile(int outFd, int inFd, size_t uncompLen, size_t compLen) +{ + int result = -1; + const size_t kBufSize = 32768; + unsigned char* readBuf = (unsigned char*) malloc(kBufSize); + unsigned char* writeBuf = (unsigned char*) malloc(kBufSize); + z_stream zstream; + int zerr; + + if (readBuf == NULL || writeBuf == NULL) + goto bail; + + /* + * Initialize the zlib stream struct. + */ + memset(&zstream, 0, sizeof(zstream)); + zstream.zalloc = Z_NULL; + zstream.zfree = Z_NULL; + zstream.opaque = Z_NULL; + zstream.next_in = NULL; + zstream.avail_in = 0; + zstream.next_out = (Bytef*) writeBuf; + zstream.avail_out = kBufSize; + zstream.data_type = Z_UNKNOWN; + + /* + * Use the undocumented "negative window bits" feature to tell zlib + * that there's no zlib header waiting for it. + */ + zerr = inflateInit2(&zstream, -MAX_WBITS); + if (zerr != Z_OK) { + if (zerr == Z_VERSION_ERROR) { + LOGE("Installed zlib is not compatible with linked version (%s)", + ZLIB_VERSION); + } else { + LOGW("Call to inflateInit2 failed (zerr=%d)", zerr); + } + goto bail; + } + + /* + * Loop while we have more to do. + */ + do { + /* read as much as we can */ + if (zstream.avail_in == 0) { + size_t getSize = (compLen > kBufSize) ? kBufSize : compLen; + + ssize_t actual = TEMP_FAILURE_RETRY(read(inFd, readBuf, getSize)); + if (actual != (ssize_t) getSize) { + LOGW("Zip: inflate read failed (%d vs %zd)", + (int)actual, getSize); + goto z_bail; + } + + compLen -= getSize; + + zstream.next_in = readBuf; + zstream.avail_in = getSize; + } + + /* uncompress the data */ + zerr = inflate(&zstream, Z_NO_FLUSH); + if (zerr != Z_OK && zerr != Z_STREAM_END) { + LOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)", + zerr, zstream.next_in, zstream.avail_in, + zstream.next_out, zstream.avail_out); + goto z_bail; + } + + /* write when we're full or when we're done */ + if (zstream.avail_out == 0 || + (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) + { + size_t writeSize = zstream.next_out - writeBuf; + if (sysWriteFully(outFd, writeBuf, writeSize, "Zip inflate") != 0) + goto z_bail; + + zstream.next_out = writeBuf; + zstream.avail_out = kBufSize; + } + } while (zerr == Z_OK); + + assert(zerr == Z_STREAM_END); /* other errors should've been caught */ + + /* paranoia */ + if (zstream.total_out != uncompLen) { + LOGW("Zip: size mismatch on inflated file (%ld vs %zd)", + zstream.total_out, uncompLen); + goto z_bail; + } + + result = 0; + +z_bail: + inflateEnd(&zstream); /* free up any allocated structures */ + +bail: + free(readBuf); + free(writeBuf); + return result; +} + +/* + * Uncompress an entry, in its entirety, to an open file descriptor. + * + * TODO: this doesn't verify the data's CRC, but probably should (especially + * for uncompressed data). + */ +int dexZipExtractEntryToFile(const ZipArchive* pArchive, + const ZipEntry entry, int fd) +{ + int result = -1; + int ent = entryToIndex(pArchive, entry); + if (ent < 0) { + LOGW("Zip: extract can't find entry %p", entry); + goto bail; + } + + int method; + size_t uncompLen, compLen; + off_t dataOffset; + + if (dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen, + &dataOffset, NULL, NULL) != 0) + { + goto bail; + } + if (lseek(pArchive->mFd, dataOffset, SEEK_SET) != dataOffset) { + LOGW("Zip: lseek to data at %ld failed", (long) dataOffset); + goto bail; + } + + if (method == kCompressStored) { + if (sysCopyFileToFile(fd, pArchive->mFd, uncompLen) != 0) + goto bail; + } else { + if (inflateToFile(fd, pArchive->mFd, uncompLen, compLen) != 0) + goto bail; + } + + result = 0; + +bail: + return result; +} |