summaryrefslogtreecommitdiff
path: root/libdex/DexFile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libdex/DexFile.cpp')
-rw-r--r--libdex/DexFile.cpp542
1 files changed, 542 insertions, 0 deletions
diff --git a/libdex/DexFile.cpp b/libdex/DexFile.cpp
new file mode 100644
index 0000000..4dcc097
--- /dev/null
+++ b/libdex/DexFile.cpp
@@ -0,0 +1,542 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Access the contents of a .dex file.
+ */
+
+#include "DexFile.h"
+#include "DexOptData.h"
+#include "DexProto.h"
+#include "DexCatch.h"
+#include "Leb128.h"
+#include "sha1.h"
+#include "ZipArchive.h"
+
+#include <zlib.h>
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+
+
+/*
+ * Verifying checksums is good, but it slows things down and causes us to
+ * touch every page. In the "optimized" world, it doesn't work at all,
+ * because we rewrite the contents.
+ */
+static const bool kVerifyChecksum = false;
+static const bool kVerifySignature = false;
+
+/* (documented in header) */
+char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
+ const char* string = dexGetPrimitiveTypeDescriptor(type);
+
+ return (string == NULL) ? '\0' : string[0];
+}
+
+/* (documented in header) */
+const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
+ switch (type) {
+ case PRIM_VOID: return "V";
+ case PRIM_BOOLEAN: return "Z";
+ case PRIM_BYTE: return "B";
+ case PRIM_SHORT: return "S";
+ case PRIM_CHAR: return "C";
+ case PRIM_INT: return "I";
+ case PRIM_LONG: return "J";
+ case PRIM_FLOAT: return "F";
+ case PRIM_DOUBLE: return "D";
+ default: return NULL;
+ }
+
+ return NULL;
+}
+
+/* (documented in header) */
+const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
+ switch (type) {
+ case PRIM_VOID: return NULL;
+ case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
+ case PRIM_BYTE: return "Ljava/lang/Byte;";
+ case PRIM_SHORT: return "Ljava/lang/Short;";
+ case PRIM_CHAR: return "Ljava/lang/Character;";
+ case PRIM_INT: return "Ljava/lang/Integer;";
+ case PRIM_LONG: return "Ljava/lang/Long;";
+ case PRIM_FLOAT: return "Ljava/lang/Float;";
+ case PRIM_DOUBLE: return "Ljava/lang/Double;";
+ default: return NULL;
+ }
+}
+
+/* (documented in header) */
+PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
+ switch (descriptorChar) {
+ case 'V': return PRIM_VOID;
+ case 'Z': return PRIM_BOOLEAN;
+ case 'B': return PRIM_BYTE;
+ case 'S': return PRIM_SHORT;
+ case 'C': return PRIM_CHAR;
+ case 'I': return PRIM_INT;
+ case 'J': return PRIM_LONG;
+ case 'F': return PRIM_FLOAT;
+ case 'D': return PRIM_DOUBLE;
+ default: return PRIM_NOT;
+ }
+}
+
+/* Return the UTF-8 encoded string with the specified string_id index,
+ * also filling in the UTF-16 size (number of 16-bit code points).*/
+const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
+ u4* utf16Size) {
+ const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
+ const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
+
+ *utf16Size = readUnsignedLeb128(&ptr);
+ return (const char*) ptr;
+}
+
+/*
+ * Format an SHA-1 digest for printing. tmpBuf must be able to hold at
+ * least kSHA1DigestOutputLen bytes.
+ */
+const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
+
+/*
+ * Compute a SHA-1 digest on a range of bytes.
+ */
+static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
+ unsigned char digest[])
+{
+ SHA1_CTX context;
+ SHA1Init(&context);
+ SHA1Update(&context, data, length);
+ SHA1Final(digest, &context);
+}
+
+/*
+ * Format the SHA-1 digest into the buffer, which must be able to hold at
+ * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer,
+ */
+static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
+{
+ static const char hexDigit[] = "0123456789abcdef";
+ char* cp;
+ int i;
+
+ cp = tmpBuf;
+ for (i = 0; i < kSHA1DigestLen; i++) {
+ *cp++ = hexDigit[digest[i] >> 4];
+ *cp++ = hexDigit[digest[i] & 0x0f];
+ }
+ *cp++ = '\0';
+
+ assert(cp == tmpBuf + kSHA1DigestOutputLen);
+
+ return tmpBuf;
+}
+
+/*
+ * Compute a hash code on a UTF-8 string, for use with internal hash tables.
+ *
+ * This may or may not be compatible with UTF-8 hash functions used inside
+ * the Dalvik VM.
+ *
+ * The basic "multiply by 31 and add" approach does better on class names
+ * than most other things tried (e.g. adler32).
+ */
+static u4 classDescriptorHash(const char* str)
+{
+ u4 hash = 1;
+
+ while (*str != '\0')
+ hash = hash * 31 + *str++;
+
+ return hash;
+}
+
+/*
+ * Add an entry to the class lookup table. We hash the string and probe
+ * until we find an open slot.
+ */
+static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
+ int stringOff, int classDefOff, int* pNumProbes)
+{
+ const char* classDescriptor =
+ (const char*) (pDexFile->baseAddr + stringOff);
+ const DexClassDef* pClassDef =
+ (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
+ u4 hash = classDescriptorHash(classDescriptor);
+ int mask = pLookup->numEntries-1;
+ int idx = hash & mask;
+
+ /*
+ * Find the first empty slot. We oversized the table, so this is
+ * guaranteed to finish.
+ */
+ int probes = 0;
+ while (pLookup->table[idx].classDescriptorOffset != 0) {
+ idx = (idx + 1) & mask;
+ probes++;
+ }
+ //if (probes > 1)
+ // LOGW("classLookupAdd: probes=%d", probes);
+
+ pLookup->table[idx].classDescriptorHash = hash;
+ pLookup->table[idx].classDescriptorOffset = stringOff;
+ pLookup->table[idx].classDefOffset = classDefOff;
+ *pNumProbes = probes;
+}
+
+/*
+ * Create the class lookup hash table.
+ *
+ * Returns newly-allocated storage.
+ */
+DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
+{
+ DexClassLookup* pLookup;
+ int allocSize;
+ int i, numEntries;
+ int numProbes, totalProbes, maxProbes;
+
+ numProbes = totalProbes = maxProbes = 0;
+
+ assert(pDexFile != NULL);
+
+ /*
+ * Using a factor of 3 results in far less probing than a factor of 2,
+ * but almost doubles the flash storage requirements for the bootstrap
+ * DEX files. The overall impact on class loading performance seems
+ * to be minor. We could probably get some performance improvement by
+ * using a secondary hash.
+ */
+ numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
+ allocSize = offsetof(DexClassLookup, table)
+ + numEntries * sizeof(pLookup->table[0]);
+
+ pLookup = (DexClassLookup*) calloc(1, allocSize);
+ if (pLookup == NULL)
+ return NULL;
+ pLookup->size = allocSize;
+ pLookup->numEntries = numEntries;
+
+ for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
+ const DexClassDef* pClassDef;
+ const char* pString;
+
+ pClassDef = dexGetClassDef(pDexFile, i);
+ pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
+
+ classLookupAdd(pDexFile, pLookup,
+ (u1*)pString - pDexFile->baseAddr,
+ (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
+
+ if (numProbes > maxProbes)
+ maxProbes = numProbes;
+ totalProbes += numProbes;
+ }
+
+ LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
+ " total=%d max=%d",
+ pDexFile->pHeader->classDefsSize, numEntries,
+ (100 * pDexFile->pHeader->classDefsSize) / numEntries,
+ allocSize, totalProbes, maxProbes);
+
+ return pLookup;
+}
+
+
+/*
+ * Set up the basic raw data pointers of a DexFile. This function isn't
+ * meant for general use.
+ */
+void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
+ DexHeader *pHeader = (DexHeader*) data;
+
+ pDexFile->baseAddr = data;
+ pDexFile->pHeader = pHeader;
+ pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
+ pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
+ pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
+ pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
+ pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
+ pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
+ pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
+}
+
+/*
+ * Parse an optimized or unoptimized .dex file sitting in memory. This is
+ * called after the byte-ordering and structure alignment has been fixed up.
+ *
+ * On success, return a newly-allocated DexFile.
+ */
+DexFile* dexFileParse(const u1* data, size_t length, int flags)
+{
+ DexFile* pDexFile = NULL;
+ const DexHeader* pHeader;
+ const u1* magic;
+ int result = -1;
+
+ if (length < sizeof(DexHeader)) {
+ LOGE("too short to be a valid .dex");
+ goto bail; /* bad file format */
+ }
+
+ pDexFile = (DexFile*) malloc(sizeof(DexFile));
+ if (pDexFile == NULL)
+ goto bail; /* alloc failure */
+ memset(pDexFile, 0, sizeof(DexFile));
+
+ /*
+ * Peel off the optimized header.
+ */
+ if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
+ magic = data;
+ if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
+ LOGE("bad opt version (0x%02x %02x %02x %02x)",
+ magic[4], magic[5], magic[6], magic[7]);
+ goto bail;
+ }
+
+ pDexFile->pOptHeader = (const DexOptHeader*) data;
+ LOGV("Good opt header, DEX offset is %d, flags=0x%02x",
+ pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
+
+ /* parse the optimized dex file tables */
+ if (!dexParseOptData(data, length, pDexFile))
+ goto bail;
+
+ /* ignore the opt header and appended data from here on out */
+ data += pDexFile->pOptHeader->dexOffset;
+ length -= pDexFile->pOptHeader->dexOffset;
+ if (pDexFile->pOptHeader->dexLength > length) {
+ LOGE("File truncated? stored len=%d, rem len=%d",
+ pDexFile->pOptHeader->dexLength, (int) length);
+ goto bail;
+ }
+ length = pDexFile->pOptHeader->dexLength;
+ }
+
+ dexFileSetupBasicPointers(pDexFile, data);
+ pHeader = pDexFile->pHeader;
+
+ if (!dexHasValidMagic(pHeader)) {
+ goto bail;
+ }
+
+ /*
+ * Verify the checksum(s). This is reasonably quick, but does require
+ * touching every byte in the DEX file. The base checksum changes after
+ * byte-swapping and DEX optimization.
+ */
+ if (flags & kDexParseVerifyChecksum) {
+ u4 adler = dexComputeChecksum(pHeader);
+ if (adler != pHeader->checksum) {
+ LOGE("ERROR: bad checksum (%08x vs %08x)",
+ adler, pHeader->checksum);
+ if (!(flags & kDexParseContinueOnError))
+ goto bail;
+ } else {
+ LOGV("+++ adler32 checksum (%08x) verified", adler);
+ }
+
+ const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
+ if (pOptHeader != NULL) {
+ adler = dexComputeOptChecksum(pOptHeader);
+ if (adler != pOptHeader->checksum) {
+ LOGE("ERROR: bad opt checksum (%08x vs %08x)",
+ adler, pOptHeader->checksum);
+ if (!(flags & kDexParseContinueOnError))
+ goto bail;
+ } else {
+ LOGV("+++ adler32 opt checksum (%08x) verified", adler);
+ }
+ }
+ }
+
+ /*
+ * Verify the SHA-1 digest. (Normally we don't want to do this --
+ * the digest is used to uniquely identify the original DEX file, and
+ * can't be computed for verification after the DEX is byte-swapped
+ * and optimized.)
+ */
+ if (kVerifySignature) {
+ unsigned char sha1Digest[kSHA1DigestLen];
+ const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
+ kSHA1DigestLen;
+
+ dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
+ if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
+ char tmpBuf1[kSHA1DigestOutputLen];
+ char tmpBuf2[kSHA1DigestOutputLen];
+ LOGE("ERROR: bad SHA1 digest (%s vs %s)",
+ dexSHA1DigestToStr(sha1Digest, tmpBuf1),
+ dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
+ if (!(flags & kDexParseContinueOnError))
+ goto bail;
+ } else {
+ LOGV("+++ sha1 digest verified");
+ }
+ }
+
+ if (pHeader->fileSize != length) {
+ LOGE("ERROR: stored file size (%d) != expected (%d)",
+ (int) pHeader->fileSize, (int) length);
+ if (!(flags & kDexParseContinueOnError))
+ goto bail;
+ }
+
+ if (pHeader->classDefsSize == 0) {
+ LOGE("ERROR: DEX file has no classes in it, failing");
+ goto bail;
+ }
+
+ /*
+ * Success!
+ */
+ result = 0;
+
+bail:
+ if (result != 0 && pDexFile != NULL) {
+ dexFileFree(pDexFile);
+ pDexFile = NULL;
+ }
+ return pDexFile;
+}
+
+/*
+ * Free up the DexFile and any associated data structures.
+ *
+ * Note we may be called with a partially-initialized DexFile.
+ */
+void dexFileFree(DexFile* pDexFile)
+{
+ if (pDexFile == NULL)
+ return;
+
+ free(pDexFile);
+}
+
+/*
+ * Look up a class definition entry by descriptor.
+ *
+ * "descriptor" should look like "Landroid/debug/Stuff;".
+ */
+const DexClassDef* dexFindClass(const DexFile* pDexFile,
+ const char* descriptor)
+{
+ const DexClassLookup* pLookup = pDexFile->pClassLookup;
+ u4 hash;
+ int idx, mask;
+
+ hash = classDescriptorHash(descriptor);
+ mask = pLookup->numEntries - 1;
+ idx = hash & mask;
+
+ /*
+ * Search until we find a matching entry or an empty slot.
+ */
+ while (true) {
+ int offset;
+
+ offset = pLookup->table[idx].classDescriptorOffset;
+ if (offset == 0)
+ return NULL;
+
+ if (pLookup->table[idx].classDescriptorHash == hash) {
+ const char* str;
+
+ str = (const char*) (pDexFile->baseAddr + offset);
+ if (strcmp(str, descriptor) == 0) {
+ return (const DexClassDef*)
+ (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
+ }
+ }
+
+ idx = (idx + 1) & mask;
+ }
+}
+
+
+/*
+ * Compute the DEX file checksum for a memory-mapped DEX file.
+ */
+u4 dexComputeChecksum(const DexHeader* pHeader)
+{
+ const u1* start = (const u1*) pHeader;
+
+ uLong adler = adler32(0L, Z_NULL, 0);
+ const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
+
+ return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
+}
+
+/*
+ * Compute the size, in bytes, of a DexCode.
+ */
+size_t dexGetDexCodeSize(const DexCode* pCode)
+{
+ /*
+ * The catch handler data is the last entry. It has a variable number
+ * of variable-size pieces, so we need to create an iterator.
+ */
+ u4 handlersSize;
+ u4 offset;
+ u4 ui;
+
+ if (pCode->triesSize != 0) {
+ handlersSize = dexGetHandlersSize(pCode);
+ offset = dexGetFirstHandlerOffset(pCode);
+ } else {
+ handlersSize = 0;
+ offset = 0;
+ }
+
+ for (ui = 0; ui < handlersSize; ui++) {
+ DexCatchIterator iterator;
+ dexCatchIteratorInit(&iterator, pCode, offset);
+ offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
+ }
+
+ const u1* handlerData = dexGetCatchHandlerData(pCode);
+
+ //LOGD("+++ pCode=%p handlerData=%p last offset=%d",
+ // pCode, handlerData, offset);
+
+ /* return the size of the catch handler + everything before it */
+ return (handlerData - (u1*) pCode) + offset;
+}
+
+/*
+ * Round up to the next highest power of 2.
+ *
+ * Found on http://graphics.stanford.edu/~seander/bithacks.html.
+ */
+u4 dexRoundUpPower2(u4 val)
+{
+ val--;
+ val |= val >> 1;
+ val |= val >> 2;
+ val |= val >> 4;
+ val |= val >> 8;
+ val |= val >> 16;
+ val++;
+
+ return val;
+}