1 files changed, 542 insertions, 0 deletions
diff --git a/libdex/DexFile.cpp b/libdex/DexFile.cpp
new file mode 100644
index 0000000..4dcc097
--- /dev/null
+++ b/libdex/DexFile.cpp
@@ -0,0 +1,542 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Access the contents of a .dex file.
+ */
+
+#include "DexFile.h"
+#include "DexOptData.h"
+#include "DexProto.h"
+#include "DexCatch.h"
+#include "Leb128.h"
+#include "sha1.h"
+#include "ZipArchive.h"
+
+#include <zlib.h>
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+
+
+/*
+ * Verifying checksums is good, but it slows things down and causes us to
+ * touch every page.  In the "optimized" world, it doesn't work at all,
+ * because we rewrite the contents.
+ */
+static const bool kVerifyChecksum = false;
+static const bool kVerifySignature = false;
+
+/* (documented in header) */
+char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
+    const char* string = dexGetPrimitiveTypeDescriptor(type);
+
+    return (string == NULL) ? '\0' : string[0];
+}
+
+/* (documented in header) */
+const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
+    switch (type) {
+        case PRIM_VOID:    return "V";
+        case PRIM_BOOLEAN: return "Z";
+        case PRIM_BYTE:    return "B";
+        case PRIM_SHORT:   return "S";
+        case PRIM_CHAR:    return "C";
+        case PRIM_INT:     return "I";
+        case PRIM_LONG:    return "J";
+        case PRIM_FLOAT:   return "F";
+        case PRIM_DOUBLE:  return "D";
+        default:           return NULL;
+    }
+
+    return NULL;
+}
+
+/* (documented in header) */
+const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
+    switch (type) {
+        case PRIM_VOID:    return NULL;
+        case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
+        case PRIM_BYTE:    return "Ljava/lang/Byte;";
+        case PRIM_SHORT:   return "Ljava/lang/Short;";
+        case PRIM_CHAR:    return "Ljava/lang/Character;";
+        case PRIM_INT:     return "Ljava/lang/Integer;";
+        case PRIM_LONG:    return "Ljava/lang/Long;";
+        case PRIM_FLOAT:   return "Ljava/lang/Float;";
+        case PRIM_DOUBLE:  return "Ljava/lang/Double;";
+        default:           return NULL;
+    }
+}
+
+/* (documented in header) */
+PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
+    switch (descriptorChar) {
+        case 'V': return PRIM_VOID;
+        case 'Z': return PRIM_BOOLEAN;
+        case 'B': return PRIM_BYTE;
+        case 'S': return PRIM_SHORT;
+        case 'C': return PRIM_CHAR;
+        case 'I': return PRIM_INT;
+        case 'J': return PRIM_LONG;
+        case 'F': return PRIM_FLOAT;
+        case 'D': return PRIM_DOUBLE;
+        default:  return PRIM_NOT;
+    }
+}
+
+/* Return the UTF-8 encoded string with the specified string_id index,
+ * also filling in the UTF-16 size (number of 16-bit code points).*/
+const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
+        u4* utf16Size) {
+    const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
+    const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
+
+    *utf16Size = readUnsignedLeb128(&ptr);
+    return (const char*) ptr;
+}
+
+/*
+ * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
+ * least kSHA1DigestOutputLen bytes.
+ */
+const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
+
+/*
+ * Compute a SHA-1 digest on a range of bytes.
+ */
+static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
+    unsigned char digest[])
+{
+    SHA1_CTX context;
+    SHA1Init(&context);
+    SHA1Update(&context, data, length);
+    SHA1Final(digest, &context);
+}
+
+/*
+ * Format the SHA-1 digest into the buffer, which must be able to hold at
+ * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
+ */
+static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
+{
+    static const char hexDigit[] = "0123456789abcdef";
+    char* cp;
+    int i;
+
+    cp = tmpBuf;
+    for (i = 0; i < kSHA1DigestLen; i++) {
+        *cp++ = hexDigit[digest[i] >> 4];
+        *cp++ = hexDigit[digest[i] & 0x0f];
+    }
+    *cp++ = '\0';
+
+    assert(cp == tmpBuf + kSHA1DigestOutputLen);
+
+    return tmpBuf;
+}
+
+/*
+ * Compute a hash code on a UTF-8 string, for use with internal hash tables.
+ *
+ * This may or may not be compatible with UTF-8 hash functions used inside
+ * the Dalvik VM.
+ *
+ * The basic "multiply by 31 and add" approach does better on class names
+ * than most other things tried (e.g. adler32).
+ */
+static u4 classDescriptorHash(const char* str)
+{
+    u4 hash = 1;
+
+    while (*str != '\0')
+        hash = hash * 31 + *str++;
+
+    return hash;
+}
+
+/*
+ * Add an entry to the class lookup table.  We hash the string and probe
+ * until we find an open slot.
+ */
+static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
+    int stringOff, int classDefOff, int* pNumProbes)
+{
+    const char* classDescriptor =
+        (const char*) (pDexFile->baseAddr + stringOff);
+    const DexClassDef* pClassDef =
+        (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
+    u4 hash = classDescriptorHash(classDescriptor);
+    int mask = pLookup->numEntries-1;
+    int idx = hash & mask;
+
+    /*
+     * Find the first empty slot.  We oversized the table, so this is
+     * guaranteed to finish.
+     */
+    int probes = 0;
+    while (pLookup->table[idx].classDescriptorOffset != 0) {
+        idx = (idx + 1) & mask;
+        probes++;
+    }
+    //if (probes > 1)
+    //    LOGW("classLookupAdd: probes=%d", probes);
+
+    pLookup->table[idx].classDescriptorHash = hash;
+    pLookup->table[idx].classDescriptorOffset = stringOff;
+    pLookup->table[idx].classDefOffset = classDefOff;
+    *pNumProbes = probes;
+}
+
+/*
+ * Create the class lookup hash table.
+ *
+ * Returns newly-allocated storage.
+ */
+DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
+{
+    DexClassLookup* pLookup;
+    int allocSize;
+    int i, numEntries;
+    int numProbes, totalProbes, maxProbes;
+
+    numProbes = totalProbes = maxProbes = 0;
+
+    assert(pDexFile != NULL);
+
+    /*
+     * Using a factor of 3 results in far less probing than a factor of 2,
+     * but almost doubles the flash storage requirements for the bootstrap
+     * DEX files.  The overall impact on class loading performance seems
+     * to be minor.  We could probably get some performance improvement by
+     * using a secondary hash.
+     */
+    numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
+    allocSize = offsetof(DexClassLookup, table)
+                    + numEntries * sizeof(pLookup->table[0]);
+
+    pLookup = (DexClassLookup*) calloc(1, allocSize);
+    if (pLookup == NULL)
+        return NULL;
+    pLookup->size = allocSize;
+    pLookup->numEntries = numEntries;
+
+    for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
+        const DexClassDef* pClassDef;
+        const char* pString;
+
+        pClassDef = dexGetClassDef(pDexFile, i);
+        pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
+
+        classLookupAdd(pDexFile, pLookup,
+            (u1*)pString - pDexFile->baseAddr,
+            (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
+
+        if (numProbes > maxProbes)
+            maxProbes = numProbes;
+        totalProbes += numProbes;
+    }
+
+    LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
+         " total=%d max=%d",
+        pDexFile->pHeader->classDefsSize, numEntries,
+        (100 * pDexFile->pHeader->classDefsSize) / numEntries,
+        allocSize, totalProbes, maxProbes);
+
+    return pLookup;
+}
+
+
+/*
+ * Set up the basic raw data pointers of a DexFile. This function isn't
+ * meant for general use.
+ */
+void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
+    DexHeader *pHeader = (DexHeader*) data;
+
+    pDexFile->baseAddr = data;
+    pDexFile->pHeader = pHeader;
+    pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
+    pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
+    pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
+    pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
+    pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
+    pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
+    pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
+}
+
+/*
+ * Parse an optimized or unoptimized .dex file sitting in memory.  This is
+ * called after the byte-ordering and structure alignment has been fixed up.
+ *
+ * On success, return a newly-allocated DexFile.
+ */
+DexFile* dexFileParse(const u1* data, size_t length, int flags)
+{
+    DexFile* pDexFile = NULL;
+    const DexHeader* pHeader;
+    const u1* magic;
+    int result = -1;
+
+    if (length < sizeof(DexHeader)) {
+        LOGE("too short to be a valid .dex");
+        goto bail;      /* bad file format */
+    }
+
+    pDexFile = (DexFile*) malloc(sizeof(DexFile));
+    if (pDexFile == NULL)
+        goto bail;      /* alloc failure */
+    memset(pDexFile, 0, sizeof(DexFile));
+
+    /*
+     * Peel off the optimized header.
+     */
+    if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
+        magic = data;
+        if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
+            LOGE("bad opt version (0x%02x %02x %02x %02x)",
+                 magic[4], magic[5], magic[6], magic[7]);
+            goto bail;
+        }
+
+        pDexFile->pOptHeader = (const DexOptHeader*) data;
+        LOGV("Good opt header, DEX offset is %d, flags=0x%02x",
+            pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
+
+        /* parse the optimized dex file tables */
+        if (!dexParseOptData(data, length, pDexFile))
+            goto bail;
+
+        /* ignore the opt header and appended data from here on out */
+        data += pDexFile->pOptHeader->dexOffset;
+        length -= pDexFile->pOptHeader->dexOffset;
+        if (pDexFile->pOptHeader->dexLength > length) {
+            LOGE("File truncated? stored len=%d, rem len=%d",
+                pDexFile->pOptHeader->dexLength, (int) length);
+            goto bail;
+        }
+        length = pDexFile->pOptHeader->dexLength;
+    }
+
+    dexFileSetupBasicPointers(pDexFile, data);
+    pHeader = pDexFile->pHeader;
+
+    if (!dexHasValidMagic(pHeader)) {
+        goto bail;
+    }
+
+    /*
+     * Verify the checksum(s).  This is reasonably quick, but does require
+     * touching every byte in the DEX file.  The base checksum changes after
+     * byte-swapping and DEX optimization.
+     */
+    if (flags & kDexParseVerifyChecksum) {
+        u4 adler = dexComputeChecksum(pHeader);
+        if (adler != pHeader->checksum) {
+            LOGE("ERROR: bad checksum (%08x vs %08x)",
+                adler, pHeader->checksum);
+            if (!(flags & kDexParseContinueOnError))
+                goto bail;
+        } else {
+            LOGV("+++ adler32 checksum (%08x) verified", adler);
+        }
+
+        const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
+        if (pOptHeader != NULL) {
+            adler = dexComputeOptChecksum(pOptHeader);
+            if (adler != pOptHeader->checksum) {
+                LOGE("ERROR: bad opt checksum (%08x vs %08x)",
+                    adler, pOptHeader->checksum);
+                if (!(flags & kDexParseContinueOnError))
+                    goto bail;
+            } else {
+                LOGV("+++ adler32 opt checksum (%08x) verified", adler);
+            }
+        }
+    }
+
+    /*
+     * Verify the SHA-1 digest.  (Normally we don't want to do this --
+     * the digest is used to uniquely identify the original DEX file, and
+     * can't be computed for verification after the DEX is byte-swapped
+     * and optimized.)
+     */
+    if (kVerifySignature) {
+        unsigned char sha1Digest[kSHA1DigestLen];
+        const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
+                            kSHA1DigestLen;
+
+        dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
+        if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
+            char tmpBuf1[kSHA1DigestOutputLen];
+            char tmpBuf2[kSHA1DigestOutputLen];
+            LOGE("ERROR: bad SHA1 digest (%s vs %s)",
+                dexSHA1DigestToStr(sha1Digest, tmpBuf1),
+                dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
+            if (!(flags & kDexParseContinueOnError))
+                goto bail;
+        } else {
+            LOGV("+++ sha1 digest verified");
+        }
+    }
+
+    if (pHeader->fileSize != length) {
+        LOGE("ERROR: stored file size (%d) != expected (%d)",
+            (int) pHeader->fileSize, (int) length);
+        if (!(flags & kDexParseContinueOnError))
+            goto bail;
+    }
+
+    if (pHeader->classDefsSize == 0) {
+        LOGE("ERROR: DEX file has no classes in it, failing");
+        goto bail;
+    }
+
+    /*
+     * Success!
+     */
+    result = 0;
+
+bail:
+    if (result != 0 && pDexFile != NULL) {
+        dexFileFree(pDexFile);
+        pDexFile = NULL;
+    }
+    return pDexFile;
+}
+
+/*
+ * Free up the DexFile and any associated data structures.
+ *
+ * Note we may be called with a partially-initialized DexFile.
+ */
+void dexFileFree(DexFile* pDexFile)
+{
+    if (pDexFile == NULL)
+        return;
+
+    free(pDexFile);
+}
+
+/*
+ * Look up a class definition entry by descriptor.
+ *
+ * "descriptor" should look like "Landroid/debug/Stuff;".
+ */
+const DexClassDef* dexFindClass(const DexFile* pDexFile,
+    const char* descriptor)
+{
+    const DexClassLookup* pLookup = pDexFile->pClassLookup;
+    u4 hash;
+    int idx, mask;
+
+    hash = classDescriptorHash(descriptor);
+    mask = pLookup->numEntries - 1;
+    idx = hash & mask;
+
+    /*
+     * Search until we find a matching entry or an empty slot.
+     */
+    while (true) {
+        int offset;
+
+        offset = pLookup->table[idx].classDescriptorOffset;
+        if (offset == 0)
+            return NULL;
+
+        if (pLookup->table[idx].classDescriptorHash == hash) {
+            const char* str;
+
+            str = (const char*) (pDexFile->baseAddr + offset);
+            if (strcmp(str, descriptor) == 0) {
+                return (const DexClassDef*)
+                    (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
+            }
+        }
+
+        idx = (idx + 1) & mask;
+    }
+}
+
+
+/*
+ * Compute the DEX file checksum for a memory-mapped DEX file.
+ */
+u4 dexComputeChecksum(const DexHeader* pHeader)
+{
+    const u1* start = (const u1*) pHeader;
+
+    uLong adler = adler32(0L, Z_NULL, 0);
+    const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
+
+    return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
+}
+
+/*
+ * Compute the size, in bytes, of a DexCode.
+ */
+size_t dexGetDexCodeSize(const DexCode* pCode)
+{
+    /*
+     * The catch handler data is the last entry.  It has a variable number
+     * of variable-size pieces, so we need to create an iterator.
+     */
+    u4 handlersSize;
+    u4 offset;
+    u4 ui;
+
+    if (pCode->triesSize != 0) {
+        handlersSize = dexGetHandlersSize(pCode);
+        offset = dexGetFirstHandlerOffset(pCode);
+    } else {
+        handlersSize = 0;
+        offset = 0;
+    }
+
+    for (ui = 0; ui < handlersSize; ui++) {
+        DexCatchIterator iterator;
+        dexCatchIteratorInit(&iterator, pCode, offset);
+        offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
+    }
+
+    const u1* handlerData = dexGetCatchHandlerData(pCode);
+
+    //LOGD("+++ pCode=%p handlerData=%p last offset=%d",
+    //    pCode, handlerData, offset);
+
+    /* return the size of the catch handler + everything before it */
+    return (handlerData - (u1*) pCode) + offset;
+}
+
+/*
+ * Round up to the next highest power of 2.
+ *
+ * Found on http://graphics.stanford.edu/~seander/bithacks.html.
+ */
+u4 dexRoundUpPower2(u4 val)
+{
+    val--;
+    val |= val >> 1;
+    val |= val >> 2;
+    val |= val >> 4;
+    val |= val >> 8;
+    val |= val >> 16;
+    val++;
+
+    return val;
+}