summaryrefslogtreecommitdiff
path: root/libdex/DexFile.c
diff options
context:
space:
mode:
Diffstat (limited to 'libdex/DexFile.c')
-rw-r--r--libdex/DexFile.c1032
1 files changed, 1032 insertions, 0 deletions
diff --git a/libdex/DexFile.c b/libdex/DexFile.c
new file mode 100644
index 0000000..f997b94
--- /dev/null
+++ b/libdex/DexFile.c
@@ -0,0 +1,1032 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Access the contents of a .dex file.
+ */
+
+#include "DexFile.h"
+#include "DexOptData.h"
+#include "DexProto.h"
+#include "DexCatch.h"
+#include "Leb128.h"
+#include "sha1.h"
+#include "ZipArchive.h"
+
+#include <zlib.h>
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+
+
+/*
+ * Verifying checksums is good, but it slows things down and causes us to
+ * touch every page. In the "optimized" world, it doesn't work at all,
+ * because we rewrite the contents.
+ */
+static const bool kVerifyChecksum = false;
+static const bool kVerifySignature = false;
+
+
+/* Compare two '\0'-terminated modified UTF-8 strings, using Unicode
+ * code point values for comparison. This treats different encodings
+ * for the same code point as equivalent, except that only a real '\0'
+ * byte is considered the string terminator. The return value is as
+ * for strcmp(). */
+int dexUtf8Cmp(const char* s1, const char* s2) {
+ for (;;) {
+ if (*s1 == '\0') {
+ if (*s2 == '\0') {
+ return 0;
+ }
+ return -1;
+ } else if (*s2 == '\0') {
+ return 1;
+ }
+
+ int utf1 = dexGetUtf16FromUtf8(&s1);
+ int utf2 = dexGetUtf16FromUtf8(&s2);
+ int diff = utf1 - utf2;
+
+ if (diff != 0) {
+ return diff;
+ }
+ }
+}
+
+/* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
+u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
+ 0x00000000, // 00..1f low control characters; nothing valid
+ 0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
+ 0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
+ 0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z'
+};
+
+/* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
+bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
+ /*
+ * It's a multibyte encoded character. Decode it and analyze. We
+ * accept anything that isn't (a) an improperly encoded low value,
+ * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
+ * control character, or (e) a high space, layout, or special
+ * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
+ * U+fff0..U+ffff).
+ */
+
+ u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
+
+ // Perform follow-up tests based on the high 8 bits.
+ switch (utf16 >> 8) {
+ case 0x00: {
+ // It's only valid if it's above the ISO-8859-1 high space (0xa0).
+ return (utf16 > 0x00a0);
+ }
+ case 0xd8:
+ case 0xd9:
+ case 0xda:
+ case 0xdb: {
+ /*
+ * It's a leading surrogate. Check to see that a trailing
+ * surrogate follows.
+ */
+ utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
+ return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
+ }
+ case 0xdc:
+ case 0xdd:
+ case 0xde:
+ case 0xdf: {
+ // It's a trailing surrogate, which is not valid at this point.
+ return false;
+ }
+ case 0x20:
+ case 0xff: {
+ // It's in the range that has spaces, controls, and specials.
+ switch (utf16 & 0xfff8) {
+ case 0x2000:
+ case 0x2008:
+ case 0x2028:
+ case 0xfff0:
+ case 0xfff8: {
+ return false;
+ }
+ }
+ break;
+ }
+ }
+
+ return true;
+}
+
+/* Return whether the given string is a valid field or method name. */
+bool dexIsValidMemberName(const char* s) {
+ bool angleName = false;
+
+ switch (*s) {
+ case '\0': {
+ // The empty string is not a valid name.
+ return false;
+ }
+ case '<': {
+ /*
+ * '<' is allowed only at the start of a name, and if present,
+ * means that the name must end with '>'.
+ */
+ angleName = true;
+ s++;
+ break;
+ }
+ }
+
+ for (;;) {
+ switch (*s) {
+ case '\0': {
+ return !angleName;
+ }
+ case '>': {
+ return angleName && s[1] == '\0';
+ }
+ }
+ if (!dexIsValidMemberNameUtf8(&s)) {
+ return false;
+ }
+ }
+}
+
+/* Return whether the given string is a valid type descriptor. */
+bool dexIsValidTypeDescriptor(const char* s) {
+ int arrayCount = 0;
+
+ while (*s == '[') {
+ arrayCount++;
+ s++;
+ }
+
+ if (arrayCount > 255) {
+ // Arrays may have no more than 255 dimensions.
+ return false;
+ }
+
+ switch (*(s++)) {
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'F':
+ case 'I':
+ case 'J':
+ case 'S':
+ case 'Z': {
+ // These are all single-character descriptors for primitive types.
+ return (*s == '\0');
+ }
+ case 'V': {
+ // You can't have an array of void.
+ return (arrayCount == 0) && (*s == '\0');
+ }
+ case 'L': {
+ // Break out and continue below.
+ break;
+ }
+ default: {
+ // Oddball descriptor character.
+ return false;
+ }
+ }
+
+ // We just consumed the 'L' that introduces a class name.
+
+ bool slashOrFirst = true; // first character or just encountered a slash
+ for (;;) {
+ u1 c = (u1) *s;
+ switch (c) {
+ case '\0': {
+ // Premature end.
+ return false;
+ }
+ case ';': {
+ /*
+ * Make sure that this is the end of the string and that
+ * it doesn't end with an empty component (including the
+ * degenerate case of "L;").
+ */
+ return (s[1] == '\0') && !slashOrFirst;
+ }
+ case '/': {
+ if (slashOrFirst) {
+ // Slash at start or two slashes in a row.
+ return false;
+ }
+ slashOrFirst = true;
+ s++;
+ break;
+ }
+ default: {
+ if (!dexIsValidMemberNameUtf8(&s)) {
+ return false;
+ }
+ slashOrFirst = false;
+ break;
+ }
+ }
+ }
+}
+
+/* Return whether the given string is a valid reference descriptor. This
+ * is true if dexIsValidTypeDescriptor() returns true and the descriptor
+ * is for a class or array and not a primitive type. */
+bool dexIsReferenceDescriptor(const char* s) {
+ if (!dexIsValidTypeDescriptor(s)) {
+ return false;
+ }
+
+ return (s[0] == 'L') || (s[0] == '[');
+}
+
+/* Return whether the given string is a valid class descriptor. This
+ * is true if dexIsValidTypeDescriptor() returns true and the descriptor
+ * is for a class and not an array or primitive type. */
+bool dexIsClassDescriptor(const char* s) {
+ if (!dexIsValidTypeDescriptor(s)) {
+ return false;
+ }
+
+ return s[0] == 'L';
+}
+
+/* Return whether the given string is a valid field type descriptor. This
+ * is true if dexIsValidTypeDescriptor() returns true and the descriptor
+ * is for anything but "void". */
+bool dexIsFieldDescriptor(const char* s) {
+ if (!dexIsValidTypeDescriptor(s)) {
+ return false;
+ }
+
+ return s[0] != 'V';
+}
+
+/* Return the UTF-8 encoded string with the specified string_id index,
+ * also filling in the UTF-16 size (number of 16-bit code points).*/
+const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
+ u4* utf16Size) {
+ const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
+ const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
+
+ *utf16Size = readUnsignedLeb128(&ptr);
+ return (const char*) ptr;
+}
+
+/*
+ * Format an SHA-1 digest for printing. tmpBuf must be able to hold at
+ * least kSHA1DigestOutputLen bytes.
+ */
+const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
+
+/*
+ * Compute a SHA-1 digest on a range of bytes.
+ */
+static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
+ unsigned char digest[])
+{
+ SHA1_CTX context;
+ SHA1Init(&context);
+ SHA1Update(&context, data, length);
+ SHA1Final(digest, &context);
+}
+
+/*
+ * Format the SHA-1 digest into the buffer, which must be able to hold at
+ * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer,
+ */
+static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
+{
+ static const char hexDigit[] = "0123456789abcdef";
+ char* cp;
+ int i;
+
+ cp = tmpBuf;
+ for (i = 0; i < kSHA1DigestLen; i++) {
+ *cp++ = hexDigit[digest[i] >> 4];
+ *cp++ = hexDigit[digest[i] & 0x0f];
+ }
+ *cp++ = '\0';
+
+ assert(cp == tmpBuf + kSHA1DigestOutputLen);
+
+ return tmpBuf;
+}
+
+/*
+ * Compute a hash code on a UTF-8 string, for use with internal hash tables.
+ *
+ * This may or may not be compatible with UTF-8 hash functions used inside
+ * the Dalvik VM.
+ *
+ * The basic "multiply by 31 and add" approach does better on class names
+ * than most other things tried (e.g. adler32).
+ */
+static u4 classDescriptorHash(const char* str)
+{
+ u4 hash = 1;
+
+ while (*str != '\0')
+ hash = hash * 31 + *str++;
+
+ return hash;
+}
+
+/*
+ * Add an entry to the class lookup table. We hash the string and probe
+ * until we find an open slot.
+ */
+static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
+ int stringOff, int classDefOff, int* pNumProbes)
+{
+ const char* classDescriptor =
+ (const char*) (pDexFile->baseAddr + stringOff);
+ const DexClassDef* pClassDef =
+ (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
+ u4 hash = classDescriptorHash(classDescriptor);
+ int mask = pLookup->numEntries-1;
+ int idx = hash & mask;
+
+ /*
+ * Find the first empty slot. We oversized the table, so this is
+ * guaranteed to finish.
+ */
+ int probes = 0;
+ while (pLookup->table[idx].classDescriptorOffset != 0) {
+ idx = (idx + 1) & mask;
+ probes++;
+ }
+ //if (probes > 1)
+ // LOGW("classLookupAdd: probes=%d\n", probes);
+
+ pLookup->table[idx].classDescriptorHash = hash;
+ pLookup->table[idx].classDescriptorOffset = stringOff;
+ pLookup->table[idx].classDefOffset = classDefOff;
+ *pNumProbes = probes;
+}
+
+/*
+ * Round up to the next highest power of 2.
+ *
+ * Found on http://graphics.stanford.edu/~seander/bithacks.html.
+ */
+u4 dexRoundUpPower2(u4 val)
+{
+ val--;
+ val |= val >> 1;
+ val |= val >> 2;
+ val |= val >> 4;
+ val |= val >> 8;
+ val |= val >> 16;
+ val++;
+
+ return val;
+}
+
+/*
+ * Create the class lookup hash table.
+ *
+ * Returns newly-allocated storage.
+ */
+DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
+{
+ DexClassLookup* pLookup;
+ int allocSize;
+ int i, numEntries;
+ int numProbes, totalProbes, maxProbes;
+
+ numProbes = totalProbes = maxProbes = 0;
+
+ assert(pDexFile != NULL);
+
+ /*
+ * Using a factor of 3 results in far less probing than a factor of 2,
+ * but almost doubles the flash storage requirements for the bootstrap
+ * DEX files. The overall impact on class loading performance seems
+ * to be minor. We could probably get some performance improvement by
+ * using a secondary hash.
+ */
+ numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
+ allocSize = offsetof(DexClassLookup, table)
+ + numEntries * sizeof(pLookup->table[0]);
+
+ pLookup = (DexClassLookup*) calloc(1, allocSize);
+ if (pLookup == NULL)
+ return NULL;
+ pLookup->size = allocSize;
+ pLookup->numEntries = numEntries;
+
+ for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
+ const DexClassDef* pClassDef;
+ const char* pString;
+
+ pClassDef = dexGetClassDef(pDexFile, i);
+ pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
+
+ classLookupAdd(pDexFile, pLookup,
+ (u1*)pString - pDexFile->baseAddr,
+ (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
+
+ if (numProbes > maxProbes)
+ maxProbes = numProbes;
+ totalProbes += numProbes;
+ }
+
+ LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
+ " total=%d max=%d\n",
+ pDexFile->pHeader->classDefsSize, numEntries,
+ (100 * pDexFile->pHeader->classDefsSize) / numEntries,
+ allocSize, totalProbes, maxProbes);
+
+ return pLookup;
+}
+
+
+/*
+ * Set up the basic raw data pointers of a DexFile. This function isn't
+ * meant for general use.
+ */
+void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
+ DexHeader *pHeader = (DexHeader*) data;
+
+ pDexFile->baseAddr = data;
+ pDexFile->pHeader = pHeader;
+ pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
+ pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
+ pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
+ pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
+ pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
+ pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
+ pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
+}
+
+/*
+ * Parse an optimized or unoptimized .dex file sitting in memory. This is
+ * called after the byte-ordering and structure alignment has been fixed up.
+ *
+ * On success, return a newly-allocated DexFile.
+ */
+DexFile* dexFileParse(const u1* data, size_t length, int flags)
+{
+ DexFile* pDexFile = NULL;
+ const DexHeader* pHeader;
+ const u1* magic;
+ int result = -1;
+
+ if (length < sizeof(DexHeader)) {
+ LOGE("too short to be a valid .dex\n");
+ goto bail; /* bad file format */
+ }
+
+ pDexFile = (DexFile*) malloc(sizeof(DexFile));
+ if (pDexFile == NULL)
+ goto bail; /* alloc failure */
+ memset(pDexFile, 0, sizeof(DexFile));
+
+ /*
+ * Peel off the optimized header.
+ */
+ if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
+ magic = data;
+ if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
+ LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
+ magic[4], magic[5], magic[6], magic[7]);
+ goto bail;
+ }
+
+ pDexFile->pOptHeader = (const DexOptHeader*) data;
+ LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
+ pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
+
+ /* parse the optimized dex file tables */
+ if (!dexParseOptData(data, length, pDexFile))
+ goto bail;
+
+ /* ignore the opt header and appended data from here on out */
+ data += pDexFile->pOptHeader->dexOffset;
+ length -= pDexFile->pOptHeader->dexOffset;
+ if (pDexFile->pOptHeader->dexLength > length) {
+ LOGE("File truncated? stored len=%d, rem len=%d\n",
+ pDexFile->pOptHeader->dexLength, (int) length);
+ goto bail;
+ }
+ length = pDexFile->pOptHeader->dexLength;
+ }
+
+ dexFileSetupBasicPointers(pDexFile, data);
+ pHeader = pDexFile->pHeader;
+
+ magic = pHeader->magic;
+ if (memcmp(magic, DEX_MAGIC, 4) != 0) {
+ /* not expected */
+ LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
+ magic[0], magic[1], magic[2], magic[3]);
+ goto bail;
+ }
+ if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
+ LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
+ magic[4], magic[5], magic[6], magic[7]);
+ goto bail;
+ }
+
+ /*
+ * Verify the checksum(s). This is reasonably quick, but does require
+ * touching every byte in the DEX file. The base checksum changes after
+ * byte-swapping and DEX optimization.
+ */
+ if (flags & kDexParseVerifyChecksum) {
+ u4 adler = dexComputeChecksum(pHeader);
+ if (adler != pHeader->checksum) {
+ LOGE("ERROR: bad checksum (%08x vs %08x)\n",
+ adler, pHeader->checksum);
+ if (!(flags & kDexParseContinueOnError))
+ goto bail;
+ } else {
+ LOGV("+++ adler32 checksum (%08x) verified\n", adler);
+ }
+
+ const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
+ if (pOptHeader != NULL) {
+ adler = dexComputeOptChecksum(pOptHeader);
+ if (adler != pOptHeader->checksum) {
+ LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
+ adler, pOptHeader->checksum);
+ if (!(flags & kDexParseContinueOnError))
+ goto bail;
+ } else {
+ LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
+ }
+ }
+ }
+
+ /*
+ * Verify the SHA-1 digest. (Normally we don't want to do this --
+ * the digest is used to uniquely identify the original DEX file, and
+ * can't be computed for verification after the DEX is byte-swapped
+ * and optimized.)
+ */
+ if (kVerifySignature) {
+ unsigned char sha1Digest[kSHA1DigestLen];
+ const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
+ kSHA1DigestLen;
+
+ dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
+ if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
+ char tmpBuf1[kSHA1DigestOutputLen];
+ char tmpBuf2[kSHA1DigestOutputLen];
+ LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
+ dexSHA1DigestToStr(sha1Digest, tmpBuf1),
+ dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
+ if (!(flags & kDexParseContinueOnError))
+ goto bail;
+ } else {
+ LOGV("+++ sha1 digest verified\n");
+ }
+ }
+
+ if (pHeader->fileSize != length) {
+ LOGE("ERROR: stored file size (%d) != expected (%d)\n",
+ (int) pHeader->fileSize, (int) length);
+ if (!(flags & kDexParseContinueOnError))
+ goto bail;
+ }
+
+ if (pHeader->classDefsSize == 0) {
+ LOGE("ERROR: DEX file has no classes in it, failing\n");
+ goto bail;
+ }
+
+ /*
+ * Success!
+ */
+ result = 0;
+
+bail:
+ if (result != 0 && pDexFile != NULL) {
+ dexFileFree(pDexFile);
+ pDexFile = NULL;
+ }
+ return pDexFile;
+}
+
+/*
+ * Free up the DexFile and any associated data structures.
+ *
+ * Note we may be called with a partially-initialized DexFile.
+ */
+void dexFileFree(DexFile* pDexFile)
+{
+ if (pDexFile == NULL)
+ return;
+
+ free(pDexFile);
+}
+
+/*
+ * Look up a class definition entry by descriptor.
+ *
+ * "descriptor" should look like "Landroid/debug/Stuff;".
+ */
+const DexClassDef* dexFindClass(const DexFile* pDexFile,
+ const char* descriptor)
+{
+ const DexClassLookup* pLookup = pDexFile->pClassLookup;
+ u4 hash;
+ int idx, mask;
+
+ hash = classDescriptorHash(descriptor);
+ mask = pLookup->numEntries - 1;
+ idx = hash & mask;
+
+ /*
+ * Search until we find a matching entry or an empty slot.
+ */
+ while (true) {
+ int offset;
+
+ offset = pLookup->table[idx].classDescriptorOffset;
+ if (offset == 0)
+ return NULL;
+
+ if (pLookup->table[idx].classDescriptorHash == hash) {
+ const char* str;
+
+ str = (const char*) (pDexFile->baseAddr + offset);
+ if (strcmp(str, descriptor) == 0) {
+ return (const DexClassDef*)
+ (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
+ }
+ }
+
+ idx = (idx + 1) & mask;
+ }
+}
+
+
+/*
+ * Compute the DEX file checksum for a memory-mapped DEX file.
+ */
+u4 dexComputeChecksum(const DexHeader* pHeader)
+{
+ const u1* start = (const u1*) pHeader;
+
+ uLong adler = adler32(0L, Z_NULL, 0);
+ const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
+
+ return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
+}
+
+/*
+ * Compute the size, in bytes, of a DexCode.
+ */
+size_t dexGetDexCodeSize(const DexCode* pCode)
+{
+ /*
+ * The catch handler data is the last entry. It has a variable number
+ * of variable-size pieces, so we need to create an iterator.
+ */
+ u4 handlersSize;
+ u4 offset;
+ u4 ui;
+
+ if (pCode->triesSize != 0) {
+ handlersSize = dexGetHandlersSize(pCode);
+ offset = dexGetFirstHandlerOffset(pCode);
+ } else {
+ handlersSize = 0;
+ offset = 0;
+ }
+
+ for (ui = 0; ui < handlersSize; ui++) {
+ DexCatchIterator iterator;
+ dexCatchIteratorInit(&iterator, pCode, offset);
+ offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
+ }
+
+ const u1* handlerData = dexGetCatchHandlerData(pCode);
+
+ //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
+ // pCode, handlerData, offset);
+
+ /* return the size of the catch handler + everything before it */
+ return (handlerData - (u1*) pCode) + offset;
+}
+
+
+/*
+ * ===========================================================================
+ * Debug info
+ * ===========================================================================
+ */
+
+/*
+ * Decode the arguments in a method signature, which looks something
+ * like "(ID[Ljava/lang/String;)V".
+ *
+ * Returns the type signature letter for the next argument, or ')' if
+ * there are no more args. Advances "pSig" to point to the character
+ * after the one returned.
+ */
+static char decodeSignature(const char** pSig)
+{
+ const char* sig = *pSig;
+
+ if (*sig == '(')
+ sig++;
+
+ if (*sig == 'L') {
+ /* object ref */
+ while (*++sig != ';')
+ ;
+ *pSig = sig+1;
+ return 'L';
+ }
+ if (*sig == '[') {
+ /* array; advance past array type */
+ while (*++sig == '[')
+ ;
+ if (*sig == 'L') {
+ while (*++sig != ';')
+ ;
+ }
+ *pSig = sig+1;
+ return '[';
+ }
+ if (*sig == '\0')
+ return *sig; /* don't advance further */
+
+ *pSig = sig+1;
+ return *sig;
+}
+
+/*
+ * returns the length of a type string, given the start of the
+ * type string. Used for the case where the debug info format
+ * references types that are inside a method type signature.
+ */
+static int typeLength (const char *type) {
+ // Assumes any leading '(' has already been gobbled
+ const char *end = type;
+ decodeSignature(&end);
+ return end - type;
+}
+
+/*
+ * Reads a string index as encoded for the debug info format,
+ * returning a string pointer or NULL as appropriate.
+ */
+static const char* readStringIdx(const DexFile* pDexFile,
+ const u1** pStream) {
+ u4 stringIdx = readUnsignedLeb128(pStream);
+
+ // Remember, encoded string indicies have 1 added to them.
+ if (stringIdx == 0) {
+ return NULL;
+ } else {
+ return dexStringById(pDexFile, stringIdx - 1);
+ }
+}
+
+/*
+ * Reads a type index as encoded for the debug info format, returning
+ * a string pointer for its descriptor or NULL as appropriate.
+ */
+static const char* readTypeIdx(const DexFile* pDexFile,
+ const u1** pStream) {
+ u4 typeIdx = readUnsignedLeb128(pStream);
+
+ // Remember, encoded type indicies have 1 added to them.
+ if (typeIdx == 0) {
+ return NULL;
+ } else {
+ return dexStringByTypeIdx(pDexFile, typeIdx - 1);
+ }
+}
+
+/* access_flag value indicating that a method is static */
+#define ACC_STATIC 0x0008
+
+typedef struct LocalInfo {
+ const char *name;
+ const char *descriptor;
+ const char *signature;
+ u2 startAddress;
+ bool live;
+} LocalInfo;
+
+static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
+ LocalInfo *localInReg, DexDebugNewLocalCb localCb)
+{
+ if (localCb != NULL && localInReg[reg].live) {
+ localCb(cnxt, reg, localInReg[reg].startAddress, endAddress,
+ localInReg[reg].name,
+ localInReg[reg].descriptor,
+ localInReg[reg].signature == NULL
+ ? "" : localInReg[reg].signature );
+ }
+}
+
+// TODO optimize localCb == NULL case
+void dexDecodeDebugInfo(
+ const DexFile* pDexFile,
+ const DexCode* pCode,
+ const char* classDescriptor,
+ u4 protoIdx,
+ u4 accessFlags,
+ DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
+ void* cnxt)
+{
+ const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
+ u4 line;
+ u4 parametersSize;
+ u4 address = 0;
+ LocalInfo localInReg[pCode->registersSize];
+ u4 insnsSize = pCode->insnsSize;
+ DexProto proto = { pDexFile, protoIdx };
+
+ memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
+
+ if (stream == NULL) {
+ goto end;
+ }
+
+ line = readUnsignedLeb128(&stream);
+ parametersSize = readUnsignedLeb128(&stream);
+
+ u2 argReg = pCode->registersSize - pCode->insSize;
+
+ if ((accessFlags & ACC_STATIC) == 0) {
+ /*
+ * The code is an instance method, which means that there is
+ * an initial this parameter. Also, the proto list should
+ * contain exactly one fewer argument word than the insSize
+ * indicates.
+ */
+ assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
+ localInReg[argReg].name = "this";
+ localInReg[argReg].descriptor = classDescriptor;
+ localInReg[argReg].startAddress = 0;
+ localInReg[argReg].live = true;
+ argReg++;
+ } else {
+ assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
+ }
+
+ DexParameterIterator iterator;
+ dexParameterIteratorInit(&iterator, &proto);
+
+ while (parametersSize-- != 0) {
+ const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
+ const char *name;
+ int reg;
+
+ if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
+ goto invalid_stream;
+ }
+
+ name = readStringIdx(pDexFile, &stream);
+ reg = argReg;
+
+ switch (descriptor[0]) {
+ case 'D':
+ case 'J':
+ argReg += 2;
+ break;
+ default:
+ argReg += 1;
+ break;
+ }
+
+ if (name != NULL) {
+ localInReg[reg].name = name;
+ localInReg[reg].descriptor = descriptor;
+ localInReg[reg].signature = NULL;
+ localInReg[reg].startAddress = address;
+ localInReg[reg].live = true;
+ }
+ }
+
+ for (;;) {
+ u1 opcode = *stream++;
+ u2 reg;
+
+ switch (opcode) {
+ case DBG_END_SEQUENCE:
+ goto end;
+
+ case DBG_ADVANCE_PC:
+ address += readUnsignedLeb128(&stream);
+ break;
+
+ case DBG_ADVANCE_LINE:
+ line += readSignedLeb128(&stream);
+ break;
+
+ case DBG_START_LOCAL:
+ case DBG_START_LOCAL_EXTENDED:
+ reg = readUnsignedLeb128(&stream);
+ if (reg > pCode->registersSize) goto invalid_stream;
+
+ // Emit what was previously there, if anything
+ emitLocalCbIfLive (cnxt, reg, address,
+ localInReg, localCb);
+
+ localInReg[reg].name = readStringIdx(pDexFile, &stream);
+ localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
+ if (opcode == DBG_START_LOCAL_EXTENDED) {
+ localInReg[reg].signature
+ = readStringIdx(pDexFile, &stream);
+ } else {
+ localInReg[reg].signature = NULL;
+ }
+ localInReg[reg].startAddress = address;
+ localInReg[reg].live = true;
+ break;
+
+ case DBG_END_LOCAL:
+ reg = readUnsignedLeb128(&stream);
+ if (reg > pCode->registersSize) goto invalid_stream;
+
+ emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
+ localInReg[reg].live = false;
+ break;
+
+ case DBG_RESTART_LOCAL:
+ reg = readUnsignedLeb128(&stream);
+ if (reg > pCode->registersSize) goto invalid_stream;
+
+ if (localInReg[reg].name == NULL
+ || localInReg[reg].descriptor == NULL) {
+ goto invalid_stream;
+ }
+
+ /*
+ * If the register is live, the "restart" is superfluous,
+ * and we don't want to mess with the existing start address.
+ */
+ if (!localInReg[reg].live) {
+ localInReg[reg].startAddress = address;
+ localInReg[reg].live = true;
+ }
+ break;
+
+ case DBG_SET_PROLOGUE_END:
+ case DBG_SET_EPILOGUE_BEGIN:
+ case DBG_SET_FILE:
+ break;
+
+ default: {
+ int adjopcode = opcode - DBG_FIRST_SPECIAL;
+
+ address += adjopcode / DBG_LINE_RANGE;
+ line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
+
+ if (posCb != NULL) {
+ int done;
+ done = posCb(cnxt, address, line);
+
+ if (done) {
+ // early exit
+ goto end;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+end:
+ {
+ int reg;
+ for (reg = 0; reg < pCode->registersSize; reg++) {
+ emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
+ }
+ }
+ return;
+
+invalid_stream:
+ IF_LOGE() {
+ char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
+ LOGE("Invalid debug info stream. class %s; proto %s",
+ classDescriptor, methodDescriptor);
+ free(methodDescriptor);
+ }
+}