summaryrefslogtreecommitdiff
path: root/dx/src/com/android/dx/rop/cst/CstUtf8.java
diff options
context:
space:
mode:
Diffstat (limited to 'dx/src/com/android/dx/rop/cst/CstUtf8.java')
-rw-r--r--dx/src/com/android/dx/rop/cst/CstUtf8.java371
1 files changed, 371 insertions, 0 deletions
diff --git a/dx/src/com/android/dx/rop/cst/CstUtf8.java b/dx/src/com/android/dx/rop/cst/CstUtf8.java
new file mode 100644
index 0000000..5cfc1f3
--- /dev/null
+++ b/dx/src/com/android/dx/rop/cst/CstUtf8.java
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2007 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.dx.rop.cst;
+
+import com.android.dx.util.ByteArray;
+import com.android.dx.util.Hex;
+
+/**
+ * Constants of type {@code CONSTANT_Utf8_info}.
+ */
+public final class CstUtf8 extends Constant {
+ /**
+ * {@code non-null;} instance representing {@code ""}, that is, the
+ * empty string
+ */
+ public static final CstUtf8 EMPTY_STRING = new CstUtf8("");
+
+ /** {@code non-null;} the UTF-8 value as a string */
+ private final String string;
+
+ /** {@code non-null;} the UTF-8 value as bytes */
+ private final ByteArray bytes;
+
+ /**
+ * Converts a string into its Java-style UTF-8 form. Java-style UTF-8
+ * differs from normal UTF-8 in the handling of character '\0' and
+ * surrogate pairs.
+ *
+ * @param string {@code non-null;} the string to convert
+ * @return {@code non-null;} the UTF-8 bytes for it
+ */
+ public static byte[] stringToUtf8Bytes(String string) {
+ int len = string.length();
+ byte[] bytes = new byte[len * 3]; // Avoid having to reallocate.
+ int outAt = 0;
+
+ for (int i = 0; i < len; i++) {
+ char c = string.charAt(i);
+ if ((c != 0) && (c < 0x80)) {
+ bytes[outAt] = (byte) c;
+ outAt++;
+ } else if (c < 0x800) {
+ bytes[outAt] = (byte) (((c >> 6) & 0x1f) | 0xc0);
+ bytes[outAt + 1] = (byte) ((c & 0x3f) | 0x80);
+ outAt += 2;
+ } else {
+ bytes[outAt] = (byte) (((c >> 12) & 0x0f) | 0xe0);
+ bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) | 0x80);
+ bytes[outAt + 2] = (byte) ((c & 0x3f) | 0x80);
+ outAt += 3;
+ }
+ }
+
+ byte[] result = new byte[outAt];
+ System.arraycopy(bytes, 0, result, 0, outAt);
+ return result;
+ }
+
+ /**
+ * Converts an array of UTF-8 bytes into a string.
+ *
+ * @param bytes {@code non-null;} the bytes to convert
+ * @return {@code non-null;} the converted string
+ */
+ public static String utf8BytesToString(ByteArray bytes) {
+ int length = bytes.size();
+ char[] chars = new char[length]; // This is sized to avoid a realloc.
+ int outAt = 0;
+
+ for (int at = 0; length > 0; /*at*/) {
+ int v0 = bytes.getUnsignedByte(at);
+ char out;
+ switch (v0 >> 4) {
+ case 0x00: case 0x01: case 0x02: case 0x03:
+ case 0x04: case 0x05: case 0x06: case 0x07: {
+ // 0XXXXXXX -- single-byte encoding
+ length--;
+ if (v0 == 0) {
+ // A single zero byte is illegal.
+ return throwBadUtf8(v0, at);
+ }
+ out = (char) v0;
+ at++;
+ break;
+ }
+ case 0x0c: case 0x0d: {
+ // 110XXXXX -- two-byte encoding
+ length -= 2;
+ if (length < 0) {
+ return throwBadUtf8(v0, at);
+ }
+ int v1 = bytes.getUnsignedByte(at + 1);
+ if ((v1 & 0xc0) != 0x80) {
+ return throwBadUtf8(v1, at + 1);
+ }
+ int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
+ if ((value != 0) && (value < 0x80)) {
+ /*
+ * This should have been represented with
+ * one-byte encoding.
+ */
+ return throwBadUtf8(v1, at + 1);
+ }
+ out = (char) value;
+ at += 2;
+ break;
+ }
+ case 0x0e: {
+ // 1110XXXX -- three-byte encoding
+ length -= 3;
+ if (length < 0) {
+ return throwBadUtf8(v0, at);
+ }
+ int v1 = bytes.getUnsignedByte(at + 1);
+ if ((v1 & 0xc0) != 0x80) {
+ return throwBadUtf8(v1, at + 1);
+ }
+ int v2 = bytes.getUnsignedByte(at + 2);
+ if ((v1 & 0xc0) != 0x80) {
+ return throwBadUtf8(v2, at + 2);
+ }
+ int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) |
+ (v2 & 0x3f);
+ if (value < 0x800) {
+ /*
+ * This should have been represented with one- or
+ * two-byte encoding.
+ */
+ return throwBadUtf8(v2, at + 2);
+ }
+ out = (char) value;
+ at += 3;
+ break;
+ }
+ default: {
+ // 10XXXXXX, 1111XXXX -- illegal
+ return throwBadUtf8(v0, at);
+ }
+ }
+ chars[outAt] = out;
+ outAt++;
+ }
+
+ return new String(chars, 0, outAt);
+ }
+
+ /**
+ * Helper for {@link #utf8BytesToString}, which throws the right
+ * exception for a bogus utf-8 byte.
+ *
+ * @param value the byte value
+ * @param offset the file offset
+ * @return never
+ * @throws IllegalArgumentException always thrown
+ */
+ private static String throwBadUtf8(int value, int offset) {
+ throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
+ " at offset " + Hex.u4(offset));
+ }
+
+ /**
+ * Constructs an instance from a {@code String}.
+ *
+ * @param string {@code non-null;} the UTF-8 value as a string
+ */
+ public CstUtf8(String string) {
+ if (string == null) {
+ throw new NullPointerException("string == null");
+ }
+
+ this.string = string.intern();
+ this.bytes = new ByteArray(stringToUtf8Bytes(string));
+ }
+
+ /**
+ * Constructs an instance from some UTF-8 bytes.
+ *
+ * @param bytes {@code non-null;} array of the UTF-8 bytes
+ */
+ public CstUtf8(ByteArray bytes) {
+ if (bytes == null) {
+ throw new NullPointerException("bytes == null");
+ }
+
+ this.bytes = bytes;
+ this.string = utf8BytesToString(bytes).intern();
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof CstUtf8)) {
+ return false;
+ }
+
+ return string.equals(((CstUtf8) other).string);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public int hashCode() {
+ return string.hashCode();
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ protected int compareTo0(Constant other) {
+ return string.compareTo(((CstUtf8) other).string);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public String toString() {
+ return "utf8{\"" + toHuman() + "\"}";
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public String typeName() {
+ return "utf8";
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public boolean isCategory2() {
+ return false;
+ }
+
+ /** {@inheritDoc} */
+ public String toHuman() {
+ int len = string.length();
+ StringBuilder sb = new StringBuilder(len * 3 / 2);
+
+ for (int i = 0; i < len; i++) {
+ char c = string.charAt(i);
+ if ((c >= ' ') && (c < 0x7f)) {
+ if ((c == '\'') || (c == '\"') || (c == '\\')) {
+ sb.append('\\');
+ }
+ sb.append(c);
+ } else if (c <= 0x7f) {
+ switch (c) {
+ case '\n': sb.append("\\n"); break;
+ case '\r': sb.append("\\r"); break;
+ case '\t': sb.append("\\t"); break;
+ default: {
+ /*
+ * Represent the character as an octal escape.
+ * If the next character is a valid octal
+ * digit, disambiguate by using the
+ * three-digit form.
+ */
+ char nextChar =
+ (i < (len - 1)) ? string.charAt(i + 1) : 0;
+ boolean displayZero =
+ (nextChar >= '0') && (nextChar <= '7');
+ sb.append('\\');
+ for (int shift = 6; shift >= 0; shift -= 3) {
+ char outChar = (char) (((c >> shift) & 7) + '0');
+ if ((outChar != '0') || displayZero) {
+ sb.append(outChar);
+ displayZero = true;
+ }
+ }
+ if (! displayZero) {
+ // Ironic edge case: The original value was 0.
+ sb.append('0');
+ }
+ break;
+ }
+ }
+ } else {
+ sb.append("\\u");
+ sb.append(Character.forDigit(c >> 12, 16));
+ sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
+ sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
+ sb.append(Character.forDigit(c & 0x0f, 16));
+ }
+ }
+
+ return sb.toString();
+ }
+
+ /**
+ * Gets the value as a human-oriented string, surrounded by double
+ * quotes.
+ *
+ * @return {@code non-null;} the quoted string
+ */
+ public String toQuoted() {
+ return '\"' + toHuman() + '\"';
+ }
+
+ /**
+ * Gets the value as a human-oriented string, surrounded by double
+ * quotes, but ellipsizes the result if it is longer than the given
+ * maximum length
+ *
+ * @param maxLength {@code >= 5;} the maximum length of the string to return
+ * @return {@code non-null;} the quoted string
+ */
+ public String toQuoted(int maxLength) {
+ String string = toHuman();
+ int length = string.length();
+ String ellipses;
+
+ if (length <= (maxLength - 2)) {
+ ellipses = "";
+ } else {
+ string = string.substring(0, maxLength - 5);
+ ellipses = "...";
+ }
+
+ return '\"' + string + ellipses + '\"';
+ }
+
+ /**
+ * Gets the UTF-8 value as a string.
+ * The returned string is always already interned.
+ *
+ * @return {@code non-null;} the UTF-8 value as a string
+ */
+ public String getString() {
+ return string;
+ }
+
+ /**
+ * Gets the UTF-8 value as UTF-8 encoded bytes.
+ *
+ * @return {@code non-null;} an array of the UTF-8 bytes
+ */
+ public ByteArray getBytes() {
+ return bytes;
+ }
+
+ /**
+ * Gets the size of this instance as UTF-8 code points. That is,
+ * get the number of bytes in the UTF-8 encoding of this instance.
+ *
+ * @return {@code >= 0;} the UTF-8 size
+ */
+ public int getUtf8Size() {
+ return bytes.size();
+ }
+
+ /**
+ * Gets the size of this instance as UTF-16 code points. That is,
+ * get the number of 16-bit chars in the UTF-16 encoding of this
+ * instance. This is the same as the {@code length} of the
+ * Java {@code String} representation of this instance.
+ *
+ * @return {@code >= 0;} the UTF-16 size
+ */
+ public int getUtf16Size() {
+ return string.length();
+ }
+}