summaryrefslogtreecommitdiff
path: root/opcode-gen/opcode-gen.awk
diff options
context:
space:
mode:
Diffstat (limited to 'opcode-gen/opcode-gen.awk')
-rw-r--r--opcode-gen/opcode-gen.awk546
1 files changed, 546 insertions, 0 deletions
diff --git a/opcode-gen/opcode-gen.awk b/opcode-gen/opcode-gen.awk
new file mode 100644
index 0000000..0e0ff6c
--- /dev/null
+++ b/opcode-gen/opcode-gen.awk
@@ -0,0 +1,546 @@
+# Copyright (C) 2007 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Awk helper script for opcode-gen.
+#
+
+#
+# Initialization.
+#
+
+BEGIN {
+ MAX_OPCODE = 65535;
+ MAX_PACKED_OPCODE = 511;
+ initIndexTypes();
+ initFlags();
+ if (readBytecodes()) exit 1;
+ deriveOpcodeChains();
+ createPackedTables();
+ consumeUntil = "";
+ emission = "";
+}
+
+#
+# General control (must appear above directive handlers).
+#
+
+# Clear out the preexisting output within a directive section.
+consumeUntil != "" {
+ if (index($0, consumeUntil) != 0) {
+ consumeUntil = "";
+ print;
+ }
+
+ next;
+}
+
+# Detect directives.
+/BEGIN\([a-z-]*\)/ {
+ i = match($0, /BEGIN\([a-z-]*\)/);
+ emission = substr($0, i + 6, RLENGTH - 7);
+ consumeUntil = "END(" emission ")";
+ emissionHandled = 0;
+}
+
+# Most lines just get copied from the source as-is, including the start
+# comment for directives.
+{
+ print;
+}
+
+#
+# Handlers for all of the directives.
+#
+
+emission == "opcodes" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_OPCODE; i++) {
+ if (isUnused(i) || isOptimized(i)) continue;
+ printf(" public static final int %s = 0x%s;\n",
+ constName[i], hex[i]);
+ }
+}
+
+emission == "first-opcodes" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_OPCODE; i++) {
+ if (isUnused(i) || isOptimized(i)) continue;
+ if (isFirst[i] == "true") {
+ printf(" // Opcodes.%s\n", constName[i]);
+ }
+ }
+}
+
+emission == "dops" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_OPCODE; i++) {
+ if (isUnused(i) || isOptimized(i)) continue;
+
+ nextOp = nextOpcode[i];
+ nextOp = (nextOp == -1) ? "NO_NEXT" : constName[nextOp];
+
+ printf(" public static final Dop %s =\n" \
+ " new Dop(Opcodes.%s, Opcodes.%s,\n" \
+ " Opcodes.%s, Form%s.THE_ONE, %s);\n\n",
+ constName[i], constName[i], family[i], nextOp, format[i],
+ hasResult[i]);
+ }
+}
+
+emission == "opcode-info-defs" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_OPCODE; i++) {
+ if (isUnused(i) || isOptimized(i)) continue;
+
+ itype = toupper(indexType[i]);
+ gsub(/-/, "_", itype);
+
+ printf(" public static final Info %s =\n" \
+ " new Info(Opcodes.%s, \"%s\",\n" \
+ " InstructionCodec.FORMAT_%s, IndexType.%s);\n\n", \
+ constName[i], constName[i], name[i], toupper(format[i]), itype);
+ }
+}
+
+emission == "dops-init" || emission == "opcode-info-init" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_OPCODE; i++) {
+ if (isUnused(i) || isOptimized(i)) continue;
+ printf(" set(%s);\n", constName[i]);
+ }
+}
+
+emission == "libcore-opcodes" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_OPCODE; i++) {
+ if (isUnused(i) || isOptimized(i)) continue;
+ printf(" int OP_%-28s = 0x%04x;\n", constName[i], i);
+ }
+}
+
+emission == "libcore-maximum-values" {
+ emissionHandled = 1;
+
+ printf(" MAXIMUM_VALUE = %d;\n", MAX_OPCODE);
+ printf(" MAXIMUM_PACKED_VALUE = %d;\n", MAX_PACKED_OPCODE);
+}
+
+emission == "libdex-maximum-values" {
+ emissionHandled = 1;
+
+ printf("#define kMaxOpcodeValue 0x%x\n", MAX_OPCODE);
+ printf("#define kNumPackedOpcodes 0x%x\n", MAX_PACKED_OPCODE + 1);
+}
+
+emission == "libdex-opcode-enum" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
+ printf(" OP_%-28s = 0x%02x,\n", packedConstName[i], i);
+ }
+}
+
+emission == "libdex-goto-table" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
+ content = sprintf(" H(OP_%s),", packedConstName[i]);
+ printf("%-78s\\\n", content);
+ }
+}
+
+emission == "libdex-opcode-names" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
+ printf(" \"%s\",\n", packedName[i]);
+ }
+}
+
+emission == "libdex-widths" {
+ emissionHandled = 1;
+
+ col = 1;
+ for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
+ value = sprintf("%d,", packedWidth[i]);
+ col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 16, 2, " ");
+ }
+}
+
+emission == "libdex-flags" {
+ emissionHandled = 1;
+
+ for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
+ value = flagsToC(packedFlags[i]);
+ printf(" %s,\n", value);
+ }
+}
+
+emission == "libdex-formats" {
+ emissionHandled = 1;
+
+ col = 1;
+ for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
+ value = sprintf("kFmt%s,", packedFormat[i]);
+ col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 7, 9, " ");
+ }
+}
+
+emission == "libdex-index-types" {
+ emissionHandled = 1;
+
+ col = 1;
+ for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
+ value = sprintf("%s,", indexTypeValues[packedIndexType[i]]);
+ col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 3, 19, " ");
+ }
+}
+
+# Handle the end of directive processing (must appear after the directive
+# clauses).
+emission != "" {
+ if (!emissionHandled) {
+ printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
+ consumeUntil = "";
+ }
+
+ emission = "";
+}
+
+#
+# Helper functions.
+#
+
+# Helper to print out an element in a multi-column fashion. It returns
+# the (one-based) column number that the next element will be printed
+# in.
+function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
+ isLast = (isLast || (col == numCols));
+ printf("%s%-*s%s",
+ (col == 1) ? linePrefix : " ",
+ isLast ? 1 : colWidth, value,
+ isLast ? "\n" : "");
+
+ return (col % numCols) + 1;
+}
+
+# Read the bytecode description file.
+function readBytecodes(i, parts, line, cmd, status, count) {
+ # locals: parts, line, cmd, status, count
+ for (;;) {
+ # Read a line.
+ status = getline line <bytecodeFile;
+ if (status == 0) break;
+ if (status < 0) {
+ print "trouble reading bytecode file";
+ exit 1;
+ }
+
+ # Clean up the line and extract the command.
+ gsub(/ */, " ", line);
+ sub(/ *#.*$/, "", line);
+ sub(/ $/, "", line);
+ sub(/^ /, "", line);
+ count = split(line, parts);
+ if (count == 0) continue; # Blank or comment line.
+ cmd = parts[1];
+ sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.
+
+ if (cmd == "op") {
+ status = defineOpcode(line);
+ } else if (cmd == "format") {
+ status = defineFormat(line);
+ } else {
+ status = -1;
+ }
+
+ if (status != 0) {
+ printf("syntax error on line: %s\n", line) >"/dev/stderr";
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+# Define an opcode.
+function defineOpcode(line, count, parts, idx) {
+ # locals: count, parts, idx
+ count = split(line, parts);
+ if (count != 6) return -1;
+ idx = parseHex(parts[1]);
+ if (idx < 0) return -1;
+
+ # Extract directly specified values from the line.
+ hex[idx] = parts[1];
+ name[idx] = parts[2];
+ format[idx] = parts[3];
+ hasResult[idx] = (parts[4] == "n") ? "false" : "true";
+ indexType[idx] = parts[5];
+ flags[idx] = parts[6];
+
+ # Calculate derived values.
+
+ constName[idx] = toupper(name[idx]);
+ gsub("[/-]", "_", constName[idx]); # Dash and slash become underscore.
+ gsub("[+^]", "", constName[idx]); # Plus and caret are removed.
+ split(name[idx], parts, "/");
+
+ family[idx] = toupper(parts[1]);
+ gsub("-", "_", family[idx]); # Dash becomes underscore.
+ gsub("[+^]", "", family[idx]); # Plus and caret are removed.
+
+ split(format[idx], parts, ""); # Width is the first format char.
+ width[idx] = parts[1];
+
+ # This association is used when computing "next" opcodes.
+ familyFormat[family[idx],format[idx]] = idx;
+
+ # Verify values.
+
+ if (nextFormat[format[idx]] == "") {
+ printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
+ return 1;
+ }
+
+ if (indexTypeValues[indexType[idx]] == "") {
+ printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
+ return 1;
+ }
+
+ if (flagsToC(flags[idx]) == "") {
+ printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
+ return 1;
+ }
+
+ return 0;
+}
+
+# Define a format family.
+function defineFormat(line, count, parts, i) {
+ # locals: count, parts, i
+ count = split(line, parts);
+ if (count < 1) return -1;
+ formats[parts[1]] = line;
+
+ parts[count + 1] = "none";
+ for (i = 1; i <= count; i++) {
+ nextFormat[parts[i]] = parts[i + 1];
+ }
+
+ return 0;
+}
+
+# Produce the nextOpcode and isFirst arrays. The former indicates, for
+# each opcode, which one should be tried next when doing instruction
+# fitting. The latter indicates which opcodes are at the head of an
+# instruction fitting chain.
+function deriveOpcodeChains(i, op) {
+ # locals: i, op
+
+ for (i = 0; i <= MAX_OPCODE; i++) {
+ if (isUnused(i)) continue;
+ isFirst[i] = "true";
+ }
+
+ for (i = 0; i <= MAX_OPCODE; i++) {
+ if (isUnused(i)) continue;
+ op = findNextOpcode(i);
+ nextOpcode[i] = op;
+ if (op != -1) {
+ isFirst[op] = "false";
+ }
+ }
+}
+
+# Given an opcode by index, find the next opcode in the same family
+# (that is, with the same base name) to try when matching instructions
+# to opcodes. This simply walks the nextFormat chain looking for a
+# match. This returns the index of the matching opcode or -1 if there
+# is none.
+function findNextOpcode(idx, fam, fmt, result) {
+ # locals: fam, fmt, result
+ fam = family[idx];
+ fmt = format[idx];
+
+ # Not every opcode has a version with every possible format, so
+ # we have to iterate down the chain until we find one or run out of
+ # formats to try.
+ for (fmt = nextFormat[format[idx]]; fmt != "none"; fmt = nextFormat[fmt]) {
+ result = familyFormat[fam,fmt];
+ if (result != "") {
+ return result;
+ }
+ }
+
+ return -1;
+}
+
+# Construct the tables of info indexed by packed opcode. The packed opcode
+# values are in the range 0-0x1ff, whereas the unpacked opcodes sparsely
+# span the range 0-0xffff.
+function createPackedTables(i, op) {
+ # locals: i, op
+ for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
+ op = unpackOpcode(i);
+ if (i == 255) {
+ # Special case: This is the low-opcode slot for a would-be
+ # extended opcode dispatch implementation.
+ packedName[i] = "dispatch-ff";
+ packedConstName[i] = "DISPATCH_FF";
+ packedFormat[i] = "00x";
+ packedFlags[i] = 0;
+ packedWidth[i] = 0;
+ packedIndexType[i] = "unknown";
+ } else if (isUnused(op)) {
+ packedName[i] = unusedName(op);
+ packedConstName[i] = unusedConstName(op);
+ packedFormat[i] = "00x";
+ packedFlags[i] = 0;
+ packedWidth[i] = 0;
+ packedIndexType[i] = "unknown";
+ } else {
+ packedName[i] = name[op];
+ packedConstName[i] = constName[op];
+ packedFormat[i] = format[op];
+ packedFlags[i] = flags[op];
+ packedWidth[i] = width[op];
+ packedIndexType[i] = indexType[op];
+ }
+ }
+}
+
+# Given a packed opcode, returns the raw (unpacked) opcode value.
+function unpackOpcode(idx) {
+ # Note: This must be the inverse of the corresponding code in
+ # libdex/DexOpcodes.h.
+ if (idx <= 255) {
+ return idx;
+ } else {
+ idx -= 256;
+ return (idx * 256) + 255;
+ }
+}
+
+# Returns the "unused" name of the given opcode (by index).
+# That is, this is the human-oriented name to use for an opcode
+# definition in cases
+# where the opcode isn't used.
+function unusedName(idx) {
+ if (idx <= 255) {
+ return sprintf("unused-%02x", idx);
+ } else {
+ return sprintf("unused-%04x", idx);
+ }
+}
+
+# Returns the "unused" constant name of the given opcode (by index).
+# That is, this is the name to use for a constant definition in cases
+# where the opcode isn't used.
+function unusedConstName(idx) {
+ if (idx <= 255) {
+ return toupper(sprintf("UNUSED_%02x", idx));
+ } else {
+ return toupper(sprintf("UNUSED_%04x", idx));
+ }
+}
+
+# Convert a hex value to an int.
+function parseHex(hex, result, chars, count, c, i) {
+ # locals: result, chars, count, c, i
+ hex = tolower(hex);
+ count = split(hex, chars, "");
+ result = 0;
+ for (i = 1; i <= count; i++) {
+ c = index("0123456789abcdef", chars[i]);
+ if (c == 0) {
+ printf("bogus hex value: %s\n", hex) >"/dev/stderr";
+ return -1;
+ }
+ result = (result * 16) + c - 1;
+ }
+ return result;
+}
+
+# Initialize the indexTypes data.
+function initIndexTypes() {
+ indexTypeValues["unknown"] = "kIndexUnknown";
+ indexTypeValues["none"] = "kIndexNone";
+ indexTypeValues["varies"] = "kIndexVaries";
+ indexTypeValues["type-ref"] = "kIndexTypeRef";
+ indexTypeValues["string-ref"] = "kIndexStringRef";
+ indexTypeValues["method-ref"] = "kIndexMethodRef";
+ indexTypeValues["field-ref"] = "kIndexFieldRef";
+ indexTypeValues["inline-method"] = "kIndexInlineMethod";
+ indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
+ indexTypeValues["field-offset"] = "kIndexFieldOffset";
+}
+
+# Initialize the flags data.
+function initFlags() {
+ flagValues["branch"] = "kInstrCanBranch";
+ flagValues["continue"] = "kInstrCanContinue";
+ flagValues["switch"] = "kInstrCanSwitch";
+ flagValues["throw"] = "kInstrCanThrow";
+ flagValues["return"] = "kInstrCanReturn";
+ flagValues["invoke"] = "kInstrInvoke";
+ flagValues["optimized"] = "0"; # Not represented in C output
+ flagValues["0"] = "0";
+}
+
+# Translate the given flags into the equivalent C expression. Returns
+# "" on error.
+function flagsToC(f, parts, result, i) {
+ # locals: parts, result, i
+ count = split(f, parts, /\|/); # Split input at pipe characters.
+ result = "0";
+
+ for (i = 1; i <= count; i++) {
+ f = flagValues[parts[i]];
+ if (f == "") {
+ printf("bogus flag: %s\n", f) >"/dev/stderr";
+ return ""; # Bogus flag name.
+ } else if (f == "0") {
+ # Nothing to append for this case.
+ } else if (result == "0") {
+ result = f;
+ } else {
+ result = result "|" f;
+ }
+ }
+
+ return result;
+}
+
+# Returns true if the given opcode (by index) is an "optimized" opcode.
+function isOptimized(idx, parts, f) {
+ # locals: parts, f
+ split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
+ for (f in parts) {
+ if (parts[f] == "optimized") return 1;
+ }
+ return 0;
+}
+
+# Returns true if there is no definition for the given opcode (by index).
+function isUnused(idx) {
+ return (name[idx] == "");
+}