aboutsummaryrefslogtreecommitdiff
path: root/tools/strip_asm.py
blob: d131dc719414c45e10577570de5d3b41656a8fc3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3

"""
strip_asm.py - Cleanup ASM output for the specified file
"""

from argparse import ArgumentParser
import sys
import os
import re

def find_used_labels(asm):
    """
    Collect the local labels that are the target of a jump instruction.

    A line is treated as a jump when, after optional leading whitespace, it
    starts with a mnemonic of the form ``j<lowercase letters>`` followed by
    whitespace and an operand beginning with ``.L``.

    :param asm: assembly text (may contain many lines).
    :return: a set of label names, each including the leading ``.L``.
    """
    # Raw string: "\s" and "\." inside a plain literal are invalid escape
    # sequences, which newer Python versions warn about.
    jump_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    matches = (jump_re.match(line) for line in asm.splitlines())
    return {'.L%s' % m.group(1) for m in matches if m}


def normalize_labels(asm):
    """
    Give local labels that lack one a leading dot (``L1`` becomes ``.L1``).

    A label declaration is recognised only at the very start of a line, as
    ``L<name>:`` or ``.L<name>:``. Every declared label that has no leading
    dot is rewritten wherever it appears at the start of the text or after
    whitespace, and is followed by ``:``, whitespace or the end of the text.
    Labels that already start with a dot are left untouched.

    :param asm: assembly text (may contain many lines).
    :return: the text with dot-less local labels prefixed by ``.``; the input
        is returned unchanged when there is nothing to rewrite.
    """
    label_decl = re.compile(r"^[.]?L[a-zA-Z0-9][a-zA-Z0-9_]*(?=:)")
    undotted = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        # Decide per label rather than from one arbitrary set element, so
        # the result does not depend on set iteration order.
        if m and not m.group(0).startswith('.'):
            undotted.add(m.group(0))
    if not undotted:
        return asm
    # Escape the names before splicing them into a pattern, and order them
    # deterministically (longest first). The lookahead forces a whole-token
    # match, so "L1" can never rewrite part of "L10".
    ordered = sorted(undotted, key=lambda name: (-len(name), name))
    alternatives = "|".join(re.escape(name) for name in ordered)
    # "$" in the lookahead also catches a reference that ends the text
    # without a trailing newline.
    token_re = re.compile(r"(^|\s+)(" + alternatives + r")(?=:|\s|$)")
    return token_re.sub(r"\1.\2", asm)


def transform_labels(asm):
    """
    Normalize local labels and drop declarations of labels never jumped to.

    The text is first passed through ``normalize_labels``; the set of jump
    targets is then computed with ``find_used_labels``. A line that starts
    with a ``.L<name>:`` declaration is kept only if that label is in the
    set of jump targets; every other line is kept as-is.

    :param asm: assembly text (may contain many lines).
    :return: the filtered text, with every kept line terminated by a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    # Raw string: "\." in a plain literal is an invalid escape sequence.
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        # Keep anything that is not a local-label declaration, and keep
        # declarations whose label is the target of some jump.
        if m is None or m.group(0) in used_decls:
            kept.append(line)
    # Join once instead of growing a string with "+=" for every line.
    return ''.join(line + '\n' for line in kept)


def is_identifier(tk):
    """
    Tell whether a token looks like an identifier.

    The token must be non-empty, start with a letter or an underscore, and
    contain only letters, digits and underscores after that.

    :param tk: the token to test.
    :return: True if the token has identifier form, False otherwise.
    """
    if len(tk) < 1:
        return False
    head, tail = tk[0], tk[1:]
    if not (head.isalpha() or head == '_'):
        return False
    return all(ch.isalnum() or ch == '_' for ch in tail)

def process_identifiers(l):
    """
    process_identifiers - rewrite the identifiers on a line so that names
    are consistent across platforms, specifically across ELF and MachO.
    MachO inserts an additional underscore at the beginning of names; this
    function removes it.

    The line is split into runs of ``[a-zA-Z0-9_]`` characters and the text
    between them. For each run that is an identifier, one leading underscore
    is dropped when the run starts with ``__Z``, or when it starts with
    ``_`` followed by a letter other than ``Z``. Everything else is copied
    through unchanged.
    """
    def _drop_extra_underscore(token):
        # Only identifier-shaped tokens are candidates for rewriting.
        if not is_identifier(token):
            return token
        if token.startswith('__Z'):
            return token[1:]
        if len(token) > 1 and token.startswith('_'):
            second = token[1]
            if second.isalpha() and second != 'Z':
                return token[1:]
        return token

    pieces = re.split(r'([a-zA-Z0-9_]+)', l)
    return ''.join(_drop_extra_underscore(piece) for piece in pieces)


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines.

    The text is first run through ``transform_labels``. Each remaining line
    then has ``@GOTPCREL`` removed and is dropped if it matches one of the
    discard patterns, unless it also matches a keep pattern (none are
    defined at present). Kept lines are passed through
    ``process_identifiers``. A blank line is inserted before every function
    label definition except when nothing has been emitted yet.

    :param asm: assembly text (may contain many lines).
    :return: the stripped text, with every line terminated by a newline.
    """
    asm = transform_labels(asm)

    # Raw strings throughout: "\s" and "\." in plain literals are invalid
    # escape sequences, which newer Python versions warn about.
    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    keep_regexes = [

    ]
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")
    out_lines = []
    for line in asm.splitlines():
        # Remove Mach-O attribute
        line = line.replace('@GOTPCREL', '')
        discarded = any(reg.match(line) for reg in discard_regexes)
        rescued = any(reg.match(line) for reg in keep_regexes)
        # A keep pattern overrides a discard pattern.
        if discarded and not rescued:
            continue
        # Separate functions with an empty line, but never lead with one.
        if fn_label_def.match(line) and out_lines:
            out_lines.append('')
        out_lines.append(process_identifiers(line))
    # Join once instead of growing a string with "+=" for every line.
    return ''.join(line + '\n' for line in out_lines)

def main():
    """
    Command-line entry point: read an assembly file, strip it, write it out.

    Takes two positional arguments, the input assembly file and the output
    file. If the input path is not an existing file, an error is printed to
    standard error and the process exits with status 1.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # parse_known_args tolerates extra, unrecognised arguments; they are
    # deliberately ignored here.
    args, _ = parser.parse_known_args()
    # Named *_path so the builtin input() is not shadowed.
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        # Diagnostics belong on stderr, not mixed into regular output.
        print("ERROR: input file '%s' does not exist" % input_path,
              file=sys.stderr)
        sys.exit(1)
    with open(input_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, 'w') as f:
        f.write(new_contents)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;