summaryrefslogtreecommitdiff
path: root/src/util/fipstools/delocate/delocate.peg
blob: 9ba357a209dff0e0d32d4e0388f29f24f116e4d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Copyright (c) 2017, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# This is a rough parser for x86-64 and aarch64 assembly designed to work with
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
# rebuilding delocate.peg.go from this file.

# To regenerate delocate.peg.go:
#
# go install github.com/pointlander/peg@latest
# ~/go/bin/peg <path-to-this-file>
#
# this will generate delocate.peg.go next to delocate.peg.

# Go package that the generated parser source is placed in.
package main

# Declares the generated parser type, named Asm. The braces would hold extra
# struct fields; none are declared here.
type Asm Peg {}

# AsmFile is the start rule: any number of Statements, then end of input
# (`!.` succeeds only when no character remains).
AsmFile <- Statement* !.
# A Statement is optional leading whitespace followed by either:
#   * a Label, which needs no terminator of its own, or
#   * one of the listed forms, optional whitespace, and a terminator: a
#     newline (optionally preceded by a Comment) or ';'.
# Alternatives are tried in order, so the specialised directive rules get a
# chance to match before Instruction and the catch-all Directive.
# The trailing "/ " before the closing parenthesis appears to be a deliberate
# empty alternative, which lets a blank or whitespace-only line match.
Statement <- WS? (Label / ((GlobalDirective /
                            LocationDirective /
                            LabelContainingDirective /
                            Instruction /
                            Directive /
                            Comment / ) WS? ((Comment? '\n') / ';')))
# Note on notation (pointlander/peg): double-quoted literals and [[...]]
# classes match case-insensitively; single-quoted literals and [...] classes
# are case-sensitive.

# `.global` / `.globl` followed by whitespace and a single symbol name.
GlobalDirective <- (".global" / ".globl") WS SymbolName
# Generic directive: a '.', a directive name, and an optional argument list.
Directive <- '.' DirectiveName (WS Args)?
# One or more letters, digits or underscores.
DirectiveName <- [[A-Z0-9_]]+
# Directives handled by the FileDirective and LocDirective rules.
LocationDirective <- FileDirective / LocDirective
# `.file`, whitespace, then everything up to (not including) a '#' or newline.
FileDirective <- ".file" WS [^#\n]+
# `.loc`, whitespace, then everything up to a '#', '/' or newline. Stopping at
# '/' presumably keeps a trailing "//" comment out of the match.
LocDirective <- ".loc" WS [^#/\n]+
# Comma-separated list of Arg, with optional whitespace around each comma.
Args <- Arg ((WS? ',' WS?) Arg)*
# Either a quoted string or a (possibly empty) run of the listed characters.
Arg <- QuotedArg / [[0-9a-z%+\-*_@.]]*
# Double-quoted string.
QuotedArg <- '"' QuotedText '"'
# Quoted-string body: backslash escapes are consumed as a pair so an escaped
# '"' does not end the string; any other character except '"' is taken as is.
QuotedText <- (EscapedChar / [^"])*
# A directive from the fixed list below whose arguments are parsed as symbol
# expressions (SymbolArgs) rather than as the free-form Args of Directive.
LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
# Directive names that select the symbol-expression argument syntax. Ordered
# choice applies, so the first literal that matches at this position wins.
LabelContainingDirectiveName <- ".xword" / ".word" / ".long" / ".set" / ".byte" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type" / ".uleb128" / ".sleb128"
# Comma-separated list of SymbolArg, with optional whitespace around commas.
SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*

# A single argument of a label-containing directive.
SymbolArg <- SymbolExpr
# An atom optionally followed by a binary operator and another expression.
# The recursion is on the right-hand side and no precedence levels are
# distinguished between the operators.
SymbolExpr <- SymbolAtom (WS? SymbolOperator WS? SymbolExpr)?
# Operand of a symbol expression, tried in this order: a numeric Offset, a
# symbol type, a local symbol (optional TCMarker), a symbol name immediately
# followed by an Offset, a symbol name (optional TCMarker), a lone '.', or a
# parenthesised sub-expression.
SymbolAtom <- Offset / SymbolType / LocalSymbol TCMarker? / SymbolName Offset / SymbolName TCMarker? / Dot / OpenParen WS? SymbolExpr WS? CloseParen
# Binary operators accepted between atoms.
SymbolOperator <- '+' / '-' / '|' / '<<' / '>>'
# The parentheses are separate named rules rather than inline literals.
OpenParen <- '('
CloseParen <- ')'

# '@' or '%' followed by 'function' or 'object'.
SymbolType <- [@%] ('function' / 'object')
# A single '.' character.
Dot <- '.'
# The literal suffix "[TC]" that may follow a symbol in SymbolAtom.
TCMarker <- '[TC]'
# A backslash followed by any one character.
EscapedChar <- '\\' .
# One or more spaces or tabs; newlines are not whitespace here because they
# act as statement terminators.
WS <- [ \t]+
# A comment starts with "//" or '#' and runs to the end of the line.
Comment <- ("//" / '#') [^\n]*
# A label is a local symbol, numeric local label, or symbol name, then ':'.
Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
# Starts with a letter, '.' or '_'; continues with letters, '.', digits, '$'
# or '_'. The [[...]] classes are case-insensitive in pointlander/peg.
SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*
# ".L" followed by at least one letter, '.', digit, '$' or '_'.
LocalSymbol <- '.L' [[A-Za-z.0-9$_]]+
# A digit followed by any number of digits or '$'.
LocalLabel <- [0-9][0-9$]*
# Same shape as LocalLabel with a trailing 'b' or 'f'.
LocalLabelRef <- [0-9][0-9$]*[bf]
# An instruction name optionally followed by whitespace and a comma-separated
# list of arguments (optional whitespace around each comma).
Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)?
# A letter, then any letters, '.' or digits, then an optional single trailing
# '.', '+' or '-'.
InstructionName <- [[A-Z]][[A-Z.0-9]]* [.+\-]?
# One operand: an optional '*' indirection prefix, exactly one of the operand
# forms (tried in the order listed), then any number of AVX512Token suffixes.
# MemoryRef is last, so the more specific forms are attempted first.
InstructionArg <- IndirectionIndicator? (ARMConstantTweak / RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / GOTLocation / GOTSymbolOffset / MemoryRef) AVX512Token*
# The literal "$_GLOBAL_OFFSET_TABLE_-" followed by a local symbol.
GOTLocation <- '$_GLOBAL_OFFSET_TABLE_-' LocalSymbol
# Either '$' SymbolName "@GOT" with an optional "OFF" suffix, or ":got:"
# followed by a SymbolName.
GOTSymbolOffset <- ('$' SymbolName '@GOT' 'OFF'?) / (":got:" SymbolName)
# A brace-delimited suffix: optional whitespace, '{', optional '%', any run of
# digits and lowercase letters, '}'.
AVX512Token <- WS? '{' '%'? [0-9a-z]* '}'
# ".TOC.-" followed by "0b" or a ".L"-prefixed identifier, then "@ha".
TOCRefHigh <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@ha"
# Same as TOCRefHigh but with the "@l" suffix.
TOCRefLow <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@l"
# The '*' prefix accepted before an operand in InstructionArg.
IndirectionIndicator <- '*'
# A register or constant operand. Alternatives, in order:
#   1. '%' followed by a letter and then letters or digits;
#   2. an optional '$' followed by one or two Offsets;
#   3. '#' Offset, optionally followed by '*' digits and then '-' digits;
#   4. '#', optional '~', then a parenthesised "digit << digit";
#   5. an ARMRegister.
# The trailing negative lookahead rejects the match when the next character is
# 'f', 'b', ':', '(', '+' or '-'. Presumably this leaves inputs such as local
# label references, labels, memory references and arithmetic expressions to
# the other operand rules.
RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) /
                       ('$'? ((Offset Offset) / Offset)) /
                       ('#' Offset ('*' [0-9]+ ('-' [0-9] [0-9]*)?)? ) /
                       ('#' '~'? '(' [0-9] WS? "<<" WS? [0-9] ')' ) /
                       ARMRegister)
                      ![fb:(+\-]
# An extend or shift keyword ([us]xt[xwhb], lsl, lsr, ror, asr), optionally
# followed by whitespace and a '#'-prefixed Offset.
ARMConstantTweak <- (([us] "xt" [xwhb]) / "lsl" / "lsr" / "ror" / "asr") (WS '#' Offset)?
# "sp"; one of x/w/d/q/s/h/b followed by one or two digits; "xzr"; "wzr";
# "NZCV"; a vector register; or a brace-enclosed, comma-separated list of
# vector registers with an optional one- or two-digit index in brackets.
ARMRegister <- "sp" / ([xwdqshb] [0-9] [0-9]?) / "xzr" / "wzr" / "NZCV" / ARMVectorRegister / ('{' WS? ARMVectorRegister (',' WS? ARMVectorRegister)* WS? '}' ('[' [0-9] [0-9]? ']')? )
# "v" plus one or two digits, optionally followed by '.', optional digits, one
# of b/s/d/h/q, and an optional one- or two-digit index in brackets.
ARMVectorRegister <- "v" [0-9] [0-9]? ('.' [0-9]* [bsdhq] ('[' [0-9] [0-9]? ']')? )?
# Compilers only output a very limited number of expression forms. Rather than
# implement a full expression parser, this enumerates those forms plus a few
# that appear in our hand-written assembly.
# A memory operand, given as an ordered list of accepted shapes. Order
# matters: `SymbolRef BaseIndexScale` precedes plain `SymbolRef`, and the
# SegmentRegister forms run from longest to shortest, so the fuller form is
# attempted before its prefix.
# NOTE(review): the final `BaseIndexScale` alternative looks subsumed by
# `Offset* BaseIndexScale`, which already accepts zero Offsets - confirm
# before relying on it or removing it.
MemoryRef <- (SymbolRef BaseIndexScale /
              SymbolRef /
              Low12BitsSymbolRef /
              Offset* BaseIndexScale /
              SegmentRegister Offset BaseIndexScale /
              SegmentRegister BaseIndexScale /
              SegmentRegister Offset /
              ARMBaseIndexScale /
              BaseIndexScale)
# A symbol reference: an optional prefix of Offsets ending in '+', a local
# symbol or symbol name, any number of trailing Offsets, and an optional
# '@' Section suffix that may itself be followed by Offsets.
SymbolRef <- (Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?
# ":lo12:" followed by a local symbol or symbol name and an optional Offset.
Low12BitsSymbolRef <- ":lo12:" (LocalSymbol / SymbolName) Offset?
# Bracketed ARM memory operand: '[' base register, then optionally a comma and
# one of
#   * '#' Offset, optionally followed by "*digits", "*(digits op digits)", or
#     any number of "+digits" terms,
#   * an ARMGOTLow12 reference,
#   * a Low12BitsSymbolRef,
#   * another ARMRegister,
# optionally followed by a comma and an ARMConstantTweak; then ']' and an
# optional '!' (ARMPostincrement).
ARMBaseIndexScale <- '[' ARMRegister (',' WS? (('#' Offset (('*' [0-9]+) / ('*' '(' [0-9]+ Operator [0-9]+ ')') / (('+' [0-9]+)*))? ) / ARMGOTLow12 / Low12BitsSymbolRef / ARMRegister) (',' WS? ARMConstantTweak)?)? ']' ARMPostincrement?
# ":got_lo12:" followed by a symbol name.
ARMGOTLow12 <- ":got_lo12:" SymbolName
# The '!' accepted after the closing bracket of ARMBaseIndexScale.
ARMPostincrement <- '!'
# Parenthesised "(base, index, scale)" form: an optional RegisterOrConstant,
# then optionally a comma, a second RegisterOrConstant, and optionally a comma
# with a run of digits. All parts inside the parentheses are optional, so
# "()" also matches.
BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
# '+' or '-', as used inside ARMBaseIndexScale.
Operator <- [+\-]
# A numeric literal: optional '+', optional '-', then a binary ("0b"),
# hexadecimal ("0x") or decimal number. Because the signs are tried in that
# order, "+-1" is accepted but "-+1" is not.
Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
# One or more letters or '@' characters (case-insensitive class).
Section <- [[A-Z@]]+
# '%', one character from c-g or s, then "s:" - i.e. %cs: %ds: %es: %fs: %gs:
# or %ss:.
SegmentRegister <- '%' [c-gs] 's:'