diff options
author | Josh Humphries <jhumphries@buf.build> | 2023-03-02 19:40:35 -0500 |
---|---|---|
committer | Lasse Folger <lassefolger@google.com> | 2023-03-06 07:33:04 +0000 |
commit | fcf5f6cb72978812ac36085183b5c52701c069cb (patch) | |
tree | 908ff659a4b359b3865b91567d5f3f5b33a03aab | |
parent | bc1253ad37431ee26876db47cd8207cdec81993c (diff) | |
download | golang-protobuf-fcf5f6cb72978812ac36085183b5c52701c069cb.tar.gz |
encoding/prototext: allow whitespace and comments between minus sign and number in negative numeric literal
The text format specification[1] indicates that whitespace and comments
may appear after a minus sign and before the subsequent numeric component
in negative number literals. But the Go implementation does not allow
this.
This brings the Go implementation into conformance with this aspect.
Fixes golang/protobuf#1526
[1] https://protobuf.dev/reference/protobuf/textformat-spec/#parsing
Change-Id: I3996c89ee9d37cf2b7502fc6736d6e2ed6dbcf43
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/473015
Reviewed-by: Lasse Folger <lassefolger@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
-rw-r--r-- | encoding/prototext/decode_test.go | 7 | ||||
-rw-r--r-- | internal/encoding/text/decode.go | 5 | ||||
-rw-r--r-- | internal/encoding/text/decode_number.go | 43 | ||||
-rw-r--r-- | internal/encoding/text/decode_test.go | 8 |
4 files changed, 46 insertions, 17 deletions
diff --git a/encoding/prototext/decode_test.go b/encoding/prototext/decode_test.go index eed64dbf..3597d9b6 100644 --- a/encoding/prototext/decode_test.go +++ b/encoding/prototext/decode_test.go @@ -127,9 +127,9 @@ opt_int64: 3735928559 opt_uint32: 0xff opt_uint64: 0xdeadbeef opt_sint32: -1001 -opt_sint64: -0xffff +opt_sint64: - 0xffff opt_fixed64: 64 -opt_sfixed32: -32 +opt_sfixed32: - 32 opt_float: 1.234 opt_double: 1.23e+100 opt_bytes: "\xe8\xb0\xb7\xe6\xad\x8c" @@ -164,7 +164,8 @@ s_int64: 3735928559 s_uint32: 0xff s_uint64: 0xdeadbeef s_sint32: -1001 -s_sint64: -0xffff +s_sint64: - # + 0xffff s_fixed64: 64 s_sfixed32: -32 s_float: 1.234 diff --git a/internal/encoding/text/decode.go b/internal/encoding/text/decode.go index 427c62d0..87853e78 100644 --- a/internal/encoding/text/decode.go +++ b/internal/encoding/text/decode.go @@ -412,12 +412,13 @@ func (d *Decoder) parseFieldName() (tok Token, err error) { // Field number. Identify if input is a valid number that is not negative // and is decimal integer within 32-bit range. 
if num := parseNumber(d.in); num.size > 0 { + str := num.string(d.in) if !num.neg && num.kind == numDec { - if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil { + if _, err := strconv.ParseInt(str, 10, 32); err == nil { return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil } } - return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size]) + return Token{}, d.newSyntaxError("invalid field number: %s", str) } return Token{}, d.newSyntaxError("invalid field name: %s", errId(d.in)) diff --git a/internal/encoding/text/decode_number.go b/internal/encoding/text/decode_number.go index 81a5d8c8..3dc8e978 100644 --- a/internal/encoding/text/decode_number.go +++ b/internal/encoding/text/decode_number.go @@ -15,17 +15,12 @@ func (d *Decoder) parseNumberValue() (Token, bool) { if num.neg { numAttrs |= isNegative } - strSize := num.size - last := num.size - 1 - if num.kind == numFloat && (d.in[last] == 'f' || d.in[last] == 'F') { - strSize = last - } tok := Token{ kind: Scalar, attrs: numberValue, pos: len(d.orig) - len(d.in), raw: d.in[:num.size], - str: string(d.in[:strSize]), + str: num.string(d.in), numAttrs: numAttrs, } d.consume(num.size) @@ -46,6 +41,27 @@ type number struct { kind uint8 neg bool size int + // if neg, this is the length of whitespace and comments between + // the minus sign and the rest fo the number literal + sep int +} + +func (num number) string(data []byte) string { + strSize := num.size + last := num.size - 1 + if num.kind == numFloat && (data[last] == 'f' || data[last] == 'F') { + strSize = last + } + if num.neg && num.sep > 0 { + // strip whitespace/comments between negative sign and the rest + strLen := strSize - num.sep + str := make([]byte, strLen) + str[0] = data[0] + copy(str[1:], data[num.sep+1:strSize]) + return string(str) + } + return string(data[:strSize]) + } // parseNumber constructs a number object from given input. 
It allows for the @@ -67,6 +83,7 @@ func parseNumber(input []byte) number { } // Optional - + var sep int if s[0] == '-' { neg = true s = s[1:] @@ -74,12 +91,14 @@ func parseNumber(input []byte) number { if len(s) == 0 { return number{} } + // Consume any whitespace or comments between the + // negative sign and the rest of the number + lenBefore := len(s) + s = consume(s, 0) + sep = lenBefore - len(s) + size += sep } - // C++ allows for whitespace and comments in between the negative sign and - // the rest of the number. This logic currently does not but is consistent - // with v1. - switch { case s[0] == '0': if len(s) > 1 { @@ -116,7 +135,7 @@ func parseNumber(input []byte) number { if len(s) > 0 && !isDelim(s[0]) { return number{} } - return number{kind: kind, neg: neg, size: size} + return number{kind: kind, neg: neg, size: size, sep: sep} } } s = s[1:] @@ -188,5 +207,5 @@ func parseNumber(input []byte) number { return number{} } - return number{kind: kind, neg: neg, size: size} + return number{kind: kind, neg: neg, size: size, sep: sep} } diff --git a/internal/encoding/text/decode_test.go b/internal/encoding/text/decode_test.go index a7dbad78..528b1bc7 100644 --- a/internal/encoding/text/decode_test.go +++ b/internal/encoding/text/decode_test.go @@ -556,6 +556,14 @@ func TestDecoder(t *testing.T) { want: []R{{E: "invalid field number: -123"}}, }, { + in: "- \t 123.321e6", + want: []R{{E: "invalid field number: -123.321e6"}}, + }, + { + in: "- # negative\n 123", + want: []R{{E: "invalid field number: -123"}}, + }, + { // Field number > math.MaxInt32. in: "2147483648:", want: []R{{E: "invalid field number: 2147483648"}}, |