1 files changed, 196 insertions, 0 deletions
diff --git a/internal/strs/strings.go b/internal/strs/strings.go
new file mode 100644
index 00000000..0b74e765
--- /dev/null
+++ b/internal/strs/strings.go
@@ -0,0 +1,196 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package strs provides string manipulation functionality specific to protobuf.
+package strs
+
+import (
+	"go/token"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+
+	"google.golang.org/protobuf/internal/flags"
+	"google.golang.org/protobuf/reflect/protoreflect"
+)
+
+// EnforceUTF8 reports whether to enforce strict UTF-8 validation.
+func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
+	if flags.ProtoLegacy {
+		if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
+			return fd.EnforceUTF8()
+		}
+	}
+	return fd.Syntax() == protoreflect.Proto3
+}
+
+// GoCamelCase camel-cases a protobuf name for use as a Go identifier.
+//
+// If there is an interior underscore followed by a lower case letter,
+// drop the underscore and convert the letter to upper case.
+func GoCamelCase(s string) string {
+	// Invariant: if the next letter is lower case, it must be converted
+	// to upper case.
+	// That is, we process a word at a time, where words are marked by _ or
+	// upper case letter. Digits are treated as words.
+	var b []byte
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		switch {
+		case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
+			// Skip over '.' in ".{{lowercase}}".
+		case c == '.':
+			b = append(b, '_') // convert '.' to '_'
+		case c == '_' && (i == 0 || s[i-1] == '.'):
+			// Convert initial '_' to ensure we start with a capital letter.
+			// Do the same for '_' after '.' to match historic behavior.
+			b = append(b, 'X') // convert '_' to 'X'
+		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
+			// Skip over '_' in "_{{lowercase}}".
+		case isASCIIDigit(c):
+			b = append(b, c)
+		default:
+			// Assume we have a letter now - if not, it's a bogus identifier.
+			// The next word is a sequence of characters that must start upper case.
+			if isASCIILower(c) {
+				c -= 'a' - 'A' // convert lowercase to uppercase
+			}
+			b = append(b, c)
+
+			// Accept lower case sequence that follows.
+			for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
+				b = append(b, s[i+1])
+			}
+		}
+	}
+	return string(b)
+}
+
+// GoSanitized converts a string to a valid Go identifier.
+func GoSanitized(s string) string {
+	// Sanitize the input to the set of valid characters,
+	// which must be '_' or be in the Unicode L or N categories.
+	s = strings.Map(func(r rune) rune {
+		if unicode.IsLetter(r) || unicode.IsDigit(r) {
+			return r
+		}
+		return '_'
+	}, s)
+
+	// Prepend '_' in the event of a Go keyword conflict or if
+	// the identifier is invalid (does not start in the Unicode L category).
+	r, _ := utf8.DecodeRuneInString(s)
+	if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
+		return "_" + s
+	}
+	return s
+}
+
+// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
+// according to the protobuf JSON specification.
+func JSONCamelCase(s string) string {
+	var b []byte
+	var wasUnderscore bool
+	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
+		c := s[i]
+		if c != '_' {
+			if wasUnderscore && isASCIILower(c) {
+				c -= 'a' - 'A' // convert to uppercase
+			}
+			b = append(b, c)
+		}
+		wasUnderscore = c == '_'
+	}
+	return string(b)
+}
+
+// JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
+// according to the protobuf JSON specification.
+func JSONSnakeCase(s string) string {
+	var b []byte
+	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
+		c := s[i]
+		if isASCIIUpper(c) {
+			b = append(b, '_')
+			c += 'a' - 'A' // convert to lowercase
+		}
+		b = append(b, c)
+	}
+	return string(b)
+}
+
+// MapEntryName derives the name of the map entry message given the field name.
+// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
+func MapEntryName(s string) string {
+	var b []byte
+	upperNext := true
+	for _, c := range s {
+		switch {
+		case c == '_':
+			upperNext = true
+		case upperNext:
+			b = append(b, byte(unicode.ToUpper(c)))
+			upperNext = false
+		default:
+			b = append(b, byte(c))
+		}
+	}
+	b = append(b, "Entry"...)
+	return string(b)
+}
+
+// EnumValueName derives the camel-cased enum value name.
+// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
+func EnumValueName(s string) string {
+	var b []byte
+	upperNext := true
+	for _, c := range s {
+		switch {
+		case c == '_':
+			upperNext = true
+		case upperNext:
+			b = append(b, byte(unicode.ToUpper(c)))
+			upperNext = false
+		default:
+			b = append(b, byte(unicode.ToLower(c)))
+			upperNext = false
+		}
+	}
+	return string(b)
+}
+
+// TrimEnumPrefix trims the enum name prefix from an enum value name,
+// where the prefix is all lowercase without underscores.
+// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
+func TrimEnumPrefix(s, prefix string) string {
+	s0 := s // original input
+	for len(s) > 0 && len(prefix) > 0 {
+		if s[0] == '_' {
+			s = s[1:]
+			continue
+		}
+		if unicode.ToLower(rune(s[0])) != rune(prefix[0]) {
+			return s0 // no prefix match
+		}
+		s, prefix = s[1:], prefix[1:]
+	}
+	if len(prefix) > 0 {
+		return s0 // no prefix match
+	}
+	s = strings.TrimLeft(s, "_")
+	if len(s) == 0 {
+		return s0 // avoid returning empty string
+	}
+	return s
+}
+
+func isASCIILower(c byte) bool {
+	return 'a' <= c && c <= 'z'
+}
+func isASCIIUpper(c byte) bool {
+	return 'A' <= c && c <= 'Z'
+}
+func isASCIIDigit(c byte) bool {
+	return '0' <= c && c <= '9'
+}