diff options
Diffstat (limited to 'abseil-cpp/absl/strings/ascii.cc')
-rw-r--r-- | abseil-cpp/absl/strings/ascii.cc | 66 |
1 files changed, 57 insertions, 9 deletions
diff --git a/abseil-cpp/absl/strings/ascii.cc b/abseil-cpp/absl/strings/ascii.cc index 93bb03e..16c9689 100644 --- a/abseil-cpp/absl/strings/ascii.cc +++ b/abseil-cpp/absl/strings/ascii.cc @@ -14,6 +14,10 @@ #include "absl/strings/ascii.h" +#include <climits> +#include <cstring> +#include <string> + namespace absl { ABSL_NAMESPACE_BEGIN namespace ascii_internal { @@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = { }; // clang-format on +template <bool ToUpper> +constexpr void AsciiStrCaseFold(char* p, char* end) { + // The upper- and lowercase versions of ASCII characters differ by only 1 bit. + // When we need to flip the case, we can xor with this bit to achieve the + // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We + // could have chosen 'z' and 'Z', or any other pair of characters as they all + // have the same single bit difference. + constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A'; + + constexpr char ch_a = ToUpper ? 'a' : 'A'; + constexpr char ch_z = ToUpper ? 'z' : 'Z'; + for (; p < end; ++p) { + unsigned char v = static_cast<unsigned char>(*p); + // We use & instead of && to ensure this always stays branchless + // We use static_cast<int> to suppress -Wbitwise-instead-of-logical + bool is_in_range = static_cast<bool>(static_cast<int>(ch_a <= v) & + static_cast<int>(v <= ch_z)); + v ^= is_in_range ? kAsciiCaseBitFlip : 0; + *p = static_cast<char>(v); + } +} + +static constexpr size_t ValidateAsciiCasefold() { + constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN; + size_t incorrect_index = 0; + char lowered[num_chars] = {}; + char uppered[num_chars] = {}; + for (unsigned int i = 0; i < num_chars; ++i) { + uppered[i] = lowered[i] = static_cast<char>(i); + } + AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]); + AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]); + for (size_t i = 0; i < num_chars; ++i) { + const char ch = static_cast<char>(i), + ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch), + ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch); + if (uppered[i] != ch_upper || lowered[i] != ch_lower) { + incorrect_index = i > 0 ? i : num_chars; + break; + } + } + return incorrect_index; +} + +static_assert(ValidateAsciiCasefold() == 0, "error in case conversion"); + } // namespace ascii_internal void AsciiStrToLower(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_tolower(ch); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size()); } void AsciiStrToUpper(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_toupper(ch); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size()); } void RemoveExtraAsciiWhitespace(std::string* str) { @@ -183,17 +231,17 @@ void RemoveExtraAsciiWhitespace(std::string* str) { for (; input_it < input_end; ++input_it) { if (is_ws) { // Consecutive whitespace? Keep only the last. - is_ws = absl::ascii_isspace(*input_it); + is_ws = absl::ascii_isspace(static_cast<unsigned char>(*input_it)); if (is_ws) --output_it; } else { - is_ws = absl::ascii_isspace(*input_it); + is_ws = absl::ascii_isspace(static_cast<unsigned char>(*input_it)); } *output_it = *input_it; ++output_it; } - str->erase(output_it - &(*str)[0]); + str->erase(static_cast<size_t>(output_it - &(*str)[0])); } ABSL_NAMESPACE_END |