aboutsummaryrefslogtreecommitdiff
path: root/cpp/src/util/string_compare.cc
blob: 31a75346af2467c723899ce15e128bf11f5531d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// Copyright (C) 2014 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "string_compare.h"

#include <libaddressinput/util/basictypes.h>

#include <cassert>
#include <string>

#include <re2/re2.h>

#include "lru_cache_using_std.h"

// RE2 uses type string, which is not necessarily the same as type std::string.
// In order to create objects of the correct type, to be able to pass pointers
// to these objects to RE2, the function that does that is defined inside an
// unnamed namespace inside the re2 namespace. Oh, my ...
namespace re2 {
namespace {

// In order to (mis-)use RE2 to implement UTF-8 capable less<>, this function
// calls RE2::PossibleMatchRange() to calculate the "lessest" string that would
// be a case-insensitive match to the string. This is far too expensive to do
// repeatedly, so the function is only ever called through an LRU cache.
std::string ComputeMinPossibleMatch(const std::string& str) {
  string min, max;  // N.B.: RE2 type string!

  RE2::Options options;
  options.set_literal(true);
  options.set_case_sensitive(false);
  RE2 matcher(str, options);

  bool success = matcher.PossibleMatchRange(&min, &max, str.size());
  assert(success);
  (void)success;  // Prevent unused variable if assert() is optimized away.

  return min;
}

}  // namespace
}  // namespace re2

namespace i18n {
namespace addressinput {

class StringCompare::Impl {
  enum { MAX_CACHE_SIZE = 1 << 15 };

 public:
  Impl() : min_possible_match_(&re2::ComputeMinPossibleMatch, MAX_CACHE_SIZE) {
    options_.set_literal(true);
    options_.set_case_sensitive(false);
  }

  ~Impl() {}

  bool NaturalEquals(const std::string& a, const std::string& b) const {
    RE2 matcher(b, options_);
    return RE2::FullMatch(a, matcher);
  }

  bool NaturalLess(const std::string& a, const std::string& b) const {
    const std::string& min_a(min_possible_match_(a));
    const std::string& min_b(min_possible_match_(b));
    return min_a < min_b;
  }

 private:
  RE2::Options options_;
  mutable lru_cache_using_std<std::string, std::string> min_possible_match_;

  DISALLOW_COPY_AND_ASSIGN(Impl);
};

StringCompare::StringCompare() : impl_(new Impl) {}

StringCompare::~StringCompare() {}

bool StringCompare::NaturalEquals(const std::string& a,
                                  const std::string& b) const {
  return impl_->NaturalEquals(a, b);
}

bool StringCompare::NaturalLess(const std::string& a,
                                const std::string& b) const {
  return impl_->NaturalLess(a, b);
}

}  // namespace addressinput
}  // namespace i18n