summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author android-build-team Robot <android-build-team-robot@google.com> 2017-05-10 08:42:09 +0000
committer android-build-team Robot <android-build-team-robot@google.com> 2017-05-10 08:42:09 +0000
commit 7d5630b2ffeb7e9f0a165041573475a5e0206369 (patch)
tree f27ece1e233612beffe6c7319a32fc65b887a9c3
parent 6c4f34cf116c5e6ffb84a5b88a5afd97f3b520f9 (diff)
parent 166e30a4eccc310bae125ed2692c18ca6fe06338 (diff)
download libtextclassifier-oreo-dr2-release.tar.gz
Change-Id: I2777a27263732439b80c5fc5e6dccc521c95e89e
-rw-r--r-- tests/textclassifier_jni_test.cc 23
-rw-r--r-- textclassifier_jni.cc 11
2 files changed, 30 insertions, 4 deletions
diff --git a/tests/textclassifier_jni_test.cc b/tests/textclassifier_jni_test.cc
index c441cf5..ffc193b 100644
--- a/tests/textclassifier_jni_test.cc
+++ b/tests/textclassifier_jni_test.cc
@@ -23,8 +23,27 @@ namespace libtextclassifier {
namespace {
TEST(TextClassifier, ConvertIndicesBMPUTF8) {
- EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}),
- ConvertIndicesUTF8ToBMP("hello", {0, 5}));
+ // Test boundary cases.
+ EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), std::make_pair(0, 5));
+ EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello", {0, 5}), std::make_pair(0, 5));
+
+ EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {0, 5}),
+ std::make_pair(0, 5));
+ EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {0, 5}),
+ std::make_pair(0, 5));
+ EXPECT_EQ(ConvertIndicesBMPToUTF8("😁ello world", {0, 6}),
+ std::make_pair(0, 5));
+ EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁ello world", {0, 5}),
+ std::make_pair(0, 6));
+
+ EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {6, 11}),
+ std::make_pair(6, 11));
+ EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {6, 11}),
+ std::make_pair(6, 11));
+ EXPECT_EQ(ConvertIndicesBMPToUTF8("hello worl😁", {6, 12}),
+ std::make_pair(6, 11));
+ EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello worl😁", {6, 11}),
+ std::make_pair(6, 12));
// Simple example where the longer character is before the selection.
// character 😁 is 0x1f601
diff --git a/textclassifier_jni.cc b/textclassifier_jni.cc
index 84a1e32..8d64d87 100644
--- a/textclassifier_jni.cc
+++ b/textclassifier_jni.cc
@@ -116,8 +116,8 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str,
}
CodepointSpan result{-1, -1};
- for (auto it = unicode_str.begin(); it != unicode_str.end();
- ++it, ++unicode_index, ++bmp_index) {
+ std::function<void()> assign_indices_fn = [&result, &orig_indices,
+ &source_index, &target_index]() {
if (orig_indices.first == *source_index) {
result.first = *target_index;
}
@@ -125,12 +125,19 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str,
if (orig_indices.second == *source_index) {
result.second = *target_index;
}
+ };
+
+ for (auto it = unicode_str.begin(); it != unicode_str.end();
+ ++it, ++unicode_index, ++bmp_index) {
+ assign_indices_fn();
// There is 1 extra character in the input for each UTF8 character > 0xFFFF.
if (*it > 0xFFFF) {
++bmp_index;
}
}
+ assign_indices_fn();
+
return result;
}