Fixes boundary problem in indices conversion. am: 45c9b56574 (branch: oreo-dr1-dev)

am: 5e5fa7f6ed

Change-Id: I2519824d5c7992762f541ba5e10a2a2deb5ad38a
author: Lukas Zilka <zilka@google.com> 2017-05-10 02:53:42 +0000
committer: android-build-merger <android-build-merger@google.com> 2017-05-10 02:53:42 +0000
commit: 166e30a4eccc310bae125ed2692c18ca6fe06338 (patch)
tree: f27ece1e233612beffe6c7319a32fc65b887a9c3
parent: 297d9da0270fcf6b097ee24a5e2999b1fd2943ce (diff)
parent: 5e5fa7f6ed116972ebccc123c2e49427a882f4c5 (diff)
download: libtextclassifier-oreo-dr1-dev.tar.gz
2 files changed, 30 insertions, 4 deletions
diff --git a/tests/textclassifier_jni_test.cc b/tests/textclassifier_jni_test.cc
index c441cf5..ffc193b 100644
--- a/tests/textclassifier_jni_test.cc
+++ b/tests/textclassifier_jni_test.cc
@@ -23,8 +23,27 @@ namespace libtextclassifier {
 namespace {
 
 TEST(TextClassifier, ConvertIndicesBMPUTF8) {
-  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}),
-            ConvertIndicesUTF8ToBMP("hello", {0, 5}));
+  // Test boundary cases.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello", {0, 5}), std::make_pair(0, 5));
+
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {0, 5}),
+            std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {0, 5}),
+            std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁ello world", {0, 6}),
+            std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁ello world", {0, 5}),
+            std::make_pair(0, 6));
+
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {6, 11}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {6, 11}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello worl😁", {6, 12}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello worl😁", {6, 11}),
+            std::make_pair(6, 12));
 
   // Simple example where the longer character is before the selection.
   //  character 😁 is 0x1f601
diff --git a/textclassifier_jni.cc b/textclassifier_jni.cc
index 84a1e32..8d64d87 100644
--- a/textclassifier_jni.cc
+++ b/textclassifier_jni.cc
@@ -116,8 +116,8 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str,
   }
 
   CodepointSpan result{-1, -1};
-  for (auto it = unicode_str.begin(); it != unicode_str.end();
-       ++it, ++unicode_index, ++bmp_index) {
+  std::function<void()> assign_indices_fn = [&result, &orig_indices,
+                                             &source_index, &target_index]() {
     if (orig_indices.first == *source_index) {
       result.first = *target_index;
     }
@@ -125,12 +125,19 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str,
     if (orig_indices.second == *source_index) {
       result.second = *target_index;
     }
+  };
+
+  for (auto it = unicode_str.begin(); it != unicode_str.end();
+       ++it, ++unicode_index, ++bmp_index) {
+    assign_indices_fn();
 
     // There is 1 extra character in the input for each UTF8 character > 0xFFFF.
     if (*it > 0xFFFF) {
       ++bmp_index;
     }
   }
+  assign_indices_fn();
+
   return result;
 }
author	Lukas Zilka <zilka@google.com>	2017-05-10 02:53:42 +0000
committer	android-build-merger <android-build-merger@google.com>	2017-05-10 02:53:42 +0000
commit	166e30a4eccc310bae125ed2692c18ca6fe06338 (patch)
tree	f27ece1e233612beffe6c7319a32fc65b887a9c3
parent	297d9da0270fcf6b097ee24a5e2999b1fd2943ce (diff)
parent	5e5fa7f6ed116972ebccc123c2e49427a882f4c5 (diff)
download	libtextclassifier-oreo-dr1-dev.tar.gz