diff options
author | Lukas Zilka <zilka@google.com> | 2017-05-10 02:53:42 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2017-05-10 02:53:42 +0000 |
commit | 166e30a4eccc310bae125ed2692c18ca6fe06338 (patch) | |
tree | f27ece1e233612beffe6c7319a32fc65b887a9c3 | |
parent | 297d9da0270fcf6b097ee24a5e2999b1fd2943ce (diff) | |
parent | 5e5fa7f6ed116972ebccc123c2e49427a882f4c5 (diff) | |
download | libtextclassifier-oreo-dr1-dev.tar.gz |
Fixes boundary problem in indices conversion. am: 45c9b56574 (oreo-dr1-dev)
am: 5e5fa7f6ed
Change-Id: I2519824d5c7992762f541ba5e10a2a2deb5ad38a
-rw-r--r-- | tests/textclassifier_jni_test.cc | 23 |
-rw-r--r-- | textclassifier_jni.cc | 11 |
2 files changed, 30 insertions, 4 deletions
diff --git a/tests/textclassifier_jni_test.cc b/tests/textclassifier_jni_test.cc index c441cf5..ffc193b 100644 --- a/tests/textclassifier_jni_test.cc +++ b/tests/textclassifier_jni_test.cc @@ -23,8 +23,27 @@ namespace libtextclassifier { namespace { TEST(TextClassifier, ConvertIndicesBMPUTF8) { - EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), - ConvertIndicesUTF8ToBMP("hello", {0, 5})); + // Test boundary cases. + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello", {0, 5}), std::make_pair(0, 5)); + + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {0, 5}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {0, 5}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesBMPToUTF8("😁ello world", {0, 6}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁ello world", {0, 5}), + std::make_pair(0, 6)); + + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {6, 11}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {6, 11}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello worl😁", {6, 12}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello worl😁", {6, 11}), + std::make_pair(6, 12)); // Simple example where the longer character is before the selection. 
// character 😁 is 0x1f601 diff --git a/textclassifier_jni.cc b/textclassifier_jni.cc index 84a1e32..8d64d87 100644 --- a/textclassifier_jni.cc +++ b/textclassifier_jni.cc @@ -116,8 +116,8 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str, } CodepointSpan result{-1, -1}; - for (auto it = unicode_str.begin(); it != unicode_str.end(); - ++it, ++unicode_index, ++bmp_index) { + std::function<void()> assign_indices_fn = [&result, &orig_indices, + &source_index, &target_index]() { if (orig_indices.first == *source_index) { result.first = *target_index; } @@ -125,12 +125,19 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str, if (orig_indices.second == *source_index) { result.second = *target_index; } + }; + + for (auto it = unicode_str.begin(); it != unicode_str.end(); + ++it, ++unicode_index, ++bmp_index) { + assign_indices_fn(); // There is 1 extra character in the input for each UTF8 character > 0xFFFF. if (*it > 0xFFFF) { ++bmp_index; } } + assign_indices_fn(); + return result; } |