diff options
author | android-build-team Robot <android-build-team-robot@google.com> | 2017-05-10 08:42:09 +0000 |
---|---|---|
committer | android-build-team Robot <android-build-team-robot@google.com> | 2017-05-10 08:42:09 +0000 |
commit | 7d5630b2ffeb7e9f0a165041573475a5e0206369 (patch) | |
tree | f27ece1e233612beffe6c7319a32fc65b887a9c3 | |
parent | 6c4f34cf116c5e6ffb84a5b88a5afd97f3b520f9 (diff) | |
parent | 166e30a4eccc310bae125ed2692c18ca6fe06338 (diff) | |
download | libtextclassifier-oreo-dr2-release.tar.gz |
release-request-70e541de-d91e-4004-8c17-3147135eff42-for-git_oc-dr1-release-3993143 snap-temp-L23100000062404817 android-8.0.0_r34 android-8.0.0_r33 android-8.0.0_r27 android-8.0.0_r26 android-8.0.0_r25 android-8.0.0_r24 android-8.0.0_r23 android-8.0.0_r22 android-8.0.0_r21 oreo-dr3-release oreo-dr2-release oreo-dr1-release
Change-Id: I2777a27263732439b80c5fc5e6dccc521c95e89e
-rw-r--r-- | tests/textclassifier_jni_test.cc | 23 | ||||
-rw-r--r-- | textclassifier_jni.cc | 11 |
2 files changed, 30 insertions, 4 deletions
diff --git a/tests/textclassifier_jni_test.cc b/tests/textclassifier_jni_test.cc index c441cf5..ffc193b 100644 --- a/tests/textclassifier_jni_test.cc +++ b/tests/textclassifier_jni_test.cc @@ -23,8 +23,27 @@ namespace libtextclassifier { namespace { TEST(TextClassifier, ConvertIndicesBMPUTF8) { - EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), - ConvertIndicesUTF8ToBMP("hello", {0, 5})); + // Test boundary cases. + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello", {0, 5}), std::make_pair(0, 5)); + + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {0, 5}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {0, 5}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesBMPToUTF8("😁ello world", {0, 6}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁ello world", {0, 5}), + std::make_pair(0, 6)); + + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {6, 11}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {6, 11}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello worl😁", {6, 12}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello worl😁", {6, 11}), + std::make_pair(6, 12)); // Simple example where the longer character is before the selection. 
// character 😁 is 0x1f601 diff --git a/textclassifier_jni.cc b/textclassifier_jni.cc index 84a1e32..8d64d87 100644 --- a/textclassifier_jni.cc +++ b/textclassifier_jni.cc @@ -116,8 +116,8 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str, } CodepointSpan result{-1, -1}; - for (auto it = unicode_str.begin(); it != unicode_str.end(); - ++it, ++unicode_index, ++bmp_index) { + std::function<void()> assign_indices_fn = [&result, &orig_indices, + &source_index, &target_index]() { if (orig_indices.first == *source_index) { result.first = *target_index; } @@ -125,12 +125,19 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str, if (orig_indices.second == *source_index) { result.second = *target_index; } + }; + + for (auto it = unicode_str.begin(); it != unicode_str.end(); + ++it, ++unicode_index, ++bmp_index) { + assign_indices_fn(); // There is 1 extra character in the input for each UTF8 character > 0xFFFF. if (*it > 0xFFFF) { ++bmp_index; } } + assign_indices_fn(); + return result; } |