diff options
author | Lukas Zilka <zilka@google.com> | 2017-05-09 18:05:55 +0200 |
---|---|---|
committer | Lukas Zilka <zilka@google.com> | 2017-05-09 18:07:13 +0200 |
commit | 45c9b5657486fe6500d4c52d370bde790618cb1b (patch) | |
tree | a6b6f9bc7a646895798f96d9b157adc769c3707b | |
parent | 47c9e5861f24301980220e1de5661db6b19842aa (diff) | |
download | libtextclassifier-oreo-dev.tar.gz |
Fixes boundary problem in indices conversion. [android-vts-8.0_r2] [android-vts-8.0_r1] [oreo-dev]
Test: Built and tested on device. Google3 tests pass.
Bug: 38019270
Bug: 38160823
Change-Id: I4de0a1cea140f4c0b69494d3314dc6088b3be4f6
-rw-r--r-- | tests/textclassifier_jni_test.cc | 23 | ||||
-rw-r--r-- | textclassifier_jni.cc | 11 |
2 files changed, 30 insertions, 4 deletions
diff --git a/tests/textclassifier_jni_test.cc b/tests/textclassifier_jni_test.cc index c441cf5..ffc193b 100644 --- a/tests/textclassifier_jni_test.cc +++ b/tests/textclassifier_jni_test.cc @@ -23,8 +23,27 @@ namespace libtextclassifier { namespace { TEST(TextClassifier, ConvertIndicesBMPUTF8) { - EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), - ConvertIndicesUTF8ToBMP("hello", {0, 5})); + // Test boundary cases. + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello", {0, 5}), std::make_pair(0, 5)); + + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {0, 5}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {0, 5}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesBMPToUTF8("😁ello world", {0, 6}), + std::make_pair(0, 5)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁ello world", {0, 5}), + std::make_pair(0, 6)); + + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {6, 11}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {6, 11}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesBMPToUTF8("hello worl😁", {6, 12}), + std::make_pair(6, 11)); + EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello worl😁", {6, 11}), + std::make_pair(6, 12)); // Simple example where the longer character is before the selection. 
// character 😁 is 0x1f601 diff --git a/textclassifier_jni.cc b/textclassifier_jni.cc index 84a1e32..8d64d87 100644 --- a/textclassifier_jni.cc +++ b/textclassifier_jni.cc @@ -116,8 +116,8 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str, } CodepointSpan result{-1, -1}; - for (auto it = unicode_str.begin(); it != unicode_str.end(); - ++it, ++unicode_index, ++bmp_index) { + std::function<void()> assign_indices_fn = [&result, &orig_indices, + &source_index, &target_index]() { if (orig_indices.first == *source_index) { result.first = *target_index; } @@ -125,12 +125,19 @@ CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str, if (orig_indices.second == *source_index) { result.second = *target_index; } + }; + + for (auto it = unicode_str.begin(); it != unicode_str.end(); + ++it, ++unicode_index, ++bmp_index) { + assign_indices_fn(); // There is 1 extra character in the input for each UTF8 character > 0xFFFF. if (*it > 0xFFFF) { ++bmp_index; } } + assign_indices_fn(); + return result; } |