diff options
author | Seigo Nonaka <nona@google.com> | 2023-10-10 06:48:51 +0000 |
---|---|---|
committer | Android (Google) Code Review <android-gerrit@google.com> | 2023-10-10 06:48:51 +0000 |
commit | 79648373818a752da0889ebbb155b0aac1ba4cbc (patch) | |
tree | 19e485b0d7527b526a1809c8fb9dc171f978148d | |
parent | 8f53bf73fc08a1fba55d2ae0edd8846688ca61ea (diff) | |
parent | 5c98310927866ab4dd8c8e07b4020b7a8bf3795e (diff) | |
download | minikin-79648373818a752da0889ebbb155b0aac1ba4cbc.tar.gz |
Merge "Add support of no break style" into main
-rw-r--r-- | include/minikin/LineBreakStyle.h | 1 | ||||
-rw-r--r-- | libs/minikin/GreedyLineBreaker.cpp | 5 | ||||
-rw-r--r-- | libs/minikin/LineBreakerUtil.h | 7 | ||||
-rw-r--r-- | libs/minikin/WordBreaker.cpp | 52 | ||||
-rw-r--r-- | libs/minikin/WordBreaker.h | 41 | ||||
-rw-r--r-- | tests/unittest/WordBreakerTests.cpp | 2 |
6 files changed, 87 insertions, 21 deletions
diff --git a/include/minikin/LineBreakStyle.h b/include/minikin/LineBreakStyle.h index f474cbd..052ffeb 100644 --- a/include/minikin/LineBreakStyle.h +++ b/include/minikin/LineBreakStyle.h @@ -26,6 +26,7 @@ enum class LineBreakStyle : uint8_t { Loose = 1, Normal = 2, Strict = 3, + NoBreak = 4, }; // The line break word style(lw) of the strings. diff --git a/libs/minikin/GreedyLineBreaker.cpp b/libs/minikin/GreedyLineBreaker.cpp index 66e3dd9..2d89cf2 100644 --- a/libs/minikin/GreedyLineBreaker.cpp +++ b/libs/minikin/GreedyLineBreaker.cpp @@ -431,18 +431,21 @@ void GreedyLineBreaker::process() { // Following two will be initialized after the first iteration. uint32_t localeListId = LocaleListCache::kInvalidListId; + LineBreakStyle lineBreakStyle; uint32_t nextWordBoundaryOffset = 0; for (const auto& run : mMeasuredText.runs) { const Range range = run->getRange(); // Update locale if necessary. uint32_t newLocaleListId = run->getLocaleListId(); - if (localeListId != newLocaleListId) { + LineBreakStyle newLineBreakStyle = run->lineBreakStyle(); + if (localeListId != newLocaleListId || lineBreakStyle != newLineBreakStyle) { Locale locale = getEffectiveLocale(newLocaleListId); nextWordBoundaryOffset = wordBreaker.followingWithLocale( locale, run->lineBreakStyle(), run->lineBreakWordStyle(), range.getStart()); mHyphenator = HyphenatorMap::lookup(locale); localeListId = newLocaleListId; + lineBreakStyle = newLineBreakStyle; } for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) { diff --git a/libs/minikin/LineBreakerUtil.h b/libs/minikin/LineBreakerUtil.h index 6e572f1..b48948e 100644 --- a/libs/minikin/LineBreakerUtil.h +++ b/libs/minikin/LineBreakerUtil.h @@ -186,13 +186,15 @@ struct CharProcessor { // time before feeding characters. void updateLocaleIfNecessary(const Run& run) { uint32_t newLocaleListId = run.getLocaleListId(); - if (localeListId != newLocaleListId) { + LineBreakStyle newLineBreakStyle = run.lineBreakStyle(); + if (localeListId != newLocaleListId || lineBreakStyle != newLineBreakStyle) { Locale locale = getEffectiveLocale(newLocaleListId); nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(), run.lineBreakWordStyle(), run.getRange().getStart()); hyphenator = HyphenatorMap::lookup(locale); localeListId = newLocaleListId; + lineBreakStyle = newLineBreakStyle; } } @@ -221,8 +223,9 @@ struct CharProcessor { } private: - // The current locale list id. + // The current locale list id, line break style, line break word style. uint32_t localeListId = LocaleListCache::kInvalidListId; + LineBreakStyle lineBreakStyle; WordBreaker breaker; }; diff --git a/libs/minikin/WordBreaker.cpp b/libs/minikin/WordBreaker.cpp index ae79d3c..e3578b4 100644 --- a/libs/minikin/WordBreaker.cpp +++ b/libs/minikin/WordBreaker.cpp @@ -32,17 +32,41 @@ namespace minikin { namespace { -static UBreakIterator* createNewIterator(const Locale& locale, LineBreakStyle lbStyle, - LineBreakWordStyle lbWordStyle) { +static std::unique_ptr<BreakIterator> createNewIterator(const Locale& locale, + LineBreakStyle lbStyle, + LineBreakWordStyle lbWordStyle) { // TODO: handle failure status - UErrorCode status = U_ZERO_ERROR; - char localeID[ULOC_FULLNAME_CAPACITY] = {}; - uloc_forLanguageTag(locale.getStringWithLineBreakOption(lbStyle, lbWordStyle).c_str(), localeID, - ULOC_FULLNAME_CAPACITY, nullptr, &status); - return ubrk_open(UBreakIteratorType::UBRK_LINE, localeID, nullptr, 0, &status); + if (lbStyle == LineBreakStyle::NoBreak) { + return std::make_unique<NoBreakBreakIterator>(); + } else { + UErrorCode status = U_ZERO_ERROR; + char localeID[ULOC_FULLNAME_CAPACITY] = {}; + uloc_forLanguageTag(locale.getStringWithLineBreakOption(lbStyle, lbWordStyle).c_str(), + localeID, ULOC_FULLNAME_CAPACITY, nullptr, &status); + IcuUbrkUniquePtr icuBrkPtr( + ubrk_open(UBreakIteratorType::UBRK_LINE, localeID, nullptr, 0, &status)); + return std::make_unique<ICUBreakIterator>(std::move(icuBrkPtr)); + } } } // namespace +void ICUBreakIterator::setText(UText* text, size_t) { + UErrorCode status = U_ZERO_ERROR; + ubrk_setUText(mBreaker.get(), text, &status); +} + +bool ICUBreakIterator::isBoundary(int32_t i) { + return ubrk_isBoundary(mBreaker.get(), i); +} + +int32_t ICUBreakIterator::following(size_t i) { + return ubrk_following(mBreaker.get(), i); +} + +int32_t ICUBreakIterator::next() { + return ubrk_next(mBreaker.get()); +} + ICULineBreakerPool::Slot ICULineBreakerPoolImpl::acquire(const Locale& locale, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle) { @@ -57,8 +81,7 @@ ICULineBreakerPool::Slot ICULineBreakerPoolImpl::acquire(const Locale& locale, } // Not found in pool. Create new one. - return {id, lbStyle, lbWordStyle, - IcuUbrkUniquePtr(createNewIterator(locale, lbStyle, lbWordStyle))}; + return {id, lbStyle, lbWordStyle, createNewIterator(locale, lbStyle, lbWordStyle)}; } void ICULineBreakerPoolImpl::release(ICULineBreakerPool::Slot&& slot) { @@ -86,10 +109,9 @@ ssize_t WordBreaker::followingWithLocale(const Locale& locale, LineBreakStyle lb return mCurrent; } mIcuBreaker = mPool->acquire(locale, lbStyle, lbWordStyle); - UErrorCode status = U_ZERO_ERROR; MINIKIN_ASSERT(mText != nullptr, "setText must be called first"); // TODO: handle failure status - ubrk_setUText(mIcuBreaker.breaker.get(), mUText.get(), &status); + mIcuBreaker.breaker->setText(mUText.get(), mTextSize); if (mInEmailOrUrl) { // Note: // Don't reset mCurrent, mLast, or mScanOffset for keeping email/URL context. @@ -171,9 +193,9 @@ static bool isValidBreak(const uint16_t* buf, size_t bufEnd, int32_t i) { // Customized iteratorNext that takes care of both resets and our modifications // to ICU's behavior. int32_t WordBreaker::iteratorNext() { - int32_t result = ubrk_following(mIcuBreaker.breaker.get(), mCurrent); + int32_t result = mIcuBreaker.breaker->following(mCurrent); while (!isValidBreak(mText, mTextSize, result)) { - result = ubrk_next(mIcuBreaker.breaker.get()); + result = mIcuBreaker.breaker->next(); } return result; } @@ -221,11 +243,11 @@ void WordBreaker::detectEmailOrUrl() { } } if (state == SAW_AT || state == SAW_COLON_SLASH_SLASH) { - if (!ubrk_isBoundary(mIcuBreaker.breaker.get(), i)) { + if (!mIcuBreaker.breaker->isBoundary(i)) { // If there are combining marks or such at the end of the URL or the email address, // consider them a part of the URL or the email, and skip to the next actual // boundary. - i = ubrk_following(mIcuBreaker.breaker.get(), i); + i = mIcuBreaker.breaker->following(i); } mInEmailOrUrl = true; } else { diff --git a/libs/minikin/WordBreaker.h b/libs/minikin/WordBreaker.h index c4af635..ccb3186 100644 --- a/libs/minikin/WordBreaker.h +++ b/libs/minikin/WordBreaker.h @@ -37,6 +37,16 @@ namespace minikin { +class BreakIterator { +public: + BreakIterator() {} + virtual ~BreakIterator() {} + virtual void setText(UText* text, size_t size) = 0; + virtual bool isBoundary(int32_t i) = 0; + virtual int32_t following(size_t i) = 0; + virtual int32_t next() = 0; +}; + // A class interface for providing pooling implementation of ICU's line breaker. // The implementation can be customized for testing purposes. class ICULineBreakerPool { @@ -44,7 +54,7 @@ public: struct Slot { Slot() : localeId(0), breaker(nullptr) {} Slot(uint64_t localeId, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle, - IcuUbrkUniquePtr&& breaker) + std::unique_ptr<BreakIterator>&& breaker) : localeId(localeId), lbStyle(lbStyle), lbWordStyle(lbWordStyle), @@ -60,7 +70,7 @@ public: uint64_t localeId; LineBreakStyle lbStyle; LineBreakWordStyle lbWordStyle; - IcuUbrkUniquePtr breaker; + std::unique_ptr<BreakIterator> breaker; }; virtual ~ICULineBreakerPool() {} virtual Slot acquire(const Locale& locale, LineBreakStyle lbStyle, @@ -95,6 +105,33 @@ private: mutable std::mutex mMutex; }; +class ICUBreakIterator : public BreakIterator { +public: + ICUBreakIterator(IcuUbrkUniquePtr&& breaker) : mBreaker(std::move(breaker)) {} + virtual ~ICUBreakIterator() {} + virtual void setText(UText* text, size_t size); + virtual bool isBoundary(int32_t i); + virtual int32_t following(size_t i); + virtual int32_t next(); + +private: + IcuUbrkUniquePtr mBreaker; +}; + +class NoBreakBreakIterator : public BreakIterator { +public: + NoBreakBreakIterator() {} + virtual ~NoBreakBreakIterator() {} + + virtual void setText(UText*, size_t size) { mSize = size; } + virtual bool isBoundary(int32_t i) { return i == 0 || i == static_cast<int32_t>(mSize); } + virtual int32_t following(size_t) { return mSize; } + virtual int32_t next() { return mSize; } + +private: + size_t mSize = 0; +}; + class WordBreaker { public: virtual ~WordBreaker() { finish(); } diff --git a/tests/unittest/WordBreakerTests.cpp b/tests/unittest/WordBreakerTests.cpp index 0c20a80..fe7f953 100644 --- a/tests/unittest/WordBreakerTests.cpp +++ b/tests/unittest/WordBreakerTests.cpp @@ -672,7 +672,7 @@ TEST(WordBreakerTest, LineBreakerPool_acquire_with_release) { pool.acquire(enUS, LineBreakStyle::Loose, LineBreakWordStyle::None); uint64_t enUSBreakerLocaleId = enUSBreaker.localeId; - UBreakIterator* enUSBreakerPtr = enUSBreaker.breaker.get(); + auto* enUSBreakerPtr = enUSBreaker.breaker.get(); pool.release(std::move(enUSBreaker)); EXPECT_EQ(nullptr, enUSBreaker.breaker.get()); |