summaryrefslogtreecommitdiff
path: root/libs/minikin/WordBreaker.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libs/minikin/WordBreaker.cpp')
-rw-r--r--libs/minikin/WordBreaker.cpp62
1 files changed, 47 insertions, 15 deletions
diff --git a/libs/minikin/WordBreaker.cpp b/libs/minikin/WordBreaker.cpp
index ae79d3c..a1e9526 100644
--- a/libs/minikin/WordBreaker.cpp
+++ b/libs/minikin/WordBreaker.cpp
@@ -32,20 +32,54 @@
namespace minikin {
namespace {
-static UBreakIterator* createNewIterator(const Locale& locale, LineBreakStyle lbStyle,
- LineBreakWordStyle lbWordStyle) {
+static std::unique_ptr<BreakIterator> createNewIterator(const Locale& locale,
+ LineBreakStyle lbStyle,
+ LineBreakWordStyle lbWordStyle) {
+ MINIKIN_ASSERT(lbStyle != LineBreakStyle::Auto,
+ "LineBreakStyle::Auto must be resolved beforehand.");
+ MINIKIN_ASSERT(lbWordStyle != LineBreakWordStyle::Auto,
+ "LineBreakWordStyle::Auto must be resolved beforehand.");
+
// TODO: handle failure status
- UErrorCode status = U_ZERO_ERROR;
- char localeID[ULOC_FULLNAME_CAPACITY] = {};
- uloc_forLanguageTag(locale.getStringWithLineBreakOption(lbStyle, lbWordStyle).c_str(), localeID,
- ULOC_FULLNAME_CAPACITY, nullptr, &status);
- return ubrk_open(UBreakIteratorType::UBRK_LINE, localeID, nullptr, 0, &status);
+ if (lbStyle == LineBreakStyle::NoBreak) {
+ return std::make_unique<NoBreakBreakIterator>();
+ } else {
+ UErrorCode status = U_ZERO_ERROR;
+ char localeID[ULOC_FULLNAME_CAPACITY] = {};
+ uloc_forLanguageTag(locale.getStringWithLineBreakOption(lbStyle, lbWordStyle).c_str(),
+ localeID, ULOC_FULLNAME_CAPACITY, nullptr, &status);
+ IcuUbrkUniquePtr icuBrkPtr(
+ ubrk_open(UBreakIteratorType::UBRK_LINE, localeID, nullptr, 0, &status));
+ return std::make_unique<ICUBreakIterator>(std::move(icuBrkPtr));
+ }
}
} // namespace
+void ICUBreakIterator::setText(UText* text, size_t) {
+ UErrorCode status = U_ZERO_ERROR;
+ ubrk_setUText(mBreaker.get(), text, &status);
+}
+
+bool ICUBreakIterator::isBoundary(int32_t i) {
+ return ubrk_isBoundary(mBreaker.get(), i);
+}
+
+int32_t ICUBreakIterator::following(size_t i) {
+ return ubrk_following(mBreaker.get(), i);
+}
+
+int32_t ICUBreakIterator::next() {
+ return ubrk_next(mBreaker.get());
+}
+
ICULineBreakerPool::Slot ICULineBreakerPoolImpl::acquire(const Locale& locale,
LineBreakStyle lbStyle,
LineBreakWordStyle lbWordStyle) {
+ if (lbStyle == LineBreakStyle::Auto) {
+ lbStyle = locale.supportsScript('J', 'p', 'a', 'n') ? LineBreakStyle::Strict
+ : LineBreakStyle::None;
+ }
+
const uint64_t id = locale.getIdentifier();
std::lock_guard<std::mutex> lock(mMutex);
for (auto i = mPool.begin(); i != mPool.end(); i++) {
@@ -57,8 +91,7 @@ ICULineBreakerPool::Slot ICULineBreakerPoolImpl::acquire(const Locale& locale,
}
// Not found in pool. Create new one.
- return {id, lbStyle, lbWordStyle,
- IcuUbrkUniquePtr(createNewIterator(locale, lbStyle, lbWordStyle))};
+ return {id, lbStyle, lbWordStyle, createNewIterator(locale, lbStyle, lbWordStyle)};
}
void ICULineBreakerPoolImpl::release(ICULineBreakerPool::Slot&& slot) {
@@ -86,10 +119,9 @@ ssize_t WordBreaker::followingWithLocale(const Locale& locale, LineBreakStyle lb
return mCurrent;
}
mIcuBreaker = mPool->acquire(locale, lbStyle, lbWordStyle);
- UErrorCode status = U_ZERO_ERROR;
MINIKIN_ASSERT(mText != nullptr, "setText must be called first");
// TODO: handle failure status
- ubrk_setUText(mIcuBreaker.breaker.get(), mUText.get(), &status);
+ mIcuBreaker.breaker->setText(mUText.get(), mTextSize);
if (mInEmailOrUrl) {
// Note:
// Don't reset mCurrent, mLast, or mScanOffset for keeping email/URL context.
@@ -171,9 +203,9 @@ static bool isValidBreak(const uint16_t* buf, size_t bufEnd, int32_t i) {
// Customized iteratorNext that takes care of both resets and our modifications
// to ICU's behavior.
int32_t WordBreaker::iteratorNext() {
- int32_t result = ubrk_following(mIcuBreaker.breaker.get(), mCurrent);
+ int32_t result = mIcuBreaker.breaker->following(mCurrent);
while (!isValidBreak(mText, mTextSize, result)) {
- result = ubrk_next(mIcuBreaker.breaker.get());
+ result = mIcuBreaker.breaker->next();
}
return result;
}
@@ -221,11 +253,11 @@ void WordBreaker::detectEmailOrUrl() {
}
}
if (state == SAW_AT || state == SAW_COLON_SLASH_SLASH) {
- if (!ubrk_isBoundary(mIcuBreaker.breaker.get(), i)) {
+ if (!mIcuBreaker.breaker->isBoundary(i)) {
// If there are combining marks or such at the end of the URL or the email address,
// consider them a part of the URL or the email, and skip to the next actual
// boundary.
- i = ubrk_following(mIcuBreaker.breaker.get(), i);
+ i = mIcuBreaker.breaker->following(i);
}
mInEmailOrUrl = true;
} else {