summaryrefslogtreecommitdiff
path: root/libs/minikin/LineBreakerUtil.h
blob: 5764c5e03225961f0410e47f69ecad7f518ae154 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINIKIN_LINE_BREAKER_UTIL_H
#define MINIKIN_LINE_BREAKER_UTIL_H

#include <vector>

#include "minikin/Hyphenator.h"
#include "minikin/MeasuredText.h"
#include "minikin/U16StringPiece.h"

#include "HyphenatorMap.h"
#include "LayoutUtils.h"
#include "Locale.h"
#include "LocaleListCache.h"
#include "MinikinInternal.h"
#include "WordBreaker.h"

namespace minikin {

// ParaWidth holds a width accumulated from the beginning of a paragraph. A 32-bit float can lose
// accuracy on very large paragraphs, but float is also used extensively on the Java side for the
// same quantity. The type is kept behind an alias so that it can be swapped easily if the
// performance/accuracy tradeoff changes.
using ParaWidth = float;

// Hyphenates a string potentially containing non-breaking spaces.
// The result is indexed by offset from the start of |string| (see populateHyphenationPoints);
// presumably it holds one entry per code unit -- confirm against the definition, which is not
// part of this header.
// NOTE(review): the second parameter name is misspelled ("hypenator"); harmless in a declaration.
std::vector<HyphenationType> hyphenate(const U16StringPiece& string, const Hyphenator& hypenator);

// Tells whether a code unit is a space that disappears when it ends up at the end of a line.
// The accepted set is [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]] plus '\n'.
// Every member of that set lives in the BMP, so testing a single UTF-16 code unit is sufficient.
inline bool isLineEndSpace(uint16_t c) {
    switch (c) {
        case '\n':
        case ' ':     // SPACE
        case 0x1680:  // OGHAM SPACE MARK
        case 0x205F:  // MEDIUM MATHEMATICAL SPACE
        case 0x3000:  // IDEOGRAPHIC SPACE
            return true;
        case 0x2007:  // FIGURE SPACE sits inside the block below but is excluded from the set.
            return false;
        default:
            // EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM SPACE,
            // SIX-PER-EM SPACE, PUNCTUATION SPACE, THIN SPACE, HAIR SPACE
            return 0x2000 <= c && c <= 0x200A;
    }
}

// Returns the first locale of the locale list registered under |localeListId|, or a
// default-constructed Locale when that list is empty.
inline Locale getEffectiveLocale(uint32_t localeListId) {
    const LocaleList& locales = LocaleListCache::getById(localeListId);
    if (locales.empty()) {
        return Locale();
    }
    return locales[0];
}

// Retrieves hyphenation break points from a word.
inline void populateHyphenationPoints(
        const U16StringPiece& textBuf,         // A text buffer.
        const Run& run,                        // A run of this region.
        const Hyphenator& hyphenator,          // A hyphenator to be used for hyphenation.
        const Range& contextRange,             // A context range for measuring hyphenated piece.
        const Range& hyphenationTargetRange,   // An actual range for the hyphenation target.
        const std::vector<float>& charWidths,  // Char width used for hyphen piece estimation.
        bool ignoreKerning,                    // True use full shaping for hyphenation piece.
        std::vector<HyphenBreak>* out,         // An output to be appended.
        LayoutPieces* pieces) {                // An output of layout pieces. Maybe null.
    if (!run.getRange().contains(contextRange) || !contextRange.contains(hyphenationTargetRange)) {
        return;
    }

    const std::vector<HyphenationType> hyphenResult =
            hyphenate(textBuf.substr(hyphenationTargetRange), hyphenator);
    for (uint32_t i = hyphenationTargetRange.getStart(); i < hyphenationTargetRange.getEnd(); ++i) {
        const HyphenationType hyph = hyphenResult[hyphenationTargetRange.toRangeOffset(i)];
        if (hyph == HyphenationType::DONT_BREAK) {
            continue;  // Not a hyphenation point.
        }

        if (!ignoreKerning) {
            auto hyphenPart = contextRange.split(i);
            U16StringPiece firstText = textBuf.substr(hyphenPart.first);
            U16StringPiece secondText = textBuf.substr(hyphenPart.second);
            const float first =
                    run.measureHyphenPiece(firstText, Range(0, firstText.size()),
                                           StartHyphenEdit::NO_EDIT /* start hyphen edit */,
                                           editForThisLine(hyph) /* end hyphen edit */, pieces);
            const float second =
                    run.measureHyphenPiece(secondText, Range(0, secondText.size()),
                                           editForNextLine(hyph) /* start hyphen edit */,
                                           EndHyphenEdit::NO_EDIT /* end hyphen edit */, pieces);

            out->emplace_back(i, hyph, first, second);
        } else {
            float first = 0;
            float second = 0;
            for (uint32_t j = contextRange.getStart(); j < i; ++j) {
                first += charWidths[j];
            }
            for (uint32_t j = i; j < contextRange.getEnd(); ++j) {
                second += charWidths[j];
            }

            EndHyphenEdit endEdit = editForThisLine(hyph);
            StartHyphenEdit startEdit = editForNextLine(hyph);

            if (endEdit != EndHyphenEdit::NO_EDIT) {
                auto [str, strSize] = getHyphenString(endEdit);
                first += run.measureText(U16StringPiece(str, strSize));
            }

            if (startEdit != StartHyphenEdit::NO_EDIT) {
                auto [str, strSize] = getHyphenString(startEdit);
                second += run.measureText(U16StringPiece(str, strSize));
            }

            out->emplace_back(i, hyph, first, second);
        }
    }
}

// Accumulates width, space count and word break state while the characters of a paragraph are
// fed in one at a time.
struct CharProcessor {
    // How many word spaces have been fed so far.
    uint32_t rawSpaceCount = 0;

    // rawSpaceCount as of the last character that was not a line end space, i.e. the number of
    // spaces without the trailing ones.
    uint32_t effectiveSpaceCount = 0;

    // Running total of the character widths since the paragraph start.
    ParaWidth sumOfCharWidths = 0.0;

    // sumOfCharWidths as of the last character that was not a line end space. This is the line
    // width measured from the paragraph start if the line were broken right now.
    ParaWidth effectiveWidth = 0.0;

    // The value sumOfCharWidths had at the previous word break point.
    ParaWidth sumOfCharWidthsAtPrevWordBreak = 0.0;

    // Offset of the upcoming word break.
    uint32_t nextWordBreak = 0;

    // Offset of the most recently passed word break.
    uint32_t prevWordBreak = 0;

    // Width of a space; stays 0 when no space has been seen.
    // Note: when spaces have several different widths (for example, because fonts are mixed),
    // only one of them is kept.
    float spaceWidth = 0.0f;

    // The hyphenator looked up for the current locale.
    const Hyphenator* hyphenator = nullptr;

    // The word range currently reported by the word breaker.
    Range wordRange() const { return breaker.wordRange(); }

    // The range spanning from the previous word break to the next one.
    Range contextRange() const { return Range(prevWordBreak, nextWordBreak); }

    // Effective width accumulated since the previous word break point.
    ParaWidth widthFromLastWordBreak() const {
        return effectiveWidth - sumOfCharWidthsAtPrevWordBreak;
    }

    // The badness the word breaker reports for the current word break point.
    int wordBreakPenalty() const { return breaker.breakBadness(); }

    // Hands the paragraph text to the word breaker.
    CharProcessor(const U16StringPiece& text) {
        breaker.setText(text.data(), text.size());
    }

    // Switches to the locale of |run| when its locale list id differs from the current one: the
    // next word break is re-queried from the start of the run and the hyphenator is looked up
    // again. The user of CharProcessor must call this with a valid locale at least once before
    // feeding characters.
    void updateLocaleIfNecessary(const Run& run) {
        const uint32_t runLocaleListId = run.getLocaleListId();
        if (runLocaleListId == localeListId) {
            return;  // Same locale list as before, nothing to update.
        }

        Locale locale = getEffectiveLocale(runLocaleListId);
        nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(),
                                                    run.lineBreakWordStyle(),
                                                    run.getRange().getStart());
        hyphenator = HyphenatorMap::lookup(locale);
        localeListId = runLocaleListId;
    }

    // Consumes the character |c| located at offset |idx| whose width is |w|. |canBreakHere| tells
    // whether a word break at this offset may be recorded as the previous word break.
    void feedChar(uint32_t idx, uint16_t c, float w, bool canBreakHere) {
        const bool atWordBreak = (idx == nextWordBreak);
        if (atWordBreak && canBreakHere) {
            prevWordBreak = nextWordBreak;
            sumOfCharWidthsAtPrevWordBreak = sumOfCharWidths;
        }
        if (atWordBreak) {
            // The breaker advances even when the break itself was not recorded.
            nextWordBreak = breaker.next();
        }

        if (isWordSpace(c)) {
            ++rawSpaceCount;
            spaceWidth = w;
        }
        sumOfCharWidths += w;

        // A line end space vanishes when the line is broken on it, so effectiveWidth and
        // effectiveSpaceCount, which describe the line if it were broken at this point, only
        // catch up on characters that are not line end spaces.
        if (!isLineEndSpace(c)) {
            effectiveSpaceCount = rawSpaceCount;
            effectiveWidth = sumOfCharWidths;
        }
    }

private:
    // Id of the locale list that the breaker and the hyphenator are currently set up for.
    uint32_t localeListId = LocaleListCache::kInvalidListId;

    WordBreaker breaker;
};
}  // namespace minikin

#endif  // MINIKIN_LINE_BREAKER_UTIL_H