Backport recent changes for horizontal offset optimization

Test: m -j robolectric_native_runtime dist
Change-Id: I8d958b019722bd210f82dfa6d31803fc7b234f4f
author: Michael Hoisie <hoisie@google.com> 2022-12-08 00:27:50 +0000
committer: Michael Hoisie <hoisie@google.com> 2022-12-08 00:32:35 +0000
commit: 8f1bf2164264f246ffa9bb781088e3554094980b (patch)
tree: d0d2754ee1b3b42253d33b9ccaf70e344a0a261c
parent: 849ba8d3e4c3c8a6a4f907e6e47adf0fc6748325 (diff)
download: minikin-8f1bf2164264f246ffa9bb781088e3554094980b.tar.gz
3 files changed, 164 insertions, 2 deletions
diff --git a/include/minikin/Measurement.h b/include/minikin/Measurement.h
index 76f3701..ef0b6a5 100644
--- a/include/minikin/Measurement.h
+++ b/include/minikin/Measurement.h
@@ -28,6 +28,8 @@ namespace minikin {
 float getRunAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count,
                     size_t offset);
 
+void distributeAdvances(float* advances, const uint16_t* buf, size_t start, size_t count);
+
 size_t getOffsetForAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count,
                            float advance);
 
diff --git a/libs/minikin/Measurement.cpp b/libs/minikin/Measurement.cpp
index 093dba8..968ab6f 100644
--- a/libs/minikin/Measurement.cpp
+++ b/libs/minikin/Measurement.cpp
@@ -24,11 +24,33 @@
 #include "minikin/BoundsCache.h"
 #include "minikin/GraphemeBreak.h"
 
+namespace {
+bool isAsciiOrBidiControlCharacter(uint16_t c) {
+    return (0x0000 <= c && c <= 0x001F)                  // ASCII control characters (C0 range)
+           || c == 0x061C || c == 0x200E || c == 0x200F  // BiDi marks: ALM, LRM, RLM
+           || (0x202A <= c && c <= 0x202E) || (0x2066 <= c && c <= 0x2069);  // LRE..RLO, LRI..PDI
+}
+
+}  // namespace
+
 namespace minikin {
 
 // These could be considered helper methods of layout, but need only be loosely coupled, so
 // are separate.
 
+/**
+ * Return the unsigned advance of the given offset from the run start.
+ *
+ * @param advances the computed advances of the characters in buf. The advance of
+ * the i-th character in buf is stored at index (i - layoutStart) in this array.
+ * @param buf the text stored in utf-16 format.
+ * @param layoutStart the index in buf of the first laid-out character (stored at advances[0]).
+ * @param start the start index of the run.
+ * @param count the number of the characters in this run.
+ * @param offset the target offset whose advance is computed. It should be in the
+ * range of [start, start + count].
+ * @return the unsigned advance from the run start to the given offset.
+ */
 static float getRunAdvance(const float* advances, const uint16_t* buf, size_t layoutStart,
                            size_t start, size_t count, size_t offset) {
     float advance = 0.0f;
@@ -42,13 +64,17 @@ static float getRunAdvance(const float* advances, const uint16_t* buf, size_t la
             clusterWidth = charAdvance;
         }
     }
-    if (offset < start + count && advances[offset - layoutStart] == 0.0f) {
+    if (offset < start + count && !isAsciiOrBidiControlCharacter(buf[offset]) &&
+        advances[offset - layoutStart] == 0.0f) {
         // In the middle of a cluster, distribute width of cluster so that each grapheme cluster
         // gets an equal share.
         // TODO: get caret information out of font when that's available
         size_t nextCluster;
         for (nextCluster = offset + 1; nextCluster < start + count; nextCluster++) {
-            if (advances[nextCluster - layoutStart] != 0.0f) break;
+            if (advances[nextCluster - layoutStart] != 0.0f ||
+                isAsciiOrBidiControlCharacter(buf[nextCluster])) {
+                break;
+            }
         }
         int numGraphemeClusters = 0;
         int numGraphemeClustersAfter = 0;
@@ -69,6 +95,50 @@ static float getRunAdvance(const float* advances, const uint16_t* buf, size_t la
     return advance;
 }
 
+/**
+ * Helper method that distributes the advance of a ligature among its graphemes.
+ * When a ligature is applied, the first character of the ligature is assigned the entire width.
+ * This method splits that width evenly across the grapheme clusters in the ligature.
+ *
+ * @param advances the computed advances of the characters in buf. The advance of
+ * the i-th character in buf is stored at index (i - start) in this array. This
+ * method updates the array in place: the first character of each grapheme
+ * cluster in a ligature receives an equal share; other entries are left as is.
+ * @param buf the text stored in UTF-16 format.
+ * @param start the start index of the run.
+ * @param count the number of UTF-16 code units in this run.
+ */
+void distributeAdvances(float* advances, const uint16_t* buf, size_t start, size_t count) {
+    size_t clusterStart = start;
+    while (clusterStart < start + count) {
+        float clusterAdvance = advances[clusterStart - start];
+        size_t clusterEnd;
+        for (clusterEnd = clusterStart + 1; clusterEnd < start + count; clusterEnd++) {
+            if (advances[clusterEnd - start] != 0.0f ||
+                isAsciiOrBidiControlCharacter(buf[clusterEnd])) {
+                break;
+            }
+        }
+        size_t numGraphemeClusters = 0;
+        for (size_t i = clusterStart; i < clusterEnd; i++) {
+            if (GraphemeBreak::isGraphemeBreak(advances, buf, start, count, i)) {
+                numGraphemeClusters++;
+            }
+        }
+        // When there is more than one grapheme in this cluster, a ligature was applied,
+        // so the cluster width is distributed evenly across the graphemes.
+        if (numGraphemeClusters > 1) {
+            for (size_t i = clusterStart; i < clusterEnd; ++i) {
+                if (GraphemeBreak::isGraphemeBreak(advances, buf, start, count, i)) {
+                    // Only the first character of each grapheme cluster receives a share.
+                    advances[i - start] = clusterAdvance / numGraphemeClusters;
+                }
+            }
+        }
+        clusterStart = clusterEnd;
+    }
+}
+
 float getRunAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count,
                     size_t offset) {
     return getRunAdvance(advances, buf, start, start, count, offset);
diff --git a/tests/unittest/MeasurementTests.cpp b/tests/unittest/MeasurementTests.cpp
index b5a85c7..ff36f67 100644
--- a/tests/unittest/MeasurementTests.cpp
+++ b/tests/unittest/MeasurementTests.cpp
@@ -31,6 +31,15 @@ float getAdvance(const float* advances, const char* src) {
     return getRunAdvance(advances, buf, 0, size, offset);
 }
 
+void distributeAdvances(float* advances, const char* src, int count) {  // test-only wrapper
+    const size_t BUF_SIZE = 256;
+    uint16_t buf[BUF_SIZE];
+    size_t offset;
+    size_t size;
+    ParseUnicode(buf, BUF_SIZE, src, &size, &offset);  // parses src into buf; sets size and offset
+    distributeAdvances(advances, buf, offset, count);  // the parsed offset is used as the run start
+}
+
 // Latin fi
 TEST(Measurement, getRunAdvance_fi) {
     const float unligated[] = {30.0, 20.0};
@@ -44,6 +53,22 @@ TEST(Measurement, getRunAdvance_fi) {
     EXPECT_EQ(40.0, getAdvance(ligated, "'f' 'i' |"));
 }
 
+TEST(Measurement, getRunAdvance_control_characters) {
+    const float unligated[] = {30.0, 20.0, 0.0, 0.0};  // 'f', 'i', two BiDi controls
+    EXPECT_EQ(0.0, getAdvance(unligated, "| 'f' 'i' U+2066 U+202C"));
+    EXPECT_EQ(30.0, getAdvance(unligated, "'f' | 'i' U+2066 U+202C"));
+    EXPECT_EQ(50.0, getAdvance(unligated, "'f' 'i' | U+2066 U+202C"));
+    EXPECT_EQ(50.0, getAdvance(unligated, "'f' 'i' U+2066 | U+202C"));
+    EXPECT_EQ(50.0, getAdvance(unligated, "'f' 'i' U+2066 U+202C |"));
+
+    const float liagated[] = {40.0, 0.0, 0.0, 0.0};  // 'fi' ligature: full width stored on 'f'
+    EXPECT_EQ(0.0, getAdvance(liagated, "| 'f' 'i' U+2066 U+202C"));
+    EXPECT_EQ(20.0, getAdvance(liagated, "'f' | 'i' U+2066 U+202C"));
+    EXPECT_EQ(40.0, getAdvance(liagated, "'f' 'i' | U+2066 U+202C"));
+    EXPECT_EQ(40.0, getAdvance(liagated, "'f' 'i' U+2066 | U+202C"));
+    EXPECT_EQ(40.0, getAdvance(liagated, "'f' 'i' U+2066 U+202C |"));
+}
+
 // Devanagari ka+virama+ka
 TEST(Measurement, getRunAdvance_kka) {
     const float unligated[] = {30.0, 0.0, 30.0};
@@ -59,4 +84,69 @@ TEST(Measurement, getRunAdvance_kka) {
     EXPECT_EQ(30.0, getAdvance(ligated, "U+0915 U+094D U+0915 |"));
 }
 
+TEST(Measurement, distributeAdvances_fi) {
+    float ligated[] = {20.0, 0.0};  // 'fi' ligature: full width stored on 'f'
+    distributeAdvances(ligated, "| 'f' 'i' ", 2);
+    EXPECT_EQ(ligated[0], 10.0);
+    EXPECT_EQ(ligated[1], 10.0);
+}
+
+TEST(Measurement, distributeAdvances_non_zero_start) {
+    // Note that advances[i] corresponds to the (i + start)-th character.
+    float ligated[] = {20.0, 0.0};
+    distributeAdvances(ligated, "'a' 'b' | 'f' 'i' ", 2);
+    EXPECT_EQ(ligated[0], 10.0);
+    EXPECT_EQ(ligated[1], 10.0);
+}
+
+TEST(Measurement, distributeAdvances_non_zero_start_with_control_characters) {
+    // Note that advances[i] corresponds to the (i + start)-th character.
+    float ligated[] = {20.0, 0.0, 0.0, 0.0};
+    distributeAdvances(ligated, "'a' U+2066 | 'f' 'i' U+2066 U+202C", 4);
+    EXPECT_EQ(ligated[0], 10.0);
+    EXPECT_EQ(ligated[1], 10.0);
+    EXPECT_EQ(ligated[2], 0.0);
+    EXPECT_EQ(ligated[3], 0.0);
+}
+
+TEST(Measurement, distributeAdvances_with_count) {
+    // Note that advances[i] corresponds to the (i + start)-th character.
+    float ligated[] = {20.0, 0.0, 30.0, 0.0};
+    distributeAdvances(ligated, "'a' 'b' | 'f' 'i' 'f' 'i' ", 2);
+    EXPECT_EQ(ligated[0], 10.0);
+    EXPECT_EQ(ligated[1], 10.0);
+    // Count is 2, so the rest of the array must be left unchanged.
+    EXPECT_EQ(ligated[2], 30.0);
+    EXPECT_EQ(ligated[3], 0.0);
+}
+
+TEST(Measurement, distributeAdvances_control_characters) {
+    float ligated[] = {20.0, 0.0, 0.0, 0.0};  // 'fi' ligature followed by two BiDi controls
+    distributeAdvances(ligated, "| 'f' 'i' U+2066 U+202C", 4);
+    EXPECT_EQ(ligated[0], 10.0);
+    EXPECT_EQ(ligated[1], 10.0);
+    EXPECT_EQ(ligated[2], 0.0);
+    EXPECT_EQ(ligated[3], 0.0);
+}
+
+TEST(Measurement, distributeAdvances_surrogate) {
+    float advances[] = {20.0, 0.0, 0.0, 0.0};  // one surrogate pair, then two BiDi controls
+    distributeAdvances(advances, "| U+D83D U+DE00 U+2066 U+202C", 4);
+    EXPECT_EQ(advances[0], 20.0);
+    EXPECT_EQ(advances[1], 0.0);
+    EXPECT_EQ(advances[2], 0.0);
+    EXPECT_EQ(advances[3], 0.0);
+}
+
+TEST(Measurement, distributeAdvances_surrogate_in_ligature) {
+    // If a ligature contains surrogate pairs, each pair's share of the advance
+    // is assigned to the first code unit of that pair.
+    float ligated[] = {40.0, 0.0, 0.0, 0.0};
+    distributeAdvances(ligated, "| U+D83D U+DE00 U+D83D U+DE01", 4);
+    EXPECT_EQ(ligated[0], 20.0);
+    EXPECT_EQ(ligated[1], 0.0);
+    EXPECT_EQ(ligated[2], 20.0);
+    EXPECT_EQ(ligated[3], 0.0);
+}
+
 }  // namespace minikin
author	Michael Hoisie <hoisie@google.com>	2022-12-08 00:27:50 +0000
committer	Michael Hoisie <hoisie@google.com>	2022-12-08 00:32:35 +0000
commit	8f1bf2164264f246ffa9bb781088e3554094980b (patch)
tree	d0d2754ee1b3b42253d33b9ccaf70e344a0a261c
parent	849ba8d3e4c3c8a6a4f907e6e47adf0fc6748325 (diff)
download	minikin-8f1bf2164264f246ffa9bb781088e3554094980b.tar.gz