diff options
author | Sean McQuillan <seanmcq@google.com> | 2023-01-17 19:48:43 +0000 |
---|---|---|
committer | Sean McQuillan <seanmcq@google.com> | 2023-01-17 11:50:40 -0800 |
commit | eab2d36d9dd112783754cb3e1e3cebf174c5a061 (patch) | |
tree | c95e6ede1fd0593576b9635a714b8a8b267bc8bd | |
parent | 5f3e077f850825f78d7d8d15c56a270b6afc7933 (diff) | |
download | noto-fonts-eab2d36d9dd112783754cb3e1e3cebf174c5a061.tar.gz |
Revert "Revert "Bump emojicompat bundled font to 15""
This reverts commit 51bffe5fa6cef4a8d6acebae1561498f53948145.
Reason for revert: New font with fix
Change-Id: I0ef5397066d338a81fcaa4793da54da11681b438
-rw-r--r-- | emoji-compat/README.android | 19 | ||||
-rwxr-xr-x | emoji-compat/createfont.py | 785 | ||||
-rw-r--r-- | emoji-compat/data/emoji_metadata.txt | 31 | ||||
-rwxr-xr-x | emoji-compat/fetch.sh | 118 | ||||
-rw-r--r-- | emoji-compat/font/NotoColorEmojiCompat.ttf | bin | 10043088 -> 10530840 bytes | |||
-rw-r--r-- | emoji-compat/supported-emojis/emojis.txt | 31 |
6 files changed, 190 insertions, 794 deletions
diff --git a/emoji-compat/README.android b/emoji-compat/README.android index 07cabe1..908ee0c 100644 --- a/emoji-compat/README.android +++ b/emoji-compat/README.android @@ -5,16 +5,17 @@ License: Unicode License File: LICENSE_UNICODE Description: -Noto Color Emoji Compat font is generated using Noto Color Emoji font using createfont.py. The -compat font is under font/ directory. +Noto Color Emoji Compat font is generated using Noto Color Emoji font using +<a href="https://github.com/googlefonts/emojicompat">github.com/googlefonts/emojicompat</a> -While generating the compat font, Noto Color Emoji font and data files from Unicode are used. +Canonical source of truth for fonts is -data/emoji-metadata.txt is updated using the Noto Color Emoji font and data files from -Unicode. +* <a href="http://github.com/googlefonts/noto-emoji">github.com/googlefonts/noto-emoji</a> -supported-emojis/emojis.txt file contains list of emojis that are supported by the font. Main -purpose is testing. It is generated using the Unicode files. +However, we do not pull down that entire project as it would increase repo size. -Noto Color Emoji font is under the <android_source>/external/noto-fonts/emoji/ directory. Unicode -files are under the <android_source>/external/unicode/ directory.
\ No newline at end of file +To pull in a new font please update + +* font/NotoColorEmoji.txt +* data/emoji-metadata.txt +* supported-emojis.emojis.txt diff --git a/emoji-compat/createfont.py b/emoji-compat/createfont.py deleted file mode 100755 index f694cf2..0000000 --- a/emoji-compat/createfont.py +++ /dev/null @@ -1,785 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (C) 2017 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format -under a meta tag with name 'Emji'. - -In order to create the final font the followings are used as inputs: - -- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at -external/noto-fonts/emoji/NotoColorEmoji.ttf - -- Unicode files: Unicode files that are in the framework, and lists information about all the -emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt, -and emoji-variation-sequences.txt. Currently at external/unicode/. - -- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are -in the Android font. Resides in framework and currently under external/unicode/. - -- data/emoji_metadata.txt: The file that includes the id, codepoints, the first -Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font -version that the emoji was added (compatAdded). Updated when the script is executed. - -- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/. - -After execution the following files are generated if they don't exist otherwise, they are updated: -- font/NotoColorEmojiCompat.ttf -- supported-emojis/emojis.txt -- data/emoji_metadata.txt -- src/java/android/support/text/emoji/flatbuffer/* -""" - -import contextlib -import csv -import hashlib -import itertools -import json -import os -import re -import shutil -import subprocess -import sys -import tempfile -from fontTools import ttLib -from fontTools.ttLib.tables import otTables -from nototools import font_data - -########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ########### -# Last Android SDK Version -SDK_VERSION = 31 -# metadata version that will be embedded into font. If there are updates to the font that would -# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number -# defines in which EmojiCompat metadata version the emoji is added to the font. -METADATA_VERSION = 8 - -####### main directories where output files are created ####### -SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__)) -FONT_DIR = os.path.join(SCRIPT_DIR, 'font') -DATA_DIR = os.path.join(SCRIPT_DIR, 'data') -SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis') -JAVA_SRC_DIR = os.path.join('src', 'java') -####### output files ####### -# font file -FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf') -# emoji metadata json output file -OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt') -# emojis test file -TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt') -####### input files ####### -# Unicode file names to read emoji data -EMOJI_DATA_FILE = 'emoji-data.txt' -EMOJI_SEQ_FILE = 'emoji-sequences.txt' -EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt' -EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt' -# Android OS emoji file for emojis that are not in Unicode files -ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt') -ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt') -# Android OS emoji style override file. Codepoints that are rendered with emoji style by default -# even though not defined so in <code>emoji-data.txt</code>. -EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt') -# emoji metadata file -INPUT_META_FILE = OUTPUT_META_FILE -# default flatbuffer module location (if not specified by caller) -FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers') -# flatbuffer schema -FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs') -# file path for java header, it will be prepended to flatbuffer java files -FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt') -# temporary emoji metadata json output file -OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json' -# temporary binary file generated by flatbuffer -FLATBUFFER_BIN = 'emoji_metadata.bin' -# directory representation for flatbuffer java package -FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '') -# temporary directory that contains flatbuffer java files -FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH) -FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java" -FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java" -# directory under source where flatbuffer java files will be copied into -FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH) -# meta tag name used in the font to embed the emoji metadata. This value is also used in -# MetadataListReader.java in order to locate the metadata location. -EMOJI_META_TAG_NAME = 'Emji' - -EMOJI_STR = 'EMOJI' -EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION' -ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR] -STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE' - -DEFAULT_EMOJI_ID = 0xF0001 -EMOJI_STYLE_VS = 0xFE0F - -# The reference code point to be used for filling metrics of wartermark glyph -WATERMARK_REF_CODE_POINT = 0x1F600 -# The code point and glyph name used for watermark. -WATERMARK_NEW_CODE_POINT = 0x10FF00 -WATERMARK_NEW_GLYPH_ID = 'u10FF00' - -def to_hex_str(value): - """Converts given int value to hex without the 0x prefix""" - return format(value, 'X') - -def hex_str_to_int(string): - """Convert a hex string into int""" - return int(string, 16) - -def codepoint_to_string(codepoints): - """Converts a list of codepoints into a string separated with space.""" - return ' '.join([to_hex_str(x) for x in codepoints]) - -def prepend_header_to_file(file_path, header_path): - """Prepends the header to the file. Used to update flatbuffer java files with header, comments - and annotations.""" - with open(file_path, "r+") as original_file: - with open(header_path, "r") as copyright_file: - original_content = original_file.read() - original_file.seek(0) - original_file.write(copyright_file.read() + "\n" + original_content) - -def is_ri(codepoint): - return 0x1F1E6 <= codepoint and codepoint <= 0x1F1FF - -def is_flag_seq(codepoints): - return all(is_ri(x) for x in codepoints) - - -def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir): - """Prepends headers to flatbuffer java files and copies to the final destination""" - tmp_metadata_list = flatbuffer_java_dir + FLATBUFFER_METADATA_LIST_JAVA - tmp_metadata_item = flatbuffer_java_dir + FLATBUFFER_METADATA_ITEM_JAVA - prepend_header_to_file(tmp_metadata_list, header_dir) - prepend_header_to_file(tmp_metadata_item, header_dir) - - if not os.path.exists(target_dir): - os.makedirs(target_dir) - - shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA)) - shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA)) - -def create_test_data(unicode_path): - """Read all the emojis in the unicode files and update the test file""" - lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE)) - lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE)) - - lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True) - lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True) - - # standardized variants contains a huge list of sequences, only read the ones that are emojis - # and also the ones with FE0F (emoji style) - standardized_variants_lines = read_emoji_lines( - os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE)) - for line in standardized_variants_lines: - if STD_VARIANTS_EMOJI_STYLE in line: - lines.append(line) - - emojis_set = set() - for line in lines: - # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" session. We ignore them - # here since we are already checking the emoji presentations with - # emoji-variation-sequences.txt. - if "BASIC_EMOJI" in line: - continue - codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')] - emojis_set.add(codepoint_to_string(codepoints).upper()) - - emoji_data_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE)) - for line in emoji_data_lines: - codepoints_range, emoji_property = codepoints_and_emoji_prop(line) - if not emoji_property in ACCEPTED_EMOJI_PROPERTIES: - continue - is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR - if is_emoji_style: - codepoints = [to_hex_str(x) for x in - codepoints_for_emojirange(codepoints_range)] - emojis_set.update(codepoints) - - emoji_style_exceptions = get_emoji_style_exceptions(unicode_path) - # finally add the android default emoji exceptions - emojis_set.update([to_hex_str(x) for x in emoji_style_exceptions]) - - emojis_list = list(emojis_set) - emojis_list.sort() - with open(TEST_DATA_PATH, "w") as test_file: - for line in emojis_list: - test_file.write("%s\n" % line) - -class _EmojiData(object): - """Holds the information about a single emoji.""" - - def __init__(self, codepoints, is_emoji_style): - self.codepoints = codepoints - self.emoji_style = is_emoji_style - self.emoji_id = 0 - self.width = 0 - self.height = 0 - self.sdk_added = SDK_VERSION - self.compat_added = METADATA_VERSION - - def update_metrics(self, metrics): - """Updates width/height instance variables with the values given in metrics dictionary. - :param metrics: a dictionary object that has width and height values. - """ - self.width = metrics.width - self.height = metrics.height - - def __repr__(self): - return '<EmojiData {0} - {1}>'.format(self.emoji_style, - codepoint_to_string(self.codepoints)) - - def create_json_element(self): - """Creates the json representation of EmojiData.""" - json_element = {} - json_element['id'] = self.emoji_id - json_element['emojiStyle'] = self.emoji_style - json_element['sdkAdded'] = self.sdk_added - json_element['compatAdded'] = self.compat_added - json_element['width'] = self.width - json_element['height'] = self.height - json_element['codepoints'] = self.codepoints - return json_element - - def create_txt_row(self): - """Creates array of values for CSV of EmojiData.""" - row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added] - row += [to_hex_str(x) for x in self.codepoints] - return row - - def update(self, emoji_id, sdk_added, compat_added): - """Updates current EmojiData with the values in a json element""" - self.emoji_id = emoji_id - self.sdk_added = sdk_added - self.compat_added = compat_added - - -def read_emoji_lines(file_path, optional=False): - """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty - lines and comments - :param file_path: unicode emoji file path - :param optional: if True no exception is raised when the file cannot be read - :return: list of uppercase strings - """ - result = [] - try: - with open(file_path) as file_stream: - for line in file_stream: - line = line.strip() - if line and not line.startswith('#'): - result.append(line.upper()) - except IOError: - if optional: - pass - else: - raise - - return result - -def get_emoji_style_exceptions(unicode_path): - """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers""" - lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE)) - exceptions = [] - for line in lines: - codepoint = hex_str_to_int(codepoints_and_emoji_prop(line)[0]) - exceptions.append(codepoint) - return exceptions - -def codepoints_for_emojirange(codepoints_range): - """ Return codepoints given in emoji files. Expand the codepoints that are given as a range - such as XYZ ... UVT - """ - codepoints = [] - if '..' in codepoints_range: - range_start, range_end = codepoints_range.split('..') - codepoints_range = range(hex_str_to_int(range_start), - hex_str_to_int(range_end) + 1) - codepoints.extend(codepoints_range) - else: - codepoints.append(hex_str_to_int(codepoints_range)) - return codepoints - -def codepoints_and_emoji_prop(line): - """For a given emoji file line, return codepoints and emoji property in the line. - 1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component - |Extended_Pictographic] # [...]""" - line = line.strip() - if '#' in line: - line = line[:line.index('#')] - else: - raise ValueError("Line is expected to have # in it") - line = line.split(';') - codepoints_range = line[0].strip() - emoji_property = line[1].strip() - - return codepoints_range, emoji_property - -def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions): - """Read unicode lines of unicode emoji file in which each line describes a set of codepoint - intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map. - A line format that is expected is as follows: - 1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component - |Extended_Pictographic] # [...]""" - lines = read_emoji_lines(file_path) - - for line in lines: - codepoints_range, emoji_property = codepoints_and_emoji_prop(line) - if not emoji_property in ACCEPTED_EMOJI_PROPERTIES: - continue - is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR - codepoints = codepoints_for_emojirange(codepoints_range) - - for codepoint in codepoints: - key = codepoint_to_string([codepoint]) - codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions - if key in emoji_data_map: - # since there are multiple definitions of emojis, only update when emoji style is - # True - if codepoint_is_emoji_style: - emoji_data_map[key].emoji_style = True - else: - emoji_data = _EmojiData([codepoint], codepoint_is_emoji_style) - emoji_data_map[key] = emoji_data - - -def read_emoji_sequences(emoji_data_map, file_path, optional=False, filter=None): - """Reads the content of the file which contains emoji sequences. Creates EmojiData for each - line and puts into emoji_data_map.""" - lines = read_emoji_lines(file_path, optional) - # 1F1E6 1F1E8 ; Name ; [...] - for line in lines: - # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" session. We ignore them - # here since we are already checking the emoji presentations with - # emoji-variation-sequences.txt. - if "BASIC_EMOJI" in line: - continue - codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')] - codepoints = [x for x in codepoints if x != EMOJI_STYLE_VS] - if filter: - if filter(codepoints): - continue - key = codepoint_to_string(codepoints) - if not key in emoji_data_map: - emoji_data = _EmojiData(codepoints, False) - emoji_data_map[key] = emoji_data - - -def load_emoji_data_map(unicode_path, without_flags): - """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData. - :return: map of space separated codepoints to EmojiData - """ - if without_flags: - filter = lambda x: is_flag_seq(x) - else: - filter = None - emoji_data_map = {} - emoji_style_exceptions = get_emoji_style_exceptions(unicode_path) - read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE), - emoji_style_exceptions) - read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE)) - read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE), filter=filter) - - # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists. - read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), - optional=True) - # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists. - read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), - optional=True) - - return emoji_data_map - - -def load_previous_metadata(emoji_data_map): - """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields - in emoji_metadata.txt. Returns the smallest available emoji id to use. i.e. if the largest - emoji id emoji_metadata.txt is 1, function would return 2. If emoji_metadata.txt does not - exist, or contains no emojis defined returns DEFAULT_EMOJI_ID""" - current_emoji_id = DEFAULT_EMOJI_ID - if os.path.isfile(INPUT_META_FILE): - with open(INPUT_META_FILE) as csvfile: - reader = csv.reader(csvfile, delimiter=' ') - for row in reader: - if row[0].startswith('#'): - continue - emoji_id = hex_str_to_int(row[0]) - sdk_added = int(row[1]) - compat_added = int(row[2]) - key = codepoint_to_string(hex_str_to_int(x) for x in row[3:]) - if key in emoji_data_map: - emoji_data = emoji_data_map[key] - emoji_data.update(emoji_id, sdk_added, compat_added) - if emoji_data.emoji_id >= current_emoji_id: - current_emoji_id = emoji_data.emoji_id + 1 - - return current_emoji_id - - -def update_ttlib_orig_sort(): - """Updates the ttLib tag sort with a closure that makes the meta table first.""" - orig_sort = ttLib.sortedTagList - - def meta_first_table_sort(tag_list, table_order=None): - """Sorts the tables with the original ttLib sort, then makes the meta table first.""" - tag_list = orig_sort(tag_list, table_order) - tag_list.remove('meta') - tag_list.insert(0, 'meta') - return tag_list - - ttLib.sortedTagList = meta_first_table_sort - - -def inject_meta_into_font(ttf, flatbuffer_bin_filename): - """inject metadata binary into font""" - if not 'meta' in ttf: - ttf['meta'] = ttLib.getTableClass('meta')() - meta = ttf['meta'] - with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file: - meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read() - - # sort meta tables for faster access - update_ttlib_orig_sort() - - -def validate_input_files(font_path, unicode_path, flatbuffer_path): - """Validate the existence of font file and the unicode files""" - if not os.path.isfile(font_path): - raise ValueError("Font file does not exist: " + font_path) - - if not os.path.isdir(unicode_path): - raise ValueError( - "Unicode directory does not exist or is not a directory " + unicode_path) - - emoji_filenames = [os.path.join(unicode_path, EMOJI_DATA_FILE), - os.path.join(unicode_path, EMOJI_ZWJ_FILE), - os.path.join(unicode_path, EMOJI_SEQ_FILE)] - for emoji_filename in emoji_filenames: - if not os.path.isfile(emoji_filename): - raise ValueError("Unicode emoji data file does not exist: " + emoji_filename) - - if not os.path.isdir(flatbuffer_path): - raise ValueError( - "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path) - - flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA), - os.path.join(flatbuffer_path, FLATBUFFER_HEADER)] - for flatbuffer_filename in flatbuffer_filenames: - if not os.path.isfile(flatbuffer_filename): - raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename) - - -def add_file_to_sha(sha_algo, file_path): - with open(file_path, 'rb') as input_file: - for data in iter(lambda: input_file.read(8192), b''): - sha_algo.update(data) - -def create_sha_from_source_files(font_paths): - """Creates a SHA from the given font files""" - sha_algo = hashlib.sha256() - for file_path in font_paths: - add_file_to_sha(sha_algo, file_path) - return sha_algo.hexdigest() - - -class EmojiFontCreator(object): - """Creates the EmojiCompat font""" - - def __init__(self, font_path, unicode_path, without_flags): - validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR) - - self.font_path = font_path - self.unicode_path = unicode_path - self.without_flags = without_flags - self.emoji_data_map = {} - self.remapped_codepoints = {} - self.glyph_to_image_metrics_map = {} - # set default emoji id to start of Supplemental Private Use Area-A - self.emoji_id = DEFAULT_EMOJI_ID - - def update_emoji_data(self, codepoints, glyph_name): - """Updates the existing EmojiData identified with codepoints. The fields that are set are: - - emoji_id (if it does not exist) - - image width/height""" - key = codepoint_to_string(codepoints) - if key in self.emoji_data_map: - # add emoji to final data - emoji_data = self.emoji_data_map[key] - emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name]) - if emoji_data.emoji_id == 0: - emoji_data.emoji_id = self.emoji_id - self.emoji_id = self.emoji_id + 1 - self.remapped_codepoints[emoji_data.emoji_id] = glyph_name - - def read_cbdt(self, ttf): - """Read image size data from CBDT.""" - cbdt = ttf['CBDT'] - for strike_data in cbdt.strikeData: - for key, data in strike_data.items(): - data.decompile() - self.glyph_to_image_metrics_map[key] = data.metrics - - def read_cmap12(self, ttf, glyph_to_codepoint_map): - """Reads single code point emojis that are in cmap12, updates glyph_to_codepoint_map and - finally clears all elements in CMAP 12""" - cmap = ttf['cmap'] - for table in cmap.tables: - if table.format == 12 and table.platformID == 3 and table.platEncID == 10: - for codepoint, glyph_name in table.cmap.items(): - glyph_to_codepoint_map[glyph_name] = codepoint - self.update_emoji_data([codepoint], glyph_name) - return table - raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10") - - def read_gsub(self, ttf, glyph_to_codepoint_map): - """Reads the emoji sequences defined in GSUB and clear all elements under GSUB""" - gsub = ttf['GSUB'] - ligature_subtables = [] - context_subtables = [] - # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat - # and would be expensive with little value - for lookup in gsub.table.LookupList.Lookup: - for subtable in lookup.SubTable: - if subtable.LookupType == 5: - context_subtables.append(subtable) - elif subtable.LookupType == 4: - ligature_subtables.append(subtable) - - for subtable in context_subtables: - self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map) - - for subtable in ligature_subtables: - self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map) - - def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map): - """Add substitutions defined as OpenType Context Substitution""" - for sub_class_set in subtable.SubClassSet: - if sub_class_set: - for sub_class_rule in sub_class_set.SubClassRule: - # prepare holder for substitution list. each rule will have a list that is added - # to the subs_list. - subs_list = len(sub_class_rule.SubstLookupRecord) * [None] - for record in sub_class_rule.SubstLookupRecord: - subs_list[record.SequenceIndex] = self.get_substitutions(lookup_list, - record.LookupListIndex) - # create combinations or all lists. the combinations will be filtered by - # emoji_data_map. the first element that contain as a valid glyph will be used - # as the final glyph - combinations = list(itertools.product(*subs_list)) - for seq in combinations: - glyph_names = [x["input"] for x in seq] - codepoints = [glyph_to_codepoint_map[x] for x in glyph_names] - outputs = [x["output"] for x in seq if x["output"]] - nonempty_outputs = list(filter(lambda x: x.strip() , outputs)) - if len(nonempty_outputs) == 0: - print("Warning: no output glyph is set for " + str(glyph_names)) - continue - elif len(nonempty_outputs) > 1: - print( - "Warning: multiple glyph is set for " - + str(glyph_names) + ", will use the first one") - - glyph = nonempty_outputs[0] - self.update_emoji_data(codepoints, glyph) - - def get_substitutions(self, lookup_list, index): - result = [] - for x in lookup_list.Lookup[index].SubTable: - for input, output in x.mapping.items(): - result.append({"input": input, "output": output}) - return result - - def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map): - for name, ligatures in subtable.ligatures.items(): - for ligature in ligatures: - glyph_names = [name] + ligature.Component - codepoints = [glyph_to_codepoint_map[x] for x in glyph_names] - self.update_emoji_data(codepoints, ligature.LigGlyph) - - def write_metadata_json(self, output_json_file_path): - """Writes the emojis into a json file""" - output_json = {} - output_json['version'] = METADATA_VERSION - output_json['sourceSha'] = create_sha_from_source_files( - [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA]) - output_json['list'] = [] - - emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id) - - total_emoji_count = 0 - for emoji_data in emoji_data_list: - if self.without_flags and is_flag_seq(emoji_data.codepoints): - continue # Do not add flags emoji data if this is for subset font. - element = emoji_data.create_json_element() - output_json['list'].append(element) - total_emoji_count = total_emoji_count + 1 - - # write the new json file to be processed by FlatBuffers - with open(output_json_file_path, 'w') as json_file: - print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')), - file=json_file) - - return total_emoji_count - - def write_metadata_csv(self): - """Writes emoji metadata into space separated file""" - with open(OUTPUT_META_FILE, 'w') as csvfile: - csvwriter = csv.writer(csvfile, delimiter=' ') - emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id) - csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints']) - for emoji_data in emoji_data_list: - csvwriter.writerow(emoji_data.create_txt_row()) - - def add_watermark(self, ttf): - cmap = ttf.getBestCmap() - gsub = ttf['GSUB'].table - - # Obtain Version string - m = re.search('^Version (\d*)\.(\d*)', font_data.font_version(ttf)) - if not m: - raise ValueError('The font does not have proper version string.') - major = m.group(1) - minor = m.group(2) - # Replace the dot with space since NotoColorEmoji does not have glyph for dot. - glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)] - - # Update Glyph metrics - ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID) - refGlyphId = cmap[WATERMARK_REF_CODE_POINT] - ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId] - ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId] - - # Add new Glyph to cmap - font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID }) - - # Add lookup table for the version string. - lookups = gsub.LookupList.Lookup - new_lookup = otTables.Lookup() - new_lookup.LookupType = 2 # Multiple Substitution Subtable. - new_lookup.LookupFlag = 0 - new_subtable = otTables.MultipleSubst() - new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) } - new_lookup.SubTable = [ new_subtable ] - new_lookup_index = len(lookups) - lookups.append(new_lookup) - - # Add feature - feature = next(x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp') - if not feature: - raise ValueError("Font doesn't contain ccmp feature.") - - feature.Feature.LookupListIndex.append(new_lookup_index) - - def create_font(self): - """Creates the EmojiCompat font. - :param font_path: path to Android NotoColorEmoji font - :param unicode_path: path to directory that contains unicode files - """ - - tmp_dir = tempfile.mkdtemp() - - # create emoji codepoints to EmojiData map - self.emoji_data_map = load_emoji_data_map(self.unicode_path, self.without_flags) - - # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is - # returned is either default or 1 greater than the largest id in previous data - self.emoji_id = load_previous_metadata(self.emoji_data_map) - - # recalcTimestamp parameter will keep the modified field same as the original font. Changing - # the modified field in the font causes the font ttf file to change, which makes it harder - # to understand if something really changed in the font. - with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf: - # read image size data - self.read_cbdt(ttf) - - # glyph name to codepoint map - glyph_to_codepoint_map = {} - - # read single codepoint emojis under cmap12 and clear the table contents - cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map) - - # read emoji sequences gsub and clear the table contents - self.read_gsub(ttf, glyph_to_codepoint_map) - - # add all new codepoint to glyph mappings - cmap12_table.cmap.update(self.remapped_codepoints) - - # final metadata csv will be used to generate the sha, therefore write it before - # metadata json is written. - self.write_metadata_csv() - - output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME) - flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN) - flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH) - - total_emoji_count = self.write_metadata_json(output_json_file) - - # create the flatbuffers binary and java classes - flatc_command = ['flatc', - '-o', - tmp_dir, - '-b', - '-j', - FLATBUFFER_SCHEMA, - output_json_file] - subprocess.check_output(flatc_command) - - # inject metadata binary into font - inject_meta_into_font(ttf, flatbuffer_bin_file) - - # add wartermark glyph for manual verification. - self.add_watermark(ttf) - - # update CBDT and CBLC versions since older android versions cannot read > 2.0 - ttf['CBDT'].version = 2.0 - ttf['CBLC'].version = 2.0 - - # save the new font - ttf.save(FONT_PATH) - - update_flatbuffer_java_files(flatbuffer_java_dir, #tmp dir - FLATBUFFER_HEADER, - FLATBUFFER_JAVA_TARGET) - - create_test_data(self.unicode_path) - - # clear the tmp output directory - shutil.rmtree(tmp_dir, ignore_errors=True) - - print( - "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR)) - - -def print_usage(): - """Prints how to use the script.""" - print("Please specify a path to font and unicode files.\n" - "usage: createfont.py noto-color-emoji-path unicode-dir-path") - -def parse_args(argv): - # parse manually to avoid any extra dependencies - if len(argv) == 4: - without_flags = argv[3] == '--without-flags' - else: - without_flags = False - - if len(argv) < 3: - print_usage() - sys.exit(1) - return (sys.argv[1], sys.argv[2], without_flags) - -def main(): - font_file, unicode_dir, without_flags = parse_args(sys.argv) - EmojiFontCreator(font_file, unicode_dir, without_flags).create_font() - - -if __name__ == '__main__': - main() diff --git a/emoji-compat/data/emoji_metadata.txt b/emoji-compat/data/emoji_metadata.txt index d57d34e..6a9110e 100644 --- a/emoji-compat/data/emoji_metadata.txt +++ b/emoji-compat/data/emoji_metadata.txt @@ -3685,3 +3685,34 @@ F0E97 31 8 1FAF6 1F3FC F0E98 31 8 1FAF6 1F3FD
F0E99 31 8 1FAF6 1F3FE
F0E9A 31 8 1FAF6 1F3FF
+F0E9C 1500 9 1F426 200D 2B1B
+F0E9D 1500 9 1F6DC
+F0E9E 1500 9 1FA75
+F0E9F 1500 9 1FA76
+F0EA0 1500 9 1FA77
+F0EA1 1500 9 1FA87
+F0EA2 1500 9 1FA88
+F0EA3 1500 9 1FAAD
+F0EA4 1500 9 1FAAE
+F0EA5 1500 9 1FAAF
+F0EA6 1500 9 1FABB
+F0EA7 1500 9 1FABC
+F0EA8 1500 9 1FABD
+F0EA9 1500 9 1FABF
+F0EAA 1500 9 1FACE
+F0EAB 1500 9 1FACF
+F0EAC 1500 9 1FADA
+F0EAD 1500 9 1FADB
+F0EAE 1500 9 1FAE8
+F0EAF 1500 9 1FAF7
+F0EB0 1500 9 1FAF7 1F3FB
+F0EB1 1500 9 1FAF7 1F3FC
+F0EB2 1500 9 1FAF7 1F3FD
+F0EB3 1500 9 1FAF7 1F3FE
+F0EB4 1500 9 1FAF7 1F3FF
+F0EB5 1500 9 1FAF8
+F0EB6 1500 9 1FAF8 1F3FB
+F0EB7 1500 9 1FAF8 1F3FC
+F0EB8 1500 9 1FAF8 1F3FD
+F0EB9 1500 9 1FAF8 1F3FE
+F0EBA 1500 9 1FAF8 1F3FF
diff --git a/emoji-compat/fetch.sh b/emoji-compat/fetch.sh new file mode 100755 index 0000000..d35127a --- /dev/null +++ b/emoji-compat/fetch.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash + +# Copyright (C) 2022 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Helper script for fetching new emoji compat fonts from github source +# of truth + +# This script is very basic, please extend or replace to handle your +# needs (e.g. pulling specific commits, releases, branches) as needed. + +#set -o xtrace +set +e + +METADATA_GIT="https://github.com/googlefonts/emojicompat.git" +FONT_GIT="https://github.com/googlefonts/noto-emoji.git" + +SCRIPT_DIR=$(readlink -f $(dirname -- "$0")) +TMP_DIR=$(mktemp -d) + +GIT_VERSION=$(git --version) +if [ $? -ne 0 ]; then + echo -e "ERROR: git not found" + exit 1 +fi + +TTX_VERSION=$(ttx --version) + +if [ $? -ne 0 ]; then + echo "ERROR ttx required to check font" + echo -e "\t python3 -m venv venv" + echo -e "\t source venv/bin/activate" + echo -e "\t pip install fonttools" + exit 127 +fi + +echo "METADATA: $METADATA_GIT" +echo "FONT: $FONT_GIT" +echo "Updating in: $SCRIPT_DIR" + +# confirm directory is clean +pushd $SCRIPT_DIR > /dev/null +UNCOMMITED_CHANGES=$(git status --porcelain) +popd > /dev/null +if [[ "$UNCOMMITED_CHANGES" ]]; then + echo "$UNCOMMITED_CHANGES" + read -p "Uncommited changes. Continue? [y/N]:" uncommited + if [[ ! $uncommited =~ ^[Yy] ]]; then + exit 3 + fi +fi + +function confirm_git_commit() { + pushd $TMP_DIR/$1 > /dev/null + RESULT=$(git log -1) + echo "$RESULT" + read -p "Continue for repo $1? [y/N]: " yn + if [[ ! $yn =~ ^[Yy] ]]; then + exit 2 + fi + popd > /dev/null +} + +pushd $TMP_DIR > /dev/null + +git clone --quiet --depth 1 --branch main $METADATA_GIT +confirm_git_commit "emojicompat" +METADATA_FILE="./emojicompat/src/emojicompat/emoji_metadata.txt" +# adjust newlines to avoid giant diffs +cat $METADATA_FILE | awk 'sub("$", "\r")' > emoji_metadata.txt + +# pull the font +git clone --quiet --depth 1 --branch main $FONT_GIT +confirm_git_commit "noto-emoji" +cp ./noto-emoji/fonts/NotoColorEmoji-emojicompat.ttf ./NewFont.ttf + +ttx -o NewFont.ttx NewFont.ttf 2> /dev/null +grep -q 'header version="2.0"' NewFont.ttx + +if [ $? -ne 0 ]; then + echo -e "WRONG HEADER VERSION IN FONT FILE (breaks API23)" + echo -e "Expected 'header version=\"2.0\"" + echo -e "Found: " + grep 'header version' NewFont.ttx + exit 128 +fi + +# concat new codepoints to emojis.txt +NEW_LINES=$(comm -23 emoji_metadata.txt $SCRIPT_DIR/data/emoji_metadata.txt) +NEW_CODEPOINTS=$(echo "$NEW_LINES" | cut -d" " -f4-100 | sed 's/\r//') + +if [[ "$NEW_CODEPOINTS" ]]; then + echo "$NEW_CODEPOINTS" + read -p "New codpoints found in metadata. Append emojis.txt? [y/N]:" emojiAppend + if [[ "$emojiAppend" =~ ^[Yy] ]]; then + echo "$NEW_CODEPOINTS" >> $SCRIPT_DIR/supported-emojis/emojis.txt + echo "Updated ${SCRIPT_DIR}/supported-emojis/emojis.txt" + fi +fi + +cp emoji_metadata.txt $SCRIPT_DIR/data/emoji_metadata.txt +echo "Updated ${SCRIPT_DIR}/data/emoji_metadata.txt" +cp NewFont.ttf $SCRIPT_DIR/font/NotoColorEmojiCompat.ttf +echo "Updated ${SCRIPT_DIR}/font/NotoColorEmojiCompat.ttf" + +popd > /dev/null +rm -rf $TMP_DIR diff --git a/emoji-compat/font/NotoColorEmojiCompat.ttf b/emoji-compat/font/NotoColorEmojiCompat.ttf Binary files differindex 7334ae8..c48cd26 100644 --- a/emoji-compat/font/NotoColorEmojiCompat.ttf +++ b/emoji-compat/font/NotoColorEmojiCompat.ttf diff --git a/emoji-compat/supported-emojis/emojis.txt b/emoji-compat/supported-emojis/emojis.txt index a3ac299..3e5eb9e 100644 --- a/emoji-compat/supported-emojis/emojis.txt +++ b/emoji-compat/supported-emojis/emojis.txt @@ -3831,3 +3831,34 @@ 39 FE0F 20E3 A9 FE0F AE FE0F +1F426 200D 2B1B +1F6DC +1FA75 +1FA76 +1FA77 +1FA87 +1FA88 +1FAAD +1FAAE +1FAAF +1FABB +1FABC +1FABD +1FABF +1FACE +1FACF +1FADA +1FADB +1FAE8 +1FAF7 +1FAF7 1F3FB +1FAF7 1F3FC +1FAF7 1F3FD +1FAF7 1F3FE +1FAF7 1F3FF +1FAF8 +1FAF8 1F3FB +1FAF8 1F3FC +1FAF8 1F3FD +1FAF8 1F3FE +1FAF8 1F3FF |