summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McQuillan <seanmcq@google.com>2023-01-17 23:03:23 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2023-01-17 23:03:23 +0000
commit8f6cec6ca74bbbaf4a728f9965f124ff5b87f08f (patch)
treec95e6ede1fd0593576b9635a714b8a8b267bc8bd
parent5f3e077f850825f78d7d8d15c56a270b6afc7933 (diff)
parenteab2d36d9dd112783754cb3e1e3cebf174c5a061 (diff)
downloadnoto-fonts-8f6cec6ca74bbbaf4a728f9965f124ff5b87f08f.tar.gz
-rw-r--r--emoji-compat/README.android19
-rwxr-xr-xemoji-compat/createfont.py785
-rw-r--r--emoji-compat/data/emoji_metadata.txt31
-rwxr-xr-xemoji-compat/fetch.sh118
-rw-r--r--emoji-compat/font/NotoColorEmojiCompat.ttfbin10043088 -> 10530840 bytes
-rw-r--r--emoji-compat/supported-emojis/emojis.txt31
6 files changed, 190 insertions, 794 deletions
diff --git a/emoji-compat/README.android b/emoji-compat/README.android
index 07cabe1..908ee0c 100644
--- a/emoji-compat/README.android
+++ b/emoji-compat/README.android
@@ -5,16 +5,17 @@ License: Unicode
License File: LICENSE_UNICODE
Description:
-Noto Color Emoji Compat font is generated using Noto Color Emoji font using createfont.py. The
-compat font is under font/ directory.
+Noto Color Emoji Compat font is generated from the Noto Color Emoji font using
+<a href="https://github.com/googlefonts/emojicompat">github.com/googlefonts/emojicompat</a>
-While generating the compat font, Noto Color Emoji font and data files from Unicode are used.
+Canonical source of truth for fonts is
-data/emoji-metadata.txt is updated using the Noto Color Emoji font and data files from
-Unicode.
+* <a href="http://github.com/googlefonts/noto-emoji">github.com/googlefonts/noto-emoji</a>
-supported-emojis/emojis.txt file contains list of emojis that are supported by the font. Main
-purpose is testing. It is generated using the Unicode files.
+However, we do not pull down that entire project as it would increase repo size.
-Noto Color Emoji font is under the <android_source>/external/noto-fonts/emoji/ directory. Unicode
-files are under the <android_source>/external/unicode/ directory. \ No newline at end of file
+To pull in a new font please update
+
+* font/NotoColorEmojiCompat.ttf
+* data/emoji_metadata.txt
+* supported-emojis/emojis.txt
diff --git a/emoji-compat/createfont.py b/emoji-compat/createfont.py
deleted file mode 100755
index f694cf2..0000000
--- a/emoji-compat/createfont.py
+++ /dev/null
@@ -1,785 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (C) 2017 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
-under a meta tag with name 'Emji'.
-
-In order to create the final font, the following are used as inputs:
-
-- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
-external/noto-fonts/emoji/NotoColorEmoji.ttf
-
-- Unicode files: Unicode files that are in the framework, and lists information about all the
-emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
-and emoji-variation-sequences.txt. Currently at external/unicode/.
-
-- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
-in the Android font. Resides in framework and currently under external/unicode/.
-
-- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
-Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
-version that the emoji was added (compatAdded). Updated when the script is executed.
-
-- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.
-
-After execution, the following files are generated if they don't exist; otherwise, they are updated:
-- font/NotoColorEmojiCompat.ttf
-- supported-emojis/emojis.txt
-- data/emoji_metadata.txt
-- src/java/android/support/text/emoji/flatbuffer/*
-"""
-
-import contextlib
-import csv
-import hashlib
-import itertools
-import json
-import os
-import re
-import shutil
-import subprocess
-import sys
-import tempfile
-from fontTools import ttLib
-from fontTools.ttLib.tables import otTables
-from nototools import font_data
-
-########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
-# Last Android SDK Version
-SDK_VERSION = 31
-# metadata version that will be embedded into font. If there are updates to the font that would
-# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
-# defines in which EmojiCompat metadata version the emoji is added to the font.
-METADATA_VERSION = 8
-
-####### main directories where output files are created #######
-SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
-FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
-DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
-SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
-JAVA_SRC_DIR = os.path.join('src', 'java')
-####### output files #######
-# font file
-FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
-# emoji metadata json output file
-OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
-# emojis test file
-TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
-####### input files #######
-# Unicode file names to read emoji data
-EMOJI_DATA_FILE = 'emoji-data.txt'
-EMOJI_SEQ_FILE = 'emoji-sequences.txt'
-EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
-EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
-# Android OS emoji file for emojis that are not in Unicode files
-ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
-ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
-# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
-# even though not defined so in <code>emoji-data.txt</code>.
-EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
-# emoji metadata file
-INPUT_META_FILE = OUTPUT_META_FILE
-# default flatbuffer module location (if not specified by caller)
-FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
-# flatbuffer schema
-FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
-# file path for java header, it will be prepended to flatbuffer java files
-FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
-# temporary emoji metadata json output file
-OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
-# temporary binary file generated by flatbuffer
-FLATBUFFER_BIN = 'emoji_metadata.bin'
-# directory representation for flatbuffer java package
-FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
-# temporary directory that contains flatbuffer java files
-FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
-FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
-FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
-# directory under source where flatbuffer java files will be copied into
-FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
-# meta tag name used in the font to embed the emoji metadata. This value is also used in
-# MetadataListReader.java in order to locate the metadata location.
-EMOJI_META_TAG_NAME = 'Emji'
-
-EMOJI_STR = 'EMOJI'
-EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
-ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
-STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'
-
-DEFAULT_EMOJI_ID = 0xF0001
-EMOJI_STYLE_VS = 0xFE0F
-
-# The reference code point to be used for filling metrics of watermark glyph
-WATERMARK_REF_CODE_POINT = 0x1F600
-# The code point and glyph name used for watermark.
-WATERMARK_NEW_CODE_POINT = 0x10FF00
-WATERMARK_NEW_GLYPH_ID = 'u10FF00'
-
-def to_hex_str(value):
- """Converts given int value to hex without the 0x prefix"""
- return format(value, 'X')
-
-def hex_str_to_int(string):
- """Convert a hex string into int"""
- return int(string, 16)
-
-def codepoint_to_string(codepoints):
- """Converts a list of codepoints into a string separated with space."""
- return ' '.join([to_hex_str(x) for x in codepoints])
-
-def prepend_header_to_file(file_path, header_path):
- """Prepends the header to the file. Used to update flatbuffer java files with header, comments
- and annotations."""
- with open(file_path, "r+") as original_file:
- with open(header_path, "r") as copyright_file:
- original_content = original_file.read()
- original_file.seek(0)
- original_file.write(copyright_file.read() + "\n" + original_content)
-
-def is_ri(codepoint):
- return 0x1F1E6 <= codepoint and codepoint <= 0x1F1FF
-
-def is_flag_seq(codepoints):
- return all(is_ri(x) for x in codepoints)
-
-
-def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
- """Prepends headers to flatbuffer java files and copies to the final destination"""
- tmp_metadata_list = flatbuffer_java_dir + FLATBUFFER_METADATA_LIST_JAVA
- tmp_metadata_item = flatbuffer_java_dir + FLATBUFFER_METADATA_ITEM_JAVA
- prepend_header_to_file(tmp_metadata_list, header_dir)
- prepend_header_to_file(tmp_metadata_item, header_dir)
-
- if not os.path.exists(target_dir):
- os.makedirs(target_dir)
-
- shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA))
- shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA))
-
-def create_test_data(unicode_path):
- """Read all the emojis in the unicode files and update the test file"""
- lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE))
- lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE))
-
- lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
- lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True)
-
- # standardized variants contains a huge list of sequences, only read the ones that are emojis
- # and also the ones with FE0F (emoji style)
- standardized_variants_lines = read_emoji_lines(
- os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
- for line in standardized_variants_lines:
- if STD_VARIANTS_EMOJI_STYLE in line:
- lines.append(line)
-
- emojis_set = set()
- for line in lines:
- # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" section. We ignore them
- # here since we are already checking the emoji presentations with
- # emoji-variation-sequences.txt.
- if "BASIC_EMOJI" in line:
- continue
- codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
- emojis_set.add(codepoint_to_string(codepoints).upper())
-
- emoji_data_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE))
- for line in emoji_data_lines:
- codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
- if not emoji_property in ACCEPTED_EMOJI_PROPERTIES:
- continue
- is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
- if is_emoji_style:
- codepoints = [to_hex_str(x) for x in
- codepoints_for_emojirange(codepoints_range)]
- emojis_set.update(codepoints)
-
- emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
- # finally add the android default emoji exceptions
- emojis_set.update([to_hex_str(x) for x in emoji_style_exceptions])
-
- emojis_list = list(emojis_set)
- emojis_list.sort()
- with open(TEST_DATA_PATH, "w") as test_file:
- for line in emojis_list:
- test_file.write("%s\n" % line)
-
-class _EmojiData(object):
- """Holds the information about a single emoji."""
-
- def __init__(self, codepoints, is_emoji_style):
- self.codepoints = codepoints
- self.emoji_style = is_emoji_style
- self.emoji_id = 0
- self.width = 0
- self.height = 0
- self.sdk_added = SDK_VERSION
- self.compat_added = METADATA_VERSION
-
- def update_metrics(self, metrics):
- """Updates width/height instance variables with the values given in metrics dictionary.
- :param metrics: a dictionary object that has width and height values.
- """
- self.width = metrics.width
- self.height = metrics.height
-
- def __repr__(self):
- return '<EmojiData {0} - {1}>'.format(self.emoji_style,
- codepoint_to_string(self.codepoints))
-
- def create_json_element(self):
- """Creates the json representation of EmojiData."""
- json_element = {}
- json_element['id'] = self.emoji_id
- json_element['emojiStyle'] = self.emoji_style
- json_element['sdkAdded'] = self.sdk_added
- json_element['compatAdded'] = self.compat_added
- json_element['width'] = self.width
- json_element['height'] = self.height
- json_element['codepoints'] = self.codepoints
- return json_element
-
- def create_txt_row(self):
- """Creates array of values for CSV of EmojiData."""
- row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
- row += [to_hex_str(x) for x in self.codepoints]
- return row
-
- def update(self, emoji_id, sdk_added, compat_added):
- """Updates current EmojiData with the values in a json element"""
- self.emoji_id = emoji_id
- self.sdk_added = sdk_added
- self.compat_added = compat_added
-
-
-def read_emoji_lines(file_path, optional=False):
- """Read all lines in a unicode emoji file into a list of uppercase strings. Ignore the empty
- lines and comments
- :param file_path: unicode emoji file path
- :param optional: if True no exception is raised when the file cannot be read
- :return: list of uppercase strings
- """
- result = []
- try:
- with open(file_path) as file_stream:
- for line in file_stream:
- line = line.strip()
- if line and not line.startswith('#'):
- result.append(line.upper())
- except IOError:
- if optional:
- pass
- else:
- raise
-
- return result
-
-def get_emoji_style_exceptions(unicode_path):
- """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers"""
- lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
- exceptions = []
- for line in lines:
- codepoint = hex_str_to_int(codepoints_and_emoji_prop(line)[0])
- exceptions.append(codepoint)
- return exceptions
-
-def codepoints_for_emojirange(codepoints_range):
- """ Return codepoints given in emoji files. Expand the codepoints that are given as a range
- such as XYZ ... UVT
- """
- codepoints = []
- if '..' in codepoints_range:
- range_start, range_end = codepoints_range.split('..')
- codepoints_range = range(hex_str_to_int(range_start),
- hex_str_to_int(range_end) + 1)
- codepoints.extend(codepoints_range)
- else:
- codepoints.append(hex_str_to_int(codepoints_range))
- return codepoints
-
-def codepoints_and_emoji_prop(line):
- """For a given emoji file line, return codepoints and emoji property in the line.
- 1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
- |Extended_Pictographic] # [...]"""
- line = line.strip()
- if '#' in line:
- line = line[:line.index('#')]
- else:
- raise ValueError("Line is expected to have # in it")
- line = line.split(';')
- codepoints_range = line[0].strip()
- emoji_property = line[1].strip()
-
- return codepoints_range, emoji_property
-
-def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
- """Read unicode lines of unicode emoji file in which each line describes a set of codepoint
- intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map.
- A line format that is expected is as follows:
- 1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
- |Extended_Pictographic] # [...]"""
- lines = read_emoji_lines(file_path)
-
- for line in lines:
- codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
- if not emoji_property in ACCEPTED_EMOJI_PROPERTIES:
- continue
- is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
- codepoints = codepoints_for_emojirange(codepoints_range)
-
- for codepoint in codepoints:
- key = codepoint_to_string([codepoint])
- codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions
- if key in emoji_data_map:
- # since there are multiple definitions of emojis, only update when emoji style is
- # True
- if codepoint_is_emoji_style:
- emoji_data_map[key].emoji_style = True
- else:
- emoji_data = _EmojiData([codepoint], codepoint_is_emoji_style)
- emoji_data_map[key] = emoji_data
-
-
-def read_emoji_sequences(emoji_data_map, file_path, optional=False, filter=None):
- """Reads the content of the file which contains emoji sequences. Creates EmojiData for each
- line and puts into emoji_data_map."""
- lines = read_emoji_lines(file_path, optional)
- # 1F1E6 1F1E8 ; Name ; [...]
- for line in lines:
- # In unicode 12.0, "emoji-sequences.txt" contains "Basic_Emoji" section. We ignore them
- # here since we are already checking the emoji presentations with
- # emoji-variation-sequences.txt.
- if "BASIC_EMOJI" in line:
- continue
- codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
- codepoints = [x for x in codepoints if x != EMOJI_STYLE_VS]
- if filter:
- if filter(codepoints):
- continue
- key = codepoint_to_string(codepoints)
- if not key in emoji_data_map:
- emoji_data = _EmojiData(codepoints, False)
- emoji_data_map[key] = emoji_data
-
-
-def load_emoji_data_map(unicode_path, without_flags):
- """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
- :return: map of space separated codepoints to EmojiData
- """
- if without_flags:
- filter = lambda x: is_flag_seq(x)
- else:
- filter = None
- emoji_data_map = {}
- emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
- read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
- emoji_style_exceptions)
- read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE))
- read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE), filter=filter)
-
- # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists.
- read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
- optional=True)
- # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists.
- read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
- optional=True)
-
- return emoji_data_map
-
-
-def load_previous_metadata(emoji_data_map):
- """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
- in emoji_metadata.txt. Returns the smallest available emoji id to use. i.e. if the largest
- emoji id emoji_metadata.txt is 1, function would return 2. If emoji_metadata.txt does not
- exist, or contains no emojis defined returns DEFAULT_EMOJI_ID"""
- current_emoji_id = DEFAULT_EMOJI_ID
- if os.path.isfile(INPUT_META_FILE):
- with open(INPUT_META_FILE) as csvfile:
- reader = csv.reader(csvfile, delimiter=' ')
- for row in reader:
- if row[0].startswith('#'):
- continue
- emoji_id = hex_str_to_int(row[0])
- sdk_added = int(row[1])
- compat_added = int(row[2])
- key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
- if key in emoji_data_map:
- emoji_data = emoji_data_map[key]
- emoji_data.update(emoji_id, sdk_added, compat_added)
- if emoji_data.emoji_id >= current_emoji_id:
- current_emoji_id = emoji_data.emoji_id + 1
-
- return current_emoji_id
-
-
-def update_ttlib_orig_sort():
- """Updates the ttLib tag sort with a closure that makes the meta table first."""
- orig_sort = ttLib.sortedTagList
-
- def meta_first_table_sort(tag_list, table_order=None):
- """Sorts the tables with the original ttLib sort, then makes the meta table first."""
- tag_list = orig_sort(tag_list, table_order)
- tag_list.remove('meta')
- tag_list.insert(0, 'meta')
- return tag_list
-
- ttLib.sortedTagList = meta_first_table_sort
-
-
-def inject_meta_into_font(ttf, flatbuffer_bin_filename):
- """inject metadata binary into font"""
- if not 'meta' in ttf:
- ttf['meta'] = ttLib.getTableClass('meta')()
- meta = ttf['meta']
- with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
- meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()
-
- # sort meta tables for faster access
- update_ttlib_orig_sort()
-
-
-def validate_input_files(font_path, unicode_path, flatbuffer_path):
- """Validate the existence of font file and the unicode files"""
- if not os.path.isfile(font_path):
- raise ValueError("Font file does not exist: " + font_path)
-
- if not os.path.isdir(unicode_path):
- raise ValueError(
- "Unicode directory does not exist or is not a directory " + unicode_path)
-
- emoji_filenames = [os.path.join(unicode_path, EMOJI_DATA_FILE),
- os.path.join(unicode_path, EMOJI_ZWJ_FILE),
- os.path.join(unicode_path, EMOJI_SEQ_FILE)]
- for emoji_filename in emoji_filenames:
- if not os.path.isfile(emoji_filename):
- raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)
-
- if not os.path.isdir(flatbuffer_path):
- raise ValueError(
- "Flatbuffer directory does not exist or is not a directory " + flatbuffer_path)
-
- flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA),
- os.path.join(flatbuffer_path, FLATBUFFER_HEADER)]
- for flatbuffer_filename in flatbuffer_filenames:
- if not os.path.isfile(flatbuffer_filename):
- raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)
-
-
-def add_file_to_sha(sha_algo, file_path):
- with open(file_path, 'rb') as input_file:
- for data in iter(lambda: input_file.read(8192), b''):
- sha_algo.update(data)
-
-def create_sha_from_source_files(font_paths):
- """Creates a SHA from the given font files"""
- sha_algo = hashlib.sha256()
- for file_path in font_paths:
- add_file_to_sha(sha_algo, file_path)
- return sha_algo.hexdigest()
-
-
-class EmojiFontCreator(object):
- """Creates the EmojiCompat font"""
-
- def __init__(self, font_path, unicode_path, without_flags):
- validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)
-
- self.font_path = font_path
- self.unicode_path = unicode_path
- self.without_flags = without_flags
- self.emoji_data_map = {}
- self.remapped_codepoints = {}
- self.glyph_to_image_metrics_map = {}
- # set default emoji id to start of Supplemental Private Use Area-A
- self.emoji_id = DEFAULT_EMOJI_ID
-
- def update_emoji_data(self, codepoints, glyph_name):
- """Updates the existing EmojiData identified with codepoints. The fields that are set are:
- - emoji_id (if it does not exist)
- - image width/height"""
- key = codepoint_to_string(codepoints)
- if key in self.emoji_data_map:
- # add emoji to final data
- emoji_data = self.emoji_data_map[key]
- emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
- if emoji_data.emoji_id == 0:
- emoji_data.emoji_id = self.emoji_id
- self.emoji_id = self.emoji_id + 1
- self.remapped_codepoints[emoji_data.emoji_id] = glyph_name
-
- def read_cbdt(self, ttf):
- """Read image size data from CBDT."""
- cbdt = ttf['CBDT']
- for strike_data in cbdt.strikeData:
- for key, data in strike_data.items():
- data.decompile()
- self.glyph_to_image_metrics_map[key] = data.metrics
-
- def read_cmap12(self, ttf, glyph_to_codepoint_map):
- """Reads single code point emojis that are in cmap12, updates glyph_to_codepoint_map and
- finally clears all elements in CMAP 12"""
- cmap = ttf['cmap']
- for table in cmap.tables:
- if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
- for codepoint, glyph_name in table.cmap.items():
- glyph_to_codepoint_map[glyph_name] = codepoint
- self.update_emoji_data([codepoint], glyph_name)
- return table
- raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")
-
- def read_gsub(self, ttf, glyph_to_codepoint_map):
- """Reads the emoji sequences defined in GSUB and clear all elements under GSUB"""
- gsub = ttf['GSUB']
- ligature_subtables = []
- context_subtables = []
- # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
- # and would be expensive with little value
- for lookup in gsub.table.LookupList.Lookup:
- for subtable in lookup.SubTable:
- if subtable.LookupType == 5:
- context_subtables.append(subtable)
- elif subtable.LookupType == 4:
- ligature_subtables.append(subtable)
-
- for subtable in context_subtables:
- self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)
-
- for subtable in ligature_subtables:
- self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)
-
- def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
- """Add substitutions defined as OpenType Context Substitution"""
- for sub_class_set in subtable.SubClassSet:
- if sub_class_set:
- for sub_class_rule in sub_class_set.SubClassRule:
- # prepare holder for substitution list. each rule will have a list that is added
- # to the subs_list.
- subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
- for record in sub_class_rule.SubstLookupRecord:
- subs_list[record.SequenceIndex] = self.get_substitutions(lookup_list,
- record.LookupListIndex)
- # create combinations of all lists. the combinations will be filtered by
- # emoji_data_map. the first element that contains a valid glyph will be used
- # as the final glyph
- combinations = list(itertools.product(*subs_list))
- for seq in combinations:
- glyph_names = [x["input"] for x in seq]
- codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
- outputs = [x["output"] for x in seq if x["output"]]
- nonempty_outputs = list(filter(lambda x: x.strip() , outputs))
- if len(nonempty_outputs) == 0:
- print("Warning: no output glyph is set for " + str(glyph_names))
- continue
- elif len(nonempty_outputs) > 1:
- print(
- "Warning: multiple glyph is set for "
- + str(glyph_names) + ", will use the first one")
-
- glyph = nonempty_outputs[0]
- self.update_emoji_data(codepoints, glyph)
-
- def get_substitutions(self, lookup_list, index):
- result = []
- for x in lookup_list.Lookup[index].SubTable:
- for input, output in x.mapping.items():
- result.append({"input": input, "output": output})
- return result
-
- def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
- for name, ligatures in subtable.ligatures.items():
- for ligature in ligatures:
- glyph_names = [name] + ligature.Component
- codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
- self.update_emoji_data(codepoints, ligature.LigGlyph)
-
- def write_metadata_json(self, output_json_file_path):
- """Writes the emojis into a json file"""
- output_json = {}
- output_json['version'] = METADATA_VERSION
- output_json['sourceSha'] = create_sha_from_source_files(
- [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
- output_json['list'] = []
-
- emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
-
- total_emoji_count = 0
- for emoji_data in emoji_data_list:
- if self.without_flags and is_flag_seq(emoji_data.codepoints):
- continue # Do not add flags emoji data if this is for subset font.
- element = emoji_data.create_json_element()
- output_json['list'].append(element)
- total_emoji_count = total_emoji_count + 1
-
- # write the new json file to be processed by FlatBuffers
- with open(output_json_file_path, 'w') as json_file:
- print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
- file=json_file)
-
- return total_emoji_count
-
- def write_metadata_csv(self):
- """Writes emoji metadata into space separated file"""
- with open(OUTPUT_META_FILE, 'w') as csvfile:
- csvwriter = csv.writer(csvfile, delimiter=' ')
- emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
- csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
- for emoji_data in emoji_data_list:
- csvwriter.writerow(emoji_data.create_txt_row())
-
- def add_watermark(self, ttf):
- cmap = ttf.getBestCmap()
- gsub = ttf['GSUB'].table
-
- # Obtain Version string
- m = re.search('^Version (\d*)\.(\d*)', font_data.font_version(ttf))
- if not m:
- raise ValueError('The font does not have proper version string.')
- major = m.group(1)
- minor = m.group(2)
- # Replace the dot with space since NotoColorEmoji does not have glyph for dot.
- glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]
-
- # Update Glyph metrics
- ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
- refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
- ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
- ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]
-
- # Add new Glyph to cmap
- font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID })
-
- # Add lookup table for the version string.
- lookups = gsub.LookupList.Lookup
- new_lookup = otTables.Lookup()
- new_lookup.LookupType = 2 # Multiple Substitution Subtable.
- new_lookup.LookupFlag = 0
- new_subtable = otTables.MultipleSubst()
- new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) }
- new_lookup.SubTable = [ new_subtable ]
- new_lookup_index = len(lookups)
- lookups.append(new_lookup)
-
- # Add feature
- feature = next(x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp')
- if not feature:
- raise ValueError("Font doesn't contain ccmp feature.")
-
- feature.Feature.LookupListIndex.append(new_lookup_index)
-
- def create_font(self):
- """Creates the EmojiCompat font.
- :param font_path: path to Android NotoColorEmoji font
- :param unicode_path: path to directory that contains unicode files
- """
-
- tmp_dir = tempfile.mkdtemp()
-
- # create emoji codepoints to EmojiData map
- self.emoji_data_map = load_emoji_data_map(self.unicode_path, self.without_flags)
-
- # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
- # returned is either default or 1 greater than the largest id in previous data
- self.emoji_id = load_previous_metadata(self.emoji_data_map)
-
- # recalcTimestamp parameter will keep the modified field same as the original font. Changing
- # the modified field in the font causes the font ttf file to change, which makes it harder
- # to understand if something really changed in the font.
- with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
- # read image size data
- self.read_cbdt(ttf)
-
- # glyph name to codepoint map
- glyph_to_codepoint_map = {}
-
- # read single codepoint emojis under cmap12 and clear the table contents
- cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)
-
- # read emoji sequences gsub and clear the table contents
- self.read_gsub(ttf, glyph_to_codepoint_map)
-
- # add all new codepoint to glyph mappings
- cmap12_table.cmap.update(self.remapped_codepoints)
-
- # final metadata csv will be used to generate the sha, therefore write it before
- # metadata json is written.
- self.write_metadata_csv()
-
- output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
- flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
- flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)
-
- total_emoji_count = self.write_metadata_json(output_json_file)
-
- # create the flatbuffers binary and java classes
- flatc_command = ['flatc',
- '-o',
- tmp_dir,
- '-b',
- '-j',
- FLATBUFFER_SCHEMA,
- output_json_file]
- subprocess.check_output(flatc_command)
-
- # inject metadata binary into font
- inject_meta_into_font(ttf, flatbuffer_bin_file)
-
- # add watermark glyph for manual verification.
- self.add_watermark(ttf)
-
- # update CBDT and CBLC versions since older android versions cannot read > 2.0
- ttf['CBDT'].version = 2.0
- ttf['CBLC'].version = 2.0
-
- # save the new font
- ttf.save(FONT_PATH)
-
- update_flatbuffer_java_files(flatbuffer_java_dir, #tmp dir
- FLATBUFFER_HEADER,
- FLATBUFFER_JAVA_TARGET)
-
- create_test_data(self.unicode_path)
-
- # clear the tmp output directory
- shutil.rmtree(tmp_dir, ignore_errors=True)
-
- print(
- "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))
-
-
-def print_usage():
- """Prints how to use the script."""
- print("Please specify a path to font and unicode files.\n"
- "usage: createfont.py noto-color-emoji-path unicode-dir-path")
-
-def parse_args(argv):
- # parse manually to avoid any extra dependencies
- if len(argv) == 4:
- without_flags = argv[3] == '--without-flags'
- else:
- without_flags = False
-
- if len(argv) < 3:
- print_usage()
- sys.exit(1)
- return (sys.argv[1], sys.argv[2], without_flags)
-
-def main():
- font_file, unicode_dir, without_flags = parse_args(sys.argv)
- EmojiFontCreator(font_file, unicode_dir, without_flags).create_font()
-
-
-if __name__ == '__main__':
- main()
diff --git a/emoji-compat/data/emoji_metadata.txt b/emoji-compat/data/emoji_metadata.txt
index d57d34e..6a9110e 100644
--- a/emoji-compat/data/emoji_metadata.txt
+++ b/emoji-compat/data/emoji_metadata.txt
@@ -3685,3 +3685,34 @@ F0E97 31 8 1FAF6 1F3FC
F0E98 31 8 1FAF6 1F3FD
F0E99 31 8 1FAF6 1F3FE
F0E9A 31 8 1FAF6 1F3FF
+F0E9C 1500 9 1F426 200D 2B1B
+F0E9D 1500 9 1F6DC
+F0E9E 1500 9 1FA75
+F0E9F 1500 9 1FA76
+F0EA0 1500 9 1FA77
+F0EA1 1500 9 1FA87
+F0EA2 1500 9 1FA88
+F0EA3 1500 9 1FAAD
+F0EA4 1500 9 1FAAE
+F0EA5 1500 9 1FAAF
+F0EA6 1500 9 1FABB
+F0EA7 1500 9 1FABC
+F0EA8 1500 9 1FABD
+F0EA9 1500 9 1FABF
+F0EAA 1500 9 1FACE
+F0EAB 1500 9 1FACF
+F0EAC 1500 9 1FADA
+F0EAD 1500 9 1FADB
+F0EAE 1500 9 1FAE8
+F0EAF 1500 9 1FAF7
+F0EB0 1500 9 1FAF7 1F3FB
+F0EB1 1500 9 1FAF7 1F3FC
+F0EB2 1500 9 1FAF7 1F3FD
+F0EB3 1500 9 1FAF7 1F3FE
+F0EB4 1500 9 1FAF7 1F3FF
+F0EB5 1500 9 1FAF8
+F0EB6 1500 9 1FAF8 1F3FB
+F0EB7 1500 9 1FAF8 1F3FC
+F0EB8 1500 9 1FAF8 1F3FD
+F0EB9 1500 9 1FAF8 1F3FE
+F0EBA 1500 9 1FAF8 1F3FF
diff --git a/emoji-compat/fetch.sh b/emoji-compat/fetch.sh
new file mode 100755
index 0000000..d35127a
--- /dev/null
+++ b/emoji-compat/fetch.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Helper script for fetching new emoji compat fonts from github source
+# of truth
+
+# This script is very basic, please extend or replace to handle your
+# needs (e.g. pulling specific commits, releases, branches) as needed.
+
+#set -o xtrace
+# Errexit is deliberately left off: each command whose failure matters is
+# followed by an explicit status check.
+set +e
+
+METADATA_GIT="https://github.com/googlefonts/emojicompat.git"
+FONT_GIT="https://github.com/googlefonts/noto-emoji.git"
+
+# Resolved directory containing this script. Quoted so that a checkout path
+# containing spaces is not word-split.
+SCRIPT_DIR=$(readlink -f -- "$(dirname -- "$0")")
+
+# Scratch directory for the clones. The EXIT trap removes it on every exit
+# path, including the early "exit N" calls further down the script.
+TMP_DIR=$(mktemp -d) || { echo "ERROR: could not create temp dir" >&2; exit 1; }
+trap 'rm -rf -- "$TMP_DIR"' EXIT
+
+if ! GIT_VERSION=$(git --version); then
+  echo -e "ERROR: git not found"
+  exit 1
+fi
+
+if ! TTX_VERSION=$(ttx --version); then
+  echo "ERROR ttx required to check font"
+  echo -e "\t python3 -m venv venv"
+  echo -e "\t source venv/bin/activate"
+  echo -e "\t pip install fonttools"
+  exit 127
+fi
+
+echo "METADATA: $METADATA_GIT"
+echo "FONT: $FONT_GIT"
+echo "Updating in: $SCRIPT_DIR"
+
+# confirm directory is clean
+# git -C runs the status check inside SCRIPT_DIR without changing our cwd.
+UNCOMMITED_CHANGES=$(git -C "$SCRIPT_DIR" status --porcelain)
+if [[ "$UNCOMMITED_CHANGES" ]]; then
+  echo "$UNCOMMITED_CHANGES"
+  # -r keeps any backslash in the reply literal.
+  read -r -p "Uncommited changes. Continue? [y/N]:" uncommited
+  if [[ ! $uncommited =~ ^[Yy] ]]; then
+    exit 3
+  fi
+fi
+
+# Show the latest commit of the clone at $TMP_DIR/$1 and ask whether to go on.
+#   $1: directory name of the clone, relative to $TMP_DIR
+# Exits the script with status 2 unless the reply starts with y or Y, and with
+# status 1 when the clone directory cannot be entered.
+function confirm_git_commit() {
+  local RESULT yn
+  # Without this guard a failed clone would leave git log, and the popd at
+  # the end, operating on the wrong directory.
+  if ! pushd "$TMP_DIR/$1" > /dev/null; then
+    echo "ERROR: cannot enter $TMP_DIR/$1" >&2
+    exit 1
+  fi
+  RESULT=$(git log -1)
+  echo "$RESULT"
+  # -r keeps any backslash in the reply literal.
+  read -r -p "Continue for repo $1? [y/N]: " yn
+  if [[ ! $yn =~ ^[Yy] ]]; then
+    exit 2
+  fi
+  popd > /dev/null
+}
+
+# Everything below uses paths relative to the scratch directory.
+pushd "$TMP_DIR" > /dev/null || exit 1
+
+# Stop when a clone fails; git reports the reason on stderr.
+git clone --quiet --depth 1 --branch main "$METADATA_GIT" || exit 1
+confirm_git_commit "emojicompat"
+METADATA_FILE="./emojicompat/src/emojicompat/emoji_metadata.txt"
+if [ ! -f "$METADATA_FILE" ]; then
+  echo "ERROR: $METADATA_FILE not found in emojicompat clone" >&2
+  exit 1
+fi
+# adjust newlines to avoid giant diffs (append a carriage return to each line)
+awk '{ print $0 "\r" }' "$METADATA_FILE" > emoji_metadata.txt
+
+# pull the font
+git clone --quiet --depth 1 --branch main "$FONT_GIT" || exit 1
+confirm_git_commit "noto-emoji"
+cp ./noto-emoji/fonts/NotoColorEmoji-emojicompat.ttf ./NewFont.ttf || exit 1
+
+# Dump the font to XML so the table header version can be checked with grep.
+ttx -o NewFont.ttx NewFont.ttf 2> /dev/null
+
+if ! grep -q 'header version="2.0"' NewFont.ttx; then
+  echo -e "WRONG HEADER VERSION IN FONT FILE (breaks API23)"
+  echo -e "Expected 'header version=\"2.0\"'"
+  echo -e "Found: "
+  grep 'header version' NewFont.ttx
+  exit 128
+fi
+
+# concat new codepoints to emojis.txt
+# comm needs both inputs sorted in the active collation order; LC_ALL=C pins
+# that to plain byte order regardless of the caller's locale.
+# NOTE(review): assumes both metadata files are byte-sorted -- confirm.
+NEW_LINES=$(LC_ALL=C comm -23 emoji_metadata.txt "$SCRIPT_DIR/data/emoji_metadata.txt")
+# Keep the fourth space-separated field onward and drop the carriage return
+# appended above.
+NEW_CODEPOINTS=$(echo "$NEW_LINES" | cut -d" " -f4- | tr -d '\r')
+
+if [[ "$NEW_CODEPOINTS" ]]; then
+  echo "$NEW_CODEPOINTS"
+  read -r -p "New codpoints found in metadata. Append emojis.txt? [y/N]:" emojiAppend
+  if [[ "$emojiAppend" =~ ^[Yy] ]]; then
+    echo "$NEW_CODEPOINTS" >> "$SCRIPT_DIR/supported-emojis/emojis.txt"
+    echo "Updated ${SCRIPT_DIR}/supported-emojis/emojis.txt"
+  fi
+fi
+
+# Report an update only when the copy actually succeeded.
+cp emoji_metadata.txt "$SCRIPT_DIR/data/emoji_metadata.txt" || exit 1
+echo "Updated ${SCRIPT_DIR}/data/emoji_metadata.txt"
+cp NewFont.ttf "$SCRIPT_DIR/font/NotoColorEmojiCompat.ttf" || exit 1
+echo "Updated ${SCRIPT_DIR}/font/NotoColorEmojiCompat.ttf"
+
+popd > /dev/null
+rm -rf -- "$TMP_DIR"
diff --git a/emoji-compat/font/NotoColorEmojiCompat.ttf b/emoji-compat/font/NotoColorEmojiCompat.ttf
index 7334ae8..c48cd26 100644
--- a/emoji-compat/font/NotoColorEmojiCompat.ttf
+++ b/emoji-compat/font/NotoColorEmojiCompat.ttf
Binary files differ
diff --git a/emoji-compat/supported-emojis/emojis.txt b/emoji-compat/supported-emojis/emojis.txt
index a3ac299..3e5eb9e 100644
--- a/emoji-compat/supported-emojis/emojis.txt
+++ b/emoji-compat/supported-emojis/emojis.txt
@@ -3831,3 +3831,34 @@
39 FE0F 20E3
A9 FE0F
AE FE0F
+1F426 200D 2B1B
+1F6DC
+1FA75
+1FA76
+1FA77
+1FA87
+1FA88
+1FAAD
+1FAAE
+1FAAF
+1FABB
+1FABC
+1FABD
+1FABF
+1FACE
+1FACF
+1FADA
+1FADB
+1FAE8
+1FAF7
+1FAF7 1F3FB
+1FAF7 1F3FC
+1FAF7 1F3FD
+1FAF7 1F3FE
+1FAF7 1F3FF
+1FAF8
+1FAF8 1F3FB
+1FAF8 1F3FC
+1FAF8 1F3FD
+1FAF8 1F3FE
+1FAF8 1F3FF