summaryrefslogtreecommitdiff
path: root/emoji-compat/createfont.py
blob: 7b42c5ef28f2cb7c18c2966ed1270edc10da0454 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
#!/usr/bin/python
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
under a meta tag with name 'Emji'.

In order to create the final font, the following are used as inputs:

- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
external/noto-fonts/emoji/NotoColorEmoji.ttf

- Unicode files: Unicode files that are in the framework, and lists information about all the
emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
and emoji-variation-sequences.txt. Currently at external/unicode/.

- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
in the Android font. Resides in framework and currently under external/unicode/.

- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
version that the emoji was added (compatAdded). Updated when the script is executed.

- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.

After execution, the following files are generated if they do not exist; otherwise they are updated:
- font/NotoColorEmojiCompat.ttf
- supported-emojis/emojis.txt
- data/emoji_metadata.txt
- src/java/androidx/text/emoji/flatbuffer/*
"""

from __future__ import print_function

import contextlib
import csv
import hashlib
import itertools
import json
import os
import shutil
import sys
import tempfile
from fontTools import ttLib

########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK Version. Used as the default sdkAdded value of a newly created emoji entry.
SDK_VERSION = 29
# metadata version that will be embedded into font. If there are updates to the font that would
# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
# defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 4

####### main directories where output files are created #######
# All output directories are resolved relative to the directory that contains this script.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
JAVA_SRC_DIR = os.path.join(SCRIPT_DIR, 'src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata output file (space separated text: id, sdkAdded, compatAdded, codepoints)
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji files for emojis that are not in Unicode files. Both paths are relative to the
# unicode directory that is given to the script.
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in emoji-data.txt.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file. The same file is read as input and rewritten as output.
INPUT_META_FILE = OUTPUT_META_FILE
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(DATA_DIR, 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(DATA_DIR, "flatbuffer_header.txt")
# temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package. The trailing '' element makes the joined
# path end with a path separator.
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# temporary directory that contains flatbuffer java files (relative path, same value as
# FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

# Emoji property names as they appear in emoji-data.txt after the lines are upper-cased.
EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
# Only lines of emoji-data.txt that carry one of these properties are processed.
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
# Marker used to pick the emoji style lines of emoji-variation-sequences.txt.
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

# First emoji id to hand out when there is no previous metadata to continue from.
DEFAULT_EMOJI_ID = 0xF0001
# FE0F codepoint (emoji style); it is removed from sequences before they are used as map keys.
EMOJI_STYLE_VS = 0xFE0F

def to_hex_str(value):
    """Return the upper-case hexadecimal form of an int, without the 0x prefix."""
    return '{0:X}'.format(value)

def hex_str_to_int(string):
    """Parse a hexadecimal string (for example '1F600') and return its integer value."""
    parsed_value = int(string, base=16)
    return parsed_value

def codepoint_to_string(codepoints):
    """Join the hex form of every codepoint with single spaces.

    :param codepoints: iterable of integer codepoints
    :return: string such as '1F468 200D 1F469'; this form is used as the emoji map key
    """
    hex_values = map(to_hex_str, codepoints)
    return ' '.join(hex_values)

def prepend_header_to_file(file_path, header_path=None):
    """Replaces everything before the class declaration of a java file with the header.

    Used to update flatbuffer java files with header, comments and annotations. The text that
    precedes the first "public final class" occurrence is dropped and the content of the header
    file followed by a new line is written in its place.

    :param file_path: path of the java file that is rewritten in place
    :param header_path: path of the header file, FLATBUFFER_HEADER is used when it is None
    :raises ValueError: if the file does not contain "public final class"
    """
    if header_path is None:
        header_path = FLATBUFFER_HEADER

    with open(header_path, "r") as header_file:
        header = header_file.read()

    with open(file_path, "r+") as original_file:
        original_content = original_file.read()
        start_index = original_content.index("public final class")
        original_file.seek(0)
        original_file.write(header + "\n" + original_content[start_index:])
        # the new content can be shorter than the old one, drop the stale bytes that would
        # otherwise remain at the end of the file
        original_file.truncate()


def update_flatbuffer_java_files(flatbuffer_java_dir):
    """Prepends headers to flatbuffer java files and copies to the final destination.

    :param flatbuffer_java_dir: directory that contains the java files generated by flatbuffers.
    The directory may or may not end with a path separator.
    """
    java_file_names = (FLATBUFFER_METADATA_LIST_JAVA, FLATBUFFER_METADATA_ITEM_JAVA)
    # os.path.join gives the same result as plain concatenation when the directory already ends
    # with a separator, and still builds a valid path when it does not
    generated_files = [os.path.join(flatbuffer_java_dir, name) for name in java_file_names]

    for generated_file in generated_files:
        prepend_header_to_file(generated_file)

    if not os.path.exists(FLATBUFFER_JAVA_TARGET):
        os.makedirs(FLATBUFFER_JAVA_TARGET)

    for name, generated_file in zip(java_file_names, generated_files):
        shutil.copy(generated_file, os.path.join(FLATBUFFER_JAVA_TARGET, name))

def create_test_data(unicode_path):
    """Collects every emoji found in the unicode files and rewrites the test file.

    The result is written to TEST_DATA_PATH, one emoji per line, as space separated upper-case
    hex codepoints in sorted order.

    :param unicode_path: directory that contains the unicode emoji files
    """
    def unicode_file(name):
        return os.path.join(unicode_path, name)

    # (file name, optional) pairs; the Android additions are allowed to be missing
    sequence_sources = (
        (EMOJI_ZWJ_FILE, False),
        (EMOJI_SEQ_FILE, False),
        (ANDROID_EMOJI_ZWJ_SEQ_FILE, True),
        (ANDROID_EMOJIS_SEQ_FILE, True),
    )
    sequence_lines = []
    for file_name, is_optional in sequence_sources:
        sequence_lines.extend(read_emoji_lines(unicode_file(file_name), optional=is_optional))

    # the variation sequences file lists both text and emoji styles, keep only the lines that
    # describe the emoji style (FE0F)
    variation_lines = read_emoji_lines(unicode_file(EMOJI_VARIATION_SEQ_FILE))
    sequence_lines.extend(entry for entry in variation_lines
                          if STD_VARIANTS_EMOJI_STYLE in entry)

    supported = set()
    for entry in sequence_lines:
        # Since unicode 12.0 emoji-sequences.txt has a "Basic_Emoji" section. Those lines are
        # skipped because emoji presentation is already covered by
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" not in entry:
            hex_fields = entry.split(';')[0].strip().split(' ')
            sequence = [hex_str_to_int(field) for field in hex_fields]
            supported.add(codepoint_to_string(sequence).upper())

    # single codepoints are added only when they have emoji presentation by default
    for entry in read_emoji_lines(unicode_file(EMOJI_DATA_FILE)):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(entry)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        if emoji_property == EMOJI_PRESENTATION_STR:
            for codepoint in codepoints_for_emojirange(codepoints_range):
                supported.add(to_hex_str(codepoint))

    # finally add the android default emoji exceptions
    for codepoint in get_emoji_style_exceptions(unicode_path):
        supported.add(to_hex_str(codepoint))

    ordered_emojis = sorted(supported)
    with open(TEST_DATA_PATH, "w") as test_file:
        test_file.writelines("%s\n" % emoji for emoji in ordered_emojis)

class _EmojiData(object):
    """Metadata record of one emoji, either a single codepoint or a codepoint sequence."""

    def __init__(self, codepoints, is_emoji_style):
        """Creates a record that has no id and no image size yet.

        :param codepoints: list of integer codepoints of the emoji
        :param is_emoji_style: value stored as the emoji_style flag
        """
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        # id, width and height start as 0 and are filled in later
        self.emoji_id = 0
        self.width = 0
        self.height = 0
        # a new record is assumed to be added by the current sdk and metadata versions
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Copies the image size from metrics.
        :param metrics: an object that exposes width and height attributes.
        """
        self.width = metrics.width
        self.height = metrics.height

    def __repr__(self):
        sequence = codepoint_to_string(self.codepoints)
        return '<EmojiData {0} - {1}>'.format(self.emoji_style, sequence)

    def create_json_element(self):
        """Returns the dictionary that represents this emoji in the metadata json."""
        return {
            'id': self.emoji_id,
            'emojiStyle': self.emoji_style,
            'sdkAdded': self.sdk_added,
            'compatAdded': self.compat_added,
            'width': self.width,
            'height': self.height,
            'codepoints': self.codepoints,
        }

    def create_txt_row(self):
        """Returns the row of this emoji for the metadata text file: hex id, sdkAdded,
        compatAdded and then one hex value per codepoint."""
        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        row.extend(to_hex_str(codepoint) for codepoint in self.codepoints)
        return row

    def update(self, emoji_id, sdk_added, compat_added):
        """Overwrites the id, sdk_added and compat_added fields with the given values."""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added


def read_emoji_lines(file_path, optional=False):
    """Returns the meaningful lines of a unicode emoji file, stripped and upper-cased.

    Blank lines and lines that start with '#' are skipped.
    :param file_path: unicode emoji file path
    :param optional: if True an IOError raised while reading the file is swallowed and the
    lines collected so far (possibly none) are returned
    :return: list of uppercase strings
    """
    collected = []
    try:
        with open(file_path) as file_stream:
            for raw_line in file_stream:
                text = raw_line.strip()
                if not text or text.startswith('#'):
                    continue
                collected.append(text.upper())
    except IOError:
        if not optional:
            raise

    return collected

def get_emoji_style_exceptions(unicode_path):
    """Returns the codepoints listed in EMOJI_STYLE_OVERRIDE_FILE as a list of integers.

    :param unicode_path: directory that contains the unicode emoji files
    """
    override_path = os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE)
    # the first field of each line is parsed as one hex codepoint
    return [hex_str_to_int(codepoints_and_emoji_prop(entry)[0])
            for entry in read_emoji_lines(override_path)]

def codepoints_for_emojirange(codepoints_range):
    """Expands the codepoint field of an emoji file line into a list of integers.

    :param codepoints_range: either one hex codepoint, or an inclusive range written as
    START..END
    :return: list of integer codepoints
    """
    if '..' not in codepoints_range:
        return [hex_str_to_int(codepoints_range)]

    first, last = codepoints_range.split('..')
    # the range in the file is inclusive, therefore + 1
    return list(range(hex_str_to_int(first), hex_str_to_int(last) + 1))

def codepoints_and_emoji_prop(line):
    """Splits an emoji file line into its codepoint field and its property field.

    Expected line format:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]

    :return: tuple of (codepoints or codepoint range string, emoji property string)
    :raises ValueError: if the line does not contain a '#'
    """
    definition, marker, _ = line.strip().partition('#')
    if not marker:
        raise ValueError("Line is expected to have # in it")

    fields = definition.split(';')
    return fields[0].strip(), fields[1].strip()

def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Reads a unicode emoji file in which each line describes one codepoint or an interval of
    codepoints, and records every codepoint of the accepted lines in emoji_data_map.

    Expected line format:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]

    :param emoji_data_map: map of codepoint string to EmojiData, updated in place
    :param file_path: path of the unicode emoji file
    :param emoji_style_exceptions: codepoints that are treated as emoji style regardless of the
    property in the file
    """
    for line in read_emoji_lines(file_path):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue

        has_emoji_presentation = emoji_property == EMOJI_PRESENTATION_STR
        for codepoint in codepoints_for_emojirange(codepoints_range):
            key = codepoint_to_string([codepoint])
            emoji_style = has_emoji_presentation or codepoint in emoji_style_exceptions
            if key not in emoji_data_map:
                emoji_data_map[key] = _EmojiData([codepoint], emoji_style)
            elif emoji_style:
                # the same codepoint is defined on more than one line, the flag is only ever
                # switched on and never back off
                emoji_data_map[key].emoji_style = True


def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    """Reads a file of emoji sequences and adds an EmojiData to emoji_data_map for every
    sequence that is not in the map yet.

    Expected line format: 1F1E6 1F1E8 ; Name ; [...]

    :param emoji_data_map: map of codepoint string to EmojiData, updated in place
    :param file_path: path of the sequence file
    :param optional: passed to read_emoji_lines, if True an unreadable file is not an error
    """
    for line in read_emoji_lines(file_path, optional):
        # Since unicode 12.0 emoji-sequences.txt has a "Basic_Emoji" section. Those lines are
        # skipped because emoji presentation is already covered by
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue

        hex_fields = line.split(';')[0].strip().split(' ')
        parsed = [hex_str_to_int(field) for field in hex_fields]
        # the emoji style variation selector is not part of the key
        sequence = [codepoint for codepoint in parsed if codepoint != EMOJI_STYLE_VS]
        key = codepoint_to_string(sequence)
        if key not in emoji_data_map:
            emoji_data_map[key] = _EmojiData(sequence, False)


def load_emoji_data_map(unicode_path):
    """Builds the emoji map from the unicode emoji files and the Android additions.

    :param unicode_path: directory that contains the unicode emoji files
    :return: map of space separated codepoints to EmojiData
    """
    emoji_data_map = {}

    # single codepoints first
    style_exceptions = get_emoji_style_exceptions(unicode_path)
    emoji_data_file = os.path.join(unicode_path, EMOJI_DATA_FILE)
    read_emoji_intervals(emoji_data_map, emoji_data_file, style_exceptions)

    # then the sequences; the Android addition files are read only if they exist
    sequence_sources = (
        (EMOJI_ZWJ_FILE, False),
        (EMOJI_SEQ_FILE, False),
        (ANDROID_EMOJI_ZWJ_SEQ_FILE, True),
        (ANDROID_EMOJIS_SEQ_FILE, True),
    )
    for file_name, is_optional in sequence_sources:
        sequence_file = os.path.join(unicode_path, file_name)
        read_emoji_sequences(emoji_data_map, sequence_file, optional=is_optional)

    return emoji_data_map


def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added
    fields in emoji_metadata.txt.

    Rows whose codepoints are not in emoji_data_map are ignored, they do not affect the returned
    id either. Blank lines and lines that start with '#' are skipped.

    :param emoji_data_map: map of codepoint string to EmojiData, the values are updated in place
    :return: the next emoji id to use, which is 1 greater than the largest id that was applied
    to emoji_data_map. If emoji_metadata.txt does not exist, or no row matched, returns
    DEFAULT_EMOJI_ID
    """
    next_emoji_id = DEFAULT_EMOJI_ID
    if not os.path.isfile(INPUT_META_FILE):
        return next_emoji_id

    with open(INPUT_META_FILE) as csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            # csv.reader returns an empty list for a blank line, row[0] would raise IndexError
            if not row or row[0].startswith('#'):
                continue
            emoji_id = hex_str_to_int(row[0])
            sdk_added = int(row[1])
            compat_added = int(row[2])
            key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
            if key in emoji_data_map:
                emoji_data = emoji_data_map[key]
                emoji_data.update(emoji_id, sdk_added, compat_added)
                if emoji_data.emoji_id >= next_emoji_id:
                    next_emoji_id = emoji_data.emoji_id + 1

    return next_emoji_id


def update_ttlib_orig_sort():
    """Replaces ttLib.sortedTagList with a wrapper that moves the meta table to the front."""
    wrapped_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Applies the wrapped ttLib sort and then moves 'meta' to index 0. Raises ValueError if
        the sorted list has no 'meta' tag."""
        ordered_tags = wrapped_sort(tag_list, table_order)
        meta_tag = ordered_tags.pop(ordered_tags.index('meta'))
        ordered_tags.insert(0, meta_tag)
        return ordered_tags

    ttLib.sortedTagList = meta_first_table_sort


def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """Injects the metadata binary into the font under the EMOJI_META_TAG_NAME tag of the meta
    table. The meta table is created if the font does not have one.

    :param ttf: font object to update
    :param flatbuffer_bin_filename: path of the binary file generated by flatbuffers
    """
    if 'meta' not in ttf:
        ttf['meta'] = ttLib.getTableClass('meta')()
    meta = ttf['meta']
    # the flatbuffer output is binary data, reading it in text mode would decode or translate
    # the bytes
    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()

    # sort meta tables for faster access
    update_ttlib_orig_sort()


def validate_input_files(font_path, unicode_path):
    """Checks that the font file, the unicode directory and the required unicode emoji files
    exist.

    :param font_path: path of the font file
    :param unicode_path: directory that contains the unicode emoji files
    :raises ValueError: for the first input that is missing
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    required_file_names = (EMOJI_DATA_FILE, EMOJI_ZWJ_FILE, EMOJI_SEQ_FILE)
    for file_name in required_file_names:
        emoji_filename = os.path.join(unicode_path, file_name)
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)


def add_file_to_sha(sha_algo, file_path):
    """Feeds the content of the file into the hash object in chunks of 8192 bytes.

    :param sha_algo: hash object that has an update method, i.e. hashlib.sha256()
    :param file_path: path of the file to read
    """
    with open(file_path, 'rb') as input_file:
        # The file is opened in binary mode, so the end of file is signalled with empty bytes.
        # The sentinel has to be b'' since '' never equals b'' on Python 3.
        for data in iter(lambda: input_file.read(8192), b''):
            sha_algo.update(data)

def create_sha_from_source_files(font_paths):
    """Returns the hex SHA-256 digest of the content of the given files, hashed one after
    another in the given order.

    :param font_paths: iterable of file paths
    """
    digest = hashlib.sha256()
    for source_path in font_paths:
        add_file_to_sha(digest, source_path)
    return digest.hexdigest()


class EmojiFontCreator(object):
    """Creates the EmojiCompat font.

    Reads the emoji definitions from the unicode files and the glyph information from the given
    font, assigns an id to every emoji that the font can render and embeds the resulting
    metadata into a copy of the font.
    """

    def __init__(self, font_path, unicode_path):
        """
        :param font_path: path to Android NotoColorEmoji font
        :param unicode_path: path to directory that contains unicode files
        :raises ValueError: if one of the input files does not exist
        """
        validate_input_files(font_path, unicode_path)

        self.font_path = font_path
        self.unicode_path = unicode_path
        # map of space separated codepoints to EmojiData
        self.emoji_data_map = {}
        # map of emoji id to glyph name, merged into cmap 12 of the font
        self.remapped_codepoints = {}
        # map of glyph name to the image metrics read from CBDT
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height
        Codepoints that are not in emoji_data_map are ignored."""
        key = codepoint_to_string(codepoints)
        if key in self.emoji_data_map:
            # add emoji to final data
            emoji_data = self.emoji_data_map[key]
            emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
            # id 0 means that the emoji did not get an id from the previous metadata
            if emoji_data.emoji_id == 0:
                emoji_data.emoji_id = self.emoji_id
                self.emoji_id = self.emoji_id + 1
            self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT into glyph_to_image_metrics_map."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            # items() instead of iteritems() so that the code runs on both Python 2 and 3
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis that are in cmap12 and updates glyph_to_codepoint_map.

        :return: the cmap table with format 12, platformID 3 and platEncID 10
        :raises ValueError: if the font does not have such a table
        """
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB. Only the context (type 5) and ligature
        (type 4) subtables are processed, context subtables first."""
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if not sub_class_set:
                continue
            for sub_class_rule in sub_class_set.SubClassRule:
                # prepare holder for substitution list. each rule will have a list that is added
                # to the subs_list.
                subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                for record in sub_class_rule.SubstLookupRecord:
                    subs_list[record.SequenceIndex] = self.get_substitutions(
                        lookup_list, record.LookupListIndex)
                # create combinations of all lists. the combinations will be filtered by
                # emoji_data_map. the first non empty output glyph will be used as the final
                # glyph
                for seq in itertools.product(*subs_list):
                    glyph_names = [x["input"] for x in seq]
                    codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                    # a real list is required, filter() returns an iterator on Python 3 which
                    # supports neither len() nor indexing
                    nonempty_outputs = [x["output"] for x in seq
                                        if x["output"] and x["output"].strip()]
                    if not nonempty_outputs:
                        print("Warning: no output glyph is set for " + str(glyph_names))
                        continue
                    elif len(nonempty_outputs) > 1:
                        print(
                            "Warning: multiple glyph is set for "
                            + str(glyph_names) + ", will use the first one")

                    glyph = nonempty_outputs[0]
                    self.update_emoji_data(codepoints, glyph)

    def get_substitutions(self, lookup_list, index):
        """Returns the substitutions of the lookup at the given index.

        :return: list of {"input": glyph name, "output": glyph name} dictionaries, one for each
        entry in the mapping of each subtable of the lookup
        """
        result = []
        for subtable in lookup_list.Lookup[index].SubTable:
            for input_glyph, output_glyph in subtable.mapping.items():
                result.append({"input": input_glyph, "output": output_glyph})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Ligature Substitution. The sequence of a
        ligature is its first glyph followed by its components."""
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis into a json file, sorted by emoji id.

        :param output_json_file_path: path of the json file to create
        :return: number of emojis that are written
        """
        output_json = {}
        output_json['version'] = METADATA_VERSION
        output_json['sourceSha'] = create_sha_from_source_files(
            [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])

        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
        output_json['list'] = [emoji_data.create_json_element()
                               for emoji_data in emoji_data_list]

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return len(emoji_data_list)

    def write_metadata_csv(self):
        """Writes emoji metadata into space separated file, sorted by emoji id"""
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def create_font(self):
        """Creates the EmojiCompat font from the font and the unicode files that were given to
        the constructor.

        Writes the font to FONT_PATH and updates the metadata file, the flatbuffer java files
        and the test data file. Requires the flatc executable to be on the PATH.

        :raises subprocess.CalledProcessError: if flatc exits with a non zero status
        """
        # local import, this is the only place that starts an external process
        import subprocess

        tmp_dir = tempfile.mkdtemp()
        try:
            # create emoji codepoints to EmojiData map
            self.emoji_data_map = load_emoji_data_map(self.unicode_path)

            # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that
            # is returned is either default or 1 greater than the largest id in previous data
            self.emoji_id = load_previous_metadata(self.emoji_data_map)

            # recalcTimestamp parameter will keep the modified field same as the original font.
            # Changing the modified field in the font causes the font ttf file to change, which
            # makes it harder to understand if something really changed in the font.
            with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
                # read image size data
                self.read_cbdt(ttf)

                # glyph name to codepoint map
                glyph_to_codepoint_map = {}

                # read single codepoint emojis under cmap12
                cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

                # read emoji sequences under gsub
                self.read_gsub(ttf, glyph_to_codepoint_map)

                # add all new codepoint to glyph mappings
                cmap12_table.cmap.update(self.remapped_codepoints)

                # final metadata csv will be used to generate the sha, therefore write it before
                # metadata json is written.
                self.write_metadata_csv()

                output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
                flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
                flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

                total_emoji_count = self.write_metadata_json(output_json_file)

                # create the flatbuffers binary and java classes. The arguments are passed as a
                # list so that paths with spaces or shell characters are handled, and a failing
                # flatc stops the script instead of being silently ignored.
                subprocess.check_call(
                    ['flatc', '-o', tmp_dir, '-b', '-j', FLATBUFFER_SCHEMA, output_json_file])

                # inject metadata binary into font
                inject_meta_into_font(ttf, flatbuffer_bin_file)

                # update CBDT and CBLC versions since older android versions cannot read > 2.0
                ttf['CBDT'].version = 2.0
                ttf['CBLC'].version = 2.0

                # save the new font
                ttf.save(FONT_PATH)

                update_flatbuffer_java_files(flatbuffer_java_dir)

                create_test_data(self.unicode_path)
        finally:
            # clear the tmp output directory, also when one of the steps above fails
            shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))


def print_usage():
    """Prints the command line usage of the script to standard output."""
    usage_text = ("Please specify a path to font and unicode files.\n"
                  "usage: createfont.py noto-color-emoji-path unicode-dir-path")
    print(usage_text)


if __name__ == '__main__':
    # two positional arguments are required: the font path and the unicode directory path
    if len(sys.argv) >= 3:
        EmojiFontCreator(sys.argv[1], sys.argv[2]).create_font()
    else:
        print_usage()
        sys.exit(1)