blob: f193ea41dff9cc5d57d308e543f0b07df98f91f1 [file] [log] [blame]
Roozbeh Pournader0e969e22016-03-09 23:08:45 -08001#!/usr/bin/env python
2
3import collections
Roozbeh Pournader5dde0872016-03-31 13:54:56 -07004import copy
Roozbeh Pournader0e969e22016-03-09 23:08:45 -08005import glob
Roozbeh Pournader5dde0872016-03-31 13:54:56 -07006import itertools
Roozbeh Pournader0e969e22016-03-09 23:08:45 -08007from os import path
8import sys
9from xml.etree import ElementTree
10
11from fontTools import ttLib
12
# VARIATION SELECTOR-16, the selector that requests emoji presentation.
EMOJI_VS = 0xFE0F

# Maps lowercase language codes to the four-letter script code assumed for
# that language when the language tag does not name a script itself.
LANG_TO_SCRIPT = {
    # Latin-script languages
    'cy': 'Latn', 'da': 'Latn', 'de': 'Latn', 'en': 'Latn',
    'es': 'Latn', 'et': 'Latn', 'eu': 'Latn', 'fr': 'Latn',
    'ga': 'Latn', 'hr': 'Latn', 'hu': 'Latn', 'nb': 'Latn',
    'nn': 'Latn', 'pt': 'Latn', 'sl': 'Latn', 'tk': 'Latn',
    # Bengali-script languages
    'as': 'Beng', 'bn': 'Beng',
    # Devanagari-script languages
    'hi': 'Deva', 'mr': 'Deva',
    # Languages that are alone in their script in this table
    'gu': 'Gujr',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'or': 'Orya',
    'pa': 'Guru',
    'ta': 'Taml',
    'te': 'Telu',
}
48
def lang_to_script(lang_code):
    """Return the four-letter script code for a language tag.

    The tag is lowercased and looked up in LANG_TO_SCRIPT. While there is no
    match, the last hyphen-separated subtag is examined: if it is four
    alphabetic characters it is taken to be the script and returned in title
    case; otherwise it is dropped and the shorter tag is tried again.

    Fails an assertion when no hyphen is left to strip and the remaining tag
    is still unknown.
    """
    candidate = lang_code.lower()
    while True:
        if candidate in LANG_TO_SCRIPT:
            return LANG_TO_SCRIPT[candidate]

        last_hyphen = candidate.rfind('-')
        assert last_hyphen != -1, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)

        last_subtag = candidate[last_hyphen+1:]
        if len(last_subtag) == 4 and last_subtag.isalpha():
            # The trailing subtag is itself the script.
            return last_subtag.title()

        # Drop the trailing subtag and retry with the shorter tag.
        candidate = candidate[:last_hyphen]
62
63
def printable(inp):
    """Return a human-readable rendering of code points for error messages.

    Args:
        inp: one of
            * an integer code point, rendered as 'U+XXXX';
            * a tuple of code points (a character sequence), rendered as
              '<U+XXXX, U+YYYY>';
            * a set or frozenset of either, rendered as '{..., ...}'.

    Returns:
        The rendered string. Members of a set are emitted in sorted order of
        their rendered text so that the same set always prints the same way.
    """
    # isinstance (rather than an exact type check) lets frozensets and
    # tuple/set subclasses take the container branches instead of falling
    # through to the integer format and raising TypeError.
    if isinstance(inp, (set, frozenset)):  # set of character sequences
        return '{' + ', '.join(sorted(printable(seq) for seq in inp)) + '}'
    if isinstance(inp, tuple):  # character sequence
        return '<' + ', '.join(printable(ch) for ch in inp) + '>'
    return 'U+%04X' % inp  # single character
71
72
def open_font(font):
    """Open a font from the fonts directory with fontTools.

    Args:
        font: a (file name, index) pair. The file name is resolved relative
            to the module-level _fonts_dir; index is passed to TTFont as
            fontNumber, or is None when no index applies.

    Returns:
        The ttLib.TTFont object for the font.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is None:
        return ttLib.TTFont(font_path)
    return ttLib.TTFont(font_path, fontNumber=index)
80
81
def get_best_cmap(font):
    """Return the mapping of the widest Unicode cmap subtable of a font.

    The format 12 subtable with platform 3 / encoding 10 (UCS-4) is
    preferred; the format 4 subtable with platform 3 / encoding 1 (BMP) is
    the fallback.

    Args:
        font: a (file name, index) pair as accepted by open_font().

    Returns:
        The `cmap` attribute of the chosen subtable, keyed by code point.

    Fails an assertion if the font has more than one subtable of either kind,
    or none of either kind.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    best_cmap = all_unicode_cmap if all_unicode_cmap else bmp_cmap
    # Without this check a font lacking both subtables would fail below with
    # an opaque AttributeError on None instead of naming the offending font.
    assert best_cmap is not None, (
        'No BMP or UCS-4 cmap found in %s' % (font, ))
    return best_cmap.cmap
97
98
def get_variation_sequences_cmap(font):
    """Return the variation-sequence cmap subtable of a font, if any.

    Looks for the subtable with format 14, platform 0, encoding 5.

    Args:
        font: a (file name, index) pair as accepted by open_font().

    Returns:
        The matching subtable object, or None when the font has none.

    Fails an assertion if more than one such subtable exists.
    """
    vs_cmap = None
    for subtable in open_font(font)['cmap'].tables:
        key = (subtable.format, subtable.platformID, subtable.platEncID)
        if key != (14, 0, 5):
            continue
        assert vs_cmap is None, 'More than one VS cmap in %s' % (font, )
        vs_cmap = subtable
    return vs_cmap
108
109
def get_emoji_map(font):
    """Build a map from everything the emoji font renders to its glyph.

    Keys are either a single code point (from the best cmap), a
    (base, variation selector) pair (from the variation-sequence cmap), or a
    longer tuple of code points (from GSUB ligature substitutions). Values
    are the glyphs they resolve to.

    Args:
        font: a (file name, index) pair as accepted by open_font().

    Fails an assertion if a GSUB lookup is not of type 4, or if any prefix
    (of length two or more) of a ligature sequence is already in the map.
    """
    # Single characters: start from a copy of the font's best cmap.
    emoji_map = copy.copy(get_best_cmap(font))
    # Glyph -> code point, taken before any sequence keys are added.
    reverse_cmap = {glyph: code for code, glyph in emoji_map.items()}

    # Variation sequences. A glyph of None means the sequence uses whatever
    # the base character already maps to.
    uvs_dict = get_variation_sequences_cmap(font).uvsDict
    for selector, mappings in uvs_dict.items():
        for base, glyph in mappings:
            key = (base, selector)
            if glyph is None:
                emoji_map[key] = emoji_map[base]
            else:
                emoji_map[key] = glyph

    # Ligature rules in GSUB, translated from glyph sequences back to code
    # point sequences.
    lookups = open_font(font)['GSUB'].table.LookupList.Lookup
    for lookup in lookups:
        assert lookup.LookupType == 4, 'We only understand type 4 lookups'
        for subtable in lookup.SubTable:
            for first_glyph, ligature_list in subtable.ligatures.items():
                for ligature in ligature_list:
                    glyphs = [first_glyph] + ligature.Component
                    sequence = tuple(reverse_cmap[glyph] for glyph in glyphs)
                    # No leading part of the sequence (two or more code
                    # points, up to the whole sequence) may have been seen.
                    for prefix_len in range(2, len(sequence)+1):
                        assert sequence[:prefix_len] not in emoji_map
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map
143
144
def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless the font maps at least one of `chars`.

    Args:
        font: a (file name, index) pair as accepted by open_font().
        chars: iterable of code points to look for in the font's best cmap.
    """
    best_cmap = get_best_cmap(font)
    if not any(char in best_cmap for char in chars):
        sys.exit('None of characters in %s were found in %s' % (chars, font))
151
152
def assert_font_supports_all_of_chars(font, chars):
    """Assert that every code point in `chars` is in the font's best cmap.

    Args:
        font: a (file name, index) pair as accepted by open_font().
        chars: iterable of code points that must all be mapped.
    """
    cmap = get_best_cmap(font)
    for code_point in chars:
        is_present = code_point in cmap
        assert is_present, (
            'U+%04X was not found in %s' % (code_point, font))
158
159
def assert_font_supports_none_of_chars(font, chars):
    """Assert that no code point in `chars` is in the font's best cmap.

    Args:
        font: a (file name, index) pair as accepted by open_font().
        chars: iterable of code points that must all be unmapped.
    """
    cmap = get_best_cmap(font)
    for code_point in chars:
        is_absent = code_point not in cmap
        assert is_absent, (
            'U+%04X was found in %s' % (code_point, font))
165
166
def assert_font_supports_all_sequences(font, sequences):
    """Assert that the font lists every given variation sequence.

    A sequence counts as supported when the font's variation-sequence cmap
    has an entry for the selector that contains the pair (base, None).

    Args:
        font: a (file name, index) pair as accepted by open_font().
        sequences: iterable of (base, variation selector) pairs; they are
            checked in sorted order.
    """
    uvs_dict = get_variation_sequences_cmap(font).uvsDict
    for sequence in sorted(sequences):
        base, selector = sequence
        supported = selector in uvs_dict and (base, None) in uvs_dict[selector]
        assert supported, (
            '<U+%04X, U+%04X> was not found in %s' % (base, selector, font))
172
173
def check_hyphens(hyphens_dir):
    """Check that fonts for every hyphenated script contain a hyphen.

    Each '*.hyb' file in `hyphens_dir` must be named 'hyph-<lang>.hyb'; the
    language part is turned into a script with lang_to_script(). Every font
    registered for such a script in _script_to_font_map must then support
    U+002D or U+2010.

    Fails an assertion on an unexpected file name or on a script that has no
    fonts at all.
    """
    # Collect all the scripts that need automatic hyphenation.
    hyphenated_scripts = set()
    for hyb_path in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        file_name = path.basename(hyb_path)
        assert file_name.startswith('hyph-'), (
            'Unknown hyphenation file %s' % file_name)
        # The language code sits between the first '-' and the first '.'.
        code_start = file_name.index('-') + 1
        code_end = file_name.index('.')
        hyphenated_scripts.add(lang_to_script(file_name[code_start:code_end]))

    HYPHENS = {0x002D, 0x2010}
    for script in hyphenated_scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, HYPHENS)
190
191
class FontRecord(object):
    """Plain record describing one font entry of the fallback chain.

    Attributes are stored exactly as passed to the constructor:
    name, scripts, variant, weight, style and font.
    """

    def __init__(self, name, scripts, variant, weight, style, font):
        # Values describing the family the font belongs to.
        self.name, self.scripts, self.variant = name, scripts, variant
        # Values describing the individual font.
        self.weight, self.style, self.font = weight, style, font
200
201
def parse_fonts_xml(fonts_xml_path):
    """Parse fonts.xml into the module-level font tables.

    Rebuilds two globals from scratch:
        _fallback_chain: list with one FontRecord per <font> element, in
            document order.
        _script_to_font_map: defaultdict mapping a script code to the set of
            (font file, index) pairs registered for it. Named families are
            registered for Latn, Grek and Cyrl; unnamed families for the
            scripts derived from their 'lang' attribute.

    Fails an assertion when the file has 254 or more families, when a named
    family carries a variant or lang, when a variant, tag or style has an
    unexpected value, or when a weight is not a multiple of 100.
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []

    families = ElementTree.parse(fonts_xml_path).findall('family')
    # Minikin supports up to 254 but users can place their own font at the
    # first place. Thus, 253 is the maximum allowed number of font families
    # in the default collection.
    assert len(families) < 254, (
        'System font collection can contains up to 253 font families.')

    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if name:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)
        else:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)

        scripts = set()
        if langs:
            scripts = {lang_to_script(lang) for lang in langs.split()}

        # Non-empty names are used for default LGC fonts. The choice depends
        # only on the family, so it is made once for all of its fonts.
        if name:
            map_scripts = {'Latn', 'Grek', 'Cyrl'}
        else:
            map_scripts = scripts

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            font_file = child.text

            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            index = child.get('index')
            if index:
                index = int(index)

            font = (font_file, index)
            record = FontRecord(
                name, frozenset(scripts), variant, weight, style, font)
            _fallback_chain.append(record)

            for script in map_scripts:
                _script_to_font_map[script].add((font_file, index))
262
263
def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Run the emoji coverage checks against the chain's emoji font.

    Args:
        all_emoji: everything the emoji font is expected to support.
        equivalent_emoji: dict of sequences expected to share a glyph.
    """
    check_emoji_font_coverage(get_emoji_font(), all_emoji, equivalent_emoji)
Doug Feltf874a192016-07-08 17:42:15 -0700267
268
def get_emoji_font():
    """Return the (file name, index) pair of the single emoji font.

    The emoji font is the fallback-chain record whose scripts include
    'Zsye'. Fails an assertion unless exactly one such record exists.
    """
    emoji_fonts = []
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            emoji_fonts.append(record.font)
    assert len(emoji_fonts) == 1, 'There are %d emoji fonts.' % len(emoji_fonts)
    return emoji_fonts[0]
Roozbeh Pournader5dde0872016-03-31 13:54:56 -0700275
Doug Feltf874a192016-07-08 17:42:15 -0700276
def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Check that the emoji font covers exactly the expected emoji.

    Four properties are asserted, in this order:
      1. every entry of `all_emoji` is present in the font's emoji map;
      2. the font maps nothing outside `all_emoji`, apart from U+0000,
         U+000D and U+0020;
      3. both sides of every pair in `equivalent_emoji` map to the same
         glyph;
      4. whenever several entries share a glyph, following
         `equivalent_emoji` from each of them ends at one common entry.

    Args:
        emoji_font: a (file name, index) pair as accepted by open_font().
        all_emoji: set of code points and code point tuples.
        equivalent_emoji: dict mapping an entry to the entry it should be
            rendered identically to.
    """
    coverage = get_emoji_map(emoji_font)
    for sequence in all_emoji:
        assert sequence in coverage, (
            '%s is not supported in the emoji font.' % printable(sequence))

    # The font needs to support a few extra characters, which is OK.
    allowed_extras = {0x0000, 0x000D, 0x0020}
    for sequence in coverage:
        if sequence in allowed_extras:
            continue
        assert sequence in all_emoji, (
            'Emoji font should not support %s.' % printable(sequence))

    for first, second in sorted(equivalent_emoji.items()):
        assert coverage[first] == coverage[second], (
            '%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    # Group the entries by glyph in a single pass, instead of rescanning the
    # whole coverage map once for every distinct glyph.
    glyph_to_seqs = {}
    for seq, glyph in coverage.items():
        glyph_to_seqs.setdefault(glyph, []).append(seq)

    for glyph, maps_to_glyph in glyph_to_seqs.items():
        if len(maps_to_glyph) > 1:
            # There are more than one sequences mapping to the same glyph. We
            # need to make sure they were expected to be equivalent.
            equivalent_seqs = set()
            for seq in maps_to_glyph:
                equivalent_seq = seq
                while equivalent_seq in equivalent_emoji:
                    equivalent_seq = equivalent_emoji[equivalent_seq]
                equivalent_seqs.add(equivalent_seq)
            assert len(equivalent_seqs) == 1, (
                'The sequences %s should not result in the same glyph %s' % (
                    printable(equivalent_seqs),
                    glyph))
Roozbeh Pournader3b3c78e2016-07-25 14:04:34 -0700311
Roozbeh Pournaderfa1facc2016-03-16 13:53:47 -0700312
def check_emoji_defaults(default_emoji):
    """Check which fonts in the fallback chain may render default emoji.

    No checked font other than the emoji font may support anything in
    `default_emoji`. In addition, every 'Emoji' character that is not in
    `default_emoji` must appear in some font placed before the emoji font,
    except for characters whose age is '7.0'.

    Args:
        default_emoji: set of code points and sequences that default to
            emoji presentation.
    """
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    emoji_font_seen = False
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            # No need to check the emoji font itself.
            emoji_font_seen = True
            continue

        if emoji_font_seen:
            # For later fonts, we only check them if they have a script
            # defined, since the defined script may get them to a higher
            # score even if they appear after the emoji font. However,
            # we should skip checking the text symbols font, since
            # symbol fonts should be able to override the emoji display
            # style when 'Zsym' is explicitly specified by the user.
            if not record.scripts or 'Zsym' in record.scripts:
                continue

        # Check default emoji-style characters.
        assert_font_supports_none_of_chars(record.font, sorted(default_emoji))

        # Mark default text-style characters appearing in fonts above the
        # emoji font as seen.
        if not emoji_font_seen:
            missing_text_chars.difference_update(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    missing_text_chars.difference_update(_chars_by_age['7.0'])
    assert not missing_text_chars, (
        'Text style version of some emoji characters are missing: ' +
        repr(missing_text_chars))
Roozbeh Pournaderfa1facc2016-03-16 13:53:47 -0700344
345
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a semicolon-separated Unicode data file.

    Each non-empty line, after removing '#' comments, has the form
    '<chars> ; <property> [; ...]', where <chars> is a single code point
    ('1F600'), a range ('0041..005A') or a space-separated sequence
    ('0023 20E3'). All numbers are hexadecimal. Fields after the second are
    ignored.

    Args:
        file_path: path of the data file.
        reverse: when true, return a dictionary that maps each property
            value to the set of characters carrying it, which is useful for
            some binary properties. Otherwise return a dictionary that maps
            characters to the property value, assuming there is only one
            property in the file.

    Returns:
        A defaultdict(set) when `reverse` is true, otherwise a dict. Single
        characters and range members are integer keys/members; sequences are
        tuples of integers.

    Fails an assertion in the non-reverse mode if a character is listed
    more than once.
    """
    if reverse:
        output_dict = collections.defaultdict(set)
    else:
        output_dict = {}
    with open(file_path) as datafile:
        for line in datafile:
            # Drop any trailing comment, then skip lines left empty.
            line = line.split('#', 1)[0].strip()
            if not line:
                continue

            chars, prop = line.split(';')[:2]
            chars = chars.strip()
            prop = prop.strip()

            if ' ' in chars:  # character sequence
                # split() without an argument tolerates repeated spaces.
                sequence = tuple(int(ch, 16) for ch in chars.split())
                additions = [sequence]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                # range() instead of xrange() so this also runs on Python 3.
                additions = range(int(char_start, 16), int(char_end, 16) + 1)
            else:  # single character
                additions = [int(chars, 16)]

            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict
                    output_dict[addition] = prop
    return output_dict
384
385
def parse_standardized_variants(file_path):
    """Collect the text-style and emoji-style variation sequences of a file.

    Each non-empty line, after removing '#' comments, must consist of exactly
    three ';'-separated fields: a space-separated hexadecimal sequence whose
    first two items are the base and the variation selector, a description,
    and a third field that is ignored.

    Args:
        file_path: path of the StandardizedVariants-style data file.

    Returns:
        A (text_set, emoji_set) pair of sets of (base, selector) tuples, for
        the descriptions 'text style' and 'emoji style' respectively. Lines
        with any other description are parsed but not recorded.
    """
    text_set = set()
    emoji_set = set()
    targets = {'text style': text_set, 'emoji style': emoji_set}
    with open(file_path) as datafile:
        for raw_line in datafile:
            line = raw_line.split('#', 1)[0].strip()
            if not line:
                continue
            sequence, description, _ = line.split(';')
            code_points = sequence.strip().split(' ')
            base = int(code_points[0], 16)
            vs = int(code_points[1], 16)
            target = targets.get(description.strip())
            if target is not None:
                target.add((base, vs))
    return text_set, emoji_set
406
407
def parse_ucd(ucd_path):
    """Load the Unicode data files under `ucd_path` into module globals.

    Sets:
        _emoji_properties: property -> characters, from emoji-data.txt.
        _chars_by_age: age -> characters, from DerivedAge.txt.
        _text_variation_sequences, _emoji_variation_sequences: sets of
            (base, selector) pairs, from StandardizedVariants.txt.
        _emoji_sequences: sequence -> property, from emoji-sequences.txt.
        _emoji_zwj_sequences: sequence -> property, from
            emoji-zwj-sequences.txt.
    """
    global _emoji_properties, _chars_by_age
    global _text_variation_sequences, _emoji_variation_sequences
    global _emoji_sequences, _emoji_zwj_sequences

    def ucd_file(file_name):
        return path.join(ucd_path, file_name)

    _emoji_properties = parse_unicode_datafile(
        ucd_file('emoji-data.txt'), reverse=True)
    _chars_by_age = parse_unicode_datafile(
        ucd_file('DerivedAge.txt'), reverse=True)
    _text_variation_sequences, _emoji_variation_sequences = (
        parse_standardized_variants(ucd_file('StandardizedVariants.txt')))
    _emoji_sequences = parse_unicode_datafile(
        ucd_file('emoji-sequences.txt'))
    _emoji_zwj_sequences = parse_unicode_datafile(
        ucd_file('emoji-zwj-sequences.txt'))
423
424
def flag_sequence(territory_code):
    """Return the flag emoji sequence for a territory code.

    Each letter is shifted by its distance from 'A' starting at U+1F1E6, so
    'A' becomes U+1F1E6, 'B' becomes U+1F1E7, and so on.

    Args:
        territory_code: iterable of single uppercase letters, e.g. the
            string 'US' or the tuple ('U', 'S').

    Returns:
        A tuple with one code point per letter.
    """
    first_indicator = 0x1F1E6
    offsets = (ord(letter) - ord('A') for letter in territory_code)
    return tuple(first_indicator + offset for offset in offsets)
427
428
# Flag sequences for territories listed here as unsupported.
UNSUPPORTED_FLAGS = frozenset(
    flag_sequence(code) for code in (
        'BL', 'BQ', 'DG', 'EA', 'EH', 'FK', 'GF', 'GP', 'GS', 'MF',
        'MQ', 'NC', 'PM', 'RE', 'TF', 'UN', 'WF', 'XK', 'YT',
    ))

# Flag sequences (keys) that are treated as equivalent to another
# territory's flag sequence (values).
EQUIVALENT_FLAGS = {
    flag_sequence(alias): flag_sequence(target)
    for alias, target in (
        ('BV', 'NO'),
        ('CP', 'FR'),
        ('HM', 'AU'),
        ('SJ', 'NO'),
        ('UM', 'US'),
    )
}
446
# COMBINING ENCLOSING KEYCAP, the final code point of keycap sequences.
COMBINING_KEYCAP = 0x20E3

# Characters that Android defaults to emoji style, different from the
# recommendations in UTR #51.
ANDROID_DEFAULT_EMOJI = frozenset((
    0x2600,  # BLACK SUN WITH RAYS
    0x2601,  # CLOUD
    0x260E,  # BLACK TELEPHONE
    0x261D,  # WHITE UP POINTING INDEX
    0x263A,  # WHITE SMILING FACE
    0x2660,  # BLACK SPADE SUIT
    0x2663,  # BLACK CLUB SUIT
    0x2665,  # BLACK HEART SUIT
    0x2666,  # BLACK DIAMOND SUIT
    0x270C,  # VICTORY HAND
    0x2744,  # SNOWFLAKE
    0x2764,  # HEAVY BLACK HEART
))
464
# Legacy single code points (keys) and the standard sequences they are
# treated as equivalent to (values).
LEGACY_ANDROID_EMOJI = {
    # Flags
    0xFE4E5: flag_sequence('JP'),
    0xFE4E6: flag_sequence('US'),
    0xFE4E7: flag_sequence('FR'),
    0xFE4E8: flag_sequence('DE'),
    0xFE4E9: flag_sequence('IT'),
    0xFE4EA: flag_sequence('GB'),
    0xFE4EB: flag_sequence('ES'),
    0xFE4EC: flag_sequence('RU'),
    0xFE4ED: flag_sequence('CN'),
    0xFE4EE: flag_sequence('KR'),
    # Keycaps
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
    0xFE82E: (ord('1'), COMBINING_KEYCAP),
    0xFE82F: (ord('2'), COMBINING_KEYCAP),
    0xFE830: (ord('3'), COMBINING_KEYCAP),
    0xFE831: (ord('4'), COMBINING_KEYCAP),
    0xFE832: (ord('5'), COMBINING_KEYCAP),
    0xFE833: (ord('6'), COMBINING_KEYCAP),
    0xFE834: (ord('7'), COMBINING_KEYCAP),
    0xFE835: (ord('8'), COMBINING_KEYCAP),
    0xFE836: (ord('9'), COMBINING_KEYCAP),
    0xFE837: (ord('0'), COMBINING_KEYCAP),
}
488
# ZWJ sequences (keys) that are treated as equivalent to a single existing
# code point (values).
ZWJ_IDENTICALS = {
    # KISS
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468):
        0x1F48F,
    # COUPLE WITH HEART
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F468):
        0x1F491,
    # FAMILY
    (0x1F468, 0x200D, 0x1F469, 0x200D, 0x1F466):
        0x1F46A,
}
497
Doug Feltf874a192016-07-08 17:42:15 -0700498
def is_fitzpatrick_modifier(cp):
    """Return True if `cp` lies in the modifier range U+1F3FB..U+1F3FF."""
    return 0x1F3FB <= cp <= 0x1F3FF


def reverse_emoji(seq):
    """Return `seq` reversed, with each modifier kept after its base.

    A plain reversal would put every Fitzpatrick modifier in front of the
    emoji it modifies, so after reversing, each modifier is swapped with the
    element that follows it.

    Args:
        seq: sequence of code points.

    Returns:
        A tuple of code points.
    """
    rev = list(reversed(seq))
    i = 1
    while i < len(rev):
        if is_fitzpatrick_modifier(rev[i-1]):
            rev[i-1], rev[i] = rev[i], rev[i-1]
            # The modifier now sits at index i, right behind its base. Step
            # over it; examining it again would swap it once more and push
            # it further along, away from the emoji it modifies.
            i += 2
        else:
            i += 1
    return tuple(rev)
Doug Feltf874a192016-07-08 17:42:15 -0700511
512
def compute_expected_emoji():
    """Compute what the emoji font is expected to support.

    Uses the module globals filled in by parse_ucd() together with the
    constant tables of this module.

    Returns:
        A tuple (all_emoji, default_emoji, equivalent_emoji):
            all_emoji: set of every code point and sequence the emoji font
                should support.
            default_emoji: set of code points and sequences that default to
                emoji presentation.
            equivalent_emoji: dict mapping an entry to the entry it should
                share a glyph with.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set(_emoji_variation_sequences)

    # Add zwj sequences not in the current emoji-zwj-sequences.txt.
    adjusted_emoji_zwj_sequences = dict(_emoji_zwj_sequences)
    # Single parent families.
    additional_emoji_zwj = (
        (0x1F468, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467),
        (0x1F468, 0x200D, 0x1F466, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467, 0x200D, 0x1F467),
        (0x1F469, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467),
        (0x1F469, 0x200D, 0x1F466, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467, 0x200D, 0x1F467),
    )
    # Sequences formed from man and woman and optional fitzpatrick modifier.
    modified_extensions = (
        0x2696,
        0x2708,
        0x1F3A8,
        0x1F680,
        0x1F692,
    )
    for seq in additional_emoji_zwj:
        adjusted_emoji_zwj_sequences[seq] = 'Emoji_ZWJ_Sequence'
    for ext in modified_extensions:
        for person in (0x1F468, 0x1F469):
            plain = (person, 0x200D, ext)
            adjusted_emoji_zwj_sequences[plain] = 'Emoji_ZWJ_Sequence'
            for modifier in range(0x1F3FB, 0x1F400):
                modified = (person, modifier, 0x200D, ext)
                adjusted_emoji_zwj_sequences[modified] = 'Emoji_ZWJ_Sequence'

    def without_emoji_vs(raw_sequence):
        return tuple(ch for ch in raw_sequence if ch != EMOJI_VS)

    for raw_sequence in _emoji_sequences.keys():
        sequence = without_emoji_vs(raw_sequence)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)

    for raw_sequence in adjusted_emoji_zwj_sequences.keys():
        sequence = without_emoji_vs(raw_sequence)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the
        # fonts as a workaround to get the sequences work in RTL text.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Add all two-letter flag sequences, as even the unsupported ones should
    # resolve to a flag tofu.
    all_letters = [chr(code) for code in range(ord('A'), ord('Z')+1)]
    all_flags = {
        flag_sequence(code)
        for code in itertools.product(all_letters, repeat=2)}
    all_sequences.update(all_flags)
    tofu_flags = UNSUPPORTED_FLAGS | (all_flags - set(_emoji_sequences.keys()))

    legacy_code_points = set(LEGACY_ANDROID_EMOJI.keys())
    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        legacy_code_points)
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        ANDROID_DEFAULT_EMOJI |
        all_sequences |
        legacy_code_points)

    # Every tofu flag is expected to look like the smallest one.
    first_tofu_flag = min(tofu_flags)
    equivalent_emoji.update(
        (flag, first_tofu_flag)
        for flag in tofu_flags if flag != first_tofu_flag)
    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)
    # A variation sequence is expected to look like its base character.
    equivalent_emoji.update(
        (seq, seq[0]) for seq in _emoji_variation_sequences)

    return all_emoji, default_emoji, equivalent_emoji
Roozbeh Pournaderfa1facc2016-03-16 13:53:47 -0700598
599
def check_vertical_metrics():
    """Check the vertical metrics of the default named font families.

    For every fallback-chain record named 'sans-serif',
    'sans-serif-condensed', 'serif' or 'monospace', the 'hhea' table must
    have ascent 1900 and descent -500. For the two sans-serif names the
    'head' table must additionally have yMax 2163 and yMin -555; that check
    runs first.
    """
    for record in _fallback_chain:
        if record.name not in ['sans-serif', 'sans-serif-condensed',
                               'serif', 'monospace']:
            continue

        # Open the font once and reuse it for both checks, rather than
        # parsing sans-serif fonts a second time for the 'hhea' check.
        font = open_font(record.font)

        if record.name in ['sans-serif', 'sans-serif-condensed']:
            assert font['head'].yMax == 2163 and font['head'].yMin == -555, (
                'yMax and yMin of %s do not match expected values.' % (record.font,))

        assert font['hhea'].ascent == 1900 and font['hhea'].descent == -500, (
            'ascent and descent of %s do not match expected values.' % (record.font,))
Roozbeh Pournaderbac1aec2016-07-27 13:08:37 -0700611
612
def main():
    """Run all font checks for a build output directory.

    Command line arguments:
        sys.argv[1]: target output directory; it is expected to contain
            'fonts', 'etc/fonts.xml' and 'usr/hyphen-data'.
        sys.argv[2]: the emoji checks run only when this is 'true'.
        sys.argv[3]: directory with the Unicode data files; read only when
            the emoji checks run.
    """
    global _fonts_dir
    target_out = sys.argv[1]
    _fonts_dir = path.join(target_out, 'fonts')

    parse_fonts_xml(path.join(target_out, 'etc', 'fonts.xml'))

    check_vertical_metrics()

    check_hyphens(path.join(target_out, 'usr', 'hyphen-data'))

    if sys.argv[2] != 'true':
        return

    parse_ucd(sys.argv[3])
    all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
    check_emoji_coverage(all_emoji, equivalent_emoji)
    check_emoji_defaults(default_emoji)


if __name__ == '__main__':
    main()