Blame - tools/fonts/fontchain_linter.py - SHIFTPHONES/android_frameworks_base

2016-03-09 23:08:45 -0800

[diff] [blame]

1

#!/usr/bin/env python

2

3

import collections

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

4

import copy

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

5

import glob

6

from os import path

Seigo Nonaka

2021-04-16 00:11:43 -0700

[diff] [blame]

7

import re

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

8

import sys

9

from xml.etree import ElementTree

10

11

from fontTools import ttLib

12

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

13

# U+FE0F VARIATION SELECTOR-16, the variation selector that requests emoji
# presentation for the preceding character.
EMOJI_VS = 0xFE0F

14

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

15

# Script (four-letter code) used to write each known language, keyed by
# lower-case language code.
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'be': 'Cyrl',
    'bg': 'Cyrl',
    'bn': 'Beng',
    'cu': 'Cyrl',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'la': 'Latn',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}

52

53

def lang_to_script(lang_code):
    """Return the four-letter script code for a language tag.

    The tag is lower-cased and looked up in LANG_TO_SCRIPT. While there is no
    match, the last hyphen-separated subtag is examined: if it is exactly four
    alphabetic characters it is returned as the script (title-cased);
    otherwise it is dropped and the shortened tag is looked up again.

    Fails an assertion when there is no subtag left to drop.
    """
    remaining = lang_code.lower()
    while True:
        if remaining in LANG_TO_SCRIPT:
            return LANG_TO_SCRIPT[remaining]
        cut = remaining.rfind('-')
        assert cut != -1, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        last_subtag = remaining[cut + 1:]
        if len(last_subtag) == 4 and last_subtag.isalpha():
            # This is actually the script
            return last_subtag.title()
        remaining = remaining[:cut]

66

67

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

68

def printable(inp):
    """Render code points in U+XXXX notation for use in error messages.

    Args:
        inp: a single code point (int), a character sequence (tuple of code
            points), or a set/frozenset of code points and/or sequences.

    Returns:
        'U+XXXX' for a code point, '<U+XXXX, ...>' for a sequence, and
        '{..., ...}' for a set.
    """
    if isinstance(inp, (set, frozenset)):  # set of character sequences
        # Sets have no defined order; sort the rendered members so that the
        # same input always produces the same message.
        return '{' + ', '.join(sorted(printable(seq) for seq in inp)) + '}'
    if isinstance(inp, tuple):  # character sequence
        return '<' + ', '.join(printable(ch) for ch in inp) + '>'
    return 'U+%04X' % inp  # single character

75

76

77

def open_font(font):
    """Open a font from the fonts directory with fontTools.

    Args:
        font: a (font_file, index) pair. font_file is joined onto _fonts_dir;
            index is passed as fontNumber unless it is None.

    Returns:
        The ttLib.TTFont object for the file.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is None:
        return ttLib.TTFont(font_path)
    return ttLib.TTFont(font_path, fontNumber=index)

84

85

86

def get_best_cmap(font):
    """Return the character-to-glyph mapping of the best cmap subtable.

    The UCS-4 subtable (format 12, platform 3, encoding 10) is preferred;
    the BMP subtable (format 4, platform 3, encoding 1) is used otherwise.

    Args:
        font: a (font_file, index) pair as accepted by open_font().

    Returns:
        The `cmap` dictionary of the selected subtable.

    Fails an assertion if either kind of subtable appears more than once, or
    if the font has neither.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    if all_unicode_cmap:
        return all_unicode_cmap.cmap
    # Without this check a font lacking both subtables would fail below with
    # an AttributeError on None that does not say which font is at fault.
    assert bmp_cmap is not None, 'No Unicode cmap found in %s' % (font, )
    return bmp_cmap.cmap

101

102

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

103

def get_variation_sequences_cmap(font):
    """Return the variation-sequence cmap subtable of a font.

    Looks for the subtable with format 14, platform 0, encoding 5.

    Args:
        font: a (font_file, index) pair as accepted by open_font().

    Returns:
        The matching subtable, or None when the font has none.

    Fails an assertion if more than one such subtable is present.
    """
    ttfont = open_font(font)
    found = None
    for subtable in ttfont['cmap'].tables:
        key = (subtable.format, subtable.platformID, subtable.platEncID)
        if key != (14, 0, 5):
            continue
        assert found is None, 'More than one VS cmap in %s' % (font, )
        found = subtable
    return found

def get_emoji_map(font):
    """Build a map from code points and code point sequences to glyphs.

    The result combines three sources from the font:
      * the best cmap (single code point -> glyph),
      * the variation-sequence cmap ((base, selector) -> glyph),
      * GSUB ligature lookups (tuple of code points -> ligature glyph).

    Args:
        font: a (font_file, index) pair as accepted by open_font().

    Returns:
        A dict keyed by int (single character) or tuple (sequence).
    """
    # Add normal characters
    emoji_map = copy.copy(get_best_cmap(font))
    # Glyph -> code point, skipping private-use code points, used to turn
    # GSUB glyph sequences back into code point sequences.
    reverse_cmap = {}
    for code, glyph in emoji_map.items():
        if not contains_pua(code):
            reverse_cmap[glyph] = code

    # Add variation sequences
    vs_dict = get_variation_sequences_cmap(font).uvsDict
    for vs, mappings in vs_dict.items():
        for base, glyph in mappings:
            # A None glyph means the sequence uses the base character's glyph.
            emoji_map[(base, vs)] = emoji_map[base] if glyph is None else glyph

    # Add GSUB rules
    ttfont = open_font(font)
    for lookup in ttfont['GSUB'].table.LookupList.Lookup:
        if lookup.LookupType != 4:
            # Other lookups are used in the emoji font for fallback.
            # We ignore them for now.
            continue
        for subtable in lookup.SubTable:
            for first_glyph, ligature_list in subtable.ligatures.items():
                for ligature in ligature_list:
                    glyph_names = [first_glyph] + ligature.Component
                    sequence = tuple(reverse_cmap[name] for name in glyph_names)
                    # Make sure no starting subsequence of 'sequence' has been
                    # seen before.
                    for prefix_len in range(2, len(sequence) + 1):
                        assert sequence[:prefix_len] not in emoji_map
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

152

def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless the font maps at least one of `chars`.

    Args:
        font: a (font_file, index) pair as accepted by open_font().
        chars: iterable of code points to look for in the font's best cmap.
    """
    best_cmap = get_best_cmap(font)
    if any(char in best_cmap for char in chars):
        return
    sys.exit('None of characters in %s were found in %s' % (chars, font))

158

159

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

160

def assert_font_supports_all_of_chars(font, chars):
    """Assert that every code point in `chars` is in the font's best cmap.

    Args:
        font: a (font_file, index) pair as accepted by open_font().
        chars: iterable of code points that must all be mapped.
    """
    supported = get_best_cmap(font)
    for code_point in chars:
        assert code_point in supported, (
            'U+%04X was not found in %s' % (code_point, font))

165

166

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

167

def assert_font_supports_none_of_chars(font, chars, fallbackName):
    """Assert that none of `chars` is in the font's best cmap.

    Args:
        font: a (font_file, index) pair as accepted by open_font().
        chars: iterable of code points that must not be mapped.
        fallbackName: name of the fallback chain being checked; when it is
            non-empty it is included in the failure message.
    """
    best_cmap = get_best_cmap(font)
    for char in chars:
        # The message that names the fallback chain is only meaningful when a
        # name was supplied (the original test was inverted).
        if fallbackName:
            assert char not in best_cmap, (
                'U+%04X was found in %s in fallback %s' % (char, font, fallbackName))
        else:
            assert char not in best_cmap, 'U+%04X was found in %s' % (char, font)

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

175

176

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

177

def assert_font_supports_all_sequences(font, sequences):
    """Assert that the font lists every (base, selector) variation sequence.

    A sequence counts as supported when the font's variation-sequence cmap has
    an entry for the selector that contains (base, None).

    Args:
        font: a (font_file, index) pair as accepted by open_font().
        sequences: iterable of (base, variation_selector) pairs.
    """
    vs_dict = get_variation_sequences_cmap(font).uvsDict
    for base, vs in sorted(sequences):
        listed = vs in vs_dict and (base, None) in vs_dict[vs]
        assert listed, (
            '<U+%04X, U+%04X> was not found in %s' % (base, vs, font))

182

183

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

184

def check_hyphens(hyphens_dir):
    """Check that fonts for every hyphenated script contain a hyphen.

    The scripts are derived from the language codes embedded in the names of
    the 'hyph-*.hyb' files in `hyphens_dir`. Every font registered for such a
    script must map U+002D or U+2010.
    """
    # Find all the scripts that need automatic hyphenation
    scripts = set()
    for hyb_path in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        file_name = path.basename(hyb_path)
        assert file_name.startswith('hyph-'), (
            'Unknown hyphenation file %s' % file_name)
        # The language code sits between the first '-' and the first '.'.
        code_start = file_name.index('-') + 1
        code_end = file_name.index('.')
        scripts.add(lang_to_script(file_name[code_start:code_end]))

    hyphens = {0x002D, 0x2010}
    for script in scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, hyphens)

200

201

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

202

class FontRecord(object):
    """Plain record holding the attributes of one font entry."""

    def __init__(self, name, psName, scripts, variant, weight, style, fallback_for, font):
        # Font identity.
        self.font = font
        self.psName = psName
        # Attributes of the family the font belongs to.
        self.name = name
        self.scripts = scripts
        self.variant = variant
        # Attributes of the font itself.
        self.weight = weight
        self.style = style
        self.fallback_for = fallback_for

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

214

def parse_fonts_xml(fonts_xml_path):
    """Parse the fonts XML file and rebuild the module-level font tables.

    Rebinds three globals:
      _script_to_font_map: script code -> set of (font_file, index) pairs.
      _fallback_chains: family name -> list of FontRecord objects.
      _all_fonts: FontRecord for every listed font whose file exists.

    Args:
        fonts_xml_path: path of the XML file whose <family> elements are read.
    """
    global _script_to_font_map, _fallback_chains, _all_fonts
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chains = {}
    _all_fonts = []

    families = ElementTree.parse(fonts_xml_path).findall('family')
    # Minikin supports up to 254 but users can place their own font at the first
    # place. Thus, 253 is the maximum allowed number of font families in the
    # default collection.
    assert len(families) < 254, (
        'System font collection can contains up to 253 font families.')

    # First pass: validate the family attributes and create an empty chain for
    # every named family, so that fallbackFor references can be checked below.
    for family in families:
        family_name = family.get('name')
        family_variant = family.get('variant')
        if not family_name:
            assert family_variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % family_variant)
            continue
        assert family_variant is None, (
            'No variant expected for LGC font %s.' % family_name)
        assert family.get('lang') is None, (
            'No language expected for LGC fonts %s.' % family_name)
        assert family_name not in _fallback_chains, 'Duplicated name entry %s' % family_name
        _fallback_chains[family_name] = []

    # Second pass: create a record per font and file it in the tables.
    trim_re = re.compile(r"^[ \n\r\t]*(.+)[ \n\r\t]*$")
    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        scripts = {lang_to_script(lang) for lang in langs.split()} if langs else set()

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            font_file = trim_re.match(child.text.rstrip()).group(1)

            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            fallback_for = child.get('fallbackFor')
            assert not name or not fallback_for, (
                'name and fallbackFor cannot be present at the same time')
            assert not fallback_for or fallback_for in _fallback_chains, (
                'Unknown fallback name: %s' % fallback_for)

            index = child.get('index')
            if index:
                index = int(index)
            font = (font_file, index)

            if not path.exists(path.join(_fonts_dir, font_file)):
                continue  # Missing font is a valid case. Just ignore the missing font files.

            record = FontRecord(
                name,
                child.get('postScriptName'),
                frozenset(scripts),
                variant,
                weight,
                style,
                fallback_for,
                font)
            _all_fonts.append(record)

            if fallback_for:
                _fallback_chains[fallback_for].append(record)
            elif not name or name == 'sans-serif':
                # Unnamed families and 'sans-serif' are added to every chain.
                for chain in _fallback_chains.values():
                    chain.append(record)
            else:
                _fallback_chains[name].append(record)

            # non-empty names are used for default LGC fonts
            map_scripts = {'Latn', 'Grek', 'Cyrl'} if name else scripts
            for script in map_scripts:
                _script_to_font_map[script].add(font)

310

311

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

312

def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Run the emoji coverage check against the single emoji font.

    Args:
        all_emoji: passed through to check_emoji_font_coverage().
        equivalent_emoji: passed through to check_emoji_font_coverage().
    """
    check_emoji_font_coverage(get_emoji_font(), all_emoji, equivalent_emoji)

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

315

316

317

def get_emoji_font():
    """Return the (font_file, index) pair of the emoji font.

    The emoji font is the record in _all_fonts whose scripts include 'Zsye'.
    Fails an assertion unless there is exactly one such record.
    """
    emoji_fonts = []
    for record in _all_fonts:
        if 'Zsye' in record.scripts:
            emoji_fonts.append(record.font)
    assert len(emoji_fonts) == 1, 'There are %d emoji fonts.' % len(emoji_fonts)
    return emoji_fonts[0]

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

323

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

324

Seigo Nonaka

2021-05-04 17:12:35 -0700

[diff] [blame^]

325

def is_pua(x):
    """Return True if code point `x` is in one of the three private-use ranges.

    The ranges are U+E000..U+F8FF, U+F0000..U+FFFFD and U+100000..U+10FFFD.
    """
    if 0xE000 <= x <= 0xF8FF:
        return True
    if 0xF0000 <= x <= 0xFFFFD:
        return True
    return 0x100000 <= x <= 0x10FFFD

327

328

def contains_pua(sequence):
    """Return True if `sequence` is, or contains, a private-use code point.

    Args:
        sequence: a single code point, or a tuple of code points.
    """
    if type(sequence) is not tuple:
        return is_pua(sequence)
    return any(is_pua(code_point) for code_point in sequence)

333

334

335

def check_emoji_compat():
    """Assert that the emoji font carries the metadata EmojiCompat needs.

    The font must have a 'meta' table, and that table must contain 'Emji'
    data.
    """
    ttf = open_font(get_emoji_font())
    # Test for the table before indexing: ttf['meta'] raises KeyError when the
    # table is absent, which would bypass the intended assertion message.
    assert 'meta' in ttf, 'Compat font must have meta table'
    meta = ttf['meta']
    assert 'Emji' in meta.data, 'meta table should have \'Emji\' data.'

340

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

341

def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Compare what the emoji font supports with what is expected.

    Four kinds of problems are collected and reported together:
      * an expected sequence is missing from the font,
      * the font supports a sequence that is not expected,
      * two sequences declared equivalent map to different glyphs,
      * sequences not declared equivalent share a glyph.

    Args:
        emoji_font: (font_file, index) pair of the emoji font.
        all_emoji: collection of expected code points / sequences.
        equivalent_emoji: dict mapping a sequence to the sequence it should
            share a glyph with.

    Fails an assertion listing every problem found.
    """
    coverage = get_emoji_map(emoji_font)

    errors = []

    for sequence in all_emoji:
        if sequence not in coverage:
            errors.append('%s is not supported in the emoji font.' % printable(sequence))

    for sequence in coverage:
        if sequence in {0x0000, 0x000D, 0x0020}:
            # The font needs to support a few extra characters, which is OK
            continue

        if contains_pua(sequence):
            # The font needs to have some PUA for EmojiCompat library.
            continue

        if sequence not in all_emoji:
            errors.append('%s support unexpected in the emoji font.' % printable(sequence))

    for first, second in equivalent_emoji.items():
        if first not in coverage or second not in coverage:
            continue  # sequence will be reported missing
        if coverage[first] != coverage[second]:
            errors.append('%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    # Group the non-PUA sequences by glyph in a single pass instead of
    # rescanning the whole coverage map once per glyph.
    seqs_by_glyph = collections.defaultdict(list)
    for seq, glyph in coverage.items():
        if not contains_pua(seq):
            seqs_by_glyph[glyph].append(seq)

    for glyph, maps_to_glyph in seqs_by_glyph.items():
        if len(maps_to_glyph) <= 1:
            continue
        # There are more than one sequences mapping to the same glyph. We
        # need to make sure they were expected to be equivalent.
        equivalent_seqs = set()
        for seq in maps_to_glyph:
            equivalent_seq = seq
            # Follow the equivalence chain to its final target.
            while equivalent_seq in equivalent_emoji:
                equivalent_seq = equivalent_emoji[equivalent_seq]
            equivalent_seqs.add(equivalent_seq)
        if len(equivalent_seqs) != 1:
            errors.append('The sequences %s should not result in the same glyph %s' % (
                printable(equivalent_seqs),
                glyph))

    assert not errors, '%d emoji font errors:\n%s\n%d emoji font coverage errors' % (
        len(errors), '\n'.join(errors), len(errors))

388

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

389

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

390

def check_emoji_defaults(default_emoji):
    """Check emoji/text presentation defaults across every fallback chain.

    Fonts other than the emoji font must not map any default-emoji character,
    and every 'Emoji' character that is not default-emoji must be mapped by
    some font placed before the emoji font in a chain (characters listed under
    age '7.0' are exempt).

    Args:
        default_emoji: set of code points displayed as emoji by default.
    """
    text_style_missing = _emoji_properties['Emoji'] - default_emoji
    for chain_name, chain in _fallback_chains.items():
        past_emoji_font = False
        for record in chain:
            if 'Zsye' in record.scripts:
                past_emoji_font = True
                # No need to check the emoji font
                continue
            # For later fonts, we only check them if they have a script
            # defined, since the defined script may get them to a higher
            # score even if they appear after the emoji font. However,
            # we should skip checking the text symbols font, since
            # symbol fonts should be able to override the emoji display
            # style when 'Zsym' is explicitly specified by the user.
            if past_emoji_font:
                if not record.scripts or 'Zsym' in record.scripts:
                    continue

            # Check default emoji-style characters
            assert_font_supports_none_of_chars(record.font, default_emoji, chain_name)

            # Mark default text-style characters appearing in fonts above the emoji
            # font as seen
            if not past_emoji_font:
                text_style_missing -= set(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    text_style_missing -= _chars_by_age['7.0']
    assert text_style_missing == set(), (
        'Text style version of some emoji characters are missing: ' +
        repr(text_style_missing))

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

422

423

Roozbeh Pournader

2016-03-16 18:55:32 -0700

[diff] [blame]

424

# Setting reverse to true returns a dictionary that maps the values to sets of
# characters, useful for some binary properties. Otherwise, we get a
# dictionary that maps characters to the property values, assuming there's only
# one property in the file.
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a semicolon-separated Unicode data file.

    Each data line has the form 'chars ; property [; ...]', where chars is a
    single hex code point, a 'start..end' range, or a space-separated
    sequence. Text after '#' and blank lines are ignored.

    Args:
        file_path: path of the data file.
        reverse: when true, map each property to the set of its characters.

    Returns:
        reverse=False: dict mapping each code point (int) or sequence (tuple)
        to its property; a duplicate entry fails an assertion.
        reverse=True: defaultdict(set) mapping each property to its entries.
    """
    output_dict = collections.defaultdict(set) if reverse else {}
    # Unicode data files are UTF-8 and carry non-ASCII text in their comments;
    # decode explicitly rather than depend on the locale's default encoding.
    with open(file_path, encoding='utf-8') as datafile:
        for line in datafile:
            line = line.split('#', 1)[0].strip()
            if not line:
                continue

            chars, prop = line.split(';')[:2]
            chars = chars.strip()
            prop = prop.strip()

            if ' ' in chars:  # character sequence
                # split() without an argument tolerates repeated spaces.
                additions = [tuple(int(ch, 16) for ch in chars.split())]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                additions = range(int(char_start, 16), int(char_end, 16) + 1)
            else:  # single character
                additions = [int(chars, 16)]

            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict
                    output_dict[addition] = prop
    return output_dict

Roozbeh Pournader

2017-04-10 13:52:20 -0700

[diff] [blame]

464

def parse_emoji_variants(file_path):
    """Parse an emoji variation sequences data file.

    Each data line has the form 'BASE VS ; description ;' where description
    is 'text style' or 'emoji style'. Text after '#' and blank lines are
    ignored; lines with any other description are skipped.

    Args:
        file_path: path of the data file.

    Returns:
        A (text_set, emoji_set) pair of sets of (base, variation_selector)
        code point tuples.
    """
    text_set = set()
    emoji_set = set()
    # Decode explicitly as UTF-8 rather than depend on the locale's default.
    with open(file_path, encoding='utf-8') as datafile:
        for line in datafile:
            line = line.split('#', 1)[0].strip()
            if not line:
                continue
            sequence, description, _ = line.split(';')
            # split() without an argument tolerates repeated spaces.
            code_points = sequence.split()
            pair = (int(code_points[0], 16), int(code_points[1], 16))
            description = description.strip()
            if description == 'text style':
                text_set.add(pair)
            elif description == 'emoji style':
                emoji_set.add(pair)
    return text_set, emoji_set

484

485

Roozbeh Pournader

2016-03-16 18:55:32 -0700

[diff] [blame]

486

def parse_ucd(ucd_path):
    """Load the Unicode data files under `ucd_path` into module globals.

    Rebinds _emoji_properties, _chars_by_age, _text_variation_sequences,
    _emoji_variation_sequences, _emoji_sequences and _emoji_zwj_sequences.
    Files in the 'additions' subdirectory extend the standard files, and
    'additions/emoji-exclusions.txt' lists entries to remove again.
    """
    global _emoji_properties, _chars_by_age
    global _text_variation_sequences, _emoji_variation_sequences
    global _emoji_sequences, _emoji_zwj_sequences

    additions_path = path.join(ucd_path, 'additions')

    _emoji_properties = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-data.txt'), reverse=True)
    emoji_properties_additions = parse_unicode_datafile(
        path.join(additions_path, 'emoji-data.txt'), reverse=True)
    for prop, extra_chars in emoji_properties_additions.items():
        _emoji_properties[prop].update(extra_chars)

    _chars_by_age = parse_unicode_datafile(
        path.join(ucd_path, 'DerivedAge.txt'), reverse=True)

    _text_variation_sequences, _emoji_variation_sequences = parse_emoji_variants(
        path.join(ucd_path, 'emoji-variation-sequences.txt'))

    _emoji_sequences = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-sequences.txt'))
    _emoji_sequences.update(parse_unicode_datafile(
        path.join(additions_path, 'emoji-sequences.txt')))

    _emoji_zwj_sequences = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-zwj-sequences.txt'))
    _emoji_zwj_sequences.update(parse_unicode_datafile(
        path.join(additions_path, 'emoji-zwj-sequences.txt')))

    exclusions = parse_unicode_datafile(
        path.join(additions_path, 'emoji-exclusions.txt'))
    _emoji_sequences = remove_emoji_exclude(_emoji_sequences, exclusions)
    _emoji_zwj_sequences = remove_emoji_exclude(_emoji_zwj_sequences, exclusions)
    _emoji_variation_sequences = remove_emoji_variation_exclude(
        _emoji_variation_sequences, exclusions)

    # Unicode 12.0 adds Basic_Emoji in emoji-sequences.txt. We ignore them here since we are already
    # checking the emoji presentations with emoji-variation-sequences.txt.
    # Please refer to http://unicode.org/reports/tr51/#def_basic_emoji_set .
    _emoji_sequences = {
        seq: prop for seq, prop in _emoji_sequences.items()
        if prop != 'Basic_Emoji'}

Qingqing Deng

5e98771

2019-03-25 16:53:34 -0700

[diff] [blame]

519

Siyamed Sinir

d97df5a

2018-04-12 13:11:42 -0700

[diff] [blame]

520

521

def remove_emoji_variation_exclude(source, items):
    """Return a copy of set `source` without the entries that are keys of `items`."""
    excluded = items.keys()
    return source.difference(excluded)

523

524

def remove_emoji_exclude(source, items):
    """Return a new dict with the entries of `source` whose key is not in `items`."""
    kept = {}
    for key, value in source.items():
        if key not in items:
            kept[key] = value
    return kept

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

526

527

def flag_sequence(territory_code):
    """Return the regional-indicator sequence for an upper-case territory code.

    Each letter is mapped to the code point at the same offset from U+1F1E6
    as the letter is from 'A'.
    """
    regional_indicator_a = 0x1F1E6
    offsets = (ord(letter) - ord('A') for letter in territory_code)
    return tuple(regional_indicator_a + offset for offset in offsets)

529

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

530

# Pairs of territory flags, each key mapped to the flag it is listed as
# equivalent to.
EQUIVALENT_FLAGS = {
    flag_sequence('BV'): flag_sequence('NO'),
    flag_sequence('CP'): flag_sequence('FR'),
    flag_sequence('HM'): flag_sequence('AU'),
    flag_sequence('SJ'): flag_sequence('NO'),
    flag_sequence('UM'): flag_sequence('US'),
}

# U+20E3 COMBINING ENCLOSING KEYCAP
COMBINING_KEYCAP = 0x20E3

# Legacy private-use code points, each mapped to the flag or keycap sequence
# listed for it.
LEGACY_ANDROID_EMOJI = {
    0xFE4E5: flag_sequence('JP'),
    0xFE4E6: flag_sequence('US'),
    0xFE4E7: flag_sequence('FR'),
    0xFE4E8: flag_sequence('DE'),
    0xFE4E9: flag_sequence('IT'),
    0xFE4EA: flag_sequence('GB'),
    0xFE4EB: flag_sequence('ES'),
    0xFE4EC: flag_sequence('RU'),
    0xFE4ED: flag_sequence('CN'),
    0xFE4EE: flag_sequence('KR'),
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
    0xFE82E: (ord('1'), COMBINING_KEYCAP),
    0xFE82F: (ord('2'), COMBINING_KEYCAP),
    0xFE830: (ord('3'), COMBINING_KEYCAP),
    0xFE831: (ord('4'), COMBINING_KEYCAP),
    0xFE832: (ord('5'), COMBINING_KEYCAP),
    0xFE833: (ord('6'), COMBINING_KEYCAP),
    0xFE834: (ord('7'), COMBINING_KEYCAP),
    0xFE835: (ord('8'), COMBINING_KEYCAP),
    0xFE836: (ord('9'), COMBINING_KEYCAP),
    0xFE837: (ord('0'), COMBINING_KEYCAP),
}

# This is used to define the emoji that should have the same glyph.
# For example, the gender-based Kiss (0x1F48F) previously had the same glyph
# as Kiss: Woman, Man (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468).
# In that case a valid row would be:
# (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468): 0x1F48F,
ZWJ_IDENTICALS = {
}

SAME_FLAG_MAPPINGS = [
    # Diego Garcia and British Indian Ocean Territory
    ((0x1F1EE, 0x1F1F4), (0x1F1E9, 0x1F1EC)),
    # St. Martin and France
    ((0x1F1F2, 0x1F1EB), (0x1F1EB, 0x1F1F7)),
    # Spain and Ceuta & Melilla
    ((0x1F1EA, 0x1F1F8), (0x1F1EA, 0x1F1E6)),
]

# U+200D ZERO WIDTH JOINER
ZWJ = 0x200D

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

582

583

def is_fitzpatrick_modifier(cp):
    """Return True if code point `cp` is in the range U+1F3FB..U+1F3FF."""
    first_modifier = 0x1F3FB
    last_modifier = 0x1F3FF
    return first_modifier <= cp <= last_modifier

585

586

587

def reverse_emoji(seq):
    """Return `seq` reversed, with Fitzpatrick modifiers moved after emoji.

    Args:
        seq: a sequence of code points.

    Returns:
        A tuple of the same code points in adjusted reverse order.
    """
    flipped = list(seq)
    flipped.reverse()
    # if there are fitzpatrick modifiers in the sequence, keep them after
    # the emoji they modify
    # NOTE(review): after a swap the modifier sits at `right`, where the next
    # iteration finds it again, so it keeps moving toward the end of the list.
    # Confirm this is intended before relying on sequences that have a
    # modifier anywhere but on the first emoji.
    for right in range(1, len(flipped)):
        left = right - 1
        if is_fitzpatrick_modifier(flipped[left]):
            flipped[right], flipped[left] = flipped[left], flipped[right]
    return tuple(flipped)

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

595

596

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

597

def compute_expected_emoji():
    """Build the emoji sets that the emoji checks compare the fonts against.

    Reads the module-level tables `_emoji_properties`, `_emoji_sequences`,
    `_emoji_zwj_sequences` and `_emoji_variation_sequences`, plus the
    hard-coded tables `SAME_FLAG_MAPPINGS`, `EQUIVALENT_FLAGS`,
    `LEGACY_ANDROID_EMOJI` and `ZWJ_IDENTICALS`.

    Side effect: registers the empty flag tag sequence (0x1F3F4, 0xE007F) in
    `_emoji_sequences`.

    Returns:
        A tuple (all_emoji, default_emoji, equivalent_emoji) where
        all_emoji is the union of the 'Emoji' property set, every sequence
        (including reversed ones), every code point used in a sequence, the
        flag tag characters and the legacy Android emoji;
        default_emoji is the union of the 'Emoji_Presentation' property set,
        every sequence and the legacy Android emoji;
        equivalent_emoji is a dict mapping an emoji to the emoji it is
        equivalent to.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set(_emoji_variation_sequences)

    def add_reversed(sequence):
        # Reversed sequences are added to the fonts as a workaround to get the
        # sequences to work in RTL text, so they are expected as well and are
        # equivalent to the forward sequence.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Work on a copy so that zwj sequences missing from the current
    # emoji-zwj-sequences.txt can be added here without touching the parsed
    # data. There are currently none to add.
    adjusted_emoji_zwj_sequences = dict(_emoji_zwj_sequences)

    # Add empty flag tag sequence that is supported as fallback
    _emoji_sequences[(0x1F3F4, 0xE007F)] = 'Emoji_Tag_Sequence'

    for raw_sequence, sequence_type in _emoji_sequences.items():
        sequence = tuple(ch for ch in raw_sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Use the type stored for the key being iterated; looking the type up
        # again with the VS-stripped tuple would query a different key
        # whenever the original contains EMOJI_VS.
        if sequence_type == 'Emoji_Tag_Sequence':
            # Add reverse of all emoji tag sequences as well.
            # TODO: test if these are actually needed by Minikin/HarfBuzz.
            add_reversed(sequence)

    for raw_sequence in adjusted_emoji_zwj_sequences:
        sequence = tuple(ch for ch in raw_sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences.
        add_reversed(sequence)

    for first, second in SAME_FLAG_MAPPINGS:
        equivalent_emoji[first] = second

    # Add all tag characters used in flags
    sequence_pieces.update(range(0xE0030, 0xE0039 + 1))
    sequence_pieces.update(range(0xE0061, 0xE007A + 1))

    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        set(LEGACY_ANDROID_EMOJI.keys()))
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        all_sequences |
        set(LEGACY_ANDROID_EMOJI.keys()))

    # Later tables override earlier entries for the same key.
    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)

    # A variation sequence is equivalent to its base character.
    for seq in _emoji_variation_sequences:
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

657

658

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

659

def check_compact_only_fallback():
    """Assert that every compact fallback font has an elegant counterpart.

    For each fallback chain in `_fallback_chains`, every record whose variant
    is 'compact' must be accompanied, in the same chain, by a record with
    equal scripts whose variant is 'elegant'.

    Raises:
        AssertionError: if a compact record has no such elegant record.
    """
    for fallback_chain in _fallback_chains.values():
        for record in fallback_chain:
            if record.variant != 'compact':
                continue
            same_script_elegants = [
                x for x in fallback_chain
                if x.scripts == record.scripts and x.variant == 'elegant']
            # The message is a plain string; a trailing comma here would turn
            # it into a 1-tuple and garble the reported error.
            assert same_script_elegants, (
                '%s must be in elegant of %s as fallback of "%s" too' % (
                    record.font, record.scripts, record.fallback_for))

668

669

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

670

def check_vertical_metrics():
    """Assert that the core font families carry the expected vertical metrics.

    For every record in `_all_fonts`:
      * 'sans-serif' and 'sans-serif-condensed' must have head.yMax == 2163
        and head.yMin == -555;
      * those two plus 'serif' and 'monospace' must have hhea.ascent == 1900
        and hhea.descent == -500.

    Raises:
        AssertionError: if a font does not have the expected values.
    """
    for record in _all_fonts:
        if record.name in ['sans-serif', 'sans-serif-condensed']:
            font = open_font(record.font)
            assert font['head'].yMax == 2163 and font['head'].yMin == -555, (
                'yMax and yMin of %s do not match expected values.' % (
                    record.font,))

        if record.name in ['sans-serif', 'sans-serif-condensed',
                           'serif', 'monospace']:
            font = open_font(record.font)
            assert (font['hhea'].ascent == 1900 and
                    font['hhea'].descent == -500), (
                        'ascent and descent of %s do not match expected '
                        'values.' % (record.font,))

685

686

687

def check_cjk_punctuation():
    """Assert that no font ahead of the CJK fonts covers CJK punctuation.

    Walks each fallback chain in `_fallback_chains` in order. Until the first
    record whose scripts include Hans, Hant, Jpan or Kore is reached, each
    font is passed to assert_font_supports_none_of_chars() together with the
    code points U+3000..U+301F and the chain's name.
    """
    cjk_scripts = {'Hans', 'Hant', 'Jpan', 'Kore'}
    cjk_punctuation = range(0x3000, 0x301F + 1)
    for name, fallback_chain in _fallback_chains.items():
        for record in fallback_chain:
            if record.scripts.intersection(cjk_scripts):
                # CJK font seen. Stop checking the rest of the fonts.
                break
            assert_font_supports_none_of_chars(
                record.font, cjk_punctuation, name)

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

696

Seigo Nonaka

2021-04-16 00:11:43 -0700

[diff] [blame]

697

def getPostScriptName(font):
    """Return the PostScript name stored in a font file's name table.

    Args:
        font: a (font_file, index) pair. font_file is joined onto
            `_fonts_dir`; a non-None index means the file is a font
            collection.

    Returns:
        The string of the first name record with nameID 6, platformID 3,
        platEncID 1 and langID 0x0409, or None when there is no such record.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        # Use the first font file in the collection for resolving post script name.
        ttf = ttLib.TTFont(font_path, fontNumber=0)
    else:
        ttf = ttLib.TTFont(font_path)

    try:
        for name in ttf['name'].names:
            if (name.nameID == 6 and name.platformID == 3 and name.platEncID == 1
                    and name.langID == 0x0409):
                return str(name)
    finally:
        # Release the underlying file handle; this runs once per font record.
        ttf.close()
    return None

711

712

def check_canonical_name():
    """Assert that each font's PostScript name matches its declaration.

    For every record in `_all_fonts` the PostScript name read from the font's
    name table must equal the record's psName when one is set, and otherwise
    the font file name without its extension.

    Raises:
        AssertionError: if the names do not match.
    """
    for record in _all_fonts:
        file_name, _ = record.font

        psName = getPostScriptName(record.font)
        if record.psName:
            # If fonts element has postScriptName attribute, it should match with the PostScript
            # name in the name table.
            assert psName == record.psName, ('postScriptName attribute %s should match with %s' % (
                record.psName, psName))
        else:
            # If fonts element doesn't have postScriptName attribute, the file name should match
            # with the PostScript name in the name table. splitext() drops the extension
            # whatever its length instead of assuming four characters.
            assert psName == path.splitext(file_name)[0], ('file name %s should match with %s' % (
                file_name, psName))

727

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

728

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

729

def main():

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

730

global _fonts_dir

Doug Felt

2016-07-08 17:42:15 -0700

[diff] [blame]

731

target_out = sys.argv[1]

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

732

_fonts_dir = path.join(target_out, 'fonts')

733

734

fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')

735

parse_fonts_xml(fonts_xml_path)

736

Seigo Nonaka

2017-07-05 16:06:23 -0700

[diff] [blame]

737

check_compact_only_fallback()

738

Roozbeh Pournader

2016-07-27 13:08:37 -0700

[diff] [blame]

739

check_vertical_metrics()

740

Roozbeh Pournader

2016-03-09 23:08:45 -0800

[diff] [blame]

741

hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')

742

check_hyphens(hyphens_dir)

743

Roozbeh Pournader

2017-05-18 18:38:36 -0700

[diff] [blame]

744

check_cjk_punctuation()

745

Seigo Nonaka

2021-04-16 00:11:43 -0700

[diff] [blame]

746

check_canonical_name()

747

Roozbeh Pournader

27ec3ac

2016-03-31 13:05:32 -0700

[diff] [blame]

748

check_emoji = sys.argv[2]

749

if check_emoji == 'true':

750

ucd_path = sys.argv[3]

751

parse_ucd(ucd_path)

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

752

all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()

Seigo Nonaka

2021-05-04 17:12:35 -0700

[diff] [blame^]

753

check_emoji_compat()

Roozbeh Pournader

2016-03-31 13:54:56 -0700

[diff] [blame]

754

check_emoji_coverage(all_emoji, equivalent_emoji)

755

check_emoji_defaults(default_emoji)

Roozbeh Pournader

2016-03-16 13:53:47 -0700

[diff] [blame]

756

Roozbeh Pournader