Blame - fs/unicode/utf8n.h - SHIFTPHONES/kernel/shift/mainline

blob: 0acd530c2c791e8247418bfac3d001a622fa170e [file] [log] [blame]

Thomas Gleixner	9f80685	2019-05-29 07:18:08 -0700	[diff] [blame]	1	/* SPDX-License-Identifier: GPL-2.0-only */
Olaf Weber	44594c2	2019-04-25 13:45:46 -0400	[diff] [blame]	2	/*
				3	* Copyright (c) 2014 SGI.
				4	* All rights reserved.
Olaf Weber	44594c2	2019-04-25 13:45:46 -0400	[diff] [blame]	5	*/
				6
				7	#ifndef UTF8NORM_H
				8	#define UTF8NORM_H
				9
				10	#include <linux/types.h>
				11	#include <linux/export.h>
				12	#include <linux/string.h>
				13	#include <linux/module.h>
				14
				15	/* Encoding a unicode version number as a single unsigned int: MAJ in bits 16 and up, MIN in bits 8-15, REV in the low bits. */
				16	#define UNICODE_MAJ_SHIFT (16)
				17	#define UNICODE_MIN_SHIFT (8)
				18
				19	#define UNICODE_AGE(MAJ, MIN, REV) \
				20	(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
				21	((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
				22	((unsigned int)(REV)))
				23
				24	/* Queries on the unicode versions supported by the data tables: a support check for (maj, min, rev), and the highest supported version (presumably UNICODE_AGE()-encoded; confirm in the implementation). */
				25	extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
Gabriel Krisman Bertazi	9d53690	2019-04-25 13:51:22 -0400	[diff] [blame]	26	extern int utf8version_latest(void);
Olaf Weber	44594c2	2019-04-25 13:45:46 -0400	[diff] [blame]	27
				28	/*
				29	* Look for the correct const struct utf8data for a unicode version.
				30	* Returns NULL if the version requested (maxage) is too new.
				31	*
				32	* Two normalization forms are supported: nfdi and nfdicf.
				33	*
				34	* nfdi:
				35	* - Apply unicode normalization form NFD.
				36	* - Remove any Default_Ignorable_Code_Point.
				37	*
				38	* nfdicf:
				39	* - Apply unicode normalization form NFD.
				40	* - Remove any Default_Ignorable_Code_Point.
				41	* - Apply a full casefold (C + F).
				42	*/
				43	extern const struct utf8data *utf8nfdi(unsigned int maxage);
				44	extern const struct utf8data *utf8nfdicf(unsigned int maxage);
				45
				46	/*
				47	* Determine the maximum age of any unicode character in the string.
				48	* Returns 0 if only unassigned code points are present.
				49	* Returns -1 if the input is not valid UTF-8.
				50	*/
				51	extern int utf8agemax(const struct utf8data *data, const char *s);
				52	extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
				53
				54	/*
				55	* Determine the minimum age of any unicode character in the string.
				56	* Returns 0 if any unassigned code points are present.
				57	* Returns -1 if the input is not valid UTF-8.
				58	*/
				59	extern int utf8agemin(const struct utf8data *data, const char *s);
				60	extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
				61
				62	/*
				63	* Determine the length of the normalized form of the string,
				64	* excluding any terminating NUL byte.
				65	* Returns 0 if only ignorable code points are present.
				66	* Returns -1 if the input is not valid UTF-8.
				67	*/
				68	extern ssize_t utf8len(const struct utf8data *data, const char *s);
				69	extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
				70
Olaf Weber	a8384c6	2019-04-25 13:49:18 -0400	[diff] [blame]	71	/* Needed in struct utf8cursor below: size in bytes of its hangul[] buffer. */
				72	#define UTF8HANGULLEAF (12)
				73
Olaf Weber	44594c2	2019-04-25 13:45:46 -0400	[diff] [blame]	74	/*
				75	* Cursor structure used by the normalizer: set up by utf8cursor() or utf8ncursor(), then passed to utf8byte().
				76	*/
				77	struct utf8cursor {
				78	const struct utf8data *data;
				79	const char *s;
				80	const char *p;
				81	const char *ss;
				82	const char *sp;
				83	unsigned int len;
				84	unsigned int slen;
				85	short int ccc;
				86	short int nccc;
Olaf Weber	a8384c6	2019-04-25 13:49:18 -0400	[diff] [blame]	87	unsigned char hangul[UTF8HANGULLEAF];
Olaf Weber	44594c2	2019-04-25 13:45:46 -0400	[diff] [blame]	88	};
				89
				90	/*
				91	* Initialize a utf8cursor to normalize a string.
				92	* Returns 0 on success.
				93	* Returns -1 on failure.
				94	*/
				95	extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
				96	const char *s);
				97	extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
				98	const char *s, size_t len);
				99
				100	/*
				101	* Get the next byte in the normalization, using a cursor set up by utf8cursor() or utf8ncursor().
				102	* Returns a value > 0 && < 256 on success.
				103	* Returns 0 when the end of the normalization is reached.
				104	* Returns -1 if the string being normalized is not valid UTF-8.
				105	*/
				106	extern int utf8byte(struct utf8cursor *u8c);
				107
				108	#endif /* UTF8NORM_H */