Blame - arch/arm64/lib/strncmp.S - SHIFTPHONES/mainline/linux

blob: f571581888fa4ae654f669b9b8232cf78a78ad47 [file] [log] [blame]

Thomas Gleixner	caab277	2019-06-03 07:44:50 +0200	[diff] [blame^]	1	/* SPDX-License-Identifier: GPL-2.0-only */
zhichang.yuan	192c4d9	2014-04-28 13:11:33 +0800	[diff] [blame]	2	/*
				3	* Copyright (C) 2013 ARM Ltd.
				4	* Copyright (C) 2013 Linaro.
				5	*
				6	* This code is based on glibc cortex strings work originally authored by Linaro.
zhichang.yuan	192c4d9	2014-04-28 13:11:33 +0800	[diff] [blame]	7	* The original code can be found @
				8	*
				9	* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
				10	* files/head:/src/aarch64/
zhichang.yuan	192c4d9	2014-04-28 13:11:33 +0800	[diff] [blame]	11	*/
				12
				13	#include <linux/linkage.h>
				14	#include <asm/assembler.h>
				15
				16	/*
				17	* compare two strings
				18	*
				19	* Parameters:
				20	* x0 - const string 1 pointer
				21	* x1 - const string 2 pointer
				22	* x2 - the maximal length to be compared
				23	* Returns:
				24	* x0 - an integer less than, equal to, or greater than zero if s1 is found,
				25	* respectively, to be less than, to match, or be greater than s2.
				26	*/
				27
				28	#define REP8_01 0x0101010101010101
				29	#define REP8_7f 0x7f7f7f7f7f7f7f7f
				30	#define REP8_80 0x8080808080808080
				31
				32	/* Parameters and result. */
				33	src1 .req x0
				34	src2 .req x1
				35	limit .req x2
				36	result .req x0
				37
				38	/* Internal variables. */
				39	data1 .req x3
				40	data1w .req w3
				41	data2 .req x4
				42	data2w .req w4
				43	has_nul .req x5
				44	diff .req x6
				45	syndrome .req x7
				46	tmp1 .req x8
				47	tmp2 .req x9
				48	tmp3 .req x10
				49	zeroones .req x11
				50	pos .req x12
				51	limit_wd .req x13
				52	mask .req x14
				53	endloop .req x15
				54
WEAK(strncmp)
	/*
	 * Dispatch on limit and on the relative alignment of the pointers:
	 *   limit == 0                        -> .Lret0
	 *   different offsets within a Dword  -> .Lmisaligned8
	 *   same, non-zero offset             -> .Lmutual_align
	 *   both 8-byte aligned               -> fall through
	 */
	cbz	limit, .Lret0
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7
	b.ne	.Lmutual_align
	/*
	 * Calculate the number of full and partial Dwords, minus one.
	 * The -1 matters when limit is a multiple of 8: without it the
	 * last Dword would not be recognised as the last one.
	 */
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow. */
	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords. */

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	subs	limit_wd, limit_wd, #1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found. */
	/* endloop = diff, or all-ones once limit_wd has gone negative. */
	csinv	endloop, diff, xzr, pl	/* Last Dword or differences. */
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator. */
	/* Keep looping only if there is no NUL and endloop is zero. */
	ccmp	endloop, #0, #0, eq
	b.eq	.Lloop_aligned

	/* Not reached the limit, must have found the end or a diff. */
	tbz	limit_wd, #63, .Lnot_limit

	/* Limit % 8 == 0 => all bytes significant. */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	/*
	 * Only the first (limit % 8) bytes of this Dword count. Build a
	 * mask covering the bytes beyond the limit and clear them in both
	 * data words.
	 */
	lsl	limit, limit, #3	/* Bytes -> bits. */
	mov	mask, #~0
CPU_BE( lsr	mask, mask, limit )
CPU_LE( lsl	mask, mask, limit )
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* Make sure that the NUL byte is marked in the syndrome. */
	orr	has_nul, has_nul, mask

.Lnot_limit:
	orr	syndrome, diff, has_nul
	b	.Lcal_cmpresult

.Lmutual_align:
	/*
	 * Sources are mutually aligned, but are not currently at an
	 * alignment boundary. Round down the addresses and then mask off
	 * the bytes that precede the start point.
	 * We also need to adjust the limit calculations, but without
	 * overflowing if the limit is near ULONG_MAX.
	 */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	neg	tmp3, tmp1, lsl #3	/* 64 - bits(bytes beyond align). */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow. */
	/* Big-endian. Early bytes are at MSB. */
CPU_BE( lsl	tmp2, tmp2, tmp3 )	/* Shift (tmp1 & 63). */
	/* Little-endian. Early bytes are at LSB. */
CPU_LE( lsr	tmp2, tmp2, tmp3 )	/* Shift (tmp1 & 63). */

	and	tmp3, limit_wd, #7
	lsr	limit_wd, limit_wd, #3
	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
	add	limit, limit, tmp1
	add	tmp3, tmp3, tmp1
	/*
	 * Force the bytes that precede the start point to all-ones in both
	 * words, so they compare equal and are never seen as a NUL.
	 */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	add	limit_wd, limit_wd, tmp3, lsr #3
	b	.Lstart_realigned

	/* When src1's offset is not equal to src2's offset... */
.Lmisaligned8:
	cmp	limit, #8
	b.lo	.Ltiny8proc	/* limit < 8: compare byte by byte. */
	/*
	 * Get the align offset length to compare per byte first.
	 * After this process, one string's address will be aligned.
	 * tmp1 / tmp2 = number of bytes (1..8) up to the next 8-byte
	 * boundary of src1 / src2.
	 */
	and	tmp1, src1, #7
	neg	tmp1, tmp1
	add	tmp1, tmp1, #8
	and	tmp2, src2, #7
	neg	tmp2, tmp2
	add	tmp2, tmp2, #8
	subs	tmp3, tmp1, tmp2
	csel	pos, tmp1, tmp2, hi	/* Choose the maximum. */
	/*
	 * Here, limit is not less than 8, so directly run .Ltinycmp
	 * without checking the limit.
	 */
	sub	limit, limit, pos
.Ltinycmp:
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	pos, pos, #1
	/* Loop while pos != 0, data1 is not NUL, and data1 == data2. */
	ccmp	data1w, #1, #0, ne	/* NZCV = 0b0000. */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000. */
	b.eq	.Ltinycmp
	cbnz	pos, 1f		/* Stopped early: found a NUL or a mismatch. */
	/* pos reached zero: the last byte pair has not been judged yet. */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs
	b.eq	.Lstart_align	/* The last bytes are equal and not NUL. */
1:
	sub	result, data1, data2
	ret

.Lstart_align:
	lsr	limit_wd, limit, #3
	cbz	limit_wd, .Lremain8
	/* Process more leading bytes to make src1 aligned... */
	ands	xzr, src1, #7
	b.eq	.Lrecal_offset
	/*
	 * src1 is not aligned here, so pos was tmp2 (> tmp1) and
	 * tmp3 = tmp1 - tmp2 is negative: step both pointers back so that
	 * src1 lands on its alignment boundary. The bytes read again have
	 * already compared equal, and limit is re-credited for them below.
	 */
	add	src1, src1, tmp3
	add	src2, src2, tmp3
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	sub	limit, limit, tmp3
	lsr	limit_wd, limit, #3
	subs	limit_wd, limit_wd, #1

	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found. */
	csinv	endloop, diff, xzr, ne	/* If limit_wd is 0, finish the cmp. */
	bics	has_nul, tmp1, tmp2
	ccmp	endloop, #0, #0, eq	/* has_nul is zero: no NUL byte. */
	b.ne	.Lunequal_proc
	/* How far is the current src2 from the alignment boundary... */
	and	tmp3, tmp3, #7
.Lrecal_offset:
	neg	pos, tmp3
.Lloopcmp_proc:
	/*
	 * Divide the eight bytes into two parts. First, step back from
	 * src2 to an alignment boundary (pos is the negative offset), load
	 * eight bytes from there and compare them with the corresponding
	 * bytes from src1.
	 * If all 8 bytes are equal, then start the second part's comparison.
	 * Otherwise finish the comparison.
	 * This special handling is meant to guarantee that all accesses
	 * stay within the thread/task space, avoiding out-of-range accesses.
	 */
	ldr	data1, [src1,pos]
	ldr	data2, [src2,pos]
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator. */
	eor	diff, data1, data2	/* Non-zero if differences found. */
	/* endloop = diff when there is no NUL, otherwise all-ones. */
	csinv	endloop, diff, xzr, eq
	cbnz	endloop, .Lunequal_proc

	/* The second part: eight bytes starting at the aligned src1. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	subs	limit_wd, limit_wd, #1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found. */
	csinv	endloop, diff, xzr, ne	/* If limit_wd is 0, finish the cmp. */
	bics	has_nul, tmp1, tmp2
	ccmp	endloop, #0, #0, eq	/* has_nul is zero: no NUL byte. */
	b.eq	.Lloopcmp_proc

.Lunequal_proc:
	orr	syndrome, diff, has_nul
	/* No diff and no NUL: only the Dword budget ran out. */
	cbz	syndrome, .Lremain8
.Lcal_cmpresult:
	/*
	 * On little-endian, reverse the byte order so that the first
	 * string byte sits in the most significant byte; CLZ can then find
	 * the first marked byte of the syndrome.
	 */
CPU_LE( rev	syndrome, syndrome )
CPU_LE( rev	data1, data1 )
CPU_LE( rev	data2, data2 )
	/*
	 * For big-endian we cannot use the trick with the syndrome value
	 * as carry-propagation can corrupt the upper bits if the trailing
	 * bytes in the string contain 0x01.
	 * However, if there is no NUL byte in the dword, we can generate
	 * the result directly. We can't just subtract the bytes as the
	 * MSB might be significant.
	 */
CPU_BE( cbnz	has_nul, 1f )
CPU_BE( cmp	data1, data2 )
CPU_BE( cset	result, ne )
CPU_BE( cneg	result, result, lo )
CPU_BE( ret )
CPU_BE( 1: )
	/* Re-compute the NUL-byte detection, using a byte-reversed value. */
CPU_BE( rev	tmp3, data1 )
CPU_BE( sub	tmp1, tmp3, zeroones )
CPU_BE( orr	tmp2, tmp3, #REP8_7f )
CPU_BE( bic	has_nul, tmp1, tmp2 )
CPU_BE( rev	has_nul, has_nul )
CPU_BE( orr	syndrome, diff, has_nul )
	/*
	 * The MS-non-zero bit of the syndrome marks either the first bit
	 * that is different, or the top bit of the first zero byte.
	 * Shifting left now will bring the critical information into the
	 * top bits.
	 */
	clz	pos, syndrome
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/*
	 * But we need to zero-extend (char is unsigned) each value before
	 * subtracting, so that the result is the signed difference of the
	 * two deciding bytes.
	 */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret

.Lremain8:
	/* Limit % 8 == 0 => all bytes significant. */
	ands	limit, limit, #7
	b.eq	.Lret0
.Ltiny8proc:
	/* Byte-by-byte compare of the remaining limit bytes. */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1

	/* Loop while limit != 0, data1 is not NUL, and data1 == data2. */
	ccmp	data1w, #1, #0, ne	/* NZCV = 0b0000. */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000. */
	b.eq	.Ltiny8proc
	sub	result, data1, data2
	ret

.Lret0:
	mov	result, #0
	ret
ENDPIPROC(strncmp)
EXPORT_SYMBOL_NOKASAN(strncmp)