blob: 36dd638905c34566ca421c38a2fc40f918deaefd [file] [log] [blame]
Greg Kroah-Hartmanb2441312017-11-01 15:07:57 +01001/* SPDX-License-Identifier: GPL-2.0 */
Adrian Bunkb00dc832008-05-19 16:52:27 -07002/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 * memscan.S: Optimized memscan for Sparc64.
4 *
5 * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
6 * Copyright (C) 1998 David S. Miller (davem@redhat.com)
7 */
8
Al Virod3867f042016-01-16 21:39:30 -05009 #include <asm/export.h>
10
Linus Torvalds1da177e2005-04-16 15:20:36 -070011#define HI_MAGIC 0x8080808080808080 /* bit 7 set in each of the eight bytes of a 64-bit word */
12#define LO_MAGIC 0x0101010101010101 /* bit 0 set in each byte; not referenced by name in the code below, which derives the same value as HI_MAGIC >> 7 */
13#define ASI_PL 0x88 /* address space identifier used by the ldxa loads below; NOTE(review): 0x88 should be ASI_PRIMARY_LITTLE in SPARC V9 - confirm */
14
15 .text
 /* Both routines are emitted into .text starting at a 32-aligned address. */
16 .align 32
 /* The two __memscan_* entry points are global, typed as functions and exported. */
17 .globl __memscan_zero, __memscan_generic
Nagarathnam Muthusamyf5a651f2017-06-19 13:08:50 -040018 .type __memscan_zero,#function
19 .type __memscan_generic,#function
 /* memscan is made global as well, but gets no .type and no EXPORT_SYMBOL in this section. */
Linus Torvalds1da177e2005-04-16 15:20:36 -070020 .globl memscan
Al Virod3867f042016-01-16 21:39:30 -050021 EXPORT_SYMBOL(__memscan_zero)
22 EXPORT_SYMBOL(__memscan_generic)
Linus Torvalds1da177e2005-04-16 15:20:36 -070023
24__memscan_zero:
 /*
  * Find the first zero byte in a buffer.
  *
  * In:   %o0 = bufp (start address), %o1 = size in bytes.
  * Out:  %o0 = address of the first zero byte, or bufp + size when no
  *       zero byte lies inside the buffer.  When size <= 0 the input
  *       %o0 is returned unchanged.
  * Uses: %o1-%o5, %g2, %g3, %g7 and the integer condition codes as
  *       scratch.  Leaf routine: returns with retl, no register window.
  *
  * Method: step byte by byte until the pointer is 8-byte aligned, then
  * test one 64-bit word per iteration with ((w - LO) ^ w) & HI.  That
  * test always flags a word containing a zero byte but can also flag
  * words without one (e.g. a 0x80 byte), so flagged words are verified
  * byte by byte in check_bytes.  The word loop always loads a full
  * aligned 8-byte word, so it may read (but never report) bytes beyond
  * bufp + size; the result is clamped before returning.
  *
  * Indented-looking instructions after a branch are its delay slot;
  * with the ",a" (annul) suffix the slot only executes when the
  * conditional branch is taken.
  */
25 /* %o0 = bufp, %o1 = size */
26 brlez,pn %o1, szzero /* size <= 0: nothing to scan */
27 andcc %o0, 7, %g0 /* delay slot: icc.Z = pointer is 8-byte aligned */
28 be,pt %icc, we_are_aligned
29 sethi %hi(HI_MAGIC), %o4 /* delay slot (both paths): start building HI_MAGIC */
30 ldub [%o0], %o5 /* first byte of the unaligned prefix */
 /* Prefix loop: one byte per pass until aligned, found, or size exhausted. */
311: subcc %o1, 1, %o1 /* size--, sets xcc for the "be" below */
32 brz,pn %o5, 10f /* byte is zero: found (branch on register, cc untouched) */
33 add %o0, 1, %o0 /* delay slot: advance pointer; label 10 undoes this */
34
35 be,pn %xcc, szzero /* size hit zero: %o0 is now bufp + size */
36 andcc %o0, 7, %g0 /* delay slot: re-test alignment */
37 bne,a,pn %icc, 1b /* still unaligned: next prefix byte */
38 ldub [%o0], %o5 /* annulled slot: loaded only when looping */
39we_are_aligned:
40 ldxa [%o0] ASI_PL, %o5 /* first aligned word; the byte checks below treat bits 7:0 as the lowest address */
41 or %o4, %lo(HI_MAGIC), %o3 /* %o3 = low 32 bits of HI_MAGIC */
42 sllx %o3, 32, %o4
43 or %o4, %o3, %o3 /* %o3 = full 64-bit HI_MAGIC */
44
45 srlx %o3, 7, %o2 /* %o2 = 0x01 in every byte (same value as LO_MAGIC) */
 /* Word loop invariant: %o0 + %o1 == bufp + size; %o5 holds the word at %o0 - 8 once advanced. */
46msloop:
47 sub %o1, 8, %o1 /* remaining -= 8 (may go negative on the last word) */
48 add %o0, 8, %o0 /* %o0 now points just past the word in %o5 */
49 sub %o5, %o2, %o4
50 xor %o4, %o5, %o4 /* %o4 = (w - LO) ^ w */
51 andcc %o4, %o3, %g3 /* keep bit 7 of each byte; icc = low half, xcc = whole word */
52 bne,pn %xcc, check_bytes /* some byte flagged: verify it */
53 srlx %o4, 32, %g3 /* delay slot: %g3 = upper half of the xor result (cc unchanged) */
54
55 brgz,a,pt %o1, msloop /* nothing flagged and bytes remain: next word */
56 ldxa [%o0] ASI_PL, %o5 /* annulled slot: load next word only when looping */
 /*
  * Reached with a flagged word, or by fall-through when the buffer is
  * exhausted without a flag (then icc.Z is set and the upper-half path
  * runs, finds nothing, and the clamp at label 1 returns bufp + size).
  * %g2 tracks the address of the byte being tested; each "inc %g2"
  * sits in a non-annulled delay slot and therefore always executes.
  */
57check_bytes:
58 bne,a,pn %icc, 2f /* low 32 bits flagged: check bytes 0-3 first */
59 andcc %o5, 0xff, %g0 /* annulled slot: test byte 0 */
60 add %o0, -5, %g2 /* only upper half flagged: %g2 = (address of byte 4) - 1 */
61 ba,pt %xcc, 3f
62 srlx %o5, 32, %g7 /* delay slot: %g7 = bytes 4-7 */
63
642: srlx %o5, 8, %g7 /* %g7 = word with byte 0 shifted out */
65 be,pn %icc, 1f /* byte 0 is zero */
66 add %o0, -8, %g2 /* delay slot: %g2 = address of byte 0 */
67 andcc %g7, 0xff, %g0 /* test byte 1 */
68 srlx %g7, 8, %g7
69 be,pn %icc, 1f
70 inc %g2 /* delay slot: %g2 = address of byte 1 */
71 andcc %g7, 0xff, %g0 /* test byte 2 */
72
73 srlx %g7, 8, %g7
74 be,pn %icc, 1f
75 inc %g2 /* delay slot: %g2 = address of byte 2 */
76 andcc %g7, 0xff, %g0 /* test byte 3 */
77 srlx %g7, 8, %g7 /* %g7 now holds bytes 4-7 */
78 be,pn %icc, 1f
79 inc %g2 /* delay slot: %g2 = address of byte 3 */
80 andcc %g3, %o3, %g0 /* is anything flagged in the upper half? */
81
82 be,a,pn %icc, 2f /* no: skip bytes 4-7 */
83 mov %o0, %g2 /* annulled slot: %g2 = address just past this word */
 /* Check bytes 4-7; on entry %g2 is one less than the address of byte 4. */
843: andcc %g7, 0xff, %g0 /* test byte 4 */
85 srlx %g7, 8, %g7
86 be,pn %icc, 1f
87 inc %g2 /* delay slot: %g2 = address of byte 4 */
88 andcc %g7, 0xff, %g0 /* test byte 5 */
89 srlx %g7, 8, %g7
90
91 be,pn %icc, 1f
92 inc %g2 /* delay slot: %g2 = address of byte 5 */
93 andcc %g7, 0xff, %g0 /* test byte 6 */
94 srlx %g7, 8, %g7
95 be,pn %icc, 1f
96 inc %g2 /* delay slot: %g2 = address of byte 6 */
97 andcc %g7, 0xff, %g0 /* test byte 7 */
98 srlx %g7, 8, %g7
99
100 be,pn %icc, 1f
101 inc %g2 /* delay slot: %g2 = address of byte 7 */
 /* No zero byte in this word after all (the flag was a false positive). */
1022: brgz,a,pt %o1, msloop /* bytes remain: resume the word loop */
103 ldxa [%o0] ASI_PL, %o5 /* annulled slot: load next word only when looping */
104 inc %g2 /* buffer exhausted: %g2 becomes >= the end address */
 /* Common exit: %g2 = candidate address; clamp it to the end of the buffer. */
1051: add %o0, %o1, %o0 /* %o0 = bufp + size (by the loop invariant) */
106 cmp %g2, %o0
107 retl
108
109 movle %xcc, %g2, %o0 /* delay slot of retl: return %g2 if it is <= end (signed compare), else end */
 /* Zero byte found in the unaligned prefix: back out the delay-slot increment. */
11010: retl
111 sub %o0, 1, %o0 /* delay slot: %o0 = address of the zero byte */
 /* Return %o0 as it stands (size <= 0 on entry, or prefix consumed the whole buffer). */
112szzero: retl
113 nop
114
115memscan:
116__memscan_generic:
 /*
  * Find the first byte equal to c in a buffer (both labels name the
  * same entry point).
  *
  * In:   %o0 = addr, %o1 = c, %o2 = size in bytes.
  * Out:  %o0 = address of the first matching byte, or addr + size when
  *       none matches.  When size == 0 the input %o0 is returned as is.
  * Uses: %o3 (end address), %o4 (negative index counting up to 0),
  *       %o5 (current byte) and the integer condition codes.
  *
  * Each byte is zero-extended by ldub and compared against %o1 with a
  * 32-bit (icc) compare.  Instructions directly after a branch are its
  * delay slot; with ",a" the slot runs only when the branch is taken.
  */
117 /* %o0 = addr, %o1 = c, %o2 = size */
118 brz,pn %o2, 3f /* size == 0: return addr unchanged */
119 add %o0, %o2, %o3 /* delay slot: %o3 = addr + size (end) */
120 ldub [%o0], %o5 /* first byte */
121 sub %g0, %o2, %o4 /* %o4 = -size, so %o3 + %o4 == addr */
1221:
123 cmp %o5, %o1
124 be,pn %icc, 2f /* match found */
125 addcc %o4, 1, %o4 /* delay slot (always runs): index++, sets xcc for the loop test */
126 bne,a,pt %xcc, 1b /* index != 0: more bytes to examine */
127 ldub [%o3 + %o4], %o5 /* annulled slot: load next byte only when looping */
 /*
  * Not found: index is 0 here.  This retl has no instruction of its own
  * in the delay slot; the "add" at label 2 fills it, giving %o0 = end.
  */
128 retl
129 /* The delay slot is the same as the next insn, this is just to make it look more awful */
 /* Found: index was already incremented past the match, hence the "sub 1" below. */
1302:
131 add %o3, %o4, %o0 /* %o0 = end + index */
132 retl
133 sub %o0, 1, %o0 /* delay slot: step back to the matching byte */
1343:
135 retl
136 nop