blob: 5efee1f4be366f3b9f24c6b434523653664ae26f [file] [log] [blame]
Adrian Bunkb00dc832008-05-19 16:52:27 -07001/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07002 * memscan.S: Optimized memscan for Sparc64.
3 *
4 * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
5 * Copyright (C) 1998 David S. Miller (davem@redhat.com)
6 */
7
Al Virod3867f042016-01-16 21:39:30 -05008 #include <asm/export.h>
9
/*
 * HI_MAGIC: 0x80 in each of the eight bytes of a 64-bit word.
 * __memscan_zero rebuilds this value in a register (via %hi/%lo of the
 * low 32 bits, then a shift and or) and uses it as the mask of its
 * zero-byte test.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070010#define HI_MAGIC 0x8080808080808080
/*
 * LO_MAGIC: 0x01 in each of the eight bytes.  Not referenced by name in
 * the code visible here; __memscan_zero derives the same value at run
 * time as HI_MAGIC >> 7.
 */
11#define LO_MAGIC 0x0101010101010101
/*
 * ASI_PL: address space identifier given to the ldxa loads in
 * __memscan_zero.  The byte checks there treat the lowest-addressed byte
 * as the least significant one, i.e. a little-endian load.
 * NOTE(review): 0x88 should be ASI_PRIMARY_LITTLE in SPARC V9 - confirm
 * against the architecture manual / asm/asi.h.
 */
12#define ASI_PL 0x88
13
/* Everything below is code; start it on a 32-byte boundary. */
14 .text
15 .align 32
/* Both entry points are global and typed as function symbols. */
16 .globl __memscan_zero, __memscan_generic
Nagarathnam Muthusamyf5a651f2017-06-19 13:08:50 -040017 .type __memscan_zero,#function
18 .type __memscan_generic,#function
/*
 * memscan is a second global label placed on the same instruction as
 * __memscan_generic further down.  It gets no .type or EXPORT_SYMBOL in
 * the part of the file visible here.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070019 .globl memscan
/*
 * EXPORT_SYMBOL comes from <asm/export.h>.
 * NOTE(review): presumably this makes the symbols available to loadable
 * modules - confirm against that header.
 */
Al Virod3867f042016-01-16 21:39:30 -050020 EXPORT_SYMBOL(__memscan_zero)
21 EXPORT_SYMBOL(__memscan_generic)
Linus Torvalds1da177e2005-04-16 15:20:36 -070022
/*
 * __memscan_zero - find the first zero byte in a buffer.
 *
 * In:   %o0 = bufp  (start address)
 *       %o1 = size  (byte count; a value <= 0 returns bufp unchanged)
 * Out:  %o0 = address of the first zero byte inside [bufp, bufp + size),
 *             or bufp + size when there is none.
 * Uses: %o1-%o5, %g2, %g3, %g7 and the integer condition codes.
 *       Returns with retl and does not touch the stack.
 *
 * Strategy: test single bytes until the pointer is 8-byte aligned, then
 * test 8 bytes per iteration with a bit trick that flags words which may
 * contain a zero byte; a flagged word is re-checked byte by byte.  The
 * word loop always loads a whole aligned 8-byte word, so it can read up
 * to 7 bytes past bufp + size; a hit beyond the end is clamped to the end
 * address at the final local label 1.
 *
 * Invariant: every update adds to %o0 exactly what it subtracts from
 * %o1, so %o0 + %o1 always equals bufp + size (the end address).
 *
 * Reading notes: the instruction after each branch is its delay slot and
 * executes before the branch takes effect; with the ",a" (annul) suffix
 * the delay slot is skipped when a conditional branch is not taken.
 * Local labels 1 and 2 are each defined twice; "1b"/"1f"/"2f" pick the
 * nearest definition backward/forward.
 * NOTE(review): the digits fused to the front of every line (for example
 * "301:" is line 30 followed by local label "1:") look like line-number
 * residue from a web blame view rather than part of the program.
 */
23__memscan_zero:
24 /* %o0 = bufp, %o1 = size */
/* size <= 0: nothing to scan, return bufp untouched. */
25 brlez,pn %o1, szzero
/* (delay slot) Is bufp already 8-byte aligned? */
26 andcc %o0, 7, %g0
27 be,pt %icc, we_are_aligned
/* (delay slot, runs on both paths) upper bits of the 0x80808080 pattern. */
28 sethi %hi(HI_MAGIC), %o4
/* Unaligned head: one byte per iteration until aligned or out of data. */
29 ldub [%o0], %o5
/* Local label 1: count the byte; these flags feed the "be %xcc" below. */
301: subcc %o1, 1, %o1
/* Zero byte found -> label 10.  brz tests the register, not the flags. */
32 brz,pn %o5, 10f
/* (delay slot) advance the pointer; label 10 steps back by one again. */
32 add %o0, 1, %o0
33
/* size used up without a hit: %o0 is now bufp + size, return it. */
34 be,pn %xcc, szzero
/* (delay slot) is the advanced pointer aligned yet? */
35 andcc %o0, 7, %g0
/* Still unaligned: fetch the next byte (delay slot, only when looping). */
36 bne,a,pn %icc, 1b
37 ldub [%o0], %o5
38we_are_aligned:
/* First aligned word; lowest-addressed byte lands in the low 8 bits. */
39 ldxa [%o0] ASI_PL, %o5
/* %o3 = 0x80808080, then replicated into both halves: 0x8080808080808080. */
40 or %o4, %lo(HI_MAGIC), %o3
41 sllx %o3, 32, %o4
42 or %o4, %o3, %o3
43
/* %o2 = 0x0101010101010101 (same value as LO_MAGIC). */
44 srlx %o3, 7, %o2
45msloop:
/* Account for the 8 bytes in %o5 up front; %o1 may drop to <= 0 here. */
46 sub %o1, 8, %o1
47 add %o0, 8, %o0
/*
 * ((word - 0x01..01) ^ word) & 0x80..80 is non-zero when the subtraction
 * flipped the top bit of some byte.  That always happens for a zero byte
 * but can also happen for non-zero bytes (e.g. 0x80), hence the exact
 * per-byte check at check_bytes.
 */
48 sub %o5, %o2, %o4
49 xor %o4, %o5, %o4
50 andcc %o4, %o3, %g3
51 bne,pn %xcc, check_bytes
/* (delay slot, always runs) %g3 = upper 32 bits of the unmasked xor result. */
52 srlx %o4, 32, %g3
53
/* Nothing flagged: load the next word and loop while bytes remain. */
54 brgz,a,pt %o1, msloop
55 ldxa [%o0] ASI_PL, %o5
56check_bytes:
/*
 * Reached with a flagged word, or by fall-through after the last word.
 * The flags still come from the andcc in msloop; %icc covers the low 32
 * bits of the masked result, i.e. bytes 0-3 (the lowest addresses).
 */
/* Low half flagged: test byte 0 in the delay slot and go to label 2. */
57 bne,a,pn %icc, 2f
58 andcc %o5, 0xff, %g0
/* Low half clean: check bytes 4-7 only.  %g2 = address of byte 4, minus 1. */
59 add %o0, -5, %g2
60 ba,pt %xcc, 3f
/* (delay slot) %g7 = bytes 4-7. */
61 srlx %o5, 32, %g7
62
/*
 * Local label 2: exact check of bytes 0-3.  Each step tests the low byte
 * of %g7, shifts the next byte down, and keeps %g2 equal to the address
 * of the byte whose test result is being branched on.
 */
632: srlx %o5, 8, %g7
/* Byte 0 is zero -> finish at label 1. */
64 be,pn %icc, 1f
/* (delay slot) %g2 = address of byte 0 (%o0 already points past the word). */
65 add %o0, -8, %g2
/* Byte 1. */
66 andcc %g7, 0xff, %g0
67 srlx %g7, 8, %g7
68 be,pn %icc, 1f
69 inc %g2
/* Byte 2. */
70 andcc %g7, 0xff, %g0
71
72 srlx %g7, 8, %g7
73 be,pn %icc, 1f
74 inc %g2
/* Byte 3; after the shift %g7 holds bytes 4-7 and %g2 ends at %o0 - 5. */
75 andcc %g7, 0xff, %g0
76 srlx %g7, 8, %g7
77 be,pn %icc, 1f
78 inc %g2
/* Was anything flagged in the upper half (bytes 4-7)? */
79 andcc %g3, %o3, %g0
80
/* No: skip to the second label 2 with %g2 = %o0 (delay slot, only if taken). */
81 be,a,pn %icc, 2f
82 mov %o0, %g2
/*
 * Local label 3: exact check of bytes 4-7.  On entry %g7 holds those four
 * bytes and %g2 = %o0 - 5, so the first inc makes %g2 the address of byte 4.
 */
833: andcc %g7, 0xff, %g0
84 srlx %g7, 8, %g7
85 be,pn %icc, 1f
86 inc %g2
/* Byte 5. */
87 andcc %g7, 0xff, %g0
88 srlx %g7, 8, %g7
89
90 be,pn %icc, 1f
91 inc %g2
/* Byte 6. */
92 andcc %g7, 0xff, %g0
93 srlx %g7, 8, %g7
94 be,pn %icc, 1f
95 inc %g2
/* Byte 7. */
96 andcc %g7, 0xff, %g0
97 srlx %g7, 8, %g7
98
99 be,pn %icc, 1f
100 inc %g2
/* Local label 2: no zero byte in this word.  Bytes left -> reload and loop. */
1012: brgz,a,pt %o1, msloop
102 ldxa [%o0] ASI_PL, %o5
/* Out of data: %g2 is %o0 - 1 or %o0 here; bump it so it is >= the end. */
103 inc %g2
/* Local label 1: %g2 = candidate address; turn %o0 into the end address. */
1041: add %o0, %o1, %o0
105 cmp %g2, %o0
106 retl
107
/* (delay slot of retl) return %g2 if %g2 <= end (signed 64-bit), else end. */
108 movle %xcc, %g2, %o0
/* Local label 10: hit in the byte-wise head; undo the pointer advance. */
10910: retl
110 sub %o0, 1, %o0
/* szzero: return %o0 as is (bufp for size <= 0, or the end address). */
111szzero: retl
112 nop
113
/*
 * memscan / __memscan_generic - find the first byte equal to a value.
 * Both labels name the same entry point.
 *
 * In:   %o0 = addr  (start address)
 *       %o1 = c     (value to look for)
 *       %o2 = size  (byte count)
 * Out:  %o0 = address of the first byte in [addr, addr + size) equal to
 *             c, or addr + size when there is none (addr if size == 0).
 * Uses: %o3, %o4, %o5 and the integer condition codes.
 *       Returns with retl and does not touch the stack.
 *
 * The byte is loaded zero-extended and the match is decided on %icc, so
 * only the low 32 bits of c take part; a c whose low 32 bits are outside
 * 0..255 never matches.
 *
 * The loop walks a negative index %o4 from -size up to 0 relative to the
 * end address in %o3, so a single addcc both advances and detects the end.
 *
 * Reading notes: the instruction after each branch is its delay slot and
 * executes before the branch takes effect; with the ",a" (annul) suffix
 * the delay slot is skipped when a conditional branch is not taken.
 * NOTE(review): the digits fused to the front of every line (for example
 * "1211:" is line 121 followed by local label "1:") look like line-number
 * residue from a web blame view rather than part of the program.
 */
114memscan:
115__memscan_generic:
116 /* %o0 = addr, %o1 = c, %o2 = size */
/* size == 0: return addr unchanged via label 3. */
117 brz,pn %o2, 3f
/* (delay slot) %o3 = addr + size, the end address. */
118 add %o0, %o2, %o3
/* First byte, and %o4 = -size as the index relative to the end. */
119 ldub [%o0], %o5
120 sub %g0, %o2, %o4
/* Local label 1: compare the current byte with c. */
1211:
122 cmp %o5, %o1
123 be,pn %icc, 2f
/* (delay slot, always runs) step the index; these flags drive the bne. */
124 addcc %o4, 1, %o4
/* Index not yet 0: load the next byte (delay slot, only when looping). */
125 bne,a,pt %xcc, 1b
126 ldub [%o3 + %o4], %o5
/* Not found: %o4 is 0, so the shared add below returns the end address. */
127 retl
128 /* The delay slot is the same as the next insn, this is just to make it look more awful */
/* Local label 2: delay slot of the retl above and start of the hit path. */
1292:
130 add %o3, %o4, %o0
131 retl
/* (delay slot) the index was already stepped past the hit; back up one. */
132 sub %o0, 1, %o0
/* Local label 3: empty buffer, %o0 still holds addr. */
1333:
134 retl
135 nop