/*
 * include/asm-alpha/xor.h
 *
 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
| 15 | |
/*
 * Plain (non-prefetching) XOR routines; their bodies live in the
 * file-scope assembly later in this header.
 *
 * bytes is shifted right by 6 on entry to each routine, so it is consumed
 * in 64-byte units.  The result of XOR-ing all buffers together is stored
 * back through p1; p2..p5 are only read.
 */
extern void xor_alpha_2(unsigned long bytes,
			unsigned long *p1, unsigned long *p2);
extern void xor_alpha_3(unsigned long bytes,
			unsigned long *p1, unsigned long *p2,
			unsigned long *p3);
extern void xor_alpha_4(unsigned long bytes,
			unsigned long *p1, unsigned long *p2,
			unsigned long *p3, unsigned long *p4);
extern void xor_alpha_5(unsigned long bytes,
			unsigned long *p1, unsigned long *p2,
			unsigned long *p3, unsigned long *p4,
			unsigned long *p5);
| 23 | |
/*
 * Prefetching counterparts of xor_alpha_2..5, also implemented in the
 * file-scope assembly later in this header.  Same arguments: a byte count
 * consumed in 64-byte units, then the buffers; the XOR of all buffers is
 * stored back through p1.
 */
extern void xor_alpha_prefetch_2(unsigned long bytes,
				 unsigned long *p1, unsigned long *p2);
extern void xor_alpha_prefetch_3(unsigned long bytes,
				 unsigned long *p1, unsigned long *p2,
				 unsigned long *p3);
extern void xor_alpha_prefetch_4(unsigned long bytes,
				 unsigned long *p1, unsigned long *p2,
				 unsigned long *p3, unsigned long *p4);
extern void xor_alpha_prefetch_5(unsigned long bytes,
				 unsigned long *p1, unsigned long *p2,
				 unsigned long *p3, unsigned long *p4,
				 unsigned long *p5);
| 34 | |
/*
 * Eight hand-scheduled Alpha routines, emitted as one file-scope asm
 * statement: xor_alpha_2..5 and xor_alpha_prefetch_2..5.
 *
 * Register use, as written in the assembly below:
 *   $16       - first operand; shifted right by 6 on entry, so the loop
 *               counter is the incoming value divided by 64.
 *   $17       - first buffer; every result is stored back through it.
 *   $18..$21  - the remaining source buffers (as many as the routine's
 *               numeric suffix requires).
 *   NOTE(review): this presumes the usual Alpha calling convention, where
 *   the first six integer arguments arrive in $16..$21 - confirm.
 *
 * Each loop iteration loads eight quadwords (offsets 0..56) from every
 * buffer, XORs the corresponding quadwords together, stores the eight
 * results at the same offsets from $17, advances every pointer by 64 and
 * decrements $16.  The "bgt" test sits at the bottom of the loop, so the
 * body runs once before the counter is first examined.
 *
 * The *_prefetch_* variants additionally issue "ldq $31, ..." loads: at
 * offsets 0, 64, 128 and 192 from each buffer before the loop, and at
 * offset 256 from each buffer inside it.  The loaded value goes to $31
 * and is never used; presumably these serve purely as cache prefetches
 * (loads targeting $31 are a prefetch hint on Alpha - TODO confirm).
 *
 * Scratch registers touched are $0-$7, $19-$25 (those not holding a
 * buffer pointer), $27 and, in the five-buffer routines, $28.  None of
 * the routines reference the stack.
 *
 * The "N cycles from $R load" remarks record the load-to-use distance the
 * instruction order was arranged around; reordering instructions would
 * change that scheduling.
 */
| 35 | asm(" \n\ |
| 36 | .text \n\ |
| 37 | .align 3 \n\ |
| 38 | .ent xor_alpha_2 \n\ |
| 39 | xor_alpha_2: \n\ |
| 40 | .prologue 0 \n\ |
| 41 | srl $16, 6, $16 \n\ |
| 42 | .align 4 \n\ |
| 43 | 2: \n\ |
| 44 | ldq $0,0($17) \n\ |
| 45 | ldq $1,0($18) \n\ |
| 46 | ldq $2,8($17) \n\ |
| 47 | ldq $3,8($18) \n\ |
| 48 | \n\ |
| 49 | ldq $4,16($17) \n\ |
| 50 | ldq $5,16($18) \n\ |
| 51 | ldq $6,24($17) \n\ |
| 52 | ldq $7,24($18) \n\ |
| 53 | \n\ |
| 54 | ldq $19,32($17) \n\ |
| 55 | ldq $20,32($18) \n\ |
| 56 | ldq $21,40($17) \n\ |
| 57 | ldq $22,40($18) \n\ |
| 58 | \n\ |
| 59 | ldq $23,48($17) \n\ |
| 60 | ldq $24,48($18) \n\ |
| 61 | ldq $25,56($17) \n\ |
| 62 | xor $0,$1,$0 # 7 cycles from $1 load \n\ |
| 63 | \n\ |
| 64 | ldq $27,56($18) \n\ |
| 65 | xor $2,$3,$2 \n\ |
| 66 | stq $0,0($17) \n\ |
| 67 | xor $4,$5,$4 \n\ |
| 68 | \n\ |
| 69 | stq $2,8($17) \n\ |
| 70 | xor $6,$7,$6 \n\ |
| 71 | stq $4,16($17) \n\ |
| 72 | xor $19,$20,$19 \n\ |
| 73 | \n\ |
| 74 | stq $6,24($17) \n\ |
| 75 | xor $21,$22,$21 \n\ |
| 76 | stq $19,32($17) \n\ |
| 77 | xor $23,$24,$23 \n\ |
| 78 | \n\ |
| 79 | stq $21,40($17) \n\ |
| 80 | xor $25,$27,$25 \n\ |
| 81 | stq $23,48($17) \n\ |
| 82 | subq $16,1,$16 \n\ |
| 83 | \n\ |
| 84 | stq $25,56($17) \n\ |
| 85 | addq $17,64,$17 \n\ |
| 86 | addq $18,64,$18 \n\ |
| 87 | bgt $16,2b \n\ |
| 88 | \n\ |
| 89 | ret \n\ |
| 90 | .end xor_alpha_2 \n\ |
| 91 | \n\ |
| 92 | .align 3 \n\ |
| 93 | .ent xor_alpha_3 \n\ |
| 94 | xor_alpha_3: \n\ |
| 95 | .prologue 0 \n\ |
| 96 | srl $16, 6, $16 \n\ |
| 97 | .align 4 \n\ |
| 98 | 3: \n\ |
| 99 | ldq $0,0($17) \n\ |
| 100 | ldq $1,0($18) \n\ |
| 101 | ldq $2,0($19) \n\ |
| 102 | ldq $3,8($17) \n\ |
| 103 | \n\ |
| 104 | ldq $4,8($18) \n\ |
| 105 | ldq $6,16($17) \n\ |
| 106 | ldq $7,16($18) \n\ |
| 107 | ldq $21,24($17) \n\ |
| 108 | \n\ |
| 109 | ldq $22,24($18) \n\ |
| 110 | ldq $24,32($17) \n\ |
| 111 | ldq $25,32($18) \n\ |
| 112 | ldq $5,8($19) \n\ |
| 113 | \n\ |
| 114 | ldq $20,16($19) \n\ |
| 115 | ldq $23,24($19) \n\ |
| 116 | ldq $27,32($19) \n\ |
| 117 | nop \n\ |
| 118 | \n\ |
| 119 | xor $0,$1,$1 # 8 cycles from $0 load \n\ |
| 120 | xor $3,$4,$4 # 6 cycles from $4 load \n\ |
| 121 | xor $6,$7,$7 # 6 cycles from $7 load \n\ |
| 122 | xor $21,$22,$22 # 5 cycles from $22 load \n\ |
| 123 | \n\ |
| 124 | xor $1,$2,$2 # 9 cycles from $2 load \n\ |
| 125 | xor $24,$25,$25 # 5 cycles from $25 load \n\ |
| 126 | stq $2,0($17) \n\ |
| 127 | xor $4,$5,$5 # 6 cycles from $5 load \n\ |
| 128 | \n\ |
| 129 | stq $5,8($17) \n\ |
| 130 | xor $7,$20,$20 # 7 cycles from $20 load \n\ |
| 131 | stq $20,16($17) \n\ |
| 132 | xor $22,$23,$23 # 7 cycles from $23 load \n\ |
| 133 | \n\ |
| 134 | stq $23,24($17) \n\ |
| 135 | xor $25,$27,$27 # 7 cycles from $27 load \n\ |
| 136 | stq $27,32($17) \n\ |
| 137 | nop \n\ |
| 138 | \n\ |
| 139 | ldq $0,40($17) \n\ |
| 140 | ldq $1,40($18) \n\ |
| 141 | ldq $3,48($17) \n\ |
| 142 | ldq $4,48($18) \n\ |
| 143 | \n\ |
| 144 | ldq $6,56($17) \n\ |
| 145 | ldq $7,56($18) \n\ |
| 146 | ldq $2,40($19) \n\ |
| 147 | ldq $5,48($19) \n\ |
| 148 | \n\ |
| 149 | ldq $20,56($19) \n\ |
| 150 | xor $0,$1,$1 # 4 cycles from $1 load \n\ |
| 151 | xor $3,$4,$4 # 5 cycles from $4 load \n\ |
| 152 | xor $6,$7,$7 # 5 cycles from $7 load \n\ |
| 153 | \n\ |
| 154 | xor $1,$2,$2 # 4 cycles from $2 load \n\ |
| 155 | xor $4,$5,$5 # 5 cycles from $5 load \n\ |
| 156 | stq $2,40($17) \n\ |
| 157 | xor $7,$20,$20 # 4 cycles from $20 load \n\ |
| 158 | \n\ |
| 159 | stq $5,48($17) \n\ |
| 160 | subq $16,1,$16 \n\ |
| 161 | stq $20,56($17) \n\ |
| 162 | addq $19,64,$19 \n\ |
| 163 | \n\ |
| 164 | addq $18,64,$18 \n\ |
| 165 | addq $17,64,$17 \n\ |
| 166 | bgt $16,3b \n\ |
| 167 | ret \n\ |
| 168 | .end xor_alpha_3 \n\ |
| 169 | \n\ |
| 170 | .align 3 \n\ |
| 171 | .ent xor_alpha_4 \n\ |
| 172 | xor_alpha_4: \n\ |
| 173 | .prologue 0 \n\ |
| 174 | srl $16, 6, $16 \n\ |
| 175 | .align 4 \n\ |
| 176 | 4: \n\ |
| 177 | ldq $0,0($17) \n\ |
| 178 | ldq $1,0($18) \n\ |
| 179 | ldq $2,0($19) \n\ |
| 180 | ldq $3,0($20) \n\ |
| 181 | \n\ |
| 182 | ldq $4,8($17) \n\ |
| 183 | ldq $5,8($18) \n\ |
| 184 | ldq $6,8($19) \n\ |
| 185 | ldq $7,8($20) \n\ |
| 186 | \n\ |
| 187 | ldq $21,16($17) \n\ |
| 188 | ldq $22,16($18) \n\ |
| 189 | ldq $23,16($19) \n\ |
| 190 | ldq $24,16($20) \n\ |
| 191 | \n\ |
| 192 | ldq $25,24($17) \n\ |
| 193 | xor $0,$1,$1 # 6 cycles from $1 load \n\ |
| 194 | ldq $27,24($18) \n\ |
| 195 | xor $2,$3,$3 # 6 cycles from $3 load \n\ |
| 196 | \n\ |
| 197 | ldq $0,24($19) \n\ |
| 198 | xor $1,$3,$3 \n\ |
| 199 | ldq $1,24($20) \n\ |
| 200 | xor $4,$5,$5 # 7 cycles from $5 load \n\ |
| 201 | \n\ |
| 202 | stq $3,0($17) \n\ |
| 203 | xor $6,$7,$7 \n\ |
| 204 | xor $21,$22,$22 # 7 cycles from $22 load \n\ |
| 205 | xor $5,$7,$7 \n\ |
| 206 | \n\ |
| 207 | stq $7,8($17) \n\ |
| 208 | xor $23,$24,$24 # 7 cycles from $24 load \n\ |
| 209 | ldq $2,32($17) \n\ |
| 210 | xor $22,$24,$24 \n\ |
| 211 | \n\ |
| 212 | ldq $3,32($18) \n\ |
| 213 | ldq $4,32($19) \n\ |
| 214 | ldq $5,32($20) \n\ |
| 215 | xor $25,$27,$27 # 8 cycles from $27 load \n\ |
| 216 | \n\ |
| 217 | ldq $6,40($17) \n\ |
| 218 | ldq $7,40($18) \n\ |
| 219 | ldq $21,40($19) \n\ |
| 220 | ldq $22,40($20) \n\ |
| 221 | \n\ |
| 222 | stq $24,16($17) \n\ |
| 223 | xor $0,$1,$1 # 9 cycles from $1 load \n\ |
| 224 | xor $2,$3,$3 # 5 cycles from $3 load \n\ |
| 225 | xor $27,$1,$1 \n\ |
| 226 | \n\ |
| 227 | stq $1,24($17) \n\ |
| 228 | xor $4,$5,$5 # 5 cycles from $5 load \n\ |
| 229 | ldq $23,48($17) \n\ |
| 230 | ldq $24,48($18) \n\ |
| 231 | \n\ |
| 232 | ldq $25,48($19) \n\ |
| 233 | xor $3,$5,$5 \n\ |
| 234 | ldq $27,48($20) \n\ |
| 235 | ldq $0,56($17) \n\ |
| 236 | \n\ |
| 237 | ldq $1,56($18) \n\ |
| 238 | ldq $2,56($19) \n\ |
| 239 | xor $6,$7,$7 # 8 cycles from $6 load \n\ |
| 240 | ldq $3,56($20) \n\ |
| 241 | \n\ |
| 242 | stq $5,32($17) \n\ |
| 243 | xor $21,$22,$22 # 8 cycles from $22 load \n\ |
| 244 | xor $7,$22,$22 \n\ |
| 245 | xor $23,$24,$24 # 5 cycles from $24 load \n\ |
| 246 | \n\ |
| 247 | stq $22,40($17) \n\ |
| 248 | xor $25,$27,$27 # 5 cycles from $27 load \n\ |
| 249 | xor $24,$27,$27 \n\ |
| 250 | xor $0,$1,$1 # 5 cycles from $1 load \n\ |
| 251 | \n\ |
| 252 | stq $27,48($17) \n\ |
| 253 | xor $2,$3,$3 # 4 cycles from $3 load \n\ |
| 254 | xor $1,$3,$3 \n\ |
| 255 | subq $16,1,$16 \n\ |
| 256 | \n\ |
| 257 | stq $3,56($17) \n\ |
| 258 | addq $20,64,$20 \n\ |
| 259 | addq $19,64,$19 \n\ |
| 260 | addq $18,64,$18 \n\ |
| 261 | \n\ |
| 262 | addq $17,64,$17 \n\ |
| 263 | bgt $16,4b \n\ |
| 264 | ret \n\ |
| 265 | .end xor_alpha_4 \n\ |
| 266 | \n\ |
| 267 | .align 3 \n\ |
| 268 | .ent xor_alpha_5 \n\ |
| 269 | xor_alpha_5: \n\ |
| 270 | .prologue 0 \n\ |
| 271 | srl $16, 6, $16 \n\ |
| 272 | .align 4 \n\ |
| 273 | 5: \n\ |
| 274 | ldq $0,0($17) \n\ |
| 275 | ldq $1,0($18) \n\ |
| 276 | ldq $2,0($19) \n\ |
| 277 | ldq $3,0($20) \n\ |
| 278 | \n\ |
| 279 | ldq $4,0($21) \n\ |
| 280 | ldq $5,8($17) \n\ |
| 281 | ldq $6,8($18) \n\ |
| 282 | ldq $7,8($19) \n\ |
| 283 | \n\ |
| 284 | ldq $22,8($20) \n\ |
| 285 | ldq $23,8($21) \n\ |
| 286 | ldq $24,16($17) \n\ |
| 287 | ldq $25,16($18) \n\ |
| 288 | \n\ |
| 289 | ldq $27,16($19) \n\ |
| 290 | xor $0,$1,$1 # 6 cycles from $1 load \n\ |
| 291 | ldq $28,16($20) \n\ |
| 292 | xor $2,$3,$3 # 6 cycles from $3 load \n\ |
| 293 | \n\ |
| 294 | ldq $0,16($21) \n\ |
| 295 | xor $1,$3,$3 \n\ |
| 296 | ldq $1,24($17) \n\ |
| 297 | xor $3,$4,$4 # 7 cycles from $4 load \n\ |
| 298 | \n\ |
| 299 | stq $4,0($17) \n\ |
| 300 | xor $5,$6,$6 # 7 cycles from $6 load \n\ |
| 301 | xor $7,$22,$22 # 7 cycles from $22 load \n\ |
| 302 | xor $6,$23,$23 # 7 cycles from $23 load \n\ |
| 303 | \n\ |
| 304 | ldq $2,24($18) \n\ |
| 305 | xor $22,$23,$23 \n\ |
| 306 | ldq $3,24($19) \n\ |
| 307 | xor $24,$25,$25 # 8 cycles from $25 load \n\ |
| 308 | \n\ |
| 309 | stq $23,8($17) \n\ |
| 310 | xor $25,$27,$27 # 8 cycles from $27 load \n\ |
| 311 | ldq $4,24($20) \n\ |
| 312 | xor $28,$0,$0 # 7 cycles from $0 load \n\ |
| 313 | \n\ |
| 314 | ldq $5,24($21) \n\ |
| 315 | xor $27,$0,$0 \n\ |
| 316 | ldq $6,32($17) \n\ |
| 317 | ldq $7,32($18) \n\ |
| 318 | \n\ |
| 319 | stq $0,16($17) \n\ |
| 320 | xor $1,$2,$2 # 6 cycles from $2 load \n\ |
| 321 | ldq $22,32($19) \n\ |
| 322 | xor $3,$4,$4 # 4 cycles from $4 load \n\ |
| 323 | \n\ |
| 324 | ldq $23,32($20) \n\ |
| 325 | xor $2,$4,$4 \n\ |
| 326 | ldq $24,32($21) \n\ |
| 327 | ldq $25,40($17) \n\ |
| 328 | \n\ |
| 329 | ldq $27,40($18) \n\ |
| 330 | ldq $28,40($19) \n\ |
| 331 | ldq $0,40($20) \n\ |
| 332 | xor $4,$5,$5 # 7 cycles from $5 load \n\ |
| 333 | \n\ |
| 334 | stq $5,24($17) \n\ |
| 335 | xor $6,$7,$7 # 7 cycles from $7 load \n\ |
| 336 | ldq $1,40($21) \n\ |
| 337 | ldq $2,48($17) \n\ |
| 338 | \n\ |
| 339 | ldq $3,48($18) \n\ |
| 340 | xor $7,$22,$22 # 7 cycles from $22 load \n\ |
| 341 | ldq $4,48($19) \n\ |
| 342 | xor $23,$24,$24 # 6 cycles from $24 load \n\ |
| 343 | \n\ |
| 344 | ldq $5,48($20) \n\ |
| 345 | xor $22,$24,$24 \n\ |
| 346 | ldq $6,48($21) \n\ |
| 347 | xor $25,$27,$27 # 7 cycles from $27 load \n\ |
| 348 | \n\ |
| 349 | stq $24,32($17) \n\ |
| 350 | xor $27,$28,$28 # 8 cycles from $28 load \n\ |
| 351 | ldq $7,56($17) \n\ |
| 352 | xor $0,$1,$1 # 6 cycles from $1 load \n\ |
| 353 | \n\ |
| 354 | ldq $22,56($18) \n\ |
| 355 | ldq $23,56($19) \n\ |
| 356 | ldq $24,56($20) \n\ |
| 357 | ldq $25,56($21) \n\ |
| 358 | \n\ |
| 359 | xor $28,$1,$1 \n\ |
| 360 | xor $2,$3,$3 # 9 cycles from $3 load \n\ |
| 361 | xor $3,$4,$4 # 9 cycles from $4 load \n\ |
| 362 | xor $5,$6,$6 # 8 cycles from $6 load \n\ |
| 363 | \n\ |
| 364 | stq $1,40($17) \n\ |
| 365 | xor $4,$6,$6 \n\ |
| 366 | xor $7,$22,$22 # 7 cycles from $22 load \n\ |
| 367 | xor $23,$24,$24 # 6 cycles from $24 load \n\ |
| 368 | \n\ |
| 369 | stq $6,48($17) \n\ |
| 370 | xor $22,$24,$24 \n\ |
| 371 | subq $16,1,$16 \n\ |
| 372 | xor $24,$25,$25 # 8 cycles from $25 load \n\ |
| 373 | \n\ |
| 374 | stq $25,56($17) \n\ |
| 375 | addq $21,64,$21 \n\ |
| 376 | addq $20,64,$20 \n\ |
| 377 | addq $19,64,$19 \n\ |
| 378 | \n\ |
| 379 | addq $18,64,$18 \n\ |
| 380 | addq $17,64,$17 \n\ |
| 381 | bgt $16,5b \n\ |
| 382 | ret \n\ |
| 383 | .end xor_alpha_5 \n\ |
| 384 | \n\ |
| 385 | .align 3 \n\ |
| 386 | .ent xor_alpha_prefetch_2 \n\ |
| 387 | xor_alpha_prefetch_2: \n\ |
| 388 | .prologue 0 \n\ |
| 389 | srl $16, 6, $16 \n\ |
| 390 | \n\ |
| 391 | ldq $31, 0($17) \n\ |
| 392 | ldq $31, 0($18) \n\ |
| 393 | \n\ |
| 394 | ldq $31, 64($17) \n\ |
| 395 | ldq $31, 64($18) \n\ |
| 396 | \n\ |
| 397 | ldq $31, 128($17) \n\ |
| 398 | ldq $31, 128($18) \n\ |
| 399 | \n\ |
| 400 | ldq $31, 192($17) \n\ |
| 401 | ldq $31, 192($18) \n\ |
| 402 | .align 4 \n\ |
| 403 | 2: \n\ |
| 404 | ldq $0,0($17) \n\ |
| 405 | ldq $1,0($18) \n\ |
| 406 | ldq $2,8($17) \n\ |
| 407 | ldq $3,8($18) \n\ |
| 408 | \n\ |
| 409 | ldq $4,16($17) \n\ |
| 410 | ldq $5,16($18) \n\ |
| 411 | ldq $6,24($17) \n\ |
| 412 | ldq $7,24($18) \n\ |
| 413 | \n\ |
| 414 | ldq $19,32($17) \n\ |
| 415 | ldq $20,32($18) \n\ |
| 416 | ldq $21,40($17) \n\ |
| 417 | ldq $22,40($18) \n\ |
| 418 | \n\ |
| 419 | ldq $23,48($17) \n\ |
| 420 | ldq $24,48($18) \n\ |
| 421 | ldq $25,56($17) \n\ |
| 422 | ldq $27,56($18) \n\ |
| 423 | \n\ |
| 424 | ldq $31,256($17) \n\ |
| 425 | xor $0,$1,$0 # 8 cycles from $1 load \n\ |
| 426 | ldq $31,256($18) \n\ |
| 427 | xor $2,$3,$2 \n\ |
| 428 | \n\ |
| 429 | stq $0,0($17) \n\ |
| 430 | xor $4,$5,$4 \n\ |
| 431 | stq $2,8($17) \n\ |
| 432 | xor $6,$7,$6 \n\ |
| 433 | \n\ |
| 434 | stq $4,16($17) \n\ |
| 435 | xor $19,$20,$19 \n\ |
| 436 | stq $6,24($17) \n\ |
| 437 | xor $21,$22,$21 \n\ |
| 438 | \n\ |
| 439 | stq $19,32($17) \n\ |
| 440 | xor $23,$24,$23 \n\ |
| 441 | stq $21,40($17) \n\ |
| 442 | xor $25,$27,$25 \n\ |
| 443 | \n\ |
| 444 | stq $23,48($17) \n\ |
| 445 | subq $16,1,$16 \n\ |
| 446 | stq $25,56($17) \n\ |
| 447 | addq $17,64,$17 \n\ |
| 448 | \n\ |
| 449 | addq $18,64,$18 \n\ |
| 450 | bgt $16,2b \n\ |
| 451 | ret \n\ |
| 452 | .end xor_alpha_prefetch_2 \n\ |
| 453 | \n\ |
| 454 | .align 3 \n\ |
| 455 | .ent xor_alpha_prefetch_3 \n\ |
| 456 | xor_alpha_prefetch_3: \n\ |
| 457 | .prologue 0 \n\ |
| 458 | srl $16, 6, $16 \n\ |
| 459 | \n\ |
| 460 | ldq $31, 0($17) \n\ |
| 461 | ldq $31, 0($18) \n\ |
| 462 | ldq $31, 0($19) \n\ |
| 463 | \n\ |
| 464 | ldq $31, 64($17) \n\ |
| 465 | ldq $31, 64($18) \n\ |
| 466 | ldq $31, 64($19) \n\ |
| 467 | \n\ |
| 468 | ldq $31, 128($17) \n\ |
| 469 | ldq $31, 128($18) \n\ |
| 470 | ldq $31, 128($19) \n\ |
| 471 | \n\ |
| 472 | ldq $31, 192($17) \n\ |
| 473 | ldq $31, 192($18) \n\ |
| 474 | ldq $31, 192($19) \n\ |
| 475 | .align 4 \n\ |
| 476 | 3: \n\ |
| 477 | ldq $0,0($17) \n\ |
| 478 | ldq $1,0($18) \n\ |
| 479 | ldq $2,0($19) \n\ |
| 480 | ldq $3,8($17) \n\ |
| 481 | \n\ |
| 482 | ldq $4,8($18) \n\ |
| 483 | ldq $6,16($17) \n\ |
| 484 | ldq $7,16($18) \n\ |
| 485 | ldq $21,24($17) \n\ |
| 486 | \n\ |
| 487 | ldq $22,24($18) \n\ |
| 488 | ldq $24,32($17) \n\ |
| 489 | ldq $25,32($18) \n\ |
| 490 | ldq $5,8($19) \n\ |
| 491 | \n\ |
| 492 | ldq $20,16($19) \n\ |
| 493 | ldq $23,24($19) \n\ |
| 494 | ldq $27,32($19) \n\ |
| 495 | nop \n\ |
| 496 | \n\ |
| 497 | xor $0,$1,$1 # 8 cycles from $0 load \n\ |
| 498 | xor $3,$4,$4 # 7 cycles from $4 load \n\ |
| 499 | xor $6,$7,$7 # 6 cycles from $7 load \n\ |
| 500 | xor $21,$22,$22 # 5 cycles from $22 load \n\ |
| 501 | \n\ |
| 502 | xor $1,$2,$2 # 9 cycles from $2 load \n\ |
| 503 | xor $24,$25,$25 # 5 cycles from $25 load \n\ |
| 504 | stq $2,0($17) \n\ |
| 505 | xor $4,$5,$5 # 6 cycles from $5 load \n\ |
| 506 | \n\ |
| 507 | stq $5,8($17) \n\ |
| 508 | xor $7,$20,$20 # 7 cycles from $20 load \n\ |
| 509 | stq $20,16($17) \n\ |
| 510 | xor $22,$23,$23 # 7 cycles from $23 load \n\ |
| 511 | \n\ |
| 512 | stq $23,24($17) \n\ |
| 513 | xor $25,$27,$27 # 7 cycles from $27 load \n\ |
| 514 | stq $27,32($17) \n\ |
| 515 | nop \n\ |
| 516 | \n\ |
| 517 | ldq $0,40($17) \n\ |
| 518 | ldq $1,40($18) \n\ |
| 519 | ldq $3,48($17) \n\ |
| 520 | ldq $4,48($18) \n\ |
| 521 | \n\ |
| 522 | ldq $6,56($17) \n\ |
| 523 | ldq $7,56($18) \n\ |
| 524 | ldq $2,40($19) \n\ |
| 525 | ldq $5,48($19) \n\ |
| 526 | \n\ |
| 527 | ldq $20,56($19) \n\ |
| 528 | ldq $31,256($17) \n\ |
| 529 | ldq $31,256($18) \n\ |
| 530 | ldq $31,256($19) \n\ |
| 531 | \n\ |
| 532 | xor $0,$1,$1 # 6 cycles from $1 load \n\ |
| 533 | xor $3,$4,$4 # 5 cycles from $4 load \n\ |
| 534 | xor $6,$7,$7 # 5 cycles from $7 load \n\ |
| 535 | xor $1,$2,$2 # 4 cycles from $2 load \n\ |
| 536 | \n\ |
| 537 | xor $4,$5,$5 # 5 cycles from $5 load \n\ |
| 538 | xor $7,$20,$20 # 4 cycles from $20 load \n\ |
| 539 | stq $2,40($17) \n\ |
| 540 | subq $16,1,$16 \n\ |
| 541 | \n\ |
| 542 | stq $5,48($17) \n\ |
| 543 | addq $19,64,$19 \n\ |
| 544 | stq $20,56($17) \n\ |
| 545 | addq $18,64,$18 \n\ |
| 546 | \n\ |
| 547 | addq $17,64,$17 \n\ |
| 548 | bgt $16,3b \n\ |
| 549 | ret \n\ |
| 550 | .end xor_alpha_prefetch_3 \n\ |
| 551 | \n\ |
| 552 | .align 3 \n\ |
| 553 | .ent xor_alpha_prefetch_4 \n\ |
| 554 | xor_alpha_prefetch_4: \n\ |
| 555 | .prologue 0 \n\ |
| 556 | srl $16, 6, $16 \n\ |
| 557 | \n\ |
| 558 | ldq $31, 0($17) \n\ |
| 559 | ldq $31, 0($18) \n\ |
| 560 | ldq $31, 0($19) \n\ |
| 561 | ldq $31, 0($20) \n\ |
| 562 | \n\ |
| 563 | ldq $31, 64($17) \n\ |
| 564 | ldq $31, 64($18) \n\ |
| 565 | ldq $31, 64($19) \n\ |
| 566 | ldq $31, 64($20) \n\ |
| 567 | \n\ |
| 568 | ldq $31, 128($17) \n\ |
| 569 | ldq $31, 128($18) \n\ |
| 570 | ldq $31, 128($19) \n\ |
| 571 | ldq $31, 128($20) \n\ |
| 572 | \n\ |
| 573 | ldq $31, 192($17) \n\ |
| 574 | ldq $31, 192($18) \n\ |
| 575 | ldq $31, 192($19) \n\ |
| 576 | ldq $31, 192($20) \n\ |
| 577 | .align 4 \n\ |
| 578 | 4: \n\ |
| 579 | ldq $0,0($17) \n\ |
| 580 | ldq $1,0($18) \n\ |
| 581 | ldq $2,0($19) \n\ |
| 582 | ldq $3,0($20) \n\ |
| 583 | \n\ |
| 584 | ldq $4,8($17) \n\ |
| 585 | ldq $5,8($18) \n\ |
| 586 | ldq $6,8($19) \n\ |
| 587 | ldq $7,8($20) \n\ |
| 588 | \n\ |
| 589 | ldq $21,16($17) \n\ |
| 590 | ldq $22,16($18) \n\ |
| 591 | ldq $23,16($19) \n\ |
| 592 | ldq $24,16($20) \n\ |
| 593 | \n\ |
| 594 | ldq $25,24($17) \n\ |
| 595 | xor $0,$1,$1 # 6 cycles from $1 load \n\ |
| 596 | ldq $27,24($18) \n\ |
| 597 | xor $2,$3,$3 # 6 cycles from $3 load \n\ |
| 598 | \n\ |
| 599 | ldq $0,24($19) \n\ |
| 600 | xor $1,$3,$3 \n\ |
| 601 | ldq $1,24($20) \n\ |
| 602 | xor $4,$5,$5 # 7 cycles from $5 load \n\ |
| 603 | \n\ |
| 604 | stq $3,0($17) \n\ |
| 605 | xor $6,$7,$7 \n\ |
| 606 | xor $21,$22,$22 # 7 cycles from $22 load \n\ |
| 607 | xor $5,$7,$7 \n\ |
| 608 | \n\ |
| 609 | stq $7,8($17) \n\ |
| 610 | xor $23,$24,$24 # 7 cycles from $24 load \n\ |
| 611 | ldq $2,32($17) \n\ |
| 612 | xor $22,$24,$24 \n\ |
| 613 | \n\ |
| 614 | ldq $3,32($18) \n\ |
| 615 | ldq $4,32($19) \n\ |
| 616 | ldq $5,32($20) \n\ |
| 617 | xor $25,$27,$27 # 8 cycles from $27 load \n\ |
| 618 | \n\ |
| 619 | ldq $6,40($17) \n\ |
| 620 | ldq $7,40($18) \n\ |
| 621 | ldq $21,40($19) \n\ |
| 622 | ldq $22,40($20) \n\ |
| 623 | \n\ |
| 624 | stq $24,16($17) \n\ |
| 625 | xor $0,$1,$1 # 9 cycles from $1 load \n\ |
| 626 | xor $2,$3,$3 # 5 cycles from $3 load \n\ |
| 627 | xor $27,$1,$1 \n\ |
| 628 | \n\ |
| 629 | stq $1,24($17) \n\ |
| 630 | xor $4,$5,$5 # 5 cycles from $5 load \n\ |
| 631 | ldq $23,48($17) \n\ |
| 632 | xor $3,$5,$5 \n\ |
| 633 | \n\ |
| 634 | ldq $24,48($18) \n\ |
| 635 | ldq $25,48($19) \n\ |
| 636 | ldq $27,48($20) \n\ |
| 637 | ldq $0,56($17) \n\ |
| 638 | \n\ |
| 639 | ldq $1,56($18) \n\ |
| 640 | ldq $2,56($19) \n\ |
| 641 | ldq $3,56($20) \n\ |
| 642 | xor $6,$7,$7 # 8 cycles from $6 load \n\ |
| 643 | \n\ |
| 644 | ldq $31,256($17) \n\ |
| 645 | xor $21,$22,$22 # 8 cycles from $22 load \n\ |
| 646 | ldq $31,256($18) \n\ |
| 647 | xor $7,$22,$22 \n\ |
| 648 | \n\ |
| 649 | ldq $31,256($19) \n\ |
| 650 | xor $23,$24,$24 # 6 cycles from $24 load \n\ |
| 651 | ldq $31,256($20) \n\ |
| 652 | xor $25,$27,$27 # 6 cycles from $27 load \n\ |
| 653 | \n\ |
| 654 | stq $5,32($17) \n\ |
| 655 | xor $24,$27,$27 \n\ |
| 656 | xor $0,$1,$1 # 7 cycles from $1 load \n\ |
| 657 | xor $2,$3,$3 # 6 cycles from $3 load \n\ |
| 658 | \n\ |
| 659 | stq $22,40($17) \n\ |
| 660 | xor $1,$3,$3 \n\ |
| 661 | stq $27,48($17) \n\ |
| 662 | subq $16,1,$16 \n\ |
| 663 | \n\ |
| 664 | stq $3,56($17) \n\ |
| 665 | addq $20,64,$20 \n\ |
| 666 | addq $19,64,$19 \n\ |
| 667 | addq $18,64,$18 \n\ |
| 668 | \n\ |
| 669 | addq $17,64,$17 \n\ |
| 670 | bgt $16,4b \n\ |
| 671 | ret \n\ |
| 672 | .end xor_alpha_prefetch_4 \n\ |
| 673 | \n\ |
| 674 | .align 3 \n\ |
| 675 | .ent xor_alpha_prefetch_5 \n\ |
| 676 | xor_alpha_prefetch_5: \n\ |
| 677 | .prologue 0 \n\ |
| 678 | srl $16, 6, $16 \n\ |
| 679 | \n\ |
| 680 | ldq $31, 0($17) \n\ |
| 681 | ldq $31, 0($18) \n\ |
| 682 | ldq $31, 0($19) \n\ |
| 683 | ldq $31, 0($20) \n\ |
| 684 | ldq $31, 0($21) \n\ |
| 685 | \n\ |
| 686 | ldq $31, 64($17) \n\ |
| 687 | ldq $31, 64($18) \n\ |
| 688 | ldq $31, 64($19) \n\ |
| 689 | ldq $31, 64($20) \n\ |
| 690 | ldq $31, 64($21) \n\ |
| 691 | \n\ |
| 692 | ldq $31, 128($17) \n\ |
| 693 | ldq $31, 128($18) \n\ |
| 694 | ldq $31, 128($19) \n\ |
| 695 | ldq $31, 128($20) \n\ |
| 696 | ldq $31, 128($21) \n\ |
| 697 | \n\ |
| 698 | ldq $31, 192($17) \n\ |
| 699 | ldq $31, 192($18) \n\ |
| 700 | ldq $31, 192($19) \n\ |
| 701 | ldq $31, 192($20) \n\ |
| 702 | ldq $31, 192($21) \n\ |
| 703 | .align 4 \n\ |
| 704 | 5: \n\ |
| 705 | ldq $0,0($17) \n\ |
| 706 | ldq $1,0($18) \n\ |
| 707 | ldq $2,0($19) \n\ |
| 708 | ldq $3,0($20) \n\ |
| 709 | \n\ |
| 710 | ldq $4,0($21) \n\ |
| 711 | ldq $5,8($17) \n\ |
| 712 | ldq $6,8($18) \n\ |
| 713 | ldq $7,8($19) \n\ |
| 714 | \n\ |
| 715 | ldq $22,8($20) \n\ |
| 716 | ldq $23,8($21) \n\ |
| 717 | ldq $24,16($17) \n\ |
| 718 | ldq $25,16($18) \n\ |
| 719 | \n\ |
| 720 | ldq $27,16($19) \n\ |
| 721 | xor $0,$1,$1 # 6 cycles from $1 load \n\ |
| 722 | ldq $28,16($20) \n\ |
| 723 | xor $2,$3,$3 # 6 cycles from $3 load \n\ |
| 724 | \n\ |
| 725 | ldq $0,16($21) \n\ |
| 726 | xor $1,$3,$3 \n\ |
| 727 | ldq $1,24($17) \n\ |
| 728 | xor $3,$4,$4 # 7 cycles from $4 load \n\ |
| 729 | \n\ |
| 730 | stq $4,0($17) \n\ |
| 731 | xor $5,$6,$6 # 7 cycles from $6 load \n\ |
| 732 | xor $7,$22,$22 # 7 cycles from $22 load \n\ |
| 733 | xor $6,$23,$23 # 7 cycles from $23 load \n\ |
| 734 | \n\ |
| 735 | ldq $2,24($18) \n\ |
| 736 | xor $22,$23,$23 \n\ |
| 737 | ldq $3,24($19) \n\ |
| 738 | xor $24,$25,$25 # 8 cycles from $25 load \n\ |
| 739 | \n\ |
| 740 | stq $23,8($17) \n\ |
| 741 | xor $25,$27,$27 # 8 cycles from $27 load \n\ |
| 742 | ldq $4,24($20) \n\ |
| 743 | xor $28,$0,$0 # 7 cycles from $0 load \n\ |
| 744 | \n\ |
| 745 | ldq $5,24($21) \n\ |
| 746 | xor $27,$0,$0 \n\ |
| 747 | ldq $6,32($17) \n\ |
| 748 | ldq $7,32($18) \n\ |
| 749 | \n\ |
| 750 | stq $0,16($17) \n\ |
| 751 | xor $1,$2,$2 # 6 cycles from $2 load \n\ |
| 752 | ldq $22,32($19) \n\ |
| 753 | xor $3,$4,$4 # 4 cycles from $4 load \n\ |
| 754 | \n\ |
| 755 | ldq $23,32($20) \n\ |
| 756 | xor $2,$4,$4 \n\ |
| 757 | ldq $24,32($21) \n\ |
| 758 | ldq $25,40($17) \n\ |
| 759 | \n\ |
| 760 | ldq $27,40($18) \n\ |
| 761 | ldq $28,40($19) \n\ |
| 762 | ldq $0,40($20) \n\ |
| 763 | xor $4,$5,$5 # 7 cycles from $5 load \n\ |
| 764 | \n\ |
| 765 | stq $5,24($17) \n\ |
| 766 | xor $6,$7,$7 # 7 cycles from $7 load \n\ |
| 767 | ldq $1,40($21) \n\ |
| 768 | ldq $2,48($17) \n\ |
| 769 | \n\ |
| 770 | ldq $3,48($18) \n\ |
| 771 | xor $7,$22,$22 # 7 cycles from $22 load \n\ |
| 772 | ldq $4,48($19) \n\ |
| 773 | xor $23,$24,$24 # 6 cycles from $24 load \n\ |
| 774 | \n\ |
| 775 | ldq $5,48($20) \n\ |
| 776 | xor $22,$24,$24 \n\ |
| 777 | ldq $6,48($21) \n\ |
| 778 | xor $25,$27,$27 # 7 cycles from $27 load \n\ |
| 779 | \n\ |
| 780 | stq $24,32($17) \n\ |
| 781 | xor $27,$28,$28 # 8 cycles from $28 load \n\ |
| 782 | ldq $7,56($17) \n\ |
| 783 | xor $0,$1,$1 # 6 cycles from $1 load \n\ |
| 784 | \n\ |
| 785 | ldq $22,56($18) \n\ |
| 786 | ldq $23,56($19) \n\ |
| 787 | ldq $24,56($20) \n\ |
| 788 | ldq $25,56($21) \n\ |
| 789 | \n\ |
| 790 | ldq $31,256($17) \n\ |
| 791 | xor $28,$1,$1 \n\ |
| 792 | ldq $31,256($18) \n\ |
| 793 | xor $2,$3,$3 # 9 cycles from $3 load \n\ |
| 794 | \n\ |
| 795 | ldq $31,256($19) \n\ |
| 796 | xor $3,$4,$4 # 9 cycles from $4 load \n\ |
| 797 | ldq $31,256($20) \n\ |
| 798 | xor $5,$6,$6 # 8 cycles from $6 load \n\ |
| 799 | \n\ |
| 800 | stq $1,40($17) \n\ |
| 801 | xor $4,$6,$6 \n\ |
| 802 | xor $7,$22,$22 # 7 cycles from $22 load \n\ |
| 803 | xor $23,$24,$24 # 6 cycles from $24 load \n\ |
| 804 | \n\ |
| 805 | stq $6,48($17) \n\ |
| 806 | xor $22,$24,$24 \n\ |
| 807 | ldq $31,256($21) \n\ |
| 808 | xor $24,$25,$25 # 8 cycles from $25 load \n\ |
| 809 | \n\ |
| 810 | stq $25,56($17) \n\ |
| 811 | subq $16,1,$16 \n\ |
| 812 | addq $21,64,$21 \n\ |
| 813 | addq $20,64,$20 \n\ |
| 814 | \n\ |
| 815 | addq $19,64,$19 \n\ |
| 816 | addq $18,64,$18 \n\ |
| 817 | addq $17,64,$17 \n\ |
| 818 | bgt $16,5b \n\ |
| 819 | \n\ |
| 820 | ret \n\ |
| 821 | .end xor_alpha_prefetch_5 \n\ |
| 822 | "); |
| 823 | |
| 824 | static struct xor_block_template xor_block_alpha = { |
| 825 | .name = "alpha", |
| 826 | .do_2 = xor_alpha_2, |
| 827 | .do_3 = xor_alpha_3, |
| 828 | .do_4 = xor_alpha_4, |
| 829 | .do_5 = xor_alpha_5, |
| 830 | }; |
| 831 | |
| 832 | static struct xor_block_template xor_block_alpha_prefetch = { |
| 833 | .name = "alpha prefetch", |
| 834 | .do_2 = xor_alpha_prefetch_2, |
| 835 | .do_3 = xor_alpha_prefetch_3, |
| 836 | .do_4 = xor_alpha_prefetch_4, |
| 837 | .do_5 = xor_alpha_prefetch_5, |
| 838 | }; |
| 839 | |
| 840 | /* For grins, also test the generic routines. */ |
| 841 | #include <asm-generic/xor.h> |
| 842 | |
/*
 * Replace any earlier XOR_TRY_TEMPLATES definition with one that passes
 * four templates to xor_speed(), in this order: the two generic C
 * templates (8regs, 32regs), then the two Alpha assembly templates.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES					\
	do {							\
		xor_speed(&xor_block_8regs);			\
		xor_speed(&xor_block_32regs);			\
		xor_speed(&xor_block_alpha);			\
		xor_speed(&xor_block_alpha_prefetch);		\
	} while (0)
| 851 | |
/*
 * Choose the template to use, given the candidate FASTEST.
 *
 * Force the use of alpha_prefetch if EV6, as it is significantly
 * faster in the cold cache case; on any other implementation version
 * FASTEST is returned unchanged.
 *
 * FASTEST is wrapped in parentheses so that an argument containing a
 * low-precedence operator (e.g. an assignment) still binds as a single
 * operand of the conditional expression.
 */
#define XOR_SELECT_TEMPLATE(FASTEST) \
	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : (FASTEST))