Thomas Gleixner | d2912cb | 2019-06-04 10:11:33 +0200 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
Ard Biesheuvel | 7481cdd | 2018-08-27 13:02:44 +0200 | [diff] [blame] | 2 | /* |
| 3 | * Accelerated CRC32(C) using AArch64 CRC instructions |
| 4 | * |
| 5 | * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> |
Ard Biesheuvel | 7481cdd | 2018-08-27 13:02:44 +0200 | [diff] [blame] | 6 | */ |
| 7 | |
| 8 | #include <linux/linkage.h> |
| 9 | #include <asm/alternative.h> |
| 10 | #include <asm/assembler.h> |
| 11 | |
Mark Brown | 30218da | 2020-04-14 19:28:43 +0100 | [diff] [blame] | 12 | .arch armv8-a+crc |
Ard Biesheuvel | 7481cdd | 2018-08-27 13:02:44 +0200 | [diff] [blame] | 13 | |
	/*
	 * __crc32 - fold a byte buffer into a CRC32 (plain) or CRC32C
	 * (when the macro argument \c is 'c') using the ARMv8 CRC32
	 * instructions.
	 *
	 * Register contract (matches u32 f(u32 crc, const u8 *p, size_t len)):
	 *   w0 - CRC accumulator on entry; final CRC on return
	 *   x1 - buffer pointer
	 *   x2 - length in bytes
	 * Clobbers x3-x8 and the condition flags; returns with 'ret'.
	 *
	 * CPU_BE() lines only emit on big-endian kernels: the CRC32
	 * instructions consume data in little-endian register layout, so
	 * big-endian loads are byte-reversed back to memory order first.
	 */
	.macro		__crc32, c
	cmp		x2, #16
	b.lt		8f		// less than 16 bytes

	/*
	 * Split the length: x7 = len % 32 (irregular head, 0..31),
	 * x2 = len rounded down to a multiple of 32 (bulk loop count).
	 */
	and		x7, x2, #0x1f
	and		x2, x2, #~0x1f
	cbz		x7, 32f		// multiple of 32 bytes

	/*
	 * Head is 16..31 bytes (shorter heads took the b.lt above).
	 * Load the first 16 bytes into x3/x4 and an overlapping 16-byte
	 * window starting at p + (head % 16) into x5/x6, then consume
	 * the head branchlessly: each power-of-two piece is folded
	 * speculatively and committed with csel only if the matching
	 * bit of the head length is set.
	 */
	and		x8, x7, #0xf
	ldp		x3, x4, [x1]
	add		x8, x8, x1
	add		x1, x1, x7	// advance p past the whole head
	ldp		x5, x6, [x8]
CPU_BE(	rev		x3, x3		)
CPU_BE(	rev		x4, x4		)
CPU_BE(	rev		x5, x5		)
CPU_BE(	rev		x6, x6		)

	tst		x7, #8		// 8-byte piece present?
	crc32\c\()x	w8, w0, x3	// speculative 8-byte fold
	csel		x3, x3, x4, eq	// if taken, slide window: x3 = x4
	csel		w0, w0, w8, eq	// if taken, commit folded CRC
	tst		x7, #4
	lsr		x4, x3, #32	// next 4 bytes of the window
	crc32\c\()w	w8, w0, w3	// speculative 4-byte fold
	csel		x3, x3, x4, eq
	csel		w0, w0, w8, eq
	tst		x7, #2
	lsr		w4, w3, #16	// next 2 bytes of the window
	crc32\c\()h	w8, w0, w3	// speculative 2-byte fold
	csel		w3, w3, w4, eq
	csel		w0, w0, w8, eq
	tst		x7, #1
	crc32\c\()b	w8, w0, w3	// speculative 1-byte fold
	csel		w0, w0, w8, eq
	tst		x7, #16		// head >= 16: fold the x5/x6 window,
	crc32\c\()x	w8, w0, x5	// which follows the head%16 bytes
	crc32\c\()x	w8, w8, x6	// consumed above, keeping byte order
	csel		w0, w0, w8, eq
	cbz		x2, 0f		// no 32-byte bulk remaining

	/* Bulk loop: two 16-byte loads, 32 bytes folded per iteration. */
32:	ldp		x3, x4, [x1], #32
	sub		x2, x2, #32
	ldp		x5, x6, [x1, #-16]
CPU_BE(	rev		x3, x3		)
CPU_BE(	rev		x4, x4		)
CPU_BE(	rev		x5, x5		)
CPU_BE(	rev		x6, x6		)
	crc32\c\()x	w0, w0, x3
	crc32\c\()x	w0, w0, x4
	crc32\c\()x	w0, w0, x5
	crc32\c\()x	w0, w0, x6
	cbnz		x2, 32b
0:	ret

	/*
	 * Short input (< 16 bytes): consume 8/4/2/1-byte pieces in
	 * decreasing size, keyed on the bits of the remaining length.
	 */
8:	tbz		x2, #3, 4f
	ldr		x3, [x1], #8
CPU_BE(	rev		x3, x3		)
	crc32\c\()x	w0, w0, x3
4:	tbz		x2, #2, 2f
	ldr		w3, [x1], #4
CPU_BE(	rev		w3, w3		)
	crc32\c\()w	w0, w0, w3
2:	tbz		x2, #1, 1f
	ldrh		w3, [x1], #2
CPU_BE(	rev16		w3, w3		)
	crc32\c\()h	w0, w0, w3
1:	tbz		x2, #0, 0f
	ldrb		w3, [x1]
	crc32\c\()b	w0, w0, w3
0:	ret
	.endm
| 86 | |
/*
	 * u32 crc32_le(u32 crc, unsigned char const *p, size_t len)
	 *
	 * Patched at boot by the alternatives framework: if the CPU lacks
	 * the (optional in ARMv8.0) CRC32 instructions, the branch to the
	 * generic C fallback crc32_le_base is kept; otherwise it is
	 * NOP-ed out and execution falls through into the __crc32 body.
	 * Aligned to 32 bytes so the hot entry sits on a cache-line-
	 * friendly boundary.
	 */
	.align		5
SYM_FUNC_START(crc32_le)
alternative_if_not ARM64_HAS_CRC32
	b		crc32_le_base	// CPU has no CRC32 instructions
alternative_else_nop_endif
	__crc32
SYM_FUNC_END(crc32_le)
Ard Biesheuvel | 7481cdd | 2018-08-27 13:02:44 +0200 | [diff] [blame] | 94 | |
/*
	 * u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len)
	 *
	 * CRC32C (Castagnoli polynomial) variant: same alternatives-based
	 * dispatch as crc32_le, but the macro is expanded with \c = 'c'
	 * so the crc32c{b,h,w,x} instruction forms are emitted.
	 */
	.align		5
SYM_FUNC_START(__crc32c_le)
alternative_if_not ARM64_HAS_CRC32
	b		__crc32c_le_base	// CPU has no CRC32 instructions
alternative_else_nop_endif
	__crc32		c
SYM_FUNC_END(__crc32c_le)