Thomas Gleixner | d2912cb | 2019-06-04 10:11:33 +0200 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
Huang Ying | 0e1227d | 2009-10-19 11:53:06 +0900 | [diff] [blame] | 2 | /* |
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains the accelerated part of the GHASH
 * implementation. More information about PCLMULQDQ can be found at:
| 6 | * |
| 7 | * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ |
| 8 | * |
| 9 | * Copyright (c) 2009 Intel Corp. |
| 10 | * Author: Huang Ying <ying.huang@intel.com> |
| 11 | * Vinodh Gopal |
| 12 | * Erdinc Ozturk |
| 13 | * Deniz Karakoyunlu |
Huang Ying | 0e1227d | 2009-10-19 11:53:06 +0900 | [diff] [blame] | 14 | */ |
| 15 | |
| 16 | #include <linux/linkage.h> |
Huang Ying | 564ec0e | 2009-11-23 19:55:22 +0800 | [diff] [blame] | 17 | #include <asm/inst.h> |
Josh Poimboeuf | 8691ccd | 2016-01-21 16:49:19 -0600 | [diff] [blame] | 18 | #include <asm/frame.h> |
Huang Ying | 0e1227d | 2009-10-19 11:53:06 +0900 | [diff] [blame] | 19 | |
/*
 * PSHUFB control mask. With this mask destination byte i is taken from
 * source byte 15 - i, i.e. the 16 bytes of the register are reversed.
 * The constant sits in its own mergeable 16-byte-entry rodata section and
 * is 16-byte aligned so it can be loaded with movaps.
 */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.p2align 4
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f
Huang Ying | 0e1227d | 2009-10-19 11:53:06 +0900 | [diff] [blame] | 24 | |
/*
 * XMM register roles used throughout this file:
 *
 *	DATA	operand1 / result of the multiplication; holds the running
 *		hash value in the exported entry points
 *	SHASH	operand2 (the hash key operand)
 *	T1-T3	scratch registers of __clmul_gf128mul_ble
 *	BSWAP	copy of .Lbswap_mask, the PSHUFB byte-reversal mask
 *	IN1	current 16-byte input block in clmul_ghash_update
 */
#define DATA	%xmm0
#define SHASH	%xmm1
#define T1	%xmm2
#define T2	%xmm3
#define T3	%xmm4
#define BSWAP	%xmm5
#define IN1	%xmm6
| 32 | |
.text

/*
 * __clmul_gf128mul_ble: internal ABI
 *
 * Carry-less multiplication of two 128-bit operands (three PCLMULQDQs,
 * Karatsuba style) followed by a two-phase reduction of the 256-bit
 * product.
 *
 * input:
 *	DATA:	operand1
 *	SHASH:	operand2, hash_key << 1 mod poly (only read)
 * output:
 *	DATA:	operand1 * operand2 mod poly
 * changed:
 *	T1, T2, T3
 *
 * Notation in the comments below: a = operand1, b = operand2, with
 * a0/b0 the low and a1/b1 the high 64-bit halves; "+" is XOR.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	# Prepare the Karatsuba middle-term inputs. pshufd with 0x4e
	# swaps the two 64-bit halves of its source.
	pshufd $0x4e, DATA, T2
	pxor DATA, T2			# T2 = a1 + a0 (in both halves)
	pshufd $0x4e, SHASH, T3
	pxor SHASH, T3			# T3 = b1 + b0 (in both halves)
	movaps DATA, T1			# second copy of a for a1 * b1

	PCLMULQDQ 0x00 T3 T2		# T2 = (a1 + a0) * (b1 + b0)
	PCLMULQDQ 0x11 SHASH T1		# T1 = a1 * b1
	PCLMULQDQ 0x00 SHASH DATA	# DATA = a0 * b0
	pxor T1, T2
	pxor DATA, T2			# T2 = a0 * b1 + a1 * b0

	# Add the middle term at bit offset 64 of the 256-bit product:
	# its upper half goes into T1, its lower half into DATA.
	movaps T2, T3
	psrldq $8, T2
	pslldq $8, T3
	pxor T2, T1
	pxor T3, DATA			# <T1:DATA> = a * b (carry-less)

	# first phase of the reduction
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3			# per 64-bit lane:
					# T3 = DATA<<63 + DATA<<62 + DATA<<57
	movaps T3, T2
	psrldq $8, T3			# upper lane of T3 -> low half of T1
	pslldq $8, T2			# lower lane of T3 -> high half of DATA
	pxor T3, T1
	pxor T2, DATA

	# second phase of the reduction
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2			# per 64-bit lane:
					# T2 = DATA>>7 + DATA>>2 + DATA>>1
	pxor T2, T1
	pxor T1, DATA			# DATA = DATA + T1 + T2: reduced result
	ret
SYM_FUNC_END(__clmul_gf128mul_ble)
Huang Ying | 0e1227d | 2009-10-19 11:53:06 +0900 | [diff] [blame] | 91 | |
/*
 * void clmul_ghash_mul(char *dst, const u128 *shash)
 *
 * Multiplies the 16-byte block at dst by the operand at shash using
 * __clmul_gf128mul_ble and writes the product back to dst.
 *
 * input:
 *	%rdi:	dst, 16 bytes, read and then overwritten with the result
 *	%rsi:	shash, 16 bytes, passed to __clmul_gf128mul_ble as SHASH
 * changed:
 *	DATA, SHASH, BSWAP, T1, T2, T3
 *
 * Both buffers are accessed with movups, so neither has to be aligned.
 */
SYM_FUNC_START(clmul_ghash_mul)
	FRAME_BEGIN
	movups (%rdi), DATA
	movups (%rsi), SHASH
	# RIP-relative load: no absolute relocation, shorter encoding.
	movaps .Lbswap_mask(%rip), BSWAP
	PSHUFB_XMM BSWAP DATA		# byte-reverse dst for the multiply
	call __clmul_gf128mul_ble
	PSHUFB_XMM BSWAP DATA		# back to the in-memory byte order
	movups DATA, (%rdi)
	FRAME_END
	ret
SYM_FUNC_END(clmul_ghash_mul)
Huang Ying | 0e1227d | 2009-10-19 11:53:06 +0900 | [diff] [blame] | 105 | |
/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const u128 *shash);
 *
 * For each complete 16-byte block of src, XORs the (byte-reversed) block
 * into the running value loaded from dst and multiplies the result by
 * shash via __clmul_gf128mul_ble. The final value is stored back to dst.
 *
 * Trailing bytes that do not fill a whole block are not consumed. If
 * srclen < 16 the function returns without reading or writing dst.
 *
 * input:
 *	%rdi:	dst, 16-byte running value (unaligned access is fine)
 *	%rsi:	src, input data (unaligned access is fine)
 *	%edx:	srclen, length of src in bytes
 *	%rcx:	shash, 16 bytes, passed to __clmul_gf128mul_ble as SHASH
 * changed:
 *	%rsi, %rdx, flags, DATA, SHASH, BSWAP, IN1, T1, T2, T3
 */
SYM_FUNC_START(clmul_ghash_update)
	FRAME_BEGIN
	# srclen is a 32-bit argument: only %edx is defined on entry, the
	# upper half of %rdx may hold anything. Keep all length arithmetic
	# 32-bit wide and use unsigned conditions throughout.
	cmp $16, %edx
	jb .Lupdate_just_ret	# check length
	# RIP-relative load: no absolute relocation, shorter encoding.
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA
.align 4
.Lupdate_loop:
	# Invariant: at least 16 bytes remain at (%rsi).
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1
	pxor IN1, DATA
	call __clmul_gf128mul_ble	# uses only XMM registers
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop		# another full block left
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	FRAME_END
	ret
SYM_FUNC_END(clmul_ghash_update)