/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <asm/assembler.h>
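
/*
 * Save Q0-Q31 followed by FPSR and FPCR to the buffer at \state, using
 * x\tmpnr as scratch. The final stp uses pre-index writeback, so \state is
 * clobbered: it is left pointing at the q30/q31 pair, which the FPSR/FPCR
 * stores below rely on.
 */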
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]
.endm

.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm

/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state
.endm
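
/*
 * Illustrative use (the wrapper names are hypothetical): with x0 pointing
 * at the save buffer and x8/w8 used as scratch:
 *
 * SYM_FUNC_START(example_fpsimd_save_state)
 *	fpsimd_save	x0, 8
 *	ret
 * SYM_FUNC_END(example_fpsimd_save_state)
 *
 * SYM_FUNC_START(example_fpsimd_restore_state)
 *	fpsimd_restore	x0, 8
 *	ret
 * SYM_FUNC_END(example_fpsimd_restore_state)
 */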

/* Sanity-check macros to help avoid encoding garbage instructions */

.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm

.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm

.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm

.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm

/* SVE instruction encodings for non-SVE-capable assemblers */
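
/*
 * Each macro below range-checks its operands and then emits the encoding as
 * a raw .inst word, so the file still assembles with toolchains that lack
 * SVE support. For example, "_sve_ldr_v 0, 1, -3" stands in for
 * "ldr z0, [x1, #-3, mul vl]".
 */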

/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* RDVL X\nx, #\imm */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm
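
/*
 * Note: RDVL X\nx, #\imm returns \imm times the current vector length in
 * bytes, so e.g. "_sve_rdvl 0, 1" (a usage sketch) leaves the SVE vector
 * length in bytes in x0.
 */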

/* RDFFR (unpredicated): RDFFR P\np.B */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000			\
		| (\np)
.endm

/* WRFFR P\np.B */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000			\
		| ((\np) << 5)
.endm

/* PFALSE P\np.B */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400			\
		| (\np)
.endm

.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm

.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
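
/*
 * For example, "_for n, 0, 2, _sve_pfalse \n" expands (via __for's binary
 * recursion) to:
 *
 *	_sve_pfalse 0
 *	_sve_pfalse 1
 *	_sve_pfalse 2
 */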

/*
 * Update ZCR_EL1.LEN with the new VQ (\xvqminus1 holds VQ - 1); the
 * self-synchronising write is skipped if the value is unchanged.
 */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_ZCR_EL1
	bic	\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_ZCR_EL1, \xtmp2	// self-synchronising
921:
.endm

/* Preserve the first 128 bits of Z\nz and zero the rest. */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm
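
/*
 * Clear all SVE-only state: zero the upper bits of Z0-Z31 (preserving the
 * shared 128-bit V-register parts), clear P0-P15, then clear FFR by writing
 * it from the now all-false P0.
 */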
.macro sve_flush
	_for	n, 0, 31, _sve_flush_z \n
	_for	n, 0, 15, _sve_pfalse \n
	_sve_wrffr	0
.endm
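
/*
 * Save the full SVE state: Z0-Z31, P0-P15 and FFR, plus FPSR and FPCR.
 * X\nxbase points at the slot where FFR is stored; the Z and P registers
 * are saved at negative "MUL VL" offsets below that slot, and FPSR/FPCR go
 * to the two words at [\xpfpsr].
 */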
.macro sve_save nxbase, xpfpsr, nxtmp
	_for	n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
	_for	n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
	_sve_rdffr	0
	_sve_str_p	0, \nxbase
	_sve_ldr_p	0, \nxbase, -16

	mrs	x\nxtmp, fpsr
	str	w\nxtmp, [\xpfpsr]
	mrs	x\nxtmp, fpcr
	str	w\nxtmp, [\xpfpsr, #4]
.endm
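
/*
 * Illustrative use (the wrapper name is hypothetical): with x0 pointing at
 * the FFR slot of the SVE save area, x1 at the FPSR/FPCR pair and x2/w2 as
 * scratch:
 *
 * SYM_FUNC_START(example_sve_save_state)
 *	sve_save 0, x1, 2
 *	ret
 * SYM_FUNC_END(example_sve_save_state)
 */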

.macro __sve_load nxbase, xpfpsr, nxtmp
	_for	n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
	_sve_ldr_p	0, \nxbase
	_sve_wrffr	0
	_for	n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16

	ldr	w\nxtmp, [\xpfpsr]
	msr	fpsr, x\nxtmp
	ldr	w\nxtmp, [\xpfpsr, #4]
	msr	fpcr, x\nxtmp
.endm

.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
	sve_load_vq	\xvqminus1, x\nxtmp, \xtmp2
	__sve_load	\nxbase, \xpfpsr, \nxtmp
.endm
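
/*
 * Illustrative use (the wrapper name is hypothetical): x0 points at the FFR
 * slot of the save area, x1 at the saved FPSR/FPCR pair, x2 holds the new
 * ZCR_EL1.LEN value (VQ - 1), and x3/x4 are scratch:
 *
 * SYM_FUNC_START(example_sve_load_state)
 *	sve_load 0, x1, x2, 3, x4
 *	ret
 * SYM_FUNC_END(example_sve_load_state)
 */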