crypto: aesni-intel - Use gas macro for AES-NI instructions

Old binutils do not support AES-NI instructions, to make kernel can be
compiled by them, .byte code is used instead of AES-NI assembly
instructions. But the readability and flexibility of raw .byte code is
not good.

So corresponding assembly instruction like gas macro is used instead.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index eb0566e..20bb0e1a 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -16,6 +16,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/inst.h>
 
 .text
 
@@ -122,103 +123,72 @@
 	movups 0x10(%rsi), %xmm2	# other user key
 	movaps %xmm2, (%rcx)
 	add $0x10, %rcx
-	# aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
 	call _key_expansion_256a
-	# aeskeygenassist $0x1, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+	AESKEYGENASSIST 0x1 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
 	call _key_expansion_256a
-	# aeskeygenassist $0x2, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+	AESKEYGENASSIST 0x2 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
 	call _key_expansion_256a
-	# aeskeygenassist $0x4, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+	AESKEYGENASSIST 0x4 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
 	call _key_expansion_256a
-	# aeskeygenassist $0x8, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+	AESKEYGENASSIST 0x8 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
 	call _key_expansion_256a
-	# aeskeygenassist $0x10, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+	AESKEYGENASSIST 0x10 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
 	call _key_expansion_256a
-	# aeskeygenassist $0x20, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+	AESKEYGENASSIST 0x20 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
 	call _key_expansion_256a
 	jmp .Ldec_key
 .Lenc_key192:
 	movq 0x10(%rsi), %xmm2		# other user key
-	# aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
 	call _key_expansion_192a
-	# aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
 	call _key_expansion_192b
-	# aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
 	call _key_expansion_192a
-	# aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
 	call _key_expansion_192b
-	# aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
 	call _key_expansion_192a
-	# aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
 	call _key_expansion_192b
-	# aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
 	call _key_expansion_192a
-	# aeskeygenassist $0x80, %xmm2, %xmm1	# round 8
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
+	AESKEYGENASSIST 0x80 %xmm2 %xmm1	# round 8
 	call _key_expansion_192b
 	jmp .Ldec_key
 .Lenc_key128:
-	# aeskeygenassist $0x1, %xmm0, %xmm1	# round 1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+	AESKEYGENASSIST 0x1 %xmm0 %xmm1		# round 1
 	call _key_expansion_128
-	# aeskeygenassist $0x2, %xmm0, %xmm1	# round 2
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+	AESKEYGENASSIST 0x2 %xmm0 %xmm1		# round 2
 	call _key_expansion_128
-	# aeskeygenassist $0x4, %xmm0, %xmm1	# round 3
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+	AESKEYGENASSIST 0x4 %xmm0 %xmm1		# round 3
 	call _key_expansion_128
-	# aeskeygenassist $0x8, %xmm0, %xmm1	# round 4
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+	AESKEYGENASSIST 0x8 %xmm0 %xmm1		# round 4
 	call _key_expansion_128
-	# aeskeygenassist $0x10, %xmm0, %xmm1	# round 5
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+	AESKEYGENASSIST 0x10 %xmm0 %xmm1	# round 5
 	call _key_expansion_128
-	# aeskeygenassist $0x20, %xmm0, %xmm1	# round 6
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+	AESKEYGENASSIST 0x20 %xmm0 %xmm1	# round 6
 	call _key_expansion_128
-	# aeskeygenassist $0x40, %xmm0, %xmm1	# round 7
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
+	AESKEYGENASSIST 0x40 %xmm0 %xmm1	# round 7
 	call _key_expansion_128
-	# aeskeygenassist $0x80, %xmm0, %xmm1	# round 8
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
+	AESKEYGENASSIST 0x80 %xmm0 %xmm1	# round 8
 	call _key_expansion_128
-	# aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
+	AESKEYGENASSIST 0x1b %xmm0 %xmm1	# round 9
 	call _key_expansion_128
-	# aeskeygenassist $0x36, %xmm0, %xmm1	# round 10
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
+	AESKEYGENASSIST 0x36 %xmm0 %xmm1	# round 10
 	call _key_expansion_128
 .Ldec_key:
 	sub $0x10, %rcx
@@ -231,8 +201,7 @@
 .align 4
 .Ldec_key_loop:
 	movaps (%rdi), %xmm0
-	# aesimc %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
+	AESIMC %xmm0 %xmm1
 	movaps %xmm1, (%rsi)
 	add $0x10, %rdi
 	sub $0x10, %rsi
@@ -274,51 +243,37 @@
 	je .Lenc192
 	add $0x20, TKEYP
 	movaps -0x60(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps -0x50(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 .align 4
 .Lenc192:
 	movaps -0x40(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps -0x30(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 .align 4
 .Lenc128:
 	movaps -0x20(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps -0x10(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps (TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x10(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x20(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x30(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x40(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x50(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x60(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x70(TKEYP), KEY
-	# aesenclast KEY, STATE	# last round
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+	AESENCLAST KEY STATE
 	ret
 
 /*
@@ -353,135 +308,79 @@
 	je .L4enc192
 	add $0x20, TKEYP
 	movaps -0x60(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps -0x50(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 #.align 4
 .L4enc192:
 	movaps -0x40(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps -0x30(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 #.align 4
 .L4enc128:
 	movaps -0x20(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps -0x10(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps (TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x10(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x20(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x30(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x40(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x50(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x60(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x70(TKEYP), KEY
-	# aesenclast KEY, STATE1	# last round
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
-	# aesenclast KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
-	# aesenclast KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xea
-	# aesenclast KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
+	AESENCLAST KEY STATE1		# last round
+	AESENCLAST KEY STATE2
+	AESENCLAST KEY STATE3
+	AESENCLAST KEY STATE4
 	ret
 
 /*
@@ -518,51 +417,37 @@
 	je .Ldec192
 	add $0x20, TKEYP
 	movaps -0x60(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps -0x50(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 .align 4
 .Ldec192:
 	movaps -0x40(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps -0x30(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 .align 4
 .Ldec128:
 	movaps -0x20(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps -0x10(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps (TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x10(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x20(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x30(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x40(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x50(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x60(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x70(TKEYP), KEY
-	# aesdeclast KEY, STATE		# last round
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+	AESDECLAST KEY STATE
 	ret
 
 /*
@@ -597,135 +482,79 @@
 	je .L4dec192
 	add $0x20, TKEYP
 	movaps -0x60(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps -0x50(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 .align 4
 .L4dec192:
 	movaps -0x40(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps -0x30(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 .align 4
 .L4dec128:
 	movaps -0x20(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps -0x10(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps (TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x10(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x20(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x30(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x40(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x50(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x60(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x70(TKEYP), KEY
-	# aesdeclast KEY, STATE1	# last round
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
-	# aesdeclast KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
-	# aesdeclast KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xea
-	# aesdeclast KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
+	AESDECLAST KEY STATE1		# last round
+	AESDECLAST KEY STATE2
+	AESDECLAST KEY STATE3
+	AESDECLAST KEY STATE4
 	ret
 
 /*