crypto: camellia - use unaligned accessors instead of alignmask

Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the
Camellia input and output blocks, which propagates to mode drivers and
results in pointless copying on architectures that don't care about
alignment, use the unaligned accessors. These do the right thing on
each respective architecture, avoiding the need for double buffering.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c
index 0b9f409..fd1a88a 100644
--- a/crypto/camellia_generic.c
+++ b/crypto/camellia_generic.c
@@ -9,14 +9,6 @@
  *  https://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
  */
 
-/*
- *
- * NOTE --- NOTE --- NOTE --- NOTE
- * This implementation assumes that all memory addresses passed
- * as parameters are four-byte aligned.
- *
- */
-
 #include <linux/crypto.h>
 #include <linux/errno.h>
 #include <linux/init.h>
@@ -994,16 +986,14 @@ camellia_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 static void camellia_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct camellia_ctx *cctx = crypto_tfm_ctx(tfm);
-	const __be32 *src = (const __be32 *)in;
-	__be32 *dst = (__be32 *)out;
 	unsigned int max;
 
 	u32 tmp[4];
 
-	tmp[0] = be32_to_cpu(src[0]);
-	tmp[1] = be32_to_cpu(src[1]);
-	tmp[2] = be32_to_cpu(src[2]);
-	tmp[3] = be32_to_cpu(src[3]);
+	tmp[0] = get_unaligned_be32(in);
+	tmp[1] = get_unaligned_be32(in + 4);
+	tmp[2] = get_unaligned_be32(in + 8);
+	tmp[3] = get_unaligned_be32(in + 12);
 
 	if (cctx->key_length == 16)
 		max = 24;
@@ -1013,25 +1003,23 @@ static void camellia_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	camellia_do_encrypt(cctx->key_table, tmp, max);
 
 	/* do_encrypt returns 0,1 swapped with 2,3 */
-	dst[0] = cpu_to_be32(tmp[2]);
-	dst[1] = cpu_to_be32(tmp[3]);
-	dst[2] = cpu_to_be32(tmp[0]);
-	dst[3] = cpu_to_be32(tmp[1]);
+	put_unaligned_be32(tmp[2], out);
+	put_unaligned_be32(tmp[3], out + 4);
+	put_unaligned_be32(tmp[0], out + 8);
+	put_unaligned_be32(tmp[1], out + 12);
 }
 
 static void camellia_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct camellia_ctx *cctx = crypto_tfm_ctx(tfm);
-	const __be32 *src = (const __be32 *)in;
-	__be32 *dst = (__be32 *)out;
 	unsigned int max;
 
 	u32 tmp[4];
 
-	tmp[0] = be32_to_cpu(src[0]);
-	tmp[1] = be32_to_cpu(src[1]);
-	tmp[2] = be32_to_cpu(src[2]);
-	tmp[3] = be32_to_cpu(src[3]);
+	tmp[0] = get_unaligned_be32(in);
+	tmp[1] = get_unaligned_be32(in + 4);
+	tmp[2] = get_unaligned_be32(in + 8);
+	tmp[3] = get_unaligned_be32(in + 12);
 
 	if (cctx->key_length == 16)
 		max = 24;
@@ -1041,10 +1029,10 @@ static void camellia_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	camellia_do_decrypt(cctx->key_table, tmp, max);
 
 	/* do_decrypt returns 0,1 swapped with 2,3 */
-	dst[0] = cpu_to_be32(tmp[2]);
-	dst[1] = cpu_to_be32(tmp[3]);
-	dst[2] = cpu_to_be32(tmp[0]);
-	dst[3] = cpu_to_be32(tmp[1]);
+	put_unaligned_be32(tmp[2], out);
+	put_unaligned_be32(tmp[3], out + 4);
+	put_unaligned_be32(tmp[0], out + 8);
+	put_unaligned_be32(tmp[1], out + 12);
 }
 
 static struct crypto_alg camellia_alg = {
@@ -1054,7 +1042,6 @@ static struct crypto_alg camellia_alg = {
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct camellia_ctx),
-	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
 	.cra_u			=	{
 		.cipher = {