mac80211: fix CCMP races

Since we can process multiple packets at the
same time for different ACs, but the PN is
allocated from a single counter, we need to
use an atomic value there. Use atomic64_t to
make this cheaper on 64-bit platforms, other
platforms will support this through software
emulation, see lib/atomic64.c.

We also need to use an on-stack scratch buf
so that multiple packets won't corrupt each
others scratch buffers.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 295ab74..3000b4c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -209,6 +209,7 @@
 	u8 seq[6] = {0};
 	struct key_params params;
 	struct ieee80211_key *key = NULL;
+	u64 pn64;
 	u32 iv32;
 	u16 iv16;
 	int err = -ENOENT;
@@ -256,12 +257,13 @@
 		params.seq_len = 6;
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
-		seq[0] = key->u.ccmp.tx_pn[5];
-		seq[1] = key->u.ccmp.tx_pn[4];
-		seq[2] = key->u.ccmp.tx_pn[3];
-		seq[3] = key->u.ccmp.tx_pn[2];
-		seq[4] = key->u.ccmp.tx_pn[1];
-		seq[5] = key->u.ccmp.tx_pn[0];
+		pn64 = atomic64_read(&key->u.ccmp.tx_pn);
+		seq[0] = pn64;
+		seq[1] = pn64 >> 8;
+		seq[2] = pn64 >> 16;
+		seq[3] = pn64 >> 24;
+		seq[4] = pn64 >> 32;
+		seq[5] = pn64 >> 40;
 		params.seq = seq;
 		params.seq_len = 6;
 		break;
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 33c58b8..4433760 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -79,6 +79,7 @@
 				size_t count, loff_t *ppos)
 {
 	const u8 *tpn;
+	u64 pn;
 	char buf[20];
 	int len;
 	struct ieee80211_key *key = file->private_data;
@@ -94,9 +95,10 @@
 				key->u.tkip.tx.iv16);
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
-		tpn = key->u.ccmp.tx_pn;
+		pn = atomic64_read(&key->u.ccmp.tx_pn);
 		len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
-				tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
+				(u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
+				(u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
 		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
 		tpn = key->u.aes_cmac.tx_pn;
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 1493c3e..05ce4c0 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -82,7 +82,7 @@
 			struct tkip_ctx rx[NUM_RX_DATA_QUEUES];
 		} tkip;
 		struct {
-			u8 tx_pn[6];
+			atomic64_t tx_pn;
 			/*
 			 * Last received packet number. The first
 			 * NUM_RX_DATA_QUEUES counters are used with Data
@@ -92,12 +92,9 @@
 			u8 rx_pn[NUM_RX_DATA_QUEUES + 1][6];
 			struct crypto_cipher *tfm;
 			u32 replays; /* dot11RSNAStatsCCMPReplays */
-			/* scratch buffers for virt_to_page() (crypto API) */
 #ifndef AES_BLOCK_LEN
 #define AES_BLOCK_LEN 16
 #endif
-			u8 tx_crypto_buf[6 * AES_BLOCK_LEN];
-			u8 rx_crypto_buf[6 * AES_BLOCK_LEN];
 		} ccmp;
 		struct {
 			u8 tx_pn[6];
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 4ded2ae..7691e4e 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -15,6 +15,7 @@
 #include <linux/gfp.h>
 #include <asm/unaligned.h>
 #include <net/mac80211.h>
+#include <crypto/aes.h>
 
 #include "ieee80211_i.h"
 #include "michael.h"
@@ -290,6 +291,8 @@
 	unsigned int hdrlen;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
+	memset(scratch, 0, 6 * AES_BLOCK_LEN);
+
 	b_0 = scratch + 3 * AES_BLOCK_LEN;
 	aad = scratch + 4 * AES_BLOCK_LEN;
 
@@ -380,8 +383,10 @@
 	struct ieee80211_key *key = tx->key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int hdrlen, len, tail;
-	u8 *pos, *pn;
-	int i;
+	u8 *pos;
+	u8 pn[6];
+	u64 pn64;
+	u8 scratch[6 * AES_BLOCK_LEN];
 
 	if (info->control.hw_key &&
 	    !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
@@ -409,14 +414,14 @@
 	hdr = (struct ieee80211_hdr *) pos;
 	pos += hdrlen;
 
-	/* PN = PN + 1 */
-	pn = key->u.ccmp.tx_pn;
+	pn64 = atomic64_inc_return(&key->u.ccmp.tx_pn);
 
-	for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
-		pn[i]++;
-		if (pn[i])
-			break;
-	}
+	pn[5] = pn64;
+	pn[4] = pn64 >> 8;
+	pn[3] = pn64 >> 16;
+	pn[2] = pn64 >> 24;
+	pn[1] = pn64 >> 32;
+	pn[0] = pn64 >> 40;
 
 	ccmp_pn2hdr(pos, pn, key->conf.keyidx);
 
@@ -425,8 +430,8 @@
 		return 0;
 
 	pos += CCMP_HDR_LEN;
-	ccmp_special_blocks(skb, pn, key->u.ccmp.tx_crypto_buf, 0);
-	ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, key->u.ccmp.tx_crypto_buf, pos, len,
+	ccmp_special_blocks(skb, pn, scratch, 0);
+	ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len,
 				  pos, skb_put(skb, CCMP_MIC_LEN));
 
 	return 0;
@@ -482,11 +487,12 @@
 	}
 
 	if (!(status->flag & RX_FLAG_DECRYPTED)) {
+		u8 scratch[6 * AES_BLOCK_LEN];
 		/* hardware didn't decrypt/verify MIC */
-		ccmp_special_blocks(skb, pn, key->u.ccmp.rx_crypto_buf, 1);
+		ccmp_special_blocks(skb, pn, scratch, 1);
 
 		if (ieee80211_aes_ccm_decrypt(
-			    key->u.ccmp.tfm, key->u.ccmp.rx_crypto_buf,
+			    key->u.ccmp.tfm, scratch,
 			    skb->data + hdrlen + CCMP_HDR_LEN, data_len,
 			    skb->data + skb->len - CCMP_MIC_LEN,
 			    skb->data + hdrlen + CCMP_HDR_LEN))