mac80211: allow checksum offload only in fast-xmit

When we go through the complete TX processing, there are a number
of things like fragmentation and software crypto that require the
checksum to be calculated already.

In favour of maintainability, instead of adding the necessary call
to skb_checksum_help() in all the places that need it, just do it
once before the regular TX processing.

Right now this only affects the TI wlcore and QCA ath10k drivers
since they're the only ones using checksum offload. The previous
commits enabled fast-xmit for them in almost all cases.

For wlcore this even fixes a corner case: when a key fails to be
programmed into the hardware, software encryption gets used,
encrypting frames with a bad checksum.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 49ba43e..20a90b1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2851,10 +2851,8 @@
 
 	rcu_read_lock();
 
-	if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) {
-		kfree_skb(skb);
-		goto out;
-	}
+	if (ieee80211_lookup_ra_sta(sdata, skb, &sta))
+		goto out_free;
 
 	if (!IS_ERR_OR_NULL(sta)) {
 		struct ieee80211_fast_tx *fast_tx;
@@ -2866,6 +2864,21 @@
 			goto out;
 	}
 
+	/* the frame could be fragmented, software-encrypted, and other things
+	 * so we cannot really handle checksum offload with it - fix it up in
+	 * software before we handle anything else.
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb->encapsulation)
+			skb_set_inner_transport_header(skb,
+						       skb_checksum_start_offset(skb));
+		else
+			skb_set_transport_header(skb,
+						 skb_checksum_start_offset(skb));
+		if (skb_checksum_help(skb))
+			goto out_free;
+	}
+
 	skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
 	if (IS_ERR(skb))
 		goto out;
@@ -2875,6 +2888,9 @@
 	dev->trans_start = jiffies;
 
 	ieee80211_xmit(sdata, sta, skb);
+	goto out;
+ out_free:
+	kfree_skb(skb);
  out:
 	rcu_read_unlock();
 }