mac80211: enable collecting station statistics per-CPU
If the driver advertises the new HW flag USE_RSS, make the
station statistics on the fast-rx path per-CPU. This will
enable calling the RX in parallel, only hitting locking or
shared cachelines when the fast-RX path isn't available.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index a0ce7e4..cf2aca0 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -254,6 +254,7 @@
#ifdef CONFIG_MAC80211_MESH
kfree(sta->mesh);
#endif
+ free_percpu(sta->pcpu_rx_stats);
kfree(sta);
}
@@ -311,6 +312,13 @@
if (!sta)
return NULL;
+ if (ieee80211_hw_check(hw, USES_RSS)) {
+ sta->pcpu_rx_stats =
+ alloc_percpu(struct ieee80211_sta_rx_stats);
+ if (!sta->pcpu_rx_stats)
+ goto free;
+ }
+
spin_lock_init(&sta->lock);
spin_lock_init(&sta->ps_lock);
INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames);
@@ -1932,6 +1940,28 @@
>> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1;
}
+static struct ieee80211_sta_rx_stats *
+sta_get_last_rx_stats(struct sta_info *sta)
+{
+ struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
+ struct ieee80211_local *local = sta->local;
+ int cpu;
+
+ if (!ieee80211_hw_check(&local->hw, USES_RSS))
+ return stats;
+
+ for_each_possible_cpu(cpu) {
+ struct ieee80211_sta_rx_stats *cpustats;
+
+ cpustats = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+
+ if (time_after(cpustats->last_rx, stats->last_rx))
+ stats = cpustats;
+ }
+
+ return stats;
+}
+
static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
struct rate_info *rinfo)
{
@@ -1967,7 +1997,7 @@
static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
{
- u16 rate = ACCESS_ONCE(sta->rx_stats.last_rate);
+ u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
if (rate == STA_STATS_RATE_INVALID)
rinfo->flags = 0;
@@ -2010,13 +2040,29 @@
}
}
+static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats)
+{
+ unsigned int start;
+ u64 value;
+
+ do {
+ start = u64_stats_fetch_begin(&rxstats->syncp);
+ value = rxstats->bytes;
+ } while (u64_stats_fetch_retry(&rxstats->syncp, start));
+
+ return value;
+}
+
void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct rate_control_ref *ref = NULL;
u32 thr = 0;
- int i, ac;
+ int i, ac, cpu;
+ struct ieee80211_sta_rx_stats *last_rxstats;
+
+ last_rxstats = sta_get_last_rx_stats(sta);
if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
ref = local->rate_ctrl;
@@ -2064,17 +2110,30 @@
if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) |
BIT(NL80211_STA_INFO_RX_BYTES)))) {
- unsigned int start;
+ sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
- do {
- start = u64_stats_fetch_begin(&sta->rx_stats.syncp);
- sinfo->rx_bytes = sta->rx_stats.bytes;
- } while (u64_stats_fetch_retry(&sta->rx_stats.syncp, start));
+ if (sta->pcpu_rx_stats) {
+ for_each_possible_cpu(cpu) {
+ struct ieee80211_sta_rx_stats *cpurxs;
+
+ cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ sinfo->rx_bytes += sta_get_stats_bytes(cpurxs);
+ }
+ }
+
sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
}
if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) {
sinfo->rx_packets = sta->rx_stats.packets;
+ if (sta->pcpu_rx_stats) {
+ for_each_possible_cpu(cpu) {
+ struct ieee80211_sta_rx_stats *cpurxs;
+
+ cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ sinfo->rx_packets += cpurxs->packets;
+ }
+ }
sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
}
@@ -2089,6 +2148,14 @@
}
sinfo->rx_dropped_misc = sta->rx_stats.dropped;
+ if (sta->pcpu_rx_stats) {
+ for_each_possible_cpu(cpu) {
+ struct ieee80211_sta_rx_stats *cpurxs;
+
+ cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ sinfo->rx_packets += cpurxs->dropped;
+ }
+ }
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
!(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) {
@@ -2100,27 +2167,34 @@
if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
- sinfo->signal = (s8)sta->rx_stats.last_signal;
+ sinfo->signal = (s8)last_rxstats->last_signal;
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
}
- if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
+ if (!sta->pcpu_rx_stats &&
+ !(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
sinfo->signal_avg =
-ewma_signal_read(&sta->rx_stats_avg.signal);
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
}
}
- if (sta->rx_stats.chains &&
+ /* for the average - if pcpu_rx_stats isn't set - rxstats must point to
+ * the sta->rx_stats struct, so the check here is fine with and without
+ * pcpu statistics
+ */
+ if (last_rxstats->chains &&
!(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
- sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
- BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
+ sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL);
+ if (!sta->pcpu_rx_stats)
+ sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
- sinfo->chains = sta->rx_stats.chains;
+ sinfo->chains = last_rxstats->chains;
+
for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
sinfo->chain_signal[i] =
- sta->rx_stats.chain_signal_last[i];
+ last_rxstats->chain_signal_last[i];
sinfo->chain_signal_avg[i] =
-ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]);
}
@@ -2213,7 +2287,9 @@
unsigned long ieee80211_sta_last_active(struct sta_info *sta)
{
- if (time_after(sta->rx_stats.last_rx, sta->status_stats.last_ack))
- return sta->rx_stats.last_rx;
+ struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
+
+ if (time_after(stats->last_rx, sta->status_stats.last_ack))
+ return stats->last_rx;
return sta->status_stats.last_ack;
}