Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wirel...
[firefly-linux-kernel-4.4.55.git] / net / mac80211 / agg-tx.c
index c163d0a149f49be05b07fa879562f2d821004beb..c893f236acea771076b5572b42c82162c3684913 100644 (file)
@@ -6,7 +6,7 @@
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
- * Copyright 2007-2009, Intel Corporation
+ * Copyright 2007-2010, Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 #include "wme.h"
 
 /**
- * DOC: TX aggregation
+ * DOC: TX A-MPDU aggregation
  *
  * Aggregation on the TX side requires setting the hardware flag
- * %IEEE80211_HW_AMPDU_AGGREGATION as well as, if present, the @ampdu_queues
- * hardware parameter to the number of hardware AMPDU queues. If there are no
- * hardware queues then the driver will (currently) have to do all frame
- * buffering.
+ * %IEEE80211_HW_AMPDU_AGGREGATION. The driver will then be handed
+ * packets with a flag indicating A-MPDU aggregation. The driver
+ * or device is responsible for actually aggregating the frames,
+ * as well as deciding how many and which to aggregate.
  *
- * When TX aggregation is started by some subsystem (usually the rate control
- * algorithm would be appropriate) by calling the
- * ieee80211_start_tx_ba_session() function, the driver will be notified via
- * its @ampdu_action function, with the %IEEE80211_AMPDU_TX_START action.
+ * When TX aggregation is started by some subsystem (usually the rate
+ * control algorithm would be appropriate) by calling the
+ * ieee80211_start_tx_ba_session() function, the driver will be
+ * notified via its @ampdu_action function, with the
+ * %IEEE80211_AMPDU_TX_START action.
  *
  * In response to that, the driver is later required to call the
- * ieee80211_start_tx_ba_cb() (or ieee80211_start_tx_ba_cb_irqsafe())
- * function, which will start the aggregation session.
+ * ieee80211_start_tx_ba_cb_irqsafe() function, which will really
+ * start the aggregation session after the peer has also responded.
+ * If the peer responds negatively, the session will be stopped
+ * again right away. Note that it is possible for the aggregation
+ * session to be stopped before the driver has indicated that it
+ * is done setting it up, in which case it must not indicate the
+ * setup completion.
  *
- * Similarly, when the aggregation session is stopped by
- * ieee80211_stop_tx_ba_session(), the driver's @ampdu_action function will
- * be called with the action %IEEE80211_AMPDU_TX_STOP. In this case, the
- * call must not fail, and the driver must later call ieee80211_stop_tx_ba_cb()
- * (or ieee80211_stop_tx_ba_cb_irqsafe()).
+ * Also note that, since we also need to wait for a response from
+ * the peer, the driver is notified of the completion of the
+ * handshake by the %IEEE80211_AMPDU_TX_OPERATIONAL action to the
+ * @ampdu_action callback.
+ *
+ * Similarly, when the aggregation session is stopped by the peer
+ * or something calling ieee80211_stop_tx_ba_session(), the driver's
+ * @ampdu_action function will be called with the action
+ * %IEEE80211_AMPDU_TX_STOP. In this case, the call must not fail,
+ * and the driver must later call ieee80211_stop_tx_ba_cb_irqsafe().
  */
 
 static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
@@ -125,25 +136,53 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1
        ieee80211_tx_skb(sdata, skb);
 }
 
+static void kfree_tid_tx(struct rcu_head *rcu_head)
+{
+       struct tid_ampdu_tx *tid_tx =
+           container_of(rcu_head, struct tid_ampdu_tx, rcu_head);
+
+       kfree(tid_tx);
+}
+
 int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
                                    enum ieee80211_back_parties initiator)
 {
        struct ieee80211_local *local = sta->local;
+       struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid];
        int ret;
-       u8 *state;
+
+       lockdep_assert_held(&sta->ampdu_mlme.mtx);
+
+       if (!tid_tx)
+               return -ENOENT;
+
+       spin_lock_bh(&sta->lock);
+
+       if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
+               /* not even started yet! */
+               rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL);
+               spin_unlock_bh(&sta->lock);
+               call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
+               return 0;
+       }
+
+       spin_unlock_bh(&sta->lock);
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
        printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n",
               sta->sta.addr, tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
-       state = &sta->ampdu_mlme.tid_state_tx[tid];
+       set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);
 
-       if (*state == HT_AGG_STATE_OPERATIONAL)
-               sta->ampdu_mlme.addba_req_num[tid] = 0;
+       /*
+        * After this packets are no longer handed right through
+        * to the driver but are put onto tid_tx->pending instead,
+        * with locking to ensure proper access.
+        */
+       clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);
 
-       *state = HT_AGG_STATE_REQ_STOP_BA_MSK |
-               (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
+       tid_tx->stop_initiator = initiator;
 
        ret = drv_ampdu_action(local, sta->sdata,
                               IEEE80211_AMPDU_TX_STOP,
@@ -174,16 +213,14 @@ static void sta_addba_resp_timer_expired(unsigned long data)
        u16 tid = *(u8 *)data;
        struct sta_info *sta = container_of((void *)data,
                struct sta_info, timer_to_tid[tid]);
-       u8 *state;
-
-       state = &sta->ampdu_mlme.tid_state_tx[tid];
+       struct tid_ampdu_tx *tid_tx;
 
        /* check if the TID waits for addBA response */
-       spin_lock_bh(&sta->lock);
-       if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK |
-                      HT_AGG_STATE_REQ_STOP_BA_MSK)) !=
-                                               HT_ADDBA_REQUESTED_MSK) {
-               spin_unlock_bh(&sta->lock);
+       rcu_read_lock();
+       tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
+       if (!tid_tx ||
+           test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
+               rcu_read_unlock();
 #ifdef CONFIG_MAC80211_HT_DEBUG
                printk(KERN_DEBUG "timer expired on tid %d but we are not "
                                "(or no longer) expecting addBA response there\n",
@@ -196,8 +233,8 @@ static void sta_addba_resp_timer_expired(unsigned long data)
        printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
 #endif
 
-       ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR);
-       spin_unlock_bh(&sta->lock);
+       ieee80211_stop_tx_ba_session(&sta->sta, tid);
+       rcu_read_unlock();
 }
 
 static inline int ieee80211_ac_from_tid(int tid)
@@ -205,14 +242,112 @@ static inline int ieee80211_ac_from_tid(int tid)
        return ieee802_1d_to_ac[tid & 7];
 }
 
+/*
+ * When multiple aggregation sessions on multiple stations
+ * are being created/destroyed simultaneously, we need to
+ * refcount the global queue stop caused by that in order
+ * to not get into a situation where one of the aggregation
+ * setup or teardown re-enables queues before the other is
+ * ready to handle that.
+ *
+ * These two functions take care of this issue by keeping
+ * a global "agg_queue_stop" refcount.
+ */
+static void __acquires(agg_queue)
+ieee80211_stop_queue_agg(struct ieee80211_local *local, int tid)
+{
+       int queue = ieee80211_ac_from_tid(tid);
+
+       if (atomic_inc_return(&local->agg_queue_stop[queue]) == 1)
+               ieee80211_stop_queue_by_reason(
+                       &local->hw, queue,
+                       IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+       __acquire(agg_queue);
+}
+
+static void __releases(agg_queue)
+ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid)
+{
+       int queue = ieee80211_ac_from_tid(tid);
+
+       if (atomic_dec_return(&local->agg_queue_stop[queue]) == 0)
+               ieee80211_wake_queue_by_reason(
+                       &local->hw, queue,
+                       IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+       __release(agg_queue);
+}
+
+void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
+{
+       struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid];
+       struct ieee80211_local *local = sta->local;
+       struct ieee80211_sub_if_data *sdata = sta->sdata;
+       u16 start_seq_num;
+       int ret;
+
+       lockdep_assert_held(&sta->ampdu_mlme.mtx);
+
+       /*
+        * While we're asking the driver about the aggregation,
+        * stop the AC queue so that we don't have to worry
+        * about frames that came in while we were doing that,
+        * which would require us to put them to the AC pending
+        * afterwards which just makes the code more complex.
+        */
+       ieee80211_stop_queue_agg(local, tid);
+
+       clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
+
+       /*
+        * make sure no packets are being processed to get
+        * valid starting sequence number
+        */
+       synchronize_net();
+
+       start_seq_num = sta->tid_seq[tid] >> 4;
+
+       ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
+                              &sta->sta, tid, &start_seq_num);
+       if (ret) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+               printk(KERN_DEBUG "BA request denied - HW unavailable for"
+                                       " tid %d\n", tid);
+#endif
+               spin_lock_bh(&sta->lock);
+               rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL);
+               spin_unlock_bh(&sta->lock);
+
+               ieee80211_wake_queue_agg(local, tid);
+               call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
+               return;
+       }
+
+       /* we can take packets again now */
+       ieee80211_wake_queue_agg(local, tid);
+
+       /* activate the timer for the recipient's addBA response */
+       mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL);
+#ifdef CONFIG_MAC80211_HT_DEBUG
+       printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
+#endif
+
+       spin_lock_bh(&sta->lock);
+       sta->ampdu_mlme.addba_req_num[tid]++;
+       spin_unlock_bh(&sta->lock);
+
+       /* send AddBA request */
+       ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
+                                    tid_tx->dialog_token, start_seq_num,
+                                    0x40, 5000);
+}
+
 int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 {
        struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
        struct ieee80211_sub_if_data *sdata = sta->sdata;
        struct ieee80211_local *local = sdata->local;
-       u8 *state;
+       struct tid_ampdu_tx *tid_tx;
        int ret = 0;
-       u16 start_seq_num;
 
        trace_api_start_tx_ba_session(pubsta, tid);
 
@@ -239,24 +374,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
            sdata->vif.type != NL80211_IFTYPE_AP)
                return -EINVAL;
 
-       if (test_sta_flags(sta, WLAN_STA_DISASSOC)) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-               printk(KERN_DEBUG "Disassociation is in progress. "
-                      "Denying BA session request\n");
-#endif
-               return -EINVAL;
-       }
-
        if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
-               printk(KERN_DEBUG "Suspend in progress. "
+               printk(KERN_DEBUG "BA sessions blocked. "
                       "Denying BA session request\n");
 #endif
                return -EINVAL;
        }
 
        spin_lock_bh(&sta->lock);
-       spin_lock(&local->ampdu_lock);
 
        /* we have tried too many times, receiver does not want A-MPDU */
        if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
@@ -264,9 +390,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
                goto err_unlock_sta;
        }
 
-       state = &sta->ampdu_mlme.tid_state_tx[tid];
+       tid_tx = sta->ampdu_mlme.tid_tx[tid];
        /* check if the TID is not in aggregation flow already */
-       if (*state != HT_AGG_STATE_IDLE) {
+       if (tid_tx) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
                printk(KERN_DEBUG "BA request denied - session is not "
                                 "idle on tid %u\n", tid);
@@ -275,94 +401,37 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
                goto err_unlock_sta;
        }
 
-       /*
-        * While we're asking the driver about the aggregation,
-        * stop the AC queue so that we don't have to worry
-        * about frames that came in while we were doing that,
-        * which would require us to put them to the AC pending
-        * afterwards which just makes the code more complex.
-        */
-       ieee80211_stop_queue_by_reason(
-               &local->hw, ieee80211_ac_from_tid(tid),
-               IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
-
        /* prepare A-MPDU MLME for Tx aggregation */
-       sta->ampdu_mlme.tid_tx[tid] =
-                       kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
-       if (!sta->ampdu_mlme.tid_tx[tid]) {
+       tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
+       if (!tid_tx) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
                if (net_ratelimit())
                        printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
                                        tid);
 #endif
                ret = -ENOMEM;
-               goto err_wake_queue;
+               goto err_unlock_sta;
        }
 
-       skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending);
+       skb_queue_head_init(&tid_tx->pending);
+       __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
 
        /* Tx timer */
-       sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
-                       sta_addba_resp_timer_expired;
-       sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data =
-                       (unsigned long)&sta->timer_to_tid[tid];
-       init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
-
-       /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the
-        * call back right away, it must see that the flow has begun */
-       *state |= HT_ADDBA_REQUESTED_MSK;
+       tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired;
+       tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid];
+       init_timer(&tid_tx->addba_resp_timer);
 
-       start_seq_num = sta->tid_seq[tid] >> 4;
-
-       ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
-                              pubsta, tid, &start_seq_num);
-
-       if (ret) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-               printk(KERN_DEBUG "BA request denied - HW unavailable for"
-                                       " tid %d\n", tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-               *state = HT_AGG_STATE_IDLE;
-               goto err_free;
-       }
+       /* assign a dialog token */
+       sta->ampdu_mlme.dialog_token_allocator++;
+       tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator;
 
-       /* Driver vetoed or OKed, but we can take packets again now */
-       ieee80211_wake_queue_by_reason(
-               &local->hw, ieee80211_ac_from_tid(tid),
-               IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+       /* finally, assign it to the array */
+       rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx);
 
-       spin_unlock(&local->ampdu_lock);
-       spin_unlock_bh(&sta->lock);
+       ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
 
-       /* send an addBA request */
-       sta->ampdu_mlme.dialog_token_allocator++;
-       sta->ampdu_mlme.tid_tx[tid]->dialog_token =
-                       sta->ampdu_mlme.dialog_token_allocator;
-       sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
-
-       ieee80211_send_addba_request(sdata, pubsta->addr, tid,
-                        sta->ampdu_mlme.tid_tx[tid]->dialog_token,
-                        sta->ampdu_mlme.tid_tx[tid]->ssn,
-                        0x40, 5000);
-       sta->ampdu_mlme.addba_req_num[tid]++;
-       /* activate the timer for the recipient's addBA response */
-       sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
-                               jiffies + ADDBA_RESP_INTERVAL;
-       add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
-#ifdef CONFIG_MAC80211_HT_DEBUG
-       printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
-#endif
-       return 0;
-
- err_free:
-       kfree(sta->ampdu_mlme.tid_tx[tid]);
-       sta->ampdu_mlme.tid_tx[tid] = NULL;
- err_wake_queue:
-       ieee80211_wake_queue_by_reason(
-               &local->hw, ieee80211_ac_from_tid(tid),
-               IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+       /* this flow continues off the work */
  err_unlock_sta:
-       spin_unlock(&local->ampdu_lock);
        spin_unlock_bh(&sta->lock);
        return ret;
 }
@@ -370,69 +439,65 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
 
 /*
  * splice packets from the STA's pending to the local pending,
- * requires a call to ieee80211_agg_splice_finish and holding
- * local->ampdu_lock across both calls.
+ * requires a call to ieee80211_agg_splice_finish later
  */
-static void ieee80211_agg_splice_packets(struct ieee80211_local *local,
-                                        struct sta_info *sta, u16 tid)
+static void __acquires(agg_queue)
+ieee80211_agg_splice_packets(struct ieee80211_local *local,
+                            struct tid_ampdu_tx *tid_tx, u16 tid)
 {
+       int queue = ieee80211_ac_from_tid(tid);
        unsigned long flags;
-       u16 queue = ieee80211_ac_from_tid(tid);
-
-       ieee80211_stop_queue_by_reason(
-               &local->hw, queue,
-               IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
 
-       if (!(sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK))
-               return;
+       ieee80211_stop_queue_agg(local, tid);
 
-       if (WARN(!sta->ampdu_mlme.tid_tx[tid],
-                "TID %d gone but expected when splicing aggregates from"
-                "the pending queue\n", tid))
+       if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates"
+                         " from the pending queue\n", tid))
                return;
 
-       if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) {
+       if (!skb_queue_empty(&tid_tx->pending)) {
                spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
                /* copy over remaining packets */
-               skb_queue_splice_tail_init(
-                       &sta->ampdu_mlme.tid_tx[tid]->pending,
-                       &local->pending[queue]);
+               skb_queue_splice_tail_init(&tid_tx->pending,
+                                          &local->pending[queue]);
                spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
        }
 }
 
-static void ieee80211_agg_splice_finish(struct ieee80211_local *local,
-                                       struct sta_info *sta, u16 tid)
+static void __releases(agg_queue)
+ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid)
 {
-       u16 queue = ieee80211_ac_from_tid(tid);
-
-       ieee80211_wake_queue_by_reason(
-               &local->hw, queue,
-               IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+       ieee80211_wake_queue_agg(local, tid);
 }
 
-/* caller must hold sta->lock */
 static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
                                         struct sta_info *sta, u16 tid)
 {
+       lockdep_assert_held(&sta->ampdu_mlme.mtx);
+
 #ifdef CONFIG_MAC80211_HT_DEBUG
        printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid);
 #endif
 
-       spin_lock(&local->ampdu_lock);
-       ieee80211_agg_splice_packets(local, sta, tid);
-       /*
-        * NB: we rely on sta->lock being taken in the TX
-        * processing here when adding to the pending queue,
-        * otherwise we could only change the state of the
-        * session to OPERATIONAL _here_.
-        */
-       ieee80211_agg_splice_finish(local, sta, tid);
-       spin_unlock(&local->ampdu_lock);
-
        drv_ampdu_action(local, sta->sdata,
                         IEEE80211_AMPDU_TX_OPERATIONAL,
                         &sta->sta, tid, NULL);
+
+       /*
+        * synchronize with TX path, while splicing the TX path
+        * should block so it won't put more packets onto pending.
+        */
+       spin_lock_bh(&sta->lock);
+
+       ieee80211_agg_splice_packets(local, sta->ampdu_mlme.tid_tx[tid], tid);
+       /*
+        * Now mark as operational. This will be visible
+        * in the TX path, and lets it go lock-free in
+        * the common case.
+        */
+       set_bit(HT_AGG_STATE_OPERATIONAL, &sta->ampdu_mlme.tid_tx[tid]->state);
+       ieee80211_agg_splice_finish(local, tid);
+
+       spin_unlock_bh(&sta->lock);
 }
 
 void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
@@ -440,7 +505,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
        struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
        struct ieee80211_local *local = sdata->local;
        struct sta_info *sta;
-       u8 *state;
+       struct tid_ampdu_tx *tid_tx;
 
        trace_api_start_tx_ba_cb(sdata, ra, tid);
 
@@ -452,42 +517,36 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
                return;
        }
 
-       rcu_read_lock();
+       mutex_lock(&local->sta_mtx);
        sta = sta_info_get(sdata, ra);
        if (!sta) {
-               rcu_read_unlock();
+               mutex_unlock(&local->sta_mtx);
 #ifdef CONFIG_MAC80211_HT_DEBUG
                printk(KERN_DEBUG "Could not find station: %pM\n", ra);
 #endif
                return;
        }
 
-       state = &sta->ampdu_mlme.tid_state_tx[tid];
-       spin_lock_bh(&sta->lock);
+       mutex_lock(&sta->ampdu_mlme.mtx);
+       tid_tx = sta->ampdu_mlme.tid_tx[tid];
 
-       if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) {
+       if (WARN_ON(!tid_tx)) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
-               printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
-                               *state);
+               printk(KERN_DEBUG "addBA was not requested!\n");
 #endif
-               spin_unlock_bh(&sta->lock);
-               rcu_read_unlock();
-               return;
+               goto unlock;
        }
 
-       if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK))
-               goto out;
-
-       *state |= HT_ADDBA_DRV_READY_MSK;
+       if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
+               goto unlock;
 
-       if (*state == HT_AGG_STATE_OPERATIONAL)
+       if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
                ieee80211_agg_tx_operational(local, sta, tid);
 
out:
-       spin_unlock_bh(&sta->lock);
-       rcu_read_unlock();
unlock:
+       mutex_unlock(&sta->ampdu_mlme.mtx);
+       mutex_unlock(&local->sta_mtx);
 }
-EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
 
 void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
                                      const u8 *ra, u16 tid)
@@ -508,44 +567,36 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
        ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
        memcpy(&ra_tid->ra, ra, ETH_ALEN);
        ra_tid->tid = tid;
-       ra_tid->vif = vif;
 
-       skb->pkt_type = IEEE80211_ADDBA_MSG;
-       skb_queue_tail(&local->skb_queue, skb);
-       tasklet_schedule(&local->tasklet);
+       skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START;
+       skb_queue_tail(&sdata->skb_queue, skb);
+       ieee80211_queue_work(&local->hw, &sdata->work);
 }
 EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
 
 int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
                                   enum ieee80211_back_parties initiator)
 {
-       u8 *state;
        int ret;
 
-       /* check if the TID is in aggregation */
-       state = &sta->ampdu_mlme.tid_state_tx[tid];
-       spin_lock_bh(&sta->lock);
-
-       if (*state != HT_AGG_STATE_OPERATIONAL) {
-               ret = -ENOENT;
-               goto unlock;
-       }
+       mutex_lock(&sta->ampdu_mlme.mtx);
 
        ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator);
 
- unlock:
-       spin_unlock_bh(&sta->lock);
+       mutex_unlock(&sta->ampdu_mlme.mtx);
+
        return ret;
 }
 
-int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
-                                enum ieee80211_back_parties initiator)
+int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 {
        struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
        struct ieee80211_sub_if_data *sdata = sta->sdata;
        struct ieee80211_local *local = sdata->local;
+       struct tid_ampdu_tx *tid_tx;
+       int ret = 0;
 
-       trace_api_stop_tx_ba_session(pubsta, tid, initiator);
+       trace_api_stop_tx_ba_session(pubsta, tid);
 
        if (!local->ops->ampdu_action)
                return -EINVAL;
@@ -553,7 +604,26 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
        if (tid >= STA_TID_NUM)
                return -EINVAL;
 
-       return __ieee80211_stop_tx_ba_session(sta, tid, initiator);
+       spin_lock_bh(&sta->lock);
+       tid_tx = sta->ampdu_mlme.tid_tx[tid];
+
+       if (!tid_tx) {
+               ret = -ENOENT;
+               goto unlock;
+       }
+
+       if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
+               /* already in progress stopping it */
+               ret = 0;
+               goto unlock;
+       }
+
+       set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state);
+       ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+
+ unlock:
+       spin_unlock_bh(&sta->lock);
+       return ret;
 }
 EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
 
@@ -562,7 +632,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
        struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
        struct ieee80211_local *local = sdata->local;
        struct sta_info *sta;
-       u8 *state;
+       struct tid_ampdu_tx *tid_tx;
 
        trace_api_stop_tx_ba_cb(sdata, ra, tid);
 
@@ -579,51 +649,56 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
               ra, tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
-       rcu_read_lock();
+       mutex_lock(&local->sta_mtx);
+
        sta = sta_info_get(sdata, ra);
        if (!sta) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
                printk(KERN_DEBUG "Could not find station: %pM\n", ra);
 #endif
-               rcu_read_unlock();
-               return;
+               goto unlock;
        }
-       state = &sta->ampdu_mlme.tid_state_tx[tid];
 
-       /* NOTE: no need to use sta->lock in this state check, as
-        * ieee80211_stop_tx_ba_session will let only one stop call to
-        * pass through per sta/tid
-        */
-       if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
+       mutex_lock(&sta->ampdu_mlme.mtx);
+       spin_lock_bh(&sta->lock);
+       tid_tx = sta->ampdu_mlme.tid_tx[tid];
+
+       if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
                printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
 #endif
-               rcu_read_unlock();
-               return;
+               goto unlock_sta;
        }
 
-       if (*state & HT_AGG_STATE_INITIATOR_MSK)
+       if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR)
                ieee80211_send_delba(sta->sdata, ra, tid,
                        WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
 
-       spin_lock_bh(&sta->lock);
-       spin_lock(&local->ampdu_lock);
+       /*
+        * When we get here, the TX path will not be lockless any more wrt.
+        * aggregation, since the OPERATIONAL bit has long been cleared.
+        * Thus it will block on getting the lock, if it occurs. So if we
+        * stop the queue now, we will not get any more packets, and any
+        * that might be being processed will wait for us here, thereby
+        * guaranteeing that no packets go to the tid_tx pending queue any
+        * more.
+        */
 
-       ieee80211_agg_splice_packets(local, sta, tid);
+       ieee80211_agg_splice_packets(local, tid_tx, tid);
 
-       *state = HT_AGG_STATE_IDLE;
-       /* from now on packets are no longer put onto sta->pending */
-       kfree(sta->ampdu_mlme.tid_tx[tid]);
-       sta->ampdu_mlme.tid_tx[tid] = NULL;
+       /* future packets must not find the tid_tx struct any more */
+       rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL);
 
-       ieee80211_agg_splice_finish(local, sta, tid);
+       ieee80211_agg_splice_finish(local, tid);
 
-       spin_unlock(&local->ampdu_lock);
-       spin_unlock_bh(&sta->lock);
+       call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
 
-       rcu_read_unlock();
+ unlock_sta:
+       spin_unlock_bh(&sta->lock);
+       mutex_unlock(&sta->ampdu_mlme.mtx);
+ unlock:
+       mutex_unlock(&local->sta_mtx);
 }
-EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
 
 void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
                                     const u8 *ra, u16 tid)
@@ -644,11 +719,10 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
        ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
        memcpy(&ra_tid->ra, ra, ETH_ALEN);
        ra_tid->tid = tid;
-       ra_tid->vif = vif;
 
-       skb->pkt_type = IEEE80211_DELBA_MSG;
-       skb_queue_tail(&local->skb_queue, skb);
-       tasklet_schedule(&local->tasklet);
+       skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP;
+       skb_queue_tail(&sdata->skb_queue, skb);
+       ieee80211_queue_work(&local->hw, &sdata->work);
 }
 EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
 
@@ -658,40 +732,40 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
                                  struct ieee80211_mgmt *mgmt,
                                  size_t len)
 {
+       struct tid_ampdu_tx *tid_tx;
        u16 capab, tid;
-       u8 *state;
 
        capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
        tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 
-       state = &sta->ampdu_mlme.tid_state_tx[tid];
-
-       spin_lock_bh(&sta->lock);
+       mutex_lock(&sta->ampdu_mlme.mtx);
 
-       if (!(*state & HT_ADDBA_REQUESTED_MSK))
+       tid_tx = sta->ampdu_mlme.tid_tx[tid];
+       if (!tid_tx)
                goto out;
 
-       if (mgmt->u.action.u.addba_resp.dialog_token !=
-               sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
+       if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
                printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
+#endif
                goto out;
        }
 
-       del_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
+       del_timer(&tid_tx->addba_resp_timer);
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
        printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
+#endif
 
        if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
                        == WLAN_STATUS_SUCCESS) {
-               u8 curstate = *state;
-
-               *state |= HT_ADDBA_RECEIVED_MSK;
+               if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED,
+                                    &tid_tx->state)) {
+                       /* ignore duplicate response */
+                       goto out;
+               }
 
-               if (*state != curstate && *state == HT_AGG_STATE_OPERATIONAL)
+               if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))
                        ieee80211_agg_tx_operational(local, sta, tid);
 
                sta->ampdu_mlme.addba_req_num[tid] = 0;
@@ -700,5 +774,5 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
        }
 
  out:
-       spin_unlock_bh(&sta->lock);
+       mutex_unlock(&sta->ampdu_mlme.mtx);
 }