target: Fix NULL dereference during LUN lookup + active I/O shutdown
author Nicholas Bellinger <nab@linux-iscsi.org>
Thu, 23 Feb 2017 06:06:32 +0000 (22:06 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 15 Mar 2017 01:57:14 +0000 (09:57 +0800)
commit bd4e2d2907fa23a11d46217064ecf80470ddae10 upstream.

When transport_clear_lun_ref() is shutting down a se_lun via
configfs with new I/O in-flight, it's possible to trigger a
NULL pointer dereference in transport_lookup_cmd_lun() due
to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD
checking before incrementing lun->lun_ref.count after
lun->lun_ref has switched to atomic_t mode.

This results in a NULL pointer dereference as LUN shutdown
code in core_tpg_remove_lun() continues running after the
existing ->release() -> core_tpg_lun_ref_release() callback
completes, and clears the RCU protected se_lun->lun_se_dev
pointer.

During the Oops, the state of lun->lun_ref in the process
which triggered the NULL pointer dereference looks like
the following on v4.1.y stable code:

struct se_lun {
  lun_link_magic = 4294932337,
  lun_status = TRANSPORT_LUN_STATUS_FREE,

  .....

  lun_se_dev = 0x0,
  lun_sep = 0x0,

  .....

  lun_ref = {
    count = {
      counter = 1
    },
    percpu_count_ptr = 3,
    release = 0xffffffffa02fa1e0 <core_tpg_lun_ref_release>,
    confirm_switch = 0x0,
    force_atomic = false,
    rcu = {
      next = 0xffff88154fa1a5d0,
      func = 0xffffffff8137c4c0 <percpu_ref_switch_to_atomic_rcu>
    }
  }
}

To address this bug, use percpu_ref_tryget_live() to ensure
that once __PERCPU_REF_DEAD is visible on all CPUs and ->lun_ref
has switched to atomic_t, all new I/Os will fail to obtain
a new lun->lun_ref reference.

Also use an explicit percpu_ref_kill_and_confirm() callback
to block on ->lun_ref_comp to allow the first stage and
associated RCU grace period to complete, and then block on
->lun_shutdown_comp waiting for the final percpu_ref_put()
to drop the last reference via transport_lun_remove_cmd()
before continuing with core_tpg_remove_lun() shutdown.

Reported-by: Rob Millner <rlm@daterainc.com>
Tested-by: Rob Millner <rlm@daterainc.com>
Cc: Rob Millner <rlm@daterainc.com>
Tested-by: Vaibhav Tandon <vst@datera.io>
Cc: Vaibhav Tandon <vst@datera.io>
Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/target/target_core_device.c
drivers/target/target_core_tpg.c
drivers/target/target_core_transport.c
include/target/target_core_base.h

index 356c80fbb304147d2e8c37cbc46ca7133c369355..bb6a6c35324ae2f5f1bd7907b459da9291dca330 100644 (file)
@@ -77,12 +77,16 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
                                        &deve->read_bytes);
 
                se_lun = rcu_dereference(deve->se_lun);
+
+               if (!percpu_ref_tryget_live(&se_lun->lun_ref)) {
+                       se_lun = NULL;
+                       goto out_unlock;
+               }
+
                se_cmd->se_lun = rcu_dereference(deve->se_lun);
                se_cmd->pr_res_key = deve->pr_res_key;
                se_cmd->orig_fe_lun = unpacked_lun;
                se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
-
-               percpu_ref_get(&se_lun->lun_ref);
                se_cmd->lun_ref_active = true;
 
                if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
@@ -96,6 +100,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
                        goto ref_dev;
                }
        }
+out_unlock:
        rcu_read_unlock();
 
        if (!se_lun) {
@@ -826,6 +831,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
        xcopy_lun = &dev->xcopy_lun;
        rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
        init_completion(&xcopy_lun->lun_ref_comp);
+       init_completion(&xcopy_lun->lun_shutdown_comp);
        INIT_LIST_HEAD(&xcopy_lun->lun_deve_list);
        INIT_LIST_HEAD(&xcopy_lun->lun_dev_link);
        mutex_init(&xcopy_lun->lun_tg_pt_md_mutex);
index 028854cda97bf312beb12fdd4e63ede070b222ea..2794c6ec5c3c5e43daf4783189ea9a87b15a8604 100644 (file)
@@ -539,7 +539,7 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref)
 {
        struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
 
-       complete(&lun->lun_ref_comp);
+       complete(&lun->lun_shutdown_comp);
 }
 
 int core_tpg_register(
@@ -666,6 +666,7 @@ struct se_lun *core_tpg_alloc_lun(
        lun->lun_link_magic = SE_LUN_LINK_MAGIC;
        atomic_set(&lun->lun_acl_count, 0);
        init_completion(&lun->lun_ref_comp);
+       init_completion(&lun->lun_shutdown_comp);
        INIT_LIST_HEAD(&lun->lun_deve_list);
        INIT_LIST_HEAD(&lun->lun_dev_link);
        atomic_set(&lun->lun_tg_pt_secondary_offline, 0);
index befe227448026a8497a9d13e672305dfaa8965c1..df2059984e147ee4e7417d8b4fb71c3fbe7e247a 100644 (file)
@@ -2680,10 +2680,39 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
 }
 EXPORT_SYMBOL(target_wait_for_sess_cmds);
 
+static void target_lun_confirm(struct percpu_ref *ref)
+{
+       struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
+
+       complete(&lun->lun_ref_comp);
+}
+
 void transport_clear_lun_ref(struct se_lun *lun)
 {
-       percpu_ref_kill(&lun->lun_ref);
+       /*
+        * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop
+        * the initial reference and schedule confirm kill to be
+        * executed after one full RCU grace period has completed.
+        */
+       percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm);
+       /*
+        * The first completion waits for percpu_ref_switch_to_atomic_rcu()
+        * to call target_lun_confirm after lun->lun_ref has been marked
+        * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t
+        * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref
+        * fails for all new incoming I/O.
+        */
        wait_for_completion(&lun->lun_ref_comp);
+       /*
+        * The second completion waits for percpu_ref_put_many() to
+        * invoke ->release() after lun->lun_ref has switched to
+        * atomic_t mode, and lun->lun_ref.count has reached zero.
+        *
+        * At this point all target-core lun->lun_ref references have
+        * been dropped via transport_lun_remove_cmd(), and it's safe
+        * to proceed with the remaining LUN shutdown.
+        */
+       wait_for_completion(&lun->lun_shutdown_comp);
 }
 
 static bool
index 800fe16cc36f44c5b90f08f9c5e2589f4e379c7f..ed66414b91f098a5733ae9ce3246626b4239c587 100644 (file)
@@ -740,6 +740,7 @@ struct se_lun {
        struct config_group     lun_group;
        struct se_port_stat_grps port_stat_grps;
        struct completion       lun_ref_comp;
+       struct completion       lun_shutdown_comp;
        struct percpu_ref       lun_ref;
        struct list_head        lun_dev_link;
        struct hlist_node       link;