drbd: Move list of epochs from mdev to tconn
[firefly-linux-kernel-4.4.55.git] / drivers / block / drbd / drbd_main.c
index f0a0e1759baba330f48d5bb35c647efdf77b4bf7..8b99f4e28ccc2cdc5ff03df07aad7853fda73eaa 100644 (file)
@@ -120,7 +120,6 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0
  */
 struct idr minors;
 struct list_head drbd_tconns;  /* list of struct drbd_tconn */
-DECLARE_RWSEM(drbd_cfg_rwsem);
 
 struct kmem_cache *drbd_request_cache;
 struct kmem_cache *drbd_ee_cache;      /* peer requests */
@@ -216,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn)
        tconn->oldest_tle = b;
        tconn->newest_tle = b;
        INIT_LIST_HEAD(&tconn->out_of_sequence_requests);
+       INIT_LIST_HEAD(&tconn->barrier_acked_requests);
 
        return 1;
 }
@@ -316,11 +316,11 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
           These have been list_move'd to the out_of_sequence_requests list in
           _req_mod(, BARRIER_ACKED) above.
           */
-       list_del_init(&b->requests);
+       list_splice_init(&b->requests, &tconn->barrier_acked_requests);
        mdev = b->w.mdev;
 
        nob = b->next;
-       if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+       if (test_and_clear_bit(CREATE_BARRIER, &tconn->flags)) {
                _tl_add_barrier(tconn, b);
                if (nob)
                        tconn->oldest_tle = nob;
@@ -368,8 +368,10 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
                        req = list_entry(le, struct drbd_request, tl_requests);
                        rv = _req_mod(req, what);
 
-                       n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
-                       n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
+                       if (rv & MR_WRITE)
+                               n_writes++;
+                       if (rv & MR_READ)
+                               n_reads++;
                }
                tmp = b->next;
 
@@ -379,7 +381,7 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
                                if (b->w.cb == NULL) {
                                        b->w.cb = w_send_barrier;
                                        inc_ap_pending(b->w.mdev);
-                                       set_bit(CREATE_BARRIER, &b->w.mdev->flags);
+                                       set_bit(CREATE_BARRIER, &tconn->flags);
                                }
 
                                drbd_queue_work(&tconn->data.work, &b->w);
@@ -418,8 +420,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
                b = tmp;
                list_splice(&carry_reads, &b->requests);
        }
-}
 
+       /* Actions operating on the disk state, also want to work on
+          requests that got barrier acked. */
+       switch (what) {
+       case FAIL_FROZEN_DISK_IO:
+       case RESTART_FROZEN_DISK_IO:
+               list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
+                       req = list_entry(le, struct drbd_request, tl_requests);
+                       _req_mod(req, what);
+               }
+       case CONNECTION_LOST_WHILE_PENDING:
+       case RESEND:
+               break;
+       default:
+               conn_err(tconn, "what = %d in _tl_restart()\n", what);
+       }
+}
 
 /**
  * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -431,10 +448,8 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
  */
 void tl_clear(struct drbd_tconn *tconn)
 {
-       struct drbd_conf *mdev;
        struct list_head *le, *tle;
        struct drbd_request *r;
-       int vnr;
 
        spin_lock_irq(&tconn->req_lock);
 
@@ -453,10 +468,7 @@ void tl_clear(struct drbd_tconn *tconn)
        }
 
        /* ensure bit indicating barrier is required is clear */
-       rcu_read_lock();
-       idr_for_each_entry(&tconn->volumes, mdev, vnr)
-               clear_bit(CREATE_BARRIER, &mdev->flags);
-       rcu_read_unlock();
+       clear_bit(CREATE_BARRIER, &tconn->flags);
 
        spin_unlock_irq(&tconn->req_lock);
 }
@@ -468,6 +480,41 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
        spin_unlock_irq(&tconn->req_lock);
 }
 
+/**
+ * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL
+ * @mdev:      DRBD device.
+ */
+void tl_abort_disk_io(struct drbd_conf *mdev)
+{
+       struct drbd_tconn *tconn = mdev->tconn;
+       struct drbd_tl_epoch *b;
+       struct list_head *le, *tle;
+       struct drbd_request *req;
+
+       spin_lock_irq(&tconn->req_lock);
+       b = tconn->oldest_tle;
+       while (b) {
+               list_for_each_safe(le, tle, &b->requests) {
+                       req = list_entry(le, struct drbd_request, tl_requests);
+                       if (!(req->rq_state & RQ_LOCAL_PENDING))
+                               continue;
+                       if (req->w.mdev == mdev)
+                               _req_mod(req, ABORT_DISK_IO);
+               }
+               b = b->next;
+       }
+
+       list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
+               req = list_entry(le, struct drbd_request, tl_requests);
+               if (!(req->rq_state & RQ_LOCAL_PENDING))
+                       continue;
+               if (req->w.mdev == mdev)
+                       _req_mod(req, ABORT_DISK_IO);
+       }
+
+       spin_unlock_irq(&tconn->req_lock);
+}
+
 static int drbd_thread_setup(void *arg)
 {
        struct drbd_thread *thi = (struct drbd_thread *) arg;
@@ -503,7 +550,7 @@ restart:
        thi->task = NULL;
        thi->t_state = NONE;
        smp_mb();
-       complete(&thi->stop);
+       complete_all(&thi->stop);
        spin_unlock_irqrestore(&thi->t_lock, flags);
 
        conn_info(tconn, "Terminating %s\n", current->comm);
@@ -757,14 +804,24 @@ static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr,
                return prepare_header80(buffer, cmd, size);
 }
 
+static void *__conn_prepare_command(struct drbd_tconn *tconn,
+                                   struct drbd_socket *sock)
+{
+       if (!sock->socket)
+               return NULL;
+       return sock->sbuf + drbd_header_size(tconn);
+}
+
 void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock)
 {
+       void *p;
+
        mutex_lock(&sock->mutex);
-       if (!sock->socket) {
+       p = __conn_prepare_command(tconn, sock);
+       if (!p)
                mutex_unlock(&sock->mutex);
-               return NULL;
-       }
-       return sock->sbuf + drbd_header_size(tconn);
+
+       return p;
 }
 
 void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock)
@@ -798,13 +855,20 @@ static int __send_command(struct drbd_tconn *tconn, int vnr,
        return err;
 }
 
+static int __conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
+                              enum drbd_packet cmd, unsigned int header_size,
+                              void *data, unsigned int size)
+{
+       return __send_command(tconn, 0, sock, cmd, header_size, data, size);
+}
+
 int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
                      enum drbd_packet cmd, unsigned int header_size,
                      void *data, unsigned int size)
 {
        int err;
 
-       err = __send_command(tconn, 0, sock, cmd, header_size, data, size);
+       err = __conn_send_command(tconn, sock, cmd, header_size, data, size);
        mutex_unlock(&sock->mutex);
        return err;
 }
@@ -849,6 +913,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev)
        const int apv = mdev->tconn->agreed_pro_version;
        enum drbd_packet cmd;
        struct net_conf *nc;
+       struct disk_conf *dc;
 
        sock = &mdev->tconn->data;
        p = drbd_prepare_command(mdev, sock);
@@ -870,14 +935,15 @@ int drbd_send_sync_param(struct drbd_conf *mdev)
        memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
 
        if (get_ldev(mdev)) {
-               p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate);
-               p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead);
-               p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target);
-               p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target);
-               p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate);
+               dc = rcu_dereference(mdev->ldev->disk_conf);
+               p->resync_rate = cpu_to_be32(dc->resync_rate);
+               p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
+               p->c_delay_target = cpu_to_be32(dc->c_delay_target);
+               p->c_fill_target = cpu_to_be32(dc->c_fill_target);
+               p->c_max_rate = cpu_to_be32(dc->c_max_rate);
                put_ldev(mdev);
        } else {
-               p->rate = cpu_to_be32(DRBD_RATE_DEF);
+               p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
                p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
                p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
                p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
@@ -893,7 +959,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev)
        return drbd_send_command(mdev, sock, cmd, size, NULL, 0);
 }
 
-int drbd_send_protocol(struct drbd_tconn *tconn)
+int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
 {
        struct drbd_socket *sock;
        struct p_protocol *p;
@@ -901,14 +967,14 @@ int drbd_send_protocol(struct drbd_tconn *tconn)
        int size, cf;
 
        sock = &tconn->data;
-       p = conn_prepare_command(tconn, sock);
+       p = __conn_prepare_command(tconn, sock);
        if (!p)
                return -EIO;
 
        rcu_read_lock();
        nc = rcu_dereference(tconn->net_conf);
 
-       if (nc->dry_run && tconn->agreed_pro_version < 92) {
+       if (nc->tentative && tconn->agreed_pro_version < 92) {
                rcu_read_unlock();
                mutex_unlock(&sock->mutex);
                conn_err(tconn, "--dry-run is not supported by peer");
@@ -925,9 +991,9 @@ int drbd_send_protocol(struct drbd_tconn *tconn)
        p->after_sb_2p   = cpu_to_be32(nc->after_sb_2p);
        p->two_primaries = cpu_to_be32(nc->two_primaries);
        cf = 0;
-       if (nc->want_lose)
-               cf |= CF_WANT_LOSE;
-       if (nc->dry_run)
+       if (nc->discard_my_data)
+               cf |= CF_DISCARD_MY_DATA;
+       if (nc->tentative)
                cf |= CF_DRY_RUN;
        p->conn_flags    = cpu_to_be32(cf);
 
@@ -935,7 +1001,18 @@ int drbd_send_protocol(struct drbd_tconn *tconn)
                strcpy(p->integrity_alg, nc->integrity_alg);
        rcu_read_unlock();
 
-       return conn_send_command(tconn, sock, P_PROTOCOL, size, NULL, 0);
+       return __conn_send_command(tconn, sock, cmd, size, NULL, 0);
+}
+
+int drbd_send_protocol(struct drbd_tconn *tconn)
+{
+       int err;
+
+       mutex_lock(&tconn->data.mutex);
+       err = __drbd_send_protocol(tconn, P_PROTOCOL);
+       mutex_unlock(&tconn->data.mutex);
+
+       return err;
 }
 
 int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
@@ -959,7 +1036,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
        mdev->comm_bm_set = drbd_bm_total_weight(mdev);
        p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
        rcu_read_lock();
-       uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->want_lose ? 1 : 0;
+       uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0;
        rcu_read_unlock();
        uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
        uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
@@ -1005,7 +1082,11 @@ void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
 
        D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
 
-       uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+       uuid = mdev->ldev->md.uuid[UI_BITMAP];
+       if (uuid && uuid != UUID_JUST_CREATED)
+               uuid = uuid + UUID_NEW_BM_OFFSET;
+       else
+               get_random_bytes(&uuid, sizeof(u64));
        drbd_uuid_set(mdev, UI_BITMAP, uuid);
        drbd_print_uuids(mdev, "updated sync UUID");
        drbd_md_sync(mdev);
@@ -1028,7 +1109,9 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
        if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
                D_ASSERT(mdev->ldev->backing_bdev);
                d_size = drbd_get_max_capacity(mdev->ldev);
-               u_size = mdev->ldev->dc.disk_size;
+               rcu_read_lock();
+               u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+               rcu_read_unlock();
                q_order_type = drbd_queue_order_type(mdev);
                max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
                max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
@@ -1044,6 +1127,12 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
        p = drbd_prepare_command(mdev, sock);
        if (!p)
                return -EIO;
+
+       if (mdev->tconn->agreed_pro_version <= 94)
+               max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+       else if (mdev->tconn->agreed_pro_version < 100)
+               max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE_P95);
+
        p->d_size = cpu_to_be64(d_size);
        p->u_size = cpu_to_be64(u_size);
        p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
@@ -1054,10 +1143,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
 }
 
 /**
- * drbd_send_state() - Sends the drbd state to the peer
+ * drbd_send_current_state() - Sends the drbd state to the peer
  * @mdev:      DRBD device.
  */
-int drbd_send_state(struct drbd_conf *mdev)
+int drbd_send_current_state(struct drbd_conf *mdev)
 {
        struct drbd_socket *sock;
        struct p_state *p;
@@ -1070,6 +1159,29 @@ int drbd_send_state(struct drbd_conf *mdev)
        return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
 }
 
+/**
+ * drbd_send_state() - After a state change, sends the new state to the peer
+ * @mdev:      DRBD device.
+ * @state:     the state to send, not necessarily the current state.
+ *
+ * Each state change queues an "after_state_ch" work, which will eventually
+ * send the resulting new state to the peer. If more state changes happen
+ * between queuing and processing of the after_state_ch work, we still
+ * want to send each intermediary state in the order it occurred.
+ */
+int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
+{
+       struct drbd_socket *sock;
+       struct p_state *p;
+
+       sock = &mdev->tconn->data;
+       p = drbd_prepare_command(mdev, sock);
+       if (!p)
+               return -EIO;
+       p->state = cpu_to_be32(state.i); /* Within the send mutex */
+       return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
+}
+
 int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val)
 {
        struct drbd_socket *sock;
@@ -1082,7 +1194,6 @@ int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drb
        p->mask = cpu_to_be32(mask.i);
        p->val = cpu_to_be32(val.i);
        return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0);
-
 }
 
 int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
@@ -1405,8 +1516,8 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
 void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
                      struct p_data *dp, int data_size)
 {
-       data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
-               crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
+       if (mdev->tconn->peer_integrity_tfm)
+               data_size -= crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
        _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
                       dp->block_id);
 }
@@ -1689,11 +1800,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
        int dgs;
        int err;
 
-       dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
-               crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
-
        sock = &mdev->tconn->data;
        p = drbd_prepare_command(mdev, sock);
+       dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;
+
        if (!p)
                return -EIO;
        p->sector = cpu_to_be64(req->i.sector);
@@ -1711,7 +1821,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
        }
        p->dp_flags = cpu_to_be32(dp_flags);
        if (dgs)
-               drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, p + 1);
+               drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, p + 1);
        err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
        if (!err) {
                /* For protocol A, we have to memcpy the payload into
@@ -1735,7 +1845,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
                        /* 64 byte, 512 bit, is the largest digest size
                         * currently supported in kernel crypto. */
                        unsigned char digest[64];
-                       drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, digest);
+                       drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, digest);
                        if (memcmp(p + 1, digest, dgs)) {
                                dev_warn(DEV,
                                        "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
@@ -1762,18 +1872,18 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd,
        int err;
        int dgs;
 
-       dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
-               crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
-
        sock = &mdev->tconn->data;
        p = drbd_prepare_command(mdev, sock);
+
+       dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;
+
        if (!p)
                return -EIO;
        p->sector = cpu_to_be64(peer_req->i.sector);
        p->block_id = peer_req->block_id;
        p->seq_num = 0;  /* unused */
        if (dgs)
-               drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, p + 1);
+               drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1);
        err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size);
        if (!err)
                err = _drbd_send_zc_ee(mdev, peer_req);
@@ -1965,14 +2075,13 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        atomic_set(&mdev->rs_sect_in, 0);
        atomic_set(&mdev->rs_sect_ev, 0);
        atomic_set(&mdev->ap_in_flight, 0);
+       atomic_set(&mdev->md_io_in_use, 0);
 
-       mutex_init(&mdev->md_io_mutex);
        mutex_init(&mdev->own_state_mutex);
        mdev->state_mutex = &mdev->own_state_mutex;
 
        spin_lock_init(&mdev->al_lock);
        spin_lock_init(&mdev->peer_seq_lock);
-       spin_lock_init(&mdev->epoch_lock);
 
        INIT_LIST_HEAD(&mdev->active_ee);
        INIT_LIST_HEAD(&mdev->sync_ee);
@@ -2020,8 +2129,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        init_waitqueue_head(&mdev->al_wait);
        init_waitqueue_head(&mdev->seq_wait);
 
-       /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */
-       mdev->write_ordering = WO_bdev_flush;
        mdev->resync_wenr = LC_FREE;
        mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
        mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
@@ -2034,9 +2141,6 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
                dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
                                mdev->tconn->receiver.t_state);
 
-       /* no need to lock it, I'm the only thread alive */
-       if (atomic_read(&mdev->current_epoch->epoch_size) !=  0)
-               dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
        mdev->al_writ_cnt  =
        mdev->bm_writ_cnt  =
        mdev->read_cnt     =
@@ -2240,21 +2344,18 @@ static void drbd_release_all_peer_reqs(struct drbd_conf *mdev)
 }
 
 /* caution. no locking. */
-void drbd_delete_device(struct drbd_conf *mdev)
+void drbd_minor_destroy(struct kref *kref)
 {
+       struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref);
        struct drbd_tconn *tconn = mdev->tconn;
 
-       idr_remove(&mdev->tconn->volumes, mdev->vnr);
-       idr_remove(&minors, mdev_to_minor(mdev));
-       synchronize_rcu();
+       del_timer_sync(&mdev->request_timer);
 
        /* paranoia asserts */
        D_ASSERT(mdev->open_cnt == 0);
        D_ASSERT(list_empty(&mdev->tconn->data.work.q));
        /* end paranoia asserts */
 
-       del_gendisk(mdev->vdisk);
-
        /* cleanup stuff that may have been allocated during
         * device (re-)configuration or state changes */
 
@@ -2272,12 +2373,12 @@ void drbd_delete_device(struct drbd_conf *mdev)
        kfree(mdev->p_uuid);
        /* mdev->p_uuid = NULL; */
 
-       kfree(mdev->current_epoch);
        if (mdev->bitmap) /* should no longer be there. */
                drbd_bm_cleanup(mdev);
        __free_page(mdev->md_io_page);
        put_disk(mdev->vdisk);
        blk_cleanup_queue(mdev->rq_queue);
+       kfree(mdev->rs_plan_s);
        kfree(mdev);
 
        kref_put(&tconn->kref, &conn_destroy);
@@ -2287,6 +2388,7 @@ static void drbd_cleanup(void)
 {
        unsigned int i;
        struct drbd_conf *mdev;
+       struct drbd_tconn *tconn, *tmp;
 
        unregister_reboot_notifier(&drbd_notifier);
 
@@ -2303,10 +2405,20 @@ static void drbd_cleanup(void)
 
        drbd_genl_unregister();
 
-       down_write(&drbd_cfg_rwsem);
-       idr_for_each_entry(&minors, mdev, i)
-               drbd_delete_device(mdev);
-       up_write(&drbd_cfg_rwsem);
+       idr_for_each_entry(&minors, mdev, i) {
+               idr_remove(&minors, mdev_to_minor(mdev));
+               idr_remove(&mdev->tconn->volumes, mdev->vnr);
+               del_gendisk(mdev->vdisk);
+               /* synchronize_rcu(); No other threads running at this point */
+               kref_put(&mdev->kref, &drbd_minor_destroy);
+       }
+
+       /* not _rcu since no other updater remains; genl already unregistered */
+       list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
+               list_del(&tconn->all_tconn); /* not _rcu: no proc, no other threads */
+               /* synchronize_rcu(); */
+               kref_put(&tconn->kref, &conn_destroy);
+       }
 
        drbd_destroy_mempools();
        unregister_blkdev(DRBD_MAJOR, "drbd");
@@ -2369,8 +2481,8 @@ struct drbd_tconn *conn_get_by_name(const char *name)
        if (!name || !name[0])
                return NULL;
 
-       down_read(&drbd_cfg_rwsem);
-       list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
                if (!strcmp(tconn->name, name)) {
                        kref_get(&tconn->kref);
                        goto found;
@@ -2378,7 +2490,28 @@ struct drbd_tconn *conn_get_by_name(const char *name)
        }
        tconn = NULL;
 found:
-       up_read(&drbd_cfg_rwsem);
+       rcu_read_unlock();
+       return tconn;
+}
+
+struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
+                                    void *peer_addr, int peer_addr_len)
+{
+       struct drbd_tconn *tconn;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
+               if (tconn->my_addr_len == my_addr_len &&
+                   tconn->peer_addr_len == peer_addr_len &&
+                   !memcmp(&tconn->my_addr, my_addr, my_addr_len) &&
+                   !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) {
+                       kref_get(&tconn->kref);
+                       goto found;
+               }
+       }
+       tconn = NULL;
+found:
+       rcu_read_unlock();
        return tconn;
 }
 
@@ -2406,21 +2539,61 @@ void conn_free_crypto(struct drbd_tconn *tconn)
        crypto_free_hash(tconn->csums_tfm);
        crypto_free_hash(tconn->verify_tfm);
        crypto_free_hash(tconn->cram_hmac_tfm);
-       crypto_free_hash(tconn->integrity_w_tfm);
-       crypto_free_hash(tconn->integrity_r_tfm);
+       crypto_free_hash(tconn->integrity_tfm);
+       crypto_free_hash(tconn->peer_integrity_tfm);
        kfree(tconn->int_dig_in);
        kfree(tconn->int_dig_vv);
 
        tconn->csums_tfm = NULL;
        tconn->verify_tfm = NULL;
        tconn->cram_hmac_tfm = NULL;
-       tconn->integrity_w_tfm = NULL;
-       tconn->integrity_r_tfm = NULL;
+       tconn->integrity_tfm = NULL;
+       tconn->peer_integrity_tfm = NULL;
        tconn->int_dig_in = NULL;
        tconn->int_dig_vv = NULL;
 }
 
-struct drbd_tconn *conn_create(const char *name)
+int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts)
+{
+       cpumask_var_t new_cpu_mask;
+       int err;
+
+       if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL))
+               return -ENOMEM;
+               /*
+               retcode = ERR_NOMEM;
+               drbd_msg_put_info("unable to allocate cpumask");
+               */
+
+       /* silently ignore cpu mask on UP kernel */
+       if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
+               /* FIXME: Get rid of constant 32 here */
+               err = __bitmap_parse(res_opts->cpu_mask, 32, 0,
+                               cpumask_bits(new_cpu_mask), nr_cpu_ids);
+               if (err) {
+                       conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
+                       /* retcode = ERR_CPU_MASK_PARSE; */
+                       goto fail;
+               }
+       }
+       tconn->res_opts = *res_opts;
+       if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
+               cpumask_copy(tconn->cpu_mask, new_cpu_mask);
+               drbd_calc_cpu_mask(tconn);
+               tconn->receiver.reset_cpu_mask = 1;
+               tconn->asender.reset_cpu_mask = 1;
+               tconn->worker.reset_cpu_mask = 1;
+       }
+       err = 0;
+
+fail:
+       free_cpumask_var(new_cpu_mask);
+       return err;
+
+}
+
+/* caller must be under genl_lock() */
+struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
 {
        struct drbd_tconn *tconn;
 
@@ -2440,13 +2613,24 @@ struct drbd_tconn *conn_create(const char *name)
        if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
                goto fail;
 
+       if (set_resource_options(tconn, res_opts))
+               goto fail;
+
        if (!tl_init(tconn))
                goto fail;
 
+       tconn->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
+       if (!tconn->current_epoch)
+               goto fail;
+       INIT_LIST_HEAD(&tconn->current_epoch->list);
+       tconn->epochs = 1;
+       spin_lock_init(&tconn->epoch_lock);
+       tconn->write_ordering = WO_bdev_flush;
+
        tconn->cstate = C_STANDALONE;
        mutex_init(&tconn->cstate_mutex);
        spin_lock_init(&tconn->req_lock);
-       mutex_init(&tconn->net_conf_update);
+       mutex_init(&tconn->conf_update);
        init_waitqueue_head(&tconn->ping_wait);
        idr_init(&tconn->volumes);
 
@@ -2460,19 +2644,13 @@ struct drbd_tconn *conn_create(const char *name)
        drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
        drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
 
-       tconn->res_opts = (struct res_opts) {
-               {}, 0, /* cpu_mask */
-               DRBD_ON_NO_DATA_DEF, /* on_no_data */
-       };
-
-       down_write(&drbd_cfg_rwsem);
        kref_init(&tconn->kref);
-       list_add_tail(&tconn->all_tconn, &drbd_tconns);
-       up_write(&drbd_cfg_rwsem);
+       list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns);
 
        return tconn;
 
 fail:
+       kfree(tconn->current_epoch);
        tl_cleanup(tconn);
        free_cpumask_var(tconn->cpu_mask);
        drbd_free_socket(&tconn->meta);
@@ -2487,6 +2665,10 @@ void conn_destroy(struct kref *kref)
 {
        struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref);
 
+       if (atomic_read(&tconn->current_epoch->epoch_size) !=  0)
+               conn_err(tconn, "epoch_size:%d\n", atomic_read(&tconn->current_epoch->epoch_size));
+       kfree(tconn->current_epoch);
+
        idr_destroy(&tconn->volumes);
 
        free_cpumask_var(tconn->cpu_mask);
@@ -2568,13 +2750,6 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
        mdev->read_requests = RB_ROOT;
        mdev->write_requests = RB_ROOT;
 
-       mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
-       if (!mdev->current_epoch)
-               goto out_no_epoch;
-
-       INIT_LIST_HEAD(&mdev->current_epoch->list);
-       mdev->epochs = 1;
-
        if (!idr_pre_get(&minors, GFP_KERNEL))
                goto out_no_minor_idr;
        if (idr_get_new_above(&minors, mdev, minor, &minor_got))
@@ -2595,11 +2770,12 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
                goto out_idr_remove_vol;
        }
        add_disk(disk);
+       kref_init(&mdev->kref); /* one ref for both idrs and the add_disk */
 
        /* inherit the connection state */
        mdev->state.conn = tconn->cstate;
        if (mdev->state.conn == C_WF_REPORT_PARAMS)
-               drbd_connected(vnr, mdev, tconn);
+               drbd_connected(mdev);
 
        return NO_ERROR;
 
@@ -2609,8 +2785,6 @@ out_idr_remove_minor:
        idr_remove(&minors, minor_got);
        synchronize_rcu();
 out_no_minor_idr:
-       kfree(mdev->current_epoch);
-out_no_epoch:
        drbd_bm_cleanup(mdev);
 out_no_bitmap:
        __free_page(mdev->md_io_page);
@@ -2634,7 +2808,7 @@ int __init drbd_init(void)
 #ifdef MODULE
                return -EINVAL;
 #else
-               minor_count = 8;
+               minor_count = DRBD_MINOR_COUNT_DEF;
 #endif
        }
 
@@ -2766,15 +2940,17 @@ void drbd_md_sync(struct drbd_conf *mdev)
        if (!get_ldev_if_state(mdev, D_FAILED))
                return;
 
-       mutex_lock(&mdev->md_io_mutex);
-       buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+       buffer = drbd_md_get_buffer(mdev);
+       if (!buffer)
+               goto out;
+
        memset(buffer, 0, 512);
 
        buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
        for (i = UI_CURRENT; i < UI_SIZE; i++)
                buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
        buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
-       buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
+       buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN);
 
        buffer->md_size_sect  = cpu_to_be32(mdev->ldev->md.md_size_sect);
        buffer->al_offset     = cpu_to_be32(mdev->ldev->md.al_offset);
@@ -2798,7 +2974,8 @@ void drbd_md_sync(struct drbd_conf *mdev)
         * since we updated it on metadata. */
        mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
 
-       mutex_unlock(&mdev->md_io_mutex);
+       drbd_md_put_buffer(mdev);
+out:
        put_ldev(mdev);
 }
 
@@ -2808,18 +2985,20 @@ void drbd_md_sync(struct drbd_conf *mdev)
  * @bdev:      Device from which the meta data should be read in.
  *
  * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
- * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
+ * something goes wrong.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
        struct meta_data_on_disk *buffer;
+       u32 magic, flags;
        int i, rv = NO_ERROR;
 
        if (!get_ldev_if_state(mdev, D_ATTACHING))
                return ERR_IO_MD_DISK;
 
-       mutex_lock(&mdev->md_io_mutex);
-       buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+       buffer = drbd_md_get_buffer(mdev);
+       if (!buffer)
+               goto out;
 
        if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
                /* NOTE: can't do normal error processing here as this is
@@ -2829,8 +3008,20 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
                goto err;
        }
 
-       if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) {
-               dev_err(DEV, "Error while reading metadata, magic not found.\n");
+       magic = be32_to_cpu(buffer->magic);
+       flags = be32_to_cpu(buffer->flags);
+       if (magic == DRBD_MD_MAGIC_84_UNCLEAN ||
+           (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) {
+                       /* btw: that's Activity Log clean, not "all" clean. */
+               dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n");
+               rv = ERR_MD_UNCLEAN;
+               goto err;
+       }
+       if (magic != DRBD_MD_MAGIC_08) {
+               if (magic == DRBD_MD_MAGIC_07)
+                       dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
+               else
+                       dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
                rv = ERR_MD_INVALID;
                goto err;
        }
@@ -2864,7 +3055,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
        for (i = UI_CURRENT; i < UI_SIZE; i++)
                bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
        bdev->md.flags = be32_to_cpu(buffer->flags);
-       bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents);
        bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
 
        spin_lock_irq(&mdev->tconn->req_lock);
@@ -2876,11 +3066,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
        }
        spin_unlock_irq(&mdev->tconn->req_lock);
 
-       if (bdev->dc.al_extents < 7)
-               bdev->dc.al_extents = 127;
-
  err:
-       mutex_unlock(&mdev->md_io_mutex);
+       drbd_md_put_buffer(mdev);
+ out:
        put_ldev(mdev);
 
        return rv;
@@ -3254,6 +3442,8 @@ const char *cmdname(enum drbd_packet cmd)
                [P_RS_CANCEL]           = "RSCancel",
                [P_CONN_ST_CHG_REQ]     = "conn_st_chg_req",
                [P_CONN_ST_CHG_REPLY]   = "conn_st_chg_reply",
+               [P_RETRY_WRITE]         = "retry_write",
+               [P_PROTOCOL_UPDATE]     = "protocol_update",
 
                /* enum drbd_packet, but not commands - obsoleted flags:
                 *      P_MAY_IGNORE