Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
[firefly-linux-kernel-4.4.55.git] / net / rds / ib.h
index 301c483851668aaf4cbefc7fbb0fb485cbfc1e9d..b3fdebb57460392ae9a751bfaae2e218ff9b6e17 100644 (file)
@@ -9,8 +9,11 @@
 #include "rds.h"
 #include "rdma_transport.h"
 
-#define RDS_FMR_SIZE                   256
-#define RDS_FMR_POOL_SIZE              8192
+#define RDS_FMR_1M_POOL_SIZE           (8192 / 2) /* = 4096 */
+#define RDS_FMR_1M_MSG_SIZE            256
+#define RDS_FMR_8K_MSG_SIZE            2
+#define RDS_MR_8K_SCALE                        (256 / (RDS_FMR_8K_MSG_SIZE + 1)) /* = 256 / 3 = 85 (integer division) */
+#define RDS_FMR_8K_POOL_SIZE           (RDS_MR_8K_SCALE * (8192 / 2)) /* = 85 * 4096 = 348160 */
 
 #define RDS_IB_MAX_SGE                 8
 #define RDS_IB_RECV_SGE                2
@@ -24,6 +27,9 @@
 
 #define RDS_IB_RECYCLE_BATCH_COUNT     32
 
+#define RDS_IB_WC_MAX                  32 /* length of the i_send_wc[] and i_recv_wc[] arrays in struct rds_ib_connection */
+#define RDS_IB_SEND_OP                 BIT_ULL(63) /* NOTE(review): presumably a flag in the top bit of a 64-bit value - confirm at the users */
+
 extern struct rw_semaphore rds_ib_devices_lock;
 extern struct list_head rds_ib_devices;
 
@@ -93,6 +99,20 @@ struct rds_ib_work_ring {
        atomic_t        w_free_ctr;
 };
 
+/* Rings are posted with all the allocations they'll need to queue the
+ * incoming message to the receiving socket, so that queueing can't fail.
+ * All fragments start with a header, so we can make sure we're not receiving
+ * garbage, and we can tell a small 8-byte fragment from an ACK frame.
+ */
+struct rds_ib_ack_state { /* passed by pointer to rds_ib_recv_cqe_handler() */
+       u64             ack_next; /* NOTE(review): presumably meaningful only when ack_next_valid is set - confirm */
+       u64             ack_recv; /* NOTE(review): presumably meaningful only when ack_recv_valid is set - confirm */
+       unsigned int    ack_required:1; /* one-bit flag; rds_ib_set_ack() takes a same-named argument */
+       unsigned int    ack_next_valid:1; /* one-bit flag */
+       unsigned int    ack_recv_valid:1; /* one-bit flag */
+};
+
+
 struct rds_ib_device;
 
 struct rds_ib_connection {
@@ -106,6 +126,12 @@ struct rds_ib_connection {
        struct ib_pd            *i_pd;
        struct ib_cq            *i_send_cq;
        struct ib_cq            *i_recv_cq;
+       struct ib_wc            i_send_wc[RDS_IB_WC_MAX];
+       struct ib_wc            i_recv_wc[RDS_IB_WC_MAX];
+
+       /* interrupt handling */
+       struct tasklet_struct   i_send_tasklet;
+       struct tasklet_struct   i_recv_tasklet;
 
        /* tx */
        struct rds_ib_work_ring i_send_ring;
@@ -116,7 +142,6 @@ struct rds_ib_connection {
        atomic_t                i_signaled_sends;
 
        /* rx */
-       struct tasklet_struct   i_recv_tasklet;
        struct mutex            i_recv_mutex;
        struct rds_ib_work_ring i_recv_ring;
        struct rds_ib_incoming  *i_ibinc;
@@ -168,6 +193,12 @@ struct rds_ib_connection {
 struct rds_ib_ipaddr {
        struct list_head        list;
        __be32                  ipaddr;
+       struct rcu_head         rcu;
+};
+
+enum { /* NOTE(review): presumably identifies one of the two MR pools (mr_8k_pool / mr_1m_pool) - confirm */
+       RDS_IB_MR_8K_POOL, /* = 0 */
+       RDS_IB_MR_1M_POOL, /* = 1 */
 };
 
 struct rds_ib_device {
@@ -176,9 +207,12 @@ struct rds_ib_device {
        struct list_head        conn_list;
        struct ib_device        *dev;
        struct ib_pd            *pd;
-       struct rds_ib_mr_pool   *mr_pool;
-       unsigned int            fmr_max_remaps;
        unsigned int            max_fmrs;
+       struct rds_ib_mr_pool   *mr_1m_pool;
+       struct rds_ib_mr_pool   *mr_8k_pool;
+       unsigned int            fmr_max_remaps;
+       unsigned int            max_8k_fmrs;
+       unsigned int            max_1m_fmrs;
        int                     max_sge;
        unsigned int            max_wrs;
        unsigned int            max_initiator_depth;
@@ -201,14 +235,14 @@ struct rds_ib_device {
 struct rds_ib_statistics {
        uint64_t        s_ib_connect_raced;
        uint64_t        s_ib_listen_closed_stale;
-       uint64_t        s_ib_tx_cq_call;
+       uint64_t        s_ib_evt_handler_call;
+       uint64_t        s_ib_tasklet_call;
        uint64_t        s_ib_tx_cq_event;
        uint64_t        s_ib_tx_ring_full;
        uint64_t        s_ib_tx_throttle;
        uint64_t        s_ib_tx_sg_mapping_failure;
        uint64_t        s_ib_tx_stalled;
        uint64_t        s_ib_tx_credit_updates;
-       uint64_t        s_ib_rx_cq_call;
        uint64_t        s_ib_rx_cq_event;
        uint64_t        s_ib_rx_ring_empty;
        uint64_t        s_ib_rx_refill_from_cq;
@@ -220,12 +254,18 @@ struct rds_ib_statistics {
        uint64_t        s_ib_ack_send_delayed;
        uint64_t        s_ib_ack_send_piggybacked;
        uint64_t        s_ib_ack_received;
-       uint64_t        s_ib_rdma_mr_alloc;
-       uint64_t        s_ib_rdma_mr_free;
-       uint64_t        s_ib_rdma_mr_used;
-       uint64_t        s_ib_rdma_mr_pool_flush;
-       uint64_t        s_ib_rdma_mr_pool_wait;
-       uint64_t        s_ib_rdma_mr_pool_depleted;
+       uint64_t        s_ib_rdma_mr_8k_alloc;
+       uint64_t        s_ib_rdma_mr_8k_free;
+       uint64_t        s_ib_rdma_mr_8k_used;
+       uint64_t        s_ib_rdma_mr_8k_pool_flush;
+       uint64_t        s_ib_rdma_mr_8k_pool_wait;
+       uint64_t        s_ib_rdma_mr_8k_pool_depleted;
+       uint64_t        s_ib_rdma_mr_1m_alloc;
+       uint64_t        s_ib_rdma_mr_1m_free;
+       uint64_t        s_ib_rdma_mr_1m_used;
+       uint64_t        s_ib_rdma_mr_1m_pool_flush;
+       uint64_t        s_ib_rdma_mr_1m_pool_wait;
+       uint64_t        s_ib_rdma_mr_1m_pool_depleted;
        uint64_t        s_ib_atomic_cswp;
        uint64_t        s_ib_atomic_fadd;
 };
@@ -277,7 +317,8 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
 extern struct ib_client rds_ib_client;
 
-extern unsigned int fmr_message_size;
+extern unsigned int rds_ib_fmr_1m_pool_size;
+extern unsigned int rds_ib_fmr_8k_pool_size;
 extern unsigned int rds_ib_retry_count;
 
 extern spinlock_t ib_nodev_conns_lock;
@@ -307,7 +348,8 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
 void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
 void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
 void rds_ib_destroy_nodev_conns(void);
-struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
+struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
+                                            int npages);
 void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -327,7 +369,8 @@ void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
 void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
 void rds_ib_inc_free(struct rds_incoming *inc);
 int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
-void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
+void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
+                            struct rds_ib_ack_state *state);
 void rds_ib_recv_tasklet_fn(unsigned long data);
 void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
 void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
@@ -335,6 +378,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
 void rds_ib_attempt_ack(struct rds_ib_connection *ic);
 void rds_ib_ack_send_complete(struct rds_ib_connection *ic);
 u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic);
+void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required);
 
 /* ib_ring.c */
 void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr);
@@ -352,7 +396,7 @@ extern wait_queue_head_t rds_ib_ring_empty_wait;
 void rds_ib_xmit_complete(struct rds_connection *conn);
 int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                unsigned int hdr_off, unsigned int sg, unsigned int off);
-void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
+void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
 void rds_ib_send_init_ring(struct rds_ib_connection *ic);
 void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
 int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);